# SHARK-Studio/amdshark/examples/amdshark_eager/eager_mode.py

# Copyright 2020 The Nod Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
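"""Eager-mode examples for AMDShark.

Demonstrates torch-mlir eager execution two ways: by assigning an
EagerModeIREELinalgOnTensorsBackend directly to torch_mlir_tensor.backend and
wrapping inputs in TorchMLIRTensor (test_cpu, test_gpu), and by installing
the AMDSharkEagerMode hook so ordinary torch tensors are dispatched through
the torch-mlir reference backend or IREE (the test_python_mode_* functions).
"""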
import torch
from torch.utils.cpp_extension import load_inline, include_paths
from torch_mlir.eager_mode import torch_mlir_tensor
from torch_mlir.eager_mode.torch_mlir_tensor import TorchMLIRTensor

from amdshark.iree_eager_backend import EagerModeIREELinalgOnTensorsBackend
from amdshark.amdshark_runner import AMDSharkEagerMode

# Iterations per test. Defined at module level (rather than under __main__)
# so the test functions also work when this file is imported.
NUM_ITERS = 10


def test_cpu():
    # Point torch-mlir eager mode at the IREE linalg-on-tensors CPU backend.
    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("cpu")
    t = torch.ones((10, 10), device="cpu")
    u = 2 * torch.ones((10, 10), device="cpu")
    tt = TorchMLIRTensor(t)
    print(tt)
    uu = TorchMLIRTensor(u)
    print(uu)
    for i in range(NUM_ITERS):
        yy = tt + uu
        print(type(yy))
        print(yy.elem.to_host())  # .elem holds the backend buffer; copy to host to print
        yy = tt * uu
        print(type(yy))
        print(yy.elem.to_host())


def test_gpu():
source = """
#include <iostream>
#include "cuda.h"
#include "cuda_runtime_api.h"
using namespace std;
void print_free_mem() {
int num_gpus;
size_t free, total;
cudaSetDevice(0);
int id;
cudaGetDevice(&id);
cudaMemGetInfo(&free, &total);
cout << "GPU " << id << " memory: used=" << (total-free)/(1<<20) << endl;
}
"""
gpu_stats = load_inline(
name="inline_extension",
cpp_sources=[source],
extra_include_paths=include_paths(cuda=True),
functions=["print_free_mem"],
)
    torch_mlir_tensor.backend = EagerModeIREELinalgOnTensorsBackend("gpu")
    t = torch.ones((10, 10), device="cpu")
    u = 2 * torch.ones((10, 10), device="cpu")
    tt = TorchMLIRTensor(t)
    print(tt)
    uu = TorchMLIRTensor(u)
    print(uu)
    for i in range(NUM_ITERS):
        yy = tt + uu
        print(yy.elem.to_host())
        yy = tt * uu
        print(yy.elem.to_host())
        gpu_stats.print_free_mem()


def test_python_mode_ref_backend():
    # Constructing AMDSharkEagerMode installs the eager-mode hooks as a side
    # effect; the instance only needs to stay alive.
    _ = AMDSharkEagerMode("refbackend")
    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")
    print(t)
    print(u)
    for i in range(NUM_ITERS):
        print(i)
        yy = t + u
        print(yy.elem)
        yy = t * u
        print(yy.elem)


def test_python_mode_iree_cpu():
    # As above, constructing AMDSharkEagerMode installs the hooks; here plain
    # torch tensor ops are dispatched to IREE on CPU.
    _ = AMDSharkEagerMode("cpu")
    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")
    print(t)
    print(u)
    for i in range(NUM_ITERS):
        yy = t + u
        print(type(yy))
        print(yy.elem.to_host())
        yy = t * u
        print(type(yy))
        print(yy.elem.to_host())


def test_python_mode_iree_gpu():
    _ = AMDSharkEagerMode("gpu")
    t = torch.ones((10, 10), device="cpu")
    u = torch.ones((10, 10), device="cpu")
    print(t)
    print(u)
    for i in range(NUM_ITERS):
        yy = t + u
        print(type(yy))
        print(yy.elem.to_host())
        yy = t * u
        print(type(yy))
        print(yy.elem.to_host())


if __name__ == "__main__":
    test_cpu()
    if torch.cuda.is_available():
        test_gpu()
    test_python_mode_ref_backend()
    test_python_mode_iree_cpu()
    # Guard the IREE GPU path the same way as test_gpu so the example still
    # runs on machines without a CUDA device.
    if torch.cuda.is_available():
        test_python_mode_iree_gpu()
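# Usage note (a sketch inferred from the __main__ guard above, not documented
# behavior): run the file directly, e.g. `python eager_mode.py`, from an
# environment with torch, torch-mlir (with eager mode), and amdshark
# installed. The CUDA-dependent tests are skipped when no GPU is present.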