Mirror of https://github.com/ROCm/ROCm.git, synced 2026-02-21 03:00:39 -05:00
add prints in c code
@@ -1107,92 +1107,126 @@ def generate_launcher(constants, signature):
format = "iiiiiKKOOO" + ''.join([format_of(_extracted_type(ty)) for ty in signature.values()])

# generate glue code
if torch.version.hip is not None:
src = f"""
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <Python.h>
static inline void gpuAssert(hipError_t code, const char *file, int line)
{{
if (code != HIP_SUCCESS)
{{
const char* prefix = "Triton Error [CUDA]: ";
const char* str = hipGetErrorString(code);
char err[1024] = {{0}};
strcat(err, prefix);
strcat(err, str);
PyErr_SetString(PyExc_RuntimeError, err);
}}
}}
#define CUDA_CHECK(ans) {{ gpuAssert((ans), __FILE__, __LINE__); }}
void _launch(int gridX, int gridY, int gridZ, int num_warps, int shared_memory, hipStream_t stream, hipFunction_t function, {arg_decls}) {{
void *params[] = {{ {', '.join(f"&arg{i}" for i in signature.keys() if i not in constants)} }};
if(gridX*gridY*gridZ > 0){{
hipModuleLaunchKernel(function, gridX, gridY, gridZ, 32*num_warps, 1, 1, shared_memory, stream, params, 0);
}}
}}
static inline hipDeviceptr_t getPointer(PyObject *obj, int idx) {{
if (PyLong_Check(obj)) {{
return (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
}}
if (obj == Py_None) {{
return (hipDeviceptr_t)0;
}}
PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
if(ptr){{
PyObject *empty_tuple = PyTuple_New(0);
PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
Py_DECREF(empty_tuple);
Py_DECREF(ptr);
if (!PyLong_Check(ret)) {{
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <Python.h>

static inline void gpuAssert(hipError_t code, const char *file, int line)
{{
if (code != HIP_SUCCESS)
{{
const char* prefix = "Triton Error [HIP]: ";
const char* str = hipGetErrorString(code);
char err[1024] = {{0}};
strcat(err, prefix);
strcat(err, str);
PyErr_SetString(PyExc_RuntimeError, err);
}}
return (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
}}
PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
return (hipDeviceptr_t)0;
}}
static PyObject* launch(PyObject* self, PyObject* args) {{
// printf("launch(PyObject* self, PyObject* args)");
int gridX, gridY, gridZ;
uint64_t _stream;
uint64_t _function;
int num_warps;
int shared_memory;
{' '.join([f"{_extracted_type(ty)} _arg{i}; " for i, ty in signature.items()])}
if(!PyArg_ParseTuple(args, \"{format}\", &gridX, &gridY, &gridZ, &num_warps, &shared_memory, &_stream, &_function, {', '.join(f"&_arg{i}" for i, ty in signature.items())})) {{
return NULL;
}}
_launch(gridX, gridY, gridZ, num_warps, shared_memory, (hipStream_t)_stream, (hipFunction_t)_function, {', '.join(f"getPointer(_arg{i},{i})" if ty[0]=="*" else f"_arg{i}"for i, ty in signature.items())});
if(PyErr_Occurred()) {{
return NULL;
}}
// return None
Py_INCREF(Py_None);
return Py_None;
}}
static PyMethodDef ModuleMethods[] = {{
{{"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"}},
{{NULL, NULL, 0, NULL}} // sentinel
}};
static struct PyModuleDef ModuleDef = {{
PyModuleDef_HEAD_INIT,
\"launcher\",
NULL, //documentation
-1, //size
ModuleMethods
}};
PyMODINIT_FUNC PyInit_launcher(void) {{
PyObject *m = PyModule_Create(&ModuleDef);
if(m == NULL) {{
return NULL;
}}
PyModule_AddFunctions(m, ModuleMethods);
return m;
}}
"""
}}

#define HIP_CHECK(ans) {{ gpuAssert((ans), __FILE__, __LINE__); }}

void _launch(int gridX, int gridY, int gridZ, int num_warps, int shared_memory, hipStream_t stream, hipFunction_t function, {arg_decls}) {{
void *params[] = {{ {', '.join(f"&arg{i}" for i in signature.keys() if i not in constants)} }};
if(gridX*gridY*gridZ > 0){{
HIP_CHECK(hipModuleLaunchKernel(function, gridX, gridY, gridZ, 32*num_warps, 1, 1, shared_memory, stream, params, 0));
// hipModuleLaunchKernel(function, gridX, gridY, gridZ, 32*num_warps, 1, 1, shared_memory, stream, params, 0);
}}
}}

static inline hipDeviceptr_t getPointer(PyObject *obj, int idx) {{
if (PyLong_Check(obj)) {{
return (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
}}
if (obj == Py_None) {{
return (hipDeviceptr_t)0;
}}
PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
if(ptr){{
PyObject *empty_tuple = PyTuple_New(0);
PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
Py_DECREF(empty_tuple);
Py_DECREF(ptr);
if (!PyLong_Check(ret)) {{
PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
}}
return (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
}}
PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
return (hipDeviceptr_t)0;
}}

static PyObject* launch(PyObject* self, PyObject* args) {{
int gridX, gridY, gridZ;
uint64_t _stream;
uint64_t _function;
int num_warps;
int shared_memory;
PyObject *launch_enter_hook = NULL;
PyObject *launch_exit_hook = NULL;
PyObject *compiled_kernel = NULL;
PyObject *hook_ret = NULL;
{' '.join([f"{_extracted_type(ty)} _arg{i}; " for i, ty in signature.items()])}
if(!PyArg_ParseTuple(args, \"{format}\", &gridX, &gridY, &gridZ, &num_warps, &shared_memory, &_stream, &_function, &launch_enter_hook, &launch_exit_hook, &compiled_kernel, {', '.join(f"&_arg{i}" for i, ty in signature.items())})) {{
return NULL;
}}

if (launch_enter_hook != Py_None) {{
PyObject *new_args = PyTuple_Pack(1, compiled_kernel);
hook_ret = PyObject_CallObject(launch_enter_hook, new_args);
Py_DECREF(new_args);
}}

_launch(gridX, gridY, gridZ, num_warps, shared_memory, (hipStream_t)_stream, (hipFunction_t)_function, {', '.join(f"getPointer(_arg{i},{i})" if ty[0]=="*" else f"_arg{i}"for i, ty in signature.items())});

if (launch_exit_hook != Py_None) {{
PyObject *new_args = NULL;
if (hook_ret) {{
new_args = PyTuple_Pack(2, compiled_kernel, hook_ret);
}} else {{
new_args = PyTuple_Pack(1, compiled_kernel);
}}
hook_ret = PyObject_CallObject(launch_exit_hook, new_args);
Py_DECREF(new_args);
}}

if (hook_ret) {{
Py_DECREF(hook_ret);
}}
if(PyErr_Occurred()) {{
return NULL;
}}
// return None
Py_INCREF(Py_None);
return Py_None;
}}

static PyMethodDef ModuleMethods[] = {{
{{"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"}},
{{NULL, NULL, 0, NULL}} // sentinel
}};

static struct PyModuleDef ModuleDef = {{
PyModuleDef_HEAD_INIT,
\"launcher\",
NULL, //documentation
-1, //size
ModuleMethods
}};

PyMODINIT_FUNC PyInit_launcher(void) {{
PyObject *m = PyModule_Create(&ModuleDef);
if(m == NULL) {{
return NULL;
}}
PyModule_AddFunctions(m, ModuleMethods);
return m;
}}
"""
else:
# generate glue code
src = f"""
#include \"cuda.h\"
#include <Python.h>
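A note on the error-handling pattern in the generated glue code above: HIP_CHECK simply funnels a hipError_t through gpuAssert, which turns a failure into a Python RuntimeError via PyErr_SetString. The doubled braces in the hunk are Python f-string escapes; the emitted C file has single braces. A minimal standalone sketch of the same macro pattern follows (the file name and the hipGetDeviceCount call are illustrative only, not part of this commit, and errors go to stderr here rather than to a Python exception):

// hip_check_sketch.c -- illustrative only; mirrors the gpuAssert/HIP_CHECK idea above
#include <hip/hip_runtime.h>
#include <stdio.h>

static inline void gpuAssert(hipError_t code, const char *file, int line) {
    if (code != hipSuccess) {
        // the generated launcher raises a Python RuntimeError here instead
        fprintf(stderr, "HIP error: %s (%s:%d)\n", hipGetErrorString(code), file, line);
    }
}
#define HIP_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

int main(void) {
    int count = 0;
    HIP_CHECK(hipGetDeviceCount(&count));  // any HIP call can be wrapped the same way
    printf("visible HIP devices: %d\n", count);
    return 0;
}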
@@ -1606,7 +1640,6 @@ def compile(fn, **kwargs):
# build compilation stages
if torch.version.hip is not None:
gfx_arch = os.environ.get('MI_GPU_ARCH', _get_amdgpu_arch())
print(f"gfx_arch = {gfx_arch}")
if gfx_arch is None:
raise RuntimeError('gfx_arch is None (not specified)')
stages = {
@@ -1655,14 +1688,14 @@ def compile(fn, **kwargs):
import re
match = re.search(prototype_pattern[ir], src, re.MULTILINE)
name, signature = match.group(1), match.group(2)
print(name, signature)
# print(name, signature)
types = re.findall(arg_type_pattern[ir], signature)
print(types)
# print(types)
param_tys = [convert_type_repr(ty) for ty in types]
signature = {k: v for k, v in enumerate(param_tys)}
first_stage = list(stages.keys()).index(ir)

print(f"name, signature, constants={name, signature, constants}")
# print(f"name, signature, constants={name, signature, constants}")
# cache manager
so_path = make_stub(name, signature, constants)
# create cache manager
@@ -1672,7 +1705,7 @@ def compile(fn, **kwargs):
name, ext = fn.__name__, "ast"
else:
name, ext = os.path.basename(fn).split(".")
print(f"name, ext = {name, ext}")
# print(f"name, ext = {name, ext}")

# load metadata if any
metadata = None
@@ -1711,12 +1744,12 @@ def compile(fn, **kwargs):
if ir == "ptx":
metadata["name"] = ptx_get_kernel_name(next_module)
if ir == "amdgcn":
print(f"next_module: {next_module}")
print(f"asm:{asm}")
# print(f"next_module: {next_module}")
# print(f"asm:{asm}")
metadata["name"] = amdgcn_get_kernel_name(next_module[0])
asm["amdgcn"] = next_module[0]
asm["hsaco_path"] = next_module[1]
print(f"asm:{asm}")
# print(f"asm:{asm}")

module = next_module
# write-back metadata
@@ -1772,10 +1805,18 @@ class CompiledKernel:
# because it involves doing runtime things
# (e.g., checking amount of shared memory on current device)
self.metadata = metadata
# if torch.version.hip is not None:
# self.hip_module = None
# self.hip_function = None
# else:
self.cu_module = None
self.cu_function = None

def _init_handles(self):
# if torch.version.hip is not None:
# if self.hip_module is not None:
# return
# else:
if self.cu_module is not None:
return
device = torch.cuda.current_device()
@@ -1788,6 +1829,8 @@ class CompiledKernel:
# raise OutOfResources(self.shared, max_shared, "shared memory")
print(f"self.metadata['name']: {self.metadata['name']}")
print(f"self.asm['hsaco_path']: {self.asm['hsaco_path']}")
print(f"self.shared: {self.shared}")
print(f"device: {device}")
mod, func, n_regs, n_spills = hip_utils.load_binary(self.metadata["name"], self.asm["hsaco_path"], self.shared, device)
self.cu_module = mod
self.cu_function = func
@@ -1995,6 +2038,7 @@ class HIPUtils(object):
return """
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdio.h>
#include <stdlib.h>
@@ -2017,21 +2061,33 @@ class HIPUtils(object):
Py_ssize_t data_size;
int shared;
int device;
if(!PyArg_ParseTuple(args, "ssii", &name, &data, &shared, &device)) {
if(!PyArg_ParseTuple(args, "ss#ii", &name, &data, &data_size, &shared, &device)) {
return NULL;
}
hipFunction_t fun;
// Read HSACO.
printf("name: %s\\n", name);
printf("data: %s\\n", data);
printf("data_size: %ld\\n", data_size);
printf("shared: %d\\n", shared);
printf("device: %d\\n", device);


// Open HSACO file
FILE* hsaco_file;
if ((hsaco_file = fopen(data, "rb")) == NULL) {
return NULL;
}

// Read HSCAO file into Buffer
fseek(hsaco_file, 0L, SEEK_END);
size_t hsaco_file_size = ftell(hsaco_file);
unsigned char* hsaco = (unsigned char*) malloc(hsaco_file_size * sizeof(unsigned char));
rewind(hsaco_file);
fread(hsaco, sizeof(unsigned char), hsaco_file_size, hsaco_file);
fclose(hsaco_file);
printf("hsaco_file_size: %ld\\n", hsaco_file_size);
printf("hsaco: %s\\n", hsaco);

// set HIP options
hipJitOption opt[] = {hipJitOptionErrorLogBufferSizeBytes, hipJitOptionErrorLogBuffer,
hipJitOptionInfoLogBufferSizeBytes, hipJitOptionInfoLogBuffer,
hipJitOptionLogVerbose};
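A note on the PyArg_ParseTuple change above: moving from "ssii" to "ss#ii" makes CPython hand back both the buffer pointer and its length, and because this file defines PY_SSIZE_T_CLEAN before including Python.h, the length argument must be a Py_ssize_t (which is why data_size is declared that way). A minimal sketch of the parsing behavior, using a hypothetical parse_demo function that is not part of this commit:

// illustrative only: how the "s#" format unit fills a pointer plus a Py_ssize_t length
#define PY_SSIZE_T_CLEAN
#include <Python.h>

static PyObject *parse_demo(PyObject *self, PyObject *args) {
    const char *name;      // "s"  -> NUL-terminated string
    const char *data;      // "s#" -> buffer pointer ...
    Py_ssize_t data_size;  //        ... plus its length
    int shared, device;    // "ii" -> two C ints
    if (!PyArg_ParseTuple(args, "ss#ii", &name, &data, &data_size, &shared, &device)) {
        return NULL;
    }
    return PyLong_FromSsize_t(data_size);  // e.g. report the parsed length back to Python
}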
@@ -2042,10 +2098,17 @@ class HIPUtils(object):
void *optval[] = {(void *)(uintptr_t)errbufsize,
(void *)_err, (void *)(uintptr_t)logbufsize,
(void *)_log, (void *)1};

// launch HIP Binary
hipModule_t mod;
hipFunction_t fun;
// hipModuleLoadData(&mod, hsaco);
hipModuleLoadDataEx(&mod, hsaco, 5, opt, optval);
hipModuleGetFunction(&fun, mod, name);
free(hsaco);
printf("fun: %d\\n", fun);
printf("mod: %d\\n", mod);

// get allocated registers and spilled registers from the function
int n_regs = 0;
int n_spills = 0;
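For context on the n_regs / n_spills counters above: this hunk does not show how they are filled in. One plausible way (an assumption, not confirmed by this diff) is to query the loaded hipFunction_t with hipFuncGetAttribute, mirroring the CUDA driver API:

// illustrative sketch only -- assumes hipFuncGetAttribute and these attribute enums;
// the actual implementation is not shown in this hunk
#include <hip/hip_runtime.h>

static void query_kernel_resources(hipFunction_t fun, int *n_regs, int *n_spills) {
    *n_regs = 0;
    *n_spills = 0;
    hipFuncGetAttribute(n_regs, HIP_FUNC_ATTRIBUTE_NUM_REGS, fun);           // registers per thread
    hipFuncGetAttribute(n_spills, HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, fun); // spill/scratch bytes per thread
}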
@@ -2095,4 +2158,4 @@ class HIPUtils(object):
spec = importlib.util.spec_from_file_location("hip_utils", cache._make_path(fname))
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
self.load_binary = mod.load_binary
self.load_binary = mod.load_binary

@@ -1,149 +0,0 @@
#include "cuda.h"
#include <Python.h>

static inline void gpuAssert(CUresult code, const char *file, int line) {
{
if (code != CUDA_SUCCESS) {
{
const char *prefix = "Triton Error [CUDA]: ";
const char *str;
cuGetErrorString(code, &str);
char err[1024] = {{0}};
strcat(err, prefix);
strcat(err, str);
PyErr_SetString(PyExc_RuntimeError, err);
}
}
}
}

#define CUDA_CHECK(ans) \
{ \
{ gpuAssert((ans), __FILE__, __LINE__); } \
}

void _launch(int gridX, int gridY, int gridZ, int num_warps, int shared_memory,
CUstream stream, CUfunction function, {arg_decls}) {
{
void *params[] = {{ {', '.join(f"&arg{i}" for i in signature.keys() if i not in constants)} }};
if (gridX * gridY * gridZ > 0) {
{
CUDA_CHECK(cuLaunchKernel(function, gridX, gridY, gridZ, 32 * num_warps,
1, 1, shared_memory, stream, params, 0));
}
}
}
}

static inline CUdeviceptr getPointer(PyObject *obj, int idx) {
{
if (PyLong_Check(obj)) {
{ return (CUdeviceptr)PyLong_AsUnsignedLongLong(obj); }
}
if (obj == Py_None) {
{ return (CUdeviceptr)0; }
}
PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
if (ptr) {
{
PyObject *empty_tuple = PyTuple_New(0);
PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
Py_DECREF(empty_tuple);
Py_DECREF(ptr);
if (!PyLong_Check(ret)) {
{
PyErr_SetString(
PyExc_TypeError,
"data_ptr method of Pointer object must return 64-bit int");
}
}
return (CUdeviceptr)PyLong_AsUnsignedLongLong(ret);
}
}
PyErr_SetString(
PyExc_TypeError,
"Pointer argument must be either uint64 or have data_ptr method");
return (CUdeviceptr)0;
}
}

static PyObject *launch(PyObject *self, PyObject *args) {
{
int gridX, gridY, gridZ;
uint64_t _stream;
uint64_t _function;
int num_warps;
int shared_memory;
PyObject *launch_enter_hook = NULL;
PyObject *launch_exit_hook = NULL;
PyObject *compiled_kernel = NULL;
PyObject *hook_ret = NULL;
{' '.join([f"{_extracted_type(ty)} _arg{i}; " for i, ty in signature.items()])
}
if(!PyArg_ParseTuple(args, \"{format}\", &gridX, &gridY, &gridZ, &num_warps, &shared_memory, &_stream, &_function, &launch_enter_hook, &launch_exit_hook, &compiled_kernel, {', '.join(f"&_arg{
i}" for i, ty in signature.items())})) {{
return NULL;
}
}

if (launch_enter_hook != Py_None) {
{
PyObject *new_args = PyTuple_Pack(1, compiled_kernel);
hook_ret = PyObject_CallObject(launch_enter_hook, new_args);
Py_DECREF(new_args);
}
}

_launch(gridX, gridY, gridZ, num_warps, shared_memory, (CUstream)_stream,
(CUfunction)_function, {', '.join(f"getPointer(_arg{i},{i})" if ty[0]=="*" else f"_arg{i}"for i, ty in signature.items())
});

if (launch_exit_hook != Py_None) {
{
PyObject *new_args = NULL;
if (hook_ret) {
{ new_args = PyTuple_Pack(2, compiled_kernel, hook_ret); }
} else {
{ new_args = PyTuple_Pack(1, compiled_kernel); }
}
hook_ret = PyObject_CallObject(launch_exit_hook, new_args);
Py_DECREF(new_args);
}
}

if (hook_ret) {
{ Py_DECREF(hook_ret); }
}
if (PyErr_Occurred()) {
{ return NULL; }
}
// return None
Py_INCREF(Py_None);
return Py_None;
}
}

static PyMethodDef ModuleMethods[] = {{
{{"launch", launch, METH_VARARGS,
"Entry point for all kernels with this signature"}},
{{NULL, NULL, 0, NULL}} // sentinel
}};

static struct PyModuleDef ModuleDef = {{
PyModuleDef_HEAD_INIT,
\"launcher\",
NULL, //documentation
-1, //size
ModuleMethods
}};

PyMODINIT_FUNC PyInit_launcher(void) {
{
PyObject *m = PyModule_Create(&ModuleDef);
if (m == NULL) {
{ return NULL; }
}
PyModule_AddFunctions(m, ModuleMethods);
return m;
}
}
@@ -1,113 +0,0 @@
#define __HIP_PLATFORM_AMD__
#include <Python.h>
#include <hip/hip_runtime.h>

static inline void gpuAssert(hipError_t code, const char *file, int line) {
{
if (code != HIP_SUCCESS) {
{
const char *prefix = "Triton Error [CUDA]: ";
const char *str = hipGetErrorString(code);
char err[1024] = {{0}};
strcat(err, prefix);
strcat(err, str);
PyErr_SetString(PyExc_RuntimeError, err);
}
}
}
}
#define CUDA_CHECK(ans) \
{ \
{ gpuAssert((ans), __FILE__, __LINE__); } \
}
void _launch(int gridX, int gridY, int gridZ, int num_warps, int shared_memory,
hipStream_t stream, hipFunction_t function, {arg_decls}) {
{
void *params[] = {{ {', '.join(f"&arg{i}" for i in signature.keys() if i not in constants)} }};
if (gridX * gridY * gridZ > 0) {
{
hipModuleLaunchKernel(function, gridX, gridY, gridZ, 32 * num_warps, 1, 1,
shared_memory, stream, params, 0);
}
}
}
}
static inline hipDeviceptr_t getPointer(PyObject *obj, int idx) {
{
if (PyLong_Check(obj)) {
{ return (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj); }
}
if (obj == Py_None) {
{ return (hipDeviceptr_t)0; }
}
PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
if (ptr) {
{
PyObject *empty_tuple = PyTuple_New(0);
PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
Py_DECREF(empty_tuple);
Py_DECREF(ptr);
if (!PyLong_Check(ret)) {
{
PyErr_SetString(
PyExc_TypeError,
"data_ptr method of Pointer object must return 64-bit int");
}
}
return (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
}
}
PyErr_SetString(
PyExc_TypeError,
"Pointer argument must be either uint64 or have data_ptr method");
return (hipDeviceptr_t)0;
}
}
static PyObject *launch(PyObject *self, PyObject *args) {
{
// printf("launch(PyObject* self, PyObject* args)");
int gridX, gridY, gridZ;
uint64_t _stream;
uint64_t _function;
int num_warps;
int shared_memory;
{' '.join([f"{_extracted_type(ty)} _arg{i}; " for i, ty in signature.items()])
}
if(!PyArg_ParseTuple(args, \"{format}\", &gridX, &gridY, &gridZ, &num_warps, &shared_memory, &_stream, &_function, {', '.join(f"&_arg{
i}" for i, ty in signature.items())})) {{
return NULL;
}
}
_launch(gridX, gridY, gridZ, num_warps, shared_memory, (hipStream_t)_stream,
(hipFunction_t)_function, {', '.join(f"getPointer(_arg{i},{i})" if ty[0]=="*" else f"_arg{i}"for i, ty in signature.items())
});
if (PyErr_Occurred()) {
{ return NULL; }
}
// return None
Py_INCREF(Py_None);
return Py_None;
}
}
static PyMethodDef ModuleMethods[] = {{
{{"launch", launch, METH_VARARGS,
"Entry point for all kernels with this signature"}},
{{NULL, NULL, 0, NULL}} // sentinel
}};
static struct PyModuleDef ModuleDef = {{
PyModuleDef_HEAD_INIT,
\"launcher\",
NULL, //documentation
-1, //size
ModuleMethods
}};
PyMODINIT_FUNC PyInit_launcher(void) {
{
PyObject *m = PyModule_Create(&ModuleDef);
if (m == NULL) {
{ return NULL; }
}
PyModule_AddFunctions(m, ModuleMethods);
return m;
}
}
@@ -13,4 +13,4 @@ chmod -R 777 $LOG_DIR
bash scripts/amd/clean.sh
bash scripts/amd/build.sh
bash scripts/amd/test.sh backtrace 2>&1 |tee $LOG_DIR/test.log
bash scripts/amd/cache_print.sh 2>&1 |tee $LOG_DIR/cache.log
# bash scripts/amd/cache_print.sh 2>&1 |tee $LOG_DIR/cache.log
@@ -10,6 +10,8 @@ rm -rf $LOG_DIR
mkdir -p $LOG_DIR
chmod -R 777 $LOG_DIR

sh scripts/amd/clean.sh

# check for backtrace
if [ "$1" == "backtrace" ]; then
sudo apt install gdb -y
@@ -26,12 +28,11 @@ if [ "$1" == "backtrace" ]; then

else

# sh scripts/amd/clean.sh

# pytest -rfs --verbose python/tests 2>&1 | tee $LOG_DIR/test_all.log
# pytest -rfs --verbose "python/tests/test_compiler.py" 2>&1 | tee $LOG_DIR/test_compiler.log
# pytest -rfs --verbose "python/tests/test_core_amd.py" 2>&1 | tee $LOG_DIR/test_core_amd.log
pytest -rfs --verbose "python/tests/test_core_amd.py::test_empty_kernel[float32]" 2>&1 | tee $LOG_DIR/test_core_amd.log
# pytest -rfs --verbose "python/tests/test_core_amd.py::test_empty_kernel" 2>&1 | tee $LOG_DIR/test_empty_kernel.log
pytest -rfs --verbose "python/tests/test_core_amd.py::test_empty_kernel[float32]" 2>&1 | tee $LOG_DIR/test_empty_kernel_float32.log
# pytest -rfs --verbose "python/test/unit/language/test_core.py" 2>&1 | tee $LOG_DIR/test_core_amd.log
# pytest -rfs --verbose "python/tests/test_core.py" 2>&1 | tee $LOG_DIR/test_core.log
# pytest -rfs --verbose "python/tests/test_core.py::test_math_op" | tee $LOG_DIR/test_math_op.log