mirror of
https://github.com/ROCm/ROCm.git
synced 2026-02-21 03:00:39 -05:00
This PR provides a mechanism for supporting a third-party backend at runtime to generate backend-specific code. The mechanism provides a common class that abstracts the third-party backend logic, and two essential functions to register and get the third-party backend at runtime.

- `BaseBackend`: A common class to abstract the third-party backend logic
- `register_backend`: Register a third-party backend with a given device type
- `get_backend`: Get the third-party backend with a given device type

Generally, a third-party backend must inherit from `BaseBackend` and implement all the member functions according to the backend characteristics. Once the backend implementation is ready, the third-party backend can invoke `register_backend` to register itself under a given device. During kernel compilation and execution, the mechanism gets the registered backend to generate the kernel and launcher code for a given device.

This PR adds a dummy backend to simulate a third-party backend and demonstrate the usage.
- [test_device_backend.py](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1): To define a third-party backend and register the backend - [ExtensionBackend](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1R123): Inherit from the `BaseBackend` and implement some specific logic like [filter out some compile stages](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1R129-R135) - [Register the `ExtensionBackend` for `CPU`](https://github.com/openai/triton/pull/1643/files#diff-bbe4d50624f2d11bf17c878a1ed4d422918c124c182cf9357b993240c385bea1R279) - [extension_backend.c](https://github.com/openai/triton/pull/1643/files#diff-169c1d08b3a0a7b343cfa3258fbc32b47e0f6c46305a112652fa1bdaaec89d29): To provide the utility function to load kernel binary and get the backend properties.
43 lines
1.4 KiB
C
43 lines
1.4 KiB
C
#include <Python.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
// Python-callable: report the properties of the dummy extension device.
//
// Neither `self` nor `args` is read. The result is whatever Py_BuildValue
// produces for the format below: a dict that maps each property name to a
// fixed placeholder integer.
static PyObject *getDeviceProperties(PyObject *self, PyObject *args) {
  (void)self;
  (void)args;

  // Hard-coded placeholder values; nothing is queried from real hardware.
  const int max_shared_mem = 1024;
  const int multiprocessor_count = 16;
  const int sm_clock_rate = 2100;
  const int mem_clock_rate = 2300;
  const int mem_bus_width = 2400;

  return Py_BuildValue("{s:i, s:i, s:i, s:i, s:i}",
                       "max_shared_mem", max_shared_mem,
                       "multiprocessor_count", multiprocessor_count,
                       "sm_clock_rate", sm_clock_rate,
                       "mem_clock_rate", mem_clock_rate,
                       "mem_bus_width", mem_bus_width);
}
|
|
|
|
// Python-callable stub used in place of a real binary loader.
//
// Neither `self` nor `args` is read and nothing is actually loaded. The
// result is the 4-tuple (mod, fun, n_regs, n_spills) built by Py_BuildValue,
// with every element equal to 0.
static PyObject *loadBinary(PyObject *self, PyObject *args) {
  (void)self;
  (void)args;

  // The "K" format unit consumes an `unsigned long long` from the variadic
  // argument list, so the two placeholder handles are held in exactly that
  // type instead of being cast to uint64_t (which may be `unsigned long`).
  unsigned long long mod = 0;
  unsigned long long fun = 0;

  // Placeholder register / spill counts, passed through the "i" units.
  int n_regs = 0;
  int n_spills = 0;

  return Py_BuildValue("(KKii)", mod, fun, n_regs, n_spills);
}
|
|
|
|
static PyMethodDef ModuleMethods[] = {
|
|
{"load_binary", loadBinary, METH_VARARGS,
|
|
"Load dummy binary for the extension device"},
|
|
{"get_device_properties", getDeviceProperties, METH_VARARGS,
|
|
"Get the properties for the extension device"},
|
|
{NULL, NULL, 0, NULL} // sentinel
|
|
};
|
|
|
|
static struct PyModuleDef ModuleDef = {PyModuleDef_HEAD_INIT, "ext_utils",
|
|
NULL, // documentation
|
|
-1, // size
|
|
ModuleMethods};
|
|
|
|
PyMODINIT_FUNC PyInit_ext_utils(void) {
|
|
PyObject *m = PyModule_Create(&ModuleDef);
|
|
if (m == NULL) {
|
|
return NULL;
|
|
}
|
|
PyModule_AddFunctions(m, ModuleMethods);
|
|
return m;
|
|
}
|