mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-02-15 00:55:11 -05:00
use comgr to compile (#3248)
* use comgr to compile * fast * bfloat16 * move comgr to its own file * cleaner style * comgr in new place * comgr free + dtype cleanup
This commit is contained in:
47
tinygrad/runtime/compiler/hip_comgr.py
Normal file
47
tinygrad/runtime/compiler/hip_comgr.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import ctypes
|
||||
import tinygrad.runtime.autogen.comgr as comgr
|
||||
|
||||
def check(status):
  """Raise RuntimeError if a comgr call returned a nonzero status.

  status: the status code returned by a comgr API call; 0 is treated as success.
  The error message carries the numeric status plus comgr's own description of it
  when that description can be obtained.
  """
  if status == 0: return
  status_str = ctypes.POINTER(ctypes.c_char)()
  # the lookup itself can fail and leave status_str NULL; passing NULL to string_at would dereference it
  lookup_ok = comgr.amd_comgr_status_string(status, ctypes.byref(status_str)) == 0 and bool(status_str)
  desc = ctypes.string_at(status_str).decode() if lookup_ok else "unknown status"
  raise RuntimeError(f"comgr fail {status}, {desc}")
|
||||
|
||||
def _get_comgr_data(data_set, data_type):
  """Return the bytes of the data object at index 0 of kind `data_type` in `data_set`.

  data_set: a comgr data set handle to read from.
  data_type: the comgr data kind constant selecting which object to fetch.
  Raises RuntimeError (via check) if any comgr call fails. The temporary data
  handle is released even when reading fails.
  """
  check(comgr.amd_comgr_action_data_get_data(data_set, data_type, 0, ctypes.byref(data_exec := comgr.amd_comgr_data_t())))
  try:
    # first call passes no buffer and only fills in the size; second call copies the payload into a buffer of that size
    check(comgr.amd_comgr_get_data(data_exec, ctypes.byref(sz := ctypes.c_uint64()), None))
    check(comgr.amd_comgr_get_data(data_exec, ctypes.byref(sz), (dat := ctypes.create_string_buffer(sz.value))))
  finally:
    # always drop our handle; the status is checked below only on the success path so it cannot mask a read error
    release_status = comgr.amd_comgr_release_data(data_exec)
  check(release_status)
  return bytes(dat)
|
||||
|
||||
# comgr debugging environment variables: AMD_COMGR_SAVE_TEMPS=1 AMD_COMGR_REDIRECT_LOGS=stdout AMD_COMGR_EMIT_VERBOSE_LOGS=1
|
||||
def compile_hip(prg:str, arch="gfx1100") -> bytes:
  """Compile HIP source `prg` for GPU architecture `arch` using comgr and return the executable bytes.

  The pipeline is: source -> bitcode (with device libs) -> relocatable -> executable.

  prg: HIP source code.
  arch: target architecture name; used both for the ISA name and for --offload-arch.
  Raises RuntimeError("compile failed") after printing the comgr log when the source
  compile step fails, and RuntimeError (via check) when any other comgr call fails.
  Every comgr handle created here is released on both the success and the failure path.
  """
  import contextlib  # local import: only this function needs it
  with contextlib.ExitStack() as cleanup:
    # callbacks run in reverse registration order when the block exits, whether by return or by exception
    check(comgr.amd_comgr_create_action_info(ctypes.byref(action_info := comgr.amd_comgr_action_info_t())))
    cleanup.callback(lambda: check(comgr.amd_comgr_destroy_action_info(action_info)))
    check(comgr.amd_comgr_action_info_set_language(action_info, comgr.AMD_COMGR_LANGUAGE_HIP))
    check(comgr.amd_comgr_action_info_set_isa_name(action_info, b"amdgcn-amd-amdhsa--" + arch.encode()))
    check(comgr.amd_comgr_action_info_set_logging(action_info, True))

    # one data set per pipeline stage: source, bitcode, relocatable, executable
    data_sets = []
    for _ in range(4):
      check(comgr.amd_comgr_create_data_set(ctypes.byref(ds := comgr.amd_comgr_data_set_t())))
      cleanup.callback(lambda ds=ds: check(comgr.amd_comgr_destroy_data_set(ds)))
      data_sets.append(ds)
    data_set_src, data_set_bc, data_set_reloc, data_set_exec = data_sets

    check(comgr.amd_comgr_create_data(comgr.AMD_COMGR_DATA_KIND_SOURCE, ctypes.byref(data_src := comgr.amd_comgr_data_t())))
    cleanup.callback(lambda: check(comgr.amd_comgr_release_data(data_src)))
    check(comgr.amd_comgr_set_data(data_src, len(rprg := prg.encode()), rprg))
    check(comgr.amd_comgr_set_data_name(data_src, b"<null>"))
    check(comgr.amd_comgr_data_set_add(data_set_src, data_src))

    # -include hiprtc_runtime.h was removed
    # --offload-arch follows the `arch` argument so it always agrees with the ISA name set above
    check(comgr.amd_comgr_action_info_set_options(action_info, f"-O3 -mcumode --hip-version=6.0.32830 -DHIP_VERSION_MAJOR=6 -DHIP_VERSION_MINOR=0 -DHIP_VERSION_PATCH=32830 -D__HIPCC_RTC__ -std=c++14 -nogpuinc -Wno-gnu-line-marker -Wno-missing-prototypes --offload-arch={arch} -I/opt/rocm/include -Xclang -disable-llvm-passes".encode())) # noqa: E501
    status = comgr.amd_comgr_do_action(comgr.AMD_COMGR_ACTION_COMPILE_SOURCE_WITH_DEVICE_LIBS_TO_BC, action_info, data_set_src, data_set_bc)
    if status != 0:
      # the compiler log is read from the output data set of the failed action
      print(_get_comgr_data(data_set_bc, comgr.AMD_COMGR_DATA_KIND_LOG).decode())
      raise RuntimeError("compile failed")

    check(comgr.amd_comgr_action_info_set_options(action_info, b"-O3 -mllvm -amdgpu-internalize-symbols"))
    check(comgr.amd_comgr_do_action(comgr.AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE, action_info, data_set_bc, data_set_reloc))
    check(comgr.amd_comgr_action_info_set_options(action_info, b""))
    check(comgr.amd_comgr_do_action(comgr.AMD_COMGR_ACTION_LINK_RELOCATABLE_TO_EXECUTABLE, action_info, data_set_reloc, data_set_exec))
    return _get_comgr_data(data_set_exec, comgr.AMD_COMGR_DATA_KIND_EXECUTABLE)
|
||||
@@ -3,10 +3,11 @@ import ctypes, functools, subprocess, io
|
||||
from typing import Tuple, TypeVar, List, Any, cast, Set
|
||||
import tinygrad.runtime.autogen.hip as hip
|
||||
from tinygrad.helpers import DEBUG, getenv, init_c_var
|
||||
from tinygrad.helpers import from_mv, round_up, to_mv, colored, init_c_struct_t, to_char_p_p, get_bytes
|
||||
from tinygrad.helpers import from_mv, round_up, to_mv, colored, init_c_struct_t
|
||||
from tinygrad.device import Compiled, LRUAllocator, MallocAllocator, BufferOptions, JITRunner, Device, Buffer, update_stats
|
||||
from tinygrad.renderer.cstyle import HIPRenderer
|
||||
from tinygrad.codegen.kernel import LinearizerOptions
|
||||
from tinygrad.runtime.compiler.hip_comgr import compile_hip
|
||||
|
||||
# The default HIP stream is used for everything.
|
||||
MOCKHIP = getenv("MOCKHIP") # for CI. don't run kernels, only check if they compile
|
||||
@@ -21,13 +22,6 @@ def hip_set_device(d:int):
|
||||
def check(status):
  """Raise RuntimeError with the HIP error text when `status` is nonzero; do nothing for 0."""
  if status == 0: return
  message = ctypes.string_at(hip.hipGetErrorString(status)).decode()
  raise RuntimeError(f"HIP Error {status}, {message}")
|
||||
|
||||
def compile_hip(prg:str, arch="gfx1100") -> bytes:
  """Compile HIP source `prg` for `arch` with hiprtc and return the generated code bytes.

  prg: HIP source code.
  arch: target architecture name passed as --offload-arch.
  Raises RuntimeError with the program log when compilation fails, and RuntimeError
  (via check) when another hiprtc call fails. The hiprtc program is destroyed on
  both the success and the failure path.
  """
  check(hip.hiprtcCreateProgram(ctypes.byref(prog := hip.hiprtcProgram()), prg.encode(), "<null>".encode(), 0, None, None))
  try:
    compile_options = [f'--offload-arch={arch}', '-I/opt/rocm/include']
    status = hip.hiprtcCompileProgram(prog, len(compile_options), to_char_p_p([o.encode() for o in compile_options]))
    if status != 0: raise RuntimeError(f"compile failed: {get_bytes(prog, hip.hiprtcGetProgramLogSize, hip.hiprtcGetProgramLog, check).decode()}")
    return get_bytes(prog, hip.hiprtcGetCodeSize, hip.hiprtcGetCode, check)
  finally:
    # best-effort cleanup: the status is not checked so a destroy failure cannot mask a compile error
    # NOTE(review): assumes the autogen hip bindings expose hiprtcDestroyProgram - confirm
    hip.hiprtcDestroyProgram(ctypes.byref(prog))
|
||||
|
||||
class HIPProgram:
|
||||
def __init__(self, device:int, name:str, lib:bytes):
|
||||
self.device, self.name, self.lib = device, name, lib
|
||||
|
||||
Reference in New Issue
Block a user