KFDProgram -> AMDProgram (#4268)

This commit is contained in:
nimlgen
2024-04-24 00:29:50 +03:00
committed by GitHub
parent 17328ded7d
commit f3b4dff7c9
2 changed files with 5 additions and 5 deletions

View File

@@ -5,7 +5,7 @@ import tinygrad.runtime.autogen.amd_gpu as amd_gpu
import tinygrad.runtime.autogen.kfd as kfd
import tinygrad.runtime.autogen.hsa as hsa
from tinygrad.engine.schedule import create_schedule
from tinygrad.runtime.ops_amd import kio, KFDProgram
from tinygrad.runtime.ops_amd import kio, AMDProgram
from tinygrad.helpers import to_mv
DISPATCH_INIT_VALUE = 0x21 | 0x8000
@@ -51,7 +51,7 @@ if __name__ == "__main__":
b.lazydata.buffer.allocate()
si = create_schedule([b.lazydata])[-1]
runner = dev.get_runner(*si.ast)
prg: KFDProgram = runner.clprg
prg: AMDProgram = runner.clprg
print("device initted")
# Compute Queue

View File

@@ -194,7 +194,7 @@ class HWPM4Queue:
amd_gpu.PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(gl2)]
return self
def exec(self, prg:KFDProgram, kernargs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), completion_signal=None):
def exec(self, prg:AMDProgram, kernargs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), completion_signal=None):
self.hdp_flush()
self.invalidate_cache()
code = hsa.amd_kernel_code_t.from_address(prg.handle) # NOTE: this is wrong, it's not this object
@@ -344,7 +344,7 @@ class HWCopyQueue:
value=0, mask=0xffffffff, interval=0x04, retry_count=0xfff))
return self
class KFDProgram:
class AMDProgram:
def __init__(self, device:AMDDevice, name:str, lib:bytes):
# TODO: this API needs the type signature of the function and global_size/local_size
self.device, self.name, self.lib = device, name, lib
@@ -597,7 +597,7 @@ class AMDDevice(Compiled):
self.pm4_write_pointer = to_mv(self.pm4_queue.write_pointer_address, 8).cast("Q")
self.pm4_doorbell = to_mv(self.doorbells + self.pm4_queue.doorbell_offset - self.doorbells_base, 4).cast("I")
super().__init__(device, AMDAllocator(self), AMDCompiler(self.arch), functools.partial(KFDProgram, self))
super().__init__(device, AMDAllocator(self), AMDCompiler(self.arch), functools.partial(AMDProgram, self))
def _submit_sdma(self, dest, src, copy_size, wait_signals=None, completion_signal=None):
q = HWCopyQueue()