mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 07:28:15 -05:00
KFDProgram -> AMDProgram (#4268)
This commit is contained in:
@@ -5,7 +5,7 @@ import tinygrad.runtime.autogen.amd_gpu as amd_gpu
|
||||
import tinygrad.runtime.autogen.kfd as kfd
|
||||
import tinygrad.runtime.autogen.hsa as hsa
|
||||
from tinygrad.engine.schedule import create_schedule
|
||||
from tinygrad.runtime.ops_amd import kio, KFDProgram
|
||||
from tinygrad.runtime.ops_amd import kio, AMDProgram
|
||||
from tinygrad.helpers import to_mv
|
||||
|
||||
DISPATCH_INIT_VALUE = 0x21 | 0x8000
|
||||
@@ -51,7 +51,7 @@ if __name__ == "__main__":
|
||||
b.lazydata.buffer.allocate()
|
||||
si = create_schedule([b.lazydata])[-1]
|
||||
runner = dev.get_runner(*si.ast)
|
||||
prg: KFDProgram = runner.clprg
|
||||
prg: AMDProgram = runner.clprg
|
||||
print("device initted")
|
||||
|
||||
# Compute Queue
|
||||
|
||||
@@ -194,7 +194,7 @@ class HWPM4Queue:
|
||||
amd_gpu.PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(gl2)]
|
||||
return self
|
||||
|
||||
def exec(self, prg:KFDProgram, kernargs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), completion_signal=None):
|
||||
def exec(self, prg:AMDProgram, kernargs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1), completion_signal=None):
|
||||
self.hdp_flush()
|
||||
self.invalidate_cache()
|
||||
code = hsa.amd_kernel_code_t.from_address(prg.handle) # NOTE: this is wrong, it's not this object
|
||||
@@ -344,7 +344,7 @@ class HWCopyQueue:
|
||||
value=0, mask=0xffffffff, interval=0x04, retry_count=0xfff))
|
||||
return self
|
||||
|
||||
class KFDProgram:
|
||||
class AMDProgram:
|
||||
def __init__(self, device:AMDDevice, name:str, lib:bytes):
|
||||
# TODO; this API needs the type signature of the function and global_size/local_size
|
||||
self.device, self.name, self.lib = device, name, lib
|
||||
@@ -597,7 +597,7 @@ class AMDDevice(Compiled):
|
||||
self.pm4_write_pointer = to_mv(self.pm4_queue.write_pointer_address, 8).cast("Q")
|
||||
self.pm4_doorbell = to_mv(self.doorbells + self.pm4_queue.doorbell_offset - self.doorbells_base, 4).cast("I")
|
||||
|
||||
super().__init__(device, AMDAllocator(self), AMDCompiler(self.arch), functools.partial(KFDProgram, self))
|
||||
super().__init__(device, AMDAllocator(self), AMDCompiler(self.arch), functools.partial(AMDProgram, self))
|
||||
|
||||
def _submit_sdma(self, dest, src, copy_size, wait_signals=None, completion_signal=None):
|
||||
q = HWCopyQueue()
|
||||
|
||||
Reference in New Issue
Block a user