mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
remove aql remnants in amd (#4346)
This commit is contained in:
@@ -78,25 +78,8 @@ class AMDCompiler(Compiler):
|
||||
|
||||
SDMA_MAX_COPY_SIZE = 0x400000
|
||||
PAGE_SIZE = 0x1000
|
||||
|
||||
SIGNAL_SIZE, SIGNAL_COUNT = ctypes.sizeof(hsa.amd_signal_t), 16384
|
||||
|
||||
VENDOR_HEADER = hsa.HSA_PACKET_TYPE_VENDOR_SPECIFIC << hsa.HSA_PACKET_HEADER_TYPE
|
||||
|
||||
DISPATCH_KERNEL_SETUP = 3 << hsa.HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS
|
||||
DISPATCH_KERNEL_HEADER = 1 << hsa.HSA_PACKET_HEADER_BARRIER
|
||||
DISPATCH_KERNEL_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE
|
||||
DISPATCH_KERNEL_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE
|
||||
DISPATCH_KERNEL_HEADER |= hsa.HSA_PACKET_TYPE_KERNEL_DISPATCH << hsa.HSA_PACKET_HEADER_TYPE
|
||||
|
||||
BARRIER_HEADER = 1 << hsa.HSA_PACKET_HEADER_BARRIER
|
||||
BARRIER_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE
|
||||
BARRIER_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE
|
||||
BARRIER_HEADER |= hsa.HSA_PACKET_TYPE_BARRIER_AND << hsa.HSA_PACKET_HEADER_TYPE
|
||||
|
||||
SHT_PROGBITS = 0x1
|
||||
SHF_ALLOC = 0x2
|
||||
|
||||
SHT_PROGBITS, SHF_ALLOC = 0x1, 0x2
|
||||
EMPTY_SIGNAL = hsa.hsa_signal_t()
|
||||
SIGNAL_VALUE_OFFSET = getattr(hsa.amd_signal_t, 'value').offset
|
||||
|
||||
@@ -196,15 +179,12 @@ class HWPM4Queue:
|
||||
def exec(self, prg:AMDProgram, kernargs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1)):
|
||||
self.hdp_flush()
|
||||
self.invalidate_cache()
|
||||
code = hsa.amd_kernel_code_t.from_address(prg.handle) # NOTE: this is wrong, it's not this object
|
||||
|
||||
code = hsa.amd_kernel_code_t.from_address(prg.handle) # NOTE: this is wrong, it's not this object
|
||||
assert code.kernel_code_properties & 0x400 == 0x400 # ENABLE_WAVEFRONT_SIZE32
|
||||
assert code.workitem_private_segment_byte_size == 0
|
||||
assert code.max_scratch_backing_memory_byte_size == 0
|
||||
assert code.kernel_code_prefetch_byte_size == 0
|
||||
#assert (mod:=(prg.group_segment_size%32)) == 0, f"group_segment_size is {prg.group_segment_size} mod is {mod}"
|
||||
#assert prg.private_segment_size == 0
|
||||
#for s in format_struct(code): print(s)
|
||||
#print(hex(code.compute_pgm_rsrc1), hex(code.compute_pgm_rsrc2))
|
||||
rsrc1, rsrc2 = code.compute_pgm_rsrc1, code.compute_pgm_rsrc2
|
||||
|
||||
# this is required
|
||||
@@ -550,10 +530,8 @@ class AMDDevice(Compiled):
|
||||
self.completion_signal = AMDDevice._get_signal(self.device_id*2, sync_event=sync_event)
|
||||
self.signal_sdma = AMDDevice._get_signal(self.device_id*2+1, sync_event=kio.create_event(AMDDevice.kfd, auto_reset=1))
|
||||
|
||||
self.eop_buffer = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
|
||||
self.kernargs = self._gpu_alloc(0x1000000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
|
||||
self.kernargs_ptr = self.kernargs.va_addr
|
||||
self.ctx_save_restore_address = self._gpu_alloc(0x2C02000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
|
||||
|
||||
# scratch setup
|
||||
max_cu_id = self.properties['simd_count'] // self.properties['simd_per_cu'] - 1
|
||||
|
||||
Reference in New Issue
Block a user