From d2f89615b2164cd0a6dfd68f6ab9d75474213f39 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Tue, 30 Apr 2024 23:36:02 +0300 Subject: [PATCH] remove aql remnants in amd (#4346) --- tinygrad/runtime/ops_amd.py | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index 600fdb8f42..bfe876b360 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -78,25 +78,8 @@ class AMDCompiler(Compiler): SDMA_MAX_COPY_SIZE = 0x400000 PAGE_SIZE = 0x1000 - SIGNAL_SIZE, SIGNAL_COUNT = ctypes.sizeof(hsa.amd_signal_t), 16384 - -VENDOR_HEADER = hsa.HSA_PACKET_TYPE_VENDOR_SPECIFIC << hsa.HSA_PACKET_HEADER_TYPE - -DISPATCH_KERNEL_SETUP = 3 << hsa.HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS -DISPATCH_KERNEL_HEADER = 1 << hsa.HSA_PACKET_HEADER_BARRIER -DISPATCH_KERNEL_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE -DISPATCH_KERNEL_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE -DISPATCH_KERNEL_HEADER |= hsa.HSA_PACKET_TYPE_KERNEL_DISPATCH << hsa.HSA_PACKET_HEADER_TYPE - -BARRIER_HEADER = 1 << hsa.HSA_PACKET_HEADER_BARRIER -BARRIER_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE -BARRIER_HEADER |= hsa.HSA_FENCE_SCOPE_SYSTEM << hsa.HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE -BARRIER_HEADER |= hsa.HSA_PACKET_TYPE_BARRIER_AND << hsa.HSA_PACKET_HEADER_TYPE - -SHT_PROGBITS = 0x1 -SHF_ALLOC = 0x2 - +SHT_PROGBITS, SHF_ALLOC = 0x1, 0x2 EMPTY_SIGNAL = hsa.hsa_signal_t() SIGNAL_VALUE_OFFSET = getattr(hsa.amd_signal_t, 'value').offset @@ -196,15 +179,12 @@ class HWPM4Queue: def exec(self, prg:AMDProgram, kernargs, global_size:Tuple[int,int,int]=(1,1,1), local_size:Tuple[int,int,int]=(1,1,1)): self.hdp_flush() self.invalidate_cache() - code = hsa.amd_kernel_code_t.from_address(prg.handle) # NOTE: this is wrong, it's not this object + + code = hsa.amd_kernel_code_t.from_address(prg.handle) # NOTE: this is wrong, it's not this object assert code.kernel_code_properties & 0x400 == 0x400 # ENABLE_WAVEFRONT_SIZE32 assert code.workitem_private_segment_byte_size == 0 assert code.max_scratch_backing_memory_byte_size == 0 assert code.kernel_code_prefetch_byte_size == 0 - #assert (mod:=(prg.group_segment_size%32)) == 0, f"group_segment_size is {prg.group_segment_size} mod is {mod}" - #assert prg.private_segment_size == 0 - #for s in format_struct(code): print(s) - #print(hex(code.compute_pgm_rsrc1), hex(code.compute_pgm_rsrc2)) rsrc1, rsrc2 = code.compute_pgm_rsrc1, code.compute_pgm_rsrc2 # this is required @@ -550,10 +530,8 @@ class AMDDevice(Compiled): self.completion_signal = AMDDevice._get_signal(self.device_id*2, sync_event=sync_event) self.signal_sdma = AMDDevice._get_signal(self.device_id*2+1, sync_event=kio.create_event(AMDDevice.kfd, auto_reset=1)) - self.eop_buffer = self._gpu_alloc(0x1000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) self.kernargs = self._gpu_alloc(0x1000000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) self.kernargs_ptr = self.kernargs.va_addr - self.ctx_save_restore_address = self._gpu_alloc(0x2C02000, kfd.KFD_IOC_ALLOC_MEM_FLAGS_VRAM) # scratch setup max_cu_id = self.properties['simd_count'] // self.properties['simd_per_cu'] - 1