mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
amd: fix aql remote (#15368)
This commit is contained in:
1
.github/workflows/benchmark.yml
vendored
1
.github/workflows/benchmark.yml
vendored
@@ -709,6 +709,7 @@ jobs:
|
||||
PYTHONPATH=. python3 extra/remote/serve.py 6482 &
|
||||
sleep 1
|
||||
DEBUG=2 PYTHONPATH=. REMOTE=127.0.0.1:6482 AM_RESET=1 AMD=1 AMD_IFACE=PCI python3 test/test_tiny.py
|
||||
DEBUG=2 PYTHONPATH=. REMOTE=127.0.0.1:6482 AM_RESET=1 AMD=1 AMD_AQL=1 AMD_IFACE=PCI python3 test/test_tiny.py
|
||||
pkill -f 'extra/remote/serve.py' || true
|
||||
- name: Run process replay tests
|
||||
run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py
|
||||
|
||||
@@ -1015,11 +1015,11 @@ class AMDDevice(HCQCompiled):
|
||||
gart = self.iface.alloc(0x100, uncached=True, cpu_access=True)
|
||||
|
||||
if queue_type == kfd.KFD_IOC_QUEUE_TYPE_COMPUTE_AQL:
|
||||
aql_desc = hsa.amd_queue_t(queue_properties=hsa.AMD_QUEUE_PROPERTIES_IS_PTR64 | hsa.AMD_QUEUE_PROPERTIES_ENABLE_PROFILING,
|
||||
self.aql_gart = gart
|
||||
self.aql_desc = hsa.amd_queue_t(queue_properties=hsa.AMD_QUEUE_PROPERTIES_IS_PTR64 | hsa.AMD_QUEUE_PROPERTIES_ENABLE_PROFILING,
|
||||
read_dispatch_id_field_base_byte_offset=getattr(hsa.amd_queue_t, 'read_dispatch_id').offset,
|
||||
max_cu_id=(self.cu_cnt * self.xccs) - 1, max_wave_id=self.waves_per_cu - 1)
|
||||
gart.cpu_view().view(fmt='B')[:ctypes.sizeof(aql_desc)] = bytes(aql_desc)
|
||||
self.aql_desc = hsa.amd_queue_t.from_address(gart.cpu_view().addr)
|
||||
self.aql_gart.cpu_view().view(fmt='B')[:ctypes.sizeof(self.aql_desc)] = bytes(self.aql_desc)
|
||||
|
||||
cwsr_buffer_size = round_up((ctx_save_restore_size + debug_memory_size) * self.xccs, mmap.PAGESIZE)
|
||||
cwsr_buffer = self.iface.alloc(cwsr_buffer_size) if ctx_save_restore_size else None
|
||||
@@ -1067,6 +1067,7 @@ class AMDDevice(HCQCompiled):
|
||||
int.from_bytes(rsrc1_t(BASE_ADDRESS_HI=hi32(self.scratch.va_addr), SWIZZLE_ENABLE=1), 'little'),
|
||||
lo32(size_per_xcc), int.from_bytes(bytes(rsrc3_t(**rsrc)), 'little')]
|
||||
self.aql_desc.compute_tmpring_size = self.tmpring_size
|
||||
self.aql_gart.cpu_view()[:ctypes.sizeof(self.aql_desc)] = bytes(self.aql_desc)
|
||||
|
||||
def invalidate_caches(self):
|
||||
self.hw_compute_queue_t().memory_barrier().signal(self.timeline_signal, self.next_timeline()).submit(self)
|
||||
|
||||
Reference in New Issue
Block a user