mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 23:48:01 -05:00
amd doorbell size is 64bits (#4448)
* amd doorbell size is 64bits * add test * test to pass 32bit boundary is more correct * no need to round there
This commit is contained in:
27
test/external/external_test_amd.py
vendored
Normal file
27
test/external/external_test_amd.py
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
import unittest
|
||||
from tinygrad import Device, Tensor
|
||||
from tinygrad.engine.schedule import create_schedule
|
||||
from tinygrad.runtime.ops_amd import AMDDevice
|
||||
|
||||
class TestAMD(unittest.TestCase):
  """Hardware test for the AMD backend's PM4 queue.

  Needs an "AMD" device to be available through tinygrad's ``Device`` registry.
  """

  @classmethod
  def setUpClass(cls):
    """Build the ``a + 1`` kernel once and share it across the tests in this class."""
    cls.d0: AMDDevice = Device["AMD"]
    cls.a = Tensor([0.,1.], device="AMD").realize()
    cls.b = cls.a + 1
    # Take the last item of the schedule for b and turn its AST into a runner.
    si = create_schedule([cls.b.lazydata])[-1]
    cls.d0_runner = cls.d0.get_runner(*si.ast)
    # b is not realized here, so its buffer is allocated explicitly before the
    # kernel is launched by hand in the test below.
    cls.b.lazydata.buffer.allocate()

  def test_amd_ring_64bit_doorbell(self):
    """Launch kernels after moving the PM4 write pointer far beyond the 32-bit range.

    The result only comes out right if the launches still work with such a
    write pointer (per the test name, this targets the 64-bit doorbell).
    """
    d0, runner = self.d0, self.d0_runner
    a_buf, b_buf = self.a.lazydata.buffer._buf, self.b.lazydata.buffer._buf

    # Advance the write pointer by (2 << 32) minus a quarter of the ring size,
    # which puts it well above anything representable in 32 bits.
    d0.pm4_write_pointer[0] += (2 << 32) - d0.pm4_ring.size // 4

    # Each pass launches the kernel twice, ping-ponging between the two buffers
    # (presumably output first, input second), so a[0] should grow by 2 per pass.
    for _ in range(2000):
      runner.clprg(b_buf, a_buf, global_size=runner.global_size, local_size=runner.local_size)
      runner.clprg(a_buf, b_buf, global_size=runner.global_size, local_size=runner.local_size)

    # a[0] started at 0.0, so 2000 passes should leave it at 4000.0.
    val = self.a.lazydata.buffer.as_buffer().cast("f")[0]
    self.assertEqual(val, 4000.0, f"got val {val}")
|
||||
|
||||
# Run the tests in this file when it is executed directly as a script.
if __name__ == "__main__":
  unittest.main()
|
||||
|
||||
@@ -554,7 +554,7 @@ class AMDDevice(Compiled):
|
||||
|
||||
self.sdma_read_pointer = to_mv(self.sdma_queue.read_pointer_address, 8).cast("Q")
|
||||
self.sdma_write_pointer = to_mv(self.sdma_queue.write_pointer_address, 8).cast("Q")
|
||||
self.sdma_doorbell = to_mv(self.doorbells + self.sdma_queue.doorbell_offset - self.doorbells_base, 4).cast("I")
|
||||
self.sdma_doorbell = to_mv(self.doorbells + self.sdma_queue.doorbell_offset - self.doorbells_base, 8).cast("Q")
|
||||
self.sdma_doorbell_value = 0
|
||||
|
||||
# PM4 Queue
|
||||
@@ -572,7 +572,7 @@ class AMDDevice(Compiled):
|
||||
|
||||
self.pm4_read_pointer = to_mv(self.pm4_queue.read_pointer_address, 8).cast("Q")
|
||||
self.pm4_write_pointer = to_mv(self.pm4_queue.write_pointer_address, 8).cast("Q")
|
||||
self.pm4_doorbell = to_mv(self.doorbells + self.pm4_queue.doorbell_offset - self.doorbells_base, 4).cast("I")
|
||||
self.pm4_doorbell = to_mv(self.doorbells + self.pm4_queue.doorbell_offset - self.doorbells_base, 8).cast("Q")
|
||||
|
||||
super().__init__(device, AMDAllocator(self), AMDCompiler(self.arch), functools.partial(AMDProgram, self))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user