mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-22 13:28:06 -05:00
usbgpu: copyin over 100mb/s (#10259)
* usbgpu: over 100mb/s * align * h
This commit is contained in:
@@ -820,8 +820,8 @@ class USBIface(PCIIface):
|
||||
self.usb._pci_cacheable += [self.bars[2]] # doorbell region is cacheable
|
||||
|
||||
# special regions
|
||||
self.copy_bufs = [self._new_dma_region(ctrl_addr=0xf000, sys_addr=0x200000, size=0x4000)]
|
||||
self.sys_buf, self.sys_next_off = self._new_dma_region(ctrl_addr=0xa000, sys_addr=0x820000, size=0x1000), 0
|
||||
self.copy_bufs = [self._new_dma_region(ctrl_addr=0xf000, sys_addr=0x200000, size=0x80000)]
|
||||
self.sys_buf, self.sys_next_off = self._new_dma_region(ctrl_addr=0xa000, sys_addr=0x820000, size=0x1000), 0x800
|
||||
|
||||
def _new_dma_region(self, ctrl_addr, sys_addr, size):
|
||||
region = self.adev.mm.map_range(vaddr:=self.adev.mm.alloc_vaddr(size=size), size, [(sys_addr, size)], system=True, snooped=False, uncached=True)
|
||||
|
||||
@@ -119,8 +119,8 @@ class ASM24Controller:
|
||||
self._pci_cache: dict[int, int|None] = {}
|
||||
|
||||
# Init controller.
|
||||
self.exec_ops([WriteOp(0x54b, b' '), WriteOp(0x5a8, b'\x02'), WriteOp(0x5f8, b'\x04'), WriteOp(0x7ec, b'\x01\x00\x00\x00'),
|
||||
WriteOp(0xc422, b'\x02'), WriteOp(0x0, b'\x33')])
|
||||
self.exec_ops([WriteOp(0x54b, b' '), WriteOp(0x54e, b'\x04'), WriteOp(0x5a8, b'\x02'), WriteOp(0x5f8, b'\x04'),
|
||||
WriteOp(0x7ec, b'\x01\x00\x00\x00'), WriteOp(0xc422, b'\x02'), WriteOp(0x0, b'\x33')])
|
||||
|
||||
def exec_ops(self, ops:Sequence[WriteOp|ReadOp|ScsiWriteOp]):
|
||||
cdbs:list[bytes] = []
|
||||
@@ -152,7 +152,14 @@ class ASM24Controller:
|
||||
def write(self, base_addr:int, data:bytes, ignore_cache:bool=True): return self.exec_ops([WriteOp(base_addr, data, ignore_cache)])
|
||||
|
||||
def scsi_write(self, buf:bytes, lba:int=0):
|
||||
self.exec_ops([ScsiWriteOp(buf, lba), WriteOp(0x171, b'\xff\xff\xff', ignore_cache=True), WriteOp(0xce6e, b'\x00\x00', ignore_cache=True)])
|
||||
if len(buf) > 0x4000: buf += b'\x00' * (round_up(len(buf), 0x10000) - len(buf))
|
||||
|
||||
for i in range(0, len(buf), 0x10000):
|
||||
self.exec_ops([ScsiWriteOp(buf[i:i+0x10000], lba), WriteOp(0x171, b'\xff\xff\xff', ignore_cache=True)])
|
||||
self.exec_ops([WriteOp(0xce6e, b'\x00\x00', ignore_cache=True)])
|
||||
|
||||
if len(buf) > 0x4000:
|
||||
for i in range(4): self.exec_ops([WriteOp(0xce40 + i, b'\x00', ignore_cache=True)])
|
||||
|
||||
def read(self, base_addr:int, length:int, stride:int=0xff) -> bytes:
|
||||
parts = self.exec_ops([ReadOp(base_addr + off, min(stride, length - off)) for off in range(0, length, stride)])
|
||||
@@ -253,7 +260,7 @@ class USBMMIOInterface(MMIOInterface):
|
||||
|
||||
if not self.pcimem:
|
||||
# Fast path for writing into buffer 0xf000
|
||||
use_cache = 0xa000 <= self.addr <= 0xb200
|
||||
use_cache = 0xa800 <= self.addr <= 0xb000
|
||||
return self.usb.scsi_write(bytes(data)) if self.addr == 0xf000 else self.usb.write(self.addr + off, bytes(data), ignore_cache=not use_cache)
|
||||
|
||||
_, acc_sz = self._acc_size(len(data) * struct.calcsize(self.fmt))
|
||||
|
||||
Reference in New Issue
Block a user