mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-05 05:04:27 -05:00
pylint 4.0.0 (#12642)
* cpu: fix spacing
* fix pylint
* fix pylint
* pylint 4.0.0
* lambda
* keep eval for now
* im so sorry
This commit is contained in:
@@ -30,10 +30,6 @@ persistent=yes
|
||||
# Specify a configuration file.
|
||||
#rcfile=
|
||||
|
||||
# When enabled, pylint would attempt to guess common misconfiguration and emit
|
||||
# user-friendly hints instead of false-positive error messages
|
||||
suggestion-mode=yes
|
||||
|
||||
# Allow loading of arbitrary C extensions. Extensions are imported into the
|
||||
# active Python interpreter and may run arbitrary code.
|
||||
unsafe-load-any-extension=no
|
||||
|
||||
@@ -153,7 +153,7 @@ CORRECT_DIVMOD_FOLDING, FUSE_OPTIM = ContextVar("CORRECT_DIVMOD_FOLDING", 0), Co
|
||||
ALLOW_DEVICE_USAGE, MAX_BUFFER_SIZE = ContextVar("ALLOW_DEVICE_USAGE", 1), ContextVar("MAX_BUFFER_SIZE", 0)
|
||||
FUSE_ATTENTION = ContextVar("FUSE_ATTENTION", 0)
|
||||
EMULATE = ContextVar("EMULATE", "")
|
||||
CPU_COUNT = ContextVar("CPU_COUNT", max(1, len(aff(0)) if (aff:=getattr(os, "sched_getaffinity", None)) else (os.cpu_count() or 1)))
|
||||
CPU_COUNT = ContextVar("CPU_COUNT", max(1, len(os.sched_getaffinity(0)) if (aff:=getattr(os, "sched_getaffinity", None)) else (os.cpu_count() or 1)))
|
||||
CPU_LLVM, AMD_LLVM = ContextVar("CPU_LLVM", 0), ContextVar("AMD_LLVM", 1)
|
||||
VIZ = PROFILE = ContextVar("VIZ", 0)
|
||||
SPEC = ContextVar("SPEC", 0)
|
||||
@@ -352,10 +352,10 @@ def capstone_flatdump(lib: bytes):
|
||||
print(f"{instr.address:#08x}: {instr.mnemonic}\t{instr.op_str}")
|
||||
sys.stdout.flush()
|
||||
|
||||
def wait_cond(cb, value=True, timeout_ms=10000, msg="") -> bool:
|
||||
def wait_cond(cb, *args, value=True, timeout_ms=10000, msg="") -> bool:
|
||||
start_time = int(time.perf_counter() * 1000)
|
||||
while int(time.perf_counter() * 1000) - start_time < timeout_ms:
|
||||
if (val:=cb()) == value: return val
|
||||
if (val:=cb(*args)) == value: return val
|
||||
raise TimeoutError(f"{msg}. Timed out after {timeout_ms} ms, condition not met: {val} != {value}")
|
||||
|
||||
# *** ctypes helpers
|
||||
|
||||
@@ -713,7 +713,7 @@ class PCIIface(PCIIfaceBase):
|
||||
def device_fini(self): self.dev_impl.fini()
|
||||
|
||||
class USBIface(PCIIface):
|
||||
def __init__(self, dev, dev_id):
|
||||
def __init__(self, dev, dev_id): # pylint: disable=super-init-not-called
|
||||
self.dev = dev
|
||||
self.usb = ASM24Controller()
|
||||
self.bars = setup_pci_bars(self.usb, gpu_bus=4, mem_base=0x10000000, pref_mem_base=(32 << 30))
|
||||
|
||||
@@ -424,7 +424,7 @@ class RemoteConnection:
|
||||
conns = RemoteConnection.all.keys()
|
||||
datas = {conn: conn.req.serialize() for conn in conns}
|
||||
reqs, hashes, hash_datas = sum(len(c.req._q) for c in conns), sum(len(c.req._h) for c in conns), sum(len(data) for data in datas.values())
|
||||
resps = []
|
||||
ret, resps = None, []
|
||||
with Timing(f"*** send {reqs:-3d} requests {hashes:-3d} hashes with len {hash_datas/1024:.2f} kB in ", enabled=DEBUG>=3):
|
||||
for conn,data in datas.items(): conn.conn.request("POST", "/batch", data)
|
||||
for conn in datas.keys():
|
||||
|
||||
@@ -113,7 +113,7 @@ class AM_GMC(AM_IP):
|
||||
for eng_i in range(18): self.adev.wreg_pair(f"reg{ip}VM_INVALIDATE_ENG{eng_i}_ADDR_RANGE", "_LO32", "_HI32", 0x1fffffffff)
|
||||
self.hub_initted[ip] = True
|
||||
|
||||
@functools.cache
|
||||
@functools.cache # pylint: disable=method-cache-max-size-none
|
||||
def get_pte_flags(self, pte_lv, is_table, frag, uncached, system, snooped, valid, extra=0):
|
||||
extra |= (am.AMDGPU_PTE_SYSTEM * system) | (am.AMDGPU_PTE_SNOOPED * snooped) | (am.AMDGPU_PTE_VALID * valid) | am.AMDGPU_PTE_FRAG(frag)
|
||||
if not is_table: extra |= (am.AMDGPU_PTE_WRITEABLE | am.AMDGPU_PTE_READABLE | am.AMDGPU_PTE_EXECUTABLE)
|
||||
@@ -175,7 +175,7 @@ class AM_SMU(AM_IP):
|
||||
|
||||
def _send_msg(self, msg:int, param:int, read_back_arg=False, timeout=10000, debug=False): # default timeout is 10 seconds
|
||||
self._smu_cmn_send_msg(msg, param, debug=debug)
|
||||
wait_cond(lambda: (self.adev.mmMP1_SMN_C2PMSG_90 if not debug else self.adev.mmMP1_SMN_C2PMSG_54).read(), value=1, timeout_ms=timeout,
|
||||
wait_cond((self.adev.mmMP1_SMN_C2PMSG_90 if not debug else self.adev.mmMP1_SMN_C2PMSG_54).read, value=1, timeout_ms=timeout,
|
||||
msg=f"SMU msg {msg:#x} timeout")
|
||||
return (self.adev.mmMP1_SMN_C2PMSG_82 if not debug else self.adev.mmMP1_SMN_C2PMSG_53).read() if read_back_arg else None
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ def elf_loader(blob:bytes, force_section_align:int=1) -> tuple[memoryview, list[
|
||||
for sh, trgt_sh_name, c_rels in rel + rela:
|
||||
target_image_off = next(tsh for tsh in sections if tsh.name == trgt_sh_name).header.sh_addr
|
||||
rels = [(r.r_offset, symtab[libc.ELF64_R_SYM(r.r_info)], libc.ELF64_R_TYPE(r.r_info), getattr(r, "r_addend", 0)) for r in c_rels]
|
||||
for roff, sym, r_type_, r_addend in rels:
|
||||
for _, sym, _, _ in rels:
|
||||
if sym.st_shndx == 0: raise RuntimeError(f'Attempting to relocate against an undefined symbol {repr(_strtab(sh_strtab, sym.st_name))}')
|
||||
relocs += [(target_image_off + roff, sections[sym.st_shndx].header.sh_addr + sym.st_value, rtype, raddend) for roff, sym, rtype, raddend in rels]
|
||||
|
||||
|
||||
@@ -30,10 +30,10 @@ class TLSFAllocator:
|
||||
self.blocks:dict[int, tuple[int, int|None, int|None, bool]] = {0: (size, None, None, True)} # size, next, prev, is_free
|
||||
self._insert_block(0, size)
|
||||
|
||||
@functools.cache
|
||||
@functools.cache # pylint: disable=method-cache-max-size-none
|
||||
def lv1(self, size): return size.bit_length()
|
||||
|
||||
@functools.cache
|
||||
@functools.cache # pylint: disable=method-cache-max-size-none
|
||||
def lv2(self, size): return (size - (1 << (size.bit_length() - 1))) // (1 << max(0, size.bit_length() - self.l2_cnt))
|
||||
|
||||
def _insert_block(self, start:int, size:int, prev:int|None=None):
|
||||
@@ -209,7 +209,7 @@ class MemoryManager:
|
||||
if getenv("MM_DEBUG", 0): print(f"mm {self.dev.devfmt}: unmapping {vaddr=:#x} ({size=:#x})")
|
||||
|
||||
ctx = PageTableTraverseContext(self.dev, self.root_page_table, vaddr, free_pts=True)
|
||||
for off, pt, pte_idx, pte_cnt, pte_covers in ctx.next(size):
|
||||
for _, pt, pte_idx, pte_cnt, _ in ctx.next(size):
|
||||
for pte_id in range(pte_idx, pte_idx + pte_cnt):
|
||||
assert pt.valid(pte_id), f"PTE not mapped: {pt.entry(pte_id):#x}"
|
||||
pt.set_entry(pte_id, paddr=0x0, valid=False)
|
||||
|
||||
@@ -124,6 +124,7 @@ class NV_FLCN(NV_IP):
|
||||
def __patch(cmd_id, cmd):
|
||||
patched_image = bytearray(image)
|
||||
|
||||
dmem_offset = 0
|
||||
hdr = nv.FALCON_APPLICATION_INTERFACE_HEADER_V1.from_buffer_copy(image[(app_hdr_off:=self.desc_v3.IMEMLoadSize+self.desc_v3.InterfaceOffset):])
|
||||
ents = (nv.FALCON_APPLICATION_INTERFACE_ENTRY_V1 * hdr.entryCount).from_buffer_copy(image[app_hdr_off + ctypes.sizeof(hdr):])
|
||||
for i in range(hdr.entryCount):
|
||||
@@ -334,7 +335,7 @@ class NV_GSP(NV_IP):
|
||||
# Fill up arguments
|
||||
queue_args = nv.MESSAGE_QUEUE_INIT_ARGUMENTS(sharedMemPhysAddr=queues_sysmem[0], pageTableEntryCount=pte_cnt, cmdQueueOffset=pt_size,
|
||||
statQueueOffset=pt_size + queue_size)
|
||||
rm_args, self.rm_args_sysmem = self.nvdev._alloc_boot_struct(nv.GSP_ARGUMENTS_CACHED(bDmemStack=True, messageQueueInitArguments=queue_args))
|
||||
_, self.rm_args_sysmem = self.nvdev._alloc_boot_struct(nv.GSP_ARGUMENTS_CACHED(bDmemStack=True, messageQueueInitArguments=queue_args))
|
||||
|
||||
# Build command queue header
|
||||
self.cmd_q_va, self.stat_q_va = queues_va + pt_size, queues_va + pt_size + queue_size
|
||||
@@ -481,7 +482,7 @@ class NV_GSP(NV_IP):
|
||||
params.ramfcMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=ramfc_alloc.paddrs[0][0], size=0x200, addressSpace=2, cacheAttrib=0)
|
||||
params.instanceMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=ramfc_alloc.paddrs[0][0], size=0x1000, addressSpace=2, cacheAttrib=0)
|
||||
|
||||
method_va, method_sysmem = System.alloc_sysmem(0x5000, contiguous=True)
|
||||
_, method_sysmem = System.alloc_sysmem(0x5000, contiguous=True)
|
||||
params.mthdbufMem = nv_gpu.NV_MEMORY_DESC_PARAMS(base=method_sysmem[0], size=0x5000, addressSpace=1, cacheAttrib=0)
|
||||
|
||||
if client is not None and client != self.priv_root and params.hObjectError != 0:
|
||||
@@ -557,7 +558,7 @@ class NV_GSP(NV_IP):
|
||||
self.nvdev.wreg(addr, (self.nvdev.rreg(addr) & ~mask) | (val & mask))
|
||||
elif op == 0x2: # reg poll
|
||||
addr, mask, val, _, _ = next(cmd_iter), next(cmd_iter), next(cmd_iter), next(cmd_iter), next(cmd_iter)
|
||||
wait_cond(lambda: (self.nvdev.rreg(addr) & mask), value=val, msg=f"Register {addr:#x} not equal to {val:#x} after polling")
|
||||
wait_cond(lambda a, m: (self.nvdev.rreg(a) & m), addr, mask, value=val, msg=f"Register {addr:#x} not equal to {val:#x} after polling")
|
||||
elif op == 0x3: time.sleep(next(cmd_iter) / 1e6) # delay us
|
||||
elif op == 0x4: # save reg
|
||||
addr, index = next(cmd_iter), next(cmd_iter)
|
||||
|
||||
@@ -152,6 +152,8 @@ class NVDev(PCIDevImplBase):
|
||||
return gzip.decompress(struct.pack("<4BL2B", 0x1f, 0x8b, 8, 0, 0, 0, 3) + image) if "COMPRESSION: YES" in info else image
|
||||
|
||||
def include(self, file:str):
|
||||
def _do_eval(s:str): return eval(s) # pylint: disable=eval-used
|
||||
|
||||
regs_off = {'NV_PFALCON_FALCON': 0x0, 'NV_PGSP_FALCON': 0x0, 'NV_PSEC_FALCON': 0x0, 'NV_PRISCV_RISCV': 0x1000, 'NV_PGC6_AON': 0x0, 'NV_PFSP': 0x0,
|
||||
'NV_PGC6_BSI': 0x0, 'NV_PFALCON_FBIF': 0x600, 'NV_PFALCON2_FALCON': 0x1000, 'NV_PBUS': 0x0, 'NV_PFB': 0x0, 'NV_PMC': 0x0, 'NV_PGSP_QUEUE': 0x0,
|
||||
'NV_VIRTUAL_FUNCTION':0xb80000}
|
||||
@@ -163,13 +165,13 @@ class NVDev(PCIDevImplBase):
|
||||
name, hi, lo = m.groups()
|
||||
|
||||
reg = next((r for r in self.reg_names if name.startswith(r+"_")), None)
|
||||
if reg is not None: self.__dict__[reg].add_field(name[len(reg)+1:].lower(), eval(lo), eval(hi))
|
||||
else: self.reg_offsets[name] = (eval(lo), eval(hi))
|
||||
if reg is not None: self.__dict__[reg].add_field(name[len(reg)+1:].lower(), _do_eval(lo), _do_eval(hi))
|
||||
else: self.reg_offsets[name] = (_do_eval(lo), _do_eval(hi))
|
||||
continue
|
||||
|
||||
if m:=re.match(r'#define\s+(\w+)\s*\(\s*(\w+)\s*\)\s*(.+)', raw): # reg set
|
||||
fn = m.groups()[2].strip().rstrip('\\').split('/*')[0].rstrip()
|
||||
name, value = m.groups()[0], eval(f"lambda {m.groups()[1]}: {fn}")
|
||||
name, value = m.groups()[0], _do_eval(f"lambda {m.groups()[1]}: {fn}")
|
||||
elif m:=re.match(r'#define\s+(\w+)\s+([0-9A-Fa-fx]+)(?![^\n]*:)', raw): name, value = m.groups()[0], int(m.groups()[1], 0) # reg value
|
||||
else: continue
|
||||
|
||||
|
||||
@@ -10,14 +10,14 @@ MAP_FIXED, MAP_LOCKED, MAP_POPULATE, MAP_NORESERVE = 0x10, 0 if OSX else 0x2000,
|
||||
class _System:
|
||||
def reserve_hugepages(self, cnt): os.system(f"sudo sh -c 'echo {cnt} > /proc/sys/vm/nr_hugepages'")
|
||||
|
||||
def memory_barrier(self): lib.atomic_thread_fence(__ATOMIC_SEQ_CST:=5) if (lib:=self.atomic_lib()) is not None else None
|
||||
def memory_barrier(self): lib.atomic_thread_fence(__ATOMIC_SEQ_CST:=5) if (lib:=self.atomic_lib) is not None else None
|
||||
|
||||
def lock_memory(self, addr:int, size:int):
|
||||
if libc.mlock(ctypes.c_void_p(addr), size): raise RuntimeError(f"Failed to lock memory at {addr:#x} with size {size:#x}")
|
||||
|
||||
def system_paddrs(self, vaddr:int, size:int) -> list[int]:
|
||||
self.pagemap().seek(vaddr // mmap.PAGESIZE * 8)
|
||||
return [(x & ((1<<55) - 1)) * mmap.PAGESIZE for x in array.array('Q', self.pagemap().read(size//mmap.PAGESIZE*8, binary=True))]
|
||||
self.pagemap.seek(vaddr // mmap.PAGESIZE * 8)
|
||||
return [(x & ((1<<55) - 1)) * mmap.PAGESIZE for x in array.array('Q', self.pagemap.read(size//mmap.PAGESIZE*8, binary=True))]
|
||||
|
||||
def alloc_sysmem(self, size:int, vaddr:int=0, contiguous:bool=False, data:bytes|None=None) -> tuple[int, list[int]]:
|
||||
assert not contiguous or size <= (2 << 20), "Contiguous allocation is only supported for sizes up to 2MB"
|
||||
@@ -36,17 +36,17 @@ class _System:
|
||||
if vendor == target_vendor and device in target_devices: result.append(pcibus)
|
||||
return sorted(result)
|
||||
|
||||
@functools.cache
|
||||
@functools.cached_property
|
||||
def atomic_lib(self): return ctypes.CDLL(ctypes.util.find_library('atomic')) if sys.platform == "linux" else None
|
||||
|
||||
@functools.cache
|
||||
@functools.cached_property
|
||||
def pagemap(self) -> FileIOInterface:
|
||||
if FileIOInterface(reloc_sysfs:="/proc/sys/vm/compact_unevictable_allowed", os.O_RDONLY).read()[0] != "0":
|
||||
os.system(cmd:=f"sudo sh -c 'echo 0 > {reloc_sysfs}'")
|
||||
assert FileIOInterface(reloc_sysfs, os.O_RDONLY).read()[0] == "0", f"Failed to disable migration of locked pages. Please run {cmd} manually."
|
||||
return FileIOInterface("/proc/self/pagemap", os.O_RDONLY)
|
||||
|
||||
@functools.cache
|
||||
@functools.cached_property
|
||||
def vfio(self) -> FileIOInterface|None:
|
||||
try:
|
||||
if not FileIOInterface.exists("/sys/module/vfio"): os.system("sudo modprobe vfio-pci disable_idle_d3=1")
|
||||
@@ -90,7 +90,7 @@ class PCIDevice:
|
||||
" to allow python accessing device or run with sudo") from e
|
||||
raise RuntimeError(f"Cannot resize BAR {i}: {e}. Ensure the resizable BAR option is enabled on your system.") from e
|
||||
|
||||
if getenv("VFIO", 0) and (vfio_fd:=System.vfio()) is not None:
|
||||
if getenv("VFIO", 0) and (vfio_fd:=System.vfio) is not None:
|
||||
FileIOInterface(f"/sys/bus/pci/devices/{self.pcibus}/driver_override", os.O_WRONLY).write("vfio-pci")
|
||||
FileIOInterface("/sys/bus/pci/drivers_probe", os.O_WRONLY).write(self.pcibus)
|
||||
iommu_group = FileIOInterface.readlink(f"/sys/bus/pci/devices/{self.pcibus}/iommu_group").split('/')[-1]
|
||||
|
||||
@@ -229,7 +229,7 @@ class ASM24Controller:
|
||||
for i in range(0, len(ops), bs:=(4 if OSX else 16)): self.exec_ops(list(itertools.chain.from_iterable(ops[i:i+bs])))
|
||||
|
||||
class USBMMIOInterface(MMIOInterface):
|
||||
def __init__(self, usb, addr, size, fmt, pcimem=True):
|
||||
def __init__(self, usb, addr, size, fmt, pcimem=True): # pylint: disable=super-init-not-called
|
||||
self.usb, self.addr, self.nbytes, self.fmt, self.pcimem, self.el_sz = usb, addr, size, fmt, pcimem, struct.calcsize(fmt)
|
||||
|
||||
def __getitem__(self, index): return self._access_items(index)
|
||||
@@ -256,13 +256,14 @@ class USBMMIOInterface(MMIOInterface):
|
||||
|
||||
acc, acc_size = self._acc_size(sz)
|
||||
return bytes(array.array(acc, [self._acc_one(off + i * acc_size, acc_size) for i in range(sz // acc_size)]))
|
||||
else: # write op
|
||||
data = struct.pack(self.fmt, data) if isinstance(data, int) else bytes(data)
|
||||
|
||||
if not self.pcimem:
|
||||
# Fast path for writing into buffer 0xf000
|
||||
use_cache = 0xa800 <= self.addr <= 0xb000
|
||||
return self.usb.scsi_write(bytes(data)) if self.addr == 0xf000 else self.usb.write(self.addr + off, bytes(data), ignore_cache=not use_cache)
|
||||
# write op
|
||||
data = struct.pack(self.fmt, data) if isinstance(data, int) else bytes(data)
|
||||
|
||||
_, acc_sz = self._acc_size(len(data) * struct.calcsize(self.fmt))
|
||||
self.usb.pcie_mem_write(self.addr+off, [int.from_bytes(data[i:i+acc_sz], "little") for i in range(0, len(data), acc_sz)], acc_sz)
|
||||
if not self.pcimem:
|
||||
# Fast path for writing into buffer 0xf000
|
||||
use_cache = 0xa800 <= self.addr <= 0xb000
|
||||
return self.usb.scsi_write(bytes(data)) if self.addr == 0xf000 else self.usb.write(self.addr + off, bytes(data), ignore_cache=not use_cache)
|
||||
|
||||
_, acc_sz = self._acc_size(len(data) * struct.calcsize(self.fmt))
|
||||
self.usb.pcie_mem_write(self.addr+off, [int.from_bytes(data[i:i+acc_sz], "little") for i in range(0, len(data), acc_sz)], acc_sz)
|
||||
|
||||
Reference in New Issue
Block a user