In-tree autogen: libc.py (#13217)

* checkout changes from autogen branch * parents * pylint happy * move sys to system in helpers.py * typo * typo
2026-01-07 22:23:55 -05:00 · 2025-11-11 22:13:48 -05:00
parent 222bb12ddf
commit 41a098a82d
16 changed files with 4460 additions and 6146 deletions
--- a/.github/workflows/autogen.yml
+++ b/.github/workflows/autogen.yml
@@ -71,13 +71,10 @@ jobs:
        diff /tmp/sqtt.py.bak tinygrad/runtime/autogen/sqtt.py
    - name: Verify Linux autogen
      run: |
-        cp tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
        cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
        cp tinygrad/runtime/autogen/ib.py /tmp/ib.py.bak
-        ./autogen_stubs.sh libc
        ./autogen_stubs.sh io_uring
        ./autogen_stubs.sh ib
-        diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
        diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
        diff /tmp/ib.py.bak tinygrad/runtime/autogen/ib.py
    - name: Verify WebGPU autogen
@@ -95,3 +92,21 @@ jobs:
        cp tinygrad/runtime/autogen/mesa.py /tmp/mesa.py.bak
        ./autogen_stubs.sh mesa
        diff /tmp/mesa.py.bak tinygrad/runtime/autogen/mesa.py
+  autogen-ng:
+    name: In-tree Autogen
+    runs-on: ubuntu-24.04
+    timeout-minutes: 15
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Setup Environment
+      uses: ./.github/actions/setup-tinygrad
+      with:
+        pydeps: 'clang>=20'
+    - name: Install autogen support packages
+      run: sudo apt-get install -y --no-install-recommends libclang-20-dev
+    - name: Verify Linux autogen
+      run: |
+        mv tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
+        python3 -c "from tinygrad.runtime.autogen import libc"
+        diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -233,7 +233,7 @@ jobs:
        python-version: '3.11'
        deps: linting
    - name: Lint bad-indentation and trailing-whitespace with pylint
-      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' --recursive=y .
+      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' --recursive=y . --ignore-paths='tinygrad/runtime/autogen'
    - name: Lint with ruff
      run: |
        pip3 install --upgrade --force-reinstall ruff==0.11.0
--- a/autogen_stubs.sh
+++ b/autogen_stubs.sh
@@ -254,23 +254,6 @@ generate_ib() {
  fixup $BASE/ib.py
 }

-generate_libc() {
-  clang2py -k cdefstum \
-    $(dpkg -L libc6-dev | grep sys/mman.h) \
-    $(dpkg -L libc6-dev | grep sys/syscall.h) \
-    /usr/include/string.h \
-    /usr/include/elf.h \
-    /usr/include/unistd.h \
-    /usr/include/asm-generic/mman-common.h \
-    -o $BASE/libc.py
-
-  sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/libc.py
-  sed -i "s\FIXME_STUB\libc\g" $BASE/libc.py
-  sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path, use_errno=True)\g" $BASE/libc.py
-
-  fixup $BASE/libc.py
-}
-
 generate_llvm() {
  INC="$(llvm-config-14 --includedir)"
  clang2py -k cdefstum \
@@ -554,7 +537,6 @@ elif [ "$1" == "sqtt" ]; then generate_sqtt
 elif [ "$1" == "qcom" ]; then generate_qcom
 elif [ "$1" == "io_uring" ]; then generate_io_uring
 elif [ "$1" == "ib" ]; then generate_ib
-elif [ "$1" == "libc" ]; then generate_libc
 elif [ "$1" == "llvm" ]; then generate_llvm
 elif [ "$1" == "kgsl" ]; then generate_kgsl
 elif [ "$1" == "adreno" ]; then generate_adreno
@@ -563,6 +545,6 @@ elif [ "$1" == "vfio" ]; then generate_vfio
 elif [ "$1" == "webgpu" ]; then generate_webgpu
 elif [ "$1" == "libusb" ]; then generate_libusb
 elif [ "$1" == "mesa" ]; then generate_mesa
-elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_libc; generate_am; generate_webgpu; generate_mesa
+elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_am; generate_webgpu; generate_mesa
 else echo "usage: $0 <type>"
 fi
--- a/test/unit/test_autogen.py
+++ b/test/unit/test_autogen.py
@@ -0,0 +1,47 @@
+import ctypes, subprocess, tempfile, unittest
+from tinygrad.helpers import WIN
+from tinygrad.runtime.support.c import Struct
+
+class TestAutogen(unittest.TestCase):
+  def test_packed_struct_sizeof(self):
+    layout = [('a', ctypes.c_char), ('b', ctypes.c_int, 5), ('c', ctypes.c_char)]
+    class X(ctypes.Structure): _fields_, _layout_ = layout, 'gcc-sysv'
+    class Y(ctypes.Structure): _fields_, _pack_, _layout_ = layout, 1, 'ms'
+    class Z(Struct): _packed_, _fields_ = True, layout
+    self.assertNotEqual(ctypes.sizeof(X), 4) # ctypes bug! gcc-13.3.0 says this should have size 4
+    self.assertEqual(ctypes.sizeof(Y), 6)
+    self.assertEqual(ctypes.sizeof(Z), 3)
+    layout = [('a', ctypes.c_int, 31), ('b', ctypes.c_int, 31), ('c', ctypes.c_int, 1), ('d', ctypes.c_int, 1)]
+    class Foo(ctypes.Structure): _fields_, _layout_ = layout, 'gcc-sysv'
+    class Bar(ctypes.Structure): _fields_, _pack_, _layout_ = layout, 1, 'ms'
+    class Baz(Struct): _fields_, _packed_ = layout, True
+    self.assertEqual(ctypes.sizeof(Foo), 12)
+    self.assertEqual(ctypes.sizeof(Bar), 12)
+    self.assertEqual(ctypes.sizeof(Baz), 8)
+
+  @unittest.skipIf(WIN, "doesn't compile on windows")
+  def test_packed_struct_interop(self):
+    class Baz(Struct): pass
+    Baz._packed_ = True
+    Baz._fields_ = [('a', ctypes.c_int, 30), ('b', ctypes.c_int, 30), ('c', ctypes.c_int, 2), ('d', ctypes.c_int, 2)]
+    src = '''
+      struct __attribute__((packed)) baz {
+        int a:30;
+        int b:30;
+        int c:2;
+        int d:2;
+      };
+
+      int test(struct baz x) {
+        return x.a + x.b + x.c + x.d;
+      }
+    '''
+    args = ('-x', 'c', '-fPIC', '-shared')
+    with tempfile.NamedTemporaryFile(suffix=".so") as f:
+      subprocess.check_output(('clang',) + args + ('-', '-o', f.name), input=src.encode('utf-8'))
+      b = Baz(0xAA000, 0x00BB0, 0, 1)
+      test = ctypes.CDLL(f.name).test
+      test.argtypes = [Baz]
+      self.assertEqual(test(b), b.a + b.b + b.c + b.d)
+
+if __name__ == "__main__": unittest.main()
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -361,10 +361,12 @@ def fetch(url:str, name:pathlib.Path|str|None=None, subdir:str|None=None, gunzip

 # *** Exec helpers

+def system(cmd, **kwargs): return subprocess.check_output(cmd.split(), **kwargs).decode().strip()
+
 def cpu_objdump(lib, objdump_tool='objdump'):
  with tempfile.NamedTemporaryFile(delete=True) as f:
    pathlib.Path(f.name).write_bytes(lib)
-    print(subprocess.check_output([objdump_tool, '-d', f.name]).decode('utf-8'))
+    print(system(f"{objdump_tool} -d {f.name}"))

 def capstone_flatdump(lib: bytes):
  try: import capstone
--- a/tinygrad/runtime/autogen/__init__.py
+++ b/tinygrad/runtime/autogen/__init__.py
@@ -0,0 +1,17 @@
+import importlib, pathlib
+from tinygrad.helpers import system
+
+root = (here:=pathlib.Path(__file__).parent).parents[2]
+
+def load(name, dll, files, **kwargs):
+  if not (f:=(root/(path:=kwargs.pop("path", __name__)).replace('.','/')/f"{name}.py")).exists():
+    files = files() if callable(files) else files
+    f.write_text(importlib.import_module("tinygrad.runtime.support.autogen").gen(dll, files, **kwargs))
+  return importlib.import_module(f"{path}.{name.replace('/', '.')}")
+
+def __getattr__(nm):
+  match nm:
+    case "libc": return load("libc", ["find_library('c')"], lambda: (
+      [i for i in system("dpkg -L libc6-dev").split() if 'sys/mman.h' in i or 'sys/syscall.h' in i] +
+      ["/usr/include/string.h", "/usr/include/elf.h", "/usr/include/unistd.h", "/usr/include/asm-generic/mman-common.h"]), use_errno=True)
+    case _: raise AttributeError(f"no such autogen: {nm}")
--- a/tinygrad/runtime/autogen/libc.py
+++ b/tinygrad/runtime/autogen/libc.py
--- a/tinygrad/runtime/ops_dsp.py
+++ b/tinygrad/runtime/ops_dsp.py
@@ -5,7 +5,7 @@ from tinygrad.device import BufferSpec, Compiled, Allocator, Compiler
 from tinygrad.runtime.ops_cpu import CPUAllocator
 from tinygrad.dtype import dtypes, DType, PtrDType
 from tinygrad.uop.ops import Ops, UOp
-from tinygrad.helpers import getenv, round_up, mv_address, to_mv, cpu_objdump, DEBUG
+from tinygrad.helpers import getenv, round_up, mv_address, to_mv, cpu_objdump, system, DEBUG
 from tinygrad.renderer.cstyle import ClangRenderer
 from tinygrad.runtime.autogen import libc, qcom_dsp
 if getenv("IOCTL"): import extra.dsp.run # noqa: F401 # pylint: disable=unused-import
@@ -123,10 +123,9 @@ class ClangCompiler(Compiler):

  def compile(self, src:str) -> bytes:
    # TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here
-    with tempfile.NamedTemporaryFile(delete=True) as output_file:
-      subprocess.check_output([getenv("CC", 'clang'), *self.args, '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC', '-ffreestanding', '-nostdlib',
-                               '-', '-o', str(output_file.name)], input=src.encode('utf-8'))
-      return pathlib.Path(output_file.name).read_bytes()
+    with tempfile.NamedTemporaryFile(delete=True) as f:
+      system(f"{getenv('CC','clang')} {' '.join(self.args)} -O2 -Wall -Werror -x c -fPIC -ffreestanding -nostdlib - -o {f.name}", input=src.encode())
+      return pathlib.Path(f.name).read_bytes()

  def disassemble(self, lib:bytes): return cpu_objdump(lib, self.objdump_tool)

--- a/tinygrad/runtime/support/autogen.py
+++ b/tinygrad/runtime/support/autogen.py
@@ -0,0 +1,126 @@
+import ctypes.util, importlib.metadata, itertools, re, functools, os
+from tinygrad.helpers import flatten, unwrap
+from clang.cindex import Config, Index, CursorKind as CK, TranslationUnit as TU, LinkageKind as LK, TokenKind as ToK, TypeKind as TK
+from clang.cindex import PrintingPolicy as PP, PrintingPolicyProperty as PPP, SourceRange
+
+assert importlib.metadata.version('clang')[:2] == "20"
+if not Config.loaded: Config.set_library_file(os.getenv("LIBCLANG_PATH", ctypes.util.find_library("clang-20")))
+
+def fst(c): return next(c.get_children())
+def last(c): return list(c.get_children())[-1]
+def readext(f, fst, snd=None):
+  with open(f, "r") as f:
+    f.seek(start:=(fst.start.offset if isinstance(fst, SourceRange) else fst))
+    return f.read((fst.end.offset if isinstance(fst, SourceRange) else snd)-start)
+def attrs(c): return list(filter(lambda k: (v:=k.value) >= 400 and v < 500, map(lambda c: c.kind, c.get_children())))
+
+base_rules = [(r'\s*\\\n\s*', ' '), (r'\s*\n\s*', ' '), (r'//.*', ''), (r'/\*.*?\*/', ''), (r'\b(0[xX][0-9a-fA-F]+|\d+)[uUlL]+\b', r'\1'),
+              (r'\b0+(?=\d)', ''), (r'\s*&&\s*', r' and '), (r'\s*\|\|\s*', r' or '), (r'\s*!\s*', ' not '),
+              (r'(struct|union|enum)\s*([a-zA-Z_][a-zA-Z0-9_]*\b)', r'\1_\2'),
+              (r'\((unsigned )?(char|uint64_t)\)', ''), (r'^.*\d+:\d+.*$', ''), (r'^.*\w##\w.*$', '')]
+
+ints = (TK.INT, TK.UINT, TK.LONG, TK.ULONG, TK.LONGLONG, TK.ULONGLONG)
+
+def gen(dll, files, args=[], prolog=[], rules=[], epilog=[], recsym=False, use_errno=False, anon_names={}, types={}, parse_macros=True):
+  macros, lines, anoncnt, types = [], [], itertools.count().__next__, {k:(v,True) for k,v in types.items()}
+  def tname(t, suggested_name=None, typedef=None) -> str:
+    suggested_name = anon_names.get(f"{(decl:=t.get_declaration()).location.file}:{decl.location.line}", suggested_name)
+    nonlocal lines, types, anoncnt
+    tmap = {TK.VOID:"None", TK.CHAR_U:"ctypes.c_ubyte", TK.UCHAR:"ctypes.c_ubyte", TK.CHAR_S:"ctypes.c_char", TK.SCHAR:"ctypes.c_char",
+            **{getattr(TK, k):f"ctypes.c_{k.lower()}" for k in ["BOOL", "WCHAR", "FLOAT", "DOUBLE", "LONGDOUBLE"]},
+            **{getattr(TK, k):f"ctypes.c_{'u' if 'U' in k else ''}int{sz}" for sz,k in
+               [(16, "USHORT"), (16, "SHORT"), (32, "UINT"), (32, "INT"), (64, "ULONG"), (64, "LONG"), (64, "ULONGLONG"), (64, "LONGLONG")]}}
+
+    if t.kind in tmap: return tmap[t.kind]
+    if t.spelling in types and types[t.spelling][1]: return types[t.spelling][0]
+    if ((f:=t).kind in (fks:=(TK.FUNCTIONPROTO, TK.FUNCTIONNOPROTO))) or (t.kind == TK.POINTER and (f:=t.get_pointee()).kind in fks):
+      return f"ctypes.CFUNCTYPE({tname(f.get_result())}{(', '+', '.join(map(tname, f.argument_types()))) if f.kind==TK.FUNCTIONPROTO else ''})"
+    match t.kind:
+      case TK.POINTER: return "ctypes.c_void_p" if (ptr:=t.get_pointee()).kind == TK.VOID else f"ctypes.POINTER({tname(ptr)})"
+      case TK.ELABORATED: return tname(t.get_named_type(), suggested_name)
+      case TK.TYPEDEF if t.spelling == t.get_canonical().spelling: return tname(t.get_canonical())
+      case TK.TYPEDEF:
+        defined, nm = (canon:=t.get_canonical()).spelling in types, tname(canon, typedef=t.spelling.replace('::', '_'))
+        types[t.spelling] = nm if t.spelling.startswith("__") else t.spelling.replace('::', '_'), True
+        # RECORDs need to handle typedefs specially to allow for self-reference
+        if canon.kind != TK.RECORD or defined: lines.append(f"{t.spelling.replace('::', '_')} = {nm}")
+        return types[t.spelling][0]
+      case TK.RECORD:
+        # TODO: packed unions
+        # TODO: pragma pack support
+        # check for forward declaration
+        if t.spelling in types: types[t.spelling] = (nm:=types[t.spelling][0]), len(list(t.get_fields())) != 0
+        else:
+          if decl.is_anonymous():
+            types[t.spelling] = (nm:=(suggested_name or (f"_anon{'struct' if decl.kind == CK.STRUCT_DECL else 'union'}{anoncnt()}")), True)
+          else: types[t.spelling] = (nm:=t.spelling.replace(' ', '_').replace('::', '_')), len(list(t.get_fields())) != 0
+          lines.append(f"class {nm}({'Struct' if decl.kind==CK.STRUCT_DECL else 'ctypes.Union'}): pass")
+          if typedef: lines.append(f"{typedef} = {nm}")
+        acnt = itertools.count().__next__
+        ll=["  ("+((fn:=f"'_{acnt()}'")+f", {tname(f.type, nm+fn[1:-1])}" if f.is_anonymous_record_decl() else f"'{f.spelling}', "+
+            tname(f.type, f'{nm}_{f.spelling}'))+(f',{f.get_bitfield_width()}' if f.is_bitfield() else '')+")," for f in t.get_fields()]
+        lines.extend(([f"{nm}._anonymous_ = ["+", ".join(f"'_{i}'" for i in range(n))+"]"] if (n:=acnt()) else [])+
+                     ([f"{nm}._packed_ = True"] * (CK.PACKED_ATTR in attrs(decl)))+([f"{nm}._fields_ = [",*ll,"]"] if ll else []))
+        return nm
+      case TK.ENUM:
+        # TODO: C++ and GNU C have forward declared enums
+        if decl.is_anonymous(): types[t.spelling] = suggested_name or f"_anonenum{anoncnt()}", True
+        else: types[t.spelling] = t.spelling.replace(' ', '_').replace('::', '_'), True
+        lines.append(f"{types[t.spelling][0]} = CEnum({tname(decl.enum_type)})\n" +
+                     "\n".join(f"{e.spelling} = {types[t.spelling][0]}.define('{e.spelling}', {e.enum_value})" for e in decl.get_children()
+                     if e.kind == CK.ENUM_CONSTANT_DECL) + "\n")
+        return types[t.spelling][0]
+      case TK.CONSTANTARRAY:
+        return f"({tname(t.get_array_element_type(), suggested_name.rstrip('s') if suggested_name else None)} * {t.get_array_size()})"
+      case TK.INCOMPLETEARRAY: return f"({tname(t.get_array_element_type(), suggested_name.rstrip('s') if suggested_name else None)} * 0)"
+      case _: raise NotImplementedError(f"unsupported type {t.kind}")
+
+  for f in files:
+    tu = Index.create().parse(f, args, options=TU.PARSE_DETAILED_PROCESSING_RECORD)
+    (pp:=PP.create(tu.cursor)).set_property(PPP.TerseOutput, 1)
+    for c in tu.cursor.walk_preorder():
+      if str(c.location.file) != str(f) and (not recsym or c.kind not in (CK.FUNCTION_DECL,)): continue
+      rollback = lines, types
+      try:
+        match c.kind:
+          case CK.FUNCTION_DECL if c.linkage == LK.EXTERNAL and dll:
+            # TODO: we could support name-mangling
+            lines.append(f"# {c.pretty_printed(pp)}\ntry: ({c.spelling}:=dll.{c.spelling}).restype, {c.spelling}.argtypes = "
+              f"{tname(c.result_type)}, [{', '.join(tname(arg.type) for arg in c.get_arguments())}]\nexcept AttributeError: pass\n")
+          case CK.STRUCT_DECL | CK.UNION_DECL | CK.TYPEDEF_DECL | CK.ENUM_DECL: tname(c.type)
+          case CK.MACRO_DEFINITION if parse_macros and len(toks:=list(c.get_tokens())) > 1:
+            if toks[1].spelling == '(' and toks[0].extent.end.column == toks[1].extent.start.column:
+              it = iter(toks[1:])
+              _args = [t.spelling for t in itertools.takewhile(lambda t:t.spelling!=')', it) if t.kind == ToK.IDENTIFIER]
+              if len(body:=list(it)) == 0: continue
+              macros += [f"{c.spelling} = lambda {','.join(_args)}: {readext(f, body[0].location.offset, toks[-1].extent.end.offset)}"]
+            else: macros += [f"{c.spelling} = {readext(f, toks[1].location.offset, toks[-1].extent.end.offset)}"]
+          case CK.VAR_DECL if c.linkage == LK.INTERNAL:
+            if (c.type.kind == TK.CONSTANTARRAY and c.type.get_array_element_type().get_canonical().kind in ints and
+                (init:=last(c)).kind == CK.INIT_LIST_EXPR and all(re.match(r"\[.*\].*=", readext(f, c.extent)) for c in init.get_children())):
+              cs = init.get_children()
+              macros += [f"{c.spelling} = {{{','.join(f'{readext(f,next(it:=c.get_children()).extent)}:{readext(f,next(it).extent)}' for c in cs)}}}"]
+            elif c.type.get_canonical().kind in ints: macros += [f"{c.spelling} = {readext(f, last(c).extent)}"]
+            else: macros += [f"{c.spelling} = {tname(c.type)}({readext(f, last(c).extent)})"]
+          case CK.VAR_DECL if c.linkage == LK.EXTERNAL and dll:
+            lines.append(f"try: {c.spelling} = {tname(c.type)}.in_dll(dll, '{c.spelling}')\nexcept (ValueError,AttributeError): pass")
+      except NotImplementedError as e:
+        print(f"skipping {c.spelling}: {e}")
+        lines, types = rollback
+  main = (f"# mypy: ignore-errors\nimport ctypes{', os' if any('os' in s for s in dll) else ''}\n"
+    "from tinygrad.helpers import unwrap\nfrom tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR\n" + '\n'.join([*prolog,
+      *(["from ctypes.util import find_library"]*any('find_library' in s for s in dll)),
+      *(["def dll():",*flatten([[f"  try: return ctypes.CDLL(unwrap({d}){', use_errno=True' if use_errno else ''})",'  except: pass'] for d in dll]),
+         "  return None", "dll = dll()\n"]*bool(dll)), *lines]) + '\n')
+  macros = [r for m in macros if (r:=functools.reduce(lambda s,r:re.sub(r[0], r[1], s), rules + base_rules, m))]
+  while True:
+    try:
+      exec(main + '\n'.join(macros), {})
+      break
+    except (SyntaxError, NameError, TypeError) as e:
+      macrono = unwrap(e.lineno if isinstance(e, SyntaxError) else unwrap(unwrap(e.__traceback__).tb_next).tb_lineno) - main.count('\n') - 1
+      assert macrono >= 0 and macrono < len(macros), f"error outside macro range: {e}"
+      print(f"skipping {macros[macrono]}: {e}")
+      del macros[macrono]
+    except Exception as e: raise Exception("parsing failed") from e
+  return main + '\n'.join(macros + epilog)
--- a/tinygrad/runtime/support/c.py
+++ b/tinygrad/runtime/support/c.py
@@ -0,0 +1,72 @@
+import ctypes, functools, sys
+from typing import TYPE_CHECKING
+
+def _do_ioctl(__idir, __base, __nr, __struct, __fd, **kwargs):
+  import tinygrad.runtime.support.hcq as hcq, fcntl
+  ioctl = __fd.ioctl if isinstance(__fd, hcq.FileIOInterface) else functools.partial(fcntl.ioctl, __fd)
+  if (rc:=ioctl((__idir<<30)|(ctypes.sizeof(out:=__struct(**kwargs))<<16)|(__base<<8)|__nr, out)): raise RuntimeError(f"ioctl returned {rc}")
+  return out
+
+def _IO(base, nr): return functools.partial(_do_ioctl, 0, ord(base) if isinstance(base, str) else base, nr, None)
+def _IOW(base, nr, typ): return functools.partial(_do_ioctl, 1, ord(base) if isinstance(base, str) else base, nr, typ)
+def _IOR(base, nr, typ): return functools.partial(_do_ioctl, 2, ord(base) if isinstance(base, str) else base, nr, typ)
+def _IOWR(base, nr, typ): return functools.partial(_do_ioctl, 3, ord(base) if isinstance(base, str) else base, nr, typ)
+
+def CEnum(typ: type[ctypes._SimpleCData]):
+  class _CEnum(typ): # type: ignore
+    _val_to_name_: dict[int,str] = {}
+
+    @classmethod
+    def from_param(cls, val): return val if isinstance(val, cls) else cls(val)
+    @classmethod
+    def get(cls, val, default="unknown"): return cls._val_to_name_.get(val.value if isinstance(val, cls) else val, default)
+    @classmethod
+    def items(cls): return cls._val_to_name_.items()
+    @classmethod
+    def define(cls, name, val):
+      cls._val_to_name_[val] = name
+      return val
+
+    def __eq__(self, other): return self.value == other
+    def __repr__(self): return self.get(self) if self.value in self.__class__._val_to_name_ else str(self.value)
+
+  return _CEnum
+
+# supports gcc (C11) __attribute__((packed))
+if TYPE_CHECKING: Struct = ctypes.Structure
+else:
+  class MetaStruct(type(ctypes.Structure)):
+    def __new__(mcs, name, bases, dct):
+      fields = dct.pop("_fields_", None)
+      cls = super().__new__(mcs, name, bases, dct)
+      if dct.get("_packed_", False) and fields is not None: mcs._build(cls, fields)
+      return cls
+
+    def __setattr__(cls, k, v):
+      # NB: _fields_ must be set after _packed_ because PyCStructType_setattro marks _fields_ as final.
+      if k == "_fields_" and getattr(cls, "_packed_", False): type(cls)._build(cls, v)
+      elif k == "_packed_" and hasattr(cls, "_fields_"): type(cls)._build(cls, cls._fields_)
+      else: super().__setattr__(k, v)
+
+    @staticmethod
+    def _build(cls, fields):
+      o = 0
+      for n,t,b in [(f[0], f[1], f[2] if len(f) == 3 else 0) for f in fields]:
+        if b == 0: o = (o + 7) & ~7
+        m = (1 << (sz:=ctypes.sizeof(t)*8 if b == 0 else b)) - 1
+        def _s(self,v,m,s,b): self._data[:] = ((int.from_bytes(self._data,sys.byteorder)&~(m<<s))|((v&m)<<s)).to_bytes(len(self._data), sys.byteorder)
+        setattr(cls, n, property(functools.partial(lambda self,m,s:(int.from_bytes(self._data,sys.byteorder)>>s)&m,m=m,s=o),
+                                 functools.partial(_s,m=m,s=o,b=b)))
+        o += sz
+
+      type(ctypes.Structure).__setattr__(cls, '_fields_', [('_data', ctypes.c_ubyte * ((o + 7) // 8))])
+      type(ctypes.Structure).__setattr__(cls, '_packed_', True)
+      setattr(cls, '_packed_fields_', fields)
+
+  class Struct(ctypes.Structure, metaclass=MetaStruct):
+    def __init__(self, *args, **kwargs):
+      if hasattr(self, '_packed_fields_'):
+        for f,v in zip(self._packed_fields_, args): setattr(self, f[0], v)
+        for k,v in kwargs.items(): setattr(self, k, v)
+      else: super().__init__(*args, **kwargs)
+
--- a/tinygrad/runtime/support/compiler_amd.py
+++ b/tinygrad/runtime/support/compiler_amd.py
@@ -1,4 +1,5 @@
-import ctypes, subprocess
+import ctypes
+from tinygrad.helpers import system
 import tinygrad.runtime.autogen.comgr as comgr
 assert comgr.AMD_COMGR_LANGUAGE_HIP == 4
 try:
@@ -13,7 +14,7 @@ from tinygrad.runtime.support.compiler_cpu import LLVMCompiler
 from tinygrad.helpers import OSX, to_char_p_p

 def amdgpu_disassemble(lib:bytes):
-  asm = subprocess.check_output(["llvm-objdump" if OSX else "/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], input=lib).decode("utf-8").splitlines()
+  asm = system(f"{'llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines()
  while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop()
  print("\n".join(asm))

--- a/tinygrad/runtime/support/compiler_cuda.py
+++ b/tinygrad/runtime/support/compiler_cuda.py
@@ -1,6 +1,6 @@
 import subprocess, hashlib, tempfile, ctypes, re, pathlib
 from typing import Callable
-from tinygrad.helpers import to_char_p_p, colored, init_c_var, getenv
+from tinygrad.helpers import to_char_p_p, colored, init_c_var, getenv, system
 import tinygrad.runtime.autogen.nvrtc as nvrtc
 from tinygrad.device import Compiler, CompileError

@@ -37,7 +37,7 @@ def cuda_disassemble(lib:bytes, arch:str):
    fn = (pathlib.Path(tempfile.gettempdir()) / f"tinycuda_{hashlib.md5(lib).hexdigest()}").as_posix()
    with open(fn, "wb") as f: f.write(lib)
    subprocess.run(["ptxas", f"-arch={arch}", "-o", fn, fn], check=False, stderr=subprocess.DEVNULL) # optional ptx -> sass step for CUDA=1
-    print(subprocess.check_output(['nvdisasm', fn]).decode('utf-8'))
+    print(system(f'nvdisasm {fn}'))
  except Exception as e: print("Failed to generate SASS", str(e), "Make sure your PATH contains ptxas/nvdisasm binary of compatible version.")

 class CUDACompiler(Compiler):
--- a/tinygrad/runtime/support/compiler_mesa.py
+++ b/tinygrad/runtime/support/compiler_mesa.py
@@ -1,6 +1,6 @@
-import base64, ctypes, pathlib, tempfile, hashlib, subprocess
+import base64, ctypes, pathlib, tempfile, hashlib
 from tinygrad.device import Compiler
-from tinygrad.helpers import cpu_objdump
+from tinygrad.helpers import cpu_objdump, system
 import tinygrad.runtime.autogen.mesa as mesa
 from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, expect, cerr
 try: import tinygrad.runtime.autogen.llvm as llvm
@@ -82,5 +82,5 @@ class NAKCompiler(NIRCompiler):
    try:
      fn = (pathlib.Path(tempfile.gettempdir()) / f"tinynak_{hashlib.md5(lib).hexdigest()}").as_posix()
      with open(fn, "wb") as f: f.write(lib[ctypes.sizeof(mesa.struct_nak_shader_info):])
-      print(subprocess.check_output(['nvdisasm', "-b", f"SM{self.arch[3:]}", fn]).decode('utf-8'))
+      print(system(f"nvdisasm -b SM{self.arch[3:]} {fn}"))
    except Exception as e: print("Failed to generate SASS", str(e), "Make sure your PATH contains nvdisasm binary of compatible version.")
--- a/tinygrad/runtime/support/elf.py
+++ b/tinygrad/runtime/support/elf.py
@@ -1,7 +1,7 @@
 import struct, ctypes, ctypes.util
 from dataclasses import dataclass
 from tinygrad.helpers import getbits, i2u, unwrap
-import tinygrad.runtime.autogen.libc as libc
+from tinygrad.runtime.autogen import libc

@dataclass(frozen=True)
 class ElfSection: name:str; header:libc.Elf64_Shdr; content:bytes # noqa: E702
--- a/tinygrad/runtime/support/llvm.py
+++ b/tinygrad/runtime/support/llvm.py
@@ -1,5 +1,5 @@
-import ctypes.util, os, sys, subprocess
-from tinygrad.helpers import DEBUG, OSX, getenv
+import ctypes.util, os, sys
+from tinygrad.helpers import DEBUG, OSX, getenv, system

 if sys.platform == 'win32':
  # Windows llvm distribution doesn't seem to add itself to PATH or anywhere else where it can be easily retrieved from.
@@ -10,7 +10,7 @@ if sys.platform == 'win32':
 elif OSX:
  # Will raise FileNotFoundError if brew is not installed
  # `brew --prefix` will return even if formula is not installed
-  if not os.path.exists(brew_prefix:=subprocess.check_output(['brew', '--prefix', 'llvm@20']).decode().strip()):
+  if not os.path.exists(brew_prefix:=system("brew --prefix llvm@20")):
    raise FileNotFoundError('LLVM not found, you can install it with `brew install llvm@20`')
  LLVM_PATH: str|None = os.path.join(brew_prefix, 'lib', 'libLLVM.dylib')
 else:
--- a/tinygrad/runtime/support/webgpu.py
+++ b/tinygrad/runtime/support/webgpu.py
@@ -1,10 +1,10 @@
-import ctypes.util, os, subprocess, platform, sysconfig
-from tinygrad.helpers import OSX
+import ctypes.util, os, platform, sysconfig
+from tinygrad.helpers import system, OSX

 WEBGPU_PATH: str | None

 if OSX:
-  if not os.path.exists(brew_prefix:=subprocess.check_output(['brew', '--prefix', 'dawn']).decode().strip()):
+  if not os.path.exists(brew_prefix:=system("brew --prefix dawn")):
    raise FileNotFoundError('dawn library not found. Install it with `brew tap wpmed92/dawn && brew install dawn`')
  WEBGPU_PATH = os.path.join(brew_prefix, 'lib', 'libwebgpu_dawn.dylib')
 elif platform.system() == "Windows":