In-tree autogen: libc.py (#13217)

* checkout changes from autogen branch

* parents

* pylint happy

* move sys to system in helpers.py

* typo

* typo
This commit is contained in:
Christopher Milan
2025-11-11 22:13:48 -05:00
committed by GitHub
parent 222bb12ddf
commit 41a098a82d
16 changed files with 4460 additions and 6146 deletions

View File

@@ -71,13 +71,10 @@ jobs:
diff /tmp/sqtt.py.bak tinygrad/runtime/autogen/sqtt.py
- name: Verify Linux autogen
run: |
cp tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
cp tinygrad/runtime/autogen/ib.py /tmp/ib.py.bak
./autogen_stubs.sh libc
./autogen_stubs.sh io_uring
./autogen_stubs.sh ib
diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
diff /tmp/ib.py.bak tinygrad/runtime/autogen/ib.py
- name: Verify WebGPU autogen
@@ -95,3 +92,21 @@ jobs:
cp tinygrad/runtime/autogen/mesa.py /tmp/mesa.py.bak
./autogen_stubs.sh mesa
diff /tmp/mesa.py.bak tinygrad/runtime/autogen/mesa.py
autogen-ng:
name: In-tree Autogen
runs-on: ubuntu-24.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
pydeps: 'clang>=20'
- name: Install autogen support packages
run: sudo apt-get install -y --no-install-recommends libclang-20-dev
- name: Verify Linux autogen
run: |
mv tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
python3 -c "from tinygrad.runtime.autogen import libc"
diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py

View File

@@ -233,7 +233,7 @@ jobs:
python-version: '3.11'
deps: linting
- name: Lint bad-indentation and trailing-whitespace with pylint
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y . --ignore-paths='tinygrad/runtime/autogen'
- name: Lint with ruff
run: |
pip3 install --upgrade --force-reinstall ruff==0.11.0

View File

@@ -254,23 +254,6 @@ generate_ib() {
fixup $BASE/ib.py
}
generate_libc() {
clang2py -k cdefstum \
$(dpkg -L libc6-dev | grep sys/mman.h) \
$(dpkg -L libc6-dev | grep sys/syscall.h) \
/usr/include/string.h \
/usr/include/elf.h \
/usr/include/unistd.h \
/usr/include/asm-generic/mman-common.h \
-o $BASE/libc.py
sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/libc.py
sed -i "s\FIXME_STUB\libc\g" $BASE/libc.py
sed -i "s\FunctionFactoryStub()\None if (libc_path := ctypes.util.find_library('c')) is None else ctypes.CDLL(libc_path, use_errno=True)\g" $BASE/libc.py
fixup $BASE/libc.py
}
generate_llvm() {
INC="$(llvm-config-14 --includedir)"
clang2py -k cdefstum \
@@ -554,7 +537,6 @@ elif [ "$1" == "sqtt" ]; then generate_sqtt
elif [ "$1" == "qcom" ]; then generate_qcom
elif [ "$1" == "io_uring" ]; then generate_io_uring
elif [ "$1" == "ib" ]; then generate_ib
elif [ "$1" == "libc" ]; then generate_libc
elif [ "$1" == "llvm" ]; then generate_llvm
elif [ "$1" == "kgsl" ]; then generate_kgsl
elif [ "$1" == "adreno" ]; then generate_adreno
@@ -563,6 +545,6 @@ elif [ "$1" == "vfio" ]; then generate_vfio
elif [ "$1" == "webgpu" ]; then generate_webgpu
elif [ "$1" == "libusb" ]; then generate_libusb
elif [ "$1" == "mesa" ]; then generate_mesa
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_libc; generate_am; generate_webgpu; generate_mesa
elif [ "$1" == "all" ]; then generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_am; generate_webgpu; generate_mesa
else echo "usage: $0 <type>"
fi

47
test/unit/test_autogen.py Normal file
View File

@@ -0,0 +1,47 @@
import ctypes, subprocess, tempfile, unittest
from tinygrad.helpers import WIN
from tinygrad.runtime.support.c import Struct
class TestAutogen(unittest.TestCase):
  """Checks the packed-struct emulation provided by `Struct` against plain ctypes and against clang."""
  def test_packed_struct_sizeof(self):
    """A packed `Struct` must be sized by its bit layout, unlike the two stock ctypes layouts."""
    # char, 5-bit int bitfield, char
    fields = [('a', ctypes.c_char), ('b', ctypes.c_int, 5), ('c', ctypes.c_char)]
    class X(ctypes.Structure):
      _fields_ = fields
      _layout_ = 'gcc-sysv'
    class Y(ctypes.Structure):
      _fields_ = fields
      _pack_ = 1
      _layout_ = 'ms'
    class Z(Struct):
      _packed_ = True
      _fields_ = fields
    self.assertNotEqual(ctypes.sizeof(X), 4) # ctypes bug! gcc-13.3.0 says this should have size 4
    self.assertEqual(ctypes.sizeof(Y), 6)
    self.assertEqual(ctypes.sizeof(Z), 3)

    # four int bitfields adding up to exactly 64 bits
    fields = [('a', ctypes.c_int, 31), ('b', ctypes.c_int, 31), ('c', ctypes.c_int, 1), ('d', ctypes.c_int, 1)]
    class Foo(ctypes.Structure):
      _fields_ = fields
      _layout_ = 'gcc-sysv'
    class Bar(ctypes.Structure):
      _fields_ = fields
      _pack_ = 1
      _layout_ = 'ms'
    class Baz(Struct):
      _fields_ = fields
      _packed_ = True
    self.assertEqual(ctypes.sizeof(Foo), 12)
    self.assertEqual(ctypes.sizeof(Bar), 12)
    self.assertEqual(ctypes.sizeof(Baz), 8)

  @unittest.skipIf(WIN, "doesn't compile on windows")
  def test_packed_struct_interop(self):
    """A packed `Struct` passed by value must be read correctly by code compiled with clang."""
    class Baz(Struct): pass
    # attributes are assigned after class creation, _packed_ first and _fields_ second
    Baz._packed_ = True
    Baz._fields_ = [('a', ctypes.c_int, 30), ('b', ctypes.c_int, 30), ('c', ctypes.c_int, 2), ('d', ctypes.c_int, 2)]
    src = '''
struct __attribute__((packed)) baz {
int a:30;
int b:30;
int c:2;
int d:2;
};
int test(struct baz x) {
return x.a + x.b + x.c + x.d;
}
'''
    with tempfile.NamedTemporaryFile(suffix=".so") as lib:
      # the C source is fed to clang on stdin and the shared object is written over the temp file
      compile_cmd = ['clang', '-x', 'c', '-fPIC', '-shared', '-', '-o', lib.name]
      subprocess.check_output(compile_cmd, input=src.encode('utf-8'))
      value = Baz(0xAA000, 0x00BB0, 0, 1)
      c_test = ctypes.CDLL(lib.name).test
      c_test.argtypes = [Baz]
      expected = value.a + value.b + value.c + value.d
      self.assertEqual(c_test(value), expected)
if __name__ == "__main__": unittest.main()

View File

@@ -361,10 +361,12 @@ def fetch(url:str, name:pathlib.Path|str|None=None, subdir:str|None=None, gunzip
# *** Exec helpers
def system(cmd, **kwargs):
  """Run a command and return its stdout, decoded and stripped of surrounding whitespace.

  cmd is either a command-line string, which is split on whitespace (no quoting or escaping is
  interpreted), or an already-split sequence of arguments, which is passed through unchanged.
  Use the sequence form when an argument may itself contain whitespace, e.g. a file path.
  Extra keyword arguments are forwarded to subprocess.check_output.
  Raises subprocess.CalledProcessError when the command exits with a non-zero status.
  """
  argv = cmd.split() if isinstance(cmd, str) else list(cmd)
  return subprocess.check_output(argv, **kwargs).decode().strip()
def cpu_objdump(lib, objdump_tool='objdump'):
with tempfile.NamedTemporaryFile(delete=True) as f:
pathlib.Path(f.name).write_bytes(lib)
print(subprocess.check_output([objdump_tool, '-d', f.name]).decode('utf-8'))
print(system(f"{objdump_tool} -d {f.name}"))
def capstone_flatdump(lib: bytes):
try: import capstone

View File

@@ -0,0 +1,17 @@
import importlib, pathlib
from tinygrad.helpers import system
# `here` is the directory that contains this file; `root` is three directory levels above `here`.
# load() resolves dotted module paths relative to `root`.
root = (here:=pathlib.Path(__file__).parent).parents[2]
def load(name, dll, files, **kwargs):
  """Import the autogen module `name`, generating its source file first when it does not exist.

  dll and files are handed to the generator's gen(); files may be a callable, in which case it is
  only called when generation is actually needed. The optional `path` keyword is the dotted package
  the module lives in (defaults to this module's name); all other keywords are forwarded to gen().
  """
  path = kwargs.pop("path", __name__)
  target = root / path.replace('.','/') / f"{name}.py"
  if not target.exists():
    if callable(files): files = files()
    # the generator is imported lazily so that it is only needed when a file has to be produced
    generator = importlib.import_module("tinygrad.runtime.support.autogen")
    target.write_text(generator.gen(dll, files, **kwargs))
  module_name = f"{path}.{name.replace('/', '.')}"
  return importlib.import_module(module_name)
def __getattr__(nm):
  """Module-level attribute hook: resolve a known autogen name to its (possibly freshly generated) module.

  Raises AttributeError for any name that is not a known autogen target.
  """
  if nm != "libc": raise AttributeError(f"no such autogen: {nm}")

  def libc_headers():
    # mman.h and syscall.h are located through the libc6-dev package listing; the rest are fixed paths
    packaged = [i for i in system("dpkg -L libc6-dev").split() if 'sys/mman.h' in i or 'sys/syscall.h' in i]
    fixed = ["/usr/include/string.h", "/usr/include/elf.h", "/usr/include/unistd.h", "/usr/include/asm-generic/mman-common.h"]
    return packaged + fixed

  return load("libc", ["find_library('c')"], libc_headers, use_errno=True)

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,7 @@ from tinygrad.device import BufferSpec, Compiled, Allocator, Compiler
from tinygrad.runtime.ops_cpu import CPUAllocator
from tinygrad.dtype import dtypes, DType, PtrDType
from tinygrad.uop.ops import Ops, UOp
from tinygrad.helpers import getenv, round_up, mv_address, to_mv, cpu_objdump, DEBUG
from tinygrad.helpers import getenv, round_up, mv_address, to_mv, cpu_objdump, system, DEBUG
from tinygrad.renderer.cstyle import ClangRenderer
from tinygrad.runtime.autogen import libc, qcom_dsp
if getenv("IOCTL"): import extra.dsp.run # noqa: F401 # pylint: disable=unused-import
@@ -123,10 +123,9 @@ class ClangCompiler(Compiler):
def compile(self, src:str) -> bytes:
# TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here
with tempfile.NamedTemporaryFile(delete=True) as output_file:
subprocess.check_output([getenv("CC", 'clang'), *self.args, '-O2', '-Wall', '-Werror', '-x', 'c', '-fPIC', '-ffreestanding', '-nostdlib',
'-', '-o', str(output_file.name)], input=src.encode('utf-8'))
return pathlib.Path(output_file.name).read_bytes()
with tempfile.NamedTemporaryFile(delete=True) as f:
system(f"{getenv('CC','clang')} {' '.join(self.args)} -O2 -Wall -Werror -x c -fPIC -ffreestanding -nostdlib - -o {f.name}", input=src.encode())
return pathlib.Path(f.name).read_bytes()
def disassemble(self, lib:bytes): return cpu_objdump(lib, self.objdump_tool)

View File

@@ -0,0 +1,126 @@
import ctypes.util, importlib.metadata, itertools, re, functools, os
from tinygrad.helpers import flatten, unwrap
from clang.cindex import Config, Index, CursorKind as CK, TranslationUnit as TU, LinkageKind as LK, TokenKind as ToK, TypeKind as TK
from clang.cindex import PrintingPolicy as PP, PrintingPolicyProperty as PPP, SourceRange
# Only run against an installed `clang` Python package whose version string starts with "20".
assert importlib.metadata.version('clang')[:2] == "20"
# Unless Config reports a library as already loaded, tell the bindings which libclang to use:
# the LIBCLANG_PATH environment variable if set, otherwise whatever find_library("clang-20") returns.
if not Config.loaded: Config.set_library_file(os.getenv("LIBCLANG_PATH", ctypes.util.find_library("clang-20")))
def fst(c):
  """Return the first child of cursor `c` (raises StopIteration when there are no children)."""
  children = c.get_children()
  return next(children)
def last(c):
  """Return the last child of cursor `c` (raises IndexError when there are no children)."""
  children = list(c.get_children())
  return children[-1]
def readext(f, fst, snd=None):
  """Return the text of file `f` between two byte offsets.

  Call either as readext(f, start, end) with integer byte offsets, or as readext(f, extent) with a
  SourceRange, in which case extent.start.offset and extent.end.offset are used.

  The file is read in binary mode and the slice is decoded afterwards: the offsets are byte
  offsets, while a text-mode file only accepts seek positions obtained from tell() and counts
  characters in read(), so any non-ASCII byte in the file would shift or lengthen the result.
  """
  # plain ints are already offsets; the int check comes first so SourceRange is only consulted for other objects
  if isinstance(fst, int) or not isinstance(fst, SourceRange): start, end = fst, snd
  else: start, end = fst.start.offset, fst.end.offset
  with open(f, "rb") as fh:
    fh.seek(start)
    return fh.read(end - start).decode()
def attrs(c):
  """Return the kinds of the children of `c` whose numeric kind value lies in [400, 500)."""
  child_kinds = (child.kind for child in c.get_children())
  return [kind for kind in child_kinds if 400 <= kind.value < 500]
# (regex, replacement) pairs applied in order (via re.sub) to the text of each macro / constant by gen().
# In order, they:
#   - join backslash-newline continuations and collapse remaining newlines into a single space
#   - remove `// ...` and `/* ... */` comments
#   - drop u/U/l/L suffixes from hex and decimal integer literals
#   - strip leading zeros that are followed by another digit
#   - rewrite `&&`, `||` and `!` as ` and `, ` or ` and ` not `
#   - rewrite `struct X` / `union X` / `enum X` as `struct_X` / `union_X` / `enum_X`
#   - remove `(char)`, `(unsigned char)` and `(uint64_t)` casts
#   - blank out any text containing `<digits>:<digits>` or a `##` token paste
# NOTE(review): the `!` rule also matches the `!` of `!=`, turning `a != b` into `a not = b` — confirm this is intended.
# NOTE(review): the leading-zero rule turns a C octal literal such as 0644 into decimal 644, and the
# fractional part of a literal such as 1.05 into 1.5 — confirm no affected macro relies on those values.
base_rules = [(r'\s*\\\n\s*', ' '), (r'\s*\n\s*', ' '), (r'//.*', ''), (r'/\*.*?\*/', ''), (r'\b(0[xX][0-9a-fA-F]+|\d+)[uUlL]+\b', r'\1'),
(r'\b0+(?=\d)', ''), (r'\s*&&\s*', r' and '), (r'\s*\|\|\s*', r' or '), (r'\s*!\s*', ' not '),
(r'(struct|union|enum)\s*([a-zA-Z_][a-zA-Z0-9_]*\b)', r'\1_\2'),
(r'\((unsigned )?(char|uint64_t)\)', ''), (r'^.*\d+:\d+.*$', ''), (r'^.*\w##\w.*$', '')]
# Canonical type kinds that gen() treats as plain integers when emitting internal-linkage variables.
ints = (TK.INT, TK.UINT, TK.LONG, TK.ULONG, TK.LONGLONG, TK.ULONGLONG)
# Generate the Python source of a ctypes binding module from C headers parsed with libclang.
#   dll:          list of Python expression strings; each one is emitted as `ctypes.CDLL(unwrap(<expr>))` inside a
#                 generated `dll()` loader. When empty, no loader is emitted and functions / extern variables are skipped.
#   files:        header paths to parse. Cursors located in other files are skipped, except FUNCTION_DECLs when recsym is true.
#   args:         extra arguments handed to the clang parser.
#   prolog:       source lines placed before the generated declarations.
#   rules:        extra (regex, replacement) pairs applied to macro text before base_rules.
#   epilog:       source lines appended after the macros in the returned text.
#   use_errno:    adds `use_errno=True` to the generated CDLL call.
#   anon_names:   maps "<file>:<line>" of a declaration to the name to use for it.
#   types:        pre-seeded mapping from a C type spelling to the Python expression to emit for it.
#   parse_macros: when false, macro definitions are ignored.
# Returns the module source as a single string.
# NOTE(review): the list/dict defaults are shared between calls; this body does not mutate them in place.
def gen(dll, files, args=[], prolog=[], rules=[], epilog=[], recsym=False, use_errno=False, anon_names={}, types={}, parse_macros=True):
# macros: generated constant/lambda lines; lines: generated type/function lines; anoncnt: counter for anonymous types;
# types: C spelling -> (python name, flag). tname returns the cached name directly only when the flag is true;
# for records the flag records whether the type has any fields.
macros, lines, anoncnt, types = [], [], itertools.count().__next__, {k:(v,True) for k,v in types.items()}
# Return the Python expression for clang type `t`, appending any definitions it needs to `lines`.
def tname(t, suggested_name=None, typedef=None) -> str:
suggested_name = anon_names.get(f"{(decl:=t.get_declaration()).location.file}:{decl.location.line}", suggested_name)
nonlocal lines, types, anoncnt
# builtin kinds map straight to ctypes types.
# NOTE(review): LONG/ULONG are mapped to 64-bit ctypes integers — presumably assumes an LP64 target; confirm.
tmap = {TK.VOID:"None", TK.CHAR_U:"ctypes.c_ubyte", TK.UCHAR:"ctypes.c_ubyte", TK.CHAR_S:"ctypes.c_char", TK.SCHAR:"ctypes.c_char",
**{getattr(TK, k):f"ctypes.c_{k.lower()}" for k in ["BOOL", "WCHAR", "FLOAT", "DOUBLE", "LONGDOUBLE"]},
**{getattr(TK, k):f"ctypes.c_{'u' if 'U' in k else ''}int{sz}" for sz,k in
[(16, "USHORT"), (16, "SHORT"), (32, "UINT"), (32, "INT"), (64, "ULONG"), (64, "LONG"), (64, "ULONGLONG"), (64, "LONGLONG")]}}
if t.kind in tmap: return tmap[t.kind]
if t.spelling in types and types[t.spelling][1]: return types[t.spelling][0]
# function types, and pointers to function types, become ctypes.CFUNCTYPE(restype, *argtypes)
if ((f:=t).kind in (fks:=(TK.FUNCTIONPROTO, TK.FUNCTIONNOPROTO))) or (t.kind == TK.POINTER and (f:=t.get_pointee()).kind in fks):
return f"ctypes.CFUNCTYPE({tname(f.get_result())}{(', '+', '.join(map(tname, f.argument_types()))) if f.kind==TK.FUNCTIONPROTO else ''})"
match t.kind:
case TK.POINTER: return "ctypes.c_void_p" if (ptr:=t.get_pointee()).kind == TK.VOID else f"ctypes.POINTER({tname(ptr)})"
case TK.ELABORATED: return tname(t.get_named_type(), suggested_name)
case TK.TYPEDEF if t.spelling == t.get_canonical().spelling: return tname(t.get_canonical())
case TK.TYPEDEF:
defined, nm = (canon:=t.get_canonical()).spelling in types, tname(canon, typedef=t.spelling.replace('::', '_'))
types[t.spelling] = nm if t.spelling.startswith("__") else t.spelling.replace('::', '_'), True
# RECORDs need to handle typedefs specially to allow for self-reference
if canon.kind != TK.RECORD or defined: lines.append(f"{t.spelling.replace('::', '_')} = {nm}")
return types[t.spelling][0]
case TK.RECORD:
# TODO: packed unions
# TODO: pragma pack support
# check for forward declaration
if t.spelling in types: types[t.spelling] = (nm:=types[t.spelling][0]), len(list(t.get_fields())) != 0
else:
if decl.is_anonymous():
types[t.spelling] = (nm:=(suggested_name or (f"_anon{'struct' if decl.kind == CK.STRUCT_DECL else 'union'}{anoncnt()}")), True)
else: types[t.spelling] = (nm:=t.spelling.replace(' ', '_').replace('::', '_')), len(list(t.get_fields())) != 0
lines.append(f"class {nm}({'Struct' if decl.kind==CK.STRUCT_DECL else 'ctypes.Union'}): pass")
if typedef: lines.append(f"{typedef} = {nm}")
# acnt numbers anonymous record members ('_0', '_1', ...); after the fields are built, one more call yields how many were used
acnt = itertools.count().__next__
ll=[" ("+((fn:=f"'_{acnt()}'")+f", {tname(f.type, nm+fn[1:-1])}" if f.is_anonymous_record_decl() else f"'{f.spelling}', "+
tname(f.type, f'{nm}_{f.spelling}'))+(f',{f.get_bitfield_width()}' if f.is_bitfield() else '')+")," for f in t.get_fields()]
lines.extend(([f"{nm}._anonymous_ = ["+", ".join(f"'_{i}'" for i in range(n))+"]"] if (n:=acnt()) else [])+
([f"{nm}._packed_ = True"] * (CK.PACKED_ATTR in attrs(decl)))+([f"{nm}._fields_ = [",*ll,"]"] if ll else []))
return nm
case TK.ENUM:
# TODO: C++ and GNU C have forward declared enums
if decl.is_anonymous(): types[t.spelling] = suggested_name or f"_anonenum{anoncnt()}", True
else: types[t.spelling] = t.spelling.replace(' ', '_').replace('::', '_'), True
lines.append(f"{types[t.spelling][0]} = CEnum({tname(decl.enum_type)})\n" +
"\n".join(f"{e.spelling} = {types[t.spelling][0]}.define('{e.spelling}', {e.enum_value})" for e in decl.get_children()
if e.kind == CK.ENUM_CONSTANT_DECL) + "\n")
return types[t.spelling][0]
case TK.CONSTANTARRAY:
return f"({tname(t.get_array_element_type(), suggested_name.rstrip('s') if suggested_name else None)} * {t.get_array_size()})"
case TK.INCOMPLETEARRAY: return f"({tname(t.get_array_element_type(), suggested_name.rstrip('s') if suggested_name else None)} * 0)"
case _: raise NotImplementedError(f"unsupported type {t.kind}")
for f in files:
tu = Index.create().parse(f, args, options=TU.PARSE_DETAILED_PROCESSING_RECORD)
(pp:=PP.create(tu.cursor)).set_property(PPP.TerseOutput, 1)
for c in tu.cursor.walk_preorder():
# skip cursors that come from other files, except function declarations when recsym is set
if str(c.location.file) != str(f) and (not recsym or c.kind not in (CK.FUNCTION_DECL,)): continue
# NOTE(review): `rollback` holds references to the same list and dict objects, not copies, and tname only
# mutates them in place, so restoring it in the except clause below does not undo anything appended before
# the failure — confirm whether a snapshot (e.g. lines.copy(), types.copy()) was intended.
rollback = lines, types
try:
match c.kind:
case CK.FUNCTION_DECL if c.linkage == LK.EXTERNAL and dll:
# TODO: we could support name-mangling
lines.append(f"# {c.pretty_printed(pp)}\ntry: ({c.spelling}:=dll.{c.spelling}).restype, {c.spelling}.argtypes = "
f"{tname(c.result_type)}, [{', '.join(tname(arg.type) for arg in c.get_arguments())}]\nexcept AttributeError: pass\n")
case CK.STRUCT_DECL | CK.UNION_DECL | CK.TYPEDEF_DECL | CK.ENUM_DECL: tname(c.type)
case CK.MACRO_DEFINITION if parse_macros and len(toks:=list(c.get_tokens())) > 1:
# a '(' that starts in the column where the macro name ends marks a function-like macro: emit a lambda
if toks[1].spelling == '(' and toks[0].extent.end.column == toks[1].extent.start.column:
it = iter(toks[1:])
_args = [t.spelling for t in itertools.takewhile(lambda t:t.spelling!=')', it) if t.kind == ToK.IDENTIFIER]
if len(body:=list(it)) == 0: continue
macros += [f"{c.spelling} = lambda {','.join(_args)}: {readext(f, body[0].location.offset, toks[-1].extent.end.offset)}"]
else: macros += [f"{c.spelling} = {readext(f, toks[1].location.offset, toks[-1].extent.end.offset)}"]
# internal-linkage variables are emitted alongside the macros: designated-initializer integer arrays
# become dict literals, integers are copied verbatim, anything else is wrapped in its ctypes type
case CK.VAR_DECL if c.linkage == LK.INTERNAL:
if (c.type.kind == TK.CONSTANTARRAY and c.type.get_array_element_type().get_canonical().kind in ints and
(init:=last(c)).kind == CK.INIT_LIST_EXPR and all(re.match(r"\[.*\].*=", readext(f, c.extent)) for c in init.get_children())):
cs = init.get_children()
macros += [f"{c.spelling} = {{{','.join(f'{readext(f,next(it:=c.get_children()).extent)}:{readext(f,next(it).extent)}' for c in cs)}}}"]
elif c.type.get_canonical().kind in ints: macros += [f"{c.spelling} = {readext(f, last(c).extent)}"]
else: macros += [f"{c.spelling} = {tname(c.type)}({readext(f, last(c).extent)})"]
case CK.VAR_DECL if c.linkage == LK.EXTERNAL and dll:
lines.append(f"try: {c.spelling} = {tname(c.type)}.in_dll(dll, '{c.spelling}')\nexcept (ValueError,AttributeError): pass")
except NotImplementedError as e:
print(f"skipping {c.spelling}: {e}")
lines, types = rollback
# assemble the module text: header and imports, prolog, optional find_library import, optional dll() loader, then declarations
main = (f"# mypy: ignore-errors\nimport ctypes{', os' if any('os' in s for s in dll) else ''}\n"
"from tinygrad.helpers import unwrap\nfrom tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR\n" + '\n'.join([*prolog,
*(["from ctypes.util import find_library"]*any('find_library' in s for s in dll)),
*(["def dll():",*flatten([[f" try: return ctypes.CDLL(unwrap({d}){', use_errno=True' if use_errno else ''})",' except: pass'] for d in dll]),
" return None", "dll = dll()\n"]*bool(dll)), *lines]) + '\n')
# run every macro through the caller's rules followed by base_rules; macros reduced to an empty string are dropped
macros = [r for m in macros if (r:=functools.reduce(lambda s,r:re.sub(r[0], r[1], s), rules + base_rules, m))]
# Execute the generated module plus macros in a scratch namespace. When a macro line fails with
# SyntaxError/NameError/TypeError, its index is computed from the reported line number minus the
# number of lines in `main`; that macro is dropped and execution is retried until it succeeds.
while True:
try:
exec(main + '\n'.join(macros), {})
break
except (SyntaxError, NameError, TypeError) as e:
macrono = unwrap(e.lineno if isinstance(e, SyntaxError) else unwrap(unwrap(e.__traceback__).tb_next).tb_lineno) - main.count('\n') - 1
assert macrono >= 0 and macrono < len(macros), f"error outside macro range: {e}"
print(f"skipping {macros[macrono]}: {e}")
del macros[macrono]
except Exception as e: raise Exception("parsing failed") from e
return main + '\n'.join(macros + epilog)

View File

@@ -0,0 +1,72 @@
import ctypes, functools, sys
from typing import TYPE_CHECKING
def _do_ioctl(__idir, __base, __nr, __struct, __fd, **kwargs):
  """Issue one ioctl and return its payload.

  The request number is built as (__idir << 30) | (payload size << 16) | (__base << 8) | __nr.
  __struct is the ctypes type of the payload, constructed from **kwargs; it is None for requests
  made with _IO(), which carry no payload: the size field is then 0, the integer 0 is passed as
  the argument, and None is returned. (Previously that case always failed by calling None.)
  __fd is either an hcq.FileIOInterface, whose .ioctl method is used, or a value accepted by
  fcntl.ioctl as its first argument.

  Raises TypeError if field values are given for a payload-less request, and RuntimeError if the
  ioctl returns a non-zero value.
  """
  import tinygrad.runtime.support.hcq as hcq, fcntl
  ioctl = __fd.ioctl if isinstance(__fd, hcq.FileIOInterface) else functools.partial(fcntl.ioctl, __fd)
  if __struct is None:
    if kwargs: raise TypeError(f"ioctl without a payload struct got unexpected fields: {', '.join(kwargs)}")
    out, size = None, 0
  else:
    out = __struct(**kwargs)
    size = ctypes.sizeof(out)
  request = (__idir<<30)|(size<<16)|(__base<<8)|__nr
  # NOTE(review): 0 is fcntl.ioctl's own default argument; confirm FileIOInterface.ioctl accepts an int here
  if (rc:=ioctl(request, 0 if out is None else out)): raise RuntimeError(f"ioctl returned {rc}")
  return out
def _ioctl_base(base):
  # a string base is converted with ord(); any other value is used as given
  if isinstance(base, str): return ord(base)
  return base
# Each builder returns a partial of _do_ioctl with the direction value, base, number and payload type
# pre-bound; the caller supplies the file descriptor and the payload fields.
def _IO(base, nr):
  """Builder for a request with direction value 0 and no payload type."""
  return functools.partial(_do_ioctl, 0, _ioctl_base(base), nr, None)
def _IOW(base, nr, typ):
  """Builder for a request with direction value 1 and payload type `typ`."""
  return functools.partial(_do_ioctl, 1, _ioctl_base(base), nr, typ)
def _IOR(base, nr, typ):
  """Builder for a request with direction value 2 and payload type `typ`."""
  return functools.partial(_do_ioctl, 2, _ioctl_base(base), nr, typ)
def _IOWR(base, nr, typ):
  """Builder for a request with direction value 3 and payload type `typ`."""
  return functools.partial(_do_ioctl, 3, _ioctl_base(base), nr, typ)
def CEnum(typ: type[ctypes._SimpleCData]):
  """Create a subclass of the ctypes simple type `typ` that remembers names for its values.

  Every call returns a new class with its own value -> name table, filled through define().
  """
  class _CEnum(typ): # type: ignore
    _val_to_name_: dict[int,str] = {}
    @classmethod
    def from_param(cls, val):
      # instances pass through untouched, raw values are wrapped
      if isinstance(val, cls): return val
      return cls(val)
    @classmethod
    def get(cls, val, default="unknown"):
      # accepts either an instance or a raw value
      key = val.value if isinstance(val, cls) else val
      return cls._val_to_name_.get(key, default)
    @classmethod
    def items(cls): return cls._val_to_name_.items()
    @classmethod
    def define(cls, name, val):
      # registers the name and hands back the raw value, so `NAME = E.define('NAME', 3)` binds 3
      cls._val_to_name_[val] = name
      return val
    def __eq__(self, other): return self.value == other
    def __repr__(self):
      # registered values print as their name, everything else as the number
      if self.value in type(self)._val_to_name_: return self.get(self)
      return str(self.value)
  return _CEnum
# supports gcc (C11) __attribute__((packed))
if TYPE_CHECKING: Struct = ctypes.Structure
else:
  class MetaStruct(type(ctypes.Structure)):
    """Metaclass that emulates packed structs with bitfields on top of a raw byte array.

    When a class has a true `_packed_` attribute, its `_fields_` are not given to ctypes. Instead the
    class gets a single `_data` byte array sized to hold all fields back to back, plus one property
    per field that reads and writes the corresponding bits. Classes without `_packed_` behave like
    ordinary ctypes structures.
    """
    def __new__(mcs, name, bases, dct):
      # Only withhold _fields_ from ctypes when the class body asks for packing. Removing it
      # unconditionally left a non-packed `class Foo(Struct): _fields_ = [...]` without any fields.
      fields = dct.pop("_fields_", None) if dct.get("_packed_", False) else None
      cls = super().__new__(mcs, name, bases, dct)
      if fields is not None: mcs._build(cls, fields)
      return cls
    def __setattr__(cls, k, v):
      # NB: _fields_ must be set after _packed_ because PyCStructType_setattro marks _fields_ as final.
      if k == "_fields_" and getattr(cls, "_packed_", False): type(cls)._build(cls, v)
      elif k == "_packed_" and hasattr(cls, "_fields_"): type(cls)._build(cls, cls._fields_)
      else: super().__setattr__(k, v)
    @staticmethod
    def _build(cls, fields):
      """Lay `fields` out bit by bit and install the backing `_data` array and per-field properties.

      Each entry is (name, ctype) or (name, ctype, bit_width). Fields without a bit width start on
      the next byte boundary and occupy sizeof(ctype) bytes; bitfields are placed at the current bit.
      """
      o = 0  # running offset in bits
      for n,t,b in [(f[0], f[1], f[2] if len(f) == 3 else 0) for f in fields]:
        if b == 0: o = (o + 7) & ~7
        m = (1 << (sz:=ctypes.sizeof(t)*8 if b == 0 else b)) - 1
        # mask and shift are bound through functools.partial, so each property keeps its own values
        def _s(self,v,m,s,b): self._data[:] = ((int.from_bytes(self._data,sys.byteorder)&~(m<<s))|((v&m)<<s)).to_bytes(len(self._data), sys.byteorder)
        # NOTE(review): the getter returns the masked bits as a non-negative int, whatever ctype the field was declared with
        setattr(cls, n, property(functools.partial(lambda self,m,s:(int.from_bytes(self._data,sys.byteorder)>>s)&m,m=m,s=o),
                                 functools.partial(_s,m=m,s=o,b=b)))
        o += sz
      # these go through the ctypes metaclass directly so that MetaStruct.__setattr__ does not call _build again
      type(ctypes.Structure).__setattr__(cls, '_fields_', [('_data', ctypes.c_ubyte * ((o + 7) // 8))])
      type(ctypes.Structure).__setattr__(cls, '_packed_', True)
      setattr(cls, '_packed_fields_', fields)
  class Struct(ctypes.Structure, metaclass=MetaStruct):
    """ctypes.Structure with optional packed layout; set `_packed_ = True` before `_fields_` to enable it."""
    def __init__(self, *args, **kwargs):
      if hasattr(self, '_packed_fields_'):
        # packed classes only have the `_data` field as far as ctypes knows, so values are assigned through the properties
        for f,v in zip(self._packed_fields_, args): setattr(self, f[0], v)
        for k,v in kwargs.items(): setattr(self, k, v)
      else: super().__init__(*args, **kwargs)

View File

@@ -1,4 +1,5 @@
import ctypes, subprocess
import ctypes
from tinygrad.helpers import system
import tinygrad.runtime.autogen.comgr as comgr
assert comgr.AMD_COMGR_LANGUAGE_HIP == 4
try:
@@ -13,7 +14,7 @@ from tinygrad.runtime.support.compiler_cpu import LLVMCompiler
from tinygrad.helpers import OSX, to_char_p_p
def amdgpu_disassemble(lib:bytes):
asm = subprocess.check_output(["llvm-objdump" if OSX else "/opt/rocm/llvm/bin/llvm-objdump", '-d', '-'], input=lib).decode("utf-8").splitlines()
asm = system(f"{'llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines()
while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop()
print("\n".join(asm))

View File

@@ -1,6 +1,6 @@
import subprocess, hashlib, tempfile, ctypes, re, pathlib
from typing import Callable
from tinygrad.helpers import to_char_p_p, colored, init_c_var, getenv
from tinygrad.helpers import to_char_p_p, colored, init_c_var, getenv, system
import tinygrad.runtime.autogen.nvrtc as nvrtc
from tinygrad.device import Compiler, CompileError
@@ -37,7 +37,7 @@ def cuda_disassemble(lib:bytes, arch:str):
fn = (pathlib.Path(tempfile.gettempdir()) / f"tinycuda_{hashlib.md5(lib).hexdigest()}").as_posix()
with open(fn, "wb") as f: f.write(lib)
subprocess.run(["ptxas", f"-arch={arch}", "-o", fn, fn], check=False, stderr=subprocess.DEVNULL) # optional ptx -> sass step for CUDA=1
print(subprocess.check_output(['nvdisasm', fn]).decode('utf-8'))
print(system(f'nvdisasm {fn}'))
except Exception as e: print("Failed to generate SASS", str(e), "Make sure your PATH contains ptxas/nvdisasm binary of compatible version.")
class CUDACompiler(Compiler):

View File

@@ -1,6 +1,6 @@
import base64, ctypes, pathlib, tempfile, hashlib, subprocess
import base64, ctypes, pathlib, tempfile, hashlib
from tinygrad.device import Compiler
from tinygrad.helpers import cpu_objdump
from tinygrad.helpers import cpu_objdump, system
import tinygrad.runtime.autogen.mesa as mesa
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, expect, cerr
try: import tinygrad.runtime.autogen.llvm as llvm
@@ -82,5 +82,5 @@ class NAKCompiler(NIRCompiler):
try:
fn = (pathlib.Path(tempfile.gettempdir()) / f"tinynak_{hashlib.md5(lib).hexdigest()}").as_posix()
with open(fn, "wb") as f: f.write(lib[ctypes.sizeof(mesa.struct_nak_shader_info):])
print(subprocess.check_output(['nvdisasm', "-b", f"SM{self.arch[3:]}", fn]).decode('utf-8'))
print(system(f"nvdisasm -b SM{self.arch[3:]} {fn}"))
except Exception as e: print("Failed to generate SASS", str(e), "Make sure your PATH contains nvdisasm binary of compatible version.")

View File

@@ -1,7 +1,7 @@
import struct, ctypes, ctypes.util
from dataclasses import dataclass
from tinygrad.helpers import getbits, i2u, unwrap
import tinygrad.runtime.autogen.libc as libc
from tinygrad.runtime.autogen import libc
@dataclass(frozen=True)
class ElfSection: name:str; header:libc.Elf64_Shdr; content:bytes # noqa: E702

View File

@@ -1,5 +1,5 @@
import ctypes.util, os, sys, subprocess
from tinygrad.helpers import DEBUG, OSX, getenv
import ctypes.util, os, sys
from tinygrad.helpers import DEBUG, OSX, getenv, system
if sys.platform == 'win32':
# Windows llvm distribution doesn't seem to add itself to PATH or anywhere else where it can be easily retrieved from.
@@ -10,7 +10,7 @@ if sys.platform == 'win32':
elif OSX:
# Will raise FileNotFoundError if brew is not installed
# `brew --prefix` will return even if formula is not installed
if not os.path.exists(brew_prefix:=subprocess.check_output(['brew', '--prefix', 'llvm@20']).decode().strip()):
if not os.path.exists(brew_prefix:=system("brew --prefix llvm@20")):
raise FileNotFoundError('LLVM not found, you can install it with `brew install llvm@20`')
LLVM_PATH: str|None = os.path.join(brew_prefix, 'lib', 'libLLVM.dylib')
else:

View File

@@ -1,10 +1,10 @@
import ctypes.util, os, subprocess, platform, sysconfig
from tinygrad.helpers import OSX
import ctypes.util, os, platform, sysconfig
from tinygrad.helpers import system, OSX
WEBGPU_PATH: str | None
if OSX:
if not os.path.exists(brew_prefix:=subprocess.check_output(['brew', '--prefix', 'dawn']).decode().strip()):
if not os.path.exists(brew_prefix:=system("brew --prefix dawn")):
raise FileNotFoundError('dawn library not found. Install it with `brew tap wpmed92/dawn && brew install dawn`')
WEBGPU_PATH = os.path.join(brew_prefix, 'lib', 'libwebgpu_dawn.dylib')
elif platform.system() == "Windows":