From 27f7ea478b1ea4e37db1187b56719d616fe3ae06 Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Sun, 8 Feb 2026 21:39:03 -0800 Subject: [PATCH] new style DSP renderer (#14636) * new style DSP renderer * cleanup --- tinygrad/runtime/ops_dsp.py | 49 ++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/tinygrad/runtime/ops_dsp.py b/tinygrad/runtime/ops_dsp.py index 012a798ba7..28bb115803 100644 --- a/tinygrad/runtime/ops_dsp.py +++ b/tinygrad/runtime/ops_dsp.py @@ -45,6 +45,8 @@ class DSPRenderer(ClangRenderer): type_map = { **ClangRenderer.type_map, dtypes.uint64: "unsigned long long", dtypes.int64: "long long" } code_for_op = {k:v for k,v in ClangRenderer.code_for_op.items() if k != Ops.SQRT} + def __init__(self): self.compiler = DSPCompiler() + def _render_defines(self, uops) -> list[str]: return ['''/* DSP boilerplate */ struct dcvs_v2_req { int type; int _pad; _Bool dcvs_enable; char dcvs_option; _Bool set_latency; int latency; _Bool set_dcvs_params; short _pad2; char target_corner; char min_corner; char max_corner; int _pad3[3];};''','int HAP_power_set(void*, void*);', @@ -116,28 +118,11 @@ class DSPAllocator(Allocator['DSPDevice']): def _copyout(self, dest:memoryview, src:DSPBuffer): ctypes.memmove(mv_address(dest), src.va_addr, dest.nbytes) def _offset(self, buf, size:int, offset:int): return DSPBuffer(buf.va_addr+offset, size, buf.share_info, buf.offset+offset) -class ClangCompiler(Compiler): - def __init__(self, cachekey="compile_clang", args:list[str]|None=None, objdump_tool='objdump'): - self.args = ['-shared', '-march=native'] if args is None else args - self.objdump_tool = objdump_tool - super().__init__(cachekey) - - def compile(self, src:str) -> bytes: - # TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here - with tempfile.NamedTemporaryFile(delete=True) as f: - system(f"{getenv('CC','clang')} {' '.join(self.args)} -O2 -Wall -Werror -x c -fPIC -ffreestanding -nostdlib - -o {f.name}", input=src.encode()) - return pathlib.Path(f.name).read_bytes() - - def disassemble(self, lib:bytes): return cpu_objdump(lib, self.objdump_tool) - -class DSPDevice(Compiled): - def __init__(self, device:str=""): - compiler_args = ["--target=hexagon", "-mcpu=hexagonv65", "-fuse-ld=lld", "-nostdlib", "-mhvx=v65", "-mhvx-length=128b"] - if getenv("MOCKDSP"): - mock_compilers = CompilerSet([CompilerPair(MockDSPRenderer, functools.partial(ClangCompiler, None, ["-static"]+compiler_args, 'llvm-objdump'))]) - super().__init__(device, DSPAllocator(self), mock_compilers, MockDSPProgram) +class DSPCompiler(Compiler): + def __init__(self, mock:bool=False): + compiler_args = "--target=hexagon -mcpu=hexagonv65 -fuse-ld=lld -nostdlib -mhvx=v65 -mhvx-length=128b" + if mock: self.args = f"-static {compiler_args}" else: - self.ion_fd = os.open('/dev/ion', os.O_RDONLY) # Generate link script to pass into clang. Aligning all used sections to 4k fixes invoke problem. sections = ['text', 'rela.plt', 'rela.dyn', 'plt', 'data', 'bss', 'hash', 'dynamic', 'got', 'got.plt', 'dynsym', 'dynstr', 'symtab', 'shstrtab', 'strtab'] @@ -146,8 +131,25 @@ class DSPDevice(Compiled): self.link_ld.write(f"SECTIONS {{ . = 0x0; {sections_link}\n /DISCARD/ : {{ *(.note .note.* .gnu.hash .comment) }} }}".encode()) self.link_ld.flush() - compiler = functools.partial(ClangCompiler, "compile_dsp", ["-shared"] + compiler_args + [f"-T{self.link_ld.name}"], 'llvm-objdump') - super().__init__(device, DSPAllocator(self), CompilerSet([CompilerPair(DSPRenderer, compiler)]), functools.partial(DSPProgram, self)) + self.args = f"-shared {compiler_args} -T{self.link_ld.name}" + + super().__init__(None if mock else "compile_dsp") + + def compile(self, src:str) -> bytes: + # TODO: remove file write. sadly clang doesn't like the use of /dev/stdout here + with tempfile.NamedTemporaryFile(delete=True) as f: + system(f"{getenv('CC','clang')} {self.args} -O2 -Wall -Werror -x c -fPIC -ffreestanding -nostdlib - -o {f.name}", input=src.encode()) + return pathlib.Path(f.name).read_bytes() + + def disassemble(self, lib:bytes): return cpu_objdump(lib, "llvm-objdump") + + +class DSPDevice(Compiled): + def __init__(self, device:str=""): + if getenv("MOCKDSP"): super().__init__(device, DSPAllocator(self), CompilerSet([CompilerPair(MockDSPRenderer)]), MockDSPProgram) + else: + self.ion_fd = os.open('/dev/ion', os.O_RDONLY) + super().__init__(device, DSPAllocator(self), CompilerSet([CompilerPair(DSPRenderer)]), functools.partial(DSPProgram, self)) fastrpc_shell = memoryview(bytearray(pathlib.Path('/dsp/cdsp/fastrpc_shell_3').read_bytes())) self.shell_buf = self.allocator.alloc(round_up(fastrpc_shell.nbytes, 0x1000), BufferSpec(nolru=True)) ctypes.memmove(self.shell_buf.va_addr, mv_address(fastrpc_shell), fastrpc_shell.nbytes) @@ -268,6 +270,7 @@ static void *mmap2(void *addr, unsigned int length, int prot, int flags, int fd, return (void*)syscall((long)addr, length, prot, flags, fd, offset, 222); }}''' class MockDSPRenderer(DSPRenderer): + def __init__(self): self.compiler = DSPCompiler(mock=True) def _render_defines(self, uops) -> list[str]: return ClangRenderer._render_defines(self, uops) def _render_entry(self, function_name:str, bufs:list[tuple[str,tuple[DType,bool]]]) -> str: # https://gpages.juszkiewicz.com.pl/syscalls-table/syscalls.html