From 4ad787ece20a4b91ebe1638ba13735c18a6b02db Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Sun, 8 Feb 2026 18:05:01 -0800 Subject: [PATCH] new style CPULLVMRenderer (#14629) --- tinygrad/renderer/llvmir.py | 3 +++ tinygrad/runtime/ops_cpu.py | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tinygrad/renderer/llvmir.py b/tinygrad/renderer/llvmir.py index cb0794e6b6..90b7a39bee 100644 --- a/tinygrad/renderer/llvmir.py +++ b/tinygrad/renderer/llvmir.py @@ -205,6 +205,9 @@ class CPULLVMRenderer(LLVMRenderer): string_rewrite = base_rewrite + PatternMatcher([(UPat(Ops.WMMA, name="wmma"), render_wmma_amx)]) def render(self, uops: list[UOp]) -> str: return "\n".join((k:=self._render_kernel(uops))[0] + (k[1], self._render_footer(uops))) def _render_footer(self, uops: list[UOp]) -> str: return 'attributes #0 = { alwaysinline nounwind "no-builtins" "no-trapping-math"="true" }' + def __init__(self): + from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler + self.compiler = CPULLVMCompiler() barrier = 'fence syncscope("workgroup") release\ntail call void @llvm.amdgcn.s.barrier()\nfence syncscope("workgroup") acquire\n' code_for_workitem = {"g": lambda x: f"tail call i32 @llvm.amdgcn.workgroup.id.{chr(120+int(x))}()", diff --git a/tinygrad/runtime/ops_cpu.py b/tinygrad/runtime/ops_cpu.py index 3ade3550e7..980a1b5930 100644 --- a/tinygrad/runtime/ops_cpu.py +++ b/tinygrad/runtime/ops_cpu.py @@ -8,7 +8,6 @@ from tinygrad.runtime.support.hcq import CLikeArgsState from tinygrad.renderer.cstyle import ClangJITRenderer from tinygrad.renderer.llvmir import CPULLVMRenderer from tinygrad.renderer.nir import LVPRenderer -from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler from tinygrad.runtime.support.elf import jit_loader from tinygrad.uop.ops import sint @@ -134,6 +133,6 @@ class CPUDevice(HCQCompiled): def __init__(self, device:str=""): self.tasks:queue.Queue = queue.Queue() CPUWorker(self, self.tasks, thread_id=0).start() - compilers = CompilerSet([CompilerPair(ClangJITRenderer, None), CompilerPair(CPULLVMRenderer, CPULLVMCompiler, ctrl_var=CPU_LLVM), + compilers = CompilerSet([CompilerPair(ClangJITRenderer, None), CompilerPair(CPULLVMRenderer, None, ctrl_var=CPU_LLVM), CompilerPair(LVPRenderer, None, ctrl_var=CPU_LVP)], ctrl_var=CPU_CC) super().__init__(device, CPUAllocator(self), compilers, functools.partial(CPUProgram, self), CPUSignal, CPUComputeQueue)