From 6c109f4d759f41cea6067d866babcdb65a5875a1 Mon Sep 17 00:00:00 2001 From: Garret Castro <53013571+TheVanadium@users.noreply.github.com> Date: Sun, 25 Jan 2026 21:12:39 -0800 Subject: [PATCH] LLVM: CPU threading support (#14320) * make generic llvmrenderer class for cpu and amd * move `tensor_cores` back to parent * remove empty line * restore extra matcher position * add threading * dont need to add core_id here * dont move code for workitem * cleanup --------- Co-authored-by: TheVanadium --- tinygrad/renderer/llvmir.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tinygrad/renderer/llvmir.py b/tinygrad/renderer/llvmir.py index 616fababbe..c1f859aa3f 100644 --- a/tinygrad/renderer/llvmir.py +++ b/tinygrad/renderer/llvmir.py @@ -6,7 +6,7 @@ from tinygrad.renderer.cstyle import AMDHIPRenderer, create_non_native_float_pat from tinygrad.uop.decompositions import xexp2, xlog2 from tinygrad.uop.ops import UOp, PatternMatcher, UPat, Ops, GroupOp, range_str from tinygrad.dtype import dtypes, float_to_fp8, DType, PtrDType, truncate -from tinygrad.helpers import prod, AMX +from tinygrad.helpers import prod, AMX, CPU_COUNT, getenv def ldt(dt:DType): if dt.vcount > 1: return f"<{dt.vcount} x {ldt(dt.scalar())}>" @@ -199,7 +199,8 @@ class LLVMRenderer(Renderer): class CPULLVMRenderer(LLVMRenderer): device = "CPU" has_local = False - global_max: tuple[int, ...] | None = None + has_threads = bool(getenv("THREADS", 1)) + global_max = (CPU_COUNT.value, 0, 0) abi = 'win64cc' if sys.platform == 'win32' else None string_rewrite = base_rewrite + PatternMatcher([(UPat(Ops.WMMA, name="wmma"), render_wmma_amx)]) def render(self, uops: list[UOp]) -> str: return "\n".join((k:=self._render_kernel(uops))[0] + (k[1], self._render_footer(uops)))