From 6d3385c284629beaa94324c71090b03acba9d9b1 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Mon, 17 Nov 2025 14:43:23 -0800 Subject: [PATCH] print special ops in postrange (#13318) * print special ops in postrange * fix on OSX --- extra/sqtt/active_sqtt_parse.py | 18 ++++++++++-------- test/external/external_test_example.py | 5 +++-- tinygrad/codegen/opt/postrange.py | 6 ++++-- tinygrad/runtime/support/compiler_amd.py | 2 +- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/extra/sqtt/active_sqtt_parse.py b/extra/sqtt/active_sqtt_parse.py index 23eab51986..6cbe5fc1eb 100644 --- a/extra/sqtt/active_sqtt_parse.py +++ b/extra/sqtt/active_sqtt_parse.py @@ -8,17 +8,19 @@ os.environ["AMD_LLVM"] = "0" from dataclasses import replace import atexit, contextlib from tinygrad import Tensor -from tinygrad.helpers import system, getenv +from tinygrad.helpers import system, OSX from tinygrad.runtime.ops_amd import AMDProgram from extra.sqtt.roc import decode, WaveExec, ProfileSQTTEvent from tinygrad.device import Device, ProfileDeviceEvent from extra.sqtt.attempt_sqtt_parse import parse_sqtt_print_packets -def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}") -@atexit.register -def reset_power(): set_power("auto") -set_power("stable_std") +# TODO: should really check for AM driver / USB +if not OSX: + def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}") + @atexit.register + def reset_power(): set_power("auto") + set_power("stable_std") dev = Device["AMD"] @@ -30,9 +32,9 @@ def save_sqtt(): yield sqtt events = dev.profile_events+[ProfileDeviceEvent("AMD", props=dev.device_props())] - #rctx = decode(events) - #assert len(rctx.inst_execs) > 0, "empty sqtt output" - #sqtt.update(rctx.inst_execs) + rctx = decode(events) + assert len(rctx.inst_execs) > 0, "empty sqtt output" + sqtt.update(rctx.inst_execs) for e in events: if isinstance(e, ProfileSQTTEvent): diff --git a/test/external/external_test_example.py b/test/external/external_test_example.py index a2740f3a5e..1114a5917d 100644 --- a/test/external/external_test_example.py +++ b/test/external/external_test_example.py @@ -1,13 +1,14 @@ import unittest from tinygrad import Device from tinygrad.tensor import Tensor -from tinygrad.helpers import getenv, CI +from tinygrad.helpers import getenv, CI, OSX def multidevice_test(fxn): exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",") def ret(self): for device in Device._devices: - if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"]: continue + # broken on OSX USB AMD, why? + if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"] or (OSX and device in ["AMD"]): continue if not CI: print(device) if device in exclude_devices: if not CI: print(f"WARNING: {device} test is excluded") diff --git a/tinygrad/codegen/opt/postrange.py b/tinygrad/codegen/opt/postrange.py index 91cafbf48d..c9dad5c85d 100644 --- a/tinygrad/codegen/opt/postrange.py +++ b/tinygrad/codegen/opt/postrange.py @@ -52,8 +52,10 @@ class Scheduler: def get_optimized_ast(self, name_override:str|None=None): if name_override is not None: name = name_override else: - kernel_type = "r" if self.reduceop is not None else "E" - name = kernel_type + colored('_', 'BLACK').join(['']+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())]) + k_type = "r" if self.reduceop is not None else "E" + special_uops = sorted([x for x in self.ast.toposort() if x.op is Ops.SPECIAL], key=lambda x: x.arg) + special_ops = [colored(str(x.vmax+1), "blue" if x.arg[0] == "g" else "cyan") for x in special_uops] + name = k_type + colored('_', 'BLACK').join(['']+special_ops+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())]) Scheduler.kernel_cnt[(function_name := to_function_name(name))] += 1 num = f"n{Scheduler.kernel_cnt[function_name]-1}" if Scheduler.kernel_cnt[function_name] > 1 else "" name += colored(num, 'BLACK') diff --git a/tinygrad/runtime/support/compiler_amd.py b/tinygrad/runtime/support/compiler_amd.py index ec7e004031..033270975a 100644 --- a/tinygrad/runtime/support/compiler_amd.py +++ b/tinygrad/runtime/support/compiler_amd.py @@ -13,7 +13,7 @@ from tinygrad.runtime.support.compiler_cpu import LLVMCompiler from tinygrad.helpers import OSX, to_char_p_p def amdgpu_disassemble(lib:bytes): - asm = system(f"{'llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines() + asm = system(f"{'/opt/homebrew/opt/llvm/bin/llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines() while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop() print("\n".join(asm))