print special ops in postrange (#13318)

* print special ops in postrange

* fix on OSX
This commit is contained in:
George Hotz
2025-11-17 14:43:23 -08:00
committed by GitHub
parent b637093be9
commit 6d3385c284
4 changed files with 18 additions and 13 deletions

View File

@@ -8,17 +8,19 @@ os.environ["AMD_LLVM"] = "0"
from dataclasses import replace from dataclasses import replace
import atexit, contextlib import atexit, contextlib
from tinygrad import Tensor from tinygrad import Tensor
from tinygrad.helpers import system, getenv from tinygrad.helpers import system, OSX
from tinygrad.runtime.ops_amd import AMDProgram from tinygrad.runtime.ops_amd import AMDProgram
from extra.sqtt.roc import decode, WaveExec, ProfileSQTTEvent from extra.sqtt.roc import decode, WaveExec, ProfileSQTTEvent
from tinygrad.device import Device, ProfileDeviceEvent from tinygrad.device import Device, ProfileDeviceEvent
from extra.sqtt.attempt_sqtt_parse import parse_sqtt_print_packets from extra.sqtt.attempt_sqtt_parse import parse_sqtt_print_packets
def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}") # TODO: should really check for AM driver / USB
@atexit.register if not OSX:
def reset_power(): set_power("auto") def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}")
set_power("stable_std") @atexit.register
def reset_power(): set_power("auto")
set_power("stable_std")
dev = Device["AMD"] dev = Device["AMD"]
@@ -30,9 +32,9 @@ def save_sqtt():
yield sqtt yield sqtt
events = dev.profile_events+[ProfileDeviceEvent("AMD", props=dev.device_props())] events = dev.profile_events+[ProfileDeviceEvent("AMD", props=dev.device_props())]
#rctx = decode(events) rctx = decode(events)
#assert len(rctx.inst_execs) > 0, "empty sqtt output" assert len(rctx.inst_execs) > 0, "empty sqtt output"
#sqtt.update(rctx.inst_execs) sqtt.update(rctx.inst_execs)
for e in events: for e in events:
if isinstance(e, ProfileSQTTEvent): if isinstance(e, ProfileSQTTEvent):

View File

@@ -1,13 +1,14 @@
import unittest import unittest
from tinygrad import Device from tinygrad import Device
from tinygrad.tensor import Tensor from tinygrad.tensor import Tensor
from tinygrad.helpers import getenv, CI from tinygrad.helpers import getenv, CI, OSX
def multidevice_test(fxn): def multidevice_test(fxn):
exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",") exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
def ret(self): def ret(self):
for device in Device._devices: for device in Device._devices:
if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"]: continue # broken on OSX USB AMD, why?
if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"] or (OSX and device in ["AMD"]): continue
if not CI: print(device) if not CI: print(device)
if device in exclude_devices: if device in exclude_devices:
if not CI: print(f"WARNING: {device} test is excluded") if not CI: print(f"WARNING: {device} test is excluded")

View File

@@ -52,8 +52,10 @@ class Scheduler:
def get_optimized_ast(self, name_override:str|None=None): def get_optimized_ast(self, name_override:str|None=None):
if name_override is not None: name = name_override if name_override is not None: name = name_override
else: else:
kernel_type = "r" if self.reduceop is not None else "E" k_type = "r" if self.reduceop is not None else "E"
name = kernel_type + colored('_', 'BLACK').join(['']+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())]) special_uops = sorted([x for x in self.ast.toposort() if x.op is Ops.SPECIAL], key=lambda x: x.arg)
special_ops = [colored(str(x.vmax+1), "blue" if x.arg[0] == "g" else "cyan") for x in special_uops]
name = k_type + colored('_', 'BLACK').join(['']+special_ops+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())])
Scheduler.kernel_cnt[(function_name := to_function_name(name))] += 1 Scheduler.kernel_cnt[(function_name := to_function_name(name))] += 1
num = f"n{Scheduler.kernel_cnt[function_name]-1}" if Scheduler.kernel_cnt[function_name] > 1 else "" num = f"n{Scheduler.kernel_cnt[function_name]-1}" if Scheduler.kernel_cnt[function_name] > 1 else ""
name += colored(num, 'BLACK') name += colored(num, 'BLACK')

View File

@@ -13,7 +13,7 @@ from tinygrad.runtime.support.compiler_cpu import LLVMCompiler
from tinygrad.helpers import OSX, to_char_p_p from tinygrad.helpers import OSX, to_char_p_p
def amdgpu_disassemble(lib:bytes): def amdgpu_disassemble(lib:bytes):
asm = system(f"{'llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines() asm = system(f"{'/opt/homebrew/opt/llvm/bin/llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines()
while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop() while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop()
print("\n".join(asm)) print("\n".join(asm))