print special ops in postrange (#13318)

* print special ops in postrange

* fix on OSX
This commit is contained in:
George Hotz
2025-11-17 14:43:23 -08:00
committed by GitHub
parent b637093be9
commit 6d3385c284
4 changed files with 18 additions and 13 deletions

View File

@@ -8,17 +8,19 @@ os.environ["AMD_LLVM"] = "0"
from dataclasses import replace
import atexit, contextlib
from tinygrad import Tensor
from tinygrad.helpers import system, getenv
from tinygrad.helpers import system, OSX
from tinygrad.runtime.ops_amd import AMDProgram
from extra.sqtt.roc import decode, WaveExec, ProfileSQTTEvent
from tinygrad.device import Device, ProfileDeviceEvent
from extra.sqtt.attempt_sqtt_parse import parse_sqtt_print_packets
# Power setup for the AMD GPU: runs `amd-smi set -l <x>` through sudo via system().
# NOTE(review): this span renders a diff with indentation stripped. The first four code lines
# look like the pre-change (unconditional) form and the `if not OSX:` group below like its
# replacement -- confirm against the real file before relying on either copy.
def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}")
# registered with atexit so "auto" is set again when the interpreter exits
@atexit.register
def reset_power(): set_power("auto")
# applied at import time, before any device is opened below
set_power("stable_std")
# TODO: should really check for AM driver / USB
# Same three statements, run only when OSX is false
# (presumably because /opt/rocm/bin/amd-smi is not available there -- confirm).
if not OSX:
def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}")
@atexit.register
def reset_power(): set_power("auto")
set_power("stable_std")
dev = Device["AMD"]
@@ -30,9 +32,9 @@ def save_sqtt():
yield sqtt
events = dev.profile_events+[ProfileDeviceEvent("AMD", props=dev.device_props())]
#rctx = decode(events)
#assert len(rctx.inst_execs) > 0, "empty sqtt output"
#sqtt.update(rctx.inst_execs)
rctx = decode(events)
assert len(rctx.inst_execs) > 0, "empty sqtt output"
sqtt.update(rctx.inst_execs)
for e in events:
if isinstance(e, ProfileSQTTEvent):

View File

@@ -1,13 +1,14 @@
import unittest
from tinygrad import Device
from tinygrad.tensor import Tensor
from tinygrad.helpers import getenv, CI
from tinygrad.helpers import getenv, CI, OSX
def multidevice_test(fxn):
exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
def ret(self):
for device in Device._devices:
if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"]: continue
# broken on OSX USB AMD, why?
if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"] or (OSX and device in ["AMD"]): continue
if not CI: print(device)
if device in exclude_devices:
if not CI: print(f"WARNING: {device} test is excluded")

View File

@@ -52,8 +52,10 @@ class Scheduler:
def get_optimized_ast(self, name_override:str|None=None):
if name_override is not None: name = name_override
else:
kernel_type = "r" if self.reduceop is not None else "E"
name = kernel_type + colored('_', 'BLACK').join(['']+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())])
k_type = "r" if self.reduceop is not None else "E"
special_uops = sorted([x for x in self.ast.toposort() if x.op is Ops.SPECIAL], key=lambda x: x.arg)
special_ops = [colored(str(x.vmax+1), "blue" if x.arg[0] == "g" else "cyan") for x in special_uops]
name = k_type + colored('_', 'BLACK').join(['']+special_ops+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())])
Scheduler.kernel_cnt[(function_name := to_function_name(name))] += 1
num = f"n{Scheduler.kernel_cnt[function_name]-1}" if Scheduler.kernel_cnt[function_name] > 1 else ""
name += colored(num, 'BLACK')

View File

@@ -13,7 +13,7 @@ from tinygrad.runtime.support.compiler_cpu import LLVMCompiler
from tinygrad.helpers import OSX, to_char_p_p
# Disassemble an AMD GPU binary with llvm-objdump and print the listing to stdout.
#   lib: raw bytes, passed to llvm-objdump as input (`-d -` reads the object from stdin).
#   Returns nothing; the listing is only printed.
# NOTE(review): two `asm = ...` lines appear because this span renders a diff with indentation
# stripped. The first (bare `llvm-objdump` on OSX) looks like the pre-change line and the second
# (Homebrew LLVM path on OSX) like its replacement -- confirm against the real file.
def amdgpu_disassemble(lib:bytes):
asm = system(f"{'llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines()
asm = system(f"{'/opt/homebrew/opt/llvm/bin/llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines()
# drop trailing lines that contain `s_nop 0` or `s_code_end`, stopping at the first line with neither
while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop()
print("\n".join(asm))