mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
print special ops in postrange (#13318)
* print special ops in postrange
* fix on OSX
This commit is contained in:
@@ -8,17 +8,19 @@ os.environ["AMD_LLVM"] = "0"
|
|||||||
from dataclasses import replace
|
from dataclasses import replace
|
||||||
import atexit, contextlib
|
import atexit, contextlib
|
||||||
from tinygrad import Tensor
|
from tinygrad import Tensor
|
||||||
from tinygrad.helpers import system, getenv
|
from tinygrad.helpers import system, OSX
|
||||||
from tinygrad.runtime.ops_amd import AMDProgram
|
from tinygrad.runtime.ops_amd import AMDProgram
|
||||||
from extra.sqtt.roc import decode, WaveExec, ProfileSQTTEvent
|
from extra.sqtt.roc import decode, WaveExec, ProfileSQTTEvent
|
||||||
from tinygrad.device import Device, ProfileDeviceEvent
|
from tinygrad.device import Device, ProfileDeviceEvent
|
||||||
|
|
||||||
from extra.sqtt.attempt_sqtt_parse import parse_sqtt_print_packets
|
from extra.sqtt.attempt_sqtt_parse import parse_sqtt_print_packets
|
||||||
|
|
||||||
def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}")
|
# TODO: should really check for AM driver / USB
|
||||||
@atexit.register
|
if not OSX:
|
||||||
def reset_power(): set_power("auto")
|
def set_power(x): system(f"sudo /opt/rocm/bin/amd-smi set -l {x}")
|
||||||
set_power("stable_std")
|
@atexit.register
|
||||||
|
def reset_power(): set_power("auto")
|
||||||
|
set_power("stable_std")
|
||||||
|
|
||||||
dev = Device["AMD"]
|
dev = Device["AMD"]
|
||||||
|
|
||||||
@@ -30,9 +32,9 @@ def save_sqtt():
|
|||||||
yield sqtt
|
yield sqtt
|
||||||
events = dev.profile_events+[ProfileDeviceEvent("AMD", props=dev.device_props())]
|
events = dev.profile_events+[ProfileDeviceEvent("AMD", props=dev.device_props())]
|
||||||
|
|
||||||
#rctx = decode(events)
|
rctx = decode(events)
|
||||||
#assert len(rctx.inst_execs) > 0, "empty sqtt output"
|
assert len(rctx.inst_execs) > 0, "empty sqtt output"
|
||||||
#sqtt.update(rctx.inst_execs)
|
sqtt.update(rctx.inst_execs)
|
||||||
|
|
||||||
for e in events:
|
for e in events:
|
||||||
if isinstance(e, ProfileSQTTEvent):
|
if isinstance(e, ProfileSQTTEvent):
|
||||||
|
|||||||
5
test/external/external_test_example.py
vendored
5
test/external/external_test_example.py
vendored
@@ -1,13 +1,14 @@
|
|||||||
import unittest
|
import unittest
|
||||||
from tinygrad import Device
|
from tinygrad import Device
|
||||||
from tinygrad.tensor import Tensor
|
from tinygrad.tensor import Tensor
|
||||||
from tinygrad.helpers import getenv, CI
|
from tinygrad.helpers import getenv, CI, OSX
|
||||||
|
|
||||||
def multidevice_test(fxn):
|
def multidevice_test(fxn):
|
||||||
exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
|
exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",")
|
||||||
def ret(self):
|
def ret(self):
|
||||||
for device in Device._devices:
|
for device in Device._devices:
|
||||||
if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"]: continue
|
# broken on OSX USB AMD, why?
|
||||||
|
if device in ["REMOTE", "DISK", "NPY", "FAKE", "DSP", "NULL"] or (OSX and device in ["AMD"]): continue
|
||||||
if not CI: print(device)
|
if not CI: print(device)
|
||||||
if device in exclude_devices:
|
if device in exclude_devices:
|
||||||
if not CI: print(f"WARNING: {device} test is excluded")
|
if not CI: print(f"WARNING: {device} test is excluded")
|
||||||
|
|||||||
@@ -52,8 +52,10 @@ class Scheduler:
|
|||||||
def get_optimized_ast(self, name_override:str|None=None):
|
def get_optimized_ast(self, name_override:str|None=None):
|
||||||
if name_override is not None: name = name_override
|
if name_override is not None: name = name_override
|
||||||
else:
|
else:
|
||||||
kernel_type = "r" if self.reduceop is not None else "E"
|
k_type = "r" if self.reduceop is not None else "E"
|
||||||
name = kernel_type + colored('_', 'BLACK').join(['']+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())])
|
special_uops = sorted([x for x in self.ast.toposort() if x.op is Ops.SPECIAL], key=lambda x: x.arg)
|
||||||
|
special_ops = [colored(str(x.vmax+1), "blue" if x.arg[0] == "g" else "cyan") for x in special_uops]
|
||||||
|
name = k_type + colored('_', 'BLACK').join(['']+special_ops+[colored(x.src[0].render(), color) for x,color in zip(self.rngs, self.colors())])
|
||||||
Scheduler.kernel_cnt[(function_name := to_function_name(name))] += 1
|
Scheduler.kernel_cnt[(function_name := to_function_name(name))] += 1
|
||||||
num = f"n{Scheduler.kernel_cnt[function_name]-1}" if Scheduler.kernel_cnt[function_name] > 1 else ""
|
num = f"n{Scheduler.kernel_cnt[function_name]-1}" if Scheduler.kernel_cnt[function_name] > 1 else ""
|
||||||
name += colored(num, 'BLACK')
|
name += colored(num, 'BLACK')
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from tinygrad.runtime.support.compiler_cpu import LLVMCompiler
|
|||||||
from tinygrad.helpers import OSX, to_char_p_p
|
from tinygrad.helpers import OSX, to_char_p_p
|
||||||
|
|
||||||
def amdgpu_disassemble(lib:bytes):
|
def amdgpu_disassemble(lib:bytes):
|
||||||
asm = system(f"{'llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines()
|
asm = system(f"{'/opt/homebrew/opt/llvm/bin/llvm-objdump' if OSX else '/opt/rocm/llvm/bin/llvm-objdump'} -d -", input=lib).splitlines()
|
||||||
while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop()
|
while asm and ("s_nop 0" in asm[-1] or "s_code_end" in asm[-1]): asm.pop()
|
||||||
print("\n".join(asm))
|
print("\n".join(asm))
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user