viz: cli.py cleanups, do not require PYTHONPATH (#15085)

* cleanup the print

* sys.exit

* equal check

* cleanup unpacker

* cli doesn't need PYTHONPATH

* no semicolons

* %s/PYTHONPATH=. //g
This commit is contained in:
qazal
2026-03-02 12:24:38 +02:00
committed by GitHub
parent 5ff278446c
commit f7aeff6061
4 changed files with 55 additions and 58 deletions

View File

@@ -3,4 +3,4 @@ export BENCHMARK=5
export EVAL_BS=0
export VIZ=${VIZ:--1}
examples/mlperf/training_submission_v6.0/tinycorp/benchmarks/llama8b/implementations/tinybox_8xMI350X/dev_run.sh
PYTHONPATH="." extra/viz/cli.py --profile --device "AMD" --top 20
extra/viz/cli.py --profile --device "AMD" --top 20

View File

@@ -1,17 +1,17 @@
A command line tool for exploring the VIZ trace.
After running with VIZ=-1, use `PYTHONPATH=. extra/viz/cli.py` to explore the saved trace files.
After running with VIZ=-1, use `extra/viz/cli.py` to explore the saved trace files.
## Inspect runtime profiling
Use `PYTHONPATH=. extra/viz/cli.py --profile` to list all traced devices.
Use `extra/viz/cli.py --profile` to list all traced devices.
List top slowest kernels on a device: `--profile --device "AMD"`
List samples of a kernel on a device: `--profile --device "AMD" --kernel E_3`
## Inspect codegen and PatternMatcher
Use `PYTHONPATH=. extra/viz/cli.py --rewrites` to list all traced kernels.
Use `extra/viz/cli.py --rewrites` to list all traced kernels.
List all codegen steps for a kernel: `--rewrites --kernel E_3`
Get source code: `--rewrites --kernel E_3 --select "View Source"`

View File

@@ -1,44 +1,73 @@
#!/usr/bin/env python3
import os
os.environ["VIZ"] = "0"
import argparse, pathlib
import argparse, pathlib, sys, struct, json
from typing import Iterator
from tinygrad.viz import serve as viz
from tinygrad.uop.ops import RewriteTrace
from tinygrad.helpers import temp, ansistrip, colored, time_to_str, ansilen
from test.null.test_viz import load_profile
# ** generic helpers
def optional_eq(val:dict, arg:str|None) -> bool: return arg is None or ansistrip(val["name"]) == arg
def print_data(data:dict) -> None:
if isinstance(data.get("value"), Iterator):
for m in data["value"]:
if m.get("uop"):
print("Input UOp:")
print(m["uop"])
if not m["diff"]: continue
print("Rewrites:")
fp = pathlib.Path(m["upat"][0][0])
print(f"{fp.parent.name}/{fp.name}:{m['upat'][0][1]}")
print(m["upat"][1])
for line in m["diff"]:
color = "red" if line.startswith("-") else "green" if line.startswith("+") else None
print(colored(line, color))
if m.get("uop"): print(f"Input UOp:\n{m['uop']}")
if m.get("diff"):
loc = pathlib.Path(m["upat"][0][0])
print(f"Rewrite at {loc.parent.name}/{loc.name}:{m['upat'][0][1]}\n{m['upat'][1]}")
for line in m["diff"]: print(colored(line, "red" if line.startswith("-") else "green" if line.startswith("+") else None))
if data.get("src") is not None: print(data["src"])
# ** Profiler trace decoder
# 0 means None, otherwise it's an enum value
def option(i:int) -> int|None: return None if i == 0 else i-1
def decode_profile(data:bytes) -> dict:
  """Decode a packed little-endian profile blob into a plain dict.

  Layout, as read here:
    header   "<IQII": total duration, global peak, byte length of the JSON index, number of layout entries
    index    JSON object whose three values are, in order, the string table, dtypes (not used here) and markers
    entries  one per layout key: "<B" key length, the key bytes, then "<BI" event type and event count
      type 0     -> per event "<IIIIfI": name, ref, key, st, dur, fmt (name and fmt index the string table,
                    ref and key go through option())
      otherwise  -> "<Q" peak, then per event "<BII": alloc flag, ts, key, followed by
                    alloc: "<I" dtype (string table index) and "<Q" sz
                    free:  "<I" user count and that many "<IIIB" tuples

  Returns {"dur", "peak", "layout", "markers"}. The struct reads raise struct.error when the buffer is too short.
  """
  pos = 0
  def read(fmt:str) -> tuple:
    # unpack one record at the cursor and step the cursor past it
    nonlocal pos
    start, pos = pos, pos + struct.calcsize(fmt)
    return struct.unpack_from(fmt, data, start)
  def take(n:int) -> bytes:
    # slice n raw bytes at the cursor and step the cursor past them
    nonlocal pos
    start, pos = pos, pos + n
    return data[start:pos]

  total_dur, global_peak, index_len, layout_len = read("<IQII")
  # the index must hold exactly three values; the middle one is not needed for decoding
  strings, _dtypes, markers = json.loads(take(index_len)).values()

  layout:dict[str, dict] = {}
  for _ in range(layout_len):
    (key_len,) = read("<B")
    track = take(key_len).decode()
    event_type, event_count = read("<BI")
    events:list[dict] = []
    entry:dict = {"events":events}
    layout[track] = entry
    if event_type == 0:
      for _ in range(event_count):
        name, ref, key, st, dur, fmt = read("<IIIIfI")
        events.append({"name":strings[name], "ref":option(ref), "key":option(key), "st":st, "dur":dur, "fmt":strings[fmt]})
      continue
    # every other event type carries a per-entry peak ahead of its alloc/free events
    entry["peak"] = read("<Q")[0]
    for _ in range(event_count):
      alloc, ts, key = read("<BII")
      if alloc:
        dtype = strings[read("<I")[0]]
        arg:dict = {"dtype":dtype, "sz":read("<Q")[0]}
      else:
        (n_users,) = read("<I")
        arg = {"users":[read("<IIIB") for _ in range(n_users)]}
      events.append({"event":"alloc" if alloc else "free", "ts":ts, "key":key, "arg":arg})
  return {"dur":total_dur, "peak":global_peak, "layout":layout, "markers":markers}
if __name__ == "__main__":
parser = argparse.ArgumentParser()
g_mode = parser.add_argument_group("mode")
g_mode.add_argument("--profile", action="store_true", help="View profile trace")
g_mode.add_argument("--rewrites", action="store_true", help="View rewrites trace")
g_common = parser.add_argument_group("common options")
g_common.add_argument("--kernel", type=str, default=None, metavar="NAME", help="Select a kernel by name (optional name, default: only list names)")
g_profile = parser.add_argument_group("profile options")
g_profile.add_argument("--device", type=str, default=None, metavar="NAME", help="Select a device (optional name, default: only list names)")
g_profile.add_argument("--top", type=int, default=10, metavar="N", help="Number of top kernels to show (-1 for all, default: 10)")
g_rewrites = parser.add_argument_group("rewrites options")
g_rewrites.add_argument("--select", type=str, default=None, metavar="NAME",
help="Select an item within the chosen kernel (optional name, default: only list names)")
g_common = parser.add_argument_group("common options")
g_common.add_argument("--kernel", type=str, default=None, metavar="NAME", help="Select a kernel by name (optional name, default: only list names)")
parser.add_argument("--profile-path", type=pathlib.Path, metavar="PATH", help="Path to profile (optional file, default: latest profile)",
default=pathlib.Path(temp("profile.pkl", append_user=True)))
parser.add_argument("--rewrites-path", type=pathlib.Path, metavar="PATH", help="Path to rewrites (optional file, default: latest rewrites)",
@@ -46,14 +75,14 @@ if __name__ == "__main__":
args = parser.parse_args()
if not args.profile and not args.rewrites:
parser.print_help()
exit(0)
sys.exit(0)
viz.trace = viz.load_pickle(args.rewrites_path, default=RewriteTrace([], [], {}))
viz.ctxs = viz.get_rewrites(viz.trace)
if args.profile:
from tabulate import tabulate
profile = load_profile(viz.load_pickle(args.profile_path, default=[]))
profile = decode_profile(viz.get_profile(viz.load_pickle(args.profile_path, default=[])))
agg, total, n = {}, 0, 0
if args.device is None: print("Select a device:")
for k,v in profile["layout"].items():
@@ -63,7 +92,7 @@ if __name__ == "__main__":
for e in v.get("events", []):
et = e["dur"]*1e-6
if args.kernel is not None:
if ansistrip(e["name"]) == args.kernel and n < 10:
if optional_eq(e, args.kernel) and n < 10:
ptm = colored(time_to_str(et, w=9), "yellow" if et > 0.01 else None) if et is not None else ""
name = e["name"]+(" " * (46 - ansilen(e["name"])))
print(f"{name} {ptm}/{(et or 0)*1e3:9.2f}ms "+e['fmt'].replace('\n', ' | ')+" ")
@@ -81,7 +110,7 @@ if __name__ == "__main__":
other_t = total-sum(t for _, (t, _) in sel)
table.append([f"Other ({len(other)} unique)", time_to_str(other_t, w=9), sum(c for _,(_,c) in other), f"{other_t/total*100.0:.2f}%"])
print(tabulate(table, headers=["name", "total", "count", "pct"], tablefmt="github"))
exit(0)
sys.exit(0)
for k in viz.ctxs:
if not optional_eq(k, args.kernel): continue

View File

@@ -1,4 +1,4 @@
import unittest, decimal, json, struct, sys
import unittest, decimal, sys
from dataclasses import dataclass
from typing import Generator
@@ -357,41 +357,9 @@ class TestVizIntegration(BaseTestViz):
from tinygrad.device import ProfileDeviceEvent, ProfileGraphEvent, ProfileGraphEntry
from tinygrad.viz.serve import get_profile
from extra.viz.cli import decode_profile
class TinyUnpacker:
def __init__(self, buf): self.buf, self.offset = buf, 0
def __call__(self, fmt:str) -> tuple:
ret = struct.unpack_from(fmt, self.buf, self.offset)
self.offset += struct.calcsize(fmt)
return ret
# 0 means None, otherwise it's an enum value
def option(i:int) -> int|None: return None if i == 0 else i-1
def load_profile(lst:list[ProfileEvent]) -> dict:
ret = get_profile(lst)
u = TinyUnpacker(ret)
total_dur, global_peak, index_len, layout_len = u("<IQII")
strings, dtypes, markers = json.loads(ret[u.offset:u.offset+index_len]).values()
u.offset += index_len
layout:dict[str, dict] = {}
for _ in range(layout_len):
klen = u("<B")[0]
k = ret[u.offset:u.offset+klen].decode()
u.offset += klen
layout[k] = v = {"events":[]}
event_type, event_count = u("<BI")
if event_type == 0:
for _ in range(event_count):
name, ref, key, st, dur, fmt = u("<IIIIfI")
v["events"].append({"name":strings[name], "ref":option(ref), "key":option(key), "st":st, "dur":dur, "fmt":strings[fmt]})
else:
v["peak"] = u("<Q")[0]
for _ in range(event_count):
alloc, ts, key = u("<BII")
if alloc: v["events"].append({"event":"alloc", "ts":ts, "key":key, "arg": {"dtype":strings[u("<I")[0]], "sz":u("<Q")[0]}})
else: v["events"].append({"event":"free", "ts":ts, "key":key, "arg": {"users":[u("<IIIB") for _ in range(u("<I")[0])]}})
return {"dur":total_dur, "peak":global_peak, "layout":layout, "markers":markers}
def load_profile(lst:list[ProfileEvent]) -> dict:
  """Pack the profile events with get_profile, then decode the result with decode_profile."""
  packed = get_profile(lst)
  return decode_profile(packed)
class TestVizProfiler(BaseTestViz):
def test_transfer_uses_copy_device(self):