From a80fb4e64147ff0fd476e89f186f045c44b62e4b Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Fri, 6 Feb 2026 09:08:09 -0500 Subject: [PATCH] viz: better ordering of device engines in profiler (#14590) --- test/null/test_viz.py | 4 ++-- tinygrad/viz/serve.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/test/null/test_viz.py b/test/null/test_viz.py index 9b41dd1577..1597045eeb 100644 --- a/test/null/test_viz.py +++ b/test/null/test_viz.py @@ -463,11 +463,11 @@ class TestVizProfiler(BaseTestViz): def test_layout_order(self): def fn(): return - for dname in ["TINY", "USER", "TEST:1 N1", "TEST:2 N1", "TEST:1 N2"]: + for dname in ["TINY", "USER", "TEST:1 N1", "TEST:2 N1", "TEST:1 N2", "TEST:1:ENGINE:0", "TEST:1"]: with cpu_profile("fn", dname): fn() layout = list(load_profile(cpu_events)["layout"]) self.assertListEqual(layout[:2], ["USER","TINY"]) - self.assertListEqual(layout[2:], ["TEST:1 N1","TEST:1 N2", "TEST:2 N1"]) + self.assertListEqual(layout[2:], ["TEST:1", "TEST:1:ENGINE:0", "TEST:1 N1","TEST:1 N2", "TEST:2 N1"]) def _alloc(b:int): a = Tensor.empty(b, device="NULL", dtype=dtypes.char) diff --git a/tinygrad/viz/serve.py b/tinygrad/viz/serve.py index 90893a2bdf..9eb5e18698 100755 --- a/tinygrad/viz/serve.py +++ b/tinygrad/viz/serve.py @@ -375,9 +375,12 @@ def unpack_sqtt(key:tuple[str, int], data:list, p:ProfileProgramEvent) -> tuple[ def device_sort_fn(k:str) -> tuple[int, str, int]: order = {"GC": 0, "USER": 1, "TINY": 2, "DISK": 999} - dname = k.split()[0] + dname, *rest = k.split() dev_rank = next((v for k,v in order.items() if dname.startswith(k)), len(order)) - return (dev_rank, dname, len(k)) + if len(parts:=dname.split(":")) < 2 or not parts[1].isdigit(): parts.insert(1, "0") + eng_rank = 2 if rest else 1 if len(parts) > 2 else 0 + # 3 levels of hierarchy: device class, index in multi device, engine within device + return (dev_rank, parts[1], eng_rank) def get_profile(profile:list[ProfileEvent], sort_fn:Callable[[str], Any]=device_sort_fn) -> bytes|None: # start by getting the time diffs