viz: add trace data for inflight buffers (#12728)

* viz: add trace data for inflight buffers
* add test_inflight_buf
* temp stores the keys
* update tests / use Tensor.ones
2026-01-09 15:08:02 -05:00 · 2025-10-16 19:15:03 +08:00
parent af4479c169
commit 533f18b22c
3 changed files with 23 additions and 13 deletions
--- a/test/unit/test_viz.py
+++ b/test/unit/test_viz.py
@@ -442,7 +442,7 @@ class TestVizMemoryLayout(BaseTestViz):
    profile_ret = load_profile(Buffer.profile_events)
    ret = profile_ret["layout"][f"{a.device} Memory"]
    self.assertEqual(ret["peak"], 2)
-    self.assertEqual(len(ret["events"]), 2)
+    self.assertEqual(len(ret["events"]), 4)

  def test_del_once(self):
    a = _alloc(1)
@@ -451,7 +451,7 @@ class TestVizMemoryLayout(BaseTestViz):
    profile_ret = load_profile(Buffer.profile_events)
    ret = profile_ret["layout"][f"{b.device} Memory"]
    self.assertEqual(ret["peak"], 1)
-    self.assertEqual(len(ret["events"]), 3)
+    self.assertEqual(len(ret["events"]), 4)

  def test_alloc_free(self):
    a = _alloc(1)
@@ -461,7 +461,7 @@ class TestVizMemoryLayout(BaseTestViz):
    profile_ret = load_profile(Buffer.profile_events)
    ret = profile_ret["layout"][f"{c.device} Memory"]
    self.assertEqual(ret["peak"], 2)
-    self.assertEqual(len(ret["events"]), 4)
+    self.assertEqual(len(ret["events"]), 6)

  def test_free_last(self):
    bufs = []
@@ -480,15 +480,24 @@ class TestVizMemoryLayout(BaseTestViz):
    self.assertEqual(len(profile["markers"]), 6)

  def test_producer_simple(self):
-    a = Tensor.empty(10, device="NULL")
-    Tensor.realize(a.add(1), a.add(2))
-    b = Tensor.empty(10, device="NULL")
-    Tensor.realize(b.add(1))
+    a = Tensor.ones(10, device="NULL")
+    Tensor.realize(a.add(1).contiguous())
+    b = Tensor.ones(10, device="NULL")
+    Tensor.realize(b.add(1).contiguous())
    profile = load_profile(cpu_events+Buffer.profile_events)
    buffers = profile["layout"]["NULL Memory"]["events"]
    programs = profile["layout"]["NULL"]["events"]
    user_cnt = [len(b["arg"]["users"]) for b in buffers if b["arg"].get("users")]
    self.assertEqual(len(user_cnt), len(programs))

+  def test_inflight_buf(self):
+    a = Tensor.empty(1, device="NULL")
+    n = 4
+    for i in range(n): (a+i).realize()
+    profile = load_profile(cpu_events+Buffer.profile_events)
+    buffers = profile["layout"]["NULL Memory"]["events"]
+    user_cnt = [len(b["arg"]["users"]) for b in buffers if b["arg"].get("users")]
+    self.assertEqual(max(user_cnt), n)
+
 if __name__ == "__main__":
  unittest.main()
--- a/tinygrad/viz/js/index.js
+++ b/tinygrad/viz/js/index.js
@@ -272,10 +272,6 @@ async function renderProfiler() {
          }
        }
      }
-      for (const [_, v] of temp) {
-        v.x.push(x);
-        v.y.push(v.y.at(-1));
-      }
      timestamps.push(dur);
      const height = heightScale(peak);
      const yscale = d3.scaleLinear().domain([0, peak]).range([height, 0]);
--- a/tinygrad/viz/serve.py
+++ b/tinygrad/viz/serve.py
@@ -152,12 +152,17 @@ def timeline_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:
    events.append(struct.pack("<IIIfI", enum_str(name, scache), option(ref), st-start_ts, dur, enum_str(info or "", scache)))
  return struct.pack("<BI", 0, len(events))+b"".join(events) if events else None

+def encode_mem_free(key:int, ts:int, execs:list[ProfilePointEvent], scache:dict) -> bytes:
+  kernel_names = [enum_str(ei.key, scache) for ei in execs]
+  return struct.pack(f"<BIII{len(kernel_names)}I", 0, ts, key, len(kernel_names), *kernel_names)
+
 def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int],
               scache:dict[str, int]) -> bytes|None:
  peak, mem = 0, 0
  temp:dict[int, int] = {}
  events:list[bytes] = []
  buf_ei:dict[int, list[ProfilePointEvent]] = {}
+
  for st,_,_,e in dev_events:
    if not isinstance(e, ProfilePointEvent): continue
    if e.name == "alloc":
@@ -170,9 +175,9 @@ def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int,
    if e.name == "exec" and e.arg["bufs"]:
      for b in e.arg["bufs"]: buf_ei.setdefault(b, []).append(e)
    if e.name == "free":
-      kernel_names = [enum_str(ei.key, scache) for ei in buf_ei.pop(e.key, [])]
-      events.append(struct.pack(f"<BIII{len(kernel_names)}I", 0, int(e.ts) - start_ts, e.key, len(kernel_names), *kernel_names))
+      events.append(encode_mem_free(e.key, int(e.ts) - start_ts, buf_ei.pop(e.key, []), scache))
      mem -= temp.pop(e.key)
+  for t in temp: events.append(encode_mem_free(t, end_ts-start_ts, buf_ei.pop(t, []), scache))
  peaks.append(peak)
  return struct.pack("<BIQ", 1, len(events), peak)+b"".join(events) if events else None