viz: add trace data for inflight buffers (#12728)

* viz: add trace data for inflight buffers

* add test_inflight_buf

* temp stores the keys

* update tests / use Tensor.ones
This commit is contained in:
qazal
2025-10-16 19:15:03 +08:00
committed by GitHub
parent af4479c169
commit 533f18b22c
3 changed files with 23 additions and 13 deletions

View File

@@ -442,7 +442,7 @@ class TestVizMemoryLayout(BaseTestViz):
profile_ret = load_profile(Buffer.profile_events)
ret = profile_ret["layout"][f"{a.device} Memory"]
self.assertEqual(ret["peak"], 2)
self.assertEqual(len(ret["events"]), 2)
self.assertEqual(len(ret["events"]), 4)
def test_del_once(self):
a = _alloc(1)
@@ -451,7 +451,7 @@ class TestVizMemoryLayout(BaseTestViz):
profile_ret = load_profile(Buffer.profile_events)
ret = profile_ret["layout"][f"{b.device} Memory"]
self.assertEqual(ret["peak"], 1)
self.assertEqual(len(ret["events"]), 3)
self.assertEqual(len(ret["events"]), 4)
def test_alloc_free(self):
a = _alloc(1)
@@ -461,7 +461,7 @@ class TestVizMemoryLayout(BaseTestViz):
profile_ret = load_profile(Buffer.profile_events)
ret = profile_ret["layout"][f"{c.device} Memory"]
self.assertEqual(ret["peak"], 2)
self.assertEqual(len(ret["events"]), 4)
self.assertEqual(len(ret["events"]), 6)
def test_free_last(self):
bufs = []
@@ -480,15 +480,24 @@ class TestVizMemoryLayout(BaseTestViz):
self.assertEqual(len(profile["markers"]), 6)
def test_producer_simple(self):
a = Tensor.empty(10, device="NULL")
Tensor.realize(a.add(1), a.add(2))
b = Tensor.empty(10, device="NULL")
Tensor.realize(b.add(1))
a = Tensor.ones(10, device="NULL")
Tensor.realize(a.add(1).contiguous())
b = Tensor.ones(10, device="NULL")
Tensor.realize(b.add(1).contiguous())
profile = load_profile(cpu_events+Buffer.profile_events)
buffers = profile["layout"]["NULL Memory"]["events"]
programs = profile["layout"]["NULL"]["events"]
user_cnt = [len(b["arg"]["users"]) for b in buffers if b["arg"].get("users")]
self.assertEqual(len(user_cnt), len(programs))
def test_inflight_buf(self):
  # `a` is still referenced when the profile is loaded, so no "free" has been recorded for it here.
  # NOTE(review): presumably the buffer backing `a` is the one that ends up with all the users - confirm.
  a = Tensor.empty(1, device="NULL")
  kernel_count = 4
  # realize one separate result per iteration, each reading from `a`
  for offset in range(kernel_count): (a+offset).realize()
  mem_events = load_profile(cpu_events+Buffer.profile_events)["layout"]["NULL Memory"]["events"]
  # only events that carry a non-empty "users" list are counted
  users_per_event = [len(users) for ev in mem_events if (users := ev["arg"].get("users"))]
  # the most-used buffer must list one user per realized kernel
  self.assertEqual(max(users_per_event), kernel_count)
if __name__ == "__main__":
unittest.main()

View File

@@ -272,10 +272,6 @@ async function renderProfiler() {
}
}
}
for (const [_, v] of temp) {
v.x.push(x);
v.y.push(v.y.at(-1));
}
timestamps.push(dur);
const height = heightScale(peak);
const yscale = d3.scaleLinear().domain([0, peak]).range([height, 0]);

View File

@@ -152,12 +152,17 @@ def timeline_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:
events.append(struct.pack("<IIIfI", enum_str(name, scache), option(ref), st-start_ts, dur, enum_str(info or "", scache)))
return struct.pack("<BI", 0, len(events))+b"".join(events) if events else None
def encode_mem_free(key:int, ts:int, execs:list[ProfilePointEvent], scache:dict) -> bytes:
  """
  Serialize one memory event record as little-endian, unpadded bytes.

  Layout: u8 tag (always 0), u32 `ts`, u32 `key`, u32 count, followed by `count` u32 values,
  one per entry of `execs`, each obtained from `enum_str(ei.key, scache)`.
  NOTE(review): enum_str presumably maps the string to an integer id via `scache` - confirm.
  """
  user_ids = [enum_str(ev.key, scache) for ev in execs]
  # fixed-size header first, then the variable-length id array; "<" means no alignment padding between them
  header = struct.pack("<BIII", 0, ts, key, len(user_ids))
  return header + struct.pack(f"<{len(user_ids)}I", *user_ids)
def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int],
scache:dict[str, int]) -> bytes|None:
peak, mem = 0, 0
temp:dict[int, int] = {}
events:list[bytes] = []
buf_ei:dict[int, list[ProfilePointEvent]] = {}
for st,_,_,e in dev_events:
if not isinstance(e, ProfilePointEvent): continue
if e.name == "alloc":
@@ -170,9 +175,9 @@ def mem_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int,
if e.name == "exec" and e.arg["bufs"]:
for b in e.arg["bufs"]: buf_ei.setdefault(b, []).append(e)
if e.name == "free":
kernel_names = [enum_str(ei.key, scache) for ei in buf_ei.pop(e.key, [])]
events.append(struct.pack(f"<BIII{len(kernel_names)}I", 0, int(e.ts) - start_ts, e.key, len(kernel_names), *kernel_names))
events.append(encode_mem_free(e.key, int(e.ts) - start_ts, buf_ei.pop(e.key, []), scache))
mem -= temp.pop(e.key)
for t in temp: events.append(encode_mem_free(t, end_ts-start_ts, buf_ei.pop(t, []), scache))
peaks.append(peak)
return struct.pack("<BIQ", 1, len(events), peak)+b"".join(events) if events else None