mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
@@ -180,9 +180,9 @@ def enum_str(s, cache:dict[str, int]) -> int:
|
||||
|
||||
def option(s:int|None) -> int: return 0 if s is None else s+1
|
||||
|
||||
def rel_ts(ts:int|Decimal, start_ts:int) -> int:
|
||||
def rel_ts(ts:int|Decimal, start_ts:int, ctx:str="") -> int:
|
||||
val = int(ts) - start_ts
|
||||
if val < 0 or val > 0xFFFFFFFF: raise ValueError(f"timestamp out of range: ts={ts} start={start_ts} {val}")
|
||||
if val < 0 or val > 0xFFFFFFFF: raise ValueError(f"timestamp out of range: {ctx} diff={val} (ts={ts} start={start_ts})")
|
||||
return val
|
||||
|
||||
# Profiler API
|
||||
@@ -228,7 +228,8 @@ def timeline_layout(dev_events:list[tuple[int, int, float, DevEvent]], start_ts:
|
||||
membw = e.name.ret / (dur * 1e-6)
|
||||
fmt.append(f"{membw*1e-9:.0f} GB/s" if membw < 1e13 else f"{membw*1e-12:.0f} TB/s")
|
||||
elif e.name.tb: fmt.append("TB:"+json.dumps(e.name.tb))
|
||||
events.append(struct.pack("<IIIIfI", enum_str(name, scache), option(ref), option(key), rel_ts(st,start_ts), dur, enum_str("\n".join(fmt),scache)))
|
||||
events.append(struct.pack("<IIIIfI", enum_str(name, scache), option(ref), option(key), rel_ts(st,start_ts, f"'{name}' on {e.device}"),
|
||||
dur, enum_str("\n".join(fmt),scache)))
|
||||
return struct.pack("<BI", 0, len(events))+b"".join(events) if events else None
|
||||
|
||||
def encode_mem_free(key:int, ts:int, execs:list[ProfilePointEvent], scache:dict) -> bytes:
|
||||
@@ -242,7 +243,7 @@ def encode_mem_free(key:int, ts:int, execs:list[ProfilePointEvent], scache:dict)
|
||||
def graph_layout(k:str, dev_events:list[tuple[int, int, float, DevEvent]], start_ts:int, end_ts:int, peaks:list[int], dtype_size:dict[str, int],
|
||||
scache:dict[str, int]) -> tuple[str, bytes|None]:
|
||||
if k.startswith("LINE:"):
|
||||
xy = [(rel_ts(e.ts, start_ts), e.key) for st,_,_,e in dev_events if isinstance(e, ProfilePointEvent)]
|
||||
xy = [(rel_ts(e.ts, start_ts, f"line '{k}' on {e.device}"), e.key) for st,_,_,e in dev_events if isinstance(e, ProfilePointEvent)]
|
||||
peaks.append(peak:=max([y for _,y in xy]))
|
||||
return k.replace("LINE:", ""), struct.pack("<BIBQ", 1, len(xy), 1, peak)+b"".join(struct.pack("<IQ", x, y) for x,y in xy)
|
||||
peak, mem = 0, 0
|
||||
@@ -253,7 +254,7 @@ def graph_layout(k:str, dev_events:list[tuple[int, int, float, DevEvent]], start
|
||||
if not isinstance(e, ProfilePointEvent): continue
|
||||
if e.name == "alloc":
|
||||
safe_sz = min(1_000_000_000_000, e.arg["sz"])
|
||||
events.append(struct.pack("<BIIIQ", 1, rel_ts(e.ts, start_ts), e.key, enum_str(e.arg["dtype"].name, scache), safe_sz))
|
||||
events.append(struct.pack("<BIIIQ", 1, rel_ts(e.ts, start_ts, f"alloc on {e.device}"), e.key, enum_str(e.arg["dtype"].name, scache), safe_sz))
|
||||
dtype_size.setdefault(e.arg["dtype"].name, e.arg["dtype"].itemsize)
|
||||
temp[e.key] = nbytes = safe_sz*e.arg["dtype"].itemsize
|
||||
mem += nbytes
|
||||
@@ -261,9 +262,9 @@ def graph_layout(k:str, dev_events:list[tuple[int, int, float, DevEvent]], start
|
||||
if e.name == "exec" and e.arg["bufs"]:
|
||||
for b in e.arg["bufs"]: buf_ei.setdefault(b, []).append(e)
|
||||
if e.name == "free":
|
||||
events.append(encode_mem_free(e.key, rel_ts(e.ts, start_ts), buf_ei.pop(e.key, []), scache))
|
||||
events.append(encode_mem_free(e.key, rel_ts(e.ts, start_ts, f"free on {e.device}"), buf_ei.pop(e.key, []), scache))
|
||||
mem -= temp.pop(e.key)
|
||||
for t in temp: events.append(encode_mem_free(t, rel_ts(end_ts, start_ts), buf_ei.pop(t, []), scache))
|
||||
for t in temp: events.append(encode_mem_free(t, rel_ts(end_ts, start_ts, f"end_ts for {k}"), buf_ei.pop(t, []), scache))
|
||||
peaks.append(peak)
|
||||
return f"{k} Memory", struct.pack("<BIBQ", 1, len(events), 0, peak)+b"".join(events) if events else None
|
||||
|
||||
@@ -482,9 +483,10 @@ def get_profile(profile:list[ProfileEvent], sort_fn:Callable[[str], Any]=device_
|
||||
layout.update([graph_layout(k, v, start_ts, unwrap(end_ts), peaks, dtype_size, scache)])
|
||||
sorted_layout = sorted([k for k,v in layout.items() if v is not None], key=sort_fn)
|
||||
ret = [b"".join([struct.pack("<B", len(k)), k.encode(), unwrap(layout[k])]) for k in sorted_layout]
|
||||
index = json.dumps({"strings":list(scache), "dtypeSize":dtype_size, "markers":[{"ts":rel_ts(e.ts, start_ts), **e.arg} for e in markers],
|
||||
index = json.dumps({"strings":list(scache), "dtypeSize":dtype_size,
|
||||
"markers":[{"ts":rel_ts(e.ts, start_ts, f"marker '{e.arg.get('name','?')}'"), **e.arg} for e in markers],
|
||||
**ext_data}).encode()
|
||||
return struct.pack("<IQII", rel_ts(unwrap(end_ts), start_ts), max(peaks,default=0), len(index), len(ret))+index+b"".join(ret)
|
||||
return struct.pack("<IQII", rel_ts(unwrap(end_ts), start_ts, "end_ts"), max(peaks,default=0), len(index), len(ret))+index+b"".join(ret)
|
||||
|
||||
# ** PMA counters
|
||||
|
||||
|
||||
Reference in New Issue
Block a user