viz: improve disasm of s_code_end (#15258)

* viz: improve amd disasm of s_code_end
* better tests
* order was good
2026-04-29 03:00:14 -04:00 · 2026-03-13 20:31:14 +02:00
parent a191ac0566
commit 6209ddfc90
2 changed files with 11 additions and 7 deletions
--- a/test/testextra/test_cfg_viz.py
+++ b/test/testextra/test_cfg_viz.py
@@ -58,7 +58,8 @@ class TestCfg(unittest.TestCase):
    k.emit(s_endpgm())
    k.emit(s_code_end())
    ei = run_asm("diamond", k)
-    cfg = amdgpu_cfg(ei.prg.p.lib, self.arch)["data"]
+    ret = amdgpu_cfg(ei.prg.p.lib, self.arch)
+    cfg = ret["data"]
    self.assertEqual(len(cfg["blocks"]), 5)
    edge_count = sum(len(v) for v in cfg["paths"].values())
    self.assertEqual(edge_count, 5)
@@ -69,8 +70,11 @@ class TestCfg(unittest.TestCase):
    self.assertEqual(len(references["r0"]), 2)
    insts = [cfg["pc_tokens"][pc][0]["st"] for pc in references["r0"]]
    self.assertEqual(insts, ['s_mov_b32', 's_cmp_eq_u64'])
-    end_block_content = "\n".join(" ".join(t["st"] for t in cfg["pc_tokens"][pc]) for pc in list(cfg["blocks"].values())[-1])
-    self.assertEqual(end_block_content, "s_endpgm\ns_code_end (217x)")
+    end_block = [" ".join(t["st"] for t in cfg["pc_tokens"][pc]) for pc in list(cfg["blocks"].values())[-1]]
+    code_line = ret["src"].splitlines()[-1].strip()
+    self.assertEqual(len(end_block), 2)
+    for st in [end_block[-1], code_line]:
+      assert st.startswith("s_code_end") and st.endswith("x)"), st

  def test_loop(self):
    k = Kernel(arch=Device["AMD"].arch)
--- a/tinygrad/viz/serve.py
+++ b/tinygrad/viz/serve.py
@@ -525,11 +525,7 @@ def amdgpu_cfg(lib:bytes, target:str) -> dict:
  curr:int|None = None
  blocks:dict[int, list[int]] = {}
  paths:dict[int, dict[int, int]] = {}
-  lines:list[str] = []
-  disasm = {pc:str(inst) for pc,inst in pc_table.items()}
-  asm_width = max(len(asm) for asm in disasm.values())
  for pc, inst in pc_table.items():
-    lines.append(f"  {disasm[pc]:<{asm_width}}  // {pc:012X}")
    if pc in leaders:
      paths[curr:=pc] = {}
      blocks[pc] = []
@@ -550,6 +546,9 @@ def amdgpu_cfg(lib:bytes, target:str) -> dict:
      elif name in {"op","opx","opy"}: tokens.append({"st":(op_name:=val.name.lower()), "keys":[op_name], "kind":0})
      elif name != "encoding" and val != field.default: tokens.append({"st":(s:=repr(val)), "keys":[s], "kind":1})
  # show a smaller view for repeated instructions in the graph
+  lines:list[str] = []
+  disasm = {pc:str(inst) for pc,inst in pc_table.items()}
+  asm_width = max(len(asm) for asm in disasm.values())
  for pcs in blocks.values():
    new_pcs:list[int] = []
    i, n = 0, len(pcs)
@@ -560,6 +559,7 @@ def amdgpu_cfg(lib:bytes, target:str) -> dict:
      if j-i>1:
        pc_tokens[pcs[i]].append({"st":f"({j-i}x)", "keys":[], "kind":0})
        for k in range(i+1, j): del pc_tokens[pcs[k]]
+      lines.append(f"  {disasm[pcs[i]]:<{asm_width}}  # {pcs[i]:012X}"+(f"...{pcs[j-1]:012X} ({j-i}x)" if j-i>1 else ""))
      i = j
    pcs[:] = new_pcs
  from tinygrad.runtime.autogen import amdgpu_kd