diff --git a/test/testextra/test_cfg_viz.py b/test/testextra/test_cfg_viz.py
index c6306482e6..9911fb5e0a 100644
--- a/test/testextra/test_cfg_viz.py
+++ b/test/testextra/test_cfg_viz.py
@@ -69,6 +69,8 @@ class TestCfg(unittest.TestCase):
     self.assertEqual(len(references["r0"]), 2)
     insts = [cfg["pc_tokens"][pc][0]["st"] for pc in references["r0"]]
     self.assertEqual(insts, ['s_mov_b32', 's_cmp_eq_u64'])
+    end_block_content = "\n".join(" ".join(t["st"] for t in cfg["pc_tokens"][pc]) for pc in list(cfg["blocks"].values())[-1])
+    self.assertEqual(end_block_content, "s_endpgm\ns_code_end (217x)")
 
   def test_loop(self):
     k = Kernel(arch=Device["AMD"].arch)
diff --git a/tinygrad/viz/serve.py b/tinygrad/viz/serve.py
index 5a3c4060d2..0e3cc801bb 100755
--- a/tinygrad/viz/serve.py
+++ b/tinygrad/viz/serve.py
@@ -533,6 +533,19 @@ def amdgpu_cfg(lib:bytes, target:str) -> dict:
       if isinstance(val:=getattr(inst, name), Reg): tokens.append({"st":val.fmt(), "keys":[f"r{val.offset+i}" for i in range(val.sz)], "kind":1})
       elif name in {"op","opx","opy"}: tokens.append({"st":(op_name:=val.name.lower()), "keys":[op_name], "kind":0})
       elif name != "encoding" and val != field.default: tokens.append({"st":(s:=repr(val)), "keys":[s], "kind":1})
+  # show a smaller view for repeated instructions in the graph: keep the first pc of each run and tag it with a "(Nx)" count
+  for pcs in blocks.values():
+    new_pcs:list[int] = []
+    i, n = 0, len(pcs)
+    while i < n:
+      j = i+1
+      while j<n and pc_tokens[pcs[j]] == pc_tokens[pcs[i]]: j += 1
+      new_pcs.append(pcs[i])
+      if (j-i)>1:
+        pc_tokens[pcs[i]].append({"st":f"({j-i}x)", "keys":[], "kind":0})
+        for k in range(i+1, j): del pc_tokens[pcs[k]]
+      i = j
+    pcs[:] = new_pcs
   from tinygrad.runtime.autogen import amdgpu_kd
   kd = amdgpu_kd.llvm_amdhsa_kernel_descriptor_t.from_buffer_copy(bytearray(get_elf_section(lib, ".rodata").content))
   vgpr_gran = kd.compute_pgm_rsrc1 & amdgpu_kd.COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT