set .amdhsa_kernarg_size in asm test (#13826)

2026-01-08 22:48:25 -05:00 · 2025-12-25 13:08:14 +09:00
parent da1cb6a9ec
commit a1c1684b91
2 changed files with 6 additions and 4 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -642,7 +642,7 @@ jobs:
        if: matrix.backend=='amdllvm'
        run: python test/device/test_amd_llvm.py
      - name: Run pytest (amd)
-        run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/test_jit.py test/test_graph.py test/test_multitensor.py test/device/test_hcq.py --durations=20
+        run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/test_jit.py test/test_graph.py test/test_multitensor.py test/device/test_hcq.py test/testextra/test_cfg_viz.py --durations=20
      - name: Run pytest (amd)
        run: python -m pytest test/external/external_test_am.py --durations=20
      - name: Run TRANSCENDENTAL math
--- a/test/testextra/test_cfg_viz.py
+++ b/test/testextra/test_cfg_viz.py
@@ -4,7 +4,7 @@ import textwrap
 from tinygrad import Device, Tensor
 from tinygrad.uop.ops import UOp, Ops, track_rewrites
 from tinygrad.renderer import ProgramSpec
-from tinygrad.helpers import TracingKey
+from tinygrad.helpers import TracingKey, getenv
 from tinygrad.engine.realize import ExecItem, CompiledRunner

 # TODO: use the RDNA3 renderer when it's in master
@@ -18,6 +18,7 @@ fn_name:
 .rodata
 .p2align 6
 .amdhsa_kernel fn_name
+  .amdhsa_kernarg_size 8
  .amdhsa_user_sgpr_kernarg_segment_ptr 1
  .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr
  .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr
@@ -53,12 +54,13 @@ amdhsa.kernels:

@track_rewrites(name=lambda *args,ret,**kwargs: TracingKey(ret.name, ret=ret))
 def run_asm(name:str, src:str) -> ProgramSpec:
-  prg = ProgramSpec(name, template.replace("fn_name", name).replace("INSTRUCTION", textwrap.dedent(src)), Device.DEFAULT, UOp(Ops.SINK))
+  prg = ProgramSpec(name, template.replace("fn_name", name).replace("INSTRUCTION", textwrap.dedent(src)), Device.DEFAULT, UOp(Ops.SINK),
+                    global_size=[1, 1, 1], local_size=[1, 1, 1], globals=[0])
  ei = ExecItem(UOp(Ops.SINK), [Tensor.empty(1).uop.buffer.ensure_allocated()], prg=CompiledRunner(prg))
  ei.run()
  return prg

-@unittest.skipUnless(Device.DEFAULT == "AMD", "only on AMD")
+@unittest.skipUnless(Device.DEFAULT == "AMD" and not getenv("AMD_LLVM"), "only on AMD with comgr")
 class TestCfg(unittest.TestCase):
  def setUp(self):
    arch = Device["AMD"].arch