3 tests fail

2026-01-08 06:34:03 -05:00 · 2025-12-29 22:12:45 +00:00
parent d0e470c308
commit 170e8825c7
4 changed files with 23 additions and 18 deletions
--- a/extra/assembly/amd/asm.py
+++ b/extra/assembly/amd/asm.py
@@ -97,7 +97,12 @@ def disasm(inst: Inst) -> str:
    else:
      op_name = getattr(autogen, f"{cls_name}Op")(op_val).name.lower() if hasattr(autogen, f"{cls_name}Op") else f"op_{op_val}"
  except (ValueError, KeyError): op_name = f"op_{op_val}"
-  def fmt_src(v): return f"0x{inst._literal:x}" if v == 255 and getattr(inst, '_literal', None) else decode_src(v)
+  def fmt_src(v):
+    lit = getattr(inst, '_literal', None)
+    if v == 255 and lit is not None:
+      # Format negative literals as unsigned 32-bit hex (AMD assembler doesn't accept 0x-xxx)
+      return f"0x{lit & 0xffffffff:x}" if lit < 0 else f"0x{lit:x}"
+    return decode_src(v)

  # VOP1
  if cls_name == 'VOP1':
--- a/tinygrad/renderer/rdna_new.py
+++ b/tinygrad/renderer/rdna_new.py
@@ -10,9 +10,9 @@ else:
 from tinygrad.renderer.rdna_uops import rdna_matcher
 from tinygrad.renderer.cstyle import create_non_native_float_pats, cast_float_to_bf16
 from tinygrad.codegen.opt import tc
-from extra.assembly.rdna3.lib import Inst
-from extra.assembly.rdna3.asm import waitcnt
-from extra.assembly.rdna3.autogen import (
+from extra.assembly.amd.dsl import Inst
+from extra.assembly.amd.asm import waitcnt
+from extra.assembly.amd.autogen.rdna3 import (
  v, s, VGPR, SGPR, VCC_LO, EXEC_LO, NULL,
  # VOP1
  v_mov_b32_e32,
@@ -23,13 +23,13 @@ from extra.assembly.rdna3.autogen import (
  v_exp_f32_e32, v_log_f32_e32, v_trunc_f32_e32, v_sin_f32_e32, v_fract_f32_e32,
  v_cvt_f64_f32_e32, v_cvt_f32_f64_e32, v_cvt_f64_i32_e32, v_cvt_f64_u32_e32,
  v_cvt_i32_f64_e32, v_cvt_u32_f64_e32, v_trunc_f64_e32, v_floor_f64_e32,
-  # VOP3 (e64) versions for high registers
-  v_cvt_f16_f32_e64 as _v_cvt_f16_f32_e64,
-  v_cvt_f32_f16_e64 as _v_cvt_f32_f16_e64,
-  v_cvt_f32_i32_e64 as _v_cvt_f32_i32_e64,
-  v_cvt_i32_f32_e64 as _v_cvt_i32_f32_e64,
-  v_cvt_f32_u32_e64 as _v_cvt_f32_u32_e64,
-  v_cvt_u32_f32_e64 as _v_cvt_u32_f32_e64,
+  # VOP3 versions for high registers (named without suffix in new autogen)
+  v_cvt_f16_f32 as _v_cvt_f16_f32_e64,
+  v_cvt_f32_f16 as _v_cvt_f32_f16_e64,
+  v_cvt_f32_i32 as _v_cvt_f32_i32_e64,
+  v_cvt_i32_f32 as _v_cvt_i32_f32_e64,
+  v_cvt_f32_u32 as _v_cvt_f32_u32_e64,
+  v_cvt_u32_f32 as _v_cvt_u32_f32_e64,
  # VOP2
  v_add_f32_e32, v_sub_f32_e32, v_mul_f32_e32, v_and_b32_e32, v_or_b32_e32, v_xor_b32_e32,
  v_add_nc_u32_e32, v_sub_nc_u32_e32, v_lshlrev_b32_e32, v_lshrrev_b32_e32, v_ashrrev_i32_e32,
@@ -37,11 +37,11 @@ from extra.assembly.rdna3.autogen import (
  # VOP3
  v_fma_f32, v_fma_f64, v_mad_u64_u32, v_mad_i64_i32, v_lshlrev_b64, v_lshrrev_b64, v_ashrrev_i64,
  v_mul_lo_u32, v_mul_hi_u32, v_bfe_u32, v_bfe_i32,
-  v_add_co_u32, v_add_co_ci_u32_e32, v_cndmask_b32_e64, v_add_f64, v_mul_f64, v_sub_co_u32, v_sub_co_ci_u32_e32,
-  v_cmp_lt_f32_e32, v_cmp_eq_f32_e32, v_cmp_neq_f32_e32, v_cmp_gt_f32_e32,
-  v_cmp_lt_f64_e32, v_cmp_eq_f64_e32, v_cmp_neq_f64_e32, v_cmp_gt_f64_e32,
-  v_cmp_lt_i32_e32, v_cmp_eq_i32_e32, v_cmp_ne_i32_e32, v_cmp_gt_i32_e32,
-  v_cmp_lt_u32_e32, v_cmp_eq_u32_e32, v_cmp_ne_u32_e32, v_cmp_gt_u32_e32,
+  v_add_co_u32, v_add_co_ci_u32_e32, v_cndmask_b32 as v_cndmask_b32_e64, v_add_f64, v_mul_f64, v_sub_co_u32, v_sub_co_ci_u32_e32,
+  v_cmp_lt_f32 as v_cmp_lt_f32_e32, v_cmp_eq_f32 as v_cmp_eq_f32_e32, v_cmp_neq_f32 as v_cmp_neq_f32_e32, v_cmp_gt_f32 as v_cmp_gt_f32_e32,
+  v_cmp_lt_f64 as v_cmp_lt_f64_e32, v_cmp_eq_f64 as v_cmp_eq_f64_e32, v_cmp_neq_f64 as v_cmp_neq_f64_e32, v_cmp_gt_f64 as v_cmp_gt_f64_e32,
+  v_cmp_lt_i32 as v_cmp_lt_i32_e32, v_cmp_eq_i32 as v_cmp_eq_i32_e32, v_cmp_ne_i32 as v_cmp_ne_i32_e32, v_cmp_gt_i32 as v_cmp_gt_i32_e32,
+  v_cmp_lt_u32 as v_cmp_lt_u32_e32, v_cmp_eq_u32 as v_cmp_eq_u32_e32, v_cmp_ne_u32 as v_cmp_ne_u32_e32, v_cmp_gt_u32 as v_cmp_gt_u32_e32,
  # SOPP/SOP
  s_endpgm, s_waitcnt, s_barrier, s_sendmsg, s_mov_b32, s_and_saveexec_b32,
  # SMEM
--- a/tinygrad/renderer/rdna_regalloc.py
+++ b/tinygrad/renderer/rdna_regalloc.py
@@ -3,7 +3,7 @@ from collections import defaultdict
 from tinygrad.uop.ops import Ops, UOp
 from tinygrad.dtype import DType, PtrDType, AddrSpace, dtypes
 from tinygrad.helpers import getenv
-from extra.assembly.rdna3.autogen import VGPR, SGPR
+from extra.assembly.amd.dsl import VGPR, SGPR

 class RDNARegAlloc:
  """Register allocator for RDNA3 with liveness analysis and register reuse."""
--- a/tinygrad/renderer/rdna_regalloc_ilp.py
+++ b/tinygrad/renderer/rdna_regalloc_ilp.py
@@ -13,7 +13,7 @@ from ortools.sat.python import cp_model  # requires: pip install ortools
 from tinygrad.uop.ops import Ops, UOp
 from tinygrad.dtype import DType, PtrDType, AddrSpace, dtypes
 from tinygrad.helpers import getenv
-from extra.assembly.rdna3.autogen import VGPR, SGPR
+from extra.assembly.amd.dsl import VGPR, SGPR

 DEBUG_ILP = getenv("RDNA_ILP_DEBUG", 0)