From 170e8825c7afadd3671907ad7b424d39f0fb2ea7 Mon Sep 17 00:00:00 2001 From: George Hotz Date: Mon, 29 Dec 2025 22:12:45 +0000 Subject: [PATCH] 3 tests fail --- extra/assembly/amd/asm.py | 7 +++++- tinygrad/renderer/rdna_new.py | 30 +++++++++++++------------- tinygrad/renderer/rdna_regalloc.py | 2 +- tinygrad/renderer/rdna_regalloc_ilp.py | 2 +- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/extra/assembly/amd/asm.py b/extra/assembly/amd/asm.py index 8f56211007..6becfd037e 100644 --- a/extra/assembly/amd/asm.py +++ b/extra/assembly/amd/asm.py @@ -97,7 +97,12 @@ def disasm(inst: Inst) -> str: else: op_name = getattr(autogen, f"{cls_name}Op")(op_val).name.lower() if hasattr(autogen, f"{cls_name}Op") else f"op_{op_val}" except (ValueError, KeyError): op_name = f"op_{op_val}" - def fmt_src(v): return f"0x{inst._literal:x}" if v == 255 and getattr(inst, '_literal', None) else decode_src(v) + def fmt_src(v): + lit = getattr(inst, '_literal', None) + if v == 255 and lit is not None: + # Format negative literals as unsigned 32-bit hex (AMD assembler doesn't accept 0x-xxx) + return f"0x{lit & 0xffffffff:x}" if lit < 0 else f"0x{lit:x}" + return decode_src(v) # VOP1 if cls_name == 'VOP1': diff --git a/tinygrad/renderer/rdna_new.py b/tinygrad/renderer/rdna_new.py index 93a97e2201..ec5b45ffe7 100644 --- a/tinygrad/renderer/rdna_new.py +++ b/tinygrad/renderer/rdna_new.py @@ -10,9 +10,9 @@ else: from tinygrad.renderer.rdna_uops import rdna_matcher from tinygrad.renderer.cstyle import create_non_native_float_pats, cast_float_to_bf16 from tinygrad.codegen.opt import tc -from extra.assembly.rdna3.lib import Inst -from extra.assembly.rdna3.asm import waitcnt -from extra.assembly.rdna3.autogen import ( +from extra.assembly.amd.dsl import Inst +from extra.assembly.amd.asm import waitcnt +from extra.assembly.amd.autogen.rdna3 import ( v, s, VGPR, SGPR, VCC_LO, EXEC_LO, NULL, # VOP1 v_mov_b32_e32, @@ -23,13 +23,13 @@ from extra.assembly.rdna3.autogen import ( v_exp_f32_e32, v_log_f32_e32, v_trunc_f32_e32, v_sin_f32_e32, v_fract_f32_e32, v_cvt_f64_f32_e32, v_cvt_f32_f64_e32, v_cvt_f64_i32_e32, v_cvt_f64_u32_e32, v_cvt_i32_f64_e32, v_cvt_u32_f64_e32, v_trunc_f64_e32, v_floor_f64_e32, - # VOP3 (e64) versions for high registers - v_cvt_f16_f32_e64 as _v_cvt_f16_f32_e64, - v_cvt_f32_f16_e64 as _v_cvt_f32_f16_e64, - v_cvt_f32_i32_e64 as _v_cvt_f32_i32_e64, - v_cvt_i32_f32_e64 as _v_cvt_i32_f32_e64, - v_cvt_f32_u32_e64 as _v_cvt_f32_u32_e64, - v_cvt_u32_f32_e64 as _v_cvt_u32_f32_e64, + # VOP3 versions for high registers (named without suffix in new autogen) + v_cvt_f16_f32 as _v_cvt_f16_f32_e64, + v_cvt_f32_f16 as _v_cvt_f32_f16_e64, + v_cvt_f32_i32 as _v_cvt_f32_i32_e64, + v_cvt_i32_f32 as _v_cvt_i32_f32_e64, + v_cvt_f32_u32 as _v_cvt_f32_u32_e64, + v_cvt_u32_f32 as _v_cvt_u32_f32_e64, # VOP2 v_add_f32_e32, v_sub_f32_e32, v_mul_f32_e32, v_and_b32_e32, v_or_b32_e32, v_xor_b32_e32, v_add_nc_u32_e32, v_sub_nc_u32_e32, v_lshlrev_b32_e32, v_lshrrev_b32_e32, v_ashrrev_i32_e32, @@ -37,11 +37,11 @@ from extra.assembly.rdna3.autogen import ( # VOP3 v_fma_f32, v_fma_f64, v_mad_u64_u32, v_mad_i64_i32, v_lshlrev_b64, v_lshrrev_b64, v_ashrrev_i64, v_mul_lo_u32, v_mul_hi_u32, v_bfe_u32, v_bfe_i32, - v_add_co_u32, v_add_co_ci_u32_e32, v_cndmask_b32_e64, v_add_f64, v_mul_f64, v_sub_co_u32, v_sub_co_ci_u32_e32, - v_cmp_lt_f32_e32, v_cmp_eq_f32_e32, v_cmp_neq_f32_e32, v_cmp_gt_f32_e32, - v_cmp_lt_f64_e32, v_cmp_eq_f64_e32, v_cmp_neq_f64_e32, v_cmp_gt_f64_e32, - v_cmp_lt_i32_e32, v_cmp_eq_i32_e32, v_cmp_ne_i32_e32, v_cmp_gt_i32_e32, - v_cmp_lt_u32_e32, v_cmp_eq_u32_e32, v_cmp_ne_u32_e32, v_cmp_gt_u32_e32, + v_add_co_u32, v_add_co_ci_u32_e32, v_cndmask_b32 as v_cndmask_b32_e64, v_add_f64, v_mul_f64, v_sub_co_u32, v_sub_co_ci_u32_e32, + v_cmp_lt_f32 as v_cmp_lt_f32_e32, v_cmp_eq_f32 as v_cmp_eq_f32_e32, v_cmp_neq_f32 as v_cmp_neq_f32_e32, v_cmp_gt_f32 as v_cmp_gt_f32_e32, + v_cmp_lt_f64 as v_cmp_lt_f64_e32, v_cmp_eq_f64 as v_cmp_eq_f64_e32, v_cmp_neq_f64 as v_cmp_neq_f64_e32, v_cmp_gt_f64 as v_cmp_gt_f64_e32, + v_cmp_lt_i32 as v_cmp_lt_i32_e32, v_cmp_eq_i32 as v_cmp_eq_i32_e32, v_cmp_ne_i32 as v_cmp_ne_i32_e32, v_cmp_gt_i32 as v_cmp_gt_i32_e32, + v_cmp_lt_u32 as v_cmp_lt_u32_e32, v_cmp_eq_u32 as v_cmp_eq_u32_e32, v_cmp_ne_u32 as v_cmp_ne_u32_e32, v_cmp_gt_u32 as v_cmp_gt_u32_e32, # SOPP/SOP s_endpgm, s_waitcnt, s_barrier, s_sendmsg, s_mov_b32, s_and_saveexec_b32, # SMEM diff --git a/tinygrad/renderer/rdna_regalloc.py b/tinygrad/renderer/rdna_regalloc.py index 9279b441e6..a15d7d33db 100644 --- a/tinygrad/renderer/rdna_regalloc.py +++ b/tinygrad/renderer/rdna_regalloc.py @@ -3,7 +3,7 @@ from collections import defaultdict from tinygrad.uop.ops import Ops, UOp from tinygrad.dtype import DType, PtrDType, AddrSpace, dtypes from tinygrad.helpers import getenv -from extra.assembly.rdna3.autogen import VGPR, SGPR +from extra.assembly.amd.dsl import VGPR, SGPR class RDNARegAlloc: """Register allocator for RDNA3 with liveness analysis and register reuse.""" diff --git a/tinygrad/renderer/rdna_regalloc_ilp.py b/tinygrad/renderer/rdna_regalloc_ilp.py index 7c15c64342..e0ec616a20 100644 --- a/tinygrad/renderer/rdna_regalloc_ilp.py +++ b/tinygrad/renderer/rdna_regalloc_ilp.py @@ -13,7 +13,7 @@ from ortools.sat.python import cp_model # requires: pip install ortools from tinygrad.uop.ops import Ops, UOp from tinygrad.dtype import DType, PtrDType, AddrSpace, dtypes from tinygrad.helpers import getenv -from extra.assembly.rdna3.autogen import VGPR, SGPR +from extra.assembly.amd.dsl import VGPR, SGPR DEBUG_ILP = getenv("RDNA_ILP_DEBUG", 0)