From 433248c998bd0254bad17b44db3162c29ac794b0 Mon Sep 17 00:00:00 2001 From: George Hotz Date: Tue, 30 Dec 2025 18:05:09 +0000 Subject: [PATCH] assembly/amd: only reg emu --- extra/assembly/amd/autogen/rdna3/gen_pcode.py | 3953 ++++++++--------- extra/assembly/amd/emu.py | 162 +- extra/assembly/amd/pcode.py | 47 +- extra/assembly/amd/test/test_pcode.py | 14 +- 4 files changed, 2092 insertions(+), 2084 deletions(-) diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py index dcc4999f37..159a0de591 100644 --- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py @@ -5,37 +5,37 @@ from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(S0, D0): +def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.b32 = S0.b32 D0.b32 = S0.b32 -def _SOP1Op_S_MOV_B64(S0, D0): +def _SOP1Op_S_MOV_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.b64 = S0.b64 D0.b64 = S0.b64 -def _SOP1Op_S_CMOV_B32(S0, D0, SCC): +def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if SCC then # D0.b32 = S0.b32 # endif if SCC: D0.b32 = S0.b32 -def _SOP1Op_S_CMOV_B64(S0, D0, SCC): +def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if SCC then # D0.b64 = S0.b64 # endif if SCC: D0.b64 = S0.b64 -def _SOP1Op_S_BREV_B32(S0, D0): +def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32[31 : 0] = S0.u32[0 : 31] D0.u32[31 : 0] = S0.u32[0 : 31] -def _SOP1Op_S_BREV_B64(S0, D0): +def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[63 : 0] = S0.u64[0 : 63] D0.u64[63 : 0] = S0.u64[0 : 63] -def _SOP1Op_S_CTZ_I32_B32(S0, D0, tmp): +def _SOP1Op_S_CTZ_I32_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -51,7 +51,7 @@ def _SOP1Op_S_CTZ_I32_B32(S0, D0, tmp): tmp._val = int(i) D0.i32 = tmp -def _SOP1Op_S_CTZ_I32_B64(S0, D0, tmp): +def _SOP1Op_S_CTZ_I32_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -67,7 +67,7 @@ def _SOP1Op_S_CTZ_I32_B64(S0, D0, tmp): tmp._val = int(i) D0.i32 = tmp -def _SOP1Op_S_CLZ_I32_U32(S0, D0, tmp): +def _SOP1Op_S_CLZ_I32_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -83,7 +83,7 @@ def _SOP1Op_S_CLZ_I32_U32(S0, D0, tmp): tmp._val = int(i) D0.i32 = tmp -def _SOP1Op_S_CLZ_I32_U64(S0, D0, tmp): +def _SOP1Op_S_CLZ_I32_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -99,7 +99,7 @@ def _SOP1Op_S_CLZ_I32_U64(S0, D0, tmp): tmp._val = int(i) D0.i32 = tmp -def _SOP1Op_S_CLS_I32(S0, D0, tmp): +def _SOP1Op_S_CLS_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -115,7 +115,7 @@ def _SOP1Op_S_CLS_I32(S0, D0, tmp): tmp._val = int(i) D0.i32 = tmp -def _SOP1Op_S_CLS_I32_I64(S0, D0, tmp): +def _SOP1Op_S_CLS_I32_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = -1; # // Set if all bits are the same # for i in 1 : 63 do @@ -131,31 +131,31 @@ def _SOP1Op_S_CLS_I32_I64(S0, D0, tmp): tmp._val = int(i) D0.i32 = tmp -def _SOP1Op_S_SEXT_I32_I8(S0, D0): +def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(signext(S0.i8)) D0.i32 = (signext(S0.i8)) -def _SOP1Op_S_SEXT_I32_I16(S0, D0): +def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(signext(S0.i16)) D0.i32 = (signext(S0.i16)) -def _SOP1Op_S_BITSET0_B32(S0, D0): +def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32[S0.u32[4 : 0]] = 1'0U D0.u32[S0.u32[4 : 0]] = 0 -def _SOP1Op_S_BITSET0_B64(S0, D0): +def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[S0.u32[5 : 0]] = 1'0U D0.u64[S0.u32[5 : 0]] = 0 -def _SOP1Op_S_BITSET1_B32(S0, D0): +def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32[S0.u32[4 : 0]] = 1'1U D0.u32[S0.u32[4 : 0]] = 1 -def _SOP1Op_S_BITSET1_B64(S0, D0): +def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[S0.u32[5 : 0]] = 1'1U D0.u64[S0.u32[5 : 0]] = 1 -def _SOP1Op_S_BITREPLICATE_B64_B32(S0, D0, tmp): +def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.u32; # for i in 0 : 31 do # D0.u64[i * 2] = tmp[i]; @@ -166,13 +166,13 @@ def _SOP1Op_S_BITREPLICATE_B64_B32(S0, D0, tmp): D0.u64[i * 2] = tmp[i] D0.u64[i * 2 + 1] = tmp[i] -def _SOP1Op_S_ABS_I32(S0, D0, SCC): +def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; # SCC = D0.i32 != 0 D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) SCC._val = int(D0.i32 != 0) -def _SOP1Op_S_BCNT0_I32_B32(S0, D0, SCC, tmp): +def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'0U ? 1 : 0 @@ -185,7 +185,7 @@ def _SOP1Op_S_BCNT0_I32_B32(S0, D0, SCC, tmp): D0.i32 = tmp SCC._val = int(D0.u32 != 0) -def _SOP1Op_S_BCNT0_I32_B64(S0, D0, SCC, tmp): +def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'0U ? 1 : 0 @@ -198,7 +198,7 @@ def _SOP1Op_S_BCNT0_I32_B64(S0, D0, SCC, tmp): D0.i32 = tmp SCC._val = int(D0.u64 != 0) -def _SOP1Op_S_BCNT1_I32_B32(S0, D0, SCC, tmp): +def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'1U ? 1 : 0 @@ -211,7 +211,7 @@ def _SOP1Op_S_BCNT1_I32_B32(S0, D0, SCC, tmp): D0.i32 = tmp SCC._val = int(D0.u32 != 0) -def _SOP1Op_S_BCNT1_I32_B64(S0, D0, SCC, tmp): +def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'1U ? 1 : 0 @@ -224,7 +224,7 @@ def _SOP1Op_S_BCNT1_I32_B64(S0, D0, SCC, tmp): D0.i32 = tmp SCC._val = int(D0.u64 != 0) -def _SOP1Op_S_QUADMASK_B32(S0, D0, SCC, tmp): +def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0U; # for i in 0 : 7 do # tmp[i] = S0.u32[i * 4 +: 4] != 0U @@ -237,7 +237,7 @@ def _SOP1Op_S_QUADMASK_B32(S0, D0, SCC, tmp): D0.u32 = tmp SCC._val = int(D0.u32 != 0) -def _SOP1Op_S_QUADMASK_B64(S0, D0, SCC, tmp): +def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0ULL; # for i in 0 : 15 do # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL @@ -250,7 +250,7 @@ def _SOP1Op_S_QUADMASK_B64(S0, D0, SCC, tmp): D0.u64 = tmp SCC._val = int(D0.u64 != 0) -def _SOP1Op_S_WQM_B32(S0, D0, SCC, tmp): +def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0U; # declare i : 6'U; # for i in 6'0U : 6'31U do @@ -264,7 +264,7 @@ def _SOP1Op_S_WQM_B32(S0, D0, SCC, tmp): D0.u32 = tmp SCC._val = int(D0.u32 != 0) -def _SOP1Op_S_WQM_B64(S0, D0, SCC, tmp): +def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 0ULL; # declare i : 6'U; # for i in 6'0U : 6'63U do @@ -278,259 +278,259 @@ def _SOP1Op_S_WQM_B64(S0, D0, SCC, tmp): D0.u64 = tmp SCC._val = int(D0.u64 != 0) -def _SOP1Op_S_NOT_B32(S0, D0, SCC): +def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~S0.u32; # SCC = D0.u32 != 0U D0.u32 = ~S0.u32 SCC._val = int(D0.u32 != 0) -def _SOP1Op_S_NOT_B64(S0, D0, SCC): +def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = ~S0.u64; # SCC = D0.u64 != 0ULL D0.u64 = ~S0.u64 SCC._val = int(D0.u64 != 0) -def _SOP1Op_S_AND_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_AND_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; # EXEC.u32 = (S0.u32 & EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = (S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_AND_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; # EXEC.u64 = (S0.u64 & EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = (S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_OR_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_OR_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u32; # EXEC.u32 = (S0.u32 | EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = (S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_OR_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u64; # EXEC.u64 = (S0.u64 | EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = (S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_XOR_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_XOR_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; # EXEC.u32 = (S0.u32 ^ EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = (S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; # EXEC.u64 = (S0.u64 ^ EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = (S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_NAND_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_NAND_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; # EXEC.u32 = ~(S0.u32 & EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = ~(S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; # EXEC.u64 = ~(S0.u64 & EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = ~(S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_NOR_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_NOR_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; # EXEC.u32 = ~(S0.u32 | EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = ~(S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; # EXEC.u64 = ~(S0.u64 | EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = ~(S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_XNOR_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_XNOR_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; # EXEC.u32 = ~(S0.u32 ^ EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = ~(S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = ~(S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; # EXEC.u32 = (~S0.u32 & EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; # EXEC.u64 = (~S0.u64 & EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; # EXEC.u32 = (~S0.u32 | EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = (~S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; # EXEC.u64 = (~S0.u64 | EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = (~S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; # EXEC.u32 = (S0.u32 & ~EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; # EXEC.u64 = (S0.u64 & ~EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; # EXEC.u32 = (S0.u32 | ~EXEC.u32); # D0.u32 = saveexec.u32; # SCC = EXEC.u32 != 0U - saveexec._val = int(EXEC.u32) + saveexec = Reg(int(EXEC.u32)) EXEC.u32 = (S0.u32 | ~EXEC.u32) D0.u32 = saveexec.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(S0, D0, SCC, EXEC, saveexec): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; # EXEC.u64 = (S0.u64 | ~EXEC.u64); # D0.u64 = saveexec.u64; # SCC = EXEC.u64 != 0ULL - saveexec._val = int(EXEC.u64) + saveexec = Reg(int(EXEC.u64)) EXEC.u64 = (S0.u64 | ~EXEC.u64) D0.u64 = saveexec.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_AND_NOT0_WREXEC_B32(S0, D0, SCC, EXEC): +def _SOP1Op_S_AND_NOT0_WREXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -541,7 +541,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B32(S0, D0, SCC, EXEC): D0.u32 = EXEC.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_AND_NOT0_WREXEC_B64(S0, D0, SCC, EXEC): +def _SOP1Op_S_AND_NOT0_WREXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -552,7 +552,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B64(S0, D0, SCC, EXEC): D0.u64 = EXEC.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_AND_NOT1_WREXEC_B32(S0, D0, SCC, EXEC): +def _SOP1Op_S_AND_NOT1_WREXEC_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -563,7 +563,7 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B32(S0, D0, SCC, EXEC): D0.u32 = EXEC.u32 SCC._val = int(EXEC.u32 != 0) -def _SOP1Op_S_AND_NOT1_WREXEC_B64(S0, D0, SCC, EXEC): +def _SOP1Op_S_AND_NOT1_WREXEC_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -574,15 +574,15 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B64(S0, D0, SCC, EXEC): D0.u64 = EXEC.u64 SCC._val = int(EXEC.u64 != 0) -def _SOP1Op_S_SENDMSG_RTN_B32(VCC): +def _SOP1Op_S_SENDMSG_RTN_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # If SDST is VCC then VCCZ is undefined. pass -def _SOP1Op_S_SENDMSG_RTN_B64(VCC): +def _SOP1Op_S_SENDMSG_RTN_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # If SDST is VCC then VCCZ is undefined. pass -def _SOP1Op_S_CEIL_F32(S0, D0): +def _SOP1Op_S_CEIL_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -591,7 +591,7 @@ def _SOP1Op_S_CEIL_F32(S0, D0): if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 -def _SOP1Op_S_FLOOR_F32(S0, D0): +def _SOP1Op_S_FLOOR_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -600,11 +600,11 @@ def _SOP1Op_S_FLOOR_F32(S0, D0): if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 -def _SOP1Op_S_TRUNC_F32(S0, D0): +def _SOP1Op_S_TRUNC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32) D0.f32 = trunc(S0.f32) -def _SOP1Op_S_RNDNE_F32(S0, D0): +def _SOP1Op_S_RNDNE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -613,35 +613,35 @@ def _SOP1Op_S_RNDNE_F32(S0, D0): if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 -def _SOP1Op_S_CVT_F32_I32(S0, D0): +def _SOP1Op_S_CVT_F32_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = i32_to_f32(S0.i32) D0.f32 = i32_to_f32(S0.i32) -def _SOP1Op_S_CVT_F32_U32(S0, D0): +def _SOP1Op_S_CVT_F32_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0.u32) D0.f32 = u32_to_f32(S0.u32) -def _SOP1Op_S_CVT_I32_F32(S0, D0): +def _SOP1Op_S_CVT_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f32_to_i32(S0.f32) D0.i32 = f32_to_i32(S0.f32) -def _SOP1Op_S_CVT_U32_F32(S0, D0): +def _SOP1Op_S_CVT_U32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = f32_to_u32(S0.f32) D0.u32 = f32_to_u32(S0.f32) -def _SOP1Op_S_CVT_F16_F32(S0, D0): +def _SOP1Op_S_CVT_F16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = f32_to_f16(S0.f32) D0.f16 = f32_to_f16(S0.f32) -def _SOP1Op_S_CVT_F32_F16(S0, D0): +def _SOP1Op_S_CVT_F32_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = f16_to_f32(S0.f16) D0.f32 = f16_to_f32(S0.f16) -def _SOP1Op_S_CVT_HI_F32_F16(S0, D0): +def _SOP1Op_S_CVT_HI_F32_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = f16_to_f32(S0[31 : 16].f16) D0.f32 = f16_to_f32(S0[31 : 16].f16) -def _SOP1Op_S_CEIL_F16(S0, D0): +def _SOP1Op_S_CEIL_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -650,7 +650,7 @@ def _SOP1Op_S_CEIL_F16(S0, D0): if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 -def _SOP1Op_S_FLOOR_F16(S0, D0): +def _SOP1Op_S_FLOOR_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -659,11 +659,11 @@ def _SOP1Op_S_FLOOR_F16(S0, D0): if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 -def _SOP1Op_S_TRUNC_F16(S0, D0): +def _SOP1Op_S_TRUNC_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16) D0.f16 = trunc(S0.f16) -def _SOP1Op_S_RNDNE_F16(S0, D0): +def _SOP1Op_S_RNDNE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -673,80 +673,80 @@ def _SOP1Op_S_RNDNE_F16(S0, D0): D0.f16 -= 1.0 SOP1Op_FUNCTIONS = { - SOP1Op.S_MOV_B32: (_SOP1Op_S_MOV_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_MOV_B64: (_SOP1Op_S_MOV_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CMOV_B32: (_SOP1Op_S_CMOV_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC'))), - SOP1Op.S_CMOV_B64: (_SOP1Op_S_CMOV_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC'))), - SOP1Op.S_BREV_B32: (_SOP1Op_S_BREV_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_BREV_B64: (_SOP1Op_S_BREV_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CTZ_I32_B32: (_SOP1Op_S_CTZ_I32_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - SOP1Op.S_CTZ_I32_B64: (_SOP1Op_S_CTZ_I32_B64, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - SOP1Op.S_CLZ_I32_U32: (_SOP1Op_S_CLZ_I32_U32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - SOP1Op.S_CLZ_I32_U64: (_SOP1Op_S_CLZ_I32_U64, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - SOP1Op.S_CLS_I32: (_SOP1Op_S_CLS_I32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - SOP1Op.S_CLS_I32_I64: (_SOP1Op_S_CLS_I32_I64, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - SOP1Op.S_SEXT_I32_I8: (_SOP1Op_S_SEXT_I32_I8, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_SEXT_I32_I16: (_SOP1Op_S_SEXT_I32_I16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_BITSET0_B32: (_SOP1Op_S_BITSET0_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_BITSET0_B64: (_SOP1Op_S_BITSET0_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_BITSET1_B32: (_SOP1Op_S_BITSET1_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_BITSET1_B64: (_SOP1Op_S_BITSET1_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_BITREPLICATE_B64_B32: (_SOP1Op_S_BITREPLICATE_B64_B32, (True, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - SOP1Op.S_ABS_I32: (_SOP1Op_S_ABS_I32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC'))), - SOP1Op.S_BCNT0_I32_B32: (_SOP1Op_S_BCNT0_I32_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_BCNT0_I32_B64: (_SOP1Op_S_BCNT0_I32_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_BCNT1_I32_B32: (_SOP1Op_S_BCNT1_I32_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_BCNT1_I32_B64: (_SOP1Op_S_BCNT1_I32_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_QUADMASK_B32: (_SOP1Op_S_QUADMASK_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_QUADMASK_B64: (_SOP1Op_S_QUADMASK_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_WQM_B32: (_SOP1Op_S_WQM_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_WQM_B64: (_SOP1Op_S_WQM_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC', 'tmp'))), - SOP1Op.S_NOT_B32: (_SOP1Op_S_NOT_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC'))), - SOP1Op.S_NOT_B64: (_SOP1Op_S_NOT_B64, (True, False, False, False, False, False, False, False, ('S0', 'D0', 'SCC'))), - SOP1Op.S_AND_SAVEEXEC_B32: (_SOP1Op_S_AND_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_AND_SAVEEXEC_B64: (_SOP1Op_S_AND_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_OR_SAVEEXEC_B32: (_SOP1Op_S_OR_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_OR_SAVEEXEC_B64: (_SOP1Op_S_OR_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_XOR_SAVEEXEC_B32: (_SOP1Op_S_XOR_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_XOR_SAVEEXEC_B64: (_SOP1Op_S_XOR_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_NAND_SAVEEXEC_B32: (_SOP1Op_S_NAND_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_NAND_SAVEEXEC_B64: (_SOP1Op_S_NAND_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_NOR_SAVEEXEC_B32: (_SOP1Op_S_NOR_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_NOR_SAVEEXEC_B64: (_SOP1Op_S_NOR_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_XNOR_SAVEEXEC_B32: (_SOP1Op_S_XNOR_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_XNOR_SAVEEXEC_B64: (_SOP1Op_S_XNOR_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_AND_NOT0_SAVEEXEC_B32: (_SOP1Op_S_AND_NOT0_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_AND_NOT0_SAVEEXEC_B64: (_SOP1Op_S_AND_NOT0_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_OR_NOT0_SAVEEXEC_B32: (_SOP1Op_S_OR_NOT0_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_OR_NOT0_SAVEEXEC_B64: (_SOP1Op_S_OR_NOT0_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_AND_NOT1_SAVEEXEC_B32: (_SOP1Op_S_AND_NOT1_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_AND_NOT1_SAVEEXEC_B64: (_SOP1Op_S_AND_NOT1_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_OR_NOT1_SAVEEXEC_B32: (_SOP1Op_S_OR_NOT1_SAVEEXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_OR_NOT1_SAVEEXEC_B64: (_SOP1Op_S_OR_NOT1_SAVEEXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC', 'saveexec'))), - SOP1Op.S_AND_NOT0_WREXEC_B32: (_SOP1Op_S_AND_NOT0_WREXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC'))), - SOP1Op.S_AND_NOT0_WREXEC_B64: (_SOP1Op_S_AND_NOT0_WREXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC'))), - SOP1Op.S_AND_NOT1_WREXEC_B32: (_SOP1Op_S_AND_NOT1_WREXEC_B32, (False, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC'))), - SOP1Op.S_AND_NOT1_WREXEC_B64: (_SOP1Op_S_AND_NOT1_WREXEC_B64, (True, False, False, False, False, False, False, True, ('S0', 'D0', 'SCC', 'EXEC'))), - SOP1Op.S_SENDMSG_RTN_B32: (_SOP1Op_S_SENDMSG_RTN_B32, (False, False, False, False, False, False, True, False, ('VCC',))), - SOP1Op.S_SENDMSG_RTN_B64: (_SOP1Op_S_SENDMSG_RTN_B64, (False, False, False, False, False, False, True, False, ('VCC',))), - SOP1Op.S_CEIL_F32: (_SOP1Op_S_CEIL_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_FLOOR_F32: (_SOP1Op_S_FLOOR_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_TRUNC_F32: (_SOP1Op_S_TRUNC_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_RNDNE_F32: (_SOP1Op_S_RNDNE_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CVT_F32_I32: (_SOP1Op_S_CVT_F32_I32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CVT_F32_U32: (_SOP1Op_S_CVT_F32_U32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CVT_I32_F32: (_SOP1Op_S_CVT_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CVT_U32_F32: (_SOP1Op_S_CVT_U32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CVT_F16_F32: (_SOP1Op_S_CVT_F16_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CVT_F32_F16: (_SOP1Op_S_CVT_F32_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CVT_HI_F32_F16: (_SOP1Op_S_CVT_HI_F32_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_CEIL_F16: (_SOP1Op_S_CEIL_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_FLOOR_F16: (_SOP1Op_S_FLOOR_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_TRUNC_F16: (_SOP1Op_S_TRUNC_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - SOP1Op.S_RNDNE_F16: (_SOP1Op_S_RNDNE_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), + SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, + SOP1Op.S_MOV_B64: _SOP1Op_S_MOV_B64, + SOP1Op.S_CMOV_B32: _SOP1Op_S_CMOV_B32, + SOP1Op.S_CMOV_B64: _SOP1Op_S_CMOV_B64, + SOP1Op.S_BREV_B32: _SOP1Op_S_BREV_B32, + SOP1Op.S_BREV_B64: _SOP1Op_S_BREV_B64, + SOP1Op.S_CTZ_I32_B32: _SOP1Op_S_CTZ_I32_B32, + SOP1Op.S_CTZ_I32_B64: _SOP1Op_S_CTZ_I32_B64, + SOP1Op.S_CLZ_I32_U32: _SOP1Op_S_CLZ_I32_U32, + SOP1Op.S_CLZ_I32_U64: _SOP1Op_S_CLZ_I32_U64, + SOP1Op.S_CLS_I32: _SOP1Op_S_CLS_I32, + SOP1Op.S_CLS_I32_I64: _SOP1Op_S_CLS_I32_I64, + SOP1Op.S_SEXT_I32_I8: _SOP1Op_S_SEXT_I32_I8, + SOP1Op.S_SEXT_I32_I16: _SOP1Op_S_SEXT_I32_I16, + SOP1Op.S_BITSET0_B32: _SOP1Op_S_BITSET0_B32, + SOP1Op.S_BITSET0_B64: _SOP1Op_S_BITSET0_B64, + SOP1Op.S_BITSET1_B32: _SOP1Op_S_BITSET1_B32, + SOP1Op.S_BITSET1_B64: _SOP1Op_S_BITSET1_B64, + SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32, + SOP1Op.S_ABS_I32: _SOP1Op_S_ABS_I32, + SOP1Op.S_BCNT0_I32_B32: _SOP1Op_S_BCNT0_I32_B32, + SOP1Op.S_BCNT0_I32_B64: _SOP1Op_S_BCNT0_I32_B64, + SOP1Op.S_BCNT1_I32_B32: _SOP1Op_S_BCNT1_I32_B32, + SOP1Op.S_BCNT1_I32_B64: _SOP1Op_S_BCNT1_I32_B64, + SOP1Op.S_QUADMASK_B32: _SOP1Op_S_QUADMASK_B32, + SOP1Op.S_QUADMASK_B64: _SOP1Op_S_QUADMASK_B64, + SOP1Op.S_WQM_B32: _SOP1Op_S_WQM_B32, + SOP1Op.S_WQM_B64: _SOP1Op_S_WQM_B64, + SOP1Op.S_NOT_B32: _SOP1Op_S_NOT_B32, + SOP1Op.S_NOT_B64: _SOP1Op_S_NOT_B64, + SOP1Op.S_AND_SAVEEXEC_B32: _SOP1Op_S_AND_SAVEEXEC_B32, + SOP1Op.S_AND_SAVEEXEC_B64: _SOP1Op_S_AND_SAVEEXEC_B64, + SOP1Op.S_OR_SAVEEXEC_B32: _SOP1Op_S_OR_SAVEEXEC_B32, + SOP1Op.S_OR_SAVEEXEC_B64: _SOP1Op_S_OR_SAVEEXEC_B64, + SOP1Op.S_XOR_SAVEEXEC_B32: _SOP1Op_S_XOR_SAVEEXEC_B32, + SOP1Op.S_XOR_SAVEEXEC_B64: _SOP1Op_S_XOR_SAVEEXEC_B64, + SOP1Op.S_NAND_SAVEEXEC_B32: _SOP1Op_S_NAND_SAVEEXEC_B32, + SOP1Op.S_NAND_SAVEEXEC_B64: _SOP1Op_S_NAND_SAVEEXEC_B64, + SOP1Op.S_NOR_SAVEEXEC_B32: _SOP1Op_S_NOR_SAVEEXEC_B32, + SOP1Op.S_NOR_SAVEEXEC_B64: _SOP1Op_S_NOR_SAVEEXEC_B64, + SOP1Op.S_XNOR_SAVEEXEC_B32: _SOP1Op_S_XNOR_SAVEEXEC_B32, + SOP1Op.S_XNOR_SAVEEXEC_B64: _SOP1Op_S_XNOR_SAVEEXEC_B64, + SOP1Op.S_AND_NOT0_SAVEEXEC_B32: _SOP1Op_S_AND_NOT0_SAVEEXEC_B32, + SOP1Op.S_AND_NOT0_SAVEEXEC_B64: _SOP1Op_S_AND_NOT0_SAVEEXEC_B64, + SOP1Op.S_OR_NOT0_SAVEEXEC_B32: _SOP1Op_S_OR_NOT0_SAVEEXEC_B32, + SOP1Op.S_OR_NOT0_SAVEEXEC_B64: _SOP1Op_S_OR_NOT0_SAVEEXEC_B64, + SOP1Op.S_AND_NOT1_SAVEEXEC_B32: _SOP1Op_S_AND_NOT1_SAVEEXEC_B32, + SOP1Op.S_AND_NOT1_SAVEEXEC_B64: _SOP1Op_S_AND_NOT1_SAVEEXEC_B64, + SOP1Op.S_OR_NOT1_SAVEEXEC_B32: _SOP1Op_S_OR_NOT1_SAVEEXEC_B32, + SOP1Op.S_OR_NOT1_SAVEEXEC_B64: _SOP1Op_S_OR_NOT1_SAVEEXEC_B64, + SOP1Op.S_AND_NOT0_WREXEC_B32: _SOP1Op_S_AND_NOT0_WREXEC_B32, + SOP1Op.S_AND_NOT0_WREXEC_B64: _SOP1Op_S_AND_NOT0_WREXEC_B64, + SOP1Op.S_AND_NOT1_WREXEC_B32: _SOP1Op_S_AND_NOT1_WREXEC_B32, + SOP1Op.S_AND_NOT1_WREXEC_B64: _SOP1Op_S_AND_NOT1_WREXEC_B64, + SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32, + SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64, + SOP1Op.S_CEIL_F32: _SOP1Op_S_CEIL_F32, + SOP1Op.S_FLOOR_F32: _SOP1Op_S_FLOOR_F32, + SOP1Op.S_TRUNC_F32: _SOP1Op_S_TRUNC_F32, + SOP1Op.S_RNDNE_F32: _SOP1Op_S_RNDNE_F32, + SOP1Op.S_CVT_F32_I32: _SOP1Op_S_CVT_F32_I32, + SOP1Op.S_CVT_F32_U32: _SOP1Op_S_CVT_F32_U32, + SOP1Op.S_CVT_I32_F32: _SOP1Op_S_CVT_I32_F32, + SOP1Op.S_CVT_U32_F32: _SOP1Op_S_CVT_U32_F32, + SOP1Op.S_CVT_F16_F32: _SOP1Op_S_CVT_F16_F32, + SOP1Op.S_CVT_F32_F16: _SOP1Op_S_CVT_F32_F16, + SOP1Op.S_CVT_HI_F32_F16: _SOP1Op_S_CVT_HI_F32_F16, + SOP1Op.S_CEIL_F16: _SOP1Op_S_CEIL_F16, + SOP1Op.S_FLOOR_F16: _SOP1Op_S_FLOOR_F16, + SOP1Op.S_TRUNC_F16: _SOP1Op_S_TRUNC_F16, + SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, } -def _SOP2Op_S_ADD_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_ADD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -754,7 +754,7 @@ def _SOP2Op_S_ADD_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_SUB_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_SUB_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.u32 - S1.u32; # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -762,7 +762,7 @@ def _SOP2Op_S_SUB_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if (S1.u32 > S0.u32) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_ADD_I32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_ADD_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.i32 + S1.i32; # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -770,7 +770,7 @@ def _SOP2Op_S_ADD_I32(S0, S1, D0, SCC, tmp): SCC._val = int(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 -def _SOP2Op_S_SUB_I32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_SUB_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.i32 - S1.i32; # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -778,7 +778,7 @@ def _SOP2Op_S_SUB_I32(S0, S1, D0, SCC, tmp): SCC._val = int(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 -def _SOP2Op_S_ADDC_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_ADDC_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -786,7 +786,7 @@ def _SOP2Op_S_ADDC_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_SUBB_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_SUBB_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.u32 - S1.u32 - SCC.u32; # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -794,7 +794,7 @@ def _SOP2Op_S_SUBB_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_ABSDIFF_I32(S0, S1, D0, SCC): +def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 - S1.i32; # if D0.i32 < 0 then # D0.i32 = -D0.i32 @@ -805,43 +805,43 @@ def _SOP2Op_S_ABSDIFF_I32(S0, S1, D0, SCC): D0.i32 = -D0.i32 SCC._val = int(D0.i32 != 0) -def _SOP2Op_S_LSHL_B32(S0, S1, D0, SCC): +def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 << S1[4 : 0].u32); # SCC = D0.u32 != 0U D0.u32 = (S0.u32 << S1[4 : 0].u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_LSHL_B64(S0, S1, D0, SCC): +def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S0.u64 << S1[5 : 0].u32); # SCC = D0.u64 != 0ULL D0.u64 = (S0.u64 << S1[5 : 0].u32) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_LSHR_B32(S0, S1, D0, SCC): +def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 >> S1[4 : 0].u32); # SCC = D0.u32 != 0U D0.u32 = (S0.u32 >> S1[4 : 0].u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_LSHR_B64(S0, S1, D0, SCC): +def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S0.u64 >> S1[5 : 0].u32); # SCC = D0.u64 != 0ULL D0.u64 = (S0.u64 >> S1[5 : 0].u32) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_ASHR_I32(S0, S1, D0, SCC): +def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); # SCC = D0.i32 != 0 D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) SCC._val = int(D0.i32 != 0) -def _SOP2Op_S_ASHR_I64(S0, S1, D0, SCC): +def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); # SCC = D0.i64 != 0LL D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) SCC._val = int(D0.i64 != 0) -def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -849,7 +849,7 @@ def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -857,7 +857,7 @@ def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -865,7 +865,7 @@ def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -873,133 +873,133 @@ def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, D0, SCC, tmp): SCC._val = int(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 -def _SOP2Op_S_MIN_I32(S0, S1, D0, SCC): +def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 < S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 SCC._val = int(S0.i32 < S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) -def _SOP2Op_S_MIN_U32(S0, S1, D0, SCC): +def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 < S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 SCC._val = int(S0.u32 < S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) -def _SOP2Op_S_MAX_I32(S0, S1, D0, SCC): +def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 >= S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 SCC._val = int(S0.i32 >= S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) -def _SOP2Op_S_MAX_U32(S0, S1, D0, SCC): +def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 >= S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 SCC._val = int(S0.u32 >= S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) -def _SOP2Op_S_AND_B32(S0, S1, D0, SCC): +def _SOP2Op_S_AND_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 & S1.u32); # SCC = D0.u32 != 0U D0.u32 = (S0.u32 & S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_AND_B64(S0, S1, D0, SCC): +def _SOP2Op_S_AND_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = (S0.u64 & S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_OR_B32(S0, S1, D0, SCC): +def _SOP2Op_S_OR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 | S1.u32); # SCC = D0.u32 != 0U D0.u32 = (S0.u32 | S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_OR_B64(S0, S1, D0, SCC): +def _SOP2Op_S_OR_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = (S0.u64 | S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_XOR_B32(S0, S1, D0, SCC): +def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U D0.u32 = (S0.u32 ^ S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_XOR_B64(S0, S1, D0, SCC): +def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = (S0.u64 ^ S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_NAND_B32(S0, S1, D0, SCC): +def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~(S0.u32 & S1.u32); # SCC = D0.u32 != 0U D0.u32 = ~(S0.u32 & S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_NAND_B64(S0, S1, D0, SCC): +def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = ~(S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = ~(S0.u64 & S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_NOR_B32(S0, S1, D0, SCC): +def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~(S0.u32 | S1.u32); # SCC = D0.u32 != 0U D0.u32 = ~(S0.u32 | S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_NOR_B64(S0, S1, D0, SCC): +def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = ~(S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = ~(S0.u64 | S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_XNOR_B32(S0, S1, D0, SCC): +def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~(S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U D0.u32 = ~(S0.u32 ^ S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_XNOR_B64(S0, S1, D0, SCC): +def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = ~(S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = ~(S0.u64 ^ S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_AND_NOT1_B32(S0, S1, D0, SCC): +def _SOP2Op_S_AND_NOT1_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 & ~S1.u32); # SCC = D0.u32 != 0U D0.u32 = (S0.u32 & ~S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_AND_NOT1_B64(S0, S1, D0, SCC): +def _SOP2Op_S_AND_NOT1_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S0.u64 & ~S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = (S0.u64 & ~S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_OR_NOT1_B32(S0, S1, D0, SCC): +def _SOP2Op_S_OR_NOT1_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 | ~S1.u32); # SCC = D0.u32 != 0U D0.u32 = (S0.u32 | ~S1.u32) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_OR_NOT1_B64(S0, S1, D0, SCC): +def _SOP2Op_S_OR_NOT1_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S0.u64 | ~S1.u64); # SCC = D0.u64 != 0ULL D0.u64 = (S0.u64 | ~S1.u64) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_BFE_U32(S0, S1, D0, SCC): +def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); # SCC = D0.u32 != 0U D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC._val = int(D0.u32 != 0) -def _SOP2Op_S_BFE_I32(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); # SCC = D0.i32 != 0 @@ -1007,13 +1007,13 @@ def _SOP2Op_S_BFE_I32(S0, S1, D0, SCC, tmp): D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) SCC._val = int(D0.i32 != 0) -def _SOP2Op_S_BFE_U64(S0, S1, D0, SCC): +def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); # SCC = D0.u64 != 0ULL D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC._val = int(D0.u64 != 0) -def _SOP2Op_S_BFE_I64(S0, S1, D0, SCC, tmp): +def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); # SCC = D0.i64 != 0LL @@ -1021,59 +1021,59 @@ def _SOP2Op_S_BFE_I64(S0, S1, D0, SCC, tmp): D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) SCC._val = int(D0.i64 != 0) -def _SOP2Op_S_BFM_B32(S0, S1, D0): +def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) -def _SOP2Op_S_BFM_B64(S0, S1, D0): +def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) -def _SOP2Op_S_MUL_I32(S0, S1, D0): +def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 * S1.i32 D0.i32 = S0.i32 * S1.i32 -def _SOP2Op_S_MUL_HI_U32(S0, S1, D0): +def _SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) D0.u32 = (((S0.u32) * (S1.u32)) >> 32) -def _SOP2Op_S_MUL_HI_I32(S0, S1, D0): +def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) D0.i32 = (((S0.i32) * (S1.i32)) >> 32) -def _SOP2Op_S_CSELECT_B32(S0, S1, D0, SCC): +def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = SCC ? S0.u32 : S1.u32 D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) -def _SOP2Op_S_CSELECT_B64(S0, S1, D0, SCC): +def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = SCC ? S0.u64 : S1.u64 D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) -def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, D0): +def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } D0._val = int(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) -def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, D0): +def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } D0._val = int(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) -def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, D0): +def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } D0._val = int(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) -def _SOP2Op_S_PACK_HL_B32_B16(S0, S1, D0): +def _SOP2Op_S_PACK_HL_B32_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } D0._val = int(_pack(S1[15 : 0].u16, S0[31 : 16].u16)) -def _SOP2Op_S_ADD_F32(S0, S1, D0): +def _SOP2Op_S_ADD_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 + S1.f32 D0.f32 = S0.f32 + S1.f32 -def _SOP2Op_S_SUB_F32(S0, S1, D0): +def _SOP2Op_S_SUB_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 - S1.f32 D0.f32 = S0.f32 - S1.f32 -def _SOP2Op_S_MIN_F32(S0, S1, D0): +def _SOP2Op_S_MIN_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -1126,7 +1126,7 @@ def _SOP2Op_S_MIN_F32(S0, S1, D0): else: D0.f32 = S1.f32 -def _SOP2Op_S_MAX_F32(S0, S1, D0): +def _SOP2Op_S_MAX_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -1179,23 +1179,23 @@ def _SOP2Op_S_MAX_F32(S0, S1, D0): else: D0.f32 = S1.f32 -def _SOP2Op_S_MUL_F32(S0, S1, D0): +def _SOP2Op_S_MUL_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 * S1.f32 D0.f32 = S0.f32 * S1.f32 -def _SOP2Op_S_FMAAK_F32(S0, S1, D0, SIMM32): +def _SOP2Op_S_FMAAK_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + D0.f32 = fma(S0.f32, S1.f32, S2.f32) -def _SOP2Op_S_FMAMK_F32(S0, S1, D0, SIMM32): +def _SOP2Op_S_FMAMK_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + D0.f32 = fma(S0.f32, S2.f32, S1.f32) -def _SOP2Op_S_FMAC_F32(S0, S1, D0): +def _SOP2Op_S_FMAC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) D0.f32 = fma(S0.f32, S1.f32, D0.f32) -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(S0, S1, tmp): +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -1203,15 +1203,15 @@ def _SOP2Op_S_CVT_PK_RTZ_F16_F32(S0, S1, tmp): tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) -def _SOP2Op_S_ADD_F16(S0, S1, D0): +def _SOP2Op_S_ADD_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 + S1.f16 D0.f16 = S0.f16 + S1.f16 -def _SOP2Op_S_SUB_F16(S0, S1, D0): +def _SOP2Op_S_SUB_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 - S1.f16 D0.f16 = S0.f16 - S1.f16 -def _SOP2Op_S_MIN_F16(S0, S1, D0): +def _SOP2Op_S_MIN_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -1264,7 +1264,7 @@ def _SOP2Op_S_MIN_F16(S0, S1, D0): else: D0.f16 = S1.f16 -def _SOP2Op_S_MAX_F16(S0, S1, D0): +def _SOP2Op_S_MAX_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -1317,452 +1317,452 @@ def _SOP2Op_S_MAX_F16(S0, S1, D0): else: D0.f16 = S1.f16 -def _SOP2Op_S_MUL_F16(S0, S1, D0): +def _SOP2Op_S_MUL_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 * S1.f16 D0.f16 = S0.f16 * S1.f16 -def _SOP2Op_S_FMAC_F16(S0, S1, D0): +def _SOP2Op_S_FMAC_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) D0.f16 = fma(S0.f16, S1.f16, D0.f16) SOP2Op_FUNCTIONS = { - SOP2Op.S_ADD_U32: (_SOP2Op_S_ADD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_SUB_U32: (_SOP2Op_S_SUB_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_ADD_I32: (_SOP2Op_S_ADD_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_SUB_I32: (_SOP2Op_S_SUB_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_ADDC_U32: (_SOP2Op_S_ADDC_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_SUBB_U32: (_SOP2Op_S_SUBB_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_ABSDIFF_I32: (_SOP2Op_S_ABSDIFF_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_LSHL_B32: (_SOP2Op_S_LSHL_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_LSHL_B64: (_SOP2Op_S_LSHL_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_LSHR_B32: (_SOP2Op_S_LSHR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_LSHR_B64: (_SOP2Op_S_LSHR_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_ASHR_I32: (_SOP2Op_S_ASHR_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_ASHR_I64: (_SOP2Op_S_ASHR_I64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_LSHL1_ADD_U32: (_SOP2Op_S_LSHL1_ADD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_LSHL2_ADD_U32: (_SOP2Op_S_LSHL2_ADD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_LSHL3_ADD_U32: (_SOP2Op_S_LSHL3_ADD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_LSHL4_ADD_U32: (_SOP2Op_S_LSHL4_ADD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_MIN_I32: (_SOP2Op_S_MIN_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_MIN_U32: (_SOP2Op_S_MIN_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_MAX_I32: (_SOP2Op_S_MAX_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_MAX_U32: (_SOP2Op_S_MAX_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_AND_B32: (_SOP2Op_S_AND_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_AND_B64: (_SOP2Op_S_AND_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_OR_B32: (_SOP2Op_S_OR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_OR_B64: (_SOP2Op_S_OR_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_XOR_B32: (_SOP2Op_S_XOR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_XOR_B64: (_SOP2Op_S_XOR_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_NAND_B32: (_SOP2Op_S_NAND_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_NAND_B64: (_SOP2Op_S_NAND_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_NOR_B32: (_SOP2Op_S_NOR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_NOR_B64: (_SOP2Op_S_NOR_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_XNOR_B32: (_SOP2Op_S_XNOR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_XNOR_B64: (_SOP2Op_S_XNOR_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_AND_NOT1_B32: (_SOP2Op_S_AND_NOT1_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_AND_NOT1_B64: (_SOP2Op_S_AND_NOT1_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_OR_NOT1_B32: (_SOP2Op_S_OR_NOT1_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_OR_NOT1_B64: (_SOP2Op_S_OR_NOT1_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_BFE_U32: (_SOP2Op_S_BFE_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_BFE_I32: (_SOP2Op_S_BFE_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_BFE_U64: (_SOP2Op_S_BFE_U64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_BFE_I64: (_SOP2Op_S_BFE_I64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC', 'tmp'))), - SOP2Op.S_BFM_B32: (_SOP2Op_S_BFM_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_BFM_B64: (_SOP2Op_S_BFM_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MUL_I32: (_SOP2Op_S_MUL_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MUL_HI_U32: (_SOP2Op_S_MUL_HI_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MUL_HI_I32: (_SOP2Op_S_MUL_HI_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_CSELECT_B32: (_SOP2Op_S_CSELECT_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_CSELECT_B64: (_SOP2Op_S_CSELECT_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SCC'))), - SOP2Op.S_PACK_LL_B32_B16: (_SOP2Op_S_PACK_LL_B32_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_PACK_LH_B32_B16: (_SOP2Op_S_PACK_LH_B32_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_PACK_HH_B32_B16: (_SOP2Op_S_PACK_HH_B32_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_PACK_HL_B32_B16: (_SOP2Op_S_PACK_HL_B32_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_ADD_F32: (_SOP2Op_S_ADD_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_SUB_F32: (_SOP2Op_S_SUB_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MIN_F32: (_SOP2Op_S_MIN_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MAX_F32: (_SOP2Op_S_MAX_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MUL_F32: (_SOP2Op_S_MUL_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_FMAAK_F32: (_SOP2Op_S_FMAAK_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SIMM32'))), - SOP2Op.S_FMAMK_F32: (_SOP2Op_S_FMAMK_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SIMM32'))), - SOP2Op.S_FMAC_F32: (_SOP2Op_S_FMAC_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_CVT_PK_RTZ_F16_F32: (_SOP2Op_S_CVT_PK_RTZ_F16_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - SOP2Op.S_ADD_F16: (_SOP2Op_S_ADD_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_SUB_F16: (_SOP2Op_S_SUB_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MIN_F16: (_SOP2Op_S_MIN_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MAX_F16: (_SOP2Op_S_MAX_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_MUL_F16: (_SOP2Op_S_MUL_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - SOP2Op.S_FMAC_F16: (_SOP2Op_S_FMAC_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), + SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32, + SOP2Op.S_SUB_U32: _SOP2Op_S_SUB_U32, + SOP2Op.S_ADD_I32: _SOP2Op_S_ADD_I32, + SOP2Op.S_SUB_I32: _SOP2Op_S_SUB_I32, + SOP2Op.S_ADDC_U32: _SOP2Op_S_ADDC_U32, + SOP2Op.S_SUBB_U32: _SOP2Op_S_SUBB_U32, + SOP2Op.S_ABSDIFF_I32: _SOP2Op_S_ABSDIFF_I32, + SOP2Op.S_LSHL_B32: _SOP2Op_S_LSHL_B32, + SOP2Op.S_LSHL_B64: _SOP2Op_S_LSHL_B64, + SOP2Op.S_LSHR_B32: _SOP2Op_S_LSHR_B32, + SOP2Op.S_LSHR_B64: _SOP2Op_S_LSHR_B64, + SOP2Op.S_ASHR_I32: _SOP2Op_S_ASHR_I32, + SOP2Op.S_ASHR_I64: _SOP2Op_S_ASHR_I64, + SOP2Op.S_LSHL1_ADD_U32: _SOP2Op_S_LSHL1_ADD_U32, + SOP2Op.S_LSHL2_ADD_U32: _SOP2Op_S_LSHL2_ADD_U32, + SOP2Op.S_LSHL3_ADD_U32: _SOP2Op_S_LSHL3_ADD_U32, + SOP2Op.S_LSHL4_ADD_U32: _SOP2Op_S_LSHL4_ADD_U32, + SOP2Op.S_MIN_I32: _SOP2Op_S_MIN_I32, + SOP2Op.S_MIN_U32: _SOP2Op_S_MIN_U32, + SOP2Op.S_MAX_I32: _SOP2Op_S_MAX_I32, + SOP2Op.S_MAX_U32: _SOP2Op_S_MAX_U32, + SOP2Op.S_AND_B32: _SOP2Op_S_AND_B32, + SOP2Op.S_AND_B64: _SOP2Op_S_AND_B64, + SOP2Op.S_OR_B32: _SOP2Op_S_OR_B32, + SOP2Op.S_OR_B64: _SOP2Op_S_OR_B64, + SOP2Op.S_XOR_B32: _SOP2Op_S_XOR_B32, + SOP2Op.S_XOR_B64: _SOP2Op_S_XOR_B64, + SOP2Op.S_NAND_B32: _SOP2Op_S_NAND_B32, + SOP2Op.S_NAND_B64: _SOP2Op_S_NAND_B64, + SOP2Op.S_NOR_B32: _SOP2Op_S_NOR_B32, + SOP2Op.S_NOR_B64: _SOP2Op_S_NOR_B64, + SOP2Op.S_XNOR_B32: _SOP2Op_S_XNOR_B32, + SOP2Op.S_XNOR_B64: _SOP2Op_S_XNOR_B64, + SOP2Op.S_AND_NOT1_B32: _SOP2Op_S_AND_NOT1_B32, + SOP2Op.S_AND_NOT1_B64: _SOP2Op_S_AND_NOT1_B64, + SOP2Op.S_OR_NOT1_B32: _SOP2Op_S_OR_NOT1_B32, + SOP2Op.S_OR_NOT1_B64: _SOP2Op_S_OR_NOT1_B64, + SOP2Op.S_BFE_U32: _SOP2Op_S_BFE_U32, + SOP2Op.S_BFE_I32: _SOP2Op_S_BFE_I32, + SOP2Op.S_BFE_U64: _SOP2Op_S_BFE_U64, + SOP2Op.S_BFE_I64: _SOP2Op_S_BFE_I64, + SOP2Op.S_BFM_B32: _SOP2Op_S_BFM_B32, + SOP2Op.S_BFM_B64: _SOP2Op_S_BFM_B64, + SOP2Op.S_MUL_I32: _SOP2Op_S_MUL_I32, + SOP2Op.S_MUL_HI_U32: _SOP2Op_S_MUL_HI_U32, + SOP2Op.S_MUL_HI_I32: _SOP2Op_S_MUL_HI_I32, + SOP2Op.S_CSELECT_B32: _SOP2Op_S_CSELECT_B32, + SOP2Op.S_CSELECT_B64: _SOP2Op_S_CSELECT_B64, + SOP2Op.S_PACK_LL_B32_B16: _SOP2Op_S_PACK_LL_B32_B16, + SOP2Op.S_PACK_LH_B32_B16: _SOP2Op_S_PACK_LH_B32_B16, + SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, + SOP2Op.S_PACK_HL_B32_B16: _SOP2Op_S_PACK_HL_B32_B16, + SOP2Op.S_ADD_F32: _SOP2Op_S_ADD_F32, + SOP2Op.S_SUB_F32: _SOP2Op_S_SUB_F32, + SOP2Op.S_MIN_F32: _SOP2Op_S_MIN_F32, + SOP2Op.S_MAX_F32: _SOP2Op_S_MAX_F32, + SOP2Op.S_MUL_F32: _SOP2Op_S_MUL_F32, + SOP2Op.S_FMAAK_F32: _SOP2Op_S_FMAAK_F32, + SOP2Op.S_FMAMK_F32: _SOP2Op_S_FMAMK_F32, + SOP2Op.S_FMAC_F32: _SOP2Op_S_FMAC_F32, + SOP2Op.S_CVT_PK_RTZ_F16_F32: _SOP2Op_S_CVT_PK_RTZ_F16_F32, + SOP2Op.S_ADD_F16: _SOP2Op_S_ADD_F16, + SOP2Op.S_SUB_F16: _SOP2Op_S_SUB_F16, + SOP2Op.S_MIN_F16: _SOP2Op_S_MIN_F16, + SOP2Op.S_MAX_F16: _SOP2Op_S_MAX_F16, + SOP2Op.S_MUL_F16: _SOP2Op_S_MUL_F16, + SOP2Op.S_FMAC_F16: _SOP2Op_S_FMAC_F16, } -def _SOPCOp_S_CMP_EQ_I32(S0, S1, SCC): +def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 == S1.i32 SCC._val = int(S0.i32 == S1.i32) -def _SOPCOp_S_CMP_LG_I32(S0, S1, SCC): +def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 <> S1.i32 SCC._val = int(S0.i32 != S1.i32) -def _SOPCOp_S_CMP_GT_I32(S0, S1, SCC): +def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 > S1.i32 SCC._val = int(S0.i32 > S1.i32) -def _SOPCOp_S_CMP_GE_I32(S0, S1, SCC): +def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 >= S1.i32 SCC._val = int(S0.i32 >= S1.i32) -def _SOPCOp_S_CMP_LT_I32(S0, S1, SCC): +def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 < S1.i32 SCC._val = int(S0.i32 < S1.i32) -def _SOPCOp_S_CMP_LE_I32(S0, S1, SCC): +def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.i32 <= S1.i32 SCC._val = int(S0.i32 <= S1.i32) -def _SOPCOp_S_CMP_EQ_U32(S0, S1, SCC): +def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 == S1.u32 SCC._val = int(S0.u32 == S1.u32) -def _SOPCOp_S_CMP_LG_U32(S0, S1, SCC): +def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 <> S1.u32 SCC._val = int(S0.u32 != S1.u32) -def _SOPCOp_S_CMP_GT_U32(S0, S1, SCC): +def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 > S1.u32 SCC._val = int(S0.u32 > S1.u32) -def _SOPCOp_S_CMP_GE_U32(S0, S1, SCC): +def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 >= S1.u32 SCC._val = int(S0.u32 >= S1.u32) -def _SOPCOp_S_CMP_LT_U32(S0, S1, SCC): +def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 < S1.u32 SCC._val = int(S0.u32 < S1.u32) -def _SOPCOp_S_CMP_LE_U32(S0, S1, SCC): +def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 <= S1.u32 SCC._val = int(S0.u32 <= S1.u32) -def _SOPCOp_S_BITCMP0_B32(S0, S1, SCC): +def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U SCC._val = int(S0.u32[S1.u32[4 : 0]] == 0) -def _SOPCOp_S_BITCMP1_B32(S0, S1, SCC): +def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U SCC._val = int(S0.u32[S1.u32[4 : 0]] == 1) -def _SOPCOp_S_BITCMP0_B64(S0, S1, SCC): +def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U SCC._val = int(S0.u64[S1.u32[5 : 0]] == 0) -def _SOPCOp_S_BITCMP1_B64(S0, S1, SCC): +def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U SCC._val = int(S0.u64[S1.u32[5 : 0]] == 1) -def _SOPCOp_S_CMP_EQ_U64(S0, S1, SCC): +def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u64 == S1.u64 SCC._val = int(S0.u64 == S1.u64) -def _SOPCOp_S_CMP_LG_U64(S0, S1, SCC): +def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u64 <> S1.u64 SCC._val = int(S0.u64 != S1.u64) -def _SOPCOp_S_CMP_LT_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_LT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f32 < S1.f32 SCC._val = int(S0.f32 < S1.f32) -def _SOPCOp_S_CMP_LT_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_LT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f16 < S1.f16 SCC._val = int(S0.f16 < S1.f16) -def _SOPCOp_S_CMP_EQ_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_EQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f32 == S1.f32 SCC._val = int(S0.f32 == S1.f32) -def _SOPCOp_S_CMP_EQ_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_EQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f16 == S1.f16 SCC._val = int(S0.f16 == S1.f16) -def _SOPCOp_S_CMP_LE_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_LE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f32 <= S1.f32 SCC._val = int(S0.f32 <= S1.f32) -def _SOPCOp_S_CMP_LE_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_LE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f16 <= S1.f16 SCC._val = int(S0.f16 <= S1.f16) -def _SOPCOp_S_CMP_GT_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_GT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f32 > S1.f32 SCC._val = int(S0.f32 > S1.f32) -def _SOPCOp_S_CMP_GT_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_GT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f16 > S1.f16 SCC._val = int(S0.f16 > S1.f16) -def _SOPCOp_S_CMP_LG_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_LG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f32 <> S1.f32 SCC._val = int(S0.f32 != S1.f32) -def _SOPCOp_S_CMP_LG_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_LG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f16 <> S1.f16 SCC._val = int(S0.f16 != S1.f16) -def _SOPCOp_S_CMP_GE_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_GE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f32 >= S1.f32 SCC._val = int(S0.f32 >= S1.f32) -def _SOPCOp_S_CMP_GE_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_GE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.f16 >= S1.f16 SCC._val = int(S0.f16 >= S1.f16) -def _SOPCOp_S_CMP_O_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_O_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) SCC._val = int(( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))) -def _SOPCOp_S_CMP_O_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_O_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) SCC._val = int(( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))) -def _SOPCOp_S_CMP_U_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_U_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) SCC._val = int((isNAN(F(S0.f32)) or isNAN(F(S1.f32)))) -def _SOPCOp_S_CMP_U_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_U_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) SCC._val = int((isNAN(F(S0.f16)) or isNAN(F(S1.f16)))) -def _SOPCOp_S_CMP_NGE_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_NGE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < SCC._val = int( not (S0.f32 >= S1.f32)) -def _SOPCOp_S_CMP_NGE_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_NGE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < SCC._val = int( not (S0.f16 >= S1.f16)) -def _SOPCOp_S_CMP_NLG_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_NLG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == SCC._val = int( not (S0.f32 != S1.f32)) -def _SOPCOp_S_CMP_NLG_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_NLG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == SCC._val = int( not (S0.f16 != S1.f16)) -def _SOPCOp_S_CMP_NGT_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_NGT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= SCC._val = int( not (S0.f32 > S1.f32)) -def _SOPCOp_S_CMP_NGT_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_NGT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= SCC._val = int( not (S0.f16 > S1.f16)) -def _SOPCOp_S_CMP_NLE_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_NLE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > SCC._val = int( not (S0.f32 <= S1.f32)) -def _SOPCOp_S_CMP_NLE_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_NLE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > SCC._val = int( not (S0.f16 <= S1.f16)) -def _SOPCOp_S_CMP_NEQ_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_NEQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != SCC._val = int( not (S0.f32 == S1.f32)) -def _SOPCOp_S_CMP_NEQ_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_NEQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != SCC._val = int( not (S0.f16 == S1.f16)) -def _SOPCOp_S_CMP_NLT_F32(S0, S1, SCC): +def _SOPCOp_S_CMP_NLT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= SCC._val = int( not (S0.f32 < S1.f32)) -def _SOPCOp_S_CMP_NLT_F16(S0, S1, SCC): +def _SOPCOp_S_CMP_NLT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= SCC._val = int( not (S0.f16 < S1.f16)) SOPCOp_FUNCTIONS = { - SOPCOp.S_CMP_EQ_I32: (_SOPCOp_S_CMP_EQ_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LG_I32: (_SOPCOp_S_CMP_LG_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GT_I32: (_SOPCOp_S_CMP_GT_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GE_I32: (_SOPCOp_S_CMP_GE_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LT_I32: (_SOPCOp_S_CMP_LT_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LE_I32: (_SOPCOp_S_CMP_LE_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_EQ_U32: (_SOPCOp_S_CMP_EQ_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LG_U32: (_SOPCOp_S_CMP_LG_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GT_U32: (_SOPCOp_S_CMP_GT_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GE_U32: (_SOPCOp_S_CMP_GE_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LT_U32: (_SOPCOp_S_CMP_LT_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LE_U32: (_SOPCOp_S_CMP_LE_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_BITCMP0_B32: (_SOPCOp_S_BITCMP0_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_BITCMP1_B32: (_SOPCOp_S_BITCMP1_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_BITCMP0_B64: (_SOPCOp_S_BITCMP0_B64, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_BITCMP1_B64: (_SOPCOp_S_BITCMP1_B64, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_EQ_U64: (_SOPCOp_S_CMP_EQ_U64, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LG_U64: (_SOPCOp_S_CMP_LG_U64, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LT_F32: (_SOPCOp_S_CMP_LT_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LT_F16: (_SOPCOp_S_CMP_LT_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_EQ_F32: (_SOPCOp_S_CMP_EQ_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_EQ_F16: (_SOPCOp_S_CMP_EQ_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LE_F32: (_SOPCOp_S_CMP_LE_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LE_F16: (_SOPCOp_S_CMP_LE_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GT_F32: (_SOPCOp_S_CMP_GT_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GT_F16: (_SOPCOp_S_CMP_GT_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LG_F32: (_SOPCOp_S_CMP_LG_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_LG_F16: (_SOPCOp_S_CMP_LG_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GE_F32: (_SOPCOp_S_CMP_GE_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_GE_F16: (_SOPCOp_S_CMP_GE_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_O_F32: (_SOPCOp_S_CMP_O_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_O_F16: (_SOPCOp_S_CMP_O_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_U_F32: (_SOPCOp_S_CMP_U_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_U_F16: (_SOPCOp_S_CMP_U_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NGE_F32: (_SOPCOp_S_CMP_NGE_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NGE_F16: (_SOPCOp_S_CMP_NGE_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NLG_F32: (_SOPCOp_S_CMP_NLG_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NLG_F16: (_SOPCOp_S_CMP_NLG_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NGT_F32: (_SOPCOp_S_CMP_NGT_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NGT_F16: (_SOPCOp_S_CMP_NGT_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NLE_F32: (_SOPCOp_S_CMP_NLE_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NLE_F16: (_SOPCOp_S_CMP_NLE_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NEQ_F32: (_SOPCOp_S_CMP_NEQ_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NEQ_F16: (_SOPCOp_S_CMP_NEQ_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NLT_F32: (_SOPCOp_S_CMP_NLT_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), - SOPCOp.S_CMP_NLT_F16: (_SOPCOp_S_CMP_NLT_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'SCC'))), + SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, + SOPCOp.S_CMP_LG_I32: _SOPCOp_S_CMP_LG_I32, + SOPCOp.S_CMP_GT_I32: _SOPCOp_S_CMP_GT_I32, + SOPCOp.S_CMP_GE_I32: _SOPCOp_S_CMP_GE_I32, + SOPCOp.S_CMP_LT_I32: _SOPCOp_S_CMP_LT_I32, + SOPCOp.S_CMP_LE_I32: _SOPCOp_S_CMP_LE_I32, + SOPCOp.S_CMP_EQ_U32: _SOPCOp_S_CMP_EQ_U32, + SOPCOp.S_CMP_LG_U32: _SOPCOp_S_CMP_LG_U32, + SOPCOp.S_CMP_GT_U32: _SOPCOp_S_CMP_GT_U32, + SOPCOp.S_CMP_GE_U32: _SOPCOp_S_CMP_GE_U32, + SOPCOp.S_CMP_LT_U32: _SOPCOp_S_CMP_LT_U32, + SOPCOp.S_CMP_LE_U32: _SOPCOp_S_CMP_LE_U32, + SOPCOp.S_BITCMP0_B32: _SOPCOp_S_BITCMP0_B32, + SOPCOp.S_BITCMP1_B32: _SOPCOp_S_BITCMP1_B32, + SOPCOp.S_BITCMP0_B64: _SOPCOp_S_BITCMP0_B64, + SOPCOp.S_BITCMP1_B64: _SOPCOp_S_BITCMP1_B64, + SOPCOp.S_CMP_EQ_U64: _SOPCOp_S_CMP_EQ_U64, + SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64, + SOPCOp.S_CMP_LT_F32: _SOPCOp_S_CMP_LT_F32, + SOPCOp.S_CMP_LT_F16: _SOPCOp_S_CMP_LT_F16, + SOPCOp.S_CMP_EQ_F32: _SOPCOp_S_CMP_EQ_F32, + SOPCOp.S_CMP_EQ_F16: _SOPCOp_S_CMP_EQ_F16, + SOPCOp.S_CMP_LE_F32: _SOPCOp_S_CMP_LE_F32, + SOPCOp.S_CMP_LE_F16: _SOPCOp_S_CMP_LE_F16, + SOPCOp.S_CMP_GT_F32: _SOPCOp_S_CMP_GT_F32, + SOPCOp.S_CMP_GT_F16: _SOPCOp_S_CMP_GT_F16, + SOPCOp.S_CMP_LG_F32: _SOPCOp_S_CMP_LG_F32, + SOPCOp.S_CMP_LG_F16: _SOPCOp_S_CMP_LG_F16, + SOPCOp.S_CMP_GE_F32: _SOPCOp_S_CMP_GE_F32, + SOPCOp.S_CMP_GE_F16: _SOPCOp_S_CMP_GE_F16, + SOPCOp.S_CMP_O_F32: _SOPCOp_S_CMP_O_F32, + SOPCOp.S_CMP_O_F16: _SOPCOp_S_CMP_O_F16, + SOPCOp.S_CMP_U_F32: _SOPCOp_S_CMP_U_F32, + SOPCOp.S_CMP_U_F16: _SOPCOp_S_CMP_U_F16, + SOPCOp.S_CMP_NGE_F32: _SOPCOp_S_CMP_NGE_F32, + SOPCOp.S_CMP_NGE_F16: _SOPCOp_S_CMP_NGE_F16, + SOPCOp.S_CMP_NLG_F32: _SOPCOp_S_CMP_NLG_F32, + SOPCOp.S_CMP_NLG_F16: _SOPCOp_S_CMP_NLG_F16, + SOPCOp.S_CMP_NGT_F32: _SOPCOp_S_CMP_NGT_F32, + SOPCOp.S_CMP_NGT_F16: _SOPCOp_S_CMP_NGT_F16, + SOPCOp.S_CMP_NLE_F32: _SOPCOp_S_CMP_NLE_F32, + SOPCOp.S_CMP_NLE_F16: _SOPCOp_S_CMP_NLE_F16, + SOPCOp.S_CMP_NEQ_F32: _SOPCOp_S_CMP_NEQ_F32, + SOPCOp.S_CMP_NEQ_F16: _SOPCOp_S_CMP_NEQ_F16, + SOPCOp.S_CMP_NLT_F32: _SOPCOp_S_CMP_NLT_F32, + SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(D0, SIMM16): +def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(signext(SIMM16.i16)) - D0.i32 = (signext(SIMM16.i16)) + D0.i32 = (signext(S2.i16)) -def _SOPKOp_S_VERSION(): +def _SOPKOp_S_VERSION(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Do nothing - for use by tools only pass -def _SOPKOp_S_CMOVK_I32(D0, SCC, SIMM16): +def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if SCC then # D0.i32 = 32'I(signext(SIMM16.i16)) # endif if SCC: - D0.i32 = (signext(SIMM16.i16)) + D0.i32 = (signext(S2.i16)) -def _SOPKOp_S_CMPK_EQ_I32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_EQ_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = 64'I(S0.i32) == signext(SIMM16.i16) - SCC._val = int((S0.i32) == signext(SIMM16.i16)) + SCC._val = int((S0.i32) == signext(S2.i16)) -def _SOPKOp_S_CMPK_LG_I32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_LG_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = 64'I(S0.i32) != signext(SIMM16.i16) - SCC._val = int((S0.i32) != signext(SIMM16.i16)) + SCC._val = int((S0.i32) != signext(S2.i16)) -def _SOPKOp_S_CMPK_GT_I32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_GT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = 64'I(S0.i32) > signext(SIMM16.i16) - SCC._val = int((S0.i32) > signext(SIMM16.i16)) + SCC._val = int((S0.i32) > signext(S2.i16)) -def _SOPKOp_S_CMPK_GE_I32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_GE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = 64'I(S0.i32) >= signext(SIMM16.i16) - SCC._val = int((S0.i32) >= signext(SIMM16.i16)) + SCC._val = int((S0.i32) >= signext(S2.i16)) -def _SOPKOp_S_CMPK_LT_I32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_LT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = 64'I(S0.i32) < signext(SIMM16.i16) - SCC._val = int((S0.i32) < signext(SIMM16.i16)) + SCC._val = int((S0.i32) < signext(S2.i16)) -def _SOPKOp_S_CMPK_LE_I32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_LE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = 64'I(S0.i32) <= signext(SIMM16.i16) - SCC._val = int((S0.i32) <= signext(SIMM16.i16)) + SCC._val = int((S0.i32) <= signext(S2.i16)) -def _SOPKOp_S_CMPK_EQ_U32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_EQ_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 == 32'U(SIMM16.u16) - SCC._val = int(S0.u32 == (SIMM16.u16)) + SCC._val = int(S0.u32 == (S2.u16)) -def _SOPKOp_S_CMPK_LG_U32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_LG_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 != 32'U(SIMM16.u16) - SCC._val = int(S0.u32 != (SIMM16.u16)) + SCC._val = int(S0.u32 != (S2.u16)) -def _SOPKOp_S_CMPK_GT_U32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_GT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 > 32'U(SIMM16.u16) - SCC._val = int(S0.u32 > (SIMM16.u16)) + SCC._val = int(S0.u32 > (S2.u16)) -def _SOPKOp_S_CMPK_GE_U32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_GE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 >= 32'U(SIMM16.u16) - SCC._val = int(S0.u32 >= (SIMM16.u16)) + SCC._val = int(S0.u32 >= (S2.u16)) -def _SOPKOp_S_CMPK_LT_U32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_LT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 < 32'U(SIMM16.u16) - SCC._val = int(S0.u32 < (SIMM16.u16)) + SCC._val = int(S0.u32 < (S2.u16)) -def _SOPKOp_S_CMPK_LE_U32(S0, SCC, SIMM16): +def _SOPKOp_S_CMPK_LE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # SCC = S0.u32 <= 32'U(SIMM16.u16) - SCC._val = int(S0.u32 <= (SIMM16.u16)) + SCC._val = int(S0.u32 <= (S2.u16)) -def _SOPKOp_S_ADDK_I32(D0, SCC, tmp, SIMM16): +def _SOPKOp_S_ADDK_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = D0.i32; # D0.i32 = 32'I(64'I(D0.i32) + signext(SIMM16.i16)); # SCC = ((tmp[31] == SIMM16.i16[15]) && (tmp[31] != D0.i32[31])); tmp._val = int(D0.i32) - D0.i32 = ((D0.i32) + signext(SIMM16.i16)) - SCC._val = int(((tmp[31] == SIMM16.i16[15]) and (tmp[31] != D0.i32[31]))) + D0.i32 = ((D0.i32) + signext(S2.i16)) + SCC._val = int(((tmp[31] == S2.i16[15]) and (tmp[31] != D0.i32[31]))) -def _SOPKOp_S_MULK_I32(D0, SIMM16): +def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(64'I(D0.i32) * signext(SIMM16.i16)) - D0.i32 = ((D0.i32) * signext(SIMM16.i16)) + D0.i32 = ((D0.i32) * signext(S2.i16)) SOPKOp_FUNCTIONS = { - SOPKOp.S_MOVK_I32: (_SOPKOp_S_MOVK_I32, (False, False, False, False, False, False, False, False, ('D0', 'SIMM16'))), - SOPKOp.S_VERSION: (_SOPKOp_S_VERSION, (False, False, False, False, False, False, False, False, ())), - SOPKOp.S_CMOVK_I32: (_SOPKOp_S_CMOVK_I32, (False, False, False, False, False, False, False, False, ('D0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_EQ_I32: (_SOPKOp_S_CMPK_EQ_I32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_LG_I32: (_SOPKOp_S_CMPK_LG_I32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_GT_I32: (_SOPKOp_S_CMPK_GT_I32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_GE_I32: (_SOPKOp_S_CMPK_GE_I32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_LT_I32: (_SOPKOp_S_CMPK_LT_I32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_LE_I32: (_SOPKOp_S_CMPK_LE_I32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_EQ_U32: (_SOPKOp_S_CMPK_EQ_U32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_LG_U32: (_SOPKOp_S_CMPK_LG_U32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_GT_U32: (_SOPKOp_S_CMPK_GT_U32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_GE_U32: (_SOPKOp_S_CMPK_GE_U32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_LT_U32: (_SOPKOp_S_CMPK_LT_U32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_CMPK_LE_U32: (_SOPKOp_S_CMPK_LE_U32, (False, False, False, False, False, False, False, False, ('S0', 'SCC', 'SIMM16'))), - SOPKOp.S_ADDK_I32: (_SOPKOp_S_ADDK_I32, (False, False, False, False, False, False, False, False, ('D0', 'SCC', 'tmp', 'SIMM16'))), - SOPKOp.S_MULK_I32: (_SOPKOp_S_MULK_I32, (False, False, False, False, False, False, False, False, ('D0', 'SIMM16'))), + SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, + SOPKOp.S_VERSION: _SOPKOp_S_VERSION, + SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32, + SOPKOp.S_CMPK_EQ_I32: _SOPKOp_S_CMPK_EQ_I32, + SOPKOp.S_CMPK_LG_I32: _SOPKOp_S_CMPK_LG_I32, + SOPKOp.S_CMPK_GT_I32: _SOPKOp_S_CMPK_GT_I32, + SOPKOp.S_CMPK_GE_I32: _SOPKOp_S_CMPK_GE_I32, + SOPKOp.S_CMPK_LT_I32: _SOPKOp_S_CMPK_LT_I32, + SOPKOp.S_CMPK_LE_I32: _SOPKOp_S_CMPK_LE_I32, + SOPKOp.S_CMPK_EQ_U32: _SOPKOp_S_CMPK_EQ_U32, + SOPKOp.S_CMPK_LG_U32: _SOPKOp_S_CMPK_LG_U32, + SOPKOp.S_CMPK_GT_U32: _SOPKOp_S_CMPK_GT_U32, + SOPKOp.S_CMPK_GE_U32: _SOPKOp_S_CMPK_GE_U32, + SOPKOp.S_CMPK_LT_U32: _SOPKOp_S_CMPK_LT_U32, + SOPKOp.S_CMPK_LE_U32: _SOPKOp_S_CMPK_LE_U32, + SOPKOp.S_ADDK_I32: _SOPKOp_S_ADDK_I32, + SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32, } -def _SOPPOp_S_NOP(SIMM16): +def _SOPPOp_S_NOP(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # for i in 0U : SIMM16.u16[3 : 0].u32 do # endfor - for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): + for i in range(0, int(S2.u16[3 : 0].u32)+1): pass -def _SOPPOp_S_DELAY_ALU(EXEC): +def _SOPPOp_S_DELAY_ALU(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # instruction may be omitted. For wave64 the compiler may not know the status of the EXEC mask and hence # // 1 cycle delay here # // 2 cycles delay here pass -def _SOPPOp_S_TRAP(): +def _SOPPOp_S_TRAP(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // PC passed into trap handler points to S_TRAP itself, # // trap base address pass SOPPOp_FUNCTIONS = { - SOPPOp.S_NOP: (_SOPPOp_S_NOP, (False, False, False, False, False, False, False, False, ('SIMM16',))), - SOPPOp.S_DELAY_ALU: (_SOPPOp_S_DELAY_ALU, (False, False, False, False, False, False, False, True, ('EXEC',))), - SOPPOp.S_TRAP: (_SOPPOp_S_TRAP, (False, False, False, False, False, False, False, False, ())), + SOPPOp.S_NOP: _SOPPOp_S_NOP, + SOPPOp.S_DELAY_ALU: _SOPPOp_S_DELAY_ALU, + SOPPOp.S_TRAP: _SOPPOp_S_TRAP, } -def _VOP1Op_V_MOV_B32(S0, D0): +def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.b32 = S0.b32 D0.b32 = S0.b32 -def _VOP1Op_V_READFIRSTLANE_B32(D0, EXEC, SRC0, VGPR): +def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -1797,83 +1797,83 @@ def _VOP1Op_V_READFIRSTLANE_B32(D0, EXEC, SRC0, VGPR): lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] -def _VOP1Op_V_CVT_I32_F64(S0, D0): +def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f64_to_i32(S0.f64) D0.i32 = f64_to_i32(S0.f64) -def _VOP1Op_V_CVT_F64_I32(S0, D0): +def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = i32_to_f64(S0.i32) D0.f64 = i32_to_f64(S0.i32) -def _VOP1Op_V_CVT_F32_I32(S0, D0): +def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = i32_to_f32(S0.i32) D0.f32 = i32_to_f32(S0.i32) -def _VOP1Op_V_CVT_F32_U32(S0, D0): +def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0.u32) D0.f32 = u32_to_f32(S0.u32) -def _VOP1Op_V_CVT_U32_F32(S0, D0): +def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = f32_to_u32(S0.f32) D0.u32 = f32_to_u32(S0.f32) -def _VOP1Op_V_CVT_I32_F32(S0, D0): +def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f32_to_i32(S0.f32) D0.i32 = f32_to_i32(S0.f32) -def _VOP1Op_V_CVT_F16_F32(S0, D0): +def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = f32_to_f16(S0.f32) D0.f16 = f32_to_f16(S0.f32) -def _VOP1Op_V_CVT_F32_F16(S0, D0): +def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = f16_to_f32(S0.f16) D0.f32 = f16_to_f32(S0.f16) -def _VOP1Op_V_CVT_NEAREST_I32_F32(S0, D0): +def _VOP1Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) -def _VOP1Op_V_CVT_FLOOR_I32_F32(S0, D0): +def _VOP1Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f32_to_i32(floor(S0.f32)) D0.i32 = f32_to_i32(floor(S0.f32)) -def _VOP1Op_V_CVT_F32_F64(S0, D0): +def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = f64_to_f32(S0.f64) D0.f32 = f64_to_f32(S0.f64) -def _VOP1Op_V_CVT_F64_F32(S0, D0): +def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = f32_to_f64(S0.f32) D0.f64 = f32_to_f64(S0.f32) -def _VOP1Op_V_CVT_F32_UBYTE0(S0, D0): +def _VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[7 : 0].u32) D0.f32 = u32_to_f32(S0[7 : 0].u32) -def _VOP1Op_V_CVT_F32_UBYTE1(S0, D0): +def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[15 : 8].u32) D0.f32 = u32_to_f32(S0[15 : 8].u32) -def _VOP1Op_V_CVT_F32_UBYTE2(S0, D0): +def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[23 : 16].u32) D0.f32 = u32_to_f32(S0[23 : 16].u32) -def _VOP1Op_V_CVT_F32_UBYTE3(S0, D0): +def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[31 : 24].u32) D0.f32 = u32_to_f32(S0[31 : 24].u32) -def _VOP1Op_V_CVT_U32_F64(S0, D0): +def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = f64_to_u32(S0.f64) D0.u32 = f64_to_u32(S0.f64) -def _VOP1Op_V_CVT_F64_U32(S0, D0): +def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = u32_to_f64(S0.u32) D0.f64 = u32_to_f64(S0.u32) -def _VOP1Op_V_TRUNC_F64(S0, D0): +def _VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = trunc(S0.f64) D0.f64 = trunc(S0.f64) -def _VOP1Op_V_CEIL_F64(S0, D0): +def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -1882,7 +1882,7 @@ def _VOP1Op_V_CEIL_F64(S0, D0): if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 -def _VOP1Op_V_RNDNE_F64(S0, D0): +def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -1891,7 +1891,7 @@ def _VOP1Op_V_RNDNE_F64(S0, D0): if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 -def _VOP1Op_V_FLOOR_F64(S0, D0): +def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -1900,19 +1900,19 @@ def _VOP1Op_V_FLOOR_F64(S0, D0): if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 -def _VOP1Op_V_MOV_B16(S0, D0): +def _VOP1Op_V_MOV_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.b16 = S0.b16 D0.b16 = S0.b16 -def _VOP1Op_V_FRACT_F32(S0, D0): +def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 + -floor(S0.f32) D0.f32 = S0.f32 + -floor(S0.f32) -def _VOP1Op_V_TRUNC_F32(S0, D0): +def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32) D0.f32 = trunc(S0.f32) -def _VOP1Op_V_CEIL_F32(S0, D0): +def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -1921,7 +1921,7 @@ def _VOP1Op_V_CEIL_F32(S0, D0): if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 -def _VOP1Op_V_RNDNE_F32(S0, D0): +def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -1930,7 +1930,7 @@ def _VOP1Op_V_RNDNE_F32(S0, D0): if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 -def _VOP1Op_V_FLOOR_F32(S0, D0): +def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -1939,60 +1939,60 @@ def _VOP1Op_V_FLOOR_F32(S0, D0): if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 -def _VOP1Op_V_EXP_F32(S0, D0): +def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = pow(2.0F, S0.f32) D0.f32 = pow(2.0, S0.f32) -def _VOP1Op_V_LOG_F32(S0, D0): +def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = log2(S0.f32) D0.f32 = log2(S0.f32) -def _VOP1Op_V_RCP_F32(S0, D0): +def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = 1.0F / S0.f32 D0.f32 = 1.0 / S0.f32 -def _VOP1Op_V_RCP_IFLAG_F32(S0, D0): +def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception D0.f32 = 1.0 / S0.f32 -def _VOP1Op_V_RSQ_F32(S0, D0): +def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = 1.0F / sqrt(S0.f32) D0.f32 = 1.0 / sqrt(S0.f32) -def _VOP1Op_V_RCP_F64(S0, D0): +def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = 1.0 / S0.f64 D0.f64 = 1.0 / S0.f64 -def _VOP1Op_V_RSQ_F64(S0, D0): +def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = 1.0 / sqrt(S0.f64) D0.f64 = 1.0 / sqrt(S0.f64) -def _VOP1Op_V_SQRT_F32(S0, D0): +def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = sqrt(S0.f32) D0.f32 = sqrt(S0.f32) -def _VOP1Op_V_SQRT_F64(S0, D0): +def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = sqrt(S0.f64) D0.f64 = sqrt(S0.f64) -def _VOP1Op_V_SIN_F32(S0, D0): +def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) D0.f32 = sin(S0.f32 * F(PI * 2.0)) -def _VOP1Op_V_COS_F32(S0, D0): +def _VOP1Op_V_COS_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) D0.f32 = cos(S0.f32 * F(PI * 2.0)) -def _VOP1Op_V_NOT_B32(S0, D0): +def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~S0.u32 D0.u32 = ~S0.u32 -def _VOP1Op_V_BFREV_B32(S0, D0): +def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32[31 : 0] = S0.u32[0 : 31] D0.u32[31 : 0] = S0.u32[0 : 31] -def _VOP1Op_V_CLZ_I32_U32(S0, D0): +def _VOP1Op_V_CLZ_I32_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -2006,7 +2006,7 @@ def _VOP1Op_V_CLZ_I32_U32(S0, D0): if S0.u32[31 - i] == 1: D0.i32 = i; break -def _VOP1Op_V_CTZ_I32_B32(S0, D0): +def _VOP1Op_V_CTZ_I32_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -2020,7 +2020,7 @@ def _VOP1Op_V_CTZ_I32_B32(S0, D0): if S0.u32[i] == 1: D0.i32 = i; break -def _VOP1Op_V_CLS_I32(S0, D0): +def _VOP1Op_V_CLS_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -2034,7 +2034,7 @@ def _VOP1Op_V_CLS_I32(S0, D0): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i -def _VOP1Op_V_FREXP_EXP_I32_F64(S0, D0): +def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -2045,7 +2045,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F64(S0, D0): else: D0.i32 = exponent(S0.f64) - 1023 + 1 -def _VOP1Op_V_FREXP_MANT_F64(S0, D0): +def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -2056,11 +2056,11 @@ def _VOP1Op_V_FREXP_MANT_F64(S0, D0): else: D0.f64 = mantissa(S0.f64) -def _VOP1Op_V_FRACT_F64(S0, D0): +def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = S0.f64 + -floor(S0.f64) D0.f64 = S0.f64 + -floor(S0.f64) -def _VOP1Op_V_FREXP_EXP_I32_F32(S0, D0): +def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -2071,7 +2071,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F32(S0, D0): else: D0.i32 = exponent(S0.f32) - 127 + 1 -def _VOP1Op_V_FREXP_MANT_F32(S0, D0): +def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -2082,50 +2082,50 @@ def _VOP1Op_V_FREXP_MANT_F32(S0, D0): else: D0.f32 = mantissa(S0.f32) -def _VOP1Op_V_MOVRELS_B32(D0, laneId, SRC0, VGPR): +def _VOP1Op_V_MOVRELS_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 -def _VOP1Op_V_CVT_F16_U16(S0, D0): +def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = u16_to_f16(S0.u16) D0.f16 = u16_to_f16(S0.u16) -def _VOP1Op_V_CVT_F16_I16(S0, D0): +def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = i16_to_f16(S0.i16) D0.f16 = i16_to_f16(S0.i16) -def _VOP1Op_V_CVT_U16_F16(S0, D0): +def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = f16_to_u16(S0.f16) D0.u16 = f16_to_u16(S0.f16) -def _VOP1Op_V_CVT_I16_F16(S0, D0): +def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = f16_to_i16(S0.f16) D0.i16 = f16_to_i16(S0.f16) -def _VOP1Op_V_RCP_F16(S0, D0): +def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = 16'1.0 / S0.f16 D0.f16 = 1.0 / S0.f16 -def _VOP1Op_V_SQRT_F16(S0, D0): +def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = sqrt(S0.f16) D0.f16 = sqrt(S0.f16) -def _VOP1Op_V_RSQ_F16(S0, D0): +def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = 16'1.0 / sqrt(S0.f16) D0.f16 = 1.0 / sqrt(S0.f16) -def _VOP1Op_V_LOG_F16(S0, D0): +def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = log2(S0.f16) D0.f16 = log2(S0.f16) -def _VOP1Op_V_EXP_F16(S0, D0): +def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = pow(16'2.0, S0.f16) D0.f16 = pow(2.0, S0.f16) -def _VOP1Op_V_FREXP_MANT_F16(S0, D0): +def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -2136,7 +2136,7 @@ def _VOP1Op_V_FREXP_MANT_F16(S0, D0): else: D0.f16 = mantissa(S0.f16) -def _VOP1Op_V_FREXP_EXP_I16_F16(S0, D0): +def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -2147,7 +2147,7 @@ def _VOP1Op_V_FREXP_EXP_I16_F16(S0, D0): else: D0.i16 = (exponent(S0.f16) - 15 + 1) -def _VOP1Op_V_FLOOR_F16(S0, D0): +def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -2156,7 +2156,7 @@ def _VOP1Op_V_FLOOR_F16(S0, D0): if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 -def _VOP1Op_V_CEIL_F16(S0, D0): +def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -2165,11 +2165,11 @@ def _VOP1Op_V_CEIL_F16(S0, D0): if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 -def _VOP1Op_V_TRUNC_F16(S0, D0): +def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16) D0.f16 = trunc(S0.f16) -def _VOP1Op_V_RNDNE_F16(S0, D0): +def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -2178,27 +2178,27 @@ def _VOP1Op_V_RNDNE_F16(S0, D0): if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 -def _VOP1Op_V_FRACT_F16(S0, D0): +def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 + -floor(S0.f16) D0.f16 = S0.f16 + -floor(S0.f16) -def _VOP1Op_V_SIN_F16(S0, D0): +def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) D0.f16 = sin(S0.f16 * F(PI * 2.0)) -def _VOP1Op_V_COS_F16(S0, D0): +def _VOP1Op_V_COS_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) D0.f16 = cos(S0.f16 * F(PI * 2.0)) -def _VOP1Op_V_CVT_NORM_I16_F16(S0, D0): +def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = f16_to_snorm(S0.f16) D0.i16 = f16_to_snorm(S0.f16) -def _VOP1Op_V_CVT_NORM_U16_F16(S0, D0): +def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = f16_to_unorm(S0.f16) D0.u16 = f16_to_unorm(S0.f16) -def _VOP1Op_V_SWAP_B32(S0, D0, tmp): +def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = D0.b32; # D0.b32 = S0.b32; # S0.b32 = tmp @@ -2206,7 +2206,7 @@ def _VOP1Op_V_SWAP_B32(S0, D0, tmp): D0.b32 = S0.b32 S0.b32 = tmp -def _VOP1Op_V_SWAP_B16(S0, D0, tmp): +def _VOP1Op_V_SWAP_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = D0.b16; # D0.b16 = S0.b16; # S0.b16 = tmp @@ -2214,103 +2214,103 @@ def _VOP1Op_V_SWAP_B16(S0, D0, tmp): D0.b16 = S0.b16 S0.b16 = tmp -def _VOP1Op_V_NOT_B16(S0, D0): +def _VOP1Op_V_NOT_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = ~S0.u16 D0.u16 = ~S0.u16 -def _VOP1Op_V_CVT_I32_I16(S0, D0): +def _VOP1Op_V_CVT_I32_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(signext(S0.i16)) D0.i32 = (signext(S0.i16)) -def _VOP1Op_V_CVT_U32_U16(S0, D0): +def _VOP1Op_V_CVT_U32_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0 = { 16'0, S0.u16 } D0._val = int(_pack(0, S0.u16)) VOP1Op_FUNCTIONS = { - VOP1Op.V_MOV_B32: (_VOP1Op_V_MOV_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_READFIRSTLANE_B32: (_VOP1Op_V_READFIRSTLANE_B32, (False, False, False, False, False, False, False, True, ('D0', 'EXEC', 'SRC0', 'VGPR'))), - VOP1Op.V_CVT_I32_F64: (_VOP1Op_V_CVT_I32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F64_I32: (_VOP1Op_V_CVT_F64_I32, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_I32: (_VOP1Op_V_CVT_F32_I32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_U32: (_VOP1Op_V_CVT_F32_U32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_U32_F32: (_VOP1Op_V_CVT_U32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_I32_F32: (_VOP1Op_V_CVT_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F16_F32: (_VOP1Op_V_CVT_F16_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_F16: (_VOP1Op_V_CVT_F32_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_NEAREST_I32_F32: (_VOP1Op_V_CVT_NEAREST_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_FLOOR_I32_F32: (_VOP1Op_V_CVT_FLOOR_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_F64: (_VOP1Op_V_CVT_F32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F64_F32: (_VOP1Op_V_CVT_F64_F32, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_UBYTE0: (_VOP1Op_V_CVT_F32_UBYTE0, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_UBYTE1: (_VOP1Op_V_CVT_F32_UBYTE1, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_UBYTE2: (_VOP1Op_V_CVT_F32_UBYTE2, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F32_UBYTE3: (_VOP1Op_V_CVT_F32_UBYTE3, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_U32_F64: (_VOP1Op_V_CVT_U32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F64_U32: (_VOP1Op_V_CVT_F64_U32, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_TRUNC_F64: (_VOP1Op_V_TRUNC_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CEIL_F64: (_VOP1Op_V_CEIL_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RNDNE_F64: (_VOP1Op_V_RNDNE_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FLOOR_F64: (_VOP1Op_V_FLOOR_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_MOV_B16: (_VOP1Op_V_MOV_B16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FRACT_F32: (_VOP1Op_V_FRACT_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_TRUNC_F32: (_VOP1Op_V_TRUNC_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CEIL_F32: (_VOP1Op_V_CEIL_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RNDNE_F32: (_VOP1Op_V_RNDNE_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FLOOR_F32: (_VOP1Op_V_FLOOR_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_EXP_F32: (_VOP1Op_V_EXP_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_LOG_F32: (_VOP1Op_V_LOG_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RCP_F32: (_VOP1Op_V_RCP_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RCP_IFLAG_F32: (_VOP1Op_V_RCP_IFLAG_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RSQ_F32: (_VOP1Op_V_RSQ_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RCP_F64: (_VOP1Op_V_RCP_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RSQ_F64: (_VOP1Op_V_RSQ_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_SQRT_F32: (_VOP1Op_V_SQRT_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_SQRT_F64: (_VOP1Op_V_SQRT_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_SIN_F32: (_VOP1Op_V_SIN_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_COS_F32: (_VOP1Op_V_COS_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_NOT_B32: (_VOP1Op_V_NOT_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_BFREV_B32: (_VOP1Op_V_BFREV_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CLZ_I32_U32: (_VOP1Op_V_CLZ_I32_U32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CTZ_I32_B32: (_VOP1Op_V_CTZ_I32_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CLS_I32: (_VOP1Op_V_CLS_I32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FREXP_EXP_I32_F64: (_VOP1Op_V_FREXP_EXP_I32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FREXP_MANT_F64: (_VOP1Op_V_FREXP_MANT_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FRACT_F64: (_VOP1Op_V_FRACT_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FREXP_EXP_I32_F32: (_VOP1Op_V_FREXP_EXP_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FREXP_MANT_F32: (_VOP1Op_V_FREXP_MANT_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_MOVRELS_B32: (_VOP1Op_V_MOVRELS_B32, (False, False, False, False, False, False, False, False, ('D0', 'laneId', 'SRC0', 'VGPR'))), - VOP1Op.V_CVT_F16_U16: (_VOP1Op_V_CVT_F16_U16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_F16_I16: (_VOP1Op_V_CVT_F16_I16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_U16_F16: (_VOP1Op_V_CVT_U16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_I16_F16: (_VOP1Op_V_CVT_I16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RCP_F16: (_VOP1Op_V_RCP_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_SQRT_F16: (_VOP1Op_V_SQRT_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RSQ_F16: (_VOP1Op_V_RSQ_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_LOG_F16: (_VOP1Op_V_LOG_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_EXP_F16: (_VOP1Op_V_EXP_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FREXP_MANT_F16: (_VOP1Op_V_FREXP_MANT_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FREXP_EXP_I16_F16: (_VOP1Op_V_FREXP_EXP_I16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FLOOR_F16: (_VOP1Op_V_FLOOR_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CEIL_F16: (_VOP1Op_V_CEIL_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_TRUNC_F16: (_VOP1Op_V_TRUNC_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_RNDNE_F16: (_VOP1Op_V_RNDNE_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_FRACT_F16: (_VOP1Op_V_FRACT_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_SIN_F16: (_VOP1Op_V_SIN_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_COS_F16: (_VOP1Op_V_COS_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_NORM_I16_F16: (_VOP1Op_V_CVT_NORM_I16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_NORM_U16_F16: (_VOP1Op_V_CVT_NORM_U16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_SWAP_B32: (_VOP1Op_V_SWAP_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - VOP1Op.V_SWAP_B16: (_VOP1Op_V_SWAP_B16, (False, False, False, False, False, False, False, False, ('S0', 'D0', 'tmp'))), - VOP1Op.V_NOT_B16: (_VOP1Op_V_NOT_B16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_I32_I16: (_VOP1Op_V_CVT_I32_I16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP1Op.V_CVT_U32_U16: (_VOP1Op_V_CVT_U32_U16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), + VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, + VOP1Op.V_READFIRSTLANE_B32: _VOP1Op_V_READFIRSTLANE_B32, + VOP1Op.V_CVT_I32_F64: _VOP1Op_V_CVT_I32_F64, + VOP1Op.V_CVT_F64_I32: _VOP1Op_V_CVT_F64_I32, + VOP1Op.V_CVT_F32_I32: _VOP1Op_V_CVT_F32_I32, + VOP1Op.V_CVT_F32_U32: _VOP1Op_V_CVT_F32_U32, + VOP1Op.V_CVT_U32_F32: _VOP1Op_V_CVT_U32_F32, + VOP1Op.V_CVT_I32_F32: _VOP1Op_V_CVT_I32_F32, + VOP1Op.V_CVT_F16_F32: _VOP1Op_V_CVT_F16_F32, + VOP1Op.V_CVT_F32_F16: _VOP1Op_V_CVT_F32_F16, + VOP1Op.V_CVT_NEAREST_I32_F32: _VOP1Op_V_CVT_NEAREST_I32_F32, + VOP1Op.V_CVT_FLOOR_I32_F32: _VOP1Op_V_CVT_FLOOR_I32_F32, + VOP1Op.V_CVT_F32_F64: _VOP1Op_V_CVT_F32_F64, + VOP1Op.V_CVT_F64_F32: _VOP1Op_V_CVT_F64_F32, + VOP1Op.V_CVT_F32_UBYTE0: _VOP1Op_V_CVT_F32_UBYTE0, + VOP1Op.V_CVT_F32_UBYTE1: _VOP1Op_V_CVT_F32_UBYTE1, + VOP1Op.V_CVT_F32_UBYTE2: _VOP1Op_V_CVT_F32_UBYTE2, + VOP1Op.V_CVT_F32_UBYTE3: _VOP1Op_V_CVT_F32_UBYTE3, + VOP1Op.V_CVT_U32_F64: _VOP1Op_V_CVT_U32_F64, + VOP1Op.V_CVT_F64_U32: _VOP1Op_V_CVT_F64_U32, + VOP1Op.V_TRUNC_F64: _VOP1Op_V_TRUNC_F64, + VOP1Op.V_CEIL_F64: _VOP1Op_V_CEIL_F64, + VOP1Op.V_RNDNE_F64: _VOP1Op_V_RNDNE_F64, + VOP1Op.V_FLOOR_F64: _VOP1Op_V_FLOOR_F64, + VOP1Op.V_MOV_B16: _VOP1Op_V_MOV_B16, + VOP1Op.V_FRACT_F32: _VOP1Op_V_FRACT_F32, + VOP1Op.V_TRUNC_F32: _VOP1Op_V_TRUNC_F32, + VOP1Op.V_CEIL_F32: _VOP1Op_V_CEIL_F32, + VOP1Op.V_RNDNE_F32: _VOP1Op_V_RNDNE_F32, + VOP1Op.V_FLOOR_F32: _VOP1Op_V_FLOOR_F32, + VOP1Op.V_EXP_F32: _VOP1Op_V_EXP_F32, + VOP1Op.V_LOG_F32: _VOP1Op_V_LOG_F32, + VOP1Op.V_RCP_F32: _VOP1Op_V_RCP_F32, + VOP1Op.V_RCP_IFLAG_F32: _VOP1Op_V_RCP_IFLAG_F32, + VOP1Op.V_RSQ_F32: _VOP1Op_V_RSQ_F32, + VOP1Op.V_RCP_F64: _VOP1Op_V_RCP_F64, + VOP1Op.V_RSQ_F64: _VOP1Op_V_RSQ_F64, + VOP1Op.V_SQRT_F32: _VOP1Op_V_SQRT_F32, + VOP1Op.V_SQRT_F64: _VOP1Op_V_SQRT_F64, + VOP1Op.V_SIN_F32: _VOP1Op_V_SIN_F32, + VOP1Op.V_COS_F32: _VOP1Op_V_COS_F32, + VOP1Op.V_NOT_B32: _VOP1Op_V_NOT_B32, + VOP1Op.V_BFREV_B32: _VOP1Op_V_BFREV_B32, + VOP1Op.V_CLZ_I32_U32: _VOP1Op_V_CLZ_I32_U32, + VOP1Op.V_CTZ_I32_B32: _VOP1Op_V_CTZ_I32_B32, + VOP1Op.V_CLS_I32: _VOP1Op_V_CLS_I32, + VOP1Op.V_FREXP_EXP_I32_F64: _VOP1Op_V_FREXP_EXP_I32_F64, + VOP1Op.V_FREXP_MANT_F64: _VOP1Op_V_FREXP_MANT_F64, + VOP1Op.V_FRACT_F64: _VOP1Op_V_FRACT_F64, + VOP1Op.V_FREXP_EXP_I32_F32: _VOP1Op_V_FREXP_EXP_I32_F32, + VOP1Op.V_FREXP_MANT_F32: _VOP1Op_V_FREXP_MANT_F32, + VOP1Op.V_MOVRELS_B32: _VOP1Op_V_MOVRELS_B32, + VOP1Op.V_CVT_F16_U16: _VOP1Op_V_CVT_F16_U16, + VOP1Op.V_CVT_F16_I16: _VOP1Op_V_CVT_F16_I16, + VOP1Op.V_CVT_U16_F16: _VOP1Op_V_CVT_U16_F16, + VOP1Op.V_CVT_I16_F16: _VOP1Op_V_CVT_I16_F16, + VOP1Op.V_RCP_F16: _VOP1Op_V_RCP_F16, + VOP1Op.V_SQRT_F16: _VOP1Op_V_SQRT_F16, + VOP1Op.V_RSQ_F16: _VOP1Op_V_RSQ_F16, + VOP1Op.V_LOG_F16: _VOP1Op_V_LOG_F16, + VOP1Op.V_EXP_F16: _VOP1Op_V_EXP_F16, + VOP1Op.V_FREXP_MANT_F16: _VOP1Op_V_FREXP_MANT_F16, + VOP1Op.V_FREXP_EXP_I16_F16: _VOP1Op_V_FREXP_EXP_I16_F16, + VOP1Op.V_FLOOR_F16: _VOP1Op_V_FLOOR_F16, + VOP1Op.V_CEIL_F16: _VOP1Op_V_CEIL_F16, + VOP1Op.V_TRUNC_F16: _VOP1Op_V_TRUNC_F16, + VOP1Op.V_RNDNE_F16: _VOP1Op_V_RNDNE_F16, + VOP1Op.V_FRACT_F16: _VOP1Op_V_FRACT_F16, + VOP1Op.V_SIN_F16: _VOP1Op_V_SIN_F16, + VOP1Op.V_COS_F16: _VOP1Op_V_COS_F16, + VOP1Op.V_CVT_NORM_I16_F16: _VOP1Op_V_CVT_NORM_I16_F16, + VOP1Op.V_CVT_NORM_U16_F16: _VOP1Op_V_CVT_NORM_U16_F16, + VOP1Op.V_SWAP_B32: _VOP1Op_V_SWAP_B32, + VOP1Op.V_SWAP_B16: _VOP1Op_V_SWAP_B16, + VOP1Op.V_NOT_B16: _VOP1Op_V_NOT_B16, + VOP1Op.V_CVT_I32_I16: _VOP1Op_V_CVT_I32_I16, + VOP1Op.V_CVT_U32_U16: _VOP1Op_V_CVT_U32_U16, } -def _VOP2Op_V_CNDMASK_B32(S0, S1, D0, VCC, laneId): +def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) -def _VOP2Op_V_DOT2ACC_F32_F16(S0, S1, D0, tmp): +def _VOP2Op_V_DOT2ACC_F32_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = D0.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -2320,19 +2320,19 @@ def _VOP2Op_V_DOT2ACC_F32_F16(S0, S1, D0, tmp): tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp -def _VOP2Op_V_ADD_F32(S0, S1, D0): +def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 + S1.f32 D0.f32 = S0.f32 + S1.f32 -def _VOP2Op_V_SUB_F32(S0, S1, D0): +def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 - S1.f32 D0.f32 = S0.f32 - S1.f32 -def _VOP2Op_V_SUBREV_F32(S0, S1, D0): +def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S1.f32 - S0.f32 D0.f32 = S1.f32 - S0.f32 -def _VOP2Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0): +def _VOP2Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -2344,7 +2344,7 @@ def _VOP2Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0): else: D0.f32 = fma(S0.f32, S1.f32, D0.f32) -def _VOP2Op_V_MUL_DX9_ZERO_F32(S0, S1, D0): +def _VOP2Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -2356,27 +2356,27 @@ def _VOP2Op_V_MUL_DX9_ZERO_F32(S0, S1, D0): else: D0.f32 = S0.f32 * S1.f32 -def _VOP2Op_V_MUL_F32(S0, S1, D0): +def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 * S1.f32 D0.f32 = S0.f32 * S1.f32 -def _VOP2Op_V_MUL_I32_I24(S0, S1, D0): +def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) D0.i32 = (S0.i24) * (S1.i24) -def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, D0): +def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) D0.i32 = (((S0.i24) * (S1.i24)) >> 32) -def _VOP2Op_V_MUL_U32_U24(S0, S1, D0): +def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) D0.u32 = (S0.u24) * (S1.u24) -def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, D0): +def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) D0.u32 = (((S0.u24) * (S1.u24)) >> 32) -def _VOP2Op_V_MIN_F32(S0, S1, D0): +def _VOP2Op_V_MIN_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -2429,7 +2429,7 @@ def _VOP2Op_V_MIN_F32(S0, S1, D0): else: D0.f32 = S1.f32 -def _VOP2Op_V_MAX_F32(S0, S1, D0): +def _VOP2Op_V_MAX_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -2482,51 +2482,51 @@ def _VOP2Op_V_MAX_F32(S0, S1, D0): else: D0.f32 = S1.f32 -def _VOP2Op_V_MIN_I32(S0, S1, D0): +def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) -def _VOP2Op_V_MAX_I32(S0, S1, D0): +def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) -def _VOP2Op_V_MIN_U32(S0, S1, D0): +def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) -def _VOP2Op_V_MAX_U32(S0, S1, D0): +def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) -def _VOP2Op_V_LSHLREV_B32(S0, S1, D0): +def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S1.u32 << S0[4 : 0].u32) D0.u32 = (S1.u32 << S0[4 : 0].u32) -def _VOP2Op_V_LSHRREV_B32(S0, S1, D0): +def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) D0.u32 = (S1.u32 >> S0[4 : 0].u32) -def _VOP2Op_V_ASHRREV_I32(S0, S1, D0): +def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) D0.i32 = (S1.i32 >> S0[4 : 0].u32) -def _VOP2Op_V_AND_B32(S0, S1, D0): +def _VOP2Op_V_AND_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 & S1.u32) D0.u32 = (S0.u32 & S1.u32) -def _VOP2Op_V_OR_B32(S0, S1, D0): +def _VOP2Op_V_OR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 | S1.u32) D0.u32 = (S0.u32 | S1.u32) -def _VOP2Op_V_XOR_B32(S0, S1, D0): +def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 ^ S1.u32) D0.u32 = (S0.u32 ^ S1.u32) -def _VOP2Op_V_XNOR_B32(S0, S1, D0): +def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~(S0.u32 ^ S1.u32) D0.u32 = ~(S0.u32 ^ S1.u32) -def _VOP2Op_V_ADD_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP2Op_V_ADD_CO_CI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -2535,7 +2535,7 @@ def _VOP2Op_V_ADD_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 -def _VOP2Op_V_SUB_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP2Op_V_SUB_CO_CI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -2544,7 +2544,7 @@ def _VOP2Op_V_SUB_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 -def _VOP2Op_V_SUBREV_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP2Op_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -2553,31 +2553,31 @@ def _VOP2Op_V_SUBREV_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 -def _VOP2Op_V_ADD_NC_U32(S0, S1, D0): +def _VOP2Op_V_ADD_NC_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 + S1.u32 D0.u32 = S0.u32 + S1.u32 -def _VOP2Op_V_SUB_NC_U32(S0, S1, D0): +def _VOP2Op_V_SUB_NC_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 - S1.u32 D0.u32 = S0.u32 - S1.u32 -def _VOP2Op_V_SUBREV_NC_U32(S0, S1, D0): +def _VOP2Op_V_SUBREV_NC_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S1.u32 - S0.u32 D0.u32 = S1.u32 - S0.u32 -def _VOP2Op_V_FMAC_F32(S0, S1, D0): +def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) D0.f32 = fma(S0.f32, S1.f32, D0.f32) -def _VOP2Op_V_FMAMK_F32(S0, S1, D0, SIMM32): +def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + D0.f32 = fma(S0.f32, S2.f32, S1.f32) -def _VOP2Op_V_FMAAK_F32(S0, S1, D0, SIMM32): +def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + D0.f32 = fma(S0.f32, S1.f32, S2.f32) -def _VOP2Op_V_CVT_PK_RTZ_F16_F32(S0, S1, tmp): +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -2585,35 +2585,35 @@ def _VOP2Op_V_CVT_PK_RTZ_F16_F32(S0, S1, tmp): tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) -def _VOP2Op_V_ADD_F16(S0, S1, D0): +def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 + S1.f16 D0.f16 = S0.f16 + S1.f16 -def _VOP2Op_V_SUB_F16(S0, S1, D0): +def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 - S1.f16 D0.f16 = S0.f16 - S1.f16 -def _VOP2Op_V_SUBREV_F16(S0, S1, D0): +def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S1.f16 - S0.f16 D0.f16 = S1.f16 - S0.f16 -def _VOP2Op_V_MUL_F16(S0, S1, D0): +def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 * S1.f16 D0.f16 = S0.f16 * S1.f16 -def _VOP2Op_V_FMAC_F16(S0, S1, D0): +def _VOP2Op_V_FMAC_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) D0.f16 = fma(S0.f16, S1.f16, D0.f16) -def _VOP2Op_V_FMAMK_F16(S0, S1, D0, SIMM32): +def _VOP2Op_V_FMAMK_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) + D0.f16 = fma(S0.f16, S2.f16, S1.f16) -def _VOP2Op_V_FMAAK_F16(S0, S1, D0, SIMM32): +def _VOP2Op_V_FMAAK_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) + D0.f16 = fma(S0.f16, S1.f16, S2.f16) -def _VOP2Op_V_MAX_F16(S0, S1, D0): +def _VOP2Op_V_MAX_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -2666,7 +2666,7 @@ def _VOP2Op_V_MAX_F16(S0, S1, D0): else: D0.f16 = S1.f16 -def _VOP2Op_V_MIN_F16(S0, S1, D0): +def _VOP2Op_V_MIN_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -2719,606 +2719,606 @@ def _VOP2Op_V_MIN_F16(S0, S1, D0): else: D0.f16 = S1.f16 -def _VOP2Op_V_LDEXP_F16(S0, S1, D0): +def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) -def _VOP2Op_V_PK_FMAC_F16(S0, S1, D0): +def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16); # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) VOP2Op_FUNCTIONS = { - VOP2Op.V_CNDMASK_B32: (_VOP2Op_V_CNDMASK_B32, (False, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP2Op.V_DOT2ACC_F32_F16: (_VOP2Op_V_DOT2ACC_F32_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP2Op.V_ADD_F32: (_VOP2Op_V_ADD_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_SUB_F32: (_VOP2Op_V_SUB_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_SUBREV_F32: (_VOP2Op_V_SUBREV_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_FMAC_DX9_ZERO_F32: (_VOP2Op_V_FMAC_DX9_ZERO_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP2Op.V_MUL_DX9_ZERO_F32: (_VOP2Op_V_MUL_DX9_ZERO_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MUL_F32: (_VOP2Op_V_MUL_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MUL_I32_I24: (_VOP2Op_V_MUL_I32_I24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MUL_HI_I32_I24: (_VOP2Op_V_MUL_HI_I32_I24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MUL_U32_U24: (_VOP2Op_V_MUL_U32_U24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MUL_HI_U32_U24: (_VOP2Op_V_MUL_HI_U32_U24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MIN_F32: (_VOP2Op_V_MIN_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MAX_F32: (_VOP2Op_V_MAX_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MIN_I32: (_VOP2Op_V_MIN_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MAX_I32: (_VOP2Op_V_MAX_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MIN_U32: (_VOP2Op_V_MIN_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MAX_U32: (_VOP2Op_V_MAX_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_LSHLREV_B32: (_VOP2Op_V_LSHLREV_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_LSHRREV_B32: (_VOP2Op_V_LSHRREV_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_ASHRREV_I32: (_VOP2Op_V_ASHRREV_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_AND_B32: (_VOP2Op_V_AND_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_OR_B32: (_VOP2Op_V_OR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_XOR_B32: (_VOP2Op_V_XOR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_XNOR_B32: (_VOP2Op_V_XNOR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_ADD_CO_CI_U32: (_VOP2Op_V_ADD_CO_CI_U32, (False, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP2Op.V_SUB_CO_CI_U32: (_VOP2Op_V_SUB_CO_CI_U32, (False, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP2Op.V_SUBREV_CO_CI_U32: (_VOP2Op_V_SUBREV_CO_CI_U32, (False, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP2Op.V_ADD_NC_U32: (_VOP2Op_V_ADD_NC_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_SUB_NC_U32: (_VOP2Op_V_SUB_NC_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_SUBREV_NC_U32: (_VOP2Op_V_SUBREV_NC_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_FMAC_F32: (_VOP2Op_V_FMAC_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_FMAMK_F32: (_VOP2Op_V_FMAMK_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SIMM32'))), - VOP2Op.V_FMAAK_F32: (_VOP2Op_V_FMAAK_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SIMM32'))), - VOP2Op.V_CVT_PK_RTZ_F16_F32: (_VOP2Op_V_CVT_PK_RTZ_F16_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP2Op.V_ADD_F16: (_VOP2Op_V_ADD_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_SUB_F16: (_VOP2Op_V_SUB_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_SUBREV_F16: (_VOP2Op_V_SUBREV_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MUL_F16: (_VOP2Op_V_MUL_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_FMAC_F16: (_VOP2Op_V_FMAC_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_FMAMK_F16: (_VOP2Op_V_FMAMK_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SIMM32'))), - VOP2Op.V_FMAAK_F16: (_VOP2Op_V_FMAAK_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'SIMM32'))), - VOP2Op.V_MAX_F16: (_VOP2Op_V_MAX_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_MIN_F16: (_VOP2Op_V_MIN_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_LDEXP_F16: (_VOP2Op_V_LDEXP_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP2Op.V_PK_FMAC_F16: (_VOP2Op_V_PK_FMAC_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), + VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, + VOP2Op.V_DOT2ACC_F32_F16: _VOP2Op_V_DOT2ACC_F32_F16, + VOP2Op.V_ADD_F32: _VOP2Op_V_ADD_F32, + VOP2Op.V_SUB_F32: _VOP2Op_V_SUB_F32, + VOP2Op.V_SUBREV_F32: _VOP2Op_V_SUBREV_F32, + VOP2Op.V_FMAC_DX9_ZERO_F32: _VOP2Op_V_FMAC_DX9_ZERO_F32, + VOP2Op.V_MUL_DX9_ZERO_F32: _VOP2Op_V_MUL_DX9_ZERO_F32, + VOP2Op.V_MUL_F32: _VOP2Op_V_MUL_F32, + VOP2Op.V_MUL_I32_I24: _VOP2Op_V_MUL_I32_I24, + VOP2Op.V_MUL_HI_I32_I24: _VOP2Op_V_MUL_HI_I32_I24, + VOP2Op.V_MUL_U32_U24: _VOP2Op_V_MUL_U32_U24, + VOP2Op.V_MUL_HI_U32_U24: _VOP2Op_V_MUL_HI_U32_U24, + VOP2Op.V_MIN_F32: _VOP2Op_V_MIN_F32, + VOP2Op.V_MAX_F32: _VOP2Op_V_MAX_F32, + VOP2Op.V_MIN_I32: _VOP2Op_V_MIN_I32, + VOP2Op.V_MAX_I32: _VOP2Op_V_MAX_I32, + VOP2Op.V_MIN_U32: _VOP2Op_V_MIN_U32, + VOP2Op.V_MAX_U32: _VOP2Op_V_MAX_U32, + VOP2Op.V_LSHLREV_B32: _VOP2Op_V_LSHLREV_B32, + VOP2Op.V_LSHRREV_B32: _VOP2Op_V_LSHRREV_B32, + VOP2Op.V_ASHRREV_I32: _VOP2Op_V_ASHRREV_I32, + VOP2Op.V_AND_B32: _VOP2Op_V_AND_B32, + VOP2Op.V_OR_B32: _VOP2Op_V_OR_B32, + VOP2Op.V_XOR_B32: _VOP2Op_V_XOR_B32, + VOP2Op.V_XNOR_B32: _VOP2Op_V_XNOR_B32, + VOP2Op.V_ADD_CO_CI_U32: _VOP2Op_V_ADD_CO_CI_U32, + VOP2Op.V_SUB_CO_CI_U32: _VOP2Op_V_SUB_CO_CI_U32, + VOP2Op.V_SUBREV_CO_CI_U32: _VOP2Op_V_SUBREV_CO_CI_U32, + VOP2Op.V_ADD_NC_U32: _VOP2Op_V_ADD_NC_U32, + VOP2Op.V_SUB_NC_U32: _VOP2Op_V_SUB_NC_U32, + VOP2Op.V_SUBREV_NC_U32: _VOP2Op_V_SUBREV_NC_U32, + VOP2Op.V_FMAC_F32: _VOP2Op_V_FMAC_F32, + VOP2Op.V_FMAMK_F32: _VOP2Op_V_FMAMK_F32, + VOP2Op.V_FMAAK_F32: _VOP2Op_V_FMAAK_F32, + VOP2Op.V_CVT_PK_RTZ_F16_F32: _VOP2Op_V_CVT_PK_RTZ_F16_F32, + VOP2Op.V_ADD_F16: _VOP2Op_V_ADD_F16, + VOP2Op.V_SUB_F16: _VOP2Op_V_SUB_F16, + VOP2Op.V_SUBREV_F16: _VOP2Op_V_SUBREV_F16, + VOP2Op.V_MUL_F16: _VOP2Op_V_MUL_F16, + VOP2Op.V_FMAC_F16: _VOP2Op_V_FMAC_F16, + VOP2Op.V_FMAMK_F16: _VOP2Op_V_FMAMK_F16, + VOP2Op.V_FMAAK_F16: _VOP2Op_V_FMAAK_F16, + VOP2Op.V_MAX_F16: _VOP2Op_V_MAX_F16, + VOP2Op.V_MIN_F16: _VOP2Op_V_MIN_F16, + VOP2Op.V_LDEXP_F16: _VOP2Op_V_LDEXP_F16, + VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, } -def _VOP3Op_V_CMP_F_F16(D0, VCC, laneId): +def _VOP3Op_V_CMP_F_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOP3Op_V_CMP_LT_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 < S1.f16 -def _VOP3Op_V_CMP_EQ_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 == S1.f16 -def _VOP3Op_V_CMP_LE_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 <= S1.f16 -def _VOP3Op_V_CMP_GT_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 > S1.f16 -def _VOP3Op_V_CMP_LG_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 != S1.f16 -def _VOP3Op_V_CMP_GE_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 >= S1.f16 -def _VOP3Op_V_CMP_O_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_O_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) -def _VOP3Op_V_CMP_U_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_U_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) -def _VOP3Op_V_CMP_NGE_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NGE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 >= S1.f16) -def _VOP3Op_V_CMP_NLG_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 != S1.f16) -def _VOP3Op_V_CMP_NGT_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NGT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 > S1.f16) -def _VOP3Op_V_CMP_NLE_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 <= S1.f16) -def _VOP3Op_V_CMP_NEQ_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NEQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 == S1.f16) -def _VOP3Op_V_CMP_NLT_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 < S1.f16) -def _VOP3Op_V_CMP_T_F16(D0, VCC, laneId): +def _VOP3Op_V_CMP_T_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOP3Op_V_CMP_F_F32(D0, VCC, laneId): +def _VOP3Op_V_CMP_F_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOP3Op_V_CMP_LT_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 < S1.f32 -def _VOP3Op_V_CMP_EQ_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 == S1.f32 -def _VOP3Op_V_CMP_LE_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 <= S1.f32 -def _VOP3Op_V_CMP_GT_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 > S1.f32 -def _VOP3Op_V_CMP_LG_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 != S1.f32 -def _VOP3Op_V_CMP_GE_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 >= S1.f32 -def _VOP3Op_V_CMP_O_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_O_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) -def _VOP3Op_V_CMP_U_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_U_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) -def _VOP3Op_V_CMP_NGE_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NGE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 >= S1.f32) -def _VOP3Op_V_CMP_NLG_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 != S1.f32) -def _VOP3Op_V_CMP_NGT_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NGT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 > S1.f32) -def _VOP3Op_V_CMP_NLE_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 <= S1.f32) -def _VOP3Op_V_CMP_NEQ_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NEQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 == S1.f32) -def _VOP3Op_V_CMP_NLT_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 < S1.f32) -def _VOP3Op_V_CMP_T_F32(D0, VCC, laneId): +def _VOP3Op_V_CMP_T_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOP3Op_V_CMP_F_F64(D0, VCC, laneId): +def _VOP3Op_V_CMP_F_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOP3Op_V_CMP_LT_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 < S1.f64 -def _VOP3Op_V_CMP_EQ_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 == S1.f64 -def _VOP3Op_V_CMP_LE_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 <= S1.f64 -def _VOP3Op_V_CMP_GT_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 > S1.f64 -def _VOP3Op_V_CMP_LG_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 != S1.f64 -def _VOP3Op_V_CMP_GE_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 >= S1.f64 -def _VOP3Op_V_CMP_O_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_O_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) -def _VOP3Op_V_CMP_U_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_U_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) -def _VOP3Op_V_CMP_NGE_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NGE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 >= S1.f64) -def _VOP3Op_V_CMP_NLG_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 != S1.f64) -def _VOP3Op_V_CMP_NGT_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NGT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 > S1.f64) -def _VOP3Op_V_CMP_NLE_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 <= S1.f64) -def _VOP3Op_V_CMP_NEQ_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NEQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 == S1.f64) -def _VOP3Op_V_CMP_NLT_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NLT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 < S1.f64) -def _VOP3Op_V_CMP_T_F64(D0, VCC, laneId): +def _VOP3Op_V_CMP_T_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOP3Op_V_CMP_LT_I16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 < S1.i16 -def _VOP3Op_V_CMP_EQ_I16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 == S1.i16 -def _VOP3Op_V_CMP_LE_I16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 <= S1.i16 -def _VOP3Op_V_CMP_GT_I16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 > S1.i16 -def _VOP3Op_V_CMP_NE_I16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 != S1.i16 -def _VOP3Op_V_CMP_GE_I16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 >= S1.i16 -def _VOP3Op_V_CMP_LT_U16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 < S1.u16 -def _VOP3Op_V_CMP_EQ_U16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 == S1.u16 -def _VOP3Op_V_CMP_LE_U16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 <= S1.u16 -def _VOP3Op_V_CMP_GT_U16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 > S1.u16 -def _VOP3Op_V_CMP_NE_U16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 != S1.u16 -def _VOP3Op_V_CMP_GE_U16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 >= S1.u16 -def _VOP3Op_V_CMP_F_I32(D0, VCC, laneId): +def _VOP3Op_V_CMP_F_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOP3Op_V_CMP_LT_I32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 < S1.i32 -def _VOP3Op_V_CMP_EQ_I32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 == S1.i32 -def _VOP3Op_V_CMP_LE_I32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 <= S1.i32 -def _VOP3Op_V_CMP_GT_I32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 > S1.i32 -def _VOP3Op_V_CMP_NE_I32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 != S1.i32 -def _VOP3Op_V_CMP_GE_I32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 >= S1.i32 -def _VOP3Op_V_CMP_T_I32(D0, VCC, laneId): +def _VOP3Op_V_CMP_T_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOP3Op_V_CMP_F_U32(D0, VCC, laneId): +def _VOP3Op_V_CMP_F_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOP3Op_V_CMP_LT_U32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 < S1.u32 -def _VOP3Op_V_CMP_EQ_U32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 == S1.u32 -def _VOP3Op_V_CMP_LE_U32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 <= S1.u32 -def _VOP3Op_V_CMP_GT_U32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 > S1.u32 -def _VOP3Op_V_CMP_NE_U32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 != S1.u32 -def _VOP3Op_V_CMP_GE_U32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 >= S1.u32 -def _VOP3Op_V_CMP_T_U32(D0, VCC, laneId): +def _VOP3Op_V_CMP_T_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOP3Op_V_CMP_F_I64(D0, VCC, laneId): +def _VOP3Op_V_CMP_F_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOP3Op_V_CMP_LT_I64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 < S1.i64 -def _VOP3Op_V_CMP_EQ_I64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 == S1.i64 -def _VOP3Op_V_CMP_LE_I64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 <= S1.i64 -def _VOP3Op_V_CMP_GT_I64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 > S1.i64 -def _VOP3Op_V_CMP_NE_I64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 != S1.i64 -def _VOP3Op_V_CMP_GE_I64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 >= S1.i64 -def _VOP3Op_V_CMP_T_I64(D0, VCC, laneId): +def _VOP3Op_V_CMP_T_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOP3Op_V_CMP_F_U64(D0, VCC, laneId): +def _VOP3Op_V_CMP_F_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOP3Op_V_CMP_LT_U64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 < S1.u64 -def _VOP3Op_V_CMP_EQ_U64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_EQ_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 == S1.u64 -def _VOP3Op_V_CMP_LE_U64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_LE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 <= S1.u64 -def _VOP3Op_V_CMP_GT_U64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 > S1.u64 -def _VOP3Op_V_CMP_NE_U64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_NE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 != S1.u64 -def _VOP3Op_V_CMP_GE_U64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_GE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 >= S1.u64 -def _VOP3Op_V_CMP_T_U64(D0, VCC, laneId): +def _VOP3Op_V_CMP_T_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOP3Op_V_CMP_CLASS_F16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_CLASS_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -3364,7 +3364,7 @@ def _VOP3Op_V_CMP_CLASS_F16(S0, S1, D0, VCC, laneId): result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result -def _VOP3Op_V_CMP_CLASS_F32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_CLASS_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -3410,7 +3410,7 @@ def _VOP3Op_V_CMP_CLASS_F32(S0, S1, D0, VCC, laneId): result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result -def _VOP3Op_V_CMP_CLASS_F64(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CMP_CLASS_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -3456,402 +3456,402 @@ def _VOP3Op_V_CMP_CLASS_F64(S0, S1, D0, VCC, laneId): result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result -def _VOP3Op_V_CMPX_F_F16(EXEC, laneId): +def _VOP3Op_V_CMPX_F_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOP3Op_V_CMPX_LT_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 < S1.f16 EXEC.u64[laneId] = S0.f16 < S1.f16 -def _VOP3Op_V_CMPX_EQ_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 EXEC.u64[laneId] = S0.f16 == S1.f16 -def _VOP3Op_V_CMPX_LE_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 <= S1.f16 EXEC.u64[laneId] = S0.f16 <= S1.f16 -def _VOP3Op_V_CMPX_GT_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 > S1.f16 EXEC.u64[laneId] = S0.f16 > S1.f16 -def _VOP3Op_V_CMPX_LG_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 <> S1.f16 EXEC.u64[laneId] = S0.f16 != S1.f16 -def _VOP3Op_V_CMPX_GE_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 >= S1.f16 EXEC.u64[laneId] = S0.f16 >= S1.f16 -def _VOP3Op_V_CMPX_O_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_O_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) -def _VOP3Op_V_CMPX_U_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_U_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) -def _VOP3Op_V_CMPX_NGE_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NGE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < EXEC.u64[laneId] = not (S0.f16 >= S1.f16) -def _VOP3Op_V_CMPX_NLG_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == EXEC.u64[laneId] = not (S0.f16 != S1.f16) -def _VOP3Op_V_CMPX_NGT_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NGT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= EXEC.u64[laneId] = not (S0.f16 > S1.f16) -def _VOP3Op_V_CMPX_NLE_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > EXEC.u64[laneId] = not (S0.f16 <= S1.f16) -def _VOP3Op_V_CMPX_NEQ_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NEQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != EXEC.u64[laneId] = not (S0.f16 == S1.f16) -def _VOP3Op_V_CMPX_NLT_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= EXEC.u64[laneId] = not (S0.f16 < S1.f16) -def _VOP3Op_V_CMPX_T_F16(EXEC, laneId): +def _VOP3Op_V_CMPX_T_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOP3Op_V_CMPX_F_F32(EXEC, laneId): +def _VOP3Op_V_CMPX_F_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOP3Op_V_CMPX_LT_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 < S1.f32 EXEC.u64[laneId] = S0.f32 < S1.f32 -def _VOP3Op_V_CMPX_EQ_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 EXEC.u64[laneId] = S0.f32 == S1.f32 -def _VOP3Op_V_CMPX_LE_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 <= S1.f32 EXEC.u64[laneId] = S0.f32 <= S1.f32 -def _VOP3Op_V_CMPX_GT_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 > S1.f32 EXEC.u64[laneId] = S0.f32 > S1.f32 -def _VOP3Op_V_CMPX_LG_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 <> S1.f32 EXEC.u64[laneId] = S0.f32 != S1.f32 -def _VOP3Op_V_CMPX_GE_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 >= S1.f32 EXEC.u64[laneId] = S0.f32 >= S1.f32 -def _VOP3Op_V_CMPX_O_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_O_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) -def _VOP3Op_V_CMPX_U_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_U_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) -def _VOP3Op_V_CMPX_NGE_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NGE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < EXEC.u64[laneId] = not (S0.f32 >= S1.f32) -def _VOP3Op_V_CMPX_NLG_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == EXEC.u64[laneId] = not (S0.f32 != S1.f32) -def _VOP3Op_V_CMPX_NGT_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NGT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= EXEC.u64[laneId] = not (S0.f32 > S1.f32) -def _VOP3Op_V_CMPX_NLE_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > EXEC.u64[laneId] = not (S0.f32 <= S1.f32) -def _VOP3Op_V_CMPX_NEQ_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NEQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != EXEC.u64[laneId] = not (S0.f32 == S1.f32) -def _VOP3Op_V_CMPX_NLT_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= EXEC.u64[laneId] = not (S0.f32 < S1.f32) -def _VOP3Op_V_CMPX_T_F32(EXEC, laneId): +def _VOP3Op_V_CMPX_T_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOP3Op_V_CMPX_F_F64(EXEC, laneId): +def _VOP3Op_V_CMPX_F_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOP3Op_V_CMPX_LT_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 < S1.f64 EXEC.u64[laneId] = S0.f64 < S1.f64 -def _VOP3Op_V_CMPX_EQ_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 EXEC.u64[laneId] = S0.f64 == S1.f64 -def _VOP3Op_V_CMPX_LE_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 <= S1.f64 EXEC.u64[laneId] = S0.f64 <= S1.f64 -def _VOP3Op_V_CMPX_GT_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 > S1.f64 EXEC.u64[laneId] = S0.f64 > S1.f64 -def _VOP3Op_V_CMPX_LG_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 <> S1.f64 EXEC.u64[laneId] = S0.f64 != S1.f64 -def _VOP3Op_V_CMPX_GE_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 >= S1.f64 EXEC.u64[laneId] = S0.f64 >= S1.f64 -def _VOP3Op_V_CMPX_O_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_O_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) -def _VOP3Op_V_CMPX_U_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_U_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) -def _VOP3Op_V_CMPX_NGE_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NGE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < EXEC.u64[laneId] = not (S0.f64 >= S1.f64) -def _VOP3Op_V_CMPX_NLG_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == EXEC.u64[laneId] = not (S0.f64 != S1.f64) -def _VOP3Op_V_CMPX_NGT_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NGT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= EXEC.u64[laneId] = not (S0.f64 > S1.f64) -def _VOP3Op_V_CMPX_NLE_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > EXEC.u64[laneId] = not (S0.f64 <= S1.f64) -def _VOP3Op_V_CMPX_NEQ_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NEQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != EXEC.u64[laneId] = not (S0.f64 == S1.f64) -def _VOP3Op_V_CMPX_NLT_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NLT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= EXEC.u64[laneId] = not (S0.f64 < S1.f64) -def _VOP3Op_V_CMPX_T_F64(EXEC, laneId): +def _VOP3Op_V_CMPX_T_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOP3Op_V_CMPX_LT_I16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 < S1.i16 EXEC.u64[laneId] = S0.i16 < S1.i16 -def _VOP3Op_V_CMPX_EQ_I16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 EXEC.u64[laneId] = S0.i16 == S1.i16 -def _VOP3Op_V_CMPX_LE_I16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 <= S1.i16 EXEC.u64[laneId] = S0.i16 <= S1.i16 -def _VOP3Op_V_CMPX_GT_I16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 > S1.i16 EXEC.u64[laneId] = S0.i16 > S1.i16 -def _VOP3Op_V_CMPX_NE_I16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 <> S1.i16 EXEC.u64[laneId] = S0.i16 != S1.i16 -def _VOP3Op_V_CMPX_GE_I16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 >= S1.i16 EXEC.u64[laneId] = S0.i16 >= S1.i16 -def _VOP3Op_V_CMPX_LT_U16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 < S1.u16 EXEC.u64[laneId] = S0.u16 < S1.u16 -def _VOP3Op_V_CMPX_EQ_U16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 EXEC.u64[laneId] = S0.u16 == S1.u16 -def _VOP3Op_V_CMPX_LE_U16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 <= S1.u16 EXEC.u64[laneId] = S0.u16 <= S1.u16 -def _VOP3Op_V_CMPX_GT_U16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 > S1.u16 EXEC.u64[laneId] = S0.u16 > S1.u16 -def _VOP3Op_V_CMPX_NE_U16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 <> S1.u16 EXEC.u64[laneId] = S0.u16 != S1.u16 -def _VOP3Op_V_CMPX_GE_U16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 >= S1.u16 EXEC.u64[laneId] = S0.u16 >= S1.u16 -def _VOP3Op_V_CMPX_F_I32(EXEC, laneId): +def _VOP3Op_V_CMPX_F_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOP3Op_V_CMPX_LT_I32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 < S1.i32 EXEC.u64[laneId] = S0.i32 < S1.i32 -def _VOP3Op_V_CMPX_EQ_I32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 EXEC.u64[laneId] = S0.i32 == S1.i32 -def _VOP3Op_V_CMPX_LE_I32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 <= S1.i32 EXEC.u64[laneId] = S0.i32 <= S1.i32 -def _VOP3Op_V_CMPX_GT_I32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 > S1.i32 EXEC.u64[laneId] = S0.i32 > S1.i32 -def _VOP3Op_V_CMPX_NE_I32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 <> S1.i32 EXEC.u64[laneId] = S0.i32 != S1.i32 -def _VOP3Op_V_CMPX_GE_I32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 >= S1.i32 EXEC.u64[laneId] = S0.i32 >= S1.i32 -def _VOP3Op_V_CMPX_T_I32(EXEC, laneId): +def _VOP3Op_V_CMPX_T_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOP3Op_V_CMPX_F_U32(EXEC, laneId): +def _VOP3Op_V_CMPX_F_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOP3Op_V_CMPX_LT_U32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 < S1.u32 EXEC.u64[laneId] = S0.u32 < S1.u32 -def _VOP3Op_V_CMPX_EQ_U32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 EXEC.u64[laneId] = S0.u32 == S1.u32 -def _VOP3Op_V_CMPX_LE_U32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 <= S1.u32 EXEC.u64[laneId] = S0.u32 <= S1.u32 -def _VOP3Op_V_CMPX_GT_U32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 > S1.u32 EXEC.u64[laneId] = S0.u32 > S1.u32 -def _VOP3Op_V_CMPX_NE_U32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 <> S1.u32 EXEC.u64[laneId] = S0.u32 != S1.u32 -def _VOP3Op_V_CMPX_GE_U32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 >= S1.u32 EXEC.u64[laneId] = S0.u32 >= S1.u32 -def _VOP3Op_V_CMPX_T_U32(EXEC, laneId): +def _VOP3Op_V_CMPX_T_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOP3Op_V_CMPX_F_I64(EXEC, laneId): +def _VOP3Op_V_CMPX_F_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOP3Op_V_CMPX_LT_I64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 < S1.i64 EXEC.u64[laneId] = S0.i64 < S1.i64 -def _VOP3Op_V_CMPX_EQ_I64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 EXEC.u64[laneId] = S0.i64 == S1.i64 -def _VOP3Op_V_CMPX_LE_I64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 <= S1.i64 EXEC.u64[laneId] = S0.i64 <= S1.i64 -def _VOP3Op_V_CMPX_GT_I64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 > S1.i64 EXEC.u64[laneId] = S0.i64 > S1.i64 -def _VOP3Op_V_CMPX_NE_I64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 <> S1.i64 EXEC.u64[laneId] = S0.i64 != S1.i64 -def _VOP3Op_V_CMPX_GE_I64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 >= S1.i64 EXEC.u64[laneId] = S0.i64 >= S1.i64 -def _VOP3Op_V_CMPX_T_I64(EXEC, laneId): +def _VOP3Op_V_CMPX_T_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOP3Op_V_CMPX_F_U64(EXEC, laneId): +def _VOP3Op_V_CMPX_F_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOP3Op_V_CMPX_LT_U64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 < S1.u64 EXEC.u64[laneId] = S0.u64 < S1.u64 -def _VOP3Op_V_CMPX_EQ_U64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_EQ_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 EXEC.u64[laneId] = S0.u64 == S1.u64 -def _VOP3Op_V_CMPX_LE_U64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_LE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 <= S1.u64 EXEC.u64[laneId] = S0.u64 <= S1.u64 -def _VOP3Op_V_CMPX_GT_U64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 > S1.u64 EXEC.u64[laneId] = S0.u64 > S1.u64 -def _VOP3Op_V_CMPX_NE_U64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_NE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 <> S1.u64 EXEC.u64[laneId] = S0.u64 != S1.u64 -def _VOP3Op_V_CMPX_GE_U64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_GE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 >= S1.u64 EXEC.u64[laneId] = S0.u64 >= S1.u64 -def _VOP3Op_V_CMPX_T_U64(EXEC, laneId): +def _VOP3Op_V_CMPX_T_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOP3Op_V_CMPX_CLASS_F16(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_CLASS_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -3895,7 +3895,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(S0, S1, EXEC, laneId): result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result -def _VOP3Op_V_CMPX_CLASS_F32(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_CLASS_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -3939,7 +3939,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(S0, S1, EXEC, laneId): result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result -def _VOP3Op_V_CMPX_CLASS_F64(S0, S1, EXEC, laneId): +def _VOP3Op_V_CMPX_CLASS_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -3983,11 +3983,11 @@ def _VOP3Op_V_CMPX_CLASS_F64(S0, S1, EXEC, laneId): result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result -def _VOP3Op_V_MOV_B32(S0, D0): +def _VOP3Op_V_MOV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.b32 = S0.b32 D0.b32 = S0.b32 -def _VOP3Op_V_READFIRSTLANE_B32(D0, EXEC, SRC0, VGPR): +def _VOP3Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -4022,83 +4022,83 @@ def _VOP3Op_V_READFIRSTLANE_B32(D0, EXEC, SRC0, VGPR): lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] -def _VOP3Op_V_CVT_I32_F64(S0, D0): +def _VOP3Op_V_CVT_I32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f64_to_i32(S0.f64) D0.i32 = f64_to_i32(S0.f64) -def _VOP3Op_V_CVT_F64_I32(S0, D0): +def _VOP3Op_V_CVT_F64_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = i32_to_f64(S0.i32) D0.f64 = i32_to_f64(S0.i32) -def _VOP3Op_V_CVT_F32_I32(S0, D0): +def _VOP3Op_V_CVT_F32_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = i32_to_f32(S0.i32) D0.f32 = i32_to_f32(S0.i32) -def _VOP3Op_V_CVT_F32_U32(S0, D0): +def _VOP3Op_V_CVT_F32_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0.u32) D0.f32 = u32_to_f32(S0.u32) -def _VOP3Op_V_CVT_U32_F32(S0, D0): +def _VOP3Op_V_CVT_U32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = f32_to_u32(S0.f32) D0.u32 = f32_to_u32(S0.f32) -def _VOP3Op_V_CVT_I32_F32(S0, D0): +def _VOP3Op_V_CVT_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f32_to_i32(S0.f32) D0.i32 = f32_to_i32(S0.f32) -def _VOP3Op_V_CVT_F16_F32(S0, D0): +def _VOP3Op_V_CVT_F16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = f32_to_f16(S0.f32) D0.f16 = f32_to_f16(S0.f32) -def _VOP3Op_V_CVT_F32_F16(S0, D0): +def _VOP3Op_V_CVT_F32_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = f16_to_f32(S0.f16) D0.f32 = f16_to_f32(S0.f16) -def _VOP3Op_V_CVT_NEAREST_I32_F32(S0, D0): +def _VOP3Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) -def _VOP3Op_V_CVT_FLOOR_I32_F32(S0, D0): +def _VOP3Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = f32_to_i32(floor(S0.f32)) D0.i32 = f32_to_i32(floor(S0.f32)) -def _VOP3Op_V_CVT_F32_F64(S0, D0): +def _VOP3Op_V_CVT_F32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = f64_to_f32(S0.f64) D0.f32 = f64_to_f32(S0.f64) -def _VOP3Op_V_CVT_F64_F32(S0, D0): +def _VOP3Op_V_CVT_F64_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = f32_to_f64(S0.f32) D0.f64 = f32_to_f64(S0.f32) -def _VOP3Op_V_CVT_F32_UBYTE0(S0, D0): +def _VOP3Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[7 : 0].u32) D0.f32 = u32_to_f32(S0[7 : 0].u32) -def _VOP3Op_V_CVT_F32_UBYTE1(S0, D0): +def _VOP3Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[15 : 8].u32) D0.f32 = u32_to_f32(S0[15 : 8].u32) -def _VOP3Op_V_CVT_F32_UBYTE2(S0, D0): +def _VOP3Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[23 : 16].u32) D0.f32 = u32_to_f32(S0[23 : 16].u32) -def _VOP3Op_V_CVT_F32_UBYTE3(S0, D0): +def _VOP3Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = u32_to_f32(S0[31 : 24].u32) D0.f32 = u32_to_f32(S0[31 : 24].u32) -def _VOP3Op_V_CVT_U32_F64(S0, D0): +def _VOP3Op_V_CVT_U32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = f64_to_u32(S0.f64) D0.u32 = f64_to_u32(S0.f64) -def _VOP3Op_V_CVT_F64_U32(S0, D0): +def _VOP3Op_V_CVT_F64_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = u32_to_f64(S0.u32) D0.f64 = u32_to_f64(S0.u32) -def _VOP3Op_V_TRUNC_F64(S0, D0): +def _VOP3Op_V_TRUNC_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = trunc(S0.f64) D0.f64 = trunc(S0.f64) -def _VOP3Op_V_CEIL_F64(S0, D0): +def _VOP3Op_V_CEIL_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -4107,7 +4107,7 @@ def _VOP3Op_V_CEIL_F64(S0, D0): if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 -def _VOP3Op_V_RNDNE_F64(S0, D0): +def _VOP3Op_V_RNDNE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -4116,7 +4116,7 @@ def _VOP3Op_V_RNDNE_F64(S0, D0): if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 -def _VOP3Op_V_FLOOR_F64(S0, D0): +def _VOP3Op_V_FLOOR_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -4125,19 +4125,19 @@ def _VOP3Op_V_FLOOR_F64(S0, D0): if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 -def _VOP3Op_V_MOV_B16(S0, D0): +def _VOP3Op_V_MOV_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.b16 = S0.b16 D0.b16 = S0.b16 -def _VOP3Op_V_FRACT_F32(S0, D0): +def _VOP3Op_V_FRACT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 + -floor(S0.f32) D0.f32 = S0.f32 + -floor(S0.f32) -def _VOP3Op_V_TRUNC_F32(S0, D0): +def _VOP3Op_V_TRUNC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32) D0.f32 = trunc(S0.f32) -def _VOP3Op_V_CEIL_F32(S0, D0): +def _VOP3Op_V_CEIL_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -4146,7 +4146,7 @@ def _VOP3Op_V_CEIL_F32(S0, D0): if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 -def _VOP3Op_V_RNDNE_F32(S0, D0): +def _VOP3Op_V_RNDNE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -4155,7 +4155,7 @@ def _VOP3Op_V_RNDNE_F32(S0, D0): if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 -def _VOP3Op_V_FLOOR_F32(S0, D0): +def _VOP3Op_V_FLOOR_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -4164,60 +4164,60 @@ def _VOP3Op_V_FLOOR_F32(S0, D0): if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 -def _VOP3Op_V_EXP_F32(S0, D0): +def _VOP3Op_V_EXP_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = pow(2.0F, S0.f32) D0.f32 = pow(2.0, S0.f32) -def _VOP3Op_V_LOG_F32(S0, D0): +def _VOP3Op_V_LOG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = log2(S0.f32) D0.f32 = log2(S0.f32) -def _VOP3Op_V_RCP_F32(S0, D0): +def _VOP3Op_V_RCP_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = 1.0F / S0.f32 D0.f32 = 1.0 / S0.f32 -def _VOP3Op_V_RCP_IFLAG_F32(S0, D0): +def _VOP3Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception D0.f32 = 1.0 / S0.f32 -def _VOP3Op_V_RSQ_F32(S0, D0): +def _VOP3Op_V_RSQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = 1.0F / sqrt(S0.f32) D0.f32 = 1.0 / sqrt(S0.f32) -def _VOP3Op_V_RCP_F64(S0, D0): +def _VOP3Op_V_RCP_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = 1.0 / S0.f64 D0.f64 = 1.0 / S0.f64 -def _VOP3Op_V_RSQ_F64(S0, D0): +def _VOP3Op_V_RSQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = 1.0 / sqrt(S0.f64) D0.f64 = 1.0 / sqrt(S0.f64) -def _VOP3Op_V_SQRT_F32(S0, D0): +def _VOP3Op_V_SQRT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = sqrt(S0.f32) D0.f32 = sqrt(S0.f32) -def _VOP3Op_V_SQRT_F64(S0, D0): +def _VOP3Op_V_SQRT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = sqrt(S0.f64) D0.f64 = sqrt(S0.f64) -def _VOP3Op_V_SIN_F32(S0, D0): +def _VOP3Op_V_SIN_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) D0.f32 = sin(S0.f32 * F(PI * 2.0)) -def _VOP3Op_V_COS_F32(S0, D0): +def _VOP3Op_V_COS_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) D0.f32 = cos(S0.f32 * F(PI * 2.0)) -def _VOP3Op_V_NOT_B32(S0, D0): +def _VOP3Op_V_NOT_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~S0.u32 D0.u32 = ~S0.u32 -def _VOP3Op_V_BFREV_B32(S0, D0): +def _VOP3Op_V_BFREV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32[31 : 0] = S0.u32[0 : 31] D0.u32[31 : 0] = S0.u32[0 : 31] -def _VOP3Op_V_CLZ_I32_U32(S0, D0): +def _VOP3Op_V_CLZ_I32_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -4231,7 +4231,7 @@ def _VOP3Op_V_CLZ_I32_U32(S0, D0): if S0.u32[31 - i] == 1: D0.i32 = i; break -def _VOP3Op_V_CTZ_I32_B32(S0, D0): +def _VOP3Op_V_CTZ_I32_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -4245,7 +4245,7 @@ def _VOP3Op_V_CTZ_I32_B32(S0, D0): if S0.u32[i] == 1: D0.i32 = i; break -def _VOP3Op_V_CLS_I32(S0, D0): +def _VOP3Op_V_CLS_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -4259,7 +4259,7 @@ def _VOP3Op_V_CLS_I32(S0, D0): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i -def _VOP3Op_V_FREXP_EXP_I32_F64(S0, D0): +def _VOP3Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -4270,7 +4270,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F64(S0, D0): else: D0.i32 = exponent(S0.f64) - 1023 + 1 -def _VOP3Op_V_FREXP_MANT_F64(S0, D0): +def _VOP3Op_V_FREXP_MANT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -4281,11 +4281,11 @@ def _VOP3Op_V_FREXP_MANT_F64(S0, D0): else: D0.f64 = mantissa(S0.f64) -def _VOP3Op_V_FRACT_F64(S0, D0): +def _VOP3Op_V_FRACT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = S0.f64 + -floor(S0.f64) D0.f64 = S0.f64 + -floor(S0.f64) -def _VOP3Op_V_FREXP_EXP_I32_F32(S0, D0): +def _VOP3Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -4296,7 +4296,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F32(S0, D0): else: D0.i32 = exponent(S0.f32) - 127 + 1 -def _VOP3Op_V_FREXP_MANT_F32(S0, D0): +def _VOP3Op_V_FREXP_MANT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -4307,50 +4307,50 @@ def _VOP3Op_V_FREXP_MANT_F32(S0, D0): else: D0.f32 = mantissa(S0.f32) -def _VOP3Op_V_MOVRELS_B32(D0, laneId, SRC0, VGPR): +def _VOP3Op_V_MOVRELS_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 -def _VOP3Op_V_CVT_F16_U16(S0, D0): +def _VOP3Op_V_CVT_F16_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = u16_to_f16(S0.u16) D0.f16 = u16_to_f16(S0.u16) -def _VOP3Op_V_CVT_F16_I16(S0, D0): +def _VOP3Op_V_CVT_F16_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = i16_to_f16(S0.i16) D0.f16 = i16_to_f16(S0.i16) -def _VOP3Op_V_CVT_U16_F16(S0, D0): +def _VOP3Op_V_CVT_U16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = f16_to_u16(S0.f16) D0.u16 = f16_to_u16(S0.f16) -def _VOP3Op_V_CVT_I16_F16(S0, D0): +def _VOP3Op_V_CVT_I16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = f16_to_i16(S0.f16) D0.i16 = f16_to_i16(S0.f16) -def _VOP3Op_V_RCP_F16(S0, D0): +def _VOP3Op_V_RCP_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = 16'1.0 / S0.f16 D0.f16 = 1.0 / S0.f16 -def _VOP3Op_V_SQRT_F16(S0, D0): +def _VOP3Op_V_SQRT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = sqrt(S0.f16) D0.f16 = sqrt(S0.f16) -def _VOP3Op_V_RSQ_F16(S0, D0): +def _VOP3Op_V_RSQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = 16'1.0 / sqrt(S0.f16) D0.f16 = 1.0 / sqrt(S0.f16) -def _VOP3Op_V_LOG_F16(S0, D0): +def _VOP3Op_V_LOG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = log2(S0.f16) D0.f16 = log2(S0.f16) -def _VOP3Op_V_EXP_F16(S0, D0): +def _VOP3Op_V_EXP_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = pow(16'2.0, S0.f16) D0.f16 = pow(2.0, S0.f16) -def _VOP3Op_V_FREXP_MANT_F16(S0, D0): +def _VOP3Op_V_FREXP_MANT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -4361,7 +4361,7 @@ def _VOP3Op_V_FREXP_MANT_F16(S0, D0): else: D0.f16 = mantissa(S0.f16) -def _VOP3Op_V_FREXP_EXP_I16_F16(S0, D0): +def _VOP3Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -4372,7 +4372,7 @@ def _VOP3Op_V_FREXP_EXP_I16_F16(S0, D0): else: D0.i16 = (exponent(S0.f16) - 15 + 1) -def _VOP3Op_V_FLOOR_F16(S0, D0): +def _VOP3Op_V_FLOOR_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -4381,7 +4381,7 @@ def _VOP3Op_V_FLOOR_F16(S0, D0): if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 -def _VOP3Op_V_CEIL_F16(S0, D0): +def _VOP3Op_V_CEIL_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -4390,11 +4390,11 @@ def _VOP3Op_V_CEIL_F16(S0, D0): if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 -def _VOP3Op_V_TRUNC_F16(S0, D0): +def _VOP3Op_V_TRUNC_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = trunc(S0.f16) D0.f16 = trunc(S0.f16) -def _VOP3Op_V_RNDNE_F16(S0, D0): +def _VOP3Op_V_RNDNE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -4403,55 +4403,55 @@ def _VOP3Op_V_RNDNE_F16(S0, D0): if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 -def _VOP3Op_V_FRACT_F16(S0, D0): +def _VOP3Op_V_FRACT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 + -floor(S0.f16) D0.f16 = S0.f16 + -floor(S0.f16) -def _VOP3Op_V_SIN_F16(S0, D0): +def _VOP3Op_V_SIN_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) D0.f16 = sin(S0.f16 * F(PI * 2.0)) -def _VOP3Op_V_COS_F16(S0, D0): +def _VOP3Op_V_COS_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) D0.f16 = cos(S0.f16 * F(PI * 2.0)) -def _VOP3Op_V_CVT_NORM_I16_F16(S0, D0): +def _VOP3Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = f16_to_snorm(S0.f16) D0.i16 = f16_to_snorm(S0.f16) -def _VOP3Op_V_CVT_NORM_U16_F16(S0, D0): +def _VOP3Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = f16_to_unorm(S0.f16) D0.u16 = f16_to_unorm(S0.f16) -def _VOP3Op_V_NOT_B16(S0, D0): +def _VOP3Op_V_NOT_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = ~S0.u16 D0.u16 = ~S0.u16 -def _VOP3Op_V_CVT_I32_I16(S0, D0): +def _VOP3Op_V_CVT_I32_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(signext(S0.i16)) D0.i32 = (signext(S0.i16)) -def _VOP3Op_V_CVT_U32_U16(S0, D0): +def _VOP3Op_V_CVT_U32_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0 = { 16'0, S0.u16 } D0._val = int(_pack(0, S0.u16)) -def _VOP3Op_V_CNDMASK_B32(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CNDMASK_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) -def _VOP3Op_V_ADD_F32(S0, S1, D0): +def _VOP3Op_V_ADD_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 + S1.f32 D0.f32 = S0.f32 + S1.f32 -def _VOP3Op_V_SUB_F32(S0, S1, D0): +def _VOP3Op_V_SUB_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 - S1.f32 D0.f32 = S0.f32 - S1.f32 -def _VOP3Op_V_SUBREV_F32(S0, S1, D0): +def _VOP3Op_V_SUBREV_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S1.f32 - S0.f32 D0.f32 = S1.f32 - S0.f32 -def _VOP3Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0): +def _VOP3Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -4463,7 +4463,7 @@ def _VOP3Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0): else: D0.f32 = fma(S0.f32, S1.f32, D0.f32) -def _VOP3Op_V_MUL_DX9_ZERO_F32(S0, S1, D0): +def _VOP3Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -4475,27 +4475,27 @@ def _VOP3Op_V_MUL_DX9_ZERO_F32(S0, S1, D0): else: D0.f32 = S0.f32 * S1.f32 -def _VOP3Op_V_MUL_F32(S0, S1, D0): +def _VOP3Op_V_MUL_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 * S1.f32 D0.f32 = S0.f32 * S1.f32 -def _VOP3Op_V_MUL_I32_I24(S0, S1, D0): +def _VOP3Op_V_MUL_I32_I24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) D0.i32 = (S0.i24) * (S1.i24) -def _VOP3Op_V_MUL_HI_I32_I24(S0, S1, D0): +def _VOP3Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) D0.i32 = (((S0.i24) * (S1.i24)) >> 32) -def _VOP3Op_V_MUL_U32_U24(S0, S1, D0): +def _VOP3Op_V_MUL_U32_U24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) D0.u32 = (S0.u24) * (S1.u24) -def _VOP3Op_V_MUL_HI_U32_U24(S0, S1, D0): +def _VOP3Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) D0.u32 = (((S0.u24) * (S1.u24)) >> 32) -def _VOP3Op_V_MIN_F32(S0, S1, D0): +def _VOP3Op_V_MIN_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -4548,7 +4548,7 @@ def _VOP3Op_V_MIN_F32(S0, S1, D0): else: D0.f32 = S1.f32 -def _VOP3Op_V_MAX_F32(S0, S1, D0): +def _VOP3Op_V_MAX_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -4601,67 +4601,67 @@ def _VOP3Op_V_MAX_F32(S0, S1, D0): else: D0.f32 = S1.f32 -def _VOP3Op_V_MIN_I32(S0, S1, D0): +def _VOP3Op_V_MIN_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) -def _VOP3Op_V_MAX_I32(S0, S1, D0): +def _VOP3Op_V_MAX_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) -def _VOP3Op_V_MIN_U32(S0, S1, D0): +def _VOP3Op_V_MIN_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) -def _VOP3Op_V_MAX_U32(S0, S1, D0): +def _VOP3Op_V_MAX_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) -def _VOP3Op_V_LSHLREV_B32(S0, S1, D0): +def _VOP3Op_V_LSHLREV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S1.u32 << S0[4 : 0].u32) D0.u32 = (S1.u32 << S0[4 : 0].u32) -def _VOP3Op_V_LSHRREV_B32(S0, S1, D0): +def _VOP3Op_V_LSHRREV_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) D0.u32 = (S1.u32 >> S0[4 : 0].u32) -def _VOP3Op_V_ASHRREV_I32(S0, S1, D0): +def _VOP3Op_V_ASHRREV_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) D0.i32 = (S1.i32 >> S0[4 : 0].u32) -def _VOP3Op_V_AND_B32(S0, S1, D0): +def _VOP3Op_V_AND_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 & S1.u32) D0.u32 = (S0.u32 & S1.u32) -def _VOP3Op_V_OR_B32(S0, S1, D0): +def _VOP3Op_V_OR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 | S1.u32) D0.u32 = (S0.u32 | S1.u32) -def _VOP3Op_V_XOR_B32(S0, S1, D0): +def _VOP3Op_V_XOR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 ^ S1.u32) D0.u32 = (S0.u32 ^ S1.u32) -def _VOP3Op_V_XNOR_B32(S0, S1, D0): +def _VOP3Op_V_XNOR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ~(S0.u32 ^ S1.u32) D0.u32 = ~(S0.u32 ^ S1.u32) -def _VOP3Op_V_ADD_NC_U32(S0, S1, D0): +def _VOP3Op_V_ADD_NC_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 + S1.u32 D0.u32 = S0.u32 + S1.u32 -def _VOP3Op_V_SUB_NC_U32(S0, S1, D0): +def _VOP3Op_V_SUB_NC_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 - S1.u32 D0.u32 = S0.u32 - S1.u32 -def _VOP3Op_V_SUBREV_NC_U32(S0, S1, D0): +def _VOP3Op_V_SUBREV_NC_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S1.u32 - S0.u32 D0.u32 = S1.u32 - S0.u32 -def _VOP3Op_V_FMAC_F32(S0, S1, D0): +def _VOP3Op_V_FMAC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) D0.f32 = fma(S0.f32, S1.f32, D0.f32) -def _VOP3Op_V_CVT_PK_RTZ_F16_F32(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -4669,27 +4669,27 @@ def _VOP3Op_V_CVT_PK_RTZ_F16_F32(S0, S1, tmp): tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) -def _VOP3Op_V_ADD_F16(S0, S1, D0): +def _VOP3Op_V_ADD_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 + S1.f16 D0.f16 = S0.f16 + S1.f16 -def _VOP3Op_V_SUB_F16(S0, S1, D0): +def _VOP3Op_V_SUB_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 - S1.f16 D0.f16 = S0.f16 - S1.f16 -def _VOP3Op_V_SUBREV_F16(S0, S1, D0): +def _VOP3Op_V_SUBREV_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S1.f16 - S0.f16 D0.f16 = S1.f16 - S0.f16 -def _VOP3Op_V_MUL_F16(S0, S1, D0): +def _VOP3Op_V_MUL_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 * S1.f16 D0.f16 = S0.f16 * S1.f16 -def _VOP3Op_V_FMAC_F16(S0, S1, D0): +def _VOP3Op_V_FMAC_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) D0.f16 = fma(S0.f16, S1.f16, D0.f16) -def _VOP3Op_V_MAX_F16(S0, S1, D0): +def _VOP3Op_V_MAX_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -4742,7 +4742,7 @@ def _VOP3Op_V_MAX_F16(S0, S1, D0): else: D0.f16 = S1.f16 -def _VOP3Op_V_MIN_F16(S0, S1, D0): +def _VOP3Op_V_MIN_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -4795,11 +4795,11 @@ def _VOP3Op_V_MIN_F16(S0, S1, D0): else: D0.f16 = S1.f16 -def _VOP3Op_V_LDEXP_F16(S0, S1, D0): +def _VOP3Op_V_LDEXP_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) -def _VOP3Op_V_FMA_DX9_ZERO_F32(S0, S1, S2, D0): +def _VOP3Op_V_FMA_DX9_ZERO_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -4811,15 +4811,15 @@ def _VOP3Op_V_FMA_DX9_ZERO_F32(S0, S1, S2, D0): else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) -def _VOP3Op_V_MAD_I32_I24(S0, S1, S2, D0): +def _VOP3Op_V_MAD_I32_I24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 D0.i32 = (S0.i24) * (S1.i24) + S2.i32 -def _VOP3Op_V_MAD_U32_U24(S0, S1, S2, D0): +def _VOP3Op_V_MAD_U32_U24(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 D0.u32 = (S0.u24) * (S1.u24) + S2.u32 -def _VOP3Op_V_CUBEID_F32(S0, S1, S2, D0): +def _VOP3Op_V_CUBEID_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -4860,7 +4860,7 @@ def _VOP3Op_V_CUBEID_F32(S0, S1, S2, D0): else: D0.f32 = 0.0 -def _VOP3Op_V_CUBESC_F32(S0, S1, S2, D0): +def _VOP3Op_V_CUBESC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // D0.f = cubemap S coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -4894,7 +4894,7 @@ def _VOP3Op_V_CUBESC_F32(S0, S1, S2, D0): else: D0.f32 = -S2.f32 -def _VOP3Op_V_CUBETC_F32(S0, S1, S2, D0): +def _VOP3Op_V_CUBETC_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // D0.f = cubemap T coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -4921,7 +4921,7 @@ def _VOP3Op_V_CUBETC_F32(S0, S1, S2, D0): else: D0.f32 = -S1.f32 -def _VOP3Op_V_CUBEMA_F32(S0, S1, S2, D0): +def _VOP3Op_V_CUBEMA_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // D0.f = 2.0 * cubemap major axis. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -4941,29 +4941,29 @@ def _VOP3Op_V_CUBEMA_F32(S0, S1, S2, D0): else: D0.f32 = S0.f32 * 2.0 -def _VOP3Op_V_BFE_U32(S0, S1, S2, D0): +def _VOP3Op_V_BFE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) -def _VOP3Op_V_BFE_I32(S0, S1, S2, D0, tmp): +def _VOP3Op_V_BFE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) -def _VOP3Op_V_BFI_B32(S0, S1, S2, D0): +def _VOP3Op_V_BFI_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) -def _VOP3Op_V_FMA_F32(S0, S1, S2, D0): +def _VOP3Op_V_FMA_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = fma(S0.f32, S1.f32, S2.f32) D0.f32 = fma(S0.f32, S1.f32, S2.f32) -def _VOP3Op_V_FMA_F64(S0, S1, S2, D0): +def _VOP3Op_V_FMA_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = fma(S0.f64, S1.f64, S2.f64) D0.f64 = fma(S0.f64, S1.f64, S2.f64) -def _VOP3Op_V_LERP_U8(S0, S1, S2, D0, tmp): +def _VOP3Op_V_LERP_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); @@ -4975,15 +4975,15 @@ def _VOP3Op_V_LERP_U8(S0, S1, S2, D0, tmp): tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) D0.u32 = tmp.u32 -def _VOP3Op_V_ALIGNBIT_B32(S0, S1, S2, D0): +def _VOP3Op_V_ALIGNBIT_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0].u32) & 0xffffffffLL) D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0].u32) & 0xffffffff) -def _VOP3Op_V_ALIGNBYTE_B32(S0, S1, S2, D0): +def _VOP3Op_V_ALIGNBYTE_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0].u32 * 8U)) & 0xffffffffLL) D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0].u32 * 8)) & 0xffffffff) -def _VOP3Op_V_MULLIT_F32(S0, S1, S2, D0): +def _VOP3Op_V_MULLIT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || # isNAN(64'F(S2.f32))) then # D0.f32 = -MAX_FLOAT_F32 @@ -4995,31 +4995,31 @@ def _VOP3Op_V_MULLIT_F32(S0, S1, S2, D0): else: D0.f32 = S0.f32 * S1.f32 -def _VOP3Op_V_MIN3_F32(S0, S1, S2, D0): +def _VOP3Op_V_MIN3_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) -def _VOP3Op_V_MIN3_I32(S0, S1, S2, D0): +def _VOP3Op_V_MIN3_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) -def _VOP3Op_V_MIN3_U32(S0, S1, S2, D0): +def _VOP3Op_V_MIN3_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) -def _VOP3Op_V_MAX3_F32(S0, S1, S2, D0): +def _VOP3Op_V_MAX3_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) -def _VOP3Op_V_MAX3_I32(S0, S1, S2, D0): +def _VOP3Op_V_MAX3_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) -def _VOP3Op_V_MAX3_U32(S0, S1, S2, D0): +def _VOP3Op_V_MAX3_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) -def _VOP3Op_V_MED3_F32(S0, S1, S2, D0): +def _VOP3Op_V_MED3_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then @@ -5038,7 +5038,7 @@ def _VOP3Op_V_MED3_F32(S0, S1, S2, D0): else: D0.f32 = v_max_f32(S0.f32, S1.f32) -def _VOP3Op_V_MED3_I32(S0, S1, S2, D0): +def _VOP3Op_V_MED3_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then # D0.i32 = v_max_i32(S1.i32, S2.i32) # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then @@ -5053,7 +5053,7 @@ def _VOP3Op_V_MED3_I32(S0, S1, S2, D0): else: D0.i32 = v_max_i32(S0.i32, S1.i32) -def _VOP3Op_V_MED3_U32(S0, S1, S2, D0): +def _VOP3Op_V_MED3_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then # D0.u32 = v_max_u32(S1.u32, S2.u32) # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then @@ -5068,7 +5068,7 @@ def _VOP3Op_V_MED3_U32(S0, S1, S2, D0): else: D0.u32 = v_max_u32(S0.u32, S1.u32) -def _VOP3Op_V_SAD_U8(S0, S1, S2, D0, tmp): +def _VOP3Op_V_SAD_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // UNSIGNED comparison # tmp = S2.u32; # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -5083,11 +5083,11 @@ def _VOP3Op_V_SAD_U8(S0, S1, S2, D0, tmp): tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) D0.u32 = tmp -def _VOP3Op_V_SAD_HI_U8(S0, S1, S2, D0): +def _VOP3Op_V_SAD_HI_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 D0.u32 = ((v_sad_u8(S0, S1, 0)) << 16) + S2.u32 -def _VOP3Op_V_SAD_U16(S0, S1, S2, D0, tmp): +def _VOP3Op_V_SAD_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // UNSIGNED comparison # tmp = S2.u32; # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); @@ -5098,12 +5098,12 @@ def _VOP3Op_V_SAD_U16(S0, S1, S2, D0, tmp): tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) D0.u32 = tmp -def _VOP3Op_V_SAD_U32(S0, S1, S2, D0): +def _VOP3Op_V_SAD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // UNSIGNED comparison # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 -def _VOP3Op_V_CVT_PK_U8_F32(S0, S1, S2, D0, tmp): +def _VOP3Op_V_CVT_PK_U8_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); # D0.u32 = tmp @@ -5111,7 +5111,7 @@ def _VOP3Op_V_CVT_PK_U8_F32(S0, S1, S2, D0, tmp): tmp._val = int((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) D0.u32 = tmp -def _VOP3Op_V_DIV_FIXUP_F32(S0, S1, S2, D0): +def _VOP3Op_V_DIV_FIXUP_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # sign_out = (sign(S1.f32) ^ sign(S2.f32)); # if isNAN(64'F(S2.f32)) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) @@ -5156,7 +5156,7 @@ def _VOP3Op_V_DIV_FIXUP_F32(S0, S1, S2, D0): else: D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) -def _VOP3Op_V_DIV_FIXUP_F64(S0, S1, S2, D0): +def _VOP3Op_V_DIV_FIXUP_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # sign_out = (sign(S1.f64) ^ sign(S2.f64)); # if isNAN(S2.f64) then # D0.f64 = cvtToQuietNAN(S2.f64) @@ -5201,7 +5201,7 @@ def _VOP3Op_V_DIV_FIXUP_F64(S0, S1, S2, D0): else: D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) -def _VOP3Op_V_DIV_FMAS_F32(S0, S1, S2, D0, VCC, laneId): +def _VOP3Op_V_DIV_FMAS_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if VCC.u64[laneId] then # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) # else @@ -5212,7 +5212,7 @@ def _VOP3Op_V_DIV_FMAS_F32(S0, S1, S2, D0, VCC, laneId): else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) -def _VOP3Op_V_DIV_FMAS_F64(S0, S1, S2, D0, VCC, laneId): +def _VOP3Op_V_DIV_FMAS_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if VCC.u64[laneId] then # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) # else @@ -5223,7 +5223,7 @@ def _VOP3Op_V_DIV_FMAS_F64(S0, S1, S2, D0, VCC, laneId): else: D0.f64 = fma(S0.f64, S1.f64, S2.f64) -def _VOP3Op_V_MSAD_U8(S0, S1, S2, D0, tmp): +def _VOP3Op_V_MSAD_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // UNSIGNED comparison # tmp = S2.u32; # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -5238,7 +5238,7 @@ def _VOP3Op_V_MSAD_U8(S0, S1, S2, D0, tmp): tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) D0.u32 = tmp -def _VOP3Op_V_QSAD_PK_U16_U8(S0, S1, S2, D0, tmp): +def _VOP3Op_V_QSAD_PK_U16_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -5250,7 +5250,7 @@ def _VOP3Op_V_QSAD_PK_U16_U8(S0, S1, S2, D0, tmp): tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 -def _VOP3Op_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, tmp): +def _VOP3Op_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -5262,7 +5262,7 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, tmp): tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 -def _VOP3Op_V_MQSAD_U32_U8(S0, S1, S2, D0, tmp): +def _VOP3Op_V_MQSAD_U32_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); @@ -5274,55 +5274,55 @@ def _VOP3Op_V_MQSAD_U32_U8(S0, S1, S2, D0, tmp): tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)) D0.b128 = tmp.b128 -def _VOP3Op_V_XOR3_B32(S0, S1, S2, D0): +def _VOP3Op_V_XOR3_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) -def _VOP3Op_V_MAD_U16(S0, S1, S2, D0): +def _VOP3Op_V_MAD_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = S0.u16 * S1.u16 + S2.u16 D0.u16 = S0.u16 * S1.u16 + S2.u16 -def _VOP3Op_V_XAD_U32(S0, S1, S2, D0): +def _VOP3Op_V_XAD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 -def _VOP3Op_V_LSHL_ADD_U32(S0, S1, S2, D0): +def _VOP3Op_V_LSHL_ADD_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 -def _VOP3Op_V_ADD_LSHL_U32(S0, S1, S2, D0): +def _VOP3Op_V_ADD_LSHL_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) -def _VOP3Op_V_FMA_F16(S0, S1, S2, D0): +def _VOP3Op_V_FMA_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = fma(S0.f16, S1.f16, S2.f16) D0.f16 = fma(S0.f16, S1.f16, S2.f16) -def _VOP3Op_V_MIN3_F16(S0, S1, S2, D0): +def _VOP3Op_V_MIN3_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) -def _VOP3Op_V_MIN3_I16(S0, S1, S2, D0): +def _VOP3Op_V_MIN3_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) -def _VOP3Op_V_MIN3_U16(S0, S1, S2, D0): +def _VOP3Op_V_MIN3_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) -def _VOP3Op_V_MAX3_F16(S0, S1, S2, D0): +def _VOP3Op_V_MAX3_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) -def _VOP3Op_V_MAX3_I16(S0, S1, S2, D0): +def _VOP3Op_V_MAX3_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) -def _VOP3Op_V_MAX3_U16(S0, S1, S2, D0): +def _VOP3Op_V_MAX3_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) -def _VOP3Op_V_MED3_F16(S0, S1, S2, D0): +def _VOP3Op_V_MED3_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then @@ -5341,7 +5341,7 @@ def _VOP3Op_V_MED3_F16(S0, S1, S2, D0): else: D0.f16 = v_max_f16(S0.f16, S1.f16) -def _VOP3Op_V_MED3_I16(S0, S1, S2, D0): +def _VOP3Op_V_MED3_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then # D0.i16 = v_max_i16(S1.i16, S2.i16) # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then @@ -5356,7 +5356,7 @@ def _VOP3Op_V_MED3_I16(S0, S1, S2, D0): else: D0.i16 = v_max_i16(S0.i16, S1.i16) -def _VOP3Op_V_MED3_U16(S0, S1, S2, D0): +def _VOP3Op_V_MED3_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then # D0.u16 = v_max_u16(S1.u16, S2.u16) # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then @@ -5371,11 +5371,11 @@ def _VOP3Op_V_MED3_U16(S0, S1, S2, D0): else: D0.u16 = v_max_u16(S0.u16, S1.u16) -def _VOP3Op_V_MAD_I16(S0, S1, S2, D0): +def _VOP3Op_V_MAD_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = S0.i16 * S1.i16 + S2.i16 D0.i16 = S0.i16 * S1.i16 + S2.i16 -def _VOP3Op_V_DIV_FIXUP_F16(S0, S1, S2, D0): +def _VOP3Op_V_DIV_FIXUP_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) @@ -5412,67 +5412,67 @@ def _VOP3Op_V_DIV_FIXUP_F16(S0, S1, S2, D0): else: D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) -def _VOP3Op_V_ADD3_U32(S0, S1, S2, D0): +def _VOP3Op_V_ADD3_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 + S1.u32 + S2.u32 D0.u32 = S0.u32 + S1.u32 + S2.u32 -def _VOP3Op_V_LSHL_OR_B32(S0, S1, S2, D0): +def _VOP3Op_V_LSHL_OR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) -def _VOP3Op_V_AND_OR_B32(S0, S1, S2, D0): +def _VOP3Op_V_AND_OR_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) D0.u32 = ((S0.u32 & S1.u32) | S2.u32) -def _VOP3Op_V_OR3_B32(S0, S1, S2, D0): +def _VOP3Op_V_OR3_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (S0.u32 | S1.u32 | S2.u32) D0.u32 = (S0.u32 | S1.u32 | S2.u32) -def _VOP3Op_V_MAD_U32_U16(S0, S1, S2, D0): +def _VOP3Op_V_MAD_U32_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 D0.u32 = (S0.u16) * (S1.u16) + S2.u32 -def _VOP3Op_V_MAD_I32_I16(S0, S1, S2, D0): +def _VOP3Op_V_MAD_I32_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 D0.i32 = (S0.i16) * (S1.i16) + S2.i32 -def _VOP3Op_V_CNDMASK_B16(S0, S1, D0, VCC, laneId): +def _VOP3Op_V_CNDMASK_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 D0.u16 = ((S1.u16) if (VCC.u64[laneId]) else (S0.u16)) -def _VOP3Op_V_MAXMIN_F32(S0, S1, S2, D0): +def _VOP3Op_V_MAXMIN_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) -def _VOP3Op_V_MINMAX_F32(S0, S1, S2, D0): +def _VOP3Op_V_MINMAX_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) -def _VOP3Op_V_MAXMIN_F16(S0, S1, S2, D0): +def _VOP3Op_V_MAXMIN_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) -def _VOP3Op_V_MINMAX_F16(S0, S1, S2, D0): +def _VOP3Op_V_MINMAX_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) -def _VOP3Op_V_MAXMIN_U32(S0, S1, S2, D0): +def _VOP3Op_V_MAXMIN_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) -def _VOP3Op_V_MINMAX_U32(S0, S1, S2, D0): +def _VOP3Op_V_MINMAX_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) -def _VOP3Op_V_MAXMIN_I32(S0, S1, S2, D0): +def _VOP3Op_V_MAXMIN_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) -def _VOP3Op_V_MINMAX_I32(S0, S1, S2, D0): +def _VOP3Op_V_MINMAX_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) -def _VOP3Op_V_DOT2_F16_F16(S0, S1, S2, D0, tmp): +def _VOP3Op_V_DOT2_F16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S2.f16; # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; @@ -5482,7 +5482,7 @@ def _VOP3Op_V_DOT2_F16_F16(S0, S1, S2, D0, tmp): tmp += S0[31 : 16].f16 * S1[31 : 16].f16 D0.f16 = tmp -def _VOP3Op_V_DOT2_BF16_BF16(S0, S1, S2, D0, tmp): +def _VOP3Op_V_DOT2_BF16_BF16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S2.bf16; # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; @@ -5492,85 +5492,85 @@ def _VOP3Op_V_DOT2_BF16_BF16(S0, S1, S2, D0, tmp): tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16 D0.bf16 = tmp -def _VOP3Op_V_ADD_NC_U16(S0, S1, D0): +def _VOP3Op_V_ADD_NC_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = S0.u16 + S1.u16 D0.u16 = S0.u16 + S1.u16 -def _VOP3Op_V_SUB_NC_U16(S0, S1, D0): +def _VOP3Op_V_SUB_NC_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = S0.u16 - S1.u16 D0.u16 = S0.u16 - S1.u16 -def _VOP3Op_V_MUL_LO_U16(S0, S1, D0): +def _VOP3Op_V_MUL_LO_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = S0.u16 * S1.u16 D0.u16 = S0.u16 * S1.u16 -def _VOP3Op_V_CVT_PK_I16_F32(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_I16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); tmp[31 : 16] = (v_cvt_i16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_i16_f32(S0.f32)) -def _VOP3Op_V_CVT_PK_U16_F32(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_U16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); tmp[31 : 16] = (v_cvt_u16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_u16_f32(S0.f32)) -def _VOP3Op_V_MAX_U16(S0, S1, D0): +def _VOP3Op_V_MAX_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) -def _VOP3Op_V_MAX_I16(S0, S1, D0): +def _VOP3Op_V_MAX_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) -def _VOP3Op_V_MIN_U16(S0, S1, D0): +def _VOP3Op_V_MIN_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) -def _VOP3Op_V_MIN_I16(S0, S1, D0): +def _VOP3Op_V_MIN_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) -def _VOP3Op_V_ADD_NC_I16(S0, S1, D0): +def _VOP3Op_V_ADD_NC_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = S0.i16 + S1.i16 D0.i16 = S0.i16 + S1.i16 -def _VOP3Op_V_SUB_NC_I16(S0, S1, D0): +def _VOP3Op_V_SUB_NC_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = S0.i16 - S1.i16 D0.i16 = S0.i16 - S1.i16 -def _VOP3Op_V_PACK_B32_F16(S0, S1, D0): +def _VOP3Op_V_PACK_B32_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0[31 : 16].f16 = S1.f16; # D0[15 : 0].f16 = S0.f16 D0[31 : 16].f16 = S1.f16 D0[15 : 0].f16 = S0.f16 -def _VOP3Op_V_CVT_PK_NORM_I16_F16(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_NORM_I16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); tmp[15 : 0].i16 = f16_to_snorm(S0.f16) tmp[31 : 16].i16 = f16_to_snorm(S1.f16) -def _VOP3Op_V_CVT_PK_NORM_U16_F16(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_NORM_U16_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); tmp[15 : 0].u16 = f16_to_unorm(S0.f16) tmp[31 : 16].u16 = f16_to_unorm(S1.f16) -def _VOP3Op_V_LDEXP_F32(S0, S1, D0): +def _VOP3Op_V_LDEXP_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f32 = S0.f32 * 2.0F ** S1.i32 D0.f32 = S0.f32 * 2.0 ** S1.i32 -def _VOP3Op_V_BFM_B32(S0, S1, D0): +def _VOP3Op_V_BFM_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) -def _VOP3Op_V_BCNT_U32_B32(S0, S1, D0, tmp): +def _VOP3Op_V_BCNT_U32_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S1.u32; # for i in 0 : 31 do # tmp += S0[i].u32; @@ -5582,51 +5582,51 @@ def _VOP3Op_V_BCNT_U32_B32(S0, S1, D0, tmp): tmp += S0[i].u32 D0.u32 = tmp -def _VOP3Op_V_CVT_PK_NORM_I16_F32(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_NORM_I16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); tmp[15 : 0].i16 = f32_to_snorm(S0.f32) tmp[31 : 16].i16 = f32_to_snorm(S1.f32) -def _VOP3Op_V_CVT_PK_NORM_U16_F32(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_NORM_U16_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); tmp[15 : 0].u16 = f32_to_unorm(S0.f32) tmp[31 : 16].u16 = f32_to_unorm(S1.f32) -def _VOP3Op_V_CVT_PK_U16_U32(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_U16_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[15 : 0].u16 = u32_to_u16(S0.u32); # tmp[31 : 16].u16 = u32_to_u16(S1.u32); tmp[15 : 0].u16 = u32_to_u16(S0.u32) tmp[31 : 16].u16 = u32_to_u16(S1.u32) -def _VOP3Op_V_CVT_PK_I16_I32(S0, S1, tmp): +def _VOP3Op_V_CVT_PK_I16_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[15 : 0].i16 = i32_to_i16(S0.i32); # tmp[31 : 16].i16 = i32_to_i16(S1.i32); tmp[15 : 0].i16 = i32_to_i16(S0.i32) tmp[31 : 16].i16 = i32_to_i16(S1.i32) -def _VOP3Op_V_SUB_NC_I32(S0, S1, D0): +def _VOP3Op_V_SUB_NC_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 - S1.i32 D0.i32 = S0.i32 - S1.i32 -def _VOP3Op_V_ADD_NC_I32(S0, S1, D0): +def _VOP3Op_V_ADD_NC_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = S0.i32 + S1.i32 D0.i32 = S0.i32 + S1.i32 -def _VOP3Op_V_ADD_F64(S0, S1, D0): +def _VOP3Op_V_ADD_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = S0.f64 + S1.f64 D0.f64 = S0.f64 + S1.f64 -def _VOP3Op_V_MUL_F64(S0, S1, D0): +def _VOP3Op_V_MUL_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = S0.f64 * S1.f64 D0.f64 = S0.f64 * S1.f64 -def _VOP3Op_V_MIN_F64(S0, S1, D0): +def _VOP3Op_V_MIN_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(S0.f64) then @@ -5679,7 +5679,7 @@ def _VOP3Op_V_MIN_F64(S0, S1, D0): else: D0.f64 = S1.f64 -def _VOP3Op_V_MAX_F64(S0, S1, D0): +def _VOP3Op_V_MAX_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(S0.f64) then @@ -5732,47 +5732,47 @@ def _VOP3Op_V_MAX_F64(S0, S1, D0): else: D0.f64 = S1.f64 -def _VOP3Op_V_LDEXP_F64(S0, S1, D0): +def _VOP3Op_V_LDEXP_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.f64 = S0.f64 * 2.0 ** S1.i32 D0.f64 = S0.f64 * 2.0 ** S1.i32 -def _VOP3Op_V_MUL_LO_U32(S0, S1, D0): +def _VOP3Op_V_MUL_LO_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = S0.u32 * S1.u32 D0.u32 = S0.u32 * S1.u32 -def _VOP3Op_V_MUL_HI_U32(S0, S1, D0): +def _VOP3Op_V_MUL_HI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) D0.u32 = (((S0.u32) * (S1.u32)) >> 32) -def _VOP3Op_V_MUL_HI_I32(S0, S1, D0): +def _VOP3Op_V_MUL_HI_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) D0.i32 = (((S0.i32) * (S1.i32)) >> 32) -def _VOP3Op_V_LSHLREV_B16(S0, S1, D0): +def _VOP3Op_V_LSHLREV_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = (S1.u16 << S0[3 : 0].u32) D0.u16 = (S1.u16 << S0[3 : 0].u32) -def _VOP3Op_V_LSHRREV_B16(S0, S1, D0): +def _VOP3Op_V_LSHRREV_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) D0.u16 = (S1.u16 >> S0[3 : 0].u32) -def _VOP3Op_V_ASHRREV_I16(S0, S1, D0): +def _VOP3Op_V_ASHRREV_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) D0.i16 = (S1.i16 >> S0[3 : 0].u32) -def _VOP3Op_V_LSHLREV_B64(S0, S1, D0): +def _VOP3Op_V_LSHLREV_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S1.u64 << S0[5 : 0].u32) D0.u64 = (S1.u64 << S0[5 : 0].u32) -def _VOP3Op_V_LSHRREV_B64(S0, S1, D0): +def _VOP3Op_V_LSHRREV_B64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64 = (S1.u64 >> S0[5 : 0].u32) D0.u64 = (S1.u64 >> S0[5 : 0].u32) -def _VOP3Op_V_ASHRREV_I64(S0, S1, D0): +def _VOP3Op_V_ASHRREV_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.i64 = (S1.i64 >> S0[5 : 0].u32) D0.i64 = (S1.i64 >> S0[5 : 0].u32) -def _VOP3Op_V_READLANE_B32(S1, D0, SRC0, VGPR): +def _VOP3Op_V_READLANE_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare lane : 32'U; # if WAVE32 then # lane = S1.u32[4 : 0].u32; @@ -5788,437 +5788,437 @@ def _VOP3Op_V_READLANE_B32(S1, D0, SRC0, VGPR): lane = S1.u32[5 : 0].u32 D0.b32 = VGPR[lane][SRC0.u32] -def _VOP3Op_V_AND_B16(S0, S1, D0): +def _VOP3Op_V_AND_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = (S0.u16 & S1.u16) D0.u16 = (S0.u16 & S1.u16) -def _VOP3Op_V_OR_B16(S0, S1, D0): +def _VOP3Op_V_OR_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = (S0.u16 | S1.u16) D0.u16 = (S0.u16 | S1.u16) -def _VOP3Op_V_XOR_B16(S0, S1, D0): +def _VOP3Op_V_XOR_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u16 = (S0.u16 ^ S1.u16) D0.u16 = (S0.u16 ^ S1.u16) VOP3Op_FUNCTIONS = { - VOP3Op.V_CMP_F_F16: (_VOP3Op_V_CMP_F_F16, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_F16: (_VOP3Op_V_CMP_LT_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_F16: (_VOP3Op_V_CMP_EQ_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_F16: (_VOP3Op_V_CMP_LE_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_F16: (_VOP3Op_V_CMP_GT_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LG_F16: (_VOP3Op_V_CMP_LG_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_F16: (_VOP3Op_V_CMP_GE_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_O_F16: (_VOP3Op_V_CMP_O_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_U_F16: (_VOP3Op_V_CMP_U_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NGE_F16: (_VOP3Op_V_CMP_NGE_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLG_F16: (_VOP3Op_V_CMP_NLG_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NGT_F16: (_VOP3Op_V_CMP_NGT_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLE_F16: (_VOP3Op_V_CMP_NLE_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NEQ_F16: (_VOP3Op_V_CMP_NEQ_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLT_F16: (_VOP3Op_V_CMP_NLT_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_T_F16: (_VOP3Op_V_CMP_T_F16, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_F_F32: (_VOP3Op_V_CMP_F_F32, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_F32: (_VOP3Op_V_CMP_LT_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_F32: (_VOP3Op_V_CMP_EQ_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_F32: (_VOP3Op_V_CMP_LE_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_F32: (_VOP3Op_V_CMP_GT_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LG_F32: (_VOP3Op_V_CMP_LG_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_F32: (_VOP3Op_V_CMP_GE_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_O_F32: (_VOP3Op_V_CMP_O_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_U_F32: (_VOP3Op_V_CMP_U_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NGE_F32: (_VOP3Op_V_CMP_NGE_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLG_F32: (_VOP3Op_V_CMP_NLG_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NGT_F32: (_VOP3Op_V_CMP_NGT_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLE_F32: (_VOP3Op_V_CMP_NLE_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NEQ_F32: (_VOP3Op_V_CMP_NEQ_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLT_F32: (_VOP3Op_V_CMP_NLT_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_T_F32: (_VOP3Op_V_CMP_T_F32, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_F_F64: (_VOP3Op_V_CMP_F_F64, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_F64: (_VOP3Op_V_CMP_LT_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_F64: (_VOP3Op_V_CMP_EQ_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_F64: (_VOP3Op_V_CMP_LE_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_F64: (_VOP3Op_V_CMP_GT_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LG_F64: (_VOP3Op_V_CMP_LG_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_F64: (_VOP3Op_V_CMP_GE_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_O_F64: (_VOP3Op_V_CMP_O_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_U_F64: (_VOP3Op_V_CMP_U_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NGE_F64: (_VOP3Op_V_CMP_NGE_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLG_F64: (_VOP3Op_V_CMP_NLG_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NGT_F64: (_VOP3Op_V_CMP_NGT_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLE_F64: (_VOP3Op_V_CMP_NLE_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NEQ_F64: (_VOP3Op_V_CMP_NEQ_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NLT_F64: (_VOP3Op_V_CMP_NLT_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_T_F64: (_VOP3Op_V_CMP_T_F64, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_I16: (_VOP3Op_V_CMP_LT_I16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_I16: (_VOP3Op_V_CMP_EQ_I16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_I16: (_VOP3Op_V_CMP_LE_I16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_I16: (_VOP3Op_V_CMP_GT_I16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NE_I16: (_VOP3Op_V_CMP_NE_I16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_I16: (_VOP3Op_V_CMP_GE_I16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_U16: (_VOP3Op_V_CMP_LT_U16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_U16: (_VOP3Op_V_CMP_EQ_U16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_U16: (_VOP3Op_V_CMP_LE_U16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_U16: (_VOP3Op_V_CMP_GT_U16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NE_U16: (_VOP3Op_V_CMP_NE_U16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_U16: (_VOP3Op_V_CMP_GE_U16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_F_I32: (_VOP3Op_V_CMP_F_I32, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_I32: (_VOP3Op_V_CMP_LT_I32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_I32: (_VOP3Op_V_CMP_EQ_I32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_I32: (_VOP3Op_V_CMP_LE_I32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_I32: (_VOP3Op_V_CMP_GT_I32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NE_I32: (_VOP3Op_V_CMP_NE_I32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_I32: (_VOP3Op_V_CMP_GE_I32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_T_I32: (_VOP3Op_V_CMP_T_I32, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_F_U32: (_VOP3Op_V_CMP_F_U32, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_U32: (_VOP3Op_V_CMP_LT_U32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_U32: (_VOP3Op_V_CMP_EQ_U32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_U32: (_VOP3Op_V_CMP_LE_U32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_U32: (_VOP3Op_V_CMP_GT_U32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NE_U32: (_VOP3Op_V_CMP_NE_U32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_U32: (_VOP3Op_V_CMP_GE_U32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_T_U32: (_VOP3Op_V_CMP_T_U32, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_F_I64: (_VOP3Op_V_CMP_F_I64, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_I64: (_VOP3Op_V_CMP_LT_I64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_I64: (_VOP3Op_V_CMP_EQ_I64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_I64: (_VOP3Op_V_CMP_LE_I64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_I64: (_VOP3Op_V_CMP_GT_I64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NE_I64: (_VOP3Op_V_CMP_NE_I64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_I64: (_VOP3Op_V_CMP_GE_I64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_T_I64: (_VOP3Op_V_CMP_T_I64, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_F_U64: (_VOP3Op_V_CMP_F_U64, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LT_U64: (_VOP3Op_V_CMP_LT_U64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_EQ_U64: (_VOP3Op_V_CMP_EQ_U64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_LE_U64: (_VOP3Op_V_CMP_LE_U64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GT_U64: (_VOP3Op_V_CMP_GT_U64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_NE_U64: (_VOP3Op_V_CMP_NE_U64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_GE_U64: (_VOP3Op_V_CMP_GE_U64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_T_U64: (_VOP3Op_V_CMP_T_U64, (True, False, False, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_CLASS_F16: (_VOP3Op_V_CMP_CLASS_F16, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_CLASS_F32: (_VOP3Op_V_CMP_CLASS_F32, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMP_CLASS_F64: (_VOP3Op_V_CMP_CLASS_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_CMPX_F_F16: (_VOP3Op_V_CMPX_F_F16, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_F16: (_VOP3Op_V_CMPX_LT_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_F16: (_VOP3Op_V_CMPX_EQ_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_F16: (_VOP3Op_V_CMPX_LE_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_F16: (_VOP3Op_V_CMPX_GT_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LG_F16: (_VOP3Op_V_CMPX_LG_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_F16: (_VOP3Op_V_CMPX_GE_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_O_F16: (_VOP3Op_V_CMPX_O_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_U_F16: (_VOP3Op_V_CMPX_U_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NGE_F16: (_VOP3Op_V_CMPX_NGE_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLG_F16: (_VOP3Op_V_CMPX_NLG_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NGT_F16: (_VOP3Op_V_CMPX_NGT_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLE_F16: (_VOP3Op_V_CMPX_NLE_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NEQ_F16: (_VOP3Op_V_CMPX_NEQ_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLT_F16: (_VOP3Op_V_CMPX_NLT_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_T_F16: (_VOP3Op_V_CMPX_T_F16, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_F_F32: (_VOP3Op_V_CMPX_F_F32, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_F32: (_VOP3Op_V_CMPX_LT_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_F32: (_VOP3Op_V_CMPX_EQ_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_F32: (_VOP3Op_V_CMPX_LE_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_F32: (_VOP3Op_V_CMPX_GT_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LG_F32: (_VOP3Op_V_CMPX_LG_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_F32: (_VOP3Op_V_CMPX_GE_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_O_F32: (_VOP3Op_V_CMPX_O_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_U_F32: (_VOP3Op_V_CMPX_U_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NGE_F32: (_VOP3Op_V_CMPX_NGE_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLG_F32: (_VOP3Op_V_CMPX_NLG_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NGT_F32: (_VOP3Op_V_CMPX_NGT_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLE_F32: (_VOP3Op_V_CMPX_NLE_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NEQ_F32: (_VOP3Op_V_CMPX_NEQ_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLT_F32: (_VOP3Op_V_CMPX_NLT_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_T_F32: (_VOP3Op_V_CMPX_T_F32, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_F_F64: (_VOP3Op_V_CMPX_F_F64, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_F64: (_VOP3Op_V_CMPX_LT_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_F64: (_VOP3Op_V_CMPX_EQ_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_F64: (_VOP3Op_V_CMPX_LE_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_F64: (_VOP3Op_V_CMPX_GT_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LG_F64: (_VOP3Op_V_CMPX_LG_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_F64: (_VOP3Op_V_CMPX_GE_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_O_F64: (_VOP3Op_V_CMPX_O_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_U_F64: (_VOP3Op_V_CMPX_U_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NGE_F64: (_VOP3Op_V_CMPX_NGE_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLG_F64: (_VOP3Op_V_CMPX_NLG_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NGT_F64: (_VOP3Op_V_CMPX_NGT_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLE_F64: (_VOP3Op_V_CMPX_NLE_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NEQ_F64: (_VOP3Op_V_CMPX_NEQ_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NLT_F64: (_VOP3Op_V_CMPX_NLT_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_T_F64: (_VOP3Op_V_CMPX_T_F64, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_I16: (_VOP3Op_V_CMPX_LT_I16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_I16: (_VOP3Op_V_CMPX_EQ_I16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_I16: (_VOP3Op_V_CMPX_LE_I16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_I16: (_VOP3Op_V_CMPX_GT_I16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NE_I16: (_VOP3Op_V_CMPX_NE_I16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_I16: (_VOP3Op_V_CMPX_GE_I16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_U16: (_VOP3Op_V_CMPX_LT_U16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_U16: (_VOP3Op_V_CMPX_EQ_U16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_U16: (_VOP3Op_V_CMPX_LE_U16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_U16: (_VOP3Op_V_CMPX_GT_U16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NE_U16: (_VOP3Op_V_CMPX_NE_U16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_U16: (_VOP3Op_V_CMPX_GE_U16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_F_I32: (_VOP3Op_V_CMPX_F_I32, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_I32: (_VOP3Op_V_CMPX_LT_I32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_I32: (_VOP3Op_V_CMPX_EQ_I32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_I32: (_VOP3Op_V_CMPX_LE_I32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_I32: (_VOP3Op_V_CMPX_GT_I32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NE_I32: (_VOP3Op_V_CMPX_NE_I32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_I32: (_VOP3Op_V_CMPX_GE_I32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_T_I32: (_VOP3Op_V_CMPX_T_I32, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_F_U32: (_VOP3Op_V_CMPX_F_U32, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_U32: (_VOP3Op_V_CMPX_LT_U32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_U32: (_VOP3Op_V_CMPX_EQ_U32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_U32: (_VOP3Op_V_CMPX_LE_U32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_U32: (_VOP3Op_V_CMPX_GT_U32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NE_U32: (_VOP3Op_V_CMPX_NE_U32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_U32: (_VOP3Op_V_CMPX_GE_U32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_T_U32: (_VOP3Op_V_CMPX_T_U32, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_F_I64: (_VOP3Op_V_CMPX_F_I64, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_I64: (_VOP3Op_V_CMPX_LT_I64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_I64: (_VOP3Op_V_CMPX_EQ_I64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_I64: (_VOP3Op_V_CMPX_LE_I64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_I64: (_VOP3Op_V_CMPX_GT_I64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NE_I64: (_VOP3Op_V_CMPX_NE_I64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_I64: (_VOP3Op_V_CMPX_GE_I64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_T_I64: (_VOP3Op_V_CMPX_T_I64, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_F_U64: (_VOP3Op_V_CMPX_F_U64, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_LT_U64: (_VOP3Op_V_CMPX_LT_U64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_EQ_U64: (_VOP3Op_V_CMPX_EQ_U64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_LE_U64: (_VOP3Op_V_CMPX_LE_U64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GT_U64: (_VOP3Op_V_CMPX_GT_U64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_NE_U64: (_VOP3Op_V_CMPX_NE_U64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_GE_U64: (_VOP3Op_V_CMPX_GE_U64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_T_U64: (_VOP3Op_V_CMPX_T_U64, (False, False, False, False, False, False, False, True, ('EXEC', 'laneId'))), - VOP3Op.V_CMPX_CLASS_F16: (_VOP3Op_V_CMPX_CLASS_F16, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_CLASS_F32: (_VOP3Op_V_CMPX_CLASS_F32, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_CMPX_CLASS_F64: (_VOP3Op_V_CMPX_CLASS_F64, (False, False, False, False, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOP3Op.V_MOV_B32: (_VOP3Op_V_MOV_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_READFIRSTLANE_B32: (_VOP3Op_V_READFIRSTLANE_B32, (False, False, False, False, False, False, False, True, ('D0', 'EXEC', 'SRC0', 'VGPR'))), - VOP3Op.V_CVT_I32_F64: (_VOP3Op_V_CVT_I32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F64_I32: (_VOP3Op_V_CVT_F64_I32, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_I32: (_VOP3Op_V_CVT_F32_I32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_U32: (_VOP3Op_V_CVT_F32_U32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_U32_F32: (_VOP3Op_V_CVT_U32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_I32_F32: (_VOP3Op_V_CVT_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F16_F32: (_VOP3Op_V_CVT_F16_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_F16: (_VOP3Op_V_CVT_F32_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_NEAREST_I32_F32: (_VOP3Op_V_CVT_NEAREST_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_FLOOR_I32_F32: (_VOP3Op_V_CVT_FLOOR_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_F64: (_VOP3Op_V_CVT_F32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F64_F32: (_VOP3Op_V_CVT_F64_F32, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_UBYTE0: (_VOP3Op_V_CVT_F32_UBYTE0, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_UBYTE1: (_VOP3Op_V_CVT_F32_UBYTE1, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_UBYTE2: (_VOP3Op_V_CVT_F32_UBYTE2, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F32_UBYTE3: (_VOP3Op_V_CVT_F32_UBYTE3, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_U32_F64: (_VOP3Op_V_CVT_U32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F64_U32: (_VOP3Op_V_CVT_F64_U32, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_TRUNC_F64: (_VOP3Op_V_TRUNC_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CEIL_F64: (_VOP3Op_V_CEIL_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RNDNE_F64: (_VOP3Op_V_RNDNE_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FLOOR_F64: (_VOP3Op_V_FLOOR_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_MOV_B16: (_VOP3Op_V_MOV_B16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FRACT_F32: (_VOP3Op_V_FRACT_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_TRUNC_F32: (_VOP3Op_V_TRUNC_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CEIL_F32: (_VOP3Op_V_CEIL_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RNDNE_F32: (_VOP3Op_V_RNDNE_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FLOOR_F32: (_VOP3Op_V_FLOOR_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_EXP_F32: (_VOP3Op_V_EXP_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_LOG_F32: (_VOP3Op_V_LOG_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RCP_F32: (_VOP3Op_V_RCP_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RCP_IFLAG_F32: (_VOP3Op_V_RCP_IFLAG_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RSQ_F32: (_VOP3Op_V_RSQ_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RCP_F64: (_VOP3Op_V_RCP_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RSQ_F64: (_VOP3Op_V_RSQ_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_SQRT_F32: (_VOP3Op_V_SQRT_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_SQRT_F64: (_VOP3Op_V_SQRT_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_SIN_F32: (_VOP3Op_V_SIN_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_COS_F32: (_VOP3Op_V_COS_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_NOT_B32: (_VOP3Op_V_NOT_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_BFREV_B32: (_VOP3Op_V_BFREV_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CLZ_I32_U32: (_VOP3Op_V_CLZ_I32_U32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CTZ_I32_B32: (_VOP3Op_V_CTZ_I32_B32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CLS_I32: (_VOP3Op_V_CLS_I32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FREXP_EXP_I32_F64: (_VOP3Op_V_FREXP_EXP_I32_F64, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FREXP_MANT_F64: (_VOP3Op_V_FREXP_MANT_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FRACT_F64: (_VOP3Op_V_FRACT_F64, (True, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FREXP_EXP_I32_F32: (_VOP3Op_V_FREXP_EXP_I32_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FREXP_MANT_F32: (_VOP3Op_V_FREXP_MANT_F32, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_MOVRELS_B32: (_VOP3Op_V_MOVRELS_B32, (False, False, False, False, False, False, False, False, ('D0', 'laneId', 'SRC0', 'VGPR'))), - VOP3Op.V_CVT_F16_U16: (_VOP3Op_V_CVT_F16_U16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_F16_I16: (_VOP3Op_V_CVT_F16_I16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_U16_F16: (_VOP3Op_V_CVT_U16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_I16_F16: (_VOP3Op_V_CVT_I16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RCP_F16: (_VOP3Op_V_RCP_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_SQRT_F16: (_VOP3Op_V_SQRT_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RSQ_F16: (_VOP3Op_V_RSQ_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_LOG_F16: (_VOP3Op_V_LOG_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_EXP_F16: (_VOP3Op_V_EXP_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FREXP_MANT_F16: (_VOP3Op_V_FREXP_MANT_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FREXP_EXP_I16_F16: (_VOP3Op_V_FREXP_EXP_I16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FLOOR_F16: (_VOP3Op_V_FLOOR_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CEIL_F16: (_VOP3Op_V_CEIL_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_TRUNC_F16: (_VOP3Op_V_TRUNC_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_RNDNE_F16: (_VOP3Op_V_RNDNE_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_FRACT_F16: (_VOP3Op_V_FRACT_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_SIN_F16: (_VOP3Op_V_SIN_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_COS_F16: (_VOP3Op_V_COS_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_NORM_I16_F16: (_VOP3Op_V_CVT_NORM_I16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_NORM_U16_F16: (_VOP3Op_V_CVT_NORM_U16_F16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_NOT_B16: (_VOP3Op_V_NOT_B16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_I32_I16: (_VOP3Op_V_CVT_I32_I16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CVT_U32_U16: (_VOP3Op_V_CVT_U32_U16, (False, False, False, False, False, False, False, False, ('S0', 'D0'))), - VOP3Op.V_CNDMASK_B32: (_VOP3Op_V_CNDMASK_B32, (False, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_ADD_F32: (_VOP3Op_V_ADD_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUB_F32: (_VOP3Op_V_SUB_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUBREV_F32: (_VOP3Op_V_SUBREV_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_FMAC_DX9_ZERO_F32: (_VOP3Op_V_FMAC_DX9_ZERO_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MUL_DX9_ZERO_F32: (_VOP3Op_V_MUL_DX9_ZERO_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_F32: (_VOP3Op_V_MUL_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_I32_I24: (_VOP3Op_V_MUL_I32_I24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_HI_I32_I24: (_VOP3Op_V_MUL_HI_I32_I24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_U32_U24: (_VOP3Op_V_MUL_U32_U24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_HI_U32_U24: (_VOP3Op_V_MUL_HI_U32_U24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MIN_F32: (_VOP3Op_V_MIN_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MAX_F32: (_VOP3Op_V_MAX_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MIN_I32: (_VOP3Op_V_MIN_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MAX_I32: (_VOP3Op_V_MAX_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MIN_U32: (_VOP3Op_V_MIN_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MAX_U32: (_VOP3Op_V_MAX_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LSHLREV_B32: (_VOP3Op_V_LSHLREV_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LSHRREV_B32: (_VOP3Op_V_LSHRREV_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_ASHRREV_I32: (_VOP3Op_V_ASHRREV_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_AND_B32: (_VOP3Op_V_AND_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_OR_B32: (_VOP3Op_V_OR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_XOR_B32: (_VOP3Op_V_XOR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_XNOR_B32: (_VOP3Op_V_XNOR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_ADD_NC_U32: (_VOP3Op_V_ADD_NC_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUB_NC_U32: (_VOP3Op_V_SUB_NC_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUBREV_NC_U32: (_VOP3Op_V_SUBREV_NC_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_FMAC_F32: (_VOP3Op_V_FMAC_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_CVT_PK_RTZ_F16_F32: (_VOP3Op_V_CVT_PK_RTZ_F16_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_ADD_F16: (_VOP3Op_V_ADD_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUB_F16: (_VOP3Op_V_SUB_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUBREV_F16: (_VOP3Op_V_SUBREV_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_F16: (_VOP3Op_V_MUL_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_FMAC_F16: (_VOP3Op_V_FMAC_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MAX_F16: (_VOP3Op_V_MAX_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MIN_F16: (_VOP3Op_V_MIN_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LDEXP_F16: (_VOP3Op_V_LDEXP_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_FMA_DX9_ZERO_F32: (_VOP3Op_V_FMA_DX9_ZERO_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAD_I32_I24: (_VOP3Op_V_MAD_I32_I24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAD_U32_U24: (_VOP3Op_V_MAD_U32_U24, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_CUBEID_F32: (_VOP3Op_V_CUBEID_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_CUBESC_F32: (_VOP3Op_V_CUBESC_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_CUBETC_F32: (_VOP3Op_V_CUBETC_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_CUBEMA_F32: (_VOP3Op_V_CUBEMA_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_BFE_U32: (_VOP3Op_V_BFE_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_BFE_I32: (_VOP3Op_V_BFE_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_BFI_B32: (_VOP3Op_V_BFI_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_FMA_F32: (_VOP3Op_V_FMA_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_FMA_F64: (_VOP3Op_V_FMA_F64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_LERP_U8: (_VOP3Op_V_LERP_U8, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_ALIGNBIT_B32: (_VOP3Op_V_ALIGNBIT_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_ALIGNBYTE_B32: (_VOP3Op_V_ALIGNBYTE_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MULLIT_F32: (_VOP3Op_V_MULLIT_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MIN3_F32: (_VOP3Op_V_MIN3_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MIN3_I32: (_VOP3Op_V_MIN3_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MIN3_U32: (_VOP3Op_V_MIN3_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAX3_F32: (_VOP3Op_V_MAX3_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAX3_I32: (_VOP3Op_V_MAX3_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAX3_U32: (_VOP3Op_V_MAX3_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MED3_F32: (_VOP3Op_V_MED3_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MED3_I32: (_VOP3Op_V_MED3_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MED3_U32: (_VOP3Op_V_MED3_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_SAD_U8: (_VOP3Op_V_SAD_U8, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_SAD_HI_U8: (_VOP3Op_V_SAD_HI_U8, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_SAD_U16: (_VOP3Op_V_SAD_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_SAD_U32: (_VOP3Op_V_SAD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_CVT_PK_U8_F32: (_VOP3Op_V_CVT_PK_U8_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_DIV_FIXUP_F32: (_VOP3Op_V_DIV_FIXUP_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_DIV_FIXUP_F64: (_VOP3Op_V_DIV_FIXUP_F64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_DIV_FMAS_F32: (_VOP3Op_V_DIV_FMAS_F32, (False, False, False, False, False, False, True, False, ('S0', 'S1', 'S2', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_DIV_FMAS_F64: (_VOP3Op_V_DIV_FMAS_F64, (True, False, False, False, False, False, True, False, ('S0', 'S1', 'S2', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_MSAD_U8: (_VOP3Op_V_MSAD_U8, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_QSAD_PK_U16_U8: (_VOP3Op_V_QSAD_PK_U16_U8, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_MQSAD_PK_U16_U8: (_VOP3Op_V_MQSAD_PK_U16_U8, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_MQSAD_U32_U8: (_VOP3Op_V_MQSAD_U32_U8, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_XOR3_B32: (_VOP3Op_V_XOR3_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAD_U16: (_VOP3Op_V_MAD_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_XAD_U32: (_VOP3Op_V_XAD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_LSHL_ADD_U32: (_VOP3Op_V_LSHL_ADD_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_ADD_LSHL_U32: (_VOP3Op_V_ADD_LSHL_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_FMA_F16: (_VOP3Op_V_FMA_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MIN3_F16: (_VOP3Op_V_MIN3_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MIN3_I16: (_VOP3Op_V_MIN3_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MIN3_U16: (_VOP3Op_V_MIN3_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAX3_F16: (_VOP3Op_V_MAX3_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAX3_I16: (_VOP3Op_V_MAX3_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAX3_U16: (_VOP3Op_V_MAX3_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MED3_F16: (_VOP3Op_V_MED3_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MED3_I16: (_VOP3Op_V_MED3_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MED3_U16: (_VOP3Op_V_MED3_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAD_I16: (_VOP3Op_V_MAD_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_DIV_FIXUP_F16: (_VOP3Op_V_DIV_FIXUP_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_ADD3_U32: (_VOP3Op_V_ADD3_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_LSHL_OR_B32: (_VOP3Op_V_LSHL_OR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_AND_OR_B32: (_VOP3Op_V_AND_OR_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_OR3_B32: (_VOP3Op_V_OR3_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAD_U32_U16: (_VOP3Op_V_MAD_U32_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAD_I32_I16: (_VOP3Op_V_MAD_I32_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_CNDMASK_B16: (_VOP3Op_V_CNDMASK_B16, (False, False, False, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOP3Op.V_MAXMIN_F32: (_VOP3Op_V_MAXMIN_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MINMAX_F32: (_VOP3Op_V_MINMAX_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAXMIN_F16: (_VOP3Op_V_MAXMIN_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MINMAX_F16: (_VOP3Op_V_MINMAX_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAXMIN_U32: (_VOP3Op_V_MAXMIN_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MINMAX_U32: (_VOP3Op_V_MINMAX_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MAXMIN_I32: (_VOP3Op_V_MAXMIN_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_MINMAX_I32: (_VOP3Op_V_MINMAX_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0'))), - VOP3Op.V_DOT2_F16_F16: (_VOP3Op_V_DOT2_F16_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_DOT2_BF16_BF16: (_VOP3Op_V_DOT2_BF16_BF16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3Op.V_ADD_NC_U16: (_VOP3Op_V_ADD_NC_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUB_NC_U16: (_VOP3Op_V_SUB_NC_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_LO_U16: (_VOP3Op_V_MUL_LO_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_CVT_PK_I16_F32: (_VOP3Op_V_CVT_PK_I16_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_CVT_PK_U16_F32: (_VOP3Op_V_CVT_PK_U16_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_MAX_U16: (_VOP3Op_V_MAX_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MAX_I16: (_VOP3Op_V_MAX_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MIN_U16: (_VOP3Op_V_MIN_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MIN_I16: (_VOP3Op_V_MIN_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_ADD_NC_I16: (_VOP3Op_V_ADD_NC_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_SUB_NC_I16: (_VOP3Op_V_SUB_NC_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_PACK_B32_F16: (_VOP3Op_V_PACK_B32_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_CVT_PK_NORM_I16_F16: (_VOP3Op_V_CVT_PK_NORM_I16_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_CVT_PK_NORM_U16_F16: (_VOP3Op_V_CVT_PK_NORM_U16_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_LDEXP_F32: (_VOP3Op_V_LDEXP_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_BFM_B32: (_VOP3Op_V_BFM_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_BCNT_U32_B32: (_VOP3Op_V_BCNT_U32_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3Op.V_CVT_PK_NORM_I16_F32: (_VOP3Op_V_CVT_PK_NORM_I16_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_CVT_PK_NORM_U16_F32: (_VOP3Op_V_CVT_PK_NORM_U16_F32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_CVT_PK_U16_U32: (_VOP3Op_V_CVT_PK_U16_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_CVT_PK_I16_I32: (_VOP3Op_V_CVT_PK_I16_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'tmp'))), - VOP3Op.V_SUB_NC_I32: (_VOP3Op_V_SUB_NC_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_ADD_NC_I32: (_VOP3Op_V_ADD_NC_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_ADD_F64: (_VOP3Op_V_ADD_F64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_F64: (_VOP3Op_V_MUL_F64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MIN_F64: (_VOP3Op_V_MIN_F64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MAX_F64: (_VOP3Op_V_MAX_F64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LDEXP_F64: (_VOP3Op_V_LDEXP_F64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_LO_U32: (_VOP3Op_V_MUL_LO_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_HI_U32: (_VOP3Op_V_MUL_HI_U32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_MUL_HI_I32: (_VOP3Op_V_MUL_HI_I32, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LSHLREV_B16: (_VOP3Op_V_LSHLREV_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LSHRREV_B16: (_VOP3Op_V_LSHRREV_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_ASHRREV_I16: (_VOP3Op_V_ASHRREV_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LSHLREV_B64: (_VOP3Op_V_LSHLREV_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_LSHRREV_B64: (_VOP3Op_V_LSHRREV_B64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_ASHRREV_I64: (_VOP3Op_V_ASHRREV_I64, (True, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_READLANE_B32: (_VOP3Op_V_READLANE_B32, (False, False, False, False, False, False, False, False, ('S1', 'D0', 'SRC0', 'VGPR'))), - VOP3Op.V_AND_B16: (_VOP3Op_V_AND_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_OR_B16: (_VOP3Op_V_OR_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), - VOP3Op.V_XOR_B16: (_VOP3Op_V_XOR_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0'))), + VOP3Op.V_CMP_F_F16: _VOP3Op_V_CMP_F_F16, + VOP3Op.V_CMP_LT_F16: _VOP3Op_V_CMP_LT_F16, + VOP3Op.V_CMP_EQ_F16: _VOP3Op_V_CMP_EQ_F16, + VOP3Op.V_CMP_LE_F16: _VOP3Op_V_CMP_LE_F16, + VOP3Op.V_CMP_GT_F16: _VOP3Op_V_CMP_GT_F16, + VOP3Op.V_CMP_LG_F16: _VOP3Op_V_CMP_LG_F16, + VOP3Op.V_CMP_GE_F16: _VOP3Op_V_CMP_GE_F16, + VOP3Op.V_CMP_O_F16: _VOP3Op_V_CMP_O_F16, + VOP3Op.V_CMP_U_F16: _VOP3Op_V_CMP_U_F16, + VOP3Op.V_CMP_NGE_F16: _VOP3Op_V_CMP_NGE_F16, + VOP3Op.V_CMP_NLG_F16: _VOP3Op_V_CMP_NLG_F16, + VOP3Op.V_CMP_NGT_F16: _VOP3Op_V_CMP_NGT_F16, + VOP3Op.V_CMP_NLE_F16: _VOP3Op_V_CMP_NLE_F16, + VOP3Op.V_CMP_NEQ_F16: _VOP3Op_V_CMP_NEQ_F16, + VOP3Op.V_CMP_NLT_F16: _VOP3Op_V_CMP_NLT_F16, + VOP3Op.V_CMP_T_F16: _VOP3Op_V_CMP_T_F16, + VOP3Op.V_CMP_F_F32: _VOP3Op_V_CMP_F_F32, + VOP3Op.V_CMP_LT_F32: _VOP3Op_V_CMP_LT_F32, + VOP3Op.V_CMP_EQ_F32: _VOP3Op_V_CMP_EQ_F32, + VOP3Op.V_CMP_LE_F32: _VOP3Op_V_CMP_LE_F32, + VOP3Op.V_CMP_GT_F32: _VOP3Op_V_CMP_GT_F32, + VOP3Op.V_CMP_LG_F32: _VOP3Op_V_CMP_LG_F32, + VOP3Op.V_CMP_GE_F32: _VOP3Op_V_CMP_GE_F32, + VOP3Op.V_CMP_O_F32: _VOP3Op_V_CMP_O_F32, + VOP3Op.V_CMP_U_F32: _VOP3Op_V_CMP_U_F32, + VOP3Op.V_CMP_NGE_F32: _VOP3Op_V_CMP_NGE_F32, + VOP3Op.V_CMP_NLG_F32: _VOP3Op_V_CMP_NLG_F32, + VOP3Op.V_CMP_NGT_F32: _VOP3Op_V_CMP_NGT_F32, + VOP3Op.V_CMP_NLE_F32: _VOP3Op_V_CMP_NLE_F32, + VOP3Op.V_CMP_NEQ_F32: _VOP3Op_V_CMP_NEQ_F32, + VOP3Op.V_CMP_NLT_F32: _VOP3Op_V_CMP_NLT_F32, + VOP3Op.V_CMP_T_F32: _VOP3Op_V_CMP_T_F32, + VOP3Op.V_CMP_F_F64: _VOP3Op_V_CMP_F_F64, + VOP3Op.V_CMP_LT_F64: _VOP3Op_V_CMP_LT_F64, + VOP3Op.V_CMP_EQ_F64: _VOP3Op_V_CMP_EQ_F64, + VOP3Op.V_CMP_LE_F64: _VOP3Op_V_CMP_LE_F64, + VOP3Op.V_CMP_GT_F64: _VOP3Op_V_CMP_GT_F64, + VOP3Op.V_CMP_LG_F64: _VOP3Op_V_CMP_LG_F64, + VOP3Op.V_CMP_GE_F64: _VOP3Op_V_CMP_GE_F64, + VOP3Op.V_CMP_O_F64: _VOP3Op_V_CMP_O_F64, + VOP3Op.V_CMP_U_F64: _VOP3Op_V_CMP_U_F64, + VOP3Op.V_CMP_NGE_F64: _VOP3Op_V_CMP_NGE_F64, + VOP3Op.V_CMP_NLG_F64: _VOP3Op_V_CMP_NLG_F64, + VOP3Op.V_CMP_NGT_F64: _VOP3Op_V_CMP_NGT_F64, + VOP3Op.V_CMP_NLE_F64: _VOP3Op_V_CMP_NLE_F64, + VOP3Op.V_CMP_NEQ_F64: _VOP3Op_V_CMP_NEQ_F64, + VOP3Op.V_CMP_NLT_F64: _VOP3Op_V_CMP_NLT_F64, + VOP3Op.V_CMP_T_F64: _VOP3Op_V_CMP_T_F64, + VOP3Op.V_CMP_LT_I16: _VOP3Op_V_CMP_LT_I16, + VOP3Op.V_CMP_EQ_I16: _VOP3Op_V_CMP_EQ_I16, + VOP3Op.V_CMP_LE_I16: _VOP3Op_V_CMP_LE_I16, + VOP3Op.V_CMP_GT_I16: _VOP3Op_V_CMP_GT_I16, + VOP3Op.V_CMP_NE_I16: _VOP3Op_V_CMP_NE_I16, + VOP3Op.V_CMP_GE_I16: _VOP3Op_V_CMP_GE_I16, + VOP3Op.V_CMP_LT_U16: _VOP3Op_V_CMP_LT_U16, + VOP3Op.V_CMP_EQ_U16: _VOP3Op_V_CMP_EQ_U16, + VOP3Op.V_CMP_LE_U16: _VOP3Op_V_CMP_LE_U16, + VOP3Op.V_CMP_GT_U16: _VOP3Op_V_CMP_GT_U16, + VOP3Op.V_CMP_NE_U16: _VOP3Op_V_CMP_NE_U16, + VOP3Op.V_CMP_GE_U16: _VOP3Op_V_CMP_GE_U16, + VOP3Op.V_CMP_F_I32: _VOP3Op_V_CMP_F_I32, + VOP3Op.V_CMP_LT_I32: _VOP3Op_V_CMP_LT_I32, + VOP3Op.V_CMP_EQ_I32: _VOP3Op_V_CMP_EQ_I32, + VOP3Op.V_CMP_LE_I32: _VOP3Op_V_CMP_LE_I32, + VOP3Op.V_CMP_GT_I32: _VOP3Op_V_CMP_GT_I32, + VOP3Op.V_CMP_NE_I32: _VOP3Op_V_CMP_NE_I32, + VOP3Op.V_CMP_GE_I32: _VOP3Op_V_CMP_GE_I32, + VOP3Op.V_CMP_T_I32: _VOP3Op_V_CMP_T_I32, + VOP3Op.V_CMP_F_U32: _VOP3Op_V_CMP_F_U32, + VOP3Op.V_CMP_LT_U32: _VOP3Op_V_CMP_LT_U32, + VOP3Op.V_CMP_EQ_U32: _VOP3Op_V_CMP_EQ_U32, + VOP3Op.V_CMP_LE_U32: _VOP3Op_V_CMP_LE_U32, + VOP3Op.V_CMP_GT_U32: _VOP3Op_V_CMP_GT_U32, + VOP3Op.V_CMP_NE_U32: _VOP3Op_V_CMP_NE_U32, + VOP3Op.V_CMP_GE_U32: _VOP3Op_V_CMP_GE_U32, + VOP3Op.V_CMP_T_U32: _VOP3Op_V_CMP_T_U32, + VOP3Op.V_CMP_F_I64: _VOP3Op_V_CMP_F_I64, + VOP3Op.V_CMP_LT_I64: _VOP3Op_V_CMP_LT_I64, + VOP3Op.V_CMP_EQ_I64: _VOP3Op_V_CMP_EQ_I64, + VOP3Op.V_CMP_LE_I64: _VOP3Op_V_CMP_LE_I64, + VOP3Op.V_CMP_GT_I64: _VOP3Op_V_CMP_GT_I64, + VOP3Op.V_CMP_NE_I64: _VOP3Op_V_CMP_NE_I64, + VOP3Op.V_CMP_GE_I64: _VOP3Op_V_CMP_GE_I64, + VOP3Op.V_CMP_T_I64: _VOP3Op_V_CMP_T_I64, + VOP3Op.V_CMP_F_U64: _VOP3Op_V_CMP_F_U64, + VOP3Op.V_CMP_LT_U64: _VOP3Op_V_CMP_LT_U64, + VOP3Op.V_CMP_EQ_U64: _VOP3Op_V_CMP_EQ_U64, + VOP3Op.V_CMP_LE_U64: _VOP3Op_V_CMP_LE_U64, + VOP3Op.V_CMP_GT_U64: _VOP3Op_V_CMP_GT_U64, + VOP3Op.V_CMP_NE_U64: _VOP3Op_V_CMP_NE_U64, + VOP3Op.V_CMP_GE_U64: _VOP3Op_V_CMP_GE_U64, + VOP3Op.V_CMP_T_U64: _VOP3Op_V_CMP_T_U64, + VOP3Op.V_CMP_CLASS_F16: _VOP3Op_V_CMP_CLASS_F16, + VOP3Op.V_CMP_CLASS_F32: _VOP3Op_V_CMP_CLASS_F32, + VOP3Op.V_CMP_CLASS_F64: _VOP3Op_V_CMP_CLASS_F64, + VOP3Op.V_CMPX_F_F16: _VOP3Op_V_CMPX_F_F16, + VOP3Op.V_CMPX_LT_F16: _VOP3Op_V_CMPX_LT_F16, + VOP3Op.V_CMPX_EQ_F16: _VOP3Op_V_CMPX_EQ_F16, + VOP3Op.V_CMPX_LE_F16: _VOP3Op_V_CMPX_LE_F16, + VOP3Op.V_CMPX_GT_F16: _VOP3Op_V_CMPX_GT_F16, + VOP3Op.V_CMPX_LG_F16: _VOP3Op_V_CMPX_LG_F16, + VOP3Op.V_CMPX_GE_F16: _VOP3Op_V_CMPX_GE_F16, + VOP3Op.V_CMPX_O_F16: _VOP3Op_V_CMPX_O_F16, + VOP3Op.V_CMPX_U_F16: _VOP3Op_V_CMPX_U_F16, + VOP3Op.V_CMPX_NGE_F16: _VOP3Op_V_CMPX_NGE_F16, + VOP3Op.V_CMPX_NLG_F16: _VOP3Op_V_CMPX_NLG_F16, + VOP3Op.V_CMPX_NGT_F16: _VOP3Op_V_CMPX_NGT_F16, + VOP3Op.V_CMPX_NLE_F16: _VOP3Op_V_CMPX_NLE_F16, + VOP3Op.V_CMPX_NEQ_F16: _VOP3Op_V_CMPX_NEQ_F16, + VOP3Op.V_CMPX_NLT_F16: _VOP3Op_V_CMPX_NLT_F16, + VOP3Op.V_CMPX_T_F16: _VOP3Op_V_CMPX_T_F16, + VOP3Op.V_CMPX_F_F32: _VOP3Op_V_CMPX_F_F32, + VOP3Op.V_CMPX_LT_F32: _VOP3Op_V_CMPX_LT_F32, + VOP3Op.V_CMPX_EQ_F32: _VOP3Op_V_CMPX_EQ_F32, + VOP3Op.V_CMPX_LE_F32: _VOP3Op_V_CMPX_LE_F32, + VOP3Op.V_CMPX_GT_F32: _VOP3Op_V_CMPX_GT_F32, + VOP3Op.V_CMPX_LG_F32: _VOP3Op_V_CMPX_LG_F32, + VOP3Op.V_CMPX_GE_F32: _VOP3Op_V_CMPX_GE_F32, + VOP3Op.V_CMPX_O_F32: _VOP3Op_V_CMPX_O_F32, + VOP3Op.V_CMPX_U_F32: _VOP3Op_V_CMPX_U_F32, + VOP3Op.V_CMPX_NGE_F32: _VOP3Op_V_CMPX_NGE_F32, + VOP3Op.V_CMPX_NLG_F32: _VOP3Op_V_CMPX_NLG_F32, + VOP3Op.V_CMPX_NGT_F32: _VOP3Op_V_CMPX_NGT_F32, + VOP3Op.V_CMPX_NLE_F32: _VOP3Op_V_CMPX_NLE_F32, + VOP3Op.V_CMPX_NEQ_F32: _VOP3Op_V_CMPX_NEQ_F32, + VOP3Op.V_CMPX_NLT_F32: _VOP3Op_V_CMPX_NLT_F32, + VOP3Op.V_CMPX_T_F32: _VOP3Op_V_CMPX_T_F32, + VOP3Op.V_CMPX_F_F64: _VOP3Op_V_CMPX_F_F64, + VOP3Op.V_CMPX_LT_F64: _VOP3Op_V_CMPX_LT_F64, + VOP3Op.V_CMPX_EQ_F64: _VOP3Op_V_CMPX_EQ_F64, + VOP3Op.V_CMPX_LE_F64: _VOP3Op_V_CMPX_LE_F64, + VOP3Op.V_CMPX_GT_F64: _VOP3Op_V_CMPX_GT_F64, + VOP3Op.V_CMPX_LG_F64: _VOP3Op_V_CMPX_LG_F64, + VOP3Op.V_CMPX_GE_F64: _VOP3Op_V_CMPX_GE_F64, + VOP3Op.V_CMPX_O_F64: _VOP3Op_V_CMPX_O_F64, + VOP3Op.V_CMPX_U_F64: _VOP3Op_V_CMPX_U_F64, + VOP3Op.V_CMPX_NGE_F64: _VOP3Op_V_CMPX_NGE_F64, + VOP3Op.V_CMPX_NLG_F64: _VOP3Op_V_CMPX_NLG_F64, + VOP3Op.V_CMPX_NGT_F64: _VOP3Op_V_CMPX_NGT_F64, + VOP3Op.V_CMPX_NLE_F64: _VOP3Op_V_CMPX_NLE_F64, + VOP3Op.V_CMPX_NEQ_F64: _VOP3Op_V_CMPX_NEQ_F64, + VOP3Op.V_CMPX_NLT_F64: _VOP3Op_V_CMPX_NLT_F64, + VOP3Op.V_CMPX_T_F64: _VOP3Op_V_CMPX_T_F64, + VOP3Op.V_CMPX_LT_I16: _VOP3Op_V_CMPX_LT_I16, + VOP3Op.V_CMPX_EQ_I16: _VOP3Op_V_CMPX_EQ_I16, + VOP3Op.V_CMPX_LE_I16: _VOP3Op_V_CMPX_LE_I16, + VOP3Op.V_CMPX_GT_I16: _VOP3Op_V_CMPX_GT_I16, + VOP3Op.V_CMPX_NE_I16: _VOP3Op_V_CMPX_NE_I16, + VOP3Op.V_CMPX_GE_I16: _VOP3Op_V_CMPX_GE_I16, + VOP3Op.V_CMPX_LT_U16: _VOP3Op_V_CMPX_LT_U16, + VOP3Op.V_CMPX_EQ_U16: _VOP3Op_V_CMPX_EQ_U16, + VOP3Op.V_CMPX_LE_U16: _VOP3Op_V_CMPX_LE_U16, + VOP3Op.V_CMPX_GT_U16: _VOP3Op_V_CMPX_GT_U16, + VOP3Op.V_CMPX_NE_U16: _VOP3Op_V_CMPX_NE_U16, + VOP3Op.V_CMPX_GE_U16: _VOP3Op_V_CMPX_GE_U16, + VOP3Op.V_CMPX_F_I32: _VOP3Op_V_CMPX_F_I32, + VOP3Op.V_CMPX_LT_I32: _VOP3Op_V_CMPX_LT_I32, + VOP3Op.V_CMPX_EQ_I32: _VOP3Op_V_CMPX_EQ_I32, + VOP3Op.V_CMPX_LE_I32: _VOP3Op_V_CMPX_LE_I32, + VOP3Op.V_CMPX_GT_I32: _VOP3Op_V_CMPX_GT_I32, + VOP3Op.V_CMPX_NE_I32: _VOP3Op_V_CMPX_NE_I32, + VOP3Op.V_CMPX_GE_I32: _VOP3Op_V_CMPX_GE_I32, + VOP3Op.V_CMPX_T_I32: _VOP3Op_V_CMPX_T_I32, + VOP3Op.V_CMPX_F_U32: _VOP3Op_V_CMPX_F_U32, + VOP3Op.V_CMPX_LT_U32: _VOP3Op_V_CMPX_LT_U32, + VOP3Op.V_CMPX_EQ_U32: _VOP3Op_V_CMPX_EQ_U32, + VOP3Op.V_CMPX_LE_U32: _VOP3Op_V_CMPX_LE_U32, + VOP3Op.V_CMPX_GT_U32: _VOP3Op_V_CMPX_GT_U32, + VOP3Op.V_CMPX_NE_U32: _VOP3Op_V_CMPX_NE_U32, + VOP3Op.V_CMPX_GE_U32: _VOP3Op_V_CMPX_GE_U32, + VOP3Op.V_CMPX_T_U32: _VOP3Op_V_CMPX_T_U32, + VOP3Op.V_CMPX_F_I64: _VOP3Op_V_CMPX_F_I64, + VOP3Op.V_CMPX_LT_I64: _VOP3Op_V_CMPX_LT_I64, + VOP3Op.V_CMPX_EQ_I64: _VOP3Op_V_CMPX_EQ_I64, + VOP3Op.V_CMPX_LE_I64: _VOP3Op_V_CMPX_LE_I64, + VOP3Op.V_CMPX_GT_I64: _VOP3Op_V_CMPX_GT_I64, + VOP3Op.V_CMPX_NE_I64: _VOP3Op_V_CMPX_NE_I64, + VOP3Op.V_CMPX_GE_I64: _VOP3Op_V_CMPX_GE_I64, + VOP3Op.V_CMPX_T_I64: _VOP3Op_V_CMPX_T_I64, + VOP3Op.V_CMPX_F_U64: _VOP3Op_V_CMPX_F_U64, + VOP3Op.V_CMPX_LT_U64: _VOP3Op_V_CMPX_LT_U64, + VOP3Op.V_CMPX_EQ_U64: _VOP3Op_V_CMPX_EQ_U64, + VOP3Op.V_CMPX_LE_U64: _VOP3Op_V_CMPX_LE_U64, + VOP3Op.V_CMPX_GT_U64: _VOP3Op_V_CMPX_GT_U64, + VOP3Op.V_CMPX_NE_U64: _VOP3Op_V_CMPX_NE_U64, + VOP3Op.V_CMPX_GE_U64: _VOP3Op_V_CMPX_GE_U64, + VOP3Op.V_CMPX_T_U64: _VOP3Op_V_CMPX_T_U64, + VOP3Op.V_CMPX_CLASS_F16: _VOP3Op_V_CMPX_CLASS_F16, + VOP3Op.V_CMPX_CLASS_F32: _VOP3Op_V_CMPX_CLASS_F32, + VOP3Op.V_CMPX_CLASS_F64: _VOP3Op_V_CMPX_CLASS_F64, + VOP3Op.V_MOV_B32: _VOP3Op_V_MOV_B32, + VOP3Op.V_READFIRSTLANE_B32: _VOP3Op_V_READFIRSTLANE_B32, + VOP3Op.V_CVT_I32_F64: _VOP3Op_V_CVT_I32_F64, + VOP3Op.V_CVT_F64_I32: _VOP3Op_V_CVT_F64_I32, + VOP3Op.V_CVT_F32_I32: _VOP3Op_V_CVT_F32_I32, + VOP3Op.V_CVT_F32_U32: _VOP3Op_V_CVT_F32_U32, + VOP3Op.V_CVT_U32_F32: _VOP3Op_V_CVT_U32_F32, + VOP3Op.V_CVT_I32_F32: _VOP3Op_V_CVT_I32_F32, + VOP3Op.V_CVT_F16_F32: _VOP3Op_V_CVT_F16_F32, + VOP3Op.V_CVT_F32_F16: _VOP3Op_V_CVT_F32_F16, + VOP3Op.V_CVT_NEAREST_I32_F32: _VOP3Op_V_CVT_NEAREST_I32_F32, + VOP3Op.V_CVT_FLOOR_I32_F32: _VOP3Op_V_CVT_FLOOR_I32_F32, + VOP3Op.V_CVT_F32_F64: _VOP3Op_V_CVT_F32_F64, + VOP3Op.V_CVT_F64_F32: _VOP3Op_V_CVT_F64_F32, + VOP3Op.V_CVT_F32_UBYTE0: _VOP3Op_V_CVT_F32_UBYTE0, + VOP3Op.V_CVT_F32_UBYTE1: _VOP3Op_V_CVT_F32_UBYTE1, + VOP3Op.V_CVT_F32_UBYTE2: _VOP3Op_V_CVT_F32_UBYTE2, + VOP3Op.V_CVT_F32_UBYTE3: _VOP3Op_V_CVT_F32_UBYTE3, + VOP3Op.V_CVT_U32_F64: _VOP3Op_V_CVT_U32_F64, + VOP3Op.V_CVT_F64_U32: _VOP3Op_V_CVT_F64_U32, + VOP3Op.V_TRUNC_F64: _VOP3Op_V_TRUNC_F64, + VOP3Op.V_CEIL_F64: _VOP3Op_V_CEIL_F64, + VOP3Op.V_RNDNE_F64: _VOP3Op_V_RNDNE_F64, + VOP3Op.V_FLOOR_F64: _VOP3Op_V_FLOOR_F64, + VOP3Op.V_MOV_B16: _VOP3Op_V_MOV_B16, + VOP3Op.V_FRACT_F32: _VOP3Op_V_FRACT_F32, + VOP3Op.V_TRUNC_F32: _VOP3Op_V_TRUNC_F32, + VOP3Op.V_CEIL_F32: _VOP3Op_V_CEIL_F32, + VOP3Op.V_RNDNE_F32: _VOP3Op_V_RNDNE_F32, + VOP3Op.V_FLOOR_F32: _VOP3Op_V_FLOOR_F32, + VOP3Op.V_EXP_F32: _VOP3Op_V_EXP_F32, + VOP3Op.V_LOG_F32: _VOP3Op_V_LOG_F32, + VOP3Op.V_RCP_F32: _VOP3Op_V_RCP_F32, + VOP3Op.V_RCP_IFLAG_F32: _VOP3Op_V_RCP_IFLAG_F32, + VOP3Op.V_RSQ_F32: _VOP3Op_V_RSQ_F32, + VOP3Op.V_RCP_F64: _VOP3Op_V_RCP_F64, + VOP3Op.V_RSQ_F64: _VOP3Op_V_RSQ_F64, + VOP3Op.V_SQRT_F32: _VOP3Op_V_SQRT_F32, + VOP3Op.V_SQRT_F64: _VOP3Op_V_SQRT_F64, + VOP3Op.V_SIN_F32: _VOP3Op_V_SIN_F32, + VOP3Op.V_COS_F32: _VOP3Op_V_COS_F32, + VOP3Op.V_NOT_B32: _VOP3Op_V_NOT_B32, + VOP3Op.V_BFREV_B32: _VOP3Op_V_BFREV_B32, + VOP3Op.V_CLZ_I32_U32: _VOP3Op_V_CLZ_I32_U32, + VOP3Op.V_CTZ_I32_B32: _VOP3Op_V_CTZ_I32_B32, + VOP3Op.V_CLS_I32: _VOP3Op_V_CLS_I32, + VOP3Op.V_FREXP_EXP_I32_F64: _VOP3Op_V_FREXP_EXP_I32_F64, + VOP3Op.V_FREXP_MANT_F64: _VOP3Op_V_FREXP_MANT_F64, + VOP3Op.V_FRACT_F64: _VOP3Op_V_FRACT_F64, + VOP3Op.V_FREXP_EXP_I32_F32: _VOP3Op_V_FREXP_EXP_I32_F32, + VOP3Op.V_FREXP_MANT_F32: _VOP3Op_V_FREXP_MANT_F32, + VOP3Op.V_MOVRELS_B32: _VOP3Op_V_MOVRELS_B32, + VOP3Op.V_CVT_F16_U16: _VOP3Op_V_CVT_F16_U16, + VOP3Op.V_CVT_F16_I16: _VOP3Op_V_CVT_F16_I16, + VOP3Op.V_CVT_U16_F16: _VOP3Op_V_CVT_U16_F16, + VOP3Op.V_CVT_I16_F16: _VOP3Op_V_CVT_I16_F16, + VOP3Op.V_RCP_F16: _VOP3Op_V_RCP_F16, + VOP3Op.V_SQRT_F16: _VOP3Op_V_SQRT_F16, + VOP3Op.V_RSQ_F16: _VOP3Op_V_RSQ_F16, + VOP3Op.V_LOG_F16: _VOP3Op_V_LOG_F16, + VOP3Op.V_EXP_F16: _VOP3Op_V_EXP_F16, + VOP3Op.V_FREXP_MANT_F16: _VOP3Op_V_FREXP_MANT_F16, + VOP3Op.V_FREXP_EXP_I16_F16: _VOP3Op_V_FREXP_EXP_I16_F16, + VOP3Op.V_FLOOR_F16: _VOP3Op_V_FLOOR_F16, + VOP3Op.V_CEIL_F16: _VOP3Op_V_CEIL_F16, + VOP3Op.V_TRUNC_F16: _VOP3Op_V_TRUNC_F16, + VOP3Op.V_RNDNE_F16: _VOP3Op_V_RNDNE_F16, + VOP3Op.V_FRACT_F16: _VOP3Op_V_FRACT_F16, + VOP3Op.V_SIN_F16: _VOP3Op_V_SIN_F16, + VOP3Op.V_COS_F16: _VOP3Op_V_COS_F16, + VOP3Op.V_CVT_NORM_I16_F16: _VOP3Op_V_CVT_NORM_I16_F16, + VOP3Op.V_CVT_NORM_U16_F16: _VOP3Op_V_CVT_NORM_U16_F16, + VOP3Op.V_NOT_B16: _VOP3Op_V_NOT_B16, + VOP3Op.V_CVT_I32_I16: _VOP3Op_V_CVT_I32_I16, + VOP3Op.V_CVT_U32_U16: _VOP3Op_V_CVT_U32_U16, + VOP3Op.V_CNDMASK_B32: _VOP3Op_V_CNDMASK_B32, + VOP3Op.V_ADD_F32: _VOP3Op_V_ADD_F32, + VOP3Op.V_SUB_F32: _VOP3Op_V_SUB_F32, + VOP3Op.V_SUBREV_F32: _VOP3Op_V_SUBREV_F32, + VOP3Op.V_FMAC_DX9_ZERO_F32: _VOP3Op_V_FMAC_DX9_ZERO_F32, + VOP3Op.V_MUL_DX9_ZERO_F32: _VOP3Op_V_MUL_DX9_ZERO_F32, + VOP3Op.V_MUL_F32: _VOP3Op_V_MUL_F32, + VOP3Op.V_MUL_I32_I24: _VOP3Op_V_MUL_I32_I24, + VOP3Op.V_MUL_HI_I32_I24: _VOP3Op_V_MUL_HI_I32_I24, + VOP3Op.V_MUL_U32_U24: _VOP3Op_V_MUL_U32_U24, + VOP3Op.V_MUL_HI_U32_U24: _VOP3Op_V_MUL_HI_U32_U24, + VOP3Op.V_MIN_F32: _VOP3Op_V_MIN_F32, + VOP3Op.V_MAX_F32: _VOP3Op_V_MAX_F32, + VOP3Op.V_MIN_I32: _VOP3Op_V_MIN_I32, + VOP3Op.V_MAX_I32: _VOP3Op_V_MAX_I32, + VOP3Op.V_MIN_U32: _VOP3Op_V_MIN_U32, + VOP3Op.V_MAX_U32: _VOP3Op_V_MAX_U32, + VOP3Op.V_LSHLREV_B32: _VOP3Op_V_LSHLREV_B32, + VOP3Op.V_LSHRREV_B32: _VOP3Op_V_LSHRREV_B32, + VOP3Op.V_ASHRREV_I32: _VOP3Op_V_ASHRREV_I32, + VOP3Op.V_AND_B32: _VOP3Op_V_AND_B32, + VOP3Op.V_OR_B32: _VOP3Op_V_OR_B32, + VOP3Op.V_XOR_B32: _VOP3Op_V_XOR_B32, + VOP3Op.V_XNOR_B32: _VOP3Op_V_XNOR_B32, + VOP3Op.V_ADD_NC_U32: _VOP3Op_V_ADD_NC_U32, + VOP3Op.V_SUB_NC_U32: _VOP3Op_V_SUB_NC_U32, + VOP3Op.V_SUBREV_NC_U32: _VOP3Op_V_SUBREV_NC_U32, + VOP3Op.V_FMAC_F32: _VOP3Op_V_FMAC_F32, + VOP3Op.V_CVT_PK_RTZ_F16_F32: _VOP3Op_V_CVT_PK_RTZ_F16_F32, + VOP3Op.V_ADD_F16: _VOP3Op_V_ADD_F16, + VOP3Op.V_SUB_F16: _VOP3Op_V_SUB_F16, + VOP3Op.V_SUBREV_F16: _VOP3Op_V_SUBREV_F16, + VOP3Op.V_MUL_F16: _VOP3Op_V_MUL_F16, + VOP3Op.V_FMAC_F16: _VOP3Op_V_FMAC_F16, + VOP3Op.V_MAX_F16: _VOP3Op_V_MAX_F16, + VOP3Op.V_MIN_F16: _VOP3Op_V_MIN_F16, + VOP3Op.V_LDEXP_F16: _VOP3Op_V_LDEXP_F16, + VOP3Op.V_FMA_DX9_ZERO_F32: _VOP3Op_V_FMA_DX9_ZERO_F32, + VOP3Op.V_MAD_I32_I24: _VOP3Op_V_MAD_I32_I24, + VOP3Op.V_MAD_U32_U24: _VOP3Op_V_MAD_U32_U24, + VOP3Op.V_CUBEID_F32: _VOP3Op_V_CUBEID_F32, + VOP3Op.V_CUBESC_F32: _VOP3Op_V_CUBESC_F32, + VOP3Op.V_CUBETC_F32: _VOP3Op_V_CUBETC_F32, + VOP3Op.V_CUBEMA_F32: _VOP3Op_V_CUBEMA_F32, + VOP3Op.V_BFE_U32: _VOP3Op_V_BFE_U32, + VOP3Op.V_BFE_I32: _VOP3Op_V_BFE_I32, + VOP3Op.V_BFI_B32: _VOP3Op_V_BFI_B32, + VOP3Op.V_FMA_F32: _VOP3Op_V_FMA_F32, + VOP3Op.V_FMA_F64: _VOP3Op_V_FMA_F64, + VOP3Op.V_LERP_U8: _VOP3Op_V_LERP_U8, + VOP3Op.V_ALIGNBIT_B32: _VOP3Op_V_ALIGNBIT_B32, + VOP3Op.V_ALIGNBYTE_B32: _VOP3Op_V_ALIGNBYTE_B32, + VOP3Op.V_MULLIT_F32: _VOP3Op_V_MULLIT_F32, + VOP3Op.V_MIN3_F32: _VOP3Op_V_MIN3_F32, + VOP3Op.V_MIN3_I32: _VOP3Op_V_MIN3_I32, + VOP3Op.V_MIN3_U32: _VOP3Op_V_MIN3_U32, + VOP3Op.V_MAX3_F32: _VOP3Op_V_MAX3_F32, + VOP3Op.V_MAX3_I32: _VOP3Op_V_MAX3_I32, + VOP3Op.V_MAX3_U32: _VOP3Op_V_MAX3_U32, + VOP3Op.V_MED3_F32: _VOP3Op_V_MED3_F32, + VOP3Op.V_MED3_I32: _VOP3Op_V_MED3_I32, + VOP3Op.V_MED3_U32: _VOP3Op_V_MED3_U32, + VOP3Op.V_SAD_U8: _VOP3Op_V_SAD_U8, + VOP3Op.V_SAD_HI_U8: _VOP3Op_V_SAD_HI_U8, + VOP3Op.V_SAD_U16: _VOP3Op_V_SAD_U16, + VOP3Op.V_SAD_U32: _VOP3Op_V_SAD_U32, + VOP3Op.V_CVT_PK_U8_F32: _VOP3Op_V_CVT_PK_U8_F32, + VOP3Op.V_DIV_FIXUP_F32: _VOP3Op_V_DIV_FIXUP_F32, + VOP3Op.V_DIV_FIXUP_F64: _VOP3Op_V_DIV_FIXUP_F64, + VOP3Op.V_DIV_FMAS_F32: _VOP3Op_V_DIV_FMAS_F32, + VOP3Op.V_DIV_FMAS_F64: _VOP3Op_V_DIV_FMAS_F64, + VOP3Op.V_MSAD_U8: _VOP3Op_V_MSAD_U8, + VOP3Op.V_QSAD_PK_U16_U8: _VOP3Op_V_QSAD_PK_U16_U8, + VOP3Op.V_MQSAD_PK_U16_U8: _VOP3Op_V_MQSAD_PK_U16_U8, + VOP3Op.V_MQSAD_U32_U8: _VOP3Op_V_MQSAD_U32_U8, + VOP3Op.V_XOR3_B32: _VOP3Op_V_XOR3_B32, + VOP3Op.V_MAD_U16: _VOP3Op_V_MAD_U16, + VOP3Op.V_XAD_U32: _VOP3Op_V_XAD_U32, + VOP3Op.V_LSHL_ADD_U32: _VOP3Op_V_LSHL_ADD_U32, + VOP3Op.V_ADD_LSHL_U32: _VOP3Op_V_ADD_LSHL_U32, + VOP3Op.V_FMA_F16: _VOP3Op_V_FMA_F16, + VOP3Op.V_MIN3_F16: _VOP3Op_V_MIN3_F16, + VOP3Op.V_MIN3_I16: _VOP3Op_V_MIN3_I16, + VOP3Op.V_MIN3_U16: _VOP3Op_V_MIN3_U16, + VOP3Op.V_MAX3_F16: _VOP3Op_V_MAX3_F16, + VOP3Op.V_MAX3_I16: _VOP3Op_V_MAX3_I16, + VOP3Op.V_MAX3_U16: _VOP3Op_V_MAX3_U16, + VOP3Op.V_MED3_F16: _VOP3Op_V_MED3_F16, + VOP3Op.V_MED3_I16: _VOP3Op_V_MED3_I16, + VOP3Op.V_MED3_U16: _VOP3Op_V_MED3_U16, + VOP3Op.V_MAD_I16: _VOP3Op_V_MAD_I16, + VOP3Op.V_DIV_FIXUP_F16: _VOP3Op_V_DIV_FIXUP_F16, + VOP3Op.V_ADD3_U32: _VOP3Op_V_ADD3_U32, + VOP3Op.V_LSHL_OR_B32: _VOP3Op_V_LSHL_OR_B32, + VOP3Op.V_AND_OR_B32: _VOP3Op_V_AND_OR_B32, + VOP3Op.V_OR3_B32: _VOP3Op_V_OR3_B32, + VOP3Op.V_MAD_U32_U16: _VOP3Op_V_MAD_U32_U16, + VOP3Op.V_MAD_I32_I16: _VOP3Op_V_MAD_I32_I16, + VOP3Op.V_CNDMASK_B16: _VOP3Op_V_CNDMASK_B16, + VOP3Op.V_MAXMIN_F32: _VOP3Op_V_MAXMIN_F32, + VOP3Op.V_MINMAX_F32: _VOP3Op_V_MINMAX_F32, + VOP3Op.V_MAXMIN_F16: _VOP3Op_V_MAXMIN_F16, + VOP3Op.V_MINMAX_F16: _VOP3Op_V_MINMAX_F16, + VOP3Op.V_MAXMIN_U32: _VOP3Op_V_MAXMIN_U32, + VOP3Op.V_MINMAX_U32: _VOP3Op_V_MINMAX_U32, + VOP3Op.V_MAXMIN_I32: _VOP3Op_V_MAXMIN_I32, + VOP3Op.V_MINMAX_I32: _VOP3Op_V_MINMAX_I32, + VOP3Op.V_DOT2_F16_F16: _VOP3Op_V_DOT2_F16_F16, + VOP3Op.V_DOT2_BF16_BF16: _VOP3Op_V_DOT2_BF16_BF16, + VOP3Op.V_ADD_NC_U16: _VOP3Op_V_ADD_NC_U16, + VOP3Op.V_SUB_NC_U16: _VOP3Op_V_SUB_NC_U16, + VOP3Op.V_MUL_LO_U16: _VOP3Op_V_MUL_LO_U16, + VOP3Op.V_CVT_PK_I16_F32: _VOP3Op_V_CVT_PK_I16_F32, + VOP3Op.V_CVT_PK_U16_F32: _VOP3Op_V_CVT_PK_U16_F32, + VOP3Op.V_MAX_U16: _VOP3Op_V_MAX_U16, + VOP3Op.V_MAX_I16: _VOP3Op_V_MAX_I16, + VOP3Op.V_MIN_U16: _VOP3Op_V_MIN_U16, + VOP3Op.V_MIN_I16: _VOP3Op_V_MIN_I16, + VOP3Op.V_ADD_NC_I16: _VOP3Op_V_ADD_NC_I16, + VOP3Op.V_SUB_NC_I16: _VOP3Op_V_SUB_NC_I16, + VOP3Op.V_PACK_B32_F16: _VOP3Op_V_PACK_B32_F16, + VOP3Op.V_CVT_PK_NORM_I16_F16: _VOP3Op_V_CVT_PK_NORM_I16_F16, + VOP3Op.V_CVT_PK_NORM_U16_F16: _VOP3Op_V_CVT_PK_NORM_U16_F16, + VOP3Op.V_LDEXP_F32: _VOP3Op_V_LDEXP_F32, + VOP3Op.V_BFM_B32: _VOP3Op_V_BFM_B32, + VOP3Op.V_BCNT_U32_B32: _VOP3Op_V_BCNT_U32_B32, + VOP3Op.V_CVT_PK_NORM_I16_F32: _VOP3Op_V_CVT_PK_NORM_I16_F32, + VOP3Op.V_CVT_PK_NORM_U16_F32: _VOP3Op_V_CVT_PK_NORM_U16_F32, + VOP3Op.V_CVT_PK_U16_U32: _VOP3Op_V_CVT_PK_U16_U32, + VOP3Op.V_CVT_PK_I16_I32: _VOP3Op_V_CVT_PK_I16_I32, + VOP3Op.V_SUB_NC_I32: _VOP3Op_V_SUB_NC_I32, + VOP3Op.V_ADD_NC_I32: _VOP3Op_V_ADD_NC_I32, + VOP3Op.V_ADD_F64: _VOP3Op_V_ADD_F64, + VOP3Op.V_MUL_F64: _VOP3Op_V_MUL_F64, + VOP3Op.V_MIN_F64: _VOP3Op_V_MIN_F64, + VOP3Op.V_MAX_F64: _VOP3Op_V_MAX_F64, + VOP3Op.V_LDEXP_F64: _VOP3Op_V_LDEXP_F64, + VOP3Op.V_MUL_LO_U32: _VOP3Op_V_MUL_LO_U32, + VOP3Op.V_MUL_HI_U32: _VOP3Op_V_MUL_HI_U32, + VOP3Op.V_MUL_HI_I32: _VOP3Op_V_MUL_HI_I32, + VOP3Op.V_LSHLREV_B16: _VOP3Op_V_LSHLREV_B16, + VOP3Op.V_LSHRREV_B16: _VOP3Op_V_LSHRREV_B16, + VOP3Op.V_ASHRREV_I16: _VOP3Op_V_ASHRREV_I16, + VOP3Op.V_LSHLREV_B64: _VOP3Op_V_LSHLREV_B64, + VOP3Op.V_LSHRREV_B64: _VOP3Op_V_LSHRREV_B64, + VOP3Op.V_ASHRREV_I64: _VOP3Op_V_ASHRREV_I64, + VOP3Op.V_READLANE_B32: _VOP3Op_V_READLANE_B32, + VOP3Op.V_AND_B16: _VOP3Op_V_AND_B16, + VOP3Op.V_OR_B16: _VOP3Op_V_OR_B16, + VOP3Op.V_XOR_B16: _VOP3Op_V_XOR_B16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP3SDOp_V_ADD_CO_CI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -6227,7 +6227,7 @@ def _VOP3SDOp_V_ADD_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 -def _VOP3SDOp_V_SUB_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP3SDOp_V_SUB_CO_CI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -6236,7 +6236,7 @@ def _VOP3SDOp_V_SUB_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 -def _VOP3SDOp_V_SUBREV_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP3SDOp_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -6245,7 +6245,7 @@ def _VOP3SDOp_V_SUBREV_CO_CI_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 -def _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, VCC): +def _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC = 0x0LL; # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # D0.f32 = NAN.f32 @@ -6298,7 +6298,7 @@ def _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, VCC): if S1.f32 == DENORM.f32: D0.f32 = float("nan") -def _VOP3SDOp_V_DIV_SCALE_F64(S0, S1, S2, D0, VCC): +def _VOP3SDOp_V_DIV_SCALE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC = 0x0LL; # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then # D0.f64 = NAN.f64 @@ -6351,19 +6351,19 @@ def _VOP3SDOp_V_DIV_SCALE_F64(S0, S1, S2, D0, VCC): if S1.f64 == DENORM.f64: D0.f64 = float("nan") -def _VOP3SDOp_V_MAD_U64_U32(S0, S1, S2, D0, D1): +def _VOP3SDOp_V_MAD_U64_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) _full = ((S0.u32) * (S1.u32) + (S2.u64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) -def _VOP3SDOp_V_MAD_I64_I32(S0, S1, S2, D0, D1): +def _VOP3SDOp_V_MAD_I64_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) _full = ((S0.i32) * (S1.i32) + (S2.i64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) -def _VOP3SDOp_V_ADD_CO_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP3SDOp_V_ADD_CO_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -6372,7 +6372,7 @@ def _VOP3SDOp_V_ADD_CO_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 -def _VOP3SDOp_V_SUB_CO_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP3SDOp_V_SUB_CO_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -6381,7 +6381,7 @@ def _VOP3SDOp_V_SUB_CO_U32(S0, S1, D0, VCC, tmp, laneId): VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) D0.u32 = tmp.u32 -def _VOP3SDOp_V_SUBREV_CO_U32(S0, S1, D0, VCC, tmp, laneId): +def _VOP3SDOp_V_SUBREV_CO_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -6391,19 +6391,19 @@ def _VOP3SDOp_V_SUBREV_CO_U32(S0, S1, D0, VCC, tmp, laneId): D0.u32 = tmp.u32 VOP3SDOp_FUNCTIONS = { - VOP3SDOp.V_ADD_CO_CI_U32: (_VOP3SDOp_V_ADD_CO_CI_U32, (False, False, False, False, False, True, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP3SDOp.V_SUB_CO_CI_U32: (_VOP3SDOp_V_SUB_CO_CI_U32, (False, False, False, False, False, True, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP3SDOp.V_SUBREV_CO_CI_U32: (_VOP3SDOp_V_SUBREV_CO_CI_U32, (False, False, False, False, False, True, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP3SDOp.V_DIV_SCALE_F32: (_VOP3SDOp_V_DIV_SCALE_F32, (False, False, False, False, True, True, True, False, ('S0', 'S1', 'S2', 'D0', 'VCC'))), - VOP3SDOp.V_DIV_SCALE_F64: (_VOP3SDOp_V_DIV_SCALE_F64, (True, False, False, False, True, True, True, False, ('S0', 'S1', 'S2', 'D0', 'VCC'))), - VOP3SDOp.V_MAD_U64_U32: (_VOP3SDOp_V_MAD_U64_U32, (True, True, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'D1'))), - VOP3SDOp.V_MAD_I64_I32: (_VOP3SDOp_V_MAD_I64_I32, (True, True, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'D1'))), - VOP3SDOp.V_ADD_CO_U32: (_VOP3SDOp_V_ADD_CO_U32, (False, False, False, False, False, True, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP3SDOp.V_SUB_CO_U32: (_VOP3SDOp_V_SUB_CO_U32, (False, False, False, False, False, True, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), - VOP3SDOp.V_SUBREV_CO_U32: (_VOP3SDOp_V_SUBREV_CO_U32, (False, False, False, False, False, True, True, False, ('S0', 'S1', 'D0', 'VCC', 'tmp', 'laneId'))), + VOP3SDOp.V_ADD_CO_CI_U32: _VOP3SDOp_V_ADD_CO_CI_U32, + VOP3SDOp.V_SUB_CO_CI_U32: _VOP3SDOp_V_SUB_CO_CI_U32, + VOP3SDOp.V_SUBREV_CO_CI_U32: _VOP3SDOp_V_SUBREV_CO_CI_U32, + VOP3SDOp.V_DIV_SCALE_F32: _VOP3SDOp_V_DIV_SCALE_F32, + VOP3SDOp.V_DIV_SCALE_F64: _VOP3SDOp_V_DIV_SCALE_F64, + VOP3SDOp.V_MAD_U64_U32: _VOP3SDOp_V_MAD_U64_U32, + VOP3SDOp.V_MAD_I64_I32: _VOP3SDOp_V_MAD_I64_I32, + VOP3SDOp.V_ADD_CO_U32: _VOP3SDOp_V_ADD_CO_U32, + VOP3SDOp.V_SUB_CO_U32: _VOP3SDOp_V_SUB_CO_U32, + VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, tmp): +def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; # D0.b32 = tmp.b32 @@ -6411,7 +6411,7 @@ def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, tmp): tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -6419,7 +6419,7 @@ def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, D0, tmp): tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_ADD_I16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -6427,7 +6427,7 @@ def _VOP3POp_V_PK_ADD_I16(S0, S1, D0, tmp): tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_SUB_I16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -6435,7 +6435,7 @@ def _VOP3POp_V_PK_SUB_I16(S0, S1, D0, tmp): tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -6443,7 +6443,7 @@ def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, D0, tmp): tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -6451,7 +6451,7 @@ def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, D0, tmp): tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -6459,7 +6459,7 @@ def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, D0, tmp): tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MAX_I16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -6467,7 +6467,7 @@ def _VOP3POp_V_PK_MAX_I16(S0, S1, D0, tmp): tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MIN_I16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -6475,7 +6475,7 @@ def _VOP3POp_V_PK_MIN_I16(S0, S1, D0, tmp): tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, tmp): +def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; # D0.b32 = tmp.b32 @@ -6483,7 +6483,7 @@ def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, tmp): tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_ADD_U16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -6491,7 +6491,7 @@ def _VOP3POp_V_PK_ADD_U16(S0, S1, D0, tmp): tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_SUB_U16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -6499,7 +6499,7 @@ def _VOP3POp_V_PK_SUB_U16(S0, S1, D0, tmp): tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MAX_U16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -6507,7 +6507,7 @@ def _VOP3POp_V_PK_MAX_U16(S0, S1, D0, tmp): tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MIN_U16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -6515,7 +6515,7 @@ def _VOP3POp_V_PK_MIN_U16(S0, S1, D0, tmp): tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, tmp): +def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # declare tmp : 32'B; # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); @@ -6524,7 +6524,7 @@ def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, tmp): tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) D0.b32 = tmp -def _VOP3POp_V_PK_ADD_F16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; # D0.b32 = tmp.b32 @@ -6532,7 +6532,7 @@ def _VOP3POp_V_PK_ADD_F16(S0, S1, D0, tmp): tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MUL_F16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MUL_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; # D0.b32 = tmp.b32 @@ -6540,7 +6540,7 @@ def _VOP3POp_V_PK_MUL_F16(S0, S1, D0, tmp): tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MIN_F16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MIN_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); # D0.b32 = tmp.b32 @@ -6548,7 +6548,7 @@ def _VOP3POp_V_PK_MIN_F16(S0, S1, D0, tmp): tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16) D0.b32 = tmp.b32 -def _VOP3POp_V_PK_MAX_F16(S0, S1, D0, tmp): +def _VOP3POp_V_PK_MAX_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); # D0.b32 = tmp.b32 @@ -6556,7 +6556,7 @@ def _VOP3POp_V_PK_MAX_F16(S0, S1, D0, tmp): tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16) D0.b32 = tmp.b32 -def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, tmp): +def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S2.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -6566,7 +6566,7 @@ def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, tmp): tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp -def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, tmp): +def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S2.u32; # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); @@ -6580,7 +6580,7 @@ def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, tmp): tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) D0.u32 = tmp -def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, tmp): +def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S2.u32; # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); @@ -6602,7 +6602,7 @@ def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, tmp): tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) D0.u32 = tmp -def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, tmp): +def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # tmp = S2.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -6613,572 +6613,572 @@ def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, tmp): D0.f32 = tmp VOP3POp_FUNCTIONS = { - VOP3POp.V_PK_MAD_I16: (_VOP3POp_V_PK_MAD_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3POp.V_PK_MUL_LO_U16: (_VOP3POp_V_PK_MUL_LO_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_ADD_I16: (_VOP3POp_V_PK_ADD_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_SUB_I16: (_VOP3POp_V_PK_SUB_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_LSHLREV_B16: (_VOP3POp_V_PK_LSHLREV_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_LSHRREV_B16: (_VOP3POp_V_PK_LSHRREV_B16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_ASHRREV_I16: (_VOP3POp_V_PK_ASHRREV_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MAX_I16: (_VOP3POp_V_PK_MAX_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MIN_I16: (_VOP3POp_V_PK_MIN_I16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MAD_U16: (_VOP3POp_V_PK_MAD_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3POp.V_PK_ADD_U16: (_VOP3POp_V_PK_ADD_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_SUB_U16: (_VOP3POp_V_PK_SUB_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MAX_U16: (_VOP3POp_V_PK_MAX_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MIN_U16: (_VOP3POp_V_PK_MIN_U16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_FMA_F16: (_VOP3POp_V_PK_FMA_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3POp.V_PK_ADD_F16: (_VOP3POp_V_PK_ADD_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MUL_F16: (_VOP3POp_V_PK_MUL_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MIN_F16: (_VOP3POp_V_PK_MIN_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_PK_MAX_F16: (_VOP3POp_V_PK_MAX_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'D0', 'tmp'))), - VOP3POp.V_DOT2_F32_F16: (_VOP3POp_V_DOT2_F32_F16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3POp.V_DOT4_U32_U8: (_VOP3POp_V_DOT4_U32_U8, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3POp.V_DOT8_U32_U4: (_VOP3POp_V_DOT8_U32_U4, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), - VOP3POp.V_DOT2_F32_BF16: (_VOP3POp_V_DOT2_F32_BF16, (False, False, False, False, False, False, False, False, ('S0', 'S1', 'S2', 'D0', 'tmp'))), + VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, + VOP3POp.V_PK_MUL_LO_U16: _VOP3POp_V_PK_MUL_LO_U16, + VOP3POp.V_PK_ADD_I16: _VOP3POp_V_PK_ADD_I16, + VOP3POp.V_PK_SUB_I16: _VOP3POp_V_PK_SUB_I16, + VOP3POp.V_PK_LSHLREV_B16: _VOP3POp_V_PK_LSHLREV_B16, + VOP3POp.V_PK_LSHRREV_B16: _VOP3POp_V_PK_LSHRREV_B16, + VOP3POp.V_PK_ASHRREV_I16: _VOP3POp_V_PK_ASHRREV_I16, + VOP3POp.V_PK_MAX_I16: _VOP3POp_V_PK_MAX_I16, + VOP3POp.V_PK_MIN_I16: _VOP3POp_V_PK_MIN_I16, + VOP3POp.V_PK_MAD_U16: _VOP3POp_V_PK_MAD_U16, + VOP3POp.V_PK_ADD_U16: _VOP3POp_V_PK_ADD_U16, + VOP3POp.V_PK_SUB_U16: _VOP3POp_V_PK_SUB_U16, + VOP3POp.V_PK_MAX_U16: _VOP3POp_V_PK_MAX_U16, + VOP3POp.V_PK_MIN_U16: _VOP3POp_V_PK_MIN_U16, + VOP3POp.V_PK_FMA_F16: _VOP3POp_V_PK_FMA_F16, + VOP3POp.V_PK_ADD_F16: _VOP3POp_V_PK_ADD_F16, + VOP3POp.V_PK_MUL_F16: _VOP3POp_V_PK_MUL_F16, + VOP3POp.V_PK_MIN_F16: _VOP3POp_V_PK_MIN_F16, + VOP3POp.V_PK_MAX_F16: _VOP3POp_V_PK_MAX_F16, + VOP3POp.V_DOT2_F32_F16: _VOP3POp_V_DOT2_F32_F16, + VOP3POp.V_DOT4_U32_U8: _VOP3POp_V_DOT4_U32_U8, + VOP3POp.V_DOT8_U32_U4: _VOP3POp_V_DOT8_U32_U4, + VOP3POp.V_DOT2_F32_BF16: _VOP3POp_V_DOT2_F32_BF16, } -def _VOPCOp_V_CMP_F_F16(D0, VCC, laneId): +def _VOPCOp_V_CMP_F_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOPCOp_V_CMP_LT_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 < S1.f16 -def _VOPCOp_V_CMP_EQ_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 == S1.f16 -def _VOPCOp_V_CMP_LE_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 <= S1.f16 -def _VOPCOp_V_CMP_GT_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 > S1.f16 -def _VOPCOp_V_CMP_LG_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 != S1.f16 -def _VOPCOp_V_CMP_GE_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f16 >= S1.f16 -def _VOPCOp_V_CMP_O_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) -def _VOPCOp_V_CMP_U_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) -def _VOPCOp_V_CMP_NGE_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 >= S1.f16) -def _VOPCOp_V_CMP_NLG_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 != S1.f16) -def _VOPCOp_V_CMP_NGT_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 > S1.f16) -def _VOPCOp_V_CMP_NLE_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 <= S1.f16) -def _VOPCOp_V_CMP_NEQ_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 == S1.f16) -def _VOPCOp_V_CMP_NLT_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f16 < S1.f16) -def _VOPCOp_V_CMP_T_F16(D0, VCC, laneId): +def _VOPCOp_V_CMP_T_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOPCOp_V_CMP_F_F32(D0, VCC, laneId): +def _VOPCOp_V_CMP_F_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOPCOp_V_CMP_LT_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 < S1.f32 -def _VOPCOp_V_CMP_EQ_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 == S1.f32 -def _VOPCOp_V_CMP_LE_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 <= S1.f32 -def _VOPCOp_V_CMP_GT_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 > S1.f32 -def _VOPCOp_V_CMP_LG_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 != S1.f32 -def _VOPCOp_V_CMP_GE_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f32 >= S1.f32 -def _VOPCOp_V_CMP_O_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) -def _VOPCOp_V_CMP_U_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) -def _VOPCOp_V_CMP_NGE_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 >= S1.f32) -def _VOPCOp_V_CMP_NLG_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 != S1.f32) -def _VOPCOp_V_CMP_NGT_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 > S1.f32) -def _VOPCOp_V_CMP_NLE_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 <= S1.f32) -def _VOPCOp_V_CMP_NEQ_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 == S1.f32) -def _VOPCOp_V_CMP_NLT_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f32 < S1.f32) -def _VOPCOp_V_CMP_T_F32(D0, VCC, laneId): +def _VOPCOp_V_CMP_T_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOPCOp_V_CMP_F_F64(D0, VCC, laneId): +def _VOPCOp_V_CMP_F_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOPCOp_V_CMP_LT_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 < S1.f64 -def _VOPCOp_V_CMP_EQ_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 == S1.f64 -def _VOPCOp_V_CMP_LE_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 <= S1.f64 -def _VOPCOp_V_CMP_GT_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 > S1.f64 -def _VOPCOp_V_CMP_LG_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 != S1.f64 -def _VOPCOp_V_CMP_GE_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.f64 >= S1.f64 -def _VOPCOp_V_CMP_O_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) -def _VOPCOp_V_CMP_U_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) -def _VOPCOp_V_CMP_NGE_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 >= S1.f64) -def _VOPCOp_V_CMP_NLG_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 != S1.f64) -def _VOPCOp_V_CMP_NGT_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 > S1.f64) -def _VOPCOp_V_CMP_NLE_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 <= S1.f64) -def _VOPCOp_V_CMP_NEQ_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 == S1.f64) -def _VOPCOp_V_CMP_NLT_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= # // D0 = VCC in VOPC encoding. D0.u64[laneId] = not (S0.f64 < S1.f64) -def _VOPCOp_V_CMP_T_F64(D0, VCC, laneId): +def _VOPCOp_V_CMP_T_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOPCOp_V_CMP_LT_I16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 < S1.i16 -def _VOPCOp_V_CMP_EQ_I16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 == S1.i16 -def _VOPCOp_V_CMP_LE_I16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 <= S1.i16 -def _VOPCOp_V_CMP_GT_I16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 > S1.i16 -def _VOPCOp_V_CMP_NE_I16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 != S1.i16 -def _VOPCOp_V_CMP_GE_I16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i16 >= S1.i16 -def _VOPCOp_V_CMP_LT_U16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 < S1.u16 -def _VOPCOp_V_CMP_EQ_U16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 == S1.u16 -def _VOPCOp_V_CMP_LE_U16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 <= S1.u16 -def _VOPCOp_V_CMP_GT_U16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 > S1.u16 -def _VOPCOp_V_CMP_NE_U16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 != S1.u16 -def _VOPCOp_V_CMP_GE_U16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u16 >= S1.u16 -def _VOPCOp_V_CMP_F_I32(D0, VCC, laneId): +def _VOPCOp_V_CMP_F_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOPCOp_V_CMP_LT_I32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 < S1.i32 -def _VOPCOp_V_CMP_EQ_I32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 == S1.i32 -def _VOPCOp_V_CMP_LE_I32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 <= S1.i32 -def _VOPCOp_V_CMP_GT_I32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 > S1.i32 -def _VOPCOp_V_CMP_NE_I32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 != S1.i32 -def _VOPCOp_V_CMP_GE_I32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i32 >= S1.i32 -def _VOPCOp_V_CMP_T_I32(D0, VCC, laneId): +def _VOPCOp_V_CMP_T_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOPCOp_V_CMP_F_U32(D0, VCC, laneId): +def _VOPCOp_V_CMP_F_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOPCOp_V_CMP_LT_U32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 < S1.u32 -def _VOPCOp_V_CMP_EQ_U32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 == S1.u32 -def _VOPCOp_V_CMP_LE_U32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 <= S1.u32 -def _VOPCOp_V_CMP_GT_U32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 > S1.u32 -def _VOPCOp_V_CMP_NE_U32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 != S1.u32 -def _VOPCOp_V_CMP_GE_U32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u32 >= S1.u32 -def _VOPCOp_V_CMP_T_U32(D0, VCC, laneId): +def _VOPCOp_V_CMP_T_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOPCOp_V_CMP_F_I64(D0, VCC, laneId): +def _VOPCOp_V_CMP_F_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOPCOp_V_CMP_LT_I64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 < S1.i64 -def _VOPCOp_V_CMP_EQ_I64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 == S1.i64 -def _VOPCOp_V_CMP_LE_I64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 <= S1.i64 -def _VOPCOp_V_CMP_GT_I64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 > S1.i64 -def _VOPCOp_V_CMP_NE_I64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 != S1.i64 -def _VOPCOp_V_CMP_GE_I64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.i64 >= S1.i64 -def _VOPCOp_V_CMP_T_I64(D0, VCC, laneId): +def _VOPCOp_V_CMP_T_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOPCOp_V_CMP_F_U64(D0, VCC, laneId): +def _VOPCOp_V_CMP_F_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 0 -def _VOPCOp_V_CMP_LT_U64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 < S1.u64 -def _VOPCOp_V_CMP_EQ_U64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 == S1.u64 -def _VOPCOp_V_CMP_LE_U64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 <= S1.u64 -def _VOPCOp_V_CMP_GT_U64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 > S1.u64 -def _VOPCOp_V_CMP_NE_U64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 != S1.u64 -def _VOPCOp_V_CMP_GE_U64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = S0.u64 >= S1.u64 -def _VOPCOp_V_CMP_T_U64(D0, VCC, laneId): +def _VOPCOp_V_CMP_T_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0.u64[laneId] = 1 -def _VOPCOp_V_CMP_CLASS_F16(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -7224,7 +7224,7 @@ def _VOPCOp_V_CMP_CLASS_F16(S0, S1, D0, VCC, laneId): result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result -def _VOPCOp_V_CMP_CLASS_F32(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -7270,7 +7270,7 @@ def _VOPCOp_V_CMP_CLASS_F32(S0, S1, D0, VCC, laneId): result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result -def _VOPCOp_V_CMP_CLASS_F64(S0, S1, D0, VCC, laneId): +def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -7316,402 +7316,402 @@ def _VOPCOp_V_CMP_CLASS_F64(S0, S1, D0, VCC, laneId): result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result -def _VOPCOp_V_CMPX_F_F16(EXEC, laneId): +def _VOPCOp_V_CMPX_F_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOPCOp_V_CMPX_LT_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 < S1.f16 EXEC.u64[laneId] = S0.f16 < S1.f16 -def _VOPCOp_V_CMPX_EQ_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 EXEC.u64[laneId] = S0.f16 == S1.f16 -def _VOPCOp_V_CMPX_LE_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 <= S1.f16 EXEC.u64[laneId] = S0.f16 <= S1.f16 -def _VOPCOp_V_CMPX_GT_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 > S1.f16 EXEC.u64[laneId] = S0.f16 > S1.f16 -def _VOPCOp_V_CMPX_LG_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 <> S1.f16 EXEC.u64[laneId] = S0.f16 != S1.f16 -def _VOPCOp_V_CMPX_GE_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f16 >= S1.f16 EXEC.u64[laneId] = S0.f16 >= S1.f16 -def _VOPCOp_V_CMPX_O_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) -def _VOPCOp_V_CMPX_U_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) -def _VOPCOp_V_CMPX_NGE_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < EXEC.u64[laneId] = not (S0.f16 >= S1.f16) -def _VOPCOp_V_CMPX_NLG_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == EXEC.u64[laneId] = not (S0.f16 != S1.f16) -def _VOPCOp_V_CMPX_NGT_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= EXEC.u64[laneId] = not (S0.f16 > S1.f16) -def _VOPCOp_V_CMPX_NLE_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > EXEC.u64[laneId] = not (S0.f16 <= S1.f16) -def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != EXEC.u64[laneId] = not (S0.f16 == S1.f16) -def _VOPCOp_V_CMPX_NLT_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= EXEC.u64[laneId] = not (S0.f16 < S1.f16) -def _VOPCOp_V_CMPX_T_F16(EXEC, laneId): +def _VOPCOp_V_CMPX_T_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOPCOp_V_CMPX_F_F32(EXEC, laneId): +def _VOPCOp_V_CMPX_F_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOPCOp_V_CMPX_LT_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 < S1.f32 EXEC.u64[laneId] = S0.f32 < S1.f32 -def _VOPCOp_V_CMPX_EQ_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 EXEC.u64[laneId] = S0.f32 == S1.f32 -def _VOPCOp_V_CMPX_LE_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 <= S1.f32 EXEC.u64[laneId] = S0.f32 <= S1.f32 -def _VOPCOp_V_CMPX_GT_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 > S1.f32 EXEC.u64[laneId] = S0.f32 > S1.f32 -def _VOPCOp_V_CMPX_LG_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 <> S1.f32 EXEC.u64[laneId] = S0.f32 != S1.f32 -def _VOPCOp_V_CMPX_GE_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f32 >= S1.f32 EXEC.u64[laneId] = S0.f32 >= S1.f32 -def _VOPCOp_V_CMPX_O_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) -def _VOPCOp_V_CMPX_U_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) -def _VOPCOp_V_CMPX_NGE_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < EXEC.u64[laneId] = not (S0.f32 >= S1.f32) -def _VOPCOp_V_CMPX_NLG_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == EXEC.u64[laneId] = not (S0.f32 != S1.f32) -def _VOPCOp_V_CMPX_NGT_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= EXEC.u64[laneId] = not (S0.f32 > S1.f32) -def _VOPCOp_V_CMPX_NLE_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > EXEC.u64[laneId] = not (S0.f32 <= S1.f32) -def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != EXEC.u64[laneId] = not (S0.f32 == S1.f32) -def _VOPCOp_V_CMPX_NLT_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= EXEC.u64[laneId] = not (S0.f32 < S1.f32) -def _VOPCOp_V_CMPX_T_F32(EXEC, laneId): +def _VOPCOp_V_CMPX_T_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOPCOp_V_CMPX_F_F64(EXEC, laneId): +def _VOPCOp_V_CMPX_F_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOPCOp_V_CMPX_LT_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 < S1.f64 EXEC.u64[laneId] = S0.f64 < S1.f64 -def _VOPCOp_V_CMPX_EQ_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 EXEC.u64[laneId] = S0.f64 == S1.f64 -def _VOPCOp_V_CMPX_LE_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 <= S1.f64 EXEC.u64[laneId] = S0.f64 <= S1.f64 -def _VOPCOp_V_CMPX_GT_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 > S1.f64 EXEC.u64[laneId] = S0.f64 > S1.f64 -def _VOPCOp_V_CMPX_LG_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 <> S1.f64 EXEC.u64[laneId] = S0.f64 != S1.f64 -def _VOPCOp_V_CMPX_GE_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.f64 >= S1.f64 EXEC.u64[laneId] = S0.f64 >= S1.f64 -def _VOPCOp_V_CMPX_O_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) -def _VOPCOp_V_CMPX_U_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) -def _VOPCOp_V_CMPX_NGE_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < EXEC.u64[laneId] = not (S0.f64 >= S1.f64) -def _VOPCOp_V_CMPX_NLG_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == EXEC.u64[laneId] = not (S0.f64 != S1.f64) -def _VOPCOp_V_CMPX_NGT_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= EXEC.u64[laneId] = not (S0.f64 > S1.f64) -def _VOPCOp_V_CMPX_NLE_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > EXEC.u64[laneId] = not (S0.f64 <= S1.f64) -def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != EXEC.u64[laneId] = not (S0.f64 == S1.f64) -def _VOPCOp_V_CMPX_NLT_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= EXEC.u64[laneId] = not (S0.f64 < S1.f64) -def _VOPCOp_V_CMPX_T_F64(EXEC, laneId): +def _VOPCOp_V_CMPX_T_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOPCOp_V_CMPX_LT_I16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 < S1.i16 EXEC.u64[laneId] = S0.i16 < S1.i16 -def _VOPCOp_V_CMPX_EQ_I16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 EXEC.u64[laneId] = S0.i16 == S1.i16 -def _VOPCOp_V_CMPX_LE_I16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 <= S1.i16 EXEC.u64[laneId] = S0.i16 <= S1.i16 -def _VOPCOp_V_CMPX_GT_I16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 > S1.i16 EXEC.u64[laneId] = S0.i16 > S1.i16 -def _VOPCOp_V_CMPX_NE_I16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 <> S1.i16 EXEC.u64[laneId] = S0.i16 != S1.i16 -def _VOPCOp_V_CMPX_GE_I16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i16 >= S1.i16 EXEC.u64[laneId] = S0.i16 >= S1.i16 -def _VOPCOp_V_CMPX_LT_U16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 < S1.u16 EXEC.u64[laneId] = S0.u16 < S1.u16 -def _VOPCOp_V_CMPX_EQ_U16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 EXEC.u64[laneId] = S0.u16 == S1.u16 -def _VOPCOp_V_CMPX_LE_U16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 <= S1.u16 EXEC.u64[laneId] = S0.u16 <= S1.u16 -def _VOPCOp_V_CMPX_GT_U16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 > S1.u16 EXEC.u64[laneId] = S0.u16 > S1.u16 -def _VOPCOp_V_CMPX_NE_U16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 <> S1.u16 EXEC.u64[laneId] = S0.u16 != S1.u16 -def _VOPCOp_V_CMPX_GE_U16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u16 >= S1.u16 EXEC.u64[laneId] = S0.u16 >= S1.u16 -def _VOPCOp_V_CMPX_F_I32(EXEC, laneId): +def _VOPCOp_V_CMPX_F_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOPCOp_V_CMPX_LT_I32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 < S1.i32 EXEC.u64[laneId] = S0.i32 < S1.i32 -def _VOPCOp_V_CMPX_EQ_I32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 EXEC.u64[laneId] = S0.i32 == S1.i32 -def _VOPCOp_V_CMPX_LE_I32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 <= S1.i32 EXEC.u64[laneId] = S0.i32 <= S1.i32 -def _VOPCOp_V_CMPX_GT_I32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 > S1.i32 EXEC.u64[laneId] = S0.i32 > S1.i32 -def _VOPCOp_V_CMPX_NE_I32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 <> S1.i32 EXEC.u64[laneId] = S0.i32 != S1.i32 -def _VOPCOp_V_CMPX_GE_I32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i32 >= S1.i32 EXEC.u64[laneId] = S0.i32 >= S1.i32 -def _VOPCOp_V_CMPX_T_I32(EXEC, laneId): +def _VOPCOp_V_CMPX_T_I32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOPCOp_V_CMPX_F_U32(EXEC, laneId): +def _VOPCOp_V_CMPX_F_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOPCOp_V_CMPX_LT_U32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 < S1.u32 EXEC.u64[laneId] = S0.u32 < S1.u32 -def _VOPCOp_V_CMPX_EQ_U32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 EXEC.u64[laneId] = S0.u32 == S1.u32 -def _VOPCOp_V_CMPX_LE_U32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 <= S1.u32 EXEC.u64[laneId] = S0.u32 <= S1.u32 -def _VOPCOp_V_CMPX_GT_U32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 > S1.u32 EXEC.u64[laneId] = S0.u32 > S1.u32 -def _VOPCOp_V_CMPX_NE_U32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 <> S1.u32 EXEC.u64[laneId] = S0.u32 != S1.u32 -def _VOPCOp_V_CMPX_GE_U32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u32 >= S1.u32 EXEC.u64[laneId] = S0.u32 >= S1.u32 -def _VOPCOp_V_CMPX_T_U32(EXEC, laneId): +def _VOPCOp_V_CMPX_T_U32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOPCOp_V_CMPX_F_I64(EXEC, laneId): +def _VOPCOp_V_CMPX_F_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOPCOp_V_CMPX_LT_I64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 < S1.i64 EXEC.u64[laneId] = S0.i64 < S1.i64 -def _VOPCOp_V_CMPX_EQ_I64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 EXEC.u64[laneId] = S0.i64 == S1.i64 -def _VOPCOp_V_CMPX_LE_I64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 <= S1.i64 EXEC.u64[laneId] = S0.i64 <= S1.i64 -def _VOPCOp_V_CMPX_GT_I64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 > S1.i64 EXEC.u64[laneId] = S0.i64 > S1.i64 -def _VOPCOp_V_CMPX_NE_I64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 <> S1.i64 EXEC.u64[laneId] = S0.i64 != S1.i64 -def _VOPCOp_V_CMPX_GE_I64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.i64 >= S1.i64 EXEC.u64[laneId] = S0.i64 >= S1.i64 -def _VOPCOp_V_CMPX_T_I64(EXEC, laneId): +def _VOPCOp_V_CMPX_T_I64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOPCOp_V_CMPX_F_U64(EXEC, laneId): +def _VOPCOp_V_CMPX_F_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'0U EXEC.u64[laneId] = 0 -def _VOPCOp_V_CMPX_LT_U64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 < S1.u64 EXEC.u64[laneId] = S0.u64 < S1.u64 -def _VOPCOp_V_CMPX_EQ_U64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 EXEC.u64[laneId] = S0.u64 == S1.u64 -def _VOPCOp_V_CMPX_LE_U64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 <= S1.u64 EXEC.u64[laneId] = S0.u64 <= S1.u64 -def _VOPCOp_V_CMPX_GT_U64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 > S1.u64 EXEC.u64[laneId] = S0.u64 > S1.u64 -def _VOPCOp_V_CMPX_NE_U64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 <> S1.u64 EXEC.u64[laneId] = S0.u64 != S1.u64 -def _VOPCOp_V_CMPX_GE_U64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = S0.u64 >= S1.u64 EXEC.u64[laneId] = S0.u64 >= S1.u64 -def _VOPCOp_V_CMPX_T_U64(EXEC, laneId): +def _VOPCOp_V_CMPX_T_U64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # EXEC.u64[laneId] = 1'1U EXEC.u64[laneId] = 1 -def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -7755,7 +7755,7 @@ def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, EXEC, laneId): result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result -def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -7799,7 +7799,7 @@ def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, EXEC, laneId): result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result -def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, EXEC, laneId): +def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -7844,204 +7844,203 @@ def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, EXEC, laneId): EXEC.u64[laneId] = result VOPCOp_FUNCTIONS = { - VOPCOp.V_CMP_F_F16: (_VOPCOp_V_CMP_F_F16, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_F16: (_VOPCOp_V_CMP_LT_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_F16: (_VOPCOp_V_CMP_EQ_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_F16: (_VOPCOp_V_CMP_LE_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_F16: (_VOPCOp_V_CMP_GT_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LG_F16: (_VOPCOp_V_CMP_LG_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_F16: (_VOPCOp_V_CMP_GE_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_O_F16: (_VOPCOp_V_CMP_O_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_U_F16: (_VOPCOp_V_CMP_U_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NGE_F16: (_VOPCOp_V_CMP_NGE_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLG_F16: (_VOPCOp_V_CMP_NLG_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NGT_F16: (_VOPCOp_V_CMP_NGT_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLE_F16: (_VOPCOp_V_CMP_NLE_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NEQ_F16: (_VOPCOp_V_CMP_NEQ_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLT_F16: (_VOPCOp_V_CMP_NLT_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_T_F16: (_VOPCOp_V_CMP_T_F16, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_F_F32: (_VOPCOp_V_CMP_F_F32, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_F32: (_VOPCOp_V_CMP_LT_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_F32: (_VOPCOp_V_CMP_EQ_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_F32: (_VOPCOp_V_CMP_LE_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_F32: (_VOPCOp_V_CMP_GT_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LG_F32: (_VOPCOp_V_CMP_LG_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_F32: (_VOPCOp_V_CMP_GE_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_O_F32: (_VOPCOp_V_CMP_O_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_U_F32: (_VOPCOp_V_CMP_U_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NGE_F32: (_VOPCOp_V_CMP_NGE_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLG_F32: (_VOPCOp_V_CMP_NLG_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NGT_F32: (_VOPCOp_V_CMP_NGT_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLE_F32: (_VOPCOp_V_CMP_NLE_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NEQ_F32: (_VOPCOp_V_CMP_NEQ_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLT_F32: (_VOPCOp_V_CMP_NLT_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_T_F32: (_VOPCOp_V_CMP_T_F32, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_F_F64: (_VOPCOp_V_CMP_F_F64, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_F64: (_VOPCOp_V_CMP_LT_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_F64: (_VOPCOp_V_CMP_EQ_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_F64: (_VOPCOp_V_CMP_LE_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_F64: (_VOPCOp_V_CMP_GT_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LG_F64: (_VOPCOp_V_CMP_LG_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_F64: (_VOPCOp_V_CMP_GE_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_O_F64: (_VOPCOp_V_CMP_O_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_U_F64: (_VOPCOp_V_CMP_U_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NGE_F64: (_VOPCOp_V_CMP_NGE_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLG_F64: (_VOPCOp_V_CMP_NLG_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NGT_F64: (_VOPCOp_V_CMP_NGT_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLE_F64: (_VOPCOp_V_CMP_NLE_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NEQ_F64: (_VOPCOp_V_CMP_NEQ_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NLT_F64: (_VOPCOp_V_CMP_NLT_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_T_F64: (_VOPCOp_V_CMP_T_F64, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_I16: (_VOPCOp_V_CMP_LT_I16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_I16: (_VOPCOp_V_CMP_EQ_I16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_I16: (_VOPCOp_V_CMP_LE_I16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_I16: (_VOPCOp_V_CMP_GT_I16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NE_I16: (_VOPCOp_V_CMP_NE_I16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_I16: (_VOPCOp_V_CMP_GE_I16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_U16: (_VOPCOp_V_CMP_LT_U16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_U16: (_VOPCOp_V_CMP_EQ_U16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_U16: (_VOPCOp_V_CMP_LE_U16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_U16: (_VOPCOp_V_CMP_GT_U16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NE_U16: (_VOPCOp_V_CMP_NE_U16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_U16: (_VOPCOp_V_CMP_GE_U16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_F_I32: (_VOPCOp_V_CMP_F_I32, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_I32: (_VOPCOp_V_CMP_LT_I32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_I32: (_VOPCOp_V_CMP_EQ_I32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_I32: (_VOPCOp_V_CMP_LE_I32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_I32: (_VOPCOp_V_CMP_GT_I32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NE_I32: (_VOPCOp_V_CMP_NE_I32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_I32: (_VOPCOp_V_CMP_GE_I32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_T_I32: (_VOPCOp_V_CMP_T_I32, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_F_U32: (_VOPCOp_V_CMP_F_U32, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_U32: (_VOPCOp_V_CMP_LT_U32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_U32: (_VOPCOp_V_CMP_EQ_U32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_U32: (_VOPCOp_V_CMP_LE_U32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_U32: (_VOPCOp_V_CMP_GT_U32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NE_U32: (_VOPCOp_V_CMP_NE_U32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_U32: (_VOPCOp_V_CMP_GE_U32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_T_U32: (_VOPCOp_V_CMP_T_U32, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_F_I64: (_VOPCOp_V_CMP_F_I64, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_I64: (_VOPCOp_V_CMP_LT_I64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_I64: (_VOPCOp_V_CMP_EQ_I64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_I64: (_VOPCOp_V_CMP_LE_I64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_I64: (_VOPCOp_V_CMP_GT_I64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NE_I64: (_VOPCOp_V_CMP_NE_I64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_I64: (_VOPCOp_V_CMP_GE_I64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_T_I64: (_VOPCOp_V_CMP_T_I64, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_F_U64: (_VOPCOp_V_CMP_F_U64, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LT_U64: (_VOPCOp_V_CMP_LT_U64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_EQ_U64: (_VOPCOp_V_CMP_EQ_U64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_LE_U64: (_VOPCOp_V_CMP_LE_U64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GT_U64: (_VOPCOp_V_CMP_GT_U64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_NE_U64: (_VOPCOp_V_CMP_NE_U64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_GE_U64: (_VOPCOp_V_CMP_GE_U64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_T_U64: (_VOPCOp_V_CMP_T_U64, (True, False, True, False, False, False, True, False, ('D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_CLASS_F16: (_VOPCOp_V_CMP_CLASS_F16, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_CLASS_F32: (_VOPCOp_V_CMP_CLASS_F32, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMP_CLASS_F64: (_VOPCOp_V_CMP_CLASS_F64, (True, False, True, False, False, False, True, False, ('S0', 'S1', 'D0', 'VCC', 'laneId'))), - VOPCOp.V_CMPX_F_F16: (_VOPCOp_V_CMPX_F_F16, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_F16: (_VOPCOp_V_CMPX_LT_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_F16: (_VOPCOp_V_CMPX_EQ_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_F16: (_VOPCOp_V_CMPX_LE_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_F16: (_VOPCOp_V_CMPX_GT_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LG_F16: (_VOPCOp_V_CMPX_LG_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_F16: (_VOPCOp_V_CMPX_GE_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_O_F16: (_VOPCOp_V_CMPX_O_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_U_F16: (_VOPCOp_V_CMPX_U_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NGE_F16: (_VOPCOp_V_CMPX_NGE_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLG_F16: (_VOPCOp_V_CMPX_NLG_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NGT_F16: (_VOPCOp_V_CMPX_NGT_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLE_F16: (_VOPCOp_V_CMPX_NLE_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NEQ_F16: (_VOPCOp_V_CMPX_NEQ_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLT_F16: (_VOPCOp_V_CMPX_NLT_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_T_F16: (_VOPCOp_V_CMPX_T_F16, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_F_F32: (_VOPCOp_V_CMPX_F_F32, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_F32: (_VOPCOp_V_CMPX_LT_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_F32: (_VOPCOp_V_CMPX_EQ_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_F32: (_VOPCOp_V_CMPX_LE_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_F32: (_VOPCOp_V_CMPX_GT_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LG_F32: (_VOPCOp_V_CMPX_LG_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_F32: (_VOPCOp_V_CMPX_GE_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_O_F32: (_VOPCOp_V_CMPX_O_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_U_F32: (_VOPCOp_V_CMPX_U_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NGE_F32: (_VOPCOp_V_CMPX_NGE_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLG_F32: (_VOPCOp_V_CMPX_NLG_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NGT_F32: (_VOPCOp_V_CMPX_NGT_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLE_F32: (_VOPCOp_V_CMPX_NLE_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NEQ_F32: (_VOPCOp_V_CMPX_NEQ_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLT_F32: (_VOPCOp_V_CMPX_NLT_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_T_F32: (_VOPCOp_V_CMPX_T_F32, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_F_F64: (_VOPCOp_V_CMPX_F_F64, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_F64: (_VOPCOp_V_CMPX_LT_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_F64: (_VOPCOp_V_CMPX_EQ_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_F64: (_VOPCOp_V_CMPX_LE_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_F64: (_VOPCOp_V_CMPX_GT_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LG_F64: (_VOPCOp_V_CMPX_LG_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_F64: (_VOPCOp_V_CMPX_GE_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_O_F64: (_VOPCOp_V_CMPX_O_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_U_F64: (_VOPCOp_V_CMPX_U_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NGE_F64: (_VOPCOp_V_CMPX_NGE_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLG_F64: (_VOPCOp_V_CMPX_NLG_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NGT_F64: (_VOPCOp_V_CMPX_NGT_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLE_F64: (_VOPCOp_V_CMPX_NLE_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NEQ_F64: (_VOPCOp_V_CMPX_NEQ_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NLT_F64: (_VOPCOp_V_CMPX_NLT_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_T_F64: (_VOPCOp_V_CMPX_T_F64, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_I16: (_VOPCOp_V_CMPX_LT_I16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_I16: (_VOPCOp_V_CMPX_EQ_I16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_I16: (_VOPCOp_V_CMPX_LE_I16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_I16: (_VOPCOp_V_CMPX_GT_I16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NE_I16: (_VOPCOp_V_CMPX_NE_I16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_I16: (_VOPCOp_V_CMPX_GE_I16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_U16: (_VOPCOp_V_CMPX_LT_U16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_U16: (_VOPCOp_V_CMPX_EQ_U16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_U16: (_VOPCOp_V_CMPX_LE_U16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_U16: (_VOPCOp_V_CMPX_GT_U16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NE_U16: (_VOPCOp_V_CMPX_NE_U16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_U16: (_VOPCOp_V_CMPX_GE_U16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_F_I32: (_VOPCOp_V_CMPX_F_I32, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_I32: (_VOPCOp_V_CMPX_LT_I32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_I32: (_VOPCOp_V_CMPX_EQ_I32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_I32: (_VOPCOp_V_CMPX_LE_I32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_I32: (_VOPCOp_V_CMPX_GT_I32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NE_I32: (_VOPCOp_V_CMPX_NE_I32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_I32: (_VOPCOp_V_CMPX_GE_I32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_T_I32: (_VOPCOp_V_CMPX_T_I32, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_F_U32: (_VOPCOp_V_CMPX_F_U32, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_U32: (_VOPCOp_V_CMPX_LT_U32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_U32: (_VOPCOp_V_CMPX_EQ_U32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_U32: (_VOPCOp_V_CMPX_LE_U32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_U32: (_VOPCOp_V_CMPX_GT_U32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NE_U32: (_VOPCOp_V_CMPX_NE_U32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_U32: (_VOPCOp_V_CMPX_GE_U32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_T_U32: (_VOPCOp_V_CMPX_T_U32, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_F_I64: (_VOPCOp_V_CMPX_F_I64, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_I64: (_VOPCOp_V_CMPX_LT_I64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_I64: (_VOPCOp_V_CMPX_EQ_I64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_I64: (_VOPCOp_V_CMPX_LE_I64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_I64: (_VOPCOp_V_CMPX_GT_I64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NE_I64: (_VOPCOp_V_CMPX_NE_I64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_I64: (_VOPCOp_V_CMPX_GE_I64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_T_I64: (_VOPCOp_V_CMPX_T_I64, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_F_U64: (_VOPCOp_V_CMPX_F_U64, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_LT_U64: (_VOPCOp_V_CMPX_LT_U64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_EQ_U64: (_VOPCOp_V_CMPX_EQ_U64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_LE_U64: (_VOPCOp_V_CMPX_LE_U64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GT_U64: (_VOPCOp_V_CMPX_GT_U64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_NE_U64: (_VOPCOp_V_CMPX_NE_U64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_GE_U64: (_VOPCOp_V_CMPX_GE_U64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_T_U64: (_VOPCOp_V_CMPX_T_U64, (False, False, False, True, False, False, False, True, ('EXEC', 'laneId'))), - VOPCOp.V_CMPX_CLASS_F16: (_VOPCOp_V_CMPX_CLASS_F16, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_CLASS_F32: (_VOPCOp_V_CMPX_CLASS_F32, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), - VOPCOp.V_CMPX_CLASS_F64: (_VOPCOp_V_CMPX_CLASS_F64, (False, False, False, True, False, False, False, True, ('S0', 'S1', 'EXEC', 'laneId'))), + VOPCOp.V_CMP_F_F16: _VOPCOp_V_CMP_F_F16, + VOPCOp.V_CMP_LT_F16: _VOPCOp_V_CMP_LT_F16, + VOPCOp.V_CMP_EQ_F16: _VOPCOp_V_CMP_EQ_F16, + VOPCOp.V_CMP_LE_F16: _VOPCOp_V_CMP_LE_F16, + VOPCOp.V_CMP_GT_F16: _VOPCOp_V_CMP_GT_F16, + VOPCOp.V_CMP_LG_F16: _VOPCOp_V_CMP_LG_F16, + VOPCOp.V_CMP_GE_F16: _VOPCOp_V_CMP_GE_F16, + VOPCOp.V_CMP_O_F16: _VOPCOp_V_CMP_O_F16, + VOPCOp.V_CMP_U_F16: _VOPCOp_V_CMP_U_F16, + VOPCOp.V_CMP_NGE_F16: _VOPCOp_V_CMP_NGE_F16, + VOPCOp.V_CMP_NLG_F16: _VOPCOp_V_CMP_NLG_F16, + VOPCOp.V_CMP_NGT_F16: _VOPCOp_V_CMP_NGT_F16, + VOPCOp.V_CMP_NLE_F16: _VOPCOp_V_CMP_NLE_F16, + VOPCOp.V_CMP_NEQ_F16: _VOPCOp_V_CMP_NEQ_F16, + VOPCOp.V_CMP_NLT_F16: _VOPCOp_V_CMP_NLT_F16, + VOPCOp.V_CMP_T_F16: _VOPCOp_V_CMP_T_F16, + VOPCOp.V_CMP_F_F32: _VOPCOp_V_CMP_F_F32, + VOPCOp.V_CMP_LT_F32: _VOPCOp_V_CMP_LT_F32, + VOPCOp.V_CMP_EQ_F32: _VOPCOp_V_CMP_EQ_F32, + VOPCOp.V_CMP_LE_F32: _VOPCOp_V_CMP_LE_F32, + VOPCOp.V_CMP_GT_F32: _VOPCOp_V_CMP_GT_F32, + VOPCOp.V_CMP_LG_F32: _VOPCOp_V_CMP_LG_F32, + VOPCOp.V_CMP_GE_F32: _VOPCOp_V_CMP_GE_F32, + VOPCOp.V_CMP_O_F32: _VOPCOp_V_CMP_O_F32, + VOPCOp.V_CMP_U_F32: _VOPCOp_V_CMP_U_F32, + VOPCOp.V_CMP_NGE_F32: _VOPCOp_V_CMP_NGE_F32, + VOPCOp.V_CMP_NLG_F32: _VOPCOp_V_CMP_NLG_F32, + VOPCOp.V_CMP_NGT_F32: _VOPCOp_V_CMP_NGT_F32, + VOPCOp.V_CMP_NLE_F32: _VOPCOp_V_CMP_NLE_F32, + VOPCOp.V_CMP_NEQ_F32: _VOPCOp_V_CMP_NEQ_F32, + VOPCOp.V_CMP_NLT_F32: _VOPCOp_V_CMP_NLT_F32, + VOPCOp.V_CMP_T_F32: _VOPCOp_V_CMP_T_F32, + VOPCOp.V_CMP_F_F64: _VOPCOp_V_CMP_F_F64, + VOPCOp.V_CMP_LT_F64: _VOPCOp_V_CMP_LT_F64, + VOPCOp.V_CMP_EQ_F64: _VOPCOp_V_CMP_EQ_F64, + VOPCOp.V_CMP_LE_F64: _VOPCOp_V_CMP_LE_F64, + VOPCOp.V_CMP_GT_F64: _VOPCOp_V_CMP_GT_F64, + VOPCOp.V_CMP_LG_F64: _VOPCOp_V_CMP_LG_F64, + VOPCOp.V_CMP_GE_F64: _VOPCOp_V_CMP_GE_F64, + VOPCOp.V_CMP_O_F64: _VOPCOp_V_CMP_O_F64, + VOPCOp.V_CMP_U_F64: _VOPCOp_V_CMP_U_F64, + VOPCOp.V_CMP_NGE_F64: _VOPCOp_V_CMP_NGE_F64, + VOPCOp.V_CMP_NLG_F64: _VOPCOp_V_CMP_NLG_F64, + VOPCOp.V_CMP_NGT_F64: _VOPCOp_V_CMP_NGT_F64, + VOPCOp.V_CMP_NLE_F64: _VOPCOp_V_CMP_NLE_F64, + VOPCOp.V_CMP_NEQ_F64: _VOPCOp_V_CMP_NEQ_F64, + VOPCOp.V_CMP_NLT_F64: _VOPCOp_V_CMP_NLT_F64, + VOPCOp.V_CMP_T_F64: _VOPCOp_V_CMP_T_F64, + VOPCOp.V_CMP_LT_I16: _VOPCOp_V_CMP_LT_I16, + VOPCOp.V_CMP_EQ_I16: _VOPCOp_V_CMP_EQ_I16, + VOPCOp.V_CMP_LE_I16: _VOPCOp_V_CMP_LE_I16, + VOPCOp.V_CMP_GT_I16: _VOPCOp_V_CMP_GT_I16, + VOPCOp.V_CMP_NE_I16: _VOPCOp_V_CMP_NE_I16, + VOPCOp.V_CMP_GE_I16: _VOPCOp_V_CMP_GE_I16, + VOPCOp.V_CMP_LT_U16: _VOPCOp_V_CMP_LT_U16, + VOPCOp.V_CMP_EQ_U16: _VOPCOp_V_CMP_EQ_U16, + VOPCOp.V_CMP_LE_U16: _VOPCOp_V_CMP_LE_U16, + VOPCOp.V_CMP_GT_U16: _VOPCOp_V_CMP_GT_U16, + VOPCOp.V_CMP_NE_U16: _VOPCOp_V_CMP_NE_U16, + VOPCOp.V_CMP_GE_U16: _VOPCOp_V_CMP_GE_U16, + VOPCOp.V_CMP_F_I32: _VOPCOp_V_CMP_F_I32, + VOPCOp.V_CMP_LT_I32: _VOPCOp_V_CMP_LT_I32, + VOPCOp.V_CMP_EQ_I32: _VOPCOp_V_CMP_EQ_I32, + VOPCOp.V_CMP_LE_I32: _VOPCOp_V_CMP_LE_I32, + VOPCOp.V_CMP_GT_I32: _VOPCOp_V_CMP_GT_I32, + VOPCOp.V_CMP_NE_I32: _VOPCOp_V_CMP_NE_I32, + VOPCOp.V_CMP_GE_I32: _VOPCOp_V_CMP_GE_I32, + VOPCOp.V_CMP_T_I32: _VOPCOp_V_CMP_T_I32, + VOPCOp.V_CMP_F_U32: _VOPCOp_V_CMP_F_U32, + VOPCOp.V_CMP_LT_U32: _VOPCOp_V_CMP_LT_U32, + VOPCOp.V_CMP_EQ_U32: _VOPCOp_V_CMP_EQ_U32, + VOPCOp.V_CMP_LE_U32: _VOPCOp_V_CMP_LE_U32, + VOPCOp.V_CMP_GT_U32: _VOPCOp_V_CMP_GT_U32, + VOPCOp.V_CMP_NE_U32: _VOPCOp_V_CMP_NE_U32, + VOPCOp.V_CMP_GE_U32: _VOPCOp_V_CMP_GE_U32, + VOPCOp.V_CMP_T_U32: _VOPCOp_V_CMP_T_U32, + VOPCOp.V_CMP_F_I64: _VOPCOp_V_CMP_F_I64, + VOPCOp.V_CMP_LT_I64: _VOPCOp_V_CMP_LT_I64, + VOPCOp.V_CMP_EQ_I64: _VOPCOp_V_CMP_EQ_I64, + VOPCOp.V_CMP_LE_I64: _VOPCOp_V_CMP_LE_I64, + VOPCOp.V_CMP_GT_I64: _VOPCOp_V_CMP_GT_I64, + VOPCOp.V_CMP_NE_I64: _VOPCOp_V_CMP_NE_I64, + VOPCOp.V_CMP_GE_I64: _VOPCOp_V_CMP_GE_I64, + VOPCOp.V_CMP_T_I64: _VOPCOp_V_CMP_T_I64, + VOPCOp.V_CMP_F_U64: _VOPCOp_V_CMP_F_U64, + VOPCOp.V_CMP_LT_U64: _VOPCOp_V_CMP_LT_U64, + VOPCOp.V_CMP_EQ_U64: _VOPCOp_V_CMP_EQ_U64, + VOPCOp.V_CMP_LE_U64: _VOPCOp_V_CMP_LE_U64, + VOPCOp.V_CMP_GT_U64: _VOPCOp_V_CMP_GT_U64, + VOPCOp.V_CMP_NE_U64: _VOPCOp_V_CMP_NE_U64, + VOPCOp.V_CMP_GE_U64: _VOPCOp_V_CMP_GE_U64, + VOPCOp.V_CMP_T_U64: _VOPCOp_V_CMP_T_U64, + VOPCOp.V_CMP_CLASS_F16: _VOPCOp_V_CMP_CLASS_F16, + VOPCOp.V_CMP_CLASS_F32: _VOPCOp_V_CMP_CLASS_F32, + VOPCOp.V_CMP_CLASS_F64: _VOPCOp_V_CMP_CLASS_F64, + VOPCOp.V_CMPX_F_F16: _VOPCOp_V_CMPX_F_F16, + VOPCOp.V_CMPX_LT_F16: _VOPCOp_V_CMPX_LT_F16, + VOPCOp.V_CMPX_EQ_F16: _VOPCOp_V_CMPX_EQ_F16, + VOPCOp.V_CMPX_LE_F16: _VOPCOp_V_CMPX_LE_F16, + VOPCOp.V_CMPX_GT_F16: _VOPCOp_V_CMPX_GT_F16, + VOPCOp.V_CMPX_LG_F16: _VOPCOp_V_CMPX_LG_F16, + VOPCOp.V_CMPX_GE_F16: _VOPCOp_V_CMPX_GE_F16, + VOPCOp.V_CMPX_O_F16: _VOPCOp_V_CMPX_O_F16, + VOPCOp.V_CMPX_U_F16: _VOPCOp_V_CMPX_U_F16, + VOPCOp.V_CMPX_NGE_F16: _VOPCOp_V_CMPX_NGE_F16, + VOPCOp.V_CMPX_NLG_F16: _VOPCOp_V_CMPX_NLG_F16, + VOPCOp.V_CMPX_NGT_F16: _VOPCOp_V_CMPX_NGT_F16, + VOPCOp.V_CMPX_NLE_F16: _VOPCOp_V_CMPX_NLE_F16, + VOPCOp.V_CMPX_NEQ_F16: _VOPCOp_V_CMPX_NEQ_F16, + VOPCOp.V_CMPX_NLT_F16: _VOPCOp_V_CMPX_NLT_F16, + VOPCOp.V_CMPX_T_F16: _VOPCOp_V_CMPX_T_F16, + VOPCOp.V_CMPX_F_F32: _VOPCOp_V_CMPX_F_F32, + VOPCOp.V_CMPX_LT_F32: _VOPCOp_V_CMPX_LT_F32, + VOPCOp.V_CMPX_EQ_F32: _VOPCOp_V_CMPX_EQ_F32, + VOPCOp.V_CMPX_LE_F32: _VOPCOp_V_CMPX_LE_F32, + VOPCOp.V_CMPX_GT_F32: _VOPCOp_V_CMPX_GT_F32, + VOPCOp.V_CMPX_LG_F32: _VOPCOp_V_CMPX_LG_F32, + VOPCOp.V_CMPX_GE_F32: _VOPCOp_V_CMPX_GE_F32, + VOPCOp.V_CMPX_O_F32: _VOPCOp_V_CMPX_O_F32, + VOPCOp.V_CMPX_U_F32: _VOPCOp_V_CMPX_U_F32, + VOPCOp.V_CMPX_NGE_F32: _VOPCOp_V_CMPX_NGE_F32, + VOPCOp.V_CMPX_NLG_F32: _VOPCOp_V_CMPX_NLG_F32, + VOPCOp.V_CMPX_NGT_F32: _VOPCOp_V_CMPX_NGT_F32, + VOPCOp.V_CMPX_NLE_F32: _VOPCOp_V_CMPX_NLE_F32, + VOPCOp.V_CMPX_NEQ_F32: _VOPCOp_V_CMPX_NEQ_F32, + VOPCOp.V_CMPX_NLT_F32: _VOPCOp_V_CMPX_NLT_F32, + VOPCOp.V_CMPX_T_F32: _VOPCOp_V_CMPX_T_F32, + VOPCOp.V_CMPX_F_F64: _VOPCOp_V_CMPX_F_F64, + VOPCOp.V_CMPX_LT_F64: _VOPCOp_V_CMPX_LT_F64, + VOPCOp.V_CMPX_EQ_F64: _VOPCOp_V_CMPX_EQ_F64, + VOPCOp.V_CMPX_LE_F64: _VOPCOp_V_CMPX_LE_F64, + VOPCOp.V_CMPX_GT_F64: _VOPCOp_V_CMPX_GT_F64, + VOPCOp.V_CMPX_LG_F64: _VOPCOp_V_CMPX_LG_F64, + VOPCOp.V_CMPX_GE_F64: _VOPCOp_V_CMPX_GE_F64, + VOPCOp.V_CMPX_O_F64: _VOPCOp_V_CMPX_O_F64, + VOPCOp.V_CMPX_U_F64: _VOPCOp_V_CMPX_U_F64, + VOPCOp.V_CMPX_NGE_F64: _VOPCOp_V_CMPX_NGE_F64, + VOPCOp.V_CMPX_NLG_F64: _VOPCOp_V_CMPX_NLG_F64, + VOPCOp.V_CMPX_NGT_F64: _VOPCOp_V_CMPX_NGT_F64, + VOPCOp.V_CMPX_NLE_F64: _VOPCOp_V_CMPX_NLE_F64, + VOPCOp.V_CMPX_NEQ_F64: _VOPCOp_V_CMPX_NEQ_F64, + VOPCOp.V_CMPX_NLT_F64: _VOPCOp_V_CMPX_NLT_F64, + VOPCOp.V_CMPX_T_F64: _VOPCOp_V_CMPX_T_F64, + VOPCOp.V_CMPX_LT_I16: _VOPCOp_V_CMPX_LT_I16, + VOPCOp.V_CMPX_EQ_I16: _VOPCOp_V_CMPX_EQ_I16, + VOPCOp.V_CMPX_LE_I16: _VOPCOp_V_CMPX_LE_I16, + VOPCOp.V_CMPX_GT_I16: _VOPCOp_V_CMPX_GT_I16, + VOPCOp.V_CMPX_NE_I16: _VOPCOp_V_CMPX_NE_I16, + VOPCOp.V_CMPX_GE_I16: _VOPCOp_V_CMPX_GE_I16, + VOPCOp.V_CMPX_LT_U16: _VOPCOp_V_CMPX_LT_U16, + VOPCOp.V_CMPX_EQ_U16: _VOPCOp_V_CMPX_EQ_U16, + VOPCOp.V_CMPX_LE_U16: _VOPCOp_V_CMPX_LE_U16, + VOPCOp.V_CMPX_GT_U16: _VOPCOp_V_CMPX_GT_U16, + VOPCOp.V_CMPX_NE_U16: _VOPCOp_V_CMPX_NE_U16, + VOPCOp.V_CMPX_GE_U16: _VOPCOp_V_CMPX_GE_U16, + VOPCOp.V_CMPX_F_I32: _VOPCOp_V_CMPX_F_I32, + VOPCOp.V_CMPX_LT_I32: _VOPCOp_V_CMPX_LT_I32, + VOPCOp.V_CMPX_EQ_I32: _VOPCOp_V_CMPX_EQ_I32, + VOPCOp.V_CMPX_LE_I32: _VOPCOp_V_CMPX_LE_I32, + VOPCOp.V_CMPX_GT_I32: _VOPCOp_V_CMPX_GT_I32, + VOPCOp.V_CMPX_NE_I32: _VOPCOp_V_CMPX_NE_I32, + VOPCOp.V_CMPX_GE_I32: _VOPCOp_V_CMPX_GE_I32, + VOPCOp.V_CMPX_T_I32: _VOPCOp_V_CMPX_T_I32, + VOPCOp.V_CMPX_F_U32: _VOPCOp_V_CMPX_F_U32, + VOPCOp.V_CMPX_LT_U32: _VOPCOp_V_CMPX_LT_U32, + VOPCOp.V_CMPX_EQ_U32: _VOPCOp_V_CMPX_EQ_U32, + VOPCOp.V_CMPX_LE_U32: _VOPCOp_V_CMPX_LE_U32, + VOPCOp.V_CMPX_GT_U32: _VOPCOp_V_CMPX_GT_U32, + VOPCOp.V_CMPX_NE_U32: _VOPCOp_V_CMPX_NE_U32, + VOPCOp.V_CMPX_GE_U32: _VOPCOp_V_CMPX_GE_U32, + VOPCOp.V_CMPX_T_U32: _VOPCOp_V_CMPX_T_U32, + VOPCOp.V_CMPX_F_I64: _VOPCOp_V_CMPX_F_I64, + VOPCOp.V_CMPX_LT_I64: _VOPCOp_V_CMPX_LT_I64, + VOPCOp.V_CMPX_EQ_I64: _VOPCOp_V_CMPX_EQ_I64, + VOPCOp.V_CMPX_LE_I64: _VOPCOp_V_CMPX_LE_I64, + VOPCOp.V_CMPX_GT_I64: _VOPCOp_V_CMPX_GT_I64, + VOPCOp.V_CMPX_NE_I64: _VOPCOp_V_CMPX_NE_I64, + VOPCOp.V_CMPX_GE_I64: _VOPCOp_V_CMPX_GE_I64, + VOPCOp.V_CMPX_T_I64: _VOPCOp_V_CMPX_T_I64, + VOPCOp.V_CMPX_F_U64: _VOPCOp_V_CMPX_F_U64, + VOPCOp.V_CMPX_LT_U64: _VOPCOp_V_CMPX_LT_U64, + VOPCOp.V_CMPX_EQ_U64: _VOPCOp_V_CMPX_EQ_U64, + VOPCOp.V_CMPX_LE_U64: _VOPCOp_V_CMPX_LE_U64, + VOPCOp.V_CMPX_GT_U64: _VOPCOp_V_CMPX_GT_U64, + VOPCOp.V_CMPX_NE_U64: _VOPCOp_V_CMPX_NE_U64, + VOPCOp.V_CMPX_GE_U64: _VOPCOp_V_CMPX_GE_U64, + VOPCOp.V_CMPX_T_U64: _VOPCOp_V_CMPX_T_U64, + VOPCOp.V_CMPX_CLASS_F16: _VOPCOp_V_CMPX_CLASS_F16, + VOPCOp.V_CMPX_CLASS_F32: _VOPCOp_V_CMPX_CLASS_F32, + VOPCOp.V_CMPX_CLASS_F64: _VOPCOp_V_CMPX_CLASS_F64, } # V_WRITELANE_B32: Write scalar to specific lane's VGPR (not in PDF pseudocode) -def _VOP3Op_V_WRITELANE_B32(S0, S1): +def _VOP3Op_V_WRITELANE_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): return (int(S1) & 0x1f, int(S0) & 0xffffffff) # (wr_lane, value) -# flags: (is_64, has_d1, is_cmp, is_cmpx, is_div_scale, has_sdst, uses_vcc, uses_exec, used_regs) -VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = (_VOP3Op_V_WRITELANE_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1'))) +VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = _VOP3Op_V_WRITELANE_B32 COMPILED_FUNCTIONS = { SOP1Op: SOP1Op_FUNCTIONS, diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index e37a85ee36..510e08f3b8 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -89,49 +89,32 @@ def _get_compiled() -> dict: if _COMPILED is None: _COMPILED = get_compiled_functions() return _COMPILED -# Flag indices: (is_64, has_d1, is_cmp, is_cmpx, is_div_scale, has_sdst, uses_vcc, uses_exec, used_regs) -_F_IS_64, _F_HAS_D1, _F_IS_CMP, _F_IS_CMPX, _F_IS_DIV_SCALE, _F_HAS_SDST, _F_USES_VCC, _F_USES_EXEC, _F_USED_REGS = range(9) +def _run_pcode(fn, op_cls, op, s0, s1, s2, d0, scc, vcc, lane, exec_mask, vdst_idx): + """Create Regs, run pseudocode, extract results.""" + # Determine flags from op_cls and op.name + is_div_scale = 'DIV_SCALE' in op.name + is_64 = op.name.endswith(('_B64', '_I64', '_U64', '_F64')) or op.name in ('V_MAD_U64_U32', 'V_MAD_I64_I32') + is_cmp = op_cls.__name__ == 'VOPCOp' and not op.name.startswith('V_CMPX') + is_cmpx = op_cls.__name__ == 'VOPCOp' and op.name.startswith('V_CMPX') + has_sdst = op_cls.__name__ == 'VOP3SDOp' -def _run_pcode(fn, flags, s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, vgprs, src0_idx, vdst_idx): - """Create Regs, run pseudocode, extract results based on flags.""" - used_regs = flags[_F_USED_REGS] - # Create only the Regs that are needed - regs = {} - if 'S0' in used_regs: regs['S0'] = Reg(s0) - if 'S1' in used_regs: regs['S1'] = Reg(s1) - if 'S2' in used_regs: regs['S2'] = Reg(s2) - if 'D0' in used_regs: regs['D0'] = Reg(s0 if flags[_F_IS_DIV_SCALE] else d0) - if 'D1' in used_regs: regs['D1'] = Reg(0) - if 'SCC' in used_regs: regs['SCC'] = Reg(scc) - if 'VCC' in used_regs: regs['VCC'] = Reg(vcc) - if 'EXEC' in used_regs: regs['EXEC'] = Reg(exec_mask) - if 'tmp' in used_regs: regs['tmp'] = Reg(0) - if 'saveexec' in used_regs: regs['saveexec'] = Reg(exec_mask) - if 'laneId' in used_regs: regs['laneId'] = lane - if 'SIMM16' in used_regs: regs['SIMM16'] = Reg(literal) - if 'SIMM32' in used_regs: regs['SIMM32'] = Reg(literal) - if 'SRC0' in used_regs: regs['SRC0'] = Reg(src0_idx) - if 'VDST' in used_regs: regs['VDST'] = Reg(vdst_idx) - if 'VGPR' in used_regs: regs['VGPR'] = vgprs + # Create Regs - D0 gets s0 for DIV_SCALE (passthrough behavior) + S0, S1, S2 = Reg(s0), Reg(s1), Reg(s2) + D0, D1 = Reg(s0 if is_div_scale else d0), Reg(0) + SCC, VCC, EXEC = Reg(scc), Reg(vcc), Reg(exec_mask) + tmp = Reg(0) - # Call pseudocode with only the registers it needs - ret = fn(**{r: regs[r] for r in used_regs}) + # Call pseudocode + ret = fn(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, lane) - # Build result dict based on flags - D0 = regs.get('D0') - SCC = regs.get('SCC') - VCC = regs.get('VCC') - EXEC = regs.get('EXEC') - D1 = regs.get('D1') - - result = {'d0': D0._val if D0 is not None else d0, 'scc': (SCC._val & 1) if SCC is not None else (scc & 1)} - if flags[_F_HAS_SDST]: result['vcc_lane'] = (VCC._val >> lane) & 1 - elif flags[_F_USES_VCC] and VCC is not None and VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if flags[_F_IS_CMPX]: result['exec_lane'] = (EXEC._val >> lane) & 1 - elif flags[_F_USES_EXEC] and EXEC is not None and EXEC._val != exec_mask: result['exec'] = EXEC._val - if flags[_F_IS_CMP]: result['vcc_lane'] = (D0._val >> lane) & 1 - if flags[_F_IS_64]: result['d0_64'] = True - if flags[_F_HAS_D1]: result['d1'] = D1._val & 1 + # Build result + result = {'d0': D0._val, 'scc': SCC._val & 1} + if has_sdst or VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if is_cmpx: result['exec_lane'] = (EXEC._val >> lane) & 1 + elif EXEC._val != exec_mask: result['exec'] = EXEC._val + if is_cmp: result['vcc_lane'] = (D0._val >> lane) & 1 + if is_64: result['d0_64'] = True + if D1._val: result['d1'] = D1._val & 1 # V_WRITELANE_B32 returns (wr_lane, value) directly if ret is not None: result['vgpr_write'] = (ret[0], vdst_idx, ret[1]) return result @@ -294,21 +277,20 @@ def exec_scalar(st: WaveState, inst: Inst) -> int: else: raise NotImplementedError(f"Unknown scalar type {inst_type}") op = op_cls(inst.op) - fn_flags = compiled.get(op_cls, {}).get(op) - if fn_flags is None: raise NotImplementedError(f"{op.name} not in pseudocode") - fn, flags = fn_flags + fn = compiled.get(op_cls, {}).get(op) + if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode") # Read sources - 64-bit ops need 64-bit source reads is_64bit_s0 = op.name.endswith(('_B64', '_I64', '_U64')) or '_U64_' in op.name or '_I64_' in op.name is_64bit_s0s1 = op_cls is SOPCOp and op in (SOPCOp.S_CMP_EQ_U64, SOPCOp.S_CMP_LG_U64) s0 = st.rsrc64(ssrc0, 0) if is_64bit_s0 or is_64bit_s0s1 else (st.rsrc(ssrc0, 0) if inst_type != SOPK else st.rsgpr(inst.sdst)) is_64bit_sop2 = is_64bit_s0 and inst_type is SOP2 - s1 = st.rsrc64(inst.ssrc1, 0) if (is_64bit_sop2 or is_64bit_s0s1) else (st.rsrc(inst.ssrc1, 0) if inst_type in (SOP2, SOPC) else inst.simm16 if inst_type is SOPK else 0) + s1 = st.rsrc64(inst.ssrc1, 0) if (is_64bit_sop2 or is_64bit_s0s1) else (st.rsrc(inst.ssrc1, 0) if inst_type in (SOP2, SOPC) else 0) + s2 = inst.simm16 if inst_type is SOPK else 0 # SOPK: 16-bit immediate passed as S2 d0 = st.rsgpr64(sdst) if (is_64bit_s0 or is_64bit_s0s1) and sdst is not None else (st.rsgpr(sdst) if sdst is not None else 0) - literal = inst.simm16 if inst_type is SOPK else st.literal # Execute and apply results - result = _run_pcode(fn, flags, s0, s1, 0, d0, st.scc, st.vcc, 0, st.exec_mask, literal, None, 0, 0) + result = _run_pcode(fn, op_cls, op, s0, s1, s2, d0, st.scc, st.vcc, 0, st.exec_mask, 0) if sdst is not None: if result.get('d0_64'): st.wsgpr64(sdst, result['d0']) else: st.wsgpr(sdst, result['d0']) @@ -360,16 +342,24 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No # Both ops execute simultaneously using pre-instruction values, so read all inputs first if inst_type is VOPD: vdsty = (inst.vdsty << 1) | ((inst.vdstx & 1) ^ 1) + # Read all source operands BEFORE any writes (dual-issue semantics) sx0, sx1 = st.rsrc(inst.srcx0, lane), V[inst.vsrcx1] sy0, sy1 = st.rsrc(inst.srcy0, lane), V[inst.vsrcy1] dx0, dy0 = V[inst.vdstx], V[vdsty] + # FMAAK/FMAMK in VOPD use literal as S2 + literal = getattr(inst, '_literal', None) or 0 res_x = res_y = None if (op_x := _VOPD_TO_VOP.get(inst.opx)): - if (fn_flags := compiled.get(type(op_x), {}).get(op_x)): - res_x = _run_pcode(fn_flags[0], fn_flags[1], sx0, sx1, 0, dx0, st.scc, st.vcc, lane, st.exec_mask, st.literal, None, 0, 0) + if (fn := compiled.get(type(op_x), {}).get(op_x)): + # opx 1=FMAMK, 2=FMAAK use literal + sx2 = literal if inst.opx in (VOPDOp.V_DUAL_FMAMK_F32, VOPDOp.V_DUAL_FMAAK_F32) else 0 + res_x = _run_pcode(fn, type(op_x), op_x, sx0, sx1, sx2, dx0, st.scc, st.vcc, lane, st.exec_mask, 0) if (op_y := _VOPD_TO_VOP.get(inst.opy)): - if (fn_flags := compiled.get(type(op_y), {}).get(op_y)): - res_y = _run_pcode(fn_flags[0], fn_flags[1], sy0, sy1, 0, dy0, st.scc, st.vcc, lane, st.exec_mask, st.literal, None, 0, 0) + if (fn := compiled.get(type(op_y), {}).get(op_y)): + # opy 1=FMAMK, 2=FMAAK use literal + sy2 = literal if inst.opy in (VOPDOp.V_DUAL_FMAMK_F32, VOPDOp.V_DUAL_FMAAK_F32) else 0 + res_y = _run_pcode(fn, type(op_y), op_y, sy0, sy1, sy2, dy0, st.scc, st.vcc, lane, st.exec_mask, 0) + # Write results after both ops complete if res_x: V[inst.vdstx] = res_x['d0'] if res_y: V[vdsty] = res_y['d0'] return @@ -377,17 +367,19 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No # VOP3SD: has extra scalar dest for carry output if inst_type is VOP3SD: op = VOP3SDOp(inst.op) - fn_flags = compiled.get(VOP3SDOp, {}).get(op) - if fn_flags is None: raise NotImplementedError(f"{op.name} not in pseudocode") - fn, flags = fn_flags + fn = compiled.get(VOP3SDOp, {}).get(op) + if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode") s0, s1, s2 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane), st.rsrc(inst.src2, lane) + # For 64-bit src2 ops (V_MAD_U64_U32, V_MAD_I64_I32), read from consecutive registers mad64_ops = (VOP3SDOp.V_MAD_U64_U32, VOP3SDOp.V_MAD_I64_I32) if op in mad64_ops: s2 = (V[inst.src2 - 256] | (V[inst.src2 - 256 + 1] << 32)) if inst.src2 >= 256 else st.rsgpr64(inst.src2) d0 = V[inst.vdst] + # For carry-in ops (V_*_CO_CI_*), src2 register contains the carry bitmask (not VCC). + # The pseudocode uses VCC but in VOP3SD encoding, the actual carry source is inst.src2. carry_ops = (VOP3SDOp.V_ADD_CO_CI_U32, VOP3SDOp.V_SUB_CO_CI_U32, VOP3SDOp.V_SUBREV_CO_CI_U32) vcc_for_exec = st.rsgpr64(inst.src2) if op in carry_ops else st.vcc - result = _run_pcode(fn, flags, s0, s1, s2, d0, st.scc, vcc_for_exec, lane, st.exec_mask, st.literal, None, 0, inst.vdst) + result = _run_pcode(fn, VOP3SDOp, op, s0, s1, s2, d0, st.scc, vcc_for_exec, lane, st.exec_mask, inst.vdst) if result.get('d0_64'): V[inst.vdst] = result['d0'] & 0xffffffff V[inst.vdst + 1] = (result['d0'] >> 32) & 0xffffffff @@ -402,15 +394,36 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No # Get op enum and sources (None means "no source" for that operand) if inst_type is VOP1: if inst.op == VOP1Op.V_NOP: return + # V_READFIRSTLANE_B32: read from first active lane's VGPR -> SGPR (not in pseudocode - needs cross-lane access) + if inst.op == VOP1Op.V_READFIRSTLANE_B32: + first_lane = (st.exec_mask & -st.exec_mask).bit_length() - 1 if st.exec_mask else 0 + vgpr_idx = inst.src0 - 256 if inst.src0 >= 256 else inst.src0 # VGPR index + st.wsgpr(inst.vdst, st.vgpr[first_lane][vgpr_idx]) + return op_cls, op, src0, src1, src2, vdst = VOP1Op, VOP1Op(inst.op), inst.src0, None, None, inst.vdst elif inst_type is VOP2: - op_cls, op, src0, src1, src2, vdst = VOP2Op, VOP2Op(inst.op), inst.src0, inst.vsrc1 + 256, None, inst.vdst + op_cls, op = VOP2Op, VOP2Op(inst.op) + # FMAAK/FMAMK use inline literal constant as S2 + literal = getattr(inst, '_literal', None) + src0, src1, src2, vdst = inst.src0, inst.vsrc1 + 256, literal, inst.vdst elif inst_type is VOP3: # VOP3 ops 0-255 are VOPC comparisons encoded as VOP3 (use VOPCOp pseudocode) if inst.op < 256: op_cls, op, src0, src1, src2, vdst = VOPCOp, VOPCOp(inst.op), inst.src0, inst.src1, None, inst.vdst else: op_cls, op, src0, src1, src2, vdst = VOP3Op, VOP3Op(inst.op), inst.src0, inst.src1, inst.src2, inst.vdst + # V_READFIRSTLANE_B32 in VOP3 encoding - same as VOP1 but with VOP3 format + if op == VOP3Op.V_READFIRSTLANE_B32: + first_lane = (st.exec_mask & -st.exec_mask).bit_length() - 1 if st.exec_mask else 0 + vgpr_idx = inst.src0 - 256 if inst.src0 >= 256 else inst.src0 + st.wsgpr(inst.vdst, st.vgpr[first_lane][vgpr_idx]) + return + # V_READLANE_B32: read from specific lane's VGPR -> SGPR (lane specified in src1) + if op == VOP3Op.V_READLANE_B32: + read_lane = st.rsrc(inst.src1, lane) & 0x1f # Lane to read from (5 bits) + vgpr_idx = inst.src0 - 256 if inst.src0 >= 256 else inst.src0 + st.wsgpr(inst.vdst, st.vgpr[read_lane][vgpr_idx]) + return # V_PERM_B32: byte permutation - not in pseudocode PDF, implement directly # D0[byte_i] = selector[byte_i] < 8 ? {src0, src1}[selector[byte_i]] : (selector[byte_i] >= 0xD ? 0xFF : 0x00) if op == VOP3Op.V_PERM_B32: @@ -504,16 +517,15 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No s1 = (s1_hi << 16) | s1_lo s2 = (s2_hi << 16) | s2_lo vdst = inst.vdst - fn_flags = compiled.get(VOP3POp, {}).get(op) - if fn_flags is None: raise NotImplementedError(f"{op.name} not in pseudocode") - result = _run_pcode(fn_flags[0], fn_flags[1], s0, s1, s2, 0, st.scc, st.vcc, lane, st.exec_mask, st.literal, None, 0, vdst) + fn = compiled.get(VOP3POp, {}).get(op) + if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode") + result = _run_pcode(fn, VOP3POp, op, s0, s1, s2, 0, st.scc, st.vcc, lane, st.exec_mask, vdst) st.vgpr[lane][vdst] = result['d0'] & 0xffffffff return else: raise NotImplementedError(f"Unknown vector type {inst_type}") - fn_flags = compiled.get(op_cls, {}).get(op) - if fn_flags is None: raise NotImplementedError(f"{op.name} not in pseudocode") - fn, flags = fn_flags + fn = compiled.get(op_cls, {}).get(op) + if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode") # Read sources (with VOP3 modifiers if applicable) neg, abs_ = (getattr(inst, 'neg', 0), getattr(inst, 'abs', 0)) if inst_type is VOP3 else (0, 0) @@ -528,25 +540,29 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No return val # Determine if sources are 64-bit based on instruction type + # For 64-bit shift ops: src0 is 32-bit (shift amount), src1 is 64-bit (value to shift) + # For V_LDEXP_F64: src0 is 64-bit float, src1 is 32-bit integer exponent + # For most other _B64/_I64/_U64/_F64 ops: all sources are 64-bit is_64bit_op = op.name.endswith(('_B64', '_I64', '_U64', '_F64')) is_ldexp_64 = op in (VOP3Op.V_LDEXP_F64,) is_shift_64 = op in (VOP3Op.V_LSHLREV_B64, VOP3Op.V_LSHRREV_B64, VOP3Op.V_ASHRREV_I64) is_16bit_src = op_cls is VOP3Op and op in _VOP3_16BIT_OPS and op not in _CVT_32_64_SRC_OPS - is_vop2_16bit = op_cls is VOP2Op and op in _VOP2_16BIT_OPS + is_vop2_16bit = op_cls is VOP2Op and op in _VOP2_16BIT_OPS # VOP2 16-bit ops use f16 inline constants if is_shift_64: - s0 = mod_src(st.rsrc(src0, lane), 0) - s1 = st.rsrc64(src1, lane) if src1 is not None else 0 + s0 = mod_src(st.rsrc(src0, lane), 0) # shift amount is 32-bit + s1 = st.rsrc64(src1, lane) if src1 is not None else 0 # value to shift is 64-bit s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 elif is_ldexp_64: - s0 = mod_src64(st.rsrc64(src0, lane), 0) - s1 = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 + s0 = mod_src64(st.rsrc64(src0, lane), 0) # mantissa is 64-bit float + s1 = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 # exponent is 32-bit int s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 elif is_64bit_op: s0 = mod_src64(st.rsrc64(src0, lane), 0) s1 = mod_src64(st.rsrc64(src1, lane), 1) if src1 is not None else 0 s2 = mod_src64(st.rsrc64(src2, lane), 2) if src2 is not None else 0 elif is_16bit_src: + # For 16-bit source ops, opsel bits select which half to use s0_raw = mod_src(st.rsrc(src0, lane), 0) s1_raw = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 s2_raw = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 @@ -560,26 +576,31 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No else: s0 = mod_src(st.rsrc(src0, lane), 0) s1 = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 - s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 + # src2 can be a register index OR a raw literal value (for FMAAK/FMAMK) + # If src2 > 511, it's a raw literal value, not a register index + s2 = src2 if src2 is not None and src2 > 511 else (mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0) d0 = V[vdst] if not is_64bit_op else (V[vdst] | (V[vdst + 1] << 32)) - # V_CNDMASK_B32: VOP3 encoding uses src2 as mask (not VCC) + # V_CNDMASK_B32: VOP3 encoding uses src2 as mask (not VCC); VOP2 uses VCC implicitly vcc_for_fn = st.rsgpr64(src2) if op in (VOP3Op.V_CNDMASK_B32,) and inst_type is VOP3 and src2 is not None and src2 < 256 else st.vcc - src0_idx = (src0 - 256) if src0 is not None and src0 >= 256 else (src0 if src0 is not None else 0) # Execute pseudocode - result = _run_pcode(fn, flags, s0, s1, s2, d0, st.scc, vcc_for_fn, lane, st.exec_mask, st.literal, st.vgpr, src0_idx, vdst) + result = _run_pcode(fn, op_cls, op, s0, s1, s2, d0, st.scc, vcc_for_fn, lane, st.exec_mask, vdst) # Apply results if 'vgpr_write' in result: + # Lane instruction wrote to VGPR: (lane, vgpr_idx, value) wr_lane, wr_idx, wr_val = result['vgpr_write'] st.vgpr[wr_lane][wr_idx] = wr_val if 'vcc_lane' in result: + # VOP2 carry instructions write carry to VCC implicitly; VOPC writes to vdst vcc_dst = VCC_LO if op_cls is VOP2Op and op in (VOP2Op.V_ADD_CO_CI_U32, VOP2Op.V_SUB_CO_CI_U32, VOP2Op.V_SUBREV_CO_CI_U32) else vdst st.pend_sgpr_lane(vcc_dst, lane, result['vcc_lane']) if 'exec_lane' in result: + # V_CMPX instructions write to EXEC per-lane st.pend_sgpr_lane(EXEC_LO, lane, result['exec_lane']) if 'd0' in result and op_cls not in (VOPCOp,) and 'vgpr_write' not in result: + # V_READFIRSTLANE_B32 and V_READLANE_B32 write to SGPR, not VGPR writes_to_sgpr = op in (VOP1Op.V_READFIRSTLANE_B32,) or \ (op_cls is VOP3Op and op in (VOP3Op.V_READFIRSTLANE_B32, VOP3Op.V_READLANE_B32)) is_16bit_dst = op in _VOP3_16BIT_DST_OPS or op in _VOP1_16BIT_DST_OPS @@ -589,6 +610,7 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No V[vdst] = result['d0'] & 0xffffffff V[vdst + 1] = (result['d0'] >> 32) & 0xffffffff elif is_16bit_dst and inst_type is VOP3: + # VOP3 16-bit ops: opsel[3] controls hi/lo destination if opsel & 8: V[vdst] = (V[vdst] & 0x0000ffff) | ((result['d0'] & 0xffff) << 16) else: V[vdst] = (V[vdst] & 0xffff0000) | (result['d0'] & 0xffff) else: diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py index 46e2f17806..9f98358d84 100644 --- a/extra/assembly/amd/pcode.py +++ b/extra/assembly/amd/pcode.py @@ -657,9 +657,14 @@ def compile_pseudocode(pseudocode: str) -> str: return '\n'.join(lines) def _assign(lhs: str, rhs: str) -> str: - """Generate assignment. Bare tmp/SCC/etc modify existing Reg._val.""" - if lhs in ('tmp', 'SCC', 'VCC', 'EXEC', 'D0', 'D1', 'saveexec'): + """Generate assignment. Outputs modify Reg in-place via ._val.""" + # Output registers and tmp: modify in-place so caller sees changes + if lhs in ('SCC', 'VCC', 'EXEC', 'D0', 'D1', 'tmp'): return f"{lhs}._val = int({rhs})" + # saveexec needs to be a new Reg for typed accessor access + if lhs == 'saveexec': + return f"{lhs} = Reg(int({rhs}))" + # Other locals: natural style return f"{lhs} = {rhs}" def _expr(e: str) -> str: @@ -982,30 +987,15 @@ from extra.assembly.amd.pcode import * code = code.replace( 'D0.f64 = ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))', 'D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))') - # Detect flags for result handling (stored in metadata, not generated code) - is_64 = any(p in pc for p in ['D0.u64', 'D0.b64', 'D0.f64', 'D0.i64', 'D1.u64', 'D1.b64', 'D1.f64', 'D1.i64']) - has_d1 = '{ D1' in pc - if has_d1: is_64 = True - is_cmp = cls_name == 'VOPCOp' and 'D0.u64[laneId]' in pc - is_cmpx = cls_name == 'VOPCOp' and 'EXEC.u64[laneId]' in pc # V_CMPX writes to EXEC per-lane - is_div_scale = 'DIV_SCALE' in op.name - has_sdst = cls_name == 'VOP3SDOp' and ('VCC.u64[laneId]' in pc or is_div_scale) - combined = code + pc - uses_vcc = 'VCC' in combined - uses_exec = 'EXEC' in combined or 'EXEC_LO' in combined or 'EXEC_HI' in combined - - # Determine which registers are used - all_regs = ['S0', 'S1', 'S2', 'D0', 'D1', 'SCC', 'VCC', 'EXEC', 'tmp', 'saveexec', 'laneId', 'SIMM16', 'SIMM32', 'SRC0', 'VDST', 'VGPR'] - used_regs = [r for r in all_regs if r in combined] - if 'EXEC_LO' in combined or 'EXEC_HI' in combined: - if 'EXEC' not in used_regs: used_regs.append('EXEC') - - # Generate pure pseudocode function - Regs passed directly as arguments + # SIMM16/SIMM32 (inline literal constants) are passed as S2 + code = code.replace('SIMM16', 'S2').replace('SIMM32', 'S2') + # Generate function with standard signature fn_name = f"_{cls_name}_{op.name}" - lines.append(f"def {fn_name}({', '.join(used_regs)}):") + lines.append(f"def {fn_name}(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId):") for pc_line in pc.split('\n'): lines.append(f" # {pc_line}") # Add EXEC_LO/EXEC_HI if needed + combined = code + pc if 'EXEC_LO' in combined: lines.append(" EXEC_LO = SliceProxy(EXEC, 31, 0)") if 'EXEC_HI' in combined: lines.append(" EXEC_HI = SliceProxy(EXEC, 63, 32)") code_lines = [line for line in code.split('\n') if line.strip()] @@ -1016,9 +1006,7 @@ from extra.assembly.amd.pcode import * lines.append(" pass") lines.append("") - # Build flags tuple: (is_64, has_d1, is_cmp, is_cmpx, is_div_scale, has_sdst, uses_vcc, uses_exec, used_regs) - flags = (is_64, has_d1, is_cmp, is_cmpx, is_div_scale, has_sdst, uses_vcc, uses_exec, tuple(used_regs)) - fn_entries.append((op, fn_name, flags)) + fn_entries.append((op, fn_name)) compiled_count += 1 except Exception as e: print(f" Warning: Failed to compile {op.name}: {e}") @@ -1026,8 +1014,8 @@ from extra.assembly.amd.pcode import * if fn_entries: lines.append(f'{cls_name}_FUNCTIONS = {{') - for op, fn_name, flags in fn_entries: - lines.append(f" {cls_name}.{op.name}: ({fn_name}, {flags}),") + for op, fn_name in fn_entries: + lines.append(f" {cls_name}.{op.name}: {fn_name},") lines.append('}') lines.append('') @@ -1036,10 +1024,9 @@ from extra.assembly.amd.pcode import * if 'VOP3Op' in enum_names: lines.append(''' # V_WRITELANE_B32: Write scalar to specific lane's VGPR (not in PDF pseudocode) -def _VOP3Op_V_WRITELANE_B32(S0, S1): +def _VOP3Op_V_WRITELANE_B32(S0, S1, S2, D0, D1, SCC, VCC, EXEC, tmp, laneId): return (int(S1) & 0x1f, int(S0) & 0xffffffff) # (wr_lane, value) -# flags: (is_64, has_d1, is_cmp, is_cmpx, is_div_scale, has_sdst, uses_vcc, uses_exec, used_regs) -VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = (_VOP3Op_V_WRITELANE_B32, (False, False, False, False, False, False, False, False, ('S0', 'S1'))) +VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = _VOP3Op_V_WRITELANE_B32 ''') lines.append('COMPILED_FUNCTIONS = {') diff --git a/extra/assembly/amd/test/test_pcode.py b/extra/assembly/amd/test/test_pcode.py index dae9ce3850..316bb187cd 100644 --- a/extra/assembly/amd/test/test_pcode.py +++ b/extra/assembly/amd/test/test_pcode.py @@ -234,8 +234,8 @@ class TestPseudocodeRegressions(unittest.TestCase): s0 = 0x3f800000 # 1.0 s1 = 0x40400000 # 3.0 s2 = 0x3f800000 # 1.0 (numerator) - fn, flags = VOP3SDOp_FUNCTIONS[VOP3SDOp.V_DIV_SCALE_F32] - result = _run_pcode(fn, flags, s0, s1, s2, 0, 0, 0, 0, 0xffffffff, 0, None, 0, 0) + fn = VOP3SDOp_FUNCTIONS[VOP3SDOp.V_DIV_SCALE_F32] + result = _run_pcode(fn, VOP3SDOp, VOP3SDOp.V_DIV_SCALE_F32, s0, s1, s2, 0, 0, 0, 0, 0xffffffff, 0) # Must always have vcc_lane in result self.assertIn('vcc_lane', result, "V_DIV_SCALE_F32 must always return vcc_lane") self.assertEqual(result['vcc_lane'], 0, "vcc_lane should be 0 when no scaling needed") @@ -245,20 +245,20 @@ class TestPseudocodeRegressions(unittest.TestCase): Bug: isQuietNAN and isSignalNAN both used math.isnan which can't distinguish them.""" quiet_nan = 0x7fc00000 # quiet NaN: exponent=255, bit22=1 signal_nan = 0x7f800001 # signaling NaN: exponent=255, bit22=0 - fn, flags = VOPCOp_FUNCTIONS[VOPCOp.V_CMP_CLASS_F32] + fn = VOPCOp_FUNCTIONS[VOPCOp.V_CMP_CLASS_F32] # Test quiet NaN detection (bit 1 in mask) s1_quiet = 0b0000000010 # bit 1 = quiet NaN - result = _run_pcode(fn, flags, quiet_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0, None, 0, 0) + result = _run_pcode(fn, VOPCOp, VOPCOp.V_CMP_CLASS_F32, quiet_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0) self.assertEqual(result['vcc_lane'], 1, "Should detect quiet NaN with quiet NaN mask") # Test signaling NaN detection (bit 0 in mask) s1_signal = 0b0000000001 # bit 0 = signaling NaN - result = _run_pcode(fn, flags, signal_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0, None, 0, 0) + result = _run_pcode(fn, VOPCOp, VOPCOp.V_CMP_CLASS_F32, signal_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0) self.assertEqual(result['vcc_lane'], 1, "Should detect signaling NaN with signaling NaN mask") # Test that quiet NaN doesn't match signaling NaN mask - result = _run_pcode(fn, flags, quiet_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0, None, 0, 0) + result = _run_pcode(fn, VOPCOp, VOPCOp.V_CMP_CLASS_F32, quiet_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0) self.assertEqual(result['vcc_lane'], 0, "Quiet NaN should not match signaling NaN mask") # Test that signaling NaN doesn't match quiet NaN mask - result = _run_pcode(fn, flags, signal_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0, None, 0, 0) + result = _run_pcode(fn, VOPCOp, VOPCOp.V_CMP_CLASS_F32, signal_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0) self.assertEqual(result['vcc_lane'], 0, "Signaling NaN should not match quiet NaN mask") def test_isnan_with_typed_view(self):