diff --git a/riscv/src/compiler.rs b/riscv/src/compiler.rs index 755f44447..d65cf6dfb 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/compiler.rs @@ -66,15 +66,15 @@ struct RiscvArchitecture {} impl Architecture for RiscvArchitecture { fn instruction_ends_control_flow(instr: &str) -> bool { match instr { - "li" | "lui" | "la" | "mv" | "add" | "addi" | "sub" | "neg" | "mul" | "mulhu" - | "mulhsu" | "divu" | "remu" | "xor" | "xori" | "and" | "andi" | "or" | "ori" - | "not" | "slli" | "sll" | "srli" | "srl" | "srai" | "seqz" | "snez" | "slt" - | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" | "blt" - | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" | "jalr" - | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" | "sh" - | "sb" | "nop" | "fence" | "fence.i" | "amoadd.w" | "amoadd.w.aq" | "amoadd.w.rl" - | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" | "sc.w" - | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, + "li" | "lui" | "la" | "mv" | "add" | "addi" | "sub" | "neg" | "mul" | "mulh" + | "mulhu" | "mulhsu" | "divu" | "remu" | "xor" | "xori" | "and" | "andi" | "or" + | "ori" | "not" | "slli" | "sll" | "srli" | "srl" | "srai" | "seqz" | "snez" + | "slt" | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" + | "blt" | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" + | "jalr" | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" + | "sh" | "sb" | "nop" | "fence" | "fence.i" | "amoadd.w" | "amoadd.w.aq" + | "amoadd.w.rl" | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" + | "sc.w" | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, "j" | "jr" | "tail" | "ret" | "unimp" => true, _ => { panic!("Unknown instruction: {instr}"); @@ -448,6 +448,7 @@ fn preamble(degree: u64, coprocessors: &CoProcessors) -> String { reg tmp1; reg tmp2; reg tmp3; + reg tmp4; reg lr_sc_reservation; "# .to_owned() @@ -912,6 +913,33 @@ fn 
process_instruction(instr: &str, args: &[Argument], coprocessors: &CoProcesso let (rd, r1, r2) = rrr(args); only_if_no_write_to_zero(format!("tmp1, {rd} <== mul({r1}, {r2});"), rd) } + "mulh" => { + let (rd, r1, r2) = rrr(args); + only_if_no_write_to_zero_vec( + vec![ + format!("tmp1 <== to_signed({r1});"), + format!("tmp2 <== to_signed({r2});"), + // tmp3 is 1 if tmp1 is non-negative + "tmp3 <== is_positive(tmp1 + 1);".into(), + // tmp4 is 1 if tmp2 is non-negative + "tmp4 <== is_positive(tmp2 + 1);".into(), + // If tmp1 is negative, convert to positive + "skip_if_zero 0, tmp3;".into(), + "tmp1 <=X= 0 - tmp1;".into(), + // If tmp2 is negative, convert to positive + "skip_if_zero 0, tmp4;".into(), + "tmp2 <=X= 0 - tmp2;".into(), + format!("tmp1, {rd} <== mul(tmp1, tmp2);"), + // Determine the sign of the result based on the signs of tmp1 and tmp2 + "tmp3 <== is_not_equal_zero(tmp3 - tmp4);".into(), + // If the result should be negative, convert back to negative + "skip_if_zero tmp3, 2;".into(), + "tmp1 <== is_equal_zero(tmp1);".into(), + format!("{rd} <== wrap_signed(-{rd} - 1 + tmp1);"), + ], + rd, + ) + } "mulhsu" => { let (rd, r1, r2) = rrr(args); only_if_no_write_to_zero_vec( diff --git a/riscv/tests/instruction_tests/generated/mulh.S b/riscv/tests/instruction_tests/generated/mulh.S new file mode 100644 index 000000000..b0a43feb7 --- /dev/null +++ b/riscv/tests/instruction_tests/generated/mulh.S @@ -0,0 +1,149 @@ +# 0 "sources/mulh.S" +# 0 "" +# 0 "" +# 1 "/usr/include/stdc-predef.h" 1 3 4 +# 0 "" 2 +# 1 "sources/mulh.S" +# See LICENSE for license details. + +#***************************************************************************** +# mulh.S +#----------------------------------------------------------------------------- + +# Test mulh instruction. 
+ + +# 1 "sources/riscv_test.h" 1 +# 11 "sources/mulh.S" 2 +# 1 "sources/test_macros.h" 1 + + + + + + +#----------------------------------------------------------------------- +# Helper macros +#----------------------------------------------------------------------- +# 20 "sources/test_macros.h" +# We use a macro hack to simplify code generation for various numbers +# of bubble cycles. +# 36 "sources/test_macros.h" +#----------------------------------------------------------------------- +# RV64UI MACROS +#----------------------------------------------------------------------- + +#----------------------------------------------------------------------- +# Tests for instructions with immediate operand +#----------------------------------------------------------------------- +# 92 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Tests for vector config instructions +#----------------------------------------------------------------------- +# 120 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Tests for an instruction with register operands +#----------------------------------------------------------------------- +# 148 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Tests for an instruction with register-register operands +#----------------------------------------------------------------------- +# 242 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Test memory instructions +#----------------------------------------------------------------------- +# 319 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Test branch instructions +#----------------------------------------------------------------------- +# 404 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Test 
jump instructions +#----------------------------------------------------------------------- +# 433 "sources/test_macros.h" +#----------------------------------------------------------------------- +# RV64UF MACROS +#----------------------------------------------------------------------- + +#----------------------------------------------------------------------- +# Tests floating-point instructions +#----------------------------------------------------------------------- +# 569 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Pass and fail code (assumes test num is in x28) +#----------------------------------------------------------------------- +# 581 "sources/test_macros.h" +#----------------------------------------------------------------------- +# Test data section +#----------------------------------------------------------------------- +# 12 "sources/mulh.S" 2 + + +.globl __runtime_start; __runtime_start: la x10,__return_pointer; sw x1,0(x10); li x10,0 + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + test_2: li x10, 2; ebreak; li x1, 0x00000000; li x2, 0x00000000; mulh x3, x1, x2;; li x29, 0x00000000; li x28, 2; bne x3, x29, fail;; + test_3: li x10, 3; ebreak; li x1, 0x00000001; li x2, 0x00000001; mulh x3, x1, x2;; li x29, 0x00000000; li x28, 3; bne x3, x29, fail;; + test_4: li x10, 4; ebreak; li x1, 0x00000003; li x2, 0x00000007; mulh x3, x1, x2;; li x29, 0x00000000; li x28, 4; bne x3, x29, fail;; + + test_5: li x10, 5; ebreak; li x1, 0x00000000; li x2, 0xffff8000; mulh x3, x1, x2;; li x29, 0x00000000; li x28, 5; bne x3, x29, fail;; + test_6: li x10, 6; ebreak; li x1, 0x80000000; li x2, 0x00000000; mulh x3, x1, x2;; li x29, 0x00000000; li x28, 6; bne x3, x29, fail;; + test_7: li x10, 7; ebreak; li x1, 0x80000000; li x2, 0x00000000; mulh x3, x1, x2;; li x29, 0x00000000; li x28, 7; bne x3, x29, fail;; + 
+ test_30: li x10, 30; ebreak; li x1, 0xaaaaaaab; li x2, 0x0002fe7d; mulh x3, x1, x2;; li x29, 0xffff0081; li x28, 30; bne x3, x29, fail;; + test_31: li x10, 31; ebreak; li x1, 0x0002fe7d; li x2, 0xaaaaaaab; mulh x3, x1, x2;; li x29, 0xffff0081; li x28, 31; bne x3, x29, fail;; + + test_32: li x10, 32; ebreak; li x1, 0xff000000; li x2, 0xff000000; mulh x3, x1, x2;; li x29, 0x00010000; li x28, 32; bne x3, x29, fail;; + + test_33: li x10, 33; ebreak; li x1, 0xffffffff; li x2, 0xffffffff; mulh x3, x1, x2;; li x29, 0x00000000; li x28, 33; bne x3, x29, fail;; + test_34: li x10, 34; ebreak; li x1, 0xffffffff; li x2, 0x00000001; mulh x3, x1, x2;; li x29, 0xffffffff; li x28, 34; bne x3, x29, fail;; + test_35: li x10, 35; ebreak; li x1, 0x00000001; li x2, 0xffffffff; mulh x3, x1, x2;; li x29, 0xffffffff; li x28, 35; bne x3, x29, fail;; + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + test_8: li x10, 8; ebreak; li x1, 13<<20; li x2, 11<<20; mulh x1, x1, x2;; li x29, 36608; li x28, 8; bne x1, x29, fail;; + test_9: li x10, 9; ebreak; li x1, 14<<20; li x2, 11<<20; mulh x2, x1, x2;; li x29, 39424; li x28, 9; bne x2, x29, fail;; + test_10: li x10, 10; ebreak; li x1, 13<<20; mulh x1, x1, x1;; li x29, 43264; li x28, 10; bne x1, x29, fail;; + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + test_11: li x10, 11; ebreak; li x4, 0; test_11_l1: li x1, 13<<20; li x2, 11<<20; mulh x3, x1, x2; addi x6, x3, 0; addi x4, x4, 1; li x5, 2; bne x4, x5, test_11_l1; li x29, 36608; li x28, 11; bne x6, x29, fail;; + test_12: li x10, 12; ebreak; li x4, 0; test_12_l1: li x1, 14<<20; li x2, 11<<20; mulh x3, x1, x2; nop; addi x6, x3, 0; addi x4, x4, 1; li x5, 2; bne x4, x5, test_12_l1; li x29, 39424; li x28, 12; bne x6, x29, fail;; + test_13: li x10, 13; ebreak; li x4, 0; test_13_l1: li 
x1, 15<<20; li x2, 11<<20; mulh x3, x1, x2; nop; nop; addi x6, x3, 0; addi x4, x4, 1; li x5, 2; bne x4, x5, test_13_l1; li x29, 42240; li x28, 13; bne x6, x29, fail;; + + test_14: li x10, 14; ebreak; li x4, 0; test_14_l1: li x1, 13<<20; li x2, 11<<20; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_14_l1; li x29, 36608; li x28, 14; bne x3, x29, fail;; + test_15: li x10, 15; ebreak; li x4, 0; test_15_l1: li x1, 14<<20; li x2, 11<<20; nop; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_15_l1; li x29, 39424; li x28, 15; bne x3, x29, fail;; + test_16: li x10, 16; ebreak; li x4, 0; test_16_l1: li x1, 15<<20; li x2, 11<<20; nop; nop; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_16_l1; li x29, 42240; li x28, 16; bne x3, x29, fail;; + test_17: li x10, 17; ebreak; li x4, 0; test_17_l1: li x1, 13<<20; nop; li x2, 11<<20; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_17_l1; li x29, 36608; li x28, 17; bne x3, x29, fail;; + test_18: li x10, 18; ebreak; li x4, 0; test_18_l1: li x1, 14<<20; nop; li x2, 11<<20; nop; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_18_l1; li x29, 39424; li x28, 18; bne x3, x29, fail;; + test_19: li x10, 19; ebreak; li x4, 0; test_19_l1: li x1, 15<<20; nop; nop; li x2, 11<<20; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_19_l1; li x29, 42240; li x28, 19; bne x3, x29, fail;; + + test_20: li x10, 20; ebreak; li x4, 0; test_20_l1: li x2, 11<<20; li x1, 13<<20; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_20_l1; li x29, 36608; li x28, 20; bne x3, x29, fail;; + test_21: li x10, 21; ebreak; li x4, 0; test_21_l1: li x2, 11<<20; li x1, 14<<20; nop; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_21_l1; li x29, 39424; li x28, 21; bne x3, x29, fail;; + test_22: li x10, 22; ebreak; li x4, 0; test_22_l1: li x2, 11<<20; li x1, 15<<20; nop; nop; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_22_l1; li x29, 42240; li x28, 22; bne x3, x29, fail;; 
+ test_23: li x10, 23; ebreak; li x4, 0; test_23_l1: li x2, 11<<20; nop; li x1, 13<<20; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_23_l1; li x29, 36608; li x28, 23; bne x3, x29, fail;; + test_24: li x10, 24; ebreak; li x4, 0; test_24_l1: li x2, 11<<20; nop; li x1, 14<<20; nop; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_24_l1; li x29, 39424; li x28, 24; bne x3, x29, fail;; + test_25: li x10, 25; ebreak; li x4, 0; test_25_l1: li x2, 11<<20; nop; nop; li x1, 15<<20; mulh x3, x1, x2; addi x4, x4, 1; li x5, 2; bne x4, x5, test_25_l1; li x29, 42240; li x28, 25; bne x3, x29, fail;; + + test_26: li x10, 26; ebreak; li x1, 31<<26; mulh x2, x0, x1;; li x29, 0; li x28, 26; bne x2, x29, fail;; + test_27: li x10, 27; ebreak; li x1, 32<<26; mulh x2, x1, x0;; li x29, 0; li x28, 27; bne x2, x29, fail;; + test_28: li x10, 28; ebreak; mulh x1, x0, x0;; li x29, 0; li x28, 28; bne x1, x29, fail;; + test_29: li x10, 29; ebreak; li x1, 33<<20; li x2, 34<<20; mulh x0, x1, x2;; li x29, 0; li x28, 29; bne x0, x29, fail;; + + bne x0, x28, pass; fail: unimp;; pass: la x10,__return_pointer; lw x1,0(x10); ret; + + + + .data +.balign 4; __return_pointer: .word 0; + + + + diff --git a/riscv/tests/instruction_tests/sources/mulh.S b/riscv/tests/instruction_tests/sources/mulh.S new file mode 100644 index 000000000..e583f5f69 --- /dev/null +++ b/riscv/tests/instruction_tests/sources/mulh.S @@ -0,0 +1,81 @@ +# See LICENSE for license details. + +#***************************************************************************** +# mulh.S +#----------------------------------------------------------------------------- +# +# Test mulh instruction. 
+# + +#include "riscv_test.h" +#include "test_macros.h" + +RVTEST_RV32U +RVTEST_CODE_BEGIN + + #------------------------------------------------------------- + # Arithmetic tests + #------------------------------------------------------------- + + TEST_RR_OP( 2, mulh, 0x00000000, 0x00000000, 0x00000000 ); + TEST_RR_OP( 3, mulh, 0x00000000, 0x00000001, 0x00000001 ); + TEST_RR_OP( 4, mulh, 0x00000000, 0x00000003, 0x00000007 ); + + TEST_RR_OP( 5, mulh, 0x00000000, 0x00000000, 0xffff8000 ); + TEST_RR_OP( 6, mulh, 0x00000000, 0x80000000, 0x00000000 ); + TEST_RR_OP( 7, mulh, 0x00000000, 0x80000000, 0x00000000 ); + + TEST_RR_OP(30, mulh, 0xffff0081, 0xaaaaaaab, 0x0002fe7d ); + TEST_RR_OP(31, mulh, 0xffff0081, 0x0002fe7d, 0xaaaaaaab ); + + TEST_RR_OP(32, mulh, 0x00010000, 0xff000000, 0xff000000 ); + + TEST_RR_OP(33, mulh, 0x00000000, 0xffffffff, 0xffffffff ); + TEST_RR_OP(34, mulh, 0xffffffff, 0xffffffff, 0x00000001 ); + TEST_RR_OP(35, mulh, 0xffffffff, 0x00000001, 0xffffffff ); + + #------------------------------------------------------------- + # Source/Destination tests + #------------------------------------------------------------- + + TEST_RR_SRC1_EQ_DEST( 8, mulh, 36608, 13<<20, 11<<20 ); + TEST_RR_SRC2_EQ_DEST( 9, mulh, 39424, 14<<20, 11<<20 ); + TEST_RR_SRC12_EQ_DEST( 10, mulh, 43264, 13<<20 ); + + #------------------------------------------------------------- + # Bypassing tests + #------------------------------------------------------------- + + TEST_RR_DEST_BYPASS( 11, 0, mulh, 36608, 13<<20, 11<<20 ); + TEST_RR_DEST_BYPASS( 12, 1, mulh, 39424, 14<<20, 11<<20 ); + TEST_RR_DEST_BYPASS( 13, 2, mulh, 42240, 15<<20, 11<<20 ); + + TEST_RR_SRC12_BYPASS( 14, 0, 0, mulh, 36608, 13<<20, 11<<20 ); + TEST_RR_SRC12_BYPASS( 15, 0, 1, mulh, 39424, 14<<20, 11<<20 ); + TEST_RR_SRC12_BYPASS( 16, 0, 2, mulh, 42240, 15<<20, 11<<20 ); + TEST_RR_SRC12_BYPASS( 17, 1, 0, mulh, 36608, 13<<20, 11<<20 ); + TEST_RR_SRC12_BYPASS( 18, 1, 1, mulh, 39424, 14<<20, 11<<20 ); + 
TEST_RR_SRC12_BYPASS( 19, 2, 0, mulh, 42240, 15<<20, 11<<20 ); + + TEST_RR_SRC21_BYPASS( 20, 0, 0, mulh, 36608, 13<<20, 11<<20 ); + TEST_RR_SRC21_BYPASS( 21, 0, 1, mulh, 39424, 14<<20, 11<<20 ); + TEST_RR_SRC21_BYPASS( 22, 0, 2, mulh, 42240, 15<<20, 11<<20 ); + TEST_RR_SRC21_BYPASS( 23, 1, 0, mulh, 36608, 13<<20, 11<<20 ); + TEST_RR_SRC21_BYPASS( 24, 1, 1, mulh, 39424, 14<<20, 11<<20 ); + TEST_RR_SRC21_BYPASS( 25, 2, 0, mulh, 42240, 15<<20, 11<<20 ); + + TEST_RR_ZEROSRC1( 26, mulh, 0, 31<<26 ); + TEST_RR_ZEROSRC2( 27, mulh, 0, 32<<26 ); + TEST_RR_ZEROSRC12( 28, mulh, 0 ); + TEST_RR_ZERODEST( 29, mulh, 33<<20, 34<<20 ); + + TEST_PASSFAIL + +RVTEST_CODE_END + + .data +RVTEST_DATA_BEGIN + + TEST_DATA + +RVTEST_DATA_END