diff --git a/asm-utils/src/data_storage.rs b/asm-utils/src/data_storage.rs index f588d1924..fbe43603d 100644 --- a/asm-utils/src/data_storage.rs +++ b/asm-utils/src/data_storage.rs @@ -7,23 +7,29 @@ use crate::{ utils::{alignment_size, next_aligned}, }; -pub enum SingleDataValue<'a> { +/// A single 32-bit data value. +pub enum SingleDataValue { + /// A literal value. Value(u32), - LabelReference(&'a str), - Offset(&'a str, &'a str), + /// The value of a pointer to a text label. Since there is no 1-to-1 + /// correspondence between RISC-V and Powdr ASM instructions, this is + /// passed unresolved to the code generator. + LabelReference(String), + /// Currently not supported. + Offset(String, String), } -struct WordWriter<'a, 'b> { - data_writer: &'a mut dyn FnMut(Option<&str>, u32, SingleDataValue), +struct WordWriter<'a> { + data_writer: &'a mut dyn FnMut(Option, u32, SingleDataValue), partial: u32, current_pos: u32, - latest_label: Option<&'b str>, + latest_label: Option, } -impl<'a, 'b> WordWriter<'a, 'b> { +impl<'a> WordWriter<'a> { fn new( starting_pos: u32, - data_writer: &'a mut dyn FnMut(Option<&str>, u32, SingleDataValue), + data_writer: &'a mut dyn FnMut(Option, u32, SingleDataValue), ) -> Self { // sanitary alignment to 8 bytes let current_pos = next_aligned(starting_pos as usize, 8) as u32; @@ -39,7 +45,7 @@ impl<'a, 'b> WordWriter<'a, 'b> { self.current_pos } - fn set_label(&mut self, label: &'b str) { + fn set_label(&mut self, label: String) { self.latest_label = Some(label) } @@ -77,7 +83,7 @@ impl<'a, 'b> WordWriter<'a, 'b> { } } - fn write_label_reference(&mut self, label: &str) { + fn write_label_reference(&mut self, label: String) { assert_eq!( self.current_pos % 4, 0, @@ -103,7 +109,7 @@ impl<'a, 'b> WordWriter<'a, 'b> { pub fn store_data_objects( sections: Vec, Vec)>>, memory_start: u32, - code_gen: &mut dyn FnMut(Option<&str>, u32, SingleDataValue), + code_gen: &mut dyn FnMut(Option, u32, SingleDataValue), ) -> BTreeMap { let mut writer = WordWriter::new(memory_start, code_gen); @@ -121,22 +127,22 @@ pub fn store_data_objects( positions }; - for (name, data) in sections.iter().flatten() { + for (name, data) in sections.into_iter().flatten() { if let Some(name) = name { writer.set_label(name); } for item in data { - match &item { + match item { DataValue::Zero(length) => { // We can assume memory to be zero-initialized, so we // just have to advance. - writer.advance(*length as u32); + writer.advance(length as u32); } DataValue::Direct(bytes) => { writer.write_bytes(bytes.iter().copied()); } DataValue::Reference(sym) => { - if let Some(p) = positions.get(sym) { + if let Some(p) = positions.get(&sym) { writer.write_bytes(p.to_le_bytes().iter().copied()); } else { // code reference @@ -144,7 +150,7 @@ pub fn store_data_objects( } } DataValue::Alignment(bytes, pad_value) => { - writer.align(*bytes as u32, *pad_value); + writer.align(bytes as u32, pad_value); } DataValue::Offset(_l, _r) => unimplemented!(), } diff --git a/riscv/benches/executor_benchmark.rs b/riscv/benches/executor_benchmark.rs index 0d79343c9..777865748 100644 --- a/riscv/benches/executor_benchmark.rs +++ b/riscv/benches/executor_benchmark.rs @@ -2,7 +2,7 @@ use ::powdr_pipeline::Pipeline; use powdr_number::GoldilocksField; use powdr_riscv::{ - compile_rust_crate_to_riscv_asm, compiler, continuations::bootloader::default_input, Runtime, + asm, compile_rust_crate_to_riscv_asm, continuations::bootloader::default_input, Runtime, }; use criterion::{criterion_group, criterion_main, Criterion}; @@ -18,7 +18,7 @@ fn executor_benchmark(c: &mut Criterion) { let tmp_dir = Temp::new_dir().unwrap(); let riscv_asm_files = compile_rust_crate_to_riscv_asm("./tests/riscv_data/keccak/Cargo.toml", &tmp_dir); - let contents = compiler::compile::(riscv_asm_files, &Runtime::base(), false); + let contents = asm::compile::(riscv_asm_files, &Runtime::base(), false); let mut pipeline = Pipeline::::default().from_asm_string(contents, None); pipeline.compute_optimized_pil().unwrap(); pipeline.compute_fixed_cols().unwrap(); @@ -30,7 +30,7 @@ fn executor_benchmark(c: &mut Criterion) { // The first chunk of `many_chunks`, with Poseidon co-processor & bootloader let riscv_asm_files = compile_rust_crate_to_riscv_asm("./tests/riscv_data/many_chunks/Cargo.toml", &tmp_dir); - let contents = compiler::compile::(riscv_asm_files, &Runtime::base().with_poseidon(), true); + let contents = asm::compile::(riscv_asm_files, &Runtime::base().with_poseidon(), true); let mut pipeline = Pipeline::::default().from_asm_string(contents, None); pipeline.compute_optimized_pil().unwrap(); pipeline.compute_fixed_cols().unwrap(); diff --git a/riscv/src/disambiguator.rs b/riscv/src/asm/disambiguator.rs similarity index 99% rename from riscv/src/disambiguator.rs rename to riscv/src/asm/disambiguator.rs index b2899be5c..db33bb162 100644 --- a/riscv/src/disambiguator.rs +++ b/riscv/src/asm/disambiguator.rs @@ -2,7 +2,7 @@ use std::collections::{HashMap, HashSet}; use itertools::Itertools; -use crate::{Argument, Expression, Statement}; +use super::{Argument, Expression, Statement}; /// Disambiguates the collection of assembly files and concatenates it to a single list of statements. /// Also disambiguates file ids (debugging information) and returns a list of all files with new IDs. diff --git a/riscv/src/asm/mod.rs b/riscv/src/asm/mod.rs new file mode 100644 index 000000000..6d165c839 --- /dev/null +++ b/riscv/src/asm/mod.rs @@ -0,0 +1,573 @@ +use std::collections::{BTreeMap, BTreeSet, HashSet}; + +use itertools::Itertools; +use parser::RiscParser; +use powdr_asm_utils::{ + ast::{BinaryOpKind, UnaryOpKind}, + data_parser, + data_storage::store_data_objects, + parser::parse_asm, + reachability::{self, symbols_in_args}, + utils::{argument_to_escaped_symbol, argument_to_number, expression_to_number}, + Architecture, +}; +use powdr_number::FieldElement; + +use crate::{ + code_gen::{ + self, FunctionKind, InstructionArgs, MemEntry, Register, RiscVProgram, SourceFileInfo, + }, + Runtime, +}; + +mod disambiguator; +mod parser; + +type Statement = powdr_asm_utils::ast::Statement; +type Argument = powdr_asm_utils::ast::Argument; +type Expression = powdr_asm_utils::ast::Expression; + +struct AsmProgram { + file_ids: Vec<(i64, String, String)>, + mem_entries: Option>, + statements: Vec, +} + +const START_FUNCTION: &str = "__stack_setup"; + +impl RiscVProgram for AsmProgram { + type Args = [Argument]; + + fn take_source_files_info(&mut self) -> impl Iterator { + self.file_ids.iter().map(|(id, dir, file)| SourceFileInfo { + id: *id as u32, + dir, + file, + }) + } + + fn take_initial_mem(&mut self) -> impl Iterator { + std::mem::take(&mut self.mem_entries).unwrap().into_iter() + } + + fn take_executable_statements( + &mut self, + ) -> impl Iterator> { + self.statements.iter().filter_map(process_statement) + } + + fn start_function(&self) -> &str { + START_FUNCTION + } +} + +impl InstructionArgs for [Argument] { + type Error = &'static str; + + fn l(&self) -> Result { + const ERR: &str = "Expected: label"; + match self { + [l] => Ok(argument_to_escaped_symbol(l).ok_or(ERR)?), + _ => Err(ERR), + } + } + + fn r(&self) -> Result { + match self { + [Argument::Register(r1)] => Ok(*r1), + _ => Err("Expected: register"), + } + } + + fn rri(&self) -> Result<(Register, Register, u32), &'static str> { + const ERR: &str = "Expected: register, register, immediate"; + match self { + [Argument::Register(r1), Argument::Register(r2), n] => { + Ok((*r1, *r2, argument_to_number(n).ok_or(ERR)?)) + } + _ => Err(ERR), + } + } + + fn rrr(&self) -> Result<(Register, Register, Register), &'static str> { + match self { + [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3)] => { + Ok((*r1, *r2, *r3)) + } + _ => Err("Expected: register, register, register"), + } + } + + fn ri(&self) -> Result<(Register, u32), &'static str> { + const ERR: &str = "Expected: register, immediate"; + match self { + [Argument::Register(r1), n] => Ok((*r1, argument_to_number(n).ok_or(ERR)?)), + _ => Err(ERR), + } + } + + fn rr(&self) -> Result<(Register, Register), &'static str> { + match self { + [Argument::Register(r1), Argument::Register(r2)] => Ok((*r1, *r2)), + _ => Err("Expected: register, register"), + } + } + + fn rrl(&self) -> Result<(Register, Register, String), &'static str> { + const ERR: &str = "Expected: register, register, label"; + match self { + [Argument::Register(r1), Argument::Register(r2), l] => { + Ok((*r1, *r2, argument_to_escaped_symbol(l).ok_or(ERR)?)) + } + _ => Err(ERR), + } + } + + fn rl(&self) -> Result<(Register, String), &'static str> { + const ERR: &str = "Expected: register, label"; + match self { + [Argument::Register(r1), l] => Ok((*r1, argument_to_escaped_symbol(l).ok_or(ERR)?)), + _ => Err(ERR), + } + } + + fn rro(&self) -> Result<(Register, Register, u32), &'static str> { + if let [Argument::Register(r1), Argument::RegOffset(off, r2)] = self { + if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) + { + return Ok((*r1, *r2, off)); + } + } + if let [Argument::Register(r1), Argument::Expression(off)] = self { + if let Some(off) = expression_to_number(off) { + // If the register is not specified, it defaults to x0 + return Ok((*r1, Register::new(0), off)); + } + } + + Err("Expected: register, offset(register)") + } + + fn rrro(&self) -> Result<(Register, Register, Register, u32), &'static str> { + if let [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(off, r3)] = self + { + if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) + { + return Ok((*r1, *r2, *r3, off)); + } + } + if let [Argument::Register(r1), Argument::Register(r2), Argument::Expression(off)] = self { + if let Some(off) = expression_to_number(off) { + // If the register is not specified, it defaults to x0 + return Ok((*r1, *r2, Register::new(0), off)); + } + } + Err("Expected: register, register, offset(register)") + } + + fn empty(&self) -> Result<(), &'static str> { + match self { + [] => Ok(()), + _ => Err("Expected: no arguments"), + } + } +} + +/// Compiles riscv assembly to a powdr assembly file. Adds required library routines. +pub fn compile( + assemblies: BTreeMap, + runtime: &Runtime, + with_bootloader: bool, +) -> String { + let asm_program = compile_internal(assemblies); + + code_gen::translate_program::(asm_program, runtime, with_bootloader) +} + +fn compile_internal(mut assemblies: BTreeMap) -> AsmProgram { + // stack grows towards zero + let stack_start = 0x10000000; + // data grows away from zero + let data_start = 0x10000100; + + assert!(assemblies + .insert("__runtime".to_string(), global_declarations(stack_start)) + .is_none()); + + // TODO remove unreferenced files. + let (mut statements, file_ids) = disambiguator::disambiguate( + assemblies + .into_iter() + .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) + .collect(), + ); + let mut data_sections = data_parser::extract_data_objects(&statements); + + // Reduce to the code that is actually reachable from main + // (and the objects that are referred from there) + let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>( + START_FUNCTION, + &mut statements, + &mut data_sections, + ); + + // Replace dynamic references to code labels + replace_dynamic_label_references(&mut statements, &data_labels); + + let mut mem_entries = Vec::new(); + let data_positions = + store_data_objects(data_sections, data_start, &mut |label, addr, value| { + mem_entries.push(MemEntry { label, addr, value }); + }); + + let statements = substitute_symbols_with_values(statements, &data_positions); + + AsmProgram { + file_ids, + mem_entries: Some(mem_entries), + statements, + } +} + +/// Replace certain patterns of references to code labels by +/// special instructions. We ignore any references to data objects +/// because they will be handled differently. +fn replace_dynamic_label_references(statements: &mut Vec, data_labels: &HashSet<&str>) { + /* + Find patterns of the form + lui a0, %hi(LABEL) + addi s10, a0, %lo(LABEL) + - + turn this into the pseudoinstruction + li s10, LABEL + which is then turned into + + s10 <== load_label(LABEL) + + It gets complicated by the fact that sometimes, labels + and debugging directives occur between the two statements + matching that pattern... + */ + let instruction_indices = statements + .iter() + .enumerate() + .filter_map(|(i, s)| match s { + Statement::Instruction(_, _) => Some(i), + _ => None, + }) + .collect::>(); + + let mut to_delete = BTreeSet::default(); + for (i1, i2) in instruction_indices.into_iter().tuple_windows() { + if let Some(r) = + replace_dynamic_label_reference(&statements[i1], &statements[i2], data_labels) + { + to_delete.insert(i1); + statements[i2] = r; + } + } + + let mut i = 0; + statements.retain(|_| (!to_delete.contains(&i), i += 1).0); +} + +fn replace_dynamic_label_reference( + s1: &Statement, + s2: &Statement, + data_labels: &HashSet<&str>, +) -> Option { + let Statement::Instruction(instr1, args1) = s1 else { + return None; + }; + let Statement::Instruction(instr2, args2) = s2 else { + return None; + }; + if instr1.as_str() != "lui" || instr2.as_str() != "addi" { + return None; + }; + let [Argument::Register(r1), Argument::Expression(Expression::FunctionOp(FunctionKind::HiDataRef, expr1))] = + &args1[..] + else { + return None; + }; + // Maybe should try to reduce expr1 and expr2 before comparing deciding it is a pure symbol? + let Expression::Symbol(label1) = expr1.as_ref() else { + return None; + }; + let [Argument::Register(r2), Argument::Register(r3), Argument::Expression(Expression::FunctionOp(FunctionKind::LoDataRef, expr2))] = + &args2[..] + else { + return None; + }; + let Expression::Symbol(label2) = expr2.as_ref() else { + return None; + }; + if r1 != r3 || label1 != label2 || data_labels.contains(label1.as_str()) { + return None; + } + Some(Statement::Instruction( + "li".to_string(), + vec![ + Argument::Register(*r2), + Argument::Expression(Expression::Symbol(label1.clone())), + ], + )) +} + +fn substitute_symbols_with_values( + mut statements: Vec, + data_positions: &BTreeMap, +) -> Vec { + for s in &mut statements { + let Statement::Instruction(_name, args) = s else { + continue; + }; + for arg in args { + arg.post_visit_expressions_mut(&mut |expression| match expression { + Expression::Number(_) => {} + Expression::Symbol(symb) => { + if let Some(pos) = data_positions.get(symb) { + *expression = Expression::Number(*pos as i64) + } + } + Expression::UnaryOp(op, subexpr) => { + if let Expression::Number(num) = subexpr.as_ref() { + let result = match op { + UnaryOpKind::BitwiseNot => !num, + UnaryOpKind::Negation => -num, + }; + *expression = Expression::Number(result); + }; + } + Expression::BinaryOp(op, subexprs) => { + if let (Expression::Number(a), Expression::Number(b)) = + (&subexprs[0], &subexprs[1]) + { + let result = match op { + BinaryOpKind::Or => a | b, + BinaryOpKind::Xor => a ^ b, + BinaryOpKind::And => a & b, + BinaryOpKind::LeftShift => a << b, + BinaryOpKind::RightShift => a >> b, + BinaryOpKind::Add => a + b, + BinaryOpKind::Sub => a - b, + BinaryOpKind::Mul => a * b, + BinaryOpKind::Div => a / b, + BinaryOpKind::Mod => a % b, + }; + *expression = Expression::Number(result); + } + } + Expression::FunctionOp(op, subexpr) => { + if let Expression::Number(num) = subexpr.as_ref() { + let result = match op { + FunctionKind::HiDataRef => num >> 12, + FunctionKind::LoDataRef => num & 0xfff, + }; + *expression = Expression::Number(result); + }; + } + }); + } + } + statements +} + +fn process_statement(s: &Statement) -> Option> { + match s { + Statement::Label(l) => Some(code_gen::Statement::Label(l)), + Statement::Directive(directive, args) => match (directive.as_str(), &args[..]) { + ( + ".loc", + [Argument::Expression(Expression::Number(file)), Argument::Expression(Expression::Number(line)), Argument::Expression(Expression::Number(column)), ..], + ) => Some(code_gen::Statement::DebugLoc { + file: *file as u64, + line: *line as u64, + col: *column as u64, + }), + (".file", _) => { + // We ignore ".file" directives because they have been extracted to the top. + None + } + (".size", _) => { + // We ignore ".size" directives + None + } + _ if directive.starts_with(".cfi_") => None, + _ => panic!( + "Leftover directive in code: {directive} {}", + args.iter().format(", ") + ), + }, + Statement::Instruction(instr, args) => { + // TODO: maybe restore this debug info + /* + let stmt_str = format!("{s}"); + // remove indentation and trailing newline + let stmt_str = &stmt_str[2..(stmt_str.len() - 1)]; + let mut ret = vec![format!(" .debug insn \"{stmt_str}\";")]; + */ + Some(code_gen::Statement::Instruction { + op: instr, + args: args.as_slice(), + }) + } + } +} + +struct RiscvArchitecture {} + +impl Architecture for RiscvArchitecture { + fn instruction_ends_control_flow(instr: &str) -> bool { + match instr { + "li" | "lui" | "la" | "mv" | "add" | "addi" | "sub" | "neg" | "mul" | "mulh" + | "mulhu" | "mulhsu" | "divu" | "remu" | "xor" | "xori" | "and" | "andi" | "or" + | "ori" | "not" | "slli" | "sll" | "srli" | "srl" | "srai" | "seqz" | "snez" + | "slt" | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" + | "blt" | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" + | "jalr" | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" + | "sh" | "sb" | "nop" | "fence" | "amoadd.w" | "amoadd.w.aq" | "amoadd.w.rl" + | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" | "sc.w" + | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, + "j" | "jr" | "tail" | "ret" | "unimp" => true, + _ => { + panic!("Unknown instruction: {instr}"); + } + } + } + + fn get_references< + 'a, + R: powdr_asm_utils::ast::Register, + F: powdr_asm_utils::ast::FunctionOpKind, + >( + instr: &str, + args: &'a [powdr_asm_utils::ast::Argument], + ) -> Vec<&'a str> { + // fence arguments are not symbols, they are like reserved + // keywords affecting the instruction behavior + if instr.starts_with("fence") { + Vec::new() + } else { + symbols_in_args(args) + } + } +} + +/// Maps an instruction in .insn syntax to Statement::Instruction() in the expected format. +/// +/// See https://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dFormats.html +pub fn map_insn_i( + opcode6: Expression, + func3: Expression, + rd: Register, + rs1: Register, + simm12: Expression, +) -> Statement { + let (Expression::Number(opcode6), Expression::Number(func3)) = (opcode6, func3) else { + panic!("Only literal opcode and function are supported in .insn syntax"); + }; + + // These are almost all instructions in RISC-V Instruction Set Manual that + // we are supposed to implement and roughly fits the pattern of the I-type + // instruction. Only "csr*i" instructions are missing. + + // First we try to match the instructions that uses the I-type encoding + // ordinarily, i.e. where all fields are what they are supposed to be: + let name = match (opcode6, func3) { + (0b1100111, 0b000) => "jalr", + (0b0000011, 0b000) => "lb", + (0b0000011, 0b001) => "lh", + (0b0000011, 0b010) => "lw", + (0b0000011, 0b100) => "lbu", + (0b0000011, 0b101) => "lhu", + (0b0010011, 0b000) => "addi", + (0b0010011, 0b010) => "slti", + (0b0010011, 0b011) => "sltiu", + (0b0010011, 0b100) => "xori", + (0b0010011, 0b110) => "ori", + (0b0010011, 0b111) => "andi", + (0b1110011, 0b001) => "csrrw", + (0b1110011, 0b010) => "csrrs", + (0b1110011, 0b011) => "csrrc", + // won't interpret "csr*i" instructions because it is too weird to + // encode an immediate as a register + opfunc => { + // We now try the instructions that take certain liberties with the + // I-type encoding, and don't use the standard arguments for it. + let name = match opfunc { + (0b0001111, 0b000) => "fence", + (0b0001111, 0b001) => "fence.i", + (0b1110011, 0b000) => { + let Expression::Number(simm12) = simm12 else { + panic!( + "Only literal simm12 is supported for ecall and ebreak instructions" + ); + }; + match simm12 { + 0 => "ecall", + 1 => "ebreak", + _ => panic!("unknown instruction"), + } + } + _ => panic!("unsupported .insn instruction"), + }; + return Statement::Instruction(name.to_string(), Vec::new()); + } + }; + + let args = vec![ + Argument::Register(rd), + Argument::Register(rs1), + Argument::Expression(simm12), + ]; + + Statement::Instruction(name.to_string(), args) +} + +fn global_declarations(stack_start: u32) -> String { + [ + "__divdi3", + "__udivdi3", + "__udivti3", + "__divdf3", + "__muldf3", + "__moddi3", + "__umoddi3", + "__umodti3", + "__eqdf2", + "__ltdf2", + "__nedf2", + "__unorddf2", + "__floatundidf", + "__extendsfdf2", + "memcpy", + "memmove", + "memset", + "memcmp", + "bcmp", + "strlen", + ] + .map(|n| format!(".globl {n}@plt\n.globl {n}\n.set {n}@plt, {n}\n")) + .join("\n\n") + + &[("__rust_alloc_error_handler", "__rg_oom")] + .map(|(n, m)| format!(".globl {n}\n.set {n}, {m}\n")) + .join("\n\n") + + + // some extra symbols expected by rust code: + // - __rust_no_alloc_shim_is_unstable: compilation time acknowledgment that this feature is unstable. + // - __rust_alloc_error_handler_should_panic: needed by the default alloc error handler, + // not sure why it's not present in the asm. + // https://github.com/rust-lang/rust/blob/ae9d7b0c6434b27e4e2effe8f05b16d37e7ef33f/library/alloc/src/alloc.rs#L415 + &format!(r".data +.globl __rust_alloc_error_handler_should_panic +__rust_alloc_error_handler_should_panic: .byte 0 +.globl __rust_no_alloc_shim_is_unstable +__rust_no_alloc_shim_is_unstable: .byte 0 +.text +.globl __stack_setup +__stack_setup: +li sp, {stack_start} +tail __runtime_start +") +} diff --git a/riscv/src/parser.rs b/riscv/src/asm/parser.rs similarity index 88% rename from riscv/src/parser.rs rename to riscv/src/asm/parser.rs index 88dd5f162..0ca41b6f4 100644 --- a/riscv/src/parser.rs +++ b/riscv/src/asm/parser.rs @@ -1,16 +1,15 @@ use lalrpop_util::*; -use crate::{ - compiler::{FunctionKind, Register}, - Statement, -}; +use crate::code_gen::{FunctionKind, Register}; use powdr_parser_util::handle_parse_error; +use super::Statement; + lalrpop_mod!( #[allow(clippy::all)] #[allow(clippy::uninlined_format_args)] riscv_asm, - "/riscv_asm.rs" + "/asm/riscv_asm.rs" ); pub struct RiscParser { diff --git a/riscv/src/riscv_asm.lalrpop b/riscv/src/asm/riscv_asm.lalrpop similarity index 98% rename from riscv/src/riscv_asm.lalrpop rename to riscv/src/asm/riscv_asm.lalrpop index f876f30bb..50a8f9896 100644 --- a/riscv/src/riscv_asm.lalrpop +++ b/riscv/src/asm/riscv_asm.lalrpop @@ -13,7 +13,7 @@ use std::str::FromStr; use powdr_asm_utils::ast::{unescape_string, BinaryOpKind as BOp, UnaryOpKind as UOp, new_binary_op as bin_op, new_unary_op as un_op, new_function_op as fn_op}; -use crate::{Argument, Register, Statement, FunctionKind as FOp, Expression, map_insn_i}; +use super::super::{Argument, Register, Statement, FunctionKind as FOp, Expression, map_insn_i}; grammar; diff --git a/riscv/src/compiler.rs b/riscv/src/code_gen.rs similarity index 69% rename from riscv/src/compiler.rs rename to riscv/src/code_gen.rs index 9e9ec4a2b..b77d2d9bb 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/code_gen.rs @@ -1,27 +1,12 @@ -use std::{ - collections::{BTreeMap, BTreeSet, HashSet}, - fmt, -}; +use std::fmt; use itertools::Itertools; -use powdr_asm_utils::{ - ast::{BinaryOpKind, UnaryOpKind}, - data_parser, - data_storage::{store_data_objects, SingleDataValue}, - parser::parse_asm, - reachability::{self, symbols_in_args}, - utils::{ - argument_to_escaped_symbol, argument_to_number, escape_label, expression_to_number, quote, - }, - Architecture, -}; +use powdr_asm_utils::data_storage::SingleDataValue; +use powdr_asm_utils::utils::{escape_label, quote}; use powdr_number::{FieldElement, KnownField}; use crate::continuations::bootloader::{bootloader_and_shutdown_routine, bootloader_preamble}; -use crate::disambiguator; -use crate::parser::RiscParser; use crate::runtime::Runtime; -use crate::{Argument, Expression, Statement}; #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub struct Register { @@ -63,135 +48,124 @@ impl fmt::Display for FunctionKind { } } -struct RiscvArchitecture {} - -impl Architecture for RiscvArchitecture { - fn instruction_ends_control_flow(instr: &str) -> bool { - match instr { - "li" | "lui" | "la" | "mv" | "add" | "addi" | "sub" | "neg" | "mul" | "mulh" - | "mulhu" | "mulhsu" | "divu" | "remu" | "xor" | "xori" | "and" | "andi" | "or" - | "ori" | "not" | "slli" | "sll" | "srli" | "srl" | "srai" | "seqz" | "snez" - | "slt" | "slti" | "sltu" | "sltiu" | "sgtz" | "beq" | "beqz" | "bgeu" | "bltu" - | "blt" | "bge" | "bltz" | "blez" | "bgtz" | "bgez" | "bne" | "bnez" | "jal" - | "jalr" | "call" | "ecall" | "ebreak" | "lw" | "lb" | "lbu" | "lh" | "lhu" | "sw" - | "sh" | "sb" | "nop" | "fence" | "fence.i" | "amoadd.w" | "amoadd.w.aq" - | "amoadd.w.rl" | "amoadd.w.aqrl" | "lr.w" | "lr.w.aq" | "lr.w.rl" | "lr.w.aqrl" - | "sc.w" | "sc.w.aq" | "sc.w.rl" | "sc.w.aqrl" => false, - "j" | "jr" | "tail" | "ret" | "unimp" => true, - _ => { - panic!("Unknown instruction: {instr}"); - } - } - } - - fn get_references< - 'a, - R: powdr_asm_utils::ast::Register, - F: powdr_asm_utils::ast::FunctionOpKind, - >( - instr: &str, - args: &'a [powdr_asm_utils::ast::Argument], - ) -> Vec<&'a str> { - // fence arguments are not symbols, they are like reserved - // keywords affecting the instruction behavior - if instr.starts_with("fence") { - Vec::new() - } else { - symbols_in_args(args) - } - } +pub enum Statement<'a, L: AsRef + 'a, A: InstructionArgs + ?Sized + 'a> { + DebugLoc { file: u64, line: u64, col: u64 }, + Label(L), + Instruction { op: &'a str, args: &'a A }, } -/// Compiles riscv assembly to a powdr assembly file. Adds required library routines. -pub fn compile( - mut assemblies: BTreeMap, +pub struct MemEntry { + pub label: Option, + pub addr: u32, + pub value: SingleDataValue, +} + +pub struct SourceFileInfo<'a> { + pub id: u32, + pub dir: &'a str, + pub file: &'a str, +} + +/// A RISC-V program that can be translated to POWDR ASM. +pub trait RiscVProgram { + type Args: InstructionArgs + ?Sized; + + /// Takes the listing of source files, to be used in the debug statements. + fn take_source_files_info(&mut self) -> impl Iterator; + + /// Takes the initial memory snapshot. + fn take_initial_mem(&mut self) -> impl Iterator; + + /// Takes the executable statements and labels. + fn take_executable_statements( + &mut self, + ) -> impl Iterator, Self::Args>>; + + /// The name of the function that should be called to start the program. + fn start_function(&self) -> &str; +} + +/// Translates a RISC-V program to POWDR ASM. +/// +/// Will call each of the methods in the `RiscVProgram` just once. +pub fn translate_program( + program: impl RiscVProgram, runtime: &Runtime, with_bootloader: bool, ) -> String { - // stack grows towards zero - let stack_start = 0x10000; - // data grows away from zero - let data_start = 0x10100; + // Do this in a separate function to avoid most of the code being generic on F. + let (initial_mem, instructions, degree) = + translate_program_impl(program, runtime, with_bootloader); - assert!(assemblies - .insert("__runtime".to_string(), runtime.global_declarations()) - .is_none()); - - // TODO remove unreferenced files. - let (mut statements, file_ids) = disambiguator::disambiguate( - assemblies - .into_iter() - .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) - .collect(), - ); - let mut data_sections = data_parser::extract_data_objects(&statements); - - // Reduce to the code that is actually reachable from main - // (and the objects that are referred from there) - let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>( - "__runtime_start", - &mut statements, - &mut data_sections, - ); - - // Replace dynamic references to code labels - replace_dynamic_label_references(&mut statements, &data_labels); + riscv_machine( + runtime, + degree, + &preamble::(runtime, with_bootloader), + initial_mem, + instructions, + ) +} +fn translate_program_impl( + mut program: impl RiscVProgram, + runtime: &Runtime, + with_bootloader: bool, +) -> (Vec, Vec, u64) { let mut initial_mem = Vec::new(); let mut data_code = Vec::new(); - let data_positions = - store_data_objects(data_sections, data_start, &mut |label, addr, value| { - if let Some(label) = label { - let comment = format!(" // data {label}"); - if with_bootloader && !matches!(value, SingleDataValue::LabelReference(_)) { - &mut initial_mem + for MemEntry { label, addr, value } in program.take_initial_mem() { + if let Some(label) = label { + // This is a comment, so we don't need to escape the label. + let comment = format!(" // data {label}"); + if with_bootloader && !matches!(value, SingleDataValue::LabelReference(_)) { + &mut initial_mem + } else { + &mut data_code + } + .push(comment); + } + match value { + SingleDataValue::Value(v) => { + if with_bootloader { + // Instead of generating the data loading code, we store it + // in the variable that will be used as the initial memory + // snapshot, committed by the bootloader. + initial_mem.push(format!("(0x{addr:x}, 0x{v:x})")); } else { - &mut data_code - } - .push(comment); - } - match value { - SingleDataValue::Value(v) => { - if with_bootloader { - // Instead of generating the data loading code, we store it - // in the variable that will be used as the initial memory - // snapshot, committed by the bootloader. - initial_mem.push(format!("(0x{addr:x}, 0x{v:x})")); - } else { - // There is no bootloader to commit to memory, so we have to - // load it explicitly. - data_code.push(format!("mstore 0x{addr:x}, 0x{v:x};")); - } - } - SingleDataValue::LabelReference(sym) => { - // The label value is not known at this point, so we have to - // load it via code, irrespectively of bootloader availability. - // - // TODO should be possible without temporary - data_code.extend([ - format!("tmp1 <== load_label({});", escape_label(sym)), - format!("mstore 0x{addr:x}, tmp1;"), - ]); - } - SingleDataValue::Offset(_, _) => { - unimplemented!(); - /* - object_code.push(format!("addr <=X= 0x{pos:x};")); - - I think this solution should be fine but hard to say without - an actual code snippet that uses it. - - // TODO should be possible without temporary - object_code.extend([ - format!("tmp1 <== load_label({});", escape_label(a)), - format!("tmp2 <== load_label({});", escape_label(b)), - // TODO check if registers match - "mstore wrap(tmp1 - tmp2);".to_string(), - ]); - */ + // There is no bootloader to commit to memory, so we have to + // load it explicitly. + data_code.push(format!("mstore 0x{addr:x}, 0x{v:x};")); } } - }); + SingleDataValue::LabelReference(sym) => { + // The label value is not known at this point, so we have to + // load it via code, irrespectively of bootloader availability. + // + // TODO should be possible without temporary + data_code.extend([ + format!("tmp1 <== load_label({});", escape_label(&sym)), + format!("mstore 0x{addr:x}, tmp1;"), + ]); + } + SingleDataValue::Offset(_, _) => { + unimplemented!(); + /* + object_code.push(format!("addr <=X= 0x{pos:x};")); + + I think this solution should be fine but hard to say without + an actual code snippet that uses it. + + // TODO should be possible without temporary + object_code.extend([ + format!("tmp1 <== load_label({});", escape_label(a)), + format!("tmp2 <== load_label({});", escape_label(b)), + // TODO check if registers match + "mstore wrap(tmp1 - tmp2);".to_string(), + ]); + */ + } + } + } let submachines_init = runtime.submachines_init(); let bootloader_and_shutdown_routine_lines = if with_bootloader { @@ -205,26 +179,42 @@ pub fn compile( submachines_init }; - let mut program: Vec = file_ids - .into_iter() - .map(|(id, dir, file)| format!(".debug file {id} {} {};", quote(&dir), quote(&file))) + let mut statements: Vec = program + .take_source_files_info() + .map( + |SourceFileInfo { + id, + dir, + file: name, + }| { format!(".debug file {id} {} {};", quote(dir), quote(name)) }, + ) .chain(bootloader_and_shutdown_routine_lines) .collect(); if !data_code.is_empty() { - program.push("x1 <== jump(__data_init);".to_string()); + statements.push("x1 <== jump(__data_init);".to_string()); } - program.extend([ - format!("// Set stack pointer\nx2 <=X= {stack_start};"), - "x1 <== jump(__runtime_start);".to_string(), + statements.extend([ + format!("x1 <== jump({});", program.start_function()), "return;".to_string(), // This is not "riscv ret", but "return from powdr asm function". ]); - program.extend( - substitute_symbols_with_values(statements, &data_positions) - .into_iter() - .flat_map(process_statement), - ); + for s in program.take_executable_statements() { + match s { + Statement::DebugLoc { file, line, col } => { + statements.push(format!(".debug loc {file} {line} {col};")) + } + Statement::Label(l) => statements.push(format!("{}:", escape_label(l.as_ref()))), + Statement::Instruction { op, args } => { + let processed_instr = match process_instruction(op, args) { + Ok(s) => s, + Err(e) => panic!("Failed to process instruction '{op}'. {e}"), + }; + statements.extend(processed_instr.into_iter().map(|s| " ".to_string() + &s)) + } + } + } + if !data_code.is_empty() { - program.extend( + statements.extend( ["// This is the data initialization routine.\n__data_init:".to_string()].into_iter() .chain(data_code) .chain([ @@ -232,10 +222,10 @@ pub fn compile( .to_string(), ])); } - program.extend(runtime.ecall_handler()); + statements.extend(runtime.ecall_handler()); // The program ROM needs to fit the degree, so we use the next power of 2. - let degree = program.len().ilog2() + 1; + let degree = statements.len().ilog2() + 1; let degree = std::cmp::max(degree, 18); log::info!("Inferred degree 2^{degree}"); @@ -250,157 +240,7 @@ pub fn compile( assert!((18..=20).contains(°ree)); let degree = 1 << degree; - riscv_machine( - runtime, - degree, - &preamble::(runtime, with_bootloader), - initial_mem, - program, - ) -} - -/// Replace certain patterns of references to code labels by -/// special instructions. We ignore any references to data objects -/// because they will be handled differently. -fn replace_dynamic_label_references(statements: &mut Vec, data_labels: &HashSet<&str>) { - /* - Find patterns of the form - lui a0, %hi(LABEL) - addi s10, a0, %lo(LABEL) - - - turn this into the pseudoinstruction - li s10, LABEL - which is then turned into - - s10 <== load_label(LABEL) - - It gets complicated by the fact that sometimes, labels - and debugging directives occur between the two statements - matching that pattern... - */ - let instruction_indices = statements - .iter() - .enumerate() - .filter_map(|(i, s)| match s { - Statement::Instruction(_, _) => Some(i), - _ => None, - }) - .collect::>(); - - let mut to_delete = BTreeSet::default(); - for (i1, i2) in instruction_indices.into_iter().tuple_windows() { - if let Some(r) = - replace_dynamic_label_reference(&statements[i1], &statements[i2], data_labels) - { - to_delete.insert(i1); - statements[i2] = r; - } - } - - let mut i = 0; - statements.retain(|_| (!to_delete.contains(&i), i += 1).0); -} - -fn replace_dynamic_label_reference( - s1: &Statement, - s2: &Statement, - data_labels: &HashSet<&str>, -) -> Option { - let Statement::Instruction(instr1, args1) = s1 else { - return None; - }; - let Statement::Instruction(instr2, args2) = s2 else { - return None; - }; - if instr1.as_str() != "lui" || instr2.as_str() != "addi" { - return None; - }; - let [Argument::Register(r1), Argument::Expression(Expression::FunctionOp(FunctionKind::HiDataRef, expr1))] = - &args1[..] - else { - return None; - }; - // Maybe should try to reduce expr1 and expr2 before comparing deciding it is a pure symbol? - let Expression::Symbol(label1) = expr1.as_ref() else { - return None; - }; - let [Argument::Register(r2), Argument::Register(r3), Argument::Expression(Expression::FunctionOp(FunctionKind::LoDataRef, expr2))] = - &args2[..] - else { - return None; - }; - let Expression::Symbol(label2) = expr2.as_ref() else { - return None; - }; - if r1 != r3 || label1 != label2 || data_labels.contains(label1.as_str()) { - return None; - } - Some(Statement::Instruction( - "li".to_string(), - vec![ - Argument::Register(*r2), - Argument::Expression(Expression::Symbol(label1.clone())), - ], - )) -} - -fn substitute_symbols_with_values( - mut statements: Vec, - data_positions: &BTreeMap, -) -> Vec { - for s in &mut statements { - let Statement::Instruction(_name, args) = s else { - continue; - }; - for arg in args { - arg.post_visit_expressions_mut(&mut |expression| match expression { - Expression::Number(_) => {} - Expression::Symbol(symb) => { - if let Some(pos) = data_positions.get(symb) { - *expression = Expression::Number(*pos as i64) - } - } - Expression::UnaryOp(op, subexpr) => { - if let Expression::Number(num) = subexpr.as_ref() { - let result = match op { - UnaryOpKind::BitwiseNot => !num, - UnaryOpKind::Negation => -num, - }; - *expression = Expression::Number(result); - }; - } - Expression::BinaryOp(op, subexprs) => { - if let (Expression::Number(a), Expression::Number(b)) = - (&subexprs[0], &subexprs[1]) - { - let result = match op { - BinaryOpKind::Or => a | b, - BinaryOpKind::Xor => a ^ b, - BinaryOpKind::And => a & b, - BinaryOpKind::LeftShift => a << b, - BinaryOpKind::RightShift => a >> b, - BinaryOpKind::Add => a + b, - BinaryOpKind::Sub => a - b, - BinaryOpKind::Mul => a * b, - BinaryOpKind::Div => a / b, - BinaryOpKind::Mod => a % b, - }; - *expression = Expression::Number(result); - } - } - Expression::FunctionOp(op, subexpr) => { - if let Expression::Number(num) = subexpr.as_ref() { - let result = match op { - FunctionKind::HiDataRef => num >> 12, - FunctionKind::LoDataRef => num & 0xfff, - }; - *expression = Expression::Number(result); - }; - } - }); - } - } - statements + (initial_mem, statements, degree) } fn riscv_machine( @@ -812,47 +652,8 @@ fn memory(with_bootloader: bool) -> String { "# } -fn process_statement(s: Statement) -> Vec { - match &s { - Statement::Label(l) => vec![format!("{}:", escape_label(l))], - Statement::Directive(directive, args) => match (directive.as_str(), &args[..]) { - ( - ".loc", - [Argument::Expression(Expression::Number(file)), Argument::Expression(Expression::Number(line)), Argument::Expression(Expression::Number(column)), ..], - ) => { - vec![format!(" .debug loc {file} {line} {column};")] - } - (".file", _) => { - // We ignore ".file" directives because they have been extracted to the top. - vec![] - } - (".size", _) => { - // We ignore ".size" directives - vec![] - } - _ if directive.starts_with(".cfi_") => vec![], - _ => panic!( - "Leftover directive in code: {directive} {}", - args.iter().format(", ") - ), - }, - Statement::Instruction(instr, args) => { - let stmt_str = format!("{s}"); - // remove indentation and trailing newline - let stmt_str = &stmt_str[2..(stmt_str.len() - 1)]; - let mut ret = vec![format!(" .debug insn \"{stmt_str}\";")]; - let processed_instr = match process_instruction(instr, &args[..]) { - Ok(s) => s, - Err(e) => panic!("Failed to process instruction '{instr}'. {e}"), - }; - ret.extend(processed_instr.into_iter().map(|s| " ".to_string() + &s)); - ret - } - } -} - -trait Args { - type Error; +pub trait InstructionArgs { + type Error: fmt::Display; fn l(&self) -> Result; fn r(&self) -> Result; @@ -867,118 +668,6 @@ trait Args { fn empty(&self) -> Result<(), Self::Error>; } -impl Args for [Argument] { - type Error = &'static str; - - fn l(&self) -> Result { - const ERR: &str = "Expected: label"; - match self { - [l] => Ok(argument_to_escaped_symbol(l).ok_or(ERR)?), - _ => Err(ERR), - } - } - - fn r(&self) -> Result { - match self { - [Argument::Register(r1)] => Ok(*r1), - _ => Err("Expected: register"), - } - } - - fn rri(&self) -> Result<(Register, Register, u32), &'static str> { - const ERR: &str = "Expected: register, register, immediate"; - match self { - [Argument::Register(r1), Argument::Register(r2), n] => { - Ok((*r1, *r2, argument_to_number(n).ok_or(ERR)?)) - } - _ => Err(ERR), - } - } - - fn rrr(&self) -> Result<(Register, Register, Register), &'static str> { - match self { - [Argument::Register(r1), Argument::Register(r2), Argument::Register(r3)] => { - Ok((*r1, *r2, *r3)) - } - _ => Err("Expected: register, register, register"), - } - } - - fn ri(&self) -> Result<(Register, u32), &'static str> { - const ERR: &str = "Expected: register, immediate"; - match self { - [Argument::Register(r1), n] => Ok((*r1, argument_to_number(n).ok_or(ERR)?)), - _ => Err(ERR), - } - } - - fn rr(&self) -> Result<(Register, Register), &'static str> { - match self { - [Argument::Register(r1), Argument::Register(r2)] => Ok((*r1, *r2)), - _ => Err("Expected: register, register"), - } - } - - fn rrl(&self) -> Result<(Register, Register, String), &'static str> { - const ERR: &str = "Expected: register, register, label"; - match self { - [Argument::Register(r1), Argument::Register(r2), l] => { - Ok((*r1, *r2, argument_to_escaped_symbol(l).ok_or(ERR)?)) - } - _ => Err(ERR), - } - } - - fn rl(&self) -> Result<(Register, String), &'static str> { - const ERR: &str = "Expected: register, label"; - match self { - [Argument::Register(r1), l] => Ok((*r1, argument_to_escaped_symbol(l).ok_or(ERR)?)), - _ => Err(ERR), - } - } - - fn rro(&self) -> Result<(Register, Register, u32), &'static str> { - if let [Argument::Register(r1), Argument::RegOffset(off, r2)] = self { - if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) - { - return Ok((*r1, *r2, off)); - } - } - if let [Argument::Register(r1), Argument::Expression(off)] = self { - if let Some(off) = expression_to_number(off) { - // If the register is not specified, it defaults to x0 - return Ok((*r1, Register::new(0), off)); - } - } - - Err("Expected: register, offset(register)") - } - - fn rrro(&self) -> Result<(Register, Register, Register, u32), &'static str> { - if let [Argument::Register(r1), Argument::Register(r2), Argument::RegOffset(off, r3)] = self - { - if let Some(off) = expression_to_number(off.as_ref().unwrap_or(&Expression::Number(0))) - { - return Ok((*r1, *r2, *r3, off)); - } - } - if let [Argument::Register(r1), Argument::Register(r2), Argument::Expression(off)] = self { - if let Some(off) = expression_to_number(off) { - // If the register is not specified, it defaults to x0 - return Ok((*r1, *r2, Register::new(0), off)); - } - } - Err("Expected: register, register, offset(register)") - } - - fn empty(&self) -> Result<(), &'static str> { - match self { - [] => Ok(()), - _ => Err("Expected: no arguments"), - } - } -} - fn only_if_no_write_to_zero(statement: String, reg: Register) -> Vec { only_if_no_write_to_zero_vec(vec![statement], reg) } @@ -1007,7 +696,10 @@ pub fn pop_register(name: &str) -> [String; 2] { ] } -fn process_instruction(instr: &str, args: &A) -> Result, A::Error> { +fn process_instruction( + instr: &str, + args: &A, +) -> Result, A::Error> { Ok(match instr { // load/store registers "li" | "la" => { diff --git a/riscv/src/lib.rs b/riscv/src/lib.rs index 5bc61fb5e..18a3ad255 100644 --- a/riscv/src/lib.rs +++ b/riscv/src/lib.rs @@ -13,19 +13,13 @@ use powdr_number::FieldElement; use serde_json::Value as JsonValue; use std::fs; -use crate::compiler::{FunctionKind, Register}; pub use crate::runtime::Runtime; -pub mod compiler; +pub mod asm; +mod code_gen; pub mod continuations; -mod disambiguator; -pub mod parser; pub mod runtime; -type Statement = powdr_asm_utils::ast::Statement; -type Argument = powdr_asm_utils::ast::Argument; -type Expression = powdr_asm_utils::ast::Expression; - /// Compiles a rust file all the way down to PIL and generates /// fixed and witness columns. #[allow(clippy::print_stderr)] @@ -105,7 +99,7 @@ pub fn compile_riscv_asm_bundle( return None; } - let powdr_asm = compiler::compile::(riscv_asm_files, runtime, with_bootloader); + let powdr_asm = asm::compile::(riscv_asm_files, runtime, with_bootloader); fs::write(powdr_asm_file_name.clone(), &powdr_asm).unwrap(); log::info!("Wrote {}", powdr_asm_file_name.to_str().unwrap()); @@ -263,74 +257,3 @@ fn output_files_from_cargo_build_plan( assemblies } - -/// Maps an instruction in .insn syntax to Statement::Instruction() in the expected format. -/// -/// See https://www.rowleydownload.co.uk/arm/documentation/gnu/as/RISC_002dV_002dFormats.html -pub fn map_insn_i( - opcode6: Expression, - func3: Expression, - rd: Register, - rs1: Register, - simm12: Expression, -) -> Statement { - let (Expression::Number(opcode6), Expression::Number(func3)) = (opcode6, func3) else { - panic!("Only literal opcode and function are supported in .insn syntax"); - }; - - // These are almost all instructions in RISC-V Instruction Set Manual that - // we are supposed to implement and roughly fits the pattern of the I-type - // instruction. Only "csr*i" instructions are missing. - - // First we try to match the instructions that uses the I-type encoding - // ordinarily, i.e. where all fields are what they are supposed to be: - let name = match (opcode6, func3) { - (0b1100111, 0b000) => "jalr", - (0b0000011, 0b000) => "lb", - (0b0000011, 0b001) => "lh", - (0b0000011, 0b010) => "lw", - (0b0000011, 0b100) => "lbu", - (0b0000011, 0b101) => "lhu", - (0b0010011, 0b000) => "addi", - (0b0010011, 0b010) => "slti", - (0b0010011, 0b011) => "sltiu", - (0b0010011, 0b100) => "xori", - (0b0010011, 0b110) => "ori", - (0b0010011, 0b111) => "andi", - (0b1110011, 0b001) => "csrrw", - (0b1110011, 0b010) => "csrrs", - (0b1110011, 0b011) => "csrrc", - // won't interpret "csr*i" instructions because it is too weird to - // encode an immediate as a register - opfunc => { - // We now try the instructions that take certain liberties with the - // I-type encoding, and don't use the standard arguments for it. - let name = match opfunc { - (0b0001111, 0b000) => "fence", - (0b0001111, 0b001) => "fence.i", - (0b1110011, 0b000) => { - let Expression::Number(simm12) = simm12 else { - panic!( - "Only literal simm12 is supported for ecall and ebreak instructions" - ); - }; - match simm12 { - 0 => "ecall", - 1 => "ebreak", - _ => panic!("unknown instruction"), - } - } - _ => panic!("unsupported .insn instruction"), - }; - return Statement::Instruction(name.to_string(), Vec::new()); - } - }; - - let args = vec![ - Argument::Register(rd), - Argument::Register(rs1), - Argument::Expression(simm12), - ]; - - Statement::Instruction(name.to_string(), args) -} diff --git a/riscv/src/runtime.rs b/riscv/src/runtime.rs index 3b08f7264..564ef3813 100644 --- a/riscv/src/runtime.rs +++ b/riscv/src/runtime.rs @@ -7,7 +7,7 @@ use powdr_ast::parsed::asm::{FunctionStatement, MachineStatement, SymbolPath}; use itertools::Itertools; use powdr_parser::ParserContext; -use crate::compiler::{pop_register, push_register}; +use crate::code_gen::{pop_register, push_register}; static EXTRA_REG_PREFIX: &str = "xtra"; @@ -436,49 +436,6 @@ impl Runtime { .collect() } - pub fn global_declarations(&self) -> String { - [ - "__divdi3", - "__udivdi3", - "__udivti3", - "__divdf3", - "__muldf3", - "__moddi3", - "__umoddi3", - "__umodti3", - "__eqdf2", - "__ltdf2", - "__nedf2", - "__unorddf2", - "__floatundidf", - "__extendsfdf2", - "memcpy", - "memmove", - "memset", - "memcmp", - "bcmp", - "strlen", - ] - .map(|n| format!(".globl {n}@plt\n.globl {n}\n.set {n}@plt, {n}\n")) - .join("\n\n") - + &[("__rust_alloc_error_handler", "__rg_oom")] - .map(|(n, m)| format!(".globl {n}\n.set {n}, {m}\n")) - .join("\n\n") - + - // some extra symbols expected by rust code: - // - __rust_no_alloc_shim_is_unstable: compilation time acknowledgment that this feature is unstable. - // - __rust_alloc_error_handler_should_panic: needed by the default alloc error handler, - // not sure why it's not present in the asm. - // https://github.com/rust-lang/rust/blob/ae9d7b0c6434b27e4e2effe8f05b16d37e7ef33f/library/alloc/src/alloc.rs#L415 - r".data -.globl __rust_alloc_error_handler_should_panic -__rust_alloc_error_handler_should_panic: .byte 0 -.globl __rust_no_alloc_shim_is_unstable -__rust_no_alloc_shim_is_unstable: .byte 0 -.text -" - } - pub fn ecall_handler(&self) -> Vec { let ecall = [ "// ecall handler".to_string(), diff --git a/riscv/tests/instructions.rs b/riscv/tests/instructions.rs index 1476631a7..32a5c39c9 100644 --- a/riscv/tests/instructions.rs +++ b/riscv/tests/instructions.rs @@ -4,7 +4,7 @@ mod instruction_tests { use crate::common::verify_riscv_asm_string; use powdr_backend::BackendType; use powdr_number::GoldilocksField; - use powdr_riscv::compiler::compile; + use powdr_riscv::asm::compile; use powdr_riscv::Runtime; use test_log::test; diff --git a/riscv/tests/riscv.rs b/riscv/tests/riscv.rs index e6689cdd8..8e53f052e 100644 --- a/riscv/tests/riscv.rs +++ b/riscv/tests/riscv.rs @@ -22,7 +22,7 @@ pub fn test_continuations(case: &str) { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = powdr_riscv::compiler::compile::(riscv_asm, &runtime, true); + let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, true); // Manually create tmp dir, so that it is the same in all chunks. let tmp_dir = mktemp::Temp::new_dir().unwrap(); @@ -274,7 +274,7 @@ fn many_chunks_dry() { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = powdr_riscv::compiler::compile::(riscv_asm, &runtime, true); + let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, true); let mut pipeline = Pipeline::default() .from_asm_string(powdr_asm, Some(PathBuf::from(case))) @@ -299,7 +299,7 @@ fn output_syscall() { &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - let powdr_asm = powdr_riscv::compiler::compile::(riscv_asm, &runtime, false); + let powdr_asm = powdr_riscv::asm::compile::(riscv_asm, &runtime, false); let inputs = vec![1u32, 2, 3] .into_iter() @@ -373,5 +373,5 @@ fn compile_riscv_crate(case: &str, runtime: &Runtime) -> String &format!("tests/riscv_data/{case}/Cargo.toml"), &temp_dir, ); - powdr_riscv::compiler::compile::(riscv_asm, runtime, false) + powdr_riscv::asm::compile::(riscv_asm, runtime, false) }