From b87928df8e4eb67b54cd6b41d2cb3000ccc08cc2 Mon Sep 17 00:00:00 2001 From: Lucas Clemente Vella Date: Tue, 27 Feb 2024 13:23:04 +0000 Subject: [PATCH] With continuations, don't load initial memory state via code. --- asm-utils/src/data_storage.rs | 32 ++--- riscv-executor/src/lib.rs | 24 +++- riscv/src/compiler.rs | 117 ++++++++++++------ riscv/src/continuations.rs | 61 ++++++++- riscv/src/continuations/memory_merkle_tree.rs | 1 - riscv/tests/common/mod.rs | 1 + 6 files changed, 165 insertions(+), 71 deletions(-) diff --git a/asm-utils/src/data_storage.rs b/asm-utils/src/data_storage.rs index 3166b67c0..e65e8934a 100644 --- a/asm-utils/src/data_storage.rs +++ b/asm-utils/src/data_storage.rs @@ -14,18 +14,16 @@ pub enum SingleDataValue<'a> { } struct WordWriter<'a, 'b> { - data_writer: &'a mut dyn FnMut(u32, SingleDataValue) -> Vec, + data_writer: &'a mut dyn FnMut(Option<&str>, u32, SingleDataValue), partial: u32, current_pos: u32, - generated_code: Vec, - latest_label: Option<&'b str>, } impl<'a, 'b> WordWriter<'a, 'b> { fn new( starting_pos: u32, - data_writer: &'a mut dyn FnMut(u32, SingleDataValue) -> Vec, + data_writer: &'a mut dyn FnMut(Option<&str>, u32, SingleDataValue), ) -> Self { // sanitary alignment to 8 bytes let current_pos = next_aligned(starting_pos as usize, 8) as u32; @@ -33,7 +31,6 @@ impl<'a, 'b> WordWriter<'a, 'b> { partial: 0, current_pos, data_writer, - generated_code: Vec::new(), latest_label: None, } } @@ -52,14 +49,11 @@ impl<'a, 'b> WordWriter<'a, 'b> { // if changed words, flush let curr_word = self.current_pos & (!0b11); if (next_pos & (!0b11) != curr_word) && (self.partial != 0) { - if let Some(label) = std::mem::take(&mut self.latest_label) { - self.generated_code.push(format!("// data {label}")); - } - - self.generated_code.extend((*self.data_writer)( + (*self.data_writer)( + std::mem::take(&mut self.latest_label), curr_word, SingleDataValue::Value(self.partial), - )); + ); self.partial = 0; } self.current_pos = next_pos; @@ -90,28 +84,27 @@ impl<'a, 'b> WordWriter<'a, 'b> { "reference to code labels in misaligned data section is not supported" ); - self.generated_code.extend((*self.data_writer)( + (*self.data_writer)( + std::mem::take(&mut self.latest_label), self.current_pos, SingleDataValue::LabelReference(label), - )); + ); assert_eq!(self.partial, 0); self.current_pos += 4; } - fn finish(mut self) -> Vec { + fn finish(mut self) { // ensure the latest partial word is written self.advance(4); - - self.generated_code } } pub fn store_data_objects( sections: Vec, Vec)>>, memory_start: u32, - code_gen: &mut dyn FnMut(u32, SingleDataValue) -> Vec, -) -> (Vec, BTreeMap) { + code_gen: &mut dyn FnMut(Option<&str>, u32, SingleDataValue), +) -> BTreeMap { let mut writer = WordWriter::new(memory_start, code_gen); let positions = { @@ -157,6 +150,7 @@ pub fn store_data_objects( } } } + writer.finish(); - (writer.finish(), positions) + positions } diff --git a/riscv-executor/src/lib.rs b/riscv-executor/src/lib.rs index 053581d27..00a6ddab8 100644 --- a/riscv-executor/src/lib.rs +++ b/riscv-executor/src/lib.rs @@ -284,6 +284,7 @@ mod builder { /// in Err. pub fn new( main: &'a Machine, + mem: MemoryState, batch_to_line_map: &'b [u32], max_rows_len: usize, mode: ExecMode, @@ -326,7 +327,7 @@ mod builder { batch_to_line_map, max_rows: max_rows_len, regs, - mem: HashMap::new(), + mem, mode, }; @@ -472,7 +473,7 @@ mod builder { } } -fn get_main_machine(program: &AnalysisASMFile) -> &Machine { +pub fn get_main_machine(program: &AnalysisASMFile) -> &Machine { for (name, m) in program.items.iter() { if name.len() == 1 && name.parts().next() == Some("Main") { let Item::Machine(m) = m else { @@ -847,6 +848,7 @@ impl<'a, 'b, F: FieldElement> Executor<'a, 'b, F> { pub fn execute_ast( program: &AnalysisASMFile, + initial_memory: MemoryState, inputs: &Callback, bootloader_inputs: &[Elem], max_steps_to_execute: usize, @@ -860,8 +862,13 @@ pub fn execute_ast( debug_files, } = preprocess_main_function(main_machine); - let proc = match TraceBuilder::new(main_machine, &batch_to_line_map, max_steps_to_execute, mode) - { + let proc = match TraceBuilder::new( + main_machine, + initial_memory, + &batch_to_line_map, + max_steps_to_execute, + mode, + ) { Ok(proc) => proc, Err(ret) => return *ret, }; @@ -941,7 +948,14 @@ pub fn execute( let analyzed = powdr_analysis::analyze(resolved).unwrap(); log::info!("Executing..."); - execute_ast(&analyzed, inputs, bootloader_inputs, usize::MAX, mode) + execute_ast( + &analyzed, + MemoryState::new(), + inputs, + bootloader_inputs, + usize::MAX, + mode, + ) } fn to_u32(val: &F) -> Option { diff --git a/riscv/src/compiler.rs b/riscv/src/compiler.rs index 5fad845f1..a721fe5e9 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/compiler.rs @@ -144,34 +144,59 @@ pub fn compile( // for compilation, and will not be called. statements = replace_coprocessor_stubs(statements, coprocessors).collect::>(); - let (data_code, data_positions) = - store_data_objects(data_sections, data_start, &mut |addr, value| match value { - SingleDataValue::Value(v) => { - vec![format!("mstore 0x{addr:x}, 0x{v:x};")] + let mut initial_mem = Vec::new(); + let mut data_code = Vec::new(); + let data_positions = + store_data_objects(data_sections, data_start, &mut |label, addr, value| { + if let Some(label) = label { + let comment = format!(" // data {label}"); + if with_bootloader && !matches!(value, SingleDataValue::LabelReference(_)) { + &mut initial_mem + } else { + &mut data_code + } + .push(comment); } - SingleDataValue::LabelReference(sym) => { - // TODO should be possible without temporary - vec![ - format!("tmp1 <== load_label({});", escape_label(sym)), - format!("mstore 0x{addr:x}, tmp1;"), - ] - } - SingleDataValue::Offset(_, _) => { - unimplemented!(); - /* - object_code.push(format!("addr <=X= 0x{pos:x};")); + match value { + SingleDataValue::Value(v) => { + if with_bootloader { + // Instead of generating the data loading code, we store it + // in the variable that will be used as the initial memory + // snapshot, committed by the bootloader. + initial_mem.push(format!("(0x{addr:x}, 0x{v:x})")); + } else { + // There is no bootloader to commit to memory, so we have to + // load it explicitly. + data_code.push(format!("mstore 0x{addr:x}, 0x{v:x};")); + } + } + SingleDataValue::LabelReference(sym) => { + // The label value is not known at this point, so we have to + // load it via code, irrespectively of bootloader availability. + // + // TODO should be possible without temporary + data_code.extend([ + format!("tmp1 <== load_label({});", escape_label(sym)), + format!("mstore 0x{addr:x}, tmp1;"), + ]); + } + SingleDataValue::Offset(_, _) => { + unimplemented!(); + /* + object_code.push(format!("addr <=X= 0x{pos:x};")); - I think this solution should be fine but hard to say without - an actual code snippet that uses it. + I think this solution should be fine but hard to say without + an actual code snippet that uses it. - // TODO should be possible without temporary - object_code.extend([ - format!("tmp1 <== load_label({});", escape_label(a)), - format!("tmp2 <== load_label({});", escape_label(b)), - // TODO check if registers match - "mstore wrap(tmp1 - tmp2);".to_string(), - ]); - */ + // TODO should be possible without temporary + object_code.extend([ + format!("tmp1 <== load_label({});", escape_label(a)), + format!("tmp2 <== load_label({});", escape_label(b)), + // TODO check if registers match + "mstore wrap(tmp1 - tmp2);".to_string(), + ]); + */ + } } }); @@ -187,28 +212,33 @@ pub fn compile( submachine_init }; - let program: Vec = file_ids + let mut program: Vec = file_ids .into_iter() .map(|(id, dir, file)| format!(".debug file {id} {} {};", quote(&dir), quote(&file))) .chain(bootloader_and_shutdown_routine_lines) - .chain(["x1 <== jump(__data_init);".to_string()]) - .chain([ - format!("// Set stack pointer\nx2 <=X= {stack_start};"), - "x1 <== jump(__runtime_start);".to_string(), - "return;".to_string(), // This is not "riscv ret", but "return from powdr asm function". - ]) - .chain( - substitute_symbols_with_values(statements, &data_positions) - .into_iter() - .flat_map(|v| process_statement(v, coprocessors)), - ) - .chain(["// This is the data initialization routine.\n__data_init:".to_string()]) + .collect(); + if !data_code.is_empty() { + program.push("x1 <== jump(__data_init);".to_string()); + } + program.extend([ + format!("// Set stack pointer\nx2 <=X= {stack_start};"), + "x1 <== jump(__runtime_start);".to_string(), + "return;".to_string(), // This is not "riscv ret", but "return from powdr asm function". + ]); + program.extend( + substitute_symbols_with_values(statements, &data_positions) + .into_iter() + .flat_map(|v| process_statement(v, coprocessors)), + ); + if !data_code.is_empty() { + program.extend( + ["// This is the data initialization routine.\n__data_init:".to_string()].into_iter() .chain(data_code) .chain([ "// This is the end of the data initialization routine.\ntmp1 <== jump_dyn(x1);" .to_string(), - ]) - .collect(); + ])); + } // The program ROM needs to fit the degree, so we use the next power of 2. let degree = program.len().ilog2() + 1; @@ -229,6 +259,7 @@ pub fn compile( riscv_machine( &coprocessors.machine_imports(), &preamble(degree, coprocessors, with_bootloader), + initial_mem, &coprocessors.declarations(), program, ) @@ -407,6 +438,7 @@ fn substitute_symbols_with_values( fn riscv_machine( machines: &[&str], preamble: &str, + initial_memory: Vec, submachines: &[(&str, &str)], program: Vec, ) -> String { @@ -418,6 +450,10 @@ machine Main {{ {} +let initial_memory = [ +{} +]; + function main {{ {} }} @@ -430,6 +466,7 @@ machine Main {{ .collect::>() .join("\n"), preamble, + initial_memory.join(",\n"), program .into_iter() .map(|line| format!("\t\t{line}")) diff --git a/riscv/src/continuations.rs b/riscv/src/continuations.rs index 0a712faf2..27519648e 100644 --- a/riscv/src/continuations.rs +++ b/riscv/src/continuations.rs @@ -2,11 +2,11 @@ use std::collections::{BTreeSet, HashMap}; use powdr_ast::{ asm_analysis::{AnalysisASMFile, RegisterTy}, - parsed::asm::parse_absolute_path, + parsed::{asm::parse_absolute_path, Expression, PilStatement}, }; -use powdr_number::FieldElement; +use powdr_number::{BigInt, FieldElement}; use powdr_pipeline::Pipeline; -use powdr_riscv_executor::{Elem, ExecutionTrace}; +use powdr_riscv_executor::{get_main_machine, Elem, ExecutionTrace, MemoryState}; pub mod bootloader; mod memory_merkle_tree; @@ -134,15 +134,49 @@ fn sanity_check(program: &AnalysisASMFile) { assert_eq!(machine_registers, expected_registers); } +fn load_initial_memory(program: &AnalysisASMFile) -> MemoryState { + let machine = get_main_machine(program); + let Some(expr) = machine.pil.iter().find_map(|v| match v { + PilStatement::LetStatement(_, n, None, expr) if n == "initial_memory" => expr.as_ref(), + _ => None, + }) else { + log::warn!("No initial_memory variable found in the machine. Assuming zeroed memory."); + return MemoryState::default(); + }; + + let Expression::ArrayLiteral(array) = expr else { + panic!("initial_memory is not an array literal"); + }; + + array + .items + .iter() + .map(|entry| { + let Expression::Tuple(tuple) = entry else { + panic!("initial_memory entry is not a tuple"); + }; + assert_eq!(tuple.len(), 2); + let Expression::Number(key, None) = &tuple[0] else { + panic!("initial_memory entry key is not a number"); + }; + let Expression::Number(value, None) = &tuple[1] else { + panic!("initial_memory entry value is not a number"); + }; + + ( + key.to_integer().try_into_u32().unwrap(), + value.to_integer().try_into_u32().unwrap(), + ) + }) + .collect() +} + /// Runs the entire execution using the RISC-V executor. For each chunk, it collects: /// - The inputs to the bootloader, needed to restore the correct state. /// - The number of rows after which the prover should jump to the shutdown routine. pub fn rust_continuations_dry_run( pipeline: &mut Pipeline, ) -> Vec<(Vec, u64)> { - log::info!("Initializing memory merkle tree..."); - let mut merkle_tree = MerkleTree::::new(); - // All inputs for all chunks. let mut bootloader_inputs_and_num_rows = vec![]; @@ -152,10 +186,24 @@ pub fn rust_continuations_dry_run( let program = pipeline.compute_analyzed_asm().unwrap().clone(); sanity_check(&program); + log::info!("Initializing memory merkle tree..."); + + // Get initial memory contents from the special variable "initial_memory". + // In the first full run, we use it as the memory contents of the executor; + // on the independent chunk runs, the executor uses zeroed initial memory, + // and the pages are loaded via the bootloader. + let initial_memory = load_initial_memory(&program); + + let mut merkle_tree = MerkleTree::::new(); + merkle_tree.update(initial_memory.iter().map(|(k, v)| (*k, *v))); + + // TODO: commit to the merkle_tree root in the verifier. + log::info!("Executing powdr-asm..."); let (full_trace, memory_accesses) = { let trace = powdr_riscv_executor::execute_ast::( &program, + initial_memory, pipeline.data_callback().unwrap(), // Run full trace without any accessed pages. This would actually violate the // constraints, but the executor does the right thing (read zero if the memory @@ -248,6 +296,7 @@ pub fn rust_continuations_dry_run( let (chunk_trace, memory_snapshot_update) = { let (trace, memory_snapshot_update) = powdr_riscv_executor::execute_ast::( &program, + MemoryState::new(), pipeline.data_callback().unwrap(), &bootloader_inputs, num_rows, diff --git a/riscv/src/continuations/memory_merkle_tree.rs b/riscv/src/continuations/memory_merkle_tree.rs index c9d78cedf..f33309cb5 100644 --- a/riscv/src/continuations/memory_merkle_tree.rs +++ b/riscv/src/continuations/memory_merkle_tree.rs @@ -93,7 +93,6 @@ impl /// Applies updates, given an iterator of (memory address, value) pairs. /// Memory addresses are assumed to be word-aligned. - #[allow(dead_code)] pub fn update(&mut self, updates: impl Iterator) { for (page_index, updates) in self.organize_updates_by_page(updates) { self.update_page(page_index, updates.into_iter()); diff --git a/riscv/tests/common/mod.rs b/riscv/tests/common/mod.rs index 32090091d..df08a447d 100644 --- a/riscv/tests/common/mod.rs +++ b/riscv/tests/common/mod.rs @@ -13,6 +13,7 @@ pub fn verify_riscv_asm_string(file_name: &str, contents: &str, inputs: Vec