diff --git a/asm-utils/src/data_parser.rs b/asm-utils/src/data_parser.rs index 63b97520f..c5a49128a 100644 --- a/asm-utils/src/data_parser.rs +++ b/asm-utils/src/data_parser.rs @@ -1,11 +1,13 @@ -use std::collections::BTreeMap; - -use crate::ast::{Argument, BinaryOpKind, Expression, FunctionOpKind, Register, Statement}; +use crate::{ + ast::{Argument, BinaryOpKind, Expression, FunctionOpKind, Register, Statement}, + utils::{alignment_size, split_at_first}, +}; #[derive(Debug)] pub enum DataValue { Direct(Vec), Zero(usize), + Alignment(usize), Reference(String), // This is needed for .word diretives such as // .word .Lfunc_begin0-.Lfunc_begin0 @@ -14,76 +16,140 @@ pub enum DataValue { impl DataValue { /// Returns the size of the value in bytes. - pub fn size(&self) -> usize { + /// + /// The address is necessary because the size of the alignment padding + /// depends on what address it is defined on. + pub fn size(&self, from_addr: usize) -> usize { match self { DataValue::Direct(data) => data.len(), DataValue::Zero(length) => *length, + DataValue::Alignment(bytes) => alignment_size(from_addr, *bytes), DataValue::Reference(_) => 4, DataValue::Offset(..) => 4, } } } +#[derive(Default)] +struct DataSections { + /// This is a vector of sections, where each section is a vector of (maybe + /// named) labels, which in turn contains a sequence of data values. + /// + /// I weighted against making this and a potential `struct Section` part of + /// the public API because the users would need to know and access all the + /// internals anyway, so it wouldn't be abstracting away any complexity. + sections: Vec, Vec)>>, +} + +impl DataSections { + fn new() -> Self { + Default::default() + } + + fn current_entry(&mut self) -> &mut Vec { + let last_section = self.sections.last_mut().unwrap(); + if last_section.is_empty() { + last_section.push((None, Vec::new())) + } + &mut last_section.last_mut().unwrap().1 + } + + fn append_label_to_curr_section(&mut self, label: &str) { + let last_section = self.sections.last_mut().unwrap(); + last_section.push((Some(label.to_owned()), Vec::new())); + } + + fn append_section(&mut self) { + self.sections.push(Vec::new()) + } +} + /// Extract all data objects from the list of statements. /// Returns the named data objects themselves and a vector of the names /// in the order in which they occur in the statements. pub fn extract_data_objects( statements: &[Statement], -) -> (BTreeMap>, Vec) { - let mut current_label = None; - // TODO the way these collections are used here looks hacky. - // It might need a more function reimpl. - let mut object_order = vec![]; - let mut objects = BTreeMap::new(); +) -> Vec, Vec)>> { + let mut data = DataSections::new(); + + let mut is_in_data_section = false; + for s in statements { match s { Statement::Label(l) => { - current_label = Some(l.as_str()); + if is_in_data_section { + data.append_label_to_curr_section(l); + } } Statement::Directive(dir, args) => match (dir.as_str(), &args[..]) { - ( - ".type", - [Argument::Expression(Expression::Symbol(name)), Argument::Expression(Expression::Symbol(kind))], - ) if kind.as_str() == "@object" => { - object_order.push(name.clone()); - assert!(objects.insert(name.clone(), vec![]).is_none()); + (".text", args) => { + assert!(args.is_empty()); + is_in_data_section = false; } - (".zero" | ".ascii" | ".asciz" | ".word" | ".byte", args) => { - let label = current_label.unwrap().to_string(); - objects - .entry(label.clone()) - .or_insert_with(|| { - object_order.push(label); - Default::default() - }) - .extend(extract_data_value(dir.as_str(), args)); + (".data", args) => { + assert!(args.is_empty()); + is_in_data_section = true; + data.append_section(); + } + (".section", args) => { + is_in_data_section = is_data_section(&args[0]); + if is_in_data_section { + data.append_section(); + } } ( - ".size", - [Argument::Expression(Expression::Symbol(name)), Argument::Expression(Expression::Number(n))], - ) if Some(name.as_str()) == current_label => { - let label = current_label.unwrap().to_string(); - objects - .entry(current_label.unwrap().into()) - .and_modify(|entry| { - let size: usize = entry.iter().map(|v| v.size()).sum(); - assert!( - size as i64 == *n, - "Invalid size for data object {name}: computed: {size} vs. specified: {n}" - ); - }) - .or_insert_with(|| { - object_order.push(label); - assert!(*n == 0, "Nonzero size for object without elements: {name}"); - Default::default() - }); + ".zero" | ".ascii" | ".asciz" | ".dword" | ".word" | ".half" | ".hword" + | ".short" | ".byte", + args, + ) => { + if is_in_data_section { + data.current_entry() + .extend(extract_data_value(dir.as_str(), args)); + } else { + // This is most likely debug data. + } + } + (".balign", [Argument::Expression(Expression::Number(byte_size))]) => { + if is_in_data_section { + data.current_entry() + .push(DataValue::Alignment(*byte_size as usize)); + } + } + (".p2align", [Argument::Expression(Expression::Number(pow_of_2))]) => { + if is_in_data_section { + data.current_entry() + .push(DataValue::Alignment((1 << pow_of_2) as usize)); + } + } + (".balign" | ".p2align", _) => { + // TODO: implement the optional arguments of .balign and .p2align + unimplemented!() } _ => {} }, _ => {} } } - (objects, object_order) + data.sections +} + +fn is_data_section(arg: &Argument) -> bool { + let full_name = match arg { + Argument::StringLiteral(name) => name.as_slice(), + Argument::Expression(Expression::Symbol(name)) => name.as_bytes(), + _ => return false, + }; + + // split out the part before the initial '.' + let name = split_at_first(full_name, &b'.').1.unwrap(); + + // isolate name until next '.' + let name = split_at_first(name, &b'.').0; + + matches!( + name, + b"sbss" | b"tbss" | b"bss" | b"sdata" | b"tdata" | b"rodata" | b"data" | b"data1" + ) } fn extract_data_value( @@ -109,6 +175,25 @@ fn extract_data_value( data.push(0); vec![DataValue::Direct(data)] } + (".dword" | ".half" | ".hword" | ".short" | ".byte", data) => { + let len = match directive { + ".dword" => 8, + ".byte" => 1, + _ => 2, + }; + + let mut bytes = Vec::with_capacity(data.len() * len); + for arg in data { + let Argument::Expression(Expression::Number(n)) = arg else { + panic!("only literals are supported for .{directive}"); + }; + for byte in 0..len { + bytes.push((n >> (byte * 8) & 0xff) as u8); + } + } + + vec![DataValue::Direct(bytes)] + } (".word", data) => data .iter() .map(|x| match x { @@ -133,20 +218,6 @@ fn extract_data_value( _ => panic!("Invalid .word directive"), }) .collect::>(), - (".byte", data) => { - // TODO alignment? - vec![DataValue::Direct( - data.iter() - .map(|x| { - if let Argument::Expression(Expression::Number(n)) = x { - *n as u8 - } else { - panic!("Invalid argument to .byte directive") - } - }) - .collect::>(), - )] - } _ => panic!(), } } diff --git a/asm-utils/src/data_storage.rs b/asm-utils/src/data_storage.rs index 5946570f7..d9f430fe8 100644 --- a/asm-utils/src/data_storage.rs +++ b/asm-utils/src/data_storage.rs @@ -2,76 +2,157 @@ use std::collections::BTreeMap; -use crate::{data_parser::DataValue, utils::next_multiple_of_four}; +use crate::{ + data_parser::DataValue, + utils::{alignment_size, next_aligned}, +}; pub enum SingleDataValue<'a> { Value(u32), - LabelReference(&'a String), - Offset(&'a String, &'a String), + LabelReference(&'a str), + Offset(&'a str, &'a str), } -pub fn store_data_objects<'a>( - objects: impl IntoIterator)> + Copy, +struct WordWriter<'a, 'b> { + data_writer: &'a mut dyn FnMut(u32, SingleDataValue) -> Vec, + partial: u32, + current_pos: u32, + generated_code: Vec, + + latest_label: Option<&'b str>, +} + +impl<'a, 'b> WordWriter<'a, 'b> { + fn new( + starting_pos: u32, + data_writer: &'a mut dyn FnMut(u32, SingleDataValue) -> Vec, + ) -> Self { + // sanitary alignment to 8 bytes + let current_pos = next_aligned(starting_pos as usize, 8) as u32; + Self { + partial: 0, + current_pos, + data_writer, + generated_code: Vec::new(), + latest_label: None, + } + } + + fn current_position(&self) -> u32 { + self.current_pos + } + + fn set_label(&mut self, label: &'b str) { + self.latest_label = Some(label) + } + + fn advance(&mut self, bytes: u32) { + let next_pos = self.current_pos + bytes; + + // if changed words, flush + let curr_word = self.current_pos & (!0b11); + if (next_pos & (!0b11) != curr_word) && (self.partial != 0) { + if let Some(label) = std::mem::take(&mut self.latest_label) { + self.generated_code.push(format!("// data {label}")); + } + + self.generated_code.extend((*self.data_writer)( + curr_word, + SingleDataValue::Value(self.partial), + )); + self.partial = 0; + } + self.current_pos = next_pos; + } + + fn align(&mut self, alignment: u32) { + let padding_size = alignment_size(self.current_pos as usize, alignment as usize); + if padding_size != 0 { + self.advance(padding_size as u32); + } + } + + fn write_bytes(&mut self, bytes: &[u8]) { + for b in bytes { + self.partial |= (*b as u32) << (8 * (self.current_pos % 4)); + self.advance(1); + } + } + + fn write_label_reference(&mut self, label: &str) { + assert_eq!( + self.current_pos % 4, + 0, + "reference to code labels in misaligned data section is not supported" + ); + + self.generated_code.extend((*self.data_writer)( + self.current_pos, + SingleDataValue::LabelReference(label), + )); + + assert_eq!(self.partial, 0); + self.current_pos += 4; + } + + fn finish(mut self) -> Vec { + // ensure the latest partial word is written + self.advance(4); + + self.generated_code + } +} + +pub fn store_data_objects( + sections: Vec, Vec)>>, memory_start: u32, code_gen: &mut dyn FnMut(u32, SingleDataValue) -> Vec, ) -> (Vec, BTreeMap) { - let mut current_pos = ((memory_start + 7) / 8) * 8; - let mut positions = BTreeMap::new(); - for (name, data) in objects.into_iter() { - // TODO check if we need to use multiples of four. - let size: u32 = data - .iter() - .map(|d| next_multiple_of_four(d.size()) as u32) - .sum(); - positions.insert(name.clone(), current_pos); - current_pos += size; - } + let mut writer = WordWriter::new(memory_start, code_gen); - let code = objects - .into_iter() - .flat_map(|(name, data)| { - let mut object_code = vec![]; - let mut pos = positions[name]; - for item in data { - match &item { - DataValue::Zero(_length) => { - // We can assume memory to be zero-initialized, - // so we do nothing. - } - DataValue::Direct(bytes) => { - for i in (0..bytes.len()).step_by(4) { - let v = (0..4) - .map(|j| { - (bytes.get(i + j).cloned().unwrap_or_default() as u32) - << (j * 8) - }) - .sum(); - // We can assume memory to be zero-initialized. - if v != 0 { - object_code - .extend(code_gen(pos + i as u32, SingleDataValue::Value(v))); - } - } - } - DataValue::Reference(sym) => { - object_code.extend(if let Some(p) = positions.get(sym) { - code_gen(pos, SingleDataValue::Value(*p)) - } else { - // code reference - code_gen(pos, SingleDataValue::LabelReference(sym)) - }) - } - DataValue::Offset(l, r) => { - object_code.extend(code_gen(pos, SingleDataValue::Offset(l, r))); + let positions = { + let mut positions = BTreeMap::new(); + let mut current_pos = writer.current_position(); + for (name, data) in sections.iter().flatten() { + if let Some(name) = name { + positions.insert(name.clone(), current_pos); + } + for d in data.iter() { + current_pos += d.size(current_pos as usize) as u32; + } + } + positions + }; + + for (name, data) in sections.iter().flatten() { + if let Some(name) = name { + writer.set_label(name); + } + for item in data { + match &item { + DataValue::Zero(length) => { + // We can assume memory to be zero-initialized, so we + // just have to advance. + writer.advance(*length as u32); + } + DataValue::Direct(bytes) => { + writer.write_bytes(bytes); + } + DataValue::Reference(sym) => { + if let Some(p) = positions.get(sym) { + writer.write_bytes(&p.to_le_bytes()); + } else { + // code reference + writer.write_label_reference(sym); } } - pos += item.size() as u32; + DataValue::Alignment(bytes) => { + writer.align(*bytes as u32); + } + DataValue::Offset(_l, _r) => unimplemented!(), } - if let Some(first_line) = object_code.first_mut() { - *first_line = format!("// data {name}\n") + first_line; - } - object_code - }) - .collect(); - (code, positions) + } + } + + (writer.finish(), positions) } diff --git a/asm-utils/src/reachability.rs b/asm-utils/src/reachability.rs index 93bbd7326..51e51002a 100644 --- a/asm-utils/src/reachability.rs +++ b/asm-utils/src/reachability.rs @@ -10,24 +10,34 @@ use crate::ast::{Argument, Expression, FunctionOpKind, Register, Statement}; /// Processes the statements and removes all statements and objects that are /// not reachable from the label `label`. /// Keeps the order of the statements. -pub fn filter_reachable_from( +pub fn filter_reachable_from<'a, R: Register, F: FunctionOpKind, A: Architecture>( label: &str, statements: &mut Vec>, - objects: &mut BTreeMap>, -) { + data_sections: &'a mut Vec, Vec)>>, +) -> HashSet<&'a str> { let replacements = extract_replacements(statements); let replacement_refs = replacements .iter() .map(|(k, v)| (k.as_str(), v.as_str())) .collect(); - let referenced_labels = - find_reachable_labels::(label, statements, objects, &replacement_refs) - .into_iter() - .map(|s| s.to_owned()) - .collect::>(); + let (referenced_labels, referenced_data_sections) = + find_reachable_labels::(label, statements, data_sections, &replacement_refs); + + { + let mut iter_idx = 0usize; + data_sections.retain(|_| { + let must_retain = referenced_data_sections.contains(&iter_idx); + iter_idx += 1; + must_retain + }); + } + + let mut remaining_data_labels = HashSet::new(); + for (name, value) in data_sections.iter_mut().flatten() { + if let Some(label) = name { + remaining_data_labels.insert(label.as_str()); + } - objects.retain(|name, _value| referenced_labels.contains(name)); - for (_name, value) in objects.iter_mut() { apply_replacement_to_object(value, &replacement_refs) } @@ -42,10 +52,12 @@ pub fn filter_reachable_from( true } else { if let Statement::Label(l) = &s { - active = referenced_labels.contains(l) && !objects.contains_key(l); + active = referenced_labels.contains(l) + && !remaining_data_labels.contains(l.as_str()); } active }; + if include { apply_replacement_to_instruction(&mut s, &replacement_refs); Some(s) @@ -54,27 +66,44 @@ pub fn filter_reachable_from( } }) .collect(); + + remaining_data_labels } #[allow(clippy::print_stderr)] pub fn find_reachable_labels<'a, R: Register, F: FunctionOpKind, A: Architecture>( label: &'a str, statements: &'a [Statement], - objects: &'a mut BTreeMap>, + data_sections: &'a [Vec<(Option, Vec)>], replacements: &BTreeMap<&str, &'a str>, -) -> BTreeSet<&'a str> { +) -> (HashSet, HashSet) { + // Maps each data label to the section they belong to + let all_data_labels: BTreeMap<&str, usize> = data_sections + .iter() + .enumerate() + .flat_map(|(section_idx, entries)| { + entries + .iter() + .filter_map(move |(name, _)| name.as_ref().map(|name| (name.as_str(), section_idx))) + }) + .collect(); + let label_offsets = extract_label_offsets(statements); let mut queued_labels = BTreeSet::from([label]); - let mut processed_labels = BTreeSet::<&str>::new(); + let mut processed_labels = HashSet::new(); + let mut reached_data_sections = HashSet::new(); while let Some(l) = queued_labels.pop_first() { let l = *replacements.get(l).unwrap_or(&l); - if !processed_labels.insert(l) { + if !processed_labels.insert(l.to_owned()) { continue; } - let new_references = if let Some(data_values) = objects.get(l) { - data_values + let new_references = if let Some(section_idx) = all_data_labels.get(l) { + reached_data_sections.insert(*section_idx); + let section = &data_sections[*section_idx]; + section .iter() + .flat_map(|(_, values)| values.iter()) .filter_map(|v| { if let DataValue::Reference(sym) = v { Some(sym.as_str()) @@ -86,7 +115,7 @@ pub fn find_reachable_labels<'a, R: Register, F: FunctionOpKind, A: Architecture } else if let Some(offset) = label_offsets.get(l) { let (referenced_labels_in_block, seen_labels_in_block) = basic_block_references_starting_from::(&statements[*offset..]); - processed_labels.extend(seen_labels_in_block); + processed_labels.extend(seen_labels_in_block.into_iter().map(|s| s.to_string())); referenced_labels_in_block } else { eprintln!( @@ -102,7 +131,7 @@ pub fn find_reachable_labels<'a, R: Register, F: FunctionOpKind, A: Architecture } } - processed_labels + (processed_labels, reached_data_sections) } fn extract_replacements( diff --git a/asm-utils/src/utils.rs b/asm-utils/src/utils.rs index 79e126314..faa5c6033 100644 --- a/asm-utils/src/utils.rs +++ b/asm-utils/src/utils.rs @@ -1,7 +1,34 @@ use crate::ast::{Argument, Expression, FunctionOpKind, Register}; -pub fn next_multiple_of_four(x: usize) -> usize { - ((x + 3) / 4) * 4 +pub fn next_aligned(val: usize, alignment: usize) -> usize { + // Alignment will probably always be a power of two, which can be aligned in + // a much faster bitwise operation. But then we would have to assert!() it, + // so it is just better to use the generic version. + ((val + (alignment - 1)) / alignment) * alignment +} + +/// Padding to next alignment boundary, in bytes. +pub fn alignment_size(from: usize, alignment: usize) -> usize { + let dest = next_aligned(from, alignment); + dest - from +} + +/// Split an slice as before and after the first occurrence of an element. +/// +/// The second return value is None if the element is not found. +pub fn split_at_first<'a, T: Eq>(s: &'a [T], elem: &T) -> (&'a [T], Option<&'a [T]>) { + match s.iter().position(|e| e == elem) { + Some(idx) => (&s[..idx], Some(&s[(idx + 1)..])), + None => (s, None), + } +} + +/// Find the position of the next given element in an iterable. +pub fn find_position>( + seq: impl IntoIterator, + elem: T, +) -> Option { + seq.into_iter().position(|e| e == elem) } pub fn quote(s: &str) -> String { diff --git a/riscv/src/compiler.rs b/riscv/src/compiler.rs index 58db2b616..8987f578e 100644 --- a/riscv/src/compiler.rs +++ b/riscv/src/compiler.rs @@ -1,12 +1,12 @@ use std::{ - collections::{BTreeMap, BTreeSet}, + collections::{BTreeMap, BTreeSet, HashSet}, fmt, }; use itertools::Itertools; use powdr_asm_utils::{ ast::{BinaryOpKind, UnaryOpKind}, - data_parser::{self, DataValue}, + data_parser, data_storage::{store_data_objects, SingleDataValue}, parser::parse_asm, reachability::{self, symbols_in_args}, @@ -123,35 +123,25 @@ pub fn compile( .map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents))) .collect(), ); - let (mut objects, object_order) = data_parser::extract_data_objects(&statements); - assert_eq!(objects.keys().len(), object_order.len()); + let mut data_sections = data_parser::extract_data_objects(&statements); // Reduce to the code that is actually reachable from main // (and the objects that are referred from there) - reachability::filter_reachable_from::<_, _, RiscvArchitecture>( + let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>( "__runtime_start", &mut statements, - &mut objects, + &mut data_sections, ); // Replace dynamic references to code labels - replace_dynamic_label_references(&mut statements, &objects); + replace_dynamic_label_references(&mut statements, &data_labels); // Remove the riscv asm stub function, which is used // for compilation, and will not be called. statements = replace_coprocessor_stubs(statements, coprocessors).collect::>(); - let sorted_objects = object_order - .into_iter() - .filter_map(|n| { - let value = objects.get_mut(&n).map(std::mem::take); - value.map(|v| (n, v)) - }) - .collect::>(); - let (data_code, data_positions) = store_data_objects( - &sorted_objects, - data_start, - &mut |addr, value| match value { + let (data_code, data_positions) = + store_data_objects(data_sections, data_start, &mut |addr, value| match value { SingleDataValue::Value(v) => { vec![format!("mstore 0x{addr:x}, 0x{v:x};")] } @@ -179,8 +169,7 @@ pub fn compile( ]); */ } - }, - ); + }); let submachine_init = call_every_submachine(coprocessors); let bootloader_lines = if with_bootloader { @@ -241,10 +230,7 @@ pub fn compile( /// Replace certain patterns of references to code labels by /// special instructions. We ignore any references to data objects /// because they will be handled differently. -fn replace_dynamic_label_references( - statements: &mut Vec, - data_objects: &BTreeMap>, -) { +fn replace_dynamic_label_references(statements: &mut Vec, data_labels: &HashSet<&str>) { /* Find patterns of the form lui a0, %hi(LABEL) @@ -272,7 +258,7 @@ fn replace_dynamic_label_references( let mut to_delete = BTreeSet::default(); for (i1, i2) in instruction_indices.into_iter().tuple_windows() { if let Some(r) = - replace_dynamic_label_reference(&statements[i1], &statements[i2], data_objects) + replace_dynamic_label_reference(&statements[i1], &statements[i2], data_labels) { to_delete.insert(i1); statements[i2] = r; @@ -286,7 +272,7 @@ fn replace_dynamic_label_references( fn replace_dynamic_label_reference( s1: &Statement, s2: &Statement, - data_objects: &BTreeMap>, + data_labels: &HashSet<&str>, ) -> Option { let Statement::Instruction(instr1, args1) = s1 else { return None; @@ -314,7 +300,7 @@ fn replace_dynamic_label_reference( let Expression::Symbol(label2) = expr2.as_ref() else { return None; }; - if r1 != r3 || label1 != label2 || data_objects.contains_key(label1) { + if r1 != r3 || label1 != label2 || data_labels.contains(label1.as_str()) { return None; } Some(Statement::Instruction(