More conforming handling of asm data sections.

This commit is contained in:
Lucas Clemente Vella
2024-01-19 21:35:22 +00:00
committed by Lucas Clemente Vella
parent de6cec7dbe
commit 51273d2209
5 changed files with 361 additions and 167 deletions

View File

@@ -1,11 +1,13 @@
use std::collections::BTreeMap;
use crate::ast::{Argument, BinaryOpKind, Expression, FunctionOpKind, Register, Statement};
use crate::{
ast::{Argument, BinaryOpKind, Expression, FunctionOpKind, Register, Statement},
utils::{alignment_size, split_at_first},
};
#[derive(Debug)]
pub enum DataValue {
Direct(Vec<u8>),
Zero(usize),
Alignment(usize),
Reference(String),
// This is needed for .word directives such as
// .word .Lfunc_begin0-.Lfunc_begin0
@@ -14,76 +16,140 @@ pub enum DataValue {
impl DataValue {
/// Returns the size of the value in bytes.
pub fn size(&self) -> usize {
///
/// The address is necessary because the size of the alignment padding
/// depends on what address it is defined on.
pub fn size(&self, from_addr: usize) -> usize {
match self {
DataValue::Direct(data) => data.len(),
DataValue::Zero(length) => *length,
DataValue::Alignment(bytes) => alignment_size(from_addr, *bytes),
DataValue::Reference(_) => 4,
DataValue::Offset(..) => 4,
}
}
}
/// Accumulator for the data sections found while scanning assembly
/// statements.
#[derive(Default)]
struct DataSections {
/// This is a vector of sections, where each section is a vector of (maybe
/// named) labels, which in turn contains a sequence of data values.
///
/// I decided against making this and a potential `struct Section` part of
/// the public API because the users would need to know and access all the
/// internals anyway, so it wouldn't be abstracting away any complexity.
sections: Vec<Vec<(Option<String>, Vec<DataValue>)>>,
}
impl DataSections {
    /// Creates an empty collection that contains no sections yet.
    fn new() -> Self {
        Self::default()
    }

    /// Returns the value list of the latest entry of the latest section.
    ///
    /// If the latest section has no entries yet, an unnamed entry is created
    /// first, so that data appearing before any label still has a home.
    ///
    /// # Panics
    ///
    /// Panics if no section has been appended yet.
    fn current_entry(&mut self) -> &mut Vec<DataValue> {
        let section = self.sections.last_mut().unwrap();
        if section.is_empty() {
            section.push((None, Vec::new()));
        }
        let (_, values) = section.last_mut().unwrap();
        values
    }

    /// Starts a new, empty entry named `label` in the latest section.
    ///
    /// # Panics
    ///
    /// Panics if no section has been appended yet.
    fn append_label_to_curr_section(&mut self, label: &str) {
        let section = self.sections.last_mut().unwrap();
        let entry = (Some(label.to_owned()), Vec::new());
        section.push(entry);
    }

    /// Starts a new, empty section; subsequent labels and data go into it.
    fn append_section(&mut self) {
        self.sections.push(Vec::new());
    }
}
/// Extract all data objects from the list of statements.
/// Returns the named data objects themselves and a vector of the names
/// in the order in which they occur in the statements.
pub fn extract_data_objects<R: Register, F: FunctionOpKind>(
statements: &[Statement<R, F>],
) -> (BTreeMap<String, Vec<DataValue>>, Vec<String>) {
let mut current_label = None;
// TODO the way these collections are used here looks hacky.
// It might need a more function reimpl.
let mut object_order = vec![];
let mut objects = BTreeMap::new();
) -> Vec<Vec<(Option<String>, Vec<DataValue>)>> {
let mut data = DataSections::new();
let mut is_in_data_section = false;
for s in statements {
match s {
Statement::Label(l) => {
current_label = Some(l.as_str());
if is_in_data_section {
data.append_label_to_curr_section(l);
}
}
Statement::Directive(dir, args) => match (dir.as_str(), &args[..]) {
(
".type",
[Argument::Expression(Expression::Symbol(name)), Argument::Expression(Expression::Symbol(kind))],
) if kind.as_str() == "@object" => {
object_order.push(name.clone());
assert!(objects.insert(name.clone(), vec![]).is_none());
(".text", args) => {
assert!(args.is_empty());
is_in_data_section = false;
}
(".zero" | ".ascii" | ".asciz" | ".word" | ".byte", args) => {
let label = current_label.unwrap().to_string();
objects
.entry(label.clone())
.or_insert_with(|| {
object_order.push(label);
Default::default()
})
.extend(extract_data_value(dir.as_str(), args));
(".data", args) => {
assert!(args.is_empty());
is_in_data_section = true;
data.append_section();
}
(".section", args) => {
is_in_data_section = is_data_section(&args[0]);
if is_in_data_section {
data.append_section();
}
}
(
".size",
[Argument::Expression(Expression::Symbol(name)), Argument::Expression(Expression::Number(n))],
) if Some(name.as_str()) == current_label => {
let label = current_label.unwrap().to_string();
objects
.entry(current_label.unwrap().into())
.and_modify(|entry| {
let size: usize = entry.iter().map(|v| v.size()).sum();
assert!(
size as i64 == *n,
"Invalid size for data object {name}: computed: {size} vs. specified: {n}"
);
})
.or_insert_with(|| {
object_order.push(label);
assert!(*n == 0, "Nonzero size for object without elements: {name}");
Default::default()
});
".zero" | ".ascii" | ".asciz" | ".dword" | ".word" | ".half" | ".hword"
| ".short" | ".byte",
args,
) => {
if is_in_data_section {
data.current_entry()
.extend(extract_data_value(dir.as_str(), args));
} else {
// This is most likely debug data.
}
}
(".balign", [Argument::Expression(Expression::Number(byte_size))]) => {
if is_in_data_section {
data.current_entry()
.push(DataValue::Alignment(*byte_size as usize));
}
}
(".p2align", [Argument::Expression(Expression::Number(pow_of_2))]) => {
if is_in_data_section {
data.current_entry()
.push(DataValue::Alignment((1 << pow_of_2) as usize));
}
}
(".balign" | ".p2align", _) => {
// TODO: implement the optional arguments of .balign and .p2align
unimplemented!()
}
_ => {}
},
_ => {}
}
}
(objects, object_order)
data.sections
}
/// Tells whether the argument of a `.section` directive names one of the
/// known data sections.
///
/// The name is taken from either a string literal or a symbol argument; any
/// other kind of argument is not a data section. Only the component between
/// the first and the second `.` is inspected, so both `.data` and
/// `.data.something` are recognized.
///
/// A name that contains no `.` at all is reported as not being a data section
/// instead of panicking.
fn is_data_section<R: Register, F: FunctionOpKind>(arg: &Argument<R, F>) -> bool {
    let full_name = match arg {
        Argument::StringLiteral(name) => name.as_slice(),
        Argument::Expression(Expression::Symbol(name)) => name.as_bytes(),
        _ => return false,
    };

    // Drop everything up to and including the first '.'. Without any '.'
    // the name cannot be one of the sections listed below.
    let Some(after_first_dot) = split_at_first(full_name, &b'.').1 else {
        return false;
    };

    // Isolate the name up to the next '.', if there is one.
    let name = split_at_first(after_first_dot, &b'.').0;

    matches!(
        name,
        b"sbss" | b"tbss" | b"bss" | b"sdata" | b"tdata" | b"rodata" | b"data" | b"data1"
    )
}
fn extract_data_value<R: Register, F: FunctionOpKind>(
@@ -109,6 +175,25 @@ fn extract_data_value<R: Register, F: FunctionOpKind>(
data.push(0);
vec![DataValue::Direct(data)]
}
(".dword" | ".half" | ".hword" | ".short" | ".byte", data) => {
let len = match directive {
".dword" => 8,
".byte" => 1,
_ => 2,
};
let mut bytes = Vec::with_capacity(data.len() * len);
for arg in data {
let Argument::Expression(Expression::Number(n)) = arg else {
panic!("only literals are supported for .{directive}");
};
for byte in 0..len {
bytes.push((n >> (byte * 8) & 0xff) as u8);
}
}
vec![DataValue::Direct(bytes)]
}
(".word", data) => data
.iter()
.map(|x| match x {
@@ -133,20 +218,6 @@ fn extract_data_value<R: Register, F: FunctionOpKind>(
_ => panic!("Invalid .word directive"),
})
.collect::<Vec<DataValue>>(),
(".byte", data) => {
// TODO alignment?
vec![DataValue::Direct(
data.iter()
.map(|x| {
if let Argument::Expression(Expression::Number(n)) = x {
*n as u8
} else {
panic!("Invalid argument to .byte directive")
}
})
.collect::<Vec<u8>>(),
)]
}
_ => panic!(),
}
}

View File

@@ -2,76 +2,157 @@
use std::collections::BTreeMap;
use crate::{data_parser::DataValue, utils::next_multiple_of_four};
use crate::{
data_parser::DataValue,
utils::{alignment_size, next_aligned},
};
pub enum SingleDataValue<'a> {
Value(u32),
LabelReference(&'a String),
Offset(&'a String, &'a String),
LabelReference(&'a str),
Offset(&'a str, &'a str),
}
pub fn store_data_objects<'a>(
objects: impl IntoIterator<Item = &'a (String, Vec<DataValue>)> + Copy,
/// Packs a stream of bytes into 32-bit words and hands each completed,
/// non-zero word to a code generation callback.
struct WordWriter<'a, 'b> {
// Callback invoked with an address and the value to store there; the lines
// it returns are appended to `generated_code`.
data_writer: &'a mut dyn FnMut(u32, SingleDataValue) -> Vec<String>,
// Bytes gathered so far for the word containing `current_pos`; the byte at
// offset `i` inside the word occupies bits `8 * i .. 8 * i + 8`.
partial: u32,
// Address where the next byte will be placed.
current_pos: u32,
// Output lines accumulated so far (callback output and label comments).
generated_code: Vec<String>,
// Label set by `set_label` that has not been emitted yet; it is written as
// a `// data <label>` comment right before the next flushed word.
latest_label: Option<&'b str>,
}
impl<'a, 'b> WordWriter<'a, 'b> {
    /// Creates a writer whose first byte goes to `starting_pos` rounded up to
    /// the next multiple of 8.
    fn new(
        starting_pos: u32,
        data_writer: &'a mut dyn FnMut(u32, SingleDataValue) -> Vec<String>,
    ) -> Self {
        Self {
            data_writer,
            partial: 0,
            // sanitary alignment to 8 bytes
            current_pos: next_aligned(starting_pos as usize, 8) as u32,
            generated_code: Vec::new(),
            latest_label: None,
        }
    }

    /// Address where the next byte will be written.
    fn current_position(&self) -> u32 {
        self.current_pos
    }

    /// Remembers `label` so it is emitted as a comment ahead of the next
    /// word that gets flushed.
    fn set_label(&mut self, label: &'b str) {
        self.latest_label = Some(label);
    }

    /// Moves the write position forward by `bytes`.
    ///
    /// When the move leaves the current 4-byte word and that word holds
    /// non-zero data, the word is flushed through the callback first
    /// (preceded by the pending label comment, if any). All-zero words are
    /// never emitted.
    fn advance(&mut self, bytes: u32) {
        const WORD_MASK: u32 = !0b11;

        let target = self.current_pos + bytes;
        let word_start = self.current_pos & WORD_MASK;
        let leaves_word = (target & WORD_MASK) != word_start;

        if leaves_word && self.partial != 0 {
            if let Some(label) = self.latest_label.take() {
                self.generated_code.push(format!("// data {label}"));
            }
            let word = std::mem::take(&mut self.partial);
            let lines = (*self.data_writer)(word_start, SingleDataValue::Value(word));
            self.generated_code.extend(lines);
        }

        self.current_pos = target;
    }

    /// Skips forward to the next address that is a multiple of `alignment`.
    fn align(&mut self, alignment: u32) {
        match alignment_size(self.current_pos as usize, alignment as usize) {
            0 => {}
            padding => self.advance(padding as u32),
        }
    }

    /// Writes `bytes` one after another starting at the current position.
    fn write_bytes(&mut self, bytes: &[u8]) {
        for &byte in bytes {
            // Each byte lands in the lane selected by its offset in the word.
            let shift = 8 * (self.current_pos % 4);
            self.partial |= (byte as u32) << shift;
            self.advance(1);
        }
    }

    /// Emits a 4-byte reference to `label` at the current position.
    ///
    /// # Panics
    ///
    /// Panics if the current position is not a multiple of 4.
    fn write_label_reference(&mut self, label: &str) {
        assert_eq!(
            self.current_pos % 4,
            0,
            "reference to code labels in misaligned data section is not supported"
        );
        let lines = (*self.data_writer)(
            self.current_pos,
            SingleDataValue::LabelReference(label),
        );
        self.generated_code.extend(lines);
        assert_eq!(self.partial, 0);
        self.current_pos += 4;
    }

    /// Flushes whatever is left in the last word and returns all generated
    /// lines.
    fn finish(mut self) -> Vec<String> {
        // Advancing a full word always crosses a word boundary, which
        // ensures the latest partial word is written.
        self.advance(4);
        self.generated_code
    }
}
pub fn store_data_objects(
sections: Vec<Vec<(Option<String>, Vec<DataValue>)>>,
memory_start: u32,
code_gen: &mut dyn FnMut(u32, SingleDataValue) -> Vec<String>,
) -> (Vec<String>, BTreeMap<String, u32>) {
let mut current_pos = ((memory_start + 7) / 8) * 8;
let mut positions = BTreeMap::new();
for (name, data) in objects.into_iter() {
// TODO check if we need to use multiples of four.
let size: u32 = data
.iter()
.map(|d| next_multiple_of_four(d.size()) as u32)
.sum();
positions.insert(name.clone(), current_pos);
current_pos += size;
}
let mut writer = WordWriter::new(memory_start, code_gen);
let code = objects
.into_iter()
.flat_map(|(name, data)| {
let mut object_code = vec![];
let mut pos = positions[name];
for item in data {
match &item {
DataValue::Zero(_length) => {
// We can assume memory to be zero-initialized,
// so we do nothing.
}
DataValue::Direct(bytes) => {
for i in (0..bytes.len()).step_by(4) {
let v = (0..4)
.map(|j| {
(bytes.get(i + j).cloned().unwrap_or_default() as u32)
<< (j * 8)
})
.sum();
// We can assume memory to be zero-initialized.
if v != 0 {
object_code
.extend(code_gen(pos + i as u32, SingleDataValue::Value(v)));
}
}
}
DataValue::Reference(sym) => {
object_code.extend(if let Some(p) = positions.get(sym) {
code_gen(pos, SingleDataValue::Value(*p))
} else {
// code reference
code_gen(pos, SingleDataValue::LabelReference(sym))
})
}
DataValue::Offset(l, r) => {
object_code.extend(code_gen(pos, SingleDataValue::Offset(l, r)));
let positions = {
let mut positions = BTreeMap::new();
let mut current_pos = writer.current_position();
for (name, data) in sections.iter().flatten() {
if let Some(name) = name {
positions.insert(name.clone(), current_pos);
}
for d in data.iter() {
current_pos += d.size(current_pos as usize) as u32;
}
}
positions
};
for (name, data) in sections.iter().flatten() {
if let Some(name) = name {
writer.set_label(name);
}
for item in data {
match &item {
DataValue::Zero(length) => {
// We can assume memory to be zero-initialized, so we
// just have to advance.
writer.advance(*length as u32);
}
DataValue::Direct(bytes) => {
writer.write_bytes(bytes);
}
DataValue::Reference(sym) => {
if let Some(p) = positions.get(sym) {
writer.write_bytes(&p.to_le_bytes());
} else {
// code reference
writer.write_label_reference(sym);
}
}
pos += item.size() as u32;
DataValue::Alignment(bytes) => {
writer.align(*bytes as u32);
}
DataValue::Offset(_l, _r) => unimplemented!(),
}
if let Some(first_line) = object_code.first_mut() {
*first_line = format!("// data {name}\n") + first_line;
}
object_code
})
.collect();
(code, positions)
}
}
(writer.finish(), positions)
}

View File

@@ -10,24 +10,34 @@ use crate::ast::{Argument, Expression, FunctionOpKind, Register, Statement};
/// Processes the statements and removes all statements and objects that are
/// not reachable from the label `label`.
/// Keeps the order of the statements.
pub fn filter_reachable_from<R: Register, F: FunctionOpKind, A: Architecture>(
pub fn filter_reachable_from<'a, R: Register, F: FunctionOpKind, A: Architecture>(
label: &str,
statements: &mut Vec<Statement<R, F>>,
objects: &mut BTreeMap<String, Vec<DataValue>>,
) {
data_sections: &'a mut Vec<Vec<(Option<String>, Vec<DataValue>)>>,
) -> HashSet<&'a str> {
let replacements = extract_replacements(statements);
let replacement_refs = replacements
.iter()
.map(|(k, v)| (k.as_str(), v.as_str()))
.collect();
let referenced_labels =
find_reachable_labels::<R, F, A>(label, statements, objects, &replacement_refs)
.into_iter()
.map(|s| s.to_owned())
.collect::<HashSet<_>>();
let (referenced_labels, referenced_data_sections) =
find_reachable_labels::<R, F, A>(label, statements, data_sections, &replacement_refs);
{
let mut iter_idx = 0usize;
data_sections.retain(|_| {
let must_retain = referenced_data_sections.contains(&iter_idx);
iter_idx += 1;
must_retain
});
}
let mut remaining_data_labels = HashSet::new();
for (name, value) in data_sections.iter_mut().flatten() {
if let Some(label) = name {
remaining_data_labels.insert(label.as_str());
}
objects.retain(|name, _value| referenced_labels.contains(name));
for (_name, value) in objects.iter_mut() {
apply_replacement_to_object(value, &replacement_refs)
}
@@ -42,10 +52,12 @@ pub fn filter_reachable_from<R: Register, F: FunctionOpKind, A: Architecture>(
true
} else {
if let Statement::Label(l) = &s {
active = referenced_labels.contains(l) && !objects.contains_key(l);
active = referenced_labels.contains(l)
&& !remaining_data_labels.contains(l.as_str());
}
active
};
if include {
apply_replacement_to_instruction(&mut s, &replacement_refs);
Some(s)
@@ -54,27 +66,44 @@ pub fn filter_reachable_from<R: Register, F: FunctionOpKind, A: Architecture>(
}
})
.collect();
remaining_data_labels
}
#[allow(clippy::print_stderr)]
pub fn find_reachable_labels<'a, R: Register, F: FunctionOpKind, A: Architecture>(
label: &'a str,
statements: &'a [Statement<R, F>],
objects: &'a mut BTreeMap<String, Vec<DataValue>>,
data_sections: &'a [Vec<(Option<String>, Vec<DataValue>)>],
replacements: &BTreeMap<&str, &'a str>,
) -> BTreeSet<&'a str> {
) -> (HashSet<String>, HashSet<usize>) {
// Maps each data label to the section they belong to
let all_data_labels: BTreeMap<&str, usize> = data_sections
.iter()
.enumerate()
.flat_map(|(section_idx, entries)| {
entries
.iter()
.filter_map(move |(name, _)| name.as_ref().map(|name| (name.as_str(), section_idx)))
})
.collect();
let label_offsets = extract_label_offsets(statements);
let mut queued_labels = BTreeSet::from([label]);
let mut processed_labels = BTreeSet::<&str>::new();
let mut processed_labels = HashSet::new();
let mut reached_data_sections = HashSet::new();
while let Some(l) = queued_labels.pop_first() {
let l = *replacements.get(l).unwrap_or(&l);
if !processed_labels.insert(l) {
if !processed_labels.insert(l.to_owned()) {
continue;
}
let new_references = if let Some(data_values) = objects.get(l) {
data_values
let new_references = if let Some(section_idx) = all_data_labels.get(l) {
reached_data_sections.insert(*section_idx);
let section = &data_sections[*section_idx];
section
.iter()
.flat_map(|(_, values)| values.iter())
.filter_map(|v| {
if let DataValue::Reference(sym) = v {
Some(sym.as_str())
@@ -86,7 +115,7 @@ pub fn find_reachable_labels<'a, R: Register, F: FunctionOpKind, A: Architecture
} else if let Some(offset) = label_offsets.get(l) {
let (referenced_labels_in_block, seen_labels_in_block) =
basic_block_references_starting_from::<R, F, A>(&statements[*offset..]);
processed_labels.extend(seen_labels_in_block);
processed_labels.extend(seen_labels_in_block.into_iter().map(|s| s.to_string()));
referenced_labels_in_block
} else {
eprintln!(
@@ -102,7 +131,7 @@ pub fn find_reachable_labels<'a, R: Register, F: FunctionOpKind, A: Architecture
}
}
processed_labels
(processed_labels, reached_data_sections)
}
fn extract_replacements<R: Register, F: FunctionOpKind>(

View File

@@ -1,7 +1,34 @@
use crate::ast::{Argument, Expression, FunctionOpKind, Register};
pub fn next_multiple_of_four(x: usize) -> usize {
((x + 3) / 4) * 4
/// Rounds `val` up to the nearest multiple of `alignment`.
///
/// `alignment` does not have to be a power of two, so the generic
/// remainder-based computation is used instead of bit masking.
///
/// An `alignment` of zero is treated as "no alignment requested" and returns
/// `val` unchanged instead of panicking.
///
/// The result is computed without intermediate values larger than the result
/// itself, so it only overflows when the aligned value really does not fit in
/// a `usize`.
pub fn next_aligned(val: usize, alignment: usize) -> usize {
    if alignment == 0 {
        return val;
    }
    match val % alignment {
        0 => val,
        rem => val + (alignment - rem),
    }
}
/// Padding to next alignment boundary, in bytes.
///
/// Returns how many bytes must be skipped from address `from` to reach the
/// nearest address that is a multiple of `alignment`; the result is zero when
/// `from` is already aligned.
///
/// An `alignment` of zero is treated as "no alignment requested" and yields
/// zero padding instead of panicking. The padding is derived from the
/// remainder alone, so the computation cannot overflow.
pub fn alignment_size(from: usize, alignment: usize) -> usize {
    if alignment == 0 {
        return 0;
    }
    match from % alignment {
        0 => 0,
        rem => alignment - rem,
    }
}
/// Splits a slice into the parts before and after the first occurrence of
/// `elem`; the matching element itself belongs to neither part.
///
/// The second return value is `None` if the element is not found, in which
/// case the first one is the whole input slice.
pub fn split_at_first<'a, T: Eq>(s: &'a [T], elem: &T) -> (&'a [T], Option<&'a [T]>) {
    let Some(idx) = s.iter().position(|candidate| candidate == elem) else {
        return (s, None);
    };
    let (before, from_match) = s.split_at(idx);
    // `from_match` starts with the matching element, so skipping it is safe.
    (before, Some(&from_match[1..]))
}
/// Returns the zero-based index of the first item of `seq` that equals
/// `elem`, or `None` if no item matches.
pub fn find_position<T: Eq, I: Iterator<Item = T>>(
    seq: impl IntoIterator<IntoIter = I>,
    elem: T,
) -> Option<usize> {
    for (idx, candidate) in seq.into_iter().enumerate() {
        if candidate == elem {
            return Some(idx);
        }
    }
    None
}
pub fn quote(s: &str) -> String {

View File

@@ -1,12 +1,12 @@
use std::{
collections::{BTreeMap, BTreeSet},
collections::{BTreeMap, BTreeSet, HashSet},
fmt,
};
use itertools::Itertools;
use powdr_asm_utils::{
ast::{BinaryOpKind, UnaryOpKind},
data_parser::{self, DataValue},
data_parser,
data_storage::{store_data_objects, SingleDataValue},
parser::parse_asm,
reachability::{self, symbols_in_args},
@@ -123,35 +123,25 @@ pub fn compile(
.map(|(name, contents)| (name, parse_asm(RiscParser::default(), &contents)))
.collect(),
);
let (mut objects, object_order) = data_parser::extract_data_objects(&statements);
assert_eq!(objects.keys().len(), object_order.len());
let mut data_sections = data_parser::extract_data_objects(&statements);
// Reduce to the code that is actually reachable from main
// (and the objects that are referred from there)
reachability::filter_reachable_from::<_, _, RiscvArchitecture>(
let data_labels = reachability::filter_reachable_from::<_, _, RiscvArchitecture>(
"__runtime_start",
&mut statements,
&mut objects,
&mut data_sections,
);
// Replace dynamic references to code labels
replace_dynamic_label_references(&mut statements, &objects);
replace_dynamic_label_references(&mut statements, &data_labels);
// Remove the riscv asm stub function, which is used
// for compilation, and will not be called.
statements = replace_coprocessor_stubs(statements, coprocessors).collect::<Vec<_>>();
let sorted_objects = object_order
.into_iter()
.filter_map(|n| {
let value = objects.get_mut(&n).map(std::mem::take);
value.map(|v| (n, v))
})
.collect::<Vec<_>>();
let (data_code, data_positions) = store_data_objects(
&sorted_objects,
data_start,
&mut |addr, value| match value {
let (data_code, data_positions) =
store_data_objects(data_sections, data_start, &mut |addr, value| match value {
SingleDataValue::Value(v) => {
vec![format!("mstore 0x{addr:x}, 0x{v:x};")]
}
@@ -179,8 +169,7 @@ pub fn compile(
]);
*/
}
},
);
});
let submachine_init = call_every_submachine(coprocessors);
let bootloader_lines = if with_bootloader {
@@ -241,10 +230,7 @@ pub fn compile(
/// Replace certain patterns of references to code labels by
/// special instructions. We ignore any references to data objects
/// because they will be handled differently.
fn replace_dynamic_label_references(
statements: &mut Vec<Statement>,
data_objects: &BTreeMap<String, Vec<DataValue>>,
) {
fn replace_dynamic_label_references(statements: &mut Vec<Statement>, data_labels: &HashSet<&str>) {
/*
Find patterns of the form
lui a0, %hi(LABEL)
@@ -272,7 +258,7 @@ fn replace_dynamic_label_references(
let mut to_delete = BTreeSet::default();
for (i1, i2) in instruction_indices.into_iter().tuple_windows() {
if let Some(r) =
replace_dynamic_label_reference(&statements[i1], &statements[i2], data_objects)
replace_dynamic_label_reference(&statements[i1], &statements[i2], data_labels)
{
to_delete.insert(i1);
statements[i2] = r;
@@ -286,7 +272,7 @@ fn replace_dynamic_label_references(
fn replace_dynamic_label_reference(
s1: &Statement,
s2: &Statement,
data_objects: &BTreeMap<String, Vec<DataValue>>,
data_labels: &HashSet<&str>,
) -> Option<Statement> {
let Statement::Instruction(instr1, args1) = s1 else {
return None;
@@ -314,7 +300,7 @@ fn replace_dynamic_label_reference(
let Expression::Symbol(label2) = expr2.as_ref() else {
return None;
};
if r1 != r3 || label1 != label2 || data_objects.contains_key(label1) {
if r1 != r3 || label1 != label2 || data_labels.contains(label1.as_str()) {
return None;
}
Some(Statement::Instruction(