powdr/asm-utils/src/data_parser.rs

use crate::{
    ast::{Argument, BinaryOpKind, Expression, FunctionOpKind, Register, Statement},
    utils::{alignment_size, split_at_first},
};

/// A single item placed in a data section.
#[derive(Debug)]
pub enum DataValue {
    /// Literal bytes, stored exactly as given.
    Direct(Vec<u8>),
    /// A run of zero bytes of the given length (produced by a one-argument
    /// `.zero` directive).
    Zero(usize),
    /// Alignment padding: the alignment size in bytes and the byte value
    /// used as padding.
    Alignment(usize, u8),
    /// The name of a symbol given as a `.word` argument. It occupies 4 bytes.
    Reference(String),
    /// A `.word` argument that is the difference of two symbols. It occupies
    /// 4 bytes.
    ///
    /// This is needed for .word directives such as
    /// `.word .Lfunc_begin0-.Lfunc_begin0`
    Offset(String, String),
}

impl DataValue {
    /// Computes how many bytes this value occupies when it is placed at
    /// address `from_addr`.
    ///
    /// Only alignment padding actually depends on `from_addr`; every other
    /// variant has a size that is independent of its position.
    pub fn size(&self, from_addr: usize) -> usize {
        match self {
            // Symbol references and symbol differences are both emitted as
            // one 4-byte word.
            Self::Reference(_) | Self::Offset(..) => 4,
            Self::Zero(length) => *length,
            Self::Direct(data) => data.len(),
            Self::Alignment(bytes, _) => alignment_size(from_addr, *bytes),
        }
    }
}

/// Accumulator for the data sections encountered while scanning statements.
#[derive(Default)]
struct DataSections {
    /// This is a vector of sections, where each section is a vector of (maybe
    /// named) labels, which in turn contains a sequence of data values.
    ///
    /// An entry's label is `None` when data shows up in a section before any
    /// label has been seen there.
    ///
    /// I decided against making this and a potential `struct Section` part of
    /// the public API because the users would need to know and access all the
    /// internals anyway, so it wouldn't be abstracting away any complexity.
    sections: Vec<Vec<(Option<String>, Vec<DataValue>)>>,
}

impl DataSections {
    /// Creates an accumulator that holds no sections yet.
    fn new() -> Self {
        Self::default()
    }

    /// Returns the value list of the newest entry in the newest section.
    ///
    /// If that section has no entry yet, an unnamed one is created first so
    /// that data appearing before any label still has a place to go.
    ///
    /// Panics if no section has been appended so far.
    fn current_entry(&mut self) -> &mut Vec<DataValue> {
        let section = self.sections.last_mut().unwrap();
        if section.is_empty() {
            section.push((None, Vec::new()));
        }
        let (_, values) = section.last_mut().unwrap();
        values
    }

    /// Starts a new, initially empty entry named `label` in the newest
    /// section.
    ///
    /// Panics if no section has been appended so far.
    fn append_label_to_curr_section(&mut self, label: &str) {
        let entry = (Some(String::from(label)), Vec::new());
        self.sections.last_mut().unwrap().push(entry);
    }

    /// Opens a new section without any entries.
    fn append_section(&mut self) {
        self.sections.push(Vec::new());
    }
}

/// Collects the contents of every data section found in `statements`.
///
/// The result holds one element per data section, in the order in which the
/// sections are opened. Each section is a list of `(label, values)` entries
/// in statement order; the label is `None` for data that appears in a
/// section before its first label.
///
/// Labels and data directives outside of data sections are ignored.
///
/// # Panics
///
/// Panics if `.text` or `.data` carry arguments, if `.section` has no
/// argument, if `.balign`/`.p2align` are used with an unsupported argument
/// list, or if a data directive cannot be decoded.
pub fn extract_data_objects<R: Register, F: FunctionOpKind>(
    statements: &[Statement<R, F>],
) -> Vec<Vec<(Option<String>, Vec<DataValue>)>> {
    let mut collected = DataSections::new();

    // Whether the statement currently visited lies inside a data section.
    let mut in_data = false;

    for statement in statements {
        let (directive, args) = match statement {
            Statement::Label(label) => {
                if in_data {
                    collected.append_label_to_curr_section(label);
                }
                continue;
            }
            Statement::Directive(directive, args) => (directive.as_str(), &args[..]),
            _ => continue,
        };

        match directive {
            ".text" => {
                assert!(args.is_empty());
                in_data = false;
            }
            ".data" => {
                assert!(args.is_empty());
                in_data = true;
                collected.append_section();
            }
            ".section" => {
                in_data = is_data_section(&args[0]);
                if in_data {
                    collected.append_section();
                }
            }
            ".zero" | ".ascii" | ".asciz" | ".dword" | ".word" | ".half" | ".hword" | ".short"
            | ".byte" => {
                // Outside of a data section this is most likely debug data,
                // so it is skipped.
                if in_data {
                    collected
                        .current_entry()
                        .extend(extract_data_value(directive, args));
                }
            }
            ".balign" | ".p2align" => {
                // The argument list is validated even outside of data
                // sections; only the recording below is conditional.
                let (amount, padding) = match args {
                    [Argument::Expression(Expression::Number(amount))] => (amount, 0),
                    [Argument::Expression(Expression::Number(amount)), Argument::Expression(Expression::Number(pad_value))] => {
                        (amount, *pad_value as u8)
                    }
                    // TODO: implement last optional argument of .balign and .p2align
                    _ => unimplemented!("{directive} {args:?}"),
                };
                if in_data {
                    // `.balign` takes the alignment in bytes, `.p2align` takes
                    // its base-2 logarithm.
                    let alignment = if directive == ".balign" {
                        *amount as usize
                    } else {
                        (1 << amount) as usize
                    };
                    collected
                        .current_entry()
                        .push(DataValue::Alignment(alignment, padding));
                }
            }
            _ => {}
        }
    }

    collected.sections
}

/// Decides whether the argument of a `.section` directive names a data
/// section.
///
/// The name may be given as a string literal or as a symbol; any other kind
/// of argument is not a data section. The decision is based on the name
/// component between the first and the second `.` (e.g. `rodata` in
/// `.rodata.str1.1`), which must be one of the known data section names.
///
/// A name that contains no `.` at all is reported as "not a data section"
/// rather than causing a panic.
fn is_data_section<R: Register, F: FunctionOpKind>(arg: &Argument<R, F>) -> bool {
    let full_name = match arg {
        Argument::StringLiteral(name) => name.as_slice(),
        Argument::Expression(Expression::Symbol(name)) => name.as_bytes(),
        _ => return false,
    };

    // Discard everything up to and including the initial '.'.
    // NOTE(review): this assumes `split_at_first` yields `None` as its second
    // component when the separator is absent; confirm against `utils`.
    let Some(name) = split_at_first(full_name, &b'.').1 else {
        return false;
    };

    // isolate name until next '.'
    let name = split_at_first(name, &b'.').0;

    matches!(
        name,
        b"sbss" | b"tbss" | b"bss" | b"sdata" | b"tdata" | b"rodata" | b"data" | b"data1"
    )
}

/// Decodes the arguments of a single data directive into data values.
///
/// Supported directives:
/// - `.zero n`: `n` zero bytes; `.zero n, value`: `n` bytes of `value`.
/// - `.ascii` / `.asciz`: the bytes of one string literal, with a trailing
///   zero byte appended for `.asciz`.
/// - `.byte`, `.half` / `.hword` / `.short`, `.dword`: numeric literals
///   encoded little-endian in 1, 2 and 8 bytes respectively, truncated to
///   that width and concatenated into one `Direct` value.
/// - `.word`: one value per argument, which is either a numeric literal
///   (4 bytes, little-endian), a symbol reference, or the difference of two
///   symbols.
///
/// # Panics
///
/// Panics if the directive is not one of the above, if its arguments do not
/// have a supported shape, or if the fill value of `.zero` is outside of
/// `0..=0xff`.
fn extract_data_value<R: Register, F: FunctionOpKind>(
    directive: &str,
    arguments: &[Argument<R, F>],
) -> Vec<DataValue> {
    match (directive, arguments) {
        (".zero", [Argument::Expression(Expression::Number(n))]) => {
            vec![DataValue::Zero(*n as usize)]
        }
        (
            ".zero",
            [Argument::Expression(Expression::Number(n)), Argument::Expression(Expression::Number(value))],
        ) => {
            assert!(0 <= *value && *value <= 0xff);
            vec![DataValue::Direct(vec![*value as u8; *n as usize])]
        }
        (".ascii", [Argument::StringLiteral(data)]) => {
            vec![DataValue::Direct(data.clone())]
        }
        (".asciz", [Argument::StringLiteral(data)]) => {
            let mut data = data.clone();
            data.push(0);
            vec![DataValue::Direct(data)]
        }
        (".dword" | ".half" | ".hword" | ".short" | ".byte", data) => {
            // Width in bytes of one element of this directive.
            let len = match directive {
                ".dword" => 8,
                ".byte" => 1,
                _ => 2,
            };

            let mut bytes = Vec::with_capacity(data.len() * len);
            for arg in data {
                let Argument::Expression(Expression::Number(n)) = arg else {
                    // `directive` already starts with a '.'.
                    panic!("only literals are supported for {directive}");
                };
                // Emit the `len` least significant bytes, lowest byte first.
                bytes.extend((0..len).map(|byte| (n >> (byte * 8) & 0xff) as u8));
            }

            vec![DataValue::Direct(bytes)]
        }
        (".word", data) => data
            .iter()
            .map(|x| match x {
                Argument::Expression(Expression::Number(n)) => {
                    // Truncate to 32 bits and store little-endian.
                    DataValue::Direct((*n as u32).to_le_bytes().to_vec())
                }
                Argument::Expression(Expression::Symbol(sym)) => DataValue::Reference(sym.clone()),
                Argument::Expression(Expression::BinaryOp(BinaryOpKind::Sub, args)) => {
                    match args.as_slice() {
                        [Expression::Symbol(a), Expression::Symbol(b)] => {
                            DataValue::Offset(a.to_string(), b.to_string())
                        }
                        _ => panic!("Invalid .word directive"),
                    }
                }
                _ => panic!("Invalid .word directive"),
            })
            .collect::<Vec<DataValue>>(),
        _ => panic!("unsupported data directive or arguments: {directive} {arguments:?}"),
    }
}