assembly_parser

This commit is contained in:
Andrew Morris
2023-03-05 08:43:04 +11:00
parent 372d9c9b36
commit 2dc78c2002
5 changed files with 864 additions and 7 deletions

View File

@@ -56,6 +56,7 @@ impl std::fmt::Display for DefinitionRef {
}
}
#[derive(Default)]
pub struct Function {
pub parameters: Vec<Register>,
pub body: Vec<InstructionOrLabel>,
@@ -413,13 +414,14 @@ pub enum Value {
Void,
Undefined,
Null,
Boolean(bool),
Bool(bool),
Number(f64),
String(String),
Array(Box<Array>),
Object(Box<Object>),
Register(Register),
DefinitionRef(DefinitionRef),
Builtin(Builtin),
}
impl std::fmt::Display for Value {
@@ -428,7 +430,7 @@ impl std::fmt::Display for Value {
Value::Void => write!(f, "void"),
Value::Undefined => write!(f, "undefined"),
Value::Null => write!(f, "null"),
Value::Boolean(value) => write!(f, "{}", value),
Value::Bool(value) => write!(f, "{}", value),
Value::Number(value) => write!(f, "{}", value),
Value::String(value) => write!(
f,
@@ -439,10 +441,22 @@ impl std::fmt::Display for Value {
Value::Object(value) => write!(f, "{}", value),
Value::Register(value) => write!(f, "{}", value),
Value::DefinitionRef(value) => write!(f, "{}", value),
Value::Builtin(value) => write!(f, "{}", value),
}
}
}
/// A built-in referenced by name; carried by `Value::Builtin`.
pub struct Builtin {
// Bare name without the `$` sigil (the `Display` impl prepends it).
pub name: String,
}
impl std::fmt::Display for Builtin {
    /// Writes the builtin as a `$` sigil immediately followed by its name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("$")?;
        f.write_str(&self.name)
    }
}
/// An array value: an ordered list of element values.
/// `Default` yields an array with no elements.
#[derive(Default)]
pub struct Array {
pub values: Vec<Value>,
}
@@ -460,6 +474,7 @@ impl std::fmt::Display for Array {
}
}
/// An object value stored as an ordered list of `(key, value)` pairs.
/// `Default` yields an object with no properties.
#[derive(Default)]
pub struct Object {
pub properties: Vec<(Value, Value)>,
}

View File

@@ -1,7 +1,7 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use crate::asm::{
Array, Class, Definition, DefinitionContent, DefinitionRef, Function, Instruction,
Array, Builtin, Class, Definition, DefinitionContent, DefinitionRef, Function, Instruction,
InstructionOrLabel, Label, LabelRef, Module, Object, Register, Value,
};
@@ -64,7 +64,15 @@ impl Assembler {
self.output.push(function.parameters.len() as u8);
let mut param_set = HashSet::<Register>::new();
for parameter in &function.parameters {
let inserted = param_set.insert(parameter.clone());
if !inserted {
panic!("Duplicate parameter: {}", parameter);
}
self.register(parameter);
}
@@ -179,7 +187,7 @@ impl Assembler {
}
Value::Number(number) => self.number(*number),
Value::String(string) => self.string(string),
Value::Boolean(boolean) => match boolean {
Value::Bool(boolean) => match boolean {
false => self.output.push(ValueType::False as u8),
true => self.output.push(ValueType::True as u8),
},
@@ -189,6 +197,7 @@ impl Assembler {
Value::Array(array) => self.array(array),
Value::Object(object) => self.object(object),
Value::DefinitionRef(definition_ref) => self.definition_ref(definition_ref),
Value::Builtin(builtin) => self.builtin(builtin),
}
}
@@ -274,6 +283,18 @@ impl Assembler {
.add_unresolved(LocationRef::DefinitionRef(value.clone()), &mut self.output);
}
/// Emits a builtin reference: the `ValueType::Builtin` tag byte followed by
/// the builtin's numeric code as a variable-size unsigned integer.
///
/// # Panics
/// Panics when the builtin name is not one of the known names.
fn builtin(&mut self, builtin: &Builtin) {
    self.output.push(ValueType::Builtin as u8);

    let code = match builtin.name.as_str() {
        "Math" => 0,
        "Debug" => 1,
        unknown => panic!("Unknown builtin: {}", unknown),
    };

    self.varsize_uint(code);
}
fn array(&mut self, array: &Array) {
self.output.push(ValueType::Array as u8);
self.varsize_uint(array.values.len());

View File

@@ -0,0 +1,820 @@
use std::collections::HashMap;
use std::str::FromStr;
use crate::asm::{
Array, Builtin, Class, Definition, DefinitionContent, DefinitionRef, Function, Instruction,
InstructionOrLabel, Label, LabelRef, Module, Object, Register, Value,
};
/// Parser state for the textual assembly format.
///
/// Constructed by `parse_module`, which starts `pos` at the beginning of
/// `content`.
struct AssemblyParser<'a> {
// The complete input text; consulted by `get_pos_index` for error positions.
content: &'a str,
// Cursor over the characters of `content` that have not been consumed yet.
pos: std::iter::Peekable<std::str::Chars<'a>>,
}
impl<'a> AssemblyParser<'a> {
/// Parses definitions until the input is exhausted and wraps them in a
/// `Module`. Whitespace between definitions is skipped.
fn module(&mut self) -> Module {
    let mut definitions: Vec<Definition> = Vec::new();

    self.parse_optional_whitespace();
    while self.pos.peek().is_some() {
        definitions.push(self.assemble_definition());
        self.parse_optional_whitespace();
    }

    Module { definitions }
}
/// Returns the current position as a count of characters (not bytes)
/// consumed from the start of `content`. Used for error messages.
///
/// Computed as "total characters minus characters still unread", which is a
/// single linear pass instead of re-comparing the whole remaining input at
/// every candidate offset.
fn get_pos_index(&self) -> usize {
    let total = self.content.chars().count();
    let remaining = self.pos.clone().count();

    // `pos` is expected to be a suffix of `content`; saturate rather than
    // underflow if that invariant is ever broken.
    total.saturating_sub(remaining)
}
/// Reports whether the unread input starts with `chars`, without consuming
/// anything. An empty `chars` always matches.
fn test_chars(&self, chars: &str) -> bool {
    let mut lookahead = self.pos.clone();
    chars.chars().all(|expected| lookahead.next() == Some(expected))
}
/// Consumes any run of spaces and newlines at the cursor; stops at the first
/// other character or at end of input.
fn parse_optional_whitespace(&mut self) {
    while matches!(self.pos.peek(), Some(' ') | Some('\n')) {
        self.pos.next();
    }
}
/// Parses one definition of the form `@name = <content>`, where the content
/// is a function (`function(...) {...}`), a class (`class(...)`) or any plain
/// value.
///
/// The content kind is chosen by looking ahead for the full `function(` /
/// `class(` prefix. Dispatching on the first letter alone would send the
/// value `false` to the function parser.
///
/// # Panics
/// Panics on malformed input, including end of input after the `=`.
fn assemble_definition(&mut self) -> Definition {
    self.parse_exact("@");
    let def_name = self.parse_identifier();
    self.parse_optional_whitespace();
    self.parse_exact("=");
    self.parse_optional_whitespace();

    // Preserve the dedicated diagnostic for a definition with no content.
    self.pos.peek().expect("Expected value for definition");

    let content = if self.test_chars("function(") {
        DefinitionContent::Function(self.assemble_function())
    } else if self.test_chars("class(") {
        DefinitionContent::Class(self.assemble_class())
    } else {
        DefinitionContent::Value(self.assemble_value())
    };

    Definition {
        ref_: DefinitionRef { name: def_name },
        content,
    }
}
fn parse_instruction_word(&mut self) -> InstructionByte {
let instruction_word_map: HashMap<&str, InstructionByte> = HashMap::from([
("end", InstructionByte::End),
("mov", InstructionByte::Mov),
("op++", InstructionByte::OpInc),
("op--", InstructionByte::OpDec),
("op+", InstructionByte::OpPlus),
("op-", InstructionByte::OpMinus),
("op*", InstructionByte::OpMul),
("op/", InstructionByte::OpDiv),
("op%", InstructionByte::OpMod),
("op**", InstructionByte::OpExp),
("op==", InstructionByte::OpEq),
("op!=", InstructionByte::OpNe),
("op===", InstructionByte::OpTripleEq),
("op!==", InstructionByte::OpTripleNe),
("op&&", InstructionByte::OpAnd),
("op||", InstructionByte::OpOr),
("op!", InstructionByte::OpNot),
("op<", InstructionByte::OpLess),
("op<=", InstructionByte::OpLessEq),
("op>", InstructionByte::OpGreater),
("op>=", InstructionByte::OpGreaterEq),
("op??", InstructionByte::OpNullishCoalesce),
("op?.", InstructionByte::OpOptionalChain),
("op&", InstructionByte::OpBitAnd),
("op|", InstructionByte::OpBitOr),
("op~", InstructionByte::OpBitNot),
("op^", InstructionByte::OpBitXor),
("op<<", InstructionByte::OpLeftShift),
("op>>", InstructionByte::OpRightShift),
("op>>>", InstructionByte::OpRightShiftUnsigned),
("typeof", InstructionByte::TypeOf),
("instanceof", InstructionByte::InstanceOf),
("in", InstructionByte::In),
("call", InstructionByte::Call),
("apply", InstructionByte::Apply),
("bind", InstructionByte::Bind),
("sub", InstructionByte::Sub),
("submov", InstructionByte::SubMov),
("subcall", InstructionByte::SubCall),
("jmp", InstructionByte::Jmp),
("jmpif", InstructionByte::JmpIf),
("unary+", InstructionByte::UnaryPlus),
("unary-", InstructionByte::UnaryMinus),
("new", InstructionByte::New),
]);
for (word, instruction) in instruction_word_map {
if self.test_instruction_word(word) {
advance_chars(&mut self.pos, word.len() + 1);
self.parse_optional_whitespace();
return instruction;
}
}
panic!("Failed to parse instruction at {}", self.get_pos_index());
}
fn test_instruction_word(&self, word: &str) -> bool {
let mut pos = self.pos.clone();
let has_chars = self.test_chars(word);
if !has_chars {
return false;
}
advance_chars(&mut pos, word.len());
return match pos.next() {
None => true,
Some(' ') => true,
Some('\n') => true,
_ => false,
};
}
/// Looks ahead for an identifier at the cursor without consuming it.
///
/// Returns `None` at end of input or when the first character cannot start
/// an identifier; otherwise returns the longest run of identifier characters.
fn test_identifier(&self) -> Option<String> {
    let mut lookahead = self.pos.clone();

    let first = lookahead
        .next()
        .filter(|c| is_leading_identifier_char(*c))?;

    let mut identifier = String::from(first);
    identifier.extend(lookahead.take_while(|c| is_identifier_char(*c)));

    Some(identifier)
}
/// Consumes and returns the identifier at the cursor.
///
/// # Panics
/// Panics when no identifier starts at the cursor.
fn parse_identifier(&mut self) -> String {
    match self.test_identifier() {
        Some(identifier) => {
            advance_chars(&mut self.pos, identifier.len());
            identifier
        }
        None => panic!("Invalid identifier at {}", self.get_pos_index()),
    }
}
/// Consumes exactly the characters of `chars` from the input.
///
/// Each character is peeked before it is consumed, so on a mismatch the
/// reported index is that of the offending character rather than the one
/// after it.
///
/// # Panics
/// Panics at the first character that differs from `chars`, or at end of
/// input.
fn parse_exact(&mut self, chars: &str) {
    for expected in chars.chars() {
        if self.pos.peek() != Some(&expected) {
            panic!("Expected '{}' at {}", expected, self.get_pos_index());
        }
        self.pos.next();
    }
}
/// Consumes and returns the first entry of `options` that matches at the
/// cursor. Entries are tried in order, so a longer alternative must be
/// listed before any of its prefixes.
///
/// # Panics
/// Panics, listing the accepted alternatives, when none of them matches.
fn parse_one_of(&mut self, options: &[&str]) -> String {
    for opt in options {
        if self.test_chars(opt) {
            advance_chars(&mut self.pos, opt.len());
            return opt.to_string();
        }
    }

    panic!(
        "Expected one of {:?} at {}",
        options,
        self.get_pos_index()
    );
}
/// Parses a double-quoted string literal and returns its unescaped contents.
///
/// Supported escapes are `\\`, `\"`, `\n` and `\t`.
///
/// # Panics
/// Panics if the opening quote is missing, on an unsupported escape
/// sequence, and if the input ends before the closing quote. The last case
/// previously returned the partial string as if the literal were complete.
fn parse_string_literal(&mut self) -> String {
    self.parse_exact("\"");
    let mut result = String::new();

    loop {
        match self.pos.next() {
            Some('"') => return result,
            Some('\\') => {
                let unescaped = match self.pos.next() {
                    Some('\\') => '\\',
                    Some('"') => '"',
                    Some('n') => '\n',
                    Some('t') => '\t',
                    Some(_) => {
                        panic!("Unimplemented escape sequence at {}", self.get_pos_index())
                    }
                    None => panic!(
                        "Unexpected end of input after escape character at {}",
                        self.get_pos_index(),
                    ),
                };
                result.push(unescaped);
            }
            Some(c) => result.push(c),
            None => panic!(
                "Unexpected end of input in string literal at {}",
                self.get_pos_index(),
            ),
        }
    }
}
/// Parses `function(%a, %b, ...) { ... }` into a `Function`.
///
/// The parameter list is a comma-separated sequence of `%name` registers.
/// The body is a sequence of labels (`name:`) and instructions terminated by
/// `}`.
///
/// # Panics
/// Panics on malformed input, including end of input inside the body.
fn assemble_function(&mut self) -> Function {
    let mut function = Function::default();

    self.parse_exact("function(");

    // Parameter list.
    loop {
        self.parse_optional_whitespace();

        match self.parse_one_of(&["%", ")"]).as_str() {
            ")" => break,
            "%" => {}
            _ => panic!("Expected this to be impossible"),
        }

        let param_name = self.parse_identifier();
        function.parameters.push(Register::Named(param_name));

        if self.parse_one_of(&[",", ")"]) == ")" {
            break;
        }
    }

    self.parse_optional_whitespace();
    self.parse_exact("{");

    // Body: labels and instructions until the closing brace.
    loop {
        self.parse_optional_whitespace();

        let c = *self
            .pos
            .peek()
            .expect("Expected instruction, label, or end of function");

        if c == '}' {
            self.pos.next();
            break;
        }

        let item = match self.test_label() {
            Some(label_name) => InstructionOrLabel::Label(self.assemble_label(label_name)),
            None => InstructionOrLabel::Instruction(self.assemble_instruction()),
        };
        function.body.push(item);
    }

    function
}
/// Parses `class(<constructor>, <methods>)` into a `Class`; both operands
/// are ordinary values.
fn assemble_class(&mut self) -> Class {
    self.parse_exact("class(");

    // `assemble_value` skips leading whitespace itself, so only the
    // whitespace in front of the punctuation is skipped explicitly.
    let constructor = self.assemble_value();
    self.parse_optional_whitespace();
    self.parse_exact(",");

    let methods = self.assemble_value();
    self.parse_optional_whitespace();
    self.parse_exact(")");

    Class {
        constructor,
        methods,
    }
}
/// Parses one instruction: the mnemonic followed by its operands.
///
/// Most instructions share one of three operand shapes (one, two or three
/// input values followed by a destination register), captured by the local
/// macros below. Operands are parsed strictly left to right.
fn assemble_instruction(&mut self) -> Instruction {
    use InstructionByte::*;

    // <value> <register>
    macro_rules! v_r {
        ($variant:ident) => {
            Instruction::$variant(self.assemble_value(), self.assemble_register())
        };
    }

    // <value> <value> <register>
    macro_rules! vv_r {
        ($variant:ident) => {
            Instruction::$variant(
                self.assemble_value(),
                self.assemble_value(),
                self.assemble_register(),
            )
        };
    }

    // <value> <value> <value> <register>
    macro_rules! vvv_r {
        ($variant:ident) => {
            Instruction::$variant(
                self.assemble_value(),
                self.assemble_value(),
                self.assemble_value(),
                self.assemble_register(),
            )
        };
    }

    match self.parse_instruction_word() {
        End => Instruction::End,
        Mov => v_r!(Mov),
        OpInc => Instruction::OpInc(self.assemble_register()),
        OpDec => Instruction::OpDec(self.assemble_register()),
        OpPlus => vv_r!(OpPlus),
        OpMinus => vv_r!(OpMinus),
        OpMul => vv_r!(OpMul),
        OpDiv => vv_r!(OpDiv),
        OpMod => vv_r!(OpMod),
        OpExp => vv_r!(OpExp),
        OpEq => vv_r!(OpEq),
        OpNe => vv_r!(OpNe),
        OpTripleEq => vv_r!(OpTripleEq),
        OpTripleNe => vv_r!(OpTripleNe),
        OpAnd => vv_r!(OpAnd),
        OpOr => vv_r!(OpOr),
        OpNot => v_r!(OpNot),
        OpLess => vv_r!(OpLess),
        OpLessEq => vv_r!(OpLessEq),
        OpGreater => vv_r!(OpGreater),
        OpGreaterEq => vv_r!(OpGreaterEq),
        OpNullishCoalesce => vv_r!(OpNullishCoalesce),
        OpOptionalChain => vv_r!(OpOptionalChain),
        OpBitAnd => vv_r!(OpBitAnd),
        OpBitOr => vv_r!(OpBitOr),
        OpBitNot => v_r!(OpBitNot),
        OpBitXor => vv_r!(OpBitXor),
        OpLeftShift => vv_r!(OpLeftShift),
        OpRightShift => vv_r!(OpRightShift),
        OpRightShiftUnsigned => vv_r!(OpRightShiftUnsigned),
        TypeOf => v_r!(TypeOf),
        InstanceOf => vv_r!(InstanceOf),
        In => vv_r!(In),
        Call => vv_r!(Call),
        Apply => vvv_r!(Apply),
        Bind => vv_r!(Bind),
        Sub => vv_r!(Sub),
        SubMov => vv_r!(SubMov),
        SubCall => vvv_r!(SubCall),
        Jmp => Instruction::Jmp(self.assemble_label_read()),
        JmpIf => Instruction::JmpIf(self.assemble_value(), self.assemble_label_read()),
        UnaryPlus => v_r!(UnaryPlus),
        UnaryMinus => v_r!(UnaryMinus),
        New => vv_r!(New),
    }
}
/// Parses a single value, skipping any leading whitespace first.
///
/// The first character selects the kind: `%` register, `@` definition
/// reference, `$` builtin, `[` array, `-`/`.`/digit number, `"` string,
/// `{` object. Anything else must be one of the keywords `void`,
/// `undefined`, `null`, `false` or `true`.
///
/// # Panics
/// Panics at end of input or when nothing above matches.
fn assemble_value(&mut self) -> Value {
    self.parse_optional_whitespace();

    let first = match self.pos.peek().copied() {
        Some(c) => c,
        None => panic!("Expected value at {}", self.get_pos_index()),
    };

    match first {
        '%' => Value::Register(self.assemble_register()),
        '@' => {
            self.parse_exact("@");
            Value::DefinitionRef(DefinitionRef {
                name: self.parse_identifier(),
            })
        }
        '$' => Value::Builtin(self.assemble_builtin()),
        '[' => Value::Array(Box::new(self.assemble_array())),
        '-' | '.' | '0'..='9' => Value::Number(self.assemble_number()),
        '"' => Value::String(self.parse_string_literal()),
        '{' => Value::Object(Box::new(self.assemble_object())),
        _ => {
            // The trailing "" always matches, so an unknown keyword reaches
            // the panic below instead of the one inside `parse_one_of`.
            let keyword =
                self.parse_one_of(&["void", "undefined", "null", "false", "true", ""]);

            match keyword.as_str() {
                "void" => Value::Void,
                "undefined" => Value::Undefined,
                "null" => Value::Null,
                "false" => Value::Bool(false),
                "true" => Value::Bool(true),
                // TODO: Finish implementing the different values
                _ => panic!(
                    "Unimplemented value type or unexpected character {} at {}",
                    first,
                    self.get_pos_index(),
                ),
            }
        }
    }
}
/// Parses an array literal `[v1, v2, ...]`; a trailing comma is accepted.
///
/// The closing `]` is always consumed, including for an empty array or after
/// a trailing comma. Nothing is skipped blindly after a comma, so elements
/// written without a space after the comma (`[1,2]`) are parsed in full.
///
/// # Panics
/// Panics on malformed input, including end of input inside the array.
fn assemble_array(&mut self) -> Array {
    let mut array = Array::default();

    self.parse_optional_whitespace();
    self.parse_exact("[");

    loop {
        self.parse_optional_whitespace();

        match self.pos.peek() {
            None => panic!("Expected value or array end at {}", self.get_pos_index()),
            Some(']') => {
                // Empty array or trailing comma: consume the terminator.
                self.pos.next();
                break array;
            }
            _ => {}
        }

        array.values.push(self.assemble_value());
        self.parse_optional_whitespace();

        match self.parse_one_of(&[",", "]"]).as_str() {
            // Whitespace before the next element is skipped at loop start.
            "," => continue,
            "]" => {
                self.parse_optional_whitespace();
                break array;
            }
            _ => panic!("Expected this to be impossible"),
        }
    }
}
/// Parses a register reference `%name`, skipping leading whitespace, and
/// returns it as `Register::Named`.
fn assemble_register(&mut self) -> Register {
    self.parse_optional_whitespace();
    self.parse_exact("%");
    Register::Named(self.parse_identifier())
}
/// Parses one of the known builtin tokens (`$Math`, `$Debug`) and returns
/// the builtin with its name stored without the `$`.
fn assemble_builtin(&mut self) -> Builtin {
    let name = match self.parse_one_of(&["$Math", "$Debug"]).as_str() {
        "$Math" => "Math",
        "$Debug" => "Debug",
        _ => panic!("Shouldn't happen"),
    };

    Builtin {
        name: name.to_string(),
    }
}
fn test_label(&self) -> Option<String> {
let optional_identifier = self.test_identifier();
if optional_identifier.is_none() {
return None;
}
let identifier = optional_identifier.unwrap();
let mut pos = self.pos.clone();
advance_chars(&mut pos, identifier.len());
if pos.next() == Some(':') {
return Some(identifier);
}
return None;
}
/// Consumes a label definition whose name was already found by
/// `test_label`: the name itself plus the `:` that follows it.
fn assemble_label(&mut self, name: String) -> Label {
    self.parse_optional_whitespace();

    // `nth(k)` consumes `k + 1` characters: the name and the trailing ':'.
    let _ = self.pos.nth(name.len());

    Label { name }
}
/// Parses a label reference `:name`, skipping leading whitespace.
fn assemble_label_read(&mut self) -> LabelRef {
    self.parse_optional_whitespace();
    self.parse_exact(":");
    LabelRef {
        name: self.parse_identifier(),
    }
}
/// Consumes the longest run of number characters (`-`, `.`, `e`, digits) at
/// the cursor and parses it as an `f64`.
///
/// # Panics
/// Panics when the collected text is not a valid `f64`.
fn assemble_number(&mut self) -> f64 {
    let mut num_string = String::new();

    while let Some(&c) = self.pos.peek() {
        if !matches!(c, '-' | '.' | 'e' | '0'..='9') {
            break;
        }
        num_string.push(c);
        self.pos.next();
    }

    match f64::from_str(&num_string) {
        Ok(value) => value,
        Err(_) => panic!("Expected valid number at {}", self.get_pos_index()),
    }
}
/// Parses an object literal `{ key: value, ... }`; a trailing comma is
/// accepted.
///
/// A key is a string literal, a `%register` or a `@definition` reference.
/// The closing `}` is always consumed, including for an empty object or
/// after a trailing comma, where it was previously left in the input.
///
/// # Panics
/// Panics on malformed input, including end of input inside the object.
fn assemble_object(&mut self) -> Object {
    let mut object = Object::default();

    self.parse_exact("{");

    loop {
        self.parse_optional_whitespace();
        let c = *self.pos.peek().expect("Expected object content or end");

        let key = match c {
            '"' => Value::String(self.parse_string_literal()),
            '%' => Value::Register(self.assemble_register()),
            '@' => {
                self.parse_exact("@");
                let name = self.parse_identifier();
                Value::DefinitionRef(DefinitionRef { name })
            }
            '}' => {
                // Empty object or trailing comma: consume the terminator.
                self.pos.next();
                break object;
            }
            _ => panic!("Unexpected character {} at {}", c, self.get_pos_index()),
        };

        self.parse_optional_whitespace();
        self.parse_exact(":");
        let value = self.assemble_value();
        object.properties.push((key, value));

        self.parse_optional_whitespace();
        let c = *self.pos.peek().expect("Expected comma or object end");

        match c {
            ',' => {
                self.pos.next();
            }
            '}' => {
                self.pos.next();
                break object;
            }
            _ => panic!("Unexpected character {} at {}", c, self.get_pos_index()),
        }
    }
}
}
/// Parses assembly source text into a `Module`.
///
/// # Panics
/// Panics on malformed input; the parser reports errors by panicking.
pub fn parse_module(content: &str) -> Module {
    AssemblyParser {
        content,
        pos: content.chars().peekable(),
    }
    .module()
}
/// Instruction kinds recognised by the parser: `parse_instruction_word`
/// returns one of these for a mnemonic and `assemble_instruction` matches on
/// it to decide which operands to read.
///
/// Every variant has an explicit numeric discriminant.
// NOTE(review): the discriminants presumably mirror the bytecode opcode
// values used by the assembler — confirm before changing any of them.
#[derive(Debug, Clone)]
enum InstructionByte {
End = 0x00,
Mov = 0x01,
OpInc = 0x02,
OpDec = 0x03,
OpPlus = 0x04,
OpMinus = 0x05,
OpMul = 0x06,
OpDiv = 0x07,
OpMod = 0x08,
OpExp = 0x09,
OpEq = 0x0a,
OpNe = 0x0b,
OpTripleEq = 0x0c,
OpTripleNe = 0x0d,
OpAnd = 0x0e,
OpOr = 0x0f,
OpNot = 0x10,
OpLess = 0x11,
OpLessEq = 0x12,
OpGreater = 0x13,
OpGreaterEq = 0x14,
OpNullishCoalesce = 0x15,
OpOptionalChain = 0x16,
OpBitAnd = 0x17,
OpBitOr = 0x18,
OpBitNot = 0x19,
OpBitXor = 0x1a,
OpLeftShift = 0x1b,
OpRightShift = 0x1c,
OpRightShiftUnsigned = 0x1d,
TypeOf = 0x1e,
InstanceOf = 0x1f,
In = 0x20,
Call = 0x21,
Apply = 0x22,
Bind = 0x23,
Sub = 0x24,
SubMov = 0x25,
SubCall = 0x26,
Jmp = 0x27,
JmpIf = 0x28,
UnaryPlus = 0x29,
UnaryMinus = 0x2a,
New = 0x2b,
}
/// Reports whether `c` may start an identifier: `_` or an ASCII letter.
fn is_leading_identifier_char(c: char) -> bool {
    matches!(c, '_' | 'a'..='z' | 'A'..='Z')
}
/// Reports whether `c` may appear after the first character of an
/// identifier: `_`, an ASCII digit or an ASCII letter.
fn is_identifier_char(c: char) -> bool {
    matches!(c, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
/// Consumes up to `len` characters from `iter`, stopping early if the input
/// runs out.
fn advance_chars(iter: &mut std::iter::Peekable<std::str::Chars>, len: usize) {
    for _ in iter.by_ref().take(len) {}
}

View File

@@ -1,6 +1,7 @@
mod asm;
mod assemble;
mod assembler;
mod assembly_parser;
mod capture_finder;
mod compile;
mod diagnostic;