Refactor to object oriented assembler

This commit is contained in:
Andrew Morris
2022-04-26 15:47:48 +10:00
parent 44a8aa80c8
commit a791329f51

View File

@@ -40,21 +40,306 @@ fn show_help() {
println!(" vstc assemble <file>");
}
fn assemble(content: &str) -> std::vec::Vec<u8> {
let mut output: Vec<u8> = Vec::new();
let mut pos: usize = 0;
struct AssemblerData {
content: String, // TODO: Avoid copying this in
pos: usize,
output: Vec<u8>,
}
loop {
parse_optional_whitespace(content, &mut pos);
trait Assembler {
fn run(&mut self);
fn content_at(&self, pos: usize) -> char;
fn test_chars(&self, chars: &str) -> bool;
fn parse_optional_whitespace(&mut self);
fn assemble_definition(&mut self);
fn parse_instruction_word(&mut self) -> Instruction;
fn test_instruction_word(&self, word: &str) -> bool;
fn parse_identifier(&mut self) -> String;
fn parse_exact(&mut self, chars: &str);
fn parse_optional_exact(&mut self, chars: &str) -> bool;
fn parse_one_of(&mut self, options: &[&str]) -> String;
fn assemble_function(&mut self);
fn assemble_instruction(&mut self);
fn skip_line(&mut self);
}
if pos >= content.len() {
break;
impl Assembler for AssemblerData {
fn run(&mut self) {
loop {
self.parse_optional_whitespace();
if self.pos >= self.content.len() {
break;
}
self.assemble_definition();
}
assemble_definition(content, &mut pos, &mut output);
}
return output;
fn content_at(&self, pos: usize) -> char {
return self.content.chars().nth(pos).unwrap();
}
fn test_chars(&self, chars: &str) -> bool {
let mut pos = self.pos;
for c in chars.chars() {
if pos >= self.content.len() || self.content_at(pos) != c {
return false;
}
pos += 1;
}
return true;
}
fn parse_optional_whitespace(&mut self) {
while self.pos < self.content.len() {
let c = self.content_at(self.pos);
if c != ' ' && c != '\n' {
return;
}
self.pos += 1;
}
}
fn assemble_definition(&mut self) {
self.parse_exact("@");
let def_name = self.parse_identifier();
println!("assembling {}", def_name);
self.parse_optional_whitespace();
self.parse_exact("=");
self.parse_optional_whitespace();
// TODO: Handle other kinds of definitions
self.assemble_function();
}
fn parse_instruction_word(&mut self) -> Instruction {
let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([
("end", Instruction::End),
("mov", Instruction::Mov),
("op++", Instruction::OpInc),
("op--", Instruction::OpDec),
("op+", Instruction::OpPlus),
("op-", Instruction::OpMinus),
("op*", Instruction::OpMul),
("op/", Instruction::OpDiv),
("op%", Instruction::OpMod),
("op**", Instruction::OpExp),
("op==", Instruction::OpEq),
("op!=", Instruction::OpNe),
("op===", Instruction::OpTripleEq),
("op!==", Instruction::OpTripleNe),
("op&&", Instruction::OpAnd),
("op||", Instruction::OpOr),
("op!", Instruction::OpNot),
("op<", Instruction::OpLess),
("op<=", Instruction::OpLessEq),
("op>", Instruction::OpGreater),
("op>=", Instruction::OpGreaterEq),
("op??", Instruction::OpNullishCoalesce),
("op?.", Instruction::OpOptionalChain),
("op&", Instruction::OpBitAnd),
("op|", Instruction::OpBitOr),
("op~", Instruction::OpBitNot),
("op^", Instruction::OpBitXor),
("op<<", Instruction::OpLeftShift),
("op>>", Instruction::OpRightShift),
("op>>>", Instruction::OpRightShiftUnsigned),
("typeof", Instruction::TypeOf),
("instanceof", Instruction::InstanceOf),
("in", Instruction::In),
("call", Instruction::Call),
("apply", Instruction::Apply),
("bind", Instruction::Bind),
("sub", Instruction::Sub),
("submov", Instruction::SubMov),
("subcall", Instruction::SubCall),
("jmp", Instruction::Jmp),
("jmpif", Instruction::JmpIf),
]);
for (word, instruction) in instruction_word_map {
if self.test_instruction_word(word) {
self.pos += word.len() + 1;
self.parse_optional_whitespace();
return instruction;
}
}
std::panic!("Failed to parse instruction at {}", self.pos);
}
fn test_instruction_word(&self, word: &str) -> bool {
let mut pos = self.pos;
let has_chars = self.test_chars(word);
if !has_chars {
return false;
}
pos += word.len();
if pos >= self.content.len() {
return true;
}
let ch = self.content_at(pos);
return ch == ' ' || ch == '\n';
}
fn parse_identifier(&mut self) -> String {
let start = self.pos;
let leading_char = self.content_at(start);
if !is_leading_identifier_char(leading_char) {
std::panic!("Invalid identifier at {}", self.pos);
}
self.pos += 1;
while self.pos < self.content.len() {
let c = self.content_at(self.pos);
if !is_identifier_char(c) {
break;
}
self.pos += 1;
}
unsafe {
return self.content.get_unchecked(start..self.pos).to_string();
}
}
fn parse_exact(&mut self, chars: &str) {
for c in chars.chars() {
if self.pos >= self.content.len() || self.content_at(self.pos) != c {
std::panic!("Expected '{}' at {}", c, self.pos);
}
self.pos += 1;
}
}
fn parse_optional_exact(&mut self, chars: &str) -> bool {
if self.test_chars(chars) {
self.pos += chars.len();
return true;
}
return false;
}
fn parse_one_of(&mut self, options: &[&str]) -> String {
for opt in options {
if self.test_chars(opt) {
self.pos += opt.len();
return opt.to_string();
}
}
// FIXME: How best to display options here?
std::panic!("Expected one of (options) at {}", self.pos);
}
fn assemble_function(&mut self) {
self.parse_exact("function(");
self.output.push(ValueType::Function as u8);
let mut register_names: Vec<String> = Vec::from([
"return".to_string(),
"this".to_string(),
]);
let mut param_names: HashSet<String> = HashSet::new();
loop {
self.parse_optional_whitespace();
let mut next = self.parse_one_of(&["%", ")"]);
if next == ")" {
self.output.push(0xff); // TODO: This byte should be the number of registers
self.output.push(param_names.len() as u8); // TODO: Handle >255 params
break;
}
if next != "%" {
std::panic!("Expected this to be impossible");
}
let param_name = self.parse_identifier();
param_names.insert(param_name.clone());
register_names.push(param_name);
self.parse_optional_whitespace();
next = self.parse_one_of(&[",", ")"]);
if next == ")" {
self.output.push(0xff); // TODO: This byte should be the number of registers
self.output.push(param_names.len() as u8); // TODO: Handle >255 params
break;
}
}
self.parse_optional_whitespace();
self.parse_exact("{");
loop {
self.parse_optional_whitespace();
let c = self.content.chars().nth(self.pos);
if c == None {
std::panic!("Expected instruction or end of function at {}", self.pos);
}
if c.unwrap() == '}' {
self.output.push(Instruction::End as u8);
self.pos += 1;
break;
}
self.assemble_instruction();
}
}
fn assemble_instruction(&mut self) {
let instr = self.parse_instruction_word();
println!("Skipping instruction {:?}", instr);
self.skip_line();
}
fn skip_line(&mut self) {
while self.pos < self.content.len() {
let c = self.content_at(self.pos);
self.pos += 1;
if c == '\n' {
return;
}
}
std::panic!("Reached end of file looking for newline");
}
}
fn assemble(content: &str) -> Vec<u8> {
let mut assembler = AssemblerData {
content: content.to_string(),
pos: 0,
output: Vec::new(),
};
assembler.run();
return assembler.output;
}
#[derive(Debug)]
@@ -102,129 +387,6 @@ enum Instruction {
JmpIf = 0x28,
}
fn parse_instruction_word(content: &str, pos: &mut usize) -> Instruction {
let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([
("end", Instruction::End),
("mov", Instruction::Mov),
("op++", Instruction::OpInc),
("op--", Instruction::OpDec),
("op+", Instruction::OpPlus),
("op-", Instruction::OpMinus),
("op*", Instruction::OpMul),
("op/", Instruction::OpDiv),
("op%", Instruction::OpMod),
("op**", Instruction::OpExp),
("op==", Instruction::OpEq),
("op!=", Instruction::OpNe),
("op===", Instruction::OpTripleEq),
("op!==", Instruction::OpTripleNe),
("op&&", Instruction::OpAnd),
("op||", Instruction::OpOr),
("op!", Instruction::OpNot),
("op<", Instruction::OpLess),
("op<=", Instruction::OpLessEq),
("op>", Instruction::OpGreater),
("op>=", Instruction::OpGreaterEq),
("op??", Instruction::OpNullishCoalesce),
("op?.", Instruction::OpOptionalChain),
("op&", Instruction::OpBitAnd),
("op|", Instruction::OpBitOr),
("op~", Instruction::OpBitNot),
("op^", Instruction::OpBitXor),
("op<<", Instruction::OpLeftShift),
("op>>", Instruction::OpRightShift),
("op>>>", Instruction::OpRightShiftUnsigned),
("typeof", Instruction::TypeOf),
("instanceof", Instruction::InstanceOf),
("in", Instruction::In),
("call", Instruction::Call),
("apply", Instruction::Apply),
("bind", Instruction::Bind),
("sub", Instruction::Sub),
("submov", Instruction::SubMov),
("subcall", Instruction::SubCall),
("jmp", Instruction::Jmp),
("jmpif", Instruction::JmpIf),
]);
for (word, instruction) in instruction_word_map {
if test_instruction_word(content, *pos, word) {
*pos += word.len() + 1;
parse_optional_whitespace(content, pos);
return instruction;
}
}
std::panic!("Failed to parse instruction at {}", pos);
}
fn test_chars(content: &str, mut pos: usize, chars: &str) -> bool {
for c in chars.chars() {
if pos >= content.len() || content.chars().nth(pos).unwrap() != c {
return false;
}
pos += 1;
}
return true;
}
fn test_instruction_word(content: &str, mut pos: usize, word: &str) -> bool {
let has_chars = test_chars(content, pos, word);
if !has_chars {
return false;
}
pos += word.len();
if pos >= content.len() {
return true;
}
let ch = content.chars().nth(pos).unwrap();
return ch == ' ' || ch == '\n';
}
fn parse_optional_whitespace(content: &str, pos: &mut usize) {
while *pos < content.len() {
let c = content.chars().nth(*pos).unwrap();
if c != ' ' && c != '\n' {
return;
}
*pos += 1;
}
}
fn parse_identifier(content: &str, pos: &mut usize) -> String {
let start = *pos;
let leading_char = content.chars().nth(start).unwrap();
if !is_leading_identifier_char(leading_char) {
std::panic!("Invalid identifier at {}", pos);
}
*pos += 1;
while *pos < content.len() {
let c = content.chars().nth(*pos).unwrap();
if !is_identifier_char(c) {
break;
}
*pos += 1;
}
unsafe {
return content.get_unchecked(start..*pos).to_string();
}
}
fn is_leading_identifier_char(c: char) -> bool {
return
c == '_' ||
@@ -242,110 +404,6 @@ fn is_identifier_char(c: char) -> bool {
;
}
fn parse_exact(content: &str, pos: &mut usize, chars: &str) {
for c in chars.chars() {
if *pos >= content.len() || content.chars().nth(*pos).unwrap() != c {
std::panic!("Expected '{}' at {}", c, *pos);
}
*pos += 1;
}
}
fn parse_optional_exact(content: &str, pos: &mut usize, chars: &str) -> bool {
if test_chars(content, *pos, chars) {
*pos += chars.len();
return true;
}
return false;
}
fn parse_one_of(content: &str, pos: &mut usize, options: &[&str]) -> String {
for opt in options {
if test_chars(content, *pos, opt) {
*pos += opt.len();
return opt.to_string();
}
}
// FIXME: How best to display options here?
std::panic!("Expected one of (options) at {}", pos);
}
fn assemble_definition(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
parse_exact(content, pos, "@");
let def_name = parse_identifier(content, pos);
println!("assembling {}", def_name);
parse_optional_whitespace(content, pos);
parse_exact(content, pos, "=");
parse_optional_whitespace(content, pos);
// TODO: Handle other kinds of definitions
assemble_function(content, pos, output);
}
fn assemble_function(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
parse_exact(content, pos, "function(");
output.push(ValueType::Function as u8);
let mut register_names: Vec<String> = Vec::from([
"return".to_string(),
"this".to_string(),
]);
let mut param_names: HashSet<String> = HashSet::new();
loop {
parse_optional_whitespace(content, pos);
let mut next = parse_one_of(content, pos, &["%", ")"]);
if next == ")" {
output.push(0xff); // TODO: This byte should be the number of registers
output.push(param_names.len() as u8); // TODO: Handle >255 params
break;
}
if next != "%" {
std::panic!("Expected this to be impossible");
}
let param_name = parse_identifier(content, pos);
param_names.insert(param_name.clone());
register_names.push(param_name);
parse_optional_whitespace(content, pos);
next = parse_one_of(content, pos, &[",", ")"]);
if next == ")" {
output.push(0xff); // TODO: This byte should be the number of registers
output.push(param_names.len() as u8); // TODO: Handle >255 params
break;
}
}
parse_optional_whitespace(content, pos);
parse_exact(content, pos, "{");
loop {
parse_optional_whitespace(content, pos);
let c = content.chars().nth(*pos);
if c == None {
std::panic!("Expected instruction or end of function at {}", pos);
}
if c.unwrap() == '}' {
output.push(Instruction::End as u8);
*pos += 1;
break;
}
assemble_instruction(content, pos, output);
}
}
enum ValueType {
Void = 0x01,
Undefined = 0x02,
@@ -360,22 +418,3 @@ enum ValueType {
Function = 0x0b,
Instance = 0x0c,
}
fn assemble_instruction(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
let instr = parse_instruction_word(content, pos);
println!("Skipping instruction {:?}", instr);
skip_line(content, pos);
}
fn skip_line(content: &str, pos: &mut usize) {
while *pos < content.len() {
let c = content.chars().nth(*pos).unwrap();
*pos += 1;
if c == '\n' {
return;
}
}
std::panic!("Reached end of file looking for newline");
}