From a791329f519afced74b2e7d0023a487cdf0e8e95 Mon Sep 17 00:00:00 2001 From: Andrew Morris Date: Tue, 26 Apr 2022 15:47:48 +1000 Subject: [PATCH] Refactor to object oriented assembler --- src/vstc/assemble.rs | 551 +++++++++++++++++++++++-------------------- 1 file changed, 295 insertions(+), 256 deletions(-) diff --git a/src/vstc/assemble.rs b/src/vstc/assemble.rs index 942943b..63fc8a8 100644 --- a/src/vstc/assemble.rs +++ b/src/vstc/assemble.rs @@ -40,21 +40,306 @@ fn show_help() { println!(" vstc assemble "); } -fn assemble(content: &str) -> std::vec::Vec { - let mut output: Vec = Vec::new(); - let mut pos: usize = 0; +struct AssemblerData { + content: String, // TODO: Avoid copying this in + pos: usize, + output: Vec, +} - loop { - parse_optional_whitespace(content, &mut pos); +trait Assembler { + fn run(&mut self); + fn content_at(&self, pos: usize) -> char; + fn test_chars(&self, chars: &str) -> bool; + fn parse_optional_whitespace(&mut self); + fn assemble_definition(&mut self); + fn parse_instruction_word(&mut self) -> Instruction; + fn test_instruction_word(&self, word: &str) -> bool; + fn parse_identifier(&mut self) -> String; + fn parse_exact(&mut self, chars: &str); + fn parse_optional_exact(&mut self, chars: &str) -> bool; + fn parse_one_of(&mut self, options: &[&str]) -> String; + fn assemble_function(&mut self); + fn assemble_instruction(&mut self); + fn skip_line(&mut self); +} - if pos >= content.len() { - break; +impl Assembler for AssemblerData { + fn run(&mut self) { + loop { + self.parse_optional_whitespace(); + + if self.pos >= self.content.len() { + break; + } + + self.assemble_definition(); } - - assemble_definition(content, &mut pos, &mut output); } - return output; + fn content_at(&self, pos: usize) -> char { + return self.content.chars().nth(pos).unwrap(); + } + + fn test_chars(&self, chars: &str) -> bool { + let mut pos = self.pos; + + for c in chars.chars() { + if pos >= self.content.len() || self.content_at(pos) != c { + return false; + } + + pos += 1; + } + + return true; + } + + fn parse_optional_whitespace(&mut self) { + while self.pos < self.content.len() { + let c = self.content_at(self.pos); + + if c != ' ' && c != '\n' { + return; + } + + self.pos += 1; + } + } + + fn assemble_definition(&mut self) { + self.parse_exact("@"); + let def_name = self.parse_identifier(); + println!("assembling {}", def_name); + self.parse_optional_whitespace(); + self.parse_exact("="); + self.parse_optional_whitespace(); + + // TODO: Handle other kinds of definitions + self.assemble_function(); + } + + fn parse_instruction_word(&mut self) -> Instruction { + let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([ + ("end", Instruction::End), + ("mov", Instruction::Mov), + ("op++", Instruction::OpInc), + ("op--", Instruction::OpDec), + ("op+", Instruction::OpPlus), + ("op-", Instruction::OpMinus), + ("op*", Instruction::OpMul), + ("op/", Instruction::OpDiv), + ("op%", Instruction::OpMod), + ("op**", Instruction::OpExp), + ("op==", Instruction::OpEq), + ("op!=", Instruction::OpNe), + ("op===", Instruction::OpTripleEq), + ("op!==", Instruction::OpTripleNe), + ("op&&", Instruction::OpAnd), + ("op||", Instruction::OpOr), + ("op!", Instruction::OpNot), + ("op<", Instruction::OpLess), + ("op<=", Instruction::OpLessEq), + ("op>", Instruction::OpGreater), + ("op>=", Instruction::OpGreaterEq), + ("op??", Instruction::OpNullishCoalesce), + ("op?.", Instruction::OpOptionalChain), + ("op&", Instruction::OpBitAnd), + ("op|", Instruction::OpBitOr), + ("op~", Instruction::OpBitNot), + ("op^", Instruction::OpBitXor), + ("op<<", Instruction::OpLeftShift), + ("op>>", Instruction::OpRightShift), + ("op>>>", Instruction::OpRightShiftUnsigned), + ("typeof", Instruction::TypeOf), + ("instanceof", Instruction::InstanceOf), + ("in", Instruction::In), + ("call", Instruction::Call), + ("apply", Instruction::Apply), + ("bind", Instruction::Bind), + ("sub", Instruction::Sub), + ("submov", Instruction::SubMov), + ("subcall", Instruction::SubCall), + ("jmp", Instruction::Jmp), + ("jmpif", Instruction::JmpIf), + ]); + + for (word, instruction) in instruction_word_map { + if self.test_instruction_word(word) { + self.pos += word.len() + 1; + self.parse_optional_whitespace(); + return instruction; + } + } + + std::panic!("Failed to parse instruction at {}", self.pos); + } + + fn test_instruction_word(&self, word: &str) -> bool { + let mut pos = self.pos; + let has_chars = self.test_chars(word); + + if !has_chars { + return false; + } + + pos += word.len(); + + if pos >= self.content.len() { + return true; + } + + let ch = self.content_at(pos); + + return ch == ' ' || ch == '\n'; + } + + fn parse_identifier(&mut self) -> String { + let start = self.pos; + let leading_char = self.content_at(start); + + if !is_leading_identifier_char(leading_char) { + std::panic!("Invalid identifier at {}", self.pos); + } + + self.pos += 1; + + while self.pos < self.content.len() { + let c = self.content_at(self.pos); + + if !is_identifier_char(c) { + break; + } + + self.pos += 1; + } + + unsafe { + return self.content.get_unchecked(start..self.pos).to_string(); + } + } + + fn parse_exact(&mut self, chars: &str) { + for c in chars.chars() { + if self.pos >= self.content.len() || self.content_at(self.pos) != c { + std::panic!("Expected '{}' at {}", c, self.pos); + } + + self.pos += 1; + } + } + + fn parse_optional_exact(&mut self, chars: &str) -> bool { + if self.test_chars(chars) { + self.pos += chars.len(); + return true; + } + + return false; + } + + fn parse_one_of(&mut self, options: &[&str]) -> String { + for opt in options { + if self.test_chars(opt) { + self.pos += opt.len(); + return opt.to_string(); + } + } + + // FIXME: How best to display options here? + std::panic!("Expected one of (options) at {}", self.pos); + } + + fn assemble_function(&mut self) { + self.parse_exact("function("); + self.output.push(ValueType::Function as u8); + + let mut register_names: Vec = Vec::from([ + "return".to_string(), + "this".to_string(), + ]); + + let mut param_names: HashSet = HashSet::new(); + + loop { + self.parse_optional_whitespace(); + let mut next = self.parse_one_of(&["%", ")"]); + + if next == ")" { + self.output.push(0xff); // TODO: This byte should be the number of registers + self.output.push(param_names.len() as u8); // TODO: Handle >255 params + break; + } + + if next != "%" { + std::panic!("Expected this to be impossible"); + } + + let param_name = self.parse_identifier(); + param_names.insert(param_name.clone()); + register_names.push(param_name); + self.parse_optional_whitespace(); + + next = self.parse_one_of(&[",", ")"]); + + if next == ")" { + self.output.push(0xff); // TODO: This byte should be the number of registers + self.output.push(param_names.len() as u8); // TODO: Handle >255 params + break; + } + } + + self.parse_optional_whitespace(); + self.parse_exact("{"); + + loop { + self.parse_optional_whitespace(); + + let c = self.content.chars().nth(self.pos); + + if c == None { + std::panic!("Expected instruction or end of function at {}", self.pos); + } + + if c.unwrap() == '}' { + self.output.push(Instruction::End as u8); + self.pos += 1; + break; + } + + self.assemble_instruction(); + } + } + + fn assemble_instruction(&mut self) { + let instr = self.parse_instruction_word(); + println!("Skipping instruction {:?}", instr); + self.skip_line(); + } + + fn skip_line(&mut self) { + while self.pos < self.content.len() { + let c = self.content_at(self.pos); + self.pos += 1; + + if c == '\n' { + return; + } + } + + std::panic!("Reached end of file looking for newline"); + } +} + +fn assemble(content: &str) -> Vec { + let mut assembler = AssemblerData { + content: content.to_string(), + pos: 0, + output: Vec::new(), + }; + + assembler.run(); + + return assembler.output; } #[derive(Debug)] @@ -102,129 +387,6 @@ enum Instruction { JmpIf = 0x28, } -fn parse_instruction_word(content: &str, pos: &mut usize) -> Instruction { - let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([ - ("end", Instruction::End), - ("mov", Instruction::Mov), - ("op++", Instruction::OpInc), - ("op--", Instruction::OpDec), - ("op+", Instruction::OpPlus), - ("op-", Instruction::OpMinus), - ("op*", Instruction::OpMul), - ("op/", Instruction::OpDiv), - ("op%", Instruction::OpMod), - ("op**", Instruction::OpExp), - ("op==", Instruction::OpEq), - ("op!=", Instruction::OpNe), - ("op===", Instruction::OpTripleEq), - ("op!==", Instruction::OpTripleNe), - ("op&&", Instruction::OpAnd), - ("op||", Instruction::OpOr), - ("op!", Instruction::OpNot), - ("op<", Instruction::OpLess), - ("op<=", Instruction::OpLessEq), - ("op>", Instruction::OpGreater), - ("op>=", Instruction::OpGreaterEq), - ("op??", Instruction::OpNullishCoalesce), - ("op?.", Instruction::OpOptionalChain), - ("op&", Instruction::OpBitAnd), - ("op|", Instruction::OpBitOr), - ("op~", Instruction::OpBitNot), - ("op^", Instruction::OpBitXor), - ("op<<", Instruction::OpLeftShift), - ("op>>", Instruction::OpRightShift), - ("op>>>", Instruction::OpRightShiftUnsigned), - ("typeof", Instruction::TypeOf), - ("instanceof", Instruction::InstanceOf), - ("in", Instruction::In), - ("call", Instruction::Call), - ("apply", Instruction::Apply), - ("bind", Instruction::Bind), - ("sub", Instruction::Sub), - ("submov", Instruction::SubMov), - ("subcall", Instruction::SubCall), - ("jmp", Instruction::Jmp), - ("jmpif", Instruction::JmpIf), - ]); - - for (word, instruction) in instruction_word_map { - if test_instruction_word(content, *pos, word) { - *pos += word.len() + 1; - parse_optional_whitespace(content, pos); - return instruction; - } - } - - std::panic!("Failed to parse instruction at {}", pos); -} - -fn test_chars(content: &str, mut pos: usize, chars: &str) -> bool { - for c in chars.chars() { - if pos >= content.len() || content.chars().nth(pos).unwrap() != c { - return false; - } - - pos += 1; - } - - return true; -} - -fn test_instruction_word(content: &str, mut pos: usize, word: &str) -> bool { - let has_chars = test_chars(content, pos, word); - - if !has_chars { - return false; - } - - pos += word.len(); - - if pos >= content.len() { - return true; - } - - let ch = content.chars().nth(pos).unwrap(); - - return ch == ' ' || ch == '\n'; -} - -fn parse_optional_whitespace(content: &str, pos: &mut usize) { - while *pos < content.len() { - let c = content.chars().nth(*pos).unwrap(); - - if c != ' ' && c != '\n' { - return; - } - - *pos += 1; - } -} - -fn parse_identifier(content: &str, pos: &mut usize) -> String { - let start = *pos; - let leading_char = content.chars().nth(start).unwrap(); - - if !is_leading_identifier_char(leading_char) { - std::panic!("Invalid identifier at {}", pos); - } - - *pos += 1; - - while *pos < content.len() { - let c = content.chars().nth(*pos).unwrap(); - - if !is_identifier_char(c) { - break; - } - - *pos += 1; - } - - unsafe { - return content.get_unchecked(start..*pos).to_string(); - } -} - fn is_leading_identifier_char(c: char) -> bool { return c == '_' || @@ -242,110 +404,6 @@ fn is_identifier_char(c: char) -> bool { ; } -fn parse_exact(content: &str, pos: &mut usize, chars: &str) { - for c in chars.chars() { - if *pos >= content.len() || content.chars().nth(*pos).unwrap() != c { - std::panic!("Expected '{}' at {}", c, *pos); - } - - *pos += 1; - } -} - -fn parse_optional_exact(content: &str, pos: &mut usize, chars: &str) -> bool { - if test_chars(content, *pos, chars) { - *pos += chars.len(); - return true; - } - - return false; -} - -fn parse_one_of(content: &str, pos: &mut usize, options: &[&str]) -> String { - for opt in options { - if test_chars(content, *pos, opt) { - *pos += opt.len(); - return opt.to_string(); - } - } - - // FIXME: How best to display options here? - std::panic!("Expected one of (options) at {}", pos); -} - -fn assemble_definition(content: &str, pos: &mut usize, output: &mut Vec) { - parse_exact(content, pos, "@"); - let def_name = parse_identifier(content, pos); - println!("assembling {}", def_name); - parse_optional_whitespace(content, pos); - parse_exact(content, pos, "="); - parse_optional_whitespace(content, pos); - - // TODO: Handle other kinds of definitions - assemble_function(content, pos, output); -} - -fn assemble_function(content: &str, pos: &mut usize, output: &mut Vec) { - parse_exact(content, pos, "function("); - output.push(ValueType::Function as u8); - - let mut register_names: Vec = Vec::from([ - "return".to_string(), - "this".to_string(), - ]); - - let mut param_names: HashSet = HashSet::new(); - - loop { - parse_optional_whitespace(content, pos); - let mut next = parse_one_of(content, pos, &["%", ")"]); - - if next == ")" { - output.push(0xff); // TODO: This byte should be the number of registers - output.push(param_names.len() as u8); // TODO: Handle >255 params - break; - } - - if next != "%" { - std::panic!("Expected this to be impossible"); - } - - let param_name = parse_identifier(content, pos); - param_names.insert(param_name.clone()); - register_names.push(param_name); - parse_optional_whitespace(content, pos); - - next = parse_one_of(content, pos, &[",", ")"]); - - if next == ")" { - output.push(0xff); // TODO: This byte should be the number of registers - output.push(param_names.len() as u8); // TODO: Handle >255 params - break; - } - } - - parse_optional_whitespace(content, pos); - parse_exact(content, pos, "{"); - - loop { - parse_optional_whitespace(content, pos); - - let c = content.chars().nth(*pos); - - if c == None { - std::panic!("Expected instruction or end of function at {}", pos); - } - - if c.unwrap() == '}' { - output.push(Instruction::End as u8); - *pos += 1; - break; - } - - assemble_instruction(content, pos, output); - } -} - enum ValueType { Void = 0x01, Undefined = 0x02, @@ -360,22 +418,3 @@ enum ValueType { Function = 0x0b, Instance = 0x0c, } - -fn assemble_instruction(content: &str, pos: &mut usize, output: &mut Vec) { - let instr = parse_instruction_word(content, pos); - println!("Skipping instruction {:?}", instr); - skip_line(content, pos); -} - -fn skip_line(content: &str, pos: &mut usize) { - while *pos < content.len() { - let c = content.chars().nth(*pos).unwrap(); - *pos += 1; - - if c == '\n' { - return; - } - } - - std::panic!("Reached end of file looking for newline"); -}