From a791329f519afced74b2e7d0023a487cdf0e8e95 Mon Sep 17 00:00:00 2001
From: Andrew Morris <voltrevo@gmail.com>
Date: Tue, 26 Apr 2022 15:47:48 +1000
Subject: [PATCH] Refactor to object-oriented assembler

---
 src/vstc/assemble.rs | 551 +++++++++++++++++++++++--------------------
 1 file changed, 295 insertions(+), 256 deletions(-)
diff --git a/src/vstc/assemble.rs b/src/vstc/assemble.rs
index 942943b..63fc8a8 100644
--- a/src/vstc/assemble.rs
+++ b/src/vstc/assemble.rs
@@ -40,21 +40,306 @@ fn show_help() {
   println!("    vstc assemble <file>");
 }
 
-fn assemble(content: &str) -> std::vec::Vec<u8> {
-  let mut output: Vec<u8> = Vec::new();
-  let mut pos: usize = 0;
+struct AssemblerData {
+  content: String, // TODO: Avoid copying this in
+  pos: usize, // read position in `content`; moved forward as input is consumed
+  output: Vec<u8>, // assembled bytes; `assemble` returns this once `run` finishes
+}
 
-  loop {
-    parse_optional_whitespace(content, &mut pos);
+trait Assembler { // parsing/assembling operations; implemented below for AssemblerData
+  fn run(&mut self); // assembles definitions until the input is exhausted
+  fn content_at(&self, pos: usize) -> char; // character at `pos`; panics if out of range
+  fn test_chars(&self, chars: &str) -> bool; // peek: is `chars` at the cursor?
+  fn parse_optional_whitespace(&mut self); // skips spaces and newlines
+  fn assemble_definition(&mut self); // `@name =` followed by a function
+  fn parse_instruction_word(&mut self) -> Instruction; // consumes an instruction word; panics if none matches
+  fn test_instruction_word(&self, word: &str) -> bool; // peek: is `word` at the cursor as a whole word?
+  fn parse_identifier(&mut self) -> String; // consumes and returns an identifier; panics if invalid
+  fn parse_exact(&mut self, chars: &str); // consumes `chars` or panics
+  fn parse_optional_exact(&mut self, chars: &str) -> bool; // consumes `chars` if present
+  fn parse_one_of(&mut self, options: &[&str]) -> String; // consumes the first matching option or panics
+  fn assemble_function(&mut self); // `function(%params) { instructions }`
+  fn assemble_instruction(&mut self); // parses the instruction word, then calls `skip_line`
+  fn skip_line(&mut self); // advances past the next '\n' or panics
+}
 
-    if pos >= content.len() {
-      break;
+impl Assembler for AssemblerData {
+  fn run(&mut self) {
+    loop {
+      self.parse_optional_whitespace();
+
+      if self.pos >= self.content.len() {
+        break;
+      }
+
+      self.assemble_definition();
     }
-
-    assemble_definition(content, &mut pos, &mut output);
   }
 
-  return output;
+  fn content_at(&self, pos: usize) -> char { // byte at offset `pos` as a char; panics if out of range
+    return self.content.as_bytes()[pos] as char; // O(1); multi-byte UTF-8 is seen byte by byte
+  }
+
+  /// Peeks at the input: true when `chars` appears at the cursor.
+  /// The cursor itself is never moved.
+  fn test_chars(&self, chars: &str) -> bool {
+    let limit = self.content.len();
+
+    for (offset, expected) in chars.chars().enumerate() {
+      let at = self.pos + offset;
+      if at >= limit || self.content_at(at) != expected {
+        return false;
+      }
+    }
+    true
+  }
+
+  /// Moves the cursor past any run of spaces and newlines, stopping at the
+  /// first other character or at the end of `content`. Tabs and carriage
+  /// returns are not treated as whitespace here.
+  fn parse_optional_whitespace(&mut self) {
+    while self.pos < self.content.len() {
+      match self.content_at(self.pos) {
+        ' ' | '\n' => self.pos += 1,
+        _ => break,
+      }
+    }
+  }
+
+  /// Assembles one `@name = <function>` definition starting at the cursor
+  /// and prints the definition's name to stdout.
+  fn assemble_definition(&mut self) {
+    self.parse_exact("@");
+    println!("assembling {}", self.parse_identifier());
+
+    self.parse_optional_whitespace();
+    self.parse_exact("=");
+    self.parse_optional_whitespace();
+    self.assemble_function(); // TODO: Handle other kinds of definitions
+  }
+
+  /// Parses the instruction word at the cursor and returns its `Instruction`, leaving the
+  /// cursor after the word and any spaces on the same line. Panics if no word matches.
+  fn parse_instruction_word(&mut self) -> Instruction {
+    let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([
+      ("end", Instruction::End),
+      ("mov", Instruction::Mov),
+      ("op++", Instruction::OpInc),
+      ("op--", Instruction::OpDec),
+      ("op+", Instruction::OpPlus),
+      ("op-", Instruction::OpMinus),
+      ("op*", Instruction::OpMul),
+      ("op/", Instruction::OpDiv),
+      ("op%", Instruction::OpMod),
+      ("op**", Instruction::OpExp),
+      ("op==", Instruction::OpEq),
+      ("op!=", Instruction::OpNe),
+      ("op===", Instruction::OpTripleEq),
+      ("op!==", Instruction::OpTripleNe),
+      ("op&&", Instruction::OpAnd),
+      ("op||", Instruction::OpOr),
+      ("op!", Instruction::OpNot),
+      ("op<", Instruction::OpLess),
+      ("op<=", Instruction::OpLessEq),
+      ("op>", Instruction::OpGreater),
+      ("op>=", Instruction::OpGreaterEq),
+      ("op??", Instruction::OpNullishCoalesce),
+      ("op?.", Instruction::OpOptionalChain),
+      ("op&", Instruction::OpBitAnd),
+      ("op|", Instruction::OpBitOr),
+      ("op~", Instruction::OpBitNot),
+      ("op^", Instruction::OpBitXor),
+      ("op<<", Instruction::OpLeftShift),
+      ("op>>", Instruction::OpRightShift),
+      ("op>>>", Instruction::OpRightShiftUnsigned),
+      ("typeof", Instruction::TypeOf),
+      ("instanceof", Instruction::InstanceOf),
+      ("in", Instruction::In),
+      ("call", Instruction::Call),
+      ("apply", Instruction::Apply),
+      ("bind", Instruction::Bind),
+      ("sub", Instruction::Sub),
+      ("submov", Instruction::SubMov),
+      ("subcall", Instruction::SubCall),
+      ("jmp", Instruction::Jmp),
+      ("jmpif", Instruction::JmpIf),
+    ]);
+    let hit = instruction_word_map.into_iter().find(|(w, _)| self.test_instruction_word(w));
+    let (word, instruction) =
+      hit.unwrap_or_else(|| std::panic!("Failed to parse instruction at {}", self.pos));
+    self.pos += word.len();
+    while self.pos < self.content.len() && self.content_at(self.pos) == ' ' {
+      self.pos += 1; // spaces only: the line's '\n' is left for `skip_line` to consume
+    }
+    return instruction;
+  }
+
+  /// Peeks at the input: true when `word` appears at the cursor as a whole
+  /// word, meaning it is followed by a space, a newline, or the end of
+  /// `content`. The cursor is not moved.
+  fn test_instruction_word(&self, word: &str) -> bool {
+    if !self.test_chars(word) {
+      return false;
+    }
+
+    let after = self.pos + word.len();
+
+    // Running out of input right after the word also counts as a
+    // word boundary.
+    if after >= self.content.len() {
+      return true;
+    }
+
+    matches!(self.content_at(after), ' ' | '\n')
+  }
+
+  /// Parses an identifier starting at the cursor and returns it, leaving
+  /// the cursor just past the identifier's last character.
+  ///
+  /// Panics with "Invalid identifier at <pos>" when the cursor is at the
+  /// end of `content` or the first character cannot start an identifier.
+  fn parse_identifier(&mut self) -> String {
+    let start = self.pos;
+
+    // Check the bound first so that end of input gets the same message
+    // as any other invalid identifier.
+    if start >= self.content.len() || !is_leading_identifier_char(self.content_at(start)) {
+      std::panic!("Invalid identifier at {}", self.pos);
+    }
+
+    self.pos += 1;
+
+    while self.pos < self.content.len() && is_identifier_char(self.content_at(self.pos)) {
+      self.pos += 1;
+    }
+
+    // Checked slicing instead of `get_unchecked`: a bad range panics
+    // rather than being undefined behavior.
+    return self.content[start..self.pos].to_string();
+  }
+
+  /// Consumes `chars` from the input one character at a time; panics naming
+  /// the first expected character that is missing or different.
+  fn parse_exact(&mut self, chars: &str) {
+    for expected in chars.chars() {
+      let found = self.pos < self.content.len() && self.content_at(self.pos) == expected;
+      assert!(found, "Expected '{}' at {}", expected, self.pos);
+      self.pos += 1;
+    }
+  }
+
+  /// Consumes `chars` if it is next in the input; returns whether it was.
+  fn parse_optional_exact(&mut self, chars: &str) -> bool {
+    let present = self.test_chars(chars);
+    if present {
+      self.pos += chars.len();
+    }
+    present
+  }
+
+  /// Consumes the first entry of `options` that matches at the cursor and
+  /// returns it. Panics, listing the options, when none of them match.
+  fn parse_one_of(&mut self, options: &[&str]) -> String {
+    for opt in options {
+      if self.test_chars(opt) {
+        self.pos += opt.len();
+        return opt.to_string();
+      }
+    }
+    std::panic!("Expected one of {:?} at {}", options, self.pos);
+  }
+
+  /// Assembles a `function(%param, ...) { ... }` definition at the cursor.
+  ///
+  /// Emits the function type byte, a placeholder register count (0xff) and
+  /// the number of distinct parameter names, then handles the body one
+  /// instruction at a time; the closing `}` emits `Instruction::End`.
+  fn assemble_function(&mut self) {
+    self.parse_exact("function(");
+    self.output.push(ValueType::Function as u8);
+
+    let mut register_names: Vec<String> = vec![
+      "return".to_string(),
+      "this".to_string(),
+    ];
+    let mut param_names: HashSet<String> = HashSet::new();
+
+    // Parameter list: `%name` entries separated by `,` and closed by `)`.
+    loop {
+      self.parse_optional_whitespace();
+      let opener = self.parse_one_of(&["%", ")"]);
+
+      match opener.as_str() {
+        ")" => break,
+        "%" => {}
+        _ => std::panic!("Expected this to be impossible"),
+      }
+
+      let param_name = self.parse_identifier();
+      param_names.insert(param_name.clone());
+      register_names.push(param_name);
+      self.parse_optional_whitespace();
+
+      if self.parse_one_of(&[",", ")"]) == ")" {
+        break;
+      }
+    }
+
+    // Every way out of the loop above has just consumed the closing `)`.
+    // `param_names` is a set, so a repeated name is counted once.
+    self.output.push(0xff); // TODO: This byte should be the number of registers
+    self.output.push(param_names.len() as u8); // TODO: Handle >255 params
+
+    self.parse_optional_whitespace();
+    self.parse_exact("{");
+
+    // Body: instructions up to the closing `}`.
+    loop {
+      self.parse_optional_whitespace();
+      let upcoming = self.content.chars().nth(self.pos);
+
+      match upcoming {
+        None => std::panic!("Expected instruction or end of function at {}", self.pos),
+        Some('}') => {
+          self.output.push(Instruction::End as u8);
+          self.pos += 1;
+          break;
+        }
+        Some(_) => self.assemble_instruction(),
+      }
+    }
+  }
+
+  /// Parses the instruction word, prints it, then calls `skip_line`; emits nothing to `output`.
+  fn assemble_instruction(&mut self) {
+    println!("Skipping instruction {:?}", self.parse_instruction_word());
+    self.skip_line();
+  }
+
+  /// Advances the cursor to just past the next '\n'.
+  /// Panics if the end of `content` is reached before a newline is found.
+  fn skip_line(&mut self) {
+    while self.pos < self.content.len() {
+      let at_newline = self.content_at(self.pos) == '\n';
+      self.pos += 1;
+      if at_newline {
+        return;
+      }
+    }
+    std::panic!("Reached end of file looking for newline");
+  }
+}
+
+fn assemble(content: &str) -> Vec<u8> {
+  let mut assembler = AssemblerData {
+    content: content.to_string(),
+    pos: 0,
+    output: Vec::new(),
+  };
+
+  assembler.run();
+
+  return assembler.output;
 }
 
 #[derive(Debug)]
@@ -102,129 +387,6 @@ enum Instruction {
   JmpIf = 0x28,
 }
 
-fn parse_instruction_word(content: &str, pos: &mut usize) -> Instruction {
-  let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([
-    ("end", Instruction::End),
-    ("mov", Instruction::Mov),
-    ("op++", Instruction::OpInc),
-    ("op--", Instruction::OpDec),
-    ("op+", Instruction::OpPlus),
-    ("op-", Instruction::OpMinus),
-    ("op*", Instruction::OpMul),
-    ("op/", Instruction::OpDiv),
-    ("op%", Instruction::OpMod),
-    ("op**", Instruction::OpExp),
-    ("op==", Instruction::OpEq),
-    ("op!=", Instruction::OpNe),
-    ("op===", Instruction::OpTripleEq),
-    ("op!==", Instruction::OpTripleNe),
-    ("op&&", Instruction::OpAnd),
-    ("op||", Instruction::OpOr),
-    ("op!", Instruction::OpNot),
-    ("op<", Instruction::OpLess),
-    ("op<=", Instruction::OpLessEq),
-    ("op>", Instruction::OpGreater),
-    ("op>=", Instruction::OpGreaterEq),
-    ("op??", Instruction::OpNullishCoalesce),
-    ("op?.", Instruction::OpOptionalChain),
-    ("op&", Instruction::OpBitAnd),
-    ("op|", Instruction::OpBitOr),
-    ("op~", Instruction::OpBitNot),
-    ("op^", Instruction::OpBitXor),
-    ("op<<", Instruction::OpLeftShift),
-    ("op>>", Instruction::OpRightShift),
-    ("op>>>", Instruction::OpRightShiftUnsigned),
-    ("typeof", Instruction::TypeOf),
-    ("instanceof", Instruction::InstanceOf),
-    ("in", Instruction::In),
-    ("call", Instruction::Call),
-    ("apply", Instruction::Apply),
-    ("bind", Instruction::Bind),
-    ("sub", Instruction::Sub),
-    ("submov", Instruction::SubMov),
-    ("subcall", Instruction::SubCall),
-    ("jmp", Instruction::Jmp),
-    ("jmpif", Instruction::JmpIf),
-  ]);
-
-  for (word, instruction) in instruction_word_map {
-    if test_instruction_word(content, *pos, word) {
-      *pos += word.len() + 1;
-      parse_optional_whitespace(content, pos);
-      return instruction;
-    }
-  }
-
-  std::panic!("Failed to parse instruction at {}", pos);
-}
-
-fn test_chars(content: &str, mut pos: usize, chars: &str) -> bool {
-  for c in chars.chars() {
-    if pos >= content.len() || content.chars().nth(pos).unwrap() != c {
-      return false;
-    }
-
-    pos += 1;
-  }
-
-  return true;
-}
-
-fn test_instruction_word(content: &str, mut pos: usize, word: &str) -> bool {
-  let has_chars = test_chars(content, pos, word);
-
-  if !has_chars {
-    return false;
-  }
-
-  pos += word.len();
-
-  if pos >= content.len() {
-    return true;
-  }
-
-  let ch = content.chars().nth(pos).unwrap();
-
-  return ch == ' ' || ch == '\n';
-}
-
-fn parse_optional_whitespace(content: &str, pos: &mut usize) {
-  while *pos < content.len() {
-    let c = content.chars().nth(*pos).unwrap();
-
-    if c != ' ' && c != '\n' {
-      return;
-    }
-
-    *pos += 1;
-  }
-}
-
-fn parse_identifier(content: &str, pos: &mut usize) -> String {
-  let start = *pos;
-  let leading_char = content.chars().nth(start).unwrap();
-
-  if !is_leading_identifier_char(leading_char) {
-    std::panic!("Invalid identifier at {}", pos);
-  }
-
-  *pos += 1;
-
-  while *pos < content.len() {
-    let c = content.chars().nth(*pos).unwrap();
-
-    if !is_identifier_char(c) {
-      break;
-    }
-
-    *pos += 1;
-  }
-
-  unsafe {
-    return content.get_unchecked(start..*pos).to_string();
-  }
-}
-
 fn is_leading_identifier_char(c: char) -> bool {
   return
     c == '_' ||
@@ -242,110 +404,6 @@ fn is_identifier_char(c: char) -> bool {
   ;
 }
 
-fn parse_exact(content: &str, pos: &mut usize, chars: &str) {
-  for c in chars.chars() {
-    if *pos >= content.len() || content.chars().nth(*pos).unwrap() != c {
-      std::panic!("Expected '{}' at {}", c, *pos);
-    }
-
-    *pos += 1;
-  }
-}
-
-fn parse_optional_exact(content: &str, pos: &mut usize, chars: &str) -> bool {
-  if test_chars(content, *pos, chars) {
-    *pos += chars.len();
-    return true;
-  }
-
-  return false;
-}
-
-fn parse_one_of(content: &str, pos: &mut usize, options: &[&str]) -> String {
-  for opt in options {
-    if test_chars(content, *pos, opt) {
-      *pos += opt.len();
-      return opt.to_string();
-    }
-  }
-
-  // FIXME: How best to display options here?
-  std::panic!("Expected one of (options) at {}", pos);
-}
-
-fn assemble_definition(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
-  parse_exact(content, pos, "@");
-  let def_name = parse_identifier(content, pos);
-  println!("assembling {}", def_name);
-  parse_optional_whitespace(content, pos);
-  parse_exact(content, pos, "=");
-  parse_optional_whitespace(content, pos);
-
-  // TODO: Handle other kinds of definitions
-  assemble_function(content, pos, output);
-}
-
-fn assemble_function(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
-  parse_exact(content, pos, "function(");
-  output.push(ValueType::Function as u8);
-
-  let mut register_names: Vec<String> = Vec::from([
-    "return".to_string(),
-    "this".to_string(),
-  ]);
-
-  let mut param_names: HashSet<String> = HashSet::new();
-
-  loop {
-    parse_optional_whitespace(content, pos);
-    let mut next = parse_one_of(content, pos, &["%", ")"]);
-
-    if next == ")" {
-      output.push(0xff); // TODO: This byte should be the number of registers
-      output.push(param_names.len() as u8); // TODO: Handle >255 params
-      break;
-    }
-
-    if next != "%" {
-      std::panic!("Expected this to be impossible");
-    }
-
-    let param_name = parse_identifier(content, pos);
-    param_names.insert(param_name.clone());
-    register_names.push(param_name);
-    parse_optional_whitespace(content, pos);
-
-    next = parse_one_of(content, pos, &[",", ")"]);
-
-    if next == ")" {
-      output.push(0xff); // TODO: This byte should be the number of registers
-      output.push(param_names.len() as u8); // TODO: Handle >255 params
-      break;
-    }
-  }
-
-  parse_optional_whitespace(content, pos);
-  parse_exact(content, pos, "{");
-
-  loop {
-    parse_optional_whitespace(content, pos);
-
-    let c = content.chars().nth(*pos);
-
-    if c == None {
-      std::panic!("Expected instruction or end of function at {}", pos);
-    }
-
-    if c.unwrap() == '}' {
-      output.push(Instruction::End as u8);
-      *pos += 1;
-      break;
-    }
-
-    assemble_instruction(content, pos, output);
-  }
-}
-
 enum ValueType {
   Void = 0x01,
   Undefined = 0x02,
@@ -360,22 +418,3 @@ enum ValueType {
   Function = 0x0b,
   Instance = 0x0c,
 }
-
-fn assemble_instruction(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
-  let instr = parse_instruction_word(content, pos);
-  println!("Skipping instruction {:?}", instr);
-  skip_line(content, pos);
-}
-
-fn skip_line(content: &str, pos: &mut usize) {
-  while *pos < content.len() {
-    let c = content.chars().nth(*pos).unwrap();
-    *pos += 1;
-
-    if c == '\n' {
-      return;
-    }
-  }
-
-  std::panic!("Reached end of file looking for newline");
-}