mirror of
https://github.com/voltrevo/ValueScript.git
synced 2026-01-13 23:48:02 -05:00
Refactor to object oriented assembler
This commit is contained in:
@@ -40,21 +40,306 @@ fn show_help() {
|
||||
println!(" vstc assemble <file>");
|
||||
}
|
||||
|
||||
fn assemble(content: &str) -> std::vec::Vec<u8> {
|
||||
let mut output: Vec<u8> = Vec::new();
|
||||
let mut pos: usize = 0;
|
||||
struct AssemblerData {
|
||||
content: String, // TODO: Avoid copying this in
|
||||
pos: usize,
|
||||
output: Vec<u8>,
|
||||
}
|
||||
|
||||
loop {
|
||||
parse_optional_whitespace(content, &mut pos);
|
||||
trait Assembler {
|
||||
fn run(&mut self);
|
||||
fn content_at(&self, pos: usize) -> char;
|
||||
fn test_chars(&self, chars: &str) -> bool;
|
||||
fn parse_optional_whitespace(&mut self);
|
||||
fn assemble_definition(&mut self);
|
||||
fn parse_instruction_word(&mut self) -> Instruction;
|
||||
fn test_instruction_word(&self, word: &str) -> bool;
|
||||
fn parse_identifier(&mut self) -> String;
|
||||
fn parse_exact(&mut self, chars: &str);
|
||||
fn parse_optional_exact(&mut self, chars: &str) -> bool;
|
||||
fn parse_one_of(&mut self, options: &[&str]) -> String;
|
||||
fn assemble_function(&mut self);
|
||||
fn assemble_instruction(&mut self);
|
||||
fn skip_line(&mut self);
|
||||
}
|
||||
|
||||
if pos >= content.len() {
|
||||
break;
|
||||
impl Assembler for AssemblerData {
|
||||
fn run(&mut self) {
|
||||
loop {
|
||||
self.parse_optional_whitespace();
|
||||
|
||||
if self.pos >= self.content.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
self.assemble_definition();
|
||||
}
|
||||
|
||||
assemble_definition(content, &mut pos, &mut output);
|
||||
}
|
||||
|
||||
return output;
|
||||
fn content_at(&self, pos: usize) -> char {
|
||||
return self.content.chars().nth(pos).unwrap();
|
||||
}
|
||||
|
||||
fn test_chars(&self, chars: &str) -> bool {
|
||||
let mut pos = self.pos;
|
||||
|
||||
for c in chars.chars() {
|
||||
if pos >= self.content.len() || self.content_at(pos) != c {
|
||||
return false;
|
||||
}
|
||||
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
fn parse_optional_whitespace(&mut self) {
|
||||
while self.pos < self.content.len() {
|
||||
let c = self.content_at(self.pos);
|
||||
|
||||
if c != ' ' && c != '\n' {
|
||||
return;
|
||||
}
|
||||
|
||||
self.pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn assemble_definition(&mut self) {
|
||||
self.parse_exact("@");
|
||||
let def_name = self.parse_identifier();
|
||||
println!("assembling {}", def_name);
|
||||
self.parse_optional_whitespace();
|
||||
self.parse_exact("=");
|
||||
self.parse_optional_whitespace();
|
||||
|
||||
// TODO: Handle other kinds of definitions
|
||||
self.assemble_function();
|
||||
}
|
||||
|
||||
fn parse_instruction_word(&mut self) -> Instruction {
|
||||
let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([
|
||||
("end", Instruction::End),
|
||||
("mov", Instruction::Mov),
|
||||
("op++", Instruction::OpInc),
|
||||
("op--", Instruction::OpDec),
|
||||
("op+", Instruction::OpPlus),
|
||||
("op-", Instruction::OpMinus),
|
||||
("op*", Instruction::OpMul),
|
||||
("op/", Instruction::OpDiv),
|
||||
("op%", Instruction::OpMod),
|
||||
("op**", Instruction::OpExp),
|
||||
("op==", Instruction::OpEq),
|
||||
("op!=", Instruction::OpNe),
|
||||
("op===", Instruction::OpTripleEq),
|
||||
("op!==", Instruction::OpTripleNe),
|
||||
("op&&", Instruction::OpAnd),
|
||||
("op||", Instruction::OpOr),
|
||||
("op!", Instruction::OpNot),
|
||||
("op<", Instruction::OpLess),
|
||||
("op<=", Instruction::OpLessEq),
|
||||
("op>", Instruction::OpGreater),
|
||||
("op>=", Instruction::OpGreaterEq),
|
||||
("op??", Instruction::OpNullishCoalesce),
|
||||
("op?.", Instruction::OpOptionalChain),
|
||||
("op&", Instruction::OpBitAnd),
|
||||
("op|", Instruction::OpBitOr),
|
||||
("op~", Instruction::OpBitNot),
|
||||
("op^", Instruction::OpBitXor),
|
||||
("op<<", Instruction::OpLeftShift),
|
||||
("op>>", Instruction::OpRightShift),
|
||||
("op>>>", Instruction::OpRightShiftUnsigned),
|
||||
("typeof", Instruction::TypeOf),
|
||||
("instanceof", Instruction::InstanceOf),
|
||||
("in", Instruction::In),
|
||||
("call", Instruction::Call),
|
||||
("apply", Instruction::Apply),
|
||||
("bind", Instruction::Bind),
|
||||
("sub", Instruction::Sub),
|
||||
("submov", Instruction::SubMov),
|
||||
("subcall", Instruction::SubCall),
|
||||
("jmp", Instruction::Jmp),
|
||||
("jmpif", Instruction::JmpIf),
|
||||
]);
|
||||
|
||||
for (word, instruction) in instruction_word_map {
|
||||
if self.test_instruction_word(word) {
|
||||
self.pos += word.len() + 1;
|
||||
self.parse_optional_whitespace();
|
||||
return instruction;
|
||||
}
|
||||
}
|
||||
|
||||
std::panic!("Failed to parse instruction at {}", self.pos);
|
||||
}
|
||||
|
||||
fn test_instruction_word(&self, word: &str) -> bool {
|
||||
let mut pos = self.pos;
|
||||
let has_chars = self.test_chars(word);
|
||||
|
||||
if !has_chars {
|
||||
return false;
|
||||
}
|
||||
|
||||
pos += word.len();
|
||||
|
||||
if pos >= self.content.len() {
|
||||
return true;
|
||||
}
|
||||
|
||||
let ch = self.content_at(pos);
|
||||
|
||||
return ch == ' ' || ch == '\n';
|
||||
}
|
||||
|
||||
fn parse_identifier(&mut self) -> String {
|
||||
let start = self.pos;
|
||||
let leading_char = self.content_at(start);
|
||||
|
||||
if !is_leading_identifier_char(leading_char) {
|
||||
std::panic!("Invalid identifier at {}", self.pos);
|
||||
}
|
||||
|
||||
self.pos += 1;
|
||||
|
||||
while self.pos < self.content.len() {
|
||||
let c = self.content_at(self.pos);
|
||||
|
||||
if !is_identifier_char(c) {
|
||||
break;
|
||||
}
|
||||
|
||||
self.pos += 1;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
return self.content.get_unchecked(start..self.pos).to_string();
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_exact(&mut self, chars: &str) {
|
||||
for c in chars.chars() {
|
||||
if self.pos >= self.content.len() || self.content_at(self.pos) != c {
|
||||
std::panic!("Expected '{}' at {}", c, self.pos);
|
||||
}
|
||||
|
||||
self.pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_optional_exact(&mut self, chars: &str) -> bool {
|
||||
if self.test_chars(chars) {
|
||||
self.pos += chars.len();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
fn parse_one_of(&mut self, options: &[&str]) -> String {
|
||||
for opt in options {
|
||||
if self.test_chars(opt) {
|
||||
self.pos += opt.len();
|
||||
return opt.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: How best to display options here?
|
||||
std::panic!("Expected one of (options) at {}", self.pos);
|
||||
}
|
||||
|
||||
fn assemble_function(&mut self) {
|
||||
self.parse_exact("function(");
|
||||
self.output.push(ValueType::Function as u8);
|
||||
|
||||
let mut register_names: Vec<String> = Vec::from([
|
||||
"return".to_string(),
|
||||
"this".to_string(),
|
||||
]);
|
||||
|
||||
let mut param_names: HashSet<String> = HashSet::new();
|
||||
|
||||
loop {
|
||||
self.parse_optional_whitespace();
|
||||
let mut next = self.parse_one_of(&["%", ")"]);
|
||||
|
||||
if next == ")" {
|
||||
self.output.push(0xff); // TODO: This byte should be the number of registers
|
||||
self.output.push(param_names.len() as u8); // TODO: Handle >255 params
|
||||
break;
|
||||
}
|
||||
|
||||
if next != "%" {
|
||||
std::panic!("Expected this to be impossible");
|
||||
}
|
||||
|
||||
let param_name = self.parse_identifier();
|
||||
param_names.insert(param_name.clone());
|
||||
register_names.push(param_name);
|
||||
self.parse_optional_whitespace();
|
||||
|
||||
next = self.parse_one_of(&[",", ")"]);
|
||||
|
||||
if next == ")" {
|
||||
self.output.push(0xff); // TODO: This byte should be the number of registers
|
||||
self.output.push(param_names.len() as u8); // TODO: Handle >255 params
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
self.parse_optional_whitespace();
|
||||
self.parse_exact("{");
|
||||
|
||||
loop {
|
||||
self.parse_optional_whitespace();
|
||||
|
||||
let c = self.content.chars().nth(self.pos);
|
||||
|
||||
if c == None {
|
||||
std::panic!("Expected instruction or end of function at {}", self.pos);
|
||||
}
|
||||
|
||||
if c.unwrap() == '}' {
|
||||
self.output.push(Instruction::End as u8);
|
||||
self.pos += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
self.assemble_instruction();
|
||||
}
|
||||
}
|
||||
|
||||
fn assemble_instruction(&mut self) {
|
||||
let instr = self.parse_instruction_word();
|
||||
println!("Skipping instruction {:?}", instr);
|
||||
self.skip_line();
|
||||
}
|
||||
|
||||
fn skip_line(&mut self) {
|
||||
while self.pos < self.content.len() {
|
||||
let c = self.content_at(self.pos);
|
||||
self.pos += 1;
|
||||
|
||||
if c == '\n' {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::panic!("Reached end of file looking for newline");
|
||||
}
|
||||
}
|
||||
|
||||
fn assemble(content: &str) -> Vec<u8> {
|
||||
let mut assembler = AssemblerData {
|
||||
content: content.to_string(),
|
||||
pos: 0,
|
||||
output: Vec::new(),
|
||||
};
|
||||
|
||||
assembler.run();
|
||||
|
||||
return assembler.output;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -102,129 +387,6 @@ enum Instruction {
|
||||
JmpIf = 0x28,
|
||||
}
|
||||
|
||||
fn parse_instruction_word(content: &str, pos: &mut usize) -> Instruction {
|
||||
let instruction_word_map: HashMap<&str, Instruction> = HashMap::from([
|
||||
("end", Instruction::End),
|
||||
("mov", Instruction::Mov),
|
||||
("op++", Instruction::OpInc),
|
||||
("op--", Instruction::OpDec),
|
||||
("op+", Instruction::OpPlus),
|
||||
("op-", Instruction::OpMinus),
|
||||
("op*", Instruction::OpMul),
|
||||
("op/", Instruction::OpDiv),
|
||||
("op%", Instruction::OpMod),
|
||||
("op**", Instruction::OpExp),
|
||||
("op==", Instruction::OpEq),
|
||||
("op!=", Instruction::OpNe),
|
||||
("op===", Instruction::OpTripleEq),
|
||||
("op!==", Instruction::OpTripleNe),
|
||||
("op&&", Instruction::OpAnd),
|
||||
("op||", Instruction::OpOr),
|
||||
("op!", Instruction::OpNot),
|
||||
("op<", Instruction::OpLess),
|
||||
("op<=", Instruction::OpLessEq),
|
||||
("op>", Instruction::OpGreater),
|
||||
("op>=", Instruction::OpGreaterEq),
|
||||
("op??", Instruction::OpNullishCoalesce),
|
||||
("op?.", Instruction::OpOptionalChain),
|
||||
("op&", Instruction::OpBitAnd),
|
||||
("op|", Instruction::OpBitOr),
|
||||
("op~", Instruction::OpBitNot),
|
||||
("op^", Instruction::OpBitXor),
|
||||
("op<<", Instruction::OpLeftShift),
|
||||
("op>>", Instruction::OpRightShift),
|
||||
("op>>>", Instruction::OpRightShiftUnsigned),
|
||||
("typeof", Instruction::TypeOf),
|
||||
("instanceof", Instruction::InstanceOf),
|
||||
("in", Instruction::In),
|
||||
("call", Instruction::Call),
|
||||
("apply", Instruction::Apply),
|
||||
("bind", Instruction::Bind),
|
||||
("sub", Instruction::Sub),
|
||||
("submov", Instruction::SubMov),
|
||||
("subcall", Instruction::SubCall),
|
||||
("jmp", Instruction::Jmp),
|
||||
("jmpif", Instruction::JmpIf),
|
||||
]);
|
||||
|
||||
for (word, instruction) in instruction_word_map {
|
||||
if test_instruction_word(content, *pos, word) {
|
||||
*pos += word.len() + 1;
|
||||
parse_optional_whitespace(content, pos);
|
||||
return instruction;
|
||||
}
|
||||
}
|
||||
|
||||
std::panic!("Failed to parse instruction at {}", pos);
|
||||
}
|
||||
|
||||
fn test_chars(content: &str, mut pos: usize, chars: &str) -> bool {
|
||||
for c in chars.chars() {
|
||||
if pos >= content.len() || content.chars().nth(pos).unwrap() != c {
|
||||
return false;
|
||||
}
|
||||
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
fn test_instruction_word(content: &str, mut pos: usize, word: &str) -> bool {
|
||||
let has_chars = test_chars(content, pos, word);
|
||||
|
||||
if !has_chars {
|
||||
return false;
|
||||
}
|
||||
|
||||
pos += word.len();
|
||||
|
||||
if pos >= content.len() {
|
||||
return true;
|
||||
}
|
||||
|
||||
let ch = content.chars().nth(pos).unwrap();
|
||||
|
||||
return ch == ' ' || ch == '\n';
|
||||
}
|
||||
|
||||
fn parse_optional_whitespace(content: &str, pos: &mut usize) {
|
||||
while *pos < content.len() {
|
||||
let c = content.chars().nth(*pos).unwrap();
|
||||
|
||||
if c != ' ' && c != '\n' {
|
||||
return;
|
||||
}
|
||||
|
||||
*pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_identifier(content: &str, pos: &mut usize) -> String {
|
||||
let start = *pos;
|
||||
let leading_char = content.chars().nth(start).unwrap();
|
||||
|
||||
if !is_leading_identifier_char(leading_char) {
|
||||
std::panic!("Invalid identifier at {}", pos);
|
||||
}
|
||||
|
||||
*pos += 1;
|
||||
|
||||
while *pos < content.len() {
|
||||
let c = content.chars().nth(*pos).unwrap();
|
||||
|
||||
if !is_identifier_char(c) {
|
||||
break;
|
||||
}
|
||||
|
||||
*pos += 1;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
return content.get_unchecked(start..*pos).to_string();
|
||||
}
|
||||
}
|
||||
|
||||
fn is_leading_identifier_char(c: char) -> bool {
|
||||
return
|
||||
c == '_' ||
|
||||
@@ -242,110 +404,6 @@ fn is_identifier_char(c: char) -> bool {
|
||||
;
|
||||
}
|
||||
|
||||
fn parse_exact(content: &str, pos: &mut usize, chars: &str) {
|
||||
for c in chars.chars() {
|
||||
if *pos >= content.len() || content.chars().nth(*pos).unwrap() != c {
|
||||
std::panic!("Expected '{}' at {}", c, *pos);
|
||||
}
|
||||
|
||||
*pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_optional_exact(content: &str, pos: &mut usize, chars: &str) -> bool {
|
||||
if test_chars(content, *pos, chars) {
|
||||
*pos += chars.len();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
fn parse_one_of(content: &str, pos: &mut usize, options: &[&str]) -> String {
|
||||
for opt in options {
|
||||
if test_chars(content, *pos, opt) {
|
||||
*pos += opt.len();
|
||||
return opt.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: How best to display options here?
|
||||
std::panic!("Expected one of (options) at {}", pos);
|
||||
}
|
||||
|
||||
fn assemble_definition(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
|
||||
parse_exact(content, pos, "@");
|
||||
let def_name = parse_identifier(content, pos);
|
||||
println!("assembling {}", def_name);
|
||||
parse_optional_whitespace(content, pos);
|
||||
parse_exact(content, pos, "=");
|
||||
parse_optional_whitespace(content, pos);
|
||||
|
||||
// TODO: Handle other kinds of definitions
|
||||
assemble_function(content, pos, output);
|
||||
}
|
||||
|
||||
fn assemble_function(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
|
||||
parse_exact(content, pos, "function(");
|
||||
output.push(ValueType::Function as u8);
|
||||
|
||||
let mut register_names: Vec<String> = Vec::from([
|
||||
"return".to_string(),
|
||||
"this".to_string(),
|
||||
]);
|
||||
|
||||
let mut param_names: HashSet<String> = HashSet::new();
|
||||
|
||||
loop {
|
||||
parse_optional_whitespace(content, pos);
|
||||
let mut next = parse_one_of(content, pos, &["%", ")"]);
|
||||
|
||||
if next == ")" {
|
||||
output.push(0xff); // TODO: This byte should be the number of registers
|
||||
output.push(param_names.len() as u8); // TODO: Handle >255 params
|
||||
break;
|
||||
}
|
||||
|
||||
if next != "%" {
|
||||
std::panic!("Expected this to be impossible");
|
||||
}
|
||||
|
||||
let param_name = parse_identifier(content, pos);
|
||||
param_names.insert(param_name.clone());
|
||||
register_names.push(param_name);
|
||||
parse_optional_whitespace(content, pos);
|
||||
|
||||
next = parse_one_of(content, pos, &[",", ")"]);
|
||||
|
||||
if next == ")" {
|
||||
output.push(0xff); // TODO: This byte should be the number of registers
|
||||
output.push(param_names.len() as u8); // TODO: Handle >255 params
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
parse_optional_whitespace(content, pos);
|
||||
parse_exact(content, pos, "{");
|
||||
|
||||
loop {
|
||||
parse_optional_whitespace(content, pos);
|
||||
|
||||
let c = content.chars().nth(*pos);
|
||||
|
||||
if c == None {
|
||||
std::panic!("Expected instruction or end of function at {}", pos);
|
||||
}
|
||||
|
||||
if c.unwrap() == '}' {
|
||||
output.push(Instruction::End as u8);
|
||||
*pos += 1;
|
||||
break;
|
||||
}
|
||||
|
||||
assemble_instruction(content, pos, output);
|
||||
}
|
||||
}
|
||||
|
||||
enum ValueType {
|
||||
Void = 0x01,
|
||||
Undefined = 0x02,
|
||||
@@ -360,22 +418,3 @@ enum ValueType {
|
||||
Function = 0x0b,
|
||||
Instance = 0x0c,
|
||||
}
|
||||
|
||||
fn assemble_instruction(content: &str, pos: &mut usize, output: &mut Vec<u8>) {
|
||||
let instr = parse_instruction_word(content, pos);
|
||||
println!("Skipping instruction {:?}", instr);
|
||||
skip_line(content, pos);
|
||||
}
|
||||
|
||||
fn skip_line(content: &str, pos: &mut usize) {
|
||||
while *pos < content.len() {
|
||||
let c = content.chars().nth(*pos).unwrap();
|
||||
*pos += 1;
|
||||
|
||||
if c == '\n' {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::panic!("Reached end of file looking for newline");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user