From 5244987f8907293a7561050e62690b07f3429c98 Mon Sep 17 00:00:00 2001 From: Andrew Morris Date: Thu, 9 Mar 2023 12:19:53 +1100 Subject: [PATCH] Link modules for vstc compile (almost works) --- valuescript_compiler/src/asm.rs | 20 ++ valuescript_compiler/src/gather_modules.rs | 95 +++--- valuescript_compiler/src/import_pattern.rs | 82 +++++ valuescript_compiler/src/lib.rs | 4 + valuescript_compiler/src/link_module.rs | 346 ++++++++++++++++++++- valuescript_compiler/src/resolve_path.rs | 27 ++ vstc/src/compile_command.rs | 26 +- 7 files changed, 529 insertions(+), 71 deletions(-) create mode 100644 valuescript_compiler/src/import_pattern.rs create mode 100644 valuescript_compiler/src/resolve_path.rs diff --git a/valuescript_compiler/src/asm.rs b/valuescript_compiler/src/asm.rs index 264bdab..5d35e0e 100644 --- a/valuescript_compiler/src/asm.rs +++ b/valuescript_compiler/src/asm.rs @@ -587,3 +587,23 @@ impl std::fmt::Display for Object { write!(f, " }}") } } + +impl Object { + pub fn try_resolve_key(&self, key: &String) -> Option<&Value> { + let mut result: Option<&Value> = None; + + for (k, v) in &self.properties { + if let Value::String(k) = k { + if k == key { + result = Some(v); + } + } else { + // If the key is not a string, it's possible that the result we found earlier is overwritten + // here, so we have to set back to None. + result = None; + } + } + + result + } +} diff --git a/valuescript_compiler/src/gather_modules.rs b/valuescript_compiler/src/gather_modules.rs index d4b356e..3ac13e7 100644 --- a/valuescript_compiler/src/gather_modules.rs +++ b/valuescript_compiler/src/gather_modules.rs @@ -1,34 +1,41 @@ -use std::{ - collections::{HashMap, HashSet}, - path::{Path, PathBuf}, -}; +use std::collections::{HashMap, HashSet}; use queues::{IsQueue, Queue}; use crate::{ - asm::{DefinitionContent, Instruction, InstructionOrLabel, Module, Value}, - compile, Diagnostic, DiagnosticLevel, + asm::Module, + compile, + import_pattern::ImportPattern, + resolve_path::{resolve_path, ResolvedPath}, + Diagnostic, DiagnosticLevel, }; #[derive(Clone, Debug)] enum DependencyReason { EntryPoint, - ImportedBy(String), + ImportedBy(ResolvedPath), } #[derive(Clone)] struct Dependency { - path: String, + path: ResolvedPath, reason: DependencyReason, } -pub struct GatheredModules { - pub entry_point: String, - pub modules: HashMap, - pub diagnostics: HashMap>, +#[derive(Clone)] +pub struct PathAndModule { + // FIXME: This should just be something like CompiledModule, and also include diagnostics + pub path: ResolvedPath, + pub module: Module, } -pub fn gather_modules(entry_point: String, read_file: ReadFile) -> GatheredModules +pub struct GatheredModules { + pub entry_point: ResolvedPath, + pub modules: HashMap, + pub diagnostics: HashMap>, +} + +pub fn gather_modules(entry_point: ResolvedPath, read_file: ReadFile) -> GatheredModules where ReadFile: Fn(&str) -> Result, { @@ -53,9 +60,10 @@ where Err(_) => break, }; - let file_contents = match read_file(&dependency.path) { + let file_contents = match read_file(&dependency.path.path) { Ok(file_contents) => file_contents, Err(err) => { + // FIXME: This diagnostic should really be attached to the import statement gm.diagnostics .entry(dependency.path.clone()) .or_insert(vec![]) @@ -64,7 +72,7 @@ where message: match dependency.reason { DependencyReason::EntryPoint => format!("File read failed: {}", err), DependencyReason::ImportedBy(importer) => { - format!("Error reading file imported by {}: {}", importer, err) + format!("File read failed: {} (imported by: {})", err, importer) } }, span: swc_common::DUMMY_SP, @@ -81,10 +89,12 @@ where .or_insert(vec![]) .append(&mut compiler_output.diagnostics); - for imported_path in get_imported_paths( - &compiler_output.module, - &DependencyReason::ImportedBy(dependency.path.clone()), - ) { + let path_and_module = PathAndModule { + path: dependency.path.clone(), + module: compiler_output.module, + }; + + for imported_path in get_imported_paths(&path_and_module) { if gm.modules.contains_key(&imported_path) { continue; } @@ -97,53 +107,22 @@ where .expect("Failed to add to queue"); } - gm.modules.insert(dependency.path, compiler_output.module); + gm.modules.insert(dependency.path, path_and_module); } gm } -fn get_imported_paths(module: &Module, reason: &DependencyReason) -> HashSet { - let mut imported_paths = HashSet::::new(); +pub fn get_imported_paths(path_and_module: &PathAndModule) -> HashSet { + let mut imported_paths = HashSet::::new(); - for definition in &module.definitions { - let lazy = match &definition.content { - DefinitionContent::Lazy(lazy) => lazy, - _ => continue, + for definition in &path_and_module.module.definitions { + let import_pattern = match ImportPattern::decode(definition) { + Some(import_pattern) => import_pattern, + None => continue, }; - match lazy.body.first() { - Some(InstructionOrLabel::Instruction(instruction)) => { - match instruction { - Instruction::Import(import_path, _) | Instruction::ImportStar(import_path, _) => { - match import_path { - Value::String(import_path) => { - let resolved_path = match reason { - DependencyReason::EntryPoint => import_path.clone(), - DependencyReason::ImportedBy(importer) => { - let importer_path = PathBuf::from(importer); - let parent = importer_path.parent().unwrap_or_else(|| Path::new("/")); - - parent - .join(import_path) - .canonicalize() - .expect("Failed to canonicalize path") - .to_str() - .expect("Failed to convert path to string") - .to_string() - } - }; - - imported_paths.insert(resolved_path); - } - _ => {} - } - } - _ => {} - }; - } - _ => {} - } + imported_paths.insert(resolve_path(&path_and_module.path, &import_pattern.path)); } imported_paths diff --git a/valuescript_compiler/src/import_pattern.rs b/valuescript_compiler/src/import_pattern.rs new file mode 100644 index 0000000..62089c7 --- /dev/null +++ b/valuescript_compiler/src/import_pattern.rs @@ -0,0 +1,82 @@ +use crate::asm::{ + Definition, DefinitionContent, Instruction, InstructionOrLabel, Pointer, Register, Value, +}; + +pub struct ImportPattern { + pub pointer: Pointer, + pub path: String, + pub kind: ImportKind, +} + +pub enum ImportKind { + Default, + Star, + Name(String), +} + +impl ImportPattern { + pub fn decode(definition: &Definition) -> Option { + let lazy = match &definition.content { + DefinitionContent::Lazy(lazy) => lazy, + _ => return None, + }; + + if lazy.body.len() > 2 { + return None; + } + + let first_instruction = match lazy.body.first() { + Some(InstructionOrLabel::Instruction(instruction)) => instruction, + _ => return None, + }; + + let (path_value, is_star) = match first_instruction { + Instruction::Import(path, Register::Return) => (path, false), + Instruction::ImportStar(path, Register::Return) => (path, true), + _ => return None, + }; + + let path = match path_value { + Value::String(path) => path, + _ => return None, + }; + + let second_instruction_opt = lazy.body.get(1); + + if !is_star { + return match second_instruction_opt { + Some(_) => None, + _ => Some(ImportPattern { + pointer: definition.pointer.clone(), + path: path.clone(), + kind: ImportKind::Default, + }), + }; + } + + let second_instruction = match second_instruction_opt { + Some(InstructionOrLabel::Instruction(instruction)) => instruction, + Some(_) => return None, + _ => { + return Some(ImportPattern { + pointer: definition.pointer.clone(), + path: path.clone(), + kind: ImportKind::Star, + }) + } + }; + + match second_instruction { + Instruction::Sub( + Value::Register(Register::Return), + Value::String(name), + Register::Return, + ) => Some(ImportPattern { + pointer: definition.pointer.clone(), + path: path.clone(), + kind: ImportKind::Name(name.clone()), + }), + _ => None, + } + } +} diff --git a/valuescript_compiler/src/lib.rs b/valuescript_compiler/src/lib.rs index 4ec58d4..99b650d 100644 --- a/valuescript_compiler/src/lib.rs +++ b/valuescript_compiler/src/lib.rs @@ -6,9 +6,11 @@ mod diagnostic; mod expression_compiler; mod function_compiler; mod gather_modules; +mod import_pattern; mod link_module; mod module_compiler; mod name_allocator; +mod resolve_path; mod scope; mod scope_analysis; @@ -20,3 +22,5 @@ pub use gather_modules::gather_modules; pub use link_module::link_module; pub use module_compiler::compile; pub use module_compiler::CompilerOutput; +pub use resolve_path::resolve_path; +pub use resolve_path::ResolvedPath; diff --git a/valuescript_compiler/src/link_module.rs b/valuescript_compiler/src/link_module.rs index e1aabb1..77efd6c 100644 --- a/valuescript_compiler/src/link_module.rs +++ b/valuescript_compiler/src/link_module.rs @@ -1,5 +1,12 @@ use std::collections::HashMap; +use crate::asm::{ + Array, Definition, DefinitionContent, Instruction, InstructionOrLabel, Object, Pointer, Value, +}; +use crate::gather_modules::PathAndModule; +use crate::import_pattern::{ImportKind, ImportPattern}; +use crate::name_allocator::NameAllocator; +use crate::resolve_path::{resolve_path, ResolvedPath}; use crate::DiagnosticLevel; use crate::{asm::Module, Diagnostic}; @@ -8,14 +15,20 @@ pub struct LinkModuleResult { pub diagnostics: Vec, } -pub fn link_module(entry_point: &String, modules: &HashMap) -> LinkModuleResult { +pub fn link_module( + entry_point: &ResolvedPath, + modules: &HashMap, +) -> LinkModuleResult { let mut result = LinkModuleResult { module: None, diagnostics: vec![], }; - result.module = Some(match modules.get(&entry_point.clone()) { - Some(module) => module.clone(), + let mut pointer_allocator = NameAllocator::default(); + let mut included_modules = HashMap::::new(); + + let mut path_and_module = match modules.get(&entry_point.clone()) { + Some(path_and_module) => path_and_module.clone(), None => { result.diagnostics.push(Diagnostic { level: DiagnosticLevel::Error, @@ -25,9 +38,332 @@ pub fn link_module(entry_point: &String, modules: &HashMap) -> L return result; } - }); + }; - // TODO + let mut modules_to_include = resolve_and_rewrite_import_patterns(&mut path_and_module); + let mut modules_to_include_i = 0; + + // No rewrites should actually occur here, but we still need to do this to get the names into the + // allocator. + rewrite_pointers(&mut path_and_module.module, &mut pointer_allocator); + + included_modules.insert( + entry_point.clone(), + ( + path_and_module.module.export_default.clone(), + path_and_module.module.export_star.clone(), + ), + ); + + while modules_to_include_i < modules_to_include.len() { + let module_to_include = modules_to_include[modules_to_include_i].clone(); + modules_to_include_i += 1; + + if included_modules.contains_key(&module_to_include) { + continue; + } + + let mut including_path_and_module = match modules.get(&module_to_include) { + Some(pm) => pm.clone(), + None => { + result.diagnostics.push(Diagnostic { + level: DiagnosticLevel::Error, + message: format!("Module not found: {}", module_to_include), + span: swc_common::DUMMY_SP, + }); + + continue; + } + }; + + let mut new_modules_to_include = + resolve_and_rewrite_import_patterns(&mut including_path_and_module); + + modules_to_include.append(&mut new_modules_to_include); + + rewrite_pointers( + &mut including_path_and_module.module, + &mut pointer_allocator, + ); + + included_modules.insert( + module_to_include, + ( + including_path_and_module.module.export_default, + including_path_and_module.module.export_star, + ), + ); + + path_and_module + .module + .definitions + .append(&mut including_path_and_module.module.definitions); + } + + result.module = Some(rewrite_import_patterns( + path_and_module.module, + &included_modules, + &mut result.diagnostics, + )); result } + +fn rewrite_pointers(module: &mut Module, pointer_allocator: &mut NameAllocator) { + let mut pointer_rewriter = PointerRewriter::init(module, pointer_allocator); + pointer_rewriter.module(module); +} + +struct PointerRewriter { + pointer_map: HashMap, +} + +impl PointerRewriter { + pub fn init(module: &Module, pointer_allocator: &mut NameAllocator) -> Self { + let mut self_ = Self { + pointer_map: HashMap::new(), + }; + + for definition in &module.definitions { + let mapped_pointer = Pointer { + name: pointer_allocator.allocate(&definition.pointer.name), + }; + + if mapped_pointer != definition.pointer { + self_ + .pointer_map + .insert(definition.pointer.clone(), mapped_pointer); + } + } + + self_ + } + + pub fn module(&mut self, module: &mut Module) { + self.value(&mut module.export_default); + self.object(&mut module.export_star); + + for definition in &mut module.definitions { + self.definition(definition); + } + } + + fn definition(&mut self, definition: &mut Definition) { + self.pointer(&mut definition.pointer); + + match &mut definition.content { + DefinitionContent::Function(function) => { + self.body(&mut function.body); + } + DefinitionContent::Class(class) => { + self.value(&mut class.constructor); + self.value(&mut class.methods); + } + DefinitionContent::Value(value) => { + self.value(value); + } + DefinitionContent::Lazy(lazy) => { + self.body(&mut lazy.body); + } + } + } + + fn pointer(&mut self, pointer: &mut Pointer) { + if let Some(mapped_pointer) = self.pointer_map.get(&pointer) { + *pointer = mapped_pointer.clone(); + } + } + + fn array(&mut self, array: &mut Array) { + for value in &mut array.values { + self.value(value); + } + } + + fn object(&mut self, object: &mut Object) { + for (key, value) in object.properties.iter_mut() { + self.value(key); + self.value(value); + } + } + + fn value(&mut self, value: &mut Value) { + use Value::*; + + match value { + Void => {} + Undefined => {} + Null => {} + Bool(_) => {} + Number(_) => {} + String(_) => {} + Array(array) => { + self.array(array); + } + Object(object) => { + self.object(object); + } + Register(_) => {} + Pointer(pointer) => { + self.pointer(pointer); + } + Builtin(_) => {} + } + } + + fn instruction(&mut self, instruction: &mut Instruction) { + use Instruction::*; + + match instruction { + End => {} + OpInc(_) | OpDec(_) => {} + Mov(arg, _) + | OpNot(arg, _) + | OpBitNot(arg, _) + | TypeOf(arg, _) + | UnaryPlus(arg, _) + | UnaryMinus(arg, _) + | Import(arg, _) + | ImportStar(arg, _) => { + self.value(arg); + } + OpPlus(arg1, arg2, _) + | OpMinus(arg1, arg2, _) + | OpMul(arg1, arg2, _) + | OpDiv(arg1, arg2, _) + | OpMod(arg1, arg2, _) + | OpExp(arg1, arg2, _) + | OpEq(arg1, arg2, _) + | OpNe(arg1, arg2, _) + | OpTripleEq(arg1, arg2, _) + | OpTripleNe(arg1, arg2, _) + | OpAnd(arg1, arg2, _) + | OpOr(arg1, arg2, _) + | OpLess(arg1, arg2, _) + | OpLessEq(arg1, arg2, _) + | OpGreater(arg1, arg2, _) + | OpGreaterEq(arg1, arg2, _) + | OpNullishCoalesce(arg1, arg2, _) + | OpOptionalChain(arg1, arg2, _) + | OpBitAnd(arg1, arg2, _) + | OpBitOr(arg1, arg2, _) + | OpBitXor(arg1, arg2, _) + | OpLeftShift(arg1, arg2, _) + | OpRightShift(arg1, arg2, _) + | OpRightShiftUnsigned(arg1, arg2, _) + | InstanceOf(arg1, arg2, _) + | In(arg1, arg2, _) + | Call(arg1, arg2, _) + | Bind(arg1, arg2, _) + | Sub(arg1, arg2, _) + | SubMov(arg1, arg2, _) + | New(arg1, arg2, _) => { + self.value(arg1); + self.value(arg2); + } + Apply(arg1, arg2, arg3, _) | SubCall(arg1, arg2, arg3, _) => { + self.value(arg1); + self.value(arg2); + self.value(arg3); + } + Jmp(_) => {} + JmpIf(arg, _) => { + self.value(arg); + } + }; + } + + fn body(&mut self, body: &mut Vec) { + for instruction_or_label in body { + match instruction_or_label { + InstructionOrLabel::Instruction(instruction) => { + self.instruction(instruction); + } + InstructionOrLabel::Label(_) => {} + } + } + } +} + +fn resolve_and_rewrite_import_patterns(path_and_module: &mut PathAndModule) -> Vec { + let mut resolved_paths = Vec::::new(); + + for definition in &mut path_and_module.module.definitions { + match ImportPattern::decode(definition) { + Some(_) => {} + None => continue, + } + + let lazy = match &mut definition.content { + DefinitionContent::Lazy(lazy) => lazy, + _ => panic!("Inconsistent with import pattern"), + }; + + let first_instruction = match lazy.body.first_mut() { + Some(InstructionOrLabel::Instruction(instruction)) => instruction, + _ => panic!("Inconsistent with import pattern"), + }; + + let import_string = match first_instruction { + Instruction::Import(Value::String(string), _) + | Instruction::ImportStar(Value::String(string), _) => string, + _ => panic!("Inconsistent with import pattern"), + }; + + let resolved = resolve_path(&path_and_module.path, import_string); + resolved_paths.push(resolved.clone()); + *import_string = resolved.path; + } + + resolved_paths +} + +fn rewrite_import_patterns( + mut module: Module, + included_modules: &HashMap, + diagnostics: &mut Vec, +) -> Module { + for definition in &mut module.definitions { + let import_pattern = match ImportPattern::decode(definition) { + Some(import_pattern) => import_pattern, + None => continue, + }; + + let resolved_path = ResolvedPath { + // Should have been resolved already during resolve_and_rewrite_import_patterns + path: import_pattern.path.clone(), + }; + + let (default, namespace) = match included_modules.get(&resolved_path) { + Some(el) => el, + None => continue, + }; + + let new_definition = Definition { + pointer: import_pattern.pointer, + content: match import_pattern.kind { + ImportKind::Default => DefinitionContent::Value(default.clone()), + ImportKind::Star => DefinitionContent::Value(Value::Object(Box::new(namespace.clone()))), + ImportKind::Name(name) => match namespace.try_resolve_key(&name) { + Some(value) => DefinitionContent::Value(value.clone()), + None => { + diagnostics.push(Diagnostic { + level: DiagnosticLevel::Error, + message: format!( + "Imported name `{}` does not exist in `{}`", + name, import_pattern.path + ), + span: swc_common::DUMMY_SP, + }); + + continue; + } + }, + }, + }; + + *definition = new_definition; + } + + module +} diff --git a/valuescript_compiler/src/resolve_path.rs b/valuescript_compiler/src/resolve_path.rs new file mode 100644 index 0000000..4209c6e --- /dev/null +++ b/valuescript_compiler/src/resolve_path.rs @@ -0,0 +1,27 @@ +use std::path::{Path, PathBuf}; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct ResolvedPath { + pub path: String, +} + +impl std::fmt::Display for ResolvedPath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.path) + } +} + +pub fn resolve_path(importer_path: &ResolvedPath, path: &String) -> ResolvedPath { + let importer_path_buf = PathBuf::from(&importer_path.path); + let parent = importer_path_buf.parent().unwrap_or_else(|| Path::new("/")); + + ResolvedPath { + path: parent + .join(path) + .canonicalize() + .expect("Failed to canonicalize path") + .to_str() + .expect("Failed to convert path to string") + .to_string(), + } +} diff --git a/vstc/src/compile_command.rs b/vstc/src/compile_command.rs index ace9c4b..b40f60e 100644 --- a/vstc/src/compile_command.rs +++ b/vstc/src/compile_command.rs @@ -5,6 +5,8 @@ use std::process::exit; use super::handle_diagnostics_cli::handle_diagnostics_cli; use valuescript_compiler::gather_modules; use valuescript_compiler::link_module; +use valuescript_compiler::resolve_path; +use valuescript_compiler::ResolvedPath; pub fn compile_command(args: &Vec) { if args.len() != 3 { @@ -15,24 +17,32 @@ pub fn compile_command(args: &Vec) { let entry_path = &args[2]; - let abs_entry_path = std::fs::canonicalize(entry_path) - .expect("Failed to get absolute path") - .to_str() - .expect("Failed to convert to str") - .to_string(); + // Like cwd (current working dir), but it's cwd/file. + // This is a bit of a hack so we can use resolve_path to get the absolute path of the entry point. + let cwd_file = ResolvedPath { + path: std::env::current_dir() + .expect("Failed to get current directory") + .as_path() + .join("file") + .to_str() + .expect("Failed to convert to str") + .to_string(), + }; - let gm = gather_modules(abs_entry_path, |path| { + let resolved_entry_path = resolve_path(&cwd_file, entry_path); + + let gm = gather_modules(resolved_entry_path, |path| { std::fs::read_to_string(path).map_err(|err| err.to_string()) }); for (path, diagnostics) in gm.diagnostics.iter() { - handle_diagnostics_cli(path, diagnostics); + handle_diagnostics_cli(&path.path, diagnostics); } let link_module_result = link_module(&gm.entry_point, &gm.modules); // FIXME: Diagnostics from link_module should have paths associated - handle_diagnostics_cli(&gm.entry_point, &link_module_result.diagnostics); + handle_diagnostics_cli(&gm.entry_point.path, &link_module_result.diagnostics); let module = link_module_result .module