use crate::tokenizer::{Token, tokenize};
use hylo_ir::*;
-use std::collections::HashSet;
+use std::collections::{HashSet, HashMap};
use std::path::PathBuf;
+use std::rc::Rc;
-use anyhow::{Result, anyhow};
+use anyhow::{Result, bail};
// Builds an `IR::StackPush` from a borrowed numeric token value.
//
// `push_num!(n)` casts `*n` straight to `u64`. `push_num!(n, T)` casts through
// `T` first, so a signed value is reinterpreted at its own width before being
// widened instead of being sign-extended to 64 bits.
macro_rules! push_num {
    ($value:ident) => {
        push_num!($value, u64)
    };
    ($value:ident, $via:ty) => {
        IR::StackPush(*$value as $via as u64)
    };
}
-fn import(importer_dir: &PathBuf, specifier: &str, imported: &mut HashSet<PathBuf>, is_entrypoint: bool) -> Result<Option<ModuleWithImports>> {
- let mut path = PathBuf::from(specifier);
- if path.is_relative() {
- let mut new_path = importer_dir.clone();
- new_path.push(path);
- path = new_path.canonicalize()?;
- }
- if imported.contains(&path) {
- return Ok(None);
- }
-
- let contents = std::fs::read_to_string(&path)?;
+#[derive(Debug, Default)]
+struct IRModule { // IR generated from one source file, together with its import and export lists.
+ data: Vec<IR>, // `IR::StringDef` entries, one per string token found in the module's words
+ text: Vec<IR>, // for each word: an `IR::Label`, the word's instructions, then `IR::Ret`
+ imports: Vec<Rc<IRModule>>, // modules this one imports, as returned by `ImportTree::import`
+ exports: Vec<String>, // word names copied from the parsed module's `exports`
+ source_file: PathBuf, // path the module was generated from; also its key in `ImportTree::all_modules`
+}
- Ok(Some(generate_internal(path, &Module::parse(tokenize(&contents)?, is_entrypoint)?, imported)))
+#[derive(Default)]
+struct ImportTree { // State for loading a graph of modules and flattening it into one data/text pair.
+ data: Vec<IR>, // flattened string definitions, appended by `collapse`
+ text: Vec<IR>, // flattened instructions, appended by `collapse`
+ all_modules: HashMap<PathBuf, Rc<IRModule>>, // every generated module, keyed by its `source_file`
+ all_exports: HashSet<String>, // export names of every module generated so far
+ entrypoint: Rc<IRModule>, // module imported with `is_entrypoint == true`; an empty default until then
+ module_count: usize, // next number `collapse` hands out to make a module's names unique
+ collapse_seen: HashSet<PathBuf>, // source files `collapse` consults to skip modules it already emitted
}
-fn collapse_module(mut module_w: ModuleWithImports) -> Result<IRModule> {
- let mut module = module_w.module.take().ok_or(anyhow!("no module to collapse"))?;
- let mut data = std::mem::take(&mut module.data);
- let mut prev_data_len = data.len();
- let mut text = std::mem::take(&mut module.text);
-
- module_w.imports.take().ok_or(anyhow!("no imports to collapse"))?.into_iter().try_for_each(|imported| -> Result<()>{
- let mut ir_mod = collapse_module(imported)?;
- let mut mod_data = std::mem::take(&mut ir_mod.data);
- let mod_data_len = mod_data.len();
- data.append(&mut mod_data);
-
- let mut mod_text = std::mem::take(&mut ir_mod.text).into_iter().map(|ir| {
- if let IR::StackPushString(num) = ir {
- IR::StackPushString(num + prev_data_len)
+impl ImportTree {
+    /// Loads the module named by `specifier`, generating its IR on first use.
+    ///
+    /// `specifier` is resolved against `importer_dir` (an absolute specifier
+    /// replaces it) and then canonicalized, so every spelling of the same file
+    /// maps to a single cache entry. A module that has already been generated
+    /// is returned from `all_modules` without reading the file again. When
+    /// `is_entrypoint` is true the module is also recorded as the entrypoint.
+    ///
+    /// # Errors
+    /// Fails if the path cannot be canonicalized, the file cannot be read, or
+    /// tokenizing/parsing its contents fails.
+    fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result<Rc<IRModule>> {
+        // `join` keeps an absolute specifier as-is and prefixes a relative one.
+        let path = importer_dir.join(specifier).canonicalize()?;
+        if let Some(existing) = self.all_modules.get(&path) {
+            return Ok(Rc::clone(existing));
+        }
+
+        let contents = std::fs::read_to_string(&path)?;
+        let parsed = Module::parse(tokenize(&contents)?, is_entrypoint)?;
+
+        // NOTE(review): the module is cached only after generation finishes, so a
+        // cycle of imports would recurse without terminating; confirm cycles are
+        // rejected before this point.
+        let module = Rc::new(self.generate_internal(path, &parsed));
+        self.all_modules.insert(module.source_file.clone(), Rc::clone(&module));
+        if is_entrypoint {
+            self.entrypoint = Rc::clone(&module);
+        }
+        Ok(module)
+    }
+
+ // Lowers one parsed module into an `IRModule`, importing its dependencies first.
+ //
+ // Imports are resolved relative to the directory containing `path`. An import
+ // that fails is printed to stderr and skipped instead of aborting generation.
+ // Every export name is also recorded in `self.all_exports`.
+ fn generate_internal(&mut self, path: PathBuf, module: &Module) -> IRModule {
+ // Eventually these will end up being sections in assembly
+ let mut text = vec![];
+ let mut data = vec![];
+
+ let mut imports = vec![];
+ module.imports.iter().for_each(|imported| {
+ // When `path` has no parent directory the import is skipped (empty `else` below).
+ if let Some(parent_path) = path.parent() {
+ match self.import(&parent_path.to_path_buf(), imported, false) {
+ Ok(module) => {
+ imports.push(module);
+ },
+ Err(msg) => {
+ eprintln!("{}", msg);
+ }
+ }
} else {
- ir
}
- }).collect::<Vec<_>>();
- text.append(&mut mod_text);
+ });
+
+ let exports: Vec<_> = module.exports.iter().map(|s| {
+ self.all_exports.insert(s.to_string());
+ s.to_string()
+ }).collect();
+
+
+ // One `Vec<IR>` per module is pushed here and flattened again when the struct is built.
+ text.push(module.words.iter().map(|def| {
+ let mut body = def.instructions.iter().map(|inst| {
+ let mapped_ir = match inst {
+ Token::Word(word) => {
+ match *word {
+ "@" => IR::Load,
+ "!" => IR::Store,
+ "dup" => IR::Dup,
+ "swap" => IR::Swap,
+ "drop" => IR::Drop,
+ "over" => IR::Over,
+ "puts" => IR::PutS,
+ "putn" => IR::PutN,
+ "if" => IR::If,
+ "endif" => IR::EndIf,
+ "=" => IR::Equals,
+ ">" => IR::GreaterThan,
+ "+" => IR::AddU64,
+ "-" => IR::SubtractU64,
+ "*" => IR::MultiplyU64,
+ "/" => IR::DivideU64,
+ "%" => IR::ModU64,
+ "|" => IR::BitwiseOr,
+ "sys0" => IR::Sys0,
+ "sys1" => IR::Sys1,
+ "sys2" => IR::Sys2,
+ "sys3" => IR::Sys3,
+ "sys4" => IR::Sys4,
+ "sys5" => IR::Sys5,
+ "sys6" => IR::Sys6,
+ // TODO num type specific math like `+:i32`, etc.
+ // Anything that is not a builtin is a call to a user-defined word.
+ _ => IR::Call(String::from(*word))
+ }
+ },
+ Token::String(text) => {
+ // Computed before the push, so `data.len()` is the index the new
+ // definition is about to occupy (and cannot underflow when `data` is empty).
+ let string_label = format!("string_{}", data.len());
+ data.push(IR::StringDef(string_label.clone(), String::from(*text)));
+ IR::StackPushString(string_label)
+ },
+ // Signed tokens are cast through the unsigned type of the same width first.
+ Token::NumU8(num) => push_num!(num),
+ Token::NumI8(num) => push_num!(num, u8),
+ Token::NumU16(num) => push_num!(num),
+ Token::NumI16(num) => push_num!(num, u16),
+ Token::NumU32(num) => push_num!(num),
+ Token::NumI32(num) => push_num!(num, u32),
+ Token::NumU64(num) => push_num!(num),
+ Token::NumI64(num) => push_num!(num),
+ // NOTE(review): `as u64` converts a float's numeric value (dropping the
+ // fraction), not its bit pattern; confirm that is what is wanted here.
+ Token::NumF32(num) => push_num!(num),
+ Token::NumF64(num) => push_num!(num),
+ };
+ mapped_ir
+ }).collect::<Vec<_>>();
+
+ // Each word becomes: label, body, return.
+ let mut result = vec![IR::Label(def.name.to_string())];
+ result.append(&mut body);
+ result.push(IR::Ret);
+ result
+ }).flatten().collect::<Vec<_>>());
+
+
+ IRModule {
+ text: text.into_iter().flatten().collect::<Vec<_>>(),
+ data,
+ imports,
+ exports,
+ source_file: path,
+ }
+ }
- prev_data_len += mod_data_len;
+ // Appends `module`, preceded by everything it imports, to `self.data`/`self.text`.
+ //
+ // Each module is given a number. Its string labels are suffixed with that number.
+ // Labels of words that are neither exported by the module nor part of the
+ // entrypoint get a `_m<number>_` prefix. A call is left untouched when it is made
+ // from the entrypoint or targets a name exported by any module; otherwise it gets
+ // the caller's `_m<number>_` prefix so it reaches that module's private label.
+ // Returns an error if a module's data holds anything other than `IR::StringDef`.
+ fn collapse(&mut self, module: Rc<IRModule>) -> Result<()> {
+ // `insert` returns false when the path is already recorded, i.e. this module was
+ // emitted before; recording it here keeps shared imports from being emitted twice.
+ if !self.collapse_seen.insert(module.source_file.clone()) {
+ return Ok(())
+ }
- Ok(())
- })?;
-
- Ok(IRModule {
- data,
- text,
- })
-}
+ // Dependencies first, so imported code precedes the code that uses it.
+ for imported in module.imports.iter().cloned() {
+ self.collapse(imported)?;
+ }
-pub fn compile(path: &str) -> Result<IRModule> {
- let mut imported = HashSet::new();
- let dir = std::env::current_dir()?;
- let module = import(&dir, path, &mut imported, true)?.expect("somehow we've already imported this");
- collapse_module(module) // TODO remove unused words
-}
+ let is_entrypoint = module.source_file == self.entrypoint.source_file;
+
+ let module_number = self.module_count;
+ self.module_count += 1;
-#[derive(Debug)]
-struct ModuleWithImports {
- module: Option<IRModule>,
- imports: Option<Vec<ModuleWithImports>>,
-}
+ for string in &module.data {
+ if let IR::StringDef(name, val) = string {
+ let new_name = format!("{}_{}", name, module_number);
+ self.data.push(IR::StringDef(new_name, val.clone()));
+ } else {
+ bail!("non-string data");
+ }
+ }
-fn generate_internal(path: PathBuf, module: &Module, imported: &mut HashSet<PathBuf>) -> ModuleWithImports {
- // Eventually these will end up being sections in assembly
- let mut text = vec![];
- let mut data = vec![];
-
- let mut imports = vec![];
-
- let mut last_was_import = false;
-
- text.push(module.words.iter().map(|def| {
- let mut body = def.instructions.iter().map(|inst| {
- let mapped_ir = match inst {
- Token::Word(word) => {
- match *word {
- "@" => IR::Load,
- "!" => IR::Store,
- "dup" => IR::Dup,
- "swap" => IR::Swap,
- "drop" => IR::Drop,
- "over" => IR::Over,
- "puts" => IR::PutS,
- "putn" => IR::PutN,
- "if" => IR::If,
- "endif" => IR::EndIf,
- "=" => IR::Equals,
- ">" => IR::GreaterThan,
- "+" => IR::AddU64,
- "-" => IR::SubtractU64,
- "*" => IR::MultiplyU64,
- "/" => IR::DivideU64,
- "%" => IR::ModU64,
- "|" => IR::BitwiseOr,
- "import" => IR::Import,
- "sys0" => IR::Sys0,
- "sys1" => IR::Sys1,
- "sys2" => IR::Sys2,
- "sys3" => IR::Sys3,
- "sys4" => IR::Sys4,
- "sys5" => IR::Sys5,
- "sys6" => IR::Sys6,
- // TODO num type specfic math like `+:i32`, etc.
- _ => IR::Call(String::from(*word))
+ for instruction in &module.text {
+ let new_instruction = match instruction {
+ // Must mirror the suffix applied to the `StringDef` names above.
+ IR::StackPushString(name) => {
+ let new_name = format!("{}_{}", name, module_number);
+ IR::StackPushString(new_name)
+ },
+ IR::Label(name) => {
+ if is_entrypoint || module.exports.contains(name) {
+ instruction.clone()
+ } else {
+ let new_name = format!("_m{}_{}", module_number, name);
+ IR::Label(new_name)
}
},
- Token::String(text) => {
- if last_was_import {
- if let Some(parent_path) = path.parent() {
- match import(&parent_path.to_path_buf(), text, imported, false) {
- Ok(module) => {
- if let Some(module) = module {
- imports.push(module);
- }
- },
- Err(msg) => {
- eprintln!("{}", msg);
- }
- }
- }
- IR::ImportString // This will be elided later
+ IR::Call(name) => {
+ if is_entrypoint || self.all_exports.contains(name) {
+ instruction.clone()
} else {
- data.push(IR::StringDef(String::from(*text)));
- IR::StackPushString(data.len() - 1)
+ // A private call stays a call; only its target name is rewritten.
+ let new_name = format!("_m{}_{}", module_number, name);
+ IR::Call(new_name)
}
},
- Token::NumU8(num) => push_num!(num),
- Token::NumI8(num) => push_num!(num, u8),
- Token::NumU16(num) => push_num!(num),
- Token::NumI16(num) => push_num!(num, u16),
- Token::NumU32(num) => push_num!(num),
- Token::NumI32(num) => push_num!(num, u32),
- Token::NumU64(num) => push_num!(num),
- Token::NumI64(num) => push_num!(num),
- Token::NumF32(num) => push_num!(num),
- Token::NumF64(num) => push_num!(num),
+ _ => instruction.clone()
};
- last_was_import = match mapped_ir {
- IR::Import => true,
- _ => false,
- };
- mapped_ir
- }).filter(|ir| {
- // Elide IRs that shouldn't actually be in the output IR.
- match ir {
- IR::Import => false,
- IR::ImportString => false,
- _ => true,
- }
- }).collect::<Vec<_>>();
+ self.text.push(new_instruction);
+ }
- let mut result = vec![IR::Label(def.name.to_string())];
- result.append(&mut body);
- result.push(IR::Ret);
- result
- }).flatten().collect::<Vec<_>>());
-
-
- ModuleWithImports {
- module: Some(IRModule {
- text: text.into_iter().flatten().collect::<Vec<_>>(),
- data,
- }),
- imports: Some(imports),
+ Ok(())
}
}
+
+
+
+/// Compiles the program whose entrypoint is the file at `path`.
+///
+/// `path` is resolved against the current working directory. The entrypoint
+/// and the modules it imports are then collapsed into a single `IRObject`.
+///
+/// # Errors
+/// Fails if the working directory cannot be determined, or if importing or
+/// collapsing the entrypoint fails.
+pub fn compile(path: &str) -> Result<IRObject> {
+    let working_dir = std::env::current_dir()?;
+    let mut tree = ImportTree::default();
+    let entry = tree.import(&working_dir, path, true)?;
+    tree.collapse(entry)?;
+    // TODO remove unused words
+    let ImportTree { data, text, .. } = tree;
+    Ok(IRObject { data, text })
+}