From 4e090f351e7b5cb54b7958e12f56faef694206a1 Mon Sep 17 00:00:00 2001 From: Bryan English Date: Tue, 20 Jan 2026 00:57:44 -0500 Subject: [PATCH] slightly more proper import system Words now need to be exported with `export word-name` to be used in other modules. Imports other than at the top level now work. --- hylo-lang/examples/put2.hylo | 4 + hylo-lang/hylo-interpret/src/lib.rs | 17 +- hylo-lang/hylo-ir/src/lib.rs | 14 +- hylo-lang/hyloc/src/ir.rs | 338 +++++++++++++---------- hylo-lang/hyloc/src/parser.rs | 36 ++- hylo-lang/hyloc/src/riscv_asm_codegen.rs | 19 +- hylo-lang/hyloc/src/tokenizer.rs | 2 +- 7 files changed, 253 insertions(+), 177 deletions(-) diff --git a/hylo-lang/examples/put2.hylo b/hylo-lang/examples/put2.hylo index 440f50f..dbdfb89 100644 --- a/hylo-lang/examples/put2.hylo +++ b/hylo-lang/examples/put2.hylo @@ -1 +1,5 @@ : put2 putn putn ; + +: foobar dup dup ; + +export put2 diff --git a/hylo-lang/hylo-interpret/src/lib.rs b/hylo-lang/hylo-interpret/src/lib.rs index 08b8721..e5f46bd 100644 --- a/hylo-lang/hylo-interpret/src/lib.rs +++ b/hylo-lang/hylo-interpret/src/lib.rs @@ -6,16 +6,16 @@ use syscalls::*; use anyhow::{Result, anyhow}; pub struct Interpreter<'a> { - module: &'a IRModule, + module: &'a IRObject, data_stack: Vec, instruction_pointer: usize, return_stack: Vec, labels: HashMap, - strings: Vec, + strings: HashMap, } impl<'a> Interpreter<'a> { - pub fn new(ir_mod: &'a IRModule) -> Result { + pub fn new(ir_mod: &'a IRObject) -> Result { let mut index = 0; let mut labels = HashMap::new(); for token in ir_mod.text.iter() { @@ -26,14 +26,15 @@ impl<'a> Interpreter<'a> { } let instruction_pointer = *labels.get("main").ok_or(anyhow!("no main word found!"))?; - let strings = ir_mod.data.iter().filter_map(|s| { + let mut strings = HashMap::new(); + ir_mod.data.iter().for_each(|s| { match s { - IR::StringDef(s) => { - Some(s.clone()) + IR::StringDef(label, string) => { + strings.insert(label.clone(), string.clone()); }, - _ => None + _ => {} } - }).collect(); + }); Ok(Self { module: ir_mod, diff --git a/hylo-lang/hylo-ir/src/lib.rs b/hylo-lang/hylo-ir/src/lib.rs index e1db8f7..88173de 100644 --- a/hylo-lang/hylo-ir/src/lib.rs +++ b/hylo-lang/hylo-ir/src/lib.rs @@ -1,17 +1,14 @@ use serde_yaml::{from_str, to_string, Error}; use serde_derive::{Serialize, Deserialize}; -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, Clone)] pub enum IR { Label(String), Call(String), Ret, StackPush(u64), - StackPushString(usize), - StringDef(String), - - Import, // Not actually used at runtime. Should be elided. - ImportString, // Not actually used at runtime. Should be elided. + StackPushString(String), // refers to string label, not the string itself + StringDef(String, String), // first is string label, second is string value // These next ones should always be inlined, so they're in IR. Load, // @ ( addr -- x ) -- Fetch memory contents at addr @@ -48,13 +45,14 @@ pub enum IR { Sys6, } +// This is like an .o file. #[derive(Serialize, Deserialize, Debug)] -pub struct IRModule { +pub struct IRObject { pub text: Vec, pub data: Vec, } -impl IRModule { +impl IRObject { pub fn to_s(&self) -> Result { to_string(self) } diff --git a/hylo-lang/hyloc/src/ir.rs b/hylo-lang/hyloc/src/ir.rs index 643dc8b..0860ced 100644 --- a/hylo-lang/hyloc/src/ir.rs +++ b/hylo-lang/hyloc/src/ir.rs @@ -2,178 +2,220 @@ use crate::parser::Module; use crate::tokenizer::{Token, tokenize}; use hylo_ir::*; -use std::collections::HashSet; +use std::collections::{HashSet, HashMap}; use std::path::PathBuf; +use std::rc::Rc; -use anyhow::{Result, anyhow}; +use anyhow::{Result, bail}; macro_rules! push_num { ($num:ident) => { IR::StackPush(*$num as u64) }; ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) }; } -fn import(importer_dir: &PathBuf, specifier: &str, imported: &mut HashSet, is_entrypoint: bool) -> Result> { - let mut path = PathBuf::from(specifier); - if path.is_relative() { - let mut new_path = importer_dir.clone(); - new_path.push(path); - path = new_path.canonicalize()?; - } - if imported.contains(&path) { - return Ok(None); - } - - let contents = std::fs::read_to_string(&path)?; +#[derive(Debug, Default)] +struct IRModule { + data: Vec, + text: Vec, + imports: Vec>, + exports: Vec, + source_file: PathBuf, +} - Ok(Some(generate_internal(path, &Module::parse(tokenize(&contents)?, is_entrypoint)?, imported))) +#[derive(Default)] +struct ImportTree { + data: Vec, + text: Vec, + all_modules: HashMap>, + all_exports: HashSet, + entrypoint: Rc, + module_count: usize, + collapse_seen: HashSet, } -fn collapse_module(mut module_w: ModuleWithImports) -> Result { - let mut module = module_w.module.take().ok_or(anyhow!("no module to collapse"))?; - let mut data = std::mem::take(&mut module.data); - let mut prev_data_len = data.len(); - let mut text = std::mem::take(&mut module.text); - - module_w.imports.take().ok_or(anyhow!("no imports to collapse"))?.into_iter().try_for_each(|imported| -> Result<()>{ - let mut ir_mod = collapse_module(imported)?; - let mut mod_data = std::mem::take(&mut ir_mod.data); - let mod_data_len = mod_data.len(); - data.append(&mut mod_data); - - let mut mod_text = std::mem::take(&mut ir_mod.text).into_iter().map(|ir| { - if let IR::StackPushString(num) = ir { - IR::StackPushString(num + prev_data_len) +impl ImportTree { + fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result> { + let mut path = PathBuf::from(specifier); + if path.is_relative() { + let mut new_path = importer_dir.clone(); + new_path.push(path); + path = new_path.canonicalize()?; + } + if self.all_modules.contains_key(&path) { + let module = self.all_modules.get(&path).unwrap().clone(); + return Ok(module); + } + + let contents = std::fs::read_to_string(&path)?; + + let module = self.generate_internal(path, &Module::parse(tokenize(&contents)?, is_entrypoint)?); + let module = Rc::new(module); + self.all_modules.insert(module.source_file.clone(), module.clone()); + if is_entrypoint { + self.entrypoint = module.clone(); + } + Ok(module) + } + + fn generate_internal(&mut self, path: PathBuf, module: &Module) -> IRModule { + // Eventually these will end up being sections in assembly + let mut text = vec![]; + let mut data = vec![]; + + let mut imports = vec![]; + module.imports.iter().for_each(|imported| { + if let Some(parent_path) = path.parent() { + match self.import(&parent_path.to_path_buf(), imported, false) { + Ok(module) => { + imports.push(module); + }, + Err(msg) => { + eprintln!("{}", msg); + } + } } else { - ir } - }).collect::>(); - text.append(&mut mod_text); + }); + + let exports: Vec<_> = module.exports.iter().map(|s| { + self.all_exports.insert(s.to_string()); + s.to_string() + }).collect(); + + + text.push(module.words.iter().map(|def| { + let mut body = def.instructions.iter().map(|inst| { + let mapped_ir = match inst { + Token::Word(word) => { + match *word { + "@" => IR::Load, + "!" => IR::Store, + "dup" => IR::Dup, + "swap" => IR::Swap, + "drop" => IR::Drop, + "over" => IR::Over, + "puts" => IR::PutS, + "putn" => IR::PutN, + "if" => IR::If, + "endif" => IR::EndIf, + "=" => IR::Equals, + ">" => IR::GreaterThan, + "+" => IR::AddU64, + "-" => IR::SubtractU64, + "*" => IR::MultiplyU64, + "/" => IR::DivideU64, + "%" => IR::ModU64, + "|" => IR::BitwiseOr, + "sys0" => IR::Sys0, + "sys1" => IR::Sys1, + "sys2" => IR::Sys2, + "sys3" => IR::Sys3, + "sys4" => IR::Sys4, + "sys5" => IR::Sys5, + "sys6" => IR::Sys6, + // TODO num type specfic math like `+:i32`, etc. + _ => IR::Call(String::from(*word)) + } + }, + Token::String(text) => { + let string_label = format!("string_{}", data.len() - 1); + data.push(IR::StringDef(string_label.clone(), String::from(*text))); + IR::StackPushString(string_label) + }, + Token::NumU8(num) => push_num!(num), + Token::NumI8(num) => push_num!(num, u8), + Token::NumU16(num) => push_num!(num), + Token::NumI16(num) => push_num!(num, u16), + Token::NumU32(num) => push_num!(num), + Token::NumI32(num) => push_num!(num, u32), + Token::NumU64(num) => push_num!(num), + Token::NumI64(num) => push_num!(num), + Token::NumF32(num) => push_num!(num), + Token::NumF64(num) => push_num!(num), + }; + mapped_ir + }).collect::>(); + + let mut result = vec![IR::Label(def.name.to_string())]; + result.append(&mut body); + result.push(IR::Ret); + result + }).flatten().collect::>()); + + + IRModule { + text: text.into_iter().flatten().collect::>(), + data, + imports, + exports, + source_file: path, + } + } - prev_data_len += mod_data_len; + fn collapse(&mut self, module: Rc) -> Result<()> { + if self.collapse_seen.contains(&module.source_file) { + return Ok(()) + } - Ok(()) - })?; - - Ok(IRModule { - data, - text, - }) -} + for imported in module.imports.clone() { + self.collapse(imported)?; + } -pub fn compile(path: &str) -> Result { - let mut imported = HashSet::new(); - let dir = std::env::current_dir()?; - let module = import(&dir, path, &mut imported, true)?.expect("somehow we've already imported this"); - collapse_module(module) // TODO remove unused words -} + let is_entrypoint = module.source_file == self.entrypoint.source_file; + + let module_number = self.module_count; + self.module_count += 1; -#[derive(Debug)] -struct ModuleWithImports { - module: Option, - imports: Option>, -} + for string in &module.data { + if let IR::StringDef(name, val) = string { + let new_name = format!("{}_{}", name, module_number); + self.data.push(IR::StringDef(new_name, val.clone())); + } else { + bail!("non-string data"); + } + } -fn generate_internal(path: PathBuf, module: &Module, imported: &mut HashSet) -> ModuleWithImports { - // Eventually these will end up being sections in assembly - let mut text = vec![]; - let mut data = vec![]; - - let mut imports = vec![]; - - let mut last_was_import = false; - - text.push(module.words.iter().map(|def| { - let mut body = def.instructions.iter().map(|inst| { - let mapped_ir = match inst { - Token::Word(word) => { - match *word { - "@" => IR::Load, - "!" => IR::Store, - "dup" => IR::Dup, - "swap" => IR::Swap, - "drop" => IR::Drop, - "over" => IR::Over, - "puts" => IR::PutS, - "putn" => IR::PutN, - "if" => IR::If, - "endif" => IR::EndIf, - "=" => IR::Equals, - ">" => IR::GreaterThan, - "+" => IR::AddU64, - "-" => IR::SubtractU64, - "*" => IR::MultiplyU64, - "/" => IR::DivideU64, - "%" => IR::ModU64, - "|" => IR::BitwiseOr, - "import" => IR::Import, - "sys0" => IR::Sys0, - "sys1" => IR::Sys1, - "sys2" => IR::Sys2, - "sys3" => IR::Sys3, - "sys4" => IR::Sys4, - "sys5" => IR::Sys5, - "sys6" => IR::Sys6, - // TODO num type specfic math like `+:i32`, etc. - _ => IR::Call(String::from(*word)) + for instruction in &module.text { + let new_instruction = match instruction { + IR::StackPushString(name) => { + let new_name = format!("{}_{}", name, module_number); + IR::StackPushString(new_name) + }, + IR::Label(name) => { + if is_entrypoint || module.exports.contains(name) { + instruction.clone() + } else { + let new_name = format!("_m{}_{}", module_number, name); + IR::Label(new_name) } }, - Token::String(text) => { - if last_was_import { - if let Some(parent_path) = path.parent() { - match import(&parent_path.to_path_buf(), text, imported, false) { - Ok(module) => { - if let Some(module) = module { - imports.push(module); - } - }, - Err(msg) => { - eprintln!("{}", msg); - } - } - } - IR::ImportString // This will be elided later + IR::Call(name) => { + if is_entrypoint || self.all_exports.contains(name) { + instruction.clone() } else { - data.push(IR::StringDef(String::from(*text))); - IR::StackPushString(data.len() - 1) + let new_name = format!("_m{}_{}", module_number, name); + IR::Label(new_name) } }, - Token::NumU8(num) => push_num!(num), - Token::NumI8(num) => push_num!(num, u8), - Token::NumU16(num) => push_num!(num), - Token::NumI16(num) => push_num!(num, u16), - Token::NumU32(num) => push_num!(num), - Token::NumI32(num) => push_num!(num, u32), - Token::NumU64(num) => push_num!(num), - Token::NumI64(num) => push_num!(num), - Token::NumF32(num) => push_num!(num), - Token::NumF64(num) => push_num!(num), + _ => instruction.clone() }; - last_was_import = match mapped_ir { - IR::Import => true, - _ => false, - }; - mapped_ir - }).filter(|ir| { - // Elide IRs that shouldn't actually be in the output IR. - match ir { - IR::Import => false, - IR::ImportString => false, - _ => true, - } - }).collect::>(); + self.text.push(new_instruction); + } - let mut result = vec![IR::Label(def.name.to_string())]; - result.append(&mut body); - result.push(IR::Ret); - result - }).flatten().collect::>()); - - - ModuleWithImports { - module: Some(IRModule { - text: text.into_iter().flatten().collect::>(), - data, - }), - imports: Some(imports), + Ok(()) } } + + + +pub fn compile(path: &str) -> Result { + let dir = std::env::current_dir()?; + let mut tree: ImportTree = Default::default(); + let module = tree.import(&dir, path, true)?; + tree.collapse(module)?; + // TODO remove unused words + Ok(IRObject { + data: tree.data, + text: tree.text, + }) +} diff --git a/hylo-lang/hyloc/src/parser.rs b/hylo-lang/hyloc/src/parser.rs index 5e0ddbd..a5a95ee 100644 --- a/hylo-lang/hyloc/src/parser.rs +++ b/hylo-lang/hyloc/src/parser.rs @@ -9,15 +9,21 @@ pub struct WordDefinition<'a> { #[derive(Debug)] pub struct Module<'a> { - pub words: Vec> + pub words: Vec>, + pub imports: Vec<&'a str>, + pub exports: Vec<&'a str>, } impl<'a> Module<'a> { pub fn parse(input: Vec>, is_entrypoint: bool) -> Result { let mut result = vec![]; let mut main = vec![]; + let mut exports = vec![]; + let mut imports = vec![]; let mut current_word: Option = None; let mut about_to_start_word_def = false; + let mut last_was_import = false; + let mut last_was_export = false; for token in input { if about_to_start_word_def { @@ -52,7 +58,31 @@ impl<'a> Module<'a> { if let Some(ref mut current_word) = current_word { current_word.instructions.push(token); } else { - main.push(token); + match token { + Token::Word(word) => { + if word == "import" { + last_was_import = true; + } else if word == "export" { + last_was_export = true; + } else { + if last_was_export { + exports.push(word); + } else { + main.push(token.clone()); + } + } + }, + Token::String(string) => { + if last_was_import { + imports.push(string); + } else { + main.push(token.clone()); + } + }, + _ => { + main.push(token.clone()); + } + }; } } @@ -67,7 +97,7 @@ impl<'a> Module<'a> { }); } - Ok(Module { words: result }) + Ok(Module { words: result, imports, exports }) } #[cfg(test)] diff --git a/hylo-lang/hyloc/src/riscv_asm_codegen.rs b/hylo-lang/hyloc/src/riscv_asm_codegen.rs index ce790bd..3b43cf0 100644 --- a/hylo-lang/hyloc/src/riscv_asm_codegen.rs +++ b/hylo-lang/hyloc/src/riscv_asm_codegen.rs @@ -6,7 +6,7 @@ use std::collections::{HashMap, HashSet}; use std::fmt::Display; pub struct CodeGen<'a> { - module: &'a IRModule, + module: &'a IRObject, data_stack_size: usize, lines: Vec, } @@ -47,7 +47,7 @@ macro_rules! asm_macro { } impl<'a> CodeGen<'a> { - pub fn new(ir_mod: &'a IRModule, data_stack_size: usize) -> Self { + pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self { Self { module: ir_mod, data_stack_size, @@ -81,19 +81,16 @@ impl<'a> CodeGen<'a> { pub fn assembly(&mut self) -> Result{ let mut string_table = HashMap::new(); - let mut string_index = 0; // Static strings self.label(".section .rodata\n"); for ir in &self.module.data { match ir { - IR::StringDef(some_string) => { - string_table.insert(some_string.clone(), string_index); - self.label(format!("string_id_{}:", string_index)); + IR::StringDef(string_label, some_string) => { + string_table.insert(some_string.clone(), string_label); + self.label(string_label); self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz? self.label(""); - - string_index += 1; }, _ => bail!("Currently only string definitions are supported in the data section.") } @@ -119,7 +116,7 @@ impl<'a> CodeGen<'a> { IR::Label(name) => { last_label = name; if name == "main" { - self.label(".globl _start"); + self.label(".globl _start"); // TODO is globl necessary? self.label("_start:"); self.line("la s2, data_stack_end"); // set stack pointer } else { @@ -159,6 +156,10 @@ impl<'a> CodeGen<'a> { self.line(format!("li t0, {}", num)); self.push_from("t0"); }, + IR::StackPushString(name) => { + self.line(format!("li t0, {}", name)); + self.push_from("t0"); + }, IR::AddU64 => { self.pop_to("t0"); self.pop_to("t1"); diff --git a/hylo-lang/hyloc/src/tokenizer.rs b/hylo-lang/hyloc/src/tokenizer.rs index ca4371b..0240aa2 100644 --- a/hylo-lang/hyloc/src/tokenizer.rs +++ b/hylo-lang/hyloc/src/tokenizer.rs @@ -1,6 +1,6 @@ use anyhow::{Result, anyhow}; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Token<'a> { Word(&'a str), String(&'a str), -- 2.43.0