rethought.computer Git - sorel-lang.git/commitdiff
slightly more proper import system keep/4e090f351e7b5cb54b7958e12f56faef694206a1
author Bryan English <bryan@rethought.computer>
Tue, 20 Jan 2026 05:57:44 +0000 (00:57 -0500)
committer Bryan English <bryan@rethought.computer>
Tue, 10 Feb 2026 04:08:54 +0000 (04:08 +0000)
Words now need to be exported with `export word-name` to be used in
other modules.

Imports other than at the top level now work.

hylo-lang/examples/put2.hylo
hylo-lang/hylo-interpret/src/lib.rs
hylo-lang/hylo-ir/src/lib.rs
hylo-lang/hyloc/src/ir.rs
hylo-lang/hyloc/src/parser.rs
hylo-lang/hyloc/src/riscv_asm_codegen.rs
hylo-lang/hyloc/src/tokenizer.rs

index 440f50f071f8c7504d63fa17ebca90b3d47755c0..dbdfb89bfe77f879ecded42f7440ad45a4b9fff0 100644 (file)
@@ -1 +1,5 @@
 : put2 putn putn ; 
+
+: foobar dup dup ;
+
+export put2
index 08b87214a0072deb2e5535cef899019d00a19e1c..e5f46bd33736e12854db1977d44a9962f66b333c 100644 (file)
@@ -6,16 +6,16 @@ use syscalls::*;
 use anyhow::{Result, anyhow};
 
 pub struct Interpreter<'a> {
-    module: &'a IRModule,
+    module: &'a IRObject, // the IR object handed to `new`
     data_stack: Vec<u64>,
     instruction_pointer: usize,
     return_stack: Vec<usize>,
     labels: HashMap<String, usize>, // label name -> position; `new` seeds `instruction_pointer` from the `main` entry
-    strings: Vec<String>,
+    strings: HashMap<String, String>, // string label -> string value, filled from the `IR::StringDef` entries of the object's `data`
 }
 
 impl<'a> Interpreter<'a> {
-    pub fn new(ir_mod: &'a IRModule) -> Result<Self> {
+    pub fn new(ir_mod: &'a IRObject) -> Result<Self> {
         let mut index = 0;
         let mut labels = HashMap::new();
         for token in ir_mod.text.iter() {
@@ -26,14 +26,15 @@ impl<'a> Interpreter<'a> {
         }
         let instruction_pointer = *labels.get("main").ok_or(anyhow!("no main word found!"))?;
 
-        let strings = ir_mod.data.iter().filter_map(|s| {
+        let mut strings = HashMap::new();
+        ir_mod.data.iter().for_each(|s| {
             match s {
-                IR::StringDef(s) => {
-                    Some(s.clone())
+                IR::StringDef(label, string) => {
+                    strings.insert(label.clone(), string.clone());
                 },
-                _ => None
+                _ => {}
             }
-        }).collect();
+        });
 
         Ok(Self {
             module: ir_mod,
index e1db8f771061a3ff4527154e03711614c10d5381..88173deb68a0b67308ba61723639b35045052f09 100644 (file)
@@ -1,17 +1,14 @@
 use serde_yaml::{from_str, to_string, Error};
 use serde_derive::{Serialize, Deserialize};
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub enum IR {
     Label(String),
     Call(String),
     Ret,
     StackPush(u64),
-    StackPushString(usize),
-    StringDef(String),
-
-    Import, // Not actually used at runtime. Should be elided.
-    ImportString, // Not actually used at runtime. Should be elided.
+    StackPushString(String), // refers to string label, not the string itself
+    StringDef(String, String), // first is string label, second is string value
 
     // These next ones should always be inlined, so they're in IR.
     Load, // @ ( addr -- x ) -- Fetch memory contents at addr
@@ -48,13 +45,14 @@ pub enum IR {
     Sys6,
 }
 
+// This is like an .o file: the flat result handed to the interpreter and the code generator.
 #[derive(Serialize, Deserialize, Debug)]
-pub struct IRModule {
+pub struct IRObject {
     pub text: Vec<IR>, // instruction stream (labels, calls, pushes, ...)
     pub data: Vec<IR>, // static data; consumers in this commit only accept `IR::StringDef` here
 }
 
-impl IRModule {
+impl IRObject {
     pub fn to_s(&self) -> Result<String, Error> {
         to_string(self)
     }
index 643dc8b5eef4a589e5f217fc294d166bb511a983..0860ced4f8a349dff9e867ddd9a899dfb98daa2c 100644 (file)
@@ -2,178 +2,220 @@ use crate::parser::Module;
 use crate::tokenizer::{Token, tokenize};
 use hylo_ir::*;
 
-use std::collections::HashSet;
+use std::collections::{HashSet, HashMap};
 use std::path::PathBuf;
+use std::rc::Rc;
 
-use anyhow::{Result, anyhow};
+use anyhow::{Result, bail};
 
 macro_rules! push_num {
     ($num:ident) => { IR::StackPush(*$num as u64) }; // one-argument form: cast the dereferenced literal straight to u64
     ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) }; // two-argument form: cast through `$num_typ` first, then widen to u64
 }
 
-fn import(importer_dir: &PathBuf, specifier: &str, imported: &mut HashSet<PathBuf>, is_entrypoint: bool) -> Result<Option<ModuleWithImports>> {
-    let mut path = PathBuf::from(specifier);
-    if path.is_relative() {
-        let mut new_path = importer_dir.clone();
-        new_path.push(path);
-        path = new_path.canonicalize()?;
-    }
-    if imported.contains(&path) {
-        return Ok(None);
-    }
-
-    let contents = std::fs::read_to_string(&path)?;
+#[derive(Debug, Default)]
+struct IRModule { // IR generated from one source file, before modules are merged
+    data: Vec<IR>, // `IR::StringDef` entries, one per string literal in the file
+    text: Vec<IR>, // for every word: `Label`, its body, then `Ret`
+    imports: Vec<Rc<IRModule>>, // modules this file imported successfully
+    exports: Vec<String>, // word names listed with `export`
+    source_file: PathBuf, // path the module was loaded from; used as its identity
+}
 
-    Ok(Some(generate_internal(path, &Module::parse(tokenize(&contents)?, is_entrypoint)?, imported)))
+#[derive(Default)]
+struct ImportTree { // state shared while loading modules and merging them into one object
+    data: Vec<IR>, // merged data section, appended to by `collapse`
+    text: Vec<IR>, // merged text section, appended to by `collapse`
+    all_modules: HashMap<PathBuf, Rc<IRModule>>, // modules already loaded by `import`, keyed by source path
+    all_exports: HashSet<String>, // every exported word name seen in any loaded module
+    entrypoint: Rc<IRModule>, // set by `import` when called with `is_entrypoint == true`; an empty default module until then
+    module_count: usize, // next module number handed out by `collapse` for name mangling
+    collapse_seen: HashSet<PathBuf>, // consulted by `collapse` to skip source files it has already handled
 }
 
-fn collapse_module(mut module_w: ModuleWithImports) -> Result<IRModule> {
-    let mut module = module_w.module.take().ok_or(anyhow!("no module to collapse"))?;
-    let mut data = std::mem::take(&mut module.data);
-    let mut prev_data_len = data.len();
-    let mut text = std::mem::take(&mut module.text);
-
-    module_w.imports.take().ok_or(anyhow!("no imports to collapse"))?.into_iter().try_for_each(|imported| -> Result<()>{
-        let mut ir_mod = collapse_module(imported)?;
-        let mut mod_data = std::mem::take(&mut ir_mod.data);
-        let mod_data_len = mod_data.len();
-        data.append(&mut mod_data);
-
-        let mut mod_text = std::mem::take(&mut ir_mod.text).into_iter().map(|ir| {
-            if let IR::StackPushString(num) = ir {
-                IR::StackPushString(num + prev_data_len)
+impl ImportTree {
+    fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result<Rc<IRModule>> {
+        let mut path = PathBuf::from(specifier);
+        if path.is_relative() {
+            let mut new_path = importer_dir.clone();
+            new_path.push(path);
+            path = new_path.canonicalize()?;
+        }
+        if self.all_modules.contains_key(&path) {
+            let module = self.all_modules.get(&path).unwrap().clone();
+            return Ok(module);
+        }
+
+        let contents = std::fs::read_to_string(&path)?;
+
+        let module = self.generate_internal(path, &Module::parse(tokenize(&contents)?, is_entrypoint)?);
+        let module = Rc::new(module);
+        self.all_modules.insert(module.source_file.clone(), module.clone());
+        if is_entrypoint {
+            self.entrypoint = module.clone();
+        }
+        Ok(module)
+    }
+
+    fn generate_internal(&mut self, path: PathBuf, module: &Module) -> IRModule {
+        // Eventually these will end up being sections in assembly
+        let mut text = vec![];
+        let mut data = vec![];
+
+        let mut imports = vec![];
+        module.imports.iter().for_each(|imported| {
+            if let Some(parent_path) = path.parent() {
+                match self.import(&parent_path.to_path_buf(), imported, false) {
+                    Ok(module) => {
+                        imports.push(module);
+                    },
+                    Err(msg) => {
+                        eprintln!("{}", msg);
+                    }
+                }
             } else {
-                ir
             }
-        }).collect::<Vec<_>>();
-        text.append(&mut mod_text);
+        });
+        
+        let exports: Vec<_> = module.exports.iter().map(|s| {
+            self.all_exports.insert(s.to_string());
+            s.to_string()
+        }).collect();
+
+
+        text.push(module.words.iter().map(|def| {
+            let mut body = def.instructions.iter().map(|inst| {
+                let mapped_ir = match inst {
+                    Token::Word(word) => {
+                        match *word {
+                            "@" => IR::Load,
+                            "!" => IR::Store,
+                            "dup" => IR::Dup,
+                            "swap" => IR::Swap,
+                            "drop" => IR::Drop,
+                            "over" => IR::Over,
+                            "puts" => IR::PutS,
+                            "putn" => IR::PutN,
+                            "if" => IR::If,
+                            "endif" => IR::EndIf,
+                            "=" => IR::Equals,
+                            ">" => IR::GreaterThan,
+                            "+" => IR::AddU64,
+                            "-" => IR::SubtractU64,
+                            "*" => IR::MultiplyU64,
+                            "/" => IR::DivideU64,
+                            "%" => IR::ModU64,
+                            "|" => IR::BitwiseOr,
+                            "sys0" => IR::Sys0,
+                            "sys1" => IR::Sys1,
+                            "sys2" => IR::Sys2,
+                            "sys3" => IR::Sys3,
+                            "sys4" => IR::Sys4,
+                            "sys5" => IR::Sys5,
+                            "sys6" => IR::Sys6,
+                            // TODO num type specfic math like `+:i32`, etc.
+                            _ =>  IR::Call(String::from(*word))
+                        }
+                    },
+                    Token::String(text) => {
+                        let string_label = format!("string_{}", data.len() - 1);
+                        data.push(IR::StringDef(string_label.clone(), String::from(*text)));
+                        IR::StackPushString(string_label)
+                    },
+                    Token::NumU8(num) => push_num!(num),
+                    Token::NumI8(num) => push_num!(num, u8),
+                    Token::NumU16(num) => push_num!(num),
+                    Token::NumI16(num) => push_num!(num, u16),
+                    Token::NumU32(num) => push_num!(num),
+                    Token::NumI32(num) => push_num!(num, u32),
+                    Token::NumU64(num) => push_num!(num),
+                    Token::NumI64(num) => push_num!(num),
+                    Token::NumF32(num) => push_num!(num),
+                    Token::NumF64(num) => push_num!(num),
+                };
+                mapped_ir
+            }).collect::<Vec<_>>();
+
+            let mut result = vec![IR::Label(def.name.to_string())];
+            result.append(&mut body);
+            result.push(IR::Ret);
+            result
+        }).flatten().collect::<Vec<_>>());
+
+
+        IRModule {
+            text: text.into_iter().flatten().collect::<Vec<_>>(),
+            data,
+            imports,
+            exports,
+            source_file: path,
+        }
+    }
 
-        prev_data_len += mod_data_len;
+    fn collapse(&mut self, module: Rc<IRModule>) -> Result<()> {
+        if self.collapse_seen.contains(&module.source_file) {
+            return Ok(())
+        }
 
-        Ok(())
-    })?;
-    
-    Ok(IRModule {
-        data,
-        text,
-    })
-}
+        for imported in module.imports.clone() {
+            self.collapse(imported)?;
+        }
 
-pub fn compile(path: &str) -> Result<IRModule> {
-    let mut imported = HashSet::new();
-    let dir = std::env::current_dir()?;
-    let module = import(&dir, path, &mut imported, true)?.expect("somehow we've already imported this");
-    collapse_module(module) // TODO remove unused words 
-}
+        let is_entrypoint = module.source_file == self.entrypoint.source_file;
+        
+        let module_number = self.module_count;
+        self.module_count += 1;
 
-#[derive(Debug)]
-struct ModuleWithImports {
-    module: Option<IRModule>,
-    imports: Option<Vec<ModuleWithImports>>,
-}
+        for string in &module.data {
+            if let IR::StringDef(name, val) = string {
+                let new_name = format!("{}_{}", name, module_number);
+                self.data.push(IR::StringDef(new_name, val.clone()));
+            } else {
+                bail!("non-string data");
+            }
+        }
 
-fn generate_internal(path: PathBuf, module: &Module, imported: &mut HashSet<PathBuf>) -> ModuleWithImports {
-    // Eventually these will end up being sections in assembly
-    let mut text = vec![];
-    let mut data = vec![];
-
-    let mut imports = vec![];
-    
-    let mut last_was_import = false;
-
-    text.push(module.words.iter().map(|def| {
-        let mut body = def.instructions.iter().map(|inst| {
-            let mapped_ir = match inst {
-                Token::Word(word) => {
-                    match *word {
-                        "@" => IR::Load,
-                        "!" => IR::Store,
-                        "dup" => IR::Dup,
-                        "swap" => IR::Swap,
-                        "drop" => IR::Drop,
-                        "over" => IR::Over,
-                        "puts" => IR::PutS,
-                        "putn" => IR::PutN,
-                        "if" => IR::If,
-                        "endif" => IR::EndIf,
-                        "=" => IR::Equals,
-                        ">" => IR::GreaterThan,
-                        "+" => IR::AddU64,
-                        "-" => IR::SubtractU64,
-                        "*" => IR::MultiplyU64,
-                        "/" => IR::DivideU64,
-                        "%" => IR::ModU64,
-                        "|" => IR::BitwiseOr,
-                        "import" => IR::Import,
-                        "sys0" => IR::Sys0,
-                        "sys1" => IR::Sys1,
-                        "sys2" => IR::Sys2,
-                        "sys3" => IR::Sys3,
-                        "sys4" => IR::Sys4,
-                        "sys5" => IR::Sys5,
-                        "sys6" => IR::Sys6,
-                        // TODO num type specfic math like `+:i32`, etc.
-                        _ =>  IR::Call(String::from(*word))
+        for instruction in &module.text {
+            let new_instruction = match instruction {
+                IR::StackPushString(name) => {
+                    let new_name = format!("{}_{}", name, module_number);
+                    IR::StackPushString(new_name)
+                },
+                IR::Label(name) => {
+                    if is_entrypoint || module.exports.contains(name) {
+                        instruction.clone()
+                    } else {
+                        let new_name = format!("_m{}_{}", module_number, name);
+                        IR::Label(new_name)
                     }
                 },
-                Token::String(text) => {
-                    if last_was_import {
-                        if let Some(parent_path) = path.parent() {
-                            match import(&parent_path.to_path_buf(), text, imported, false) {
-                                Ok(module) => {
-                                    if let Some(module) = module {
-                                        imports.push(module);
-                                    }
-                                },
-                                Err(msg) => {
-                                    eprintln!("{}", msg);
-                                }
-                            }
-                        }
-                        IR::ImportString // This will be elided later
+                IR::Call(name) => {
+                    if is_entrypoint || self.all_exports.contains(name) {
+                        instruction.clone()
                     } else {
-                        data.push(IR::StringDef(String::from(*text)));
-                        IR::StackPushString(data.len() - 1)
+                        let new_name = format!("_m{}_{}", module_number, name);
+                        IR::Label(new_name)
                     }
                 },
-                Token::NumU8(num) => push_num!(num),
-                Token::NumI8(num) => push_num!(num, u8),
-                Token::NumU16(num) => push_num!(num),
-                Token::NumI16(num) => push_num!(num, u16),
-                Token::NumU32(num) => push_num!(num),
-                Token::NumI32(num) => push_num!(num, u32),
-                Token::NumU64(num) => push_num!(num),
-                Token::NumI64(num) => push_num!(num),
-                Token::NumF32(num) => push_num!(num),
-                Token::NumF64(num) => push_num!(num),
+                _ => instruction.clone()
             };
-            last_was_import = match mapped_ir {
-                IR::Import => true,
-                _ => false,
-            };
-            mapped_ir
-        }).filter(|ir| {
-            // Elide IRs that shouldn't actually be in the output IR.
-            match ir {
-                IR::Import => false,
-                IR::ImportString => false,
-                _ => true,
-            }
-        }).collect::<Vec<_>>();
+            self.text.push(new_instruction);
+        }
 
-        let mut result = vec![IR::Label(def.name.to_string())];
-        result.append(&mut body);
-        result.push(IR::Ret);
-        result
-    }).flatten().collect::<Vec<_>>());
-
-    
-    ModuleWithImports {
-        module: Some(IRModule {
-            text: text.into_iter().flatten().collect::<Vec<_>>(),
-            data,
-        }),
-        imports: Some(imports),
+        Ok(())
     }
 }
+
+
+
+pub fn compile(path: &str) -> Result<IRObject> {
+    let dir = std::env::current_dir()?;
+    let mut tree: ImportTree = Default::default(); 
+    let module = tree.import(&dir, path, true)?;
+    tree.collapse(module)?;
+    // TODO remove unused words 
+    Ok(IRObject {
+        data: tree.data,
+        text: tree.text,
+    })
+}
index 5e0ddbd2c01bb348718818f706a6c57373913311..a5a95eeaee0b500b17586ccb7d5689de7b7b0584 100644 (file)
@@ -9,15 +9,21 @@ pub struct WordDefinition<'a> {
 
 #[derive(Debug)]
 pub struct Module<'a> {
-    pub words: Vec<WordDefinition<'a>>
+    pub words: Vec<WordDefinition<'a>>, // word definitions produced by `parse`
+    pub imports: Vec<&'a str>, // string operands of `import`, collected outside word definitions
+    pub exports: Vec<&'a str>, // word names given to `export`, collected outside word definitions
 }
 
 impl<'a> Module<'a> {
     pub fn parse(input: Vec<Token<'a>>, is_entrypoint: bool) -> Result<Self> {
         let mut result = vec![];
         let mut main = vec![];
+        let mut exports = vec![];
+        let mut imports = vec![];
         let mut current_word: Option<WordDefinition> = None;
         let mut about_to_start_word_def = false;
+        let mut last_was_import = false;
+        let mut last_was_export = false;
 
         for token in input {
             if about_to_start_word_def {
@@ -52,7 +58,31 @@ impl<'a> Module<'a> {
             if let Some(ref mut current_word) = current_word {
                 current_word.instructions.push(token);
             } else {
-                main.push(token);
+                match token {
+                    Token::Word(word) => {
+                        if word == "import" {
+                            last_was_import = true;
+                        } else if word == "export" {
+                            last_was_export = true;
+                        } else {
+                            if last_was_export {
+                                exports.push(word);
+                            } else {
+                                main.push(token.clone());
+                            }
+                        }
+                    },
+                    Token::String(string) => {
+                        if last_was_import {
+                            imports.push(string);
+                        } else {
+                            main.push(token.clone());
+                        }
+                    },
+                    _ => {
+                        main.push(token.clone());
+                    }
+                };
             }
         }
 
@@ -67,7 +97,7 @@ impl<'a> Module<'a> {
             });
         }
 
-        Ok(Module { words: result })
+        Ok(Module { words: result, imports, exports })
     }
 
     #[cfg(test)]
index ce790bdb76d71cc7879c8fbe379c43bcbfc108c0..3b43cf00cf1ec2fe9491d1377bc1f987a81d9f6d 100644 (file)
@@ -6,7 +6,7 @@ use std::collections::{HashMap, HashSet};
 use std::fmt::Display;
 
 pub struct CodeGen<'a> {
-    module: &'a IRModule,
+    module: &'a IRObject, // IR object passed to `new`; `assembly` iterates its `data` section
     data_stack_size: usize, // value passed to `new` — presumably sizes the data stack region; confirm in `assembly`
     lines: Vec<String>,
 }
@@ -47,7 +47,7 @@ macro_rules! asm_macro {
 }
 
 impl<'a> CodeGen<'a> {
-    pub fn new(ir_mod: &'a IRModule, data_stack_size: usize) -> Self {
+    pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self {
         Self {
             module: ir_mod,
             data_stack_size,
@@ -81,19 +81,16 @@ impl<'a> CodeGen<'a> {
 
     pub fn assembly(&mut self) -> Result<String>{
         let mut string_table = HashMap::new();
-        let mut string_index = 0;
 
         // Static strings
         self.label(".section .rodata\n");
         for ir in &self.module.data {
             match ir {
-                IR::StringDef(some_string) => {
-                    string_table.insert(some_string.clone(), string_index);
-                    self.label(format!("string_id_{}:", string_index));
+                IR::StringDef(string_label, some_string) => {
+                    string_table.insert(some_string.clone(), string_label);
+                    self.label(string_label);
                     self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz?
                     self.label("");
-
-                    string_index += 1;
                 },
                 _ => bail!("Currently only string definitions are supported in the data section.")
             }
@@ -119,7 +116,7 @@ impl<'a> CodeGen<'a> {
                 IR::Label(name) => {
                     last_label = name;
                     if name == "main" {
-                        self.label(".globl _start");
+                        self.label(".globl _start"); // TODO is globl necessary?
                         self.label("_start:");
                         self.line("la s2, data_stack_end"); // set stack pointer
                     } else {
@@ -159,6 +156,10 @@ impl<'a> CodeGen<'a> {
                     self.line(format!("li t0, {}", num));
                     self.push_from("t0");
                 },
+                IR::StackPushString(name) => {
+                    self.line(format!("li t0, {}", name));
+                    self.push_from("t0");
+                },
                 IR::AddU64 => {
                     self.pop_to("t0");
                     self.pop_to("t1");
index ca4371b42859596c8849bbfd1911b205abf93c88..0240aa2eb58a68963c9053534ba5f233979cf5ad 100644 (file)
@@ -1,6 +1,6 @@
 use anyhow::{Result, anyhow};
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub enum Token<'a> {
     Word(&'a str),
     String(&'a str),