rethought.computer Git - sorel-lang.git/commitdiff
more cleanup and organizing
author Bryan English <bryan@rethought.computer>
Mon, 16 Feb 2026 03:45:43 +0000 (22:45 -0500)
committer Bryan English <bryan@rethought.computer>
Mon, 16 Feb 2026 03:45:56 +0000 (22:45 -0500)
Cargo.lock
Cargo.toml
sorel-codegen/Cargo.toml [new file with mode: 0644]
sorel-codegen/src/lib.rs [new file with mode: 0644]
sorel-codegen/src/riscv64_asm.rs [new file with mode: 0644]
sorelc/Cargo.toml
sorelc/src/import_tree.rs [new file with mode: 0644]
sorelc/src/ir.rs [deleted file]
sorelc/src/main.rs
sorelc/src/riscv_asm_codegen.rs [deleted file]

index 017d4192af85d5543dbc9e66ad39b92469c0cbe1..839a7943e1b4a1fd13c06b29c1c1c0cf31987d8f 100644 (file)
@@ -102,6 +102,14 @@ dependencies = [
  "unsafe-libyaml",
 ]
 
+[[package]]
+name = "sorel-codegen"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "sorel-ir",
+]
+
 [[package]]
 name = "sorel-ir"
 version = "0.1.0"
@@ -132,6 +140,7 @@ name = "sorelc"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "sorel-codegen",
  "sorel-ir",
  "sorel-parser",
  "sorel-tokenizer",
index 8e7cea95e541cd6dcc36e717635f85e4e5f28189..7f1766114548583ab0ddb1a984ffb746b4a576c3 100644 (file)
@@ -1,9 +1,10 @@
 [workspace]
 
 resolver = "3"
-members = ["sorel-ir","sorel-parser","sorel-tokenizer","sorelc"]
+members = ["sorel-codegen","sorel-ir","sorel-parser","sorel-tokenizer","sorelc"]
 
 [workspace.dependencies]
 sorel-ir = { path = "./sorel-ir", version = "0.1.0" }
+sorel-codegen = { path = "./sorel-codegen", version = "0.1.0" }
 sorel-tokenizer = { path = "./sorel-tokenizer", version = "0.1.0" }
 sorel-parser = { path = "./sorel-parser", version = "0.1.0" }
diff --git a/sorel-codegen/Cargo.toml b/sorel-codegen/Cargo.toml
new file mode 100644 (file)
index 0000000..8caa9bd
--- /dev/null
@@ -0,0 +1,8 @@
+[package]
+name = "sorel-codegen"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+sorel-ir = { workspace = true }
+anyhow = "1.0.100"
diff --git a/sorel-codegen/src/lib.rs b/sorel-codegen/src/lib.rs
new file mode 100644 (file)
index 0000000..72a6c86
--- /dev/null
@@ -0,0 +1,2 @@
+pub mod riscv64_asm;
+
diff --git a/sorel-codegen/src/riscv64_asm.rs b/sorel-codegen/src/riscv64_asm.rs
new file mode 100644 (file)
index 0000000..de72435
--- /dev/null
@@ -0,0 +1,386 @@
+use sorel_ir::*;
+
+use anyhow::*;
+
+use std::collections::{HashMap, HashSet};
+use std::fmt::Display;
+
+/// Emits RISC-V 64 assembly text for a collapsed `IRObject`.
+///
+/// Build one with `CodeGen::new`, then call `CodeGen::assembly` to obtain the
+/// generated source as a single string.
+pub struct CodeGen<'a> {
+    // The IR object whose `data` and `text` lists are translated.
+    module: &'a IRObject,
+    // Size of the data stack reservation; emitted as the operand of `.space`.
+    data_stack_size: usize,
+    // Output lines accumulated so far; `assembly` joins them with newlines.
+    lines: Vec<String>,
+}
+
+
+// Some inspiration
+// ================
+//
+// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list
+//     * Except using sp as a more C ABI style stack pointer, and s2 for the data stack
+// 
+
+// Implementation Choices
+// ======================
+//
+// Data Stack pointer: s2
+// No return stack pointer (using C ABI, so sp, sorta)
+// Use t0, t1, t2 for temporary values in words
+// Data stack grows down
+
+
+/// Generates a private `&mut self` method that emits one indented assembly line.
+///
+/// * `asm_macro!(name, "text")` emits the text as-is.
+/// * `asm_macro!(name, "fmt {}", T)` and `asm_macro!(name, "fmt {} {}", T, U)`
+///   emit the template with the method's one or two arguments substituted by
+///   `format!`.
+///
+/// The generated methods call `self.line`, so the macro has to be invoked
+/// inside an `impl` block whose type provides that method.
+macro_rules! asm_macro {
+    // No operands: the template is passed to `line` untouched.
+    ($name:ident, $template:expr) => {
+        fn $name(&mut self) {
+            self.line($template);
+        }
+    };
+    // One operand.
+    ($name:ident, $template:expr, $first:ty) => {
+        fn $name(&mut self, first: $first) {
+            let text = format!($template, first);
+            self.line(text);
+        }
+    };
+    // Two operands, substituted in declaration order.
+    ($name:ident, $template:expr, $first:ty, $second:ty) => {
+        fn $name(&mut self, first: $first, second: $second) {
+            let text = format!($template, first, second);
+            self.line(text);
+        }
+    };
+}
+
+/// Rewrites a word name so it can be used as an assembler symbol.
+///
+/// `<` becomes `_LT_`, `>` becomes `_GT_` and `-` becomes `___`; every other
+/// character is copied through unchanged.
+fn mangle(input: &str) -> String {
+    let mut mangled = String::with_capacity(input.len());
+    for ch in input.chars() {
+        match ch {
+            '<' => mangled.push_str("_LT_"),
+            '>' => mangled.push_str("_GT_"),
+            '-' => mangled.push_str("___"),
+            other => mangled.push(other),
+        }
+    }
+    mangled
+}
+
+impl<'a> CodeGen<'a> {
+    /// Creates a generator for `ir_mod` with an empty output buffer.
+    ///
+    /// `data_stack_size` is the amount of space reserved for the data stack in
+    /// the generated assembly.
+    pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self {
+        let lines = Vec::new();
+        Self { module: ir_mod, data_stack_size, lines }
+    }
+
+    /// Appends `line` to the output, indented by four spaces.
+    fn line<S: Display>(&mut self, line: S) {
+        let indented = format!("    {line}");
+        self.lines.push(indented);
+    }
+
+    /// Appends `line` to the output verbatim, with no indentation.
+    fn label<S: Display>(&mut self, line: S) {
+        let unindented: String = line.to_string();
+        self.lines.push(unindented);
+    }
+
+    // Load the top data-stack cell (at 0(s2)) into the named register.
+    asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str);
+    // Load the cell `n` cells away from the top (byte offset n*8 from s2) into the named register.
+    asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize);
+    // Store the named register into the top data-stack cell.
+    asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str);
+    // Move the data-stack pointer s2 by `n` 8-byte cells; callers use positive values to pop and -1 to push.
+    asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize);
+
+    /// Pops one cell: loads the top of the data stack into `reg`, then moves
+    /// the stack pointer one cell past it.
+    fn pop_to(&mut self, reg: &str) {
+        self.copy_top_stack_value_to(reg);
+        self.move_stack_ptr_by_cells(1);
+    }
+
+    /// Pops several cells into registers with a single stack-pointer adjustment.
+    ///
+    /// `regs` is a whitespace-separated register list. The last register named
+    /// receives the top of the stack, the one before it the cell below, and so
+    /// on. After the loads the stack pointer is moved past all popped cells.
+    fn pop_some_to(&mut self, regs: &str) {
+        // `split_whitespace` tolerates repeated, leading or trailing blanks,
+        // which `split(" ")` would turn into empty register names.
+        let regs: Vec<&str> = regs.split_whitespace().collect();
+        // Walk the list backwards so cell offset 0 (the top) pairs with the
+        // last register named.
+        for (offset, reg) in (0isize..).zip(regs.iter().rev()) {
+            self.copy_offset_stack_value_to(reg, offset);
+        }
+        self.move_stack_ptr_by_cells(regs.len() as isize);
+    }
+
+    /// Pushes one cell: moves the stack pointer by -1 cell to make room, then
+    /// stores `reg` into the new top of the data stack.
+    fn push_from(&mut self, reg: &str) {
+        self.move_stack_ptr_by_cells(-1);
+        self.copy_to_top_of_stack(reg);
+    }
+
+    /// Emits the common "pop operands, run one instruction, push result" shape.
+    ///
+    /// `regs` is the register list handed to `pop_some_to`, `call` is the
+    /// instruction line emitted between the pops and the push, and `reg` is the
+    /// register whose value is pushed afterwards.
+    fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) {
+        self.pop_some_to(regs);
+        self.line(call);
+        self.push_from(reg);
+    }
+
+    /// Translates the whole IR object into assembly source.
+    ///
+    /// Output layout, in order:
+    /// 1. `.section .rodata` with one `.asciz` entry per `IR::StringDef`;
+    /// 2. `.data` with the `data_stack` reservation followed by the global
+    ///    `data_stack_end` label, which is where `s2` starts;
+    /// 3. `.text` with the code for every instruction in the module's `text`.
+    ///
+    /// The word labelled `main` is emitted as `_start`, and its `Ret` becomes an
+    /// exit(0) syscall instead of a return.
+    ///
+    /// Calling this more than once regenerates the output from scratch.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the data list holds anything other than string definitions, if
+    /// an IR instruction has no translation yet, or if the
+    /// `if`/`else`/`endif`/`loop`/`endloop` markers are unbalanced.
+    pub fn assembly(&mut self) -> Result<String> {
+        // `lines` lives on `self`; without this a second call would return the
+        // program twice.
+        self.lines.clear();
+
+        // Copy the shared reference out so the IR can be walked while `self`
+        // is mutated through `line`/`label`.
+        let module = self.module;
+
+        // NOTE(review): filled in below but never read in this function.
+        let mut string_table = HashMap::new();
+
+        // Static strings
+        self.label(".section .rodata\n");
+        self.label(".align 3\n");
+        for ir in &module.data {
+            let IR::StringDef(string_label, some_string) = ir else {
+                bail!("Currently only string definitions are supported in the data section.");
+            };
+            string_table.insert(some_string.clone(), string_label);
+            self.label(format!("{}:", string_label));
+            // NOTE(review): the text is emitted unescaped; presumably it is
+            // already in a form the assembler accepts -- verify for strings
+            // containing quotes or backslashes.
+            self.line(format!(".asciz \"{}\"", some_string));
+            self.label("");
+        }
+
+        // Data stack
+        self.label(".data\n");
+        self.label(".align 3\n");
+        self.label("data_stack:");
+        self.line(format!(".space {}", self.data_stack_size));
+        self.label(".globl data_stack_end\ndata_stack_end:\n");
+
+        // Code
+        self.label(".text\n");
+        self.label(".align 3\n");
+
+        // Counters hand out unique label suffixes; the stacks remember which
+        // suffix belongs to the innermost open block.
+        let mut if_block_count: usize = 0;
+        let mut if_stack: Vec<usize> = Vec::new();
+        let mut loop_count: usize = 0;
+        let mut loop_stack: Vec<usize> = Vec::new();
+        // Open `if` blocks for which an `else` has already been emitted.
+        let mut seen_else: HashSet<usize> = HashSet::new();
+        // True while the most recent label is the entry point `main`.
+        let mut in_main = false;
+
+        for ir in &module.text {
+            match ir {
+                IR::Label(name) => {
+                    in_main = name == "main";
+                    if in_main {
+                        self.label(".globl _start"); // TODO is globl necessary?
+                        self.label("_start:");
+                        self.line("la s2, data_stack_end # set initial data stack pointer");
+                    } else {
+                        let mangled = mangle(name);
+                        self.label(format!(".globl {}", mangled));
+                        self.label(format!("{}:", mangled));
+                    }
+                    // Every word gets a 16-byte frame that holds its return address.
+                    self.line("addi sp, sp, -16 # allocate 16 bytes on stack");
+                    self.line("sd ra, 8(sp) # store return address on stack");
+                },
+                IR::Call(name) => {
+                    let mangled = mangle(name);
+                    self.label(format!("# call {}", mangled));
+                    self.line(format!("call {}", mangled));
+                },
+                IR::WordPointer(name) => {
+                    let mangled = mangle(name);
+                    self.label(format!("# '{} (word pointer)", mangled));
+                    self.line(format!("la t0, {}", mangled));
+                    self.push_from("t0");
+                },
+                IR::CallPtr => {
+                    self.label("# callptr");
+                    self.pop_to("t0");
+                    self.line("jalr t0");
+                },
+                IR::Ret => {
+                    if in_main {
+                        // The entry point has nowhere to return to: exit instead.
+                        self.label("# exit 0 syscall");
+                        self.line("li a7, 93");
+                        self.line("mv a0, x0");
+                        self.line("ecall");
+                    } else {
+                        self.line("ld ra, 8(sp)"); // load return address from stack
+                        self.line("addi sp, sp, 16"); // restore stack pointer
+                        self.line("ret");
+                    }
+                },
+                // Loads replace the address on top of the stack with the value
+                // read through it.
+                IR::Load8 => {
+                    self.label("# load 8");
+                    self.copy_top_stack_value_to("t0");
+                    self.line("lbu   t0, 0(t0)"); // deref pointer in t0 to t0
+                    self.copy_to_top_of_stack("t0");
+                },
+                IR::Load16 => {
+                    self.label("# load 16");
+                    self.copy_top_stack_value_to("t0");
+                    self.line("lhu   t0, 0(t0)"); // deref pointer in t0 to t0
+                    self.copy_to_top_of_stack("t0");
+                },
+                IR::Load32 => {
+                    self.label("# load 32");
+                    self.copy_top_stack_value_to("t0");
+                    self.line("lwu   t0, 0(t0)"); // deref pointer in t0 to t0
+                    self.copy_to_top_of_stack("t0");
+                },
+                IR::Load => {
+                    self.label("# load 64");
+                    self.copy_top_stack_value_to("t0");
+                    self.line("ld   t0, 0(t0)"); // deref pointer in t0 to t0
+                    self.copy_to_top_of_stack("t0");
+                },
+                // Stores: ( x addr -- ), so t0 = x and t1 = addr after the pops.
+                IR::Store8 => {
+                    self.pop_some_to("t0 t1");
+                    self.line("sb t0, 0(t1)"); // store x at addr
+                },
+                IR::Store16 => {
+                    self.pop_some_to("t0 t1");
+                    self.line("sh t0, 0(t1)"); // store x at addr
+                },
+                IR::Store32 => {
+                    self.pop_some_to("t0 t1");
+                    self.line("sw t0, 0(t1)"); // store x at addr
+                },
+                IR::Store => {
+                    self.pop_some_to("t0 t1");
+                    self.line("sd t0, 0(t1)"); // store x at addr
+                },
+                IR::StackPush(num) => {
+                    self.label(format!("# stackpush {}", num));
+                    self.line(format!("li t0, {}", num));
+                    self.push_from("t0");
+                },
+                IR::StackPushString(name) => {
+                    self.label(format!("# stackpushstring {}", name));
+                    self.line(format!("la t0, {}", name));
+                    self.push_from("t0");
+                },
+                IR::AddU64 => {
+                    self.label("# add");
+                    self.pop_call_push("t0 t1", "add t0, t0, t1", "t0");
+                },
+                IR::SubtractU64 => {
+                    self.label("# sub");
+                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
+                },
+                IR::MultiplyU64 => {
+                    self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0");
+                },
+                // NOTE(review): `div`/`rem` are the signed forms although the
+                // IR names say U64 -- confirm the intended signedness.
+                IR::DivideU64 => {
+                    self.pop_call_push("t0 t1", "div t0, t0, t1", "t0");
+                },
+                IR::ModU64 => {
+                    self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0");
+                },
+                IR::Dup => {
+                    self.label("# dup");
+                    self.copy_top_stack_value_to("t0");
+                    self.push_from("t0");
+                },
+                IR::Swap => {
+                    self.label("# swap");
+                    self.pop_some_to("t1 t0");
+                    self.push_from("t0");
+                    self.push_from("t1");
+                },
+                IR::Over => {
+                    // TODO this is super inefficient. There's no need to pop anything. Just read
+                    // from the second stack position and push it.
+                    self.label("# over");
+                    self.pop_some_to("t0 t1");
+                    self.push_from("t0");
+                    self.push_from("t1");
+                    self.push_from("t0");
+                },
+                IR::Rot => {
+                    self.label("# rot");
+                    self.pop_some_to("t0 t1 t2");
+                    self.push_from("t1");
+                    self.push_from("t2");
+                    self.push_from("t0");
+                },
+                IR::StackPointer => {
+                    self.label("# sp");
+                    self.line("addi t0, s2, 0");
+                    self.push_from("t0");
+                },
+                IR::StackBottom => {
+                    self.label("# stackbottom");
+                    self.line("la t0, data_stack_end");
+                    self.push_from("t0");
+                },
+                IR::Drop => {
+                    self.label("# drop");
+                    self.move_stack_ptr_by_cells(1);
+                },
+                IR::Equals => {
+                    // Yes, this is the same as subtract, since we're treating 0 as true, and
+                    // others as false.
+                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
+                },
+                IR::GreaterThan => {
+                    self.label("# >");
+                    self.pop_some_to("t0 t1");
+                    self.line("sgt  t0, t0, t1");
+                    self.line("seqz t0, t0"); // remember, 0 is true, others are false
+                    self.push_from("t0");
+                },
+                IR::LessThan => {
+                    self.label("# <");
+                    self.pop_some_to("t0 t1");
+                    self.line("slt  t0, t0, t1");
+                    self.line("seqz t0, t0"); // remember, 0 is true, others are false
+                    self.push_from("t0");
+                },
+                IR::BitwiseOr => {
+                    self.pop_call_push("t0 t1", "or t0, t0, t1", "t0");
+                },
+                // Syscalls: the top of the stack goes to a7, the cells below it
+                // fill a0.. in order, and a0 is pushed back afterwards.
+                IR::Sys0 => {
+                    self.pop_call_push("a7", "ecall", "a0");
+                },
+                IR::Sys1 => {
+                    self.pop_call_push("a0 a7", "ecall", "a0");
+                },
+                IR::Sys2 => {
+                    self.pop_call_push("a0 a1 a7", "ecall", "a0");
+                },
+                IR::Sys3 => {
+                    self.pop_call_push("a0 a1 a2 a7", "ecall", "a0");
+                },
+                IR::Sys4 => {
+                    self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0");
+                },
+                IR::Sys5 => {
+                    self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0");
+                },
+                IR::Sys6 => {
+                    self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0");
+                },
+                // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html
+                IR::If => {
+                    self.label("# if");
+                    self.pop_to("t0");
+                    // Non-zero means false, so skip ahead to the else/end label.
+                    self.line(format!("bnez t0, _else_{}", if_block_count));
+                    if_stack.push(if_block_count);
+                    if_block_count += 1;
+                },
+                IR::Else => {
+                    self.label("# else");
+                    let Some(&if_counter) = if_stack.last() else {
+                        bail!("`else` without a matching `if`");
+                    };
+                    // A second `else` would define `_else_N` twice.
+                    if !seen_else.insert(if_counter) {
+                        bail!("more than one `else` for the same `if`");
+                    }
+                    self.line(format!("j _endif_{}", if_counter));
+                    self.label(format!("_else_{}:", if_counter));
+                },
+                IR::EndIf => {
+                    self.label("# endif");
+                    let Some(if_counter) = if_stack.pop() else {
+                        bail!("`endif` without a matching `if`");
+                    };
+                    // Without an `else`, the false branch of `if` lands here.
+                    if seen_else.remove(&if_counter) {
+                        self.label(format!("_endif_{}:", if_counter));
+                    } else {
+                        self.label(format!("_else_{}:", if_counter));
+                    }
+                },
+                IR::Loop => { // keep looping until the popped value is true/0
+                    self.label(format!("_loop_{}:", loop_count));
+                    self.pop_to("t0");
+                    self.line(format!("beqz t0, _endloop_{}", loop_count));
+                    loop_stack.push(loop_count);
+                    loop_count += 1;
+                },
+                IR::EndLoop => {
+                    let Some(loop_counter) = loop_stack.pop() else {
+                        bail!("`endloop` without a matching `loop`");
+                    };
+                    self.line(format!("j _loop_{}", loop_counter));
+                    self.label(format!("_endloop_{}:", loop_counter));
+                },
+                _ => bail!("not implemented yet: {:?}", ir),
+            }
+        }
+
+        // Anything still open would leave branch targets undefined.
+        if !if_stack.is_empty() {
+            bail!("{} `if` block(s) were never closed with `endif`", if_stack.len());
+        }
+        if !loop_stack.is_empty() {
+            bail!("{} `loop` block(s) were never closed with `endloop`", loop_stack.len());
+        }
+
+        Ok(self.lines.join("\n"))
+    }
+}
+
index 012909912fd0d3b6ce48e954c57f83ec223692aa..9550d4e8bd40eb9e3ff18a71941b38d54b75474a 100644 (file)
@@ -4,6 +4,7 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
+sorel-codegen = { workspace = true }
 sorel-ir = { workspace = true }
 sorel-tokenizer = { workspace = true }
 sorel-parser = { workspace = true }
diff --git a/sorelc/src/import_tree.rs b/sorelc/src/import_tree.rs
new file mode 100644 (file)
index 0000000..9c6896b
--- /dev/null
@@ -0,0 +1,185 @@
+use sorel_parser::Module;
+use sorel_ir::*;
+use sorel_tokenizer::tokenize;
+
+use std::collections::{HashSet, HashMap};
+use std::path::PathBuf;
+use std::rc::Rc;
+use std::cell::RefCell;
+use std::include_str;
+
+use anyhow::{Result, bail, anyhow};
+
+/// Working state for loading a program's modules and flattening them into a
+/// single IR object.
+#[derive(Default)]
+pub(crate) struct ImportTree {
+    // Collapsed string definitions of every module visited by `collapse`.
+    data: Vec<IR>,
+    // Collapsed instructions of every module visited by `collapse`.
+    text: Vec<IR>,
+    // Modules imported so far, stored under the string form of their `ModuleID`.
+    all_modules: HashMap<String, WrappedIRModule>,
+    // Every export name seen in any imported module.
+    all_exports: HashSet<String>,
+    // The module that was imported with `is_entrypoint` set.
+    entrypoint: WrappedIRModule,
+    // Number of modules converted so far; supplies each module's `number`.
+    module_count: usize,
+    // String forms of the `ModuleID`s that `collapse` has already emitted.
+    collapse_seen: HashSet<String>,
+}
+
+/// Returns the embedded source text of a standard-library module.
+///
+/// The sources are compiled into the binary with `include_str!`, so the
+/// returned text is `'static` and does not borrow from `specifier`.
+///
+/// # Errors
+///
+/// Fails when `specifier` is not one of the known `std:` modules.
+fn std_import(specifier: &str) -> Result<&'static str> {
+    let source = match specifier {
+        "std:mem" => include_str!("../../stdlib/mem.sorel"),
+        "std:out" => include_str!("../../stdlib/out.sorel"),
+        "std:string" => include_str!("../../stdlib/string.sorel"),
+        "std:process" => include_str!("../../stdlib/process.sorel"),
+        _ => bail!("{} is not a standard library module", specifier),
+    };
+    Ok(source)
+}
+
+impl ImportTree {
+    /// Loads the module named by `specifier`, converting it (and, recursively,
+    /// everything it imports) to IR, and returns the shared handle to it.
+    ///
+    /// * `std:` specifiers resolve to the embedded standard library.
+    /// * Anything else is a file path; relative paths are resolved against
+    ///   `importer_dir`. Paths are canonicalized so that different spellings of
+    ///   the same file map to one module.
+    ///
+    /// A module that was already imported is returned from the cache instead of
+    /// being parsed again. When `is_entrypoint` is set the module is also
+    /// recorded as the tree's entry point.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the specifier cannot be resolved or read, or if tokenizing,
+    /// parsing or converting the module fails.
+    // NOTE(review): a module is cached only after its imports are processed,
+    // so an import cycle appears to recurse without bound -- confirm whether
+    // cycles are rejected earlier.
+    fn import(&mut self, importer_dir: &std::path::Path, specifier: &str, is_entrypoint: bool) -> Result<WrappedIRModule> {
+        let module_id = if specifier.starts_with("std:") {
+            ModuleID::StdSpecifier(specifier.to_string())
+        } else {
+            // `join` returns an absolute specifier unchanged and resolves a
+            // relative one against the importing module's directory.
+            let joined = importer_dir.join(specifier);
+            let canonical = joined.canonicalize().map_err(|err| {
+                anyhow::Error::new(err).context(format!(
+                    "cannot resolve import {:?} relative to {:?}",
+                    specifier, importer_dir
+                ))
+            })?;
+            ModuleID::SourceFile(canonical)
+        };
+
+        // Look up and store under the same key, so the cache works no matter
+        // how `ModuleID` renders itself.
+        let cache_key = module_id.to_string();
+        if let Some(existing) = self.all_modules.get(&cache_key) {
+            return Ok(Rc::clone(existing));
+        }
+
+        let contents = match &module_id {
+            ModuleID::StdSpecifier(_) => std_import(specifier)?.to_string(),
+            ModuleID::SourceFile(path) => std::fs::read_to_string(path).map_err(|err| {
+                anyhow::Error::new(err).context(format!("cannot read module {:?}", path))
+            })?,
+        };
+
+        let tokens = tokenize(&contents)?;
+        let parsed = Module::parse(tokens, is_entrypoint)?;
+        let module = self.ir_mod_from_parsed(module_id, &parsed)?;
+        let module = Rc::new(RefCell::new(module));
+        self.all_modules.insert(cache_key, Rc::clone(&module));
+        if is_entrypoint {
+            self.entrypoint = Rc::clone(&module);
+        }
+        Ok(module)
+    }
+
+    /// Converts a parsed module into an `IRModule`.
+    ///
+    /// The module's imports are loaded first (relative to the directory of the
+    /// module's own file), its exports are recorded in `all_exports`, and every
+    /// word becomes `Label`, its translated instructions, then `Ret`. The
+    /// module receives the next free module number.
+    ///
+    /// # Errors
+    ///
+    /// Fails if a source-file module has no parent directory or if any import
+    /// fails to load.
+    fn ir_mod_from_parsed(&mut self, module_id: ModuleID, module: &Module) -> Result<IRModule> {
+        let parent_path = match &module_id {
+            ModuleID::SourceFile(path) => path
+                .parent()
+                .ok_or_else(|| anyhow!("no parent for path: {:?}", path))?
+                .to_path_buf(),
+            // A stdlib module can only import other stdlib
+            // modules, so no need for parent path.
+            ModuleID::StdSpecifier(_) => PathBuf::new(),
+        };
+
+        let mut imports = Vec::new();
+        for imported in module.imports.iter() {
+            imports.push(self.import(&parent_path, imported, false)?);
+        }
+
+        let mut exports = Vec::new();
+        for export in module.exports.iter() {
+            let export = export.to_string();
+            self.all_exports.insert(export.clone());
+            exports.push(export);
+        }
+
+        let externs = module.externs.iter().map(|s| s.to_string()).collect();
+
+        // Eventually these will end up being sections in assembly
+        let mut data = Vec::new();
+        let mut text = Vec::new();
+        for def in module.words.iter() {
+            text.push(IR::Label(def.name.to_string()));
+            for inst in def.instructions.iter() {
+                // `from_token` may append string definitions to `data`.
+                text.push(IR::from_token(inst, &mut data));
+            }
+            text.push(IR::Ret);
+        }
+
+        // Numbered after the imports, so imported modules get lower numbers.
+        let number = self.module_count;
+        self.module_count += 1;
+
+        Ok(IRModule {
+            text,
+            data,
+            imports,
+            exports,
+            externs,
+            module_id,
+            number,
+        })
+    }
+
+    /// Appends `module` and, before it, everything it imports to the tree's
+    /// flat `data` and `text` lists.
+    ///
+    /// Each module is emitted at most once. String labels get the module
+    /// number appended so they stay unique across modules, and word labels,
+    /// calls and word pointers are rewritten through the module's label
+    /// helpers. The entry point's `main` label is kept as-is.
+    ///
+    /// # Errors
+    ///
+    /// Fails if a module's data list holds anything but string definitions.
+    fn collapse(&mut self, module: WrappedIRModule, is_entrypoint: bool) -> Result<()> {
+        // The module is only read here, so a shared borrow is enough.
+        let module = module.borrow();
+        let seen_key = module.module_id.to_string();
+        if self.collapse_seen.contains(&seen_key) {
+            return Ok(());
+        }
+
+        // Dependencies first, so their code precedes the code that calls it.
+        for imported in module.imports.iter() {
+            self.collapse(Rc::clone(imported), false)?;
+        }
+
+        let module_number = module.number;
+
+        for entry in &module.data {
+            let IR::StringDef(name, val) = entry else {
+                bail!("non-string data");
+            };
+            let unique_name = format!("{}_{}", name, module_number);
+            self.data.push(IR::StringDef(unique_name, val.clone()));
+        }
+
+        for instruction in &module.text {
+            let rewritten = match instruction {
+                // Must match the renaming applied to the string definitions above.
+                IR::StackPushString(name) => {
+                    IR::StackPushString(format!("{}_{}", name, module_number))
+                },
+                IR::Label(name) if is_entrypoint && name == "main" => instruction.clone(),
+                IR::Label(name) => IR::Label(module.get_label(name)),
+                IR::Call(name) => IR::Call(module.get_label_for_call(name)),
+                IR::WordPointer(name) => IR::WordPointer(module.get_label_for_call(name)),
+                _ => instruction.clone(),
+            };
+            self.text.push(rewritten);
+        }
+
+        self.collapse_seen.insert(seen_key);
+
+        Ok(())
+    }
+}
+
+/// Loads the program whose entry file is `path` (resolved against the current
+/// working directory), together with everything it imports, and flattens the
+/// result into a single `IRObject`.
+///
+/// # Errors
+///
+/// Fails if the working directory cannot be determined or if importing or
+/// collapsing any module fails.
+pub fn build_and_collapse(path: &str) -> Result<IRObject> {
+    let mut tree = ImportTree::default();
+    let cwd = std::env::current_dir()?;
+    let entry = tree.import(&cwd, path, true)?;
+    tree.collapse(entry, true)?;
+    // TODO remove unused words
+    let ImportTree { data, text, .. } = tree;
+    Ok(IRObject { data, text })
+}
diff --git a/sorelc/src/ir.rs b/sorelc/src/ir.rs
deleted file mode 100644 (file)
index 8176da5..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-use sorel_parser::Module;
-use sorel_ir::*;
-use sorel_tokenizer::tokenize;
-
-use std::collections::{HashSet, HashMap};
-use std::path::PathBuf;
-use std::rc::Rc;
-use std::cell::RefCell;
-use std::include_str;
-
-use anyhow::{Result, bail, anyhow};
-
-#[derive(Default)]
-pub(crate) struct ImportTree {
-    data: Vec<IR>,
-    text: Vec<IR>,
-    all_modules: HashMap<String, WrappedIRModule>,
-    all_exports: HashSet<String>,
-    entrypoint: WrappedIRModule,
-    module_count: usize,
-    collapse_seen: HashSet<String>,
-}
-
-fn std_import(specifier: &str) -> Result<&str> {
-    match specifier {
-        "std:mem" => Ok(include_str!("../../stdlib/mem.sorel")),
-        "std:out" => Ok(include_str!("../../stdlib/out.sorel")),
-        "std:string" => Ok(include_str!("../../stdlib/string.sorel")),
-        "std:process" => Ok(include_str!("../../stdlib/process.sorel")),
-        _ => bail!("{} is not a standard library module", specifier),
-    }
-}
-
-impl ImportTree {
-    fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result<WrappedIRModule> {
-        let (contents, module_id) = if specifier.starts_with("std:") {
-            if self.all_modules.contains_key(specifier) {
-                let module = self.all_modules.get(specifier).unwrap().clone();
-                return Ok(module);
-            }
-            let contents = std_import(specifier)?;
-            (contents.to_string(), ModuleID::StdSpecifier(specifier.to_string()))
-        } else {
-            let mut path = PathBuf::from(specifier);
-            if path.is_relative() {
-                let mut new_path = importer_dir.clone();
-                new_path.push(path);
-                path = new_path.canonicalize()?;
-            }
-            let path_key = path.to_string_lossy().to_string();
-            if self.all_modules.contains_key(&path_key) {
-                let module = self.all_modules.get(&path_key).unwrap().clone();
-                return Ok(module);
-            }
-
-            let contents = std::fs::read_to_string(&path)?;
-            (contents, ModuleID::SourceFile(path))
-        };
-        let tokens = tokenize(&contents)?;
-        let parsed = &Module::parse(tokens, is_entrypoint)?;
-        let module = self.generate_internal(module_id.clone(), parsed)?;
-        let module = Rc::new(RefCell::new(module));
-        self.all_modules.insert(module_id.to_string(), module.clone());
-        if is_entrypoint {
-            self.entrypoint = module.clone();
-        }
-        Ok(module)
-    }
-
-    fn generate_internal(&mut self, module_id: ModuleID, module: &Module) -> Result<IRModule> {
-        // Eventually these will end up being sections in assembly
-        let mut text = vec![];
-        let mut data = vec![];
-
-        let mut imports = vec![];
-
-        let parent_path = match module_id {
-            ModuleID::SourceFile(ref path) => {
-                path.parent().ok_or(anyhow!("no parent for path: {:?}", path))?.to_path_buf()
-            },
-            // A stdlib module can only import other stdlib
-            // modules, so no need for parent path.
-            ModuleID::StdSpecifier(_) => PathBuf::new(),
-        };
-        module.imports.iter().try_for_each(|imported| -> Result<()> {
-            let new_module = self.import(&parent_path, imported, false)?;
-            imports.push(new_module);
-            Ok(())
-        })?;
-
-        let exports: Vec<_> = module.exports.iter().map(|s| {
-            self.all_exports.insert(s.to_string());
-            s.to_string()
-        }).collect();
-
-        let externs = module.externs.iter().map(|s| s.to_string()).collect();
-
-        text.push(module.words.iter().flat_map(|def| {
-            let mut body = def.instructions.iter().map(|inst| {
-                IR::from_token(inst, &mut data)
-            }).collect::<Vec<_>>();
-
-            let mut result = vec![IR::Label(def.name.to_string())];
-            result.append(&mut body);
-            result.push(IR::Ret);
-            result
-        }).collect::<Vec<_>>());
-
-        let number = self.module_count;
-        self.module_count += 1;
-
-        Ok(IRModule {
-            text: text.into_iter().flatten().collect::<Vec<_>>(),
-            data,
-            imports,
-            exports,
-            externs,
-            module_id,
-            number,
-        })
-    }
-
-    fn collapse(&mut self, module: WrappedIRModule, is_entrypoint: bool) -> Result<()> {
-        let module = module.borrow_mut();
-        let seen_key = module.module_id.to_string();
-        if self.collapse_seen.contains(&seen_key) {
-            return Ok(())
-        }
-
-        for imported in module.imports.clone() {
-            self.collapse(imported, false)?;
-        }
-
-        let module_number = module.number;
-
-        for string in &module.data {
-            if let IR::StringDef(name, val) = string {
-                let new_name = format!("{}_{}", name, module_number);
-                self.data.push(IR::StringDef(new_name, val.clone()));
-            } else {
-                bail!("non-string data");
-            }
-        }
-
-        for instruction in &module.text {
-            let new_instruction = match instruction {
-                IR::StackPushString(name) => {
-                    let new_name = format!("{}_{}", name, module_number);
-                    IR::StackPushString(new_name)
-                },
-                IR::Label(name) => {
-                    if is_entrypoint && name == "main" {
-                        instruction.clone()
-                    } else {
-                        IR::Label(module.get_label(name))
-                    }
-                },
-                IR::Call(name) => {
-                    IR::Call(module.get_label_for_call(name))
-                },
-                IR::WordPointer(name) => {
-                    IR::WordPointer(module.get_label_for_call(name))
-                },
-                _ => instruction.clone()
-            };
-            self.text.push(new_instruction);
-        }
-
-        self.collapse_seen.insert(seen_key);
-
-        Ok(())
-    }
-}
-
-pub fn compile(path: &str) -> Result<IRObject> {
-    let dir = std::env::current_dir()?;
-    let mut tree: ImportTree = Default::default(); 
-    let module = tree.import(&dir, path, true)?;
-    tree.collapse(module, true)?;
-    // TODO remove unused words 
-    Ok(IRObject {
-        data: tree.data,
-        text: tree.text,
-    })
-}
index 141e32256ae8fcba0eeac713d7236bc3d8305658..18c93a2084a86d414120dd82f99d96045bc07c23 100644 (file)
@@ -1,5 +1,5 @@
-mod ir;
-mod riscv_asm_codegen;
+mod import_tree;
+use sorel_codegen::riscv64_asm::*;
 
 use anyhow::Result;
 
@@ -9,8 +9,8 @@ use std::path::PathBuf;
 
 fn main() -> Result<()> {
     let filename = std::env::args().nth(1).expect("must provide a file to compile");
-    let module = ir::compile(&filename)?;
-    let mut generator = riscv_asm_codegen::CodeGen::new(&module, 4096);
+    let module = import_tree::build_and_collapse(&filename)?;
+    let mut generator = CodeGen::new(&module, 4096);
     let mut asm_path = PathBuf::from(filename);
     asm_path.set_extension("asm");
     let mut output = File::create(asm_path)?;
diff --git a/sorelc/src/riscv_asm_codegen.rs b/sorelc/src/riscv_asm_codegen.rs
deleted file mode 100644 (file)
index de72435..0000000
+++ /dev/null
@@ -1,386 +0,0 @@
-use sorel_ir::*;
-
-use anyhow::*;
-
-use std::collections::{HashMap, HashSet};
-use std::fmt::Display;
-
-pub struct CodeGen<'a> {
-    module: &'a IRObject,
-    data_stack_size: usize,
-    lines: Vec<String>,
-}
-
-
-// Some inspiration
-// ================
-//
-// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list
-//     * Except using sp as a more C ABI style stack pointer, and s2 for the data stack
-// 
-
-// Implementation Choices
-// ======================
-//
-// Data Stack pointer: s2
-// No return stack pointer (using C ABI, so sp, sorta)
-// Use t0, t1, t2 for temporary values in words
-// Data stack grows down
-
-
-macro_rules! asm_macro {
-    ($name:ident, $src:expr) => {
-        fn $name(&mut self) {
-            self.line($src);
-        }
-    };
-    ($name:ident, $src:expr, $arg0:ty) => {
-        fn $name(&mut self, val0: $arg0) {
-            self.line(format!($src, val0));
-        }
-    };
-    ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => {
-        fn $name(&mut self, val0: $arg0, val1: $arg1) {
-            self.line(format!($src, val0, val1));
-        }
-    };
-}
-
-fn mangle(input: &str) -> String {
-    input
-        .replace("<", "_LT_")
-        .replace(">", "_GT_")
-        .replace("-", "___")
-}
-
-impl<'a> CodeGen<'a> {
-    pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self {
-        Self {
-            module: ir_mod,
-            data_stack_size,
-            lines: vec![],
-        }
-    }
-
-    fn line<S: Display>(&mut self, line: S) {
-        self.lines.push(format!("    {}", line));
-
-    }
-
-    fn label<S: Display>(&mut self, line: S) {
-        self.lines.push(line.to_string());
-    }
-
-    asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str);
-    asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize);
-    asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str);
-    asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize);
-
-    fn pop_to(&mut self, reg: &str) {
-        self.copy_top_stack_value_to(reg);
-        self.move_stack_ptr_by_cells(1);
-    }
-
-    fn pop_some_to(&mut self, regs: &str) {
-        let mut regs = regs.trim().split(" ").collect::<Vec<_>>();
-        regs.reverse();
-        let count = regs.len();
-        let mut index = 0;
-        for reg in regs {
-            self.copy_offset_stack_value_to(reg, index);
-            index += 1;
-        }
-        self.move_stack_ptr_by_cells(count as isize);
-    }
-
-    fn push_from(&mut self, reg: &str) {
-        self.move_stack_ptr_by_cells(-1);
-        self.copy_to_top_of_stack(reg);
-    }
-
-    fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) {
-        self.pop_some_to(regs);
-        self.line(call);
-        self.push_from(reg);
-    }
-
-    pub fn assembly(&mut self) -> Result<String>{
-        let mut string_table = HashMap::new();
-
-        // Static strings
-        self.label(".section .rodata\n");
-        self.label(".align 3\n");
-        for ir in &self.module.data {
-            match ir {
-                IR::StringDef(string_label, some_string) => {
-                    string_table.insert(some_string.clone(), string_label);
-                    self.label(format!("{}:", string_label));
-                    self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz?
-                    self.label("");
-                },
-                _ => bail!("Currently only string definitions are supported in the data section.")
-            }
-        }
-
-        // Data stack
-        self.label(".data\n");
-        self.label(".align 3\n");
-        self.label("data_stack:");
-        self.line(format!(".space {}", self.data_stack_size));
-        self.label(".globl data_stack_end\ndata_stack_end:\n");
-
-        // Code
-        self.label(".text\n");
-        self.label(".align 3\n");
-
-        let mut if_block_count = 0;
-        let mut if_stack = vec![];
-        let mut loop_count = 0;
-        let mut loop_stack = vec![];
-        let mut seen_else = HashSet::new();
-        let mut last_label = "";
-
-        for ir in &self.module.text {
-            match ir {
-                IR::Label(name) => {
-                    last_label = name;
-                    if name == "main" {
-                        self.label(".globl _start"); // TODO is globl necessary?
-                        self.label("_start:");
-                        self.line("la s2, data_stack_end # set initial data stack pointer");
-                    } else {
-                        let mangled = mangle(name);
-                        self.label(format!(".globl {}", mangled));
-                        self.label(format!("{}:", mangled));
-                    }
-                    self.line("addi sp, sp, -16 # allocate 16 bytes on stack"); // allocate 16 bytes on stack
-                    self.line("sd ra, 8(sp) # store return address on stack");   // store return address on stack
-                },
-                IR::Call(name) => {
-                    let mangled = mangle(name);
-                    self.label(format!("# call {}", mangled));
-                    self.line(format!("call {}", mangled));    
-                },
-                IR::WordPointer(name) => {
-                    let mangled = mangle(name);
-                    self.label(format!("# '{} (word pointer)", mangled));
-                    self.line(format!("la t0, {}", mangled));
-                    self.push_from("t0");
-                },
-                IR::CallPtr => {
-                    self.label("# callptr");
-                    self.pop_to("t0");
-                    self.line("jalr t0");
-                },
-                IR::Ret => {
-                    if last_label == "main" {
-                        self.label("# exit 0 syscall");
-                        self.line("li a7, 93");
-                        self.line("mv a0, x0");
-                        self.line("ecall");
-                    } else {
-                        self.line("ld ra, 8(sp)");  // load return address from stack
-                        self.line("addi sp, sp, 16"); // restore stack pointer
-                        self.line("ret");
-                    }
-                },
-                IR::Load8 => {
-                    self.label("# load 8");
-                    self.copy_top_stack_value_to("t0");
-                    self.line("lbu   t0, 0(t0)"); // deref pointer in t0 to t0
-                    self.copy_to_top_of_stack("t0");
-                },
-                IR::Load16 => {
-                    self.label("# load 16");
-                    self.copy_top_stack_value_to("t0");
-                    self.line("lhu   t0, 0(t0)"); // deref pointer in t0 to t0
-                    self.copy_to_top_of_stack("t0");
-                },
-                IR::Load32 => {
-                    self.label("# load 32");
-                    self.copy_top_stack_value_to("t0");
-                    self.line("lwu   t0, 0(t0)"); // deref pointer in t0 to t0
-                    self.copy_to_top_of_stack("t0");
-                },
-                IR::Load => {
-                    self.label("# load 64");
-                    self.copy_top_stack_value_to("t0");
-                    self.line("ld   t0, 0(t0)"); // deref pointer in t0 to t0
-                    self.copy_to_top_of_stack("t0");
-                },
-                IR::Store8 => { // ( x addr -- )
-                    self.pop_some_to("t0 t1");
-                    self.line("sb t0, 0(t1)"); // store x at addr 
-                },
-                IR::Store16 => { // ( x addr -- )
-                    self.pop_some_to("t0 t1");
-                    self.line("sh t0, 0(t1)"); // store x at addr 
-                },
-                IR::Store32 => { // ( x addr -- )
-                    self.pop_some_to("t0 t1");
-                    self.line("sw t0, 0(t1)"); // store x at addr 
-                },
-                IR::Store => { // ( x addr -- )
-                    self.pop_some_to("t0 t1");
-                    self.line("sd t0, 0(t1)"); // store x at addr 
-                },
-                IR::StackPush(num) => {
-                    self.label(format!("# stackpush {}", num));
-                    self.line(format!("li t0, {}", num));
-                    self.push_from("t0");
-                },
-                IR::StackPushString(name) => {
-                    self.label(format!("# stackpushstring {}", name));
-                    self.line(format!("la t0, {}", name));
-                    self.push_from("t0");
-                },
-                IR::AddU64 => {
-                    self.label("# add");
-                    self.pop_call_push("t0 t1", "add t0, t0, t1", "t0"); 
-                },
-                IR::SubtractU64 => {
-                    self.label("# sub");
-                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); 
-                },
-                IR::MultiplyU64 => {
-                    self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0"); 
-                },
-                IR::DivideU64 => {
-                    self.pop_call_push("t0 t1", "div t0, t0, t1", "t0"); 
-                },
-                IR::ModU64 => {
-                    self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0"); 
-                },
-                IR::Dup => {
-                    self.label("# dup");
-                    self.copy_top_stack_value_to("t0");
-                    self.push_from("t0");
-                },
-                IR::Swap => {
-                    self.label("# swap");
-                    self.pop_some_to("t1 t0");
-                    self.push_from("t0");
-                    self.push_from("t1");
-                },
-                IR::Over => {
-                    // TODO this is super inefficient. There's no need to pop anything. Just read
-                    // from the second stack position and push it.
-                    self.label("# over");
-                    self.pop_some_to("t0 t1");
-                    self.push_from("t0");
-                    self.push_from("t1");
-                    self.push_from("t0");
-                },
-                IR::Rot => {
-                    self.label("# rot");
-                    self.pop_some_to("t0 t1 t2");
-                    self.push_from("t1");
-                    self.push_from("t2");
-                    self.push_from("t0");
-                },
-                IR::StackPointer => {
-                    self.label("# sp");
-                    self.line("addi t0, s2, 0");
-                    self.push_from("t0");
-                },
-                IR::StackBottom => {
-                    self.label("# stackbottom");
-                    self.line("la t0, data_stack_end");
-                    self.push_from("t0");
-                }
-                IR::Drop => {
-                    self.label("# drop");
-                    self.move_stack_ptr_by_cells(1);
-                },
-                IR::Equals => {
-                    // Yes, this is the same as subtract, since we're treating 0 as true, and
-                    // others as false.
-                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); 
-                },
-                IR::GreaterThan => {
-                    self.label("# >");
-                    self.pop_some_to("t0 t1");
-                    self.line("sgt  t0, t0, t1");
-                    self.line("seqz t0, t0"); // remember, 0 is true, others are false
-                    self.push_from("t0");
-                },
-                IR::LessThan => {
-                    self.label("# <");
-                    self.pop_some_to("t0 t1");
-                    self.line("slt  t0, t0, t1");
-                    self.line("seqz t0, t0"); // remember, 0 is true, others are false
-                    self.push_from("t0");
-                },
-                IR::BitwiseOr => {
-                    self.pop_call_push("t0 t1", "or t0, t0, t1", "t0"); 
-                },
-                IR::Sys0 => {
-                    self.pop_call_push("a7", "ecall", "a0");
-                },
-                IR::Sys1 => {
-                    self.pop_call_push("a0 a7", "ecall", "a0");
-                },
-                IR::Sys2 => {
-                    self.pop_call_push("a0 a1 a7", "ecall", "a0");
-                },
-                IR::Sys3 => {
-                    self.pop_call_push("a0 a1 a2 a7", "ecall", "a0");
-                },
-                IR::Sys4 => {
-                    self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0");
-                },
-                IR::Sys5 => {
-                    self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0");
-                },
-                IR::Sys6 => {
-                    self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0");
-                },
-                // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html
-                IR::If => {
-                    self.label("# if");
-                    self.pop_to("t0");
-                    self.line(format!("bnez t0, _else_{}", if_block_count));
-                    if_stack.push(if_block_count);
-                    if_block_count += 1;
-                },
-                IR::Else => {
-                    self.label("# else");
-                    let if_counter = *if_stack.last().unwrap();
-                    self.line(format!("j _endif_{}", if_counter));
-                    self.label(format!("_else_{}:", if_counter));
-                    seen_else.insert(if_counter);
-                },
-                IR::EndIf => {
-                    self.label("# endif");
-                    let stack = &mut if_stack;
-                    let if_counter = *stack.last().unwrap();
-                    if !seen_else.contains(&if_counter) {
-                        self.label(format!("_else_{}:", if_counter));
-                    } else {
-                        self.label(format!("_endif_{}:", if_counter));
-                        seen_else.remove(&if_counter);
-                    }
-                    stack.pop();
-                },
-                IR::Loop => { // keep looping until is true/0
-                    self.label(format!("_loop_{}:", loop_count));
-                    self.pop_to("t0");
-                    self.line(format!("beqz t0, _endloop_{}", loop_count));
-                    loop_stack.push(loop_count);
-                    loop_count += 1;
-                },
-                IR::EndLoop => {
-                    let stack = &mut loop_stack;
-                    let loop_counter = *stack.last().unwrap();
-                    self.line(format!("j _loop_{}", loop_counter));
-                    self.label(format!("_endloop_{}:", loop_counter));
-                    stack.pop();
-                },
-                _ => bail!("not implemented yet: {:?}", ir),
-            }
-        }
-
-        Ok(self.lines.join("\n"))
-    }
-}
-