From: Bryan English Date: Mon, 16 Feb 2026 03:45:43 +0000 (-0500) Subject: more cleanup and organizing X-Git-Url: https://rethought.computer/gitweb//gitweb//git?a=commitdiff_plain;h=9f36a6e7412af41931a04eb54829b11c62a8fed0;p=sorel-lang.git more cleanup and organizing --- diff --git a/Cargo.lock b/Cargo.lock index 017d419..839a794 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,14 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sorel-codegen" +version = "0.1.0" +dependencies = [ + "anyhow", + "sorel-ir", +] + [[package]] name = "sorel-ir" version = "0.1.0" @@ -132,6 +140,7 @@ name = "sorelc" version = "0.1.0" dependencies = [ "anyhow", + "sorel-codegen", "sorel-ir", "sorel-parser", "sorel-tokenizer", diff --git a/Cargo.toml b/Cargo.toml index 8e7cea9..7f17661 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,10 @@ [workspace] resolver = "3" -members = ["sorel-ir","sorel-parser","sorel-tokenizer","sorelc"] +members = ["sorel-codegen","sorel-ir","sorel-parser","sorel-tokenizer","sorelc"] [workspace.dependencies] sorel-ir = { path = "./sorel-ir", version = "0.1.0" } +sorel-codegen = { path = "./sorel-codegen", version = "0.1.0" } sorel-tokenizer = { path = "./sorel-tokenizer", version = "0.1.0" } sorel-parser = { path = "./sorel-parser", version = "0.1.0" } diff --git a/sorel-codegen/Cargo.toml b/sorel-codegen/Cargo.toml new file mode 100644 index 0000000..8caa9bd --- /dev/null +++ b/sorel-codegen/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "sorel-codegen" +version = "0.1.0" +edition = "2024" + +[dependencies] +sorel-ir = { workspace = true } +anyhow = "1.0.100" diff --git a/sorel-codegen/src/lib.rs b/sorel-codegen/src/lib.rs new file mode 100644 index 0000000..72a6c86 --- /dev/null +++ b/sorel-codegen/src/lib.rs @@ -0,0 +1,2 @@ +pub mod riscv64_asm; + diff --git a/sorel-codegen/src/riscv64_asm.rs b/sorel-codegen/src/riscv64_asm.rs new file mode 100644 index 0000000..de72435 --- /dev/null +++ b/sorel-codegen/src/riscv64_asm.rs @@ -0,0 +1,386 @@ +use sorel_ir::*; + +use anyhow::*; + +use std::collections::{HashMap, HashSet}; +use std::fmt::Display; + +pub struct CodeGen<'a> { + module: &'a IRObject, + data_stack_size: usize, + lines: Vec, +} + + +// Some inspiration +// ================ +// +// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list +// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack +// + +// Implementation Choices +// ====================== +// +// Data Stack pointer: s2 +// No return stack pointer (using C ABI, so sp, sorta) +// Use t0, t1, t2 for temporary values in words +// Data stack grows down + + +macro_rules! asm_macro { + ($name:ident, $src:expr) => { + fn $name(&mut self) { + self.line($src); + } + }; + ($name:ident, $src:expr, $arg0:ty) => { + fn $name(&mut self, val0: $arg0) { + self.line(format!($src, val0)); + } + }; + ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => { + fn $name(&mut self, val0: $arg0, val1: $arg1) { + self.line(format!($src, val0, val1)); + } + }; +} + +fn mangle(input: &str) -> String { + input + .replace("<", "_LT_") + .replace(">", "_GT_") + .replace("-", "___") +} + +impl<'a> CodeGen<'a> { + pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self { + Self { + module: ir_mod, + data_stack_size, + lines: vec![], + } + } + + fn line(&mut self, line: S) { + self.lines.push(format!(" {}", line)); + + } + + fn label(&mut self, line: S) { + self.lines.push(line.to_string()); + } + + asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str); + asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize); + asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str); + asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize); + + fn pop_to(&mut self, reg: &str) { + self.copy_top_stack_value_to(reg); + self.move_stack_ptr_by_cells(1); + } + + fn pop_some_to(&mut self, regs: &str) { + let mut regs = regs.trim().split(" ").collect::>(); + regs.reverse(); + let count = regs.len(); + let mut index = 0; + for reg in regs { + self.copy_offset_stack_value_to(reg, index); + index += 1; + } + self.move_stack_ptr_by_cells(count as isize); + } + + fn push_from(&mut self, reg: &str) { + self.move_stack_ptr_by_cells(-1); + self.copy_to_top_of_stack(reg); + } + + fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) { + self.pop_some_to(regs); + self.line(call); + self.push_from(reg); + } + + pub fn assembly(&mut self) -> Result{ + let mut string_table = HashMap::new(); + + // Static strings + self.label(".section .rodata\n"); + self.label(".align 3\n"); + for ir in &self.module.data { + match ir { + IR::StringDef(string_label, some_string) => { + string_table.insert(some_string.clone(), string_label); + self.label(format!("{}:", string_label)); + self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz? + self.label(""); + }, + _ => bail!("Currently only string definitions are supported in the data section.") + } + } + + // Data stack + self.label(".data\n"); + self.label(".align 3\n"); + self.label("data_stack:"); + self.line(format!(".space {}", self.data_stack_size)); + self.label(".globl data_stack_end\ndata_stack_end:\n"); + + // Code + self.label(".text\n"); + self.label(".align 3\n"); + + let mut if_block_count = 0; + let mut if_stack = vec![]; + let mut loop_count = 0; + let mut loop_stack = vec![]; + let mut seen_else = HashSet::new(); + let mut last_label = ""; + + for ir in &self.module.text { + match ir { + IR::Label(name) => { + last_label = name; + if name == "main" { + self.label(".globl _start"); // TODO is globl necessary? + self.label("_start:"); + self.line("la s2, data_stack_end # set initial data stack pointer"); + } else { + let mangled = mangle(name); + self.label(format!(".globl {}", mangled)); + self.label(format!("{}:", mangled)); + } + self.line("addi sp, sp, -16 # allocate 16 bytes on stack"); // allocate 16 bytes on stack + self.line("sd ra, 8(sp) # store return address on stack"); // store return address on stack + }, + IR::Call(name) => { + let mangled = mangle(name); + self.label(format!("# call {}", mangled)); + self.line(format!("call {}", mangled)); + }, + IR::WordPointer(name) => { + let mangled = mangle(name); + self.label(format!("# '{} (word pointer)", mangled)); + self.line(format!("la t0, {}", mangled)); + self.push_from("t0"); + }, + IR::CallPtr => { + self.label("# callptr"); + self.pop_to("t0"); + self.line("jalr t0"); + }, + IR::Ret => { + if last_label == "main" { + self.label("# exit 0 syscall"); + self.line("li a7, 93"); + self.line("mv a0, x0"); + self.line("ecall"); + } else { + self.line("ld ra, 8(sp)"); // load return address from stack + self.line("addi sp, sp, 16"); // restore stack pointer + self.line("ret"); + } + }, + IR::Load8 => { + self.label("# load 8"); + self.copy_top_stack_value_to("t0"); + self.line("lbu t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Load16 => { + self.label("# load 16"); + self.copy_top_stack_value_to("t0"); + self.line("lhu t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Load32 => { + self.label("# load 32"); + self.copy_top_stack_value_to("t0"); + self.line("lwu t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Load => { + self.label("# load 64"); + self.copy_top_stack_value_to("t0"); + self.line("ld t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Store8 => { // ( x addr -- ) + self.pop_some_to("t0 t1"); + self.line("sb t0, 0(t1)"); // store x at addr + }, + IR::Store16 => { // ( x addr -- ) + self.pop_some_to("t0 t1"); + self.line("sh t0, 0(t1)"); // store x at addr + }, + IR::Store32 => { // ( x addr -- ) + self.pop_some_to("t0 t1"); + self.line("sw t0, 0(t1)"); // store x at addr + }, + IR::Store => { // ( x addr -- ) + self.pop_some_to("t0 t1"); + self.line("sd t0, 0(t1)"); // store x at addr + }, + IR::StackPush(num) => { + self.label(format!("# stackpush {}", num)); + self.line(format!("li t0, {}", num)); + self.push_from("t0"); + }, + IR::StackPushString(name) => { + self.label(format!("# stackpushstring {}", name)); + self.line(format!("la t0, {}", name)); + self.push_from("t0"); + }, + IR::AddU64 => { + self.label("# add"); + self.pop_call_push("t0 t1", "add t0, t0, t1", "t0"); + }, + IR::SubtractU64 => { + self.label("# sub"); + self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); + }, + IR::MultiplyU64 => { + self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0"); + }, + IR::DivideU64 => { + self.pop_call_push("t0 t1", "div t0, t0, t1", "t0"); + }, + IR::ModU64 => { + self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0"); + }, + IR::Dup => { + self.label("# dup"); + self.copy_top_stack_value_to("t0"); + self.push_from("t0"); + }, + IR::Swap => { + self.label("# swap"); + self.pop_some_to("t1 t0"); + self.push_from("t0"); + self.push_from("t1"); + }, + IR::Over => { + // TODO this is super inefficient. There's no need to pop anything. Just read + // from the second stack position and push it. + self.label("# over"); + self.pop_some_to("t0 t1"); + self.push_from("t0"); + self.push_from("t1"); + self.push_from("t0"); + }, + IR::Rot => { + self.label("# rot"); + self.pop_some_to("t0 t1 t2"); + self.push_from("t1"); + self.push_from("t2"); + self.push_from("t0"); + }, + IR::StackPointer => { + self.label("# sp"); + self.line("addi t0, s2, 0"); + self.push_from("t0"); + }, + IR::StackBottom => { + self.label("# stackbottom"); + self.line("la t0, data_stack_end"); + self.push_from("t0"); + } + IR::Drop => { + self.label("# drop"); + self.move_stack_ptr_by_cells(1); + }, + IR::Equals => { + // Yes, this is the same as subtract, since we're treating 0 as true, and + // others as false. + self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); + }, + IR::GreaterThan => { + self.label("# >"); + self.pop_some_to("t0 t1"); + self.line("sgt t0, t0, t1"); + self.line("seqz t0, t0"); // remember, 0 is true, others are false + self.push_from("t0"); + }, + IR::LessThan => { + self.label("# <"); + self.pop_some_to("t0 t1"); + self.line("slt t0, t0, t1"); + self.line("seqz t0, t0"); // remember, 0 is true, others are false + self.push_from("t0"); + }, + IR::BitwiseOr => { + self.pop_call_push("t0 t1", "or t0, t0, t1", "t0"); + }, + IR::Sys0 => { + self.pop_call_push("a7", "ecall", "a0"); + }, + IR::Sys1 => { + self.pop_call_push("a0 a7", "ecall", "a0"); + }, + IR::Sys2 => { + self.pop_call_push("a0 a1 a7", "ecall", "a0"); + }, + IR::Sys3 => { + self.pop_call_push("a0 a1 a2 a7", "ecall", "a0"); + }, + IR::Sys4 => { + self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0"); + }, + IR::Sys5 => { + self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0"); + }, + IR::Sys6 => { + self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0"); + }, + // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html + IR::If => { + self.label("# if"); + self.pop_to("t0"); + self.line(format!("bnez t0, _else_{}", if_block_count)); + if_stack.push(if_block_count); + if_block_count += 1; + }, + IR::Else => { + self.label("# else"); + let if_counter = *if_stack.last().unwrap(); + self.line(format!("j _endif_{}", if_counter)); + self.label(format!("_else_{}:", if_counter)); + seen_else.insert(if_counter); + }, + IR::EndIf => { + self.label("# endif"); + let stack = &mut if_stack; + let if_counter = *stack.last().unwrap(); + if !seen_else.contains(&if_counter) { + self.label(format!("_else_{}:", if_counter)); + } else { + self.label(format!("_endif_{}:", if_counter)); + seen_else.remove(&if_counter); + } + stack.pop(); + }, + IR::Loop => { // keep looping until is true/0 + self.label(format!("_loop_{}:", loop_count)); + self.pop_to("t0"); + self.line(format!("beqz t0, _endloop_{}", loop_count)); + loop_stack.push(loop_count); + loop_count += 1; + }, + IR::EndLoop => { + let stack = &mut loop_stack; + let loop_counter = *stack.last().unwrap(); + self.line(format!("j _loop_{}", loop_counter)); + self.label(format!("_endloop_{}:", loop_counter)); + stack.pop(); + }, + _ => bail!("not implemented yet: {:?}", ir), + } + } + + Ok(self.lines.join("\n")) + } +} + diff --git a/sorelc/Cargo.toml b/sorelc/Cargo.toml index 0129099..9550d4e 100644 --- a/sorelc/Cargo.toml +++ b/sorelc/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] +sorel-codegen = { workspace = true } sorel-ir = { workspace = true } sorel-tokenizer = { workspace = true } sorel-parser = { workspace = true } diff --git a/sorelc/src/import_tree.rs b/sorelc/src/import_tree.rs new file mode 100644 index 0000000..9c6896b --- /dev/null +++ b/sorelc/src/import_tree.rs @@ -0,0 +1,185 @@ +use sorel_parser::Module; +use sorel_ir::*; +use sorel_tokenizer::tokenize; + +use std::collections::{HashSet, HashMap}; +use std::path::PathBuf; +use std::rc::Rc; +use std::cell::RefCell; +use std::include_str; + +use anyhow::{Result, bail, anyhow}; + +#[derive(Default)] +pub(crate) struct ImportTree { + data: Vec, + text: Vec, + all_modules: HashMap, + all_exports: HashSet, + entrypoint: WrappedIRModule, + module_count: usize, + collapse_seen: HashSet, +} + +fn std_import(specifier: &str) -> Result<&str> { + match specifier { + "std:mem" => Ok(include_str!("../../stdlib/mem.sorel")), + "std:out" => Ok(include_str!("../../stdlib/out.sorel")), + "std:string" => Ok(include_str!("../../stdlib/string.sorel")), + "std:process" => Ok(include_str!("../../stdlib/process.sorel")), + _ => bail!("{} is not a standard library module", specifier), + } +} + +impl ImportTree { + fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result { + let (contents, module_id) = if specifier.starts_with("std:") { + if self.all_modules.contains_key(specifier) { + let module = self.all_modules.get(specifier).unwrap().clone(); + return Ok(module); + } + let contents = std_import(specifier)?; + (contents.to_string(), ModuleID::StdSpecifier(specifier.to_string())) + } else { + let mut path = PathBuf::from(specifier); + if path.is_relative() { + let mut new_path = importer_dir.clone(); + new_path.push(path); + path = new_path.canonicalize()?; + } + let path_key = path.to_string_lossy().to_string(); + if self.all_modules.contains_key(&path_key) { + let module = self.all_modules.get(&path_key).unwrap().clone(); + return Ok(module); + } + + let contents = std::fs::read_to_string(&path)?; + (contents, ModuleID::SourceFile(path)) + }; + let tokens = tokenize(&contents)?; + let parsed = &Module::parse(tokens, is_entrypoint)?; + let module = self.ir_mod_from_parsed(module_id.clone(), parsed)?; + let module = Rc::new(RefCell::new(module)); + self.all_modules.insert(module_id.to_string(), module.clone()); + if is_entrypoint { + self.entrypoint = module.clone(); + } + Ok(module) + } + + fn ir_mod_from_parsed(&mut self, module_id: ModuleID, module: &Module) -> Result { + // Eventually these will end up being sections in assembly + let mut text = vec![]; + let mut data = vec![]; + + let mut imports = vec![]; + + let parent_path = match module_id { + ModuleID::SourceFile(ref path) => { + path.parent().ok_or(anyhow!("no parent for path: {:?}", path))?.to_path_buf() + }, + // A stdlib module can only import other stdlib + // modules, so no need for parent path. + ModuleID::StdSpecifier(_) => PathBuf::new(), + }; + module.imports.iter().try_for_each(|imported| -> Result<()> { + let new_module = self.import(&parent_path, imported, false)?; + imports.push(new_module); + Ok(()) + })?; + + let exports: Vec<_> = module.exports.iter().map(|s| { + self.all_exports.insert(s.to_string()); + s.to_string() + }).collect(); + + let externs = module.externs.iter().map(|s| s.to_string()).collect(); + + text.push(module.words.iter().flat_map(|def| { + let mut body = def.instructions.iter().map(|inst| { + IR::from_token(inst, &mut data) + }).collect::>(); + + let mut result = vec![IR::Label(def.name.to_string())]; + result.append(&mut body); + result.push(IR::Ret); + result + }).collect::>()); + + let number = self.module_count; + self.module_count += 1; + + Ok(IRModule { + text: text.into_iter().flatten().collect::>(), + data, + imports, + exports, + externs, + module_id, + number, + }) + } + + fn collapse(&mut self, module: WrappedIRModule, is_entrypoint: bool) -> Result<()> { + let module = module.borrow_mut(); + let seen_key = module.module_id.to_string(); + if self.collapse_seen.contains(&seen_key) { + return Ok(()) + } + + for imported in module.imports.clone() { + self.collapse(imported, false)?; + } + + let module_number = module.number; + + for string in &module.data { + if let IR::StringDef(name, val) = string { + let new_name = format!("{}_{}", name, module_number); + self.data.push(IR::StringDef(new_name, val.clone())); + } else { + bail!("non-string data"); + } + } + + for instruction in &module.text { + let new_instruction = match instruction { + IR::StackPushString(name) => { + let new_name = format!("{}_{}", name, module_number); + IR::StackPushString(new_name) + }, + IR::Label(name) => { + if is_entrypoint && name == "main" { + instruction.clone() + } else { + IR::Label(module.get_label(name)) + } + }, + IR::Call(name) => { + IR::Call(module.get_label_for_call(name)) + }, + IR::WordPointer(name) => { + IR::WordPointer(module.get_label_for_call(name)) + }, + _ => instruction.clone() + }; + self.text.push(new_instruction); + } + + self.collapse_seen.insert(seen_key); + + Ok(()) + } +} + +pub fn build_and_collapse(path: &str) -> Result { + let dir = std::env::current_dir()?; + let mut tree: ImportTree = Default::default(); + let module = tree.import(&dir, path, true)?; + tree.collapse(module, true)?; + // TODO remove unused words + Ok(IRObject { + data: tree.data, + text: tree.text, + }) +} diff --git a/sorelc/src/ir.rs b/sorelc/src/ir.rs deleted file mode 100644 index 8176da5..0000000 --- a/sorelc/src/ir.rs +++ /dev/null @@ -1,185 +0,0 @@ -use sorel_parser::Module; -use sorel_ir::*; -use sorel_tokenizer::tokenize; - -use std::collections::{HashSet, HashMap}; -use std::path::PathBuf; -use std::rc::Rc; -use std::cell::RefCell; -use std::include_str; - -use anyhow::{Result, bail, anyhow}; - -#[derive(Default)] -pub(crate) struct ImportTree { - data: Vec, - text: Vec, - all_modules: HashMap, - all_exports: HashSet, - entrypoint: WrappedIRModule, - module_count: usize, - collapse_seen: HashSet, -} - -fn std_import(specifier: &str) -> Result<&str> { - match specifier { - "std:mem" => Ok(include_str!("../../stdlib/mem.sorel")), - "std:out" => Ok(include_str!("../../stdlib/out.sorel")), - "std:string" => Ok(include_str!("../../stdlib/string.sorel")), - "std:process" => Ok(include_str!("../../stdlib/process.sorel")), - _ => bail!("{} is not a standard library module", specifier), - } -} - -impl ImportTree { - fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result { - let (contents, module_id) = if specifier.starts_with("std:") { - if self.all_modules.contains_key(specifier) { - let module = self.all_modules.get(specifier).unwrap().clone(); - return Ok(module); - } - let contents = std_import(specifier)?; - (contents.to_string(), ModuleID::StdSpecifier(specifier.to_string())) - } else { - let mut path = PathBuf::from(specifier); - if path.is_relative() { - let mut new_path = importer_dir.clone(); - new_path.push(path); - path = new_path.canonicalize()?; - } - let path_key = path.to_string_lossy().to_string(); - if self.all_modules.contains_key(&path_key) { - let module = self.all_modules.get(&path_key).unwrap().clone(); - return Ok(module); - } - - let contents = std::fs::read_to_string(&path)?; - (contents, ModuleID::SourceFile(path)) - }; - let tokens = tokenize(&contents)?; - let parsed = &Module::parse(tokens, is_entrypoint)?; - let module = self.generate_internal(module_id.clone(), parsed)?; - let module = Rc::new(RefCell::new(module)); - self.all_modules.insert(module_id.to_string(), module.clone()); - if is_entrypoint { - self.entrypoint = module.clone(); - } - Ok(module) - } - - fn generate_internal(&mut self, module_id: ModuleID, module: &Module) -> Result { - // Eventually these will end up being sections in assembly - let mut text = vec![]; - let mut data = vec![]; - - let mut imports = vec![]; - - let parent_path = match module_id { - ModuleID::SourceFile(ref path) => { - path.parent().ok_or(anyhow!("no parent for path: {:?}", path))?.to_path_buf() - }, - // A stdlib module can only import other stdlib - // modules, so no need for parent path. - ModuleID::StdSpecifier(_) => PathBuf::new(), - }; - module.imports.iter().try_for_each(|imported| -> Result<()> { - let new_module = self.import(&parent_path, imported, false)?; - imports.push(new_module); - Ok(()) - })?; - - let exports: Vec<_> = module.exports.iter().map(|s| { - self.all_exports.insert(s.to_string()); - s.to_string() - }).collect(); - - let externs = module.externs.iter().map(|s| s.to_string()).collect(); - - text.push(module.words.iter().flat_map(|def| { - let mut body = def.instructions.iter().map(|inst| { - IR::from_token(inst, &mut data) - }).collect::>(); - - let mut result = vec![IR::Label(def.name.to_string())]; - result.append(&mut body); - result.push(IR::Ret); - result - }).collect::>()); - - let number = self.module_count; - self.module_count += 1; - - Ok(IRModule { - text: text.into_iter().flatten().collect::>(), - data, - imports, - exports, - externs, - module_id, - number, - }) - } - - fn collapse(&mut self, module: WrappedIRModule, is_entrypoint: bool) -> Result<()> { - let module = module.borrow_mut(); - let seen_key = module.module_id.to_string(); - if self.collapse_seen.contains(&seen_key) { - return Ok(()) - } - - for imported in module.imports.clone() { - self.collapse(imported, false)?; - } - - let module_number = module.number; - - for string in &module.data { - if let IR::StringDef(name, val) = string { - let new_name = format!("{}_{}", name, module_number); - self.data.push(IR::StringDef(new_name, val.clone())); - } else { - bail!("non-string data"); - } - } - - for instruction in &module.text { - let new_instruction = match instruction { - IR::StackPushString(name) => { - let new_name = format!("{}_{}", name, module_number); - IR::StackPushString(new_name) - }, - IR::Label(name) => { - if is_entrypoint && name == "main" { - instruction.clone() - } else { - IR::Label(module.get_label(name)) - } - }, - IR::Call(name) => { - IR::Call(module.get_label_for_call(name)) - }, - IR::WordPointer(name) => { - IR::WordPointer(module.get_label_for_call(name)) - }, - _ => instruction.clone() - }; - self.text.push(new_instruction); - } - - self.collapse_seen.insert(seen_key); - - Ok(()) - } -} - -pub fn compile(path: &str) -> Result { - let dir = std::env::current_dir()?; - let mut tree: ImportTree = Default::default(); - let module = tree.import(&dir, path, true)?; - tree.collapse(module, true)?; - // TODO remove unused words - Ok(IRObject { - data: tree.data, - text: tree.text, - }) -} diff --git a/sorelc/src/main.rs b/sorelc/src/main.rs index 141e322..18c93a2 100644 --- a/sorelc/src/main.rs +++ b/sorelc/src/main.rs @@ -1,5 +1,5 @@ -mod ir; -mod riscv_asm_codegen; +mod import_tree; +use sorel_codegen::riscv64_asm::*; use anyhow::Result; @@ -9,8 +9,8 @@ use std::path::PathBuf; fn main() -> Result<()> { let filename = std::env::args().nth(1).expect("must provide a file to compile"); - let module = ir::compile(&filename)?; - let mut generator = riscv_asm_codegen::CodeGen::new(&module, 4096); + let module = import_tree::build_and_collapse(&filename)?; + let mut generator = CodeGen::new(&module, 4096); let mut asm_path = PathBuf::from(filename); asm_path.set_extension("asm"); let mut output = File::create(asm_path)?; diff --git a/sorelc/src/riscv_asm_codegen.rs b/sorelc/src/riscv_asm_codegen.rs deleted file mode 100644 index de72435..0000000 --- a/sorelc/src/riscv_asm_codegen.rs +++ /dev/null @@ -1,386 +0,0 @@ -use sorel_ir::*; - -use anyhow::*; - -use std::collections::{HashMap, HashSet}; -use std::fmt::Display; - -pub struct CodeGen<'a> { - module: &'a IRObject, - data_stack_size: usize, - lines: Vec, -} - - -// Some inspiration -// ================ -// -// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list -// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack -// - -// Implementation Choices -// ====================== -// -// Data Stack pointer: s2 -// No return stack pointer (using C ABI, so sp, sorta) -// Use t0, t1, t2 for temporary values in words -// Data stack grows down - - -macro_rules! asm_macro { - ($name:ident, $src:expr) => { - fn $name(&mut self) { - self.line($src); - } - }; - ($name:ident, $src:expr, $arg0:ty) => { - fn $name(&mut self, val0: $arg0) { - self.line(format!($src, val0)); - } - }; - ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => { - fn $name(&mut self, val0: $arg0, val1: $arg1) { - self.line(format!($src, val0, val1)); - } - }; -} - -fn mangle(input: &str) -> String { - input - .replace("<", "_LT_") - .replace(">", "_GT_") - .replace("-", "___") -} - -impl<'a> CodeGen<'a> { - pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self { - Self { - module: ir_mod, - data_stack_size, - lines: vec![], - } - } - - fn line(&mut self, line: S) { - self.lines.push(format!(" {}", line)); - - } - - fn label(&mut self, line: S) { - self.lines.push(line.to_string()); - } - - asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str); - asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize); - asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str); - asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize); - - fn pop_to(&mut self, reg: &str) { - self.copy_top_stack_value_to(reg); - self.move_stack_ptr_by_cells(1); - } - - fn pop_some_to(&mut self, regs: &str) { - let mut regs = regs.trim().split(" ").collect::>(); - regs.reverse(); - let count = regs.len(); - let mut index = 0; - for reg in regs { - self.copy_offset_stack_value_to(reg, index); - index += 1; - } - self.move_stack_ptr_by_cells(count as isize); - } - - fn push_from(&mut self, reg: &str) { - self.move_stack_ptr_by_cells(-1); - self.copy_to_top_of_stack(reg); - } - - fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) { - self.pop_some_to(regs); - self.line(call); - self.push_from(reg); - } - - pub fn assembly(&mut self) -> Result{ - let mut string_table = HashMap::new(); - - // Static strings - self.label(".section .rodata\n"); - self.label(".align 3\n"); - for ir in &self.module.data { - match ir { - IR::StringDef(string_label, some_string) => { - string_table.insert(some_string.clone(), string_label); - self.label(format!("{}:", string_label)); - self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz? - self.label(""); - }, - _ => bail!("Currently only string definitions are supported in the data section.") - } - } - - // Data stack - self.label(".data\n"); - self.label(".align 3\n"); - self.label("data_stack:"); - self.line(format!(".space {}", self.data_stack_size)); - self.label(".globl data_stack_end\ndata_stack_end:\n"); - - // Code - self.label(".text\n"); - self.label(".align 3\n"); - - let mut if_block_count = 0; - let mut if_stack = vec![]; - let mut loop_count = 0; - let mut loop_stack = vec![]; - let mut seen_else = HashSet::new(); - let mut last_label = ""; - - for ir in &self.module.text { - match ir { - IR::Label(name) => { - last_label = name; - if name == "main" { - self.label(".globl _start"); // TODO is globl necessary? - self.label("_start:"); - self.line("la s2, data_stack_end # set initial data stack pointer"); - } else { - let mangled = mangle(name); - self.label(format!(".globl {}", mangled)); - self.label(format!("{}:", mangled)); - } - self.line("addi sp, sp, -16 # allocate 16 bytes on stack"); // allocate 16 bytes on stack - self.line("sd ra, 8(sp) # store return address on stack"); // store return address on stack - }, - IR::Call(name) => { - let mangled = mangle(name); - self.label(format!("# call {}", mangled)); - self.line(format!("call {}", mangled)); - }, - IR::WordPointer(name) => { - let mangled = mangle(name); - self.label(format!("# '{} (word pointer)", mangled)); - self.line(format!("la t0, {}", mangled)); - self.push_from("t0"); - }, - IR::CallPtr => { - self.label("# callptr"); - self.pop_to("t0"); - self.line("jalr t0"); - }, - IR::Ret => { - if last_label == "main" { - self.label("# exit 0 syscall"); - self.line("li a7, 93"); - self.line("mv a0, x0"); - self.line("ecall"); - } else { - self.line("ld ra, 8(sp)"); // load return address from stack - self.line("addi sp, sp, 16"); // restore stack pointer - self.line("ret"); - } - }, - IR::Load8 => { - self.label("# load 8"); - self.copy_top_stack_value_to("t0"); - self.line("lbu t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Load16 => { - self.label("# load 16"); - self.copy_top_stack_value_to("t0"); - self.line("lhu t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Load32 => { - self.label("# load 32"); - self.copy_top_stack_value_to("t0"); - self.line("lwu t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Load => { - self.label("# load 64"); - self.copy_top_stack_value_to("t0"); - self.line("ld t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Store8 => { // ( x addr -- ) - self.pop_some_to("t0 t1"); - self.line("sb t0, 0(t1)"); // store x at addr - }, - IR::Store16 => { // ( x addr -- ) - self.pop_some_to("t0 t1"); - self.line("sh t0, 0(t1)"); // store x at addr - }, - IR::Store32 => { // ( x addr -- ) - self.pop_some_to("t0 t1"); - self.line("sw t0, 0(t1)"); // store x at addr - }, - IR::Store => { // ( x addr -- ) - self.pop_some_to("t0 t1"); - self.line("sd t0, 0(t1)"); // store x at addr - }, - IR::StackPush(num) => { - self.label(format!("# stackpush {}", num)); - self.line(format!("li t0, {}", num)); - self.push_from("t0"); - }, - IR::StackPushString(name) => { - self.label(format!("# stackpushstring {}", name)); - self.line(format!("la t0, {}", name)); - self.push_from("t0"); - }, - IR::AddU64 => { - self.label("# add"); - self.pop_call_push("t0 t1", "add t0, t0, t1", "t0"); - }, - IR::SubtractU64 => { - self.label("# sub"); - self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); - }, - IR::MultiplyU64 => { - self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0"); - }, - IR::DivideU64 => { - self.pop_call_push("t0 t1", "div t0, t0, t1", "t0"); - }, - IR::ModU64 => { - self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0"); - }, - IR::Dup => { - self.label("# dup"); - self.copy_top_stack_value_to("t0"); - self.push_from("t0"); - }, - IR::Swap => { - self.label("# swap"); - self.pop_some_to("t1 t0"); - self.push_from("t0"); - self.push_from("t1"); - }, - IR::Over => { - // TODO this is super inefficient. There's no need to pop anything. Just read - // from the second stack position and push it. - self.label("# over"); - self.pop_some_to("t0 t1"); - self.push_from("t0"); - self.push_from("t1"); - self.push_from("t0"); - }, - IR::Rot => { - self.label("# rot"); - self.pop_some_to("t0 t1 t2"); - self.push_from("t1"); - self.push_from("t2"); - self.push_from("t0"); - }, - IR::StackPointer => { - self.label("# sp"); - self.line("addi t0, s2, 0"); - self.push_from("t0"); - }, - IR::StackBottom => { - self.label("# stackbottom"); - self.line("la t0, data_stack_end"); - self.push_from("t0"); - } - IR::Drop => { - self.label("# drop"); - self.move_stack_ptr_by_cells(1); - }, - IR::Equals => { - // Yes, this is the same as subtract, since we're treating 0 as true, and - // others as false. - self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); - }, - IR::GreaterThan => { - self.label("# >"); - self.pop_some_to("t0 t1"); - self.line("sgt t0, t0, t1"); - self.line("seqz t0, t0"); // remember, 0 is true, others are false - self.push_from("t0"); - }, - IR::LessThan => { - self.label("# <"); - self.pop_some_to("t0 t1"); - self.line("slt t0, t0, t1"); - self.line("seqz t0, t0"); // remember, 0 is true, others are false - self.push_from("t0"); - }, - IR::BitwiseOr => { - self.pop_call_push("t0 t1", "or t0, t0, t1", "t0"); - }, - IR::Sys0 => { - self.pop_call_push("a7", "ecall", "a0"); - }, - IR::Sys1 => { - self.pop_call_push("a0 a7", "ecall", "a0"); - }, - IR::Sys2 => { - self.pop_call_push("a0 a1 a7", "ecall", "a0"); - }, - IR::Sys3 => { - self.pop_call_push("a0 a1 a2 a7", "ecall", "a0"); - }, - IR::Sys4 => { - self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0"); - }, - IR::Sys5 => { - self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0"); - }, - IR::Sys6 => { - self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0"); - }, - // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html - IR::If => { - self.label("# if"); - self.pop_to("t0"); - self.line(format!("bnez t0, _else_{}", if_block_count)); - if_stack.push(if_block_count); - if_block_count += 1; - }, - IR::Else => { - self.label("# else"); - let if_counter = *if_stack.last().unwrap(); - self.line(format!("j _endif_{}", if_counter)); - self.label(format!("_else_{}:", if_counter)); - seen_else.insert(if_counter); - }, - IR::EndIf => { - self.label("# endif"); - let stack = &mut if_stack; - let if_counter = *stack.last().unwrap(); - if !seen_else.contains(&if_counter) { - self.label(format!("_else_{}:", if_counter)); - } else { - self.label(format!("_endif_{}:", if_counter)); - seen_else.remove(&if_counter); - } - stack.pop(); - }, - IR::Loop => { // keep looping until is true/0 - self.label(format!("_loop_{}:", loop_count)); - self.pop_to("t0"); - self.line(format!("beqz t0, _endloop_{}", loop_count)); - loop_stack.push(loop_count); - loop_count += 1; - }, - IR::EndLoop => { - let stack = &mut loop_stack; - let loop_counter = *stack.last().unwrap(); - self.line(format!("j _loop_{}", loop_counter)); - self.label(format!("_endloop_{}:", loop_counter)); - stack.pop(); - }, - _ => bail!("not implemented yet: {:?}", ir), - } - } - - Ok(self.lines.join("\n")) - } -} -