"unsafe-libyaml",
]
+[[package]]
+name = "sorel-codegen"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "sorel-ir",
+]
+
[[package]]
name = "sorel-ir"
version = "0.1.0"
version = "0.1.0"
dependencies = [
"anyhow",
+ "sorel-codegen",
"sorel-ir",
"sorel-parser",
"sorel-tokenizer",
[workspace]
resolver = "3"
-members = ["sorel-ir","sorel-parser","sorel-tokenizer","sorelc"]
+members = ["sorel-codegen","sorel-ir","sorel-parser","sorel-tokenizer","sorelc"]
[workspace.dependencies]
sorel-ir = { path = "./sorel-ir", version = "0.1.0" }
+sorel-codegen = { path = "./sorel-codegen", version = "0.1.0" }
sorel-tokenizer = { path = "./sorel-tokenizer", version = "0.1.0" }
sorel-parser = { path = "./sorel-parser", version = "0.1.0" }
--- /dev/null
+[package]
+name = "sorel-codegen"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+sorel-ir = { workspace = true }
+anyhow = "1.0.100"
--- /dev/null
+pub mod riscv64_asm;
+
--- /dev/null
+use sorel_ir::*;
+
+use anyhow::*;
+
+use std::collections::{HashMap, HashSet};
+use std::fmt::Display;
+
+/// Generator that turns an `IRObject` into assembly text (see `assembly`).
+pub struct CodeGen<'a> {
+ // IR object whose `data` and `text` lists are walked when emitting assembly.
+ module: &'a IRObject,
+ // Operand written after the `.space` directive that reserves the data stack.
+ data_stack_size: usize,
+ // Output lines accumulated so far; `assembly` joins them with `\n`.
+ lines: Vec<String>,
+}
+
+
+// Some inspiration
+// ================
+//
+// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list
+// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack
+//
+
+// Implementation Choices
+// ======================
+//
+// Data Stack pointer: s2
+// No return stack pointer (using C ABI, so sp, sorta)
+// Use t0, t1, t2 for temporary values in words
+// Data stack grows down
+
+
+/// Declares a private `CodeGen` method that emits a single assembly line.
+///
+/// The first argument is the method name and the second is the line template.
+/// Up to two further arguments give the types of the values that are
+/// substituted, in order, into the template's `{}` placeholders.
+macro_rules! asm_macro {
+    // No operands: the template is emitted as-is.
+    ($method:ident, $template:expr) => {
+        fn $method(&mut self) {
+            self.line($template);
+        }
+    };
+    // One operand substituted into the template.
+    ($method:ident, $template:expr, $first_ty:ty) => {
+        fn $method(&mut self, first: $first_ty) {
+            self.line(format!($template, first));
+        }
+    };
+    // Two operands substituted into the template, in declaration order.
+    ($method:ident, $template:expr, $first_ty:ty, $second_ty:ty) => {
+        fn $method(&mut self, first: $first_ty, second: $second_ty) {
+            self.line(format!($template, first, second));
+        }
+    };
+}
+
+/// Rewrites a word name for use in emitted labels and call targets.
+///
+/// `<` becomes `_LT_`, `>` becomes `_GT_` and `-` becomes `___`; every other
+/// character is copied through unchanged.
+fn mangle(input: &str) -> String {
+    let mut symbol = String::with_capacity(input.len());
+    for ch in input.chars() {
+        match ch {
+            '<' => symbol.push_str("_LT_"),
+            '>' => symbol.push_str("_GT_"),
+            '-' => symbol.push_str("___"),
+            other => symbol.push(other),
+        }
+    }
+    symbol
+}
+
+impl<'a> CodeGen<'a> {
+    /// Creates a generator for `ir_mod`.
+    ///
+    /// `data_stack_size` is the operand emitted for the `.space` directive that
+    /// reserves the data stack.
+    pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self {
+        Self {
+            module: ir_mod,
+            data_stack_size,
+            lines: Vec::new(),
+        }
+    }
+
+    /// Appends `line` to the output, prefixed with a single space.
+    fn line<S: Display>(&mut self, line: S) {
+        self.lines.push(format!(" {}", line));
+    }
+
+    /// Appends `line` to the output with no prefix (labels, directives, comments).
+    fn label<S: Display>(&mut self, line: S) {
+        self.lines.push(line.to_string());
+    }
+
+    // Primitive data-stack accesses; `s2` is the data stack pointer and a cell is 8 bytes.
+    asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str);
+    asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize);
+    asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str);
+    asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize);
+
+    /// Loads the top data-stack cell into `reg` and drops it from the stack.
+    fn pop_to(&mut self, reg: &str) {
+        self.copy_top_stack_value_to(reg);
+        self.move_stack_ptr_by_cells(1);
+    }
+
+    /// Pops one cell per whitespace-separated register in `regs`.
+    ///
+    /// The LAST register listed receives the top of the stack, the one before
+    /// it the cell below, and so on; the stack pointer is adjusted once at the
+    /// end by the number of registers filled.
+    fn pop_some_to(&mut self, regs: &str) {
+        let mut popped: isize = 0;
+        // `split_whitespace` ignores repeated or surrounding blanks, so a
+        // sloppy register list can never produce an empty operand.
+        for reg in regs.split_whitespace().rev() {
+            self.copy_offset_stack_value_to(reg, popped);
+            popped += 1;
+        }
+        self.move_stack_ptr_by_cells(popped);
+    }
+
+    /// Grows the data stack by one cell and stores `reg` into the new top.
+    fn push_from(&mut self, reg: &str) {
+        self.move_stack_ptr_by_cells(-1);
+        self.copy_to_top_of_stack(reg);
+    }
+
+    /// Pops into `regs` (see `pop_some_to`), emits `call` as one line, then
+    /// pushes `reg` as the result.
+    fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) {
+        self.pop_some_to(regs);
+        self.line(call);
+        self.push_from(reg);
+    }
+
+    /// Replaces the address on top of the stack with the value loaded from it
+    /// using `load_op`; `width` only appears in the emitted comment.
+    fn load_via_top(&mut self, width: &str, load_op: &str) {
+        self.label(format!("# load {}", width));
+        self.copy_top_stack_value_to("t0");
+        self.line(format!("{} t0, 0(t0)", load_op)); // deref pointer in t0 to t0
+        self.copy_to_top_of_stack("t0");
+    }
+
+    /// Emits `( x addr -- )`: pops both cells and stores x at addr with `store_op`.
+    fn store_via_top(&mut self, store_op: &str) {
+        self.pop_some_to("t0 t1");
+        self.line(format!("{} t0, 0(t1)", store_op)); // store x at addr
+    }
+
+    /// Emits the complete assembly listing for the module and returns it as a
+    /// single string with lines joined by `\n`.
+    ///
+    /// Any output accumulated by a previous call is discarded first, so the
+    /// method can be called repeatedly and always yields the same listing.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the data list contains anything other than a
+    /// string definition, when an `else`, `endif` or `endloop` has no matching
+    /// opener, or when an IR instruction has no lowering yet.
+    pub fn assembly(&mut self) -> Result<String> {
+        self.lines.clear();
+        // Copy the shared reference out so walking the IR does not keep `self` borrowed.
+        let module = self.module;
+
+        // Static strings
+        self.label(".section .rodata\n");
+        self.label(".align 3\n");
+        for ir in &module.data {
+            match ir {
+                IR::StringDef(string_label, some_string) => {
+                    self.label(format!("{}:", string_label));
+                    // NOTE(review): the string is emitted verbatim; confirm that quotes and
+                    // backslashes are already escaped by the time they reach the IR.
+                    self.line(format!(".asciz \"{}\"", some_string));
+                    self.label("");
+                }
+                _ => bail!("Currently only string definitions are supported in the data section."),
+            }
+        }
+
+        // Data stack
+        self.label(".data\n");
+        self.label(".align 3\n");
+        self.label("data_stack:");
+        self.line(format!(".space {}", self.data_stack_size));
+        self.label(".globl data_stack_end\ndata_stack_end:\n");
+
+        // Code
+        self.label(".text\n");
+        self.label(".align 3\n");
+
+        // Monotonic ids keep generated labels unique; the stacks track the
+        // currently open (possibly nested) constructs.
+        let mut if_block_count = 0;
+        let mut if_stack = vec![];
+        let mut loop_count = 0;
+        let mut loop_stack = vec![];
+        // Ids of open `if` blocks whose `else` has already been emitted.
+        let mut seen_else = HashSet::new();
+        // Whether the most recent label was `main`; decides how `Ret` is lowered.
+        let mut in_main = false;
+
+        for ir in &module.text {
+            match ir {
+                IR::Label(name) => {
+                    in_main = name == "main";
+                    if in_main {
+                        self.label(".globl _start"); // TODO is globl necessary?
+                        self.label("_start:");
+                        self.line("la s2, data_stack_end # set initial data stack pointer");
+                    } else {
+                        let mangled = mangle(name);
+                        self.label(format!(".globl {}", mangled));
+                        self.label(format!("{}:", mangled));
+                    }
+                    self.line("addi sp, sp, -16 # allocate 16 bytes on stack");
+                    self.line("sd ra, 8(sp) # store return address on stack");
+                }
+                IR::Call(name) => {
+                    let mangled = mangle(name);
+                    self.label(format!("# call {}", mangled));
+                    self.line(format!("call {}", mangled));
+                }
+                IR::WordPointer(name) => {
+                    let mangled = mangle(name);
+                    self.label(format!("# '{} (word pointer)", mangled));
+                    self.line(format!("la t0, {}", mangled));
+                    self.push_from("t0");
+                }
+                IR::CallPtr => {
+                    self.label("# callptr");
+                    self.pop_to("t0");
+                    self.line("jalr t0");
+                }
+                IR::Ret => {
+                    if in_main {
+                        self.label("# exit 0 syscall");
+                        self.line("li a7, 93");
+                        self.line("mv a0, x0");
+                        self.line("ecall");
+                    } else {
+                        self.line("ld ra, 8(sp)"); // load return address from stack
+                        self.line("addi sp, sp, 16"); // restore stack pointer
+                        self.line("ret");
+                    }
+                }
+                IR::Load8 => self.load_via_top("8", "lbu"),
+                IR::Load16 => self.load_via_top("16", "lhu"),
+                IR::Load32 => self.load_via_top("32", "lwu"),
+                IR::Load => self.load_via_top("64", "ld"),
+                IR::Store8 => self.store_via_top("sb"),
+                IR::Store16 => self.store_via_top("sh"),
+                IR::Store32 => self.store_via_top("sw"),
+                IR::Store => self.store_via_top("sd"),
+                IR::StackPush(num) => {
+                    self.label(format!("# stackpush {}", num));
+                    self.line(format!("li t0, {}", num));
+                    self.push_from("t0");
+                }
+                IR::StackPushString(name) => {
+                    self.label(format!("# stackpushstring {}", name));
+                    self.line(format!("la t0, {}", name));
+                    self.push_from("t0");
+                }
+                IR::AddU64 => {
+                    self.label("# add");
+                    self.pop_call_push("t0 t1", "add t0, t0, t1", "t0");
+                }
+                IR::SubtractU64 => {
+                    self.label("# sub");
+                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
+                }
+                IR::MultiplyU64 => {
+                    self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0");
+                }
+                // NOTE(review): `div`/`rem` are the signed instructions although the IR
+                // variants are named `...U64` — confirm whether `divu`/`remu` were intended.
+                IR::DivideU64 => {
+                    self.pop_call_push("t0 t1", "div t0, t0, t1", "t0");
+                }
+                IR::ModU64 => {
+                    self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0");
+                }
+                IR::Dup => {
+                    self.label("# dup");
+                    self.copy_top_stack_value_to("t0");
+                    self.push_from("t0");
+                }
+                IR::Swap => {
+                    self.label("# swap");
+                    self.pop_some_to("t1 t0");
+                    self.push_from("t0");
+                    self.push_from("t1");
+                }
+                IR::Over => {
+                    // TODO this is super inefficient. There's no need to pop anything. Just read
+                    // from the second stack position and push it.
+                    self.label("# over");
+                    self.pop_some_to("t0 t1");
+                    self.push_from("t0");
+                    self.push_from("t1");
+                    self.push_from("t0");
+                }
+                IR::Rot => {
+                    self.label("# rot");
+                    self.pop_some_to("t0 t1 t2");
+                    self.push_from("t1");
+                    self.push_from("t2");
+                    self.push_from("t0");
+                }
+                IR::StackPointer => {
+                    self.label("# sp");
+                    self.line("addi t0, s2, 0");
+                    self.push_from("t0");
+                }
+                IR::StackBottom => {
+                    self.label("# stackbottom");
+                    self.line("la t0, data_stack_end");
+                    self.push_from("t0");
+                }
+                IR::Drop => {
+                    self.label("# drop");
+                    self.move_stack_ptr_by_cells(1);
+                }
+                IR::Equals => {
+                    // Yes, this is the same as subtract, since we're treating 0 as true, and
+                    // others as false.
+                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
+                }
+                IR::GreaterThan => {
+                    self.label("# >");
+                    self.pop_some_to("t0 t1");
+                    self.line("sgt t0, t0, t1");
+                    self.line("seqz t0, t0"); // remember, 0 is true, others are false
+                    self.push_from("t0");
+                }
+                IR::LessThan => {
+                    self.label("# <");
+                    self.pop_some_to("t0 t1");
+                    self.line("slt t0, t0, t1");
+                    self.line("seqz t0, t0"); // remember, 0 is true, others are false
+                    self.push_from("t0");
+                }
+                IR::BitwiseOr => {
+                    self.pop_call_push("t0 t1", "or t0, t0, t1", "t0");
+                }
+                // Syscalls: the syscall number is on top of the stack (popped into a7),
+                // arguments sit below it, and a0 is pushed back as the result.
+                IR::Sys0 => {
+                    self.pop_call_push("a7", "ecall", "a0");
+                }
+                IR::Sys1 => {
+                    self.pop_call_push("a0 a7", "ecall", "a0");
+                }
+                IR::Sys2 => {
+                    self.pop_call_push("a0 a1 a7", "ecall", "a0");
+                }
+                IR::Sys3 => {
+                    self.pop_call_push("a0 a1 a2 a7", "ecall", "a0");
+                }
+                IR::Sys4 => {
+                    self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0");
+                }
+                IR::Sys5 => {
+                    self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0");
+                }
+                IR::Sys6 => {
+                    self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0");
+                }
+                // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html
+                IR::If => {
+                    self.label("# if");
+                    self.pop_to("t0");
+                    self.line(format!("bnez t0, _else_{}", if_block_count));
+                    if_stack.push(if_block_count);
+                    if_block_count += 1;
+                }
+                IR::Else => {
+                    self.label("# else");
+                    let Some(&if_counter) = if_stack.last() else {
+                        bail!("`else` without a matching `if`");
+                    };
+                    self.line(format!("j _endif_{}", if_counter));
+                    self.label(format!("_else_{}:", if_counter));
+                    seen_else.insert(if_counter);
+                }
+                IR::EndIf => {
+                    self.label("# endif");
+                    let Some(if_counter) = if_stack.pop() else {
+                        bail!("`endif` without a matching `if`");
+                    };
+                    // With an `else`, the false branch already has its `_else_` label and
+                    // the true branch jumps to `_endif_`; without one, `_else_` is the exit.
+                    if seen_else.remove(&if_counter) {
+                        self.label(format!("_endif_{}:", if_counter));
+                    } else {
+                        self.label(format!("_else_{}:", if_counter));
+                    }
+                }
+                IR::Loop => {
+                    // keep looping until is true/0
+                    self.label(format!("_loop_{}:", loop_count));
+                    self.pop_to("t0");
+                    self.line(format!("beqz t0, _endloop_{}", loop_count));
+                    loop_stack.push(loop_count);
+                    loop_count += 1;
+                }
+                IR::EndLoop => {
+                    let Some(loop_counter) = loop_stack.pop() else {
+                        bail!("`endloop` without a matching `loop`");
+                    };
+                    self.line(format!("j _loop_{}", loop_counter));
+                    self.label(format!("_endloop_{}:", loop_counter));
+                }
+                _ => bail!("not implemented yet: {:?}", ir),
+            }
+        }
+
+        Ok(self.lines.join("\n"))
+    }
+}
+
edition = "2024"
[dependencies]
+sorel-codegen = { workspace = true }
sorel-ir = { workspace = true }
sorel-tokenizer = { workspace = true }
sorel-parser = { workspace = true }
--- /dev/null
+use sorel_parser::Module;
+use sorel_ir::*;
+use sorel_tokenizer::tokenize;
+
+use std::collections::{HashSet, HashMap};
+use std::path::PathBuf;
+use std::rc::Rc;
+use std::cell::RefCell;
+use std::include_str;
+
+use anyhow::{Result, bail, anyhow};
+
+// State shared by the import pass (`import`) and the flattening pass (`collapse`).
+#[derive(Default)]
+pub(crate) struct ImportTree {
+ // Data-section IR gathered by `collapse`.
+ data: Vec<IR>,
+ // Text-section IR gathered by `collapse`.
+ text: Vec<IR>,
+ // Modules built so far, keyed by the string form of their module id.
+ all_modules: HashMap<String, WrappedIRModule>,
+ // Every export name recorded while building modules.
+ all_exports: HashSet<String>,
+ // The module that was imported with `is_entrypoint` set.
+ entrypoint: WrappedIRModule,
+ // Count of modules built so far; supplies each module's `number`.
+ module_count: usize,
+ // String ids of modules `collapse` has already emitted.
+ collapse_seen: HashSet<String>,
+}
+
+/// Returns the embedded source text of the standard-library module named by
+/// `specifier` (for example `"std:mem"`).
+///
+/// The text is compiled into the binary with `include_str!`, so it is
+/// `'static` and does not borrow from `specifier`.
+///
+/// # Errors
+///
+/// Fails when `specifier` is not one of the known standard-library modules.
+fn std_import(specifier: &str) -> Result<&'static str> {
+    let source = match specifier {
+        "std:mem" => include_str!("../../stdlib/mem.sorel"),
+        "std:out" => include_str!("../../stdlib/out.sorel"),
+        "std:string" => include_str!("../../stdlib/string.sorel"),
+        "std:process" => include_str!("../../stdlib/process.sorel"),
+        _ => bail!("{} is not a standard library module", specifier),
+    };
+    Ok(source)
+}
+
+impl ImportTree {
+    /// Loads, tokenizes, parses and lowers the module named by `specifier`,
+    /// returning the cached module when it has been imported before.
+    ///
+    /// Specifiers starting with `std:` come from the embedded standard
+    /// library. Anything else is a file path; relative paths are resolved
+    /// against `importer_dir` and canonicalized.
+    ///
+    /// # Errors
+    ///
+    /// Fails on an unknown `std:` specifier, on path resolution or read
+    /// errors, and on tokenizer, parser or lowering errors.
+    fn import(&mut self, importer_dir: &std::path::Path, specifier: &str, is_entrypoint: bool) -> Result<WrappedIRModule> {
+        use anyhow::Context as _;
+
+        let (contents, module_id) = if specifier.starts_with("std:") {
+            if let Some(cached) = self.all_modules.get(specifier) {
+                return Ok(cached.clone());
+            }
+            let contents = std_import(specifier)?;
+            (contents.to_string(), ModuleID::StdSpecifier(specifier.to_string()))
+        } else {
+            let mut path = PathBuf::from(specifier);
+            if path.is_relative() {
+                path = importer_dir
+                    .join(&path)
+                    .canonicalize()
+                    .with_context(|| format!("cannot resolve import {:?} from {:?}", specifier, importer_dir))?;
+            }
+            // NOTE(review): lookups use the lossy path string while the insert below uses
+            // `module_id.to_string()` — confirm the two spellings agree for source files.
+            let path_key = path.to_string_lossy().to_string();
+            if let Some(cached) = self.all_modules.get(&path_key) {
+                return Ok(cached.clone());
+            }
+
+            let contents = std::fs::read_to_string(&path)
+                .with_context(|| format!("cannot read module {:?}", path))?;
+            (contents, ModuleID::SourceFile(path))
+        };
+
+        let tokens = tokenize(&contents)?;
+        let parsed = &Module::parse(tokens, is_entrypoint)?;
+        let module = self.ir_mod_from_parsed(module_id.clone(), parsed)?;
+        let module = Rc::new(RefCell::new(module));
+        self.all_modules.insert(module_id.to_string(), module.clone());
+        if is_entrypoint {
+            self.entrypoint = module.clone();
+        }
+        Ok(module)
+    }
+
+    /// Lowers a parsed module to an `IRModule`, importing its dependencies first.
+    ///
+    /// Each word becomes a label, its lowered instructions, then a `Ret`.
+    /// The module receives the next free module number after all of its
+    /// imports have been numbered.
+    ///
+    /// # Errors
+    ///
+    /// Fails when a source-file module id has no parent directory or when any
+    /// import fails.
+    fn ir_mod_from_parsed(&mut self, module_id: ModuleID, module: &Module) -> Result<IRModule> {
+        // Eventually these will end up being sections in assembly
+        let mut text = Vec::new();
+        let mut data = vec![];
+
+        let parent_path = match module_id {
+            ModuleID::SourceFile(ref path) => path
+                .parent()
+                .ok_or_else(|| anyhow!("no parent for path: {:?}", path))?
+                .to_path_buf(),
+            // A stdlib module can only import other stdlib
+            // modules, so no need for parent path.
+            ModuleID::StdSpecifier(_) => PathBuf::new(),
+        };
+
+        // Stops at the first import that fails.
+        let imports = module
+            .imports
+            .iter()
+            .map(|imported| self.import(&parent_path, imported, false))
+            .collect::<Result<Vec<_>>>()?;
+
+        let mut exports = Vec::new();
+        for exported in module.exports.iter() {
+            let exported = exported.to_string();
+            self.all_exports.insert(exported.clone());
+            exports.push(exported);
+        }
+
+        let externs = module.externs.iter().map(|s| s.to_string()).collect();
+
+        for def in module.words.iter() {
+            text.push(IR::Label(def.name.to_string()));
+            text.extend(def.instructions.iter().map(|inst| IR::from_token(inst, &mut data)));
+            text.push(IR::Ret);
+        }
+
+        let number = self.module_count;
+        self.module_count += 1;
+
+        Ok(IRModule {
+            text,
+            data,
+            imports,
+            exports,
+            externs,
+            module_id,
+            number,
+        })
+    }
+
+    /// Appends `module` and, before it, everything it imports to this tree's
+    /// flat `data` and `text` lists, skipping modules already emitted.
+    ///
+    /// String definitions and the pushes that reference them get the module
+    /// number appended to their name. Labels, calls and word pointers are
+    /// renamed through the module's `get_label` / `get_label_for_call`, except
+    /// that the entrypoint's `main` label is kept as-is.
+    ///
+    /// # Errors
+    ///
+    /// Fails when a module's data list holds anything but string definitions.
+    fn collapse(&mut self, module: WrappedIRModule, is_entrypoint: bool) -> Result<()> {
+        // Nothing here mutates the module, so a shared borrow is sufficient.
+        let module = module.borrow();
+        let seen_key = module.module_id.to_string();
+        if self.collapse_seen.contains(&seen_key) {
+            return Ok(());
+        }
+
+        // Dependencies first, so their code precedes the code that uses it.
+        for imported in &module.imports {
+            self.collapse(imported.clone(), false)?;
+        }
+
+        let module_number = module.number;
+
+        for string in &module.data {
+            let IR::StringDef(name, val) = string else {
+                bail!("non-string data");
+            };
+            let new_name = format!("{}_{}", name, module_number);
+            self.data.push(IR::StringDef(new_name, val.clone()));
+        }
+
+        for instruction in &module.text {
+            let new_instruction = match instruction {
+                IR::StackPushString(name) => {
+                    IR::StackPushString(format!("{}_{}", name, module_number))
+                }
+                IR::Label(name) => {
+                    if is_entrypoint && name == "main" {
+                        instruction.clone()
+                    } else {
+                        IR::Label(module.get_label(name))
+                    }
+                }
+                IR::Call(name) => IR::Call(module.get_label_for_call(name)),
+                IR::WordPointer(name) => IR::WordPointer(module.get_label_for_call(name)),
+                _ => instruction.clone(),
+            };
+            self.text.push(new_instruction);
+        }
+
+        self.collapse_seen.insert(seen_key);
+
+        Ok(())
+    }
+}
+
+/// Imports the entrypoint file at `path` together with everything it
+/// imports, then flattens the result into a single `IRObject`.
+///
+/// Relative paths are resolved against the current working directory.
+///
+/// # Errors
+///
+/// Fails when the working directory cannot be determined or when importing
+/// or collapsing any module fails.
+pub fn build_and_collapse(path: &str) -> Result<IRObject> {
+    let working_dir = std::env::current_dir()?;
+    let mut tree = ImportTree::default();
+    let entrypoint = tree.import(&working_dir, path, true)?;
+    tree.collapse(entrypoint, true)?;
+    // TODO remove unused words
+    let ImportTree { data, text, .. } = tree;
+    Ok(IRObject { data, text })
+}
+++ /dev/null
-use sorel_parser::Module;
-use sorel_ir::*;
-use sorel_tokenizer::tokenize;
-
-use std::collections::{HashSet, HashMap};
-use std::path::PathBuf;
-use std::rc::Rc;
-use std::cell::RefCell;
-use std::include_str;
-
-use anyhow::{Result, bail, anyhow};
-
-#[derive(Default)]
-pub(crate) struct ImportTree {
- data: Vec<IR>,
- text: Vec<IR>,
- all_modules: HashMap<String, WrappedIRModule>,
- all_exports: HashSet<String>,
- entrypoint: WrappedIRModule,
- module_count: usize,
- collapse_seen: HashSet<String>,
-}
-
-fn std_import(specifier: &str) -> Result<&str> {
- match specifier {
- "std:mem" => Ok(include_str!("../../stdlib/mem.sorel")),
- "std:out" => Ok(include_str!("../../stdlib/out.sorel")),
- "std:string" => Ok(include_str!("../../stdlib/string.sorel")),
- "std:process" => Ok(include_str!("../../stdlib/process.sorel")),
- _ => bail!("{} is not a standard library module", specifier),
- }
-}
-
-impl ImportTree {
- fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result<WrappedIRModule> {
- let (contents, module_id) = if specifier.starts_with("std:") {
- if self.all_modules.contains_key(specifier) {
- let module = self.all_modules.get(specifier).unwrap().clone();
- return Ok(module);
- }
- let contents = std_import(specifier)?;
- (contents.to_string(), ModuleID::StdSpecifier(specifier.to_string()))
- } else {
- let mut path = PathBuf::from(specifier);
- if path.is_relative() {
- let mut new_path = importer_dir.clone();
- new_path.push(path);
- path = new_path.canonicalize()?;
- }
- let path_key = path.to_string_lossy().to_string();
- if self.all_modules.contains_key(&path_key) {
- let module = self.all_modules.get(&path_key).unwrap().clone();
- return Ok(module);
- }
-
- let contents = std::fs::read_to_string(&path)?;
- (contents, ModuleID::SourceFile(path))
- };
- let tokens = tokenize(&contents)?;
- let parsed = &Module::parse(tokens, is_entrypoint)?;
- let module = self.generate_internal(module_id.clone(), parsed)?;
- let module = Rc::new(RefCell::new(module));
- self.all_modules.insert(module_id.to_string(), module.clone());
- if is_entrypoint {
- self.entrypoint = module.clone();
- }
- Ok(module)
- }
-
- fn generate_internal(&mut self, module_id: ModuleID, module: &Module) -> Result<IRModule> {
- // Eventually these will end up being sections in assembly
- let mut text = vec![];
- let mut data = vec![];
-
- let mut imports = vec![];
-
- let parent_path = match module_id {
- ModuleID::SourceFile(ref path) => {
- path.parent().ok_or(anyhow!("no parent for path: {:?}", path))?.to_path_buf()
- },
- // A stdlib module can only import other stdlib
- // modules, so no need for parent path.
- ModuleID::StdSpecifier(_) => PathBuf::new(),
- };
- module.imports.iter().try_for_each(|imported| -> Result<()> {
- let new_module = self.import(&parent_path, imported, false)?;
- imports.push(new_module);
- Ok(())
- })?;
-
- let exports: Vec<_> = module.exports.iter().map(|s| {
- self.all_exports.insert(s.to_string());
- s.to_string()
- }).collect();
-
- let externs = module.externs.iter().map(|s| s.to_string()).collect();
-
- text.push(module.words.iter().flat_map(|def| {
- let mut body = def.instructions.iter().map(|inst| {
- IR::from_token(inst, &mut data)
- }).collect::<Vec<_>>();
-
- let mut result = vec![IR::Label(def.name.to_string())];
- result.append(&mut body);
- result.push(IR::Ret);
- result
- }).collect::<Vec<_>>());
-
- let number = self.module_count;
- self.module_count += 1;
-
- Ok(IRModule {
- text: text.into_iter().flatten().collect::<Vec<_>>(),
- data,
- imports,
- exports,
- externs,
- module_id,
- number,
- })
- }
-
- fn collapse(&mut self, module: WrappedIRModule, is_entrypoint: bool) -> Result<()> {
- let module = module.borrow_mut();
- let seen_key = module.module_id.to_string();
- if self.collapse_seen.contains(&seen_key) {
- return Ok(())
- }
-
- for imported in module.imports.clone() {
- self.collapse(imported, false)?;
- }
-
- let module_number = module.number;
-
- for string in &module.data {
- if let IR::StringDef(name, val) = string {
- let new_name = format!("{}_{}", name, module_number);
- self.data.push(IR::StringDef(new_name, val.clone()));
- } else {
- bail!("non-string data");
- }
- }
-
- for instruction in &module.text {
- let new_instruction = match instruction {
- IR::StackPushString(name) => {
- let new_name = format!("{}_{}", name, module_number);
- IR::StackPushString(new_name)
- },
- IR::Label(name) => {
- if is_entrypoint && name == "main" {
- instruction.clone()
- } else {
- IR::Label(module.get_label(name))
- }
- },
- IR::Call(name) => {
- IR::Call(module.get_label_for_call(name))
- },
- IR::WordPointer(name) => {
- IR::WordPointer(module.get_label_for_call(name))
- },
- _ => instruction.clone()
- };
- self.text.push(new_instruction);
- }
-
- self.collapse_seen.insert(seen_key);
-
- Ok(())
- }
-}
-
-pub fn compile(path: &str) -> Result<IRObject> {
- let dir = std::env::current_dir()?;
- let mut tree: ImportTree = Default::default();
- let module = tree.import(&dir, path, true)?;
- tree.collapse(module, true)?;
- // TODO remove unused words
- Ok(IRObject {
- data: tree.data,
- text: tree.text,
- })
-}
-mod ir;
-mod riscv_asm_codegen;
+mod import_tree;
+use sorel_codegen::riscv64_asm::*;
use anyhow::Result;
fn main() -> Result<()> {
let filename = std::env::args().nth(1).expect("must provide a file to compile");
- let module = ir::compile(&filename)?;
- let mut generator = riscv_asm_codegen::CodeGen::new(&module, 4096);
+ let module = import_tree::build_and_collapse(&filename)?;
+ let mut generator = CodeGen::new(&module, 4096);
let mut asm_path = PathBuf::from(filename);
asm_path.set_extension("asm");
let mut output = File::create(asm_path)?;
+++ /dev/null
-use sorel_ir::*;
-
-use anyhow::*;
-
-use std::collections::{HashMap, HashSet};
-use std::fmt::Display;
-
-pub struct CodeGen<'a> {
- module: &'a IRObject,
- data_stack_size: usize,
- lines: Vec<String>,
-}
-
-
-// Some inspiration
-// ================
-//
-// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list
-// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack
-//
-
-// Implementation Choices
-// ======================
-//
-// Data Stack pointer: s2
-// No return stack pointer (using C ABI, so sp, sorta)
-// Use t0, t1, t2 for temporary values in words
-// Data stack grows down
-
-
-macro_rules! asm_macro {
- ($name:ident, $src:expr) => {
- fn $name(&mut self) {
- self.line($src);
- }
- };
- ($name:ident, $src:expr, $arg0:ty) => {
- fn $name(&mut self, val0: $arg0) {
- self.line(format!($src, val0));
- }
- };
- ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => {
- fn $name(&mut self, val0: $arg0, val1: $arg1) {
- self.line(format!($src, val0, val1));
- }
- };
-}
-
-fn mangle(input: &str) -> String {
- input
- .replace("<", "_LT_")
- .replace(">", "_GT_")
- .replace("-", "___")
-}
-
-impl<'a> CodeGen<'a> {
- pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self {
- Self {
- module: ir_mod,
- data_stack_size,
- lines: vec![],
- }
- }
-
- fn line<S: Display>(&mut self, line: S) {
- self.lines.push(format!(" {}", line));
-
- }
-
- fn label<S: Display>(&mut self, line: S) {
- self.lines.push(line.to_string());
- }
-
- asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str);
- asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize);
- asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str);
- asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize);
-
- fn pop_to(&mut self, reg: &str) {
- self.copy_top_stack_value_to(reg);
- self.move_stack_ptr_by_cells(1);
- }
-
- fn pop_some_to(&mut self, regs: &str) {
- let mut regs = regs.trim().split(" ").collect::<Vec<_>>();
- regs.reverse();
- let count = regs.len();
- let mut index = 0;
- for reg in regs {
- self.copy_offset_stack_value_to(reg, index);
- index += 1;
- }
- self.move_stack_ptr_by_cells(count as isize);
- }
-
- fn push_from(&mut self, reg: &str) {
- self.move_stack_ptr_by_cells(-1);
- self.copy_to_top_of_stack(reg);
- }
-
- fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) {
- self.pop_some_to(regs);
- self.line(call);
- self.push_from(reg);
- }
-
- pub fn assembly(&mut self) -> Result<String>{
- let mut string_table = HashMap::new();
-
- // Static strings
- self.label(".section .rodata\n");
- self.label(".align 3\n");
- for ir in &self.module.data {
- match ir {
- IR::StringDef(string_label, some_string) => {
- string_table.insert(some_string.clone(), string_label);
- self.label(format!("{}:", string_label));
- self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz?
- self.label("");
- },
- _ => bail!("Currently only string definitions are supported in the data section.")
- }
- }
-
- // Data stack
- self.label(".data\n");
- self.label(".align 3\n");
- self.label("data_stack:");
- self.line(format!(".space {}", self.data_stack_size));
- self.label(".globl data_stack_end\ndata_stack_end:\n");
-
- // Code
- self.label(".text\n");
- self.label(".align 3\n");
-
- let mut if_block_count = 0;
- let mut if_stack = vec![];
- let mut loop_count = 0;
- let mut loop_stack = vec![];
- let mut seen_else = HashSet::new();
- let mut last_label = "";
-
- for ir in &self.module.text {
- match ir {
- IR::Label(name) => {
- last_label = name;
- if name == "main" {
- self.label(".globl _start"); // TODO is globl necessary?
- self.label("_start:");
- self.line("la s2, data_stack_end # set initial data stack pointer");
- } else {
- let mangled = mangle(name);
- self.label(format!(".globl {}", mangled));
- self.label(format!("{}:", mangled));
- }
- self.line("addi sp, sp, -16 # allocate 16 bytes on stack"); // allocate 16 bytes on stack
- self.line("sd ra, 8(sp) # store return address on stack"); // store return address on stack
- },
- IR::Call(name) => {
- let mangled = mangle(name);
- self.label(format!("# call {}", mangled));
- self.line(format!("call {}", mangled));
- },
- IR::WordPointer(name) => {
- let mangled = mangle(name);
- self.label(format!("# '{} (word pointer)", mangled));
- self.line(format!("la t0, {}", mangled));
- self.push_from("t0");
- },
- IR::CallPtr => {
- self.label("# callptr");
- self.pop_to("t0");
- self.line("jalr t0");
- },
- IR::Ret => {
- if last_label == "main" {
- self.label("# exit 0 syscall");
- self.line("li a7, 93");
- self.line("mv a0, x0");
- self.line("ecall");
- } else {
- self.line("ld ra, 8(sp)"); // load return address from stack
- self.line("addi sp, sp, 16"); // restore stack pointer
- self.line("ret");
- }
- },
- IR::Load8 => {
- self.label("# load 8");
- self.copy_top_stack_value_to("t0");
- self.line("lbu t0, 0(t0)"); // deref pointer in t0 to t0
- self.copy_to_top_of_stack("t0");
- },
- IR::Load16 => {
- self.label("# load 16");
- self.copy_top_stack_value_to("t0");
- self.line("lhu t0, 0(t0)"); // deref pointer in t0 to t0
- self.copy_to_top_of_stack("t0");
- },
- IR::Load32 => {
- self.label("# load 32");
- self.copy_top_stack_value_to("t0");
- self.line("lwu t0, 0(t0)"); // deref pointer in t0 to t0
- self.copy_to_top_of_stack("t0");
- },
- IR::Load => {
- self.label("# load 64");
- self.copy_top_stack_value_to("t0");
- self.line("ld t0, 0(t0)"); // deref pointer in t0 to t0
- self.copy_to_top_of_stack("t0");
- },
- IR::Store8 => { // ( x addr -- )
- self.pop_some_to("t0 t1");
- self.line("sb t0, 0(t1)"); // store x at addr
- },
- IR::Store16 => { // ( x addr -- )
- self.pop_some_to("t0 t1");
- self.line("sh t0, 0(t1)"); // store x at addr
- },
- IR::Store32 => { // ( x addr -- )
- self.pop_some_to("t0 t1");
- self.line("sw t0, 0(t1)"); // store x at addr
- },
- IR::Store => { // ( x addr -- )
- self.pop_some_to("t0 t1");
- self.line("sd t0, 0(t1)"); // store x at addr
- },
- IR::StackPush(num) => {
- self.label(format!("# stackpush {}", num));
- self.line(format!("li t0, {}", num));
- self.push_from("t0");
- },
- IR::StackPushString(name) => {
- self.label(format!("# stackpushstring {}", name));
- self.line(format!("la t0, {}", name));
- self.push_from("t0");
- },
- IR::AddU64 => {
- self.label("# add");
- self.pop_call_push("t0 t1", "add t0, t0, t1", "t0");
- },
- IR::SubtractU64 => {
- self.label("# sub");
- self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
- },
- IR::MultiplyU64 => {
- self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0");
- },
- IR::DivideU64 => {
- self.pop_call_push("t0 t1", "div t0, t0, t1", "t0");
- },
- IR::ModU64 => {
- self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0");
- },
- IR::Dup => {
- self.label("# dup");
- self.copy_top_stack_value_to("t0");
- self.push_from("t0");
- },
- IR::Swap => {
- self.label("# swap");
- self.pop_some_to("t1 t0");
- self.push_from("t0");
- self.push_from("t1");
- },
- IR::Over => {
- // TODO this is super inefficient. There's no need to pop anything. Just read
- // from the second stack position and push it.
- self.label("# over");
- self.pop_some_to("t0 t1");
- self.push_from("t0");
- self.push_from("t1");
- self.push_from("t0");
- },
- IR::Rot => {
- self.label("# rot");
- self.pop_some_to("t0 t1 t2");
- self.push_from("t1");
- self.push_from("t2");
- self.push_from("t0");
- },
- IR::StackPointer => {
- self.label("# sp");
- self.line("addi t0, s2, 0");
- self.push_from("t0");
- },
- IR::StackBottom => {
- self.label("# stackbottom");
- self.line("la t0, data_stack_end");
- self.push_from("t0");
- }
- IR::Drop => {
- self.label("# drop");
- self.move_stack_ptr_by_cells(1);
- },
- IR::Equals => {
- // Yes, this is the same as subtract, since we're treating 0 as true, and
- // others as false.
- self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
- },
- IR::GreaterThan => {
- self.label("# >");
- self.pop_some_to("t0 t1");
- self.line("sgt t0, t0, t1");
- self.line("seqz t0, t0"); // remember, 0 is true, others are false
- self.push_from("t0");
- },
- IR::LessThan => {
- self.label("# <");
- self.pop_some_to("t0 t1");
- self.line("slt t0, t0, t1");
- self.line("seqz t0, t0"); // remember, 0 is true, others are false
- self.push_from("t0");
- },
- IR::BitwiseOr => {
- self.pop_call_push("t0 t1", "or t0, t0, t1", "t0");
- },
- IR::Sys0 => {
- self.pop_call_push("a7", "ecall", "a0");
- },
- IR::Sys1 => {
- self.pop_call_push("a0 a7", "ecall", "a0");
- },
- IR::Sys2 => {
- self.pop_call_push("a0 a1 a7", "ecall", "a0");
- },
- IR::Sys3 => {
- self.pop_call_push("a0 a1 a2 a7", "ecall", "a0");
- },
- IR::Sys4 => {
- self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0");
- },
- IR::Sys5 => {
- self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0");
- },
- IR::Sys6 => {
- self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0");
- },
- // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html
- IR::If => {
- self.label("# if");
- self.pop_to("t0");
- self.line(format!("bnez t0, _else_{}", if_block_count));
- if_stack.push(if_block_count);
- if_block_count += 1;
- },
- IR::Else => {
- self.label("# else");
- let if_counter = *if_stack.last().unwrap();
- self.line(format!("j _endif_{}", if_counter));
- self.label(format!("_else_{}:", if_counter));
- seen_else.insert(if_counter);
- },
- IR::EndIf => {
- self.label("# endif");
- let stack = &mut if_stack;
- let if_counter = *stack.last().unwrap();
- if !seen_else.contains(&if_counter) {
- self.label(format!("_else_{}:", if_counter));
- } else {
- self.label(format!("_endif_{}:", if_counter));
- seen_else.remove(&if_counter);
- }
- stack.pop();
- },
- IR::Loop => { // keep looping until is true/0
- self.label(format!("_loop_{}:", loop_count));
- self.pop_to("t0");
- self.line(format!("beqz t0, _endloop_{}", loop_count));
- loop_stack.push(loop_count);
- loop_count += 1;
- },
- IR::EndLoop => {
- let stack = &mut loop_stack;
- let loop_counter = *stack.last().unwrap();
- self.line(format!("j _loop_{}", loop_counter));
- self.label(format!("_endloop_{}:", loop_counter));
- stack.pop();
- },
- _ => bail!("not implemented yet: {:?}", ir),
- }
- }
-
- Ok(self.lines.join("\n"))
- }
-}
-