From 00fb519b66f9b3746694e8d9f75d16b722822e86 Mon Sep 17 00:00:00 2001 From: Bryan English Date: Sat, 14 Feb 2026 22:01:38 -0500 Subject: [PATCH] refactor, reorganize, clippy --- Cargo.lock | 18 +++ Cargo.toml | 5 +- sorel-ir/Cargo.toml | 1 + sorel-ir/src/ir.rs | 134 ++++++++++++++++ sorel-ir/src/lib.rs | 78 +--------- sorel-ir/src/module.rs | 66 ++++++++ sorel-ir/src/object.rs | 20 +++ sorel-parser/Cargo.toml | 8 + .../src/parser.rs => sorel-parser/src/lib.rs | 18 +-- sorel-tokenizer/Cargo.toml | 7 + .../src/lib.rs | 12 +- sorelc/Cargo.toml | 2 + sorelc/src/ir.rs | 144 +----------------- sorelc/src/main.rs | 4 +- sorelc/src/riscv_asm_codegen.rs | 6 +- 15 files changed, 287 insertions(+), 236 deletions(-) create mode 100644 sorel-ir/src/ir.rs create mode 100644 sorel-ir/src/module.rs create mode 100644 sorel-ir/src/object.rs create mode 100644 sorel-parser/Cargo.toml rename sorelc/src/parser.rs => sorel-parser/src/lib.rs (89%) create mode 100644 sorel-tokenizer/Cargo.toml rename sorelc/src/tokenizer.rs => sorel-tokenizer/src/lib.rs (94%) diff --git a/Cargo.lock b/Cargo.lock index 47524a9..017d419 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -109,6 +109,22 @@ dependencies = [ "serde", "serde_derive", "serde_yaml", + "sorel-tokenizer", +] + +[[package]] +name = "sorel-parser" +version = "0.1.0" +dependencies = [ + "anyhow", + "sorel-tokenizer", +] + +[[package]] +name = "sorel-tokenizer" +version = "0.1.0" +dependencies = [ + "anyhow", ] [[package]] @@ -117,6 +133,8 @@ version = "0.1.0" dependencies = [ "anyhow", "sorel-ir", + "sorel-parser", + "sorel-tokenizer", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index fae2a78..8e7cea9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,9 @@ [workspace] resolver = "3" -members = ["sorel-ir","sorelc"] - +members = ["sorel-ir","sorel-parser","sorel-tokenizer","sorelc"] [workspace.dependencies] sorel-ir = { path = "./sorel-ir", version = "0.1.0" } +sorel-tokenizer = { path = "./sorel-tokenizer", version = "0.1.0" } +sorel-parser = { path = "./sorel-parser", version = "0.1.0" } diff --git a/sorel-ir/Cargo.toml b/sorel-ir/Cargo.toml index 5618a61..27d199a 100644 --- a/sorel-ir/Cargo.toml +++ b/sorel-ir/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] +sorel-tokenizer = { workspace = true } serde = "1.0.228" serde_derive = "1.0.228" serde_yaml = "0.9.34" diff --git a/sorel-ir/src/ir.rs b/sorel-ir/src/ir.rs new file mode 100644 index 0000000..0e31aa0 --- /dev/null +++ b/sorel-ir/src/ir.rs @@ -0,0 +1,134 @@ +use serde_derive::{Serialize, Deserialize}; +use sorel_tokenizer::Token; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub enum IR { + Label(String), + Call(String), + WordPointer(String), + CallPtr, + Ret, + StackPush(u64), + StackPushString(String), // refers to string label, not the string itself + StringDef(String, String), // first is string label, second is string value + + // These next ones should always be inlined, so they're in IR. + Load, // @ ( addr -- x ) -- Fetch memory contents at addr + Load8, + Load16, + Load32, + Store, // ! ( x addr -- ) -- Store x at addr + Store8, + Store16, + Store32, + + // These ones might not be inlined, but should be built-in, so a compiler might + // turn this into `Call(String)` before translating to assembly/machine-code, but + // an IR interpreter may just execute them. + AddU64, + SubtractU64, + MultiplyU64, + DivideU64, + ModU64, + Equals, + GreaterThan, + LessThan, + BitwiseOr, + Dup, + Swap, + Drop, + Over, + Rot, + StackPointer, + StackBottom, + If, + Else, + EndIf, + Loop, + EndLoop, + + // System calls + Sys0, + Sys1, + Sys2, + Sys3, + Sys4, + Sys5, + Sys6, +} + +macro_rules! push_num { + ($num:ident) => { IR::StackPush(*$num as u64) }; + ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) }; +} + +impl IR { + pub fn from_token(token: &Token, data: &mut Vec) -> IR { + match token { + Token::Word(word) => { + match *word { + "@" => IR::Load, + "@:8" => IR::Load8, + "@:16" => IR::Load16, + "@:32" => IR::Load32, + "!" => IR::Store, + "!:8" => IR::Store8, + "!:16" => IR::Store16, + "!:32" => IR::Store32, + "dup" => IR::Dup, + "swap" => IR::Swap, + "drop" => IR::Drop, + "over" => IR::Over, + "rot" => IR::Rot, + "sp" => IR::StackPointer, + "stackbottom" => IR::StackBottom, + "if" => IR::If, + "else" => IR::Else, + "endif" => IR::EndIf, + "loop" => IR::Loop, + "endloop" => IR::EndLoop, + "call" => IR::CallPtr, + "=" => IR::Equals, + ">" => IR::GreaterThan, + "<" => IR::LessThan, + "+" => IR::AddU64, + "-" => IR::SubtractU64, + "*" => IR::MultiplyU64, + "/" => IR::DivideU64, + "%" => IR::ModU64, + "|" => IR::BitwiseOr, + "sys0" => IR::Sys0, + "sys1" => IR::Sys1, + "sys2" => IR::Sys2, + "sys3" => IR::Sys3, + "sys4" => IR::Sys4, + "sys5" => IR::Sys5, + "sys6" => IR::Sys6, + // TODO num type specfic math like `+:i32`, etc. + _ => { + if let Some(actual_word) = word.strip_prefix("'") { + IR::WordPointer(String::from(actual_word)) + } else { + IR::Call(String::from(*word)) + } + } + } + }, + Token::String(text) => { + let string_label = format!("string_{}", data.len()); + data.push(IR::StringDef(string_label.clone(), String::from(*text))); + IR::StackPushString(string_label) + }, + Token::NumU8(num) => push_num!(num), + Token::NumI8(num) => push_num!(num, u8), + Token::NumU16(num) => push_num!(num), + Token::NumI16(num) => push_num!(num, u16), + Token::NumU32(num) => push_num!(num), + Token::NumI32(num) => push_num!(num, u32), + Token::NumU64(num) => push_num!(num), + Token::NumI64(num) => push_num!(num), + Token::NumF32(num) => push_num!(num), + Token::NumF64(num) => push_num!(num), + } + } +} diff --git a/sorel-ir/src/lib.rs b/sorel-ir/src/lib.rs index 35970e9..f27c139 100644 --- a/sorel-ir/src/lib.rs +++ b/sorel-ir/src/lib.rs @@ -1,75 +1,9 @@ -use serde_yaml::{from_str, to_string, Error}; -use serde_derive::{Serialize, Deserialize}; +mod ir; +pub use ir::*; -#[derive(Serialize, Deserialize, Debug, Clone)] -pub enum IR { - Label(String), - Call(String), - WordPointer(String), - CallPtr, - Ret, - StackPush(u64), - StackPushString(String), // refers to string label, not the string itself - StringDef(String, String), // first is string label, second is string value +mod object; +pub use object::*; - // These next ones should always be inlined, so they're in IR. - Load, // @ ( addr -- x ) -- Fetch memory contents at addr - Load8, - Load16, - Load32, - Store, // ! ( x addr -- ) -- Store x at addr - Store8, - Store16, - Store32, +mod module; +pub use module::*; - // These ones might not be inlined, but should be built-in, so a compiler might - // turn this into `Call(String)` before translating to assembly/machine-code, but - // an IR interpreter may just execute them. - AddU64, - SubtractU64, - MultiplyU64, - DivideU64, - ModU64, - Equals, - GreaterThan, - LessThan, - BitwiseOr, - Dup, - Swap, - Drop, - Over, - Rot, - StackPointer, - StackBottom, - If, - Else, - EndIf, - Loop, - EndLoop, - - // System calls - Sys0, - Sys1, - Sys2, - Sys3, - Sys4, - Sys5, - Sys6, -} - -// This is like an .o file. -#[derive(Serialize, Deserialize, Debug)] -pub struct IRObject { - pub text: Vec, - pub data: Vec, -} - -impl IRObject { - pub fn to_s(&self) -> Result { - to_string(self) - } - - pub fn from_s(source: &str) -> Result { - from_str(source) - } -} diff --git a/sorel-ir/src/module.rs b/sorel-ir/src/module.rs new file mode 100644 index 0000000..f559177 --- /dev/null +++ b/sorel-ir/src/module.rs @@ -0,0 +1,66 @@ +use std::rc::Rc; +use std::cell::RefCell; +use std::path::PathBuf; + +use crate::ir::IR; + +pub type WrappedIRModule = Rc>; + +#[derive(Debug, PartialEq, Clone)] +pub enum ModuleID { + SourceFile(PathBuf), + StdSpecifier(String) +} + +impl Default for ModuleID { + fn default() -> Self { + ModuleID::StdSpecifier(String::from("")) + } +} + +impl ModuleID { + pub fn to_string(&self) -> String { + match self { + ModuleID::SourceFile(f) => f.to_string_lossy().to_string(), + ModuleID::StdSpecifier(s) => s.clone() + } + } +} + +#[derive(Debug, Default)] +pub struct IRModule { + pub data: Vec, + pub text: Vec, + pub imports: Vec, + pub exports: Vec, + pub externs: Vec, + // TODO these next two should form an enum, not two options + pub module_id: ModuleID, + pub number: usize, +} + +impl IRModule { + pub fn get_label_for_call(&self, name: &String) -> String { + if self.externs.contains(name) { + return name.clone(); + } + let mut found: Option = None; + for imported in &self.imports { + let imported = imported.borrow(); + if imported.exports.contains(name) { + found = Some(imported.number); + // Don't break here, since the last one should win. + } + } + if let Some(found) = found { + format!("_m{}_{}", found, name) + } else { + // TODO check if it's even a word locally. If not, bail. + format!("_m{}_{}", self.number, name) + } + } + + pub fn get_label(&self, name: &String) -> String { + format!("_m{}_{}", self.number, name) + } +} diff --git a/sorel-ir/src/object.rs b/sorel-ir/src/object.rs new file mode 100644 index 0000000..87f6468 --- /dev/null +++ b/sorel-ir/src/object.rs @@ -0,0 +1,20 @@ +use serde_yaml::{from_str, to_string, Error}; +use serde_derive::{Serialize, Deserialize}; +use crate::ir::IR; + +// This is like an .o file. +#[derive(Serialize, Deserialize, Debug)] +pub struct IRObject { + pub text: Vec, + pub data: Vec, +} + +impl IRObject { + pub fn to_s(&self) -> Result { + to_string(self) + } + + pub fn from_s(source: &str) -> Result { + from_str(source) + } +} diff --git a/sorel-parser/Cargo.toml b/sorel-parser/Cargo.toml new file mode 100644 index 0000000..f5e1f5a --- /dev/null +++ b/sorel-parser/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "sorel-parser" +version = "0.1.0" +edition = "2024" + +[dependencies] +sorel-tokenizer = { workspace = true } +anyhow = "1.0.100" diff --git a/sorelc/src/parser.rs b/sorel-parser/src/lib.rs similarity index 89% rename from sorelc/src/parser.rs rename to sorel-parser/src/lib.rs index 94b53e4..dd94d97 100644 --- a/sorelc/src/parser.rs +++ b/sorel-parser/src/lib.rs @@ -1,4 +1,4 @@ -use crate::tokenizer::Token; +use sorel_tokenizer::Token; use anyhow::{Result, bail}; #[derive(Debug)] @@ -69,16 +69,14 @@ impl<'a> Module<'a> { last_was_export = true; } else if word == "extern" { last_was_extern = true; + } else if last_was_export { + exports.push(word); + last_was_export = false; + } else if last_was_extern { + externs.push(word); + last_was_extern = false; } else { - if last_was_export { - exports.push(word); - last_was_export = false; - } else if last_was_extern { - externs.push(word); - last_was_extern = false; - } else { - main.push(token.clone()); - } + main.push(token.clone()); } }, Token::String(string) => { diff --git a/sorel-tokenizer/Cargo.toml b/sorel-tokenizer/Cargo.toml new file mode 100644 index 0000000..e210133 --- /dev/null +++ b/sorel-tokenizer/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "sorel-tokenizer" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0.100" diff --git a/sorelc/src/tokenizer.rs b/sorel-tokenizer/src/lib.rs similarity index 94% rename from sorelc/src/tokenizer.rs rename to sorel-tokenizer/src/lib.rs index d546c6b..06dac68 100644 --- a/sorelc/src/tokenizer.rs +++ b/sorel-tokenizer/src/lib.rs @@ -42,14 +42,12 @@ impl<'a> Token<'a>{ "f64" => Ok(Token::NumF64(num.parse()?)), _ => panic!("unknown number type") } + } else if input.contains('.') { + Ok(Token::NumF64(input.parse()?)) + } else if input.starts_with('-') { + Ok(Token::NumI64(input.parse()?)) } else { - if input.contains('.') { - Ok(Token::NumF64(input.parse()?)) - } else if input.starts_with('-') { - Ok(Token::NumI64(input.parse()?)) - } else { - Ok(Token::NumU64(input.parse()?)) - } + Ok(Token::NumU64(input.parse()?)) } } else { Ok(Token::Word(input)) diff --git a/sorelc/Cargo.toml b/sorelc/Cargo.toml index 7859ecf..0129099 100644 --- a/sorelc/Cargo.toml +++ b/sorelc/Cargo.toml @@ -5,4 +5,6 @@ edition = "2024" [dependencies] sorel-ir = { workspace = true } +sorel-tokenizer = { workspace = true } +sorel-parser = { workspace = true } anyhow = "1.0.100" diff --git a/sorelc/src/ir.rs b/sorelc/src/ir.rs index 88c2f47..8176da5 100644 --- a/sorelc/src/ir.rs +++ b/sorelc/src/ir.rs @@ -1,7 +1,6 @@ -use crate::parser::Module; -use crate::tokenizer::{Token, tokenize}; - +use sorel_parser::Module; use sorel_ir::*; +use sorel_tokenizer::tokenize; use std::collections::{HashSet, HashMap}; use std::path::PathBuf; @@ -11,72 +10,6 @@ use std::include_str; use anyhow::{Result, bail, anyhow}; -macro_rules! push_num { - ($num:ident) => { IR::StackPush(*$num as u64) }; - ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) }; -} - -type WrappedIRModule = Rc>; - -#[derive(Debug, PartialEq, Clone)] -enum ModuleID { - SourceFile(PathBuf), - StdSpecifier(String) -} - -impl Default for ModuleID { - fn default() -> Self { - ModuleID::StdSpecifier(String::from("")) - } -} - -impl ToString for ModuleID { - fn to_string(&self) -> String { - match self { - ModuleID::SourceFile(f) => f.to_string_lossy().to_string(), - ModuleID::StdSpecifier(s) => s.clone() - } - } -} - -#[derive(Debug, Default)] -struct IRModule { - data: Vec, - text: Vec, - imports: Vec, - exports: Vec, - externs: Vec, - // TODO these next two should form an enum, not two options - module_id: ModuleID, - number: usize, -} - -impl IRModule { - fn get_label_for_call(&self, name: &String) -> String { - if self.externs.contains(name) { - return name.clone(); - } - let mut found: Option = None; - for imported in &self.imports { - let imported = imported.borrow(); - if imported.exports.contains(name) { - found = Some(imported.number); - // Don't break here, since the last one should win. - } - } - if let Some(found) = found { - format!("_m{}_{}", found, name) - } else { - // TODO check if it's even a word locally. If not, bail. - format!("_m{}_{}", self.number, name) - } - } - - fn get_label(&self, name: &String) -> String { - format!("_m{}_{}", self.number, name) - } -} - #[derive(Default)] pub(crate) struct ImportTree { data: Vec, @@ -162,83 +95,16 @@ impl ImportTree { let externs = module.externs.iter().map(|s| s.to_string()).collect(); - text.push(module.words.iter().map(|def| { + text.push(module.words.iter().flat_map(|def| { let mut body = def.instructions.iter().map(|inst| { - let mapped_ir = match inst { - Token::Word(word) => { - match *word { - "@" => IR::Load, - "@:8" => IR::Load8, - "@:16" => IR::Load16, - "@:32" => IR::Load32, - "!" => IR::Store, - "!:8" => IR::Store8, - "!:16" => IR::Store16, - "!:32" => IR::Store32, - "dup" => IR::Dup, - "swap" => IR::Swap, - "drop" => IR::Drop, - "over" => IR::Over, - "rot" => IR::Rot, - "sp" => IR::StackPointer, - "stackbottom" => IR::StackBottom, - "if" => IR::If, - "else" => IR::Else, - "endif" => IR::EndIf, - "loop" => IR::Loop, - "endloop" => IR::EndLoop, - "call" => IR::CallPtr, - "=" => IR::Equals, - ">" => IR::GreaterThan, - "<" => IR::LessThan, - "+" => IR::AddU64, - "-" => IR::SubtractU64, - "*" => IR::MultiplyU64, - "/" => IR::DivideU64, - "%" => IR::ModU64, - "|" => IR::BitwiseOr, - "sys0" => IR::Sys0, - "sys1" => IR::Sys1, - "sys2" => IR::Sys2, - "sys3" => IR::Sys3, - "sys4" => IR::Sys4, - "sys5" => IR::Sys5, - "sys6" => IR::Sys6, - // TODO num type specfic math like `+:i32`, etc. - _ => { - if word.starts_with("'") { - let actual_word = &word[1..]; - IR::WordPointer(String::from(actual_word)) - } else { - IR::Call(String::from(*word)) - } - } - } - }, - Token::String(text) => { - let string_label = format!("string_{}", data.len()); - data.push(IR::StringDef(string_label.clone(), String::from(*text))); - IR::StackPushString(string_label) - }, - Token::NumU8(num) => push_num!(num), - Token::NumI8(num) => push_num!(num, u8), - Token::NumU16(num) => push_num!(num), - Token::NumI16(num) => push_num!(num, u16), - Token::NumU32(num) => push_num!(num), - Token::NumI32(num) => push_num!(num, u32), - Token::NumU64(num) => push_num!(num), - Token::NumI64(num) => push_num!(num), - Token::NumF32(num) => push_num!(num), - Token::NumF64(num) => push_num!(num), - }; - mapped_ir + IR::from_token(inst, &mut data) }).collect::>(); let mut result = vec![IR::Label(def.name.to_string())]; result.append(&mut body); result.push(IR::Ret); result - }).flatten().collect::>()); + }).collect::>()); let number = self.module_count; self.module_count += 1; diff --git a/sorelc/src/main.rs b/sorelc/src/main.rs index 724df84..141e322 100644 --- a/sorelc/src/main.rs +++ b/sorelc/src/main.rs @@ -1,5 +1,3 @@ -mod tokenizer; -mod parser; mod ir; mod riscv_asm_codegen; @@ -16,6 +14,6 @@ fn main() -> Result<()> { let mut asm_path = PathBuf::from(filename); asm_path.set_extension("asm"); let mut output = File::create(asm_path)?; - write!(output, "{}\n", generator.assembly()?)?; + writeln!(output, "{}", generator.assembly()?)?; Ok(()) } diff --git a/sorelc/src/riscv_asm_codegen.rs b/sorelc/src/riscv_asm_codegen.rs index e9b0e86..de72435 100644 --- a/sorelc/src/riscv_asm_codegen.rs +++ b/sorelc/src/riscv_asm_codegen.rs @@ -345,7 +345,7 @@ impl<'a> CodeGen<'a> { }, IR::Else => { self.label("# else"); - let if_counter = if_stack.last().unwrap().clone(); + let if_counter = *if_stack.last().unwrap(); self.line(format!("j _endif_{}", if_counter)); self.label(format!("_else_{}:", if_counter)); seen_else.insert(if_counter); @@ -353,7 +353,7 @@ impl<'a> CodeGen<'a> { IR::EndIf => { self.label("# endif"); let stack = &mut if_stack; - let if_counter = stack.last().unwrap().clone(); + let if_counter = *stack.last().unwrap(); if !seen_else.contains(&if_counter) { self.label(format!("_else_{}:", if_counter)); } else { @@ -371,7 +371,7 @@ impl<'a> CodeGen<'a> { }, IR::EndLoop => { let stack = &mut loop_stack; - let loop_counter = stack.last().unwrap().clone(); + let loop_counter = *stack.last().unwrap(); self.line(format!("j _loop_{}", loop_counter)); self.label(format!("_endloop_{}:", loop_counter)); stack.pop(); -- 2.43.0