From: Bryan English Date: Thu, 5 Feb 2026 04:50:47 +0000 (-0500) Subject: rename to sorel X-Git-Url: https://rethought.computer/gitweb//gitweb//git?a=commitdiff_plain;h=beb97c20dd808849c5ffee059f54911c93576b1b;p=sorel-lang.git rename to sorel --- diff --git a/rel-lang/Cargo.lock b/rel-lang/Cargo.lock index b6c79c0..4280816 100644 --- a/rel-lang/Cargo.lock +++ b/rel-lang/Cargo.lock @@ -54,33 +54,6 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "rel-interpret" -version = "0.1.0" -dependencies = [ - "anyhow", - "rel-ir", - "syscalls", -] - -[[package]] -name = "rel-ir" -version = "0.1.0" -dependencies = [ - "serde", - "serde_derive", - "serde_yaml", -] - -[[package]] -name = "relc" -version = "0.1.0" -dependencies = [ - "anyhow", - "rel-interpret", - "rel-ir", -] - [[package]] name = "ryu" version = "1.0.20" @@ -141,6 +114,33 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sorel-interpret" +version = "0.1.0" +dependencies = [ + "anyhow", + "sorel-ir", + "syscalls", +] + +[[package]] +name = "sorel-ir" +version = "0.1.0" +dependencies = [ + "serde", + "serde_derive", + "serde_yaml", +] + +[[package]] +name = "sorelc" +version = "0.1.0" +dependencies = [ + "anyhow", + "sorel-interpret", + "sorel-ir", +] + [[package]] name = "syn" version = "2.0.111" diff --git a/rel-lang/Cargo.toml b/rel-lang/Cargo.toml index fcafffc..19da6a8 100644 --- a/rel-lang/Cargo.toml +++ b/rel-lang/Cargo.toml @@ -1,9 +1,9 @@ [workspace] resolver = "3" -members = ["rel-ir","relc", "rel-interpret"] +members = ["sorel-ir","sorelc", "sorel-interpret"] [workspace.dependencies] -rel-ir = { path = "./rel-ir", version = "0.1.0" } -rel-interpret = { path = "./rel-interpret", version = "0.1.0" } +sorel-ir = { path = "./sorel-ir", version = "0.1.0" } +sorel-interpret = { path = "./sorel-interpret", version = "0.1.0" } diff --git a/rel-lang/README.md b/rel-lang/README.md index e703eb1..40b5544 100644 --- a/rel-lang/README.md +++ b/rel-lang/README.md @@ -1,13 +1,13 @@ -# rel +# sorel 
-The name means "Rethought Language". +The name means "Stack-Oriented Rethought Language". ## TODO * [x] Imports * [x] Syscalls -* [ ] Loops +* [x] Loops * [ ] Structs * [ ] many, many more things diff --git a/rel-lang/examples/alloc.rel b/rel-lang/examples/alloc.rel deleted file mode 100644 index ab014eb..0000000 --- a/rel-lang/examples/alloc.rel +++ /dev/null @@ -1,18 +0,0 @@ -\ vim: filetype=forth - -: mmap 9 sys6 ; - -: PROT_READ 1 ; -: PROT_WRITE 2 ; -: MAP_PRIVATE 2 ; -: MAP_ANONYMOUS 32 ; - -: ALLOC_PROT PROT_READ PROT_WRITE | ; -: ALLOC_MAP MAP_PRIVATE MAP_ANONYMOUS | ; - -: alloc 0 swap ALLOC_PROT ALLOC_MAP -1:i16 0 mmap ; - -1024 alloc -putn -swap -putn diff --git a/rel-lang/examples/alloc.sorel b/rel-lang/examples/alloc.sorel new file mode 100644 index 0000000..ab014eb --- /dev/null +++ b/rel-lang/examples/alloc.sorel @@ -0,0 +1,18 @@ +\ vim: filetype=forth + +: mmap 9 sys6 ; + +: PROT_READ 1 ; +: PROT_WRITE 2 ; +: MAP_PRIVATE 2 ; +: MAP_ANONYMOUS 32 ; + +: ALLOC_PROT PROT_READ PROT_WRITE | ; +: ALLOC_MAP MAP_PRIVATE MAP_ANONYMOUS | ; + +: alloc 0 swap ALLOC_PROT ALLOC_MAP -1:i16 0 mmap ; + +1024 alloc +putn +swap +putn diff --git a/rel-lang/examples/fib.rel b/rel-lang/examples/fib.rel deleted file mode 100644 index ef22ee9..0000000 --- a/rel-lang/examples/fib.rel +++ /dev/null @@ -1,25 +0,0 @@ -\ vim: filetype=forth - -import "./put2.rel" - -: fib - dup 1 > if - dup 1 - fib - swap 2 - fib - + - endif -; - -0 fib putn -1 fib putn -2 fib putn -3 fib putn -4 fib putn -5 fib putn -6 fib putn -7 fib putn -8 fib putn -9 fib putn -10 fib putn - -5 fib 6 fib put2 diff --git a/rel-lang/examples/fib.sorel b/rel-lang/examples/fib.sorel new file mode 100644 index 0000000..ef22ee9 --- /dev/null +++ b/rel-lang/examples/fib.sorel @@ -0,0 +1,25 @@ +\ vim: filetype=forth + +import "./put2.rel" + +: fib + dup 1 > if + dup 1 - fib + swap 2 - fib + + + endif +; + +0 fib putn +1 fib putn +2 fib putn +3 fib putn +4 fib putn +5 fib putn +6 fib putn +7 fib putn +8 fib putn +9 
fib putn +10 fib putn + +5 fib 6 fib put2 diff --git a/rel-lang/examples/put2.rel b/rel-lang/examples/put2.rel deleted file mode 100644 index 4f756c2..0000000 --- a/rel-lang/examples/put2.rel +++ /dev/null @@ -1,7 +0,0 @@ -\ vim: filetype=forth - -: put2 putn putn ; - -: foobar dup dup ; - -export put2 diff --git a/rel-lang/examples/put2.sorel b/rel-lang/examples/put2.sorel new file mode 100644 index 0000000..4f756c2 --- /dev/null +++ b/rel-lang/examples/put2.sorel @@ -0,0 +1,7 @@ +\ vim: filetype=forth + +: put2 putn putn ; + +: foobar dup dup ; + +export put2 diff --git a/rel-lang/examples/syscalls.rel b/rel-lang/examples/syscalls.rel deleted file mode 100644 index 9d7b88c..0000000 --- a/rel-lang/examples/syscalls.rel +++ /dev/null @@ -1,8 +0,0 @@ -\ vim: filetype=forth - -: getpid - 39 sys0 - drop -; - -getpid putn diff --git a/rel-lang/examples/syscalls.sorel b/rel-lang/examples/syscalls.sorel new file mode 100644 index 0000000..9d7b88c --- /dev/null +++ b/rel-lang/examples/syscalls.sorel @@ -0,0 +1,8 @@ +\ vim: filetype=forth + +: getpid + 39 sys0 + drop +; + +getpid putn diff --git a/rel-lang/fib-example/compile.sh b/rel-lang/fib-example/compile.sh index 2e8be05..6ef003e 100644 --- a/rel-lang/fib-example/compile.sh +++ b/rel-lang/fib-example/compile.sh @@ -1,4 +1,4 @@ -../target/debug/relc fib.rel +../target/debug/sorelc fib.sorel riscv64-unknown-linux-gnu-as -o fib.o fib.asm riscv64-unknown-linux-gnu-cc -O1 -no-pie -o test.out fib.o putn.c -nostartfiles ./test.out diff --git a/rel-lang/rel-interpret/Cargo.toml b/rel-lang/rel-interpret/Cargo.toml deleted file mode 100644 index c1dc773..0000000 --- a/rel-lang/rel-interpret/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "rel-interpret" -version = "0.1.0" -edition = "2024" - -[dependencies] -anyhow = "1.0.100" -rel-ir = { workspace = true } -syscalls = "0.7.0" diff --git a/rel-lang/rel-interpret/src/lib.rs b/rel-lang/rel-interpret/src/lib.rs deleted file mode 100644 index edd6e48..0000000 --- 
a/rel-lang/rel-interpret/src/lib.rs +++ /dev/null @@ -1,205 +0,0 @@ -use rel_ir::*; - -use std::collections::HashMap; - -use syscalls::*; -use anyhow::{Result, anyhow}; - -pub struct Interpreter<'a> { - module: &'a IRObject, - data_stack: Vec, - instruction_pointer: usize, - return_stack: Vec, - labels: HashMap, - strings: HashMap, -} - -impl<'a> Interpreter<'a> { - pub fn new(ir_mod: &'a IRObject) -> Result { - let mut index = 0; - let mut labels = HashMap::new(); - for token in ir_mod.text.iter() { - if let IR::Label(name) = token { - labels.insert(name.clone(), index); - } - index += 1; - } - let instruction_pointer = *labels.get("main").ok_or(anyhow!("no main word found!"))?; - - let mut strings = HashMap::new(); - ir_mod.data.iter().for_each(|s| { - match s { - IR::StringDef(label, string) => { - strings.insert(label.clone(), string.clone()); - }, - _ => {} - } - }); - - Ok(Self { - module: ir_mod, - data_stack: vec![], - instruction_pointer, - return_stack: vec![], - labels, - strings - }) - } - - fn process_syscall_result(&mut self, result: Result) { - match result { - Ok(result) => { - self.data_stack.push(result as u64); - self.data_stack.push(0); - } - Err(err) => { - self.data_stack.push(0); - self.data_stack.push(err.into_raw() as u64); - } - } - } - - fn ds_pop(&mut self) -> Result { - self.data_stack.pop().ok_or(anyhow!("popping from empty data stack")) - } - - pub fn run(&mut self) -> Result<()> { - let mut looking_for_endif = false; - loop { - if looking_for_endif { - match &self.module.text[self.instruction_pointer] { - IR::EndIf => { - looking_for_endif = false; - }, - IR::Else => { - looking_for_endif = false; - } - _ => {} - } - } else { - match &self.module.text[self.instruction_pointer] { - IR::Label(_) => {}, - IR::Call(name) => { - self.return_stack.push(self.instruction_pointer); - self.instruction_pointer = *self.labels.get(name).ok_or(anyhow!("calling undefined word `{}`", name))?; - }, - IR::Ret => { - if self.return_stack.len() == 0 { - 
return Ok(()); - } - self.instruction_pointer = self.return_stack.pop().ok_or(anyhow!("returning from top level"))?; - }, - IR::StackPush(num) => { - self.data_stack.push(*num); - }, - IR::AddU64 => { - let a = self.ds_pop()?; - let b = self.ds_pop()?; - self.data_stack.push(a + b); - }, - IR::SubtractU64 => { - let b = self.ds_pop()?; - let a = self.ds_pop()?; - self.data_stack.push(a - b); - }, - IR::Dup => { - self.data_stack.push(*self.data_stack.last().ok_or(anyhow!("empty data stack"))?); - }, - IR::Swap => { - let a = self.ds_pop()?; - let b = self.ds_pop()?; - self.data_stack.push(a); - self.data_stack.push(b); - }, - IR::Drop => { - self.data_stack.pop(); - }, - IR::Equals => { - let a = self.ds_pop()?; - let b = self.ds_pop()?; - self.data_stack.push(if a == b { - 0 - } else { - -1 as i64 as u64 - }); - }, - IR::GreaterThan => { - let b = self.ds_pop()?; - let a = self.ds_pop()?; - self.data_stack.push(if a > b { - 0 - } else { - -1 as i64 as u64 - }); - - }, - IR::BitwiseOr => { - let b = self.ds_pop()?; - let a = self.ds_pop()?; - self.data_stack.push(a | b); - } - IR::If => { - if self.ds_pop()? != 0 { - looking_for_endif = true; - } - }, - IR::EndIf => {}, - IR::Sys0 => { - let call_num = Sysno::from(self.ds_pop()? as i32); - self.process_syscall_result(unsafe { syscall!(call_num) }); - }, - IR::Sys1 => { - let call_num = Sysno::from(self.ds_pop()? as i32); - let a1 = self.ds_pop()?; - self.process_syscall_result(unsafe { syscall!(call_num, a1) }); - }, - IR::Sys2 => { - let call_num = Sysno::from(self.ds_pop()? as i32); - let a2 = self.ds_pop()?; - let a1 = self.ds_pop()?; - self.process_syscall_result(unsafe { syscall!(call_num, a1, a2) }); - }, - IR::Sys3 => { - let call_num = Sysno::from(self.ds_pop()? as i32); - let a3 = self.ds_pop()?; - let a2 = self.ds_pop()?; - let a1 = self.ds_pop()?; - self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3) }); - }, - IR::Sys4 => { - let call_num = Sysno::from(self.ds_pop()? 
as i32); - let a4 = self.ds_pop()?; - let a3 = self.ds_pop()?; - let a2 = self.ds_pop()?; - let a1 = self.ds_pop()?; - self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4) }); - }, - IR::Sys5 => { - let call_num = Sysno::from(self.ds_pop()? as i32); - let a5 = self.ds_pop()?; - let a4 = self.ds_pop()?; - let a3 = self.ds_pop()?; - let a2 = self.ds_pop()?; - let a1 = self.ds_pop()?; - self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5) }); - }, - IR::Sys6 => { - println!("stack: {:?}", self.data_stack); - let call_num = Sysno::from(self.ds_pop()? as i32); - let a6 = self.ds_pop()?; - let a5 = self.ds_pop()?; - let a4 = self.ds_pop()?; - let a3 = self.ds_pop()?; - let a2 = self.ds_pop()?; - let a1 = self.ds_pop()?; - self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5, a6) }); - }, - _ => { - println!("Instruction not implemented."); - }, - } - } - self.instruction_pointer += 1; - } - } -} diff --git a/rel-lang/rel-ir/Cargo.toml b/rel-lang/rel-ir/Cargo.toml deleted file mode 100644 index 831de69..0000000 --- a/rel-lang/rel-ir/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "rel-ir" -version = "0.1.0" -edition = "2024" - -[dependencies] -serde = "1.0.228" -serde_derive = "1.0.228" -serde_yaml = "0.9.34" diff --git a/rel-lang/rel-ir/src/lib.rs b/rel-lang/rel-ir/src/lib.rs deleted file mode 100644 index 919a08f..0000000 --- a/rel-lang/rel-ir/src/lib.rs +++ /dev/null @@ -1,69 +0,0 @@ -use serde_yaml::{from_str, to_string, Error}; -use serde_derive::{Serialize, Deserialize}; - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub enum IR { - Label(String), - Call(String), - Ret, - StackPush(u64), - StackPushString(String), // refers to string label, not the string itself - StringDef(String, String), // first is string label, second is string value - - // These next ones should always be inlined, so they're in IR. 
- Load, // @ ( addr -- x ) -- Fetch memory contents at addr - Load8, - Load16, - Load32, - Store, // ! ( x addr -- ) -- Store x at addr - - // These ones might not be inlined, but should be built-in, so a compiler might - // turn this into `Call(String)` before translating to assembly/machine-code, but - // an IR interpreter may just execute them. - AddU64, - SubtractU64, - MultiplyU64, - DivideU64, - ModU64, - Equals, - GreaterThan, - LessThan, - BitwiseOr, - Dup, - Swap, - Drop, - Over, - Rot, - StackPointer, - If, - Else, - EndIf, - Loop, - EndLoop, - - // System calls - Sys0, - Sys1, - Sys2, - Sys3, - Sys4, - Sys5, - Sys6, -} - -// This is like an .o file. -#[derive(Serialize, Deserialize, Debug)] -pub struct IRObject { - pub text: Vec, - pub data: Vec, -} - -impl IRObject { - pub fn to_s(&self) -> Result { - to_string(self) - } - - pub fn from_s(source: &str) -> Result { - from_str(source) - } -} diff --git a/rel-lang/relc/Cargo.toml b/rel-lang/relc/Cargo.toml deleted file mode 100644 index e5419b6..0000000 --- a/rel-lang/relc/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "relc" -version = "0.1.0" -edition = "2024" - -[dependencies] -rel-ir = { workspace = true } -rel-interpret = { workspace = true } -anyhow = "1.0.100" diff --git a/rel-lang/relc/src/ir.rs b/rel-lang/relc/src/ir.rs deleted file mode 100644 index 55006f6..0000000 --- a/rel-lang/relc/src/ir.rs +++ /dev/null @@ -1,289 +0,0 @@ -use crate::parser::Module; -use crate::tokenizer::{Token, tokenize}; -use rel_ir::*; - -use std::collections::{HashSet, HashMap}; -use std::path::PathBuf; -use std::rc::Rc; -use std::include_str; - -use anyhow::{Result, bail}; - -macro_rules! 
push_num { - ($num:ident) => { IR::StackPush(*$num as u64) }; - ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) }; -} - -#[derive(Debug, Default)] -struct IRModule { - data: Vec, - text: Vec, - imports: Vec>, - exports: Vec, - // TODO these next two should form an enum, not two options - source_file: Option, - std_specifier: Option, - number: usize, -} - -impl IRModule { - fn get_label_for_call(&self, name: &String) -> String { - let mut found: Option = None; - for imported in &self.imports { - if imported.exports.contains(name) { - found = Some(imported.number); - // Don't break here, since the last one should win. - } - } - if let Some(found) = found { - format!("_m{}_{}", found, name) - } else { - // TODO check if it's even a word locally. If not, bail. - format!("_m{}_{}", self.number, name) - } - } - - fn get_label(&self, name: &String) -> String { - format!("_m{}_{}", self.number, name) - } -} - -#[derive(Default)] -struct ImportTree { - data: Vec, - text: Vec, - all_modules: HashMap>, - all_exports: HashSet, - entrypoint: Rc, - module_count: usize, - collapse_seen: HashSet, -} - -fn std_import(specifier: &str) -> Result<&str> { - match specifier { - "std:mem" => Ok(include_str!("../../stdlib/mem.rel")), - "std:out" => Ok(include_str!("../../stdlib/out.rel")), - _ => bail!("{} is not a standard library module", specifier), - } -} - -impl ImportTree { - fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result> { - Ok(if specifier.starts_with("std:") { - let contents = std_import(specifier)?; - let parsed = &Module::parse(tokenize(&contents)?, is_entrypoint)?; - let module = self.generate_internal(None, Some(specifier.to_string()), parsed); - let module = Rc::new(module); - self.all_modules.insert(specifier.to_string(), module.clone()); - module - } else { - let mut path = PathBuf::from(specifier); - if path.is_relative() { - let mut new_path = importer_dir.clone(); - new_path.push(path); - path = 
new_path.canonicalize()?; - } - let path_key = path.to_string_lossy().to_string(); - if self.all_modules.contains_key(&path_key) { - let module = self.all_modules.get(&path_key).unwrap().clone(); - return Ok(module); - } - - let contents = &std::fs::read_to_string(&path)?; - let module = self.generate_internal(Some(path), None, &Module::parse(tokenize(&contents)?, is_entrypoint)?); - let module = Rc::new(module); - self.all_modules.insert(path_key, module.clone()); - if is_entrypoint { - self.entrypoint = module.clone(); - } - module - }) - - } - - fn generate_internal(&mut self, path: Option, std_specifier: Option, module: &Module) -> IRModule { - // Eventually these will end up being sections in assembly - let mut text = vec![]; - let mut data = vec![]; - - let mut imports = vec![]; - if let Some(ref path) = path { - module.imports.iter().for_each(|imported| { - if let Some(parent_path) = path.parent() { - match self.import(&parent_path.to_path_buf(), imported, false) { - Ok(module) => { - imports.push(module); - }, - Err(msg) => { - eprintln!("{}", msg); - } - } - } else { - } - }); - } else { - // We're in a stdlib module, which can only import other stdlib - // modules. - module.imports.iter().for_each(|imported| { - match self.import(&PathBuf::new(), imported, false) { - Ok(module) => { - imports.push(module); - }, - Err(msg) => { - eprintln!("{}", msg); - } - } - }); - } - - let exports: Vec<_> = module.exports.iter().map(|s| { - self.all_exports.insert(s.to_string()); - s.to_string() - }).collect(); - - - text.push(module.words.iter().map(|def| { - let mut body = def.instructions.iter().map(|inst| { - let mapped_ir = match inst { - Token::Word(word) => { - match *word { - "@" => IR::Load, - "@:8" => IR::Load8, - "@:16" => IR::Load16, - "@:32" => IR::Load32, - "!" 
=> IR::Store, - "dup" => IR::Dup, - "swap" => IR::Swap, - "drop" => IR::Drop, - "over" => IR::Over, - "rot" => IR::Rot, - "sp" => IR::StackPointer, - "if" => IR::If, - "else" => IR::Else, - "endif" => IR::EndIf, - "loop" => IR::Loop, - "endloop" => IR::EndLoop, - "=" => IR::Equals, - ">" => IR::GreaterThan, - "<" => IR::LessThan, - "+" => IR::AddU64, - "-" => IR::SubtractU64, - "*" => IR::MultiplyU64, - "/" => IR::DivideU64, - "%" => IR::ModU64, - "|" => IR::BitwiseOr, - "sys0" => IR::Sys0, - "sys1" => IR::Sys1, - "sys2" => IR::Sys2, - "sys3" => IR::Sys3, - "sys4" => IR::Sys4, - "sys5" => IR::Sys5, - "sys6" => IR::Sys6, - // TODO num type specfic math like `+:i32`, etc. - _ => IR::Call(String::from(*word)) - } - }, - Token::String(text) => { - let string_label = format!("string_{}", data.len()); - data.push(IR::StringDef(string_label.clone(), String::from(*text))); - IR::StackPushString(string_label) - }, - Token::NumU8(num) => push_num!(num), - Token::NumI8(num) => push_num!(num, u8), - Token::NumU16(num) => push_num!(num), - Token::NumI16(num) => push_num!(num, u16), - Token::NumU32(num) => push_num!(num), - Token::NumI32(num) => push_num!(num, u32), - Token::NumU64(num) => push_num!(num), - Token::NumI64(num) => push_num!(num), - Token::NumF32(num) => push_num!(num), - Token::NumF64(num) => push_num!(num), - }; - mapped_ir - }).collect::>(); - - let mut result = vec![IR::Label(def.name.to_string())]; - result.append(&mut body); - result.push(IR::Ret); - result - }).flatten().collect::>()); - - let number = self.module_count; - self.module_count += 1; - - IRModule { - text: text.into_iter().flatten().collect::>(), - data, - imports, - exports, - source_file: path, - std_specifier, - number, - } - } - - fn collapse(&mut self, module: Rc) -> Result<()> { - let seen_key = if let Some(source_file) = &module.source_file { - source_file.to_string_lossy().to_string() - } else { - module.std_specifier.clone().unwrap() - }; - if self.collapse_seen.contains(&seen_key) { - 
return Ok(()) - } - - for imported in module.imports.clone() { - self.collapse(imported)?; - } - - let is_entrypoint = module.source_file == self.entrypoint.source_file; - - let module_number = module.number; - - for string in &module.data { - if let IR::StringDef(name, val) = string { - let new_name = format!("{}_{}", name, module_number); - self.data.push(IR::StringDef(new_name, val.clone())); - } else { - bail!("non-string data"); - } - } - - for instruction in &module.text { - let new_instruction = match instruction { - IR::StackPushString(name) => { - let new_name = format!("{}_{}", name, module_number); - IR::StackPushString(new_name) - }, - IR::Label(name) => { - if is_entrypoint && name == "main" { - instruction.clone() - } else { - IR::Label(module.get_label(name)) - } - }, - IR::Call(name) => { - IR::Call(module.get_label_for_call(name)) - }, - _ => instruction.clone() - }; - self.text.push(new_instruction); - } - - self.collapse_seen.insert(seen_key); - - Ok(()) - } -} - -pub fn compile(path: &str) -> Result { - let dir = std::env::current_dir()?; - let mut tree: ImportTree = Default::default(); - let module = tree.import(&dir, path, true)?; - tree.collapse(module)?; - // TODO remove unused words - Ok(IRObject { - data: tree.data, - text: tree.text, - }) -} diff --git a/rel-lang/relc/src/main.rs b/rel-lang/relc/src/main.rs deleted file mode 100644 index 3171c43..0000000 --- a/rel-lang/relc/src/main.rs +++ /dev/null @@ -1,25 +0,0 @@ -mod tokenizer; -mod parser; -mod ir; -mod riscv_asm_codegen; - -use rel_interpret::Interpreter; - -use anyhow::Result; - -use std::fs::File; -use std::io::Write; -use std::path::PathBuf; - -fn main() -> Result<()> { - let filename = std::env::args().nth(1).expect("must provide a file to compile"); - let module = ir::compile(&filename)?; - // let mut interp = Interpreter::new(&module)?; - // interp.run()?; - let mut generator = riscv_asm_codegen::CodeGen::new(&module, 4096); - let mut asm_path = PathBuf::from(filename); - 
asm_path.set_extension("asm"); - let mut output = File::create(asm_path)?; - write!(output, "{}\n", generator.assembly()?)?; - Ok(()) -} diff --git a/rel-lang/relc/src/parser.rs b/rel-lang/relc/src/parser.rs deleted file mode 100644 index 3214139..0000000 --- a/rel-lang/relc/src/parser.rs +++ /dev/null @@ -1,132 +0,0 @@ -use crate::tokenizer::Token; -use anyhow::{Result, bail}; - -#[derive(Debug)] -pub struct WordDefinition<'a> { - pub name: &'a str, - pub instructions: Vec>, -} - -#[derive(Debug)] -pub struct Module<'a> { - pub words: Vec>, - pub imports: Vec<&'a str>, - pub exports: Vec<&'a str>, -} - -impl<'a> Module<'a> { - pub fn parse(input: Vec>, is_entrypoint: bool) -> Result { - let mut result = vec![]; - let mut main = vec![]; - let mut exports = vec![]; - let mut imports = vec![]; - let mut current_word: Option = None; - let mut about_to_start_word_def = false; - let mut last_was_import = false; - let mut last_was_export = false; - - for token in input { - if about_to_start_word_def { - if let Token::Word(name) = token { - current_word = Some(WordDefinition { - name, - instructions: vec![], - }); - about_to_start_word_def = false; - continue; - } else { - bail!("{:?} is not a valid word name!", token); - } - } else if let Token::Word(word) = token { - if word == ":" { - if current_word.is_some() { - bail!("can't define words inside word definitions!"); - } - about_to_start_word_def = true; - continue; - } - if word == ";" { - let word = current_word.take(); - if let Some(word) = word { - result.push(word); - continue; - } else { - bail!("`;` must be at the end of a word definition"); - } - } - } - if let Some(ref mut current_word) = current_word { - current_word.instructions.push(token); - } else { - match token { - Token::Word(word) => { - if word == "import" { - last_was_import = true; - } else if word == "export" { - last_was_export = true; - } else { - if last_was_export { - exports.push(word); - last_was_export = false; - } else { - 
main.push(token.clone()); - } - } - }, - Token::String(string) => { - if last_was_import { - imports.push(string); - last_was_import = false; - } else { - main.push(token.clone()); - } - }, - _ => { - main.push(token.clone()); - } - }; - } - } - - if about_to_start_word_def || current_word.is_some() { - bail!("unfinished word definition!"); - } - - if is_entrypoint { - result.push(WordDefinition { - name: "main", - instructions: main, - }); - } - - Ok(Module { words: result, imports, exports }) - } - - #[cfg(test)] - pub fn debug_print(&self) { - for word in &self.words { - println!("{}", word.name); - for instruction in &word.instructions { - println!(" {:?}", instruction); - } - } - } -} - - - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn try_some_parsing() { - let result = Module::parse(crate::tokenizer::tokenize(" -: hello world 16 \"planet\" ; -: soup chicken 4.5 hello ; - -hello soup -").unwrap(), true).unwrap(); - result.debug_print(); - } -} diff --git a/rel-lang/relc/src/riscv_asm_codegen.rs b/rel-lang/relc/src/riscv_asm_codegen.rs deleted file mode 100644 index 5ade98e..0000000 --- a/rel-lang/relc/src/riscv_asm_codegen.rs +++ /dev/null @@ -1,347 +0,0 @@ -use rel_ir::*; - -use anyhow::*; - -use std::collections::{HashMap, HashSet}; -use std::fmt::Display; - -pub struct CodeGen<'a> { - module: &'a IRObject, - data_stack_size: usize, - lines: Vec, -} - - -// Some inspiration -// ================ -// -// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list -// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack -// - -// Implementation Choices -// ====================== -// -// Data Stack pointer: s2 -// No return stack pointer (using C ABI, so sp, sorta) -// Use t0, t1, t2 for temporary values in words -// Data stack grows down - - -macro_rules! 
asm_macro { - ($name:ident, $src:expr) => { - fn $name(&mut self) { - self.line($src); - } - }; - ($name:ident, $src:expr, $arg0:ty) => { - fn $name(&mut self, val0: $arg0) { - self.line(format!($src, val0)); - } - }; - ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => { - fn $name(&mut self, val0: $arg0, val1: $arg1) { - self.line(format!($src, val0, val1)); - } - }; -} - -fn mangle(input: &str) -> String { - input - .replace("<", "_LT_") - .replace(">", "_GT_") - .replace("-", "___") -} - -impl<'a> CodeGen<'a> { - pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self { - Self { - module: ir_mod, - data_stack_size, - lines: vec![], - } - } - - fn line(&mut self, line: S) { - self.lines.push(format!(" {}", line)); - - } - - fn label(&mut self, line: S) { - self.lines.push(line.to_string()); - } - - asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str); - asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize); - asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str); - asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize); - - fn pop_to(&mut self, reg: &str) { - self.copy_top_stack_value_to(reg); - self.move_stack_ptr_by_cells(1); - } - - fn pop_some_to(&mut self, regs: &str) { - let mut regs = regs.trim().split(" ").collect::>(); - regs.reverse(); - let count = regs.len(); - let mut index = 0; - for reg in regs { - self.copy_offset_stack_value_to(reg, index); - index += 1; - } - self.move_stack_ptr_by_cells(count as isize); - } - - fn push_from(&mut self, reg: &str) { - self.move_stack_ptr_by_cells(-1); - self.copy_to_top_of_stack(reg); - } - - fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) { - self.pop_some_to(regs); - self.line(call); - self.push_from(reg); - } - - pub fn assembly(&mut self) -> Result{ - let mut string_table = HashMap::new(); - - // Static strings - self.label(".section .rodata\n"); - for ir in &self.module.data { - match ir { - IR::StringDef(string_label, some_string) => { - 
string_table.insert(some_string.clone(), string_label); - self.label(format!("{}:", string_label)); - self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz? - self.label(""); - }, - _ => bail!("Currently only string definitions are supported in the data section.") - } - } - - // Data stack - self.label(".data\n"); - self.label("data_stack:"); - self.line(format!(".space {}", self.data_stack_size)); - self.label(".globl data_stack_end\ndata_stack_end:\n"); - - // Code - self.label(".text\n"); - self.label(".align 3\n"); - - let mut if_block_count = 0; - let mut if_stack = vec![]; - let mut loop_count = 0; - let mut loop_stack = vec![]; - let mut seen_else = HashSet::new(); - let mut last_label = ""; - - for ir in &self.module.text { - match ir { - IR::Label(name) => { - last_label = name; - if name == "main" { - self.label(".globl _start"); // TODO is globl necessary? - self.label("_start:"); - self.line("la s2, data_stack_end # set initial data stack pointer"); - } else { - let mangled = mangle(name); - self.label(format!(".globl {}", mangled)); - self.label(format!("{}:", mangled)); - } - self.line("addi sp, sp, -16 # allocate 16 bytes on stack"); // allocate 16 bytes on stack - self.line("sd ra, 8(sp) # store return address on stack"); // store return address on stack - }, - IR::Call(name) => { - let mangled = mangle(name); - self.label(format!("# call {}", mangled)); - self.line(format!("call {}", mangled)); - }, - IR::Ret => { - if last_label == "main" { - self.label("# exit 0 syscall"); - self.line("li a7, 93"); - self.line("mv a0, x0"); - self.line("ecall"); - } else { - self.line("ld ra, 8(sp)"); // load return address from stack - self.line("addi sp, sp, 16"); // restore stack pointer - self.line("ret"); - } - }, - IR::Load8 => { - self.label("# load 8"); - self.copy_top_stack_value_to("t0"); - self.line("lbu t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Load16 => { - self.label("# load 16"); - 
self.copy_top_stack_value_to("t0"); - self.line("lhu t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Load32 => { - self.label("# load 32"); - self.copy_top_stack_value_to("t0"); - self.line("lwu t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Load => { - self.label("# load 64"); - self.copy_top_stack_value_to("t0"); - self.line("ld t0, 0(t0)"); // deref pointer in t0 to t0 - self.copy_to_top_of_stack("t0"); - }, - IR::Store => { // ( x addr -- ) - self.pop_some_to("t0 t1"); - self.line("sd t0, 0(t1)"); // store x at addr - }, - IR::StackPush(num) => { - self.label(format!("# stackpush {}", num)); - self.line(format!("li t0, {}", num)); - self.push_from("t0"); - }, - IR::StackPushString(name) => { - self.label(format!("# stackpushstring {}", name)); - self.line(format!("la t0, {}", name)); - self.push_from("t0"); - }, - IR::AddU64 => { - self.label("# add"); - self.pop_call_push("t0 t1", "add t0, t0, t1", "t0"); - }, - IR::SubtractU64 => { - self.label("# sub"); - self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); - }, - IR::MultiplyU64 => { - self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0"); - }, - IR::DivideU64 => { - self.pop_call_push("t0 t1", "div t0, t0, t1", "t0"); - }, - IR::ModU64 => { - self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0"); - }, - IR::Dup => { - self.label("# dup"); - self.copy_top_stack_value_to("t0"); - self.push_from("t0"); - }, - IR::Swap => { - self.label("# swap"); - self.pop_some_to("t1 t0"); - self.push_from("t0"); - self.push_from("t1"); - }, - IR::Rot => { - self.label("# rot"); - self.pop_some_to("t0 t1 t2"); - self.push_from("t1"); - self.push_from("t2"); - self.push_from("t0"); - }, - IR::StackPointer => { - self.label("# sp"); - self.line("addi t0, s2, 0"); - self.push_from("t0"); - }, - IR::Drop => { - self.label("# drop"); - self.move_stack_ptr_by_cells(1); - }, - IR::Equals => { - // Yes, this is the same as subtract, since we're treating 0 
as true, and - // others as false. - self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); - }, - IR::GreaterThan => { - self.label("# >"); - self.pop_some_to("t0 t1"); - self.line("sgt t0, t0, t1"); - self.line("seqz t0, t0"); // remember, 0 is true, others are false - self.push_from("t0"); - }, - IR::LessThan => { - self.label("# <"); - self.pop_some_to("t0 t1"); - self.line("slt t0, t0, t1"); - self.line("seqz t0, t0"); // remember, 0 is true, others are false - self.push_from("t0"); - }, - IR::BitwiseOr => { - self.pop_call_push("t0 t1", "or t0, t0, t1", "t0"); - }, - IR::Sys0 => { - self.pop_call_push("a7", "ecall", "a0"); - }, - IR::Sys1 => { - self.pop_call_push("a0 a7", "ecall", "a0"); - }, - IR::Sys2 => { - self.pop_call_push("a0 a1 a7", "ecall", "a0"); - }, - IR::Sys3 => { - self.pop_call_push("a0 a1 a2 a7", "ecall", "a0"); - }, - IR::Sys4 => { - self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0"); - }, - IR::Sys5 => { - self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0"); - }, - IR::Sys6 => { - self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0"); - }, - // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html - IR::If => { - self.label("# if"); - self.pop_to("t0"); - self.line(format!("bnez t0, _else_{}", if_block_count)); - if_stack.push(if_block_count); - if_block_count += 1; - }, - IR::Else => { - self.label("# else"); - let if_counter = if_stack.last().unwrap().clone(); - self.line(format!("j _endif_{}", if_counter)); - self.label(format!("_else_{}:", if_counter)); - seen_else.insert(if_counter); - }, - IR::EndIf => { - self.label("# endif"); - let stack = &mut if_stack; - let if_counter = stack.last().unwrap().clone(); - if !seen_else.contains(&if_counter) { - self.label(format!("_else_{}:", if_counter)); - } else { - self.label(format!("_endif_{}:", if_counter)); - seen_else.remove(&if_counter); - } - stack.pop(); - }, - IR::Loop => { // keep looping until is true/0 - 
self.label(format!("_loop_{}:", loop_count)); - self.pop_to("t0"); - self.line(format!("beqz t0, _endloop_{}", loop_count)); - loop_stack.push(loop_count); - loop_count += 1; - }, - IR::EndLoop => { - let stack = &mut loop_stack; - let loop_counter = stack.last().unwrap().clone(); - self.line(format!("j _loop_{}", loop_counter)); - self.label(format!("_endloop_{}:", loop_counter)); - stack.pop(); - }, - _ => bail!("not implemented yet: {:?}", ir), - } - } - - Ok(self.lines.join("\n")) - } -} - diff --git a/rel-lang/relc/src/tokenizer.rs b/rel-lang/relc/src/tokenizer.rs deleted file mode 100644 index 093c012..0000000 --- a/rel-lang/relc/src/tokenizer.rs +++ /dev/null @@ -1,178 +0,0 @@ -use anyhow::{Result, anyhow}; - -#[derive(Debug, Clone)] -pub enum Token<'a> { - Word(&'a str), - String(&'a str), - NumU8(u8), - NumI8(i8), - NumU16(u16), - NumI16(i16), - NumU32(u32), - NumI32(i32), - NumU64(u64), - NumI64(i64), - NumF32(f32), - NumF64(f64), -} - -impl<'a> Token<'a>{ - fn parse_word_or_num(input: &'a str) -> Result> { - if input == "-" { - return Ok(Token::Word(input)) - } - - // we're assuming any token starting with `-` with length greater than one - // is a negative number - if input.starts_with('-') || input.chars().nth(0).map(|x| x.is_numeric()).unwrap_or(false) { - if input.contains(':') { - let mut splat = input.split(':'); - let num = splat.next().ok_or(anyhow!("no number found"))?; - let typ = splat.next().ok_or(anyhow!("no number type found"))?; - match typ { - "u8" => Ok(Token::NumU8(num.parse()?)), - "i8" => Ok(Token::NumI8(num.parse()?)), - "u16" => Ok(Token::NumU16(num.parse()?)), - "i16" => Ok(Token::NumI16(num.parse()?)), - "u32" => Ok(Token::NumU32(num.parse()?)), - "i32" => Ok(Token::NumI32(num.parse()?)), - "u64" => Ok(Token::NumU64(num.parse()?)), - "i64" => Ok(Token::NumI64(num.parse()?)), - "f32" => Ok(Token::NumF32(num.parse()?)), - "f64" => Ok(Token::NumF64(num.parse()?)), - _ => panic!("unknown number type") - } - } else { - if 
input.contains('.') { - Ok(Token::NumF64(input.parse()?)) - } else if input.starts_with('-') { - Ok(Token::NumI64(input.parse()?)) - } else { - Ok(Token::NumU64(input.parse()?)) - } - } - } else { - Ok(Token::Word(input)) - } - } -} - -// TODO really want an iterator, not a vector -pub fn tokenize<'a>(input: &'a str) -> Result>> { - let mut result = vec![]; - let mut string_start: Option = None; - let mut word_or_num_start: Option = None; - let mut last_is_backslash = false; - let mut last_is_whitespace = true; - let mut in_doc_comment = false; - let mut in_line_comment = false; - let mut index = 0; - let mut first_char = true; - - - for char in input.chars() { - if first_char { - first_char = false; - } else { - index += 1; - } - - if in_doc_comment { - if char == ')' { - in_doc_comment = false; - last_is_whitespace = true; // not really true, but means don't need space after - } - continue; - } - - if in_line_comment { - if char == '\n' { - in_line_comment = false; - last_is_whitespace = true; // not really true, but means don't need space after - } - continue; - } - - if char == '"' { - if let Some(start) = string_start { - if !last_is_backslash { - result.push(Token::String(&input[start..index])); - string_start = None; - } - } else { - string_start = Some(index + 1) - } - last_is_backslash = false; - last_is_whitespace = false; - continue; - } - - - if string_start.is_some() { - last_is_backslash = char == '\\'; - continue; - } - - if char.is_whitespace() { - if last_is_backslash { - in_line_comment = true; - } else if !last_is_whitespace && let Some(start) = word_or_num_start { - let token = &input[start..index]; - if token == "(" { - in_doc_comment = true; - } else { - result.push(Token::parse_word_or_num(&input[start..index])?); - } - word_or_num_start = None; - } - last_is_whitespace = true; - last_is_backslash = false; - continue; - } - - last_is_backslash = char == '\\'; - - if index == input.len() - 1 { - if !last_is_whitespace && let Some(start) = 
word_or_num_start { - result.push(Token::parse_word_or_num(&input[start..])?); - } - continue; - } - - if last_is_whitespace { // start of word or num (we already handled strings) - word_or_num_start = Some(index); - last_is_whitespace = false; - } - } - Ok(result) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn try_some_tokenizing() { - let result = tokenize(" - - \\ soup - 2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup -"); - println!("result: {:?}", result); - } - - #[test] - fn comments() { - let result = tokenize(" - ( - foo - bar - ) - : baz ( x y -- z ) - chicken - soup - ; - "); - println!("result: {:?}", result); - } -} diff --git a/rel-lang/sorel-interpret/Cargo.toml b/rel-lang/sorel-interpret/Cargo.toml new file mode 100644 index 0000000..6afe8a9 --- /dev/null +++ b/rel-lang/sorel-interpret/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "sorel-interpret" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0.100" +sorel-ir = { workspace = true } +syscalls = "0.7.0" diff --git a/rel-lang/sorel-interpret/src/lib.rs b/rel-lang/sorel-interpret/src/lib.rs new file mode 100644 index 0000000..b637594 --- /dev/null +++ b/rel-lang/sorel-interpret/src/lib.rs @@ -0,0 +1,205 @@ +use sorel_ir::*; + +use std::collections::HashMap; + +use syscalls::*; +use anyhow::{Result, anyhow}; + +pub struct Interpreter<'a> { + module: &'a IRObject, + data_stack: Vec, + instruction_pointer: usize, + return_stack: Vec, + labels: HashMap, + strings: HashMap, +} + +impl<'a> Interpreter<'a> { + pub fn new(ir_mod: &'a IRObject) -> Result { + let mut index = 0; + let mut labels = HashMap::new(); + for token in ir_mod.text.iter() { + if let IR::Label(name) = token { + labels.insert(name.clone(), index); + } + index += 1; + } + let instruction_pointer = *labels.get("main").ok_or(anyhow!("no main word found!"))?; + + let mut strings = HashMap::new(); + ir_mod.data.iter().for_each(|s| { + match s { + IR::StringDef(label, string) => { + 
strings.insert(label.clone(), string.clone()); + }, + _ => {} + } + }); + + Ok(Self { + module: ir_mod, + data_stack: vec![], + instruction_pointer, + return_stack: vec![], + labels, + strings + }) + } + + fn process_syscall_result(&mut self, result: Result) { + match result { + Ok(result) => { + self.data_stack.push(result as u64); + self.data_stack.push(0); + } + Err(err) => { + self.data_stack.push(0); + self.data_stack.push(err.into_raw() as u64); + } + } + } + + fn ds_pop(&mut self) -> Result { + self.data_stack.pop().ok_or(anyhow!("popping from empty data stack")) + } + + pub fn run(&mut self) -> Result<()> { + let mut looking_for_endif = false; + loop { + if looking_for_endif { + match &self.module.text[self.instruction_pointer] { + IR::EndIf => { + looking_for_endif = false; + }, + IR::Else => { + looking_for_endif = false; + } + _ => {} + } + } else { + match &self.module.text[self.instruction_pointer] { + IR::Label(_) => {}, + IR::Call(name) => { + self.return_stack.push(self.instruction_pointer); + self.instruction_pointer = *self.labels.get(name).ok_or(anyhow!("calling undefined word `{}`", name))?; + }, + IR::Ret => { + if self.return_stack.len() == 0 { + return Ok(()); + } + self.instruction_pointer = self.return_stack.pop().ok_or(anyhow!("returning from top level"))?; + }, + IR::StackPush(num) => { + self.data_stack.push(*num); + }, + IR::AddU64 => { + let a = self.ds_pop()?; + let b = self.ds_pop()?; + self.data_stack.push(a + b); + }, + IR::SubtractU64 => { + let b = self.ds_pop()?; + let a = self.ds_pop()?; + self.data_stack.push(a - b); + }, + IR::Dup => { + self.data_stack.push(*self.data_stack.last().ok_or(anyhow!("empty data stack"))?); + }, + IR::Swap => { + let a = self.ds_pop()?; + let b = self.ds_pop()?; + self.data_stack.push(a); + self.data_stack.push(b); + }, + IR::Drop => { + self.data_stack.pop(); + }, + IR::Equals => { + let a = self.ds_pop()?; + let b = self.ds_pop()?; + self.data_stack.push(if a == b { + 0 + } else { + -1 as i64 as 
u64 + }); + }, + IR::GreaterThan => { + let b = self.ds_pop()?; + let a = self.ds_pop()?; + self.data_stack.push(if a > b { + 0 + } else { + -1 as i64 as u64 + }); + + }, + IR::BitwiseOr => { + let b = self.ds_pop()?; + let a = self.ds_pop()?; + self.data_stack.push(a | b); + } + IR::If => { + if self.ds_pop()? != 0 { + looking_for_endif = true; + } + }, + IR::EndIf => {}, + IR::Sys0 => { + let call_num = Sysno::from(self.ds_pop()? as i32); + self.process_syscall_result(unsafe { syscall!(call_num) }); + }, + IR::Sys1 => { + let call_num = Sysno::from(self.ds_pop()? as i32); + let a1 = self.ds_pop()?; + self.process_syscall_result(unsafe { syscall!(call_num, a1) }); + }, + IR::Sys2 => { + let call_num = Sysno::from(self.ds_pop()? as i32); + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + self.process_syscall_result(unsafe { syscall!(call_num, a1, a2) }); + }, + IR::Sys3 => { + let call_num = Sysno::from(self.ds_pop()? as i32); + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3) }); + }, + IR::Sys4 => { + let call_num = Sysno::from(self.ds_pop()? as i32); + let a4 = self.ds_pop()?; + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4) }); + }, + IR::Sys5 => { + let call_num = Sysno::from(self.ds_pop()? as i32); + let a5 = self.ds_pop()?; + let a4 = self.ds_pop()?; + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5) }); + }, + IR::Sys6 => { + println!("stack: {:?}", self.data_stack); + let call_num = Sysno::from(self.ds_pop()? 
as i32); + let a6 = self.ds_pop()?; + let a5 = self.ds_pop()?; + let a4 = self.ds_pop()?; + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5, a6) }); + }, + _ => { + println!("Instruction not implemented."); + }, + } + } + self.instruction_pointer += 1; + } + } +} diff --git a/rel-lang/sorel-ir/Cargo.toml b/rel-lang/sorel-ir/Cargo.toml new file mode 100644 index 0000000..5618a61 --- /dev/null +++ b/rel-lang/sorel-ir/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "sorel-ir" +version = "0.1.0" +edition = "2024" + +[dependencies] +serde = "1.0.228" +serde_derive = "1.0.228" +serde_yaml = "0.9.34" diff --git a/rel-lang/sorel-ir/src/lib.rs b/rel-lang/sorel-ir/src/lib.rs new file mode 100644 index 0000000..919a08f --- /dev/null +++ b/rel-lang/sorel-ir/src/lib.rs @@ -0,0 +1,69 @@ +use serde_yaml::{from_str, to_string, Error}; +use serde_derive::{Serialize, Deserialize}; + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub enum IR { + Label(String), + Call(String), + Ret, + StackPush(u64), + StackPushString(String), // refers to string label, not the string itself + StringDef(String, String), // first is string label, second is string value + + // These next ones should always be inlined, so they're in IR. + Load, // @ ( addr -- x ) -- Fetch memory contents at addr + Load8, + Load16, + Load32, + Store, // ! ( x addr -- ) -- Store x at addr + + // These ones might not be inlined, but should be built-in, so a compiler might + // turn this into `Call(String)` before translating to assembly/machine-code, but + // an IR interpreter may just execute them. + AddU64, + SubtractU64, + MultiplyU64, + DivideU64, + ModU64, + Equals, + GreaterThan, + LessThan, + BitwiseOr, + Dup, + Swap, + Drop, + Over, + Rot, + StackPointer, + If, + Else, + EndIf, + Loop, + EndLoop, + + // System calls + Sys0, + Sys1, + Sys2, + Sys3, + Sys4, + Sys5, + Sys6, +} + +// This is like an .o file. 
+#[derive(Serialize, Deserialize, Debug)] +pub struct IRObject { + pub text: Vec, + pub data: Vec, +} + +impl IRObject { + pub fn to_s(&self) -> Result { + to_string(self) + } + + pub fn from_s(source: &str) -> Result { + from_str(source) + } +} diff --git a/rel-lang/sorelc/Cargo.toml b/rel-lang/sorelc/Cargo.toml new file mode 100644 index 0000000..dc3b9bc --- /dev/null +++ b/rel-lang/sorelc/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "sorelc" +version = "0.1.0" +edition = "2024" + +[dependencies] +sorel-ir = { workspace = true } +sorel-interpret = { workspace = true } +anyhow = "1.0.100" diff --git a/rel-lang/sorelc/src/ir.rs b/rel-lang/sorelc/src/ir.rs new file mode 100644 index 0000000..7d31d1a --- /dev/null +++ b/rel-lang/sorelc/src/ir.rs @@ -0,0 +1,289 @@ +use crate::parser::Module; +use crate::tokenizer::{Token, tokenize}; +use sorel_ir::*; + +use std::collections::{HashSet, HashMap}; +use std::path::PathBuf; +use std::rc::Rc; +use std::include_str; + +use anyhow::{Result, bail}; + +macro_rules! push_num { + ($num:ident) => { IR::StackPush(*$num as u64) }; + ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) }; +} + +#[derive(Debug, Default)] +struct IRModule { + data: Vec, + text: Vec, + imports: Vec>, + exports: Vec, + // TODO these next two should form an enum, not two options + source_file: Option, + std_specifier: Option, + number: usize, +} + +impl IRModule { + fn get_label_for_call(&self, name: &String) -> String { + let mut found: Option = None; + for imported in &self.imports { + if imported.exports.contains(name) { + found = Some(imported.number); + // Don't break here, since the last one should win. + } + } + if let Some(found) = found { + format!("_m{}_{}", found, name) + } else { + // TODO check if it's even a word locally. If not, bail. 
+ format!("_m{}_{}", self.number, name) + } + } + + fn get_label(&self, name: &String) -> String { + format!("_m{}_{}", self.number, name) + } +} + +#[derive(Default)] +struct ImportTree { + data: Vec, + text: Vec, + all_modules: HashMap>, + all_exports: HashSet, + entrypoint: Rc, + module_count: usize, + collapse_seen: HashSet, +} + +fn std_import(specifier: &str) -> Result<&str> { + match specifier { + "std:mem" => Ok(include_str!("../../stdlib/mem.sorel")), + "std:out" => Ok(include_str!("../../stdlib/out.sorel")), + _ => bail!("{} is not a standard library module", specifier), + } +} + +impl ImportTree { + fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result> { + Ok(if specifier.starts_with("std:") { + let contents = std_import(specifier)?; + let parsed = &Module::parse(tokenize(&contents)?, is_entrypoint)?; + let module = self.generate_internal(None, Some(specifier.to_string()), parsed); + let module = Rc::new(module); + self.all_modules.insert(specifier.to_string(), module.clone()); + module + } else { + let mut path = PathBuf::from(specifier); + if path.is_relative() { + let mut new_path = importer_dir.clone(); + new_path.push(path); + path = new_path.canonicalize()?; + } + let path_key = path.to_string_lossy().to_string(); + if self.all_modules.contains_key(&path_key) { + let module = self.all_modules.get(&path_key).unwrap().clone(); + return Ok(module); + } + + let contents = &std::fs::read_to_string(&path)?; + let module = self.generate_internal(Some(path), None, &Module::parse(tokenize(&contents)?, is_entrypoint)?); + let module = Rc::new(module); + self.all_modules.insert(path_key, module.clone()); + if is_entrypoint { + self.entrypoint = module.clone(); + } + module + }) + + } + + fn generate_internal(&mut self, path: Option, std_specifier: Option, module: &Module) -> IRModule { + // Eventually these will end up being sections in assembly + let mut text = vec![]; + let mut data = vec![]; + + let mut imports = 
vec![]; + if let Some(ref path) = path { + module.imports.iter().for_each(|imported| { + if let Some(parent_path) = path.parent() { + match self.import(&parent_path.to_path_buf(), imported, false) { + Ok(module) => { + imports.push(module); + }, + Err(msg) => { + eprintln!("{}", msg); + } + } + } else { + } + }); + } else { + // We're in a stdlib module, which can only import other stdlib + // modules. + module.imports.iter().for_each(|imported| { + match self.import(&PathBuf::new(), imported, false) { + Ok(module) => { + imports.push(module); + }, + Err(msg) => { + eprintln!("{}", msg); + } + } + }); + } + + let exports: Vec<_> = module.exports.iter().map(|s| { + self.all_exports.insert(s.to_string()); + s.to_string() + }).collect(); + + + text.push(module.words.iter().map(|def| { + let mut body = def.instructions.iter().map(|inst| { + let mapped_ir = match inst { + Token::Word(word) => { + match *word { + "@" => IR::Load, + "@:8" => IR::Load8, + "@:16" => IR::Load16, + "@:32" => IR::Load32, + "!" => IR::Store, + "dup" => IR::Dup, + "swap" => IR::Swap, + "drop" => IR::Drop, + "over" => IR::Over, + "rot" => IR::Rot, + "sp" => IR::StackPointer, + "if" => IR::If, + "else" => IR::Else, + "endif" => IR::EndIf, + "loop" => IR::Loop, + "endloop" => IR::EndLoop, + "=" => IR::Equals, + ">" => IR::GreaterThan, + "<" => IR::LessThan, + "+" => IR::AddU64, + "-" => IR::SubtractU64, + "*" => IR::MultiplyU64, + "/" => IR::DivideU64, + "%" => IR::ModU64, + "|" => IR::BitwiseOr, + "sys0" => IR::Sys0, + "sys1" => IR::Sys1, + "sys2" => IR::Sys2, + "sys3" => IR::Sys3, + "sys4" => IR::Sys4, + "sys5" => IR::Sys5, + "sys6" => IR::Sys6, + // TODO num type specfic math like `+:i32`, etc. 
+ _ => IR::Call(String::from(*word)) + } + }, + Token::String(text) => { + let string_label = format!("string_{}", data.len()); + data.push(IR::StringDef(string_label.clone(), String::from(*text))); + IR::StackPushString(string_label) + }, + Token::NumU8(num) => push_num!(num), + Token::NumI8(num) => push_num!(num, u8), + Token::NumU16(num) => push_num!(num), + Token::NumI16(num) => push_num!(num, u16), + Token::NumU32(num) => push_num!(num), + Token::NumI32(num) => push_num!(num, u32), + Token::NumU64(num) => push_num!(num), + Token::NumI64(num) => push_num!(num), + Token::NumF32(num) => push_num!(num), + Token::NumF64(num) => push_num!(num), + }; + mapped_ir + }).collect::>(); + + let mut result = vec![IR::Label(def.name.to_string())]; + result.append(&mut body); + result.push(IR::Ret); + result + }).flatten().collect::>()); + + let number = self.module_count; + self.module_count += 1; + + IRModule { + text: text.into_iter().flatten().collect::>(), + data, + imports, + exports, + source_file: path, + std_specifier, + number, + } + } + + fn collapse(&mut self, module: Rc) -> Result<()> { + let seen_key = if let Some(source_file) = &module.source_file { + source_file.to_string_lossy().to_string() + } else { + module.std_specifier.clone().unwrap() + }; + if self.collapse_seen.contains(&seen_key) { + return Ok(()) + } + + for imported in module.imports.clone() { + self.collapse(imported)?; + } + + let is_entrypoint = module.source_file == self.entrypoint.source_file; + + let module_number = module.number; + + for string in &module.data { + if let IR::StringDef(name, val) = string { + let new_name = format!("{}_{}", name, module_number); + self.data.push(IR::StringDef(new_name, val.clone())); + } else { + bail!("non-string data"); + } + } + + for instruction in &module.text { + let new_instruction = match instruction { + IR::StackPushString(name) => { + let new_name = format!("{}_{}", name, module_number); + IR::StackPushString(new_name) + }, + IR::Label(name) => { + 
if is_entrypoint && name == "main" { + instruction.clone() + } else { + IR::Label(module.get_label(name)) + } + }, + IR::Call(name) => { + IR::Call(module.get_label_for_call(name)) + }, + _ => instruction.clone() + }; + self.text.push(new_instruction); + } + + self.collapse_seen.insert(seen_key); + + Ok(()) + } +} + +pub fn compile(path: &str) -> Result { + let dir = std::env::current_dir()?; + let mut tree: ImportTree = Default::default(); + let module = tree.import(&dir, path, true)?; + tree.collapse(module)?; + // TODO remove unused words + Ok(IRObject { + data: tree.data, + text: tree.text, + }) +} diff --git a/rel-lang/sorelc/src/main.rs b/rel-lang/sorelc/src/main.rs new file mode 100644 index 0000000..89f1d7a --- /dev/null +++ b/rel-lang/sorelc/src/main.rs @@ -0,0 +1,25 @@ +mod tokenizer; +mod parser; +mod ir; +mod riscv_asm_codegen; + +use sorel_interpret::Interpreter; + +use anyhow::Result; + +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; + +fn main() -> Result<()> { + let filename = std::env::args().nth(1).expect("must provide a file to compile"); + let module = ir::compile(&filename)?; + // let mut interp = Interpreter::new(&module)?; + // interp.run()?; + let mut generator = riscv_asm_codegen::CodeGen::new(&module, 4096); + let mut asm_path = PathBuf::from(filename); + asm_path.set_extension("asm"); + let mut output = File::create(asm_path)?; + write!(output, "{}\n", generator.assembly()?)?; + Ok(()) +} diff --git a/rel-lang/sorelc/src/parser.rs b/rel-lang/sorelc/src/parser.rs new file mode 100644 index 0000000..3214139 --- /dev/null +++ b/rel-lang/sorelc/src/parser.rs @@ -0,0 +1,132 @@ +use crate::tokenizer::Token; +use anyhow::{Result, bail}; + +#[derive(Debug)] +pub struct WordDefinition<'a> { + pub name: &'a str, + pub instructions: Vec>, +} + +#[derive(Debug)] +pub struct Module<'a> { + pub words: Vec>, + pub imports: Vec<&'a str>, + pub exports: Vec<&'a str>, +} + +impl<'a> Module<'a> { + pub fn parse(input: Vec>, is_entrypoint: 
bool) -> Result { + let mut result = vec![]; + let mut main = vec![]; + let mut exports = vec![]; + let mut imports = vec![]; + let mut current_word: Option = None; + let mut about_to_start_word_def = false; + let mut last_was_import = false; + let mut last_was_export = false; + + for token in input { + if about_to_start_word_def { + if let Token::Word(name) = token { + current_word = Some(WordDefinition { + name, + instructions: vec![], + }); + about_to_start_word_def = false; + continue; + } else { + bail!("{:?} is not a valid word name!", token); + } + } else if let Token::Word(word) = token { + if word == ":" { + if current_word.is_some() { + bail!("can't define words inside word definitions!"); + } + about_to_start_word_def = true; + continue; + } + if word == ";" { + let word = current_word.take(); + if let Some(word) = word { + result.push(word); + continue; + } else { + bail!("`;` must be at the end of a word definition"); + } + } + } + if let Some(ref mut current_word) = current_word { + current_word.instructions.push(token); + } else { + match token { + Token::Word(word) => { + if word == "import" { + last_was_import = true; + } else if word == "export" { + last_was_export = true; + } else { + if last_was_export { + exports.push(word); + last_was_export = false; + } else { + main.push(token.clone()); + } + } + }, + Token::String(string) => { + if last_was_import { + imports.push(string); + last_was_import = false; + } else { + main.push(token.clone()); + } + }, + _ => { + main.push(token.clone()); + } + }; + } + } + + if about_to_start_word_def || current_word.is_some() { + bail!("unfinished word definition!"); + } + + if is_entrypoint { + result.push(WordDefinition { + name: "main", + instructions: main, + }); + } + + Ok(Module { words: result, imports, exports }) + } + + #[cfg(test)] + pub fn debug_print(&self) { + for word in &self.words { + println!("{}", word.name); + for instruction in &word.instructions { + println!(" {:?}", instruction); + } + } + 
} +} + + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn try_some_parsing() { + let result = Module::parse(crate::tokenizer::tokenize(" +: hello world 16 \"planet\" ; +: soup chicken 4.5 hello ; + +hello soup +").unwrap(), true).unwrap(); + result.debug_print(); + } +} diff --git a/rel-lang/sorelc/src/riscv_asm_codegen.rs b/rel-lang/sorelc/src/riscv_asm_codegen.rs new file mode 100644 index 0000000..efc1521 --- /dev/null +++ b/rel-lang/sorelc/src/riscv_asm_codegen.rs @@ -0,0 +1,347 @@ +use sorel_ir::*; + +use anyhow::*; + +use std::collections::{HashMap, HashSet}; +use std::fmt::Display; + +pub struct CodeGen<'a> { + module: &'a IRObject, + data_stack_size: usize, + lines: Vec, +} + + +// Some inspiration +// ================ +// +// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list +// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack +// + +// Implementation Choices +// ====================== +// +// Data Stack pointer: s2 +// No return stack pointer (using C ABI, so sp, sorta) +// Use t0, t1, t2 for temporary values in words +// Data stack grows down + + +macro_rules! 
asm_macro { + ($name:ident, $src:expr) => { + fn $name(&mut self) { + self.line($src); + } + }; + ($name:ident, $src:expr, $arg0:ty) => { + fn $name(&mut self, val0: $arg0) { + self.line(format!($src, val0)); + } + }; + ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => { + fn $name(&mut self, val0: $arg0, val1: $arg1) { + self.line(format!($src, val0, val1)); + } + }; +} + +fn mangle(input: &str) -> String { + input + .replace("<", "_LT_") + .replace(">", "_GT_") + .replace("-", "___") +} + +impl<'a> CodeGen<'a> { + pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self { + Self { + module: ir_mod, + data_stack_size, + lines: vec![], + } + } + + fn line(&mut self, line: S) { + self.lines.push(format!(" {}", line)); + + } + + fn label(&mut self, line: S) { + self.lines.push(line.to_string()); + } + + asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str); + asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize); + asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str); + asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize); + + fn pop_to(&mut self, reg: &str) { + self.copy_top_stack_value_to(reg); + self.move_stack_ptr_by_cells(1); + } + + fn pop_some_to(&mut self, regs: &str) { + let mut regs = regs.trim().split(" ").collect::>(); + regs.reverse(); + let count = regs.len(); + let mut index = 0; + for reg in regs { + self.copy_offset_stack_value_to(reg, index); + index += 1; + } + self.move_stack_ptr_by_cells(count as isize); + } + + fn push_from(&mut self, reg: &str) { + self.move_stack_ptr_by_cells(-1); + self.copy_to_top_of_stack(reg); + } + + fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) { + self.pop_some_to(regs); + self.line(call); + self.push_from(reg); + } + + pub fn assembly(&mut self) -> Result{ + let mut string_table = HashMap::new(); + + // Static strings + self.label(".section .rodata\n"); + for ir in &self.module.data { + match ir { + IR::StringDef(string_label, some_string) => { + 
string_table.insert(some_string.clone(), string_label); + self.label(format!("{}:", string_label)); + self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz? + self.label(""); + }, + _ => bail!("Currently only string definitions are supported in the data section.") + } + } + + // Data stack + self.label(".data\n"); + self.label("data_stack:"); + self.line(format!(".space {}", self.data_stack_size)); + self.label(".globl data_stack_end\ndata_stack_end:\n"); + + // Code + self.label(".text\n"); + self.label(".align 3\n"); + + let mut if_block_count = 0; + let mut if_stack = vec![]; + let mut loop_count = 0; + let mut loop_stack = vec![]; + let mut seen_else = HashSet::new(); + let mut last_label = ""; + + for ir in &self.module.text { + match ir { + IR::Label(name) => { + last_label = name; + if name == "main" { + self.label(".globl _start"); // TODO is globl necessary? + self.label("_start:"); + self.line("la s2, data_stack_end # set initial data stack pointer"); + } else { + let mangled = mangle(name); + self.label(format!(".globl {}", mangled)); + self.label(format!("{}:", mangled)); + } + self.line("addi sp, sp, -16 # allocate 16 bytes on stack"); // allocate 16 bytes on stack + self.line("sd ra, 8(sp) # store return address on stack"); // store return address on stack + }, + IR::Call(name) => { + let mangled = mangle(name); + self.label(format!("# call {}", mangled)); + self.line(format!("call {}", mangled)); + }, + IR::Ret => { + if last_label == "main" { + self.label("# exit 0 syscall"); + self.line("li a7, 93"); + self.line("mv a0, x0"); + self.line("ecall"); + } else { + self.line("ld ra, 8(sp)"); // load return address from stack + self.line("addi sp, sp, 16"); // restore stack pointer + self.line("ret"); + } + }, + IR::Load8 => { + self.label("# load 8"); + self.copy_top_stack_value_to("t0"); + self.line("lbu t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Load16 => { + self.label("# load 16"); + 
self.copy_top_stack_value_to("t0"); + self.line("lhu t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Load32 => { + self.label("# load 32"); + self.copy_top_stack_value_to("t0"); + self.line("lwu t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Load => { + self.label("# load 64"); + self.copy_top_stack_value_to("t0"); + self.line("ld t0, 0(t0)"); // deref pointer in t0 to t0 + self.copy_to_top_of_stack("t0"); + }, + IR::Store => { // ( x addr -- ) + self.pop_some_to("t0 t1"); + self.line("sd t0, 0(t1)"); // store x at addr + }, + IR::StackPush(num) => { + self.label(format!("# stackpush {}", num)); + self.line(format!("li t0, {}", num)); + self.push_from("t0"); + }, + IR::StackPushString(name) => { + self.label(format!("# stackpushstring {}", name)); + self.line(format!("la t0, {}", name)); + self.push_from("t0"); + }, + IR::AddU64 => { + self.label("# add"); + self.pop_call_push("t0 t1", "add t0, t0, t1", "t0"); + }, + IR::SubtractU64 => { + self.label("# sub"); + self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); + }, + IR::MultiplyU64 => { + self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0"); + }, + IR::DivideU64 => { + self.pop_call_push("t0 t1", "div t0, t0, t1", "t0"); + }, + IR::ModU64 => { + self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0"); + }, + IR::Dup => { + self.label("# dup"); + self.copy_top_stack_value_to("t0"); + self.push_from("t0"); + }, + IR::Swap => { + self.label("# swap"); + self.pop_some_to("t1 t0"); + self.push_from("t0"); + self.push_from("t1"); + }, + IR::Rot => { + self.label("# rot"); + self.pop_some_to("t0 t1 t2"); + self.push_from("t1"); + self.push_from("t2"); + self.push_from("t0"); + }, + IR::StackPointer => { + self.label("# sp"); + self.line("addi t0, s2, 0"); + self.push_from("t0"); + }, + IR::Drop => { + self.label("# drop"); + self.move_stack_ptr_by_cells(1); + }, + IR::Equals => { + // Yes, this is the same as subtract, since we're treating 0 
as true, and + // others as false. + self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0"); + }, + IR::GreaterThan => { + self.label("# >"); + self.pop_some_to("t0 t1"); + self.line("sgt t0, t0, t1"); + self.line("seqz t0, t0"); // remember, 0 is true, others are false + self.push_from("t0"); + }, + IR::LessThan => { + self.label("# <"); + self.pop_some_to("t0 t1"); + self.line("slt t0, t0, t1"); + self.line("seqz t0, t0"); // remember, 0 is true, others are false + self.push_from("t0"); + }, + IR::BitwiseOr => { + self.pop_call_push("t0 t1", "or t0, t0, t1", "t0"); + }, + IR::Sys0 => { + self.pop_call_push("a7", "ecall", "a0"); + }, + IR::Sys1 => { + self.pop_call_push("a0 a7", "ecall", "a0"); + }, + IR::Sys2 => { + self.pop_call_push("a0 a1 a7", "ecall", "a0"); + }, + IR::Sys3 => { + self.pop_call_push("a0 a1 a2 a7", "ecall", "a0"); + }, + IR::Sys4 => { + self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0"); + }, + IR::Sys5 => { + self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0"); + }, + IR::Sys6 => { + self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0"); + }, + // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html + IR::If => { + self.label("# if"); + self.pop_to("t0"); + self.line(format!("bnez t0, _else_{}", if_block_count)); + if_stack.push(if_block_count); + if_block_count += 1; + }, + IR::Else => { + self.label("# else"); + let if_counter = if_stack.last().unwrap().clone(); + self.line(format!("j _endif_{}", if_counter)); + self.label(format!("_else_{}:", if_counter)); + seen_else.insert(if_counter); + }, + IR::EndIf => { + self.label("# endif"); + let stack = &mut if_stack; + let if_counter = stack.last().unwrap().clone(); + if !seen_else.contains(&if_counter) { + self.label(format!("_else_{}:", if_counter)); + } else { + self.label(format!("_endif_{}:", if_counter)); + seen_else.remove(&if_counter); + } + stack.pop(); + }, + IR::Loop => { // keep looping until is true/0 + 
self.label(format!("_loop_{}:", loop_count)); + self.pop_to("t0"); + self.line(format!("beqz t0, _endloop_{}", loop_count)); + loop_stack.push(loop_count); + loop_count += 1; + }, + IR::EndLoop => { + let stack = &mut loop_stack; + let loop_counter = stack.last().unwrap().clone(); + self.line(format!("j _loop_{}", loop_counter)); + self.label(format!("_endloop_{}:", loop_counter)); + stack.pop(); + }, + _ => bail!("not implemented yet: {:?}", ir), + } + } + + Ok(self.lines.join("\n")) + } +} + diff --git a/rel-lang/sorelc/src/tokenizer.rs b/rel-lang/sorelc/src/tokenizer.rs new file mode 100644 index 0000000..093c012 --- /dev/null +++ b/rel-lang/sorelc/src/tokenizer.rs @@ -0,0 +1,178 @@ +use anyhow::{Result, anyhow}; + +#[derive(Debug, Clone)] +pub enum Token<'a> { + Word(&'a str), + String(&'a str), + NumU8(u8), + NumI8(i8), + NumU16(u16), + NumI16(i16), + NumU32(u32), + NumI32(i32), + NumU64(u64), + NumI64(i64), + NumF32(f32), + NumF64(f64), +} + +impl<'a> Token<'a>{ + fn parse_word_or_num(input: &'a str) -> Result> { + if input == "-" { + return Ok(Token::Word(input)) + } + + // we're assuming any token starting with `-` with length greater than one + // is a negative number + if input.starts_with('-') || input.chars().nth(0).map(|x| x.is_numeric()).unwrap_or(false) { + if input.contains(':') { + let mut splat = input.split(':'); + let num = splat.next().ok_or(anyhow!("no number found"))?; + let typ = splat.next().ok_or(anyhow!("no number type found"))?; + match typ { + "u8" => Ok(Token::NumU8(num.parse()?)), + "i8" => Ok(Token::NumI8(num.parse()?)), + "u16" => Ok(Token::NumU16(num.parse()?)), + "i16" => Ok(Token::NumI16(num.parse()?)), + "u32" => Ok(Token::NumU32(num.parse()?)), + "i32" => Ok(Token::NumI32(num.parse()?)), + "u64" => Ok(Token::NumU64(num.parse()?)), + "i64" => Ok(Token::NumI64(num.parse()?)), + "f32" => Ok(Token::NumF32(num.parse()?)), + "f64" => Ok(Token::NumF64(num.parse()?)), + _ => panic!("unknown number type") + } + } else { + if 
input.contains('.') { + Ok(Token::NumF64(input.parse()?)) + } else if input.starts_with('-') { + Ok(Token::NumI64(input.parse()?)) + } else { + Ok(Token::NumU64(input.parse()?)) + } + } + } else { + Ok(Token::Word(input)) + } + } +} + +// TODO really want an iterator, not a vector +pub fn tokenize<'a>(input: &'a str) -> Result>> { + let mut result = vec![]; + let mut string_start: Option = None; + let mut word_or_num_start: Option = None; + let mut last_is_backslash = false; + let mut last_is_whitespace = true; + let mut in_doc_comment = false; + let mut in_line_comment = false; + let mut index = 0; + let mut first_char = true; + + + for char in input.chars() { + if first_char { + first_char = false; + } else { + index += 1; + } + + if in_doc_comment { + if char == ')' { + in_doc_comment = false; + last_is_whitespace = true; // not really true, but means don't need space after + } + continue; + } + + if in_line_comment { + if char == '\n' { + in_line_comment = false; + last_is_whitespace = true; // not really true, but means don't need space after + } + continue; + } + + if char == '"' { + if let Some(start) = string_start { + if !last_is_backslash { + result.push(Token::String(&input[start..index])); + string_start = None; + } + } else { + string_start = Some(index + 1) + } + last_is_backslash = false; + last_is_whitespace = false; + continue; + } + + + if string_start.is_some() { + last_is_backslash = char == '\\'; + continue; + } + + if char.is_whitespace() { + if last_is_backslash { + in_line_comment = true; + } else if !last_is_whitespace && let Some(start) = word_or_num_start { + let token = &input[start..index]; + if token == "(" { + in_doc_comment = true; + } else { + result.push(Token::parse_word_or_num(&input[start..index])?); + } + word_or_num_start = None; + } + last_is_whitespace = true; + last_is_backslash = false; + continue; + } + + last_is_backslash = char == '\\'; + + if index == input.len() - 1 { + if !last_is_whitespace && let Some(start) = 
word_or_num_start { + result.push(Token::parse_word_or_num(&input[start..])?); + } + continue; + } + + if last_is_whitespace { // start of word or num (we already handled strings) + word_or_num_start = Some(index); + last_is_whitespace = false; + } + } + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn try_some_tokenizing() { + let result = tokenize(" + + \\ soup + 2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup +"); + println!("result: {:?}", result); + } + + #[test] + fn comments() { + let result = tokenize(" + ( + foo + bar + ) + : baz ( x y -- z ) + chicken + soup + ; + "); + println!("result: {:?}", result); + } +} diff --git a/rel-lang/stdlib/mem.rel b/rel-lang/stdlib/mem.rel deleted file mode 100644 index da5bfca..0000000 --- a/rel-lang/stdlib/mem.rel +++ /dev/null @@ -1,44 +0,0 @@ -\ vim: filetype=forth - -: mmap 222 sys6 ; -: munmap 215 sys2 ; - -: PROT_READ 1 ; -: PROT_WRITE 2 ; -: MAP_PRIVATE 2 ; -: MAP_ANONYMOUS 32 ; - -: ALLOC_PROT PROT_READ PROT_WRITE | ; -: ALLOC_MAP MAP_PRIVATE MAP_ANONYMOUS | ; - -\ This is a raw way to allocate memory. Callers to this will need to know -\ how much memory was allocated, to call dealloc later. -: alloc_raw ( byte-count -- addr ) - 0 swap ALLOC_PROT ALLOC_MAP -1:i32 0 mmap -; - -: dealloc ( addr byte-count -- ) - munmap -; - -export alloc -: alloc ( byte-count -- addr ) - dup \ we'll need the length again later - 8 + \ add 64 bits to the front where we'll store the length - alloc_raw \ ( byte-count addr ) - dup \ ( byte-count addr addr) - rot \ ( addr addr byte-count ) - rot \ ( addr byte-count addr ) - ! \ write the length to the addr, now ( addr ) - 8 + \ here's the address we'll return, 8 bytes forward to account for the length -; - -export free -: free ( addr -- ) - 8 - \ the real memory start is 8 bytes before - dup \ ( addr addr ) - @ \ ( addr byte-length ) - 8 + \ add 8 to the byte length to refer to the whole region from alloc. 
\ vim: filetype=forth

\ Memory allocation helpers built directly on raw system calls.

\ Thin syscall wrappers: each pushes a syscall number and then issues sysN.
\ NOTE(review): 222 and 215 look like the Linux generic (riscv64) syscall
\ numbers for mmap and munmap; confirm against the target syscall table.
: mmap 222 sys6 ;
: munmap 215 sys2 ;

\ Flag values combined below and handed to mmap.
\ NOTE(review): presumably the Linux PROT_* and MAP_* constants; confirm.
: PROT_READ 1 ;
: PROT_WRITE 2 ;
: MAP_PRIVATE 2 ;
: MAP_ANONYMOUS 32 ;

\ Bitwise OR of the two protection flags and of the two mapping flags.
: ALLOC_PROT PROT_READ PROT_WRITE | ;
: ALLOC_MAP MAP_PRIVATE MAP_ANONYMOUS | ;

\ This is a raw way to allocate memory. Callers to this will need to know
\ how much memory was allocated, to call dealloc later.
\ Pushes, in order: 0, byte-count, ALLOC_PROT, ALLOC_MAP, -1, 0, then calls mmap.
: alloc_raw ( byte-count -- addr )
  0 swap ALLOC_PROT ALLOC_MAP -1:i32 0 mmap
;

\ Releases a region obtained from alloc_raw; the caller supplies its length.
: dealloc ( addr byte-count -- )
  munmap
;

\ Allocates byte-count bytes plus an 8-byte header that records byte-count,
\ and returns the address just past that header.
export alloc
: alloc ( byte-count -- addr )
  dup \ we'll need the length again later
  8 + \ add 64 bits to the front where we'll store the length
  alloc_raw \ ( byte-count addr )
  dup \ ( byte-count addr addr )
  rot \ ( addr addr byte-count )
  rot \ ( addr byte-count addr )
  ! \ write the length to the addr, now ( addr )
  8 + \ here's the address we'll return, 8 bytes forward to account for the length
;

\ Frees a region returned by alloc, reading its length back from the 8-byte
\ header stored in front of addr.
export free
: free ( addr -- )
  8 - \ the real memory start is 8 bytes before
  dup \ ( addr addr )
  @ \ ( addr byte-length )
  8 + \ add 8 to the byte length to refer to the whole region from alloc.
  dealloc
;
\ vim: filetype=forth

\ Output helpers built directly on the write system call.

\ Pushes syscall number 64 and issues a three-argument system call.
\ NOTE(review): 64 looks like the Linux generic (riscv64) number for write; confirm.
: write ( fd ptr len -- bytes-written-or-err )
  64
  sys3
;

\ Counts the bytes from addr up to the first zero byte.
\ The loop keeps running until the byte it pops is 0.
: strlen ( addr -- len )
  dup dup \ ( addr addr addr )
  @:8 \ ( addr addr byte )
  loop \ ( addr addr )
    1 + \ ( addr addr+1 )
    dup \ ( addr addr+1 addr+1 )
    @:8 \ ( addr addr+1 byte )
  endloop \ ( addr addr+len )
  swap \ ( addr+len addr )
  - \ ( len )
;

export puts

\ Writes the zero-terminated string at addr to file descriptor 1 and leaves
\ addr on the stack. The result of write is dropped.
: puts ( addr -- addr )
  dup dup \ ( addr addr addr )
  strlen \ ( addr addr len )
  1 \ ( addr addr len 1 )
  rot \ ( addr len 1 addr )
  rot \ ( addr 1 addr len )
  write \ ( addr bytes-written-or-err )
  drop \ ( addr )
;

: ZERO_CHAR 48 ;
: NEWLINE_CHAR 10 ;

export putn

\ Writes num in decimal followed by a newline to file descriptor 1 and leaves
\ num on the stack. The first loop pushes the decimal digits on top of a 10;
\ the second loop writes one character per digit until it reaches that 10.
: putn ( num -- num )
  dup dup \ ( num num num )
  10 rot rot \ ( num 10 num num ) the 10 marks where the multidigit output stops
  10 < \ ( num 10 num is<10 )
  loop \ ( num 10 num' )
    dup \ ( num 10 num' num' )
    10 % \ ( num 10 num' digit )
    swap \ ( num 10 digit num' )
    10 / \ ( num 10 digit num'/10 )
    dup 10 < \ ( num 10 digit num'/10 is<10 )
  endloop \ ( num 10 digitn ... digit1 )
  dup 9 > \ ( num 10 digitn ... digit1 digit1>9 )
  loop \ ( num 10 digitn ... digit1 )
    ZERO_CHAR + \ ( num 10 digitn ... digit1+48 )
    sp \ ( num 10 digitn ... digit1 ptr )
    1 \ ( num 10 digitn ... digit1 ptr 1 )
    swap \ ( num 10 digitn ... digit1 1 ptr )
    1 \ ( num 10 digitn ... digit1 1 ptr 1 )
    write \ ( num 10 digitn ... digit1 result )
    drop drop \ ( num 10 digitn ... digit2 )
    dup 9 > \ ( num 10 digitn ... digit2 digit2>9 )
  endloop \ ( num 10 )
  drop \ ( num )
  NEWLINE_CHAR \ ( num 10 )
  sp \ ( num 10 ptr )
  1 \ ( num 10 ptr 1 )
  swap \ ( num 10 1 ptr )
  1 \ ( num 10 1 ptr 1 )
  write \ ( num 10 result )
  drop \ ( num 10 )
  drop \ ( num )
;