From 1d9680853d9ef21e35cd6d2f094ff1d866194551 Mon Sep 17 00:00:00 2001 From: Bryan English Date: Thu, 8 Jan 2026 22:47:48 -0500 Subject: [PATCH] better error handling --- hylo-lang/Cargo.lock | 8 +++ hylo-lang/hylo-interpret/Cargo.toml | 1 + hylo-lang/hylo-interpret/src/lib.rs | 104 +++++++++++++++------------- hylo-lang/hyloc/Cargo.toml | 1 + hylo-lang/hyloc/src/ir.rs | 46 +++++++----- hylo-lang/hyloc/src/main.rs | 11 +-- hylo-lang/hyloc/src/parser.rs | 21 +++--- hylo-lang/hyloc/src/tokenizer.rs | 48 +++++++------ 8 files changed, 139 insertions(+), 101 deletions(-) diff --git a/hylo-lang/Cargo.lock b/hylo-lang/Cargo.lock index f782ca8..400afe8 100644 --- a/hylo-lang/Cargo.lock +++ b/hylo-lang/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + [[package]] name = "equivalent" version = "1.0.2" @@ -18,6 +24,7 @@ checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" name = "hylo-interpret" version = "0.1.0" dependencies = [ + "anyhow", "hylo-ir", "syscalls", ] @@ -35,6 +42,7 @@ dependencies = [ name = "hyloc" version = "0.1.0" dependencies = [ + "anyhow", "hylo-interpret", "hylo-ir", ] diff --git a/hylo-lang/hylo-interpret/Cargo.toml b/hylo-lang/hylo-interpret/Cargo.toml index 196666a..3de19d9 100644 --- a/hylo-lang/hylo-interpret/Cargo.toml +++ b/hylo-lang/hylo-interpret/Cargo.toml @@ -4,5 +4,6 @@ version = "0.1.0" edition = "2024" [dependencies] +anyhow = "1.0.100" hylo-ir = { workspace = true } syscalls = "0.7.0" diff --git a/hylo-lang/hylo-interpret/src/lib.rs b/hylo-lang/hylo-interpret/src/lib.rs index 3ccdd29..0c725d6 100644 --- a/hylo-lang/hylo-interpret/src/lib.rs +++ b/hylo-lang/hylo-interpret/src/lib.rs @@ -3,6 +3,7 @@ use hylo_ir::*; use std::collections::HashMap; use syscalls::*; +use anyhow::{Result, anyhow}; pub struct Interpreter<'a> { module: &'a IRModule, @@ -14,7 +15,7 @@ pub struct Interpreter<'a> { } impl<'a> Interpreter<'a> { - pub fn new(ir_mod: &'a IRModule) -> Self { + pub fn new(ir_mod: &'a IRModule) -> Result { let mut index = 0; let mut labels = HashMap::new(); for token in ir_mod.text.iter() { @@ -23,7 +24,7 @@ impl<'a> Interpreter<'a> { } index += 1; } - let instruction_pointer = *labels.get("main").unwrap(); + let instruction_pointer = *labels.get("main").ok_or(anyhow!("no main word found!"))?; let strings = ir_mod.data.iter().filter_map(|s| { match s { @@ -34,14 +35,14 @@ impl<'a> Interpreter<'a> { } }).collect(); - Self { + Ok(Self { module: ir_mod, data_stack: vec![], instruction_pointer, return_stack: vec![], labels, strings - } + }) } fn process_syscall_result(&mut self, result: Result) { @@ -57,7 +58,11 @@ impl<'a> Interpreter<'a> { } } - pub fn run(&mut self) { + fn ds_pop(&mut self) -> Result { + self.data_stack.pop().ok_or(anyhow!("popping from empty data stack")) + } + + pub fn run(&mut self) -> Result<()> { let mut looking_for_endif = false; loop { if looking_for_endif { @@ -70,42 +75,41 @@ impl<'a> Interpreter<'a> { } _ => {} } - } else { match &self.module.text[self.instruction_pointer] { IR::Label(_) => {}, IR::Call(name) => { self.return_stack.push(self.instruction_pointer); - self.instruction_pointer = *self.labels.get(name).unwrap(); + self.instruction_pointer = *self.labels.get(name).ok_or(anyhow!("calling undefined word `{}`", name))?; }, IR::Ret => { if self.return_stack.len() == 0 { - break; + return Ok(()); } - self.instruction_pointer = self.return_stack.pop().unwrap(); + self.instruction_pointer = self.return_stack.pop().ok_or(anyhow!("returning from top level"))?; }, IR::StackPush(num) => { self.data_stack.push(*num); }, IR::AddU64 => { - let a = self.data_stack.pop().unwrap(); - let b = self.data_stack.pop().unwrap(); + let a = self.ds_pop()?; + let b = self.ds_pop()?; self.data_stack.push(a + b); }, IR::SubtractU64 => { - let b = self.data_stack.pop().unwrap(); - let a = self.data_stack.pop().unwrap(); + let b = self.ds_pop()?; + let a = self.ds_pop()?; self.data_stack.push(a - b); }, IR::PutN => { - println!("{}", self.data_stack.last().unwrap()); + println!("{}", self.data_stack.last().ok_or(anyhow!("empty data stack"))?); }, IR::Dup => { - self.data_stack.push(*self.data_stack.last().unwrap()); + self.data_stack.push(*self.data_stack.last().ok_or(anyhow!("empty data stack"))?); }, IR::Swap => { - let a = self.data_stack.pop().unwrap(); - let b = self.data_stack.pop().unwrap(); + let a = self.ds_pop()?; + let b = self.ds_pop()?; self.data_stack.push(a); self.data_stack.push(b); }, @@ -113,8 +117,8 @@ impl<'a> Interpreter<'a> { self.data_stack.pop(); }, IR::Equals => { - let a = self.data_stack.pop().unwrap(); - let b = self.data_stack.pop().unwrap(); + let a = self.ds_pop()?; + let b = self.ds_pop()?; self.data_stack.push(if a == b { 0 } else { @@ -122,8 +126,8 @@ impl<'a> Interpreter<'a> { }); }, IR::GreaterThan => { - let b = self.data_stack.pop().unwrap(); - let a = self.data_stack.pop().unwrap(); + let b = self.ds_pop()?; + let a = self.ds_pop()?; self.data_stack.push(if a > b { 0 } else { @@ -132,58 +136,58 @@ impl<'a> Interpreter<'a> { }, IR::If => { - if self.data_stack.pop().unwrap() != 0 { + if self.ds_pop()? != 0 { looking_for_endif = true; } }, IR::EndIf => {}, IR::Sys0 => { - let call_num = Sysno::from(self.data_stack.pop().unwrap() as i32); + let call_num = Sysno::from(self.ds_pop()? as i32); self.process_syscall_result(unsafe { syscall!(call_num) }); }, IR::Sys1 => { - let a1 = self.data_stack.pop().unwrap(); - let call_num = Sysno::from(self.data_stack.pop().unwrap() as i32); + let a1 = self.ds_pop()?; + let call_num = Sysno::from(self.ds_pop()? as i32); self.process_syscall_result(unsafe { syscall!(call_num, a1) }); }, IR::Sys2 => { - let a2 = self.data_stack.pop().unwrap(); - let a1 = self.data_stack.pop().unwrap(); - let call_num = Sysno::from(self.data_stack.pop().unwrap() as i32); + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + let call_num = Sysno::from(self.ds_pop()? as i32); self.process_syscall_result(unsafe { syscall!(call_num, a1, a2) }); }, IR::Sys3 => { - let a3 = self.data_stack.pop().unwrap(); - let a2 = self.data_stack.pop().unwrap(); - let a1 = self.data_stack.pop().unwrap(); - let call_num = Sysno::from(self.data_stack.pop().unwrap() as i32); + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + let call_num = Sysno::from(self.ds_pop()? as i32); self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3) }); }, IR::Sys4 => { - let a4 = self.data_stack.pop().unwrap(); - let a3 = self.data_stack.pop().unwrap(); - let a2 = self.data_stack.pop().unwrap(); - let a1 = self.data_stack.pop().unwrap(); - let call_num = Sysno::from(self.data_stack.pop().unwrap() as i32); + let a4 = self.ds_pop()?; + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + let call_num = Sysno::from(self.ds_pop()? as i32); self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4) }); }, IR::Sys5 => { - let a5 = self.data_stack.pop().unwrap(); - let a4 = self.data_stack.pop().unwrap(); - let a3 = self.data_stack.pop().unwrap(); - let a2 = self.data_stack.pop().unwrap(); - let a1 = self.data_stack.pop().unwrap(); - let call_num = Sysno::from(self.data_stack.pop().unwrap() as i32); + let a5 = self.ds_pop()?; + let a4 = self.ds_pop()?; + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + let call_num = Sysno::from(self.ds_pop()? as i32); self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5) }); }, IR::Sys6 => { - let a6 = self.data_stack.pop().unwrap(); - let a5 = self.data_stack.pop().unwrap(); - let a4 = self.data_stack.pop().unwrap(); - let a3 = self.data_stack.pop().unwrap(); - let a2 = self.data_stack.pop().unwrap(); - let a1 = self.data_stack.pop().unwrap(); - let call_num = Sysno::from(self.data_stack.pop().unwrap() as i32); + let a6 = self.ds_pop()?; + let a5 = self.ds_pop()?; + let a4 = self.ds_pop()?; + let a3 = self.ds_pop()?; + let a2 = self.ds_pop()?; + let a1 = self.ds_pop()?; + let call_num = Sysno::from(self.ds_pop()? as i32); self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5, a6) }); }, _ => { diff --git a/hylo-lang/hyloc/Cargo.toml b/hylo-lang/hyloc/Cargo.toml index 6c0b016..04e06cb 100644 --- a/hylo-lang/hyloc/Cargo.toml +++ b/hylo-lang/hyloc/Cargo.toml @@ -6,3 +6,4 @@ edition = "2024" [dependencies] hylo-ir = { workspace = true } hylo-interpret = { workspace = true } +anyhow = "1.0.100" diff --git a/hylo-lang/hyloc/src/ir.rs b/hylo-lang/hyloc/src/ir.rs index 7d0374e..cc2f422 100644 --- a/hylo-lang/hyloc/src/ir.rs +++ b/hylo-lang/hyloc/src/ir.rs @@ -5,34 +5,36 @@ use hylo_ir::*; use std::collections::HashSet; use std::path::PathBuf; +use anyhow::{Result, anyhow}; + macro_rules! push_num { ($num:ident) => { IR::StackPush(*$num as u64) } } -fn import(importer_dir: &PathBuf, specifier: &str, imported: &mut HashSet, is_entrypoint: bool) -> Option { +fn import(importer_dir: &PathBuf, specifier: &str, imported: &mut HashSet, is_entrypoint: bool) -> Result> { let mut path = PathBuf::from(specifier); if path.is_relative() { let mut new_path = importer_dir.clone(); new_path.push(path); - path = new_path.canonicalize().unwrap(); + path = new_path.canonicalize()?; } if imported.contains(&path) { - return None; + return Ok(None); } - let contents = std::fs::read_to_string(&path).unwrap(); + let contents = std::fs::read_to_string(&path)?; - Some(generate_internal(path, &Module::parse(tokenize(&contents), is_entrypoint), imported)) + Ok(Some(generate_internal(path, &Module::parse(tokenize(&contents)?, is_entrypoint)?, imported))) } -fn collapse_module(mut module_w: ModuleWithImports) -> IRModule { - let mut module = module_w.module.take().unwrap(); +fn collapse_module(mut module_w: ModuleWithImports) -> Result { + let mut module = module_w.module.take().ok_or(anyhow!("no module to collapse"))?; let mut data = std::mem::take(&mut module.data); let mut prev_data_len = data.len(); let mut text = std::mem::take(&mut module.text); - module_w.imports.take().unwrap().into_iter().for_each(|imported| { - let mut ir_mod = collapse_module(imported); + module_w.imports.take().ok_or(anyhow!("no imports to collapse"))?.into_iter().try_for_each(|imported| -> Result<()>{ + let mut ir_mod = collapse_module(imported)?; let mut mod_data = std::mem::take(&mut ir_mod.data); let mod_data_len = mod_data.len(); data.append(&mut mod_data); @@ -47,17 +49,20 @@ fn collapse_module(mut module_w: ModuleWithImports) -> IRModule { text.append(&mut mod_text); prev_data_len += mod_data_len; - }); + + Ok(()) + })?; - IRModule { + Ok(IRModule { data, text, - } + }) } -pub fn compile(path: &str) -> IRModule { +pub fn compile(path: &str) -> Result { let mut imported = HashSet::new(); - let module = import(&std::env::current_dir().unwrap(), path, &mut imported, true).unwrap(); + let dir = std::env::current_dir()?; + let module = import(&dir, path, &mut imported, true)?.expect("somehow we've already imported this"); collapse_module(module) // TODO remove unused words } @@ -111,8 +116,17 @@ fn generate_internal(path: PathBuf, module: &Module, imported: &mut HashSet { if last_was_import { - if let Some(module) = import(&path.parent().unwrap().to_path_buf(), text, imported, false) { - imports.push(module); + if let Some(parent_path) = path.parent() { + match import(&parent_path.to_path_buf(), text, imported, false) { + Ok(module) => { + if let Some(module) = module { + imports.push(module); + } + }, + Err(msg) => { + eprintln!("{}", msg); + } + } } IR::ImportString // This will be elided later } else { diff --git a/hylo-lang/hyloc/src/main.rs b/hylo-lang/hyloc/src/main.rs index aebd197..245a9e0 100644 --- a/hylo-lang/hyloc/src/main.rs +++ b/hylo-lang/hyloc/src/main.rs @@ -4,9 +4,12 @@ mod ir; use hylo_interpret::Interpreter; -fn main() { +use anyhow::Result; + +fn main() -> Result<()> { let filename = std::env::args().nth(1).expect("must provide a file to compile"); - let module = ir::compile(&filename); - let mut interp = Interpreter::new(&module); - interp.run(); + let module = ir::compile(&filename)?; + let mut interp = Interpreter::new(&module)?; + interp.run()?; + Ok(()) } diff --git a/hylo-lang/hyloc/src/parser.rs b/hylo-lang/hyloc/src/parser.rs index 040a03f..5e0ddbd 100644 --- a/hylo-lang/hyloc/src/parser.rs +++ b/hylo-lang/hyloc/src/parser.rs @@ -1,4 +1,5 @@ use crate::tokenizer::Token; +use anyhow::{Result, bail}; #[derive(Debug)] pub struct WordDefinition<'a> { @@ -12,7 +13,7 @@ pub struct Module<'a> { } impl<'a> Module<'a> { - pub fn parse(input: Vec>, is_entrypoint: bool) -> Self { + pub fn parse(input: Vec>, is_entrypoint: bool) -> Result { let mut result = vec![]; let mut main = vec![]; let mut current_word: Option = None; @@ -28,20 +29,24 @@ impl<'a> Module<'a> { about_to_start_word_def = false; continue; } else { - panic!("{:?} is not a valid word name!", token); + bail!("{:?} is not a valid word name!", token); } } else if let Token::Word(word) = token { if word == ":" { if current_word.is_some() { - panic!("can't define words inside word definitions!"); + bail!("can't define words inside word definitions!"); } about_to_start_word_def = true; continue; } if word == ";" { let word = current_word.take(); - result.push(word.unwrap()); - continue; + if let Some(word) = word { + result.push(word); + continue; + } else { + bail!("`;` must be at the end of a word definition"); + } } } if let Some(ref mut current_word) = current_word { @@ -52,7 +57,7 @@ impl<'a> Module<'a> { } if about_to_start_word_def || current_word.is_some() { - panic!("unfinished word definition!"); + bail!("unfinished word definition!"); } if is_entrypoint { @@ -62,7 +67,7 @@ impl<'a> Module<'a> { }); } - Module { words: result } + Ok(Module { words: result }) } #[cfg(test)] @@ -89,7 +94,7 @@ mod tests { : soup chicken 4.5 hello ; hello soup -"), true); +").unwrap(), true).unwrap(); result.debug_print(); } } diff --git a/hylo-lang/hyloc/src/tokenizer.rs b/hylo-lang/hyloc/src/tokenizer.rs index dbc764a..a489f15 100644 --- a/hylo-lang/hyloc/src/tokenizer.rs +++ b/hylo-lang/hyloc/src/tokenizer.rs @@ -1,3 +1,5 @@ +use anyhow::{Result, anyhow}; + #[derive(Debug)] pub enum Token<'a> { Word(&'a str), @@ -15,48 +17,48 @@ pub enum Token<'a> { } impl<'a> Token<'a>{ - fn parse_word_or_num(input: &'a str) -> Token<'a> { + fn parse_word_or_num(input: &'a str) -> Result> { if input == "-" { - return Token::Word(input) + return Ok(Token::Word(input)) } // we're assuming any token starting with `-` with length greater than one // is a negative number - if input.starts_with('-') || input.chars().nth(0).unwrap().is_numeric() { + if input.starts_with('-') || input.chars().nth(0).map(|x| x.is_numeric()).unwrap_or(false) { if input.contains(':') { let mut splat = input.split(':'); - let num = splat.next().unwrap(); - let typ = splat.next().unwrap(); + let num = splat.next().ok_or(anyhow!("no number found"))?; + let typ = splat.next().ok_or(anyhow!("no number type found"))?; match typ { - "u8" => Token::NumU8(num.parse().unwrap()), - "i8" => Token::NumI8(num.parse().unwrap()), - "u16" => Token::NumU16(num.parse().unwrap()), - "i16" => Token::NumI16(num.parse().unwrap()), - "u32" => Token::NumU32(num.parse().unwrap()), - "i32" => Token::NumI32(num.parse().unwrap()), - "u64" => Token::NumU64(num.parse().unwrap()), - "i64" => Token::NumI64(num.parse().unwrap()), - "f32" => Token::NumF32(num.parse().unwrap()), - "f64" => Token::NumF64(num.parse().unwrap()), + "u8" => Ok(Token::NumU8(num.parse()?)), + "i8" => Ok(Token::NumI8(num.parse()?)), + "u16" => Ok(Token::NumU16(num.parse()?)), + "i16" => Ok(Token::NumI16(num.parse()?)), + "u32" => Ok(Token::NumU32(num.parse()?)), + "i32" => Ok(Token::NumI32(num.parse()?)), + "u64" => Ok(Token::NumU64(num.parse()?)), + "i64" => Ok(Token::NumI64(num.parse()?)), + "f32" => Ok(Token::NumF32(num.parse()?)), + "f64" => Ok(Token::NumF64(num.parse()?)), _ => panic!("unknown number type") } } else { if input.contains('.') { - Token:: NumF64(input.parse().unwrap()) + Ok(Token::NumF64(input.parse()?)) } else if input.starts_with('-') { - Token::NumI64(input.parse().unwrap()) + Ok(Token::NumI64(input.parse()?)) } else { - Token::NumU64(input.parse().unwrap()) + Ok(Token::NumU64(input.parse()?)) } } } else { - Token::Word(input) + Ok(Token::Word(input)) } } } // TODO really want an iterator, not a vector -pub fn tokenize<'a>(input: &'a str) -> Vec> { +pub fn tokenize<'a>(input: &'a str) -> Result>> { let mut result = vec![]; let mut string_start: Option = None; let mut word_or_num_start: Option = None; @@ -108,7 +110,7 @@ pub fn tokenize<'a>(input: &'a str) -> Vec> { if token == "(" { in_comment = true; } else { - result.push(Token::parse_word_or_num(&input[start..index])); + result.push(Token::parse_word_or_num(&input[start..index])?); } word_or_num_start = None; } @@ -118,7 +120,7 @@ pub fn tokenize<'a>(input: &'a str) -> Vec> { if index == input.len() - 1 { if !last_is_whitespace && let Some(start) = word_or_num_start { - result.push(Token::parse_word_or_num(&input[start..])); + result.push(Token::parse_word_or_num(&input[start..])?); } continue; } @@ -128,7 +130,7 @@ pub fn tokenize<'a>(input: &'a str) -> Vec> { last_is_whitespace = false; } } - result + Ok(result) } #[cfg(test)] -- 2.43.0