From: Bryan English Date: Sat, 13 Dec 2025 05:08:39 +0000 (-0500) Subject: working interpreter for hylo X-Git-Url: https://rethought.computer/gitweb//gitweb//git?a=commitdiff_plain;h=288fa6be44bc383d1ff3d59a46587e96d1c63fc0;p=sorel-lang.git working interpreter for hylo --- 288fa6be44bc383d1ff3d59a46587e96d1c63fc0 diff --git a/hylo-lang/.gitignore b/hylo-lang/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/hylo-lang/.gitignore @@ -0,0 +1 @@ +target diff --git a/hylo-lang/Cargo.lock b/hylo-lang/Cargo.lock new file mode 100644 index 0000000..75faf25 --- /dev/null +++ b/hylo-lang/Cargo.lock @@ -0,0 +1,144 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "hylo-interpret" +version = "0.1.0" +dependencies = [ + "hylo-ir", +] + +[[package]] +name = "hylo-ir" +version = "0.1.0" +dependencies = [ + "serde", + "serde_derive", + "serde_yaml", +] + +[[package]] +name = "hyloc" +version = "0.1.0" +dependencies = [ + "hylo-interpret", + "hylo-ir", +] + +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" diff --git a/hylo-lang/Cargo.toml b/hylo-lang/Cargo.toml new file mode 100644 index 0000000..9fc6f82 --- /dev/null +++ b/hylo-lang/Cargo.toml @@ -0,0 +1,9 @@ +[workspace] + +resolver = "3" +members = ["hylo-ir","hyloc", "hylo-interpret"] + + +[workspace.dependencies] +hylo-ir = { path = "./hylo-ir", version = "0.1.0" } +hylo-interpret = { path = "./hylo-interpret", version = "0.1.0" } diff --git a/hylo-lang/README.md b/hylo-lang/README.md new file mode 100644 index 0000000..f971508 --- /dev/null +++ b/hylo-lang/README.md @@ -0,0 +1,3 @@ +# hylo-lang + +The name means "it's high-level and low-level at the same time". diff --git a/hylo-lang/examples/fib.hylo b/hylo-lang/examples/fib.hylo new file mode 100644 index 0000000..e59b34e --- /dev/null +++ b/hylo-lang/examples/fib.hylo @@ -0,0 +1,20 @@ + +: fib + dup 1 > if + dup 1 - fib + swap 2 - fib + + + endif +; + +0 fib putn +1 fib putn +2 fib putn +3 fib putn +4 fib putn +5 fib putn +6 fib putn +7 fib putn +8 fib putn +9 fib putn +10 fib putn diff --git a/hylo-lang/flake.lock b/hylo-lang/flake.lock new file mode 100644 index 0000000..1384c7f --- /dev/null +++ b/hylo-lang/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1765425892, + "narHash": "sha256-jlQpSkg2sK6IJVzTQBDyRxQZgKADC2HKMRfGCSgNMHo=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "5d6bdbddb4695a62f0d00a3620b37a15275a5093", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git 
a/hylo-lang/flake.nix b/hylo-lang/flake.nix new file mode 100644 index 0000000..e198852 --- /dev/null +++ b/hylo-lang/flake.nix @@ -0,0 +1,17 @@ +{ + description = "uxn11"; + + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; + }; + + outputs = {nixpkgs, ...}: let + system = "x86_64-linux"; + pkgs = import nixpkgs { inherit system; }; + in { + devShells.${system}.default = pkgs.mkShell { + packages = [ + ]; + }; + }; +} diff --git a/hylo-lang/hylo-interpret/Cargo.toml b/hylo-lang/hylo-interpret/Cargo.toml new file mode 100644 index 0000000..cc703fa --- /dev/null +++ b/hylo-lang/hylo-interpret/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "hylo-interpret" +version = "0.1.0" +edition = "2024" + +[dependencies] +hylo-ir = { workspace = true } diff --git a/hylo-lang/hylo-interpret/src/lib.rs b/hylo-lang/hylo-interpret/src/lib.rs new file mode 100644 index 0000000..aaca00f --- /dev/null +++ b/hylo-lang/hylo-interpret/src/lib.rs @@ -0,0 +1,133 @@ +use hylo_ir::*; + +use std::collections::HashMap; + +pub struct Interpreter<'a> { + module: &'a IRModule, + data_stack: Vec, + instruction_pointer: usize, + return_stack: Vec, + labels: HashMap, + strings: Vec, +} + +impl<'a> Interpreter<'a> { + pub fn new(ir_mod: &'a IRModule) -> Self { + let mut index = 0; + let mut labels = HashMap::new(); + for token in ir_mod.text.iter() { + if let IR::Label(name) = token { + labels.insert(name.clone(), index); + } + index += 1; + } + let instruction_pointer = *labels.get("main").unwrap(); + + let strings = ir_mod.data.iter().filter_map(|s| { + match s { + IR::StringDef(s) => { + Some(s.clone()) + }, + _ => None + } + }).collect(); + + Self { + module: ir_mod, + data_stack: vec![], + instruction_pointer, + return_stack: vec![], + labels, + strings + } + } + + pub fn run(&mut self) { + let mut looking_for_endif = false; + loop { + if looking_for_endif { + match &self.module.text[self.instruction_pointer] { + IR::EndIf => { + looking_for_endif = false; + }, + IR::Else => { 
+ looking_for_endif = false; + } + _ => {} + } + + } else { + match &self.module.text[self.instruction_pointer] { + IR::Label(_) => {}, + IR::Call(name) => { + self.return_stack.push(self.instruction_pointer); + self.instruction_pointer = *self.labels.get(name).unwrap(); + }, + IR::Ret => { + if self.return_stack.len() == 0 { + break; + } + self.instruction_pointer = self.return_stack.pop().unwrap(); + }, + IR::StackPush(num) => { + self.data_stack.push(*num); + }, + IR::AddU64 => { + let a = self.data_stack.pop().unwrap(); + let b = self.data_stack.pop().unwrap(); + self.data_stack.push(a + b); + }, + IR::SubtractU64 => { + let b = self.data_stack.pop().unwrap(); + let a = self.data_stack.pop().unwrap(); + self.data_stack.push(a - b); + }, + IR::PutN => { + println!("{}", self.data_stack.last().unwrap()); + }, + IR::Dup => { + self.data_stack.push(*self.data_stack.last().unwrap()); + }, + IR::Swap => { + let a = self.data_stack.pop().unwrap(); + let b = self.data_stack.pop().unwrap(); + self.data_stack.push(a); + self.data_stack.push(b); + }, + IR::Drop => { + self.data_stack.pop(); + }, + IR::Equals => { + let a = self.data_stack.pop().unwrap(); + let b = self.data_stack.pop().unwrap(); + self.data_stack.push(if a == b { + 0 + } else { + -1 as i64 as u64 + }); + }, + IR::GreaterThan => { + let b = self.data_stack.pop().unwrap(); + let a = self.data_stack.pop().unwrap(); + self.data_stack.push(if a > b { + 0 + } else { + -1 as i64 as u64 + }); + + }, + IR::If => { + if self.data_stack.pop().unwrap() != 0 { + looking_for_endif = true; + } + }, + IR::EndIf => {}, + _ => { + println!("Instruction not implemented."); + } + } + } + self.instruction_pointer += 1; + } + } +} diff --git a/hylo-lang/hylo-ir/Cargo.toml b/hylo-lang/hylo-ir/Cargo.toml new file mode 100644 index 0000000..2f31ca3 --- /dev/null +++ b/hylo-lang/hylo-ir/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "hylo-ir" +version = "0.1.0" +edition = "2024" + +[dependencies] +serde = "1.0.228" +serde_derive = 
"1.0.228" +serde_yaml = "0.9.34" diff --git a/hylo-lang/hylo-ir/src/lib.rs b/hylo-lang/hylo-ir/src/lib.rs new file mode 100644 index 0000000..be8608a --- /dev/null +++ b/hylo-lang/hylo-ir/src/lib.rs @@ -0,0 +1,51 @@ +use serde_yaml::{from_str, to_string, Error}; +use serde_derive::{Serialize, Deserialize}; + +#[derive(Serialize, Deserialize, Debug)] +pub enum IR { + Label(String), + Call(String), + Ret, + StackPush(u64), + StackPushString(usize), + StringDef(String), + + // These next ones should always be inlined, so they're in IR. + Load, // @ ( addr -- x ) -- Fetch memory contents at addr + Store, // ! ( x addr -- ) -- Store x at addr + + // These ones might not be inlined, but should be built-in, so a compiler might + // turn this into `Call(String)` before translating to assembly/machine-code, but + // an IR interpreter may just execute them. + AddU64, + SubtractU64, + MultiplyU64, + DivideU64, + Equals, + GreaterThan, + Dup, + Swap, + Drop, + Over, + PutS, + PutN, + If, + Else, + EndIf, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct IRModule { + pub text: Vec, + pub data: Vec, +} + +impl IRModule { + pub fn to_s(&self) -> Result { + to_string(self) + } + + pub fn from_s(source: &str) -> Result { + from_str(source) + } +} diff --git a/hylo-lang/hyloc/Cargo.toml b/hylo-lang/hyloc/Cargo.toml new file mode 100644 index 0000000..6c0b016 --- /dev/null +++ b/hylo-lang/hyloc/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "hyloc" +version = "0.1.0" +edition = "2024" + +[dependencies] +hylo-ir = { workspace = true } +hylo-interpret = { workspace = true } diff --git a/hylo-lang/hyloc/src/ir.rs b/hylo-lang/hyloc/src/ir.rs new file mode 100644 index 0000000..fa1a895 --- /dev/null +++ b/hylo-lang/hyloc/src/ir.rs @@ -0,0 +1,67 @@ +use crate::parser::Module; +use crate::tokenizer::Token; +use hylo_ir::*; + +macro_rules! 
push_num { + ($num:ident) => { IR::StackPush(*$num as u64) } +} + +pub fn generate(module: &Module) -> IRModule { + // Eventually these will end up being sections in assembly + let mut text = vec![]; + let mut data = vec![]; + + text.push(module.words.iter().map(|def| { + let mut body = def.instructions.iter().map(|inst| { + match inst { + Token::Word(word) => { + match *word { + "@" => IR::Load, + "!" => IR::Store, + "dup" => IR::Dup, + "swap" => IR::Swap, + "drop" => IR::Drop, + "over" => IR::Over, + "puts" => IR::PutS, + "putn" => IR::PutN, + "if" => IR::If, + "endif" => IR::EndIf, + "=" => IR::Equals, + ">" => IR::GreaterThan, + "+" => IR::AddU64, + "-" => IR::SubtractU64, + "*" => IR::MultiplyU64, + "/" => IR::DivideU64, + // TODO num type specfic math like `+:i32`, etc. + _ => IR::Call(String::from(*word)) + } + }, + Token::String(text) => { + data.push(IR::StringDef(String::from(*text))); + IR::StackPushString(data.len() - 1) + }, + Token::NumU8(num) => push_num!(num), + Token::NumI8(num) => push_num!(num), + Token::NumU16(num) => push_num!(num), + Token::NumI16(num) => push_num!(num), + Token::NumU32(num) => push_num!(num), + Token::NumI32(num) => push_num!(num), + Token::NumU64(num) => push_num!(num), + Token::NumI64(num) => push_num!(num), + Token::NumF32(num) => push_num!(num), + Token::NumF64(num) => push_num!(num), + } + }).collect::>(); + + let mut result = vec![IR::Label(def.name.to_string())]; + result.append(&mut body); + result.push(IR::Ret); + result + }).flatten().collect::>()); + + + IRModule { + text: text.into_iter().flatten().collect::>(), + data + } +} diff --git a/hylo-lang/hyloc/src/main.rs b/hylo-lang/hyloc/src/main.rs new file mode 100644 index 0000000..518e198 --- /dev/null +++ b/hylo-lang/hyloc/src/main.rs @@ -0,0 +1,18 @@ +mod tokenizer; +mod parser; +mod ir; + +use hylo_ir::IRModule; +use hylo_interpret::Interpreter; + +fn compile(source: &str) -> IRModule { + ir::generate(&parser::Module::parse(tokenizer::tokenize(source))) +} + 
+fn main() { + let filename = std::env::args().nth(1).expect("must provide a file to compile"); + let contents = std::fs::read_to_string(&filename).unwrap(); + let module = compile(&contents); + let mut interp = Interpreter::new(&module); + interp.run(); +} diff --git a/hylo-lang/hyloc/src/parser.rs b/hylo-lang/hyloc/src/parser.rs new file mode 100644 index 0000000..43d2ca1 --- /dev/null +++ b/hylo-lang/hyloc/src/parser.rs @@ -0,0 +1,92 @@ +use crate::tokenizer::Token; + +#[derive(Debug)] +pub struct WordDefinition<'a> { + pub name: &'a str, + pub instructions: Vec>, +} + +#[derive(Debug)] +pub struct Module<'a> { + pub words: Vec> +} + +impl<'a> Module<'a> { + pub fn parse(input: Vec>) -> Self { + let mut result = vec![]; + let mut main = vec![]; + let mut current_word: Option = None; + let mut about_to_start_word_def = false; + + for token in input { + if about_to_start_word_def { + if let Token::Word(name) = token { + current_word = Some(WordDefinition { + name, + instructions: vec![], + }); + about_to_start_word_def = false; + continue; + } else { + panic!("{:?} is not a valid word name!", token); + } + } else if let Token::Word(word) = token { + if word == ":" { + if current_word.is_some() { + panic!("can't define words inside word definitions!"); + } + about_to_start_word_def = true; + continue; + } + if word == ";" { + let word = current_word.take(); + result.push(word.unwrap()); + continue; + } + } + if let Some(ref mut current_word) = current_word { + current_word.instructions.push(token); + } else { + main.push(token); + } + } + + if about_to_start_word_def || current_word.is_some() { + panic!("unfinished word definition!"); + } + + result.push(WordDefinition { + name: "main", + instructions: main, + }); + + Module { words: result } + } + + pub fn debug_print(&self) { + for word in &self.words { + println!("{}", word.name); + for instruction in &word.instructions { + println!(" {:?}", instruction); + } + } + } +} + + + +#[cfg(test)] +mod tests { + use 
/// A lexical token that borrows its text from the source string.
#[derive(Debug, Clone, PartialEq)]
pub enum Token<'a> {
    Word(&'a str),
    /// Raw text between the quotes; escape sequences are not decoded.
    String(&'a str),
    NumU8(u8),
    NumI8(i8),
    NumU16(u16),
    NumI16(i16),
    NumU32(u32),
    NumI32(i32),
    NumU64(u64),
    NumI64(i64),
    NumF32(f32),
    NumF64(f64),
}

/// Parses `digits` as `T`, panicking with the offending literal on failure.
fn parse_number<T>(digits: &str, type_name: &str) -> T
where
    T: std::str::FromStr,
    T::Err: std::fmt::Display,
{
    digits
        .parse()
        .unwrap_or_else(|err| panic!("invalid {} literal `{}`: {}", type_name, digits, err))
}

impl<'a> Token<'a> {
    /// Classifies a whitespace-delimited chunk as a number or a word.
    ///
    /// A chunk is numeric when it starts with an ASCII digit, or with `-`
    /// followed by an ASCII digit or `.`. Everything else (including a lone
    /// `-` and names such as `-rot`) is a word. A numeric chunk may carry a
    /// `:type` suffix (`43:f32`); without one, a chunk containing `.` is
    /// `f64`, a negative chunk is `i64`, and anything else is `u64`.
    ///
    /// # Panics
    ///
    /// Panics when a numeric chunk does not parse as its type or names an
    /// unknown type.
    fn parse_word_or_num(input: &'a str) -> Token<'a> {
        let mut chars = input.chars();
        let looks_numeric = match (chars.next(), chars.next()) {
            (Some('-'), Some(next)) => next.is_ascii_digit() || next == '.',
            (Some(first), _) => first.is_ascii_digit(),
            (None, _) => false,
        };
        if !looks_numeric {
            return Token::Word(input);
        }

        if let Some((digits, type_name)) = input.split_once(':') {
            return match type_name {
                "u8" => Token::NumU8(parse_number(digits, type_name)),
                "i8" => Token::NumI8(parse_number(digits, type_name)),
                "u16" => Token::NumU16(parse_number(digits, type_name)),
                "i16" => Token::NumI16(parse_number(digits, type_name)),
                "u32" => Token::NumU32(parse_number(digits, type_name)),
                "i32" => Token::NumI32(parse_number(digits, type_name)),
                "u64" => Token::NumU64(parse_number(digits, type_name)),
                "i64" => Token::NumI64(parse_number(digits, type_name)),
                "f32" => Token::NumF32(parse_number(digits, type_name)),
                "f64" => Token::NumF64(parse_number(digits, type_name)),
                _ => panic!("unknown number type `{}`", type_name),
            };
        }

        if input.contains('.') {
            Token::NumF64(parse_number(input, "f64"))
        } else if input.starts_with('-') {
            Token::NumI64(parse_number(input, "i64"))
        } else {
            Token::NumU64(parse_number(input, "u64"))
        }
    }
}

// TODO really want an iterator, not a vector
/// Splits source text into tokens.
///
/// * Words and numbers are separated by whitespace.
/// * `"` opens a string that runs to the next `"` not preceded by an
///   unescaped backslash; the token is the raw text between the quotes.
/// * A standalone `(` followed by whitespace opens a comment that runs to the
///   next `)` or newline.
///
/// All positions are byte offsets taken from `char_indices`, so non-ASCII
/// input is sliced correctly. A string or comment left open at the end of
/// input produces no token.
pub fn tokenize<'a>(input: &'a str) -> Vec<Token<'a>> {
    let mut tokens = Vec::new();
    let mut string_start: Option<usize> = None;
    let mut word_start: Option<usize> = None;
    let mut last_is_escape = false;
    let mut in_comment = false;

    for (index, ch) in input.char_indices() {
        if in_comment {
            in_comment = !(ch == ')' || ch == '\n');
            continue;
        }

        if let Some(start) = string_start {
            if ch == '"' && !last_is_escape {
                tokens.push(Token::String(&input[start..index]));
                string_start = None;
            }
            // A backslash escapes the next character only if it is not itself
            // escaped, so `\\"` ends the string.
            last_is_escape = ch == '\\' && !last_is_escape;
            continue;
        }

        if ch == '"' {
            // A quote also terminates a word that is still in progress.
            if let Some(start) = word_start.take() {
                tokens.push(Token::parse_word_or_num(&input[start..index]));
            }
            string_start = Some(index + ch.len_utf8());
            last_is_escape = false;
            continue;
        }

        if ch.is_whitespace() {
            if let Some(start) = word_start.take() {
                match &input[start..index] {
                    "(" => in_comment = true,
                    text => tokens.push(Token::parse_word_or_num(text)),
                }
            }
            continue;
        }

        if word_start.is_none() {
            word_start = Some(index);
        }
    }

    // Flush a final token that is not followed by whitespace.
    if let Some(start) = word_start {
        let text = &input[start..];
        if text != "(" {
            tokens.push(Token::parse_word_or_num(text));
        }
    }

    tokens
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn try_some_tokenizing() {
        let result = tokenize("
            2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup
");
        println!("result: {:?}", result);
        assert_eq!(result.len(), 10);
    }
}