rethought.computer Git - sorel-lang.git/commitdiff
working interpreter for hylo keep/288fa6be44bc383d1ff3d59a46587e96d1c63fc0
authorBryan English <bryan@rethought.computer>
Sat, 13 Dec 2025 05:08:39 +0000 (00:08 -0500)
committerBryan English <bryan@rethought.computer>
Tue, 10 Feb 2026 04:08:54 +0000 (04:08 +0000)
16 files changed:
hylo-lang/.gitignore [new file with mode: 0644]
hylo-lang/Cargo.lock [new file with mode: 0644]
hylo-lang/Cargo.toml [new file with mode: 0644]
hylo-lang/README.md [new file with mode: 0644]
hylo-lang/examples/fib.hylo [new file with mode: 0644]
hylo-lang/flake.lock [new file with mode: 0644]
hylo-lang/flake.nix [new file with mode: 0644]
hylo-lang/hylo-interpret/Cargo.toml [new file with mode: 0644]
hylo-lang/hylo-interpret/src/lib.rs [new file with mode: 0644]
hylo-lang/hylo-ir/Cargo.toml [new file with mode: 0644]
hylo-lang/hylo-ir/src/lib.rs [new file with mode: 0644]
hylo-lang/hyloc/Cargo.toml [new file with mode: 0644]
hylo-lang/hyloc/src/ir.rs [new file with mode: 0644]
hylo-lang/hyloc/src/main.rs [new file with mode: 0644]
hylo-lang/hyloc/src/parser.rs [new file with mode: 0644]
hylo-lang/hyloc/src/tokenizer.rs [new file with mode: 0644]

diff --git a/hylo-lang/.gitignore b/hylo-lang/.gitignore
new file mode 100644 (file)
index 0000000..eb5a316
--- /dev/null
@@ -0,0 +1 @@
+target
diff --git a/hylo-lang/Cargo.lock b/hylo-lang/Cargo.lock
new file mode 100644 (file)
index 0000000..75faf25
--- /dev/null
@@ -0,0 +1,144 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+
+[[package]]
+name = "hylo-interpret"
+version = "0.1.0"
+dependencies = [
+ "hylo-ir",
+]
+
+[[package]]
+name = "hylo-ir"
+version = "0.1.0"
+dependencies = [
+ "serde",
+ "serde_derive",
+ "serde_yaml",
+]
+
+[[package]]
+name = "hyloc"
+version = "0.1.0"
+dependencies = [
+ "hylo-interpret",
+ "hylo-ir",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "ryu"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_yaml"
+version = "0.9.34+deprecated"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
+dependencies = [
+ "indexmap",
+ "itoa",
+ "ryu",
+ "serde",
+ "unsafe-libyaml",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.111"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
+
+[[package]]
+name = "unsafe-libyaml"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
diff --git a/hylo-lang/Cargo.toml b/hylo-lang/Cargo.toml
new file mode 100644 (file)
index 0000000..9fc6f82
--- /dev/null
@@ -0,0 +1,9 @@
+[workspace]
+
+resolver = "3"
+members = ["hylo-ir","hyloc", "hylo-interpret"]
+
+
+[workspace.dependencies]
+hylo-ir = { path = "./hylo-ir", version = "0.1.0" }
+hylo-interpret = { path = "./hylo-interpret", version = "0.1.0" }
diff --git a/hylo-lang/README.md b/hylo-lang/README.md
new file mode 100644 (file)
index 0000000..f971508
--- /dev/null
@@ -0,0 +1,3 @@
+# hylo-lang
+
+The name means "it's high-level and low-level at the same time".
diff --git a/hylo-lang/examples/fib.hylo b/hylo-lang/examples/fib.hylo
new file mode 100644 (file)
index 0000000..e59b34e
--- /dev/null
@@ -0,0 +1,20 @@
+
+: fib 
+    dup 1 > if
+        dup 1 - fib
+        swap 2 - fib
+        +
+    endif
+;
+
+0 fib putn
+1 fib putn
+2 fib putn
+3 fib putn
+4 fib putn
+5 fib putn
+6 fib putn
+7 fib putn
+8 fib putn
+9 fib putn
+10 fib putn
diff --git a/hylo-lang/flake.lock b/hylo-lang/flake.lock
new file mode 100644 (file)
index 0000000..1384c7f
--- /dev/null
@@ -0,0 +1,27 @@
+{
+  "nodes": {
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1765425892,
+        "narHash": "sha256-jlQpSkg2sK6IJVzTQBDyRxQZgKADC2HKMRfGCSgNMHo=",
+        "owner": "nixos",
+        "repo": "nixpkgs",
+        "rev": "5d6bdbddb4695a62f0d00a3620b37a15275a5093",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nixos",
+        "ref": "nixpkgs-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "nixpkgs": "nixpkgs"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/hylo-lang/flake.nix b/hylo-lang/flake.nix
new file mode 100644 (file)
index 0000000..e198852
--- /dev/null
@@ -0,0 +1,17 @@
+{
+  description = "hylo-lang";
+
+  inputs = {
+    nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
+  };
+
+  outputs = {nixpkgs, ...}: let
+    system = "x86_64-linux";
+    pkgs = import nixpkgs { inherit system; };
+  in {
+    devShells.${system}.default = pkgs.mkShell {
+      packages = [
+      ];
+    };
+  };
+}
diff --git a/hylo-lang/hylo-interpret/Cargo.toml b/hylo-lang/hylo-interpret/Cargo.toml
new file mode 100644 (file)
index 0000000..cc703fa
--- /dev/null
@@ -0,0 +1,7 @@
+[package]
+name = "hylo-interpret"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+hylo-ir = { workspace = true }
diff --git a/hylo-lang/hylo-interpret/src/lib.rs b/hylo-lang/hylo-interpret/src/lib.rs
new file mode 100644 (file)
index 0000000..aaca00f
--- /dev/null
@@ -0,0 +1,133 @@
+use hylo_ir::*;
+
+use std::collections::HashMap;
+
+/// Stack-machine executor for a compiled hylo `IRModule`.
+pub struct Interpreter<'a> {
+    // The module being executed: `text` holds instructions, `data` holds string defs.
+    module: &'a IRModule,
+    // Operand stack; every cell is a raw u64.
+    data_stack: Vec<u64>,
+    // Index into `module.text` of the instruction about to execute.
+    instruction_pointer: usize,
+    // Return addresses pushed by `Call`; a `Ret` with this empty halts the program.
+    return_stack: Vec<usize>,
+    // Maps each `Label` name to its index in `module.text`.
+    labels: HashMap<String, usize>,
+    // String literals copied out of the data section.
+    // NOTE(review): populated in `new` but never read by `run` — presumably
+    // intended for `StackPushString`/`PutS`; confirm before removing.
+    strings: Vec<String>,
+}
+
+impl<'a> Interpreter<'a> {
+    pub fn new(ir_mod: &'a IRModule) -> Self {
+        let mut index = 0;
+        let mut labels = HashMap::new();
+        for token in ir_mod.text.iter() {
+            if let IR::Label(name) = token {
+                labels.insert(name.clone(), index);
+            }
+            index += 1;
+        }
+        let instruction_pointer = *labels.get("main").unwrap();
+
+        let strings = ir_mod.data.iter().filter_map(|s| {
+            match s {
+                IR::StringDef(s) => {
+                    Some(s.clone())
+                },
+                _ => None
+            }
+        }).collect();
+
+        Self {
+            module: ir_mod,
+            data_stack: vec![],
+            instruction_pointer,
+            return_stack: vec![],
+            labels,
+            strings
+        }
+    }
+
+    pub fn run(&mut self) {
+        let mut looking_for_endif = false;
+        loop {
+            if looking_for_endif {
+                match &self.module.text[self.instruction_pointer] {
+                    IR::EndIf => {
+                        looking_for_endif = false;
+                    },
+                    IR::Else => {
+                        looking_for_endif = false;
+                    }
+                    _ => {}
+                }
+
+            } else {
+                match &self.module.text[self.instruction_pointer] {
+                    IR::Label(_) => {},
+                    IR::Call(name) => {
+                        self.return_stack.push(self.instruction_pointer);
+                        self.instruction_pointer = *self.labels.get(name).unwrap();
+                    },
+                    IR::Ret => {
+                        if self.return_stack.len() == 0 {
+                            break;
+                        }
+                        self.instruction_pointer = self.return_stack.pop().unwrap();
+                    },
+                    IR::StackPush(num) => {
+                        self.data_stack.push(*num);
+                    },
+                    IR::AddU64 => {
+                        let a = self.data_stack.pop().unwrap();
+                        let b = self.data_stack.pop().unwrap();
+                        self.data_stack.push(a + b);
+                    },
+                    IR::SubtractU64 => {
+                        let b = self.data_stack.pop().unwrap();
+                        let a = self.data_stack.pop().unwrap();
+                        self.data_stack.push(a - b);
+                    },
+                    IR::PutN => {
+                        println!("{}", self.data_stack.last().unwrap());
+                    },
+                    IR::Dup => {
+                        self.data_stack.push(*self.data_stack.last().unwrap());
+                    },
+                    IR::Swap => {
+                        let a = self.data_stack.pop().unwrap();
+                        let b = self.data_stack.pop().unwrap();
+                        self.data_stack.push(a);
+                        self.data_stack.push(b);
+                    },
+                    IR::Drop => {
+                        self.data_stack.pop();
+                    },
+                    IR::Equals => {
+                        let a = self.data_stack.pop().unwrap();
+                        let b = self.data_stack.pop().unwrap();
+                        self.data_stack.push(if a == b {
+                            0
+                        } else {
+                            -1 as i64 as u64
+                        });
+                    },
+                    IR::GreaterThan => {
+                        let b = self.data_stack.pop().unwrap();
+                        let a = self.data_stack.pop().unwrap();
+                        self.data_stack.push(if a > b {
+                            0
+                        } else {
+                            -1 as i64 as u64
+                        });
+
+                    },
+                    IR::If => {
+                        if self.data_stack.pop().unwrap() != 0 {
+                            looking_for_endif = true;
+                        }
+                    },
+                    IR::EndIf => {},
+                    _ => {
+                        println!("Instruction not implemented.");
+                    }
+                }
+            }
+            self.instruction_pointer += 1;
+        }
+    }
+}
diff --git a/hylo-lang/hylo-ir/Cargo.toml b/hylo-lang/hylo-ir/Cargo.toml
new file mode 100644 (file)
index 0000000..2f31ca3
--- /dev/null
@@ -0,0 +1,9 @@
+[package]
+name = "hylo-ir"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+serde = "1.0.228"
+serde_derive = "1.0.228"
+serde_yaml = "0.9.34"
diff --git a/hylo-lang/hylo-ir/src/lib.rs b/hylo-lang/hylo-ir/src/lib.rs
new file mode 100644 (file)
index 0000000..be8608a
--- /dev/null
@@ -0,0 +1,51 @@
+use serde_yaml::{from_str, to_string, Error};
+use serde_derive::{Serialize, Deserialize};
+
+/// One instruction (or data item) of the hylo intermediate representation.
+#[derive(Serialize, Deserialize, Debug)]
+pub enum IR {
+    /// Marks a position in the text section; target for `Call`.
+    Label(String),
+    /// Call the word defined at the named label.
+    Call(String),
+    /// Return from the current word.
+    Ret,
+    /// Push a literal u64 cell onto the data stack.
+    StackPush(u64),
+    /// Push the index of a `StringDef` in the module's data section.
+    StackPushString(usize),
+    /// A string literal; belongs in the data section, not the text section.
+    StringDef(String),
+
+    // These next ones should always be inlined, so they're in IR.
+    Load, // @ ( addr -- x ) -- Fetch memory contents at addr
+    Store, // ! ( x addr -- ) -- Store x at addr
+
+    // These ones might not be inlined, but should be built-in, so a compiler might
+    // turn this into `Call(String)` before translating to assembly/machine-code, but
+    // an IR interpreter may just execute them.
+    AddU64,
+    SubtractU64,
+    MultiplyU64,
+    DivideU64,
+    Equals,
+    GreaterThan,
+    Dup,
+    Swap,
+    Drop,
+    Over,
+    PutS, // `puts` — presumably prints a string; not yet implemented by the interpreter
+    PutN, // `putn` — print the number on top of the stack (interpreter peeks, doesn't pop)
+    If,
+    Else,
+    EndIf,
+}
+
+/// A compiled module: the executable `text` section plus a `data`
+/// section holding `StringDef` entries addressed by index.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct IRModule {
+    pub text: Vec<IR>,
+    pub data: Vec<IR>,
+}
+
+impl IRModule {
+    /// Serialize this module to a YAML string (via `serde_yaml`).
+    pub fn to_s(&self) -> Result<String, Error> {
+        to_string(self)
+    }
+
+    /// Parse a module back out of its YAML form.
+    pub fn from_s(source: &str) -> Result<Self, Error> {
+        from_str(source)
+    }
+}
diff --git a/hylo-lang/hyloc/Cargo.toml b/hylo-lang/hyloc/Cargo.toml
new file mode 100644 (file)
index 0000000..6c0b016
--- /dev/null
@@ -0,0 +1,8 @@
+[package]
+name = "hyloc"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+hylo-ir = { workspace = true }
+hylo-interpret = { workspace = true }
diff --git a/hylo-lang/hyloc/src/ir.rs b/hylo-lang/hyloc/src/ir.rs
new file mode 100644 (file)
index 0000000..fa1a895
--- /dev/null
@@ -0,0 +1,67 @@
+use crate::parser::Module;
+use crate::tokenizer::Token;
+use hylo_ir::*;
+
+macro_rules! push_num {
+    ($num:ident) => { IR::StackPush(*$num as u64) }
+}
+
+pub fn generate(module: &Module) -> IRModule {
+    // Eventually these will end up being sections in assembly
+    let mut text = vec![];
+    let mut data = vec![];
+
+    text.push(module.words.iter().map(|def| {
+        let mut body = def.instructions.iter().map(|inst| {
+            match inst {
+                Token::Word(word) => {
+                    match *word {
+                        "@" => IR::Load,
+                        "!" => IR::Store,
+                        "dup" => IR::Dup,
+                        "swap" => IR::Swap,
+                        "drop" => IR::Drop,
+                        "over" => IR::Over,
+                        "puts" => IR::PutS,
+                        "putn" => IR::PutN,
+                        "if" => IR::If,
+                        "endif" => IR::EndIf,
+                        "=" => IR::Equals,
+                        ">" => IR::GreaterThan,
+                        "+" => IR::AddU64,
+                        "-" => IR::SubtractU64,
+                        "*" => IR::MultiplyU64,
+                        "/" => IR::DivideU64,
+                        // TODO num type specfic math like `+:i32`, etc.
+                        _ =>  IR::Call(String::from(*word))
+                    }
+                },
+                Token::String(text) => {
+                    data.push(IR::StringDef(String::from(*text)));
+                    IR::StackPushString(data.len() - 1)
+                },
+                Token::NumU8(num) => push_num!(num),
+                Token::NumI8(num) => push_num!(num),
+                Token::NumU16(num) => push_num!(num),
+                Token::NumI16(num) => push_num!(num),
+                Token::NumU32(num) => push_num!(num),
+                Token::NumI32(num) => push_num!(num),
+                Token::NumU64(num) => push_num!(num),
+                Token::NumI64(num) => push_num!(num),
+                Token::NumF32(num) => push_num!(num),
+                Token::NumF64(num) => push_num!(num),
+            }
+        }).collect::<Vec<_>>();
+
+        let mut result = vec![IR::Label(def.name.to_string())];
+        result.append(&mut body);
+        result.push(IR::Ret);
+        result
+    }).flatten().collect::<Vec<_>>());
+
+    
+    IRModule {
+        text: text.into_iter().flatten().collect::<Vec<_>>(),
+        data
+    }
+}
diff --git a/hylo-lang/hyloc/src/main.rs b/hylo-lang/hyloc/src/main.rs
new file mode 100644 (file)
index 0000000..518e198
--- /dev/null
@@ -0,0 +1,18 @@
+mod tokenizer;
+mod parser;
+mod ir;
+
+use hylo_ir::IRModule;
+use hylo_interpret::Interpreter;
+
+fn compile(source: &str) -> IRModule {
+    ir::generate(&parser::Module::parse(tokenizer::tokenize(source)))
+}
+
+fn main() {
+    let filename = std::env::args().nth(1).expect("must provide a file to compile");
+    let contents = std::fs::read_to_string(&filename).unwrap();
+    let module = compile(&contents);
+    let mut interp = Interpreter::new(&module);
+    interp.run();
+}
diff --git a/hylo-lang/hyloc/src/parser.rs b/hylo-lang/hyloc/src/parser.rs
new file mode 100644 (file)
index 0000000..43d2ca1
--- /dev/null
@@ -0,0 +1,92 @@
+use crate::tokenizer::Token;
+
+/// A named word (function) and the tokens that make up its body.
+#[derive(Debug)]
+pub struct WordDefinition<'a> {
+    // Both the name and any word/string tokens borrow from the source text.
+    pub name: &'a str,
+    pub instructions: Vec<Token<'a>>,
+}
+
+/// A parsed program: every word definition, with the implicit `main`
+/// (top-level tokens) appended as the last entry by `Module::parse`.
+#[derive(Debug)]
+pub struct Module<'a> {
+    pub words: Vec<WordDefinition<'a>>
+}
+
+impl<'a> Module<'a> {
+    pub fn parse(input: Vec<Token<'a>>) -> Self {
+        let mut result = vec![];
+        let mut main = vec![];
+        let mut current_word: Option<WordDefinition> = None;
+        let mut about_to_start_word_def = false;
+
+        for token in input {
+            if about_to_start_word_def {
+                if let Token::Word(name) = token {
+                    current_word = Some(WordDefinition {
+                        name,
+                        instructions: vec![],
+                    });
+                    about_to_start_word_def = false;
+                    continue;
+                } else {
+                    panic!("{:?} is not a valid word name!", token);
+                }
+            } else if let Token::Word(word) = token {
+                if word == ":" {
+                    if current_word.is_some() {
+                        panic!("can't define words inside word definitions!");
+                    }
+                    about_to_start_word_def = true;
+                    continue;
+                }
+                if word == ";" {
+                    let word = current_word.take();
+                    result.push(word.unwrap());
+                    continue;
+                }
+            }
+            if let Some(ref mut current_word) = current_word {
+                current_word.instructions.push(token);
+            } else {
+                main.push(token);
+            }
+        }
+
+        if about_to_start_word_def || current_word.is_some() {
+            panic!("unfinished word definition!");
+        }
+
+        result.push(WordDefinition {
+            name: "main",
+            instructions: main,
+        });
+
+        Module { words: result }
+    }
+
+    pub fn debug_print(&self) {
+        for word in &self.words {
+            println!("{}", word.name);
+            for instruction in &word.instructions {
+                println!("    {:?}", instruction);
+            }
+        }
+    }
+}
+
+
+
#[cfg(test)]
mod tests {
    use super::*;

    // Previously this test only printed the result; now it asserts the
    // parsed structure.
    #[test]
    fn try_some_parsing() {
        let result = Module::parse(crate::tokenizer::tokenize("
: hello world 16 \"planet\" ;
: soup chicken 4.5 hello ;

hello soup
"));
        result.debug_print();
        // Two explicit definitions plus the implicit trailing `main`.
        let names: Vec<_> = result.words.iter().map(|w| w.name).collect();
        assert_eq!(names, vec!["hello", "soup", "main"]);
        assert_eq!(result.words[0].instructions.len(), 3); // world 16 "planet"
        assert_eq!(result.words[1].instructions.len(), 3); // chicken 4.5 hello
        assert_eq!(result.words[2].instructions.len(), 2); // hello soup
    }
}
diff --git a/hylo-lang/hyloc/src/tokenizer.rs b/hylo-lang/hyloc/src/tokenizer.rs
new file mode 100644 (file)
index 0000000..b2e79b0
--- /dev/null
@@ -0,0 +1,145 @@
/// A single lexical token.  Word and string tokens borrow from the source text.
#[derive(Debug)]
pub enum Token<'a> {
    Word(&'a str),
    String(&'a str),
    NumU8(u8),
    NumI8(i8),
    NumU16(u16),
    NumI16(i16),
    NumU32(u32),
    NumI32(i32),
    NumU64(u64),
    NumI64(i64),
    NumF32(f32),
    NumF64(f64),
}

impl<'a> Token<'a> {
    /// Classify a whitespace-delimited token as a number or a word.
    ///
    /// Numbers may carry a type suffix (`43:f32`); without one, a `.`
    /// means f64, a leading `-` means i64, otherwise u64.
    ///
    /// # Panics
    /// Panics on malformed numeric literals: any token starting with a
    /// digit or `-` (other than `-` itself) is assumed to be a number.
    fn parse_word_or_num(input: &'a str) -> Token<'a> {
        // `-` alone is the subtraction word, not a number.
        if input == "-" {
            return Token::Word(input);
        }
        let first = input.chars().next().expect("token cannot be empty");
        if !input.starts_with('-') && !first.is_numeric() {
            return Token::Word(input);
        }
        if let Some((num, typ)) = input.split_once(':') {
            match typ {
                "u8" => Token::NumU8(num.parse().unwrap()),
                "i8" => Token::NumI8(num.parse().unwrap()),
                "u16" => Token::NumU16(num.parse().unwrap()),
                "i16" => Token::NumI16(num.parse().unwrap()),
                "u32" => Token::NumU32(num.parse().unwrap()),
                "i32" => Token::NumI32(num.parse().unwrap()),
                "u64" => Token::NumU64(num.parse().unwrap()),
                "i64" => Token::NumI64(num.parse().unwrap()),
                "f32" => Token::NumF32(num.parse().unwrap()),
                "f64" => Token::NumF64(num.parse().unwrap()),
                _ => panic!("unknown number type"),
            }
        } else if input.contains('.') {
            Token::NumF64(input.parse().unwrap())
        } else if input.starts_with('-') {
            Token::NumI64(input.parse().unwrap())
        } else {
            Token::NumU64(input.parse().unwrap())
        }
    }
}

// TODO really want an iterator, not a vector
/// Split `input` into tokens.  `"…"` delimits strings (escapes are left
/// in place), `( … )` / `( … <newline>` is a comment, everything else is
/// whitespace-separated.
///
/// Fixes over the previous version: byte offsets from `char_indices`
/// keep slicing correct for multi-byte UTF-8 (the old code sliced with
/// char counts); a token at the very start of the input and a short
/// token at the very end are no longer dropped.
pub fn tokenize<'a>(input: &'a str) -> Vec<Token<'a>> {
    let mut result = vec![];
    // Byte offset of the first content byte of an open string literal.
    let mut string_start: Option<usize> = None;
    // Byte offset of the start of the word/number currently being scanned.
    let mut word_start: Option<usize> = None;
    let mut last_is_escape = false;
    let mut in_comment = false;

    for (index, char) in input.char_indices() {
        if in_comment {
            // A comment runs to the closing paren or the end of the line.
            if char == ')' || char == '\n' {
                in_comment = false;
            }
            continue;
        }

        if let Some(start) = string_start {
            if char == '"' && !last_is_escape {
                result.push(Token::String(&input[start..index]));
                string_start = None;
            }
            last_is_escape = char == '\\';
            continue;
        }

        if char == '"' {
            // Close off any token butted right up against the quote, then
            // open the string just past it (`"` is a single byte).
            if let Some(start) = word_start.take() {
                result.push(Token::parse_word_or_num(&input[start..index]));
            }
            string_start = Some(index + 1);
            last_is_escape = false;
            continue;
        }

        if char.is_whitespace() {
            if let Some(start) = word_start.take() {
                let token = &input[start..index];
                if token == "(" {
                    in_comment = true;
                } else {
                    result.push(Token::parse_word_or_num(token));
                }
            }
            continue;
        }

        // First non-whitespace char of a new word/number.
        if word_start.is_none() {
            word_start = Some(index);
        }
    }

    // Flush a token that runs to the very end of the input.
    if let Some(start) = word_start {
        let token = &input[start..];
        if token != "(" {
            result.push(Token::parse_word_or_num(token));
        }
    }
    result
}
+
#[cfg(test)]
mod tests {
    use super::*;

    // Previously this test only printed the tokens; now it asserts them.
    #[test]
    fn try_some_tokenizing() {
        let result = tokenize("
            2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup
");
        assert_eq!(result.len(), 10);
        assert!(matches!(result[0], Token::NumU64(2)));
        assert!(matches!(result[1], Token::NumF64(f) if f == 3.4));
        assert!(matches!(result[2], Token::Word("-")));
        assert!(matches!(result[3], Token::NumI64(-88)));
        assert!(matches!(result[4], Token::Word("bacon")));
        assert!(matches!(result[5], Token::String("hello")));
        assert!(matches!(result[6], Token::NumF32(f) if f == 43.0));
        assert!(matches!(result[7], Token::NumU32(2345)));
        assert!(matches!(result[8], Token::NumI8(-57)));
        assert!(matches!(result[9], Token::Word("soup")));
    }
}