"serde",
"serde_derive",
"serde_yaml",
+ "sorel-tokenizer",
+]
+
+[[package]]
+name = "sorel-parser"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "sorel-tokenizer",
+]
+
+[[package]]
+name = "sorel-tokenizer"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
]
[[package]]
dependencies = [
"anyhow",
"sorel-ir",
+ "sorel-parser",
+ "sorel-tokenizer",
]
[[package]]
[workspace]
resolver = "3"
-members = ["sorel-ir","sorelc"]
-
+members = ["sorel-ir","sorel-parser","sorel-tokenizer","sorelc"]
[workspace.dependencies]
sorel-ir = { path = "./sorel-ir", version = "0.1.0" }
+sorel-tokenizer = { path = "./sorel-tokenizer", version = "0.1.0" }
+sorel-parser = { path = "./sorel-parser", version = "0.1.0" }
edition = "2024"
[dependencies]
+sorel-tokenizer = { workspace = true }
serde = "1.0.228"
serde_derive = "1.0.228"
serde_yaml = "0.9.34"
--- /dev/null
+use serde_derive::{Serialize, Deserialize};
+use sorel_tokenizer::Token;
+
+/// One instruction of the stack-machine intermediate representation.
+///
+/// Serialized to/from YAML with serde (see `IRObject`), so every variant
+/// carries only plain data (strings and integers).
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub enum IR {
+    Label(String),
+    Call(String),
+    WordPointer(String),
+    CallPtr,
+    Ret,
+    StackPush(u64),
+    StackPushString(String), // refers to string label, not the string itself
+    StringDef(String, String), // first is string label, second is string value
+
+    // These next ones should always be inlined, so they're in IR.
+    Load, // @ ( addr -- x ) -- Fetch memory contents at addr
+    Load8,
+    Load16,
+    Load32,
+    Store, // ! ( x addr -- ) -- Store x at addr
+    Store8,
+    Store16,
+    Store32,
+
+    // These ones might not be inlined, but should be built-in, so a compiler might
+    // turn this into `Call(String)` before translating to assembly/machine-code, but
+    // an IR interpreter may just execute them.
+    AddU64,
+    SubtractU64,
+    MultiplyU64,
+    DivideU64,
+    ModU64,
+    Equals,
+    GreaterThan,
+    LessThan,
+    BitwiseOr,
+    Dup,
+    Swap,
+    Drop,
+    Over,
+    Rot,
+    StackPointer,
+    StackBottom,
+    If,
+    Else,
+    EndIf,
+    Loop,
+    EndLoop,
+
+    // System calls
+    Sys0,
+    Sys1,
+    Sys2,
+    Sys3,
+    Sys4,
+    Sys5,
+    Sys6,
+}
+
+/// Build an `IR::StackPush` from a numeric token payload.
+///
+/// The two-argument form first casts through the same-width unsigned type
+/// (e.g. `i8 as u8 as u64`) so narrow signed values are zero-extended
+/// rather than sign-extended into the 64-bit stack cell.
+macro_rules! push_num {
+    ($num:ident) => { IR::StackPush(*$num as u64) };
+    ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) };
+}
+
+impl IR {
+    /// Translate one source token into an IR instruction.
+    ///
+    /// String literals are hoisted into the data section: a `StringDef` with
+    /// a generated label is appended to `data`, and the returned instruction
+    /// pushes that label. All other tokens map directly to an instruction.
+    ///
+    /// NOTE(review): the generated label is `string_{data.len()}`, which is
+    /// unique only if `data` contains nothing but these `StringDef`s — TODO
+    /// confirm against all callers.
+    /// NOTE(review): `NumF32`/`NumF64` go through `as u64`, which truncates
+    /// the value toward zero (e.g. `4.5` pushes `4`) instead of preserving
+    /// its bit pattern — confirm this is intended.
+    pub fn from_token(token: &Token, data: &mut Vec<IR>) -> IR {
+        match token {
+            Token::Word(word) => {
+                match *word {
+                    "@" => IR::Load,
+                    "@:8" => IR::Load8,
+                    "@:16" => IR::Load16,
+                    "@:32" => IR::Load32,
+                    "!" => IR::Store,
+                    "!:8" => IR::Store8,
+                    "!:16" => IR::Store16,
+                    "!:32" => IR::Store32,
+                    "dup" => IR::Dup,
+                    "swap" => IR::Swap,
+                    "drop" => IR::Drop,
+                    "over" => IR::Over,
+                    "rot" => IR::Rot,
+                    "sp" => IR::StackPointer,
+                    "stackbottom" => IR::StackBottom,
+                    "if" => IR::If,
+                    "else" => IR::Else,
+                    "endif" => IR::EndIf,
+                    "loop" => IR::Loop,
+                    "endloop" => IR::EndLoop,
+                    "call" => IR::CallPtr,
+                    "=" => IR::Equals,
+                    ">" => IR::GreaterThan,
+                    "<" => IR::LessThan,
+                    "+" => IR::AddU64,
+                    "-" => IR::SubtractU64,
+                    "*" => IR::MultiplyU64,
+                    "/" => IR::DivideU64,
+                    "%" => IR::ModU64,
+                    "|" => IR::BitwiseOr,
+                    "sys0" => IR::Sys0,
+                    "sys1" => IR::Sys1,
+                    "sys2" => IR::Sys2,
+                    "sys3" => IR::Sys3,
+                    "sys4" => IR::Sys4,
+                    "sys5" => IR::Sys5,
+                    "sys6" => IR::Sys6,
+                    // TODO num type specific math like `+:i32`, etc.
+                    _ => {
+                        // A leading apostrophe takes the address of a word
+                        // instead of calling it.
+                        if let Some(actual_word) = word.strip_prefix("'") {
+                            IR::WordPointer(String::from(actual_word))
+                        } else {
+                            IR::Call(String::from(*word))
+                        }
+                    }
+                }
+            },
+            Token::String(text) => {
+                let string_label = format!("string_{}", data.len());
+                data.push(IR::StringDef(string_label.clone(), String::from(*text)));
+                IR::StackPushString(string_label)
+            },
+            // Narrow signed ints are zero-extended via their unsigned
+            // counterpart (see `push_num!`).
+            Token::NumU8(num) => push_num!(num),
+            Token::NumI8(num) => push_num!(num, u8),
+            Token::NumU16(num) => push_num!(num),
+            Token::NumI16(num) => push_num!(num, u16),
+            Token::NumU32(num) => push_num!(num),
+            Token::NumI32(num) => push_num!(num, u32),
+            Token::NumU64(num) => push_num!(num),
+            Token::NumI64(num) => push_num!(num),
+            Token::NumF32(num) => push_num!(num),
+            Token::NumF64(num) => push_num!(num),
+        }
+    }
+}
-use serde_yaml::{from_str, to_string, Error};
-use serde_derive::{Serialize, Deserialize};
+mod ir;
+pub use ir::*;
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub enum IR {
- Label(String),
- Call(String),
- WordPointer(String),
- CallPtr,
- Ret,
- StackPush(u64),
- StackPushString(String), // refers to string label, not the string itself
- StringDef(String, String), // first is string label, second is string value
+mod object;
+pub use object::*;
- // These next ones should always be inlined, so they're in IR.
- Load, // @ ( addr -- x ) -- Fetch memory contents at addr
- Load8,
- Load16,
- Load32,
- Store, // ! ( x addr -- ) -- Store x at addr
- Store8,
- Store16,
- Store32,
+mod module;
+pub use module::*;
- // These ones might not be inlined, but should be built-in, so a compiler might
- // turn this into `Call(String)` before translating to assembly/machine-code, but
- // an IR interpreter may just execute them.
- AddU64,
- SubtractU64,
- MultiplyU64,
- DivideU64,
- ModU64,
- Equals,
- GreaterThan,
- LessThan,
- BitwiseOr,
- Dup,
- Swap,
- Drop,
- Over,
- Rot,
- StackPointer,
- StackBottom,
- If,
- Else,
- EndIf,
- Loop,
- EndLoop,
-
- // System calls
- Sys0,
- Sys1,
- Sys2,
- Sys3,
- Sys4,
- Sys5,
- Sys6,
-}
-
-// This is like an .o file.
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IRObject {
- pub text: Vec<IR>,
- pub data: Vec<IR>,
-}
-
-impl IRObject {
- pub fn to_s(&self) -> Result<String, Error> {
- to_string(self)
- }
-
- pub fn from_s(source: &str) -> Result<Self, Error> {
- from_str(source)
- }
-}
--- /dev/null
+use std::rc::Rc;
+use std::cell::RefCell;
+use std::path::PathBuf;
+
+use crate::ir::IR;
+
+/// An `IRModule` shared between the modules that import it.
+pub type WrappedIRModule = Rc<RefCell<IRModule>>;
+
+/// Identifies where a module came from: a source file on disk, or a
+/// standard-library specifier string.
+#[derive(Debug, PartialEq, Clone)]
+pub enum ModuleID {
+    SourceFile(PathBuf),
+    StdSpecifier(String),
+}
+
+impl Default for ModuleID {
+    fn default() -> Self {
+        ModuleID::StdSpecifier(String::from(""))
+    }
+}
+
+// Implement `Display` rather than an inherent `to_string` (which clippy flags
+// as `inherent_to_string`): callers keep `ModuleID::to_string()` through the
+// blanket `ToString` impl, and `format!("{}", id)` now works too.
+impl std::fmt::Display for ModuleID {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ModuleID::SourceFile(p) => write!(f, "{}", p.to_string_lossy()),
+            ModuleID::StdSpecifier(s) => f.write_str(s),
+        }
+    }
+}
+
+/// A compilation unit: its code and data sections plus linkage information.
+#[derive(Debug, Default)]
+pub struct IRModule {
+    pub data: Vec<IR>, // data-section entries (e.g. `StringDef`s)
+    pub text: Vec<IR>, // code-section instructions
+    pub imports: Vec<WrappedIRModule>,
+    pub exports: Vec<String>, // word names visible to importers
+    pub externs: Vec<String>, // word names resolved outside this program
+    // TODO these next two should form an enum, not two options
+    pub module_id: ModuleID,
+    pub number: usize, // per-program module index, used to prefix labels
+}
+
+impl IRModule {
+    /// Resolve the label to emit for a call to `name`.
+    ///
+    /// Extern words keep their bare name; a word exported by an imported
+    /// module gets that module's `_m<N>_` prefix (when several imports export
+    /// the same word, the last one wins); anything else is assumed local.
+    ///
+    /// Takes `&str` instead of `&String` (the idiomatic borrow); existing
+    /// callers passing `&String` still work via deref coercion.
+    pub fn get_label_for_call(&self, name: &str) -> String {
+        if self.externs.iter().any(|e| e == name) {
+            return name.to_string();
+        }
+        let mut found: Option<usize> = None;
+        for imported in &self.imports {
+            let imported = imported.borrow();
+            if imported.exports.iter().any(|e| e == name) {
+                // Don't break here, since the last one should win.
+                found = Some(imported.number);
+            }
+        }
+        match found {
+            Some(number) => format!("_m{}_{}", number, name),
+            // TODO check if it's even a word locally. If not, bail.
+            None => format!("_m{}_{}", self.number, name),
+        }
+    }
+
+    /// Label for a word defined in this module.
+    pub fn get_label(&self, name: &str) -> String {
+        format!("_m{}_{}", self.number, name)
+    }
+}
--- /dev/null
+use serde_yaml::{from_str, to_string, Error};
+use serde_derive::{Serialize, Deserialize};
+use crate::ir::IR;
+
+// This is like an .o file.
+/// Serializable compilation output: code (`text`) and data (`data`) sections.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct IRObject {
+    pub text: Vec<IR>,
+    pub data: Vec<IR>,
+}
+
+impl IRObject {
+    /// Serialize to a YAML string.
+    pub fn to_s(&self) -> Result<String, Error> {
+        to_string(self)
+    }
+
+    /// Deserialize from a YAML string produced by `to_s`.
+    pub fn from_s(source: &str) -> Result<Self, Error> {
+        from_str(source)
+    }
+}
--- /dev/null
+[package]
+name = "sorel-parser"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+sorel-tokenizer = { workspace = true }
+anyhow = "1.0.100"
--- /dev/null
+use sorel_tokenizer::Token;
+use anyhow::{Result, bail};
+
+/// A named word and the tokens of its body.
+#[derive(Debug)]
+pub struct WordDefinition<'a> {
+    pub name: &'a str,
+    pub instructions: Vec<Token<'a>>,
+}
+
+/// Parsed form of one source file; borrows from the tokenized source text.
+#[derive(Debug)]
+pub struct Module<'a> {
+    pub words: Vec<WordDefinition<'a>>,
+    pub imports: Vec<&'a str>, // import path strings
+    pub exports: Vec<&'a str>, // exported word names
+    pub externs: Vec<&'a str>, // externally-resolved word names
+}
+
+impl<'a> Module<'a> {
+    /// Parse a token stream into a `Module`.
+    ///
+    /// Recognized forms:
+    /// - `: name ... ;` defines the word `name`;
+    /// - `import "path"` records an import (the path is a string token);
+    /// - `export word` / `extern word` record export / extern declarations;
+    /// - any other top-level token becomes part of an implicit `main` word,
+    ///   which is only emitted when `is_entrypoint` is true.
+    ///
+    /// Errors on a definition inside a definition, a stray `;`, a non-word
+    /// token after `:`, or an unterminated definition.
+    pub fn parse(input: Vec<Token<'a>>, is_entrypoint: bool) -> Result<Self> {
+        let mut result = vec![];
+        let mut main = vec![];
+        let mut exports = vec![];
+        let mut imports = vec![];
+        let mut externs = vec![];
+        // The definition currently being filled, if any.
+        let mut current_word: Option<WordDefinition> = None;
+        // One-token lookahead flags: the previous token was `:` / `import` /
+        // `export` / `extern`, so the next token is its operand.
+        let mut about_to_start_word_def = false;
+        let mut last_was_import = false;
+        let mut last_was_export = false;
+        let mut last_was_extern = false;
+
+        for token in input {
+            if about_to_start_word_def {
+                // The token right after `:` names the new word.
+                if let Token::Word(name) = token {
+                    current_word = Some(WordDefinition {
+                        name,
+                        instructions: vec![],
+                    });
+                    about_to_start_word_def = false;
+                    continue;
+                } else {
+                    bail!("{:?} is not a valid word name!", token);
+                }
+            } else if let Token::Word(word) = token {
+                if word == ":" {
+                    if current_word.is_some() {
+                        bail!("can't define words inside word definitions!");
+                    }
+                    about_to_start_word_def = true;
+                    continue;
+                }
+                if word == ";" {
+                    // `;` closes the current definition.
+                    let word = current_word.take();
+                    if let Some(word) = word {
+                        result.push(word);
+                        continue;
+                    } else {
+                        bail!("`;` must be at the end of a word definition");
+                    }
+                }
+            }
+            if let Some(ref mut current_word) = current_word {
+                // Inside a definition every token is part of the body.
+                current_word.instructions.push(token);
+            } else {
+                match token {
+                    Token::Word(word) => {
+                        if word == "import" {
+                            last_was_import = true;
+                        } else if word == "export" {
+                            last_was_export = true;
+                        } else if word == "extern" {
+                            last_was_extern = true;
+                        } else if last_was_export {
+                            exports.push(word);
+                            last_was_export = false;
+                        } else if last_was_extern {
+                            externs.push(word);
+                            last_was_extern = false;
+                        } else {
+                            main.push(token.clone());
+                        }
+                    },
+                    Token::String(string) => {
+                        // NOTE(review): `last_was_import` is only cleared by a
+                        // following string token, so `import` followed by a
+                        // non-string would leak the flag onto a later string —
+                        // confirm that can't happen in valid input.
+                        if last_was_import {
+                            imports.push(string);
+                            last_was_import = false;
+                        } else {
+                            main.push(token.clone());
+                        }
+                    },
+                    _ => {
+                        main.push(token.clone());
+                    }
+                };
+            }
+        }
+
+        if about_to_start_word_def || current_word.is_some() {
+            bail!("unfinished word definition!");
+        }
+
+        if is_entrypoint {
+            // Leftover top-level tokens become the program entry point.
+            result.push(WordDefinition {
+                name: "main",
+                instructions: main,
+            });
+        }
+
+        Ok(Module { words: result, imports, exports, externs })
+    }
+
+    /// Dump words and their bodies to stdout (test helper).
+    #[cfg(test)]
+    pub fn debug_print(&self) {
+        for word in &self.words {
+            println!("{}", word.name);
+            for instruction in &word.instructions {
+                println!("  {:?}", instruction);
+            }
+        }
+    }
+}
+
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn try_some_parsing() {
+        // The tokenizer now lives in the `sorel-tokenizer` crate, so it must
+        // be reached as `sorel_tokenizer::tokenize` — the old
+        // `crate::tokenizer` path no longer exists in this crate.
+        let result = Module::parse(sorel_tokenizer::tokenize("
+: hello world 16 \"planet\" ;
+: soup chicken 4.5 hello ;
+
+hello soup
+").unwrap(), true).unwrap();
+        // Two explicit definitions plus the synthesized `main` entry point.
+        assert_eq!(result.words.len(), 3);
+        result.debug_print();
+    }
+}
--- /dev/null
+[package]
+name = "sorel-tokenizer"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+anyhow = "1.0.100"
--- /dev/null
+use anyhow::{Result, anyhow};
+
+/// A lexical token; `Word` and `String` borrow slices of the source text.
+#[derive(Debug, Clone)]
+pub enum Token<'a> {
+    Word(&'a str),
+    String(&'a str), // raw text between the quotes (escape backslashes kept)
+    NumU8(u8),
+    NumI8(i8),
+    NumU16(u16),
+    NumI16(i16),
+    NumU32(u32),
+    NumI32(i32),
+    NumU64(u64),
+    NumI64(i64),
+    NumF32(f32),
+    NumF64(f64),
+}
+
+impl<'a> Token<'a> {
+    /// Classify a whitespace-delimited token as a word or a numeric literal.
+    ///
+    /// A lone `-` is a word. Anything else starting with `-` or a digit is a
+    /// number, optionally suffixed with `:<type>` (e.g. `43:f32`, `-57:i8`).
+    /// Untyped numbers default to `f64` (contains `.`), `i64` (negative), or
+    /// `u64`.
+    fn parse_word_or_num(input: &'a str) -> Result<Token<'a>> {
+        if input == "-" {
+            return Ok(Token::Word(input))
+        }
+
+        // we're assuming any token starting with `-` with length greater than one
+        // is a negative number
+        if input.starts_with('-') || input.chars().nth(0).map(|x| x.is_numeric()).unwrap_or(false) {
+            if input.contains(':') {
+                let mut splat = input.split(':');
+                let num = splat.next().ok_or(anyhow!("no number found"))?;
+                let typ = splat.next().ok_or(anyhow!("no number type found"))?;
+                match typ {
+                    "u8" => Ok(Token::NumU8(num.parse()?)),
+                    "i8" => Ok(Token::NumI8(num.parse()?)),
+                    "u16" => Ok(Token::NumU16(num.parse()?)),
+                    "i16" => Ok(Token::NumI16(num.parse()?)),
+                    "u32" => Ok(Token::NumU32(num.parse()?)),
+                    "i32" => Ok(Token::NumI32(num.parse()?)),
+                    "u64" => Ok(Token::NumU64(num.parse()?)),
+                    "i64" => Ok(Token::NumI64(num.parse()?)),
+                    "f32" => Ok(Token::NumF32(num.parse()?)),
+                    "f64" => Ok(Token::NumF64(num.parse()?)),
+                    // This function already reports bad literals through
+                    // `Result`; an unknown suffix (e.g. `1:u7`) is a malformed
+                    // token, not a compiler bug, so return an error instead of
+                    // panicking.
+                    _ => Err(anyhow!("unknown number type: {}", typ)),
+                }
+            } else if input.contains('.') {
+                Ok(Token::NumF64(input.parse()?))
+            } else if input.starts_with('-') {
+                Ok(Token::NumI64(input.parse()?))
+            } else {
+                Ok(Token::NumU64(input.parse()?))
+            }
+        } else {
+            Ok(Token::Word(input))
+        }
+    }
+}
+
+// TODO really want an iterator, not a vector
+/// Split `input` into tokens.
+///
+/// Handles double-quoted strings (a backslash keeps the next quote from
+/// terminating the string), `\`-to-end-of-line comments, and `( ... )` doc
+/// comments (the `(` must stand alone as its own word).
+///
+/// NOTE(review): `index` counts characters but is used to byte-slice
+/// (`&input[start..index]`) and is compared against `input.len()` (bytes),
+/// so non-ASCII input would mis-slice or panic — confirm inputs are
+/// ASCII-only, or switch to `char_indices()`.
+pub fn tokenize<'a>(input: &'a str) -> Result<Vec<Token<'a>>> {
+    let mut result = vec![];
+    // Start positions of an in-progress string literal / word-or-number.
+    let mut string_start: Option<usize> = None;
+    let mut word_or_num_start: Option<usize> = None;
+    let mut last_is_backslash = false;
+    let mut last_is_whitespace = true;
+    let mut in_doc_comment = false;
+    let mut in_line_comment = false;
+    let mut index = 0;
+    let mut first_char = true;
+
+
+    for char in input.chars() {
+        if first_char {
+            first_char = false;
+        } else {
+            index += 1;
+        }
+
+        if in_doc_comment {
+            if char == ')' {
+                in_doc_comment = false;
+                last_is_whitespace = true; // not really true, but means don't need space after
+            }
+            continue;
+        }
+
+        if in_line_comment {
+            word_or_num_start = None;
+            if char == '\n' {
+                in_line_comment = false;
+                last_is_whitespace = true; // not really true, but means don't need space after
+            }
+            continue;
+        }
+
+        if char == '"' {
+            if let Some(start) = string_start {
+                // Closing quote, unless escaped by a preceding backslash.
+                if !last_is_backslash {
+                    result.push(Token::String(&input[start..index]));
+                    string_start = None;
+                }
+            } else {
+                // Opening quote: the string's text begins after it.
+                string_start = Some(index + 1)
+            }
+            last_is_backslash = false;
+            last_is_whitespace = false;
+            continue;
+        }
+
+
+        if string_start.is_some() {
+            // Inside a string only the escape state matters.
+            last_is_backslash = char == '\\';
+            continue;
+        }
+
+        if char.is_whitespace() {
+            if last_is_backslash {
+                // `\` followed by whitespace starts a line comment.
+                in_line_comment = true;
+            } else if !last_is_whitespace && let Some(start) = word_or_num_start {
+                let token = &input[start..index];
+                if token == "(" {
+                    in_doc_comment = true;
+                } else {
+                    result.push(Token::parse_word_or_num(&input[start..index])?);
+                }
+                word_or_num_start = None;
+            }
+            last_is_whitespace = true;
+            last_is_backslash = false;
+            continue;
+        }
+
+        last_is_backslash = char == '\\';
+
+        if index == input.len() - 1 {
+            // Input ends without trailing whitespace: flush the final token.
+            if !last_is_whitespace && let Some(start) = word_or_num_start {
+                result.push(Token::parse_word_or_num(&input[start..])?);
+            }
+            continue;
+        }
+
+        if last_is_whitespace { // start of word or num (we already handled strings)
+            word_or_num_start = Some(index);
+            last_is_whitespace = false;
+        }
+    }
+    Ok(result)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // These tests previously only printed the result and could never fail;
+    // each now asserts the expected token count.
+
+    #[test]
+    fn try_some_tokenizing() {
+        let result = tokenize("
+
+  \\ soup
+  2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup
+");
+        println!("result: {:?}", result);
+        // `\ soup` is a line comment; the next line holds ten real tokens.
+        assert_eq!(result.unwrap().len(), 10);
+    }
+
+    #[test]
+    fn comments() {
+        let result = tokenize("
+  (
+    foo
+    bar
+  )
+  : baz ( x y -- z )
+    chicken
+    soup
+  ;
+  ");
+        println!("result: {:?}", result);
+        // Both `( ... )` doc comments are skipped: `:`, `baz`, `chicken`,
+        // `soup`, `;` remain.
+        assert_eq!(result.unwrap().len(), 5);
+    }
+
+    #[test]
+    fn strings() {
+        let result = tokenize("
+dup \\ ( stuff )
+\"hello!\"
+");
+        println!("result: {:?}", result);
+        // `dup` and the string survive; the line comment is dropped.
+        assert_eq!(result.unwrap().len(), 2);
+    }
+}
[dependencies]
sorel-ir = { workspace = true }
+sorel-tokenizer = { workspace = true }
+sorel-parser = { workspace = true }
anyhow = "1.0.100"
-use crate::parser::Module;
-use crate::tokenizer::{Token, tokenize};
-
+use sorel_parser::Module;
use sorel_ir::*;
+use sorel_tokenizer::tokenize;
use std::collections::{HashSet, HashMap};
use std::path::PathBuf;
use anyhow::{Result, bail, anyhow};
-macro_rules! push_num {
- ($num:ident) => { IR::StackPush(*$num as u64) };
- ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) };
-}
-
-type WrappedIRModule = Rc<RefCell<IRModule>>;
-
-#[derive(Debug, PartialEq, Clone)]
-enum ModuleID {
- SourceFile(PathBuf),
- StdSpecifier(String)
-}
-
-impl Default for ModuleID {
- fn default() -> Self {
- ModuleID::StdSpecifier(String::from(""))
- }
-}
-
-impl ToString for ModuleID {
- fn to_string(&self) -> String {
- match self {
- ModuleID::SourceFile(f) => f.to_string_lossy().to_string(),
- ModuleID::StdSpecifier(s) => s.clone()
- }
- }
-}
-
-#[derive(Debug, Default)]
-struct IRModule {
- data: Vec<IR>,
- text: Vec<IR>,
- imports: Vec<WrappedIRModule>,
- exports: Vec<String>,
- externs: Vec<String>,
- // TODO these next two should form an enum, not two options
- module_id: ModuleID,
- number: usize,
-}
-
-impl IRModule {
- fn get_label_for_call(&self, name: &String) -> String {
- if self.externs.contains(name) {
- return name.clone();
- }
- let mut found: Option<usize> = None;
- for imported in &self.imports {
- let imported = imported.borrow();
- if imported.exports.contains(name) {
- found = Some(imported.number);
- // Don't break here, since the last one should win.
- }
- }
- if let Some(found) = found {
- format!("_m{}_{}", found, name)
- } else {
- // TODO check if it's even a word locally. If not, bail.
- format!("_m{}_{}", self.number, name)
- }
- }
-
- fn get_label(&self, name: &String) -> String {
- format!("_m{}_{}", self.number, name)
- }
-}
-
#[derive(Default)]
pub(crate) struct ImportTree {
data: Vec<IR>,
let externs = module.externs.iter().map(|s| s.to_string()).collect();
- text.push(module.words.iter().map(|def| {
+ text.push(module.words.iter().flat_map(|def| {
let mut body = def.instructions.iter().map(|inst| {
- let mapped_ir = match inst {
- Token::Word(word) => {
- match *word {
- "@" => IR::Load,
- "@:8" => IR::Load8,
- "@:16" => IR::Load16,
- "@:32" => IR::Load32,
- "!" => IR::Store,
- "!:8" => IR::Store8,
- "!:16" => IR::Store16,
- "!:32" => IR::Store32,
- "dup" => IR::Dup,
- "swap" => IR::Swap,
- "drop" => IR::Drop,
- "over" => IR::Over,
- "rot" => IR::Rot,
- "sp" => IR::StackPointer,
- "stackbottom" => IR::StackBottom,
- "if" => IR::If,
- "else" => IR::Else,
- "endif" => IR::EndIf,
- "loop" => IR::Loop,
- "endloop" => IR::EndLoop,
- "call" => IR::CallPtr,
- "=" => IR::Equals,
- ">" => IR::GreaterThan,
- "<" => IR::LessThan,
- "+" => IR::AddU64,
- "-" => IR::SubtractU64,
- "*" => IR::MultiplyU64,
- "/" => IR::DivideU64,
- "%" => IR::ModU64,
- "|" => IR::BitwiseOr,
- "sys0" => IR::Sys0,
- "sys1" => IR::Sys1,
- "sys2" => IR::Sys2,
- "sys3" => IR::Sys3,
- "sys4" => IR::Sys4,
- "sys5" => IR::Sys5,
- "sys6" => IR::Sys6,
- // TODO num type specfic math like `+:i32`, etc.
- _ => {
- if word.starts_with("'") {
- let actual_word = &word[1..];
- IR::WordPointer(String::from(actual_word))
- } else {
- IR::Call(String::from(*word))
- }
- }
- }
- },
- Token::String(text) => {
- let string_label = format!("string_{}", data.len());
- data.push(IR::StringDef(string_label.clone(), String::from(*text)));
- IR::StackPushString(string_label)
- },
- Token::NumU8(num) => push_num!(num),
- Token::NumI8(num) => push_num!(num, u8),
- Token::NumU16(num) => push_num!(num),
- Token::NumI16(num) => push_num!(num, u16),
- Token::NumU32(num) => push_num!(num),
- Token::NumI32(num) => push_num!(num, u32),
- Token::NumU64(num) => push_num!(num),
- Token::NumI64(num) => push_num!(num),
- Token::NumF32(num) => push_num!(num),
- Token::NumF64(num) => push_num!(num),
- };
- mapped_ir
+ IR::from_token(inst, &mut data)
}).collect::<Vec<_>>();
let mut result = vec![IR::Label(def.name.to_string())];
result.append(&mut body);
result.push(IR::Ret);
result
- }).flatten().collect::<Vec<_>>());
+ }).collect::<Vec<_>>());
let number = self.module_count;
self.module_count += 1;
-mod tokenizer;
-mod parser;
mod ir;
mod riscv_asm_codegen;
let mut asm_path = PathBuf::from(filename);
asm_path.set_extension("asm");
let mut output = File::create(asm_path)?;
- write!(output, "{}\n", generator.assembly()?)?;
+ writeln!(output, "{}", generator.assembly()?)?;
Ok(())
}
+++ /dev/null
-use crate::tokenizer::Token;
-use anyhow::{Result, bail};
-
-#[derive(Debug)]
-pub struct WordDefinition<'a> {
- pub name: &'a str,
- pub instructions: Vec<Token<'a>>,
-}
-
-#[derive(Debug)]
-pub struct Module<'a> {
- pub words: Vec<WordDefinition<'a>>,
- pub imports: Vec<&'a str>,
- pub exports: Vec<&'a str>,
- pub externs: Vec<&'a str>,
-}
-
-impl<'a> Module<'a> {
- pub fn parse(input: Vec<Token<'a>>, is_entrypoint: bool) -> Result<Self> {
- let mut result = vec![];
- let mut main = vec![];
- let mut exports = vec![];
- let mut imports = vec![];
- let mut externs = vec![];
- let mut current_word: Option<WordDefinition> = None;
- let mut about_to_start_word_def = false;
- let mut last_was_import = false;
- let mut last_was_export = false;
- let mut last_was_extern = false;
-
- for token in input {
- if about_to_start_word_def {
- if let Token::Word(name) = token {
- current_word = Some(WordDefinition {
- name,
- instructions: vec![],
- });
- about_to_start_word_def = false;
- continue;
- } else {
- bail!("{:?} is not a valid word name!", token);
- }
- } else if let Token::Word(word) = token {
- if word == ":" {
- if current_word.is_some() {
- bail!("can't define words inside word definitions!");
- }
- about_to_start_word_def = true;
- continue;
- }
- if word == ";" {
- let word = current_word.take();
- if let Some(word) = word {
- result.push(word);
- continue;
- } else {
- bail!("`;` must be at the end of a word definition");
- }
- }
- }
- if let Some(ref mut current_word) = current_word {
- current_word.instructions.push(token);
- } else {
- match token {
- Token::Word(word) => {
- if word == "import" {
- last_was_import = true;
- } else if word == "export" {
- last_was_export = true;
- } else if word == "extern" {
- last_was_extern = true;
- } else {
- if last_was_export {
- exports.push(word);
- last_was_export = false;
- } else if last_was_extern {
- externs.push(word);
- last_was_extern = false;
- } else {
- main.push(token.clone());
- }
- }
- },
- Token::String(string) => {
- if last_was_import {
- imports.push(string);
- last_was_import = false;
- } else {
- main.push(token.clone());
- }
- },
- _ => {
- main.push(token.clone());
- }
- };
- }
- }
-
- if about_to_start_word_def || current_word.is_some() {
- bail!("unfinished word definition!");
- }
-
- if is_entrypoint {
- result.push(WordDefinition {
- name: "main",
- instructions: main,
- });
- }
-
- Ok(Module { words: result, imports, exports, externs })
- }
-
- #[cfg(test)]
- pub fn debug_print(&self) {
- for word in &self.words {
- println!("{}", word.name);
- for instruction in &word.instructions {
- println!(" {:?}", instruction);
- }
- }
- }
-}
-
-
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn try_some_parsing() {
- let result = Module::parse(crate::tokenizer::tokenize("
-: hello world 16 \"planet\" ;
-: soup chicken 4.5 hello ;
-
-hello soup
-").unwrap(), true).unwrap();
- result.debug_print();
- }
-}
},
IR::Else => {
self.label("# else");
- let if_counter = if_stack.last().unwrap().clone();
+ let if_counter = *if_stack.last().unwrap();
self.line(format!("j _endif_{}", if_counter));
self.label(format!("_else_{}:", if_counter));
seen_else.insert(if_counter);
IR::EndIf => {
self.label("# endif");
let stack = &mut if_stack;
- let if_counter = stack.last().unwrap().clone();
+ let if_counter = *stack.last().unwrap();
if !seen_else.contains(&if_counter) {
self.label(format!("_else_{}:", if_counter));
} else {
},
IR::EndLoop => {
let stack = &mut loop_stack;
- let loop_counter = stack.last().unwrap().clone();
+ let loop_counter = *stack.last().unwrap();
self.line(format!("j _loop_{}", loop_counter));
self.label(format!("_endloop_{}:", loop_counter));
stack.pop();
+++ /dev/null
-use anyhow::{Result, anyhow};
-
-#[derive(Debug, Clone)]
-pub enum Token<'a> {
- Word(&'a str),
- String(&'a str),
- NumU8(u8),
- NumI8(i8),
- NumU16(u16),
- NumI16(i16),
- NumU32(u32),
- NumI32(i32),
- NumU64(u64),
- NumI64(i64),
- NumF32(f32),
- NumF64(f64),
-}
-
-impl<'a> Token<'a>{
- fn parse_word_or_num(input: &'a str) -> Result<Token<'a>> {
- if input == "-" {
- return Ok(Token::Word(input))
- }
-
- // we're assuming any token starting with `-` with length greater than one
- // is a negative number
- if input.starts_with('-') || input.chars().nth(0).map(|x| x.is_numeric()).unwrap_or(false) {
- if input.contains(':') {
- let mut splat = input.split(':');
- let num = splat.next().ok_or(anyhow!("no number found"))?;
- let typ = splat.next().ok_or(anyhow!("no number type found"))?;
- match typ {
- "u8" => Ok(Token::NumU8(num.parse()?)),
- "i8" => Ok(Token::NumI8(num.parse()?)),
- "u16" => Ok(Token::NumU16(num.parse()?)),
- "i16" => Ok(Token::NumI16(num.parse()?)),
- "u32" => Ok(Token::NumU32(num.parse()?)),
- "i32" => Ok(Token::NumI32(num.parse()?)),
- "u64" => Ok(Token::NumU64(num.parse()?)),
- "i64" => Ok(Token::NumI64(num.parse()?)),
- "f32" => Ok(Token::NumF32(num.parse()?)),
- "f64" => Ok(Token::NumF64(num.parse()?)),
- _ => panic!("unknown number type")
- }
- } else {
- if input.contains('.') {
- Ok(Token::NumF64(input.parse()?))
- } else if input.starts_with('-') {
- Ok(Token::NumI64(input.parse()?))
- } else {
- Ok(Token::NumU64(input.parse()?))
- }
- }
- } else {
- Ok(Token::Word(input))
- }
- }
-}
-
-// TODO really want an iterator, not a vector
-pub fn tokenize<'a>(input: &'a str) -> Result<Vec<Token<'a>>> {
- let mut result = vec![];
- let mut string_start: Option<usize> = None;
- let mut word_or_num_start: Option<usize> = None;
- let mut last_is_backslash = false;
- let mut last_is_whitespace = true;
- let mut in_doc_comment = false;
- let mut in_line_comment = false;
- let mut index = 0;
- let mut first_char = true;
-
-
- for char in input.chars() {
- if first_char {
- first_char = false;
- } else {
- index += 1;
- }
-
- if in_doc_comment {
- if char == ')' {
- in_doc_comment = false;
- last_is_whitespace = true; // not really true, but means don't need space after
- }
- continue;
- }
-
- if in_line_comment {
- word_or_num_start = None;
- if char == '\n' {
- in_line_comment = false;
- last_is_whitespace = true; // not really true, but means don't need space after
- }
- continue;
- }
-
- if char == '"' {
- if let Some(start) = string_start {
- if !last_is_backslash {
- result.push(Token::String(&input[start..index]));
- string_start = None;
- }
- } else {
- string_start = Some(index + 1)
- }
- last_is_backslash = false;
- last_is_whitespace = false;
- continue;
- }
-
-
- if string_start.is_some() {
- last_is_backslash = char == '\\';
- continue;
- }
-
- if char.is_whitespace() {
- if last_is_backslash {
- in_line_comment = true;
- } else if !last_is_whitespace && let Some(start) = word_or_num_start {
- let token = &input[start..index];
- if token == "(" {
- in_doc_comment = true;
- } else {
- result.push(Token::parse_word_or_num(&input[start..index])?);
- }
- word_or_num_start = None;
- }
- last_is_whitespace = true;
- last_is_backslash = false;
- continue;
- }
-
- last_is_backslash = char == '\\';
-
- if index == input.len() - 1 {
- if !last_is_whitespace && let Some(start) = word_or_num_start {
- result.push(Token::parse_word_or_num(&input[start..])?);
- }
- continue;
- }
-
- if last_is_whitespace { // start of word or num (we already handled strings)
- word_or_num_start = Some(index);
- last_is_whitespace = false;
- }
- }
- Ok(result)
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn try_some_tokenizing() {
- let result = tokenize("
-
- \\ soup
- 2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup
-");
- println!("result: {:?}", result);
- }
-
- #[test]
- fn comments() {
- let result = tokenize("
- (
- foo
- bar
- )
- : baz ( x y -- z )
- chicken
- soup
- ;
- ");
- println!("result: {:?}", result);
- }
-
- #[test]
- fn strings() {
- let result = tokenize("
-dup \\ ( stuff )
-\"hello!\"
-");
- println!("result: {:?}", result);
- }
-}