+++ /dev/null
-target
-qemu/machine
-*.o
-*.asm
+++ /dev/null
-# This file is automatically @generated by Cargo.
-# It is not intended for manual editing.
-version = 4
-
-[[package]]
-name = "anyhow"
-version = "1.0.100"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
-
-[[package]]
-name = "equivalent"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
-
-[[package]]
-name = "hashbrown"
-version = "0.16.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
-
-[[package]]
-name = "hylo-interpret"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "hylo-ir",
- "syscalls",
-]
-
-[[package]]
-name = "hylo-ir"
-version = "0.1.0"
-dependencies = [
- "serde",
- "serde_derive",
- "serde_yaml",
-]
-
-[[package]]
-name = "hyloc"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "hylo-interpret",
- "hylo-ir",
-]
-
-[[package]]
-name = "indexmap"
-version = "2.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
-dependencies = [
- "equivalent",
- "hashbrown",
-]
-
-[[package]]
-name = "itoa"
-version = "1.0.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
-
-[[package]]
-name = "proc-macro2"
-version = "1.0.103"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
-dependencies = [
- "unicode-ident",
-]
-
-[[package]]
-name = "quote"
-version = "1.0.42"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
-dependencies = [
- "proc-macro2",
-]
-
-[[package]]
-name = "ryu"
-version = "1.0.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
-
-[[package]]
-name = "serde"
-version = "1.0.228"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
-dependencies = [
- "serde_core",
- "serde_derive",
-]
-
-[[package]]
-name = "serde_core"
-version = "1.0.228"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
-dependencies = [
- "serde_derive",
-]
-
-[[package]]
-name = "serde_derive"
-version = "1.0.228"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "serde_repr"
-version = "0.1.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
-[[package]]
-name = "serde_yaml"
-version = "0.9.34+deprecated"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
-dependencies = [
- "indexmap",
- "itoa",
- "ryu",
- "serde",
- "unsafe-libyaml",
-]
-
-[[package]]
-name = "syn"
-version = "2.0.111"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
-dependencies = [
- "proc-macro2",
- "quote",
- "unicode-ident",
-]
-
-[[package]]
-name = "syscalls"
-version = "0.7.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90db46b5b4962319605d435986c775ea45a0ad2561c09e1d5372b89afeb49cf4"
-dependencies = [
- "serde",
- "serde_repr",
-]
-
-[[package]]
-name = "unicode-ident"
-version = "1.0.22"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
-
-[[package]]
-name = "unsafe-libyaml"
-version = "0.2.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
+++ /dev/null
-[workspace]
-
-resolver = "3"
-members = ["hylo-ir","hyloc", "hylo-interpret"]
-
-
-[workspace.dependencies]
-hylo-ir = { path = "./hylo-ir", version = "0.1.0" }
-hylo-interpret = { path = "./hylo-interpret", version = "0.1.0" }
+++ /dev/null
-# hylo-lang
-
-The name means "it's high-level and low-level at the same time".
-
-
-## TODO
-
-* [x] Imports
-* [x] Syscalls
-* [ ] Structs
-* [ ] many, many more things
+++ /dev/null
-: mmap 9 sys6 ; \ six-argument syscall number 9 (mmap on x86_64 Linux; NOTE(review): the number differs on riscv64 - confirm target)
-
-: PROT_READ 1 ;
-: PROT_WRITE 2 ;
-: MAP_PRIVATE 2 ;
-: MAP_ANONYMOUS 32 ;
-
-: ALLOC_PROT PROT_READ PROT_WRITE | ; \ readable and writable
-: ALLOC_MAP MAP_PRIVATE MAP_ANONYMOUS | ; \ private mapping with no backing file
-
-: alloc 0 swap ALLOC_PROT ALLOC_MAP -1:i16 0 mmap ; \ ( size -- ... ) arguments are pushed as: addr=0 size prot flags fd=-1 offset=0
-
-1024 alloc
-putn \ the interpreter prints the top cell without popping it; after a syscall that is the error code
-swap
-putn \ after the swap the top cell is the syscall result (the mapped address)
+++ /dev/null
-import "./put2.hylo"
-
-: fib \ ( n -- fib(n) ) naive doubly-recursive Fibonacci
- dup 1 > if \ only recurse when n > 1; otherwise n itself is the answer
- dup 1 - fib \ ( n fib(n-1) )
- swap 2 - fib \ ( fib(n-1) fib(n-2) )
- +
- endif
-;
-
-0 fib putn
-1 fib putn
-2 fib putn
-3 fib putn
-4 fib putn
-5 fib putn
-6 fib putn
-7 fib putn
-8 fib putn
-9 fib putn
-10 fib putn
-
-5 fib 6 fib put2 \ put2 comes from the imported module; NOTE(review): putn does not pop in the interpreter, so this prints the top cell twice - confirm intent
+++ /dev/null
-: put2 putn putn ; \ runs putn twice
-
-: foobar dup dup ; \ not exported, so only visible inside this module
-
-export put2
+++ /dev/null
-: getpid \ ( -- pid )
- 39 sys0 \ zero-argument syscall 39 (getpid on x86_64 Linux); the interpreter pushes the result, then an error code
- drop \ discard the error code, leaving the result on top
-;
-
-getpid putn
+++ /dev/null
-../target/debug/hyloc fib.hylo > fib.asm # compile the hylo source; hyloc prints the generated assembly on stdout
-riscv64-unknown-linux-gnu-as -o fib.o fib.asm # assemble with the riscv64 cross toolchain
-riscv64-unknown-linux-gnu-cc -O1 -no-pie -o test fib.o putn.c -nostartfiles # link with the C putn helper; -nostartfiles because the assembly provides _start
-./test # NOTE(review): on a non-riscv64 host this presumably relies on qemu user-mode emulation - confirm
+++ /dev/null
-\ foo bar
-: fib \ ( n -- fib(n) ) same recursive definition as in fib.hylo, one token per line
- dup
- 1
- >
- if
- dup
- 1 - fib
- swap
- 2
- -
- fib
- +
- endif
-;
-
-0 fib putn drop \ putn is followed by an explicit drop, so each result is removed after printing
-1 fib putn drop
-2 fib putn drop
-3 fib putn drop
-4 fib putn drop
-5 fib putn drop
-6 fib putn drop
-7 fib putn drop
+++ /dev/null
-#include <stdio.h>
-
-extern unsigned long data_stack_end;
-register unsigned long * stack_pointer asm("s2");
-
-/*
- * Print every live cell of the data stack on one line, oldest cell first.
- *
- * The generated code keeps the data stack pointer in s2 and pushes by
- * decrementing it before storing, so the live cells occupy
- * [stack_pointer, &data_stack_end). &data_stack_end itself is one cell past
- * the stack and must never be dereferenced.
- */
-void putn() {
-    unsigned long * stack_index = &data_stack_end;
-    printf("stack:");
-    /* `>` rather than `!=` so an underflowed stack pointer cannot loop forever. */
-    while (stack_index > stack_pointer) {
-        stack_index -= 1;
-        /* Cells are unsigned 64-bit values in the IR, hence %lu. */
-        printf(" %lu", *stack_index);
-    }
-    printf("\n");
-}
-
-
+++ /dev/null
-{
- "nodes": {
- "nixpkgs": {
- "locked": {
- "lastModified": 1765425892,
- "narHash": "sha256-jlQpSkg2sK6IJVzTQBDyRxQZgKADC2HKMRfGCSgNMHo=",
- "owner": "nixos",
- "repo": "nixpkgs",
- "rev": "5d6bdbddb4695a62f0d00a3620b37a15275a5093",
- "type": "github"
- },
- "original": {
- "owner": "nixos",
- "ref": "nixpkgs-unstable",
- "repo": "nixpkgs",
- "type": "github"
- }
- },
- "root": {
- "inputs": {
- "nixpkgs": "nixpkgs"
- }
- }
- },
- "root": "root",
- "version": 7
-}
+++ /dev/null
-{
-  # Development shell for hylo-lang (the previous description, "uxn11", was
-  # left over from another project).
-  description = "hylo-lang development shell";
-
-  inputs = {
-    nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
-  };
-
-  outputs = {nixpkgs, ...}: let
-    system = "x86_64-linux";
-    pkgs = import nixpkgs {
-      # uncomment the next bit to install cross-compiler toolchain
-
-      # crossSystem = {
-      #   config = "riscv64-unknown-linux-gnu";
-      #   # Or if you want to build against MUSL:
-      #   # config = "riscv64-unknown-linux-musl";
-      # };
-      inherit system;
-    };
-  in {
-    devShells.${system}.default = pkgs.mkShell {
-      # qemu is the only package provided by this shell.
-      packages = [
-        pkgs.qemu
-      ];
-    };
-  };
-}
+++ /dev/null
-[package]
-name = "hylo-interpret"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-anyhow = "1.0.100"
-hylo-ir = { workspace = true }
-syscalls = "0.7.0"
+++ /dev/null
-use hylo_ir::*;
-
-use std::collections::HashMap;
-
-use syscalls::*;
-use anyhow::{Result, anyhow};
-
-/// Executes a linked [`IRObject`] directly on the host machine.
-///
-/// Truth values follow the IR convention: `0` is true and every other value
-/// is false.
-pub struct Interpreter<'a> {
-    /// Program being executed.
-    module: &'a IRObject,
-    /// Operand stack; the last element is the top of the stack.
-    data_stack: Vec<u64>,
-    /// Index into `module.text` of the instruction being executed.
-    instruction_pointer: usize,
-    /// Indices of the `Call` instructions that are currently active.
-    return_stack: Vec<usize>,
-    /// Maps every label name to its index in `module.text`.
-    labels: HashMap<String, usize>,
-    /// String definitions from `module.data`, keyed by label.
-    strings: HashMap<String, String>,
-}
-
-impl<'a> Interpreter<'a> {
-    /// Value pushed by a comparison that holds.
-    const TRUE: u64 = 0;
-    /// Value pushed by a comparison that does not hold (all bits set).
-    const FALSE: u64 = u64::MAX;
-
-    /// Indexes the labels and strings of `ir_mod` and positions the
-    /// instruction pointer on the `main` label.
-    ///
-    /// # Errors
-    ///
-    /// Fails when the program has no `main` label.
-    pub fn new(ir_mod: &'a IRObject) -> Result<Self> {
-        let labels: HashMap<String, usize> = ir_mod
-            .text
-            .iter()
-            .enumerate()
-            .filter_map(|(index, instruction)| match instruction {
-                IR::Label(name) => Some((name.clone(), index)),
-                _ => None,
-            })
-            .collect();
-        let instruction_pointer = *labels
-            .get("main")
-            .ok_or_else(|| anyhow!("no main word found!"))?;
-
-        let strings = ir_mod
-            .data
-            .iter()
-            .filter_map(|entry| match entry {
-                IR::StringDef(label, string) => Some((label.clone(), string.clone())),
-                _ => None,
-            })
-            .collect();
-
-        Ok(Self {
-            module: ir_mod,
-            data_stack: vec![],
-            instruction_pointer,
-            return_stack: vec![],
-            labels,
-            strings,
-        })
-    }
-
-    /// Pushes the `( result errno )` pair for a finished syscall: the result
-    /// and `0` on success, `0` and the raw errno on failure.
-    fn process_syscall_result(&mut self, result: Result<usize, Errno>) {
-        match result {
-            Ok(result) => {
-                self.data_stack.push(result as u64);
-                self.data_stack.push(0);
-            }
-            Err(err) => {
-                self.data_stack.push(0);
-                self.data_stack.push(err.into_raw() as u64);
-            }
-        }
-    }
-
-    /// Pops the top of the data stack, failing when the stack is empty.
-    fn ds_pop(&mut self) -> Result<u64> {
-        self.data_stack
-            .pop()
-            .ok_or_else(|| anyhow!("popping from empty data stack"))
-    }
-
-    /// Returns the top of the data stack without removing it.
-    fn ds_peek(&self) -> Result<u64> {
-        self.data_stack
-            .last()
-            .copied()
-            .ok_or_else(|| anyhow!("empty data stack"))
-    }
-
-    /// Pops `rhs` then `lhs` and pushes `op(lhs, rhs)`.
-    fn binary_op(&mut self, op: impl FnOnce(u64, u64) -> Result<u64>) -> Result<()> {
-        let rhs = self.ds_pop()?;
-        let lhs = self.ds_pop()?;
-        self.data_stack.push(op(lhs, rhs)?);
-        Ok(())
-    }
-
-    /// Converts a Rust boolean into the IR truth value.
-    fn flag(condition: bool) -> u64 {
-        if condition { Self::TRUE } else { Self::FALSE }
-    }
-
-    /// Pops a syscall number followed by `N` arguments; the last argument is
-    /// nearest the top of the stack, so `args[0]` is the first argument.
-    fn pop_syscall<const N: usize>(&mut self) -> Result<(Sysno, [u64; N])> {
-        // NOTE(review): `Sysno::from` is what the original code used; it may
-        // panic on numbers that are not valid syscalls - confirm and consider
-        // a fallible conversion.
-        let call_num = Sysno::from(self.ds_pop()? as i32);
-        let mut args = [0u64; N];
-        for slot in args.iter_mut().rev() {
-            *slot = self.ds_pop()?;
-        }
-        Ok((call_num, args))
-    }
-
-    /// Finds the instruction that ends the branch opened at index `from`.
-    ///
-    /// Nested `If`/`EndIf` pairs are skipped. When `stop_at_else` is set the
-    /// search also stops at the `Else` belonging to the same `If`.
-    fn find_branch_end(&self, from: usize, stop_at_else: bool) -> Result<usize> {
-        let mut depth = 0usize;
-        for (index, instruction) in self.module.text.iter().enumerate().skip(from + 1) {
-            match instruction {
-                IR::If => depth += 1,
-                IR::EndIf if depth == 0 => return Ok(index),
-                IR::EndIf => depth -= 1,
-                IR::Else if depth == 0 && stop_at_else => return Ok(index),
-                _ => {}
-            }
-        }
-        Err(anyhow!("`if` without a matching `endif`"))
-    }
-
-    /// Runs the program from the current instruction pointer until the
-    /// outermost word returns.
-    ///
-    /// # Errors
-    ///
-    /// Fails on data-stack underflow, calls to undefined words, division by
-    /// zero, unbalanced `if`/`endif`, instructions this interpreter does not
-    /// implement, or when execution runs past the end of the program.
-    pub fn run(&mut self) -> Result<()> {
-        // Copy the shared reference so instructions can be borrowed for `'a`
-        // while `self` is mutated.
-        let module = self.module;
-        loop {
-            let ip = self.instruction_pointer;
-            let instruction = module
-                .text
-                .get(ip)
-                .ok_or_else(|| anyhow!("instruction pointer {} ran past the end of the program", ip))?;
-
-            match instruction {
-                IR::Label(_) | IR::EndIf => {}
-                IR::Call(name) => {
-                    let target = *self
-                        .labels
-                        .get(name)
-                        .ok_or_else(|| anyhow!("calling undefined word `{}`", name))?;
-                    self.return_stack.push(ip);
-                    self.instruction_pointer = target;
-                }
-                IR::Ret => match self.return_stack.pop() {
-                    // Resume after the `Call`; the increment below steps past it.
-                    Some(call_site) => self.instruction_pointer = call_site,
-                    // Returning from the outermost word ends the program.
-                    None => return Ok(()),
-                },
-                IR::StackPush(num) => self.data_stack.push(*num),
-                // Arithmetic wraps so that overflow never aborts the interpreter.
-                IR::AddU64 => self.binary_op(|lhs, rhs| Ok(lhs.wrapping_add(rhs)))?,
-                IR::SubtractU64 => self.binary_op(|lhs, rhs| Ok(lhs.wrapping_sub(rhs)))?,
-                IR::MultiplyU64 => self.binary_op(|lhs, rhs| Ok(lhs.wrapping_mul(rhs)))?,
-                IR::DivideU64 => self.binary_op(|lhs, rhs| {
-                    lhs.checked_div(rhs).ok_or_else(|| anyhow!("division by zero"))
-                })?,
-                IR::ModU64 => self.binary_op(|lhs, rhs| {
-                    lhs.checked_rem(rhs).ok_or_else(|| anyhow!("division by zero"))
-                })?,
-                IR::BitwiseOr => self.binary_op(|lhs, rhs| Ok(lhs | rhs))?,
-                IR::Equals => self.binary_op(|lhs, rhs| Ok(Self::flag(lhs == rhs)))?,
-                IR::GreaterThan => self.binary_op(|lhs, rhs| Ok(Self::flag(lhs > rhs)))?,
-                IR::PutN => println!("{}", self.ds_peek()?),
-                IR::Dup => {
-                    let top = self.ds_peek()?;
-                    self.data_stack.push(top);
-                }
-                IR::Swap => {
-                    let top = self.ds_pop()?;
-                    let below = self.ds_pop()?;
-                    self.data_stack.push(top);
-                    self.data_stack.push(below);
-                }
-                IR::Over => {
-                    let top = self.ds_pop()?;
-                    let below = self.ds_peek()?;
-                    self.data_stack.push(top);
-                    self.data_stack.push(below);
-                }
-                IR::Drop => {
-                    self.ds_pop()?;
-                }
-                IR::If => {
-                    // Non-zero means false: jump to the matching `else`/`endif`.
-                    if self.ds_pop()? != Self::TRUE {
-                        self.instruction_pointer = self.find_branch_end(ip, true)?;
-                    }
-                }
-                IR::Else => {
-                    // Reached after executing the true branch: skip the else branch.
-                    self.instruction_pointer = self.find_branch_end(ip, false)?;
-                }
-                // SAFETY (all syscall arms): none can be given in general. The
-                // interpreted program chooses the syscall and its arguments, so
-                // it can do anything the host process can, including corrupting
-                // the interpreter's own memory.
-                IR::Sys0 => {
-                    let (call_num, []) = self.pop_syscall::<0>()?;
-                    self.process_syscall_result(unsafe { syscall!(call_num) });
-                }
-                IR::Sys1 => {
-                    let (call_num, [a1]) = self.pop_syscall::<1>()?;
-                    self.process_syscall_result(unsafe { syscall!(call_num, a1) });
-                }
-                IR::Sys2 => {
-                    let (call_num, [a1, a2]) = self.pop_syscall::<2>()?;
-                    self.process_syscall_result(unsafe { syscall!(call_num, a1, a2) });
-                }
-                IR::Sys3 => {
-                    let (call_num, [a1, a2, a3]) = self.pop_syscall::<3>()?;
-                    self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3) });
-                }
-                IR::Sys4 => {
-                    let (call_num, [a1, a2, a3, a4]) = self.pop_syscall::<4>()?;
-                    self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4) });
-                }
-                IR::Sys5 => {
-                    let (call_num, [a1, a2, a3, a4, a5]) = self.pop_syscall::<5>()?;
-                    self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5) });
-                }
-                IR::Sys6 => {
-                    let (call_num, [a1, a2, a3, a4, a5, a6]) = self.pop_syscall::<6>()?;
-                    self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5, a6) });
-                }
-                // Continuing past an instruction that was not executed would
-                // leave the stack in an unexpected state, so stop instead.
-                other => {
-                    return Err(anyhow!("instruction {:?} is not implemented by the interpreter", other));
-                }
-            }
-            self.instruction_pointer += 1;
-        }
-    }
-}
+++ /dev/null
-[package]
-name = "hylo-ir"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-serde = "1.0.228"
-serde_derive = "1.0.228"
-serde_yaml = "0.9.34"
+++ /dev/null
-use serde_yaml::{from_str, to_string, Error};
-use serde_derive::{Serialize, Deserialize};
-
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub enum IR { // One instruction (or data definition) of the stack-machine IR.
- Label(String), // named position in the text; the target of `Call`
- Call(String), // transfer control to the named label
- Ret, // return to the most recent caller
- StackPush(u64), // push a literal cell onto the data stack
- StackPushString(String), // refers to string label, not the string itself
- StringDef(String, String), // first is string label, second is string value
-
- // These next ones should always be inlined, so they're in IR.
- Load, // @ ( addr -- x ) -- Fetch memory contents at addr
- Store, // ! ( x addr -- ) -- Store x at addr
-
- // These ones might not be inlined, but should be built-in, so a compiler might
- // turn this into `Call(String)` before translating to assembly/machine-code, but
- // an IR interpreter may just execute them.
- AddU64,
- SubtractU64,
- MultiplyU64,
- DivideU64,
- ModU64,
- Equals,
- GreaterThan,
- BitwiseOr,
- Dup,
- Swap,
- Drop,
- Over,
- PutS,
- PutN,
- If,
- Else,
- EndIf,
-
- // System calls
- Sys0, // the digit is the number of syscall arguments taken from the stack
- Sys1,
- Sys2,
- Sys3,
- Sys4,
- Sys5,
- Sys6,
-}
-
-// This is like an .o file.
-#[derive(Serialize, Deserialize, Debug)]
-pub struct IRObject {
- pub text: Vec<IR>, // executable instructions
- pub data: Vec<IR>, // data definitions
-}
-
-impl IRObject {
- pub fn to_s(&self) -> Result<String, Error> { // serialize this object to YAML text via serde_yaml
- to_string(self)
- }
-
- pub fn from_s(source: &str) -> Result<Self, Error> { // parse an object back from YAML text via serde_yaml
- from_str(source)
- }
-}
+++ /dev/null
-[package]
-name = "hyloc"
-version = "0.1.0"
-edition = "2024"
-
-[dependencies]
-hylo-ir = { workspace = true }
-hylo-interpret = { workspace = true }
-anyhow = "1.0.100"
+++ /dev/null
-use crate::parser::Module;
-use crate::tokenizer::{Token, tokenize};
-use hylo_ir::*;
-
-use std::collections::{HashSet, HashMap};
-use std::path::PathBuf;
-use std::rc::Rc;
-
-use anyhow::{Result, bail};
-
-macro_rules! push_num { // builds an IR::StackPush from a reference to a numeric token value
- ($num:ident) => { IR::StackPush(*$num as u64) }; // direct cast to u64
- ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) }; // cast through $num_typ first; NOTE(review): with an unsigned $num_typ a negative value is zero-extended, not sign-extended - confirm intent
-}
-
-/// IR generated for a single source file, before the import tree is collapsed.
-#[derive(Debug, Default)]
-struct IRModule {
-    data: Vec<IR>,
-    text: Vec<IR>,
-    imports: Vec<Rc<IRModule>>,
-    exports: Vec<String>,
-    source_file: PathBuf,
-    number: usize,
-}
-
-impl IRModule {
-    /// Returns the mangled label a call to `name` should target.
-    ///
-    /// The call resolves to the last import that exports `name`; when no
-    /// import exports it, the name is treated as local to this module.
-    fn get_label_for_call(&self, name: &String) -> String {
-        // Searching from the back makes the last matching import win.
-        let owner = self
-            .imports
-            .iter()
-            .rev()
-            .find(|imported| imported.exports.contains(name))
-            .map_or(self.number, |imported| imported.number);
-        // TODO check if it's even a word locally. If not, bail.
-        format!("_m{}_{}", owner, name)
-    }
-
-    /// Returns the mangled label for a word defined in this module.
-    fn get_label(&self, name: &String) -> String {
-        let module_number = self.number;
-        format!("_m{}_{}", module_number, name)
-    }
-}
-
-/// Loads a module and everything it imports, then flattens the result into a
-/// single text/data pair.
-#[derive(Default)]
-struct ImportTree {
-    /// Collapsed data section.
-    data: Vec<IR>,
-    /// Collapsed text section.
-    text: Vec<IR>,
-    /// Every module generated so far, keyed by source path.
-    all_modules: HashMap<PathBuf, Rc<IRModule>>,
-    /// Every name exported by any module.
-    all_exports: HashSet<String>,
-    /// The module the compilation started from.
-    entrypoint: Rc<IRModule>,
-    /// Number of modules generated so far; used to number new modules.
-    module_count: usize,
-    /// Modules already emitted by `collapse`.
-    collapse_seen: HashSet<PathBuf>,
-    /// Modules whose generation has started but not finished; used to detect
-    /// circular imports.
-    in_progress: HashSet<PathBuf>,
-}
-
-impl ImportTree {
-    /// Loads the module named by `specifier`, generating it on first use.
-    ///
-    /// Relative specifiers are resolved against `importer_dir` and
-    /// canonicalized.
-    ///
-    /// # Errors
-    ///
-    /// Fails when the file cannot be resolved, read, tokenized or parsed,
-    /// when one of its imports fails, or when the import graph has a cycle.
-    fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result<Rc<IRModule>> {
-        let mut path = PathBuf::from(specifier);
-        if path.is_relative() {
-            path = importer_dir.join(path).canonicalize()?;
-        }
-        if let Some(module) = self.all_modules.get(&path) {
-            return Ok(Rc::clone(module));
-        }
-        // A module is only registered once it is finished, so without this
-        // check a cycle would recurse until the stack overflows.
-        if !self.in_progress.insert(path.clone()) {
-            bail!("circular import of {}", path.display());
-        }
-
-        let contents = std::fs::read_to_string(&path)?;
-        let parsed = Module::parse(tokenize(&contents)?, is_entrypoint)?;
-        let module = Rc::new(self.generate_internal(path, &parsed)?);
-
-        self.in_progress.remove(&module.source_file);
-        self.all_modules.insert(module.source_file.clone(), Rc::clone(&module));
-        if is_entrypoint {
-            self.entrypoint = Rc::clone(&module);
-        }
-        Ok(module)
-    }
-
-    /// Maps a bare word to its built-in instruction, or to a call when the
-    /// word is not a built-in.
-    fn word_to_ir(word: &str) -> IR {
-        match word {
-            "@" => IR::Load,
-            "!" => IR::Store,
-            "dup" => IR::Dup,
-            "swap" => IR::Swap,
-            "drop" => IR::Drop,
-            "over" => IR::Over,
-            "puts" => IR::PutS,
-            "putn" => IR::PutN,
-            "if" => IR::If,
-            "else" => IR::Else,
-            "endif" => IR::EndIf,
-            "=" => IR::Equals,
-            ">" => IR::GreaterThan,
-            "+" => IR::AddU64,
-            "-" => IR::SubtractU64,
-            "*" => IR::MultiplyU64,
-            "/" => IR::DivideU64,
-            "%" => IR::ModU64,
-            "|" => IR::BitwiseOr,
-            "sys0" => IR::Sys0,
-            "sys1" => IR::Sys1,
-            "sys2" => IR::Sys2,
-            "sys3" => IR::Sys3,
-            "sys4" => IR::Sys4,
-            "sys5" => IR::Sys5,
-            "sys6" => IR::Sys6,
-            // TODO num type specfic math like `+:i32`, etc.
-            _ => IR::Call(String::from(word)),
-        }
-    }
-
-    /// Generates the IR for one parsed module, loading its imports first.
-    ///
-    /// Modules are numbered after their imports have been generated.
-    ///
-    /// # Errors
-    ///
-    /// Fails when any import fails to load. Previously such errors were only
-    /// printed, which left calls to the missing module dangling.
-    fn generate_internal(&mut self, path: PathBuf, module: &Module) -> Result<IRModule> {
-        let mut imports = Vec::with_capacity(module.imports.len());
-        if let Some(parent_dir) = path.parent().map(|dir| dir.to_path_buf()) {
-            for specifier in &module.imports {
-                imports.push(self.import(&parent_dir, specifier, false)?);
-            }
-        }
-
-        let exports: Vec<String> = module.exports.iter().map(|name| name.to_string()).collect();
-        self.all_exports.extend(exports.iter().cloned());
-
-        // Eventually these will end up being sections in assembly
-        let mut text = vec![];
-        let mut data = vec![];
-
-        for def in &module.words {
-            text.push(IR::Label(def.name.to_string()));
-            for inst in &def.instructions {
-                let mapped_ir = match inst {
-                    Token::Word(word) => Self::word_to_ir(word),
-                    Token::String(contents) => {
-                        // Number strings from zero; `data.len() - 1` underflowed
-                        // on the first string literal of a module.
-                        let string_label = format!("string_{}", data.len());
-                        data.push(IR::StringDef(string_label.clone(), String::from(*contents)));
-                        IR::StackPushString(string_label)
-                    }
-                    Token::NumU8(num) => push_num!(num),
-                    Token::NumI8(num) => push_num!(num, u8),
-                    Token::NumU16(num) => push_num!(num),
-                    Token::NumI16(num) => push_num!(num, u16),
-                    Token::NumU32(num) => push_num!(num),
-                    Token::NumI32(num) => push_num!(num, u32),
-                    Token::NumU64(num) => push_num!(num),
-                    Token::NumI64(num) => push_num!(num),
-                    // NOTE(review): floats are cast to an integer here, which
-                    // discards the fractional part - confirm intent.
-                    Token::NumF32(num) => push_num!(num),
-                    Token::NumF64(num) => push_num!(num),
-                };
-                text.push(mapped_ir);
-            }
-            text.push(IR::Ret);
-        }
-
-        let number = self.module_count;
-        self.module_count += 1;
-
-        Ok(IRModule {
-            text,
-            data,
-            imports,
-            exports,
-            source_file: path,
-            number,
-        })
-    }
-
-    /// Appends `module` and, before it, everything it imports to the
-    /// collapsed text and data, renaming labels and strings so they are
-    /// unique per module.
-    ///
-    /// Each module is emitted at most once, however many modules import it.
-    ///
-    /// # Errors
-    ///
-    /// Fails when a module's data section holds anything but string
-    /// definitions.
-    fn collapse(&mut self, module: Rc<IRModule>) -> Result<()> {
-        // `insert` returns false when the path was already recorded. The set
-        // was previously never filled, so shared imports were emitted twice.
-        if !self.collapse_seen.insert(module.source_file.clone()) {
-            return Ok(());
-        }
-
-        for imported in &module.imports {
-            self.collapse(Rc::clone(imported))?;
-        }
-
-        let is_entrypoint = module.source_file == self.entrypoint.source_file;
-        let module_number = module.number;
-
-        for string in &module.data {
-            let IR::StringDef(name, val) = string else {
-                bail!("non-string data");
-            };
-            let new_name = format!("{}_{}", name, module_number);
-            self.data.push(IR::StringDef(new_name, val.clone()));
-        }
-
-        for instruction in &module.text {
-            let new_instruction = match instruction {
-                IR::StackPushString(name) => {
-                    IR::StackPushString(format!("{}_{}", name, module_number))
-                }
-                // The entrypoint's `main` keeps its name so it can be found.
-                IR::Label(name) if is_entrypoint && name == "main" => instruction.clone(),
-                IR::Label(name) => IR::Label(module.get_label(name)),
-                IR::Call(name) => IR::Call(module.get_label_for_call(name)),
-                _ => instruction.clone(),
-            };
-            self.text.push(new_instruction);
-        }
-
-        Ok(())
-    }
-}
-
-/// Compiles the file at `path`, together with everything it imports, into a
-/// single [`IRObject`].
-///
-/// A relative `path` is resolved against the current working directory.
-pub fn compile(path: &str) -> Result<IRObject> {
-    let working_dir = std::env::current_dir()?;
-    let mut tree = ImportTree::default();
-    let entry = tree.import(&working_dir, path, true)?;
-    tree.collapse(entry)?;
-    // TODO remove unused words
-    let ImportTree { data, text, .. } = tree;
-    Ok(IRObject { data, text })
-}
+++ /dev/null
-mod tokenizer;
-mod parser;
-mod ir;
-mod riscv_asm_codegen;
-
-use hylo_interpret::Interpreter;
-
-use anyhow::Result;
-
-/// Compiles the file named by the first command-line argument and prints the
-/// generated RISC-V assembly on stdout.
-fn main() -> Result<()> {
-    // A missing argument is a usage error, not a bug, so report it as an
-    // error instead of panicking.
-    let filename = std::env::args()
-        .nth(1)
-        .ok_or_else(|| anyhow::anyhow!("must provide a file to compile"))?;
-    let module = ir::compile(&filename)?;
-    // let mut interp = Interpreter::new(&module)?;
-    // interp.run()?;
-    // 4096 is the size in bytes reserved for the data stack.
-    let mut generator = riscv_asm_codegen::CodeGen::new(&module, 4096);
-    println!("{}", generator.assembly()?);
-    Ok(())
-}
+++ /dev/null
-use crate::tokenizer::Token;
-use anyhow::{Result, bail};
-
-#[derive(Debug)]
-pub struct WordDefinition<'a> { // one `: name ... ;` definition
- pub name: &'a str, // the word's name, borrowed from the source text
- pub instructions: Vec<Token<'a>>, // tokens between the name and the closing `;`
-}
-
-/// The parsed form of one source file.
-#[derive(Debug)]
-pub struct Module<'a> {
-    /// Word definitions in source order; for an entrypoint the top-level code
-    /// is appended as a word named `main`.
-    pub words: Vec<WordDefinition<'a>>,
-    /// Specifiers named by top-level `import "..."` statements.
-    pub imports: Vec<&'a str>,
-    /// Names listed by top-level `export name` statements.
-    pub exports: Vec<&'a str>,
-}
-
-impl<'a> Module<'a> {
-    /// Groups a token stream into word definitions, imports, exports and
-    /// top-level code.
-    ///
-    /// `import` and `export` apply only to the token that immediately follows
-    /// them. Previously the flag stayed set for the rest of the file, so every
-    /// later top-level string became an import and every later top-level word
-    /// an export.
-    ///
-    /// # Errors
-    ///
-    /// Fails on a nested or unfinished word definition, a `;` outside a
-    /// definition, or a word name that is not a bare word.
-    pub fn parse(input: Vec<Token<'a>>, is_entrypoint: bool) -> Result<Self> {
-        let mut words = vec![];
-        let mut main = vec![];
-        let mut exports = vec![];
-        let mut imports = vec![];
-        let mut current_word: Option<WordDefinition<'a>> = None;
-        let mut about_to_start_word_def = false;
-        let mut last_was_import = false;
-        let mut last_was_export = false;
-
-        for token in input {
-            // Read and clear the keyword flags so they affect this token only.
-            let expecting_import = std::mem::take(&mut last_was_import);
-            let expecting_export = std::mem::take(&mut last_was_export);
-
-            if about_to_start_word_def {
-                let Token::Word(name) = token else {
-                    bail!("{:?} is not a valid word name!", token);
-                };
-                current_word = Some(WordDefinition {
-                    name,
-                    instructions: vec![],
-                });
-                about_to_start_word_def = false;
-                continue;
-            }
-
-            match token {
-                Token::Word(":") => {
-                    if current_word.is_some() {
-                        bail!("can't define words inside word definitions!");
-                    }
-                    about_to_start_word_def = true;
-                    continue;
-                }
-                Token::Word(";") => {
-                    let Some(finished) = current_word.take() else {
-                        bail!("`;` must be at the end of a word definition");
-                    };
-                    words.push(finished);
-                    continue;
-                }
-                _ => {}
-            }
-
-            // Inside a definition every token is part of the word's body.
-            if let Some(definition) = current_word.as_mut() {
-                definition.instructions.push(token);
-                continue;
-            }
-
-            match token {
-                Token::Word("import") => last_was_import = true,
-                Token::Word("export") => last_was_export = true,
-                Token::Word(name) if expecting_export => exports.push(name),
-                Token::String(specifier) if expecting_import => imports.push(specifier),
-                other => main.push(other),
-            }
-        }
-
-        if about_to_start_word_def || current_word.is_some() {
-            bail!("unfinished word definition!");
-        }
-
-        if is_entrypoint {
-            words.push(WordDefinition {
-                name: "main",
-                instructions: main,
-            });
-        }
-
-        Ok(Module { words, imports, exports })
-    }
-
-    /// Prints every word and its instructions; only built for tests.
-    #[cfg(test)]
-    pub fn debug_print(&self) {
-        for word in &self.words {
-            println!("{}", word.name);
-            for instruction in &word.instructions {
-                println!(" {:?}", instruction);
-            }
-        }
-    }
-}
-
-
-
-#[cfg(test)]
-mod tests { // smoke test for the parser
- use super::*;
-
- #[test]
- fn try_some_parsing() { // only checks that tokenizing and parsing succeed; nothing is asserted about the result
- let result = Module::parse(crate::tokenizer::tokenize("
-: hello world 16 \"planet\" ;
-: soup chicken 4.5 hello ;
-
-hello soup
-").unwrap(), true).unwrap();
- result.debug_print(); // output is only visible when the test harness shows stdout
- }
-}
+++ /dev/null
-use hylo_ir::*;
-
-use anyhow::*;
-
-use std::collections::{HashMap, HashSet};
-use std::fmt::Display;
-
-pub struct CodeGen<'a> { // turns one IRObject into assembly text
- module: &'a IRObject, // IR program being translated
- data_stack_size: usize, // operand of the `.space` directive that reserves the data stack
- lines: Vec<String>, // output lines emitted so far; `assembly` joins them with newlines
-}
-
-
-// Some inspiration
-// ================
-//
-// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list
-// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack
-//
-
-// Implementation Choices
-// ======================
-//
-// Data Stack pointer: s2
-// No return stack pointer (using C ABI, so sp, sorta)
-// Use t0, t1, t2 for temporary values in words
-// Data stack grows down
-
-
-macro_rules! asm_macro { // defines a method that emits one line of assembly from a template
- ($name:ident, $src:expr) => { // fixed text, no arguments
- fn $name(&mut self) {
- self.line($src);
- }
- };
- ($name:ident, $src:expr, $arg0:ty) => { // template with one `{}` placeholder
- fn $name(&mut self, val0: $arg0) {
- self.line(format!($src, val0));
- }
- };
- ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => { // template with two `{}` placeholders
- fn $name(&mut self, val0: $arg0, val1: $arg1) {
- self.line(format!($src, val0, val1));
- }
- };
-}
-
-impl<'a> CodeGen<'a> {
-    /// Creates a generator for `ir_mod` that reserves `data_stack_size` bytes
-    /// for the data stack.
-    pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self {
-        Self {
-            module: ir_mod,
-            data_stack_size,
-            lines: vec![],
-        }
-    }
-
-    /// Emits an indented line (an instruction or directive operand).
-    fn line<S: Display>(&mut self, line: S) {
-        self.lines.push(format!(" {}", line));
-    }
-
-    /// Emits an unindented line (a label, directive or comment).
-    fn label<S: Display>(&mut self, line: S) {
-        self.lines.push(line.to_string());
-    }
-
-    // Data-stack cells are 8 bytes wide, so every access must be a 64-bit
-    // `ld`/`sd`; the 32-bit `lw`/`sw` used before truncated cells and left
-    // the upper half of each slot stale.
-    asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str);
-    asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize);
-    asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str);
-    asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize);
-
-    /// Pops the top cell into `reg`.
-    fn pop_to(&mut self, reg: &str) {
-        self.copy_top_stack_value_to(reg);
-        self.move_stack_ptr_by_cells(1);
-    }
-
-    /// Pops one cell per space-separated register in `regs`; the last
-    /// register listed receives the top of the stack.
-    fn pop_some_to(&mut self, regs: &str) {
-        let regs = regs.trim().split(" ").collect::<Vec<_>>();
-        let count = regs.len();
-        for (offset, reg) in regs.into_iter().rev().enumerate() {
-            self.copy_offset_stack_value_to(reg, offset as isize);
-        }
-        self.move_stack_ptr_by_cells(count as isize);
-    }
-
-    /// Pushes `reg` onto the data stack (the stack grows down).
-    fn push_from(&mut self, reg: &str) {
-        self.move_stack_ptr_by_cells(-1);
-        self.copy_to_top_of_stack(reg);
-    }
-
-    /// Pops into `regs`, emits `call`, then pushes `reg`.
-    fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) {
-        self.pop_some_to(regs);
-        self.line(call);
-        self.push_from(reg);
-    }
-
-    /// Pops the syscall number and arguments into `regs`, performs the call
-    /// and pushes the `( result errno )` pair, matching what the interpreter
-    /// pushes.
-    ///
-    /// Linux reports failure by returning `-errno`, a value in `[-4095, -1]`.
-    fn syscall(&mut self, regs: &str) {
-        self.pop_some_to(regs);
-        self.line("ecall");
-        self.line("li t1, -4096");
-        self.line("sltu t1, t1, a0 # t1 = 1 when a0 holds -errno");
-        self.line("neg t1, t1 # all ones on error, zero on success");
-        self.line("neg t2, a0");
-        self.line("and t2, t2, t1 # errno, or 0 on success");
-        self.line("not t1, t1");
-        self.line("and a0, a0, t1 # result, or 0 on error");
-        self.push_from("a0");
-        self.push_from("t2");
-    }
-
-    /// Renders the whole module as RISC-V (riscv64) assembly text.
-    ///
-    /// # Errors
-    ///
-    /// Fails when the data section holds anything but string definitions,
-    /// when `if`/`else`/`endif` are unbalanced, or when an instruction has no
-    /// code generation yet.
-    pub fn assembly(&mut self) -> Result<String> {
-        // Copy the shared reference so the IR can be iterated while `self`
-        // collects output lines.
-        let module = self.module;
-
-        // Static strings
-        self.label(".section .rodata\n");
-        for ir in &module.data {
-            match ir {
-                IR::StringDef(string_label, some_string) => {
-                    // A label needs its trailing colon to be accepted by the assembler.
-                    self.label(format!("{}:", string_label));
-                    // NOTE(review): the text is emitted verbatim; this assumes it
-                    // already uses escapes the assembler understands - confirm.
-                    self.line(format!(".asciz \"{}\"", some_string));
-                    self.label("");
-                },
-                _ => bail!("Currently only string definitions are supported in the data section.")
-            }
-        }
-
-        // Data stack
-        self.label(".data\n");
-        self.label("data_stack:");
-        self.line(format!(".space {}", self.data_stack_size));
-        self.label(".globl data_stack_end\ndata_stack_end:\n");
-
-        // Code
-        self.label(".text\n");
-        self.label(".align 3\n");
-
-        let mut if_block_count = 0;
-        let mut if_stack = vec![];
-        let mut seen_else = HashSet::new();
-        let mut last_label = "";
-
-        for ir in &module.text {
-            match ir {
-                IR::Label(name) => {
-                    last_label = name.as_str();
-                    if name == "main" {
-                        self.label(".globl _start"); // TODO is globl necessary?
-                        self.label("_start:");
-                        self.line("la s2, data_stack_end # set initial data stack pointer");
-                    } else {
-                        self.label(format!(".globl {}", name));
-                        self.label(format!("{}:", name));
-                    }
-                    self.line("addi sp, sp, -16 # allocate 16 bytes on stack");
-                    // ra is 64 bits wide on riscv64, so it needs `sd`, not `sw`.
-                    self.line("sd ra, 8(sp) # store return address on stack");
-                },
-                IR::Call(name) => {
-                    self.label(format!("# call {}", name));
-                    self.line(format!("call {}", name));
-                },
-                IR::Ret => {
-                    if last_label == "main" {
-                        self.label("# exit 0 syscall");
-                        self.line("li a7, 93");
-                        self.line("mv a0, x0");
-                        self.line("ecall");
-                    } else {
-                        self.line("ld ra, 8(sp)"); // load return address from stack
-                        self.line("addi sp, sp, 16"); // restore stack pointer
-                        self.line("ret");
-                    }
-                },
-                IR::Load => {
-                    self.copy_top_stack_value_to("t0");
-                    self.line("ld t0, 0(t0)"); // replace the pointer in t0 with the cell it points at
-                    self.copy_to_top_of_stack("t0");
-                },
-                IR::Store => { // ( x addr -- )
-                    self.copy_top_stack_value_to("t1");
-                    self.copy_offset_stack_value_to("t0", 1);
-                    self.line("sd t0, 0(t1)"); // store x at addr
-                    self.move_stack_ptr_by_cells(2);
-                },
-                IR::StackPush(num) => {
-                    self.label(format!("# stackpush {}", num));
-                    self.line(format!("li t0, {}", num));
-                    self.push_from("t0");
-                },
-                IR::StackPushString(name) => {
-                    self.label(format!("# stackpushstring {}", name));
-                    // `la` loads a symbol's address; `li` only takes constants.
-                    self.line(format!("la t0, {}", name));
-                    self.push_from("t0");
-                },
-                IR::AddU64 => {
-                    self.label("# add");
-                    self.pop_call_push("t0 t1", "add t0, t0, t1", "t0");
-                },
-                IR::SubtractU64 => {
-                    self.label("# sub");
-                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
-                },
-                IR::MultiplyU64 => {
-                    self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0");
-                },
-                // The IR operations are unsigned, so use the unsigned instructions.
-                IR::DivideU64 => {
-                    self.pop_call_push("t0 t1", "divu t0, t0, t1", "t0");
-                },
-                IR::ModU64 => {
-                    self.pop_call_push("t0 t1", "remu t0, t0, t1", "t0");
-                },
-                IR::Dup => {
-                    self.label("# dup");
-                    self.copy_top_stack_value_to("t0");
-                    self.push_from("t0");
-                },
-                IR::Swap => {
-                    self.label("# swap");
-                    self.pop_some_to("t1 t0");
-                    self.push_from("t0");
-                    self.push_from("t1");
-                },
-                IR::Drop => {
-                    self.label("# drop");
-                    self.move_stack_ptr_by_cells(1);
-                },
-                IR::Equals => {
-                    // Yes, this is the same as subtract, since we're treating 0 as true, and
-                    // others as false.
-                    self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
-                },
-                IR::GreaterThan => {
-                    self.label("# >");
-                    self.pop_some_to("t0 t1");
-                    // Unsigned comparison, matching the interpreter's u64 `>`.
-                    self.line("sgtu t0, t0, t1");
-                    self.line("seqz t0, t0"); // remember, 0 is true, others are false
-                    self.push_from("t0");
-                },
-                IR::BitwiseOr => {
-                    self.pop_call_push("t0 t1", "or t0, t0, t1", "t0");
-                },
-                IR::Sys0 => self.syscall("a7"),
-                IR::Sys1 => self.syscall("a0 a7"),
-                IR::Sys2 => self.syscall("a0 a1 a7"),
-                IR::Sys3 => self.syscall("a0 a1 a2 a7"),
-                IR::Sys4 => self.syscall("a0 a1 a2 a3 a7"),
-                IR::Sys5 => self.syscall("a0 a1 a2 a3 a4 a7"),
-                IR::Sys6 => self.syscall("a0 a1 a2 a3 a4 a5 a7"),
-                IR::PutN => {
-                    self.line("call putn");
-                },
-                // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html
-                IR::If => {
-                    self.label("# if");
-                    self.pop_to("t0");
-                    // Non-zero means false: skip to the else/endif label.
-                    self.line(format!("bnez t0, _else_{}", if_block_count));
-                    if_stack.push(if_block_count);
-                    if_block_count += 1;
-                },
-                IR::Else => {
-                    self.label("# else");
-                    let if_counter = *if_stack
-                        .last()
-                        .ok_or_else(|| anyhow!("`else` without a matching `if`"))?;
-                    self.line(format!("j _endif_{}", if_counter));
-                    self.label(format!("_else_{}:", if_counter));
-                    seen_else.insert(if_counter);
-                },
-                IR::EndIf => {
-                    self.label("# endif");
-                    let if_counter = if_stack
-                        .pop()
-                        .ok_or_else(|| anyhow!("`endif` without a matching `if`"))?;
-                    // Without an `else`, the `if` branches straight to the
-                    // `_else_` label, so it is emitted here instead.
-                    if seen_else.remove(&if_counter) {
-                        self.label(format!("_endif_{}:", if_counter));
-                    } else {
-                        self.label(format!("_else_{}:", if_counter));
-                    }
-                }
-                other => bail!("code generation for {:?} is not implemented yet", other),
-            }
-        }
-
-        if !if_stack.is_empty() {
-            bail!("`if` without a matching `endif`");
-        }
-
-        Ok(self.lines.join("\n"))
-    }
-}
-
+++ /dev/null
-use anyhow::{Result, anyhow};
-
-#[derive(Debug, Clone)]
-pub enum Token<'a> { // one lexical token; text variants borrow from the source string
- Word(&'a str), // a bare word
- String(&'a str), // the text between a pair of double quotes
- NumU8(u8), // the typed variants come from a `number:type` suffix such as `5:u8`
- NumI8(i8),
- NumU16(u16),
- NumI16(i16),
- NumU32(u32),
- NumI32(i32),
- NumU64(u64), // also the default for an unsuffixed non-negative integer
- NumI64(i64), // also the default for an unsuffixed integer starting with `-`
- NumF32(f32),
- NumF64(f64), // also the default for an unsuffixed number containing `.`
-}
-
-impl<'a> Token<'a> {
-    /// Classifies one whitespace-delimited piece of source as a word or a
-    /// number.
-    ///
-    /// A lone `-` is a word. Anything else that starts with `-` or a numeric
-    /// character is a number, optionally suffixed with `:type` (for example
-    /// `-1:i16`). Without a suffix the type is `f64` when the text contains
-    /// `.`, `i64` when it starts with `-`, and `u64` otherwise.
-    ///
-    /// # Errors
-    ///
-    /// Fails when the digits do not parse as the selected type, or when the
-    /// type suffix is unknown (this used to panic).
-    fn parse_word_or_num(input: &'a str) -> Result<Token<'a>> {
-        if input == "-" {
-            return Ok(Token::Word(input));
-        }
-
-        // we're assuming any token starting with `-` with length greater than one
-        // is a negative number
-        let looks_numeric =
-            input.starts_with('-') || input.chars().next().is_some_and(char::is_numeric);
-        if !looks_numeric {
-            return Ok(Token::Word(input));
-        }
-
-        if let Some((num, typ)) = input.split_once(':') {
-            return match typ {
-                "u8" => Ok(Token::NumU8(num.parse()?)),
-                "i8" => Ok(Token::NumI8(num.parse()?)),
-                "u16" => Ok(Token::NumU16(num.parse()?)),
-                "i16" => Ok(Token::NumI16(num.parse()?)),
-                "u32" => Ok(Token::NumU32(num.parse()?)),
-                "i32" => Ok(Token::NumI32(num.parse()?)),
-                "u64" => Ok(Token::NumU64(num.parse()?)),
-                "i64" => Ok(Token::NumI64(num.parse()?)),
-                "f32" => Ok(Token::NumF32(num.parse()?)),
-                "f64" => Ok(Token::NumF64(num.parse()?)),
-                // The suffix comes from user source, so report it rather than panic.
-                _ => Err(anyhow!("unknown number type `{}` in `{}`", typ, input)),
-            };
-        }
-
-        if input.contains('.') {
-            Ok(Token::NumF64(input.parse()?))
-        } else if input.starts_with('-') {
-            Ok(Token::NumI64(input.parse()?))
-        } else {
-            Ok(Token::NumU64(input.parse()?))
-        }
-    }
-}
-
-// TODO really want an iterator, not a vector
-pub fn tokenize<'a>(input: &'a str) -> Result<Vec<Token<'a>>> {
- let mut result = vec![];
- let mut string_start: Option<usize> = None;
- let mut word_or_num_start: Option<usize> = None;
- let mut last_is_backslash = false;
- let mut last_is_whitespace = true;
- let mut in_doc_comment = false;
- let mut in_line_comment = false;
- let mut index = 0;
- let mut first_char = true;
-
-
- for char in input.chars() {
- if first_char {
- first_char = false;
- } else {
- index += 1;
- }
-
- if in_doc_comment {
- if char == ')' {
- in_doc_comment = false;
- last_is_whitespace = true; // not really true, but means don't need space after
- }
- continue;
- }
-
- if in_line_comment {
- if char == '\n' {
- in_line_comment = false;
- last_is_whitespace = true; // not really true, but means don't need space after
- }
- continue;
- }
-
- if char == '"' {
- if let Some(start) = string_start {
- if !last_is_backslash {
- result.push(Token::String(&input[start..index]));
- string_start = None;
- }
- } else {
- string_start = Some(index + 1)
- }
- last_is_backslash = false;
- last_is_whitespace = false;
- continue;
- }
-
-
- if string_start.is_some() {
- last_is_backslash = char == '\\';
- continue;
- }
-
- if char.is_whitespace() {
- if last_is_backslash {
- in_line_comment = true;
- } else if !last_is_whitespace && let Some(start) = word_or_num_start {
- let token = &input[start..index];
- if token == "(" {
- in_doc_comment = true;
- } else {
- result.push(Token::parse_word_or_num(&input[start..index])?);
- }
- word_or_num_start = None;
- }
- last_is_whitespace = true;
- last_is_backslash = false;
- continue;
- }
-
- last_is_backslash = char == '\\';
-
- if index == input.len() - 1 {
- if !last_is_whitespace && let Some(start) = word_or_num_start {
- result.push(Token::parse_word_or_num(&input[start..])?);
- }
- continue;
- }
-
- if last_is_whitespace { // start of word or num (we already handled strings)
- word_or_num_start = Some(index);
- last_is_whitespace = false;
- }
- }
- Ok(result)
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn try_some_tokenizing() {
- let result = tokenize("
-
- \\ soup
- 2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup
-");
- println!("result: {:?}", result);
- }
-
- #[test]
- fn comments() {
- let result = tokenize("
- (
- foo
- bar
- )
- : baz ( x y -- z )
- chicken
- soup
- ;
- ");
- println!("result: {:?}", result);
- }
-}
+++ /dev/null
-
-qemu-system-riscv64 \
- -machine virt \
- -cpu rv64 \
- -m 1G \
- -device virtio-blk-device,drive=hd \
- -drive file=machine/overlay.qcow2,if=none,id=hd \
- -device virtio-net-device,netdev=net \
- -netdev user,id=net,hostfwd=tcp::2222-:22 \
- -kernel machine/u-boot-qemu/uboot.elf \
- -object rng-random,filename=/dev/urandom,id=rng \
- -device virtio-rng-device,rng=rng \
- -append "root=LABEL=rootfs console=ttyS0" \
- -nographic \
-
-echo "now do:"
-echo " ssh debian@localhost -p 2222"
+++ /dev/null
-
-mkdir machine
-cd machine
-wget "https://gitlab.com/api/v4/projects/giomasce%2Fdqib/jobs/artifacts/master/download?job=convert_riscv64-virt" -O debian-rv64.zip
-
-unzip ./debian-rv64.zip
-
-# Grab the URL from https://packages.debian.org/sid/u-boot-qemu
-wget "http://ftp.us.debian.org/debian/pool/main/u/u-boot/u-boot-qemu_2025.01-3.1_all.deb" -O u-boot-qemu.deb
-mkdir u-boot-qemu
-cd u-boot-qemu
-ar -x ../u-boot-qemu.deb
-tar xvf data.tar.xz
-cp ./usr/lib/u-boot/qemu-riscv64_smode/uboot.elf ./uboot.elf
-
-qemu-img create -o backing_file=./dqib_riscv64-virt/image.qcow2,backing_fmt=qcow2 -f qcow2 overlay.qcow2
-
--- /dev/null
+target
+qemu/machine
+*.o
+*.asm
--- /dev/null
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "anyhow"
+version = "1.0.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+
+[[package]]
+name = "indexmap"
+version = "2.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.103"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rel-interpret"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "rel-ir",
+ "syscalls",
+]
+
+[[package]]
+name = "rel-ir"
+version = "0.1.0"
+dependencies = [
+ "serde",
+ "serde_derive",
+ "serde_yaml",
+]
+
+[[package]]
+name = "relc"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "rel-interpret",
+ "rel-ir",
+]
+
+[[package]]
+name = "ryu"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_repr"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_yaml"
+version = "0.9.34+deprecated"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
+dependencies = [
+ "indexmap",
+ "itoa",
+ "ryu",
+ "serde",
+ "unsafe-libyaml",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.111"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "syscalls"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90db46b5b4962319605d435986c775ea45a0ad2561c09e1d5372b89afeb49cf4"
+dependencies = [
+ "serde",
+ "serde_repr",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
+
+[[package]]
+name = "unsafe-libyaml"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
--- /dev/null
+[workspace]
+
+resolver = "3"
+members = ["rel-ir","relc", "rel-interpret"]
+
+
+[workspace.dependencies]
+rel-ir = { path = "./rel-ir", version = "0.1.0" }
+rel-interpret = { path = "./rel-interpret", version = "0.1.0" }
--- /dev/null
+# rel
+
+The name means "Rethought Language".
+
+
+## TODO
+
+* [x] Imports
+* [x] Syscalls
+* [ ] Structs
+* [ ] many, many more things
--- /dev/null
+\ vim: filetype=forth
+
+: mmap 9 sys6 ;
+
+: PROT_READ 1 ;
+: PROT_WRITE 2 ;
+: MAP_PRIVATE 2 ;
+: MAP_ANONYMOUS 32 ;
+
+: ALLOC_PROT PROT_READ PROT_WRITE | ;
+: ALLOC_MAP MAP_PRIVATE MAP_ANONYMOUS | ;
+
+: alloc 0 swap ALLOC_PROT ALLOC_MAP -1:i16 0 mmap ;
+
+1024 alloc
+putn
+swap
+putn
--- /dev/null
+\ vim: filetype=forth
+
+import "./put2.rel"
+
+: fib
+ dup 1 > if
+ dup 1 - fib
+ swap 2 - fib
+ +
+ endif
+;
+
+0 fib putn
+1 fib putn
+2 fib putn
+3 fib putn
+4 fib putn
+5 fib putn
+6 fib putn
+7 fib putn
+8 fib putn
+9 fib putn
+10 fib putn
+
+5 fib 6 fib put2
--- /dev/null
+\ vim: filetype=forth
+
+: put2 putn putn ;
+
+: foobar dup dup ;
+
+export put2
--- /dev/null
+\ vim: filetype=forth
+
+: getpid
+ 39 sys0
+ drop
+;
+
+getpid putn
--- /dev/null
+../target/debug/relc fib.rel > fib.asm
+riscv64-unknown-linux-gnu-as -o fib.o fib.asm
+riscv64-unknown-linux-gnu-cc -O1 -no-pie -o test fib.o putn.c -nostartfiles
+./test
--- /dev/null
+\ foo bar
+: fib
+ dup
+ 1
+ >
+ if
+ dup
+ 1 - fib
+ swap
+ 2
+ -
+ fib
+ +
+ endif
+;
+
+0 fib putn drop
+1 fib putn drop
+2 fib putn drop
+3 fib putn drop
+4 fib putn drop
+5 fib putn drop
+6 fib putn drop
+7 fib putn drop
--- /dev/null
+#include <stdio.h>
+
+extern unsigned long data_stack_end;
+register unsigned long * stack_pointer asm("s2");
+
+/*
+ * Print every cell of the data stack, oldest first, followed by a newline.
+ *
+ * The stack grows down from `data_stack_end`, and a push decrements the
+ * stack pointer before storing, so the oldest cell lives one cell below
+ * `data_stack_end`. The label itself is one past the stack and is never read.
+ * An empty stack prints only the "stack: " prefix.
+ */
+void putn() {
+    unsigned long * bottom = &data_stack_end;
+    printf("stack: ");
+    for (unsigned long * cell = bottom - 1; cell > stack_pointer; cell -= 1) {
+        printf("%lu ", *cell);
+    }
+    if (stack_pointer < bottom) {
+        /* The top of the stack is printed last, without a trailing space. */
+        printf("%lu", *stack_pointer);
+    }
+    printf("\n");
+}
+
+
--- /dev/null
+{
+ "nodes": {
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1765425892,
+ "narHash": "sha256-jlQpSkg2sK6IJVzTQBDyRxQZgKADC2HKMRfGCSgNMHo=",
+ "owner": "nixos",
+ "repo": "nixpkgs",
+ "rev": "5d6bdbddb4695a62f0d00a3620b37a15275a5093",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nixos",
+ "ref": "nixpkgs-unstable",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "nixpkgs": "nixpkgs"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
--- /dev/null
+{
+ description = "rel development shell";
+
+ inputs = {
+ nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
+ };
+
+ outputs = {nixpkgs, ...}: let
+ system = "x86_64-linux";
+ pkgs = import nixpkgs {
+ # uncomment the next bit to install cross-compiler toolchain
+
+ # crossSystem = {
+ # config = "riscv64-unknown-linux-gnu";
+ # # Or if you want to build against MUSL:
+ # # config = "riscv64-unknown-linux-musl";
+ # };
+ inherit system;
+ };
+ in {
+ devShells.${system}.default = pkgs.mkShell {
+ packages = [
+ pkgs.qemu
+ ];
+ };
+ };
+}
--- /dev/null
+
+qemu-system-riscv64 \
+ -machine virt \
+ -cpu rv64 \
+ -m 1G \
+ -device virtio-blk-device,drive=hd \
+ -drive file=machine/overlay.qcow2,if=none,id=hd \
+ -device virtio-net-device,netdev=net \
+ -netdev user,id=net,hostfwd=tcp::2222-:22 \
+ -kernel machine/u-boot-qemu/uboot.elf \
+ -object rng-random,filename=/dev/urandom,id=rng \
+ -device virtio-rng-device,rng=rng \
+ -append "root=LABEL=rootfs console=ttyS0" \
+ -nographic \
+
+echo "now do:"
+echo " ssh debian@localhost -p 2222"
--- /dev/null
+
+mkdir machine
+cd machine
+wget "https://gitlab.com/api/v4/projects/giomasce%2Fdqib/jobs/artifacts/master/download?job=convert_riscv64-virt" -O debian-rv64.zip
+
+unzip ./debian-rv64.zip
+
+# Grab the URL from https://packages.debian.org/sid/u-boot-qemu
+wget "http://ftp.us.debian.org/debian/pool/main/u/u-boot/u-boot-qemu_2025.01-3.1_all.deb" -O u-boot-qemu.deb
+mkdir u-boot-qemu
+cd u-boot-qemu
+ar -x ../u-boot-qemu.deb
+tar xvf data.tar.xz
+cp ./usr/lib/u-boot/qemu-riscv64_smode/uboot.elf ./uboot.elf
+
+qemu-img create -o backing_file=./dqib_riscv64-virt/image.qcow2,backing_fmt=qcow2 -f qcow2 overlay.qcow2
+
--- /dev/null
+[package]
+name = "rel-interpret"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+anyhow = "1.0.100"
+rel-ir = { workspace = true }
+syscalls = "0.7.0"
--- /dev/null
+use rel_ir::*;
+
+use std::collections::HashMap;
+
+use syscalls::*;
+use anyhow::{Result, anyhow};
+
+/// Executes an [`IRObject`] directly, without generating assembly.
+pub struct Interpreter<'a> {
+ /// The program being executed.
+ module: &'a IRObject,
+ /// Operand stack; every cell is a `u64`.
+ data_stack: Vec<u64>,
+ /// Index into `module.text` of the instruction being executed.
+ instruction_pointer: usize,
+ /// Indices of the `Call` instructions that have not returned yet.
+ return_stack: Vec<usize>,
+ /// Maps each label name to its index in `module.text`.
+ labels: HashMap<String, usize>,
+ /// Maps each string label from `module.data` to its contents.
+ strings: HashMap<String, String>,
+}
+
+impl<'a> Interpreter<'a> {
+ /// Builds an interpreter positioned at the `main` label of `ir_mod`.
+ ///
+ /// Label positions and string definitions are indexed up front.
+ ///
+ /// # Errors
+ /// Fails when the text section has no `main` label.
+ pub fn new(ir_mod: &'a IRObject) -> Result<Self> {
+     // A label that appears more than once keeps its last position.
+     let labels: HashMap<String, usize> = ir_mod
+         .text
+         .iter()
+         .enumerate()
+         .filter_map(|(position, instruction)| match instruction {
+             IR::Label(name) => Some((name.clone(), position)),
+             _ => None,
+         })
+         .collect();
+     let instruction_pointer = *labels.get("main").ok_or(anyhow!("no main word found!"))?;
+
+     // Anything in the data section that is not a string definition is ignored.
+     let strings: HashMap<String, String> = ir_mod
+         .data
+         .iter()
+         .filter_map(|entry| match entry {
+             IR::StringDef(label, string) => Some((label.clone(), string.clone())),
+             _ => None,
+         })
+         .collect();
+
+     Ok(Self {
+         module: ir_mod,
+         data_stack: vec![],
+         instruction_pointer,
+         return_stack: vec![],
+         labels,
+         strings,
+     })
+ }
+
+ fn process_syscall_result(&mut self, result: Result<usize, Errno>) {
+ match result {
+ Ok(result) => {
+ self.data_stack.push(result as u64);
+ self.data_stack.push(0);
+ }
+ Err(err) => {
+ self.data_stack.push(0);
+ self.data_stack.push(err.into_raw() as u64);
+ }
+ }
+ }
+
+ /// Removes and returns the top data-stack cell, or fails when the stack is empty.
+ fn ds_pop(&mut self) -> Result<u64> {
+     match self.data_stack.pop() {
+         Some(value) => Ok(value),
+         None => Err(anyhow!("popping from empty data stack")),
+     }
+ }
+
+ /// Runs the program from the current instruction pointer until a `Ret` executes with an
+ /// empty return stack.
+ ///
+ /// Truth convention: `Equals` and `GreaterThan` push `0` when the comparison holds and
+ /// all-ones otherwise; `If` runs its first branch when it pops `0`.
+ ///
+ /// Arithmetic wraps on overflow, so values that were pushed as negative numbers behave
+ /// like two's-complement integers instead of aborting the interpreter.
+ ///
+ /// # Errors
+ /// Fails on data-stack underflow, on a call to an unknown label, on an `If`/`Else` with no
+ /// matching `EndIf`, and when execution runs past the end of the text section.
+ pub fn run(&mut self) -> Result<()> {
+     // Copy the shared reference out so that holding an instruction does not borrow `self`.
+     let module = self.module;
+     loop {
+         let instruction = module.text.get(self.instruction_pointer).ok_or_else(|| {
+             anyhow!(
+                 "instruction pointer {} is past the end of the program",
+                 self.instruction_pointer
+             )
+         })?;
+         match instruction {
+             IR::Label(_) => {},
+             IR::Call(name) => {
+                 // The call site is saved; the shared increment below steps past it on return.
+                 self.return_stack.push(self.instruction_pointer);
+                 self.instruction_pointer = *self
+                     .labels
+                     .get(name)
+                     .ok_or_else(|| anyhow!("calling undefined word `{}`", name))?;
+             },
+             IR::Ret => match self.return_stack.pop() {
+                 Some(call_site) => self.instruction_pointer = call_site,
+                 // Returning from the outermost word ends the program.
+                 None => return Ok(()),
+             },
+             IR::StackPush(num) => {
+                 self.data_stack.push(*num);
+             },
+             IR::AddU64 => {
+                 let a = self.ds_pop()?;
+                 let b = self.ds_pop()?;
+                 self.data_stack.push(a.wrapping_add(b));
+             },
+             IR::SubtractU64 => {
+                 let b = self.ds_pop()?;
+                 let a = self.ds_pop()?;
+                 self.data_stack.push(a.wrapping_sub(b));
+             },
+             IR::PutN => {
+                 // Prints the top cell without removing it.
+                 println!("{}", self.data_stack.last().ok_or_else(|| anyhow!("empty data stack"))?);
+             },
+             IR::Dup => {
+                 let top = *self.data_stack.last().ok_or_else(|| anyhow!("empty data stack"))?;
+                 self.data_stack.push(top);
+             },
+             IR::Swap => {
+                 let a = self.ds_pop()?;
+                 let b = self.ds_pop()?;
+                 self.data_stack.push(a);
+                 self.data_stack.push(b);
+             },
+             IR::Drop => {
+                 self.data_stack.pop();
+             },
+             IR::Equals => {
+                 let a = self.ds_pop()?;
+                 let b = self.ds_pop()?;
+                 self.data_stack.push(if a == b { 0 } else { u64::MAX });
+             },
+             IR::GreaterThan => {
+                 let b = self.ds_pop()?;
+                 let a = self.ds_pop()?;
+                 self.data_stack.push(if a > b { 0 } else { u64::MAX });
+             },
+             IR::BitwiseOr => {
+                 let b = self.ds_pop()?;
+                 let a = self.ds_pop()?;
+                 self.data_stack.push(a | b);
+             },
+             IR::If => {
+                 if self.ds_pop()? != 0 {
+                     // Condition is false: resume after this block's `Else` or `EndIf`.
+                     self.instruction_pointer =
+                         self.find_branch_end(self.instruction_pointer + 1, true)?;
+                 }
+             },
+             IR::Else => {
+                 // Reached only after executing the first branch, so skip the else branch.
+                 self.instruction_pointer =
+                     self.find_branch_end(self.instruction_pointer + 1, false)?;
+             },
+             IR::EndIf => {},
+             IR::Sys0 => {
+                 let call_num = Sysno::from(self.ds_pop()? as i32);
+                 self.process_syscall_result(unsafe { syscall!(call_num) });
+             },
+             IR::Sys1 => {
+                 let call_num = Sysno::from(self.ds_pop()? as i32);
+                 let a1 = self.ds_pop()?;
+                 self.process_syscall_result(unsafe { syscall!(call_num, a1) });
+             },
+             IR::Sys2 => {
+                 let call_num = Sysno::from(self.ds_pop()? as i32);
+                 let a2 = self.ds_pop()?;
+                 let a1 = self.ds_pop()?;
+                 self.process_syscall_result(unsafe { syscall!(call_num, a1, a2) });
+             },
+             IR::Sys3 => {
+                 let call_num = Sysno::from(self.ds_pop()? as i32);
+                 let a3 = self.ds_pop()?;
+                 let a2 = self.ds_pop()?;
+                 let a1 = self.ds_pop()?;
+                 self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3) });
+             },
+             IR::Sys4 => {
+                 let call_num = Sysno::from(self.ds_pop()? as i32);
+                 let a4 = self.ds_pop()?;
+                 let a3 = self.ds_pop()?;
+                 let a2 = self.ds_pop()?;
+                 let a1 = self.ds_pop()?;
+                 self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4) });
+             },
+             IR::Sys5 => {
+                 let call_num = Sysno::from(self.ds_pop()? as i32);
+                 let a5 = self.ds_pop()?;
+                 let a4 = self.ds_pop()?;
+                 let a3 = self.ds_pop()?;
+                 let a2 = self.ds_pop()?;
+                 let a1 = self.ds_pop()?;
+                 self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5) });
+             },
+             IR::Sys6 => {
+                 println!("stack: {:?}", self.data_stack);
+                 let call_num = Sysno::from(self.ds_pop()? as i32);
+                 let a6 = self.ds_pop()?;
+                 let a5 = self.ds_pop()?;
+                 let a4 = self.ds_pop()?;
+                 let a3 = self.ds_pop()?;
+                 let a2 = self.ds_pop()?;
+                 let a1 = self.ds_pop()?;
+                 self.process_syscall_result(unsafe { syscall!(call_num, a1, a2, a3, a4, a5, a6) });
+             },
+             _ => {
+                 println!("Instruction not implemented.");
+             },
+         }
+         self.instruction_pointer += 1;
+     }
+ }
+
+ /// Returns the index of the instruction that closes the conditional branch starting at
+ /// `start`: the matching `EndIf`, or the matching `Else` when `stop_at_else` is set.
+ /// Nested `If` ... `EndIf` blocks in between are skipped as a unit.
+ fn find_branch_end(&self, start: usize, stop_at_else: bool) -> Result<usize> {
+     let mut depth = 0usize;
+     for (position, instruction) in self.module.text.iter().enumerate().skip(start) {
+         match instruction {
+             IR::If => depth += 1,
+             IR::Else if depth == 0 && stop_at_else => return Ok(position),
+             IR::EndIf => {
+                 if depth == 0 {
+                     return Ok(position);
+                 }
+                 depth -= 1;
+             },
+             _ => {},
+         }
+     }
+     Err(anyhow!("`if` without a matching `endif`"))
+ }
+}
--- /dev/null
+[package]
+name = "rel-ir"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+serde = "1.0.228"
+serde_derive = "1.0.228"
+serde_yaml = "0.9.34"
--- /dev/null
+use serde_yaml::{from_str, to_string, Error};
+use serde_derive::{Serialize, Deserialize};
+
+/// One instruction (or data definition) of the intermediate representation.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub enum IR {
+ /// Marks the start of a word; the target of `Call`.
+ Label(String),
+ /// Calls the word with the given label.
+ Call(String),
+ /// Returns from the current word.
+ Ret,
+ /// Pushes a literal cell onto the data stack.
+ StackPush(u64),
+ StackPushString(String), // refers to string label, not the string itself
+ StringDef(String, String), // first is string label, second is string value
+
+ // These next ones should always be inlined, so they're in IR.
+ Load, // @ ( addr -- x ) -- Fetch memory contents at addr
+ Store, // ! ( x addr -- ) -- Store x at addr
+
+ // These ones might not be inlined, but should be built-in, so a compiler might
+ // turn this into `Call(String)` before translating to assembly/machine-code, but
+ // an IR interpreter may just execute them.
+ AddU64,
+ SubtractU64,
+ MultiplyU64,
+ DivideU64,
+ ModU64,
+ Equals,
+ GreaterThan,
+ BitwiseOr,
+ Dup,
+ Swap,
+ Drop,
+ Over,
+ PutS,
+ PutN,
+ // Conditional block markers: `If` ... [`Else` ...] `EndIf`.
+ If,
+ Else,
+ EndIf,
+
+ // System calls
+ // The digit is the number of arguments passed in addition to the call number.
+ Sys0,
+ Sys1,
+ Sys2,
+ Sys3,
+ Sys4,
+ Sys5,
+ Sys6,
+}
+
+// This is like an .o file.
+/// A compiled program: instructions plus the data they refer to.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct IRObject {
+ /// Instructions, in execution order within each labelled word.
+ pub text: Vec<IR>,
+ /// Data definitions (the users in this workspace only accept `IR::StringDef`).
+ pub data: Vec<IR>,
+}
+
+impl IRObject {
+    /// Serializes this object to YAML text.
+    pub fn to_s(&self) -> Result<String, Error> {
+        to_string::<Self>(self)
+    }
+
+    /// Parses an object from YAML text produced by [`IRObject::to_s`].
+    pub fn from_s(source: &str) -> Result<Self, Error> {
+        from_str::<Self>(source)
+    }
+}
--- /dev/null
+[package]
+name = "relc"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+rel-ir = { workspace = true }
+rel-interpret = { workspace = true }
+anyhow = "1.0.100"
--- /dev/null
+use crate::parser::Module;
+use crate::tokenizer::{Token, tokenize};
+use rel_ir::*;
+
+use std::collections::{HashSet, HashMap};
+use std::path::PathBuf;
+use std::rc::Rc;
+
+use anyhow::{Result, bail};
+
+// Builds an `IR::StackPush` from a reference to a numeric token value.
+// One-argument form: cast straight to `u64`.
+// Two-argument form: cast through `$num_typ` first (used with the unsigned type of the
+// same width for narrow signed tokens, so the value is not sign-extended to 64 bits).
+macro_rules! push_num {
+ ($num:ident) => { IR::StackPush(*$num as u64) };
+ ($num:ident, $num_typ:ty) => { IR::StackPush(*$num as $num_typ as u64) };
+}
+
+/// IR generated for a single source file, before modules are merged.
+#[derive(Debug, Default)]
+struct IRModule {
+ /// String definitions created for this file's string literals.
+ data: Vec<IR>,
+ /// Instructions for every word of this file, with unprefixed labels.
+ text: Vec<IR>,
+ /// Modules this file imports, in import order.
+ imports: Vec<Rc<IRModule>>,
+ /// Names of the words this file exports.
+ exports: Vec<String>,
+ /// Path this module was read from.
+ source_file: PathBuf,
+ /// Number used in the `_m<number>_` label prefix.
+ number: usize,
+}
+
+impl IRModule {
+    /// Returns the prefixed label that a call to `name` from this module resolves to.
+    ///
+    /// When several imports export `name`, the one imported last wins; when none does,
+    /// the name is assumed to be a word of this module.
+    fn get_label_for_call(&self, name: &String) -> String {
+        // TODO check if it's even a word locally. If not, bail.
+        let owner = self
+            .imports
+            .iter()
+            .rev()
+            .find(|imported| imported.exports.contains(name))
+            .map_or(self.number, |imported| imported.number);
+        format!("_m{}_{}", owner, name)
+    }
+
+    /// Returns the prefixed label for a word defined in this module.
+    fn get_label(&self, name: &String) -> String {
+        let module_number = self.number;
+        format!("_m{}_{}", module_number, name)
+    }
+}
+
+/// State shared while loading a program and merging its modules into one object.
+#[derive(Default)]
+struct ImportTree {
+ /// Merged data section, filled by `collapse`.
+ data: Vec<IR>,
+ /// Merged text section, filled by `collapse`.
+ text: Vec<IR>,
+ /// Every module loaded so far, keyed by its source path.
+ all_modules: HashMap<PathBuf, Rc<IRModule>>,
+ /// Every exported name seen in any module.
+ all_exports: HashSet<String>,
+ /// The module loaded with `is_entrypoint` set.
+ entrypoint: Rc<IRModule>,
+ /// Number handed to the next generated module.
+ module_count: usize,
+ /// Source paths consulted by `collapse` to avoid emitting a module twice.
+ collapse_seen: HashSet<PathBuf>,
+}
+
+impl ImportTree {
+ /// Loads the module named by `specifier`, reusing it when it was loaded before.
+ ///
+ /// A relative `specifier` is resolved against `importer_dir` and canonicalized; an
+ /// absolute one is used as given. The module loaded with `is_entrypoint` set becomes
+ /// the tree's entrypoint.
+ ///
+ /// # Errors
+ /// Fails when the path cannot be resolved or read, or when tokenizing or parsing fails.
+ fn import(&mut self, importer_dir: &PathBuf, specifier: &str, is_entrypoint: bool) -> Result<Rc<IRModule>> {
+     let requested = PathBuf::from(specifier);
+     let path = if requested.is_relative() {
+         importer_dir.join(requested).canonicalize()?
+     } else {
+         requested
+     };
+
+     if let Some(known) = self.all_modules.get(&path) {
+         return Ok(Rc::clone(known));
+     }
+
+     let contents = std::fs::read_to_string(&path)?;
+     let tokens = tokenize(&contents)?;
+     let parsed = Module::parse(tokens, is_entrypoint)?;
+
+     let module = Rc::new(self.generate_internal(path, &parsed));
+     self.all_modules.insert(module.source_file.clone(), Rc::clone(&module));
+     if is_entrypoint {
+         self.entrypoint = Rc::clone(&module);
+     }
+     Ok(module)
+ }
+
+ /// Lowers a parsed module to IR and loads everything it imports.
+ ///
+ /// Imports are resolved relative to the directory of `path`. An import that fails to
+ /// load is reported on stderr and skipped. Imports are loaded before this module takes
+ /// its number, so imported modules get lower numbers.
+ ///
+ /// Every word becomes `Label(name)`, its instructions, then `Ret`. Each string literal
+ /// adds a `StringDef` labelled `string_<n>`, where `n` counts the literals that came
+ /// before it in this module (the first one is `string_0`).
+ fn generate_internal(&mut self, path: PathBuf, module: &Module) -> IRModule {
+     // Eventually these will end up being sections in assembly
+     let mut text = vec![];
+     let mut data = vec![];
+
+     let mut imports = vec![];
+     if let Some(parent_path) = path.parent() {
+         let parent_dir = parent_path.to_path_buf();
+         for imported in &module.imports {
+             match self.import(&parent_dir, imported, false) {
+                 Ok(imported_module) => imports.push(imported_module),
+                 Err(msg) => eprintln!("{}", msg),
+             }
+         }
+     }
+
+     let exports: Vec<String> = module.exports.iter().map(|name| name.to_string()).collect();
+     self.all_exports.extend(exports.iter().cloned());
+
+     for def in &module.words {
+         text.push(IR::Label(def.name.to_string()));
+         for inst in &def.instructions {
+             let mapped_ir = match inst {
+                 Token::Word(word) => {
+                     match *word {
+                         "@" => IR::Load,
+                         "!" => IR::Store,
+                         "dup" => IR::Dup,
+                         "swap" => IR::Swap,
+                         "drop" => IR::Drop,
+                         "over" => IR::Over,
+                         "puts" => IR::PutS,
+                         "putn" => IR::PutN,
+                         "if" => IR::If,
+                         "else" => IR::Else,
+                         "endif" => IR::EndIf,
+                         "=" => IR::Equals,
+                         ">" => IR::GreaterThan,
+                         "+" => IR::AddU64,
+                         "-" => IR::SubtractU64,
+                         "*" => IR::MultiplyU64,
+                         "/" => IR::DivideU64,
+                         "%" => IR::ModU64,
+                         "|" => IR::BitwiseOr,
+                         "sys0" => IR::Sys0,
+                         "sys1" => IR::Sys1,
+                         "sys2" => IR::Sys2,
+                         "sys3" => IR::Sys3,
+                         "sys4" => IR::Sys4,
+                         "sys5" => IR::Sys5,
+                         "sys6" => IR::Sys6,
+                         // TODO num type specfic math like `+:i32`, etc.
+                         _ => IR::Call(String::from(*word))
+                     }
+                 },
+                 Token::String(literal) => {
+                     // The index is the number of strings already defined; subtracting one
+                     // here underflowed on the first literal of a module.
+                     let string_label = format!("string_{}", data.len());
+                     data.push(IR::StringDef(string_label.clone(), String::from(*literal)));
+                     IR::StackPushString(string_label)
+                 },
+                 Token::NumU8(num) => push_num!(num),
+                 Token::NumI8(num) => push_num!(num, u8),
+                 Token::NumU16(num) => push_num!(num),
+                 Token::NumI16(num) => push_num!(num, u16),
+                 Token::NumU32(num) => push_num!(num),
+                 Token::NumI32(num) => push_num!(num, u32),
+                 Token::NumU64(num) => push_num!(num),
+                 Token::NumI64(num) => push_num!(num),
+                 Token::NumF32(num) => push_num!(num),
+                 Token::NumF64(num) => push_num!(num),
+             };
+             text.push(mapped_ir);
+         }
+         text.push(IR::Ret);
+     }
+
+     let number = self.module_count;
+     self.module_count += 1;
+
+     IRModule {
+         text,
+         data,
+         imports,
+         exports,
+         source_file: path,
+         number,
+     }
+ }
+
+ /// Appends `module`, preceded by everything it imports, to the merged data and text.
+ ///
+ /// Each module is emitted at most once, however many modules import it; emitting it
+ /// again would define the same labels twice. Labels, calls and string labels are
+ /// rewritten to carry the module number, except for the entrypoint's `main` label,
+ /// which keeps its name.
+ ///
+ /// # Errors
+ /// Fails when a module's data section holds anything other than a string definition.
+ fn collapse(&mut self, module: Rc<IRModule>) -> Result<()> {
+     // `insert` returns false when the path was already recorded.
+     if !self.collapse_seen.insert(module.source_file.clone()) {
+         return Ok(());
+     }
+
+     for imported in &module.imports {
+         self.collapse(Rc::clone(imported))?;
+     }
+
+     let is_entrypoint = module.source_file == self.entrypoint.source_file;
+     let module_number = module.number;
+
+     for string in &module.data {
+         let IR::StringDef(name, val) = string else {
+             bail!("non-string data");
+         };
+         let new_name = format!("{}_{}", name, module_number);
+         self.data.push(IR::StringDef(new_name, val.clone()));
+     }
+
+     for instruction in &module.text {
+         let new_instruction = match instruction {
+             IR::StackPushString(name) => {
+                 IR::StackPushString(format!("{}_{}", name, module_number))
+             },
+             IR::Label(name) if is_entrypoint && name == "main" => instruction.clone(),
+             IR::Label(name) => IR::Label(module.get_label(name)),
+             IR::Call(name) => IR::Call(module.get_label_for_call(name)),
+             _ => instruction.clone(),
+         };
+         self.text.push(new_instruction);
+     }
+
+     Ok(())
+ }
+}
+
+/// Compiles the program whose entrypoint is the file at `path` into one [`IRObject`].
+///
+/// A relative `path` is resolved against the current working directory.
+///
+/// # Errors
+/// Fails when the working directory is unavailable, the entrypoint cannot be loaded, or
+/// merging the modules fails.
+pub fn compile(path: &str) -> Result<IRObject> {
+    let working_dir = std::env::current_dir()?;
+    let mut tree = ImportTree::default();
+    let entry = tree.import(&working_dir, path, true)?;
+    tree.collapse(entry)?;
+    // TODO remove unused words
+    let ImportTree { data, text, .. } = tree;
+    Ok(IRObject { data, text })
+}
--- /dev/null
+mod tokenizer;
+mod parser;
+mod ir;
+mod riscv_asm_codegen;
+
+use rel_interpret::Interpreter;
+
+use anyhow::Result;
+
+/// Compiles the file named by the first command-line argument and prints the generated
+/// RISC-V assembly on stdout. Panics when no argument is given.
+fn main() -> Result<()> {
+ let filename = std::env::args().nth(1).expect("must provide a file to compile");
+ let module = ir::compile(&filename)?;
+ // let mut interp = Interpreter::new(&module)?;
+ // interp.run()?;
+ // 4096 is the size, in bytes, reserved for the generated program's data stack.
+ let mut generator = riscv_asm_codegen::CodeGen::new(&module, 4096);
+ println!("{}", generator.assembly()?);
+ Ok(())
+}
--- /dev/null
+use crate::tokenizer::Token;
+use anyhow::{Result, bail};
+
+/// A named word and the tokens that make up its body.
+#[derive(Debug)]
+pub struct WordDefinition<'a> {
+ /// Name given after `:`.
+ pub name: &'a str,
+ /// Tokens between the name and the closing `;`.
+ pub instructions: Vec<Token<'a>>,
+}
+
+/// The parsed contents of one source file.
+#[derive(Debug)]
+pub struct Module<'a> {
+ /// Word definitions; for an entrypoint the last one is the implicit `main`.
+ pub words: Vec<WordDefinition<'a>>,
+ /// Path strings given to `import`.
+ pub imports: Vec<&'a str>,
+ /// Word names given to `export`.
+ pub exports: Vec<&'a str>,
+}
+
+impl<'a> Module<'a> {
+ /// Groups a token stream into word definitions, imports and exports.
+ ///
+ /// * `: name ... ;` defines a word.
+ /// * `import "path"` records an import; `export name` records an export. Each keyword
+ ///   applies to the single token that follows it.
+ /// * Every other top-level token belongs to the implicit `main` word, which is added
+ ///   to the result only when `is_entrypoint` is set.
+ ///
+ /// # Errors
+ /// Fails on a nested or unfinished word definition, a non-word definition name, a stray
+ /// `;`, and an `import` or `export` that is not followed by a string or a word.
+ pub fn parse(input: Vec<Token<'a>>, is_entrypoint: bool) -> Result<Self> {
+     // Operand still owed to the previous top-level keyword.
+     enum Pending {
+         Nothing,
+         ImportPath,
+         ExportName,
+     }
+
+     let mut words = vec![];
+     let mut main = vec![];
+     let mut exports = vec![];
+     let mut imports = vec![];
+     let mut current_word: Option<WordDefinition> = None;
+     let mut about_to_start_word_def = false;
+     let mut pending = Pending::Nothing;
+
+     for token in input {
+         if about_to_start_word_def {
+             if let Token::Word(name) = token {
+                 current_word = Some(WordDefinition {
+                     name,
+                     instructions: vec![],
+                 });
+                 about_to_start_word_def = false;
+                 continue;
+             } else {
+                 bail!("{:?} is not a valid word name!", token);
+             }
+         } else if let Token::Word(word) = token {
+             if word == ":" {
+                 if current_word.is_some() {
+                     bail!("can't define words inside word definitions!");
+                 }
+                 about_to_start_word_def = true;
+                 continue;
+             }
+             if word == ";" {
+                 match current_word.take() {
+                     Some(finished) => {
+                         words.push(finished);
+                         continue;
+                     },
+                     None => bail!("`;` must be at the end of a word definition"),
+                 }
+             }
+         }
+
+         if let Some(ref mut current_word) = current_word {
+             current_word.instructions.push(token);
+             continue;
+         }
+
+         // Taking the pending state resets it, so a keyword never affects more than the
+         // token right after it.
+         match (std::mem::replace(&mut pending, Pending::Nothing), token) {
+             (Pending::ImportPath, Token::String(path)) => imports.push(path),
+             (Pending::ImportPath, other) => {
+                 bail!("`import` must be followed by a string, found {:?}", other)
+             },
+             (Pending::ExportName, Token::Word(name)) => exports.push(name),
+             (Pending::ExportName, other) => {
+                 bail!("`export` must be followed by a word name, found {:?}", other)
+             },
+             (Pending::Nothing, Token::Word("import")) => pending = Pending::ImportPath,
+             (Pending::Nothing, Token::Word("export")) => pending = Pending::ExportName,
+             (Pending::Nothing, other) => main.push(other),
+         }
+     }
+
+     if about_to_start_word_def || current_word.is_some() {
+         bail!("unfinished word definition!");
+     }
+     match pending {
+         Pending::ImportPath => bail!("`import` must be followed by a string"),
+         Pending::ExportName => bail!("`export` must be followed by a word name"),
+         Pending::Nothing => {},
+     }
+
+     if is_entrypoint {
+         words.push(WordDefinition {
+             name: "main",
+             instructions: main,
+         });
+     }
+
+     Ok(Module { words, imports, exports })
+ }
+
+ /// Prints each word's name followed by its instructions, one per line (tests only).
+ #[cfg(test)]
+ pub fn debug_print(&self) {
+     self.words.iter().for_each(|word| {
+         println!("{}", word.name);
+         word.instructions
+             .iter()
+             .for_each(|instruction| println!(" {:?}", instruction));
+     });
+ }
+}
+
+
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Smoke test: tokenizes and parses a small program and prints the result.
+ // It only checks that neither step fails; nothing is asserted about the output.
+ #[test]
+ fn try_some_parsing() {
+ let result = Module::parse(crate::tokenizer::tokenize("
+: hello world 16 \"planet\" ;
+: soup chicken 4.5 hello ;
+
+hello soup
+").unwrap(), true).unwrap();
+ result.debug_print();
+ }
+}
--- /dev/null
+use rel_ir::*;
+
+use anyhow::*;
+
+use std::collections::{HashMap, HashSet};
+use std::fmt::Display;
+
+/// Translates an [`IRObject`] into RISC-V assembly text.
+pub struct CodeGen<'a> {
+ /// The program being translated.
+ module: &'a IRObject,
+ /// Size passed to the `.space` directive that reserves the data stack.
+ data_stack_size: usize,
+ /// Output lines collected so far.
+ lines: Vec<String>,
+}
+
+
+// Some inspiration
+// ================
+//
+// * https://github.com/aw/fiveforths/blob/master/docs/REFERENCE.md#registers-list
+// * Except using sp as a more C ABI style stack pointer, and s2 for the data stack
+//
+
+// Implementation Choices
+// ======================
+//
+// Data Stack pointer: s2
+// No return stack pointer (using C ABI, so sp, sorta)
+// Use t0, t1, t2 for temporary values in words
+// Data stack grows down
+
+
+// Defines a method `$name` that emits one assembly line through `self.line`.
+// With no argument types `$src` is emitted as-is; with one or two argument types the
+// method takes that many parameters and substitutes them into `$src` with `format!`.
+macro_rules! asm_macro {
+ ($name:ident, $src:expr) => {
+ fn $name(&mut self) {
+ self.line($src);
+ }
+ };
+ ($name:ident, $src:expr, $arg0:ty) => {
+ fn $name(&mut self, val0: $arg0) {
+ self.line(format!($src, val0));
+ }
+ };
+ ($name:ident, $src:expr, $arg0:ty, $arg1:ty) => {
+ fn $name(&mut self, val0: $arg0, val1: $arg1) {
+ self.line(format!($src, val0, val1));
+ }
+ };
+}
+
+impl<'a> CodeGen<'a> {
+ /// Creates a generator for `ir_mod` with no output yet.
+ ///
+ /// `data_stack_size` is the amount of space reserved for the data stack.
+ pub fn new(ir_mod: &'a IRObject, data_stack_size: usize) -> Self {
+     let lines = Vec::new();
+     Self { module: ir_mod, data_stack_size, lines }
+ }
+
+ /// Appends an indented output line (used for instructions and directives).
+ fn line<S: Display>(&mut self, line: S) {
+ self.lines.push(format!(" {}", line));
+
+ }
+
+ /// Appends an output line with no indentation (labels, section headers, comments).
+ fn label<S: Display>(&mut self, line: S) {
+ self.lines.push(line.to_string());
+ }
+
+ // Data-stack cells are 8 bytes wide (offsets are scaled by 8 and the C helper reads
+ // them as `unsigned long`), so they are moved with the doubleword `ld`/`sd`. The word
+ // forms `lw`/`sw` kept only the low 32 bits and left the upper half of a cell stale.
+ asm_macro!(copy_top_stack_value_to, "ld {}, 0(s2)", &str);
+ asm_macro!(copy_offset_stack_value_to, "ld {}, {}*8(s2)", &str, isize);
+ asm_macro!(copy_to_top_of_stack, "sd {}, 0(s2)", &str);
+ asm_macro!(move_stack_ptr_by_cells, "addi s2, s2, {}*8", isize);
+
+ /// Emits code that loads the top data-stack cell into `reg` and removes it.
+ fn pop_to(&mut self, reg: &str) {
+ self.copy_top_stack_value_to(reg);
+ // The stack grows down, so moving the pointer up by one cell drops the top.
+ self.move_stack_ptr_by_cells(1);
+ }
+
+ /// Emits code that pops one cell per register named in the space-separated `regs`.
+ ///
+ /// The last register listed receives the top of the stack, the one before it the
+ /// next cell down, and so on; the stack pointer is adjusted once at the end.
+ fn pop_some_to(&mut self, regs: &str) {
+     let registers: Vec<&str> = regs.trim().split(" ").collect();
+     for (offset, reg) in registers.iter().rev().enumerate() {
+         self.copy_offset_stack_value_to(reg, offset as isize);
+     }
+     self.move_stack_ptr_by_cells(registers.len() as isize);
+ }
+
+ /// Emits code that pushes the value of `reg` onto the data stack.
+ fn push_from(&mut self, reg: &str) {
+ // Make room first (the stack grows down), then store into the new top cell.
+ self.move_stack_ptr_by_cells(-1);
+ self.copy_to_top_of_stack(reg);
+ }
+
+ /// Emits code that pops operands into `regs`, emits the line `call`, and pushes
+ /// the value of `reg` as the result.
+ fn pop_call_push(&mut self, regs: &str, call: &str, reg: &str) {
+ self.pop_some_to(regs);
+ self.line(call);
+ self.push_from(reg);
+ }
+
+ pub fn assembly(&mut self) -> Result<String>{
+ let mut string_table = HashMap::new();
+
+ // Static strings
+ self.label(".section .rodata\n");
+ for ir in &self.module.data {
+ match ir {
+ IR::StringDef(string_label, some_string) => {
+ string_table.insert(some_string.clone(), string_label);
+ self.label(string_label);
+ self.line(format!(".asciz \"{}\"", some_string)); // should this be .asciz?
+ self.label("");
+ },
+ _ => bail!("Currently only string definitions are supported in the data section.")
+ }
+ }
+
+ // Data stack
+ self.label(".data\n");
+ self.label("data_stack:");
+ self.line(format!(".space {}", self.data_stack_size));
+ self.label(".globl data_stack_end\ndata_stack_end:\n");
+
+ // Code
+ self.label(".text\n");
+ self.label(".align 3\n");
+
+ let mut if_block_count = 0;
+ let mut if_stack = vec![];
+ let mut seen_else = HashSet::new();
+ let mut last_label = "";
+
+ for ir in &self.module.text {
+ match ir {
+ IR::Label(name) => {
+ last_label = name;
+ if name == "main" {
+ self.label(".globl _start"); // TODO is globl necessary?
+ self.label("_start:");
+ self.line("la s2, data_stack_end # set initial data stack pointer");
+ } else {
+ self.label(format!(".globl {}", name));
+ self.label(format!("{}:", name));
+ }
+ self.line("addi sp, sp, -16 # allocate 16 bytes on stack"); // allocate 16 bytes on stack
+ self.line("sw ra, 8(sp) # store return address on stack"); // store return address on stack
+ },
+ IR::Call(name) => {
+ self.label(format!("# call {}", name));
+
+ self.line(format!("call {}", name));
+ },
+ IR::Ret => {
+ if last_label == "main" {
+ self.label("# exit 0 syscall");
+ self.line("li a7, 93");
+ self.line("mv a0, x0");
+ self.line("ecall");
+ } else {
+ self.line("lw ra, 8(sp)"); // load return address from stack
+ self.line("addi sp, sp, 16"); // restore stack pointer
+ self.line("ret");
+ }
+ },
+ IR::Load => {
+ self.copy_top_stack_value_to("t0");
+ self.line(format!("lw {}, 0({})", "t0", "t0")); // deref pointer int t0 to t0
+ self.copy_to_top_of_stack("t0");
+ },
+ IR::Store => { // ( x addr -- )
+ self.copy_top_stack_value_to("t1");
+ self.copy_offset_stack_value_to("t0", 1);
+ self.line("sw t0, 0(t1)"); // store x at addr
+ self.move_stack_ptr_by_cells(2);
+ },
+ IR::StackPush(num) => {
+ self.label(format!("# stackpush {}", num));
+ self.line(format!("li t0, {}", num));
+ self.push_from("t0");
+ },
+ IR::StackPushString(name) => {
+ self.label(format!("# stackpushstring {}", name));
+ self.line(format!("li t0, {}", name));
+ self.push_from("t0");
+ },
+ IR::AddU64 => {
+ self.label("# add");
+ self.pop_call_push("t0 t1", "add t0, t0, t1", "t0");
+ },
+ IR::SubtractU64 => {
+ self.label("# sub");
+ self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
+ },
+ IR::MultiplyU64 => {
+ self.pop_call_push("t0 t1", "mul t0, t0, t1", "t0");
+ },
+ IR::DivideU64 => {
+ self.pop_call_push("t0 t1", "div t0, t0, t1", "t0");
+ },
+ IR::ModU64 => {
+ self.pop_call_push("t0 t1", "rem t0, t0, t1", "t0");
+ },
+ IR::Dup => {
+ self.label("# dup");
+ self.copy_top_stack_value_to("t0");
+ self.push_from("t0");
+ },
+ IR::Swap => {
+ self.label("# swap");
+ self.pop_some_to("t1 t0");
+ self.push_from("t0");
+ self.push_from("t1");
+ },
+ IR::Drop => {
+ self.label("# drop");
+ self.move_stack_ptr_by_cells(1);
+ },
+ IR::Equals => {
+ // Yes, this is the same as subtract, since we're treating 0 as true, and
+ // others as false.
+ self.pop_call_push("t0 t1", "sub t0, t0, t1", "t0");
+ },
+ IR::GreaterThan => {
+ self.label("# >");
+ self.pop_some_to("t0 t1");
+ self.line("sgt t0, t0, t1");
+ self.line("seqz t0, t0"); // remember, 0 is true, others are false
+ self.push_from("t0");
+ },
+ IR::BitwiseOr => {
+ self.pop_call_push("t0 t1", "or t0, t0, t1", "t0");
+ },
+ IR::Sys0 => {
+ self.pop_call_push("a7", "ecall", "a0");
+ },
+ IR::Sys1 => {
+ self.pop_call_push("a0 a7", "ecall", "a0");
+ },
+ IR::Sys2 => {
+ self.pop_call_push("a0 a1 a7", "ecall", "a0");
+ },
+ IR::Sys3 => {
+ self.pop_call_push("a0 a1 a2 a7", "ecall", "a0");
+ },
+ IR::Sys4 => {
+ self.pop_call_push("a0 a1 a2 a3 a7", "ecall", "a0");
+ },
+ IR::Sys5 => {
+ self.pop_call_push("a0 a1 a2 a3 a4 a7", "ecall", "a0");
+ },
+ IR::Sys6 => {
+ self.pop_call_push("a0 a1 a2 a3 a4 a5 a7", "ecall", "a0");
+ },
+ IR::PutN => {
+ self.line("call putn");
+ },
+ // https://cmput229.github.io/229-labs-RISCV/RISC-V-Examples_Public/03-Conditionals/03b-If_Else.html
+ IR::If => {
+ self.label("# if");
+ self.pop_to("t0");
+ self.line(format!("bnez t0, _else_{}", if_block_count));
+ if_stack.push(if_block_count);
+ if_block_count += 1;
+ },
+ IR::Else => {
+ self.label("# else");
+ let if_counter = if_stack.last().unwrap().clone();
+ self.line(format!("j _endif_{}", if_counter));
+ self.label(format!("_else_{}:", if_counter));
+ seen_else.insert(if_counter);
+ },
+ IR::EndIf => {
+ self.label("# endif");
+ let stack = &mut if_stack;
+ let if_counter = stack.last().unwrap().clone();
+ if !seen_else.contains(&if_counter) {
+ self.label(format!("_else_{}:", if_counter));
+ } else {
+ self.label(format!("_endif_{}:", if_counter));
+ seen_else.remove(&if_counter);
+ }
+ stack.pop();
+ }
+ _ => bail!("not implemented yet"),
+ }
+ }
+
+ Ok(self.lines.join("\n"))
+ }
+}
+
--- /dev/null
+use anyhow::{Result, anyhow};
+
+#[derive(Debug, Clone)]
+pub enum Token<'a> { // One lexical token; text-carrying variants borrow their slice from the tokenized input.
+ Word(&'a str), // any token that is not recognised as a number or a string literal
+ String(&'a str), // text between a pair of double quotes, quotes excluded, escape sequences left as written
+ NumU8(u8), // number written with a `:u8` suffix
+ NumI8(i8), // number written with a `:i8` suffix
+ NumU16(u16), // number written with a `:u16` suffix
+ NumI16(i16), // number written with a `:i16` suffix
+ NumU32(u32), // number written with a `:u32` suffix
+ NumI32(i32), // number written with a `:i32` suffix
+ NumU64(u64), // `:u64` suffix, or an unsuffixed number without `.` or leading `-`
+ NumI64(i64), // `:i64` suffix, or an unsuffixed number with a leading `-` and no `.`
+ NumF32(f32), // number written with a `:f32` suffix
+ NumF64(f64), // `:f64` suffix, or an unsuffixed number containing `.`
+}
+
+impl<'a> Token<'a>{
+    /// Classifies a whitespace-delimited token as a word or a number.
+    ///
+    /// Rules, in order:
+    /// * a lone `-` is a word;
+    /// * any other token that starts with `-` or with a numeric character is
+    ///   treated as a number;
+    /// * everything else is a word.
+    ///
+    /// A number may carry an explicit type after a colon (`255:u8`,
+    /// `-1.5:f32`). Without a suffix the type is `f64` when the token contains
+    /// a `.`, `i64` when it starts with `-`, and `u64` otherwise.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the numeric part does not parse as the selected
+    /// type, or when the type suffix is not one of the supported names.
+    fn parse_word_or_num(input: &'a str) -> Result<Token<'a>> {
+        if input == "-" {
+            return Ok(Token::Word(input));
+        }
+
+        // we're assuming any token starting with `-` with length greater than one
+        // is a negative number
+        let looks_numeric =
+            input.starts_with('-') || input.chars().next().is_some_and(char::is_numeric);
+        if !looks_numeric {
+            return Ok(Token::Word(input));
+        }
+
+        if input.contains(':') {
+            // Only the first two colon-separated parts are considered.
+            let mut parts = input.split(':');
+            let num = parts.next().ok_or_else(|| anyhow!("no number found"))?;
+            let typ = parts.next().ok_or_else(|| anyhow!("no number type found"))?;
+            return match typ {
+                "u8" => Ok(Token::NumU8(num.parse()?)),
+                "i8" => Ok(Token::NumI8(num.parse()?)),
+                "u16" => Ok(Token::NumU16(num.parse()?)),
+                "i16" => Ok(Token::NumI16(num.parse()?)),
+                "u32" => Ok(Token::NumU32(num.parse()?)),
+                "i32" => Ok(Token::NumI32(num.parse()?)),
+                "u64" => Ok(Token::NumU64(num.parse()?)),
+                "i64" => Ok(Token::NumI64(num.parse()?)),
+                "f32" => Ok(Token::NumF32(num.parse()?)),
+                "f64" => Ok(Token::NumF64(num.parse()?)),
+                // The suffix comes from user source text, so report it
+                // instead of panicking.
+                _ => Err(anyhow!("unknown number type `{}` in `{}`", typ, input)),
+            };
+        }
+
+        if input.contains('.') {
+            Ok(Token::NumF64(input.parse()?))
+        } else if input.starts_with('-') {
+            Ok(Token::NumI64(input.parse()?))
+        } else {
+            Ok(Token::NumU64(input.parse()?))
+        }
+    }
+}
+
+// TODO really want an iterator, not a vector
+/// Splits `input` into tokens that borrow from it.
+///
+/// Recognised forms:
+/// * words and numbers, separated by whitespace (classified by
+///   `Token::parse_word_or_num`);
+/// * string literals between double quotes; a quote preceded by a backslash
+///   does not end the literal, and the literal's text is returned as written;
+/// * doc comments: a `(` token followed by whitespace, up to the next `)`;
+/// * line comments: a backslash followed by whitespace, up to the end of the
+///   line.
+///
+/// An unterminated string literal or comment at the end of the input is
+/// dropped silently.
+///
+/// # Errors
+///
+/// Returns an error when a number token cannot be parsed.
+pub fn tokenize<'a>(input: &'a str) -> Result<Vec<Token<'a>>> {
+    let mut result = vec![];
+    // Byte offset of the first character of the open string literal, if any.
+    let mut string_start: Option<usize> = None;
+    // Byte offset where the word or number currently being read starts.
+    let mut word_or_num_start: Option<usize> = None;
+    let mut last_is_backslash = false;
+    // True whenever the next ordinary character starts a new word.
+    let mut last_is_whitespace = true;
+    let mut in_doc_comment = false;
+    let mut in_line_comment = false;
+    // Byte offset of the final character, to flush a word that ends the input.
+    let last_index = input.char_indices().next_back().map(|(index, _)| index);
+
+    // `char_indices` yields byte offsets, which is what slicing `input` needs.
+    for (index, char) in input.char_indices() {
+        if in_doc_comment {
+            if char == ')' {
+                in_doc_comment = false;
+                last_is_whitespace = true; // not really true, but means don't need space after
+            }
+            continue;
+        }
+
+        if in_line_comment {
+            if char == '\n' {
+                in_line_comment = false;
+                last_is_whitespace = true; // not really true, but means don't need space after
+            }
+            continue;
+        }
+
+        if char == '"' {
+            match string_start {
+                Some(start) if !last_is_backslash => {
+                    result.push(Token::String(&input[start..index]));
+                    string_start = None;
+                }
+                // Escaped quote: stays part of the literal.
+                Some(_) => {}
+                // The quote is one byte long, so the text starts right after it.
+                None => string_start = Some(index + 1),
+            }
+            last_is_backslash = false;
+            last_is_whitespace = false;
+            continue;
+        }
+
+        if string_start.is_some() {
+            last_is_backslash = char == '\\';
+            continue;
+        }
+
+        if char.is_whitespace() {
+            if last_is_backslash {
+                in_line_comment = true;
+                // Forget the backslash token, otherwise its start offset
+                // could leak into a later token.
+                word_or_num_start = None;
+            } else if let Some(start) = word_or_num_start.take() {
+                let token = &input[start..index];
+                if token == "(" {
+                    in_doc_comment = true;
+                } else {
+                    result.push(Token::parse_word_or_num(token)?);
+                }
+            }
+            last_is_whitespace = true;
+            last_is_backslash = false;
+            continue;
+        }
+
+        last_is_backslash = char == '\\';
+
+        if last_is_whitespace { // start of word or num (we already handled strings)
+            word_or_num_start = Some(index);
+            last_is_whitespace = false;
+        }
+
+        // The input ends inside a word: emit it now, since no whitespace
+        // will follow. A trailing comment opener produces no token.
+        if Some(index) == last_index {
+            if let Some(start) = word_or_num_start {
+                let token = &input[start..];
+                if token != "(" && token != "\\" {
+                    result.push(Token::parse_word_or_num(token)?);
+                }
+            }
+        }
+    }
+    Ok(result)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // `Token` does not implement `PartialEq`, so results are compared through
+    // their `Debug` rendering.
+
+    /// Numbers (typed and untyped), words, a string literal and a line comment.
+    #[test]
+    fn try_some_tokenizing() {
+        let result = tokenize("
+
+ \\ soup
+ 2 3.4 - -88 bacon \"hello\" 43:f32 2345:u32 -57:i8 soup
+");
+        println!("result: {:?}", result);
+        let tokens = result.expect("sample input should tokenize");
+        assert_eq!(
+            format!("{:?}", tokens),
+            r#"[NumU64(2), NumF64(3.4), Word("-"), NumI64(-88), Word("bacon"), String("hello"), NumF32(43.0), NumU32(2345), NumI8(-57), Word("soup")]"#
+        );
+    }
+
+    /// Multi-line and inline `( ... )` comments produce no tokens.
+    #[test]
+    fn comments() {
+        let result = tokenize("
+ (
+ foo
+ bar
+ )
+ : baz ( x y -- z )
+ chicken
+ soup
+ ;
+ ");
+        println!("result: {:?}", result);
+        let tokens = result.expect("sample input should tokenize");
+        assert_eq!(
+            format!("{:?}", tokens),
+            r#"[Word(":"), Word("baz"), Word("chicken"), Word("soup"), Word(";")]"#
+        );
+    }
+}