use std::io; use std::io::Read; use std::io::Write; use std::convert::TryInto; /* What is this file? * * This is a tutorial that will show you how to bootstrap an interactive * programming environment from a small amount of code. * * First we will design a virtual computer. * * Then we will design software to run on that computer, to enable REPL-style * interactive programming. * * A REPL is a "Read, Evaluate, Print loop". A REPL lets you type code at * the keyboard and immediately get a result back. You can also define * functions, including functions that change how the environment works in * fundamental ways. */ /* What is Forth? * * Forth is the programming language we will use with our computer. * * Forth was invented by Chuck Moore in the 1960s as a tool for quickly * coming to grips with new computer systems. * * "Let us imagine a situation in which you have access to * your computer. I mean sole user sitting at the board with * all the lights, for some hours at a time. This is * admittedly an atypical situation, but one that can * always be arranged if you are competent, press hard, and * will work odd hours. Can you and the computer write a * program? Can you write a program that didn't descend from * a pre-existing program? You can learn a bit and have a * lot of fun trying." * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970 * https://colorforth.github.io/POL.htm * * As you will see, it does not take much work to get Forth running on a * new machine, including a machine with a completely unfamiliar instruction * set. * * But before we can do any of that we will need a machine. Let's make one. */ /* --------------------------------------------------------------------------- * Part 1 - The Computer * ------------------------------------------------------------------------ */ /* This computer will have a 16-bit CPU. It will be able to address * 2^16 (65536) memory locations, numbered 0 to 65535. */ const ADDRESS_SPACE: usize = 65536; /* The job of a CPU is to load numbers from memory, do math or logic on them, * then write the resulting number back into memory. * * The CPU needs a temporary place to hold numbers while it is working with * them. * * In most CPUs, this place is called a "register". Registers work like * variables in a programming language but there are only a few of them * (between 1 to 32 depending on your CPU). * * On 64-bit ARM the registers are named r0, r1, ..., r15. * On 64-bit Intel they are instead named rax, rbx, ..., etc. * * Having immediate access to many registers is quite handy, but it means * many choices are available to the programmer, or more likely, to the * compiler. And making good choices is Hard. * * A lot of work goes into deciding what variable to store in what register * ("register allocation") and when to dump register contents back into * memory ("spilling"). * * Our CPU avoids these problems by not having registers; instead we store * numbers in a stack. The CPU can only access the value that was most * recently pushed onto the stack. This may seem like a big limitation * right now but you will see ways of dealing with it. * * This makes our CPU a "stack machine" as opposed to a "register machine". */ #[derive(Debug)] struct Stack { mem: [u16; N], tos: usize // top-of-stack. } impl Stack { // Add a number to the stack. fn push(&mut self, val: u16) { self.tos = (self.tos.wrapping_add(1)) & (N - 1); /* This stack is fixed-sized and can hold N values. * * When a fixed-size stack fills up, there is a failure case * (stack overflow) that must be handled somehow. * * In this stack, the bitwise & makes it a circular stack, meaning, if * it ever fills up, it will discard the oldest entry instead of * signaling an error. The lack of error handling makes the CPU * simpler. */ self.mem[self.tos] = val; } // Return the most recently pushed number. fn pop(&mut self) -> u16 { let val = self.mem[self.tos]; self.mem[self.tos] = 0; /* You don't have to set the value back to zero. I am only doing * this because it makes makes the stack look nicer when dumped * out with print!(). */ self.tos = (self.tos.wrapping_sub(1)) & (N - 1); return val; } } /* Now that we have a stack let's use one! Or two? * * Why two stacks? * * The first stack will be called the "data stack" and is used instead of * registers as already described. * * The second stack will be called the "return stack". This one holds * subroutine return addresses. Don't worry if you don't know what that * means; we'll get to it later when we talk about the instruction set. * * In addition to stacks we are going to give the CPU a couple more things: * * 1. An "instruction pointer", which holds the memory address of the next * instruction that the CPU will execute. * * 2. To make life simpler we put main memory straight on "the CPU" even * though in a real computer, RAM would be off-chip and accessed through a * data bus. */ struct Core { ram: [u8; ADDRESS_SPACE], /* In our memory, each of the 65536 possible memory addresses will store * one 8-bit byte (u8 data type in Rust). This makes it a 65536 byte * (64 KB) memory. * * We could have chosen to make each memory address store 16-bits instead. * That would make this a "word-addressed memory". * * Instead we are going with the "byte-addressed memory" that is more * conventional in today's computers. This choice is arbitrary. */ ip: u16, // instruction pointer dstack: Stack<16>, // data stack rstack: Stack<32> // return stack } /* Function to initialize the cpu. * There is probably a better idiom for this but I am bad at rust */ fn new_core() -> Core { let c = Core { ram: [0; ADDRESS_SPACE], ip: 0, dstack: Stack {tos: 15, mem: [0; 16]}, rstack: Stack {tos: 31, mem: [0; 32]}}; /* Because these are circular stacks it doesn't matter where top-of-stack * starts off pointing. I arbitrarily set it to the highest index so * the first value pushed will wind up at index 0, again because this * makes the stack look nicer when printed out. */ return c; } /* Now we have a CPU sitting there but it does nothing. * * A working CPU would execute a list of instructions. An instruction is * a number that is a command for the CPU. For example: * * 65522 might mean "add the top two values on the data stack". * 65524 might mean "invert the bits of the top value on the data stack". * * The map of instruction-to-behavior comes from the CPU's * "instruction set" i.e. the set of all possible instructions and their * behaviors. * * Normally you program a CPU by putting instructions into memory and then * telling the CPU the memory address where it can find the first instruction. * * The CPU will: * 1. Fetch the instruction (load it from memory) * 2. Decode the instruction (look it up in the instruction set) * 3. Execute that instruction (do the thing the instruction set said to do) * 4. Move on to the next instruction and repeat. * * So now we will make the CPU do those things. * We'll start off by teaching it how to access memory, and then we will * define the instruction set. */ impl Core { /* Helper function - Read a number from the specified memory address. */ fn load(&self, addr: u16) -> u16 { let a = addr as usize; /* We immediately run into trouble because we are using byte-addressed * memory as mentioned earlier. * * Each memory location stores 8 bits (a byte) * * Our CPU operates on 16 bit values and we want each memory operation * to take 16 bits at a time for efficiency reasons. * * What do we do? * * This CPU chooses to do the following: * - Read the low byte of the 16-bit number from address a * - Read the high byte of the 16-bit number from address a+1 * * 16 bit number in CPU: [00000000 00000001] = 1 * | | * | memory address a = 1 * | * memory address a+1 = 0 * * This is called "little endian" because the low byte comes first. * * We could have just as easily done the opposite: * - Read the high byte of the 16-bit number from address a * - Read the low byte of the 16-bit number from address a+1 * * 16 bit number in CPU: [00000000 00000001] = 1 * | | * | memory address a+1 = 1 * | * memory address a = 0 * * This is called "big endian" because the high byte comes first. */ return u16::from_le_bytes(self.ram[a..=a+1].try_into().unwrap()); /* The le in this function call stands for little-endian. */ } /* Helper function - Write a number to the specified memory address. */ fn store(&mut self, addr: u16, val: u16) { let a = addr as usize; self.ram[a..=a+1].copy_from_slice(&val.to_le_bytes()); } /* With that taken care of, we can get around to defining the CPU's * instruction set. * * Each instruction on this CPU will be the same size, 16 bits, for * the following reasons: * * 1. Instruction fetch always completes in 1 read. You never have to * go back and fetch more bytes. * * 2. If you put the first instruction at an even numbered address then * you know all the rest of the instructions will also be at even * numbered addresses. I will take advantage of this later. * * 3. A variable length encoding would save space but 2 bytes per * instruction is already pretty small so it doesn't matter very much. * * Here are the instructions I picked. * * CALL * ------------------------------------------------------------+---- * | n n n n n n n n n n n n n n n | 0 | * ------------------------------------------------------------+---- * * What CALL does: * --------------- * - Push instruction pointer onto the return stack. * - Set instruction pointer to address nnnnnnnnnnnnnnn0. * * This lets you call a subroutine at any even numbered address * from 0 to 65534. * * Why this is useful: * ------------------- * Together with the return stack, CALL lets you call subroutines. * * A subroutine is a list of instructions that does something * useful and then returns control to the caller. * * For example: * * Address Instruction Meaning * 100 -> 200 Call 200 * 102 -> ??? Add the top two values on the data stack. * ... * 200 -> ??? Push the value 3 onto the data stack * 202 -> ??? Push the value 4 onto the data stack * 204 -> ??? Return to caller * * Don't worry about the other instructions I am using here. I will * define them later. * * I mostly want to point out the three instructions that I put * at address 200 because they are a subroutine, * a small self contained piece of code (6 bytes) that * performs a specific task. * * Do you think it's cool that you can count exactly how many bytes it * took? I think it's cool. * * Here is what happens when the CPU begins execution at address 100. * * Address Data stack Return stack * 100 [] [] <--- About to call subroutine... * 200 [] [102] * 202 [3] [102] * 204 [3 4] [102] <--- About to return from subroutine... * 102 [3 4] [] * 104 [5] [] * * The return stack is there to make sure that returning from a subroutine * picks up where the caller left off. We will talk more about the return * stack later when we talk about the RET instruction. * * Limitations of CALL: * -------------------- * This CPU cannot call an instruction that starts at an odd address. * a.k.a. "unaligned call" is impossible. * * At first this seems like a limitation, but it really isn't. * If you put the first instruction at an even numbered address then * all the rest of the instructions will also be at even numbered * addresses. So this works fine. * * Of course if you intersperse instructions and data in memory... * _________ * ________ |_________| _____________ * |________| Data |_____________| * Instructions More instructions * * ...then you will have to be careful to make sure the second block * of instructions also starts at an even numbered address. * You might need to include an extra byte of data as "padding". * * Data processing instructions * --------------------------------------------+---------------+---- * | 1 1 1 1 1 1 1 1 1 1 1 | x x x x | 0 | * --------------------------------------------+---------------+---- * Sixteen of the even numbers are reserved for additional instructions * that will be be described later. * * The even numbers 1111111111100000 to 1111111111111110 (65504 to 65534) * are reserved for these instructions. This means that CALL 65504 through * CALL 65534 are not possible. Put another way, it is not possible to * call a subroutine living in the top 32 bytes of memory. This is not a * very severe limitation. * * LITERAL * ------------------------------------------------------------+---- * | n n n n n n n n n n n n n n n | 1 | * ------------------------------------------------------------+---- * * What LITERAL does * ----------------- * - Place the value 0nnnnnnnnnnnnnnn on the data stack. * * Why this is useful: * ------------------- * Program will often need to deal with constant numbers. * For example, you might want to add 2 to a memory address (to move * on to the next even-numbered address) or add 32 to a character code * (to convert it to lower-case). These constants have to come from * somewhere. * * Limitations of LITERAL: * ----------------------- * To differentiate it from a call, this instruction is always an * odd number. The trailing 1 is discarded before placing the number on * the data stack. This missing bit means that only 2^15 values can be * represented (0 to 32767). 32768 on up cannot be stored directly. * You would need to do some follow-up math to get these numbers. * The most direct way is to use the INV instruction, described later. */ /* Now that the instruction set is generally described * let's look at the code that implements it */ fn step(&mut self) { // 1. Fetch the instruction. // Also advance ip to point at the next instruction for next time. let opcode = self.load(self.ip); self.ip = self.ip.wrapping_add(2); // 2. Decode and execute the instruction if (opcode >= 0xffe0) && (opcode & 1 == 0) { // Data processing instruction PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self); /* These instructions get looked up in a table. The bit * math converts the instruction code into an index in the * table as follows: * * 0xffe0 --> 0 * 0xffe2 --> 1 * ... * 0xfffe --> 15 * * The table will be described below, and these instructions * explained. */ } else if (opcode & 1) == 1 { // Literal self.dstack.push(opcode >> 1); } else { // Call self.rstack.push(self.ip); self.ip = opcode; } } } // The names of the 16 remaining CPU instructions enum Op { RET = 0xffe0, TOR = 0xffe2, RTO = 0xffe4, LD = 0xffe6, ST = 0xffe8, DUP = 0xffea, SWP = 0xffec, DRP = 0xffee, Q = 0xfff0, ADD = 0xfff2, SFT = 0xfff4, OR = 0xfff6, AND = 0xfff8, INV = 0xfffa, GEQ = 0xfffc, IO = 0xfffe, } type Primitive = fn(&mut Core); // A table of functions for each of the 16 remaining CPU instructions const PRIMITIVES: [Primitive; 16] = [ /* Return-stack instructions */ | x | { // RET - Return from subroutine x.ip = x.rstack.pop() }, | x | { // TOR - Transfer number from data stack to return stack x.rstack.push(x.dstack.pop()) }, | x | { // RTO - Transfer number from return stack to data stack x.dstack.push(x.rstack.pop()) }, /* Memory instructions */ | x | { // LD - Load number from memory address specified on the data stack let a = x.dstack.pop(); x.dstack.push(x.load(a)); }, | x | { // ST - Store number to memory address specified on the data stack let a = x.dstack.pop(); let v = x.dstack.pop(); x.store(a, v); }, /* Stack shuffling instructions * * Remember the problem of "register allocation" mentioned earlier, * and how stack machines are supposed to avoid that problem? Well, * nothing comes for free. Stack machines can only process the top * value(s) on the stack. So sometimes you will have to do some work * to "unbury" a crucial value and move it to the top of the stack. * That's what these instructions are for. * * Their use will become more obvious when we start programming the * machine, soon. */ | x | { // DUP - Duplicate the top number on the data stack let v = x.dstack.pop(); x.dstack.push(v); x.dstack.push(v); }, | x | { // SWP - Exchange the top two numbers on the data stack let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1); x.dstack.push(v2); }, | x | { // DRP - Discard the top number on the data stack let _ = x.dstack.pop(); }, /* Conditional branch instruction */ | x | { /* Q - If the top number on the data stack is zero, skip the next * instruction. * * Note Q is the only "decision-making" instruction that our CPU * has. This means that all "if-then" logic, counted loops, etc. * will be built using Q. */ let f = x.dstack.pop(); if f == 0 { x.ip = x.ip.wrapping_add(2) /* Because all of our instructions are two bytes, adding two * to the instruction pointer skips the next instruction. */ }; }, /* Arithmetic and logic */ | x | { // ADD - Sum the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1.wrapping_add(v2)); }, | x | { /* SFT - Bit shift number left or right by the specified amount. * A positive shift amount will shift left, negative will shift right. */ let amt = x.dstack.pop(); let val = x.dstack.pop(); x.dstack.push( if amt <= 0xf { val << amt } else if amt >= 0xfff0 { val >> (0xffff - amt + 1) } else { 0 } ); }, | x | { // OR - Bitwise-or the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1 | v2); }, | x | { // AND - Bitwise-and the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1 & v2); }, | x | { // INV - Bitwise-invert the top number on the data stack. let v1 = x.dstack.pop(); x.dstack.push(!v1); /* You can use the INV instruction to compensate for the LITERAL * instruction's inability to encode constants 32768 to 65535. * Use two instructions instead: * - LITERAL the complement of your desired constant * - INV * * For example, LITERAL(0) INV yields 65535 (signed -1) * For example, LITERAL(1) INV yields 65534 (signed -2) * etc. */ }, | x | { // GEQ - Unsigned-compare the top two items on the data stack. let v2 = x.dstack.pop(); let v1 = x.dstack.pop(); x.dstack.push(if v1 >= v2 { 0xffff } else { 0 }); }, /* Input/output. * * The CPU needs some way to communicate with the outside world. * * Some machines use memory mapped IO where certain memory addresses are * routed to hardware devices instead of main memory. This machine already * has the full 64K of memory connected so no address space is readily * available for hardware devices. * * Instead we define a separate input-output space of 65536 possible * locations. Each of these possible locations is called an IO "port". * * For a real CPU you could hook up hardware such as a serial * transmitter that sends data to a computer terminal, or just an * output pin controller that is wired to a light bulb. * * This is a fake software CPU so I am going to hook it up to * stdin and stdout. */ | x | { // IO - Write/read a number from/to input/output port. let port = x.dstack.pop(); /* I'm loosely following a convention where even ports are inputs * and odd ports are outputs. But each port acts different. * In a hardware CPU this would not be suitable but it is fine for * a software emulation. */ match port { 0 => { /* Push a character from stdin onto the data stack */ let mut buf: [u8; 1] = [0]; let _ = io::stdin().read(&mut buf); x.dstack.push(buf[0] as u16); /* You are welcome to make your own computer that supports * utf-8, but this one does not. */ } 1 => { /* Pop a character from the data stack to stdout */ let val = x.dstack.pop(); print!("{}", ((val & 0xff) as u8) as char); let _ = io::stdout().flush(); } 2 => { /* Dump CPU status. * Like the front panel on a 1960s-1970s minicomputer. */ println!("{:?} {:?}", x.dstack, x.rstack); let _ = io::stdout().flush(); } _ => {} } } ]; /* --------------------------------------------------------------------------- * Part 2 - The Program * ------------------------------------------------------------------------ */ /* You now have an unfamiliar computer with no software. It sits there doing * nothing. Can you and the computer write a program? * * We are going to need to give the computer a list of instructions, by * which I mean a list of numbers. If we were sitting in front of a * minicomputer in 196x, we would need a copy of the target machine's * instruction set, paper and pencil, and a lot of coffee. * * In 20xx we are fortunate enough to have rust so we will put it to work. * * Regardless, this bootstrapping process isn't going to be very pleasant * so the goal is to make it short. We want a language that can stand on * its own as quickly as possible, so the computer can start helping us * write the program. * * Forth is a weird language but its design decisions make a lot of sense * if you view it as a bootstrapping tool. */ /* * Forth organizes all the computer's memory as a "dictionary". * The dictionary starts at a low address and grows towards high addresses. * It is organized like a linked-list, like this: * * * Code is stored in the dictionary as a list of addresses. * * */ /* Here is the stuff that you would normally be doing with pencil and paper */ struct Dict<'a> { dp: u16, here: u16, c: &'a mut Core } enum Item { Literal(u16), Call(u16), Opcode(Op) } impl From for Item { fn from(a: u16) -> Self { Item::Call(a) } } impl From for Item { fn from(o: Op) -> Self { Item::Opcode(o) } } impl Dict<'_> { fn allot(&mut self, n: u16) { self.here = self.here.wrapping_add(n); } fn comma(&mut self, val: u16) { self.c.store(self.here, val); self.allot(2); } fn emit>(&mut self, val: T) { match val.into() { Item::Call(val) => { self.comma(val) } Item::Opcode(val) => { self.comma(val as u16) } Item::Literal(val) => { assert!(val <= 0x7fff); self.comma((val << 1) | 1) } } } fn name(&mut self, n: u8, val: [u8; 3]) { self.comma(n as u16 | ((val[0] as u16) << 8)); self.comma(val[1] as u16 | ((val[2] as u16) << 8)); } fn entry(&mut self) { let here = self.here; self.comma(self.dp); self.dp = here; } } fn build_dictionary(c: &mut Core) { use Op::*; use Item::*; let mut d = Dict {dp: 0, here: 2, c: c}; macro_rules! forth { ($x:expr) => (d.emit($x)); ($x:expr, $($y:expr),+) => (d.emit($x); forth!($($y),+)) } // key ( -- n ) d.entry(); d.name(3, *b"key"); let key = d.here; forth!(Literal(0), IO, RET); // emit ( n -- ) d.entry(); d.name(4, *b"emi"); let emit = d.here; forth!(Literal(1), IO, RET); // - ( a b -- a-b ) d.entry(); d.name(1, *b"- "); let sub = d.here; forth!(INV, Literal(1), ADD, ADD, RET); let zero = d.here; forth!(Literal(0), RTO, DRP, RET); // 0= ( n -- f ) d.entry(); d.name(2, *b"0= "); let zero_eq = d.here; forth!(Q, zero, Literal(0), INV, RET); // = ( a b -- a=b ) d.entry(); d.name(1, *b"= "); let eq = d.here; forth!(sub, zero_eq, RET); // Advance past whitespace let skip_helper = d.here; forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper); d.entry(); d.name(6, *b"ski"); let skipws = d.here; forth!(skip_helper); // over ( a b -- a b a ) d.entry(); d.name(4, *b"ove"); let over = d.here; forth!(TOR, DUP, RTO, SWP, RET); // 2dup ( a b -- a b a b ) d.entry(); d.name(4, *b"2du"); let twodup = d.here; forth!(over, over, RET); // Buffer for parsing an input word, formatted as Nabcde. let word_buf = d.here; d.allot(6); // min ( a b -- n ) d.entry(); d.name(3, *b"min"); let min = d.here; forth!(twodup, GEQ, Q, SWP, DRP, RET); // c@ ( a -- n ) d.entry(); d.name(2, *b"c@ "); let cld = d.here; forth!(LD, Literal(0xff), AND, RET); // c! ( n a -- ) d.entry(); d.name(2, *b"c! "); let cst = d.here; forth!(DUP, LD, Literal(0xff), INV, AND, SWP, TOR, OR, RTO, ST, RET); // Load 1 letter into buffer. let stchar = d.here; forth!(Literal(word_buf), cld, Literal(1), ADD, DUP, Literal(word_buf), cst, Literal(5), min, Literal(word_buf), ADD, cst, RET); // Load letters into buffer until whitespace is hit again. // Return the whitespace character that was seen. let getcs_helper = d.here; forth!(RTO, DRP, stchar, key, DUP, Literal(32), SWP, GEQ, Q, RET, getcs_helper); d.entry(); d.name(5, *b"get"); let getcs = d.here; forth!(getcs_helper, RET); // word ( -- ) // Not quite standard. d.entry(); d.name(4, *b"wor"); let word = d.here; forth!(Literal(word_buf), DUP, Literal(2), ADD, Literal(0x2020), SWP, ST, Literal(0x2000), SWP, ST, skipws, getcs, DRP, RET); // latest ( -- a ) // Address of "latest" variable. This variable stores the address of // the latest word in the dictionary. let latest_ptr = d.here; d.allot(2); d.entry(); d.name(6, *b"lat"); let latest = d.here; forth!(Literal(latest_ptr), RET); let matches = d.here; forth!(Literal(2), ADD, TOR, Literal(word_buf), DUP, Literal(2), ADD, LD, SWP, LD, RTO, DUP, TOR, LD, Literal(0x0080), INV, AND, eq, SWP, RTO, Literal(2), ADD, LD, eq, AND, RET); let matched = d.here; forth!(Literal(6), ADD, RTO, DRP, RET); let find_helper = d.here; forth!(RTO, DRP, DUP, Literal(0), eq, Q, RET, DUP, matches, Q, matched, LD, find_helper); // find ( -- xt|0 ) d.entry(); d.name(4, *b"fin"); let find = d.here; forth!(latest, LD, find_helper); // ' ( -- xt|0 ) d.entry(); d.name(1, *b"' "); forth!(word, find, RET); /* --- The outer interpreter --- */ // x10 ( n -- n*10 ) d.entry(); d.name(3, *b"x10"); let x10 = d.here; forth!(DUP, DUP, Literal(3), SFT, ADD, ADD, RET); // here ( -- a ) // Address of "here" variable. This variable stores the address of // the first free space in the dictionary let here_ptr = d.here; d.allot(2); d.entry(); d.name(4, *b"her"); let here = d.here; forth!(Literal(here_ptr), RET); // state ( -- a ) // Address of "state" variable. This variable stores -1 if // interpreting or 0 if compiling. let state_ptr = d.here; d.allot(2); d.entry(); d.name(5, *b"sta"); let state = d.here; forth!(Literal(state_ptr), RET); let word_addr = d.here; forth!(Literal(latest_ptr), LD, Literal(2), ADD, RET); // immediate ( -- ) d.entry(); d.name(9 | 0x80, *b"imm"); forth!(word_addr, DUP, LD, Literal(0x0080), OR, SWP, ST, RET); // smudge ( -- ) d.entry(); d.name(6 | 0x80, *b"smu"); let smudge = d.here; forth!(word_addr, DUP, LD, Literal(0x0040), OR, SWP, ST, RET); // unsmudge ( -- ) d.entry(); d.name(8 | 0x80, *b"uns"); let unsmudge = d.here; forth!(word_addr, DUP, LD, Literal(0x0040), INV, AND, SWP, ST, RET); // [ ( -- ) d.entry(); d.name(1 | 0x80, *b"[ "); let lbracket = d.here; forth!(Literal(0), INV, state, ST, RET); // ] ( -- ) d.entry(); d.name(1 | 0x80, *b"] "); let rbracket = d.here; forth!(Literal(0), state, ST, RET); // , ( n -- ) d.entry(); d.name(1, *b", "); let comma = d.here; forth!(here, LD, ST, here, LD, Literal(2), ADD, here, ST, RET); let compile_call = d.here; forth!(DUP, Literal(4), sub, LD, Literal(0x0080), AND, state, LD, OR, Q, RET, comma, RTO, DRP, RET); let compile_lit = d.here; forth!(state, LD, Q, RET, DUP, ADD, Literal(1), ADD, comma, RTO, DRP, RET); let end_num = d.here; forth!(DRP, RTO, DRP, RET); let bad_num = d.here; forth!(DRP, DRP, DRP, Literal(0), INV, RTO, DRP, RET); let number_helper = d.here; forth!(RTO, DRP, DUP, Literal(word_buf), ADD, cld, Literal(48), sub, DUP, Literal(10), GEQ, Q, bad_num, SWP, TOR, SWP, x10, ADD, RTO, DUP, Literal(word_buf), cld, GEQ, Q, end_num, Literal(1), ADD, number_helper); // number ( -- n|-1 ) d.entry(); d.name(6, *b"num"); let number = d.here; forth!(Literal(0), Literal(1), number_helper); // execute ( xt -- ) d.entry(); d.name(7, *b"exe"); let execute = d.here; forth!(TOR, RET); let doit = d.here; forth!(RTO, DRP, compile_call, execute, RET); let bad = d.here; forth!(DRP, Literal(63), emit, RTO, DRP, RET); // dispatch ( xt -- ) d.entry(); d.name(9, *b"int"); let dispatch = d.here; forth!(DUP, Q, doit, DRP, number, DUP, Literal(1), ADD, zero_eq, Q, bad, compile_lit, RET); // quit ( -- ) d.entry(); d.name(4, *b"qui"); let quit = d.here; forth!(word, find, dispatch, quit); // create ( -- ) d.entry(); d.name(6, *b"cre"); let create = d.here; forth!(word, here, LD, latest, LD, comma, latest, ST, Literal(word_buf), DUP, LD, comma, Literal(2), ADD, LD, comma, RET); // : ( -- ) d.entry(); d.name(1, *b": "); forth!(create, smudge, rbracket, RET); // ; ( -- ) d.entry(); d.name(1 | 0x80, *b"; "); forth!(Literal(!(RET as u16)), INV, comma, lbracket, unsmudge, RET); // Finally put the primitives in the dictionary so they can be called directly. d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET); d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET); d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET); d.entry(); d.name(1, *b"@ "); forth!(LD, RET); d.entry(); d.name(1, *b"! "); forth!(ST, RET); d.entry(); d.name(3, *b"dup"); forth!(DUP, RET); d.entry(); d.name(4, *b"swa"); forth!(SWP, RET); d.entry(); d.name(4, *b"dro"); forth!(DRP, RET); d.entry(); d.name(1 | 0x80, *b"? "); // This one only works in-line. forth!(Literal(!(Q as u16)), INV, comma, RET); d.entry(); d.name(1, *b"+ "); forth!(ADD, RET); d.entry(); d.name(5, *b"shi"); forth!(SFT, RET); d.entry(); d.name(2, *b"or "); forth!(OR, RET); d.entry(); d.name(3, *b"and"); forth!(AND, RET); d.entry(); d.name(3, *b"inv"); forth!(INV, RET); d.entry(); d.name(3, *b"u>="); forth!(GEQ, RET); d.entry(); d.name(2, *b"io "); let io = d.here; forth!(IO, RET); d.c.store(latest_ptr, io-6); d.c.store(here_ptr, d.here); d.c.store(state_ptr, 0xffff); d.c.store(0, quit); } fn main() { let mut c = new_core(); build_dictionary(&mut c); c.ip = 0; loop { c.step(); } }