use std::io; use std::io::Read; use std::io::Write; use std::convert::TryInto; /* What is this file? * * This is a tutorial that will show you how to bootstrap an interactive * programming environment from a small amount of code. * * First we will design a virtual computer. * * Then we will design software to run on that computer, to enable REPL-style * interactive programming. * * A REPL is a "Read, Evaluate, Print loop". A REPL lets you type code at * the keyboard and immediately get a result back. You can also define * functions, including functions that change how the environment works in * fundamental ways. */ /* What is Forth? * * Forth is the programming language we will use with our computer. * * Forth was invented by Chuck Moore in the 1960s as a tool for quickly * coming to grips with new computer systems. * * "Let us imagine a situation in which you have access to * your computer. I mean sole user sitting at the board with * all the lights, for some hours at a time. This is * admittedly an atypical situation, but one that can * always be arranged if you are competent, press hard, and * will work odd hours. Can you and the computer write a * program? Can you write a program that didn't descend from * a pre-existing program? You can learn a bit and have a * lot of fun trying." * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970 * https://colorforth.github.io/POL.htm * * As you will see, it does not take much work to get Forth running on a * new machine, including a machine with a completely unfamiliar instruction * set. * * But before we can do any of that we will need a machine. Let's make one. */ /* --------------------------------------------------------------------------- * Part 1 - The Computer * ------------------------------------------------------------------------ */ /* This computer will have a 16-bit CPU. It will be able to address * 2^16 (65536) memory locations, numbered 0 to 65535. */ const ADDRESS_SPACE: usize = 65536; /* The job of a CPU is to load numbers from memory, do math or logic on them, * then write the resulting number back into memory. * * The CPU needs a temporary place to hold numbers while it is working with * them. * * In most CPUs, this place is called a "register". Registers work like * variables in a programming language but there are only a few of them * (most CPUs have between 1 and 32). * * On 64-bit ARM the registers are named r0, r1, ..., r15. * On 64-bit Intel they are instead named rax, rbx, .... * Just in case those names ring any bells. * * Having immediate access to dozens of registers is quite handy, but it means * many choices are available to the programmer, or more likely, to the * compiler. And making good choices is Hard. A lot of work goes into * deciding what variable to store in what register ("register allocation") and * when to dump register contents back into memory ("spilling"). * * Our CPU avoids these problems by not having registers; instead we store * numbers in a stack. The CPU can only access the value that was most * recently pushed onto the stack. This may seem like a big limitation * right now but you will see ways of dealing with it. * * This makes our CPU a "stack machine" as opposed to a "register machine". */ #[derive(Debug)] struct Stack { mem: [u16; N], tos: usize /* top-of-stack */ } impl Stack { /* Add a number to the stack. */ fn push(&mut self, val: u16) { self.tos = (self.tos.wrapping_add(1)) & (N - 1); /* This stack is fixed-size and can hold N values. * * When a fixed-size stack fills up, there is a failure case * (stack overflow) that must be handled somehow. * * In this stack, the bitwise & makes it a circular stack, meaning, if * it ever fills up, it will discard the oldest entry instead of * signaling an error. The lack of error handling makes the CPU * simpler. */ self.mem[self.tos] = val; } /* Return the most recently pushed number. */ fn pop(&mut self) -> u16 { let val = self.mem[self.tos]; self.mem[self.tos] = 0; /* You don't have to set the value back to zero. I am only doing * this because it makes makes the stack look nicer when dumped * out with print!(). */ self.tos = (self.tos.wrapping_sub(1)) & (N - 1); return val; } } /* Now that we have a stack let's use one! Or two? * * Why two stacks? * * The first stack will be called the "data stack" and is used instead of * registers as already described. * * The second stack will be called the "return stack". This one holds * subroutine return addresses. Don't worry if you don't know what that * means; we'll get to it later when we talk about the instruction set. * * In addition to stacks we are going to give the CPU a couple more things: * * 1. An "instruction pointer", which holds the memory address of the next * instruction that the CPU will execute. * * 2. To make life simpler we put main memory straight on "the CPU" even * though in a real computer, RAM would be off-chip and accessed through a * data bus. */ struct Core { ram: [u8; ADDRESS_SPACE], /* In our memory, each of the 65536 possible memory addresses will store * one 8-bit byte (u8 data type in Rust). This makes it a 65536 byte * (64 KB) memory. * * We could have chosen to make each memory address store 16-bits instead. * That would make this a "word-addressed memory". * * Instead we are going with the "byte-addressed memory" that is more * conventional in today's computers. This choice is arbitrary. */ ip: u16, /* instruction pointer */ dstack: Stack<16>, /* data stack */ rstack: Stack<32> /* return stack */ } /* Helper to initialize the cpu. * There is probably a better idiom for this but I am bad at rust */ fn new_core() -> Core { let c = Core { ram: [0; ADDRESS_SPACE], ip: 0, dstack: Stack {tos: 15, mem: [0; 16]}, rstack: Stack {tos: 31, mem: [0; 32]}}; /* Because these are circular stacks it doesn't matter where top-of-stack * starts off pointing. I arbitrarily set it to the highest index so * the first value pushed will wind up at index 0, again because this * makes the stack look nicer when printed out. */ return c; } /* Now we have a CPU sitting there but it does nothing. * * A working CPU would execute a list of instructions. An instruction is * a number that is a command for the CPU. For example: * * 65522 might mean "add the top two values on the data stack". * 65524 might mean "invert the bits of the top value on the data stack". * * The map of instruction-to-behavior comes from the CPU's * "instruction set" i.e. the set of all possible instructions and their * behaviors. * * Normally you program a CPU by putting instructions into memory and then * telling the CPU the memory address where it can find the first instruction. * * The CPU will: * 1. Fetch the instruction (load it from memory) * 2. Decode the instruction (look it up in the instruction set) * 3. Execute that instruction (do the thing the instruction set said to do) * 4. Move on to the next instruction and repeat. * * So now we will make the CPU do those things. * We'll start off by teaching it how to access memory, and then we will * define the instruction set. */ impl Core { /* Helper to read a number from the specified memory address. */ fn load(&self, addr: u16) -> u16 { let a = addr as usize; /* We immediately run into trouble because we are using byte-addressed * memory as mentioned earlier. * * Each memory location stores 8 bits (a byte) * * Our CPU operates on 16 bit values and we want each memory operation * to take 16 bits at a time for efficiency reasons. * * What do we do? * * This CPU chooses to do the following: * - Read the low byte of the 16-bit number from address a * - Read the high byte of the 16-bit number from address a+1 * * 16 bit number in CPU: [00000000 00000001] = 1 * | | * | memory address a = 1 * | * memory address a+1 = 0 * * This is called "little endian" because the low byte comes first. * * We could have just as easily done the opposite: * - Read the high byte of the 16-bit number from address a * - Read the low byte of the 16-bit number from address a+1 * * 16 bit number in CPU: [00000000 00000001] = 1 * | | * | memory address a+1 = 1 * | * memory address a = 0 * * This is called "big endian" because the high byte comes first. */ return u16::from_le_bytes(self.ram[a..=a+1].try_into().unwrap()); /* The le in this function call stands for little-endian. */ } /* Helper to write a number to the specified memory address. */ fn store(&mut self, addr: u16, val: u16) { let a = addr as usize; self.ram[a..=a+1].copy_from_slice(&val.to_le_bytes()); } /* With that taken care of, we can get around to defining the CPU's * instruction set. * * Each instruction on this CPU will be the same size, 16 bits, for * the following reasons: * * 1. Instruction fetch always completes in 1 read. You never have to * go back and fetch more bytes. * * 2. If you put the first instruction at an even numbered address then * you know all the rest of the instructions will also be at even * numbered addresses. I will take advantage of this later. * * 3. A variable length encoding would save space but 2 bytes per * instruction is already pretty small so it doesn't matter very much. * * Here are the instructions I picked. * * CALL * ------------------------------------------------------------+---- * | n n n n n n n n n n n n n n n | 0 | * ------------------------------------------------------------+---- * * What CALL does: * --------------- * - Push instruction pointer onto the return stack. * - Set instruction pointer to address nnnnnnnnnnnnnnn0. * * This lets you call a subroutine at any even numbered address * from 0 to 65534. * * Why this is useful: * ------------------- * Together with the return stack, CALL lets you call subroutines. * * A subroutine is a list of instructions that does something * useful and then returns control to the caller. * * For example: * * Address Instruction Meaning * 100 -> 200 Call 200 * 102 -> ??? Add the top two values on the data stack. * ... * 200 -> ??? Push the value 3 onto the data stack * 202 -> ??? Push the value 4 onto the data stack * 204 -> ??? Return to caller * * Don't worry about the other instructions I am using here. I will * define them later. * * I mostly want to point out the three instructions that I put * at address 200 because they are a subroutine, * a small self contained piece of code (6 bytes) that * performs a specific task. * * Do you think it's cool that you can count exactly how many bytes it * took? I think it's cool. * * Here is what happens when the CPU begins execution at address 100. * * Address Data stack Return stack * 100 [] [] <--- About to call subroutine... * 200 [] [102] * 202 [3] [102] * 204 [3 4] [102] <--- About to return from subroutine... * 102 [3 4] [] * 104 [5] [] * * The return stack is there to make sure that returning from a subroutine * picks up where the caller left off. We will talk more about the return * stack later when we talk about the RET instruction. * * Limitations of CALL: * -------------------- * This CPU cannot call an instruction that starts at an odd address. * a.k.a. "unaligned call" is impossible. * * At first this seems like a limitation, but it really isn't. * If you put the first instruction at an even numbered address then * all the rest of the instructions will also be at even numbered * addresses. So this works fine. * * Of course if you intersperse instructions and data in memory... * _________ * ________ |_________| _____________ * |________| Data |_____________| * Instructions More instructions * * ...then you will have to be careful to make sure the second block * of instructions also starts at an even numbered address. * You might need to include an extra byte of data as "padding". * * Data processing instructions * --------------------------------------------+---------------+---- * | 1 1 1 1 1 1 1 1 1 1 1 | x x x x | 0 | * --------------------------------------------+---------------+---- * Sixteen of the even numbers are reserved for additional instructions * that will be be described later. * * The even numbers 1111111111100000 to 1111111111111110 (65504 to 65534) * are reserved for these instructions. This means that CALL 65504 through * CALL 65534 are not possible. Put another way, it is not possible to * call a subroutine living in the top 32 bytes of memory. This is not a * very severe limitation. * * LITERAL * ------------------------------------------------------------+---- * | n n n n n n n n n n n n n n n | 1 | * ------------------------------------------------------------+---- * * What LITERAL does * ----------------- * - Place the value 0nnnnnnnnnnnnnnn on the data stack. * * Why this is useful: * ------------------- * Program will often need to deal with constant numbers. * For example, you might want to add 2 to a memory address (to move * on to the next even-numbered address) or add 32 to a character code * (to convert it to lower-case). These constants have to come from * somewhere. * * Limitations of LITERAL: * ----------------------- * To differentiate it from a call, this instruction is always an * odd number. The trailing 1 is discarded before placing the number on * the data stack. This missing bit means that only 2^15 values can be * represented (0 to 32767). 32768 on up cannot be stored directly. * You would need to do some follow-up math to get these numbers. * The most direct way is to use the INV instruction, described later. */ /* Now that the instruction set is generally described * let's look at the code that implements it */ fn step(&mut self) { /* 1. Fetch the instruction. * Also advance ip to point at the next instruction for next time. */ let opcode = self.load(self.ip); self.ip = self.ip.wrapping_add(2); /* 2. Decode and execute the instruction */ if (opcode >= 0xffe0) && (opcode & 1 == 0) { /* Data processing instruction */ PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self); /* These instructions get looked up in a table. The bit * math converts the instruction code into an index in the * table as follows: * * 0xffe0 --> 0 * 0xffe2 --> 1 * ... * 0xfffe --> 15 * * The table will be described below, and these instructions * explained. */ } else if (opcode & 1) == 1 { /* Literal */ self.dstack.push(opcode >> 1); } else { /* Call */ self.rstack.push(self.ip); self.ip = opcode; } } } /* The names of the 16 remaining CPU instructions */ enum Op { RET = 0xffe0, TOR = 0xffe2, RTO = 0xffe4, LD = 0xffe6, ST = 0xffe8, DUP = 0xffea, SWP = 0xffec, DRP = 0xffee, Q = 0xfff0, ADD = 0xfff2, SFT = 0xfff4, OR = 0xfff6, AND = 0xfff8, INV = 0xfffa, GEQ = 0xfffc, IO = 0xfffe, } type Primitive = fn(&mut Core); /* A table of functions for each of the 16 remaining CPU instructions */ const PRIMITIVES: [Primitive; 16] = [ /* Return-stack instructions */ | x | { /* RET - Return from subroutine */ x.ip = x.rstack.pop() }, | x | { /* TOR - Transfer number from data stack to return stack */ x.rstack.push(x.dstack.pop()) }, | x | { /* RTO - Transfer number from return stack to data stack */ x.dstack.push(x.rstack.pop()) }, /* Memory instructions */ | x | { /* LD - Load number from memory address specified on the data stack */ let a = x.dstack.pop(); x.dstack.push(x.load(a)); }, | x | { /* ST - Store number to memory address specified on the data stack */ let a = x.dstack.pop(); let v = x.dstack.pop(); x.store(a, v); }, /* Stack shuffling instructions * * Remember the problem of "register allocation" mentioned earlier, * and how stack machines are supposed to avoid that problem? Well, * nothing comes for free. Stack machines can only process the top * value(s) on the stack. So sometimes you will have to do some work * to "unbury" a crucial value and move it to the top of the stack. * That's what these instructions are for. * * Their use will become more obvious when we start programming the * machine, soon. */ | x | { // DUP - Duplicate the top number on the data stack let v = x.dstack.pop(); x.dstack.push(v); x.dstack.push(v); }, | x | { // SWP - Exchange the top two numbers on the data stack let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1); x.dstack.push(v2); }, | x | { // DRP - Discard the top number on the data stack let _ = x.dstack.pop(); }, /* Conditional branch instruction */ | x | { /* Q - If the top number on the data stack is zero, skip the next * instruction. * * Note Q is the only "decision-making" instruction that our CPU * has. This means that all "if-then" logic, counted loops, etc. * will be built using Q. */ let f = x.dstack.pop(); if f == 0 { x.ip = x.ip.wrapping_add(2) /* Because all of our instructions are two bytes, adding two * to the instruction pointer skips the next instruction. */ }; }, /* Arithmetic and logic */ | x | { // ADD - Sum the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1.wrapping_add(v2)); }, | x | { /* SFT - Bit shift number left or right by the specified amount. * A positive shift amount will shift left, negative will shift right. */ let amt = x.dstack.pop(); let val = x.dstack.pop(); x.dstack.push( if amt <= 0xf { val << amt } else if amt >= 0xfff0 { val >> (0xffff - amt + 1) } else { 0 } ); }, | x | { // OR - Bitwise-or the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1 | v2); }, | x | { // AND - Bitwise-and the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1 & v2); }, | x | { // INV - Bitwise-invert the top number on the data stack. let v1 = x.dstack.pop(); x.dstack.push(!v1); /* You can use the INV instruction to compensate for the LITERAL * instruction's inability to encode constants 32768 to 65535. * Use two instructions instead: * - LITERAL the complement of your desired constant * - INV * * For example, LITERAL(0) INV yields 65535 (signed -1) * For example, LITERAL(1) INV yields 65534 (signed -2) * etc. */ }, | x | { // GEQ - Unsigned-compare the top two items on the data stack. let v2 = x.dstack.pop(); let v1 = x.dstack.pop(); x.dstack.push(if v1 >= v2 { 0xffff } else { 0 }); }, /* Input/output. * * The CPU needs some way to communicate with the outside world. * * Some machines use memory mapped IO where certain memory addresses are * routed to hardware devices instead of main memory. This machine already * has the full 64K of memory connected so no address space is readily * available for hardware devices. * * Instead we define a separate input-output space of 65536 possible * locations. Each of these possible locations is called an IO "port". * * For a real CPU you could hook up hardware such as a serial * transmitter that sends data to a computer terminal, or just an * output pin controller that is wired to a light bulb. * * This is a fake software CPU so I am going to hook it up to * stdin and stdout. */ | x | { // IO - Write/read a number from/to input/output port. let port = x.dstack.pop(); /* I'm loosely following a convention where even ports are inputs * and odd ports are outputs. But each port acts different. * In a hardware CPU this would not be suitable but it is fine for * a software emulation. */ match port { 0 => { /* Push a character from stdin onto the data stack */ let mut buf: [u8; 1] = [0]; let _ = io::stdin().read(&mut buf); x.dstack.push(buf[0] as u16); /* You are welcome to make your own computer that supports * utf-8, but this one does not. */ } 1 => { /* Pop a character from the data stack to stdout */ let val = x.dstack.pop(); print!("{}", ((val & 0xff) as u8) as char); let _ = io::stdout().flush(); } 2 => { /* Dump CPU status. * Like the front panel on a 1960s-1970s minicomputer. */ println!("{:?} {:?}", x.dstack, x.rstack); let _ = io::stdout().flush(); } _ => {} } } ]; /* --------------------------------------------------------------------------- * Part 2 - The Program * ------------------------------------------------------------------------ */ /* "In a sense we're building a tree. We've now reached a * point where we can start making the roots. For a while * everything will be concealed but we'll eventually reach * daylight and start on branches." * * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970 * https://colorforth.github.io/POL.htm * * You now have an unfamiliar computer with no software. Can you and the * computer write a program? * * The first program is the hardest to write because you don't have any tools to * help write it. The computer itself is going to be no help. Without any * program it will sit there doing nothing. * * What should the first program be? * A natural choice would be a tool that helps you program more easily. * * An interactive programming environment needs to let you do 2 things: * * 1. Call subroutines by typing their name at the keyboard * 2. Define new subroutines in terms of existing ones * * Begin with step 1: * Call subroutines by typing their name at the keyboard * * This is where we will meet Forth. * * The below is a small Forth for bootstrapping this computer. If you want to * learn how to implement a full featured Forth, please read Jonesforth, and * Brad Rodriguez' series of articles "Moving Forth". The simple Forth I write * below will probably help you understand those Forths a little better. * * Forth organizes all the computer's memory as a "dictionary" of subroutines. * The point of the dictionary is to give each subroutine a name so you * can run a subroutine by typing its name. The computer will look up its * address for you and call it. * * The dictionary starts at a low address and grows towards high addresses. * It is organized like a linked-list, like this: * * [Link field][Name][Code .......... ] * ^ * | * [Link field][Name][Code ...... ] * ^ * | * [Link field][Name][Code ............... ] * * The reason it is a linked list is to allow each list entry to be a * different length. * * Each dictionary entry contains three things: * * - "Link field": The address of the previous dictionary entry. * For the first dictionary entry this field is 0. * * - "Name": A few letters to name this dictionary entry. * Later you will type this name at the keyboard to call up * this dictionary entry. * * - "Code": A subroutine to execute when you call up this dictionary * entry. This is a list of CPU instructions. Note that one * of the CPU instructions is "call". So you can have a subroutine * that call other subroutines, or calls itself. * * This code must end with a return (RET) instruction. * * Example subroutine: * * Number Instruction Meaning * ------ ----------- ------- * 7 Literal(3) Push the value 3 onto the data stack * 9 Literal(4) Push the value 4 onto the data stack * 65504 RET Return to caller * * A linked list is not a very fast data structure but this doesn't really * matter because dictionary lookup doesn't need to be fast. Lookups are * for converting text you typed at the keyboard to subroutine addresses. * You can't type very fast compared to a computer so this lookup doesn't * need to be fast. * * In addition to the linked list itself, you will need a couple of * variables to keep track of where the dictionary is in memory: * * - Dictionary pointer: The address of the newest dictionary entry. * - Here: The address of the first unused memory location, * which comes just after the newest dictionary entry. * * [Link field][Name][Code .......... ] * ^ * | * [Link field][Name][Code ...... ] * ^ * | * [Link field][Name][Code ............... ] * ^ ^ * | | * [Dictionary pointer] [Here] * * Got all that? * * To create our Forth interactive programmming environment, we will start * by defining subroutines that: * - read names from the keyboard * - look up and execute dictionary entries by name * * We will put these subroutines themselves in the dictionary so they are * available for use once our interactive environment is up and running! * * If you were sitting in front of a microcomputer in 196x you would need * to create the dictionary with pencil and paper, but in 20xx we will * write a Rust program to help create the dictionary. * * First we need to keep track of where the dictionary is: */ struct Dict<'a> { dp: u16, // The dictionary pointer here: u16, // The "here" variable c: &'a mut Core // The dictionary lives in memory. We are going to // hang on to a mutable reference to the core to give // us easy access to the memory. } /* Helpers to help put new routines in the dictionary */ enum Item { Literal(u16), Call(u16), Opcode(Op) } impl From for Item { fn from(a: u16) -> Self { Item::Call(a) } } impl From for Item { fn from(o: Op) -> Self { Item::Opcode(o) } } impl Dict<'_> { /* Helper to reserve space in the dictionary by advancing the "here" * pointer */ fn allot(&mut self, n: u16) { self.here = self.here.wrapping_add(n); } /* Helper to append a 16 bit integer to the dictionary */ fn comma(&mut self, val: u16) { self.c.store(self.here, val); self.allot(2); } /* Helper to append a CPU instruction to the dictionary */ fn emit>(&mut self, val: T) { match val.into() { Item::Call(val) => { self.comma(val) } Item::Opcode(val) => { self.comma(val as u16) } Item::Literal(val) => { assert!(val <= 0x7fff); self.comma((val << 1) | 1) } } } /* Helper to append a "name" field to the dictionary. To save space and * to make each dictionary header a consistent size, I am choosing to not * store every letter of the name. Instead I am storing only the length of * the name and then the first three letters of the name. * * That means these two names will compare equal: * - ALLOW (-> 5ALL) * - ALLOT (-> 5ALL) * * Even though their first three letters are the same, these two names * will compare unequal because they are different lengths: * - FORTH (-> 5FOR) * - FORGET (-> 6FOR) * * If a name is shorter than 3 letters it is padded out with spaces. * - X (-> 1X ) * * You can see that the name field is always four bytes regardless * of how many letters are in the name, and the link field is two bytes. * This means a dictionary header in this Forth is always six bytes. */ fn name(&mut self, n: u8, val: [u8; 3]) { /* Store the length and the first character */ self.comma(n as u16 | ((val[0] as u16) << 8)); /* Store the next two characters */ self.comma(val[1] as u16 | ((val[2] as u16) << 8)); } /* Helper to append a new link field to the dictionary and update the * dictionary pointer appropriately. */ fn entry(&mut self) { let here = self.here; self.comma(self.dp); self.dp = here; } } /* Now we can start building the dictionary. */ fn build_dictionary(c: &mut Core) { use Op::*; use Item::*; let mut d = Dict { dp: 0, /* Nothing in the dictionary yet */ here: 2, /* Reserve address 0 as an "entry point", i.e. where the CPU will jump to start running Forth. We don't have a Forth interpreter yet so we'll leave address 0 alone for now and start the dictionary at address 2 instead. */ c: c }; /* Consider the following facts: * - The CPU knows how to execute a bunch of instructions strung together. * - Forth consists of a bunch of subroutine calls strung together. * - Subroutine CALL is a valid instruction of our CPU. * * This means that we can immediately begin programming our machine in * a language resembling Forth, just by writing a list of subroutine * calls into the dictionary. * * The line between "machine code program" and "Forth program" is * very blurry. To illustrate: * * Here is a subroutine consisting of a few instructions strung together. * * Instruction Number Meaning * ----------- ------ ------- * Literal(3) 7 Push the value 3 onto the data stack * Literal(4) 9 Push the value 4 onto the data stack * RET 65504 Return to caller * * Here is a Forth subroutine consisting of a few subroutine calls strung * together. * Call Number Meaning * ----------- ------ ------- * S1 1230 Call subroutine S1 which happens to live * at address 1230 * S2 1250 Call subroutine S2 which happens to live * at address 1250 * RET 65504 Return to caller * * This duality between CPU instructions and Forth code comes from * an idea called "subroutine threading". It is a refinement of an * idea called "threaded code". This has no relation to the kind of * threading that lets you run programs in parallel. You can read more * about threaded code on Wikipedia or in the other Forth resources I * mentioned earlier (Jonesforth, and Moving Forth by Brad Rodriguez). * * Our new language starts out with the sixteen (well, eighteen) * instructions built into the CPU. We can string those instructions * together into a new subroutine. This subroutine adds to the pool * of functions we have available for making new subroutines. * * Repeat until you have built what you wanted to build, via * function composition. This is the idea behind Forth. */ /* * We are going to be writing many series of instructions so let's * start out by making a Rust macro that makes them easier to type * and lets us specify a CPU instruction vs. a subroutine call with * equal ease. * * The macro below will convert: * * forth!(Literal(2), ADD, RET) * * to: * * d.emit(Literal(2)); * d.emit(ADD); * d.emit(RET); * * which you probably recognize as code that will add a new subroutine * to the dictionary. */ macro_rules! forth { ($x:expr) => (d.emit($x)); ($x:expr, $($y:expr),+) => (d.emit($x); forth!($($y),+)) } /* Now we can add the first subroutine to the dictionary! * * key: Reads a character from the keyboard and places its character * code on the stack. * * There is a tradition of writing stack comments for Forth subroutines * to describe the stack effect of executing the subroutine. * They look like this: key ( -- n ) * * Read as: key does not take any parameters off the stack, and leaves * one new number pushed onto the stack. * * Also remember that a dictionary entry looks like this: * [Link field][Name][Code .......... ] */ // key ( -- n ) d.entry(); /* Compile the link field into the dictionary */ d.name(3, *b"key"); /* Compile the name field into the dictionary */ let key = d.here; /* (Save off the start address of the code so we can call it later) */ forth!( Literal(0), /* Compile a LITERAL instruction that pushes 0 to the stack */ IO, /* Compile an IO instruction. * * Remember from the CPU code that IO takes a * parameter on the stack to specify which port * to use. * * Also remember that IO port 0 reads * a character from standard input. */ RET /* Compile a RET instruction */ ); /* We have now compiled the "key" subroutine into the dictionary. * [Link field][Name][Code .......... ] * 0000 3key 1, 65534, 65504 * * The next subroutine we will make is "emit". This is a companion * to "key" that works in the opposite direction. * * key ( -- n ) reads a character from stdin and pushes it to the stack. * emit ( n -- ) pops a character from the stack and writes it to stdout. */ // emit ( n -- ) d.entry(); d.name(4, *b"emi"); let emit = d.here; forth!(Literal(1), IO, RET); /* I am tired of saying "subroutine" so many times, so I am going to * introduce a new term. Remember the goal our language is working * towards -- we want to be able to type a word at the keyboard, and * let the computer look it up in the dictionary and execute the * appropriate code. * * So far we have two named items in the dictionary, call and emit. * * We are going to term a named dictionary item a "word". * This is a Forth tradition. * * So call and emit are "words", or "dictionary words" if you want to be * precise about it. So far these are the only words we've defined. * * Let's define some more words. */ /* Our CPU does not have subtraction so let's make subtraction by adding * the two's complement. * * To get the two's complement, do a bitwise invert and add 1. * * This will be the most complicated Forth that we've written so far * so let's walk through step by step. */ // - ( a b -- a-b ) d.entry(); d.name(1, *b"- "); let sub = d.here; forth!( /* Stack contents: a b, to start off with. * We want to compute a minus b */ INV, /* Bitwise invert the top item on the stack. * Stack contents: a ~b */ Literal(1), /* Push 1 onto the stack. * Stack contents: a ~b 1 */ ADD, /* Add the top two items on the stack. * Stack contents: a ~b+1 * Note that ~b+1 is the two's complement of b. */ ADD, /* Add the top two items on the stack. * Stack contents: n * Note that n = (a + ~b+1) = a - b */ RET /* Done, return to caller, leaving n on the data stack. */ ); /* Writing it out like that takes a lot of space. Normally Forth code * is written on a single line, like this: * * INV 1 ADD ADD RET * * Looking at it this way, it's easy to see the new word we just * created (-) is made from 5 instructions. It's pretty typical for * a Forth word to be made of 2-7 of them. Beyond that length, things * get successively harder to understand, and it becomes a good idea * to split some work off into helper words. * * We will see an example of this below. */ /* Our next word will be useful for Boolean logic. * * 0= ( n -- f ) * * In a stack comment, "f" means "flag", a.k.a. Boolean value. * By Forth convention, zero is false and any nonzero value is true. * However the "best" value to use for a true flag is 65535 (all ones) * so the bitwise logical operations can double as Boolean logical * operations. * * So what 0= does is: * - if n=0, leave on the stack f=65535 * - otherwise, leave on the stack f=0 * * It is like C's ! operator. * * In Rust this could be implemented as: * * fn zero_eq(n: u16) { * if (n == 0) { * return 65535; * } else { * return 0; * } * } * * Rust has an if-then and block scope, so this is easy to write. * * The literal translation to a typical register-machine assembly * language would look something like this: * * zero_eq: compare r0, 0 * jump_eq is_zero * move r0, 0 * ret * is_zero: move r0, 65535 * ret * * It looks simple but I want to point out a couple things about it * that are not so simple. * * The conditional jump instruction, jump_eq. * ------------------------------------------ * Our CPU doesn't have this. Q is the only "decision-making" * instruction that our CPU has. * * Q - If the top number on the data stack is zero, skip the next * instruction. * * The forward reference * --------------------- * This is another problem. Think of the job of an assembler which is * converting an assembly language program to machine code. We are * currently writing our code in a tiny assembler that we made in Rust! It * is very simple but so far it has worked for us. The assembler of our * hypothetical register-machine below has a rather nasty problem to solve. * * zero_eq: compare r0, 0 * jump_eq is_zero <----- On this line. * move r0, 0 * ret * is_zero: move r0, 65535 * ret * * It wants to jump to is_zero but that symbol has not been seen yet and is * unrecognized. On top of that, the assembler also doesn't yet know what * address is_zero will have, so doesn't know what jump target to emit. * To successfully assemble that kind of program you would need an * assembler smarter than the assembler we made for ourselves in Rust. * * There are ways to solve this but let's NOT solve it. * * Our CPU has no jump instruction (only call) and our assembler only lets * us call things we already defined. Instead of removing these * constraints, find a way to write 0= within the constraints. * * Here is a start at solving the problem * * is_nonzero ( -- 0 ) * Literal(0) * RET * * 0= ( n -- f ) * Q <-- pop n, if n=0 skip next instruction * is_nonzero <-- f=0 is now pushed to stack * Literal(0) * INV <-- f=65535 is now pushed to stack * RET <-- Return * * We got rid of the forward reference by defining is_nonzero before it * was used. * * We got rid of the jump instruction by using a subroutine call instead. * * This code is close to working but it doesn't quite work. The problem * is that is_nonzero gives control back to 0= when done, just like * a subroutine call normally does, and then 0= runs as normal until it * hits the return instruction at the end. * So we wind up executing both the f=0 branch and the f=65535 branch, * instead of just executing the f=0 branch like we wanted in this case. * * It is possible to fix this last problem by adding the instructions * RTO DRP to is_nonzero. * * is_nonzero ( -- 0 ) * RTO <-- Pop the return address, push to data stack * DRP <-- Discard it * Literal(0) <-- Put 0 on the data stack * RET <-- Return * * Because we popped off and discarded one item from the return stack, the * final RET instruction will not return to 0= any more. Instead it will * skip one level and return to whoever called 0=. This has the result of * ending 0= early, which is what we wanted to do. * * I call this pattern "return-from-caller". It is used occasionally in * real Forth systems. My dialect of Forth will use it extensively to work * around my CPU's lack of conditional branch. * * Now we've explained how 0= is going to work, let's make it. */ /* First we define the helper. It won't be reused, so I am not going * to bother giving it a dictionary header and name for easy lookup later. * Think of it as a private function. */ let zero = d.here; forth!(Literal(0), RTO, DRP, RET); /* Now define 0= using the helper. */ // 0= ( n -- f ) d.entry(); d.name(2, *b"0= "); let zero_eq = d.here; forth!(Q, zero, Literal(0), INV, RET); /* Next let's make a = equality comparison operator, using 0= and subtract. * I call it an "operator" because that's what other languages would * call it, but Forth has no special idea of an "operator". Everything * is just words. */ // = ( a b -- a=b ) d.entry(); d.name(1, *b"= "); let eq = d.here; forth!(sub, zero_eq, RET); /* Note that 0= and subtract are both words, not CPU instructions. * This makes = the first "pure" Forth word we have defined, with no * direct dependency on the machine's instruction set. * We could define = as - 0= on a real standards-compliant Forth system * and it would still work. So Forth gets you to the point of writing * "portable" code really quickly. Often you can reuse routines early in * bootstrapping even though they were written and tested on a different * machine. Many languages offer portability but few offer it so quickly. */ /* Now that we've got some basics in place let's go back to solving * the real problem of getting our language to read words from the * keyboard. The first problem we have is that we need some way to * separate words from each other so we know where one word ends and the * next begins. This problem is called "lexing". Forth has about the * simplest lexer ever, it just splits on whitespace. Anything with * character code <=32 is considered whitespace. Words are delimited by * whitespace. And that is all the syntax Forth has. * * To read a word from the keyboard you will need to: * - Advance past any leading whitespace * - Read characters into a buffer until whitespace is seen again. */ /* Let's start with the "advance past whitespace" part */ let skip_helper = d.here; forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper); // skipws ( -- c ) d.entry(); d.name(6, *b"ski"); let skipws = d.here; forth!(skip_helper); // over ( a b -- a b a ) d.entry(); d.name(4, *b"ove"); let over = d.here; forth!(TOR, DUP, RTO, SWP, RET); // 2dup ( a b -- a b a b ) d.entry(); d.name(4, *b"2du"); let twodup = d.here; forth!(over, over, RET); // Buffer for parsing an input word, formatted as Nabcde. let word_buf = d.here; d.allot(6); // min ( a b -- n ) d.entry(); d.name(3, *b"min"); let min = d.here; forth!(twodup, GEQ, Q, SWP, DRP, RET); // c@ ( a -- n ) d.entry(); d.name(2, *b"c@ "); let cld = d.here; forth!(LD, Literal(0xff), AND, RET); // c! ( n a -- ) d.entry(); d.name(2, *b"c! "); let cst = d.here; forth!(DUP, LD, Literal(0xff), INV, AND, SWP, TOR, OR, RTO, ST, RET); /* Load 1 letter into buffer. */ let stchar = d.here; forth!(Literal(word_buf), cld, Literal(1), ADD, DUP, Literal(word_buf), cst, Literal(5), min, Literal(word_buf), ADD, cst, RET); /* Load letters into buffer until whitespace is hit again. * Return the whitespace character that was seen. */ let getcs_helper = d.here; forth!(RTO, DRP, stchar, key, DUP, Literal(32), SWP, GEQ, Q, RET, getcs_helper); d.entry(); d.name(5, *b"get"); let getcs = d.here; forth!(getcs_helper, RET); // word ( -- ) // Not quite standard. d.entry(); d.name(4, *b"wor"); let word = d.here; forth!(Literal(word_buf), DUP, Literal(2), ADD, Literal(0x2020), SWP, ST, Literal(0x2000), SWP, ST, skipws, getcs, DRP, RET); // latest ( -- a ) /* Address of "latest" variable. This variable stores the address of the latest word in the dictionary. */ let latest_ptr = d.here; d.allot(2); d.entry(); d.name(6, *b"lat"); let latest = d.here; forth!(Literal(latest_ptr), RET); let matches = d.here; forth!(Literal(2), ADD, TOR, Literal(word_buf), DUP, Literal(2), ADD, LD, SWP, LD, RTO, DUP, TOR, LD, Literal(0x0080), INV, AND, eq, SWP, RTO, Literal(2), ADD, LD, eq, AND, RET); let matched = d.here; forth!(Literal(6), ADD, RTO, DRP, RET); let find_helper = d.here; forth!(RTO, DRP, DUP, Literal(0), eq, Q, RET, DUP, matches, Q, matched, LD, find_helper); // find ( -- xt|0 ) d.entry(); d.name(4, *b"fin"); let find = d.here; forth!(latest, LD, find_helper); // ' ( -- xt|0 ) d.entry(); d.name(1, *b"' "); forth!(word, find, RET); /* --- The outer interpreter --- */ // x10 ( n -- n*10 ) d.entry(); d.name(3, *b"x10"); let x10 = d.here; forth!(DUP, DUP, Literal(3), SFT, ADD, ADD, RET); // here ( -- a ) /* Address of "here" variable. This variable stores the address of the first free space in the dictionary */ let here_ptr = d.here; d.allot(2); d.entry(); d.name(4, *b"her"); let here = d.here; forth!(Literal(here_ptr), RET); // state ( -- a ) /* Address of "state" variable. This variable stores -1 if * interpreting or 0 if compiling. */ let state_ptr = d.here; d.allot(2); d.entry(); d.name(5, *b"sta"); let state = d.here; forth!(Literal(state_ptr), RET); let word_addr = d.here; forth!(Literal(latest_ptr), LD, Literal(2), ADD, RET); // immediate ( -- ) d.entry(); d.name(9 | 0x80, *b"imm"); forth!(word_addr, DUP, LD, Literal(0x0080), OR, SWP, ST, RET); // smudge ( -- ) d.entry(); d.name(6 | 0x80, *b"smu"); let smudge = d.here; forth!(word_addr, DUP, LD, Literal(0x0040), OR, SWP, ST, RET); // unsmudge ( -- ) d.entry(); d.name(8 | 0x80, *b"uns"); let unsmudge = d.here; forth!(word_addr, DUP, LD, Literal(0x0040), INV, AND, SWP, ST, RET); // [ ( -- ) d.entry(); d.name(1 | 0x80, *b"[ "); let lbracket = d.here; forth!(Literal(0), INV, state, ST, RET); // ] ( -- ) d.entry(); d.name(1 | 0x80, *b"] "); let rbracket = d.here; forth!(Literal(0), state, ST, RET); // , ( n -- ) d.entry(); d.name(1, *b", "); let comma = d.here; forth!(here, LD, ST, here, LD, Literal(2), ADD, here, ST, RET); let compile_call = d.here; forth!(DUP, Literal(4), sub, LD, Literal(0x0080), AND, state, LD, OR, Q, RET, comma, RTO, DRP, RET); let compile_lit = d.here; forth!(state, LD, Q, RET, DUP, ADD, Literal(1), ADD, comma, RTO, DRP, RET); let end_num = d.here; forth!(DRP, RTO, DRP, RET); let bad_num = d.here; forth!(DRP, DRP, DRP, Literal(0), INV, RTO, DRP, RET); let number_helper = d.here; forth!(RTO, DRP, DUP, Literal(word_buf), ADD, cld, Literal(48), sub, DUP, Literal(10), GEQ, Q, bad_num, SWP, TOR, SWP, x10, ADD, RTO, DUP, Literal(word_buf), cld, GEQ, Q, end_num, Literal(1), ADD, number_helper); // number ( -- n|-1 ) d.entry(); d.name(6, *b"num"); let number = d.here; forth!(Literal(0), Literal(1), number_helper); // execute ( xt -- ) d.entry(); d.name(7, *b"exe"); let execute = d.here; forth!(TOR, RET); let doit = d.here; forth!(RTO, DRP, compile_call, execute, RET); let bad = d.here; forth!(DRP, Literal(63), emit, RTO, DRP, RET); // dispatch ( xt -- ) d.entry(); d.name(9, *b"int"); let dispatch = d.here; forth!(DUP, Q, doit, DRP, number, DUP, Literal(1), ADD, zero_eq, Q, bad, compile_lit, RET); // quit ( -- ) d.entry(); d.name(4, *b"qui"); let quit = d.here; forth!(word, find, dispatch, quit); // create ( -- ) d.entry(); d.name(6, *b"cre"); let create = d.here; forth!(word, here, LD, latest, LD, comma, latest, ST, Literal(word_buf), DUP, LD, comma, Literal(2), ADD, LD, comma, RET); // : ( -- ) d.entry(); d.name(1, *b": "); forth!(create, smudge, rbracket, RET); // ; ( -- ) d.entry(); d.name(1 | 0x80, *b"; "); forth!(Literal(!(RET as u16)), INV, comma, lbracket, unsmudge, RET); /* Finally put the primitives in the dictionary so they can be * called interactively. */ d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET); d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET); d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET); d.entry(); d.name(1, *b"@ "); forth!(LD, RET); d.entry(); d.name(1, *b"! "); forth!(ST, RET); d.entry(); d.name(3, *b"dup"); forth!(DUP, RET); d.entry(); d.name(4, *b"swa"); forth!(SWP, RET); d.entry(); d.name(4, *b"dro"); forth!(DRP, RET); d.entry(); d.name(1 | 0x80, *b"? "); // This one only works in-line. forth!(Literal(!(Q as u16)), INV, comma, RET); d.entry(); d.name(1, *b"+ "); forth!(ADD, RET); d.entry(); d.name(5, *b"shi"); forth!(SFT, RET); d.entry(); d.name(2, *b"or "); forth!(OR, RET); d.entry(); d.name(3, *b"and"); forth!(AND, RET); d.entry(); d.name(3, *b"inv"); forth!(INV, RET); d.entry(); d.name(3, *b"u>="); forth!(GEQ, RET); d.entry(); d.name(2, *b"io "); let io = d.here; forth!(IO, RET); d.c.store(latest_ptr, io-6); d.c.store(here_ptr, d.here); d.c.store(state_ptr, 0xffff); d.c.store(0, quit); } fn main() { let mut c = new_core(); build_dictionary(&mut c); c.ip = 0; loop { c.step(); } }