From e5531bc19f9d881c2d89be240556da8cdc09250e Mon Sep 17 00:00:00 2001 From: psf Date: Thu, 19 May 2022 02:23:25 -0700 Subject: [PATCH] Writeup, part 1 --- frustration.rs | 516 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 493 insertions(+), 23 deletions(-) diff --git a/frustration.rs b/frustration.rs index aae61a9..292795a 100644 --- a/frustration.rs +++ b/frustration.rs @@ -2,60 +2,408 @@ use std::io; use std::io::Read; use std::io::Write; use std::convert::TryInto; -const ADDRESS_SPACE: usize = 65535; + +/* What is this file? + * + * This is a tutorial that will show you how to bootstrap an interactive + * programming environment from a small amount of code. + * + * First we will design a virtual computer. + * + * Then we will design software to run on that computer, to enable REPL-style + * interactive programming. + * + * A REPL is a "Read, Evaluate, Print loop". A REPL lets you type code at + * the keyboard and immediately get a result back. You can also define + * functions, including functions that change how the environment works in + * fundamental ways. + */ + +/* What is Forth? + * + * Forth is the programming language we will use with our computer. + * + * Forth was invented by Chuck Moore in the 1960s as a tool for quickly + * coming to grips with new computer systems. + * + * "Let us imagine a situation in which you have access to + * your computer. I mean sole user sitting at the board with + * all the lights, for some hours at a time. This is + * admittedly an atypical situation, but one that can + * always be arranged if you are competent, press hard, and + * will work odd hours. Can you and the computer write a + * program? Can you write a program that didn't descend from + * a pre-existing program? You can learn a bit and have a + * lot of fun trying." + * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970 + * https://colorforth.github.io/POL.htm + * + * As you will see, it does not take much work to get Forth running on a + * new machine, including a machine with a completely unfamiliar instruction + * set. + * + * But before we can do any of that we will need a machine. Let's make one. + */ + +/* --------------------------------------------------------------------------- + * Part 1 - The Computer + * ------------------------------------------------------------------------ */ + +/* This computer will have a 16-bit CPU. It will be able to address + * 2^16 (65536) memory locations, numbered 0 to 65535. + */ +const ADDRESS_SPACE: usize = 65536; + +/* The job of a CPU is to load numbers from memory, do math or logic on them, + * then write the resulting number back into memory. + * + * The CPU needs a temporary place to hold numbers while it is working with + * them. + * + * In most CPUs, this place is called a "register". Registers work like + * variables in a programming language but there are only a few of them + * (between 1 to 32 depending on your CPU). + * + * On 64-bit ARM the registers are named r0, r1, ..., r15. + * On 64-bit Intel they are instead named rax, rbx, ..., etc. + * + * Having immediate access to many registers is quite handy, but it means + * many choices are available to the programmer, or more likely, to the + * compiler. And making good choices is Hard. + * + * A lot of work goes into deciding what variable to store in what register + * ("register allocation") and when to dump register contents back into + * memory ("spilling"). + * + * Our CPU avoids these problems by not having registers; instead we store + * numbers in a stack. The CPU can only access the value that was most + * recently pushed onto the stack. This may seem like a big limitation + * right now but you will see ways of dealing with it. + * + * This makes our CPU a "stack machine" as opposed to a "register machine". + */ #[derive(Debug)] struct Stack { mem: [u16; N], - tos: usize + tos: usize // top-of-stack. } impl Stack { + // Add a number to the stack. fn push(&mut self, val: u16) { self.tos = (self.tos.wrapping_add(1)) & (N - 1); + /* This stack is fixed-sized and can hold N values. + * + * When a fixed-size stack fills up, there is a failure case + * (stack overflow) that must be handled somehow. + * + * In this stack, the bitwise & makes it a circular stack, meaning, if + * it ever fills up, it will discard the oldest entry instead of + * signaling an error. The lack of error handling makes the CPU + * simpler. + */ self.mem[self.tos] = val; } + // Return the most recently pushed number. fn pop(&mut self) -> u16 { let val = self.mem[self.tos]; self.mem[self.tos] = 0; + /* You don't have to set the value back to zero. I am only doing + * this because it makes makes the stack look nicer when dumped + * out with print!(). + */ self.tos = (self.tos.wrapping_sub(1)) & (N - 1); return val; } } +/* Now that we have a stack let's use one! Or two? + * + * Why two stacks? + * + * The first stack will be called the "data stack" and is used instead of + * registers as already described. + * + * The second stack will be called the "return stack". This one holds + * subroutine return addresses. Don't worry if you don't know what that + * means; we'll get to it later when we talk about the instruction set. + * + * In addition to stacks we are going to give the CPU a couple more things: + * + * 1. An "instruction pointer", which holds the memory address of the next + * instruction that the CPU will execute. + * + * 2. To make life simpler we put main memory straight on "the CPU" even + * though in a real computer, RAM would be off-chip and accessed through a + * data bus. + */ + struct Core { ram: [u8; ADDRESS_SPACE], - ip: u16, - dstack: Stack<16>, - rstack: Stack<32> + /* In our memory, each of the 65536 possible memory addresses will store + * one 8-bit byte (u8 data type in Rust). This makes it a 65536 byte + * (64 KB) memory. + * + * We could have chosen to make each memory address store 16-bits instead. + * That would make this a "word-addressed memory". + * + * Instead we are going with the "byte-addressed memory" that is more + * conventional in today's computers. This choice is arbitrary. + */ + ip: u16, // instruction pointer + dstack: Stack<16>, // data stack + rstack: Stack<32> // return stack } +/* Function to initialize the cpu. + * There is probably a better idiom for this but I am bad at rust */ fn new_core() -> Core { let c = Core { ram: [0; ADDRESS_SPACE], ip: 0, dstack: Stack {tos: 15, mem: [0; 16]}, rstack: Stack {tos: 31, mem: [0; 32]}}; + /* Because these are circular stacks it doesn't matter where top-of-stack + * starts off pointing. I arbitrarily set it to the highest index so + * the first value pushed will wind up at index 0, again because this + * makes the stack look nicer when printed out. + */ return c; } +/* Now we have a CPU sitting there but it does nothing. + * + * A working CPU would execute a list of instructions. An instruction is + * a number that is a command for the CPU. For example: + * + * 65522 might mean "add the top two values on the data stack". + * 65524 might mean "invert the bits of the top value on the data stack". + * + * The map of instruction-to-behavior comes from the CPU's + * "instruction set" i.e. the set of all possible instructions and their + * behaviors. + * + * Normally you program a CPU by putting instructions into memory and then + * telling the CPU the memory address where it can find the first instruction. + * + * The CPU will: + * 1. Fetch the instruction (load it from memory) + * 2. Decode the instruction (look it up in the instruction set) + * 3. Execute that instruction (do the thing the instruction set said to do) + * 4. Move on to the next instruction and repeat. + * + * So now we will make the CPU do those things. + * We'll start off by teaching it how to access memory, and then we will + * define the instruction set. + */ + impl Core { + /* Helper function - Read a number from the specified memory address. */ fn load(&self, addr: u16) -> u16 { let a = addr as usize; + /* We immediately run into trouble because we are using byte-addressed + * memory as mentioned earlier. + * + * Each memory location stores 8 bits (a byte) + * + * Our CPU operates on 16 bit values and we want each memory operation + * to take 16 bits at a time for efficiency reasons. + * + * What do we do? + * + * This CPU chooses to do the following: + * - Read the low byte of the 16-bit number from address a + * - Read the high byte of the 16-bit number from address a+1 + * + * 16 bit number in CPU: [00000000 00000001] = 1 + * | | + * | memory address a = 1 + * | + * memory address a+1 = 0 + * + * This is called "little endian" because the low byte comes first. + * + * We could have just as easily done the opposite: + * - Read the high byte of the 16-bit number from address a + * - Read the low byte of the 16-bit number from address a+1 + * + * 16 bit number in CPU: [00000000 00000001] = 1 + * | | + * | memory address a+1 = 1 + * | + * memory address a = 0 + * + * This is called "big endian" because the high byte comes first. + */ return u16::from_le_bytes(self.ram[a..=a+1].try_into().unwrap()); + /* The le in this function call stands for little-endian. */ } + /* Helper function - Write a number to the specified memory address. */ fn store(&mut self, addr: u16, val: u16) { let a = addr as usize; self.ram[a..=a+1].copy_from_slice(&val.to_le_bytes()); } + /* With that taken care of, we can get around to defining the CPU's + * instruction set. + * + * Each instruction on this CPU will be the same size, 16 bits, for + * the following reasons: + * + * 1. Instruction fetch always completes in 1 read. You never have to + * go back and fetch more bytes. + * + * 2. If you put the first instruction at an even numbered address then + * you know all the rest of the instructions will also be at even + * numbered addresses. I will take advantage of this later. + * + * 3. A variable length encoding would save space but 2 bytes per + * instruction is already pretty small so it doesn't matter very much. + * + * Here are the instructions I picked. + * + * CALL + * ------------------------------------------------------------+---- + * | n n n n n n n n n n n n n n n | 0 | + * ------------------------------------------------------------+---- + * + * What CALL does: + * --------------- + * - Push instruction pointer onto the return stack. + * - Set instruction pointer to address nnnnnnnnnnnnnnn0. + * + * This lets you call a subroutine at any even numbered address + * from 0 to 65534. + * + * Why this is useful: + * ------------------- + * Together with the return stack, CALL lets you call subroutines. + * + * A subroutine is a list of instructions that does something + * useful and then returns control to the caller. + * + * For example: + * + * Address Instruction Meaning + * 100 -> 200 Call 200 + * 102 -> ??? Add the top two values on the data stack. + * ... + * 200 -> ??? Push the value 3 onto the data stack + * 202 -> ??? Push the value 4 onto the data stack + * 204 -> ??? Return to caller + * + * Don't worry about the other instructions I am using here. I will + * define them later. + * + * I mostly want to point out the three instructions that I put + * at address 200 because they are a subroutine, + * a small self contained piece of code (6 bytes) that + * performs a specific task. + * + * Do you think it's cool that you can count exactly how many bytes it + * took? I think it's cool. + * + * Here is what happens when the CPU begins execution at address 100. + * + * Address Data stack Return stack + * 100 [] [] <--- About to call subroutine... + * 200 [] [102] + * 202 [3] [102] + * 204 [3 4] [102] <--- About to return from subroutine... + * 102 [3 4] [] + * 104 [5] [] + * + * The return stack is there to make sure that returning from a subroutine + * picks up where the caller left off. We will talk more about the return + * stack later when we talk about the RET instruction. + * + * Limitations of CALL: + * -------------------- + * This CPU cannot call an instruction that starts at an odd address. + * a.k.a. "unaligned call" is impossible. + * + * At first this seems like a limitation, but it really isn't. + * If you put the first instruction at an even numbered address then + * all the rest of the instructions will also be at even numbered + * addresses. So this works fine. + * + * Of course if you intersperse instructions and data in memory... + * _________ + * ________ |_________| _____________ + * |________| Data |_____________| + * Instructions More instructions + * + * ...then you will have to be careful to make sure the second block + * of instructions also starts at an even numbered address. + * You might need to include an extra byte of data as "padding". + * + * Data processing instructions + * --------------------------------------------+---------------+---- + * | 1 1 1 1 1 1 1 1 1 1 1 | x x x x | 0 | + * --------------------------------------------+---------------+---- + * Sixteen of the even numbers are reserved for additional instructions + * that will be be described later. + * + * The even numbers 1111111111100000 to 1111111111111110 (65504 to 65534) + * are reserved for these instructions. This means that CALL 65504 through + * CALL 65534 are not possible. Put another way, it is not possible to + * call a subroutine living in the top 32 bytes of memory. This is not a + * very severe limitation. + * + * LITERAL + * ------------------------------------------------------------+---- + * | n n n n n n n n n n n n n n n | 1 | + * ------------------------------------------------------------+---- + * + * What LITERAL does + * ----------------- + * - Place the value 0nnnnnnnnnnnnnnn on the data stack. + * + * Why this is useful: + * ------------------- + * Program will often need to deal with constant numbers. + * For example, you might want to add 2 to a memory address (to move + * on to the next even-numbered address) or add 32 to a character code + * (to convert it to lower-case). These constants have to come from + * somewhere. + * + * Limitations of LITERAL: + * ----------------------- + * To differentiate it from a call, this instruction is always an + * odd number. The trailing 1 is discarded before placing the number on + * the data stack. This missing bit means that only 2^15 values can be + * represented (0 to 32767). 32768 on up cannot be stored directly. + * You would need to do some follow-up math to get these numbers. + * The most direct way is to use the INV instruction, described later. + */ + + /* Now that the instruction set is generally described + * let's look at the code that implements it */ fn step(&mut self) { + // 1. Fetch the instruction. + // Also advance ip to point at the next instruction for next time. let opcode = self.load(self.ip); self.ip = self.ip.wrapping_add(2); + + // 2. Decode and execute the instruction if (opcode >= 0xffe0) && (opcode & 1 == 0) { + // Data processing instruction PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self); + /* These instructions get looked up in a table. The bit + * math converts the instruction code into an index in the + * table as follows: + * + * 0xffe0 --> 0 + * 0xffe2 --> 1 + * ... + * 0xfffe --> 15 + * + * The table will be described below, and these instructions + * explained. + */ } else if (opcode & 1) == 1 { // Literal @@ -69,8 +417,7 @@ impl Core { } } -type Primitive = fn(&mut Core); - +// The names of the 16 remaining CPU instructions enum Op { RET = 0xffe0, TOR = 0xffe2, RTO = 0xffe4, LD = 0xffe6, ST = 0xffe8, DUP = 0xffea, SWP = 0xffec, DRP = 0xffee, @@ -78,43 +425,91 @@ enum Op { AND = 0xfff8, INV = 0xfffa, GEQ = 0xfffc, IO = 0xfffe, } +type Primitive = fn(&mut Core); + +// A table of functions for each of the 16 remaining CPU instructions const PRIMITIVES: [Primitive; 16] = [ - | x | { /* ret */ x.ip = x.rstack.pop() }, - | x | { /* >r */ x.rstack.push(x.dstack.pop()) }, - | x | { /* r> */ x.dstack.push(x.rstack.pop()) }, - | x | { // ld + /* Return-stack instructions */ + | x | { + // RET - Return from subroutine + x.ip = x.rstack.pop() + }, + | x | { + // TOR - Transfer number from data stack to return stack + x.rstack.push(x.dstack.pop()) + }, + | x | { + // RTO - Transfer number from return stack to data stack + x.dstack.push(x.rstack.pop()) + }, + /* Memory instructions */ + | x | { + // LD - Load number from memory address specified on the data stack let a = x.dstack.pop(); x.dstack.push(x.load(a)); }, - | x | { // st + | x | { + // ST - Store number to memory address specified on the data stack let a = x.dstack.pop(); let v = x.dstack.pop(); x.store(a, v); }, - | x | { // dup + /* Stack shuffling instructions + * + * Remember the problem of "register allocation" mentioned earlier, + * and how stack machines are supposed to avoid that problem? Well, + * nothing comes for free. Stack machines can only process the top + * value(s) on the stack. So sometimes you will have to do some work + * to "unbury" a crucial value and move it to the top of the stack. + * That's what these instructions are for. + * + * Their use will become more obvious when we start programming the + * machine, soon. + */ + | x | { + // DUP - Duplicate the top number on the data stack let v = x.dstack.pop(); x.dstack.push(v); x.dstack.push(v); }, - | x | { // swp + | x | { + // SWP - Exchange the top two numbers on the data stack let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1); x.dstack.push(v2); }, - | x | { /* drp */ let _ = x.dstack.pop(); }, - | x | { // ? + | x | { + // DRP - Discard the top number on the data stack + let _ = x.dstack.pop(); + }, + /* Conditional branch instruction */ + | x | { + /* Q - If the top number on the data stack is zero, skip the next + * instruction. + * + * Note Q is the only "decision-making" instruction that our CPU + * has. This means that all "if-then" logic, counted loops, etc. + * will be built using Q. + */ let f = x.dstack.pop(); if f == 0 { x.ip = x.ip.wrapping_add(2) + /* Because all of our instructions are two bytes, adding two + * to the instruction pointer skips the next instruction. */ }; }, - | x | { // add + /* Arithmetic and logic */ + | x | { + // ADD - Sum the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1.wrapping_add(v2)); }, - | x | { // sft + | x | { + /* SFT - Bit shift number left or right by the specified amount. + * A positive shift amount will shift left, negative will shift right. + */ let amt = x.dstack.pop(); let val = x.dstack.pop(); x.dstack.push( @@ -127,39 +522,79 @@ const PRIMITIVES: [Primitive; 16] = [ } ); }, - | x | { // or + | x | { // OR - Bitwise-or the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1 | v2); }, - | x | { // and + | x | { // AND - Bitwise-and the top two numbers on the data stack. let v1 = x.dstack.pop(); let v2 = x.dstack.pop(); x.dstack.push(v1 & v2); }, - | x | { // inv + | x | { // INV - Bitwise-invert the top number on the data stack. let v1 = x.dstack.pop(); x.dstack.push(!v1); + /* You can use the INV instruction to compensate for the LITERAL + * instruction's inability to encode constants 32768 to 65535. + * Use two instructions instead: + * - LITERAL the complement of your desired constant + * - INV + * + * For example, LITERAL(0) INV yields 65535 (signed -1) + * For example, LITERAL(1) INV yields 65534 (signed -2) + * etc. + */ }, - | x | { // geq (unsigned) + | x | { // GEQ - Unsigned-compare the top two items on the data stack. let v2 = x.dstack.pop(); let v1 = x.dstack.pop(); x.dstack.push(if v1 >= v2 { 0xffff } else { 0 }); }, - | x | { // io + /* Input/output. + * + * The CPU needs some way to communicate with the outside world. + * + * Some machines use memory mapped IO where certain memory addresses are + * routed to hardware devices instead of main memory. This machine already + * has the full 64K of memory connected so no address space is readily + * available for hardware devices. + * + * Instead we define a separate input-output space of 65536 possible + * locations. Each of these possible locations is called an IO "port". + * + * For a real CPU you could hook up hardware such as a serial + * transmitter that sends data to a computer terminal, or just an + * output pin controller that is wired to a light bulb. + * + * This is a fake software CPU so I am going to hook it up to + * stdin and stdout. + */ + | x | { // IO - Write/read a number from/to input/output port. let port = x.dstack.pop(); + /* I'm loosely following a convention where even ports are inputs + * and odd ports are outputs. But each port acts different. + * In a hardware CPU this would not be suitable but it is fine for + * a software emulation. + */ match port { 0 => { + /* Push a character from stdin onto the data stack */ let mut buf: [u8; 1] = [0]; let _ = io::stdin().read(&mut buf); x.dstack.push(buf[0] as u16); + /* You are welcome to make your own computer that supports + * utf-8, but this one does not. */ } 1 => { + /* Pop a character from the data stack to stdout */ let val = x.dstack.pop(); print!("{}", ((val & 0xff) as u8) as char); let _ = io::stdout().flush(); } 2 => { + /* Dump CPU status. + * Like the front panel on a 1960s-1970s minicomputer. */ println!("{:?} {:?}", x.dstack, x.rstack); let _ = io::stdout().flush(); } @@ -168,6 +603,41 @@ const PRIMITIVES: [Primitive; 16] = [ } ]; +/* --------------------------------------------------------------------------- + * Part 2 - The Program + * ------------------------------------------------------------------------ */ + +/* You now have an unfamiliar computer with no software. It sits there doing + * nothing. Can you and the computer write a program? + * + * We are going to need to give the computer a list of instructions, by + * which I mean a list of numbers. If we were sitting in front of a + * minicomputer in 196x, we would need a copy of the target machine's + * instruction set, paper and pencil, and a lot of coffee. + * + * In 20xx we are fortunate enough to have rust so we will put it to work. + * + * Regardless, this bootstrapping process isn't going to be very pleasant + * so the goal is to make it short. We want a language that can stand on + * its own as quickly as possible, so the computer can start helping us + * write the program. + * + * Forth is a weird language but its design decisions make a lot of sense + * if you view it as a bootstrapping tool. + */ + + /* + * Forth organizes all the computer's memory as a "dictionary". + * The dictionary starts at a low address and grows towards high addresses. + * It is organized like a linked-list, like this: + * + * + * Code is stored in the dictionary as a list of addresses. + * + * + */ + +/* Here is the stuff that you would normally be doing with pencil and paper */ struct Dict<'a> { dp: u16, here: u16,