Writeup, part 2, partially done

2024-12-25 21:58:11 +01:00 · 2022-05-21 01:50:46 -07:00 · 2022-05-21 01:50:46 -07:00 · 07fee611a6
commit 07fee611a6
parent e5531bc19f
1 changed files with 514 additions and 68 deletions
--- a/frustration.rs
+++ b/frustration.rs
@ -62,18 +62,17 @@ const ADDRESS_SPACE: usize = 65536;
 *
 * In most CPUs, this place is called a "register".  Registers work like
 * variables in a programming language but there are only a few of them
- * (between 1 to 32 depending on your CPU).
+ * (most CPUs have between 1 and 32).
 *
 * On 64-bit ARM the registers are named  x0, x1, ..., x30.
- * On 64-bit Intel they are instead named rax, rbx, ..., etc.
+ * On 64-bit Intel they are instead named rax, rbx, ....
 * Just in case those names ring any bells.
 *
- * Having immediate access to many registers is quite handy, but it means
+ * Having immediate access to dozens of registers is quite handy, but it means
 * many choices are available to the programmer, or more likely, to the
- * compiler.  And making good choices is Hard.
+ * compiler.  And making good choices is Hard.  A lot of work goes into
- *
+ * deciding what variable to store in what register ("register allocation") and
- * A lot of work goes into deciding what variable to store in what register
+ * when to dump register contents back into memory ("spilling").
 * ("register allocation") and when to dump register contents back into
 * memory ("spilling").
 *
 * Our CPU avoids these problems by not having registers; instead we store
 * numbers in a stack.  The CPU can only access the value that was most
@ -86,14 +85,14 @@ const ADDRESS_SPACE: usize = 65536;
 #[derive(Debug)]
 struct Stack<const N: usize> {
    mem: [u16; N],
-    tos: usize  // top-of-stack.
+    tos: usize  /* top-of-stack */
 }
 impl<const N: usize> Stack<N> {
-    // Add a number to the stack.
+    /* Add a number to the stack. */
    fn push(&mut self, val: u16) {
        self.tos = (self.tos.wrapping_add(1)) & (N - 1);
-        /* This stack is fixed-sized and can hold N values.
+        /* This stack is fixed-size and can hold N values.
         *
         * When a fixed-size stack fills up, there is a failure case
         * (stack overflow) that must be handled somehow.
@ -106,7 +105,7 @@ impl<const N: usize> Stack<N> {
        self.mem[self.tos] = val;
    }
-    // Return the most recently pushed number.
+    /* Return the most recently pushed number. */
    fn pop(&mut self) -> u16 {
        let val = self.mem[self.tos];
        self.mem[self.tos] = 0;
@ -152,12 +151,12 @@ struct Core {
     * Instead we are going with the "byte-addressed memory" that is more
     * conventional in today's computers.  This choice is arbitrary.
     */
-    ip: u16,  // instruction pointer
+    ip: u16,  /* instruction pointer */
-    dstack: Stack<16>, // data stack
+    dstack: Stack<16>, /* data stack */
-    rstack: Stack<32>  // return stack
+    rstack: Stack<32>  /* return stack */
 }
-/* Function to initialize the cpu.
+/* Helper to initialize the cpu.
 * There is probably a better idiom for this but I am bad at rust */
 fn new_core() -> Core {
    let c = Core {
@ -200,7 +199,7 @@ fn new_core() -> Core {
 */
 impl Core {
-    /* Helper function - Read a number from the specified memory address. */
+    /* Helper to read a number from the specified memory address. */
    fn load(&self, addr: u16) -> u16 {
        let a = addr as usize;
        /* We immediately run into trouble because we are using byte-addressed
@ -241,7 +240,7 @@ impl Core {
        /* The le in this function call stands for little-endian. */
    }
-    /* Helper function - Write a number to the specified memory address. */
+    /* Helper to write a number to the specified memory address. */
    fn store(&mut self, addr: u16, val: u16) {
        let a = addr as usize;
        self.ram[a..=a+1].copy_from_slice(&val.to_le_bytes());
@ -383,14 +382,14 @@ impl Core {
     /* Now that the instruction set is generally described
      * let's look at the code that implements it */
    fn step(&mut self) {
-        // 1. Fetch the instruction.
+        /* 1. Fetch the instruction.
-        // Also advance ip to point at the next instruction for next time.
+         * Also advance ip to point at the next instruction for next time. */
        let opcode = self.load(self.ip);
        self.ip = self.ip.wrapping_add(2);
-        // 2. Decode and execute the instruction
+        /* 2. Decode and execute the instruction */
        if (opcode >= 0xffe0) && (opcode & 1 == 0) {
-            // Data processing instruction
+            /* Data processing instruction */
            PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self);
            /* These instructions get looked up in a table.  The bit
             * math converts the instruction code into an index in the
@ -406,18 +405,18 @@ impl Core {
             */
        }
        else if (opcode & 1) == 1 {
-            // Literal
+            /* Literal */
            self.dstack.push(opcode >> 1);
        }
        else {
-            // Call
+            /* Call */
            self.rstack.push(self.ip);
            self.ip = opcode;
        }
    }
 }
-// The names of the 16 remaining CPU instructions
+/* The names of the 16 remaining CPU instructions */
 enum Op {
    RET = 0xffe0, TOR = 0xffe2, RTO = 0xffe4, LD  = 0xffe6,
    ST  = 0xffe8, DUP = 0xffea, SWP = 0xffec, DRP = 0xffee,
@ -427,29 +426,29 @@ enum Op {
 type Primitive = fn(&mut Core);
-// A table of functions for each of the 16 remaining CPU instructions
+/* A table of functions for each of the 16 remaining CPU instructions */
 const PRIMITIVES: [Primitive; 16] = [
    /* Return-stack instructions */
    | x | {
-        // RET - Return from subroutine
+        /* RET - Return from subroutine */
        x.ip = x.rstack.pop()
    },
    | x | {
-        // TOR - Transfer number from data stack to return stack
+        /* TOR - Transfer number from data stack to return stack */
        x.rstack.push(x.dstack.pop())
    },
    | x | {
-        // RTO - Transfer number from return stack to data stack
+        /* RTO - Transfer number from return stack to data stack */
        x.dstack.push(x.rstack.pop())
    },
    /* Memory instructions */
    | x | {
-        // LD - Load number from memory address specified on the data stack
+        /* LD - Load number from memory address specified on the data stack */
        let a = x.dstack.pop();
        x.dstack.push(x.load(a));
    },
    | x | {
-        // ST - Store number to memory address specified on the data stack
+        /* ST - Store number to memory address specified on the data stack */
        let a = x.dstack.pop();
        let v = x.dstack.pop();
        x.store(a, v);
@ -607,43 +606,133 @@ const PRIMITIVES: [Primitive; 16] = [
 *                            Part 2 - The Program
 *  ------------------------------------------------------------------------ */
-/* You now have an unfamiliar computer with no software.  It sits there doing
+/* "In a sense we're building a tree. We've now reached a
- * nothing.  Can you and the computer write a program?
+ * point where we can start making the roots. For a while
 * everything will be concealed but we'll eventually reach
 * daylight and start on branches."
 *
- * We are going to need to give the computer a list of instructions, by
+ * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970
- * which I mean a list of numbers.  If we were sitting in front of a
+ * https://colorforth.github.io/POL.htm
 * minicomputer in 196x, we would need a copy of the target machine's
 * instruction set, paper and pencil, and a lot of coffee.
 *
- * In 20xx we are fortunate enough to have rust so we will put it to work.
+ * You now have an unfamiliar computer with no software.  Can you and the
 * computer write a program?
 *
- * Regardless, this bootstrapping process isn't going to be very pleasant
+ * The first program is the hardest to write because you don't have any tools to
- * so the goal is to make it short.  We want a language that can stand on
+ * help write it.  The computer itself is going to be no help.  Without any
- * its own as quickly as possible, so the computer can start helping us
+ * program it will sit there doing nothing.
- * write the program.
+ *
 * What should the first program be?
 * A natural choice would be a tool that helps you program more easily.
 *
 * An interactive programming environment needs to let you do 2 things:
 *
 * 1. Call subroutines by typing their name at the keyboard
 * 2. Define new subroutines in terms of existing ones
 *
 * Begin with step 1:
 * Call subroutines by typing their name at the keyboard
 *
 * This is where we will meet Forth.
 *
 * The below is a small Forth for bootstrapping this computer.  If you want to
 * learn how to implement a full featured Forth, please read Jonesforth, and
 * Brad Rodriguez' series of articles "Moving Forth".  The simple Forth I write
 * below will probably help you understand those Forths a little better.
 *
 * Forth organizes all the computer's memory as a "dictionary" of subroutines.
 * The point of the dictionary is to give each subroutine a name so you
 * can run a subroutine by typing its name.  The computer will look up its
 * address for you and call it.
 *
 * Forth is a weird language but its design decisions make a lot of sense
 * if you view it as a bootstrapping tool.
 */
 /*
 * Forth organizes all the computer's memory as a "dictionary".
 * The dictionary starts at a low address and grows towards high addresses.
 * It is organized like a linked-list, like this:
 *
 * [Link field][Name][Code .......... ]
 *  ^
 *  |
 * [Link field][Name][Code ...... ]
 *  ^
 *  |
 * [Link field][Name][Code ............... ]
 *
- * Code is stored in the dictionary as a list of addresses.
+ * The reason it is a linked list is to allow each list entry to be a
 * different length.
 *
 * Each dictionary entry contains three things:
 *
 * - "Link field": The address of the previous dictionary entry.
 *                 For the first dictionary entry this field is 0.
 *
 * - "Name": A few letters to name this dictionary entry.
 *           Later you will type this name at the keyboard to call up
 *           this dictionary entry.
 *
 * - "Code": A subroutine to execute when you call up this dictionary
 *           entry.  This is a list of CPU instructions.  Note that one
 *           of the CPU instructions is "call".  So you can have a subroutine
 *           that calls other subroutines, or calls itself.
 *
 *           This code must end with a return (RET) instruction.
 *
 *           Example subroutine:
 *
 *           Number Instruction  Meaning
 *           ------ -----------  -------
 *           7      Literal(3)   Push the value 3 onto the data stack
 *           9      Literal(4)   Push the value 4 onto the data stack 
 *           65504  RET          Return to caller
 *
 * A linked list is not a very fast data structure but this doesn't really
 * matter because dictionary lookup doesn't need to be fast.  Lookups are
 * for converting text you typed at the keyboard to subroutine addresses.
 * You can't type very fast compared to a computer so this lookup doesn't
 * need to be fast.
 *
 * In addition to the linked list itself, you will need a couple of
 * variables to keep track of where the dictionary is in memory:
 *
 * - Dictionary pointer:  The address of the newest dictionary entry.
 * - Here:                The address of the first unused memory location,
 *                        which comes just after the newest dictionary entry.
 *
 * [Link field][Name][Code .......... ]
 *  ^
 *  |
 * [Link field][Name][Code ...... ]
 *  ^
 *  |
 * [Link field][Name][Code ............... ]
 *  ^                                       ^
 *  |                                       |
 * [Dictionary pointer]                    [Here]
 *
 * Got all that?
 *
 * To create our Forth interactive programming environment, we will start
 * by defining subroutines that:
 * - read names from the keyboard
 * - look up and execute dictionary entries by name
 *
 * We will put these subroutines themselves in the dictionary so they are
 * available for use once our interactive environment is up and running!
 *
 * If you were sitting in front of a minicomputer in 196x you would need
 * to create the dictionary with pencil and paper, but in 20xx we will
 * write a Rust program to help create the dictionary.
 *
 * First we need to keep track of where the dictionary is:
 */
 /* Here is the stuff that you would normally be doing with pencil and paper */
 struct Dict<'a> {
-    dp: u16,
+    dp: u16,   // The dictionary pointer
-    here: u16,
+    here: u16, // The "here" variable
-    c: &'a mut Core
+    c: &'a mut Core  // The dictionary lives in memory.  We are going to
                     // hang on to a mutable reference to the core to give
                     // us easy access to the memory.
 }
 /* Helpers for putting new routines in the dictionary */
 enum Item {
    Literal(u16),
    Call(u16),
@ -653,15 +742,19 @@ impl From<u16> for Item { fn from(a: u16) -> Self { Item::Call(a) } }
 impl From<Op>  for Item { fn from(o: Op)  -> Self { Item::Opcode(o) } }
 impl Dict<'_> {
    /* Helper to reserve space in the dictionary by advancing the "here"
     * pointer */
    fn allot(&mut self, n: u16) {
        self.here = self.here.wrapping_add(n);
    }
    /* Helper to append a 16 bit integer to the dictionary */
    fn comma(&mut self, val: u16) {
        self.c.store(self.here, val);
        self.allot(2);
    }
    /* Helper to append a CPU instruction to the dictionary */
    fn emit<T: Into<Item>>(&mut self, val: T) {
        match val.into() {
            Item::Call(val)    => { self.comma(val) }
@ -671,11 +764,36 @@ impl Dict<'_> {
        }
    }
    /* Helper to append a "name" field to the dictionary.  To save space and
     * to make each dictionary header a consistent size, I am choosing to not
     * store every letter of the name.  Instead I am storing only the length of
     * the name and then the first three letters of the name.
     *
     * That means these two names will compare equal:
     * - ALLOW (-> 5ALL)
     * - ALLOT (-> 5ALL)
     *
     * Even though their first three letters are the same, these two names
     * will compare unequal because they are different lengths:
     * - FORTH (-> 5FOR)
     * - FORGET (-> 6FOR)
     *
     * If a name is shorter than 3 letters it is padded out with spaces.
     * - X (-> 1X  )
     *
     * You can see that the name field is always four bytes regardless
     * of how many letters are in the name, and the link field is two bytes.
     * This means a dictionary header in this Forth is always six bytes.
     */
    fn name(&mut self, n: u8, val: [u8; 3]) {
        /* Store the length and the first character */
        self.comma(n as u16 | ((val[0] as u16) << 8));
        /* Store the next two characters */
        self.comma(val[1] as u16 | ((val[2] as u16) << 8));
    }
    /* Helper to append a new link field to the dictionary and update the
     * dictionary pointer appropriately. */
    fn entry(&mut self) {
        let here = self.here;
        self.comma(self.dp);
@ -683,44 +801,371 @@ impl Dict<'_> {
    }
 }
 /* Now we can start building the dictionary. */
 fn build_dictionary(c: &mut Core) {
    use Op::*;
    use Item::*;
-    let mut d = Dict {dp: 0, here: 2, c: c};
+    let mut d = Dict {
        dp: 0,  /* Nothing in the dictionary yet */
        here: 2,  /* Reserve address 0 as an "entry point", i.e. where the
                     CPU will jump to start running Forth.  We don't have a
                     Forth interpreter yet so we'll leave address 0 alone for
                     now and start the dictionary at address 2 instead. */
        c: c
    };
    /* Consider the following facts:
     * - The CPU knows how to execute a bunch of instructions strung together.
     * - Forth consists of a bunch of subroutine calls strung together.
     * - Subroutine CALL is a valid instruction of our CPU.
     *
     * This means that we can immediately begin programming our machine in
     * a language resembling Forth, just by writing a list of subroutine
     * calls into the dictionary.
     *
     * The line between "machine code program" and "Forth program" is
     * very blurry.  To illustrate:
     *
     * Here is a subroutine consisting of a few instructions strung together.
     *
     *       Instruction Number  Meaning
     *       ----------- ------  -------
     *       Literal(3)  7       Push the value 3 onto the data stack
     *       Literal(4)  9       Push the value 4 onto the data stack 
     *       RET         65504   Return to caller
     *
     * Here is a Forth subroutine consisting of a few subroutine calls strung
     * together.
     *       Call        Number  Meaning
     *       ----------- ------  -------
     *       S1          1230    Call subroutine S1 which happens to live
     *                           at address 1230
     *       S2          1250    Call subroutine S2 which happens to live
     *                           at address 1250
     *       RET         65504   Return to caller
     *
     * This duality between CPU instructions and Forth code comes from
     * an idea called "subroutine threading".  It is a refinement of an
     * idea called "threaded code".  This has no relation to the kind of
     * threading that lets you run programs in parallel.  You can read more
     * about threaded code on Wikipedia or in the other Forth resources I
     * mentioned earlier (Jonesforth, and Moving Forth by Brad Rodriguez).
     *
     * Our new language starts out with the sixteen (well, eighteen)
     * instructions built into the CPU.  We can string those instructions
     * together into a new subroutine.  This subroutine adds to the pool
     * of functions we have available for making new subroutines.
     *
     * Repeat until you have built what you wanted to build, via
     * function composition.  This is the idea behind Forth.
     */
    /*
     * We are going to be writing many series of instructions so let's
     * start out by making a Rust macro that makes them easier to type
     * and lets us specify a CPU instruction vs. a subroutine call with
     * equal ease.
     *
     * The macro below will convert:
     *
     *     forth!(Literal(2), ADD, RET)
     *
     * to:
     *
     *     d.emit(Literal(2));
     *     d.emit(ADD);
     *     d.emit(RET);
     *
     * which you probably recognize as code that will add a new subroutine
     * to the dictionary.
     */
    macro_rules! forth {
        ($x:expr) => (d.emit($x));
        ($x:expr, $($y:expr),+) => (d.emit($x); forth!($($y),+))
    }
    /* Now we can add the first subroutine to the dictionary!
     *
     * key: Reads a character from the keyboard and places its character
     * code on the stack.
     *
     * There is a tradition of writing stack comments for Forth subroutines
     * to describe the stack effect of executing the subroutine.
     * They look like this: key ( -- n )
     *
     * Read as: key does not take any parameters off the stack, and leaves
     * one new number pushed onto the stack.
     *
     * Also remember that a dictionary entry looks like this:
     * [Link field][Name][Code .......... ]
     */
    // key ( -- n )
-    d.entry(); d.name(3, *b"key");  let key = d.here;
+    d.entry();           /* Compile the link field into the dictionary */
-    forth!(Literal(0), IO, RET);
+    d.name(3, *b"key");  /* Compile the name field into the dictionary */
    let key = d.here;    /* (Save off the start address of the code so we
                            can call it later) */
    forth!(
        Literal(0),      /* Compile a LITERAL instruction that pushes
                            0 to the stack */
        IO,              /* Compile an IO instruction.
                          *
                          * Remember from the CPU code that IO takes a
                          * parameter on the stack to specify which port
                          * to use.
                          *
                          * Also remember that IO port 0 reads
                          * a character from standard input.
                          */
        RET              /* Compile a RET instruction */
    );
    /* We have now compiled the "key" subroutine into the dictionary.
     * [Link field][Name][Code .......... ]
     *        0000  3key  1, 65534, 65504
     *
     * The next subroutine we will make is "emit".  This is a companion
     * to "key" that works in the opposite direction.
     *
     * key ( -- n ) reads a character from stdin and pushes it to the stack.
     * emit ( n -- ) pops a character from the stack and writes it to stdout.
     */
    // emit ( n -- )
    d.entry(); d.name(4, *b"emi");  let emit = d.here;
    forth!(Literal(1), IO, RET);
    /* I am tired of saying "subroutine" so many times, so I am going to
     * introduce a new term.  Remember the goal our language is working
     * towards -- we want to be able to type a word at the keyboard, and
     * let the computer look it up in the dictionary and execute the
     * appropriate code.
     *
     * So far we have two named items in the dictionary, key and emit.
     *
     * We are going to term a named dictionary item a "word".
     * This is a Forth tradition.
     *
     * So key and emit are "words", or "dictionary words" if you want to be
     * precise about it.  So far these are the only words we've defined.
     *
     * Let's define some more words.
     */
    /* Our CPU does not have subtraction so let's make subtraction by adding
     * the two's complement.
     *
     * To get the two's complement, do a bitwise invert and add 1.
     *
     * This will be the most complicated Forth that we've written so far
     * so let's walk through step by step. */
    // - ( a b -- a-b )
    d.entry(); d.name(1, *b"-  ");  let sub = d.here;
-    forth!(INV, Literal(1), ADD, ADD, RET);
+    forth!(         /* Stack contents:  a b, to start off with.
                     * We want to compute a minus b */
        INV,        /* Bitwise invert the top item on the stack.
                     * Stack contents: a ~b */
        Literal(1), /* Push 1 onto the stack.
                     * Stack contents: a ~b 1 */
        ADD,        /* Add the top two items on the stack.
                     * Stack contents: a ~b+1
                     * Note that ~b+1 is the two's complement of b. */
        ADD,        /* Add the top two items on the stack.
                     * Stack contents: n
                     * Note that n = (a + ~b+1) = a - b */
        RET         /* Done, return to caller, leaving n on the data stack. */
    );
    /* Writing it out like that takes a lot of space.  Normally Forth code
     * is written on a single line, like this:
     *
     * INV 1 ADD ADD RET
     *
     * Looking at it this way, it's easy to see the new word we just
     * created (-) is made from 5 instructions.  It's pretty typical for
     * a Forth word to be made of 2-7 of them.  Beyond that length, things
     * get successively harder to understand, and it becomes a good idea
     * to split some work off into helper words.
     *
     * We will see an example of this below.
     */
    /* Our next word will be useful for Boolean logic.
     *
     * 0= ( n -- f )
     *
     * In a stack comment, "f" means "flag", a.k.a. Boolean value.
     * By Forth convention, zero is false and any nonzero value is true.
     * However the "best" value to use for a true flag is 65535 (all ones)
     * so the bitwise logical operations can double as Boolean logical
     * operations.
     *
     * So what 0= does is:
     * - if n=0,    leave on the stack f=65535
     * - otherwise, leave on the stack f=0
     *
     * It is like C's ! operator.
     *
     * In Rust this could be implemented as:
     *
     * fn zero_eq(n: u16) -> u16 {
     *     if n == 0 {
     *         return 65535;
     *     } else {
     *         return 0;
     *     }
     * }
     *
     * Rust has an if-then and block scope, so this is easy to write.
     *
     * The literal translation to a typical register-machine assembly
     * language would look something like this:
     *
     * zero_eq:     compare r0, 0
     *              jump_eq is_zero
     *              move    r0, 0
     *              ret
     * is_zero:     move    r0, 65535
     *              ret
     *
     * It looks simple but I want to point out a couple things about it
     * that are not so simple.
     *
     * The conditional jump instruction, jump_eq.
     * ------------------------------------------
     * Our CPU doesn't have this.  Q is the only "decision-making"
     * instruction that our CPU has.
     *
     * Q - If the top number on the data stack is zero, skip the next
     * instruction.
     *
     * The forward reference
     * ---------------------
     * This is another problem.  Think of the job of an assembler which is
     * converting an assembly language program to machine code.  We are
     * currently writing our code in a tiny assembler that we made in Rust!  It
     * is very simple but so far it has worked for us.  The assembler of our
     * hypothetical register-machine below has a rather nasty problem to solve.
     *
     * zero_eq:     compare r0, 0
     *              jump_eq is_zero  <----- On this line.
     *              move    r0, 0
     *              ret
     * is_zero:     move    r0, 65535
     *              ret
     * 
     * It wants to jump to is_zero but that symbol has not been seen yet and is
     * unrecognized.  On top of that, the assembler also doesn't yet know what
     * address is_zero will have, so doesn't know what jump target to emit.
     * To successfully assemble that kind of program you would need an
     * assembler smarter than the assembler we made for ourselves in Rust.
     *
     * There are ways to solve this but let's NOT solve it.
     *
     * Our CPU has no jump instruction (only call) and our assembler only lets
     * us call things we already defined.  Instead of removing these
     * constraints, find a way to write 0= within the constraints.
     *
     * Here is a start at solving the problem
     * 
     * is_nonzero ( -- 0 )
     *     Literal(0)
     *     RET
     *
     * 0= ( n -- f )
     *     Q            <-- pop n, if n=0 skip next instruction
     *     is_nonzero   <-- f=0 is now pushed to stack
     *     Literal(0)
     *     INV          <-- f=65535 is now pushed to stack
     *     RET          <-- Return
     *
     * We got rid of the forward reference by defining is_nonzero before it
     * was used.
     *
     * We got rid of the jump instruction by using a subroutine call instead.
     *
     * This code is close to working but it doesn't quite work.  The problem
     * is that is_nonzero gives control back to 0= when done, just like
     * a subroutine call normally does, and then 0= runs as normal until it
     * hits the return instruction at the end.
     * So we wind up executing both the f=0 branch and the f=65535 branch,
     * instead of just executing the f=0 branch like we wanted in this case.
     *
     * It is possible to fix this last problem by adding the instructions
     * RTO DRP to is_nonzero.
     *
     * is_nonzero ( -- 0 )
     *     RTO          <-- Pop the return address, push to data stack
     *     DRP          <-- Discard it
     *     Literal(0)   <-- Put 0 on the data stack
     *     RET          <-- Return
     * 
     * Because we popped off and discarded one item from the return stack, the
     * final RET instruction will not return to 0= any more.  Instead it will
     * skip one level and return to whoever called 0=.  This has the result of
     * ending 0= early, which is what we wanted to do.
     * 
     * I call this pattern "return-from-caller".  It is used occasionally in
     * real Forth systems.  My dialect of Forth will use it extensively to work
     * around my CPU's lack of conditional branch.
     *
     * Now we've explained how 0= is going to work, let's make it.
     */
    /* First we define the helper.  It won't be reused, so I am not going
     * to bother giving it a dictionary header and name for easy lookup later.
     * Think of it as a private function. */
    let zero = d.here;
    forth!(Literal(0), RTO, DRP, RET);
    /* Now define 0= using the helper. */
    // 0= ( n -- f )
    d.entry(); d.name(2, *b"0= ");  let zero_eq = d.here;
    forth!(Q, zero, Literal(0), INV, RET);
    /* Next let's make a = equality comparison operator, using 0= and subtract.
     * I call it an "operator" because that's what other languages would
     * call it, but Forth has no special idea of an "operator".  Everything
     * is just words. */
    // = ( a b -- a=b )
    d.entry(); d.name(1, *b"=  ");  let eq = d.here;
    forth!(sub, zero_eq, RET);
    /* Note that 0= and subtract are both words, not CPU instructions.
     * This makes = the first "pure" Forth word we have defined, with no
     * direct dependency on the machine's instruction set.
     * We could define = as - 0= on a real standards-compliant Forth system
     * and it would still work.  So Forth gets you to the point of writing
     * "portable" code really quickly.  Often you can reuse routines early in
     * bootstrapping even though they were written and tested on a different
     * machine.  Many languages offer portability but few offer it so quickly.
     */
-    // Advance past whitespace
+    /* Now that we've got some basics in place let's go back to solving
     * the real problem of getting our language to read words from the
     * keyboard.  The first problem we have is that we need some way to
     * separate words from each other so we know where one word ends and the
     * next begins.  This problem is called "lexing".  Forth has about the
     * simplest lexer ever: it just splits on whitespace.  Anything with
     * character code <=32 is considered whitespace.  Words are delimited by
     * whitespace.  And that is all the syntax Forth has.
     *
     * To read a word from the keyboard you will need to:
     * - Advance past any leading whitespace
     * - Read characters into a buffer until whitespace is seen again.
     */
    /* Let's start with the "advance past whitespace" part */
    let skip_helper = d.here;
    forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper);
    // skipws ( -- c )
    d.entry(); d.name(6, *b"ski");  let skipws = d.here;
    forth!(skip_helper);
@ -748,13 +1193,13 @@ fn build_dictionary(c: &mut Core) {
    d.entry(); d.name(2, *b"c! ");  let cst = d.here;
    forth!(DUP, LD, Literal(0xff), INV, AND, SWP, TOR, OR, RTO, ST, RET);
-    // Load 1 letter into buffer.
+    /* Load 1 letter into buffer. */
    let stchar = d.here;
    forth!(Literal(word_buf), cld, Literal(1), ADD, DUP, Literal(word_buf), cst,
           Literal(5), min, Literal(word_buf), ADD, cst, RET);
-    // Load letters into buffer until whitespace is hit again.
+    /* Load letters into buffer until whitespace is hit again.
-    // Return the whitespace character that was seen.
+     * Return the whitespace character that was seen. */
    let getcs_helper = d.here;
    forth!(RTO, DRP, stchar, key, DUP, Literal(32), SWP, GEQ, Q, RET, getcs_helper);
@ -769,8 +1214,8 @@ fn build_dictionary(c: &mut Core) {
           skipws, getcs, DRP, RET);
    // latest ( -- a )
-    // Address of "latest" variable.  This variable stores the address of
+    /* Address of "latest" variable.  This variable stores the address of
-    // the latest word in the dictionary.
+       the latest word in the dictionary. */
    let latest_ptr = d.here; d.allot(2);
    d.entry(); d.name(6, *b"lat");  let latest = d.here;
    forth!(Literal(latest_ptr), RET);
@ -806,15 +1251,15 @@ fn build_dictionary(c: &mut Core) {
    forth!(DUP, DUP, Literal(3), SFT, ADD, ADD, RET);
    // here ( -- a )
-    // Address of "here" variable.  This variable stores the address of
+    /* Address of "here" variable.  This variable stores the address of
-    // the first free space in the dictionary
+       the first free space in the dictionary */
    let here_ptr = d.here; d.allot(2);
    d.entry(); d.name(4, *b"her");  let here = d.here;
    forth!(Literal(here_ptr), RET);
    // state ( -- a )
-    // Address of "state" variable.  This variable stores -1 if
+    /* Address of "state" variable.  This variable stores -1 if
-    // interpreting or 0 if compiling.
+     * interpreting or 0 if compiling. */
    let state_ptr = d.here; d.allot(2);
    d.entry(); d.name(5, *b"sta");  let state = d.here;
    forth!(Literal(state_ptr), RET);
@ -906,7 +1351,8 @@ fn build_dictionary(c: &mut Core) {
    d.entry(); d.name(1 | 0x80, *b";  ");
    forth!(Literal(!(RET as u16)), INV, comma, lbracket, unsmudge, RET);
-    // Finally put the primitives in the dictionary so they can be called directly.
+    /* Finally put the primitives in the dictionary so they can be
     * called interactively. */
    d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET);
    d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET);
    d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET);