// frustration/frustration.rs

use std::io;
use std::io::Read;
use std::io::Write;
use std::convert::TryInto;

/* What is this file?
 *
 * This is a tutorial that will show you how to bootstrap an interactive
 * programming environment from a small amount of code.
 *
 * First we will design a virtual computer.
 *
 * Then we will design software to run on that computer, to enable REPL-style
 * interactive programming.
 *
 * A REPL is a "Read, Evaluate, Print loop".  A REPL lets you type code at
 * the keyboard and immediately get a result back.  You can also define
 * functions, including functions that change how the environment works in
 * fundamental ways.
 */

/* What is Forth?
 *
 * Forth is the programming language we will use with our computer.
 *
 * Forth was invented by Chuck Moore in the 1960s as a tool for quickly
 * coming to grips with new computer systems.
 *
 * "Let us imagine a situation in which you have access to
 * your computer. I mean sole user sitting at the board with
 * all the lights, for some hours at a time. This is
 * admittedly an atypical situation, but one that can
 * always be arranged if you are competent, press hard, and
 * will work odd hours. Can you and the computer write a
 * program? Can you write a program that didn't descend from
 * a pre-existing program? You can learn a bit and have a
 * lot of fun trying."

 * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970
 *    https://colorforth.github.io/POL.htm
 *
 * As you will see, it does not take much work to get Forth running on a
 * new machine, including a machine with a completely unfamiliar instruction
 * set.
 *
 * But before we can do any of that we will need a machine.  Let's make one.
 */

/* ---------------------------------------------------------------------------
 *                           Part 1 - The Computer
 *  ------------------------------------------------------------------------ */

/* This computer will have a 16-bit CPU.  It will be able to access
 * 2^16 (65536) memory locations, numbered 0 to 65535.
 * Each of these locations, 0 to 65535, is called a "memory address".
 */
/* One memory location for every value a 16-bit address can take. */
const ADDRESS_SPACE: usize = u16::MAX as usize + 1;

/* The job of a CPU is to load numbers from memory, do math or logic on them,
 * then write the resulting number back into memory.
 *
 * The CPU needs a temporary place to hold numbers while it is working with
 * them.
 *
 * In most CPUs, this place is called a "register".  Registers work like
 * variables in a programming language but there are only a few of them
 * (most CPUs have between 1 and 32).
 *
 * On 64-bit ARM the registers are named  x0, x1, ..., x30.
 * On 64-bit Intel they are instead named rax, rbx, ....
 * Just in case those names ring any bells.
 *
 * Having immediate access to dozens of registers is quite handy, but it means
 * many choices are available to the programmer, or more likely, to the
 * compiler.  And making good choices is Hard.  A lot of work goes into
 * deciding what variable to store in what register ("register allocation") and
 * when to dump register contents back into memory ("spilling").
 *
 * Our CPU avoids these problems by not having registers; instead we store
 * numbers in a stack.
 * - Putting a number onto the top of the stack is called "push".
 * - Taking the most recent number off the top of the stack is called "pop".
 *
 * The CPU can only access the value that was most recently pushed onto the
 * stack.  This may seem like a big limitation right now but you will see ways
 * of dealing with it.
 *
 * The choice to use a stack instead of registers makes our CPU a
 * "stack machine" as opposed to a "register machine".
 */

/* A fixed-size circular stack of N 16-bit numbers.
 *
 * `mem` holds the stacked values and `tos` is the index in `mem` of the
 * value that was pushed most recently.  N must be at least 1.
 */
#[derive(Debug)]
struct Stack<const N: usize> {
    mem: [u16; N],
    tos: usize  /* top-of-stack */
}

impl<const N: usize> Stack<N> {
    /* Add a number to the stack. */
    fn push(&mut self, val: u16) {
        /* Step to the next slot, going back to slot 0 after slot N-1.
         * Taking the remainder (instead of masking with N-1) gives the
         * same answer when N is a power of two, and also works when it
         * is not.
         */

        self.tos = self.tos.wrapping_add(1) % N;

        /* This stack is fixed-size and can hold N values.
         *
         * When a fixed-size stack fills up, there is a failure case
         * (stack overflow) that must be handled somehow.
         *
         * This particular stack is a circular stack, meaning that if
         * it ever fills up, it will discard the oldest entry instead of
         * signaling an error.  The lack of error handling makes the CPU
         * simpler.
         */

        self.mem[self.tos] = val;
    }

    /* Return the most recently pushed number. */
    fn pop(&mut self) -> u16 {
        let val = self.mem[self.tos];
        self.mem[self.tos] = 0;

        /* You don't have to set the value back to zero.  I am only doing
         * this because it makes the stack look nicer when dumped
         * out with print!().
         */

        /* Step back one slot, going to slot N-1 before slot 0.  Adding N
         * first keeps the subtraction from going below zero. */

        self.tos = (self.tos + N - 1) % N;
        val
    }
}

/* Now that we have a stack let's use one!  Or two?
 *
 * Why two stacks?
 *
 * The first stack is called the "data stack" and is used instead of
 * registers, as already described.
 *
 * The second stack will be called the "return stack".  This one holds
 * subroutine return addresses.  Don't worry if you don't know what that
 * means; we'll get to it later when we talk about the instruction set.
 *
 * In addition to stacks we are going to give the CPU a couple more things:
 *
 * 1. An "instruction pointer", which holds the memory address of the next
 *    instruction that the CPU will execute.
 *
 * 2. To make life simpler we put main memory straight on "the CPU" even
 *    though in a real computer, RAM would be off-chip and accessed through a
 *    data bus.
 */

struct Core {
    /* Main memory.
     *
     * Each of the 65536 possible memory addresses stores one 8-bit byte
     * (u8 data type in Rust), which makes this a 65536 byte (64 KB) memory.
     *
     * The alternative would be to store 16 bits at each memory address,
     * giving a "word-addressed memory".
     *
     * We go with "byte-addressed memory" because it is the more
     * conventional choice in today's computers.  The choice is arbitrary.
     */
    ram: [u8; ADDRESS_SPACE],

    /* Instruction pointer */
    ip: u16,

    /* Data stack */
    dstack: Stack<16>,

    /* Return stack */
    rstack: Stack<32>,
}

/* Helper to initialize the CPU.
 * There is probably a better idiom for this but I am bad at rust */
fn new_core() -> Core {
    /* Memory starts out zeroed and execution starts at address 0.
     *
     * The stacks are circular, so the starting position of top-of-stack
     * makes no functional difference.  It is set to the highest index so
     * that the first value pushed lands at index 0, which makes the stack
     * look nicer when printed out.
     */

    Core {
        ram: [0; ADDRESS_SPACE],
        ip: 0,
        dstack: Stack { mem: [0; 16], tos: 15 },
        rstack: Stack { mem: [0; 32], tos: 31 },
    }
}

/* Now we have a CPU sitting there but it does nothing.
 *
 * A working CPU would execute a list of instructions.  An instruction is
 * a number that is a command for the CPU.  For example:
 *
 * 65522 might mean "add the top two values on the data stack".
 * 65530 might mean "invert the bits of the top value on the data stack".
 *
 * The map of instruction-to-behavior comes from the CPU's
 * "instruction set" i.e. the set of all possible instructions and their
 * behaviors.
 *
 * Normally you program a CPU by putting instructions into memory and then
 * telling the CPU the memory address where it can find the first instruction.
 *
 * The CPU will:
 * 1. Fetch the instruction (load it from memory)
 * 2. Decode the instruction (look it up in the instruction set)
 * 3. Execute that instruction (do the thing the instruction set said to do)
 * 4. Move on to the next instruction and repeat.
 *
 * So now we will make the CPU do those things.
 * We'll start off by teaching it how to access memory, and then we will
 * define the instruction set.
 */

impl Core {
    /* Helper to read a number from the specified memory address. */
    fn load(&self, addr: u16) -> u16 {
        let a = addr as usize;

        /* We immediately run into trouble because we are using byte-addressed
         * memory as mentioned earlier.
         *
         * Each memory location stores 8 bits (a byte)
         *
         * Our CPU operates on 16 bit values and we want each memory operation
         * to read/write 16 bits at a time for efficiency reasons.
         *
         * What do we do?
         *
         * This CPU chooses to do the following:
         * - Read the low  byte of the 16-bit number from address a
         * - Read the high byte of the 16-bit number from address a+1
         *
         * 16 bit number in CPU: [00000000 00000001]        = 1
         *                        |        |
         *                        |        memory address a = 1
         *                        |
         *                        memory address a+1        = 0
         *
         * This is called "little endian" because the low byte comes first.
         *
         * We could have just as easily done the opposite:
         * - Read the high byte of the 16-bit number from address a
         * - Read the low  byte of the 16-bit number from address a+1
         *
         * 16 bit number in CPU: [00000000 00000001]          = 1
         *                        |        |
         *                        |        memory address a+1 = 1
         *                        |
         *                        memory address a            = 0
         *
         * This is called "big endian" because the high byte comes first.
         */

        return u16::from_le_bytes(self.ram[a..=a+1].try_into().unwrap());

        /* The le in this function call stands for little-endian. */
    }

    /* Helper to write a number to the specified memory address. */
    fn store(&mut self, addr: u16, val: u16) {
        /* Little endian, mirroring load(): the low byte of val goes to
         * addr and the high byte goes to the address after it. */

        let [lo, hi] = val.to_le_bytes();
        self.ram[addr as usize] = lo;

        /* Addresses are 16 bits wide, so the address after 65535 is 0.
         * Without this wraparound a store to address 65535 would reach
         * past the end of memory and crash the emulator. */

        self.ram[addr.wrapping_add(1) as usize] = hi;
    }

    /* With that taken care of, we can get around to defining the CPU's
     * instruction set.
     *
     * Each instruction on this CPU will be the same size, 16 bits, for
     * the following reasons:
     *
     * 1. Instruction fetch always completes in 1 read.  You never have to
     *    go back and fetch more bytes.
     *
     * 2. If you put the first instruction at an even numbered address then
     *    you know all the rest of the instructions will also be at even
     *    numbered addresses.  I will take advantage of this later.
     *
     * 3. A variable length encoding would save space but 2 bytes per
     *    instruction is already pretty small so it doesn't matter very much.
     *
     * Here are the instructions I picked.
     *
     *  CALL
     *  ------------------------------------------------------------+----
     *  | n   n   n   n   n   n   n   n   n   n   n   n   n   n   n | 0 |
     *  ------------------------------------------------------------+----
     *
     * What CALL does:
     * ---------------
     *  - Push instruction pointer onto the return stack.
     *  - Set instruction pointer to address nnnnnnnnnnnnnnn0.
     *
     * This lets you call a subroutine at any even numbered address
     * from 0 to 65534.
     *
     * Why this is useful:
     * -------------------
     * Together with the return stack, CALL lets you call subroutines.
     *
     * A subroutine is a list of instructions that does something
     * useful and then returns control to the caller.
     *
     * For example:
     *
     * Address   Instruction   Meaning
     * 100 ->           200    Call 200
     * 102 ->           ???    Add the top two values on the data stack.
     * ...
     * 200 ->           ???    Push the value 3 onto the data stack
     * 202 ->           ???    Push the value 4 onto the data stack
     * 204 ->           ???    Return to caller
     *
     * Don't worry about the other instructions I am using here.  I will
     * define them later.
     *
     * I mostly want to point out the three instructions that I put
     * at address 200 because they are a subroutine,
     * a small self contained piece of code (6 bytes) that
     * performs a specific task.
     *
     * Do you think it's cool that you can count exactly how many bytes it
     * took?  I think it's cool.
     *
     * Here is what happens when the CPU begins execution at address 100.
     *
     * Address   Data stack   Return stack
     * 100       []           []    <--- About to call subroutine...
     * 200       []           [102]
     * 202       [3]          [102]
     * 204       [3 4]        [102] <--- About to return from subroutine...
     * 102       [3 4]        []
     * 104       [7]          []
     *
     * The return stack is there to make sure that returning from a subroutine
     * goes back to where it came from.  We will talk more about the return
     * stack later when we talk about the RET instruction.
     *
     * Limitations of CALL:
     * --------------------
     * This CPU cannot call an instruction that starts at an odd address.
     * a.k.a. "unaligned call" is impossible.
     *
     * At first this seems like a limitation, but it really isn't.
     * If you put the first instruction at an even numbered address then
     * all the rest of the instructions will also be at even numbered
     * addresses.  So this works fine.
     *
     * Of course if you intersperse instructions and data in memory...
     *            _________
     *  ________ |_________| _____________
     * |________|    Data   |_____________|
     * Instructions         More instructions
     *
     * ...then you will have to be careful to make sure the second block
     * of instructions also starts at an even numbered address.
     * You might need to include an extra byte of data as "padding".
     *
     *  Data processing instructions
     *  --------------------------------------------+---------------+----
     *  | 1   1   1   1   1   1   1   1   1   1   1 | x   x   x   x | 0 |
     *  --------------------------------------------+---------------+----
     * Sixteen of the even numbers are reserved for additional instructions
     * that will be described later.
     *
     * The even numbers 1111111111100000 to 1111111111111110 (65504 to 65534)
     * are reserved for these instructions.  This means that CALL 65504 through
     * CALL 65534 are not possible.  Put another way, it is not possible to
     * call a subroutine living in the top 32 bytes of memory.  This is not a
     * very severe limitation.
     *
     *  LITERAL
     *  ------------------------------------------------------------+----
     *  | n   n   n   n   n   n   n   n   n   n   n   n   n   n   n | 1 |
     *  ------------------------------------------------------------+----
     *
     * What LITERAL does
     * -----------------
     * - Place the value 0nnnnnnnnnnnnnnn on the data stack.
     *
     * Why this is useful:
     * -------------------
     * Programs will often need to deal with constant numbers.
     * For example, you might want to add 2 to a memory address (to move
     * on to the next even-numbered address) or add 32 to a character code
     * (to convert an uppercase letter to lowercase).  These constants have
     * to come from somewhere.
     *
     * Limitations of LITERAL:
     * -----------------------
     * To differentiate it from a call, this instruction is always an
     * odd number.  The trailing 1 is discarded before placing the number on
     * the data stack.  This missing bit means that only 2^15 values can be
     * represented (0 to 32767).  32768 on up cannot be stored directly.
     * You would need to do some follow-up math to get these numbers.
     * The most direct way is to use the INV instruction, described later.
     */

     /* Now that the instruction set is generally described
      * let's look at the code that implements it */

    fn step(&mut self) {

        /* 1. Fetch the instruction at ip, then move ip along so it points
         * at the following instruction for next time. */

        let opcode = self.load(self.ip);
        self.ip = self.ip.wrapping_add(2);

        /* 2. Decode and execute the instruction.
         *
         * The lowest bit separates literals (odd) from everything else
         * (even).  Among the even numbers, 0xffe0 and up are the data
         * processing instructions and the rest are calls.
         */

        let is_literal = opcode & 1 == 1;

        if is_literal {
            /* Literal: drop the marker bit and push what is left. */
            self.dstack.push(opcode >> 1);
        } else if opcode >= 0xffe0 {
            /* Data processing instruction
             *
             * These instructions get looked up in a table.  The bit
             * math converts the instruction code into an index in the
             * table as follows:
             *
             * 0xffe0 --> 0
             * 0xffe2 --> 1
             * ...
             * 0xfffe --> 15
             *
             * The table will be described below, and these instructions
             * explained.
             */

            let index = usize::from((opcode - 0xffe0) >> 1);
            PRIMITIVES[index](self);
        } else {
            /* Call: remember where to come back to, then jump. */
            let return_address = self.ip;
            self.rstack.push(return_address);
            self.ip = opcode;
        }
    }
}

/* The names of the 16 remaining CPU instructions */
enum Op {
    /* Return-stack instructions */
    RET = 0xffe0, /* Return from subroutine */
    TOR = 0xffe2, /* Transfer number from data stack to return stack */
    RTO = 0xffe4, /* Transfer number from return stack to data stack */

    /* Memory instructions */
    LD  = 0xffe6, /* Load number from memory */
    ST  = 0xffe8, /* Store number to memory */

    /* Stack shuffling instructions */
    DUP = 0xffea, /* Duplicate the top number on the data stack */
    SWP = 0xffec, /* Exchange the top two numbers on the data stack */
    DRP = 0xffee, /* Discard the top number on the data stack */

    /* Conditional skip instruction */
    Q   = 0xfff0, /* Skip the next instruction if the top number is zero */

    /* Arithmetic and logic */
    ADD = 0xfff2, /* Sum the top two numbers */
    SFT = 0xfff4, /* Bit shift left or right */
    OR  = 0xfff6, /* Bitwise-or the top two numbers */
    AND = 0xfff8, /* Bitwise-and the top two numbers */
    INV = 0xfffa, /* Bitwise-invert the top number */
    GEQ = 0xfffc, /* Unsigned-compare the top two numbers */

    /* Input/output */
    IO  = 0xfffe, /* Write/read a number from/to input/output port */
}

/* The type of every entry in the PRIMITIVES table: a function that is
 * handed the CPU and changes its state in place. */
type Primitive = fn(&mut Core);

/* A table of functions for each of the 16 remaining CPU instructions */
const PRIMITIVES: [Primitive; 16] = [
    /* The order of this table matters: step() turns instruction code
     * 0xffe0 into index 0, 0xffe2 into index 1, and so on up to 0xfffe
     * at index 15. */

    /* Return-stack instructions */
    | x | {
        /* RET - Return from subroutine */
        x.ip = x.rstack.pop()
    },
    | x | {
        /* TOR - Transfer number from data stack to return stack */
        x.rstack.push(x.dstack.pop())
    },
    | x | {
        /* RTO - Transfer number from return stack to data stack */
        x.dstack.push(x.rstack.pop())
    },
    /* Memory instructions */
    | x | {
        /* LD - Load number from memory address specified on the data stack */
        let a = x.dstack.pop();
        x.dstack.push(x.load(a));
    },
    | x | {
        /* ST - Store number to memory address specified on the data stack */
        let a = x.dstack.pop();
        let v = x.dstack.pop();
        x.store(a, v);
    },

    /* Stack shuffling instructions
     *
     * Remember the problem of "register allocation" mentioned earlier,
     * and how stack machines are supposed to avoid that problem?  Well,
     * nothing comes for free.  Stack machines can only process the top
     * value(s) on the stack.  So sometimes you will have to do some work
     * to "unbury" a crucial value and move it to the top of the stack.
     * That's what these instructions are for.
     *
     * Their use will become more obvious when we start programming the
     * machine, soon.
     */

    | x | {
        /* DUP - Duplicate the top number on the data stack */
        let v = x.dstack.pop();
        x.dstack.push(v);
        x.dstack.push(v);
    },
    | x | {
        /* SWP - Exchange the top two numbers on the data stack */
        let v1 = x.dstack.pop();
        let v2 = x.dstack.pop();
        x.dstack.push(v1);
        x.dstack.push(v2);
    },
    | x | {
        /* DRP - Discard the top number on the data stack */
        let _ = x.dstack.pop();
    },
    /* Conditional skip instruction */
    | x | {
        /* Q - If the top number on the data stack is zero, skip the next
         * instruction.
         *
         * Note Q is the only "decision-making" instruction that our CPU
         * has.  This means that all "if-then" logic, counted loops, etc.
         * will be built using Q.
         */

        let f = x.dstack.pop();
        if f == 0 {
            x.ip = x.ip.wrapping_add(2)

            /* Because all of our instructions are two bytes, adding two
             * to the instruction pointer skips the next instruction. */
        };
    },
    /* Arithmetic and logic */
    | x | {
        /* ADD - Sum the top two numbers on the data stack. */
        let v1 = x.dstack.pop();
        let v2 = x.dstack.pop();
        x.dstack.push(v1.wrapping_add(v2));
    },
    | x | {
        /* SFT - Bit shift number left or right by the specified amount.
         * A positive shift amount will shift left, negative will shift right.
         *
         * Shifting by 16 or more places in either direction pushes every
         * bit out of a 16-bit number, so the result is zero.
         */

        let amt = x.dstack.pop();
        let val = x.dstack.pop();
        x.dstack.push(
            if amt <= 0xf {
                /* Shift left by 0 to 15 places */
                val << amt
            } else if amt > 0xfff0 {
                /* 0xfff1 to 0xffff are the signed numbers -15 to -1.
                 * Negating gives the distance to shift right, 15 to 1.
                 *
                 * 0xfff0 (signed -16) is deliberately left out of this
                 * case: it would ask Rust to shift a 16-bit number by 16
                 * places, which is an overflow rather than a zero.
                 */
                val >> amt.wrapping_neg()
            } else {
                0
            }
        );
    },
    | x | { // OR - Bitwise-or the top two numbers on the data stack.
        let v1 = x.dstack.pop();
        let v2 = x.dstack.pop();
        x.dstack.push(v1 | v2);
    },
    | x | { // AND - Bitwise-and the top two numbers on the data stack.
        let v1 = x.dstack.pop();
        let v2 = x.dstack.pop();
        x.dstack.push(v1 & v2);
    },
    | x | { // INV - Bitwise-invert the top number on the data stack.
        let v1 = x.dstack.pop();
        x.dstack.push(!v1);

        /* You can use the INV instruction to compensate for the LITERAL
         * instruction's inability to encode constants 32768 to 65535.
         * Use two instructions instead:
         * - LITERAL the complement of your desired constant
         * - INV
         *
         * For example, LITERAL(0) INV yields 65535 (signed -1)
         * For example, LITERAL(1) INV yields 65534 (signed -2)
         * etc.
         */
    },
    | x | { // GEQ - Unsigned-compare the top two items on the data stack.
        /* Pushes 0xffff if the second item from the top is greater than
         * or equal to the top item, otherwise pushes 0. */
        let v2 = x.dstack.pop();
        let v1 = x.dstack.pop();
        x.dstack.push(if v1 >= v2 { 0xffff } else { 0 });
    },

    /* Input/output.
     *
     * The CPU needs some way to communicate with the outside world.
     *
     * Some machines use memory mapped IO where certain memory addresses are
     * routed to hardware devices instead of main memory.  This machine already
     * has the full 64K of memory connected so no address space is readily
     * available for hardware devices.
     *
     * Instead we define a separate input-output space of 65536 possible
     * locations.  Each of these possible locations is called an IO "port".
     *
     * For a real CPU you could hook up hardware such as a serial
     * transmitter that sends data to a computer terminal, or just an
     * output pin controller that is wired to a light bulb.
     *
     * This is a fake software CPU so I am going to hook it up to
     * stdin and stdout.
     */

    | x | { // IO - Write/read a number from/to input/output port.
        let port = x.dstack.pop();

        /* I'm loosely following a pattern in which even ports are inputs
         * and odd ports are outputs.  But each port acts differently.
         * In a hardware CPU this would not be suitable but it is fine for
         * a software emulation.
         */

        match port {
            0 => {
                /* Push a character from stdin onto the data stack.
                 * If nothing could be read, the buffer is left alone
                 * and a zero is pushed. */
                let mut buf: [u8; 1] = [0];
                let _ = io::stdin().read(&mut buf);
                x.dstack.push(buf[0] as u16);
                /* You are welcome to make your own computer that supports
                 * utf-8, but this one does not. */
            }
            1 => {
                /* Pop a character from the data stack to stdout.
                 * Only the low 8 bits of the number are used.
                 *
                 * NOTE(review): print!() writes the character as UTF-8,
                 * so values 128 to 255 come out as two bytes each rather
                 * than as one raw byte. */
                let val  = x.dstack.pop();
                print!("{}", ((val & 0xff) as u8) as char);
                let _ = io::stdout().flush();
            }
            2 => {
                /* Dump CPU status.
                 * Like the front panel with the blinking lights that Chuck
                 * talked about. */

                println!("{:?} {:?}", x.ip, x.dstack);
                let _ = io::stdout().flush();
            }
            /* Nothing is connected to any other port. */
            _ => {}
        }
    }
];

/* ---------------------------------------------------------------------------
 *                            Part 2 - The Program
 *  ------------------------------------------------------------------------ */

/* You now have an unfamiliar computer with no software.  Can you and the
 * computer write a program?
 *
 * The first program is the hardest to write because you don't have any tools
 * to help write it.  The computer itself is going to be no help.  Without any
 * program it will sit there doing nothing.
 *
 * What should the first program be?
 * A natural choice would be a tool that helps you program more easily.
 *
 * An interactive programming environment needs to let you do 2 things:
 *
 * 1. Call subroutines by typing their name at the keyboard
 * 2. Define new subroutines in terms of existing ones
 *
 * Begin with step 1:
 * Call subroutines by typing their name at the keyboard
 *
 * This is where we will meet Forth.
 *
 * Our interactive programming environment will be a small language in the
 * Forth family.  If you want to learn how to implement a full featured Forth,
 * please read Jonesforth, and Brad Rodriguez' series of articles "Moving
 * Forth".  The small Forth I write below will probably help you understand
 * those Forths a little better.
 *
 * Forth organizes all the computer's memory as a "dictionary" of subroutines.
 * The point of the dictionary is to give each subroutine a name so you
 * can run a subroutine by typing its name.  The computer will look up its
 * address for you and call it.
 *
 * The dictionary starts at a low address and grows towards high addresses.
 * It is organized as a linked list, like this:
 *
 * [Link field][Name][Code .......... ]
 *  ^
 *  |
 * [Link field][Name][Code ...... ]
 *  ^
 *  |
 * [Link field][Name][Code ............... ]
 *
 * The reason it is a linked list is to allow each list entry to be a
 * different length.
 *
 * Each dictionary entry contains three things:
 *
 * - "Link field": The address of the previous dictionary entry.
 *                 For the first dictionary entry this field is 0.
 *
 * - "Name": A few letters to name this dictionary entry.
 *           Later you will type this name at the keyboard to call up
 *           this dictionary entry.
 *
 * - "Code": A subroutine to execute when you call up this dictionary
 *           entry.  This is a list of CPU instructions.  Note that one
 *           of the CPU instructions is "call".  So you can have a subroutine
 *           that calls other subroutines, or calls itself.
 *
 *           This code should end with a return (RET) instruction.
 *
 *           Example subroutine:
 *
 *           Number Instruction  Meaning
 *           ------ -----------  -------
 *           7      Literal(3)   Push the value 3 onto the data stack
 *           9      Literal(4)   Push the value 4 onto the data stack
 *           65504  RET          Return to caller
 *
 * A linked list is not a very fast data structure but this doesn't really
 * matter because dictionary lookup doesn't need to be fast.  Lookups are
 * for converting text you typed at the keyboard to subroutine addresses.
 * You can't type very fast compared to a computer so this lookup doesn't
 * need to be fast.
 *
 * In addition to the linked list itself, you will need a couple of
 * variables to keep track of where the dictionary is in memory:
 *
 * - Dictionary pointer:  The address of the newest dictionary entry.
 * - Here:                The address of the first unused memory location,
 *                        which comes just after the newest dictionary entry.
 *
 * [Link field][Name][Code .......... ]
 *  ^
 *  |
 * [Link field][Name][Code ...... ]
 *  ^
 *  |
 * [Link field][Name][Code ............... ]
 *  ^                                       ^
 *  |                                       |
 * [Dictionary pointer]                    [Here]
 *
 * To create our Forth interactive programming environment, we will start
 * by defining subroutines that:
 * - read names from the keyboard
 * - look up and execute dictionary entries by name
 *
 * We will put these subroutines themselves in the dictionary so they are
 * available for use once our interactive environment is up and running!
 *
 * If you were sitting in front of a minicomputer in 196x you would need
 * to create the dictionary with pencil and paper, but in 20xx we will
 * write a Rust program to help create the dictionary.
 *
 * First we need to keep track of where the dictionary is:
 */

struct Dict<'a> {
    // The dictionary pointer
    dp: u16,

    // The "here" variable
    here: u16,

    // The dictionary lives in memory.  We are going to hang on to a
    // mutable reference to the core to give us easy access to the memory.
    c: &'a mut Core,
}

/* Helpers to help put new routines in the dictionary */

/* One thing that can be appended to the dictionary as a CPU instruction. */
enum Item {
    Literal(u16),
    Call(u16),
    Opcode(Op),
}

/* A bare number is turned into a call. */
impl From<u16> for Item {
    fn from(addr: u16) -> Self {
        Self::Call(addr)
    }
}

/* An Op is turned into the matching opcode item. */
impl From<Op> for Item {
    fn from(op: Op) -> Self {
        Self::Opcode(op)
    }
}

impl Dict<'_> {

    /* Reserve n bytes in the dictionary by moving the "here" pointer
     * forward. */

    fn allot(&mut self, n: u16) {
        let advanced = self.here.wrapping_add(n);
        self.here = advanced;
    }

    /* Append a 16 bit integer to the dictionary: write it at "here", then
     * reserve the two bytes it occupies. */

    fn comma(&mut self, val: u16) {
        let target = self.here;
        self.c.store(target, val);
        self.allot(2);
    }

    /* Append a CPU instruction to the dictionary.
     *
     * Calls and opcodes are stored as-is.  A literal is shifted left by one
     * and given a trailing 1 bit, so it must fit in 15 bits. */

    fn emit<T>(&mut self, val: T)
    where
        T: Into<Item>,
    {
        let word = match val.into() {
            Item::Literal(val) => {
                assert!(val <= 0x7fff);
                (val << 1) | 1
            }
            Item::Call(addr) => addr,
            Item::Opcode(op) => op as u16,
        };
        self.comma(word);
    }

    /* Append a "name" field to the dictionary.
     *
     * To save space, and to give every dictionary header the same size, not
     * every letter of the name is stored.  Only the length of the name and
     * its first three letters are kept.
     *
     * That means these two names will compare equal:
     * - ALLOW (-> 5ALL)
     * - ALLOT (-> 5ALL)
     *
     * Even though their first three letters are the same, these two names
     * will compare unequal because they are different lengths:
     * - FORTH (-> 5FOR)
     * - FORGET (-> 6FOR)
     *
     * If a name is shorter than 3 letters it is padded out with spaces.
     * - X (-> 1X  )
     *
     * You can see that the name field is always four bytes regardless
     * of how many letters are in the name, and the link field is two bytes.
     * This means a dictionary header in this Forth is always six bytes.
     */

    fn name(&mut self, n: u8, val: [u8; 3]) {
        let [first, second, third] = val;

        /* First word: the length in the low byte, the first character in
         * the high byte */
        self.comma(u16::from_le_bytes([n, first]));

        /* Second word: the next two characters */
        self.comma(u16::from_le_bytes([second, third]));
    }

    /* Append a new link field to the dictionary and make the dictionary
     * pointer refer to it.  The link field holds the old dictionary
     * pointer. */

    fn entry(&mut self) {
        let previous = self.dp;
        self.dp = self.here;
        self.comma(previous);
    }
}

/* Now we can start building the dictionary. */
fn build_dictionary(c: &mut Core) {
    use Op::*;
    use Item::*;

    let mut d = Dict {
        dp: 0,  /* Nothing in the dictionary yet */

        here: 2,  /* Reserve address 0 as an "entry point", i.e. where the
                     CPU will jump to start running Forth.  We don't have a
                     Forth interpreter yet so we'll leave address 0 alone for
                     now and start the dictionary at address 2 instead. */
        c: c
    };

    /* Consider the following facts:
     * - The CPU knows how to execute a bunch of instructions strung together.
     * - Forth consists of a bunch of subroutine calls strung together.
     * - Subroutine CALL is a valid instruction of our CPU.
     *
     * This means that we can immediately begin programming our machine in
     * a language resembling Forth, just by writing a list of subroutine
     * calls into the dictionary.
     *
     * The line between "machine code program" and "Forth program" is
     * very blurry.  To illustrate:
     *
     * Here is a subroutine consisting of a few instructions strung together.
     *
     *       Instruction Number  Meaning
     *       ----------- ------  -------
     *       Literal(3)  7       Push the value 3 onto the data stack
     *       Literal(4)  9       Push the value 4 onto the data stack
     *       RET         65504   Return to caller
     *
     * Here is a Forth subroutine consisting of a few subroutine calls strung
     * together.
     *       Call        Number  Meaning
     *       ----------- ------  -------
     *       S1          1230    Call subroutine S1 which happens to live
     *                           at address 1230
     *       S2          1250    Call subroutine S2 which happens to live
     *                           at address 1250
     *       RET         65504   Return to caller
     *
     * Both of these are valid machine code programs (list of numbers that
     * our CPU can directly execute).
     *
     * This duality between CPU instructions and Forth code comes from
     * an idea called "subroutine threading".  It is a refinement of an
     * idea called "threaded code".  This has no relation to the kind of
     * threading that lets you run programs in parallel.  You can read more
     * about threaded code on Wikipedia or in the other Forth resources I
     * mentioned earlier (Jonesforth, and Moving Forth by Brad Rodriguez).
     *
     * Our new language starts out with the sixteen (well, eighteen)
     * instructions built into the CPU.  We can string those instructions
     * together into a new subroutine.  Each new subroutine adds to the
     * toolbox we have available for making the next new subroutine.
     * Repeat until you have built what you wanted to build, via
     * function composition.  This is the idea behind Forth.
     */

    /*
     * We are going to be writing many series of instructions so let's
     * start out by making a Rust macro that makes them easier to type
     * and lets us specify a CPU instruction vs. a subroutine call with
     * equal ease.
     *
     * The macro below will convert:
     *
     *     forth!(Literal(2), ADD, RET)
     *
     * to:
     *
     *     d.emit(Literal(2));
     *     d.emit(ADD);
     *     d.emit(RET);
     *
     * which you probably recognize as code that will add a new subroutine
     * to the dictionary.
     */
    macro_rules! forth {
        ($x:expr) => (d.emit($x));
        ($x:expr, $($y:expr),+) => (d.emit($x); forth!($($y),+))
    }

    /* Now we can add the first subroutine to the dictionary!
     *
     * key: Reads a character from the keyboard and places its character
     * code on the stack.
     *
     * There is a tradition of writing stack comments for Forth subroutines
     * to describe the stack effect of executing the subroutine.
     * They look like this: key ( -- n )
     *
     * Read as: key does not take any parameters off the stack, and leaves
     * one new number pushed onto the stack.
     *
     * Also remember that a dictionary entry looks like this:
     * [Link field][Name][Code .......... ]
     */

    // key ( -- n )
    d.entry();           /* Compile the link field into the dictionary */
    d.name(3, *b"key");  /* Compile the name field into the dictionary */
    let key = d.here;    /* (Save off the start address of the code so we
                            can call it later) */
    forth!(
        Literal(0),      /* Compile a LITERAL instruction that pushes
                            0 to the stack */

        IO,              /* Compile an IO instruction.
                          *
                          * Remember from the CPU code that IO takes a
                          * parameter on the stack to specify which port
                          * to use.
                          *
                          * Also remember that IO port 0 reads
                          * a character from standard input.
                          */

        RET              /* Compile a RET instruction */
    );

    /* We have now compiled the "key" subroutine into the dictionary.
     * [Link field][Name][Code .......... ]
     *        0000  3key  1, 65534, 65504
     *
     * The next subroutine we will make is "emit".  This is a companion
     * to "key" that works in the opposite direction.
     *
     * key ( -- n ) reads a character from stdin and pushes it to the stack.
     * emit ( n -- ) pops a character from the stack and writes it to stdout.
     */

    // emit ( n -- )
    d.entry(); d.name(4, *b"emi");  let emit = d.here;
    forth!(Literal(1), IO, RET);

    /* I am tired of saying "subroutine" so many times, so I am going to
     * introduce a new term.  Remember the goal our language is working
     * towards -- we want to be able to type a word at the keyboard, and
     * let the computer look it up in the dictionary and execute the
     * appropriate code.
     *
     * So far we have two named items in the dictionary, key and emit.
     *
     * We are going to term a named dictionary item a "word".
     * This is a Forth tradition.
     *
     * So key and emit are "words", or "dictionary words" if you want to be
     * precise about it.  So far these are the only words we've defined.
     *
     * Let's define some more words.
     */

    /* Our CPU does not have subtraction so let's make subtraction by adding
     * the two's complement.
     *
     * To get the two's complement, do a bitwise invert and add 1.
     *
     * This will be the most complicated Forth that we've written so far
     * so let's walk through step by step. */

    // - ( a b -- a-b )
    d.entry(); d.name(1, *b"-  ");  let sub = d.here;
    forth!(         /* Stack contents:  a b, to start off with.
                     * We want to compute a minus b */

        INV,        /* Bitwise invert the top item on the stack.
                     * Stack contents: a ~b */

        Literal(1), /* Push 1 onto the stack.
                     * Stack contents: a ~b 1 */

        ADD,        /* Add the top two items on the stack.
                     * Stack contents: a ~b+1
                     * Note that ~b+1 is the two's complement of b. */

        ADD,        /* Add the top two items on the stack.
                     * Stack contents: n
                     * Note that n = (a + ~b+1) = a - b */

        RET         /* Done, return to caller, leaving n on the data stack. */
    );

    /* Writing it out like that takes a lot of space.  Normally Forth code
     * is written on a single line, like this:
     *
     * INV 1 ADD ADD RET
     *
     * Looking at it this way, it's easy to see the new word we just
     * created (-) is made from 5 instructions.  It's pretty typical for
     * a Forth word to be made of 2-7 of them.  Beyond that length, things
     * get successively harder to understand, and it becomes a good idea
     * to split some work off into helper words.
     *
     * We will see an example of this below.
     */

    /* Our next word will be useful for Boolean logic.
     *
     * 0= ( n -- f )
     *
     * In a stack comment, "f" means "flag", a.k.a. Boolean value.
     * By Forth convention, zero is false and any nonzero value is true.
     * However the "best" value to use for a true flag is 65535 (all ones)
     * so the bitwise logical operations can double as Boolean logical
     * operations.
     *
     * So what 0= does is:
     * - if n=0,    leave on the stack f=65535
     * - otherwise, leave on the stack f=0
     *
     * It is like C's ! operator.
     *
     * In Rust this could be implemented as:
     *
     * fn zero_eq(n: u16) -> u16 {
     *     if (n == 0) {
     *         return 65535;
     *     } else {
     *         return 0;
     *     }
     * }
     *
     * Rust has an if-then and block scope, so this is easy to write.
     *
     * The literal translation to a typical register-machine assembly
     * language would look something like this:
     *
     * zero_eq:     compare r0, 0
     *              jump_eq is_zero
     *              move    r0, 0
     *              ret
     * is_zero:     move    r0, 65535
     *              ret
     *
     * It looks simple but I want to point out a couple things about it
     * that are not so simple.
     *
     * The conditional jump instruction, jump_eq.
     * ------------------------------------------
     * Our CPU doesn't have this.  The only decision-making instruction
     * we have is Q which is a conditional skip.
     *
     * Q - If the top number on the data stack is zero, skip the next
     * instruction.
     *
     * A conditional jump can go anywhere.  A conditional skip can only decide
     * whether or not to skip the next instruction (i.e., it is a fixed forward
     * jump of 2 bytes).  You cannot give Q a specific address to jump to, the
     * way jump_eq worked.
     *
     * So our CPU does not make it easy to jump around in a long block of
     * instructions -- our CPU prefers that you use subroutine calls.
     *
     * The forward reference
     * ---------------------
     * This is another problem.  Think of the job of an assembler which is
     * converting an assembly language program to machine code.  We are
     * currently writing our code in a tiny assembler that we made in Rust!  It
     * is very simple but so far it has worked for us.  The assembler of our
     * hypothetical register-machine below has a rather nasty problem to solve.
     *
     * zero_eq:     compare r0, 0
     *              jump_eq is_zero  <----- On this line.
     *              move    r0, 0
     *              ret
     * is_zero:     move    r0, 65535
     *              ret
     *
     * It wants to emit a jump to is_zero, but that symbol has not been seen
     * yet and is unrecognized.  On top of that, the assembler also doesn't yet
     * know what address is_zero will have, so doesn't know what jump target to
     * emit.  To successfully assemble that kind of program you would need an
     * assembler smarter than the assembler we made for ourselves in Rust.
     *
     * There are ways to solve this but let's NOT solve it.
     *
     * Our CPU has no jump instruction (only call) and our assembler only lets
     * us call things we already defined.  Instead of removing these
     * constraints, find a way to write 0= within the constraints.
     *
     * Here is a start at solving the problem
     *
     * is_nonzero ( -- 0 )
     *     Literal(0)
     *     RET
     *
     * 0= ( n -- f )
     *     Q            <-- pop n, if n=0 skip next instruction
     *     is_nonzero   <-- f=0 is now pushed to stack
     *     Literal(0)
     *     INV          <-- f=65535 is now pushed to stack
     *     RET          <-- Return
     *
     * We got rid of the forward reference by defining is_nonzero before it
     * was used.
     *
     * We got rid of the jump instruction by using a subroutine call instead.
     *
     * This code is close to working but it doesn't quite work.  The problem
     * is that is_nonzero gives control back to 0= when done, just like
     * a subroutine call normally does, and then 0= runs as normal until it
     * hits the return instruction at the end.
     * So we wind up executing both the f=0 branch and the f=65535 branch,
     * instead of just executing the f=0 branch like we wanted in this case.
     *
     * It is possible to fix this last problem by adding the instructions
     * RTO DRP to is_nonzero.
     *
     * is_nonzero ( -- 0 )
     *     RTO          <-- Pop the return address, push to data stack
     *     DRP          <-- Discard it
     *     Literal(0)   <-- Put 0 on the data stack
     *     RET          <-- Return
     *
     * Because we popped off and discarded one item from the return stack, the
     * final RET instruction will not return to 0= any more.  Instead it will
     * skip one level and return to whoever called 0=.  This has the result of
     * ending 0= early, which is what we wanted to do.
     *
     * 0= ( n -- f )
     *     Q            <-- pop n, if n=0 skip next instruction
     *     is_nonzero   <-- this word puts f=0 on the stack then ends 0= early
     *     Literal(0)
     *     INV          <-- f=65535 is now pushed to stack
     *     RET          <-- Return
     *
     * I call this pattern "return-from-caller".  It is used occasionally in
     * real Forth systems.  My dialect of Forth will use it extensively to work
     * around my CPU's lack of conditional jump.
     *
     * Now we've explained how 0= is going to work, let's write it.
     */

    /* First we define the helper.  It won't be reused, so I am not going
     * to bother giving it a dictionary header and name for easy lookup later.
     * Think of it as a private function. */

    let zero = d.here;
    forth!(Literal(0), RTO, DRP, RET);

    /* Now define 0= using the helper. */

    // 0= ( n -- f )
    d.entry(); d.name(2, *b"0= ");  let zero_eq = d.here;
    forth!(Q, zero, Literal(0), INV, RET);

    /* Next let's make a = equality comparison operator, using 0= and subtract.
     * I call it an "operator" because that's what other languages would
     * call it, but Forth has no special idea of an "operator".  Everything
     * is just words. */

    // = ( a b -- a=b )
    d.entry(); d.name(1, *b"=  ");  let eq = d.here;
    forth!(sub, zero_eq, RET);

    /* Note that 0= and subtract are both words, not CPU instructions.
     * This makes = the first "pure" Forth word we have defined, with no
     * direct dependency on the machine's instruction set.
     * We could define = as - 0= on a real standards-compliant Forth system
     * and it would still work.  So Forth gets you to the point of writing
     * "portable" code really quickly.  Often you can reuse routines early in
     * bootstrapping even though they were written and tested on a different
     * machine.  Many languages offer portability but few offer it so quickly.
     */

    /* -----------------------------------------------------------------------
     * Part 2a. The lexer
     *---------------------------------------------------------------------- */

    /* Now that we've got some basics in place let's go back to solving
     * the real problem of getting our language to read words from the
     * keyboard.  The first problem we have is that we need some way to
     * separate words from each other so we know where one word ends and the
     * next begins.  This problem is called "lexing".  Forth has about the
     * simplest lexer ever, it just splits on whitespace.  Anything with
     * character code <=32 is considered whitespace.  Words are delimited by
     * whitespace.  And that is all the syntax Forth has.
     *
     * To read a word from the keyboard you will need to:
     * 1. Advance past any leading whitespace
     * 2. Read characters into a buffer until whitespace is seen again.
     */

    /* Let's start with the "advance past leading whitespace" part
     *
     * The "key" word gives us the latest keystroke as an ASCII code.
     * (Really it is reading utf-8 characters one byte at a time but let's
     * not get into that right now, pretend the year is 196x, we're sitting
     * in front of a minicomputer and utf-8 hasn't been invented yet.)
     *
     * ASCII codes 0 to 32 are whitespace or control characters.  Codes
     * 33 and up are letters, numbers and symbols.  So to skip whitespace
     * all you need to do is read keys until you get an ASCII code >= 33,
     * then return that to tell the rest of the program what key code you
     * saw.
     *
     * In Rust this could be implemented as:
     *
     * fn skipws() -> u16 {
     *     loop {
     *         let c = key();
     *         if c >= 33 {
     *             return c;
     *         }
     *     }
     * }
     *
     * Rust has a loop keyword, so this is easy to write.
     * (Alarm bells should be ringing in your head at this point because
     * we haven't put any looping constructs in our CPU or language.)
     *
     * The literal translation to a typical register-machine assembly
     * language would look something like this:
     *
     * skipws:      call key
     *              compare r0, 32
     *              jump_le skipws
     *              ret
     *
     * (More alarm bells should be ringing in your head because this is
     * using conditional jump, which our CPU doesn't have.)
     *
     * Like last time, is there a way to solve this without conditional
     * jump?
     *
     * Here is a start at solving the problem:
     *
     * skipws ( -- c )
     *     key          <-- Put keycode on the stack:           ( c )
     *     DUP          <-- Duplicate top value on the stack:   ( c c )
     *     Literal(33)  <-- Put 33 on the stack:                ( c c 33 )
     *     GEQ          <-- Is c >= 33?                         ( c f )
     *     Q            <-- If so...
     *     RET          <-- ... return, leaving c on the stack. ( c )
     *     DRP          <-- Discard c from the stack.           ( )
     *     skipws       <-- Call skipws again
     *
     *  You will notice there is no RET statement at the end of skipws.
     *  At the end of skipws we call skipws again.  This makes an infinite
     *  loop.  The only way out of the loop is the RET instruction in the
     *  middle.  This works similarly to the Rust code that uses a loop { }
     *  and breaks out when it sees the condition it's looking for.
     *
     *  Writing a word that calls itself is called "recursion".
     *
     *  This code almost works but there is still something wrong with it.
     *  You'll notice we were careful to make sure "skipws" removed all items
     *  it added to the data stack, before it called itself.  Its last two
     * lines were:
     *
     *  DRP    <-- Discard c from the stack
     *  skipws <-- Call skipws again
     *
     *  If we didn't do that, skipws would leave each whitespace character
     *  it saw, on the data stack, as it looped again and again.
     *  So instead of returning the first nonwhitespace character it would
     *  return EVERY character it saw.
     *
     * 1st recursion: data stack: ( c1 )
     * 2nd recursion: data stack: ( c1 c2 )
     * 3rd recursion: data stack: ( c1 c2 c3 )
     *
     * There are problems with this.  It's messy.  The caller has no idea
     * how many values we are going to leave on the stack, so has no idea
     * how many to pop off.  Also, we might see more than 16 whitespace
     * characters in a row, which would make weird things happen because
     * our CPU's data stack only has room for 16 numbers.
     *
     * For these reasons it's better to leave the data stack as we found it,
     * when we do a recursive call.  That is the reason the last two lines are
     * DRP, skipws -- it's to stop items building up on the data stack.  The
     * final pass through this function goes down a different path that does
     * not DRP, so it leaves something on the data stack -- the last key read.
     *
     * The problem skipws still has, is that we haven't taken the same care
     * with its return stack.
     *
     * At the first line of skipws the return stack looks like this:
     * ( caller )
     *
     * That's because skipws must have been called by our CPU's CALL
     * instruction (we have no other way of calling subroutines!), and the
     * CALL instruction leaves a return address on the top of the return
     * stack so RET knows where to return to at the end of the subroutine.
     *
     * But we are also using CALL for a different purpose:  to repeat skipws.
     * Every time we repeat skipws, the CALL instruction will push another
     * return address to the call stack.
     *
     *     DRP                                 return stack:( caller )
     *     skipws       <-- Call skipws again. return stack:( caller x )
     *     <-- This location has address x.
     *
     * first call:    return stack: ( caller )
     * 1st recursion: return stack: ( caller x )
     * 2nd recursion: return stack: ( caller x x )
     * 3rd recursion: return stack: ( caller x x x )
     *
     * Clearly all these x's are garbage.  When we are done with skipws we
     * want to return to our caller, not to x.
     *
     * We could patch over the problem somewhat by putting a RET instruction
     * at x.
     *
     *     DRP                                 return stack:( caller )
     *     skipws       <-- Call skipws again. return stack:( caller x )
     *     RET          <-- x
     *
     * This yields working recursive code.
     *
     * Each time we loop, a useless return address x is left on the return
     * stack.  When skipws wants to quit, skipws runs a RET instruction, which
     * transfers control to x.  x is the address of a RET instruction, left on
     * the stack earlier.  So we wind up running RET RET RET ... until we burn
     * through all x's on the return stack and finally transfer control back to
     * caller.
     *
     * first call:    return stack: ( caller )         data stack: ( )
     * 1st recursion: return stack: ( caller x )       data stack: ( )
     * 2nd recursion: return stack: ( caller x x )     data stack: ( )
     * 3rd recursion: return stack: ( caller x x x )   data stack: ( c )
     * RET:         : return stack: ( caller x x )     data stack: ( c )
     * RET:         : return stack: ( caller x )       data stack: ( c )
     * RET:         : return stack: ( caller )         data stack: ( c )
     * RET:         < control is passed back to our caller,
     *                and now they can do stuff with the "c" on the data
     *                stack, yay >
     *
     * This works.  It isn't very fast but we don't care about speed right
     * now, just about getting our computer to work.
     *
     * But there is still a problem.
     *
     * Our CPU has a fixed-size circular return stack that can hold 32 numbers.
     * What happens if you loop 32 times or more?  The return stack fills up
     * completely with the useless "x" addresses, and the address of caller
     * is lost.
     *
     * recursive call N  :  return stack: ( caller x x x ... x )
     * recursive call N+1:  return stack: (      x x x x ... x )  :-(
     *
     * So skipping 32 or more whitespace characters in a row wouldn't work.
     * To fix that problem we need to find a way to stop the useless "x"
     * addresses from building up on the return stack.
     *
     * 1st loop: return stack: ( caller )   data stack: ( )
     * 2nd loop: return stack: ( caller )   data stack: ( )
     * 3rd loop: return stack: ( caller )   data stack: ( c )
     * RET:      < control is passed back to our caller >
     *
     * The most common solution is called "tail call optimization".
     * If a function's last instruction is a recursive call, that call can be
     * replaced with a jump.  On paper this doesn't work very well on our
     * computer, for two reasons:
     *
     * 1. Our CPU has no jump, only call.
     *
     * 2. Our assembler, and eventually our interactive environment, would need
     *    to be smart enough to emit a call sometimes and a jump other times.
     *    This is the same "look-ahead" problem that we saw with forward
     *    references -- you don't know that a given CALL will be followed by a
     *    RET, unless you can see the future.
     *
     *    Earlier we decided to keep our assembler very dumb so it would be
     *    weird to start making it smart now.
     *
     * So what are we going to do?
     *
     * It is possible to get a very, very dumb caveman version of tail call
     * optimization, by manually using the "return-from-caller" trick, RTO DRP,
     * to "get rid of" the x that is pushed on by the skipws CALL.
     *
     * skipws ( -- c ) RTO DRP ... Q RET ... skipws
     *
     * 1st loop: return stack: ( caller )   data stack: ( )
     * 2nd loop: return stack: ( )          data stack: ( )
     * 3rd loop: return stack: ( )          data stack: ( )
     *
     * So now recursive calls will leave the return-stack as they found it,
     * which is good!  We don't have the useless-x problem any more.
     * Unfortunately, the first pass through skipws discards the original
     * caller's return address, which we wanted to keep.  There is a quick
     * hack around that problem: wrap skipws in another subroutine, and
     * always call it through that wrapper.
     *
     * skipws ( -- c ) RTO DRP ... Q RET ... skipws
     *
     * wrapper ( -- c ) skipws RET
     *
     * The RET in skipws returns from wrapper, but that's ok.
     *
     * Finally we are able to write loops, and we did not even need to add
     * anything to our language or CPU to get that to work, we just needed to
     * look at things differently.  Learning to look at things differently is a
     * big part of the Forth philosophy.
     *
     * We'll see a better way of solving this problem later, in the file
     * frustration.4th, but for now this is good enough and we can get back to
     * solving our original problem, skipping whitespace.
     */

    /* You should now understand what the next two functions are doing
     * because we just talked about them at length.  In the real program
     * I swapped the names of the two functions because I wanted to let the
     * wrapper have the friendly "skipws" name. */

    let skip_helper = d.here;
    forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper);

    // skipws ( -- c )
    d.entry(); d.name(6, *b"ski");  let skipws = d.here;
    forth!(skip_helper);

    /* Step 1 of the lexer is now working!
     * We can now discard whitespace characters typed at the keyboard,
     * i.e. advance to the first character of a word.
     */

    /* The next stage of lexing is once again going to be more complicated than
     * any code we've written before, so we are going to need some more helper
     * words.
     *
     * Until now, we have been able to structure our code in such a way that
     * the next value we need is conveniently stored at the top of the stack.
     * The most we've had to do is either DUPlicate this value or DRP it
     * because it's no longer needed.  In more complicated code, sometimes we
     * will need to "dig through" the values on the stack to surface the one we
     * want to use next.  This is inefficient and ugly so we will do it as
     * little as possible, but it will soon be necessary.
     *
     * The CPU instruction SWP does stack shuffling by swapping the first
     * two values on the data stack.  We already have SWP (it's built into the
     * CPU) but I will write out its stack effect below as a recap of what it
     * does.
     *
     * SWP ( a b -- b a ).
     *
     * The problem with SWP is that it can only reach the top two values
     * on the stack.  If you wanted to dig further, you couldn't do it with
     * SWP.
     *
     * One way of digging further is by using the RTO and TOR instructions
     * as demonstrated below in the "over" word.
     */

    // over ( a b -- a b a )
    d.entry(); d.name(4, *b"ove");  let over = d.here;
    forth!(TOR,  /* data stack: ( a )      return stack: ( caller b ) */
           DUP,  /* data stack: ( a a )    return stack: ( caller b ) */
           RTO,  /* data stack: ( a a b )  return stack: ( caller ) */
           SWP,  /* data stack: ( a b a )  return stack: ( caller ) */
           RET);

    /* "over" is a good building block for further stack shuffling words. */

    // 2dup ( a b -- a b a b )
    d.entry(); d.name(4, *b"2du");  let twodup = d.here;
    forth!(over, over, RET);

    /* Now we can get back to writing the lexer.  Step 2 of lexing is "Read
     * characters into a buffer until whitespace is seen again", and once that
     * works we will be done writing the lexer!
     *
     * Start by setting aside the word input buffer.  We'll format it as Nabcde
     * where N is the number of characters stored.
     */

    let word_buf = d.here;
    d.allot(6);

    /* It may seem strange to be plopping this down in the middle of the
     * dictionary but it will work fine, just as long as we're setting aside
     * an even number of bytes.  As mentioned earlier, if you intersperse
     * instructions and data in memory...
     *            _________
     *  ________ |_________| _____________
     * |________|    Data   |_____________|
     * Instructions         More instructions
     *
     * ...then you will have to be careful to make sure the second block
     * of instructions also starts at an even numbered address.
     * You might need to include an extra byte of data as "padding".
     *
     * In this case we set aside one byte for length and five bytes for
     * characters, which is a total of six bytes, so no padding is needed.
     */

    /* We are about to do some buffer handling so we want bounds checking.
     * Let's write a min-value word.  It will look at the top two items
     * on the stack and return whichever is less.
     *
     * This word is simple enough that I'm not going to walk through it
     * like I did with some of the earlier words.  If you want to understand
     * how it works I recommend walking through it on paper or in your head.
     * With a little practice this will become as natural as walking through
     * code in any other language.
     */

    // min ( a b -- n )
    d.entry(); d.name(3, *b"min");  let min = d.here;
    forth!(twodup, GEQ, Q, SWP, DRP, RET);

    /* We want to access the buffer byte-by-byte, but our machine only
     * accesses memory 16 bits at a time.
     *
     * Reading one byte at a time is pretty easy, just do a 16-bit read and
     * discard the high byte with Literal(0xFF) AND. */

    // c@ ( a -- n )
    d.entry(); d.name(2, *b"c@ ");  let cld = d.here;
    forth!(LD, Literal(0xff), AND, RET);

    /* To write one byte at a time, we'll take the approach of reading two
     * bytes, editing just the low byte, and then writing the full two-byte
     * value back to memory.  The high byte gets unnecessarily rewritten but
     * we are writing back its old value so no one will know the difference.
     *
     * If our CPU was multi-core, or had interrupts, there could be some
     * problems with this approach (search the Internet for "non-atomic
     * read-modify-write"), but ours isn't, so we are fine.
     */

    // c! ( n a -- )
    d.entry(); d.name(2, *b"c! ");  let cst = d.here;
    forth!(DUP,                 /* ( n a a )              r: ( caller )   */
           LD,                  /* ( n a old-n )          r: ( caller )   */
           Literal(0xff), INV,  /* ( n a old-n 0xff00 )   r: ( caller )   */
           AND,                 /* ( n a old-highbyte )   r: ( caller )   */
           SWP, TOR,            /* ( n old-highbyte )     r: ( caller a ) */
           OR,                  /* ( new-n )              r: ( caller a ) */
           RTO,                 /* ( new-n )              r: ( caller )   */
           ST,                  /* ( )                    r: ( caller )   */
           RET);

    /* Load 1 letter into the buffer. */
    let stchar = d.here;
    forth!(Literal(word_buf), cld,  /* Retrieve the first byte of the buffer,
                                       i.e. its current length. */

           Literal(1), ADD,         /* Increment the length. */

           DUP, Literal(word_buf), cst,  /* Write-back the incremented length
                                            to the first byte of the buffer */

           /* Decide where to store the letter in the buffer.
            *
            * The 1st letter should be stored 1 byte past the buffer start
            *   (to leave room for the length).
            *
            * The 2nd letter should be stored 2 bytes past the buffer start
            * ...
            * The 5th letter should be stored 5 bytes past the buffer start.
            *
            * Any letters beyond the 5th will also be stored in the 5th slot
            * overwriting whatever letter was seen there previously.  This
            * is fine because only the first 3 letters of the word are
            * significant anyway.  What's important is that we not overrun
            * the buffer and corrupt adjacent parts of the dictionary.
            */
           Literal(5), min, Literal(word_buf), ADD,

           cst,  /* Store the letter in the buffer */
           RET);

    /* Function to load letters into buffer until whitespace is hit again.
     * Return the whitespace character that was seen.
     *
     * This will tail-recursively call the function we just wrote, until
     * whitespace is seen again (a character code that is <= 32).
     */

    let getcs_helper = d.here;
    forth!(RTO, DRP, /* The "return-from-caller" trick */
           stchar,
           key, DUP, Literal(32), SWP, GEQ, Q, RET,
           getcs_helper);

    // getcs ( -- c )
    d.entry(); d.name(5, *b"get");  let getcs = d.here;
    forth!(getcs_helper, RET);

    /* The lexer is almost done, now we'll write the word that the rest of the
     * program will use to call it.
     *
     * This word is named "word".
     *
     * First, it clears word_buf by setting its length byte to 0 and
     * padding out the first three name bytes by setting them to 32 (space).
     *
     * Then, it reads a word from the keyboard into word_buf.
     */

    // word ( -- )
    d.entry(); d.name(4, *b"wor");  let word = d.here;
    forth!(
        Literal(word_buf),    /* Address of word_buf */

        DUP, Literal(2), ADD, /* Address of word_buf + 2 */

        Literal(0x2020), SWP, ST, /* Set name bytes 2 and 1 to space */

        Literal(0x2000), SWP, ST, /* Set name byte 0 to space and
                                        set length to zero */

        skipws, /* Lexer step 1, skip leading whitespace */

        getcs,  /* Lexer step 2, read letters into buffer until whitespace
                      is seen again */

        DRP,    /* We don't care what whitespace character was last seen
                      so drop it */
        RET);

    /* The lexer is now complete: we can read space-delimited words from
     * the keyboard.
     *
     * This took a long while, because we had to figure out how to do things
     * like branching and looping, while also figuring out how to write the
     * lexer itself.
     * But now our dictionary is filled with useful helper words so our next
     * steps will be faster to write.
     */

    /* Let's move on to dictionary lookup, so we can do something useful with
     * the space-delimited words we now know how to read from the keyboard.
     *
     * To do dictionary lookup we first need to keep track of where the
     * dictionary is, so let's teach Forth about the dictionary pointer (dp)
     * variable that we've so far been tracking in Rust.
     *
     * The traditional Forth name for this variable is "latest".
     */

    // latest ( -- a )
    /* Address of "latest" variable.  This variable stores the address of
     * the latest word in the dictionary. */
    let latest_ptr = d.here; d.allot(2);
    d.entry(); d.name(6, *b"lat");  let latest = d.here;
    forth!(Literal(latest_ptr), RET);

    /* Now we will write "find" which is the word that does dictionary
     * lookup.  Dictionary lookup is a linked list traversal starting
     * at latest (the end of the dictionary).  For each dictionary entry, we
     * compare its name against the name that "word" placed in the input
     * buffer.  If it matches, we return the address of this dictionary entry's
     * code field.  Otherwise we advance to the previous dictionary entry and
     * try again.  If we don't match anything before we hit address 0 (the
     * start of the dictionary) that means the name in the input buffer
     * was not found in the dictionary.
     *
     * The stack effect of find will be:
     *
     * find ( -- xt|0 )
     *
     * It's time to explain a couple more conventions often used in stack
     * effect comments:
     *
     * - xt is "execution token".  In our Forth, "execution token" just means
     *   the address of some code.
     *
     * - A vertical bar | means "or".  So find will return either an execution
     *   token, or 0 if no execution token is found.
     */

    /* Helper word ( a -- f )
     */
    let matches = d.here;
    forth!(
        /* Stash the address of the name field by putting it on the
         * return stack
         */
        Literal(2), ADD, TOR,

        /* Load the 4 bytes at word_buf */
        Literal(word_buf), DUP, Literal(2), ADD, LD, SWP, LD,

        /* Load the first 2 bytes of the name field */
        RTO, DUP, TOR, LD,

        /* Compare to the first 2 bytes at word_buf.
         * Don't worry about that bitwise AND: it will be explained later
         * when we are adding "immediate" words to the outer interpreter.
         */
        Literal(0x0080), INV, AND, eq,

        /* Compare the second 2 bytes of the name field to the second
         * 2 bytes at word_buf
         */
        SWP, RTO, Literal(2), ADD, LD, eq,

        /* If both comparisons were true, return true, else return false */
        AND, RET);

    /* Helper word ( a -- a' )
     */
    let matched = d.here;
    forth!(
        Literal(6), ADD,  /* Advance six bytes (the length of the dictionary
                             header).  This advances from the start of the
                             header to the address of the code field. */

         RTO, DRP,         /* Return-from-caller */
         RET);

    let find_helper = d.here;
    forth!(
        RTO, DRP,
        DUP, Literal(0), eq, Q, RET, /* No match - return 0 */
        DUP, matches, Q, matched,    /* Match - return the code address */
        LD, find_helper);            /* Try the next one */

    /* And find itself is just a wrapper around the tail-recursive
     * find_helper word. */

    // find ( -- xt|0 )
    d.entry(); d.name(4, *b"fin");  let find = d.here;
    forth!(latest, LD, find_helper);

    /* The ' (quote) word reads the next word from the keyboard and then looks
     * it up in the dictionary.  It works very similarly to the "address-of"
     * operator in C.  ' fn in Forth is like &fn in C.
     */

    // ' ( -- xt|0 )
    d.entry(); d.name(1, *b"'  ");  let quote = d.here;
    forth!(word, find, RET);

    /* -----------------------------------------------------------------------
     * Part 2b. The outer interpreter
     *---------------------------------------------------------------------- */

    /* We can now look up a subroutine in the dictionary by typing its name
     * at the keyboard.
     *
     * Remember that an interactive programming environment needs to let you
     * do two things:
     *
     * 1. Call subroutines by typing their name at the keyboard
     * 2. Define new subroutines in terms of existing ones
     *
     * We're also going to succumb to temptation at this point and add a third
     * feature to our language.
     *
     * 3. Push numbers onto the data stack by typing them at the keyboard
     *
     * We haven't achieved any of these three goals yet, but we now have all
     * of the building blocks we need to do so.
     */

    /* To add words to the dictionary we'll need to keep track of where the
     * end of the dictionary is, so let's teach Forth about the "here"
     * variable that we've so far been tracking in Rust.
     */

    // here ( -- a )
    /* Address of "here" variable.  This variable stores the address of
       the first free space in the dictionary */
    let here_ptr = d.here; d.allot(2);
    d.entry(); d.name(4, *b"her");  let here = d.here;
    forth!(Literal(here_ptr), RET);

    /* Let's talk a little bit about how we are going to make our Forth
     * interactive.  We want to do one of two things:
     *
     * 1. Call subroutines by typing their name at the keyboard
     * 2. Define new subroutines in terms of existing ones
     *
     * Both of these things are structurally similar.  We can solve either
     * problem by reading a list of words from the keyboard and doing something
     * with each word.
     *
     * First we look up the word in the dictionary, then we either:
     * 1. Execute it right now        (if we are in interpreting mode).
     * 2. Append it to the dictionary (if we are in compiling mode).
     *
     * Numbers can be handled in a similar way.  If we encounter a number
     * in interpreting mode, we'll put it on the stack.  If we encounter a
     * number in compiling mode, we'll compile a LITERAL instruction that
     * will put the number on the stack when executed.
     *
     * It seems a pretty good bet that we'll be able to solve our problem
     * with an interpreting/compiling mode flag, so let's make one.
     */

    // state ( -- a )
    /* Address of "state" variable.  This variable stores -1 if
     * interpreting or 0 if compiling. */
    let state_ptr = d.here; d.allot(2);
    d.entry(); d.name(5, *b"sta");  let state = d.here;
    forth!(Literal(state_ptr), RET);

    /* We need a way of switching between interpreting and compiling mode.
     *
     * If you are interpreting, this is easy -- just write 0 to state.
     *
     * If you are compiling, it is not so easy to go back into interpreting
     * mode, because everything you type gets compiled.  There is no way to
     * execute a word when you are in compiling mode, so you are stuck
     * compiling forever.
     *
     * What if there was a way to execute a word in compiling mode?
     *
     * We will define a special category of words called "immediate" words
     * that are executed whenever they are seen, even if you are in compiling
     * mode.
     *
     * We will mark a word as "immediate" by setting the high bit of the
     * length byte, in the name field of its dictionary entry.
     *
     * +---+---+---+---+---+---+---+---+
     * | i | n | n | n | n | n | n | n |
     * +---+---+---+---+---+---+---+---+
     * - nnnnnnn = length (0 to 127)
     * - i       = "immediate" bit (1 = immediate, 0 = ordinary)
     *
     * Do you remember the bit math in "find" that I told you to not worry
     * about just yet?
     *
     * Literal(0x0080), INV, AND
     *
     * This math was masking out the "immediate" flag so it would not interfere
     * with dictionary lookup.
     */

    /* Helper function to get the address of the name field of the latest
     * dictionary entry (the name field starts 2 bytes into the entry, right
     * after the link field) */
    let word_addr = d.here;
    forth!(Literal(latest_ptr), LD, Literal(2), ADD, RET);

    // immediate ( -- )
    /* Set the "immediate" flag on the latest dictionary entry */
    d.entry(); d.name(9, *b"imm");
    forth!(word_addr, DUP, LD, Literal(0x0080), OR, SWP, ST, RET);

    /* Now we can define words to switch between interpreting and compiling
     * mode.  The names [ and ] are traditional Forth names. */

    // [ ( -- )
    d.entry();

    d.name(
        1 | 0x80,  /* In Rust we do not have access to the handy "immediate"
                      function, but we can make a word "immediate" by setting
                      the high bit in its length field, as is done here. */
        *b"[  ");

    let lbracket = d.here;
    forth!(Literal(0), INV, state, ST, RET);

    // ] ( -- )
    d.entry(); d.name(1 | 0x80, *b"]  ");  let rbracket = d.here;
    forth!(Literal(0), state, ST, RET);

    /* By setting / unsetting a different bit of the name field we can
     * temporarily hide a word from name lookups.  We will talk more
     * about this later. */

    // smudge ( -- )
    d.entry(); d.name(6 | 0x80, *b"smu");  let smudge = d.here;
    forth!(word_addr, DUP, LD, Literal(0x0040), OR, SWP, ST, RET);

    // unsmudge ( -- )
    d.entry(); d.name(8 | 0x80, *b"uns");  let unsmudge = d.here;
    forth!(word_addr, DUP, LD, Literal(0x0040), INV, AND, SWP, ST, RET);

    /* Now let's make a word that appends to the dictionary.
     * We have had a Rust helper function for this for a long time.
     * The word below is the same thing but callable from Forth. */

    // , ( n -- )
    d.entry(); d.name(1, *b",  "); let comma = d.here;
    forth!(here, LD, ST,
           here, LD, Literal(2), ADD, here, ST, RET);

    /* We will read numbers the same way we read words:  from the input
     * buffer.  This, incidentally, is why we chose to reserve space for five
     * characters in the input buffer, even though we only needed to store
     * three for word lookup.  The largest 16-bit number will fit in five
     * decimal digits.
     *
     * Our numbers will be base-10.  To build up a base-10 number digit by
     * digit, we'll need to be able to multiply by 10.  Our CPU has no multiply
     * but it does have bit shift, which can be used to multiply or divide an
     * unsigned value by any power of two.
     */

    // x10 ( n -- n*10 )
    d.entry(); d.name(3, *b"x10");  let x10 = d.here;
    forth!(
        DUP, DUP, Literal(3), SFT, /* Find n*8 */
        ADD, ADD,                  /* (n*8) + n + n = (n*10) */
        RET);

    /* Now we can write a word that goes through the input buffer
     * character by character and converts it to an integer on the stack. */

    /* Helper function to clear junk off the stack. */
    let end_num = d.here;
    forth!(DRP, RTO, DRP, RET);

    /* Helper function to clear junk off the stack and return -1. */
    let bad_num = d.here;
    forth!(DRP, DRP, DRP, Literal(0), INV, RTO, DRP, RET);

    // Helper function ( 0 1 -- n|-1 )
    let number_helper = d.here;
    forth!(
        RTO, DRP,
        /* Load the next character */
        DUP, Literal(word_buf), ADD, cld,

        /* If the character is not in the range 48 to 57
         * (which are the character codes for '0' to '9')
         * then this is not a number, so return the error code -1 (65535)
         */
        Literal(48), sub, DUP, Literal(10), GEQ, Q, bad_num,
        SWP, TOR, SWP, x10, ADD, RTO,

        /* If we've come to the end of the input buffer then end. */
        DUP, Literal(word_buf), cld, GEQ, Q, end_num,

        /* Move on to the next digit */
        Literal(1), ADD, number_helper);

    // number ( -- n|-1 )
    d.entry(); d.name(6, *b"num");  let number = d.here;
    forth!(Literal(0), Literal(1), number_helper);

    /* Compile a number */
    d.entry(); d.name(3, *b"lit");  let lit = d.here;
    forth!(DUP, ADD, Literal(1), ADD, comma, RET);

    // Helper function to compile a number ( n -- n? )
    let try_compile_lit = d.here;
    forth!(
        /* If we are in interpreting mode, */
        state, LD,

        /* then exit immediately, leaving this number on the stack. */
        Q, RET,

        /* Otherwise, turn it into a LITERAL instruction and append that
         * to the dictionary, */
        lit,

        /* and then return-from-caller. */
        RTO, DRP, RET);

    // Helper function to compile a call ( xt -- xt? )
    let try_compile_call = d.here;
    forth!(
        /* If this is an immediate word, */
        DUP, Literal(4), sub, LD, Literal(0x0080), AND,

        /* or if we are in interpreting mode, */
        state, LD, OR,

        /* then we should execute this word, not compile it. */
        Q, RET,

        /* Otherwise, compile it by appending its address to the dictionary, */
        comma,

        /* and then return-from-caller. */
        RTO, DRP, RET);

    /* Given the address of a word, execute that word. */
    // execute ( xt -- )
    d.entry(); d.name(7, *b"exe");  let execute = d.here;
    forth!(TOR, RET);

    // Helper function to compile or execute a word ( xt -- )
    let do_word = d.here;
    forth!(
        /* When this function concludes, return-from-caller. */
        RTO, DRP,

        /* If this word should be compiled, compile it, */
        try_compile_call,

        /* otherwise, execute it. */
        execute, RET);

    /* Forth can have very good error handling.  This Forth does not.
     * If we try to look up a word in the dictionary and can't find it,
     * and if the word also can't be parsed as a number,
     * then we print out a ? and move on to the next word.
     *
     * This helper function does some stack cleanup, prints the ?, then
     * uses the return-from-caller trick to move on to the next word.
     */
    let bad = d.here;
    forth!(DRP, Literal(63), emit, RTO, DRP, RET);

    /* Figure out what to do with the contents of the input buffer.  */

    // dispatch ( xt|0 -- )
    d.entry(); d.name(9, *b"int");  let dispatch = d.here;
    forth!(
        /* If the word was found in the dictionary, treat it as a word. */
        DUP, Q, do_word,

        /* If it wasn't found in the dictionary, try to parse it as a number.
         * If it isn't a number, flag it as an error. */
        DRP, number, DUP, Literal(1), ADD, zero_eq, Q, bad,

        /* If it is a number, treat it as a number. */
        try_compile_lit, RET);

    /* And now we can write the main interpreter/compiler loop.
     *
     * This is the top-level code for our entire Forth system!
     *
     * Forth names this "quit", for the reason that calling "quit" in
     * the middle of a compiled program is a reasonable way to bring
     * you back to top-level.
     */

    // quit ( -- )
    d.entry(); d.name(4, *b"qui");  let quit = d.here;
    forth!(
        quote,    /* Read a word from the keyboard and look it up in
                   * the dictionary */
        dispatch, /* Figure out what to do with the word */
        quit      /* Repeat forever */

        /* You might have noticed that "quit" isn't tail-recursive -- it
         * just calls itself normally.  "quit" is never supposed to return
         * so it doesn't need to properly maintain the return stack.
         * It will just fill up the circular stack and wrap around.  That's
         * fine.
         */
    );

    /* We now have an interpreter that can compile or execute code!!!
     *
     * We have now succeeded at:
     *
     * 1. Call subroutines by typing their name at the keyboard
     * 3. Push numbers onto the data stack by typing them at the keyboard
     *
     * But there are still a few more words we'll need if we want to:
     *
     * 2. Define new subroutines in terms of existing ones
     *
     * Let's take care of that now.
     */

    /* Here is a word to create a new dictionary header. */

    // create ( -- )
    d.entry(); d.name(6, *b"cre");  let create = d.here;
    forth!(
        here, LD,
        latest, LD, comma,  /* emit the link field */
        latest, ST,         /* point "latest" at us */
        word,               /* read a word from the keyboard */

        /* emit the name field (by copying it from the input buffer) */
        Literal(word_buf), DUP, LD, comma, Literal(2), ADD, LD, comma,

        RET);

    /* And now, here is the word to compile a new Forth word. */

    // : ( -- )
    d.entry(); d.name(1, *b":  ");
    forth!(
        /* Read name from keyboard, create dictionary header */
        create,

        /* Hide the word until we are done defining it.  This lets us
         * redefine a word in terms of a previous incarnation of itself. */
        smudge,

        /* Switch to compiling mode */
        rbracket,
        RET);

    /* And here is ;, the "end" marker that ends the Forth word.
     * Note that ; is immediate, as it has to switch us from compiling mode
     * back into interpreting mode.
     */
    // ; ( -- )
    d.entry(); d.name(1 | 0x80, *b";  ");
    forth!(
        /* Emit a RET instruction.  RET = 65504 which is outside of the
         * LITERAL instruction's 0 to 32767 range, so you have to store the
         * inverse and use INV to swap it back. */
        Literal(!(RET as u16)), INV, comma,

        /* The word is now done, so unhide it. */
        unsmudge,

        /* Switch back to interpreting mode */
        lbracket,

        RET);

    /* Put the CPU instructions into dictionary words so we can call them
     * interactively from Forth.  Instructions that modify the return stack
     * need special care, because otherwise they will mess up the
     * wrapper we created for them, instead of acting on the caller
     * the way they are supposed to.
     */
    d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET);
    d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET);
    d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET);
    d.entry(); d.name(1, *b"@  "); forth!(LD, RET);
    d.entry(); d.name(1, *b"!  "); forth!(ST, RET);
    d.entry(); d.name(3, *b"dup"); forth!(DUP, RET);
    d.entry(); d.name(4, *b"swa"); forth!(SWP, RET);
    d.entry(); d.name(4, *b"dro"); forth!(DRP, RET);

    d.entry(); d.name(1 | 0x80, *b"?  "); /* This one only works in-line. */
    forth!(Literal(!(Q as u16)), INV, comma, RET);

    d.entry(); d.name(1, *b"+  "); forth!(ADD, RET);
    d.entry(); d.name(5, *b"shi"); forth!(SFT, RET);
    d.entry(); d.name(2, *b"or "); forth!(OR, RET);
    d.entry(); d.name(3, *b"and"); forth!(AND, RET);
    d.entry(); d.name(3, *b"inv"); forth!(INV, RET);
    d.entry(); d.name(3, *b"u>="); forth!(GEQ, RET);
    d.entry(); d.name(2, *b"io "); forth!(IO, RET);

    /* Update Forth's "latest" and "here" variables to match the ones
     * we've been tracking in Rust.
     */
    d.c.store(latest_ptr, d.dp);
    d.c.store(here_ptr, d.here);

    /* Start out in interpreting mode.
    */
    d.c.store(state_ptr, 0xffff);

    /* The "entry point" should be the top level interpreter word "quit".
    */
    d.c.store(0, quit);
}

fn main() {
    /* Build the virtual computer */
    let mut machine = new_core();

    /* Load the Forth dictionary into the computer's memory */
    build_dictionary(&mut machine);

    /* Point the instruction pointer at address 0, which is where
     * build_dictionary stored the entry point, then step the CPU in an
     * endless loop.  The loop itself never exits. */
    machine.ip = 0;
    loop { machine.step(); }
}

/* "The next step is a problem-oriented-language. By permitting
 * the program to dynamically modify its control language, we
 * mark a qualitative change in capability. We also change our
 * attention from the program to the language it implements.
 * This is an important, and dangerous, diversion. For it's
 * easy to lose sight of the problem amidst the beauty of the
 * solution."
 *
 * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970
 */

/* Now we can start programming in "real" Forth, not a weird macro language
 * inside Rust.
 *
 * You can compile our Forth computer with:
 *     rustc frustration.rs
 *
 * You can run our Forth computer with:
 *     ./frustration
 *
 * However, I recommend loading a Forth program (frustration.4th, provided)
 * which does a few more setup steps before letting you loose.
 *
 *     cat frustration.4th - | ./frustration
 *
 * The line above is a good way to run Frustration if you're using Linux.
 * It concatenates together frustration.4th and - (stdin).  This means you
 * can type commands once frustration.4th has been executed.
 *
 * There is a shell script supplied that will do all of the above for you.
 *
 *     bash build.sh
 *
 * Please read frustration.4th if you want to learn more about how to
 * use Forth.
 */