mirror of
https://gitlab.cs.washington.edu/fidelp/frustration.git
synced 2024-12-26 21:58:20 +01:00
Writeup, part 2, partially done
This commit is contained in:
parent
e5531bc19f
commit
07fee611a6
1 changed files with 514 additions and 68 deletions
582
frustration.rs
582
frustration.rs
|
@ -62,18 +62,17 @@ const ADDRESS_SPACE: usize = 65536;
|
||||||
*
|
*
|
||||||
* In most CPUs, this place is called a "register". Registers work like
|
* In most CPUs, this place is called a "register". Registers work like
|
||||||
* variables in a programming language but there are only a few of them
|
* variables in a programming language but there are only a few of them
|
||||||
* (between 1 to 32 depending on your CPU).
|
* (most CPUs have between 1 and 32).
|
||||||
*
|
*
|
||||||
* On 64-bit ARM the registers are named x0, x1, ..., x30.
|
* On 64-bit ARM the registers are named x0, x1, ..., x30.
|
||||||
* On 64-bit Intel they are instead named rax, rbx, ..., etc.
|
* On 64-bit Intel they are instead named rax, rbx, ....
|
||||||
|
* Just in case those names ring any bells.
|
||||||
*
|
*
|
||||||
* Having immediate access to many registers is quite handy, but it means
|
* Having immediate access to dozens of registers is quite handy, but it means
|
||||||
* many choices are available to the programmer, or more likely, to the
|
* many choices are available to the programmer, or more likely, to the
|
||||||
* compiler. And making good choices is Hard.
|
* compiler. And making good choices is Hard. A lot of work goes into
|
||||||
*
|
* deciding what variable to store in what register ("register allocation") and
|
||||||
* A lot of work goes into deciding what variable to store in what register
|
* when to dump register contents back into memory ("spilling").
|
||||||
* ("register allocation") and when to dump register contents back into
|
|
||||||
* memory ("spilling").
|
|
||||||
*
|
*
|
||||||
* Our CPU avoids these problems by not having registers; instead we store
|
* Our CPU avoids these problems by not having registers; instead we store
|
||||||
* numbers in a stack. The CPU can only access the value that was most
|
* numbers in a stack. The CPU can only access the value that was most
|
||||||
|
@ -86,14 +85,14 @@ const ADDRESS_SPACE: usize = 65536;
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct Stack<const N: usize> {
|
struct Stack<const N: usize> {
|
||||||
mem: [u16; N],
|
mem: [u16; N],
|
||||||
tos: usize // top-of-stack.
|
tos: usize /* top-of-stack */
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<const N: usize> Stack<N> {
|
impl<const N: usize> Stack<N> {
|
||||||
// Add a number to the stack.
|
/* Add a number to the stack. */
|
||||||
fn push(&mut self, val: u16) {
|
fn push(&mut self, val: u16) {
|
||||||
self.tos = (self.tos.wrapping_add(1)) & (N - 1);
|
self.tos = (self.tos.wrapping_add(1)) & (N - 1);
|
||||||
/* This stack is fixed-sized and can hold N values.
|
/* This stack is fixed-size and can hold N values.
|
||||||
*
|
*
|
||||||
* When a fixed-size stack fills up, there is a failure case
|
* When a fixed-size stack fills up, there is a failure case
|
||||||
* (stack overflow) that must be handled somehow.
|
* (stack overflow) that must be handled somehow.
|
||||||
|
@ -106,7 +105,7 @@ impl<const N: usize> Stack<N> {
|
||||||
self.mem[self.tos] = val;
|
self.mem[self.tos] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the most recently pushed number.
|
/* Return the most recently pushed number. */
|
||||||
fn pop(&mut self) -> u16 {
|
fn pop(&mut self) -> u16 {
|
||||||
let val = self.mem[self.tos];
|
let val = self.mem[self.tos];
|
||||||
self.mem[self.tos] = 0;
|
self.mem[self.tos] = 0;
|
||||||
|
@ -152,12 +151,12 @@ struct Core {
|
||||||
* Instead we are going with the "byte-addressed memory" that is more
|
* Instead we are going with the "byte-addressed memory" that is more
|
||||||
* conventional in today's computers. This choice is arbitrary.
|
* conventional in today's computers. This choice is arbitrary.
|
||||||
*/
|
*/
|
||||||
ip: u16, // instruction pointer
|
ip: u16, /* instruction pointer */
|
||||||
dstack: Stack<16>, // data stack
|
dstack: Stack<16>, /* data stack */
|
||||||
rstack: Stack<32> // return stack
|
rstack: Stack<32> /* return stack */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Function to initialize the cpu.
|
/* Helper to initialize the cpu.
|
||||||
* There is probably a better idiom for this but I am bad at rust */
|
* There is probably a better idiom for this but I am bad at rust */
|
||||||
fn new_core() -> Core {
|
fn new_core() -> Core {
|
||||||
let c = Core {
|
let c = Core {
|
||||||
|
@ -200,7 +199,7 @@ fn new_core() -> Core {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
impl Core {
|
impl Core {
|
||||||
/* Helper function - Read a number from the specified memory address. */
|
/* Helper to read a number from the specified memory address. */
|
||||||
fn load(&self, addr: u16) -> u16 {
|
fn load(&self, addr: u16) -> u16 {
|
||||||
let a = addr as usize;
|
let a = addr as usize;
|
||||||
/* We immediately run into trouble because we are using byte-addressed
|
/* We immediately run into trouble because we are using byte-addressed
|
||||||
|
@ -241,7 +240,7 @@ impl Core {
|
||||||
/* The le in this function call stands for little-endian. */
|
/* The le in this function call stands for little-endian. */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Helper function - Write a number to the specified memory address. */
|
/* Helper to write a number to the specified memory address. */
|
||||||
fn store(&mut self, addr: u16, val: u16) {
|
fn store(&mut self, addr: u16, val: u16) {
|
||||||
let a = addr as usize;
|
let a = addr as usize;
|
||||||
self.ram[a..=a+1].copy_from_slice(&val.to_le_bytes());
|
self.ram[a..=a+1].copy_from_slice(&val.to_le_bytes());
|
||||||
|
@ -383,14 +382,14 @@ impl Core {
|
||||||
/* Now that the instruction set is generally described
|
/* Now that the instruction set is generally described
|
||||||
* let's look at the code that implements it */
|
* let's look at the code that implements it */
|
||||||
fn step(&mut self) {
|
fn step(&mut self) {
|
||||||
// 1. Fetch the instruction.
|
/* 1. Fetch the instruction.
|
||||||
// Also advance ip to point at the next instruction for next time.
|
* Also advance ip to point at the next instruction for next time. */
|
||||||
let opcode = self.load(self.ip);
|
let opcode = self.load(self.ip);
|
||||||
self.ip = self.ip.wrapping_add(2);
|
self.ip = self.ip.wrapping_add(2);
|
||||||
|
|
||||||
// 2. Decode and execute the instruction
|
/* 2. Decode and execute the instruction */
|
||||||
if (opcode >= 0xffe0) && (opcode & 1 == 0) {
|
if (opcode >= 0xffe0) && (opcode & 1 == 0) {
|
||||||
// Data processing instruction
|
/* Data processing instruction */
|
||||||
PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self);
|
PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self);
|
||||||
/* These instructions get looked up in a table. The bit
|
/* These instructions get looked up in a table. The bit
|
||||||
* math converts the instruction code into an index in the
|
* math converts the instruction code into an index in the
|
||||||
|
@ -406,18 +405,18 @@ impl Core {
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
else if (opcode & 1) == 1 {
|
else if (opcode & 1) == 1 {
|
||||||
// Literal
|
/* Literal */
|
||||||
self.dstack.push(opcode >> 1);
|
self.dstack.push(opcode >> 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Call
|
/* Call */
|
||||||
self.rstack.push(self.ip);
|
self.rstack.push(self.ip);
|
||||||
self.ip = opcode;
|
self.ip = opcode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The names of the 16 remaining CPU instructions
|
/* The names of the 16 remaining CPU instructions */
|
||||||
enum Op {
|
enum Op {
|
||||||
RET = 0xffe0, TOR = 0xffe2, RTO = 0xffe4, LD = 0xffe6,
|
RET = 0xffe0, TOR = 0xffe2, RTO = 0xffe4, LD = 0xffe6,
|
||||||
ST = 0xffe8, DUP = 0xffea, SWP = 0xffec, DRP = 0xffee,
|
ST = 0xffe8, DUP = 0xffea, SWP = 0xffec, DRP = 0xffee,
|
||||||
|
@ -427,29 +426,29 @@ enum Op {
|
||||||
|
|
||||||
type Primitive = fn(&mut Core);
|
type Primitive = fn(&mut Core);
|
||||||
|
|
||||||
// A table of functions for each of the 16 remaining CPU instructions
|
/* A table of functions for each of the 16 remaining CPU instructions */
|
||||||
const PRIMITIVES: [Primitive; 16] = [
|
const PRIMITIVES: [Primitive; 16] = [
|
||||||
/* Return-stack instructions */
|
/* Return-stack instructions */
|
||||||
| x | {
|
| x | {
|
||||||
// RET - Return from subroutine
|
/* RET - Return from subroutine */
|
||||||
x.ip = x.rstack.pop()
|
x.ip = x.rstack.pop()
|
||||||
},
|
},
|
||||||
| x | {
|
| x | {
|
||||||
// TOR - Transfer number from data stack to return stack
|
/* TOR - Transfer number from data stack to return stack */
|
||||||
x.rstack.push(x.dstack.pop())
|
x.rstack.push(x.dstack.pop())
|
||||||
},
|
},
|
||||||
| x | {
|
| x | {
|
||||||
// RTO - Transfer number from return stack to data stack
|
/* RTO - Transfer number from return stack to data stack */
|
||||||
x.dstack.push(x.rstack.pop())
|
x.dstack.push(x.rstack.pop())
|
||||||
},
|
},
|
||||||
/* Memory instructions */
|
/* Memory instructions */
|
||||||
| x | {
|
| x | {
|
||||||
// LD - Load number from memory address specified on the data stack
|
/* LD - Load number from memory address specified on the data stack */
|
||||||
let a = x.dstack.pop();
|
let a = x.dstack.pop();
|
||||||
x.dstack.push(x.load(a));
|
x.dstack.push(x.load(a));
|
||||||
},
|
},
|
||||||
| x | {
|
| x | {
|
||||||
// ST - Store number to memory address specified on the data stack
|
/* ST - Store number to memory address specified on the data stack */
|
||||||
let a = x.dstack.pop();
|
let a = x.dstack.pop();
|
||||||
let v = x.dstack.pop();
|
let v = x.dstack.pop();
|
||||||
x.store(a, v);
|
x.store(a, v);
|
||||||
|
@ -607,43 +606,133 @@ const PRIMITIVES: [Primitive; 16] = [
|
||||||
* Part 2 - The Program
|
* Part 2 - The Program
|
||||||
* ------------------------------------------------------------------------ */
|
* ------------------------------------------------------------------------ */
|
||||||
|
|
||||||
/* You now have an unfamiliar computer with no software. It sits there doing
|
/* "In a sense we're building a tree. We've now reached a
|
||||||
* nothing. Can you and the computer write a program?
|
* point where we can start making the roots. For a while
|
||||||
|
* everything will be concealed but we'll eventually reach
|
||||||
|
* daylight and start on branches."
|
||||||
*
|
*
|
||||||
* We are going to need to give the computer a list of instructions, by
|
* -- Chuck Moore, "Programming a Problem-Oriented Language", 1970
|
||||||
* which I mean a list of numbers. If we were sitting in front of a
|
* https://colorforth.github.io/POL.htm
|
||||||
* minicomputer in 196x, we would need a copy of the target machine's
|
|
||||||
* instruction set, paper and pencil, and a lot of coffee.
|
|
||||||
*
|
*
|
||||||
* In 20xx we are fortunate enough to have rust so we will put it to work.
|
* You now have an unfamiliar computer with no software. Can you and the
|
||||||
|
* computer write a program?
|
||||||
*
|
*
|
||||||
* Regardless, this bootstrapping process isn't going to be very pleasant
|
* The first program is the hardest to write because you don't have any tools to
|
||||||
* so the goal is to make it short. We want a language that can stand on
|
* help write it. The computer itself is going to be no help. Without any
|
||||||
* its own as quickly as possible, so the computer can start helping us
|
* program it will sit there doing nothing.
|
||||||
* write the program.
|
*
|
||||||
|
* What should the first program be?
|
||||||
|
* A natural choice would be a tool that helps you program more easily.
|
||||||
|
*
|
||||||
|
* An interactive programming environment needs to let you do 2 things:
|
||||||
|
*
|
||||||
|
* 1. Call subroutines by typing their name at the keyboard
|
||||||
|
* 2. Define new subroutines in terms of existing ones
|
||||||
|
*
|
||||||
|
* Begin with step 1:
|
||||||
|
* Call subroutines by typing their name at the keyboard
|
||||||
|
*
|
||||||
|
* This is where we will meet Forth.
|
||||||
|
*
|
||||||
|
* The below is a small Forth for bootstrapping this computer. If you want to
|
||||||
|
* learn how to implement a full featured Forth, please read Jonesforth, and
|
||||||
|
* Brad Rodriguez' series of articles "Moving Forth". The simple Forth I write
|
||||||
|
* below will probably help you understand those Forths a little better.
|
||||||
|
*
|
||||||
|
* Forth organizes all the computer's memory as a "dictionary" of subroutines.
|
||||||
|
* The point of the dictionary is to give each subroutine a name so you
|
||||||
|
* can run a subroutine by typing its name. The computer will look up its
|
||||||
|
* address for you and call it.
|
||||||
*
|
*
|
||||||
* Forth is a weird language but its design decisions make a lot of sense
|
|
||||||
* if you view it as a bootstrapping tool.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Forth organizes all the computer's memory as a "dictionary".
|
|
||||||
* The dictionary starts at a low address and grows towards high addresses.
|
* The dictionary starts at a low address and grows towards high addresses.
|
||||||
* It is organized like a linked-list, like this:
|
* It is organized like a linked-list, like this:
|
||||||
*
|
*
|
||||||
|
* [Link field][Name][Code .......... ]
|
||||||
|
* ^
|
||||||
|
* |
|
||||||
|
* [Link field][Name][Code ...... ]
|
||||||
|
* ^
|
||||||
|
* |
|
||||||
|
* [Link field][Name][Code ............... ]
|
||||||
*
|
*
|
||||||
* Code is stored in the dictionary as a list of addresses.
|
* The reason it is a linked list is to allow each list entry to be a
|
||||||
|
* different length.
|
||||||
*
|
*
|
||||||
|
* Each dictionary entry contains three things:
|
||||||
*
|
*
|
||||||
|
* - "Link field": The address of the previous dictionary entry.
|
||||||
|
* For the first dictionary entry this field is 0.
|
||||||
|
*
|
||||||
|
* - "Name": A few letters to name this dictionary entry.
|
||||||
|
* Later you will type this name at the keyboard to call up
|
||||||
|
* this dictionary entry.
|
||||||
|
*
|
||||||
|
* - "Code": A subroutine to execute when you call up this dictionary
|
||||||
|
* entry. This is a list of CPU instructions. Note that one
|
||||||
|
* of the CPU instructions is "call". So you can have a subroutine
|
||||||
|
* that calls other subroutines, or calls itself.
|
||||||
|
*
|
||||||
|
* This code must end with a return (RET) instruction.
|
||||||
|
*
|
||||||
|
* Example subroutine:
|
||||||
|
*
|
||||||
|
* Number Instruction Meaning
|
||||||
|
* ------ ----------- -------
|
||||||
|
* 7 Literal(3) Push the value 3 onto the data stack
|
||||||
|
* 9 Literal(4) Push the value 4 onto the data stack
|
||||||
|
* 65504 RET Return to caller
|
||||||
|
*
|
||||||
|
* A linked list is not a very fast data structure but this doesn't really
|
||||||
|
* matter because dictionary lookup doesn't need to be fast. Lookups are
|
||||||
|
* for converting text you typed at the keyboard to subroutine addresses.
|
||||||
|
* You can't type very fast compared to a computer so this lookup doesn't
|
||||||
|
* need to be fast.
|
||||||
|
*
|
||||||
|
* In addition to the linked list itself, you will need a couple of
|
||||||
|
* variables to keep track of where the dictionary is in memory:
|
||||||
|
*
|
||||||
|
* - Dictionary pointer: The address of the newest dictionary entry.
|
||||||
|
* - Here: The address of the first unused memory location,
|
||||||
|
* which comes just after the newest dictionary entry.
|
||||||
|
*
|
||||||
|
* [Link field][Name][Code .......... ]
|
||||||
|
* ^
|
||||||
|
* |
|
||||||
|
* [Link field][Name][Code ...... ]
|
||||||
|
* ^
|
||||||
|
* |
|
||||||
|
* [Link field][Name][Code ............... ]
|
||||||
|
* ^ ^
|
||||||
|
* | |
|
||||||
|
* [Dictionary pointer] [Here]
|
||||||
|
*
|
||||||
|
* Got all that?
|
||||||
|
*
|
||||||
|
* To create our Forth interactive programming environment, we will start
|
||||||
|
* by defining subroutines that:
|
||||||
|
* - read names from the keyboard
|
||||||
|
* - look up and execute dictionary entries by name
|
||||||
|
*
|
||||||
|
* We will put these subroutines themselves in the dictionary so they are
|
||||||
|
* available for use once our interactive environment is up and running!
|
||||||
|
*
|
||||||
|
* If you were sitting in front of a minicomputer in 196x you would need
|
||||||
|
* to create the dictionary with pencil and paper, but in 20xx we will
|
||||||
|
* write a Rust program to help create the dictionary.
|
||||||
|
*
|
||||||
|
* First we need to keep track of where the dictionary is:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Here is the stuff that you would normally be doing with pencil and paper */
|
|
||||||
struct Dict<'a> {
|
struct Dict<'a> {
|
||||||
dp: u16,
|
dp: u16, // The dictionary pointer
|
||||||
here: u16,
|
here: u16, // The "here" variable
|
||||||
c: &'a mut Core
|
c: &'a mut Core // The dictionary lives in memory. We are going to
|
||||||
|
// hang on to a mutable reference to the core to give
|
||||||
|
// us easy access to the memory.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helpers to help put new routines in the dictionary */
|
||||||
|
|
||||||
enum Item {
|
enum Item {
|
||||||
Literal(u16),
|
Literal(u16),
|
||||||
Call(u16),
|
Call(u16),
|
||||||
|
@ -653,15 +742,19 @@ impl From<u16> for Item { fn from(a: u16) -> Self { Item::Call(a) } }
|
||||||
impl From<Op> for Item { fn from(o: Op) -> Self { Item::Opcode(o) } }
|
impl From<Op> for Item { fn from(o: Op) -> Self { Item::Opcode(o) } }
|
||||||
|
|
||||||
impl Dict<'_> {
|
impl Dict<'_> {
|
||||||
|
/* Helper to reserve space in the dictionary by advancing the "here"
|
||||||
|
* pointer */
|
||||||
fn allot(&mut self, n: u16) {
|
fn allot(&mut self, n: u16) {
|
||||||
self.here = self.here.wrapping_add(n);
|
self.here = self.here.wrapping_add(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helper to append a 16 bit integer to the dictionary */
|
||||||
fn comma(&mut self, val: u16) {
|
fn comma(&mut self, val: u16) {
|
||||||
self.c.store(self.here, val);
|
self.c.store(self.here, val);
|
||||||
self.allot(2);
|
self.allot(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helper to append a CPU instruction to the dictionary */
|
||||||
fn emit<T: Into<Item>>(&mut self, val: T) {
|
fn emit<T: Into<Item>>(&mut self, val: T) {
|
||||||
match val.into() {
|
match val.into() {
|
||||||
Item::Call(val) => { self.comma(val) }
|
Item::Call(val) => { self.comma(val) }
|
||||||
|
@ -671,11 +764,36 @@ impl Dict<'_> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helper to append a "name" field to the dictionary. To save space and
|
||||||
|
* to make each dictionary header a consistent size, I am choosing to not
|
||||||
|
* store every letter of the name. Instead I am storing only the length of
|
||||||
|
* the name and then the first three letters of the name.
|
||||||
|
*
|
||||||
|
* That means these two names will compare equal:
|
||||||
|
* - ALLOW (-> 5ALL)
|
||||||
|
* - ALLOT (-> 5ALL)
|
||||||
|
*
|
||||||
|
* Even though their first three letters are the same, these two names
|
||||||
|
* will compare unequal because they are different lengths:
|
||||||
|
* - FORTH (-> 5FOR)
|
||||||
|
* - FORGET (-> 6FOR)
|
||||||
|
*
|
||||||
|
* If a name is shorter than 3 letters it is padded out with spaces.
|
||||||
|
* - X (-> 1X )
|
||||||
|
*
|
||||||
|
* You can see that the name field is always four bytes regardless
|
||||||
|
* of how many letters are in the name, and the link field is two bytes.
|
||||||
|
* This means a dictionary header in this Forth is always six bytes.
|
||||||
|
*/
|
||||||
fn name(&mut self, n: u8, val: [u8; 3]) {
|
fn name(&mut self, n: u8, val: [u8; 3]) {
|
||||||
|
/* Store the length and the first character */
|
||||||
self.comma(n as u16 | ((val[0] as u16) << 8));
|
self.comma(n as u16 | ((val[0] as u16) << 8));
|
||||||
|
/* Store the next two characters */
|
||||||
self.comma(val[1] as u16 | ((val[2] as u16) << 8));
|
self.comma(val[1] as u16 | ((val[2] as u16) << 8));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Helper to append a new link field to the dictionary and update the
|
||||||
|
* dictionary pointer appropriately. */
|
||||||
fn entry(&mut self) {
|
fn entry(&mut self) {
|
||||||
let here = self.here;
|
let here = self.here;
|
||||||
self.comma(self.dp);
|
self.comma(self.dp);
|
||||||
|
@ -683,44 +801,371 @@ impl Dict<'_> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now we can start building the dictionary. */
|
||||||
fn build_dictionary(c: &mut Core) {
|
fn build_dictionary(c: &mut Core) {
|
||||||
use Op::*;
|
use Op::*;
|
||||||
use Item::*;
|
use Item::*;
|
||||||
|
|
||||||
let mut d = Dict {dp: 0, here: 2, c: c};
|
let mut d = Dict {
|
||||||
|
dp: 0, /* Nothing in the dictionary yet */
|
||||||
|
here: 2, /* Reserve address 0 as an "entry point", i.e. where the
|
||||||
|
CPU will jump to start running Forth. We don't have a
|
||||||
|
Forth interpreter yet so we'll leave address 0 alone for
|
||||||
|
now and start the dictionary at address 2 instead. */
|
||||||
|
c: c
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Consider the following facts:
|
||||||
|
* - The CPU knows how to execute a bunch of instructions strung together.
|
||||||
|
* - Forth consists of a bunch of subroutine calls strung together.
|
||||||
|
* - Subroutine CALL is a valid instruction of our CPU.
|
||||||
|
*
|
||||||
|
* This means that we can immediately begin programming our machine in
|
||||||
|
* a language resembling Forth, just by writing a list of subroutine
|
||||||
|
* calls into the dictionary.
|
||||||
|
*
|
||||||
|
* The line between "machine code program" and "Forth program" is
|
||||||
|
* very blurry. To illustrate:
|
||||||
|
*
|
||||||
|
* Here is a subroutine consisting of a few instructions strung together.
|
||||||
|
*
|
||||||
|
* Instruction Number Meaning
|
||||||
|
* ----------- ------ -------
|
||||||
|
* Literal(3) 7 Push the value 3 onto the data stack
|
||||||
|
* Literal(4) 9 Push the value 4 onto the data stack
|
||||||
|
* RET 65504 Return to caller
|
||||||
|
*
|
||||||
|
* Here is a Forth subroutine consisting of a few subroutine calls strung
|
||||||
|
* together.
|
||||||
|
* Call Number Meaning
|
||||||
|
* ----------- ------ -------
|
||||||
|
* S1 1230 Call subroutine S1 which happens to live
|
||||||
|
* at address 1230
|
||||||
|
* S2 1250 Call subroutine S2 which happens to live
|
||||||
|
* at address 1250
|
||||||
|
* RET 65504 Return to caller
|
||||||
|
*
|
||||||
|
* This duality between CPU instructions and Forth code comes from
|
||||||
|
* an idea called "subroutine threading". It is a refinement of an
|
||||||
|
* idea called "threaded code". This has no relation to the kind of
|
||||||
|
* threading that lets you run programs in parallel. You can read more
|
||||||
|
* about threaded code on Wikipedia or in the other Forth resources I
|
||||||
|
* mentioned earlier (Jonesforth, and Moving Forth by Brad Rodriguez).
|
||||||
|
*
|
||||||
|
* Our new language starts out with the sixteen (well, eighteen)
|
||||||
|
* instructions built into the CPU. We can string those instructions
|
||||||
|
* together into a new subroutine. This subroutine adds to the pool
|
||||||
|
* of functions we have available for making new subroutines.
|
||||||
|
*
|
||||||
|
* Repeat until you have built what you wanted to build, via
|
||||||
|
* function composition. This is the idea behind Forth.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We are going to be writing many series of instructions so let's
|
||||||
|
* start out by making a Rust macro that makes them easier to type
|
||||||
|
* and lets us specify a CPU instruction vs. a subroutine call with
|
||||||
|
* equal ease.
|
||||||
|
*
|
||||||
|
* The macro below will convert:
|
||||||
|
*
|
||||||
|
* forth!(Literal(2), ADD, RET)
|
||||||
|
*
|
||||||
|
* to:
|
||||||
|
*
|
||||||
|
* d.emit(Literal(2));
|
||||||
|
* d.emit(ADD);
|
||||||
|
* d.emit(RET);
|
||||||
|
*
|
||||||
|
* which you probably recognize as code that will add a new subroutine
|
||||||
|
* to the dictionary.
|
||||||
|
*/
|
||||||
macro_rules! forth {
|
macro_rules! forth {
|
||||||
($x:expr) => (d.emit($x));
|
($x:expr) => (d.emit($x));
|
||||||
($x:expr, $($y:expr),+) => (d.emit($x); forth!($($y),+))
|
($x:expr, $($y:expr),+) => (d.emit($x); forth!($($y),+))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now we can add the first subroutine to the dictionary!
|
||||||
|
*
|
||||||
|
* key: Reads a character from the keyboard and places its character
|
||||||
|
* code on the stack.
|
||||||
|
*
|
||||||
|
* There is a tradition of writing stack comments for Forth subroutines
|
||||||
|
* to describe the stack effect of executing the subroutine.
|
||||||
|
* They look like this: key ( -- n )
|
||||||
|
*
|
||||||
|
* Read as: key does not take any parameters off the stack, and leaves
|
||||||
|
* one new number pushed onto the stack.
|
||||||
|
*
|
||||||
|
* Also remember that a dictionary entry looks like this:
|
||||||
|
* [Link field][Name][Code .......... ]
|
||||||
|
*/
|
||||||
|
|
||||||
// key ( -- n )
|
// key ( -- n )
|
||||||
d.entry(); d.name(3, *b"key"); let key = d.here;
|
d.entry(); /* Compile the link field into the dictionary */
|
||||||
forth!(Literal(0), IO, RET);
|
d.name(3, *b"key"); /* Compile the name field into the dictionary */
|
||||||
|
let key = d.here; /* (Save off the start address of the code so we
|
||||||
|
can call it later) */
|
||||||
|
forth!(
|
||||||
|
Literal(0), /* Compile a LITERAL instruction that pushes
|
||||||
|
0 to the stack */
|
||||||
|
|
||||||
|
IO, /* Compile an IO instruction.
|
||||||
|
*
|
||||||
|
* Remember from the CPU code that IO takes a
|
||||||
|
* parameter on the stack to specify which port
|
||||||
|
* to use.
|
||||||
|
*
|
||||||
|
* Also remember that IO port 0 reads
|
||||||
|
* a character from standard input.
|
||||||
|
*/
|
||||||
|
|
||||||
|
RET /* Compile a RET instruction */
|
||||||
|
);
|
||||||
|
/* We have now compiled the "key" subroutine into the dictionary.
|
||||||
|
* [Link field][Name][Code .......... ]
|
||||||
|
* 0000 3key 1, 65534, 65504
|
||||||
|
*
|
||||||
|
* The next subroutine we will make is "emit". This is a companion
|
||||||
|
* to "key" that works in the opposite direction.
|
||||||
|
*
|
||||||
|
* key ( -- n ) reads a character from stdin and pushes it to the stack.
|
||||||
|
* emit ( n -- ) pops a character from the stack and writes it to stdout.
|
||||||
|
*/
|
||||||
|
|
||||||
// emit ( n -- )
|
// emit ( n -- )
|
||||||
d.entry(); d.name(4, *b"emi"); let emit = d.here;
|
d.entry(); d.name(4, *b"emi"); let emit = d.here;
|
||||||
forth!(Literal(1), IO, RET);
|
forth!(Literal(1), IO, RET);
|
||||||
|
|
||||||
|
/* I am tired of saying "subroutine" so many times, so I am going to
|
||||||
|
* introduce a new term. Remember the goal our language is working
|
||||||
|
* towards -- we want to be able to type a word at the keyboard, and
|
||||||
|
* let the computer look it up in the dictionary and execute the
|
||||||
|
* appropriate code.
|
||||||
|
*
|
||||||
|
* So far we have two named items in the dictionary, key and emit.
|
||||||
|
*
|
||||||
|
* We are going to term a named dictionary item a "word".
|
||||||
|
* This is a Forth tradition.
|
||||||
|
*
|
||||||
|
* So key and emit are "words", or "dictionary words" if you want to be
|
||||||
|
* precise about it. So far these are the only words we've defined.
|
||||||
|
*
|
||||||
|
* Let's define some more words.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Our CPU does not have subtraction so let's make subtraction by adding
|
||||||
|
* the two's complement.
|
||||||
|
*
|
||||||
|
* To get the two's complement, do a bitwise invert and add 1.
|
||||||
|
*
|
||||||
|
* This will be the most complicated Forth that we've written so far
|
||||||
|
* so let's walk through step by step. */
|
||||||
|
|
||||||
// - ( a b -- a-b )
|
// - ( a b -- a-b )
|
||||||
d.entry(); d.name(1, *b"- "); let sub = d.here;
|
d.entry(); d.name(1, *b"- "); let sub = d.here;
|
||||||
forth!(INV, Literal(1), ADD, ADD, RET);
|
forth!( /* Stack contents: a b, to start off with.
|
||||||
|
* We want to compute a minus b */
|
||||||
|
|
||||||
|
INV, /* Bitwise invert the top item on the stack.
|
||||||
|
* Stack contents: a ~b */
|
||||||
|
|
||||||
|
Literal(1), /* Push 1 onto the stack.
|
||||||
|
* Stack contents: a ~b 1 */
|
||||||
|
|
||||||
|
ADD, /* Add the top two items on the stack.
|
||||||
|
* Stack contents: a ~b+1
|
||||||
|
* Note that ~b+1 is the two's complement of b. */
|
||||||
|
|
||||||
|
ADD, /* Add the top two items on the stack.
|
||||||
|
* Stack contents: n
|
||||||
|
* Note that n = (a + ~b+1) = a - b */
|
||||||
|
|
||||||
|
RET /* Done, return to caller, leaving n on the data stack. */
|
||||||
|
);
|
||||||
|
/* Writing it out like that takes a lot of space. Normally Forth code
|
||||||
|
* is written on a single line, like this:
|
||||||
|
*
|
||||||
|
* INV 1 ADD ADD RET
|
||||||
|
*
|
||||||
|
* Looking at it this way, it's easy to see the new word we just
|
||||||
|
* created (-) is made from 5 instructions. It's pretty typical for
|
||||||
|
* a Forth word to be made of 2-7 of them. Beyond that length, things
|
||||||
|
* get successively harder to understand, and it becomes a good idea
|
||||||
|
* to split some work off into helper words.
|
||||||
|
*
|
||||||
|
* We will see an example of this below.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Our next word will be useful for Boolean logic.
|
||||||
|
*
|
||||||
|
* 0= ( n -- f )
|
||||||
|
*
|
||||||
|
* In a stack comment, "f" means "flag", a.k.a. Boolean value.
|
||||||
|
* By Forth convention, zero is false and any nonzero value is true.
|
||||||
|
* However the "best" value to use for a true flag is 65535 (all ones)
|
||||||
|
* so the bitwise logical operations can double as Boolean logical
|
||||||
|
* operations.
|
||||||
|
*
|
||||||
|
* So what 0= does is:
|
||||||
|
* - if n=0, leave on the stack f=65535
|
||||||
|
* - otherwise, leave on the stack f=0
|
||||||
|
*
|
||||||
|
* It is like C's ! operator.
|
||||||
|
*
|
||||||
|
* In Rust this could be implemented as:
|
||||||
|
*
|
||||||
|
* fn zero_eq(n: u16) -> u16 {
|
||||||
|
* if (n == 0) {
|
||||||
|
* return 65535;
|
||||||
|
* } else {
|
||||||
|
* return 0;
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* Rust has an if-then and block scope, so this is easy to write.
|
||||||
|
*
|
||||||
|
* The literal translation to a typical register-machine assembly
|
||||||
|
* language would look something like this:
|
||||||
|
*
|
||||||
|
* zero_eq: compare r0, 0
|
||||||
|
* jump_eq is_zero
|
||||||
|
* move r0, 0
|
||||||
|
* ret
|
||||||
|
* is_zero: move r0, 65535
|
||||||
|
* ret
|
||||||
|
*
|
||||||
|
* It looks simple but I want to point out a couple things about it
|
||||||
|
* that are not so simple.
|
||||||
|
*
|
||||||
|
* The conditional jump instruction, jump_eq.
|
||||||
|
* ------------------------------------------
|
||||||
|
* Our CPU doesn't have this. Q is the only "decision-making"
|
||||||
|
* instruction that our CPU has.
|
||||||
|
*
|
||||||
|
* Q - If the top number on the data stack is zero, skip the next
|
||||||
|
* instruction.
|
||||||
|
*
|
||||||
|
* The forward reference
|
||||||
|
* ---------------------
|
||||||
|
* This is another problem. Think of the job of an assembler which is
|
||||||
|
* converting an assembly language program to machine code. We are
|
||||||
|
* currently writing our code in a tiny assembler that we made in Rust! It
|
||||||
|
* is very simple but so far it has worked for us. The assembler of our
|
||||||
|
* hypothetical register-machine below has a rather nasty problem to solve.
|
||||||
|
*
|
||||||
|
* zero_eq: compare r0, 0
|
||||||
|
* jump_eq is_zero <----- On this line.
|
||||||
|
* move r0, 0
|
||||||
|
* ret
|
||||||
|
* is_zero: move r0, 65535
|
||||||
|
* ret
|
||||||
|
*
|
||||||
|
* It wants to jump to is_zero but that symbol has not been seen yet and is
|
||||||
|
* unrecognized. On top of that, the assembler also doesn't yet know what
|
||||||
|
* address is_zero will have, so doesn't know what jump target to emit.
|
||||||
|
* To successfully assemble that kind of program you would need an
|
||||||
|
* assembler smarter than the assembler we made for ourselves in Rust.
|
||||||
|
*
|
||||||
|
* There are ways to solve this but let's NOT solve it.
|
||||||
|
*
|
||||||
|
* Our CPU has no jump instruction (only call) and our assembler only lets
|
||||||
|
* us call things we already defined. Instead of removing these
|
||||||
|
* constraints, find a way to write 0= within the constraints.
|
||||||
|
*
|
||||||
|
* Here is a start at solving the problem
|
||||||
|
*
|
||||||
|
* is_nonzero ( -- 0 )
|
||||||
|
* Literal(0)
|
||||||
|
* RET
|
||||||
|
*
|
||||||
|
* 0= ( n -- f )
|
||||||
|
* Q <-- pop n, if n=0 skip next instruction
|
||||||
|
* is_nonzero <-- f=0 is now pushed to stack
|
||||||
|
* Literal(0)
|
||||||
|
* INV <-- f=65535 is now pushed to stack
|
||||||
|
* RET <-- Return
|
||||||
|
*
|
||||||
|
* We got rid of the forward reference by defining is_nonzero before it
|
||||||
|
* was used.
|
||||||
|
*
|
||||||
|
* We got rid of the jump instruction by using a subroutine call instead.
|
||||||
|
*
|
||||||
|
* This code is close to working but it doesn't quite work. The problem
|
||||||
|
* is that is_nonzero gives control back to 0= when done, just like
|
||||||
|
* a subroutine call normally does, and then 0= runs as normal until it
|
||||||
|
* hits the return instruction at the end.
|
||||||
|
* So we wind up executing both the f=0 branch and the f=65535 branch,
|
||||||
|
* instead of just executing the f=0 branch like we wanted in this case.
|
||||||
|
*
|
||||||
|
* It is possible to fix this last problem by adding the instructions
|
||||||
|
* RTO DRP to is_nonzero.
|
||||||
|
*
|
||||||
|
* is_nonzero ( -- 0 )
|
||||||
|
* RTO <-- Pop the return address, push to data stack
|
||||||
|
* DRP <-- Discard it
|
||||||
|
* Literal(0) <-- Put 0 on the data stack
|
||||||
|
* RET <-- Return
|
||||||
|
*
|
||||||
|
* Because we popped off and discarded one item from the return stack, the
|
||||||
|
* final RET instruction will not return to 0= any more. Instead it will
|
||||||
|
* skip one level and return to whoever called 0=. This has the result of
|
||||||
|
* ending 0= early, which is what we wanted to do.
|
||||||
|
*
|
||||||
|
* I call this pattern "return-from-caller". It is used occasionally in
|
||||||
|
* real Forth systems. My dialect of Forth will use it extensively to work
|
||||||
|
* around my CPU's lack of conditional branch.
|
||||||
|
*
|
||||||
|
* Now we've explained how 0= is going to work, let's make it.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* First we define the helper. It won't be reused, so I am not going
|
||||||
|
* to bother giving it a dictionary header and name for easy lookup later.
|
||||||
|
* Think of it as a private function. */
|
||||||
let zero = d.here;
|
let zero = d.here;
|
||||||
forth!(Literal(0), RTO, DRP, RET);
|
forth!(Literal(0), RTO, DRP, RET);
|
||||||
|
|
||||||
|
/* Now define 0= using the helper. */
|
||||||
// 0= ( n -- f )
|
// 0= ( n -- f )
|
||||||
d.entry(); d.name(2, *b"0= "); let zero_eq = d.here;
|
d.entry(); d.name(2, *b"0= "); let zero_eq = d.here;
|
||||||
forth!(Q, zero, Literal(0), INV, RET);
|
forth!(Q, zero, Literal(0), INV, RET);
|
||||||
|
|
||||||
|
/* Next let's make a = equality comparison operator, using 0= and subtract.
|
||||||
|
* I call it an "operator" because that's what other languages would
|
||||||
|
* call it, but Forth has no special idea of an "operator". Everything
|
||||||
|
* is just words. */
|
||||||
// = ( a b -- a=b )
|
// = ( a b -- a=b )
|
||||||
d.entry(); d.name(1, *b"= "); let eq = d.here;
|
d.entry(); d.name(1, *b"= "); let eq = d.here;
|
||||||
forth!(sub, zero_eq, RET);
|
forth!(sub, zero_eq, RET);
|
||||||
|
/* Note that 0= and subtract are both words, not CPU instructions.
|
||||||
|
* This makes = the first "pure" Forth word we have defined, with no
|
||||||
|
* direct dependency on the machine's instruction set.
|
||||||
|
* We could define = as - 0= on a real standards-compliant Forth system
|
||||||
|
* and it would still work. So Forth gets you to the point of writing
|
||||||
|
* "portable" code really quickly. Often you can reuse routines early in
|
||||||
|
* bootstrapping even though they were written and tested on a different
|
||||||
|
* machine. Many languages offer portability but few offer it so quickly.
|
||||||
|
*/
|
||||||
|
|
||||||
// Advance past whitespace
|
/* Now that we've got some basics in place let's go back to solving
|
||||||
|
* the real problem of getting our language to read words from the
|
||||||
|
* keyboard. The first problem we have is that we need some way to
|
||||||
|
* separate words from each other so we know where one word ends and the
|
||||||
|
* next begins. This problem is called "lexing". Forth has about the
|
||||||
|
* simplest lexer ever: it just splits on whitespace. Anything with
|
||||||
|
* character code <=32 is considered whitespace. Words are delimited by
|
||||||
|
* whitespace. And that is all the syntax Forth has.
|
||||||
|
*
|
||||||
|
* To read a word from the keyboard you will need to:
|
||||||
|
* - Advance past any leading whitespace
|
||||||
|
* - Read characters into a buffer until whitespace is seen again.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Let's start with the "advance past whitespace" part */
|
||||||
let skip_helper = d.here;
|
let skip_helper = d.here;
|
||||||
forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper);
|
forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper);
|
||||||
|
|
||||||
|
// skipws ( -- c )
|
||||||
d.entry(); d.name(6, *b"ski"); let skipws = d.here;
|
d.entry(); d.name(6, *b"ski"); let skipws = d.here;
|
||||||
forth!(skip_helper);
|
forth!(skip_helper);
|
||||||
|
|
||||||
|
@ -748,13 +1193,13 @@ fn build_dictionary(c: &mut Core) {
|
||||||
d.entry(); d.name(2, *b"c! "); let cst = d.here;
|
d.entry(); d.name(2, *b"c! "); let cst = d.here;
|
||||||
forth!(DUP, LD, Literal(0xff), INV, AND, SWP, TOR, OR, RTO, ST, RET);
|
forth!(DUP, LD, Literal(0xff), INV, AND, SWP, TOR, OR, RTO, ST, RET);
|
||||||
|
|
||||||
// Load 1 letter into buffer.
|
/* Load 1 letter into buffer. */
|
||||||
let stchar = d.here;
|
let stchar = d.here;
|
||||||
forth!(Literal(word_buf), cld, Literal(1), ADD, DUP, Literal(word_buf), cst,
|
forth!(Literal(word_buf), cld, Literal(1), ADD, DUP, Literal(word_buf), cst,
|
||||||
Literal(5), min, Literal(word_buf), ADD, cst, RET);
|
Literal(5), min, Literal(word_buf), ADD, cst, RET);
|
||||||
|
|
||||||
// Load letters into buffer until whitespace is hit again.
|
/* Load letters into buffer until whitespace is hit again.
|
||||||
// Return the whitespace character that was seen.
|
* Return the whitespace character that was seen. */
|
||||||
let getcs_helper = d.here;
|
let getcs_helper = d.here;
|
||||||
forth!(RTO, DRP, stchar, key, DUP, Literal(32), SWP, GEQ, Q, RET, getcs_helper);
|
forth!(RTO, DRP, stchar, key, DUP, Literal(32), SWP, GEQ, Q, RET, getcs_helper);
|
||||||
|
|
||||||
|
@ -769,8 +1214,8 @@ fn build_dictionary(c: &mut Core) {
|
||||||
skipws, getcs, DRP, RET);
|
skipws, getcs, DRP, RET);
|
||||||
|
|
||||||
// latest ( -- a )
|
// latest ( -- a )
|
||||||
// Address of "latest" variable. This variable stores the address of
|
/* Address of "latest" variable. This variable stores the address of
|
||||||
// the latest word in the dictionary.
|
* the latest word in the dictionary. */
|
||||||
let latest_ptr = d.here; d.allot(2);
|
let latest_ptr = d.here; d.allot(2);
|
||||||
d.entry(); d.name(6, *b"lat"); let latest = d.here;
|
d.entry(); d.name(6, *b"lat"); let latest = d.here;
|
||||||
forth!(Literal(latest_ptr), RET);
|
forth!(Literal(latest_ptr), RET);
|
||||||
|
@ -806,15 +1251,15 @@ fn build_dictionary(c: &mut Core) {
|
||||||
forth!(DUP, DUP, Literal(3), SFT, ADD, ADD, RET);
|
forth!(DUP, DUP, Literal(3), SFT, ADD, ADD, RET);
|
||||||
|
|
||||||
// here ( -- a )
|
// here ( -- a )
|
||||||
// Address of "here" variable. This variable stores the address of
|
/* Address of "here" variable. This variable stores the address of
|
||||||
// the first free space in the dictionary
|
* the first free space in the dictionary. */
|
||||||
let here_ptr = d.here; d.allot(2);
|
let here_ptr = d.here; d.allot(2);
|
||||||
d.entry(); d.name(4, *b"her"); let here = d.here;
|
d.entry(); d.name(4, *b"her"); let here = d.here;
|
||||||
forth!(Literal(here_ptr), RET);
|
forth!(Literal(here_ptr), RET);
|
||||||
|
|
||||||
// state ( -- a )
|
// state ( -- a )
|
||||||
// Address of "state" variable. This variable stores -1 if
|
/* Address of "state" variable. This variable stores -1 if
|
||||||
// interpreting or 0 if compiling.
|
* interpreting or 0 if compiling. */
|
||||||
let state_ptr = d.here; d.allot(2);
|
let state_ptr = d.here; d.allot(2);
|
||||||
d.entry(); d.name(5, *b"sta"); let state = d.here;
|
d.entry(); d.name(5, *b"sta"); let state = d.here;
|
||||||
forth!(Literal(state_ptr), RET);
|
forth!(Literal(state_ptr), RET);
|
||||||
|
@ -906,7 +1351,8 @@ fn build_dictionary(c: &mut Core) {
|
||||||
d.entry(); d.name(1 | 0x80, *b"; ");
|
d.entry(); d.name(1 | 0x80, *b"; ");
|
||||||
forth!(Literal(!(RET as u16)), INV, comma, lbracket, unsmudge, RET);
|
forth!(Literal(!(RET as u16)), INV, comma, lbracket, unsmudge, RET);
|
||||||
|
|
||||||
// Finally put the primitives in the dictionary so they can be called directly.
|
/* Finally put the primitives in the dictionary so they can be
|
||||||
|
* called interactively. */
|
||||||
d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET);
|
d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET);
|
||||||
d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET);
|
d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET);
|
||||||
d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET);
|
d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET);
|
||||||
|
|
Loading…
Reference in a new issue