mirror of
https://gitlab.cs.washington.edu/fidelp/frustration.git
synced 2024-11-16 07:48:10 +01:00
1st draft + 1st pass proofreading
This commit is contained in:
parent
c57c321871
commit
bd1234713e
3 changed files with 205 additions and 130 deletions
|
@ -1,2 +1,2 @@
|
|||
Forth in Rust.
|
||||
|
||||
Please read the comments in frustration.md and then frustration.4th.
|
||||
All is explained.
|
||||
|
|
|
@ -54,8 +54,8 @@ Let's lay out a tail-recursive word in memory like this:
|
|||
[ Subroutine call to B ]
|
||||
[ RET ] <---------------------------------------------- C
|
||||
|
||||
The loop[ word compiles the two subroutine calls at the start of the code field.
|
||||
The ]loop word compiles the one subroutine call at the end of the word.
|
||||
The loop[ word puts the two subroutine calls at the start of the code field.
|
||||
The ]loop word puts the one subroutine call at the end of the word.
|
||||
Later we will talk about how they managed to do that.
|
||||
|
||||
The rdup word is the "subroutine to duplicate the return address".
|
||||
|
|
327
frustration.rs
327
frustration.rs
|
@ -35,8 +35,9 @@ use std::convert::TryInto;
|
|||
* program? Can you write a program that didn't descend from
|
||||
* a pre-existing program? You can learn a bit and have a
|
||||
* lot of fun trying."
|
||||
|
||||
* -- Chuck Moore, "Programming a Problem-Oriented Language", 1970
|
||||
* https://colorforth.github.io/POL.htm
|
||||
* https://colorforth.github.io/POL.htm
|
||||
*
|
||||
* As you will see, it does not take much work to get Forth running on a
|
||||
* new machine, including a machine with a completely unfamiliar instruction
|
||||
|
@ -49,8 +50,9 @@ use std::convert::TryInto;
|
|||
* Part 1 - The Computer
|
||||
* ------------------------------------------------------------------------ */
|
||||
|
||||
/* This computer will have a 16-bit CPU. It will be able to address
|
||||
/* This computer will have a 16-bit CPU. It will be able to access
|
||||
* 2^16 (65536) memory locations, numbered 0 to 65535.
|
||||
* Each of these locations, 0 to 65535, is called a "memory address".
|
||||
*/
|
||||
const ADDRESS_SPACE: usize = 65536;
|
||||
|
||||
|
@ -75,11 +77,16 @@ const ADDRESS_SPACE: usize = 65536;
|
|||
* when to dump register contents back into memory ("spilling").
|
||||
*
|
||||
* Our CPU avoids these problems by not having registers; instead we store
|
||||
* numbers in a stack. The CPU can only access the value that was most
|
||||
* recently pushed onto the stack. This may seem like a big limitation
|
||||
* right now but you will see ways of dealing with it.
|
||||
* numbers in a stack.
|
||||
* - Putting a number onto the top of the stack is called "push".
|
||||
* - Taking the most recent number off the top of the stack is called "pop".
|
||||
*
|
||||
* This makes our CPU a "stack machine" as opposed to a "register machine".
|
||||
* The CPU can only access the value that was most recently pushed onto the
|
||||
* stack. This may seem like a big limitation right now but you will see ways
|
||||
* of dealing with it.
|
||||
*
|
||||
* The choice to use a stack instead of registers makes our CPU a
|
||||
* "stack machine" as opposed to a "register machine".
|
||||
*/
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -92,16 +99,18 @@ impl<const N: usize> Stack<N> {
|
|||
/* Add a number to the stack. */
|
||||
fn push(&mut self, val: u16) {
|
||||
self.tos = (self.tos.wrapping_add(1)) & (N - 1);
|
||||
|
||||
/* This stack is fixed-size and can hold N values.
|
||||
*
|
||||
* When a fixed-size stack fills up, there is a failure case
|
||||
* (stack overflow) that must be handled somehow.
|
||||
*
|
||||
* In this stack, the bitwise & makes it a circular stack, meaning, if
|
||||
* This particular stack is a circular stack, meaning that if
|
||||
* it ever fills up, it will discard the oldest entry instead of
|
||||
* signaling an error. The lack of error handling makes the CPU
|
||||
* simpler.
|
||||
*/
|
||||
|
||||
self.mem[self.tos] = val;
|
||||
}
|
||||
|
||||
|
@ -109,10 +118,12 @@ impl<const N: usize> Stack<N> {
|
|||
fn pop(&mut self) -> u16 {
|
||||
let val = self.mem[self.tos];
|
||||
self.mem[self.tos] = 0;
|
||||
|
||||
/* You don't have to set the value back to zero. I am only doing
|
||||
* this because it makes makes the stack look nicer when dumped
|
||||
* out with print!().
|
||||
*/
|
||||
|
||||
self.tos = (self.tos.wrapping_sub(1)) & (N - 1);
|
||||
return val;
|
||||
}
|
||||
|
@ -122,8 +133,8 @@ impl<const N: usize> Stack<N> {
|
|||
*
|
||||
* Why two stacks?
|
||||
*
|
||||
* The first stack will be called the "data stack" and is used instead of
|
||||
* registers as already described.
|
||||
* The first stack is called the "data stack" and is used instead of
|
||||
* registers, as already described.
|
||||
*
|
||||
* The second stack will be called the "return stack". This one holds
|
||||
* subroutine return addresses. Don't worry if you don't know what that
|
||||
|
@ -141,6 +152,7 @@ impl<const N: usize> Stack<N> {
|
|||
|
||||
struct Core {
|
||||
ram: [u8; ADDRESS_SPACE],
|
||||
|
||||
/* In our memory, each of the 65536 possible memory addresses will store
|
||||
* one 8-bit byte (u8 data type in Rust). This makes it a 65536 byte
|
||||
* (64 KB) memory.
|
||||
|
@ -151,6 +163,7 @@ struct Core {
|
|||
* Instead we are going with the "byte-addressed memory" that is more
|
||||
* conventional in today's computers. This choice is arbitrary.
|
||||
*/
|
||||
|
||||
ip: u16, /* instruction pointer */
|
||||
dstack: Stack<16>, /* data stack */
|
||||
rstack: Stack<32> /* return stack */
|
||||
|
@ -164,11 +177,13 @@ fn new_core() -> Core {
|
|||
ip: 0,
|
||||
dstack: Stack {tos: 15, mem: [0; 16]},
|
||||
rstack: Stack {tos: 31, mem: [0; 32]}};
|
||||
|
||||
/* Because these are circular stacks it doesn't matter where top-of-stack
|
||||
* starts off pointing. I arbitrarily set it to the highest index so
|
||||
* the first value pushed will wind up at index 0, again because this
|
||||
* makes the stack look nicer when printed out.
|
||||
*/
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
@ -202,13 +217,14 @@ impl Core {
|
|||
/* Helper to read a number from the specified memory address. */
|
||||
fn load(&self, addr: u16) -> u16 {
|
||||
let a = addr as usize;
|
||||
|
||||
/* We immediately run into trouble because we are using byte-addressed
|
||||
* memory as mentioned earlier.
|
||||
*
|
||||
* Each memory location stores 8 bits (a byte)
|
||||
*
|
||||
* Our CPU operates on 16 bit values and we want each memory operation
|
||||
* to take 16 bits at a time for efficiency reasons.
|
||||
* to read/write 16 bits at a time for efficiency reasons.
|
||||
*
|
||||
* What do we do?
|
||||
*
|
||||
|
@ -236,7 +252,9 @@ impl Core {
|
|||
*
|
||||
* This is called "big endian" because the high byte comes first.
|
||||
*/
|
||||
|
||||
return u16::from_le_bytes(self.ram[a..=a+1].try_into().unwrap());
|
||||
|
||||
/* The le in this function call stands for little-endian. */
|
||||
}
|
||||
|
||||
|
@ -316,7 +334,7 @@ impl Core {
|
|||
* 104 [5] []
|
||||
*
|
||||
* The return stack is there to make sure that returning from a subroutine
|
||||
* picks up where the caller left off. We will talk more about the return
|
||||
* goes back to where it came from. We will talk more about the return
|
||||
* stack later when we talk about the RET instruction.
|
||||
*
|
||||
* Limitations of CALL:
|
||||
|
@ -366,8 +384,8 @@ impl Core {
|
|||
* Program will often need to deal with constant numbers.
|
||||
* For example, you might want to add 2 to a memory address (to move
|
||||
* on to the next even-numbered address) or add 32 to a character code
|
||||
* (to convert it to lower-case). These constants have to come from
|
||||
* somewhere.
|
||||
* (to convert an uppercase letter to lowercase). These constants have
|
||||
* to come from somewhere.
|
||||
*
|
||||
* Limitations of LITERAL:
|
||||
* -----------------------
|
||||
|
@ -381,16 +399,22 @@ impl Core {
|
|||
|
||||
/* Now that the instruction set is generally described
|
||||
* let's look at the code that implements it */
|
||||
|
||||
fn step(&mut self) {
|
||||
|
||||
/* 1. Fetch the instruction.
|
||||
* Also advance ip to point at the next instruction for next time. */
|
||||
|
||||
let opcode = self.load(self.ip);
|
||||
self.ip = self.ip.wrapping_add(2);
|
||||
|
||||
/* 2. Decode and execute the instruction */
|
||||
|
||||
if (opcode >= 0xffe0) && (opcode & 1 == 0) {
|
||||
/* Data processing instruction */
|
||||
|
||||
PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self);
|
||||
|
||||
/* These instructions get looked up in a table. The bit
|
||||
* math converts the instruction code into an index in the
|
||||
* table as follows:
|
||||
|
@ -453,6 +477,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
let v = x.dstack.pop();
|
||||
x.store(a, v);
|
||||
},
|
||||
|
||||
/* Stack shuffling instructions
|
||||
*
|
||||
* Remember the problem of "register allocation" mentioned earlier,
|
||||
|
@ -465,21 +490,22 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* Their use will become more obvious when we start programming the
|
||||
* machine, soon.
|
||||
*/
|
||||
|
||||
| x | {
|
||||
// DUP - Duplicate the top number on the data stack
|
||||
/* DUP - Duplicate the top number on the data stack */
|
||||
let v = x.dstack.pop();
|
||||
x.dstack.push(v);
|
||||
x.dstack.push(v);
|
||||
},
|
||||
| x | {
|
||||
// SWP - Exchange the top two numbers on the data stack
|
||||
/* SWP - Exchange the top two numbers on the data stack */
|
||||
let v1 = x.dstack.pop();
|
||||
let v2 = x.dstack.pop();
|
||||
x.dstack.push(v1);
|
||||
x.dstack.push(v2);
|
||||
},
|
||||
| x | {
|
||||
// DRP - Discard the top number on the data stack
|
||||
/* DRP - Discard the top number on the data stack */
|
||||
let _ = x.dstack.pop();
|
||||
},
|
||||
/* Conditional skip instruction */
|
||||
|
@ -491,16 +517,18 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* has. This means that all "if-then" logic, counted loops, etc.
|
||||
* will be built using Q.
|
||||
*/
|
||||
|
||||
let f = x.dstack.pop();
|
||||
if f == 0 {
|
||||
x.ip = x.ip.wrapping_add(2)
|
||||
|
||||
/* Because all of our instructions are two bytes, adding two
|
||||
* to the instruction pointer skips the next instruction. */
|
||||
};
|
||||
},
|
||||
/* Arithmetic and logic */
|
||||
| x | {
|
||||
// ADD - Sum the top two numbers on the data stack.
|
||||
/* ADD - Sum the top two numbers on the data stack. */
|
||||
let v1 = x.dstack.pop();
|
||||
let v2 = x.dstack.pop();
|
||||
x.dstack.push(v1.wrapping_add(v2));
|
||||
|
@ -509,6 +537,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
/* SFT - Bit shift number left or right by the specified amount.
|
||||
* A positive shift amount will shift left, negative will shift right.
|
||||
*/
|
||||
|
||||
let amt = x.dstack.pop();
|
||||
let val = x.dstack.pop();
|
||||
x.dstack.push(
|
||||
|
@ -534,6 +563,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
| x | { // INV - Bitwise-invert the top number on the data stack.
|
||||
let v1 = x.dstack.pop();
|
||||
x.dstack.push(!v1);
|
||||
|
||||
/* You can use the INV instruction to compensate for the LITERAL
|
||||
* instruction's inability to encode constants 32768 to 65535.
|
||||
* Use two instructions instead:
|
||||
|
@ -550,6 +580,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
let v1 = x.dstack.pop();
|
||||
x.dstack.push(if v1 >= v2 { 0xffff } else { 0 });
|
||||
},
|
||||
|
||||
/* Input/output.
|
||||
*
|
||||
* The CPU needs some way to communicate with the outside world.
|
||||
|
@ -569,13 +600,16 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* This is a fake software CPU so I am going to hook it up to
|
||||
* stdin and stdout.
|
||||
*/
|
||||
|
||||
| x | { // IO - Write/read a number from/to input/output port.
|
||||
let port = x.dstack.pop();
|
||||
/* I'm loosely following a convention where even ports are inputs
|
||||
|
||||
/* I'm loosely following a pattern in which even ports are inputs
|
||||
* and odd ports are outputs. But each port acts different.
|
||||
* In a hardware CPU this would not be suitable but it is fine for
|
||||
* a software emulation.
|
||||
*/
|
||||
|
||||
match port {
|
||||
0 => {
|
||||
/* Push a character from stdin onto the data stack */
|
||||
|
@ -593,7 +627,9 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
}
|
||||
2 => {
|
||||
/* Dump CPU status.
|
||||
* Like the front panel on a 1960s-1970s minicomputer. */
|
||||
* Like the front panel with the blinking lights that Chuck
|
||||
* talked about. */
|
||||
|
||||
println!("{:?} {:?}", x.dstack, x.rstack);
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
|
@ -606,15 +642,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* Part 2 - The Program
|
||||
* ------------------------------------------------------------------------ */
|
||||
|
||||
/* "In a sense we're building a tree. We've now reached a
|
||||
* point where we can start making the roots. For a while
|
||||
* everything will be concealed but we'll eventually reach
|
||||
* daylight and start on branches."
|
||||
*
|
||||
* -- Chuck Moore, "Programming a Problem-Oriented Language", 1970
|
||||
* https://colorforth.github.io/POL.htm
|
||||
*
|
||||
* You now have an unfamiliar computer with no software. Can you and the
|
||||
/* You now have an unfamiliar computer with no software. Can you and the
|
||||
* computer write a program?
|
||||
*
|
||||
* The first program is the hardest to write because you don't have any tools to
|
||||
|
@ -646,7 +674,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* address for you and call it.
|
||||
*
|
||||
* The dictionary starts at a low address and grows towards high addresses.
|
||||
* It is organized like a linked-list, like this:
|
||||
* It is organized as a linked list, like this:
|
||||
*
|
||||
* [Link field][Name][Code .......... ]
|
||||
* ^
|
||||
|
@ -673,7 +701,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* of the CPU instructions is "call". So you can have a subroutine
|
||||
* that call other subroutines, or calls itself.
|
||||
*
|
||||
* This code must end with a return (RET) instruction.
|
||||
* This code should end with a return (RET) instruction.
|
||||
*
|
||||
* Example subroutine:
|
||||
*
|
||||
|
@ -707,8 +735,6 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* | |
|
||||
* [Dictionary pointer] [Here]
|
||||
*
|
||||
* Got all that?
|
||||
*
|
||||
* To create our Forth interactive programmming environment, we will start
|
||||
* by defining subroutines that:
|
||||
* - read names from the keyboard
|
||||
|
@ -717,7 +743,7 @@ const PRIMITIVES: [Primitive; 16] = [
|
|||
* We will put these subroutines themselves in the dictionary so they are
|
||||
* available for use once our interactive environment is up and running!
|
||||
*
|
||||
* If you were sitting in front of a microcomputer in 196x you would need
|
||||
* If you were sitting in front of a minicomputer in 196x you would need
|
||||
* to create the dictionary with pencil and paper, but in 20xx we will
|
||||
* write a Rust program to help create the dictionary.
|
||||
*
|
||||
|
@ -743,19 +769,23 @@ impl From<u16> for Item { fn from(a: u16) -> Self { Item::Call(a) } }
|
|||
impl From<Op> for Item { fn from(o: Op) -> Self { Item::Opcode(o) } }
|
||||
|
||||
impl Dict<'_> {
|
||||
|
||||
/* Helper to reserve space in the dictionary by advancing the "here"
|
||||
* pointer */
|
||||
|
||||
fn allot(&mut self, n: u16) {
|
||||
self.here = self.here.wrapping_add(n);
|
||||
}
|
||||
|
||||
/* Helper to append a 16 bit integer to the dictionary */
|
||||
|
||||
fn comma(&mut self, val: u16) {
|
||||
self.c.store(self.here, val);
|
||||
self.allot(2);
|
||||
}
|
||||
|
||||
/* Helper to append a CPU instruction to the dictionary */
|
||||
|
||||
fn emit<T: Into<Item>>(&mut self, val: T) {
|
||||
match val.into() {
|
||||
Item::Call(val) => { self.comma(val) }
|
||||
|
@ -786,6 +816,7 @@ impl Dict<'_> {
|
|||
* of how many letters are in the name, and the link field is two bytes.
|
||||
* This means a dictionary header in this Forth is always six bytes.
|
||||
*/
|
||||
|
||||
fn name(&mut self, n: u8, val: [u8; 3]) {
|
||||
/* Store the length and the first character */
|
||||
self.comma(n as u16 | ((val[0] as u16) << 8));
|
||||
|
@ -795,6 +826,7 @@ impl Dict<'_> {
|
|||
|
||||
/* Helper to append a new link field to the dictionary and update the
|
||||
* dictionary pointer appropriately. */
|
||||
|
||||
fn entry(&mut self) {
|
||||
let here = self.here;
|
||||
self.comma(self.dp);
|
||||
|
@ -809,6 +841,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
let mut d = Dict {
|
||||
dp: 0, /* Nothing in the dictionary yet */
|
||||
|
||||
here: 2, /* Reserve address 0 as an "entry point", i.e. where the
|
||||
CPU will jump to start running Forth. We don't have a
|
||||
Forth interpreter yet so we'll leave address 0 alone for
|
||||
|
@ -846,6 +879,9 @@ fn build_dictionary(c: &mut Core) {
|
|||
* at address 1250
|
||||
* RET 65504 Return to caller
|
||||
*
|
||||
* Both of these are valid machine code programs (list of numbers that
|
||||
* our CPU can directly execute).
|
||||
*
|
||||
* This duality between CPU instructions and Forth code comes from
|
||||
* an idea called "subroutine threading". It is a refinement of an
|
||||
* idea called "threaded code". This has no relation to the kind of
|
||||
|
@ -855,9 +891,8 @@ fn build_dictionary(c: &mut Core) {
|
|||
*
|
||||
* Our new language starts out with the sixteen (well, eighteen)
|
||||
* instructions built into the CPU. We can string those instructions
|
||||
* together into a new subroutine. This subroutine adds to the pool
|
||||
* of functions we have available for making new subroutines.
|
||||
*
|
||||
* together into a new subroutine. Each new subroutine adds to the
|
||||
* toolbox we have available for making the next new subroutine.
|
||||
* Repeat until you have built what you wanted to build, via
|
||||
* function composition. This is the idea behind Forth.
|
||||
*/
|
||||
|
@ -923,6 +958,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
RET /* Compile a RET instruction */
|
||||
);
|
||||
|
||||
/* We have now compiled the "key" subroutine into the dictionary.
|
||||
* [Link field][Name][Code .......... ]
|
||||
* 0000 3key 1, 65534, 65504
|
||||
|
@ -984,6 +1020,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
RET /* Done, return to caller, leaving n on the data stack. */
|
||||
);
|
||||
|
||||
/* Writing it out like that takes a lot of space. Normally Forth code
|
||||
* is written on a single line, like this:
|
||||
*
|
||||
|
@ -1132,16 +1169,18 @@ fn build_dictionary(c: &mut Core) {
|
|||
* real Forth systems. My dialect of Forth will use it extensively to work
|
||||
* around my CPU's lack of conditional jump.
|
||||
*
|
||||
* Now we've explained how 0= is going to work, let's make it.
|
||||
* Now we've explained how 0= is going to work, let's write it.
|
||||
*/
|
||||
|
||||
/* First we define the helper. It won't be reused, so I am not going
|
||||
* to bother giving it a dictionary header and name for easy lookup later.
|
||||
* Think of it as a private function. */
|
||||
|
||||
let zero = d.here;
|
||||
forth!(Literal(0), RTO, DRP, RET);
|
||||
|
||||
/* Now define 0= using the helper. */
|
||||
|
||||
// 0= ( n -- f )
|
||||
d.entry(); d.name(2, *b"0= "); let zero_eq = d.here;
|
||||
forth!(Q, zero, Literal(0), INV, RET);
|
||||
|
@ -1150,9 +1189,11 @@ fn build_dictionary(c: &mut Core) {
|
|||
* I call it an "operator" because that's what other languages would
|
||||
* call it, but Forth has no special idea of an "operator". Everything
|
||||
* is just words. */
|
||||
|
||||
// = ( a b -- a=b )
|
||||
d.entry(); d.name(1, *b"= "); let eq = d.here;
|
||||
forth!(sub, zero_eq, RET);
|
||||
|
||||
/* Note that 0= and subtract are both words, not CPU instructions.
|
||||
* This makes = the first "pure" Forth word we have defined, with no
|
||||
* direct dependency on the machine's instruction set.
|
||||
|
@ -1181,17 +1222,17 @@ fn build_dictionary(c: &mut Core) {
|
|||
* 2. Read characters into a buffer until whitespace is seen again.
|
||||
*/
|
||||
|
||||
/* Let's start with the "advance past whitespace" part
|
||||
/* Let's start with the "advance past leading whitespace" part
|
||||
*
|
||||
* The "key" word gives us the latest keystroke as an ASCII code.
|
||||
* (Really it is reading utf-8 characters one byte at a time but let's
|
||||
* not get into that right now, it's 196x and utf-8 hasn't been invented
|
||||
* yet.)
|
||||
* not get into that right now, pretend the year is 196x, we're sitting
|
||||
* in front of a minicomputer and and utf-8 hasn't been invented yet.)
|
||||
*
|
||||
* ASCII codes 0 to 32 are whitespace or control characters. Codes
|
||||
* 33 and up are letters, numbers and symbols. So to skip whitespace
|
||||
* all you need to do is read keys until you get an ASCII code >= 33,
|
||||
* then return that, to tell the rest of the program what key code you
|
||||
* then return that to tell the rest of the program what key code you
|
||||
* saw.
|
||||
*
|
||||
* In Rust this could be implemented as:
|
||||
|
@ -1218,7 +1259,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
* ret
|
||||
*
|
||||
* (More alarm bells should be ringing in your head because this is
|
||||
* using conditional jump.)
|
||||
* using conditional jump, which our CPU doesn't have.)
|
||||
*
|
||||
* Like last time, is there a way to solve this without conditional
|
||||
* jump?
|
||||
|
@ -1245,9 +1286,9 @@ fn build_dictionary(c: &mut Core) {
|
|||
*
|
||||
* This code almost works but there is still something wrong with it.
|
||||
* Youll notice we were careful to make sure "skipws" removed all items
|
||||
* it added to the data stack, before it called itself.
|
||||
* it added to the data stack, before it called itself. Its last two
|
||||
* lines were:
|
||||
*
|
||||
* ...
|
||||
* DRP <-- Discard c from the stack
|
||||
* skipws <-- Call skipws again
|
||||
*
|
||||
|
@ -1267,12 +1308,13 @@ fn build_dictionary(c: &mut Core) {
|
|||
* our CPU's data stack only has room for 16 numbers.
|
||||
*
|
||||
* For these reasons it's better to leave the data stack as we found it,
|
||||
* when we do a recursive call. That is the reason for the DRP skipws.
|
||||
* The last call does not DRP so it is the only one that will leave
|
||||
* something on the data stack -- the last key read.
|
||||
* when we do a recursive call. That is the reason the last two lines are
|
||||
* DRP, skipws -- it's to stop items building up on the data stack. The
|
||||
* final pass through this function goes down a different path that does
|
||||
* not DRP, so it leaves something on the data stack -- the last key read.
|
||||
*
|
||||
* The problem skipws still has, is that we haven't taken the same care
|
||||
* with its return address.
|
||||
* with its return stack.
|
||||
*
|
||||
* At the first line of skipws the return stack looks like this:
|
||||
* ( caller )
|
||||
|
@ -1332,7 +1374,8 @@ fn build_dictionary(c: &mut Core) {
|
|||
*
|
||||
* Our CPU has a fixed-size circular return stack that can hold 32 numbers.
|
||||
* What happens if you loop 32 times or more? The return stack fills up
|
||||
* with the useless "x" addresses and the address of caller is lost.
|
||||
* completely with the useless "x" addresses, and the address of caller
|
||||
* is lost.
|
||||
*
|
||||
* recursive call N : return stack: ( caller x x x ... x )
|
||||
* recursive call N+1: return stack: ( x x x x ... x ) :-(
|
||||
|
@ -1349,19 +1392,18 @@ fn build_dictionary(c: &mut Core) {
|
|||
* The most common solution is called "tail call optimization".
|
||||
* If a function's last instruction is a recursive call, that call can be
|
||||
* replaced with a jump. On paper this doesn't work very well on our
|
||||
* computer:
|
||||
* computer, for two reasons:
|
||||
*
|
||||
* - We have no jump, only call.
|
||||
* 1. Our CPU has no jump, only call.
|
||||
*
|
||||
* - Our assembler, and eventually our interactive environment, would need
|
||||
* to be smart enough to emit a call sometimes and a jump other times.
|
||||
* 2. Our assembler, and eventually our interactive environment, would need
|
||||
* to be smart enough to emit a call sometimes and a jump other times.
|
||||
* This is the same "look-ahead" problem that we saw with forward
|
||||
* references -- you don't know that a given CALL will be followed by a
|
||||
* RET, unless you can see the future.
|
||||
*
|
||||
* This is the same "look-ahead" problem that we saw with forward
|
||||
* references -- you don't know that a given CALL will be followed by a
|
||||
* RET, unless you can see the future.
|
||||
*
|
||||
* Earlier we decided to keep our assembler very dumb so it would be
|
||||
* weird to start making it smart now.
|
||||
* Earlier we decided to keep our assembler very dumb so it would be
|
||||
* weird to start making it smart now.
|
||||
*
|
||||
* So what are we going to do?
|
||||
*
|
||||
|
@ -1377,7 +1419,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
*
|
||||
* So now recursive calls will leave the return-stack as they found it,
|
||||
* which is good! We don't have the useless-x problem any more.
|
||||
* Unfortunately, the first pass through skipws pops off the original
|
||||
* Unfortunately, the first pass through skipws discards the original
|
||||
* caller's return address, which we wanted to keep. There is a quick
|
||||
* hack around that problem: wrap skipws in another subroutine, and
|
||||
* always call it through that wrapper.
|
||||
|
@ -1390,20 +1432,19 @@ fn build_dictionary(c: &mut Core) {
|
|||
*
|
||||
* Finally we are able to write loops, and we did not even need to add
|
||||
* anything to our language or CPU to get that to work, we just needed to
|
||||
* look at things differently.
|
||||
* look at things differently. Learning to look at things differently is a
|
||||
* big part of the Forth philosophy.
|
||||
*
|
||||
* Learning to look at things differently is a big part of the Forth
|
||||
* philosphy.
|
||||
*
|
||||
* We'll see a better way of solving this problem later, but for now this
|
||||
* is good enough and we can get back to solving our original problem,
|
||||
* skipping whitespace.
|
||||
* We'll see a better way of solving this problem later, in the file
|
||||
* frustration.4th, but for now this is good enough and we can get back to
|
||||
* solving our original problem, skipping whitespace.
|
||||
*/
|
||||
|
||||
/* You should now understand what the next two functions are doing
|
||||
* because we just talked about them at length. In the real program
|
||||
* I swapped the names of the two functions because I wanted to let the
|
||||
* wrapper have the friendly "skipws" name. */
|
||||
|
||||
let skip_helper = d.here;
|
||||
forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper);
|
||||
|
||||
|
@ -1429,8 +1470,9 @@ fn build_dictionary(c: &mut Core) {
|
|||
* little as possible, but it will soon be necessary.
|
||||
*
|
||||
* The CPU instruction SWP does stack shuffling by swapping the first
|
||||
* two values on the stack. We already have SWP (it's built into the CPU)
|
||||
* but I will write out its stack effect below as a recap of what it does.
|
||||
* two values on the data stack. We already have SWP (it's built into the
|
||||
* CPU) but I will write out its stack effect below as a recap of what it
|
||||
* does.
|
||||
*
|
||||
* SWP ( a b -- b a ).
|
||||
*
|
||||
|
@ -1463,6 +1505,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
* Start by setting aside the word input buffer. We'll format it as Nabcde
|
||||
* where N is the number of characters stored.
|
||||
*/
|
||||
|
||||
let word_buf = d.here;
|
||||
d.allot(6);
|
||||
|
||||
|
@ -1566,6 +1609,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
* This will tail-recursively call the function we just wrote, until
|
||||
* whitespace is seen again (a character code that is <= 32).
|
||||
*/
|
||||
|
||||
let getcs_helper = d.here;
|
||||
forth!(RTO, DRP, /* The "return-from-caller" trick */
|
||||
stchar,
|
||||
|
@ -1589,17 +1633,24 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
// word ( -- )
|
||||
d.entry(); d.name(4, *b"wor"); let word = d.here;
|
||||
forth!(Literal(word_buf), /* Address of word_buf */
|
||||
DUP, Literal(2), ADD, /* Address of word_buf + 2 */
|
||||
Literal(0x2020), SWP, ST, /* Set name byte 2, 1 to space */
|
||||
Literal(0x2000), SWP, ST, /* Set name byte 0 to space and
|
||||
forth!(
|
||||
Literal(word_buf), /* Address of word_buf */
|
||||
|
||||
DUP, Literal(2), ADD, /* Address of word_buf + 2 */
|
||||
|
||||
Literal(0x2020), SWP, ST, /* Set name bytes 2 and 1 to space */
|
||||
|
||||
Literal(0x2000), SWP, ST, /* Set name byte 0 to space and
|
||||
set length to zero */
|
||||
skipws, /* Lexer step 1, skip leading whitespace */
|
||||
getcs, /* Lexer step 2, read letters into buffer until whitespace
|
||||
|
||||
skipws, /* Lexer step 1, skip leading whitespace */
|
||||
|
||||
getcs, /* Lexer step 2, read letters into buffer until whitespace
|
||||
is seen again */
|
||||
DRP, /* We don't care what whitespace character was last seen
|
||||
|
||||
DRP, /* We don't care what whitespace character was last seen
|
||||
so drop it */
|
||||
RET);
|
||||
RET);
|
||||
|
||||
/* The lexer is now complete: we can read space-delimited words from
|
||||
* the keyboard.
|
||||
|
@ -1655,43 +1706,49 @@ fn build_dictionary(c: &mut Core) {
|
|||
/* Helper word ( a -- f )
|
||||
*/
|
||||
let matches = d.here;
|
||||
forth!(/* Stash the address of the name field by putting it on the
|
||||
* return stack */
|
||||
Literal(2), ADD, TOR,
|
||||
forth!(
|
||||
/* Stash the address of the name field by putting it on the
|
||||
* return stack
|
||||
*/
|
||||
Literal(2), ADD, TOR,
|
||||
|
||||
/* Load the 4 bytes at word_buf */
|
||||
Literal(word_buf), DUP, Literal(2), ADD, LD, SWP, LD,
|
||||
/* Load the 4 bytes at word_buf */
|
||||
Literal(word_buf), DUP, Literal(2), ADD, LD, SWP, LD,
|
||||
|
||||
/* Load the first 2 bytes of the name field */
|
||||
RTO, DUP, TOR, LD,
|
||||
/* Load the first 2 bytes of the name field */
|
||||
RTO, DUP, TOR, LD,
|
||||
|
||||
/* Compare to the first 2 bytes at word_buf.
|
||||
* Don't worry about that bitwise AND: it will be explained later
|
||||
* when we are adding "immediate" words to the outer interpreter. */
|
||||
Literal(0x0080), INV, AND, eq,
|
||||
/* Compare to the first 2 bytes at word_buf.
|
||||
* Don't worry about that bitwise AND: it will be explained later
|
||||
* when we are adding "immediate" words to the outer interpreter.
|
||||
*/
|
||||
Literal(0x0080), INV, AND, eq,
|
||||
|
||||
/* Compare the second 2 bytes of the name field to the second
|
||||
* 2 bytes at word_buf */
|
||||
SWP, RTO, Literal(2), ADD, LD, eq,
|
||||
/* Compare the second 2 bytes of the name field to the second
|
||||
* 2 bytes at word_buf
|
||||
*/
|
||||
SWP, RTO, Literal(2), ADD, LD, eq,
|
||||
|
||||
/* If both comparisons were true, return true, else return false */
|
||||
AND, RET);
|
||||
/* If both comparisons were true, return true, else return false */
|
||||
AND, RET);
|
||||
|
||||
/* Helper word ( a -- a' )
|
||||
*/
|
||||
let matched = d.here;
|
||||
forth!(Literal(6), ADD, /* Advance six bytes (the length of the dictionary
|
||||
header). This advances from the start of the
|
||||
header to the address of the code field. */
|
||||
forth!(
|
||||
Literal(6), ADD, /* Advance six bytes (the length of the dictionary
|
||||
header). This advances from the start of the
|
||||
header to the address of the code field. */
|
||||
|
||||
RTO, DRP, /* Return-from-caller */
|
||||
RET);
|
||||
RTO, DRP, /* Return-from-caller */
|
||||
RET);
|
||||
|
||||
let find_helper = d.here;
|
||||
forth!(RTO, DRP,
|
||||
DUP, Literal(0), eq, Q, RET, /* No match - return 0 */
|
||||
DUP, matches, Q, matched, /* Match - return code field address */
|
||||
LD, find_helper); /* Try the next one */
|
||||
forth!(
|
||||
RTO, DRP,
|
||||
DUP, Literal(0), eq, Q, RET, /* No match - return 0 */
|
||||
DUP, matches, Q, matched, /* Match - return the code address */
|
||||
LD, find_helper); /* Try the next one */
|
||||
|
||||
/* And find itself is just a wrapper around the tail-recursive
|
||||
* find_helper word. */
|
||||
|
@ -1717,7 +1774,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
* at the keyboard.
|
||||
*
|
||||
* Remember that an interactive programming environment needs to let you
|
||||
* do 2 things:
|
||||
* do two things:
|
||||
*
|
||||
* 1. Call subroutines by typing their name at the keyboard
|
||||
* 2. Define new subroutines in terms of existing ones
|
||||
|
@ -1812,7 +1869,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
// immediate ( -- )
|
||||
/* Set the "immediate" flag on the latest dictionary entry */
|
||||
d.entry(); d.name(9 | 0x80, *b"imm");
|
||||
d.entry(); d.name(9, *b"imm");
|
||||
forth!(word_addr, DUP, LD, Literal(0x0080), OR, SWP, ST, RET);
|
||||
|
||||
/* Now we can define words to switch between interpreting and compiling
|
||||
|
@ -1835,7 +1892,8 @@ fn build_dictionary(c: &mut Core) {
|
|||
forth!(Literal(0), state, ST, RET);
|
||||
|
||||
/* By setting / unsetting a different bit of the name field we can
|
||||
* temporarily hide a word from name lookups. */
|
||||
* temporarily hide a word from name lookups. We will talk more
|
||||
* about this later. */
|
||||
|
||||
// smudge ( -- )
|
||||
d.entry(); d.name(6 | 0x80, *b"smu"); let smudge = d.here;
|
||||
|
@ -1847,7 +1905,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
/* Now let's make a word that appends to the dictionary.
|
||||
* We have had a Rust helper function for this for a long time.
|
||||
* This is the same thing but callable from the Forth side. */
|
||||
* The word below is the same thing but callable from Forth. */
|
||||
|
||||
// , ( n -- )
|
||||
d.entry(); d.name(1, *b", "); let comma = d.here;
|
||||
|
@ -1868,14 +1926,15 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
// x10 ( n -- n*10 )
|
||||
d.entry(); d.name(3, *b"x10"); let x10 = d.here;
|
||||
forth!(DUP, DUP, Literal(3), SFT, /* Find n*8 */
|
||||
ADD, ADD, /* (n*8) + n + n = (n*10) */
|
||||
RET);
|
||||
forth!(
|
||||
DUP, DUP, Literal(3), SFT, /* Find n*8 */
|
||||
ADD, ADD, /* (n*8) + n + n = (n*10) */
|
||||
RET);
|
||||
|
||||
/* Now we can write a word that goes through the input buffer
|
||||
* character by character and converts it to an integer on the stack. */
|
||||
|
||||
// Helper function to clear junk off the stack.
|
||||
/* Helper function to clear junk off the stack. */
|
||||
let end_num = d.here;
|
||||
forth!(DRP, RTO, DRP, RET);
|
||||
|
||||
|
@ -1892,7 +1951,8 @@ fn build_dictionary(c: &mut Core) {
|
|||
|
||||
/* If the character is not in the range 48 to 57
|
||||
* (which are the character codes for '0' to '9')
|
||||
* then this is not a number, so return the error code -1 (65535) */
|
||||
* then this is not a number, so return the error code -1 (65535)
|
||||
*/
|
||||
Literal(48), sub, DUP, Literal(10), GEQ, Q, bad_num,
|
||||
SWP, TOR, SWP, x10, ADD, RTO,
|
||||
|
||||
|
@ -1915,11 +1975,14 @@ fn build_dictionary(c: &mut Core) {
|
|||
forth!(
|
||||
/* If we are in interpreting mode, */
|
||||
state, LD,
|
||||
|
||||
/* then exit immediately, leaving this number on the stack. */
|
||||
Q, RET,
|
||||
|
||||
/* Otherwise, turn it into a LITERAL instruction and append that
|
||||
* to the dictionary, */
|
||||
lit,
|
||||
|
||||
/* and then return-from-caller. */
|
||||
RTO, DRP, RET);
|
||||
|
||||
|
@ -1928,12 +1991,16 @@ fn build_dictionary(c: &mut Core) {
|
|||
forth!(
|
||||
/* If this is an immediate word, */
|
||||
DUP, Literal(4), sub, LD, Literal(0x0080), AND,
|
||||
|
||||
/* or if we are in interpreting mode, */
|
||||
state, LD, OR,
|
||||
|
||||
/* then we should execute this word, not compile it. */
|
||||
Q, RET,
|
||||
|
||||
/* Otherwise, compile it by appending its address to the dictionary, */
|
||||
comma,
|
||||
|
||||
/* and then return-from-caller. */
|
||||
RTO, DRP, RET);
|
||||
|
||||
|
@ -1947,14 +2014,16 @@ fn build_dictionary(c: &mut Core) {
|
|||
forth!(
|
||||
/* When this function concludes, return-from-caller. */
|
||||
RTO, DRP,
|
||||
/* If this word should be compiled it, compile it, */
|
||||
|
||||
/* If this word should be compiled, compile it, */
|
||||
try_compile_call,
|
||||
|
||||
/* otherwise, execute it. */
|
||||
execute, RET);
|
||||
|
||||
/* Forth can have very good error handling. This Forth does not.
|
||||
* If we try to look up a word in the dictionary and can't find it,
|
||||
* and if the word also can't be parsed as an integer,
|
||||
* and if the word also can't be parsed as an number,
|
||||
* then we print out a ? and move on to the next word.
|
||||
*
|
||||
* This helper function does some stack cleanup, prints the ?, then
|
||||
|
@ -1970,9 +2039,11 @@ fn build_dictionary(c: &mut Core) {
|
|||
forth!(
|
||||
/* If the word was found in the dictionary, treat it as a word. */
|
||||
DUP, Q, do_word,
|
||||
|
||||
/* If it wasn't found in the dictionary, try to parse it as a number.
|
||||
* If it isn't a number, flag it as an error. */
|
||||
DRP, number, DUP, Literal(1), ADD, zero_eq, Q, bad,
|
||||
|
||||
/* If it is a number, treat it as a number. */
|
||||
try_compile_lit, RET);
|
||||
|
||||
|
@ -2008,8 +2079,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
* 1. Call subroutines by typing their name at the keyboard
|
||||
* 3. Push numbers onto the data stack by typing them at the keyboard
|
||||
*
|
||||
* But there are still a few more words we'll need if we want to
|
||||
* interactively define new subroutines as specified by:
|
||||
* But there are still a few more words we'll need if we want to:
|
||||
*
|
||||
* 2. Define new subroutines in terms of existing ones
|
||||
*
|
||||
|
@ -2025,8 +2095,10 @@ fn build_dictionary(c: &mut Core) {
|
|||
latest, LD, comma, /* emit the link field */
|
||||
latest, ST, /* point "latest" at us */
|
||||
word, /* read a word from the keyboard */
|
||||
|
||||
/* emit the name field (by copying it from the input buffer) */
|
||||
Literal(word_buf), DUP, LD, comma, Literal(2), ADD, LD, comma,
|
||||
|
||||
RET);
|
||||
|
||||
/* And now, here is the word to compile a new Forth word. */
|
||||
|
@ -2036,9 +2108,11 @@ fn build_dictionary(c: &mut Core) {
|
|||
forth!(
|
||||
/* Read name from keyboard, create dictionary header */
|
||||
create,
|
||||
|
||||
/* Hide the word until we are done defining it. This lets us
|
||||
* redefine a word in terms of a previous incarnation of itself. */
|
||||
smudge,
|
||||
|
||||
/* Switch to compiling mode */
|
||||
rbracket,
|
||||
RET);
|
||||
|
@ -2051,7 +2125,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
d.entry(); d.name(1 | 0x80, *b"; ");
|
||||
forth!(
|
||||
/* Emit a RET instruction. RET = 65504 which is outside of the
|
||||
* Literal instruction's 0 to 32767 range, so you have to store the
|
||||
* LITERAL instruction's 0 to 32767 range, so you have to store the
|
||||
* inverse and use INV to swap it back. */
|
||||
Literal(!(RET as u16)), INV, comma,
|
||||
|
||||
|
@ -2067,7 +2141,8 @@ fn build_dictionary(c: &mut Core) {
|
|||
* interactively from Forth. Instructions that modify the return stack
|
||||
* need special care, because otherwise they will mess up the
|
||||
* wrapper we created for them, instead of acting on the caller
|
||||
* the way they are supposed to. */
|
||||
* the way they are supposed to.
|
||||
*/
|
||||
d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET);
|
||||
d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET);
|
||||
d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET);
|
||||
|
@ -2077,7 +2152,7 @@ fn build_dictionary(c: &mut Core) {
|
|||
d.entry(); d.name(4, *b"swa"); forth!(SWP, RET);
|
||||
d.entry(); d.name(4, *b"dro"); forth!(DRP, RET);
|
||||
|
||||
d.entry(); d.name(1 | 0x80, *b"? "); // This one only works in-line.
|
||||
d.entry(); d.name(1 | 0x80, *b"? "); /* This one only works in-line. */
|
||||
forth!(Literal(!(Q as u16)), INV, comma, RET);
|
||||
|
||||
d.entry(); d.name(1, *b"+ "); forth!(ADD, RET);
|
||||
|
@ -2089,14 +2164,17 @@ fn build_dictionary(c: &mut Core) {
|
|||
d.entry(); d.name(2, *b"io "); forth!(IO, RET);
|
||||
|
||||
/* Update Forth's "latest" and "here" variables to match the ones
|
||||
* we've been tracking in Rust */
|
||||
* we've been tracking in Rust.
|
||||
*/
|
||||
d.c.store(latest_ptr, d.dp);
|
||||
d.c.store(here_ptr, d.here);
|
||||
|
||||
/* Start out in interpreting mode. */
|
||||
/* Start out in interpreting mode.
|
||||
*/
|
||||
d.c.store(state_ptr, 0xffff);
|
||||
|
||||
/* The "entry point" should be the top level interpreter word "quit". */
|
||||
/* The "entry point" should be the top level interpreter word "quit".
|
||||
*/
|
||||
d.c.store(0, quit);
|
||||
}
|
||||
|
||||
|
@ -2114,8 +2192,6 @@ fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
/* We're not done. */
|
||||
|
||||
/* "The next step is a problem-oriented-language. By permitting
|
||||
* the program to dynamically modify its control language, we
|
||||
* mark a qualitative change in capability. We also change our
|
||||
|
@ -2125,11 +2201,10 @@ fn main() {
|
|||
* solution."
|
||||
*
|
||||
* -- Chuck Moore, "Programming a Problem-Oriented Language", 1970
|
||||
* https://colorforth.github.io/POL.htm
|
||||
*/
|
||||
|
||||
/* The next step is to start programming in "real" Forth, not a weird
|
||||
* macro language inside Rust.
|
||||
/* Now we can start programming in "real" Forth, not a weird macro language
|
||||
* inside Rust.
|
||||
*
|
||||
* You can compile this Forth with:
|
||||
* rustc frustration.rs
|
||||
|
|
Loading…
Reference in a new issue