From 07fee611a64d0c46b94c9f531378642971828d58 Mon Sep 17 00:00:00 2001 From: psf Date: Sat, 21 May 2022 01:50:46 -0700 Subject: [PATCH] Writeup, part 2, partially done --- frustration.rs | 582 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 514 insertions(+), 68 deletions(-) diff --git a/frustration.rs b/frustration.rs index 292795a..8f4a716 100644 --- a/frustration.rs +++ b/frustration.rs @@ -62,18 +62,17 @@ const ADDRESS_SPACE: usize = 65536; * * In most CPUs, this place is called a "register". Registers work like * variables in a programming language but there are only a few of them - * (between 1 to 32 depending on your CPU). + * (most CPUs have between 1 and 32). * * On 64-bit ARM the registers are named r0, r1, ..., r15. - * On 64-bit Intel they are instead named rax, rbx, ..., etc. + * On 64-bit Intel they are instead named rax, rbx, .... + * Just in case those names ring any bells. * - * Having immediate access to many registers is quite handy, but it means + * Having immediate access to dozens of registers is quite handy, but it means * many choices are available to the programmer, or more likely, to the - * compiler. And making good choices is Hard. - * - * A lot of work goes into deciding what variable to store in what register - * ("register allocation") and when to dump register contents back into - * memory ("spilling"). + * compiler. And making good choices is Hard. A lot of work goes into + * deciding what variable to store in what register ("register allocation") and + * when to dump register contents back into memory ("spilling"). * * Our CPU avoids these problems by not having registers; instead we store * numbers in a stack. The CPU can only access the value that was most @@ -86,14 +85,14 @@ const ADDRESS_SPACE: usize = 65536; #[derive(Debug)] struct Stack { mem: [u16; N], - tos: usize // top-of-stack. + tos: usize /* top-of-stack */ } impl Stack { - // Add a number to the stack. + /* Add a number to the stack. 
*/ fn push(&mut self, val: u16) { self.tos = (self.tos.wrapping_add(1)) & (N - 1); - /* This stack is fixed-sized and can hold N values. + /* This stack is fixed-size and can hold N values. * * When a fixed-size stack fills up, there is a failure case * (stack overflow) that must be handled somehow. @@ -106,7 +105,7 @@ impl Stack { self.mem[self.tos] = val; } - // Return the most recently pushed number. + /* Return the most recently pushed number. */ fn pop(&mut self) -> u16 { let val = self.mem[self.tos]; self.mem[self.tos] = 0; @@ -152,12 +151,12 @@ struct Core { * Instead we are going with the "byte-addressed memory" that is more * conventional in today's computers. This choice is arbitrary. */ - ip: u16, // instruction pointer - dstack: Stack<16>, // data stack - rstack: Stack<32> // return stack + ip: u16, /* instruction pointer */ + dstack: Stack<16>, /* data stack */ + rstack: Stack<32> /* return stack */ } -/* Function to initialize the cpu. +/* Helper to initialize the cpu. * There is probably a better idiom for this but I am bad at rust */ fn new_core() -> Core { let c = Core { @@ -200,7 +199,7 @@ fn new_core() -> Core { */ impl Core { - /* Helper function - Read a number from the specified memory address. */ + /* Helper to read a number from the specified memory address. */ fn load(&self, addr: u16) -> u16 { let a = addr as usize; /* We immediately run into trouble because we are using byte-addressed @@ -241,7 +240,7 @@ impl Core { /* The le in this function call stands for little-endian. */ } - /* Helper function - Write a number to the specified memory address. */ + /* Helper to write a number to the specified memory address. */ fn store(&mut self, addr: u16, val: u16) { let a = addr as usize; self.ram[a..=a+1].copy_from_slice(&val.to_le_bytes()); @@ -383,14 +382,14 @@ impl Core { /* Now that the instruction set is generally described * let's look at the code that implements it */ fn step(&mut self) { - // 1. Fetch the instruction. 
- // Also advance ip to point at the next instruction for next time. + /* 1. Fetch the instruction. + * Also advance ip to point at the next instruction for next time. */ let opcode = self.load(self.ip); self.ip = self.ip.wrapping_add(2); - // 2. Decode and execute the instruction + /* 2. Decode and execute the instruction */ if (opcode >= 0xffe0) && (opcode & 1 == 0) { - // Data processing instruction + /* Data processing instruction */ PRIMITIVES[((opcode - 0xffe0) >> 1) as usize](self); /* These instructions get looked up in a table. The bit * math converts the instruction code into an index in the @@ -406,18 +405,18 @@ impl Core { */ } else if (opcode & 1) == 1 { - // Literal + /* Literal */ self.dstack.push(opcode >> 1); } else { - // Call + /* Call */ self.rstack.push(self.ip); self.ip = opcode; } } } -// The names of the 16 remaining CPU instructions +/* The names of the 16 remaining CPU instructions */ enum Op { RET = 0xffe0, TOR = 0xffe2, RTO = 0xffe4, LD = 0xffe6, ST = 0xffe8, DUP = 0xffea, SWP = 0xffec, DRP = 0xffee, @@ -427,29 +426,29 @@ enum Op { type Primitive = fn(&mut Core); -// A table of functions for each of the 16 remaining CPU instructions +/* A table of functions for each of the 16 remaining CPU instructions */ const PRIMITIVES: [Primitive; 16] = [ /* Return-stack instructions */ | x | { - // RET - Return from subroutine + /* RET - Return from subroutine */ x.ip = x.rstack.pop() }, | x | { - // TOR - Transfer number from data stack to return stack + /* TOR - Transfer number from data stack to return stack */ x.rstack.push(x.dstack.pop()) }, | x | { - // RTO - Transfer number from return stack to data stack + /* RTO - Transfer number from return stack to data stack */ x.dstack.push(x.rstack.pop()) }, /* Memory instructions */ | x | { - // LD - Load number from memory address specified on the data stack + /* LD - Load number from memory address specified on the data stack */ let a = x.dstack.pop(); x.dstack.push(x.load(a)); }, | x | { - // ST - 
Store number to memory address specified on the data stack + /* ST - Store number to memory address specified on the data stack */ let a = x.dstack.pop(); let v = x.dstack.pop(); x.store(a, v); @@ -607,43 +606,133 @@ const PRIMITIVES: [Primitive; 16] = [ * Part 2 - The Program * ------------------------------------------------------------------------ */ -/* You now have an unfamiliar computer with no software. It sits there doing - * nothing. Can you and the computer write a program? +/* "In a sense we're building a tree. We've now reached a + * point where we can start making the roots. For a while + * everything will be concealed but we'll eventually reach + * daylight and start on branches." * - * We are going to need to give the computer a list of instructions, by - * which I mean a list of numbers. If we were sitting in front of a - * minicomputer in 196x, we would need a copy of the target machine's - * instruction set, paper and pencil, and a lot of coffee. + * -- Chuck Moore, "Programming a Problem-Oriented Language", 1970 + * https://colorforth.github.io/POL.htm * - * In 20xx we are fortunate enough to have rust so we will put it to work. + * You now have an unfamiliar computer with no software. Can you and the + * computer write a program? * - * Regardless, this bootstrapping process isn't going to be very pleasant - * so the goal is to make it short. We want a language that can stand on - * its own as quickly as possible, so the computer can start helping us - * write the program. + * The first program is the hardest to write because you don't have any tools to + * help write it. The computer itself is going to be no help. Without any + * program it will sit there doing nothing. + * + * What should the first program be? + * A natural choice would be a tool that helps you program more easily. + * + * An interactive programming environment needs to let you do 2 things: + * + * 1. Call subroutines by typing their name at the keyboard + * 2. 
Define new subroutines in terms of existing ones + * + * Begin with step 1: + * Call subroutines by typing their name at the keyboard + * + * This is where we will meet Forth. + * + * The below is a small Forth for bootstrapping this computer. If you want to + * learn how to implement a full featured Forth, please read Jonesforth, and + * Brad Rodriguez' series of articles "Moving Forth". The simple Forth I write + * below will probably help you understand those Forths a little better. + * + * Forth organizes all the computer's memory as a "dictionary" of subroutines. + * The point of the dictionary is to give each subroutine a name so you + * can run a subroutine by typing its name. The computer will look up its + * address for you and call it. * - * Forth is a weird language but its design decisions make a lot of sense - * if you view it as a bootstrapping tool. - */ - - /* - * Forth organizes all the computer's memory as a "dictionary". * The dictionary starts at a low address and grows towards high addresses. * It is organized like a linked-list, like this: * + * [Link field][Name][Code .......... ] + * ^ + * | + * [Link field][Name][Code ...... ] + * ^ + * | + * [Link field][Name][Code ............... ] * - * Code is stored in the dictionary as a list of addresses. + * The reason it is a linked list is to allow each list entry to be a + * different length. * + * Each dictionary entry contains three things: * + * - "Link field": The address of the previous dictionary entry. + * For the first dictionary entry this field is 0. + * + * - "Name": A few letters to name this dictionary entry. + * Later you will type this name at the keyboard to call up + * this dictionary entry. + * + * - "Code": A subroutine to execute when you call up this dictionary + * entry. This is a list of CPU instructions. Note that one + * of the CPU instructions is "call". So you can have a subroutine + * that calls other subroutines, or calls itself.
+ * + * This code must end with a return (RET) instruction. + * + * Example subroutine: + * + * Number Instruction Meaning + * ------ ----------- ------- + * 7 Literal(3) Push the value 3 onto the data stack + * 9 Literal(4) Push the value 4 onto the data stack + * 65504 RET Return to caller + * + * A linked list is not a very fast data structure but this doesn't really + * matter because dictionary lookup doesn't need to be fast. Lookups are + * for converting text you typed at the keyboard to subroutine addresses. + * You can't type very fast compared to a computer so this lookup doesn't + * need to be fast. + * + * In addition to the linked list itself, you will need a couple of + * variables to keep track of where the dictionary is in memory: + * + * - Dictionary pointer: The address of the newest dictionary entry. + * - Here: The address of the first unused memory location, + * which comes just after the newest dictionary entry. + * + * [Link field][Name][Code .......... ] + * ^ + * | + * [Link field][Name][Code ...... ] + * ^ + * | + * [Link field][Name][Code ............... ] + * ^ ^ + * | | + * [Dictionary pointer] [Here] + * + * Got all that? + * + * To create our Forth interactive programming environment, we will start + * by defining subroutines that: + * - read names from the keyboard + * - look up and execute dictionary entries by name + * + * We will put these subroutines themselves in the dictionary so they are + * available for use once our interactive environment is up and running! + * + * If you were sitting in front of a microcomputer in 196x you would need + * to create the dictionary with pencil and paper, but in 20xx we will + * write a Rust program to help create the dictionary.
+ * + * First we need to keep track of where the dictionary is: */ -/* Here is the stuff that you would normally be doing with pencil and paper */ struct Dict<'a> { - dp: u16, - here: u16, - c: &'a mut Core + dp: u16, // The dictionary pointer + here: u16, // The "here" variable + c: &'a mut Core // The dictionary lives in memory. We are going to + // hang on to a mutable reference to the core to give + // us easy access to the memory. } +/* Helpers to help put new routines in the dictionary */ + enum Item { Literal(u16), Call(u16), @@ -653,15 +742,19 @@ impl From for Item { fn from(a: u16) -> Self { Item::Call(a) } } impl From for Item { fn from(o: Op) -> Self { Item::Opcode(o) } } impl Dict<'_> { + /* Helper to reserve space in the dictionary by advancing the "here" + * pointer */ fn allot(&mut self, n: u16) { self.here = self.here.wrapping_add(n); } + /* Helper to append a 16 bit integer to the dictionary */ fn comma(&mut self, val: u16) { self.c.store(self.here, val); self.allot(2); } + /* Helper to append a CPU instruction to the dictionary */ fn emit>(&mut self, val: T) { match val.into() { Item::Call(val) => { self.comma(val) } @@ -671,11 +764,36 @@ impl Dict<'_> { } } + /* Helper to append a "name" field to the dictionary. To save space and + * to make each dictionary header a consistent size, I am choosing to not + * store every letter of the name. Instead I am storing only the length of + * the name and then the first three letters of the name. + * + * That means these two names will compare equal: + * - ALLOW (-> 5ALL) + * - ALLOT (-> 5ALL) + * + * Even though their first three letters are the same, these two names + * will compare unequal because they are different lengths: + * - FORTH (-> 5FOR) + * - FORGET (-> 6FOR) + * + * If a name is shorter than 3 letters it is padded out with spaces. + * - X (-> 1X ) + * + * You can see that the name field is always four bytes regardless + * of how many letters are in the name, and the link field is two bytes. 
+ * This means a dictionary header in this Forth is always six bytes. + */ fn name(&mut self, n: u8, val: [u8; 3]) { + /* Store the length and the first character */ self.comma(n as u16 | ((val[0] as u16) << 8)); + /* Store the next two characters */ self.comma(val[1] as u16 | ((val[2] as u16) << 8)); } + /* Helper to append a new link field to the dictionary and update the + * dictionary pointer appropriately. */ fn entry(&mut self) { let here = self.here; self.comma(self.dp); @@ -683,44 +801,371 @@ impl Dict<'_> { } } +/* Now we can start building the dictionary. */ fn build_dictionary(c: &mut Core) { use Op::*; use Item::*; - let mut d = Dict {dp: 0, here: 2, c: c}; + let mut d = Dict { + dp: 0, /* Nothing in the dictionary yet */ + here: 2, /* Reserve address 0 as an "entry point", i.e. where the + CPU will jump to start running Forth. We don't have a + Forth interpreter yet so we'll leave address 0 alone for + now and start the dictionary at address 2 instead. */ + c: c + }; + /* Consider the following facts: + * - The CPU knows how to execute a bunch of instructions strung together. + * - Forth consists of a bunch of subroutine calls strung together. + * - Subroutine CALL is a valid instruction of our CPU. + * + * This means that we can immediately begin programming our machine in + * a language resembling Forth, just by writing a list of subroutine + * calls into the dictionary. + * + * The line between "machine code program" and "Forth program" is + * very blurry. To illustrate: + * + * Here is a subroutine consisting of a few instructions strung together. + * + * Instruction Number Meaning + * ----------- ------ ------- + * Literal(3) 7 Push the value 3 onto the data stack + * Literal(4) 9 Push the value 4 onto the data stack + * RET 65504 Return to caller + * + * Here is a Forth subroutine consisting of a few subroutine calls strung + * together. 
+ * Call Number Meaning + * ----------- ------ ------- + * S1 1230 Call subroutine S1 which happens to live + * at address 1230 + * S2 1250 Call subroutine S2 which happens to live + * at address 1250 + * RET 65504 Return to caller + * + * This duality between CPU instructions and Forth code comes from + * an idea called "subroutine threading". It is a refinement of an + * idea called "threaded code". This has no relation to the kind of + * threading that lets you run programs in parallel. You can read more + * about threaded code on Wikipedia or in the other Forth resources I + * mentioned earlier (Jonesforth, and Moving Forth by Brad Rodriguez). + * + * Our new language starts out with the sixteen (well, eighteen) + * instructions built into the CPU. We can string those instructions + * together into a new subroutine. This subroutine adds to the pool + * of functions we have available for making new subroutines. + * + * Repeat until you have built what you wanted to build, via + * function composition. This is the idea behind Forth. + */ + + /* + * We are going to be writing many series of instructions so let's + * start out by making a Rust macro that makes them easier to type + * and lets us specify a CPU instruction vs. a subroutine call with + * equal ease. + * + * The macro below will convert: + * + * forth!(Literal(2), ADD, RET) + * + * to: + * + * d.emit(Literal(2)); + * d.emit(ADD); + * d.emit(RET); + * + * which you probably recognize as code that will add a new subroutine + * to the dictionary. + */ macro_rules! forth { ($x:expr) => (d.emit($x)); ($x:expr, $($y:expr),+) => (d.emit($x); forth!($($y),+)) } + /* Now we can add the first subroutine to the dictionary! + * + * key: Reads a character from the keyboard and places its character + * code on the stack. + * + * There is a tradition of writing stack comments for Forth subroutines + * to describe the stack effect of executing the subroutine. 
+ * They look like this: key ( -- n ) + * + * Read as: key does not take any parameters off the stack, and leaves + * one new number pushed onto the stack. + * + * Also remember that a dictionary entry looks like this: + * [Link field][Name][Code .......... ] + */ + // key ( -- n ) - d.entry(); d.name(3, *b"key"); let key = d.here; - forth!(Literal(0), IO, RET); + d.entry(); /* Compile the link field into the dictionary */ + d.name(3, *b"key"); /* Compile the name field into the dictionary */ + let key = d.here; /* (Save off the start address of the code so we + can call it later) */ + forth!( + Literal(0), /* Compile a LITERAL instruction that pushes + 0 to the stack */ + + IO, /* Compile an IO instruction. + * + * Remember from the CPU code that IO takes a + * parameter on the stack to specify which port + * to use. + * + * Also remember that IO port 0 reads + * a character from standard input. + */ + + RET /* Compile a RET instruction */ + ); + /* We have now compiled the "key" subroutine into the dictionary. + * [Link field][Name][Code .......... ] + * 0000 3key 1, 65534, 65504 + * + * The next subroutine we will make is "emit". This is a companion + * to "key" that works in the opposite direction. + * + * key ( -- n ) reads a character from stdin and pushes it to the stack. + * emit ( n -- ) pops a character from the stack and writes it to stdout. + */ // emit ( n -- ) d.entry(); d.name(4, *b"emi"); let emit = d.here; forth!(Literal(1), IO, RET); + /* I am tired of saying "subroutine" so many times, so I am going to + * introduce a new term. Remember the goal our language is working + * towards -- we want to be able to type a word at the keyboard, and + * let the computer look it up in the dictionary and execute the + * appropriate code. + * + * So far we have two named items in the dictionary, key and emit. + * + * We are going to term a named dictionary item a "word". + * This is a Forth tradition.
+ * + * So key and emit are "words", or "dictionary words" if you want to be + * precise about it. So far these are the only words we've defined. + * + * Let's define some more words. + */ + + /* Our CPU does not have subtraction so let's make subtraction by adding + * the two's complement. + * + * To get the two's complement, do a bitwise invert and add 1. + * + * This will be the most complicated Forth that we've written so far + * so let's walk through step by step. */ + // - ( a b -- a-b ) d.entry(); d.name(1, *b"- "); let sub = d.here; - forth!(INV, Literal(1), ADD, ADD, RET); + forth!( /* Stack contents: a b, to start off with. + * We want to compute a minus b */ + INV, /* Bitwise invert the top item on the stack. + * Stack contents: a ~b */ + + Literal(1), /* Push 1 onto the stack. + * Stack contents: a ~b 1 */ + + ADD, /* Add the top two items on the stack. + * Stack contents: a ~b+1 + * Note that ~b+1 is the two's complement of b. */ + + ADD, /* Add the top two items on the stack. + * Stack contents: n + * Note that n = (a + ~b+1) = a - b */ + + RET /* Done, return to caller, leaving n on the data stack. */ + ); + /* Writing it out like that takes a lot of space. Normally Forth code + * is written on a single line, like this: + * + * INV 1 ADD ADD RET + * + * Looking at it this way, it's easy to see the new word we just + * created (-) is made from 5 instructions. It's pretty typical for + * a Forth word to be made of 2-7 of them. Beyond that length, things + * get successively harder to understand, and it becomes a good idea + * to split some work off into helper words. + * + * We will see an example of this below. + */ + + /* Our next word will be useful for Boolean logic. + * + * 0= ( n -- f ) + * + * In a stack comment, "f" means "flag", a.k.a. Boolean value. + * By Forth convention, zero is false and any nonzero value is true.
+ * However the "best" value to use for a true flag is 65535 (all ones) + * so the bitwise logical operations can double as Boolean logical + * operations. + * + * So what 0= does is: + * - if n=0, leave on the stack f=65535 + * - otherwise, leave on the stack f=0 + * + * It is like C's ! operator. + * + * In Rust this could be implemented as: + * + * fn zero_eq(n: u16) -> u16 { + * if (n == 0) { + * return 65535; + * } else { + * return 0; + * } + * } + * + * Rust has an if-then and block scope, so this is easy to write. + * + * The literal translation to a typical register-machine assembly + * language would look something like this: + * + * zero_eq: compare r0, 0 + * jump_eq is_zero + * move r0, 0 + * ret + * is_zero: move r0, 65535 + * ret + * + * It looks simple but I want to point out a couple things about it + * that are not so simple. + * + * The conditional jump instruction, jump_eq. + * ------------------------------------------ + * Our CPU doesn't have this. Q is the only "decision-making" + * instruction that our CPU has. + * + * Q - If the top number on the data stack is zero, skip the next + * instruction. + * + * The forward reference + * --------------------- + * This is another problem. Think of the job of an assembler which is + * converting an assembly language program to machine code. We are + * currently writing our code in a tiny assembler that we made in Rust! It + * is very simple but so far it has worked for us. The assembler of our + * hypothetical register-machine below has a rather nasty problem to solve. + * + * zero_eq: compare r0, 0 + * jump_eq is_zero <----- On this line. + * move r0, 0 + * ret + * is_zero: move r0, 65535 + * ret + * + * It wants to jump to is_zero but that symbol has not been seen yet and is + * unrecognized. On top of that, the assembler also doesn't yet know what + * address is_zero will have, so doesn't know what jump target to emit.
+ * To successfully assemble that kind of program you would need an + * assembler smarter than the assembler we made for ourselves in Rust. + * + * There are ways to solve this but let's NOT solve it. + * + * Our CPU has no jump instruction (only call) and our assembler only lets + * us call things we already defined. Instead of removing these + * constraints, find a way to write 0= within the constraints. + * + * Here is a start at solving the problem + * + * is_nonzero ( -- 0 ) + * Literal(0) + * RET + * + * 0= ( n -- f ) + * Q <-- pop n, if n=0 skip next instruction + * is_nonzero <-- f=0 is now pushed to stack + * Literal(0) + * INV <-- f=65535 is now pushed to stack + * RET <-- Return + * + * We got rid of the forward reference by defining is_nonzero before it + * was used. + * + * We got rid of the jump instruction by using a subroutine call instead. + * + * This code is close to working but it doesn't quite work. The problem + * is that is_nonzero gives control back to 0= when done, just like + * a subroutine call normally does, and then 0= runs as normal until it + * hits the return instruction at the end. + * So we wind up executing both the f=0 branch and the f=65535 branch, + * instead of just executing the f=0 branch like we wanted in this case. + * + * It is possible to fix this last problem by adding the instructions + * RTO DRP to is_nonzero. + * + * is_nonzero ( -- 0 ) + * RTO <-- Pop the return address, push to data stack + * DRP <-- Discard it + * Literal(0) <-- Put 0 on the data stack + * RET <-- Return + * + * Because we popped off and discarded one item from the return stack, the + * final RET instruction will not return to 0= any more. Instead it will + * skip one level and return to whoever called 0=. This has the result of + * ending 0= early, which is what we wanted to do. + * + * I call this pattern "return-from-caller". It is used occasionally in + * real Forth systems. 
My dialect of Forth will use it extensively to work + * around my CPU's lack of conditional branch. + * + * Now we've explained how 0= is going to work, let's make it. + */ + + /* First we define the helper. It won't be reused, so I am not going + * to bother giving it a dictionary header and name for easy lookup later. + * Think of it as a private function. */ let zero = d.here; forth!(Literal(0), RTO, DRP, RET); + /* Now define 0= using the helper. */ // 0= ( n -- f ) d.entry(); d.name(2, *b"0= "); let zero_eq = d.here; forth!(Q, zero, Literal(0), INV, RET); + /* Next let's make a = equality comparison operator, using 0= and subtract. + * I call it an "operator" because that's what other languages would + * call it, but Forth has no special idea of an "operator". Everything + * is just words. */ // = ( a b -- a=b ) d.entry(); d.name(1, *b"= "); let eq = d.here; forth!(sub, zero_eq, RET); + /* Note that 0= and subtract are both words, not CPU instructions. + * This makes = the first "pure" Forth word we have defined, with no + * direct dependency on the machine's instruction set. + * We could define = as - 0= on a real standards-compliant Forth system + * and it would still work. So Forth gets you to the point of writing + * "portable" code really quickly. Often you can reuse routines early in + * bootstrapping even though they were written and tested on a different + * machine. Many languages offer portability but few offer it so quickly. + */ - // Advance past whitespace + /* Now that we've got some basics in place let's go back to solving + * the real problem of getting our language to read words from the + * keyboard. The first problem we have is that we need some way to + * separate words from each other so we know where one word ends and the + * next begins. This problem is called "lexing". Forth has about the + * simplest lexer ever, it just splits on whitespace. Anything with + * character code <=32 is considered whitespace. 
Words are delimited by + * whitespace. And that is all the syntax Forth has. + * + * To read a word from the keyboard you will need to: + * - Advance past any leading whitespace + * - Read characters into a buffer until whitespace is seen again. + */ + + /* Let's start with the "advance past whitespace" part */ let skip_helper = d.here; forth!(RTO, DRP, key, DUP, Literal(33), GEQ, Q, RET, DRP, skip_helper); + // skipws ( -- c ) d.entry(); d.name(6, *b"ski"); let skipws = d.here; forth!(skip_helper); @@ -748,13 +1193,13 @@ fn build_dictionary(c: &mut Core) { d.entry(); d.name(2, *b"c! "); let cst = d.here; forth!(DUP, LD, Literal(0xff), INV, AND, SWP, TOR, OR, RTO, ST, RET); - // Load 1 letter into buffer. + /* Load 1 letter into buffer. */ let stchar = d.here; forth!(Literal(word_buf), cld, Literal(1), ADD, DUP, Literal(word_buf), cst, Literal(5), min, Literal(word_buf), ADD, cst, RET); - // Load letters into buffer until whitespace is hit again. - // Return the whitespace character that was seen. + /* Load letters into buffer until whitespace is hit again. + * Return the whitespace character that was seen. */ let getcs_helper = d.here; forth!(RTO, DRP, stchar, key, DUP, Literal(32), SWP, GEQ, Q, RET, getcs_helper); @@ -769,8 +1214,8 @@ fn build_dictionary(c: &mut Core) { skipws, getcs, DRP, RET); // latest ( -- a ) - // Address of "latest" variable. This variable stores the address of - // the latest word in the dictionary. + /* Address of "latest" variable. This variable stores the address of + the latest word in the dictionary. */ let latest_ptr = d.here; d.allot(2); d.entry(); d.name(6, *b"lat"); let latest = d.here; forth!(Literal(latest_ptr), RET); @@ -806,15 +1251,15 @@ fn build_dictionary(c: &mut Core) { forth!(DUP, DUP, Literal(3), SFT, ADD, ADD, RET); // here ( -- a ) - // Address of "here" variable. This variable stores the address of - // the first free space in the dictionary + /* Address of "here" variable. 
This variable stores the address of + the first free space in the dictionary */ let here_ptr = d.here; d.allot(2); d.entry(); d.name(4, *b"her"); let here = d.here; forth!(Literal(here_ptr), RET); // state ( -- a ) - // Address of "state" variable. This variable stores -1 if - // interpreting or 0 if compiling. + /* Address of "state" variable. This variable stores -1 if + * interpreting or 0 if compiling. */ let state_ptr = d.here; d.allot(2); d.entry(); d.name(5, *b"sta"); let state = d.here; forth!(Literal(state_ptr), RET); @@ -906,7 +1351,8 @@ fn build_dictionary(c: &mut Core) { d.entry(); d.name(1 | 0x80, *b"; "); forth!(Literal(!(RET as u16)), INV, comma, lbracket, unsmudge, RET); - // Finally put the primitives in the dictionary so they can be called directly. + /* Finally put the primitives in the dictionary so they can be + * called interactively. */ d.entry(); d.name(3, *b"ret"); forth!(RTO, DRP, RET); d.entry(); d.name(2, *b">r "); forth!(RTO, SWP, TOR, TOR, RET); d.entry(); d.name(2, *b"r> "); forth!(RTO, RTO, SWP, TOR, RET);