Adding reader macros. Turned the tokenizer into a class; added list and string words.

Russ Olsen 2020-04-22 08:57:14 -04:00
parent f10edf9fd7
commit f1bdf52b2b
8 changed files with 140 additions and 63 deletions

sallyforth/compiler.py

@@ -1,4 +1,3 @@
-from lex import forth_prompt
 from stack import Stack

 class Compiler:

sallyforth/kernel.py

@@ -2,7 +2,7 @@ import sys
 from os import path
 from words import *
 import words
-from lex import forth_prompt, read_tokens, is_string, tokenize
+from lex import is_string, Tokenizer
 from stack import Stack
 from namespace import Namespace

@@ -17,10 +17,12 @@ def to_number(token):
 class Forth:
     def __init__(self, startup=None):
+        self.tokenizer = Tokenizer(self)
         self.stack = Stack()
         self.namespaces = {}
         initial_defs = {
             '*prompt*': const_f('SallyForth>> '),
+            'macroexpand': w_enlist,
             'true': const_f(True),
             'false': const_f(False),
             'nil': const_f(None),
@@ -46,15 +48,27 @@ class Forth:
     def defvar(self, name, value):
         self.namespace[name] = const_f(value)

+    def py_evaluate(self, token, *args):
+        #print(f'Evaluate: token [{token}] args <<{args}>>')
+        rargs = list(args)
+        rargs.reverse()
+        if rargs:
+            for a in rargs:
+                # print("pushing", a);
+                self.stack.push(a)
+        #print(f'Before eval stack is {str(self.stack)}')
+        return self.evaluate_token(token)
+
     def evaluate_token(self, token):
+        #print("evaluate token: ", token)
         self.execute_token(token)
         return self.stack.pop()

     def compiling(self):
         return self.compiler

-    def execute_line(self, readline_f=forth_prompt):
-        tokens = read_tokens(readline_f)
+    def execute_line(self, line):
+        tokens = self.tokenizer.tokenize(line)
         self.execute_tokens(tokens)

     def execute_tokens(self, tokens):
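Note: the new py_evaluate is the bridge that lets Python code invoke a Forth word and collect its result. Arguments are pushed in reverse so that args[0] lands on top of the stack, the token is executed, and the top of stack is popped as the return value. A minimal usage sketch, using the macroexpand/w_enlist pair this commit wires up (assumes a Forth instance constructs cleanly without a startup file):

    # w_enlist pops one value and pushes it back wrapped in a list,
    # so evaluating 'macroexpand' on 'foo' returns ['foo'].
    # (w_enlist's debug prints will also fire.)
    forth = Forth()
    print(forth.py_evaluate('macroexpand', 'foo'))  # -> ['foo']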
@@ -65,6 +79,20 @@ class Forth:
         else:
             self.compile_token(token)

+    def macro_expand_token(self, token):
+        if len(token) <= 0:
+            return [token]
+        if token[0] != '#':
+            return [token]
+        tag = token[1:]
+        return self.py_evaluate('macroexpand', tag)
+
+    def macro_expand_tokens(self, tokens):
+        results = []
+        for token in tokens:
+            results.extend(self.macro_expand_token(token))
+        return results
+
     def set_ns(self, ns_name):
         if ns_name in self.namespaces:
             self.namespace = self.namespaces[ns_name]

@@ -85,8 +113,7 @@ class Forth:
         with open(fpath) as f:
             line = f.readline()
             while line:
-                tokens = tokenize(line)
-                self.execute_tokens(tokens)
+                self.execute_line(line)
                 line = f.readline()
         self.namespace['*source*'] = old_source
         self.namespace = old_namespace
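The reader-macro hook works like this: any token that starts with '#' is handed to the user-definable macroexpand word, and whatever list of tokens that word returns is spliced back into the stream; every other token passes through untouched. The control flow reduces to this standalone sketch (plain functions standing in for the Forth word machinery):

    # Distillation of macro_expand_token / macro_expand_tokens above.
    def macro_expand_token(token, macroexpand):
        if len(token) == 0 or token[0] != '#':
            return [token]               # ordinary token: pass through
        return macroexpand(token[1:])    # '#tag' -> whatever the macro returns

    def macro_expand_tokens(tokens, macroexpand):
        results = []
        for token in tokens:
            results.extend(macro_expand_token(token, macroexpand))
        return results

    enlist = lambda tag: [tag]           # mirrors the default w_enlist
    print(macro_expand_tokens(['dup', '#thing'], enlist))  # -> ['dup', 'thing']

With the default w_enlist binding a '#' token simply loses its hash; redefining macroexpand is what makes the mechanism useful.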

sallyforth/lex.py

@@ -8,58 +8,55 @@ def is_string(token):
 def is_space(ch):
     return ch == ' ' or ch == '\t' or ch == '\n'

-def tokenize(s):
-    state = 'start'
-    token = ''
-    tokens = []
-    for ch in s:
-        #print(f'Loop state {state} token {token} ch {ch}')
-        if state == 'start' and ch == '(':
-            state = 'comment'
-        elif state == 'start' and ch == '\\':
-            state = 'line_comment'
-        elif state == 'line_comment' and ch == '\n':
-            state = 'start'
-        elif state == 'comment' and ch == ')':
-            state = 'start'
-        elif state in ['comment', 'line_comment']:
-            continue
-        elif state == 'start' and is_space(ch):
-            continue
-        elif state == 'start' and ch == "'":
-            token = ch
-            state = 's_string'
-        elif state == 'start' and ch == '"':
-            token = ch
-            state = 'string'
-        elif state == 'start':
-            token = ch
-            state = 'word'
-        elif state == 'string' and ch == '"':
-            tokens.append(token)
-            state = 'start'
-            token = ''
-        elif (state in ['word', 's_string']) and is_space(ch):
-            tokens.append(token)
-            state = 'start'
-            token = ''
-        elif state == 'word' or state == 'string' or state == 's_string':
-            token += ch
-        else:
-            print(f'State: [{state}] token: [{token}] ch: [{ch}]???')
-            state = 'start'
-    if len(token) > 0:
-        tokens.append(token)
-    return tokens
-
-def read_tokens(read_f):
-    line = read_f()
-    return tokenize(line)
-
-def forth_prompt():
-    return input('SallyForth>> ')
-
-def file_read_f(f):
-    def read_it():
-        return f.readline()
-    return read_it
+class Tokenizer:
+    def __init__(self, forth):
+        self.forth = forth
+        print("Tokenizer:", self.forth)
+
+    def tokenize(self, s):
+        raw_tokens = self.raw_tokenize(s)
+        return self.forth.macro_expand_tokens(raw_tokens)
+
+    def raw_tokenize(self, s):
+        state = 'start'
+        token = ''
+        tokens = []
+        for ch in s:
+            #print(f'Loop state {state} token {token} ch {ch}')
+            if state == 'start' and ch == '(':
+                state = 'comment'
+            elif state == 'start' and ch == '\\':
+                state = 'line_comment'
+            elif state == 'line_comment' and ch == '\n':
+                state = 'start'
+            elif state == 'comment' and ch == ')':
+                state = 'start'
+            elif state in ['comment', 'line_comment']:
+                continue
+            elif state == 'start' and is_space(ch):
+                continue
+            elif state == 'start' and ch == "'":
+                token = ch
+                state = 's_string'
+            elif state == 'start' and ch == '"':
+                token = ch
+                state = 'string'
+            elif state == 'start':
+                token = ch
+                state = 'word'
+            elif state == 'string' and ch == '"':
+                tokens.append(token)
+                state = 'start'
+                token = ''
+            elif (state in ['word', 's_string']) and is_space(ch):
+                tokens.append(token)
+                state = 'start'
+                token = ''
+            elif state == 'word' or state == 'string' or state == 's_string':
+                token += ch
+            else:
+                print(f'State: [{state}] token: [{token}] ch: [{ch}]???')
+                state = 'start'
+        if len(token) > 0:
+            tokens.append(token)
+        return tokens
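A quick behavioral check of the new Tokenizer (a sketch; a stub stands in for the Forth instance, since only the macro_expand_tokens hook is needed):

    from lex import Tokenizer

    class StubForth:
        def macro_expand_tokens(self, tokens):
            return tokens  # identity: no reader macros installed

    t = Tokenizer(StubForth())  # note: __init__ prints a debug line
    print(t.tokenize(': greet "hello world" p ; ( a comment )'))
    # -> [':', 'greet', '"hello world', 'p', ';']

Comments are dropped, whitespace inside a double-quoted string is preserved, and the opening quote stays on the token, which is how string tokens remain distinguishable downstream (see is_string).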

sallyforth/list.sf Normal file

@@ -0,0 +1,36 @@
+"List" p
+
+: [n] (n list -- nth-item)
+  2 ->list                          \ Make the arg list.
+  <. builtins.list '__getitem__ .>  \ Find the getitem method.
+  !!                                \ Call!
+;
+
+: first (list -- first-item) 0 swap [n] ;
+: second (list -- second-item) 1 swap [n] ;
+: third (list -- third-item) 2 swap [n] ;
+: fourth (list -- fourth-item) 3 swap [n] ;
+: last (list -- last-item) -1 swap [n] ;
+
+: slice (start stop -- slice-obj)
+  swap
+  2 ->list
+  builtins.slice
+  !!
+;
+
+: take (n list -- first-n-items)
+  swap 0 swap slice  \ Make the 0..n slice.
+  swap [n]           \ Do a[0..n].
+;
+
+: drop (n list -- all-but-first-n-items)
+  swap nil slice  \ Make the n..None slice.
+  swap [n]
+;
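For readers new to the <. … .> / !! idiom: [n] builds a two-element argument list, looks up __getitem__ on builtins.list, and calls it; slice, take, and drop ride the same mechanism. Roughly the Python these words reduce to (illustrative, not code from the commit):

    import builtins

    a = [10, 20, 30, 40]
    print(builtins.list.__getitem__(a, 1))               # second    -> 20
    print(builtins.list.__getitem__(a, -1))              # last      -> 40
    s = builtins.slice(0, 2)                             # 0 2 slice -> slice(0, 2, None)
    print(builtins.list.__getitem__(a, s))               # take      -> [10, 20]
    print(builtins.list.__getitem__(a, slice(2, None)))  # drop      -> [30, 40]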

sallyforth/sally.py

@@ -2,7 +2,6 @@ import os
 import sys
 import atexit
 from kernel import Forth
-from lex import tokenize
 import readline
 import traceback

@@ -55,9 +54,8 @@ def repl(f):
         except EOFError:
             break
-        tokens = tokenize(line)
         try:
-            f.execute_tokens(tokens)
+            f.execute_line(line)
         except:
             exc_type, exc_value, exc_traceback = sys.exc_info()
             print("Error:", exc_type)

sallyforth/startup.sf

@@ -101,6 +101,9 @@
 : >>@ ] @@ ;
 : >>! ] @@ [] swap !! ;

+"string.sf" source
+"list.sf" source
+
 "init.sf" source-if-exists

sallyforth/string.sf Normal file

@@ -0,0 +1,7 @@
+"String" p
+
+: split (delimit str -- tokens) 2 ->list <. builtins.str 'split .> !! ;
+
+: dot-split (str -- tokens) "." swap split ;
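As in list.sf, split builds an argument list, fetches the unbound builtins.str.split method, and calls it, so the two words reduce to (illustrative):

    import builtins

    print(builtins.str.split('a b c', ' '))         # split     -> ['a', 'b', 'c']
    print(builtins.str.split('os.path.join', '.'))  # dot-split -> ['os', 'path', 'join']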

sallyforth/words.py

@@ -35,6 +35,16 @@ def import_native_module(forth, m, alias=None, excludes=[]):
         else:
             forth.namespace[localname] = const_f(val)

+def w_no_op(f, i):
+    return i+1
+
+def w_enlist(f, i):
+    print("Enlist!")
+    x = f.stack.pop()
+    print("Popped", x)
+    f.stack.push([x])
+    return i+1
+
 def w_forth(f, i):
     f.stack.push(f)
     return i+1
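w_enlist is the placeholder expansion: it pops the macro tag and pushes it back as a one-element token list, so '#foo' currently reads as plain 'foo' (debug prints aside). Since macroexpand is an ordinary word in the namespace, rebinding it changes how '#' tokens read. A hypothetical alternative with the same (forth, index) word signature, not part of this commit:

    # Expand '#tag' to the upper-cased token instead of the bare tag.
    def w_upcase_macro(f, i):
        tag = f.stack.pop()          # the text after '#'
        f.stack.push([tag.upper()])  # the token list spliced into the stream
        return i + 1

Bound as macroexpand, this would make '#foo' tokenize to 'FOO'.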