diff --git a/sallyforth/compiler.py b/sallyforth/compiler.py
index bfdf0c3..10e5953 100644
--- a/sallyforth/compiler.py
+++ b/sallyforth/compiler.py
@@ -1,4 +1,3 @@
-from lex import forth_prompt
 from stack import Stack
 
 class Compiler:
diff --git a/sallyforth/kernel.py b/sallyforth/kernel.py
index e4655cb..d839c1b 100644
--- a/sallyforth/kernel.py
+++ b/sallyforth/kernel.py
@@ -2,7 +2,7 @@ import sys
 from os import path
 from words import *
 import words
-from lex import forth_prompt, read_tokens, is_string, tokenize
+from lex import is_string, Tokenizer
 from stack import Stack
 from namespace import Namespace
 
@@ -17,10 +17,12 @@ def to_number(token):
 
 class Forth:
     def __init__(self, startup=None):
+        self.tokenizer = Tokenizer(self)
         self.stack = Stack()
        self.namespaces = {}
         initial_defs = {
                 '*prompt*': const_f('SallyForth>> '),
+                'macroexpand': w_enlist,
                 'true': const_f(True),
                 'false': const_f(False),
                 'nil': const_f(None),
@@ -46,15 +48,27 @@ class Forth:
     def defvar(self, name, value):
         self.namespace[name] = const_f(value)
 
+    def py_evaluate(self, token, *args):
+        #print(f'Evaluate: token [{token}] args <<{args}>>')
+        rargs = list(args)
+        rargs.reverse()
+        if rargs:
+            for a in rargs:
+                # print("pushing", a);
+                self.stack.push(a)
+        #print(f'Before eval stack is {str(self.stack)}')
+        return self.evaluate_token(token)
+
     def evaluate_token(self, token):
+        #print("evaluate token: ", token)
         self.execute_token(token)
         return self.stack.pop()
 
     def compiling(self):
         return self.compiler
 
-    def execute_line(self, readline_f=forth_prompt):
-        tokens = read_tokens(readline_f)
+    def execute_line(self, line):
+        tokens = self.tokenizer.tokenize(line)
         self.execute_tokens(tokens)
 
     def execute_tokens(self, tokens):
@@ -65,6 +79,20 @@ class Forth:
         else:
             self.compile_token(token)
 
+    def macro_expand_token(self, token):
+        if len(token) <= 0:
+            return [token]
+        if token[0] != '#':
+            return [token]
+        tag = token[1:]
+        return self.py_evaluate('macroexpand', tag)
+
+    def macro_expand_tokens(self, tokens):
+        results = []
+        for token in tokens:
+            results.extend(self.macro_expand_token(token))
+        return results
+
     def set_ns(self, ns_name):
         if ns_name in self.namespaces:
             self.namespace = self.namespaces[ns_name]
@@ -85,8 +113,7 @@ class Forth:
         with open(fpath) as f:
             line = f.readline()
             while line:
-                tokens = tokenize(line)
-                self.execute_tokens(tokens)
+                self.execute_line(line)
                 line = f.readline()
         self.namespace['*source*'] = old_source
         self.namespace = old_namespace
diff --git a/sallyforth/lex.py b/sallyforth/lex.py
index d630f50..e9338e7 100644
--- a/sallyforth/lex.py
+++ b/sallyforth/lex.py
@@ -8,58 +8,55 @@ def is_string(token):
 def is_space(ch):
     return ch == ' ' or ch == '\t' or ch == '\n'
 
-def tokenize(s):
-    state = 'start'
-    token = ''
-    tokens = []
-    for ch in s:
-        #print(f'Loop state {state} token {token} ch {ch}')
-        if state == 'start' and ch == '(':
-            state = 'comment'
-        elif state == 'start' and ch == '\\':
-            state = 'line_comment'
-        elif state == 'line_comment' and ch == '\n':
-            state = 'start'
-        elif state == 'comment' and ch == ')':
-            state = 'start'
-        elif state in ['comment', 'line_comment']:
-            continue
-        elif state == 'start' and is_space(ch):
-            continue
-        elif state == 'start' and ch == "'":
-            token = ch
-            state = 's_string'
-        elif state == 'start' and ch == '"':
-            token = ch
-            state = 'string'
-        elif state == 'start':
-            token = ch
-            state = 'word'
-        elif state == 'string' and ch == '"':
+class Tokenizer:
+    def __init__(self, forth):
+        self.forth = forth
+        print("Tokenizer:", self.forth)
+
+    def tokenize(self, s):
+        raw_tokens = self.raw_tokenize(s)
+        return self.forth.macro_expand_tokens(raw_tokens)
+
+    def raw_tokenize(self, s):
+        state = 'start'
+        token = ''
+        tokens = []
+        for ch in s:
+            #print(f'Loop state {state} token {token} ch {ch}')
+            if state == 'start' and ch == '(':
+                state = 'comment'
+            elif state == 'start' and ch == '\\':
+                state = 'line_comment'
+            elif state == 'line_comment' and ch == '\n':
+                state = 'start'
+            elif state == 'comment' and ch == ')':
+                state = 'start'
+            elif state in ['comment', 'line_comment']:
+                continue
+            elif state == 'start' and is_space(ch):
+                continue
+            elif state == 'start' and ch == "'":
+                token = ch
+                state = 's_string'
+            elif state == 'start' and ch == '"':
+                token = ch
+                state = 'string'
+            elif state == 'start':
+                token = ch
+                state = 'word'
+            elif state == 'string' and ch == '"':
+                tokens.append(token)
+                state = 'start'
+                token = ''
+            elif (state in ['word', 's_string']) and is_space(ch):
+                tokens.append(token)
+                state = 'start'
+                token = ''
+            elif state == 'word' or state == 'string' or state == 's_string':
+                token += ch
+            else:
+                print(f'State: [{state}] token: [{token}] ch: [{ch}]???')
+                state = 'start'
+        if len(token) > 0:
             tokens.append(token)
-            state = 'start'
-            token = ''
-        elif (state in ['word', 's_string']) and is_space(ch):
-            tokens.append(token)
-            state = 'start'
-            token = ''
-        elif state == 'word' or state == 'string' or state == 's_string':
-            token += ch
-        else:
-            print(f'State: [{state}] token: [{token}] ch: [{ch}]???')
-            state = 'start'
-    if len(token) > 0:
-        tokens.append(token)
-    return tokens
-
-def read_tokens(read_f):
-    line = read_f()
-    return tokenize(line)
-
-def forth_prompt():
-    return input('SallyForth>> ')
-
-def file_read_f(f):
-    def read_it():
-        return f.readline()
-    return read_it
+        return tokens
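The lex.py rewrite above threads every raw token through the kernel's new macro
expansion before execution. A minimal sketch of the resulting pipeline, assuming
a bare Forth instance (startup=None) initializes cleanly, as the __init__
signature above suggests:

    from kernel import Forth

    f = Forth()
    # raw_tokenize only splits the input line into tokens:
    f.tokenizer.raw_tokenize('1 2 #swap +')   # -> ['1', '2', '#swap', '+']
    # tokenize additionally expands '#'-prefixed tokens by calling
    # py_evaluate('macroexpand', tag). With the placeholder w_enlist binding,
    # '#swap' expands to ['swap'], so for now the '#' is simply stripped
    # (and w_enlist's debug prints fire):
    f.tokenizer.tokenize('1 2 #swap +')       # -> ['1', '2', 'swap', '+']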
diff --git a/sallyforth/list.sf b/sallyforth/list.sf
new file mode 100644
index 0000000..3f9e954
--- /dev/null
+++ b/sallyforth/list.sf
@@ -0,0 +1,36 @@
+"List" p
+
+: [n] (n list -- nth-item)
+    2 ->list \ Make the arg list.
+    <. builtins.list '__getitem__ .> \ Find the getitem method.
+    !! \ Call!
+;
+
+: first (list -- first-item) 0 swap [n] ;
+: second (list -- second-item) 1 swap [n] ;
+: third (list -- third-item) 2 swap [n] ;
+: fourth (list -- fourth-item) 3 swap [n] ;
+
+: last (list -- last-item) -1 swap [n] ;
+
+: slice (start stop -- slice-obj)
+    swap
+    2 ->list
+    builtins.slice
+    !!
+;
+
+: take (n list -- first-n-items)
+    swap 0 swap slice \ Make the 0..n slice.
+    swap [n] \ Do a[0..n].
+;
+
+: drop (n list -- all-but-first-n-items)
+    swap nil slice \ Make the n..None slice.
+    swap [n]
+;
+
+
+
+
+
diff --git a/sallyforth/sallyforth.py b/sallyforth/sallyforth.py
index 45da16b..2537978 100644
--- a/sallyforth/sallyforth.py
+++ b/sallyforth/sallyforth.py
@@ -2,7 +2,6 @@ import os
 import sys
 import atexit
 from kernel import Forth
-from lex import tokenize
 import readline
 import traceback
 
@@ -55,9 +54,8 @@ def repl(f):
         except EOFError:
             break
 
-        tokens = tokenize(line)
         try:
-            f.execute_tokens(tokens)
+            f.execute_line(line)
         except:
             exc_type, exc_value, exc_traceback = sys.exc_info()
             print("Error:", exc_type)
diff --git a/sallyforth/startup.sf b/sallyforth/startup.sf
index 88bab3c..8c3a5d9 100644
--- a/sallyforth/startup.sf
+++ b/sallyforth/startup.sf
@@ -101,6 +101,9 @@
 : >>@ ] @@ ;
 
 : >>! ] @@ [] swap !! ;
 
+"string.sf" source
+"list.sf" source
+
 "init.sf" source-if-exists
 
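With startup.sf now sourcing the two new word files, the additions can be driven
from Python through the kernel. A sketch under the assumption that the startup
argument to Forth is the path of the startup file to load, as sallyforth.py's
repl sets it up:

    from kernel import Forth

    f = Forth('startup.sf')                 # assumed construction; see sallyforth.py
    f.execute_line('"a.b.c.d" dot-split')   # str.split('.') -> ['a', 'b', 'c', 'd']
    f.execute_line('2 swap take')           # first two items of the list on the stack
    print(f.stack.pop())                    # ['a', 'b']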
diff --git a/sallyforth/string.sf b/sallyforth/string.sf
new file mode 100644
index 0000000..e8c445d
--- /dev/null
+++ b/sallyforth/string.sf
@@ -0,0 +1,7 @@
+"String" p
+
+: split (delimit str -- tokens) 2 ->list <. builtins.str 'split .> !! ;
+
+: dot-split (str -- tokens) "." swap split ;
+
+
diff --git a/sallyforth/words.py b/sallyforth/words.py
index c4435b2..a1e6007 100644
--- a/sallyforth/words.py
+++ b/sallyforth/words.py
@@ -35,6 +35,16 @@ def import_native_module(forth, m, alias=None, excludes=[]):
         else:
             forth.namespace[localname] = const_f(val)
 
+def w_no_op(f, i):
+    return i+1
+
+def w_enlist(f, i):
+    print("Enlist!")
+    x = f.stack.pop()
+    print("Popped", x)
+    f.stack.push([x])
+    return i+1
+
 def w_forth(f, i):
     f.stack.push(f)
     return i+1
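Kernel-level words such as w_no_op and w_enlist follow the sallyforth
convention: they take the Forth instance and the current instruction index and
return the index of the next instruction. w_enlist, bound to 'macroexpand' in
kernel.py, is a placeholder that wraps the popped tag in a one-element token
list. A hypothetical replacement expansion, registered the same way the
built-ins are (the word name and behavior here are illustrative only):

    def w_expand_print(f, i):
        # Hypothetical macro: expand '#foo' into the two tokens 'foo' and 'p',
        # so any '#'-prefixed word is executed and its result printed.
        tag = f.stack.pop()
        f.stack.push([tag, 'p'])
        return i+1

    f.namespace['macroexpand'] = w_expand_print   # f is a Forth instance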