diff --git a/dawg/English/Makefile b/dawg/English/Makefile
new file mode 100644
index 000000000..aa7edea4e
--- /dev/null
+++ b/dawg/English/Makefile
@@ -0,0 +1,40 @@
+# -*-mode: Makefile -*-
+# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+TARGET_TYPE ?= FRANK
+
+# This represents the default -- for now
+COMMAND = -f Makefile.BasEnglish TARGET_TYPE=FRANK
+
+alleng:
+ for mfile in Makefile.BasEnglish Makefile.OSW Makefile.TWL98 Makefile.CollegeEng; do \
+ $(MAKE) -f $$mfile TARGET_TYPE=$(TARGET_TYPE); \
+ done
+
+%:
+ $(MAKE) $(COMMAND) $@
+
+all:
+ $(MAKE) $(COMMAND)
+
+clean:
+ $(MAKE) $(COMMAND) clean
+
+help:
+ @echo "try make -f Makefile.[BasEnglish|CollegeEng] \\"
+ @echo " TARGET_TYPE=[PALM|FRANK]"
+
diff --git a/dawg/English/Makefile.BasEnglish b/dawg/English/Makefile.BasEnglish
new file mode 100644
index 000000000..d8151e1c0
--- /dev/null
+++ b/dawg/English/Makefile.BasEnglish
@@ -0,0 +1,35 @@
+# -*-mode: Makefile -*-
+# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+LANG=BasEnglish
+LANGCODE=en_US
+#NEWDAWG=1
+
+TARGET_TYPE ?= FRANK
+
+include ../Makefile.2to8
+
+include ../Makefile.langcommon
+
+$(LANG)Main.dict.gz: BasEnglish.dict.gz
+ ln -s $< $@
+
+# Everything but creating of the Main.dict file is inherited from the
+# "parent" Makefile.langcommon in the parent directory.
+
+clean: clean_common
+ rm -f $(LANG)Main.dict.gz *.bin $(LANG)*.pdb $(LANG)*.seb
diff --git a/dawg/English/info.txt b/dawg/English/info.txt
new file mode 100644
index 000000000..40cf89fb3
--- /dev/null
+++ b/dawg/English/info.txt
@@ -0,0 +1,71 @@
+# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+LANGCODE:en_US
+
+# deal with DOS files
+LANGFILTER_PRECLIP: tr -d '\r' |
+
+LANGFILTER_POSTCLIP: | tr [a-z] [A-Z]
+LANGFILTER_POSTCLIP: | grep '^[A-Z]*$'
+LANGFILTER_POSTCLIP: | tr -s '\n\r' '\000\000'
+LANGFILTER_POSTCLIP: | sort -z
+
+# We can trust sort (above) to do the right thing since there's no
+# high ascii. dict2dawg.pl is much faster if I can trust that its
+# input is in sorted order.
+NEEDSSORT:false
+
+LANGINFO:
English dictionaries can contain words with any of the 26
+LANGINFO: letters you think of as making up the alphabet: A-Z. At
+LANGINFO: this point any word in your list containing anything else
+LANGINFO: will simply be excluded from the dictionary.
+
+# High bit means "official". Next 7 bits are an enum where
+# English==1. Low byte is padding
+XLOC_HEADER:0x8100
+
+
+2 0 {"_"}
+9 1 'A'
+2 3 'B'
+2 3 'C'
+4 2 'D'
+12 1 'E'
+2 4 'F'
+3 2 'G'
+2 4 'H'
+9 1 'I'
+1 8 'J'
+1 5 'K'
+4 1 'L'
+2 3 'M'
+6 1 'N'
+8 1 'O'
+2 3 'P'
+1 10 'Q'
+6 1 'R'
+4 1 'S'
+6 1 'T'
+4 1 'U'
+2 4 'V'
+2 4 'W'
+1 8 'X'
+2 4 'Y'
+1 10 'Z'
+
+
+# should ignore all after the above
diff --git a/dawg/Makefile.2to8 b/dawg/Makefile.2to8
new file mode 100644
index 000000000..bb1777d27
--- /dev/null
+++ b/dawg/Makefile.2to8
@@ -0,0 +1,7 @@
+# -*-mode: Makefile -*-
+
+# These are the targets that almost all language makefiles will want.
+
+SHORT_WORD = 2
+LONG_WORD = 8
+
diff --git a/dawg/Makefile.langcommon b/dawg/Makefile.langcommon
new file mode 100644
index 000000000..52d33a44a
--- /dev/null
+++ b/dawg/Makefile.langcommon
@@ -0,0 +1,257 @@
+# -*-mode: Makefile -*-
+
+# Copyright 2000-2002 by Eric House (fixin@peak.org)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+FRANK_EXT = xwd
+
+# this will make all dicts the new, larger type
+#FORCE_4 = -force4
+
+PALM_DICT_TYPE = DAWG
+PAR = ../par.pl
+
+LANGUAGE = $(shell basename $$(pwd))
+
+#all: target_all
+
+# let languages set this first, but we always add blank to it.
+BLANK_INFO = "_" /dev/null /dev/null
+
+# Supply a default so don't have to type so much; feel free to change
+TARGET_TYPE ?= FRANK
+
+ifdef NEWDAWG
+ TABLE_ARG = -mn
+else
+ TABLE_ARG = -m
+endif
+
+##############################################################################
+# PalmOS rules
+##############################################################################
+ifeq ($(TARGET_TYPE),PALM)
+
+ifdef NEWDAWG
+ PDBTYPE = Xwr4
+else
+ PDBTYPE = Xwr3
+endif
+
+all: $(LANG)2to8.pdb
+
+empty: $(LANG)0to0.pdb
+
+# Those languages that have bitmap files for custom glyphs will need to
+# define BMPBINFILES and perhaps provide a rule for building the files
+binfiles.stamp: $(BMPBINFILES)
+ touch binfiles.stamp
+
+palmspecials.bin: ../palm_mkspecials.pl $(BMPFILES)
+ $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
+
+# can't just use values.bin because the specials bitmap info is
+# platform-specific
+palmvalues.bin: values.bin palmspecials.bin
+ cat $^ > $@
+
+# values.bin: palmspecials.bin ../xloc binfiles.stamp
+# cd ../ && $(MAKE) xloc
+# binfileparms=""; \
+# if [ "$(BMPBINFILES)" != "" ]; then \
+# for f in $(BMPBINFILES)""; \
+# do binfileparms="$$binfileparms -i $$f"; \
+# done; \
+# fi; \
+# ../xloc -l $(LANGCODE) $$binfileparms -T $@
+# cat palmspecials.bin >> $@
+
+# header (first record) is node count (long) and 4 chars:
+# unsigned char firstEdgeRecNum;
+# unsigned char charTableRecNum;
+# unsigned char valTableRecNum;
+# unsigned char reserved[3]; // worst case this points to a new resource
+
+# include "flags" as used on the other platforms
+palmheader%.bin: $(LANG)%_wordcount.bin $(LANG)%_flags.bin
+ rm -f $@
+ touch $@
+ifdef NEWDAWG
+ cat $(LANG)$*_flags.bin >> $@
+endif
+ cat $< >> $@
+ perl -e "print pack(\"C\",3)" >> $@ # first edge
+ perl -e "print pack(\"C\",1)" >> $@ # char table rec number
+ perl -e "print pack(\"C\",2)" >> $@ # valTable rec number
+ perl -e "print pack(\"CCC\",0)" >> $@ # reserved 3 bytes
+ perl -e "print pack(\"CC\",0)" >> $@ # c code added two more...
+
+
+# This works, but leaves out the header info that the current version
+# has. I'm not sure anybody cares, though...
+$(LANG)%.pdb: dawg$(LANG)%.stamp table.bin palmvalues.bin palmheader%.bin
+ $(PAR) c -a backup $@ \
+ $(basename $(@F)) $(PALM_DICT_TYPE) $(PDBTYPE) \
+ palmheader$*.bin table.bin palmvalues.bin dawg$(LANG)$*_*.bin
+
+# start=$$(echo $@ | sed -e 's/$(LANG)\([0-9]*\)to[0-9]*.pdb/\1/'); \
+# end=$$(echo $@ | sed -e 's/$(LANG)[0-9]*to\([0-9]*\).pdb/\1/'); \
+# zcat $< | grep "^.\{$${start},$${end}\}$$" | \
+# ../dict2pdb -t table.bin -v values.bin -n $(basename $(@F)) \
+# > $@
+
+# the files to export for byod
+byodbins: table.bin values.bin palmvalues.bin
+
+#endif # TARGET_TYPE==PALM
+
+##############################################################################
+# Franklin ebook rules
+##############################################################################
+else
+ifeq ($(TARGET_TYPE),FRANK)
+
+# If we're on a system that can build for Franklin, assume that's what
+# we want to build (and the .xwd.saved [<-bug] file for other non-palm
+# platforms is a by-product). But if the EBM tools aren't there, just
+# build the .xwd file.
+ifeq (x$(shell echo -n $$EBOOKMAN_SDK)x,xx)
+all: $(LANG)2to8.xwd
+else
+all: $(LANG)2to8.seb
+endif
+
+empty: $(LANG)0to0.seb
+
+ifneq (x$(shell echo -n $$EBOOKMAN_SDK)x,xx)
+include ${EBOOKMAN_SDK}/ebsdk.uses
+endif
+
+# a binary file (one byte) giving the number of tiles in the dict
+charcount.bin: table.bin
+ifdef NEWDAWG
+ siz=$$(wc -c $< | sed -e 's/$/'); \
+ perl -e "print pack(\"c\",$$siz/2)" > $@
+else
+ siz=$$(wc -c $< | sed -e 's/$/'); \
+ perl -e "print pack(\"c\",$$siz)" > $@
+endif
+
+# For each entry in the table whose face < 32, there needs to be a pair of
+# pbitm files and a string giving the printing form
+frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
+ $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
+
+$(LANG)%.$(FRANK_EXT): dawg$(LANG)%.stamp $(LANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
+ cat $(LANG)$*_flags.bin charcount.bin table.bin values.bin \
+ frankspecials.bin $(LANG)StartLoc.bin $$(ls dawg$(LANG)$*_*.bin) > $@
+ cp $@ saveme.bin
+
+$(LANG)%.seb: $(LANG)%.$(FRANK_EXT) $(LANG)%.atts
+ ${ESDK_CREATESEB_EXE} $<
+ cp $< $<.saved
+
+$(LANG)%.atts: #recreate it each time based on params
+ echo '_PUB|global+read-only|"Eric_House"' >> $@
+ echo "_NAME|global+read-only|\"$(LANG)2to8\"" >> $@
+ echo "_EXT|global+read-only|\"$(FRANK_EXT)\"" >> $@
+ echo '_LCAT|nosign+global|"CONTENT"' >> $@
+ echo '_PERM|global+read-only|"r"' >> $@
+
+# values.bin: ../xloc
+# cd ../ && $(MAKE) xloc
+# ../xloc -l $(LANGCODE) -T $@
+
+# the files to export for byod
+byodbins: table.bin values.bin frankspecials.bin
+
+
+else
+ (Need to define TARGET_TYPE if get error pointing to this line)
+endif #ifeq ($(TARGET_TYPE),FRANK)
+endif
+
+ifeq (s$(TARGET_TYPE),s)
+It's an error not to specify a TARGET_TYPE
+endif
+
+##############################################################################
+# shared rules
+##############################################################################
+
+# For some reason I can't fathom dawg$(LANG)% gets nuked every time
+# the top-level rule fires (all: for whatever TARGET_TYPE.) It
+# happens after the rule finishes....
+
+# 16 bits worth of flags for the start of the eventual file. At this
+# point, the flags mean this:
+# 1: old-style DAWG.
+# 2: new-style DAWG, three bytes per node.
+# 3: new-style DAWG, four bytes per node
+$(LANG)%_flags.bin: dawg$(LANG)%.stamp
+ifdef NEWDAWG
+ if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \
+ then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \
+ else perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \
+ fi
+else
+ if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \
+ then perl -e "print pack(\"n\",0x0001)" > $@; echo "flags=1"; \
+ else echo "ERROR: old format can't handle 4-byte"; exit 1; \
+ fi
+endif
+
+dawg$(LANG)%.stamp: $(LANG)Main.dict.gz ../dict2dawg.pl table.bin ../Makefile.langcommon
+ start=$$(echo $@ | sed -e 's/dawg$(LANG)\([0-9]*\)to[0-9]*.stamp/\1/'); \
+ end=$$(echo $@ | sed -e 's/dawg$(LANG)[0-9]*to\([0-9]*\).stamp/\1/'); \
+ echo $${start} and $$end; \
+ zcat $< | grep "^.\{$${start},$${end}\}$$" | tr '\n' '\0'| \
+ sort -z | ../dict2dawg.pl $(TABLE_ARG) table.bin -b 28000 \
+ -ob dawg$(LANG)$* \
+ -sn $(LANG)StartLoc.bin -k -term 0 -wc $(LANG)$*_wordcount.bin \
+ $(FORCE_4) -ns $(LANG)$*_nodesize.bin
+ touch $@
+
+$(LANG)%_wordcount.bin: dawg$(LANG)%.stamp
+ @echo
+
+# the files to export for byod
+allbins:
+ $(MAKE) TARGET_TYPE=PALM byodbins
+ $(MAKE) TARGET_TYPE=FRANK byodbins
+
+table.bin: ../xloc.pl
+ifdef NEWDAWG
+ perl -I../ ../xloc.pl -tn > $@
+else
+ perl -I../ ../xloc.pl -t > $@
+endif
+
+values.bin: ../xloc.pl
+ perl -I../ ../xloc.pl -v > $@
+
+%.dict: %.dict.gz
+ zcat $< > $@
+
+clean_common:
+ rm -f $(LANG)Main.dict *.bin *.pdb *.seb dawg*.stamp *.$(FRANK_EXT) \
+ $(LANG)*.pdb $(LANG)*.seb
+
+help:
+ @echo "make TARGET_TYPE=[FRANK|PALM]"
+
+test:
+ @echo $(LANGUAGE)
diff --git a/dawg/dict2dawg.pl b/dawg/dict2dawg.pl
new file mode 100755
index 000000000..eb1729be2
--- /dev/null
+++ b/dawg/dict2dawg.pl
@@ -0,0 +1,798 @@
+#!/usr/bin/perl
+
+##############################################################################
+# adapted from C++ code Copyright (C) 2000 Falk Hueffner
+# This version Copyright (C) 2002 Eric House (fixin@peak.org)
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+# USA
+##############################################################################
+
+# inputs: 0. Name of file mapping letters to 0..31 values. In English
+# case just contains A..Z. This will be used to translate the tries
+# on output.
+# 1. Max number of bytes per binary output file.
+#
+# 2. Basename of binary files for output.
+
+# 3. Name of file to which to write the number of the
+# startNode, since I'm not rewriting a bunch of code to expect Falk's
+# '*' node at the start.
+#
+
+# In STDIN, the text file to be compressed. It absolutely
+# must be sorted. The sort doesn't have to follow the order in the
+# map file, however.
+
+# This is meant eventually to be runnable as part of a cgi system for
+# letting users generating Crosswords dicts online.
+
+
+
+use strict;
+use POSIX;
+
+my $gFirstDiff;
+my @gCurrentWord;
+my $gCurrentWord; # save so can check for sortedness
+my $gDone = 0;
+my @gInputStrings;
+my $gNeedsSort = 1; # read from cmd line eventually
+my @gNodes; # final array of nodes
+my $gNBytesPerOutfile = 0xFFFFFFFF;
+my $gTableFile;
+my $gOutFileBase;
+my $gStartNodeOut;
+my $gInFileName;
+my $gKillIfMissing = 1;
+my $gTermChar = '/n';
+my $gDumpText = 0; # dump the dict as text after?
+my $gCountFile;
+my $gBytesPerNodeFile; # where to write whether node size 3 or 4
+my $gWordCount = 0;
+my %gTableHash;
+my @gRevMap;
+my $debug = 0;
+my %gSubsHash;
+my $gForceFour = 0; # use four bytes regardless of need?
+my $gNBytesPerNode;
+my $gUseUnicode;
+
+main();
+
+exit;
+
+##############################################################################
+
+sub main() {
+
+ if ( !parseARGV() ) {
+ usage();
+ exit();
+ }
+
+ makeTableHash();
+
+ my $infile;
+
+ if ( $gInFileName ) {
+ open $infile, "<$gInFileName";
+ } else {
+ $infile = \*STDIN;
+ }
+
+ @gInputStrings = parseAndSort( $infile );
+ if ( $gInFileName ) {
+ close $infile;
+ }
+
+ # Do I need this stupid thing? Better to move the first row to
+ # the front of the array and patch everything else. Or fix the
+ # non-palm dictionary format to include the offset of the first
+ # node.
+
+ my $dummyNode = 0xFFFFFFFF;
+ @gNodes = ( $dummyNode );
+
+ readNextWord();
+
+ my $firstRootChildOffset = buildNode(0);
+
+ moveTopToFront( \$firstRootChildOffset );
+
+ if ( $gStartNodeOut ) {
+ writeOutStartNode( $gStartNodeOut, $firstRootChildOffset );
+ }
+
+ print STDERR "\n... dumping table ...\n" if $debug;
+ printNodes( \@gNodes, "done with main" ) if $debug;
+
+ # write out the number of nodes if requested
+ if ( $gCountFile ) {
+ open OFILE, "> $gCountFile";
+ print OFILE pack( "N", $gWordCount );
+ close OFILE;
+ print STDERR "wrote out: got $gWordCount words\n";
+ }
+
+ if ( $gOutFileBase ) {
+ emitNodes( $gNBytesPerOutfile, $gOutFileBase );
+ }
+
+ if ( $gDumpText && @gNodes > 0 ) {
+ printOneLevel( $firstRootChildOffset, "" );
+ }
+
+ if ( $gBytesPerNodeFile ) {
+ open OFILE, "> $gBytesPerNodeFile";
+ print OFILE $gNBytesPerNode;
+ close OFILE;
+ }
+ print STDERR "Used $gNBytesPerNode per node.\n";
+} # main
+
+# We now have an array of nodes with the last subarray being the
+# logical top of the tree. Move them to the start, fixing all fco
+# refs, so that legacy code like Palm can assume top==0.
+#
+# Note: It'd probably be a bit faster to integrate this with emitNodes
+# -- unless I need to have an in-memory list that can be used for
+# lookups. But that's best for debugging, so keep it this way for now.
+#
+# Also Note: the first node is a dummy that can and should be tossed
+# now.
+
+sub moveTopToFront($) {
+ my ( $firstRef ) = @_;
+
+ my $firstChild = ${$firstRef};
+ ${$firstRef} = 0;
+ my @lastSub;
+
+ if ( $firstChild > 0 ) {
+ # remove the last (the root) subarray
+ @lastSub = splice( @gNodes, $firstChild );
+ } else {
+ die "there should be no words!!" if $gWordCount != 0;
+ }
+ # remove the first (garbage) node
+ shift @gNodes;
+
+ my $diff;
+ if ( $firstChild > 0 ) {
+ # -1 because all move down by 1; see prev line
+ $diff = @lastSub - 1;
+ die "something wrong with len\n" if $diff < 0;
+ } else {
+ $diff = 0;
+ }
+
+ # stick it on the front
+ splice( @gNodes, 0, 0, @lastSub);
+
+ # We add $diff to everything. There's no subtracting because
+ # nobody had any refs to the top list.
+
+ for ( my $i = 0; $i < @gNodes; ++$i ) {
+ my $fco = TrieNodeGetFirstChildOffset( $gNodes[$i] );
+ if ( $fco != 0 ) { # 0 means NONE, not 0th!!
+ TrieNodeSetFirstChildOffset( \$gNodes[$i], $fco+$diff );
+ }
+ }
+} # moveTopToFront
+
+
+sub buildNode {
+ my ( $depth ) = @_;
+
+ if ( @gCurrentWord == $depth ) {
+ # End of word reached. If the next word isn't a continuation
+ # of the current one, then we've reached the bottom of the
+ # recursion tree.
+ readNextWord();
+ if ($gFirstDiff < $depth || $gDone) {
+ return 0;
+ }
+ }
+
+ my @newedges;
+
+ do {
+ my $letter = $gCurrentWord[$depth];
+ my $isTerminal = @gCurrentWord - 1 == $depth ? 1:0;
+
+ my $nodeOffset = buildNode($depth+1);
+ my $newNode = MakeTrieNode($letter, $isTerminal, $nodeOffset);
+ push( @newedges, $newNode );
+
+ } while ( ($gFirstDiff == $depth) && !$gDone);
+
+ TrieNodeSetIsLastSibling( \@newedges[@newedges-1], 1 );
+
+ return addNodes( \@newedges );
+} # buildNode
+
+sub addNodes {
+ my ( $newedgesR ) = @_;
+
+ my $found = findSubArray( $newedgesR );
+
+ if ( $found >= 0 ) {
+ die "0 is an invalid match!!!" if $found == 0;
+ return $found;
+ } else {
+
+ my $firstFreeIndex = @gNodes;
+
+ print STDERR "adding...\n" if $debug;
+ printNodes( $newedgesR ) if $debug;
+
+ push @gNodes, (@{$newedgesR});
+
+ registerSubArray( $newedgesR, $firstFreeIndex );
+ return $firstFreeIndex;
+ }
+} # addNodes
+
+sub printNode {
+ my ( $index, $node ) = @_;
+
+ print STDERR "[$index] ";
+
+ printf( STDERR
+ "letter=%d; isTerminal=%d; isLastSib=%d; fco=%d;\n",
+ TrieNodeGetLetter($node),
+ TrieNodeGetIsTerminal($node),
+ TrieNodeGetIsLastSibling($node),
+ TrieNodeGetFirstChildOffset($node));
+} # printNode
+
+sub printNodes {
+ my ( $nodesR, $name ) = @_;
+
+ my $len = @{$nodesR};
+ # print "printNodes($name): len = $len\n";
+
+ for ( my $i = 0; $i < $len; ++$i ) {
+ my $node = ${$nodesR}[$i];
+ printNode( $i, $node );
+ }
+
+}
+
+
+# Hashing. We'll keep a hash of offsets into the existing nodes
+# array, and as the key use a string that represents the entire sub
+# array. Since the key is what we're matching for, there should never
+# be more than one value per hash and so we don't need buckets.
+# Return -1 if there's no match.
+
+sub findSubArray {
+ my ( $newedgesR ) = @_;
+
+ my $key = join('', @{$newedgesR});
+
+ if ( exists( $gSubsHash{$key} ) ) {
+ return $gSubsHash{$key};
+ } else {
+ return -1;
+ }
+} # findSubArray
+
+# add to the hash
+sub registerSubArray {
+ my ( $edgesR, $nodeLoc ) = @_;
+
+ my $key = join( '', @{$edgesR} );
+
+ if ( exists $gSubsHash{$key} ) {
+ die "entry for key shouldn't exist!!";
+ } else {
+ $gSubsHash{$key} = $nodeLoc;
+ }
+
+} # registerSubArray
+
+sub readNextWord() {
+ my @word;
+
+ if ( !$gDone ) {
+ $gDone = @gInputStrings == 0;
+ if ( !$gDone ) {
+ @word = @{shift @gInputStrings};
+ } else {
+ print STDERR "gDone set to true\n" if $debug;
+ }
+
+ print STDERR "got word: ", join(',',@word), "\n" if $debug;
+ }
+ my $numCommonLetters = 0;
+ my $len = @word;
+ if ( @gCurrentWord < $len ) {
+ $len = @gCurrentWord;
+ }
+
+ while ( @gCurrentWord[$numCommonLetters] eq @word[$numCommonLetters]
+ && $numCommonLetters < $len) {
+ ++$numCommonLetters;
+ }
+
+ $gFirstDiff = $numCommonLetters;
+ die "words ", join(",",@gCurrentWord), " and ", join(",", @word), " out of order" if #$debug &&
+ @gCurrentWord > 0 && @word > 0
+ && !firstBeforeSecond( \@gCurrentWord, \@word );
+ @gCurrentWord = @word;
+} # readNextWord
+
+sub firstBeforeSecond {
+ my ( $firstR, $secondR ) = @_;
+
+ for ( my $i = 0; ; ++$i ) {
+
+ # if we reach the end of the first word/list, we're done.
+ if ( $i == @{$firstR} ) {
+ die "duplicate!!!" if $i == @{$secondR};
+ return 1;
+ # but if we reach the second end first, we've failed
+ } elsif ( $i == @{$secondR} ) {
+ return 0;
+ }
+
+ my $diff = ${$firstR}[$i] <=> ${$secondR}[$i];
+
+ if ( $diff == 0 ) {
+ next;
+ } else {
+ return $diff < 0;
+ }
+ }
+} # firstBeforeSecond
+
+# passed to sort. Should remain unprototyped for effeciency's sake
+
+sub cmpWords {
+
+ my $lenA = @{$a};
+ my $lenB = @{$b};
+ my $min = $lenA > $lenB? $lenB: $lenA;
+
+ for ( my $i = 0; $i < $min; ++$i ) {
+ my $ac = ${$a}[$i];
+ my $bc = ${$b}[$i];
+
+ my $res = $ac <=> $bc;
+
+ if ( $res != 0 ) {
+ return $res; # we're done
+ }
+ }
+
+ # If we got here, they match up to their common length. Longer is
+ # greater.
+ my $res = @{$a} <=> @{$b};
+ return $res; # which is longer?
+} # cmpWords
+
+sub parseAndSort() {
+ my ( $infile ) = @_;
+
+ my @wordlist;
+ my @word;
+
+ WORDLOOP:
+ for ( ; ; ) {
+
+ my $dropWord = 0;
+ splice @word; # empty it
+
+ # for each byte
+ for ( ; ; ) {
+ my $byt = getc($infile);
+
+ if ( $byt eq undef ) {
+ last WORDLOOP;
+ } elsif ( $byt eq $gTermChar ) {
+ if ( !$dropWord ) {
+ push @wordlist, [ @word ];
+ ++$gWordCount;
+ }
+ next WORDLOOP;
+ } elsif ( exists( $gTableHash{$byt} ) ) {
+ if ( !$dropWord ) {
+ push @word, $gTableHash{$byt};
+ die "word too long" if @word > 15;
+ }
+ } elsif ($gKillIfMissing) {
+ die "char $byt (", $byt+0, ") not in map file $gTableFile\n";
+ } else {
+ $dropWord = 1;
+ splice @word; # lose anything we already have
+ }
+ }
+ }
+
+ if ( $gNeedsSort && ($gWordCount > 0) ) {
+ @wordlist = sort cmpWords @wordlist;
+ }
+
+ print STDERR "length of list is ", @wordlist + 0, ".\n" if $debug;
+
+ return @wordlist;
+} # parseAndSort
+
+# Print binary representation of trie array. This isn't used yet, but
+# eventually it'll want to dump to multiple files appropriate for Palm
+# that can be catenated together on other platforms. There'll need to
+# be a file giving the offset of the first node too. Also, might want
+# to move to 4-byte representation when the input can't otherwise be
+# handled.
+
+sub dumpNodes {
+
+ for ( my $i = 0; $i < @gNodes; ++$i ) {
+ my $node = $gNodes[$i];
+ my $bstr = pack( "I", $node );
+ print STDOUT $bstr;
+ }
+}
+
+##############################################################################
+# Little node-field setters and getters to hide what bits represent
+# what.
+##############################################################################
+
+sub TrieNodeSetIsTerminal {
+ my ( $nodeR, $isTerminal ) = @_;
+
+ if ( $isTerminal ) {
+ ${$nodeR} |= 1 << 31;
+ } else {
+ ${$nodeR} &= ~(1 << 31);
+ }
+}
+
+sub TrieNodeGetIsTerminal {
+ my ( $node ) = @_;
+ return ($node & 1 << 31) != 0;
+}
+
+sub TrieNodeSetIsLastSibling {
+ my ( $nodeR, $isLastSibling ) = @_;
+ if ( $isLastSibling ) {
+ ${$nodeR} |= 1 << 30;
+ } else {
+ ${$nodeR} &= ~(1 << 30);
+ }
+}
+
+sub TrieNodeGetIsLastSibling {
+ my ( $node ) = @_;
+ return ($node & 1 << 30) != 0;
+}
+
+sub TrieNodeSetLetter {
+ my ( $nodeR, $letter ) = @_;
+
+ die "letter ", $letter, " too big" if $letter >= 32;
+
+ my $mask = ~(0x1F << 25);
+ ${$nodeR} &= $mask; # clear all the bits
+ ${$nodeR} |= ($letter << 25); # set new ones
+}
+
+sub TrieNodeGetLetter {
+ my ( $node ) = @_;
+ $node >>= 25;
+ $node &= 0x1F;
+ return $node;
+}
+
+sub TrieNodeSetFirstChildOffset {
+ my ( $nodeR, $fco ) = @_;
+
+ die "$fco larger than 25 bits" if ($fco & 0xFE000000) != 0;
+
+ my $mask = ~0x01FFFFFF;
+ ${$nodeR} &= $mask; # clear all the bits
+ ${$nodeR} |= $fco; # set new ones
+}
+
+sub TrieNodeGetFirstChildOffset {
+ my ( $node ) = @_;
+ $node &= 0x01FFFFFF; # 24 bits
+ return $node;
+}
+
+
+sub MakeTrieNode {
+ my ( $letter, $isTerminal, $firstChildOffset, $isLastSibling ) = @_;
+ my $result = 0;
+
+ TrieNodeSetIsTerminal( \$result, $isTerminal );
+ TrieNodeSetIsLastSibling( \$result, $isLastSibling );
+ TrieNodeSetLetter( \$result, $letter );
+ TrieNodeSetFirstChildOffset( \$result, $firstChildOffset );
+
+ return $result;
+} # MakeTrieNode
+
+# Caller may need to know the offset of the first top-level node.
+# Write it here.
+sub writeOutStartNode {
+ my ( $startNodeOut, $firstRootChildOffset ) = @_;
+
+ open NODEOUT, ">$startNodeOut";
+ print NODEOUT pack( "N", $firstRootChildOffset );
+ close NODEOUT;
+} # writeOutStartNode
+
+# build the hash for translating. I'm using a hash assuming it'll be
+# fast. Key is the letter; value is the 0..31 value to be output.
+sub makeTableHash {
+ my $i;
+ open TABLEFILE, "< $gTableFile";
+
+ splice @gRevMap; # empty it
+
+ for ( $i = 0; ; ++$i ) {
+ my $ch = getc(TABLEFILE);
+ if ( $ch eq undef ) {
+ last;
+ }
+
+ if ( $gUseUnicode ) { # skip the first byte each time: tmp HACK!!!
+ $ch = getc(TABLEFILE);
+ }
+ if ( $ch eq undef ) {
+ last;
+ }
+
+ push @gRevMap, $ch;
+
+ if ( ord($ch) == 0 ) { # blank
+ next; # we want to increment i when blank seen since
+ # it is a tile value
+ }
+
+ die "$gTableFile too large\n" if $i > 32;
+ die "only blank (0) can be 32nd char\n" if ($i == 32 && $ch != 0);
+
+ $gTableHash{$ch} = $i;
+ }
+
+ close TABLEFILE;
+} # makeTableHash
+
+# emitNodes. "input" is $gNodes. From it we write up to
+# $nBytesPerOutfile to files named $outFileBase0..n, mapping the
+# letter field down to 5 bits with a hash built from $tableFile. If
+# at any point we encounter a letter not in the hash we fail with an
+# error.
+
+sub emitNodes($$) {
+ my ( $gNBytesPerOutfile, $outFileBase ) = @_;
+
+ # now do the emit.
+
+ # is 17 bits enough?
+ printf STDOUT ("There are %d (0x%x) nodes in this DAWG.\n",
+ 0 + @gNodes, 0 + @gNodes );
+ if ( @gNodes > 0x1FFFF || $gForceFour ) {
+ $gNBytesPerNode = 4;
+ } else {
+ $gNBytesPerNode = 3;
+ }
+
+ my $nextIndex = 0;
+ my $nextFileNum = 0;
+
+ for ( $nextFileNum = 0; ; ++$nextFileNum ) {
+
+ if ( $nextIndex >= @gNodes ) {
+ last; # we're done
+ }
+
+ die "Too many outfiles; infinite loop?" if $nextFileNum > 99;
+
+ my $outName = sprintf("${outFileBase}_%03d.bin", $nextFileNum);
+ open OUTFILE, "> $outName";
+ my $curSize = 0;
+
+ while ( $nextIndex < @gNodes ) {
+
+ # scan to find the next terminal
+ my $i;
+ for ( $i = $nextIndex;
+ !TrieNodeGetIsLastSibling($gNodes[$i]);
+ ++$i ) {
+
+ # do nothing but a sanity check
+ if ( $i >= @gNodes) {
+ die "bad trie format: last node not last sibling" ;
+ }
+
+ }
+ ++$i; # move beyond the terminal
+ my $nextSize = ($i - $nextIndex) * $gNBytesPerNode;
+ if ($curSize + $nextSize > $gNBytesPerOutfile) {
+ last;
+ } else {
+ # emit the subarray
+ while ( $nextIndex < $i ) {
+ outputNode( $gNodes[$nextIndex], $gNBytesPerNode,
+ \*OUTFILE );
+ ++$nextIndex;
+ }
+ $curSize += $nextSize;
+ }
+ }
+
+ close OUTFILE;
+ }
+
+} # emitNodes
+
+sub printWord {
+ my ( $str ) = @_;
+
+ print STDERR "$str\n";
+}
+
+# print out the entire dictionary, as text, to STDERR.
+
+sub printOneLevel {
+
+ my ( $index, $str ) = @_;
+
+ for ( ; ; ) {
+
+ my $newStr = $str;
+ my $node = $gNodes[$index++];
+
+ my $lindx = $gRevMap[TrieNodeGetLetter($node)];
+
+ if ( ord($lindx) >= 0x20 ) {
+ $newStr .= "$lindx";
+ } else {
+ print STDERR "sub space" if $debug;
+ $newStr .= "\\" . chr('0'+$lindx);
+ }
+
+ if ( TrieNodeGetIsTerminal($node) ) {
+ printWord( $newStr );
+ }
+
+ my $fco = TrieNodeGetFirstChildOffset( $node );
+ if ( $fco != 0 ) {
+ printOneLevel( $fco, $newStr );
+ }
+
+ if ( TrieNodeGetIsLastSibling($node) ) {
+ last;
+ }
+ }
+}
+
+sub outputNode {
+ my ( $node, $nBytes, $outfile ) = @_;
+
+ my $fco = TrieNodeGetFirstChildOffset($node);
+ my $fourthByte;
+
+ if ( $nBytes == 4 ) {
+ $fourthByte = $fco >> 17;
+ die "fco too big" if $fourthByte > 0xFF;
+ $fco &= 0x1FFFF;
+ }
+
+ # format according to dawg.h:
+ # typedef struct array_edge {
+ # unsigned char highByte;
+ # unsigned char lowByte;
+ # unsigned char bits;
+#ifdef FOUR_BYTE
+ # unsigned char moreBits;
+#endif
+ # } array_edge;
+
+ # define LETTERMASK 0x1f
+ # define ACCEPTINGMASK 0x20
+ # define LASTEDGEMASK 0x40
+ # define LASTBITMASK 0x80
+
+ # write the fco (less that one bit). We want two bytes worth
+ # in three-byte mode, and three in four-byte mode (which is
+ # untested)
+ for ( my $i = 1; $i >= 0; --$i ) {
+ my $tmp = ($fco >> ($i * 8)) & 0xFF;
+ print $outfile pack( "C", $tmp );
+ }
+ $fco >>= 16; # it should now be 1 or 0
+ die "fco not 1 or 0" if $fco > 1;
+
+ my $chIn5 = TrieNodeGetLetter($node);
+ my $bits = $chIn5;
+
+ if ( TrieNodeGetIsLastSibling($node) ) {
+ $bits |= 0x40;
+ }
+ if ( TrieNodeGetIsTerminal($node) ) {
+ $bits |= 0x20;
+ }
+ if ( $fco != 0 ) {
+ $bits |= 0x80;
+ }
+ print $outfile pack( "C", $bits );
+
+ # the final byte, if in use
+ if ( $nBytes == 4 ) {
+ print $outfile pack( "C", $fourthByte );
+ }
+} # outputNode
+
+sub usage {
+ print STDERR "usage: $0 \n"
+ . "\t[-b bytesPerFile] (default = 0xFFFFFFFF)\n"
+ . "\t-m mapFile\n"
+ . "\t-ob outFileBase\n"
+ . "\t-sn start node out file\n"
+ . "\t[-if input file name] -- default = stdin\n"
+ . "\t[-term ch] (word terminator -- default = '\\0'\n"
+ . "\t[-nosort] (input already sorted in accord with -m; " .
+ " default=sort'\n"
+ . "\t[-dump] (write dictionary as text to STDERR for testing)\n"
+ . "\t[-force4](use 4 bytes per node regardless of need)\n"
+ . "\t[-r] (reject words with letters not in mapfile)\n"
+ . "\t[-k] (kill if any letters no in mapfile -- default)\n"
+ ;
+
+} # usage
+
+sub parseARGV {
+
+ my $arg;
+ while ( my $arg = shift(@ARGV) ) {
+
+ SWITCH: {
+ if ($arg =~ /-b/) {$gNBytesPerOutfile = shift(@ARGV), last SWITCH;}
+ if ($arg =~ /-mn/) {$gTableFile = shift(@ARGV);
+ $gUseUnicode = 1;
+ last SWITCH;}
+ if ($arg =~ /-m/) {$gTableFile = shift(@ARGV); last SWITCH;}
+ if ($arg =~ /-ob/) {$gOutFileBase = shift(@ARGV), last SWITCH;}
+ if ($arg =~ /-sn/) {$gStartNodeOut = shift(@ARGV), last SWITCH;}
+ if ($arg =~ /-if/) {$gInFileName = shift(@ARGV), last SWITCH;}
+ if ($arg =~ /-r/) {$gKillIfMissing = 0; last SWITCH;}
+ if ($arg =~ /-k/) {$gKillIfMissing = 1; last SWITCH;}
+ if ($arg =~ /-term/) {$gTermChar = chr(shift(@ARGV)); last SWITCH;}
+ if ($arg =~ /-dump/) {$gDumpText = 1; last SWITCH;}
+ if ($arg =~ /-nosort/) {$gNeedsSort = 0; last SWITCH;}
+ if ($arg =~ /-wc/) {$gCountFile = shift(@ARGV); last SWITCH;}
+ if ($arg =~ /-ns/) {$gBytesPerNodeFile = shift(@ARGV); last SWITCH;}
+ if ($arg =~ /-force4/) {$gForceFour = 1; last SWITCH;}
+ die "unexpected arg $arg\n";
+ }
+ }
+
+
+ print STDERR "gNBytesPerOutfile=$gNBytesPerOutfile\n" if $debug;
+ print STDERR "gTableFile=$gTableFile\n" if $debug;
+ print STDERR "gOutFileBase=$gOutFileBase\n" if $debug;
+ print STDERR "gStartNodeOut=$gStartNodeOut\n" if $debug;
+ printf STDERR "gTermChar=%s(%d)\n", $gTermChar, ord($gTermChar) if $debug;
+
+ return $gTableFile;
+
+} # parseARGV
diff --git a/dawg/frank_mkspecials.pl b/dawg/frank_mkspecials.pl
new file mode 100755
index 000000000..353727a74
--- /dev/null
+++ b/dawg/frank_mkspecials.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+
+# Copyright 2001 by Eric House (fixin@peak.org)
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# Given arguments consisting of triples, first a string and then pbitm
+# files representing bitmaps. For each triple, print out the string and
+# then the converted bitmaps.
+
+use strict;
+
+while ( @ARGV ) {
+ my $str = shift();
+ my $largebmp = shift();
+ my $smallbmp = shift();
+
+ doOne( $str, $largebmp, $smallbmp );
+}
+
+sub doOne {
+ my ( $str, $largebmp, $smallbmp ) = @_;
+
+ print pack( "C", length($str) );
+ print $str;
+
+ print STDERR "looking at $largebmp", "\n";
+
+ print `cat $largebmp | ../pbitm2bin.pl`;
+ print `cat $smallbmp | ../pbitm2bin.pl`;
+}
+
+
diff --git a/dawg/palm_mkspecials.pl b/dawg/palm_mkspecials.pl
new file mode 100755
index 000000000..d30f413d5
--- /dev/null
+++ b/dawg/palm_mkspecials.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+
+# Copyright 2002 by Eric House
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# Given arguments consisting of triples, first a string and then pbitm
+# files representing bitmaps. The format looks like this:
+
+# array [0-n] of { char len;
+# char[3] alt txt;
+# int16 offsetOfLarge;
+# int16 offsetOfSmall;
+# }
+# array [0-n] of {
+# bitmapLargeIfPresent;
+# bitmapSmallIfPresent;
+# }
+#
+# In addition, there's padding between bitmaps if needed to get the next
+# one to a 2-byte boundary. And the input files are not in PalmOS bitmap
+# format, so thay have to get converted into a tmp file before the sizes
+# can be known and included in the eventual output.
+
+use strict;
+
+my $tmpfile = "/tmp/tmpout$$";
+
+my $nSpecials = @ARGV / 3;
+die "wrong number of args" if (@ARGV % 3) != 0;
+my $gOffset = $nSpecials * 8; # sizeof(Xloc_specialEntry)
+
+open TMPFILE, "> $tmpfile";
+
+for ( my $i = 0; $i < $nSpecials; ++$i ) {
+
+ my $size;
+
+ my $str = shift( @ARGV );
+ my $len = length($str);
+ die "string $str too long" if $len > 3;
+ print $str;
+ while ( $len < 4 ) {
+ ++$len;
+ print pack("c", 0 );
+ }
+
+ doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
+ doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
+}
+
+close TMPFILE;
+
+# now append the tempfile
+open TMPFILE, "< $tmpfile";
+while ( read( TMPFILE, my $buffer, 128 ) ) {
+ print $buffer;
+}
+close TMPFILE;
+
+unlink $tmpfile;
+
+exit 0;
+
+
+sub doOneFile($$) {
+ my ( $fil, $fh, $offsetR ) = @_;
+
+ my $size = convertBmp($fil, $fh );
+ if ( ($size % 2) != 0 ) {
+ ++$size;
+ print $fh pack( "c", 0 );
+ }
+
+ print pack( "n", $size > 0? ${$offsetR} : 0 );
+
+ ${$offsetR} += $size;
+} # doOneFile
+
+sub convertBmp($$) {
+ my ( $pbitmfile, $fhandle ) = @_;
+
+ if ( $pbitmfile eq "/dev/null" ) {
+ return 0;
+ } else {
+
+ # for some reason I can't get quote marks to print into tmp.rcp using just `echo`
+ open TMP, "> tmp.rcp";
+ print TMP "BITMAP ID 1000 \"$pbitmfile\" AUTOCOMPRESS";
+ close TMP;
+
+ `pilrc tmp.rcp`;
+ print $fhandle `cat Tbmp03e8.bin`;
+ my $siz = -s "Tbmp03e8.bin";
+ `rm -f tmp.rcp Tbmp03e8.bin`;
+
+ return $siz;
+ }
+}
diff --git a/dawg/par.pl b/dawg/par.pl
new file mode 100755
index 000000000..11ca565d2
--- /dev/null
+++ b/dawg/par.pl
@@ -0,0 +1,234 @@
+#!/usr/bin/perl
+
+# Copyright 2002 by Eric House (fixin@peak.org) All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# Only enough of par's features to support building a crosswords dict
+# pdb
+
+use strict;
+
+my $debug = 0;
+
+
+# stolen from par source
+my $PRC_FLAGS_RESOURCE = (0x1<<0);
+my $PRC_FLAGS_READONLY = (0x1<<1);
+my $PRC_FLAGS_DIRTY = (0x1<<2);
+my $PRC_FLAGS_BACKUP = (0x1<<3);
+my $PRC_FLAGS_NEWER = (0x1<<4);
+my $PRC_FLAGS_RESET = (0x1<<5);
+my $PRC_FLAGS_COPYPREVENT = (0x1<<6);
+my $PRC_FLAGS_STREAM = (0x1<<7);
+my $PRC_FLAGS_HIDDEN = (0x1<<8);
+my $PRC_FLAGS_LAUNCHABLE = (0x1<<9);
+my $PRC_FLAGS_RECYCLABLE = (0x1<<10);
+my $PRC_FLAGS_BUNDLE = (0x1<<11);
+my $PRC_FLAGS_OPEN = (0x1<<15);
+
+
+my $gAttrs = 0;
+my $gVersion = 1; # par defaults this to 1
+
+my $cmd = shift( @ARGV );
+die "only 'c' supported now" if $cmd ne "c" && $cmd ne "-c";
+
+readHOptions( \@ARGV );
+
+my $dbfile = shift( @ARGV );
+my $name = shift( @ARGV );
+die "name $name too long" if length($name) > 31;
+my $type = shift( @ARGV );
+die "type $type must be of length 4" if length($type) != 4;
+my $cid = shift( @ARGV );
+die "cid $cid must be of length 4" if length($cid) != 4;
+
+my @fileNames;
+my @fileLengths;
+
+my $nFiles = 0;
+
+while ( @ARGV > 0 ) {
+ my $filename = shift( @ARGV );
+ push @fileNames, $filename;
+ push @fileLengths, -s $filename;
+ ++$nFiles;
+}
+
+# from par's prcp.h; thanks djw!
+# typedef struct prc_file_t {
+# prc_byte_t name[32];
+# prc_byte_t flags[2];
+# prc_byte_t version[2];
+# prc_byte_t ctime[4];
+# prc_byte_t mtime[4];
+# prc_byte_t btime[4];
+# prc_byte_t modnum[4];
+# prc_byte_t appinfo[4];
+# prc_byte_t sortinfo[4];
+# prc_byte_t type[4];
+# prc_byte_t cid[4];
+# prc_byte_t unique_id_seed[4];
+# prc_byte_t next_record_list[4];
+# prc_byte_t nrecords[2];
+# } prc_file_t;
+
+my $str;
+my $offset = 0;
+
+open OUTFILE, "> $dbfile" or die "couldn't open outfile $dbfile for writing";
+
+# print the string, then pad with 0s
+$offset = length($name);
+print OUTFILE $name;
+while ( $offset < 32 ) {
+ print OUTFILE pack("c", 0);
+ ++$offset;
+}
+
+$str = pack("n", $gAttrs); # flags
+print OUTFILE $str;
+$offset += length($str);
+
+$str = pack("n", $gVersion); # version
+print OUTFILE $str;
+$offset += length($str);
+
+my $time = time() + 2082844800;
+$str = pack("NNN", $time, $time, 0); # ctime, mtime, btime
+print OUTFILE $str;
+$offset += length($str);
+
+$str = pack("N", 0 ); # mod num
+print OUTFILE $str;
+$offset += length($str);
+
+$str = pack("N", 0 ); # appinfo
+print OUTFILE $str;
+$offset += length($str);
+
+$str = pack("N", 0 ); # sortinfo
+print OUTFILE $str;
+$offset += length($str);
+
+
+print OUTFILE $type; # type
+print OUTFILE $cid; # cid
+$offset += 8;
+
+$str = pack("NN", 0, 0 ); # unique_id_seed, next_record_list
+print OUTFILE $str;
+$offset += length($str);
+
+$str = pack("n", $nFiles ); # nrecords
+print OUTFILE $str;
+$offset += length($str);
+
+$offset += $nFiles * 8;
+$offset += 2; # djw adds 2 bytes after size list; see below
+foreach my $len ( @fileLengths ) {
+ print OUTFILE pack( "N", $offset );
+ print OUTFILE pack( "N", 0 );
+ $offset += $len;
+}
+
+print OUTFILE pack( "n", 0 ); # djw does this sans comment: flush.c, line 87
+
+foreach my $file ( @fileNames ) {
+ open INFILE, "<$file" or die "couldn't open infile $file\n";
+ my $buffer;
+ while ( read INFILE, $buffer, 1024 ) {
+ print OUTFILE $buffer;
+ }
+ close INFILE;
+}
+
+
+close OUTFILE;
+
+exit 0;
+
+##############################################################################
+# Subroutines
+##############################################################################
+
+sub readHOptions {
+
+ my ( $argvR ) = @_;
+
+ for ( ; ; ) {
+ my $opt = ${$argvR}[0];
+
+ if ( $opt !~ /^-/ ) {
+ last;
+ }
+
+ # it starts with a '-': use it; else don't consume anything
+ shift @{$argvR};
+
+ if ( $opt eq "-a" ) {
+ my $attrs = shift @{$argvR};
+ processAttrString( $attrs );
+ } elsif ( $opt eq "-v" ) {
+ $gVersion = shift @{$argvR};
+ } else {
+ die "what's with \"$opt\": -a and -v are the only hattrs supported";
+ }
+ }
+
+} # readHOptions
+
+sub processAttrString {
+
+ my ( $attrs ) = @_;
+
+ foreach my $flag ( split /\|/, $attrs ) {
+
+ print STDERR "looking at flag $flag\n" if $debug;
+
+ if ( $flag =~ /resource/ ) {
+ $gAttrs |= $PRC_FLAGS_RESOURCE;
+ die "resource attr not supported";
+ } elsif ( $flag =~ /readonly/ ) {
+ $gAttrs |= $PRC_FLAGS_READONLY;
+ } elsif ( $flag =~ /dirty/ ) {
+ $gAttrs |= $PRC_FLAGS_DIRTY;
+ } elsif ( $flag =~ /backup/ ) {
+ $gAttrs |= $PRC_FLAGS_BACKUP;
+ } elsif ( $flag =~ /newer/ ) {
+ $gAttrs |= $PRC_FLAGS_NEWER;
+ } elsif ( $flag =~ /reset/ ) {
+ $gAttrs |= $PRC_FLAGS_RESET;
+ } elsif ( $flag =~ /copyprevent/ ) {
+ $gAttrs |= $PRC_FLAGS_COPYPREVENT;
+ } elsif ( $flag =~ /stream/ ) {
+ $gAttrs |= $PRC_FLAGS_STREAM;
+ die "stream attr not supported";
+ } elsif ( $flag =~ /hidden/ ) {
+ $gAttrs |= $PRC_FLAGS_HIDDEN;
+ } elsif ( $flag =~ /launchable/ ) {
+ $gAttrs |= $PRC_FLAGS_LAUNCHABLE;
+ } elsif ( $flag =~ /recyclable/ ) {
+ $gAttrs |= $PRC_FLAGS_RECYCLABLE;
+ } elsif ( $flag =~ /bundle/ ) {
+ $gAttrs |= $PRC_FLAGS_BUNDLE;
+ } elsif ( $flag =~ /open/ ) {
+ $gAttrs |= $PRC_FLAGS_OPEN;
+ } else {
+ die "flag $flag not supportd";
+ }
+ }
+} # processAttrString
diff --git a/dawg/xloc.pl b/dawg/xloc.pl
new file mode 100755
index 000000000..d811b51a1
--- /dev/null
+++ b/dawg/xloc.pl
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+
+# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# test and wrapper file for xloc.pm
+
+use strict;
+use xloc;
+
+
+
+my $arg = shift(@ARGV);
+my $lang = shift(@ARGV);
+my $path = "./$lang";
+my $infoFile = "$path/info.txt";
+
+die "info file $infoFile not found\n" if ! -s $infoFile;
+
+
+my $xlocToken = xloc::ParseTileInfo($infoFile);
+
+if ( $arg eq "-t" ) {
+ xloc::WriteMapFile( $xlocToken, 0, \*STDOUT );
+} elsif ( $arg eq "-tn" ) {
+ xloc::WriteMapFile( $xlocToken, 1, \*STDOUT );
+} elsif ( $arg eq "-v" ) {
+ xloc::WriteValuesFile( $xlocToken, \*STDOUT );
+}
diff --git a/dawg/xloc.pm b/dawg/xloc.pm
new file mode 100644
index 000000000..729af26e4
--- /dev/null
+++ b/dawg/xloc.pm
@@ -0,0 +1,180 @@
+#!/usr/bin/perl
+
+# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# The idea here is that all that matters about a language is stored in
+# one file (possibly excepting rules for prepping a dictionary).
+# There's a list of tile faces, counts and values, and also some
+# name-value pairs as needed. The pairs come first, and then a list
+# of tiles.
+
+package xloc;
+
+use strict;
+use warnings;
+
+BEGIN {
+ use Exporter ();
+ our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+
+ $VERSION = 1.00;
+
+ @ISA = qw(Exporter);
+ @EXPORT = qw(&ParseTileInfo &GetNTiles &TileFace &TileValue
+ &TileCount &GetValue &WriteMapFile &WriteValuesFile);
+ %EXPORT_TAGS = ( );
+}
+
+# Returns what's meant to be an opaque object that can be passed back
+# for queries. It's a hash with name-value pairs and an _INFO entry
+# containing a list of tile info lists.
+
+sub ParseTileInfo($) {
+ my ( $filePath ) = @_;
+ my %result;
+
+ open INPUT, "<$filePath" or die "couldn't open $filePath";
+
+ my $inTiles = 0;
+ my @tiles;
+ while ( ) {
+
+ chomp;
+ s/\#.*$//;
+ s/^\s*$//; # nuke all-white-space lines
+ next if !length;
+
+ if ( $inTiles ) {
+ if ( // ) {
+ last;
+ } else {
+ my ( $count, $val, $face ) = m/^\s*(\w+)\s+(\w+)\s+(.*)\s*$/;
+ push @tiles, [ $count, $val, $face ];
+ }
+ } elsif ( /\w:/ ) {
+ my ( $nam, $val ) = split ':', $_, 2;
+ $result{$nam} .= $val;
+ } elsif ( // ) {
+ $inTiles = 1;
+ }
+
+ }
+
+ close INPUT;
+
+ $result{"_TILES"} = [ @tiles ];
+
+ return \%result;
+}
+
+sub GetNTiles($) {
+ my ( $hashR ) = @_;
+
+ my $listR = ${$hashR}{"_TILES"};
+
+ return 0 + @{$listR};
+}
+
+sub GetValue($$) {
+ my ( $hashR, $name ) = @_;
+ return ${$hashR}{$name};
+}
+
+sub WriteMapFile($$$) {
+ my ( $hashR, $unicode, $fhr ) = @_;
+
+ my $packStr;
+ if ( $unicode ) {
+ $packStr = "n";
+ } else {
+ $packStr = "C";
+ }
+
+ my $count = GetNTiles($hashR);
+ my $specialCount = 0;
+ for ( my $i = 0; $i < $count; ++$i ) {
+ my $tileR = GetNthTile( $hashR, $i );
+ my $str = ${$tileR}[2];
+
+ if ( $str =~ /\'(.)\'/ ) {
+ print $fhr pack($packStr, ord($1) );
+ } elsif ( $str =~ /\"(.+)\"/ ) {
+ print $fhr pack($packStr, $specialCount++ );
+ } elsif ( $str =~ /(\d+)/ ) {
+ print $fhr pack( $packStr, chr($1) );
+ } else {
+ die "WriteMapFile: unrecognized face format $str";
+ }
+ }
+} # WriteMapFile
+
+sub WriteValuesFile($$) {
+ my ( $hashR, $fhr ) = @_;
+
+ my $header = GetValue( $hashR,"XLOC_HEADER" );
+ die "no XLOC_HEADER found" if ! $header;
+
+ print STDERR "header is $header\n";
+
+ print $fhr pack( "n", hex($header) );
+
+ my $count = GetNTiles($hashR);
+ for ( my $i = 0; $i < $count; ++$i ) {
+ my $tileR = GetNthTile( $hashR, $i );
+
+ print $fhr pack( "c", TileValue($tileR) );
+ print $fhr pack( "c", TileCount($tileR) );
+ }
+
+} # WriteValuesFile
+
+sub GetNthTile($$) {
+ my ( $hashR, $n ) = @_;
+ my $listR = ${$hashR}{"_TILES"};
+
+ return ${$listR}[$n];
+}
+
+sub TileFace($) {
+ my ( $tileR ) = @_;
+
+ my $str = ${$tileR}[2];
+
+ if ( $str =~ /\'(.)\'/ ) {
+ return $1;
+ } elsif ( $str =~ /\"(.+)\"/ ) {
+ return $1;
+ } elsif ( $str =~ /(\d+)/ ) {
+ return chr($1);
+ } else {
+ die "TileFace: unrecognized face format: $str";
+ }
+}
+
+sub TileValue($) {
+ my ( $tileR ) = @_;
+
+ return ${$tileR}[0];
+}
+
+sub TileCount($) {
+ my ( $tileR ) = @_;
+
+ return ${$tileR}[1];
+}
+
+1;