first checkin from personal project

2025-01-28 07:58:08 +01:00 · 2003-11-01 06:19:20 +00:00 · 2003-11-01 06:19:20 +00:00 · cec6a805e3
commit cec6a805e3
parent 4c17ed9705
11 changed files with 1820 additions and 0 deletions
--- a/dawg/English/Makefile
+++ b/dawg/English/Makefile
@ -0,0 +1,40 @@
 # -*-mode: Makefile -*-
 # Copyright 2002 by Eric House (fixin@peak.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 TARGET_TYPE ?= FRANK
 # This represents the default -- for now
 COMMAND = -f Makefile.BasEnglish TARGET_TYPE=FRANK
 alleng:
 	for mfile in Makefile.BasEnglish Makefile.OSW Makefile.TWL98 Makefile.CollegeEng; do \
 		$(MAKE) -f $$mfile TARGET_TYPE=$(TARGET_TYPE); \
 	done
 %:
 	$(MAKE) $(COMMAND) $@
 all:
 	$(MAKE) $(COMMAND)
 clean:
 	$(MAKE) $(COMMAND) clean
 help:
 	@echo "try make -f Makefile.[BasEnglish|CollegeEng] \\"
 	@echo "    TARGET_TYPE=[PALM|FRANK]"
--- a/dawg/English/Makefile.BasEnglish
+++ b/dawg/English/Makefile.BasEnglish
@ -0,0 +1,35 @@
 # -*-mode: Makefile -*-
 # Copyright 2002 by Eric House (fixin@peak.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 LANG=BasEnglish
 LANGCODE=en_US
 #NEWDAWG=1
 TARGET_TYPE ?= FRANK
 include ../Makefile.2to8
 include ../Makefile.langcommon
 $(LANG)Main.dict.gz: BasEnglish.dict.gz
 	ln -s $< $@
 # Everything but creating of the Main.dict file is inherited from the
 # "parent" Makefile.langcommon in the parent directory.
 clean: clean_common
 	rm -f $(LANG)Main.dict.gz *.bin $(LANG)*.pdb $(LANG)*.seb
--- a/dawg/English/info.txt
+++ b/dawg/English/info.txt
@ -0,0 +1,71 @@
 # Copyright 2002 by Eric House (fixin@peak.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 LANGCODE:en_US
 # deal with DOS files
 LANGFILTER_PRECLIP: tr -d '\r' |
 LANGFILTER_POSTCLIP: | tr [a-z] [A-Z]
 LANGFILTER_POSTCLIP: | grep '^[A-Z]*$'
 LANGFILTER_POSTCLIP: | tr -s '\n\r' '\000\000'
 LANGFILTER_POSTCLIP: | sort -z
 # We can trust sort (above) to do the right thing since there's no
 # high ascii.  dict2dawg.pl is much faster if I can trust that its
 # input is in sorted order.
 NEEDSSORT:false
 LANGINFO: <p>English dictionaries can contain words with any of the 26
 LANGINFO: letters you think of as making up the alphabet: A-Z.  At
 LANGINFO: this point any word in your list containing anything else
 LANGINFO: will simply be excluded from the dictionary.</p>
 # High bit means "official".  Next 7 bits are an enum where
 # English==1.  Low byte is padding
 XLOC_HEADER:0x8100
 <BEGIN_TILES>
 2			0		{"_"}
 9			1		'A'
 2			3		'B'
 2			3		'C'
 4			2		'D'
 12			1		'E'
 2			4		'F'
 3			2		'G'
 2			4		'H'
 9			1		'I'
 1			8		'J'
 1			5		'K'
 4			1		'L'
 2			3		'M'
 6			1		'N'
 8			1		'O'
 2			3		'P'
 1			10		'Q'
 6			1		'R'
 4			1		'S'
 6			1		'T'
 4			1		'U'
 2			4		'V'
 2			4		'W'
 1			8		'X'
 2			4		'Y'
 1			10		'Z'
 <END_TILES>
 # should ignore all after the <END> above
--- a/dawg/Makefile.2to8
+++ b/dawg/Makefile.2to8
@ -0,0 +1,7 @@
 # -*-mode: Makefile -*-
 # These are the targets that almost all language makefiles will want.
 SHORT_WORD = 2
 LONG_WORD = 8
--- a/dawg/Makefile.langcommon
+++ b/dawg/Makefile.langcommon
@ -0,0 +1,257 @@
 # -*-mode: Makefile -*-
 # Copyright 2000-2002 by Eric House (fixin@peak.org)
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 FRANK_EXT = xwd
 # this will make all dicts the new, larger type
 #FORCE_4 = -force4
 PALM_DICT_TYPE = DAWG
 PAR = ../par.pl
 LANGUAGE = $(shell basename $$(pwd))
 #all: target_all
 # let languages set this first, but we always add blank to it.
 BLANK_INFO =  "_" /dev/null /dev/null
 # Supply a default so don't have to type so much; feel free to change 
 TARGET_TYPE ?= FRANK
 ifdef NEWDAWG
 	TABLE_ARG = -mn
 else
 	TABLE_ARG = -m
 endif
 ##############################################################################
 # PalmOS rules
 ##############################################################################
 ifeq ($(TARGET_TYPE),PALM)
 ifdef NEWDAWG
 	PDBTYPE = Xwr4
 else
 	PDBTYPE = Xwr3
 endif
 all: $(LANG)2to8.pdb
 empty: $(LANG)0to0.pdb
 # Those languages that have bitmap files for custom glyphs will need to
 # define BMPBINFILES and perhaps provide a rule for building the files
 binfiles.stamp: $(BMPBINFILES)
 	touch binfiles.stamp
 palmspecials.bin: ../palm_mkspecials.pl $(BMPFILES)
 	$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
 # can't just use values.bin because the specials bitmap info is
 # platform-specific
 palmvalues.bin: values.bin palmspecials.bin
 	cat $^ > $@
 # values.bin: palmspecials.bin ../xloc binfiles.stamp
 # 	cd ../ && $(MAKE) xloc
 # 	binfileparms=""; \
 # 		if [ "$(BMPBINFILES)" != "" ]; then \
 # 			for f in $(BMPBINFILES)""; \
 # 				do binfileparms="$$binfileparms -i $$f"; \
 # 			done; \
 # 		fi; \
 # 		../xloc -l $(LANGCODE) $$binfileparms -T $@
 # 		cat palmspecials.bin >> $@
 # header (first record) is node count (long) and 4 chars: 
 #    unsigned char firstEdgeRecNum;
 #    unsigned char charTableRecNum;
 #    unsigned char valTableRecNum;
 #    unsigned char reserved[3]; // worst case this points to a new resource
 # include "flags" as used on the other platforms
 palmheader%.bin: $(LANG)%_wordcount.bin $(LANG)%_flags.bin
 	rm -f $@
 	touch $@
 ifdef NEWDAWG
 	cat $(LANG)$*_flags.bin >> $@
 endif
 	cat $< >> $@
 	perl -e "print pack(\"C\",3)" >> $@	# first edge
 	perl -e "print pack(\"C\",1)" >> $@	# char table rec number
 	perl -e "print pack(\"C\",2)" >> $@	# valTable rec number
 	perl -e "print pack(\"CCC\",0)" >> $@	# reserved 3 bytes
 	perl -e "print pack(\"CC\",0)" >> $@	# c code added two more...
 # This works, but leaves out the header info that the current version
 # has.  I'm not sure anybody cares, though...
 $(LANG)%.pdb: dawg$(LANG)%.stamp table.bin palmvalues.bin palmheader%.bin 
 	$(PAR) c -a backup $@ \
 		$(basename $(@F)) $(PALM_DICT_TYPE) $(PDBTYPE) \
 		palmheader$*.bin table.bin palmvalues.bin dawg$(LANG)$*_*.bin
 #	start=$$(echo $@ | sed -e 's/$(LANG)\([0-9]*\)to[0-9]*.pdb/\1/'); \
 #	end=$$(echo $@ | sed -e 's/$(LANG)[0-9]*to\([0-9]*\).pdb/\1/'); \
 #	zcat $< | grep "^.\{$${start},$${end}\}$$" | \
 #		../dict2pdb -t table.bin -v values.bin -n $(basename $(@F)) \
 #		> $@
 # the files to export for byod
 byodbins: table.bin values.bin palmvalues.bin 
 #endif				# TARGET_TYPE==PALM
 ##############################################################################
 # Franklin ebook rules
 ##############################################################################
 else
 ifeq ($(TARGET_TYPE),FRANK)
 # If we're on a system that can build for Franklin, assume that's what
 # we want to build (and the .xwd.saved [<-bug] file for other non-palm
 # platforms is a by-product).  But if the EBM tools aren't there, just
 # build the .xwd file.
 ifeq (x$(shell echo -n $$EBOOKMAN_SDK)x,xx)
 all: $(LANG)2to8.xwd
 else
 all: $(LANG)2to8.seb
 endif
 empty: $(LANG)0to0.seb
 ifneq (x$(shell echo -n $$EBOOKMAN_SDK)x,xx)
 include ${EBOOKMAN_SDK}/ebsdk.uses
 endif
 # a binary file (one byte) giving the number of tiles in the dict
 charcount.bin: table.bin
 ifdef NEWDAWG
 	siz=$$(wc -c $< | sed -e 's/$<//'); \
 	perl -e "print pack(\"c\",$$siz/2)" > $@
 else
 	siz=$$(wc -c $< | sed -e 's/$<//'); \
 	perl -e "print pack(\"c\",$$siz)" > $@
 endif
 # For each entry in the table whose face < 32, there needs to be a pair of
 # pbitm files and a string giving the printing form
 frankspecials.bin: ../frank_mkspecials.pl  $(BMPFILES)
 	$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
 $(LANG)%.$(FRANK_EXT): dawg$(LANG)%.stamp $(LANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
 	cat $(LANG)$*_flags.bin charcount.bin table.bin values.bin \
 		frankspecials.bin $(LANG)StartLoc.bin $$(ls dawg$(LANG)$*_*.bin) > $@
 	cp $@ saveme.bin
 $(LANG)%.seb: $(LANG)%.$(FRANK_EXT) $(LANG)%.atts 
 	${ESDK_CREATESEB_EXE} $<
 	cp $< $<.saved
 $(LANG)%.atts:			#recreate it each time based on params
 	echo '_PUB|global+read-only|"Eric_House"' >> $@
 	echo "_NAME|global+read-only|\"$(LANG)2to8\""  >> $@
 	echo "_EXT|global+read-only|\"$(FRANK_EXT)\""  >> $@
 	echo '_LCAT|nosign+global|"CONTENT"'  >> $@
 	echo '_PERM|global+read-only|"r"' >> $@
 # values.bin: ../xloc 
 # 	cd ../ && $(MAKE) xloc
 # 	../xloc -l $(LANGCODE) -T $@
 # the files to export for byod
 byodbins: table.bin values.bin frankspecials.bin
 else
 	(Need to define TARGET_TYPE if get error pointing to this line)
 endif				#ifeq ($(TARGET_TYPE),FRANK)
 endif
 ifeq (s$(TARGET_TYPE),s)
 It's an error not to specify a TARGET_TYPE
 endif
 ##############################################################################
 # shared rules
 ##############################################################################
 # For some reason I can't fathom dawg$(LANG)% gets nuked every time
 # the top-level rule fires (all: for whatever TARGET_TYPE.)  It
 # happens after the rule finishes....
 # 16 bits worth of flags for the start of the eventual file.  At this
 # point, the flags mean this:
 # 1: old-style DAWG.
 # 2: new-style DAWG, three bytes per node.
 # 3: new-style DAWG, four bytes per node
 $(LANG)%_flags.bin: dawg$(LANG)%.stamp
 ifdef NEWDAWG
 	if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \
 		then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \
 		else perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \
 	fi
 else
 	if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \
 		then perl -e "print pack(\"n\",0x0001)" > $@; echo "flags=1"; \
 		else echo "ERROR: old format can't handle 4-byte"; exit 1; \
 	fi
 endif
 dawg$(LANG)%.stamp: $(LANG)Main.dict.gz ../dict2dawg.pl table.bin ../Makefile.langcommon
 	start=$$(echo $@ | sed -e 's/dawg$(LANG)\([0-9]*\)to[0-9]*.stamp/\1/'); \
 	end=$$(echo $@ | sed -e 's/dawg$(LANG)[0-9]*to\([0-9]*\).stamp/\1/'); \
 	echo $${start} and $$end; \
 	zcat $< | grep "^.\{$${start},$${end}\}$$" | tr '\n' '\0'| \
 		sort -z | ../dict2dawg.pl $(TABLE_ARG) table.bin -b 28000 \
 		-ob dawg$(LANG)$* \
 		-sn $(LANG)StartLoc.bin -k -term 0 -wc $(LANG)$*_wordcount.bin \
 		$(FORCE_4) -ns $(LANG)$*_nodesize.bin
 	touch $@
 $(LANG)%_wordcount.bin: dawg$(LANG)%.stamp
 	@echo
 # the files to export for byod
 allbins: 
 	$(MAKE) TARGET_TYPE=PALM byodbins
 	$(MAKE) TARGET_TYPE=FRANK byodbins
 table.bin:  ../xloc.pl 
 ifdef NEWDAWG
 	perl -I../ ../xloc.pl -tn > $@
 else
 	perl -I../ ../xloc.pl -t > $@
 endif
 values.bin:  ../xloc.pl 
 	perl -I../ ../xloc.pl -v > $@
 %.dict: %.dict.gz
 	zcat $< > $@
 clean_common:
 	rm -f $(LANG)Main.dict *.bin *.pdb *.seb dawg*.stamp *.$(FRANK_EXT) \
 		$(LANG)*.pdb $(LANG)*.seb 
 help:
 	@echo "make TARGET_TYPE=[FRANK|PALM]"
 test:
 	@echo $(LANGUAGE)
--- a/dawg/dict2dawg.pl
+++ b/dawg/dict2dawg.pl
@ -0,0 +1,798 @@
 #!/usr/bin/perl
 ##############################################################################
 # adapted from C++ code Copyright (C) 2000 Falk Hueffner
 # This version Copyright (C) 2002 Eric House (fixin@peak.org)
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 # USA
 ##############################################################################
 # inputs: 0. Name of file mapping letters to 0..31 values.  In English
 # case just contains A..Z.  This will be used to translate the tries
 # on output.
 #         1. Max number of bytes per binary output file.
 #
 #         2. Basename of binary files for output.
 #         3. Name of file to which to write the number of the
 # startNode, since I'm not rewriting a bunch of code to expect Falk's
 # '*' node at the start.
 #
 #         In STDIN, the text file to be compressed.  It absolutely
 # must be sorted.  The sort doesn't have to follow the order in the
 # map file, however.
 # This is meant eventually to be runnable as part of a cgi system for
 # letting users generating Crosswords dicts online.
 use strict;
 use POSIX;
 my $gFirstDiff;
 my @gCurrentWord;
 my $gCurrentWord;               # save so can check for sortedness
 my $gDone = 0;
 my @gInputStrings;
 my $gNeedsSort = 1;             # read from cmd line eventually
 my @gNodes;                       # final array of nodes
 my $gNBytesPerOutfile = 0xFFFFFFFF;
 my $gTableFile;
 my $gOutFileBase;
 my $gStartNodeOut;
 my $gInFileName;
 my $gKillIfMissing = 1;
 my $gTermChar = '/n';
 my $gDumpText = 0;              # dump the dict as text after?
 my $gCountFile;
 my $gBytesPerNodeFile;          # where to write whether node size 3 or 4
 my $gWordCount = 0;
 my %gTableHash;
 my @gRevMap;
 my $debug = 0;
 my %gSubsHash;
 my $gForceFour = 0;             # use four bytes regardless of need?
 my $gNBytesPerNode;
 my $gUseUnicode;
 main();
 exit;
 ##############################################################################
 sub main() { 
    if ( !parseARGV() ) {
        usage();
        exit();
    }
    makeTableHash();
    my $infile;
    if ( $gInFileName ) {
        open $infile, "<$gInFileName";
    } else {
        $infile = \*STDIN;
    }
    @gInputStrings = parseAndSort( $infile );
    if ( $gInFileName ) {
        close $infile;
    }
    # Do I need this stupid thing?  Better to move the first row to
    # the front of the array and patch everything else.  Or fix the
    # non-palm dictionary format to include the offset of the first
    # node.
    my $dummyNode = 0xFFFFFFFF;
    @gNodes = ( $dummyNode );
    readNextWord();
    my $firstRootChildOffset = buildNode(0);
    moveTopToFront( \$firstRootChildOffset );
    if ( $gStartNodeOut ) {
        writeOutStartNode( $gStartNodeOut, $firstRootChildOffset );
    }
    print STDERR "\n... dumping table ...\n" if $debug;
    printNodes( \@gNodes, "done with main" ) if $debug;
    # write out the number of nodes if requested
    if ( $gCountFile ) {
        open OFILE, "> $gCountFile";
        print OFILE pack( "N", $gWordCount );
        close OFILE;
        print STDERR "wrote out: got $gWordCount words\n";
    }
    if ( $gOutFileBase ) {
        emitNodes( $gNBytesPerOutfile, $gOutFileBase );
    }
    if ( $gDumpText && @gNodes > 0 ) {
        printOneLevel( $firstRootChildOffset, "" );
    }
    if ( $gBytesPerNodeFile ) {
        open OFILE, "> $gBytesPerNodeFile";
        print OFILE $gNBytesPerNode;
        close OFILE;
    }
    print STDERR "Used $gNBytesPerNode per node.\n";
 } # main
 # We now have an array of nodes with the last subarray being the
 # logical top of the tree.  Move them to the start, fixing all fco
 # refs, so that legacy code like Palm can assume top==0.
 #
 # Note: It'd probably be a bit faster to integrate this with emitNodes
 # -- unless I need to have an in-memory list that can be used for
 # lookups.  But that's best for debugging, so keep it this way for now.
 #
 # Also Note: the first node is a dummy that can and should be tossed
 # now.
 sub moveTopToFront($) {
    my ( $firstRef ) = @_;
    my $firstChild = ${$firstRef};
    ${$firstRef} = 0;
    my @lastSub;
    if ( $firstChild > 0 ) {
        # remove the last (the root) subarray
        @lastSub = splice( @gNodes, $firstChild );
    } else {
        die "there should be no words!!" if $gWordCount != 0;
    }
    # remove the first (garbage) node
    shift @gNodes;
    my $diff;
    if ( $firstChild > 0 ) {
        # -1 because all move down by 1; see prev line
        $diff = @lastSub - 1;
        die "something wrong with len\n" if $diff < 0;
    } else {
        $diff = 0;
    }
    # stick it on the front
    splice( @gNodes, 0, 0, @lastSub);
    # We add $diff to everything. There's no subtracting because
    # nobody had any refs to the top list.
    for ( my $i = 0; $i < @gNodes; ++$i ) {
        my $fco = TrieNodeGetFirstChildOffset( $gNodes[$i] );
        if ( $fco != 0 ) {      # 0 means NONE, not 0th!!
            TrieNodeSetFirstChildOffset( \$gNodes[$i], $fco+$diff );
        }
    }
 } # moveTopToFront
 sub buildNode {
    my ( $depth ) = @_;
    if ( @gCurrentWord == $depth ) {
        # End of word reached. If the next word isn't a continuation
        # of the current one, then we've reached the bottom of the
        # recursion tree.
        readNextWord();
        if ($gFirstDiff < $depth || $gDone) {
            return 0;
        }
    }
    my @newedges;
    do {
        my $letter = $gCurrentWord[$depth];
        my $isTerminal = @gCurrentWord - 1 == $depth ? 1:0;
        my $nodeOffset = buildNode($depth+1);
        my $newNode = MakeTrieNode($letter, $isTerminal, $nodeOffset);
        push( @newedges, $newNode );
    } while ( ($gFirstDiff == $depth) && !$gDone);
    TrieNodeSetIsLastSibling( \@newedges[@newedges-1], 1 );
    return addNodes( \@newedges );
 } # buildNode
 sub addNodes {
    my ( $newedgesR ) = @_;
    my $found = findSubArray( $newedgesR );
    if ( $found >= 0 ) {
        die "0 is an invalid match!!!" if $found == 0;
        return $found;
    } else {
        my $firstFreeIndex = @gNodes;
        print STDERR "adding...\n" if $debug;
        printNodes( $newedgesR ) if $debug;
        push @gNodes, (@{$newedgesR});
        registerSubArray( $newedgesR, $firstFreeIndex );
        return $firstFreeIndex;
    }
 } # addNodes
 sub printNode {
    my ( $index, $node ) = @_;
    print STDERR "[$index] ";
    printf( STDERR
            "letter=%d; isTerminal=%d; isLastSib=%d; fco=%d;\n",
            TrieNodeGetLetter($node),
            TrieNodeGetIsTerminal($node),
            TrieNodeGetIsLastSibling($node),
            TrieNodeGetFirstChildOffset($node));
 } # printNode
 sub printNodes {
    my ( $nodesR, $name ) = @_;
    my $len = @{$nodesR};
    # print "printNodes($name): len = $len\n";
    for ( my $i = 0; $i < $len; ++$i ) {
        my $node = ${$nodesR}[$i];
        printNode( $i, $node );
    }
 }
 # Hashing.  We'll keep a hash of offsets into the existing nodes
 # array, and as the key use a string that represents the entire sub
 # array.  Since the key is what we're matching for, there should never
 # be more than one value per hash and so we don't need buckets.
 # Return -1 if there's no match.
 sub findSubArray {
   my ( $newedgesR ) = @_;
 	my $key = join('', @{$newedgesR});
    if ( exists( $gSubsHash{$key} ) ) {
        return $gSubsHash{$key};
    } else {
        return -1;
    }
 } # findSubArray
 # add to the hash
 sub registerSubArray {
    my ( $edgesR, $nodeLoc ) = @_;
    my $key = join( '', @{$edgesR} );
    if ( exists $gSubsHash{$key} ) {
        die "entry for key shouldn't exist!!";
    } else {
        $gSubsHash{$key} = $nodeLoc;
    }
 } # registerSubArray
 sub readNextWord() {
    my @word;
    if ( !$gDone ) {
        $gDone = @gInputStrings == 0;
        if ( !$gDone ) {
            @word = @{shift @gInputStrings};
        } else {
            print STDERR "gDone set to true\n" if $debug;
        }
        print STDERR "got word: ", join(',',@word), "\n" if $debug;
    }
    my $numCommonLetters = 0;
    my $len = @word;
    if ( @gCurrentWord < $len ) {
        $len = @gCurrentWord;
    }
    while ( @gCurrentWord[$numCommonLetters] eq @word[$numCommonLetters]
            && $numCommonLetters < $len) {
        ++$numCommonLetters;
    }
    $gFirstDiff = $numCommonLetters;
    die "words ", join(",",@gCurrentWord), " and ", join(",", @word), " out of order" if #$debug && 
        @gCurrentWord > 0 && @word > 0
        && !firstBeforeSecond( \@gCurrentWord, \@word );
    @gCurrentWord = @word;
 } # readNextWord
 sub firstBeforeSecond {
    my ( $firstR, $secondR ) = @_;
    for ( my $i = 0; ; ++$i ) {
        # if we reach the end of the first word/list, we're done.
        if ( $i == @{$firstR} ) {  
            die "duplicate!!!" if $i == @{$secondR};
            return 1;
            # but if we reach the second end first, we've failed
        } elsif ( $i == @{$secondR} ) {
            return 0;
        }
        my $diff = ${$firstR}[$i] <=> ${$secondR}[$i];
        if ( $diff == 0 ) {
            next;
        } else {
            return $diff < 0;
        }
    }
 } # firstBeforeSecond
 # passed to sort.  Should remain unprototyped for effeciency's sake
 sub cmpWords {
    my $lenA = @{$a};
    my $lenB = @{$b};
    my $min = $lenA > $lenB? $lenB: $lenA;
    for ( my $i = 0; $i < $min; ++$i ) {
        my $ac = ${$a}[$i];
        my $bc = ${$b}[$i];
        my $res = $ac <=> $bc;
        if ( $res != 0 ) {
            return $res;        # we're done
        }
    }
    # If we got here, they match up to their common length.  Longer is
    # greater.
    my $res = @{$a} <=> @{$b};
    return $res; # which is longer?
 } # cmpWords
 sub parseAndSort() {
    my ( $infile ) = @_;
    my @wordlist;
    my @word;
  WORDLOOP:
    for ( ; ; ) {
        my $dropWord = 0;
        splice @word;           # empty it
        # for each byte
        for ( ; ; ) {
            my $byt = getc($infile);
            if ( $byt eq undef ) {
                last WORDLOOP;
            } elsif ( $byt eq $gTermChar ) {
                if ( !$dropWord ) {
                    push @wordlist, [ @word ];
                    ++$gWordCount;
                }
                next WORDLOOP;
            } elsif ( exists( $gTableHash{$byt} ) ) {
                if ( !$dropWord ) {
                    push @word, $gTableHash{$byt};
                    die "word too long" if @word > 15;
                }
            } elsif ($gKillIfMissing) {
                die "char $byt (", $byt+0, ") not in map file $gTableFile\n";
            } else {
                $dropWord = 1;
                splice @word;     # lose anything we already have
            }
        }
    }
    if ( $gNeedsSort && ($gWordCount > 0)  ) {
        @wordlist = sort cmpWords @wordlist;
    }
    print STDERR "length of list is ", @wordlist + 0, ".\n" if $debug;
    return @wordlist;
 } # parseAndSort
 # Print binary representation of trie array.  This isn't used yet, but
 # eventually it'll want to dump to multiple files appropriate for Palm
 # that can be catenated together on other platforms.  There'll need to
 # be a file giving the offset of the first node too.  Also, might want
 # to move to 4-byte representation when the input can't otherwise be
 # handled.
 sub dumpNodes {
    for ( my $i = 0; $i < @gNodes; ++$i ) {
        my $node = $gNodes[$i];
        my $bstr = pack( "I", $node );
        print STDOUT $bstr;
    }
 }
 ##############################################################################
 # Little node-field setters and getters to hide what bits represent
 # what.
 ##############################################################################
 sub TrieNodeSetIsTerminal {
    my ( $nodeR, $isTerminal ) = @_;
    if ( $isTerminal ) {
        ${$nodeR} |= 1 << 31;
    } else {
        ${$nodeR} &= ~(1 << 31);
    }
 }
 sub TrieNodeGetIsTerminal {
    my ( $node ) = @_;
    return ($node & 1 << 31) != 0;
 }
 sub TrieNodeSetIsLastSibling {
    my ( $nodeR, $isLastSibling ) = @_;
    if ( $isLastSibling ) {
        ${$nodeR} |= 1 << 30;
    } else {
        ${$nodeR} &= ~(1 << 30);
    }
 }
 sub TrieNodeGetIsLastSibling {
    my ( $node ) = @_;
    return ($node & 1 << 30) != 0;
 }
 sub TrieNodeSetLetter {
    my ( $nodeR, $letter ) = @_;
    die "letter ", $letter, " too big" if $letter >= 32;
    my $mask = ~(0x1F << 25);
    ${$nodeR} &= $mask;                         # clear all the bits
    ${$nodeR} |= ($letter << 25);          # set new ones
 }
 sub TrieNodeGetLetter {
    my ( $node ) = @_;
    $node >>= 25;
    $node &= 0x1F;
    return $node;
 }
 sub TrieNodeSetFirstChildOffset {
    my ( $nodeR, $fco ) = @_;
    die "$fco larger than 25 bits" if ($fco & 0xFE000000) != 0;
    my $mask = ~0x01FFFFFF;
    ${$nodeR} &= $mask;                   # clear all the bits
    ${$nodeR} |= $fco;                    # set new ones
 }
 sub TrieNodeGetFirstChildOffset {
    my ( $node ) = @_;
    $node &= 0x01FFFFFF;                  # 24 bits
    return $node;
 }
 sub MakeTrieNode {
    my ( $letter, $isTerminal, $firstChildOffset, $isLastSibling ) = @_;
    my $result = 0;
    TrieNodeSetIsTerminal( \$result, $isTerminal );
    TrieNodeSetIsLastSibling( \$result, $isLastSibling );
    TrieNodeSetLetter( \$result, $letter );
    TrieNodeSetFirstChildOffset( \$result, $firstChildOffset );
    return $result;
 } # MakeTrieNode
 # Caller may need to know the offset of the first top-level node.
 # Write it here.
 sub writeOutStartNode {
    my ( $startNodeOut, $firstRootChildOffset ) = @_;
    open NODEOUT, ">$startNodeOut";
    print NODEOUT pack( "N", $firstRootChildOffset );
    close NODEOUT;
 } # writeOutStartNode
 # build the hash for translating.  I'm using a hash assuming it'll be
 # fast.  Key is the letter; value is the 0..31 value to be output.
 sub makeTableHash {
    my $i;
    open TABLEFILE, "< $gTableFile";
    splice @gRevMap;            # empty it
    for ( $i = 0; ; ++$i ) {
        my $ch = getc(TABLEFILE);
        if ( $ch eq undef ) {
            last;
        }
        if ( $gUseUnicode ) {   # skip the first byte each time: tmp HACK!!!
            $ch = getc(TABLEFILE);
        }
        if ( $ch eq undef ) {
            last;
        }
        push @gRevMap, $ch;
        if ( ord($ch) == 0 ) {	# blank
            next;       # we want to increment i when blank seen since 
                        # it is a tile value
        }
        die "$gTableFile too large\n" if $i > 32;
        die "only blank (0) can be 32nd char\n" if ($i == 32 && $ch != 0);
        $gTableHash{$ch} = $i;
    }
    close TABLEFILE;
 } # makeTableHash
 # emitNodes. "input" is $gNodes.  From it we write up to
 # $nBytesPerOutfile to files named $outFileBase0..n, mapping the
 # letter field down to 5 bits with a hash built from $tableFile.  If
 # at any point we encounter a letter not in the hash we fail with an
 # error.
 sub emitNodes($$) {
    my ( $gNBytesPerOutfile, $outFileBase ) = @_;
    # now do the emit.
    # is 17 bits enough?
    printf STDOUT ("There are %d (0x%x) nodes in this DAWG.\n",
                   0 + @gNodes, 0 + @gNodes );
    if ( @gNodes > 0x1FFFF || $gForceFour ) {
        $gNBytesPerNode = 4;
    } else {
        $gNBytesPerNode = 3;
    }
    my $nextIndex = 0;
    my $nextFileNum = 0;
    for ( $nextFileNum = 0; ; ++$nextFileNum ) {
        if ( $nextIndex >= @gNodes ) {
            last;	# we're done
        }
        die "Too many outfiles; infinite loop?" if $nextFileNum > 99;
        my $outName = sprintf("${outFileBase}_%03d.bin", $nextFileNum);
        open OUTFILE, "> $outName";
        my $curSize = 0;
        while ( $nextIndex < @gNodes ) {
            # scan to find the next terminal
            my $i;
            for ( $i = $nextIndex; 
                  !TrieNodeGetIsLastSibling($gNodes[$i]);
                  ++$i ) {
                # do nothing but a sanity check
                if ( $i >= @gNodes) {
                    die "bad trie format: last node not last sibling" ;
                }
            }
            ++$i;	# move beyond the terminal
            my $nextSize = ($i - $nextIndex) * $gNBytesPerNode;
            if ($curSize + $nextSize > $gNBytesPerOutfile) {
                last;
            } else {
                # emit the subarray
                while ( $nextIndex < $i ) {
                    outputNode( $gNodes[$nextIndex], $gNBytesPerNode,
                                \*OUTFILE );
                    ++$nextIndex;
                }
                $curSize += $nextSize;
            }
        }
        close OUTFILE;
    }
 } # emitNodes
 sub printWord {
    my ( $str ) = @_;
    print STDERR "$str\n";
 }
 # print out the entire dictionary, as text, to STDERR.
 sub printOneLevel {
    my ( $index, $str ) = @_;
    for ( ; ; ) {
        my $newStr = $str;
        my $node = $gNodes[$index++];
        my $lindx = $gRevMap[TrieNodeGetLetter($node)];
        if ( ord($lindx) >= 0x20 ) {
            $newStr .= "$lindx";
        } else {
            print STDERR "sub space" if $debug;
            $newStr .= "\\" . chr('0'+$lindx);
        }
        if ( TrieNodeGetIsTerminal($node) ) {
            printWord( $newStr );
        } 
        my $fco = TrieNodeGetFirstChildOffset( $node );
        if ( $fco != 0 ) {
            printOneLevel( $fco, $newStr );
        }
        if ( TrieNodeGetIsLastSibling($node) ) {
            last;
        }
    }
 }
 sub outputNode {
    my ( $node, $nBytes, $outfile ) = @_;
    my $fco = TrieNodeGetFirstChildOffset($node);
    my $fourthByte;
    if ( $nBytes == 4 ) {
        $fourthByte = $fco >> 17;
        die "fco too big" if $fourthByte > 0xFF;
        $fco &= 0x1FFFF;
    }
    # format according to dawg.h:
    # typedef struct array_edge {
    #     unsigned char highByte;
    #     unsigned char lowByte;
    #     unsigned char bits;
 #ifdef FOUR_BYTE
    #     unsigned char moreBits;
 #endif
    # } array_edge;
    # define LETTERMASK 0x1f
    # define ACCEPTINGMASK 0x20
    # define LASTEDGEMASK 0x40
    # define LASTBITMASK 0x80
    # write the fco (less that one bit).  We want two bytes worth
    # in three-byte mode, and three in four-byte mode (which is
    # untested)
    for ( my $i = 1; $i >= 0; --$i ) {
        my $tmp = ($fco >> ($i * 8)) & 0xFF;
        print $outfile pack( "C", $tmp );
    }
    $fco >>= 16;                # it should now be 1 or 0
    die "fco not 1 or 0" if $fco > 1;
    my $chIn5 = TrieNodeGetLetter($node);
    my $bits = $chIn5;
    if ( TrieNodeGetIsLastSibling($node) ) {
        $bits |= 0x40;
    }
    if ( TrieNodeGetIsTerminal($node) ) {
        $bits |= 0x20;
    }
    if ( $fco != 0 ) {
        $bits |= 0x80;
    }
    print $outfile pack( "C", $bits );
    # the final byte, if in use
    if ( $nBytes == 4 ) {
        print $outfile pack( "C", $fourthByte );
    }
 } # outputNode
 sub usage {
    print STDERR "usage: $0 \n"
        . "\t[-b    bytesPerFile] (default = 0xFFFFFFFF)\n"
        . "\t-m     mapFile\n"
        . "\t-ob    outFileBase\n"
        . "\t-sn    start node out file\n"
        . "\t[-if   input file name]  -- default = stdin\n"
        . "\t[-term ch] (word terminator -- default = '\\0'\n"
        . "\t[-nosort] (input already sorted in accord with -m; " .
        " default=sort'\n"
        . "\t[-dump]  (write dictionary as text to STDERR for testing)\n"
        . "\t[-force4](use 4 bytes per node regardless of need)\n"
        . "\t[-r]     (reject words with letters not in mapfile)\n"
        . "\t[-k]     (kill if any letters no in mapfile -- default)\n"
        ;
 } # usage
 sub parseARGV {
    my $arg;
    while ( my $arg = shift(@ARGV) ) {
      SWITCH: {
          if ($arg =~ /-b/) {$gNBytesPerOutfile = shift(@ARGV), last SWITCH;}
          if ($arg =~ /-mn/) {$gTableFile = shift(@ARGV);
                              $gUseUnicode = 1;
                              last SWITCH;}
          if ($arg =~ /-m/) {$gTableFile = shift(@ARGV); last SWITCH;}
          if ($arg =~ /-ob/) {$gOutFileBase = shift(@ARGV), last SWITCH;}
          if ($arg =~ /-sn/) {$gStartNodeOut = shift(@ARGV), last SWITCH;}
          if ($arg =~ /-if/) {$gInFileName = shift(@ARGV), last SWITCH;}
          if ($arg =~ /-r/) {$gKillIfMissing = 0; last SWITCH;}
          if ($arg =~ /-k/) {$gKillIfMissing = 1; last SWITCH;}
          if ($arg =~ /-term/) {$gTermChar = chr(shift(@ARGV)); last SWITCH;}
          if ($arg =~ /-dump/) {$gDumpText = 1; last SWITCH;}
          if ($arg =~ /-nosort/) {$gNeedsSort = 0; last SWITCH;}
          if ($arg =~ /-wc/) {$gCountFile = shift(@ARGV); last SWITCH;}
          if ($arg =~ /-ns/) {$gBytesPerNodeFile = shift(@ARGV); last SWITCH;}
          if ($arg =~ /-force4/) {$gForceFour = 1; last SWITCH;}
          die "unexpected arg $arg\n";
      }
    }
    print STDERR "gNBytesPerOutfile=$gNBytesPerOutfile\n" if $debug;
    print STDERR "gTableFile=$gTableFile\n" if $debug;
    print STDERR "gOutFileBase=$gOutFileBase\n" if $debug;
    print STDERR "gStartNodeOut=$gStartNodeOut\n" if $debug;
    printf STDERR "gTermChar=%s(%d)\n", $gTermChar, ord($gTermChar) if $debug;
    return $gTableFile;
 } # parseARGV
--- a/dawg/frank_mkspecials.pl
+++ b/dawg/frank_mkspecials.pl
@ -0,0 +1,45 @@
 #!/usr/bin/perl
 # Copyright 2001 by Eric House (fixin@peak.org)
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 # Given arguments consisting of triples, first a string and then pbitm
 # files representing bitmaps.  For each triple, print out the string and
 # then the converted bitmaps.
 use strict;
 while ( @ARGV ) {
  my $str = shift();
  my $largebmp = shift();
  my $smallbmp = shift();
  doOne( $str, $largebmp, $smallbmp );
 }
 sub doOne {
  my ( $str, $largebmp, $smallbmp ) = @_;
  print pack( "C", length($str) );
  print $str;
  print STDERR "looking at $largebmp", "\n";
  print `cat $largebmp | ../pbitm2bin.pl`;
  print `cat $smallbmp | ../pbitm2bin.pl`;
 }
--- a/dawg/palm_mkspecials.pl
+++ b/dawg/palm_mkspecials.pl
@ -0,0 +1,111 @@
 #!/usr/bin/perl
 # Copyright 2002 by Eric House
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 # Given arguments consisting of triples, first a string and then pbitm
 # files representing bitmaps. The format looks like this:
 # array [0-n] of { char len; 
 #                  char[3] alt txt;
 #                  int16 offsetOfLarge;
 #                  int16 offsetOfSmall;
 #                }
 # array [0-n] of { 
 #                  bitmapLargeIfPresent;
 #                  bitmapSmallIfPresent;
 #                }
 #
 # In addition, there's padding between bitmaps if needed to get the next
 # one to a 2-byte boundary.  And the input files are not in PalmOS bitmap
 # format, so thay have to get converted into a tmp file before the sizes
 # can be known and included in the eventual output.
 use strict;
 my $tmpfile = "/tmp/tmpout$$";
 my $nSpecials = @ARGV / 3;
 die "wrong number of args" if (@ARGV % 3) != 0;
 my $gOffset = $nSpecials * 8;     # sizeof(Xloc_specialEntry)
 open TMPFILE, "> $tmpfile";
 for ( my $i = 0; $i < $nSpecials; ++$i ) {
    my $size;
    my $str = shift( @ARGV );
    my $len = length($str);
    die "string $str too long" if $len > 3;
    print $str;
    while ( $len < 4 ) {
        ++$len;
        print pack("c", 0 );
    }
    doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
    doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
 }
 close TMPFILE;
 # now append the tempfile
 open TMPFILE, "< $tmpfile";
 while ( read( TMPFILE, my $buffer, 128 ) ) {
    print $buffer;
 }
 close TMPFILE;
 unlink $tmpfile;
 exit 0;
 sub doOneFile($$) {
    my ( $fil, $fh, $offsetR ) = @_;
    my $size = convertBmp($fil, $fh );
    if ( ($size % 2) != 0 ) {
        ++$size;
        print $fh pack( "c", 0 );
    }
    print pack( "n", $size > 0? ${$offsetR} : 0 );
    ${$offsetR} += $size;
 } # doOneFile
 sub convertBmp($$) {
    my ( $pbitmfile, $fhandle ) = @_;
    if ( $pbitmfile eq "/dev/null" ) {
        return 0;
    } else {
        # for some reason I can't get quote marks to print into tmp.rcp using just `echo`
        open TMP, "> tmp.rcp";
        print TMP "BITMAP ID 1000 \"$pbitmfile\" AUTOCOMPRESS";
        close TMP;
        `pilrc tmp.rcp`;
        print $fhandle `cat Tbmp03e8.bin`;
        my $siz = -s "Tbmp03e8.bin";
        `rm -f tmp.rcp Tbmp03e8.bin`;
        return $siz;
    }
 }
--- a/dawg/par.pl
+++ b/dawg/par.pl
@ -0,0 +1,234 @@
 #!/usr/bin/perl
 # Copyright 2002 by Eric House (fixin@peak.org)   All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 # Only enough of par's features to support building a crosswords dict
 # pdb
 use strict;
 my $debug = 0;
 # stolen from par source
 my $PRC_FLAGS_RESOURCE =    (0x1<<0);
 my $PRC_FLAGS_READONLY =   (0x1<<1);
 my $PRC_FLAGS_DIRTY    =   (0x1<<2);
 my $PRC_FLAGS_BACKUP   =   (0x1<<3);
 my $PRC_FLAGS_NEWER    =   (0x1<<4);
 my $PRC_FLAGS_RESET    =   (0x1<<5);
 my $PRC_FLAGS_COPYPREVENT = (0x1<<6);
 my $PRC_FLAGS_STREAM   =   (0x1<<7);
 my $PRC_FLAGS_HIDDEN   =   (0x1<<8);
 my $PRC_FLAGS_LAUNCHABLE = (0x1<<9);
 my $PRC_FLAGS_RECYCLABLE = (0x1<<10);
 my $PRC_FLAGS_BUNDLE   =   (0x1<<11);
 my $PRC_FLAGS_OPEN     =   (0x1<<15);
 my $gAttrs = 0;
 my $gVersion = 1;               # par defaults this to 1
 my $cmd = shift( @ARGV );
 die "only 'c' supported now" if $cmd ne "c" && $cmd ne "-c";
 readHOptions( \@ARGV );
 my $dbfile = shift( @ARGV );
 my $name = shift( @ARGV );
 die "name $name too long" if length($name) > 31;
 my $type = shift( @ARGV );
 die "type $type must be of length 4" if length($type) != 4;
 my $cid = shift( @ARGV );
 die "cid $cid must be of length 4" if length($cid) != 4;
 my @fileNames;
 my @fileLengths;
 my $nFiles = 0;
 while ( @ARGV > 0 ) {
    my $filename = shift( @ARGV );
    push @fileNames, $filename;
    push @fileLengths, -s $filename;
    ++$nFiles;
 }
 # from par's prcp.h; thanks djw!
 # typedef struct prc_file_t {
 #     prc_byte_t name[32];
 #     prc_byte_t flags[2];
 #     prc_byte_t version[2];
 #     prc_byte_t ctime[4];
 #     prc_byte_t mtime[4];
 #     prc_byte_t btime[4];
 #     prc_byte_t modnum[4];
 #     prc_byte_t appinfo[4];
 #     prc_byte_t sortinfo[4];
 #     prc_byte_t type[4];
 #     prc_byte_t cid[4];
 #     prc_byte_t unique_id_seed[4];
 #     prc_byte_t next_record_list[4];
 #     prc_byte_t nrecords[2];
 # } prc_file_t;
 my $str;
 my $offset = 0;
 open OUTFILE, "> $dbfile" or die "couldn't open outfile $dbfile for writing";
 # print the string, then pad with 0s
 $offset = length($name);
 print OUTFILE $name;
 while ( $offset < 32 ) {
    print OUTFILE pack("c", 0);
    ++$offset;
 }
 $str = pack("n", $gAttrs);     # flags
 print OUTFILE $str;
 $offset += length($str);
 $str = pack("n", $gVersion);     # version
 print OUTFILE $str;
 $offset += length($str);
 my $time = time() + 2082844800;
 $str = pack("NNN", $time, $time, 0);     # ctime, mtime, btime
 print OUTFILE $str;
 $offset += length($str);
 $str = pack("N", 0 );    # mod num
 print OUTFILE $str;
 $offset += length($str);
 $str = pack("N", 0 );    # appinfo
 print OUTFILE $str;
 $offset += length($str);
 $str = pack("N", 0 );    # sortinfo
 print OUTFILE $str;
 $offset += length($str);
 print OUTFILE $type;            # type
 print OUTFILE $cid;             # cid
 $offset += 8;
 $str = pack("NN", 0, 0 ); # unique_id_seed, next_record_list
 print OUTFILE $str;
 $offset += length($str);
 $str = pack("n", $nFiles );     # nrecords
 print OUTFILE $str;
 $offset += length($str);
 $offset += $nFiles * 8;
 $offset += 2;                   # djw adds 2 bytes after size list; see below
 foreach my $len ( @fileLengths ) {
    print OUTFILE pack( "N", $offset );
    print OUTFILE pack( "N", 0 );
    $offset += $len;
 }
 print OUTFILE pack( "n", 0 );   # djw does this sans comment: flush.c, line 87
 foreach my $file ( @fileNames ) {
    open INFILE, "<$file" or die "couldn't open infile $file\n";
    my $buffer;
    while ( read INFILE, $buffer, 1024 ) {
        print OUTFILE $buffer;
    }
    close INFILE;
 }
 close OUTFILE;
 exit 0;
 ##############################################################################
 # Subroutines
 ##############################################################################
 sub readHOptions {
    my ( $argvR ) = @_;
    for ( ; ; ) {
        my $opt = ${$argvR}[0];
        if ( $opt !~ /^-/ ) {
            last;
        }
        # it starts with a '-': use it; else don't consume anything
        shift @{$argvR};
        if ( $opt eq "-a" ) {
            my $attrs = shift @{$argvR};
            processAttrString( $attrs );
        } elsif ( $opt eq "-v" ) {
            $gVersion = shift @{$argvR};
        } else {
            die "what's with \"$opt\": -a and -v are the only hattrs supported";
        }
    }
 } # readHOptions
 sub processAttrString {
    my ( $attrs ) = @_;
    foreach my $flag ( split /\|/, $attrs ) {
        print STDERR "looking at flag $flag\n" if $debug;
        if ( $flag =~ /resource/ ) {
            $gAttrs |= $PRC_FLAGS_RESOURCE;
            die "resource attr not supported";
        } elsif ( $flag =~ /readonly/ ) {
            $gAttrs |= $PRC_FLAGS_READONLY;
        } elsif ( $flag =~ /dirty/ ) {
            $gAttrs |= $PRC_FLAGS_DIRTY;
        } elsif ( $flag =~ /backup/ ) {
            $gAttrs |= $PRC_FLAGS_BACKUP;
        } elsif ( $flag =~ /newer/ ) {
            $gAttrs |= $PRC_FLAGS_NEWER;
        } elsif ( $flag =~ /reset/ ) {
            $gAttrs |= $PRC_FLAGS_RESET;
        } elsif ( $flag =~ /copyprevent/ ) {
            $gAttrs |= $PRC_FLAGS_COPYPREVENT;
        } elsif ( $flag =~ /stream/ ) {
            $gAttrs |= $PRC_FLAGS_STREAM;
            die "stream attr not supported";
        } elsif ( $flag =~ /hidden/ ) {
            $gAttrs |= $PRC_FLAGS_HIDDEN;
        } elsif ( $flag =~ /launchable/ ) {
            $gAttrs |= $PRC_FLAGS_LAUNCHABLE;
        } elsif ( $flag =~ /recyclable/ ) {
            $gAttrs |= $PRC_FLAGS_RECYCLABLE;
        } elsif ( $flag =~ /bundle/ ) {
            $gAttrs |= $PRC_FLAGS_BUNDLE;
        } elsif ( $flag =~ /open/ ) {
            $gAttrs |= $PRC_FLAGS_OPEN;
        } else {
            die "flag $flag not supportd";
        }
    }
 } # processAttrString
--- a/dawg/xloc.pl
+++ b/dawg/xloc.pl
@ -0,0 +1,42 @@
 #!/usr/bin/perl
 # Copyright 2002 by Eric House (fixin@peak.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 # test and wrapper file for xloc.pm
 use strict;
 use xloc;
 my $arg = shift(@ARGV);
 my $lang = shift(@ARGV);
 my $path = "./$lang";
 my $infoFile = "$path/info.txt";
 die "info file $infoFile not found\n" if ! -s $infoFile;
 my $xlocToken = xloc::ParseTileInfo($infoFile);
 if ( $arg eq "-t" ) {
    xloc::WriteMapFile( $xlocToken, 0, \*STDOUT );
 } elsif ( $arg eq "-tn" ) {
    xloc::WriteMapFile( $xlocToken, 1, \*STDOUT );
 } elsif ( $arg eq "-v" ) {
    xloc::WriteValuesFile( $xlocToken, \*STDOUT );
 }
--- a/dawg/xloc.pm
+++ b/dawg/xloc.pm
@ -0,0 +1,180 @@
 #!/usr/bin/perl
 # Copyright 2002 by Eric House (fixin@peak.org).  All rights reserved.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 # The idea here is that all that matters about a language is stored in
 # one file (possibly excepting rules for prepping a dictionary).
 # There's a list of tile faces, counts and values, and also some
 # name-value pairs as needed.  The pairs come first, and then a list
 # of tiles.
 package xloc;
 use strict;
 use warnings;
 BEGIN {
    use Exporter   ();
    our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
    $VERSION     = 1.00;
    @ISA         = qw(Exporter);
    @EXPORT      = qw(&ParseTileInfo &GetNTiles &TileFace &TileValue
 		      &TileCount &GetValue &WriteMapFile &WriteValuesFile);
    %EXPORT_TAGS = ( );
 }
 # Returns what's meant to be an opaque object that can be passed back
 # for queries.  It's a hash with name-value pairs and an _INFO entry
 # containing a list of tile info lists.
 sub ParseTileInfo($) {
    my ( $filePath ) = @_;
    my %result;
    open INPUT, "<$filePath" or die "couldn't open $filePath";
    my $inTiles = 0;
    my @tiles;
    while ( <INPUT> ) {
        chomp;
        s/\#.*$//;
        s/^\s*$//;                  # nuke all-white-space lines
        next if !length;
        if ( $inTiles ) {
            if ( /<END_TILES>/ ) {
                last;
            } else {
                my ( $count, $val, $face ) = m/^\s*(\w+)\s+(\w+)\s+(.*)\s*$/;
                push @tiles, [ $count, $val, $face ];
            }
        } elsif ( /\w:/ ) {
            my ( $nam, $val ) = split ':', $_, 2;
            $result{$nam} .= $val;
        } elsif ( /<BEGIN_TILES>/ ) {
            $inTiles = 1;
        }
    }
    close INPUT;
    $result{"_TILES"} = [ @tiles ];
    return \%result;
 }
 sub GetNTiles($) {
    my ( $hashR ) = @_;
    my $listR = ${$hashR}{"_TILES"};
    return 0 + @{$listR};
 }
 sub GetValue($$) {
    my ( $hashR, $name ) = @_;
    return ${$hashR}{$name};
 }
 sub WriteMapFile($$$) {
    my ( $hashR, $unicode, $fhr ) = @_;
    my $packStr;
    if ( $unicode ) {
        $packStr = "n";
    } else {
        $packStr = "C";
    }
    my $count = GetNTiles($hashR);
    my $specialCount = 0;
    for ( my $i = 0; $i < $count; ++$i ) {
        my $tileR = GetNthTile( $hashR, $i );
        my $str = ${$tileR}[2];
        if ( $str =~ /\'(.)\'/ ) {
            print $fhr pack($packStr, ord($1) );
        } elsif ( $str =~ /\"(.+)\"/ ) {
            print $fhr pack($packStr, $specialCount++ );
        } elsif ( $str =~ /(\d+)/ ) {
            print $fhr pack( $packStr, chr($1) );
        } else {
            die "WriteMapFile: unrecognized face format $str";
        }
    }
 } # WriteMapFile
 sub WriteValuesFile($$) {
    my ( $hashR, $fhr ) = @_;
    my $header = GetValue( $hashR,"XLOC_HEADER" );
    die "no XLOC_HEADER found" if ! $header;
    print STDERR "header is $header\n";
    print $fhr pack( "n", hex($header) );
    my $count = GetNTiles($hashR);
    for ( my $i = 0; $i < $count; ++$i ) {
        my $tileR = GetNthTile( $hashR, $i );
        print $fhr pack( "c", TileValue($tileR) );
        print $fhr pack( "c", TileCount($tileR) );
    }
 } # WriteValuesFile
 sub GetNthTile($$) {
    my ( $hashR, $n ) = @_;
    my $listR = ${$hashR}{"_TILES"};
    return ${$listR}[$n];
 }
 sub TileFace($) {
    my ( $tileR ) = @_;
    my $str = ${$tileR}[2];
    if ( $str =~ /\'(.)\'/ ) {
        return $1;
    } elsif ( $str =~ /\"(.+)\"/ ) {
        return $1;
    } elsif ( $str =~ /(\d+)/ ) {
        return chr($1);
    } else {
        die "TileFace: unrecognized face format: $str";
    }
 }
 sub TileValue($) {
    my ( $tileR ) = @_;
    return ${$tileR}[0];
 }
 sub TileCount($) {
    my ( $tileR ) = @_;
    return ${$tileR}[1];
 }
 1;