diff --git a/xwords4/xwords4/dawg/English/Makefile b/xwords4/xwords4/dawg/English/Makefile new file mode 100644 index 000000000..aa7edea4e --- /dev/null +++ b/xwords4/xwords4/dawg/English/Makefile @@ -0,0 +1,40 @@ +# -*-mode: Makefile -*- +# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +TARGET_TYPE ?= FRANK + +# This represents the default -- for now +COMMAND = -f Makefile.BasEnglish TARGET_TYPE=FRANK + +alleng: + for mfile in Makefile.BasEnglish Makefile.OSW Makefile.TWL98 Makefile.CollegeEng; do \ + $(MAKE) -f $$mfile TARGET_TYPE=$(TARGET_TYPE); \ + done + +%: + $(MAKE) $(COMMAND) $@ + +all: + $(MAKE) $(COMMAND) + +clean: + $(MAKE) $(COMMAND) clean + +help: + @echo "try make -f Makefile.[BasEnglish|CollegeEng] \\" + @echo " TARGET_TYPE=[PALM|FRANK]" + diff --git a/xwords4/xwords4/dawg/English/Makefile.BasEnglish b/xwords4/xwords4/dawg/English/Makefile.BasEnglish new file mode 100644 index 000000000..d8151e1c0 --- /dev/null +++ b/xwords4/xwords4/dawg/English/Makefile.BasEnglish @@ -0,0 +1,35 @@ +# -*-mode: Makefile -*- +# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved. 
+# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +LANG=BasEnglish +LANGCODE=en_US +#NEWDAWG=1 + +TARGET_TYPE ?= FRANK + +include ../Makefile.2to8 + +include ../Makefile.langcommon + +$(LANG)Main.dict.gz: BasEnglish.dict.gz + ln -s $< $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. + +clean: clean_common + rm -f $(LANG)Main.dict.gz *.bin $(LANG)*.pdb $(LANG)*.seb diff --git a/xwords4/xwords4/dawg/English/Makefile.CollegeEng b/xwords4/xwords4/dawg/English/Makefile.CollegeEng new file mode 100644 index 000000000..efd35497b --- /dev/null +++ b/xwords4/xwords4/dawg/English/Makefile.CollegeEng @@ -0,0 +1,34 @@ +# -*- mode: makefile; -*- +# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +LANG=CollegeEng +LANGCODE=en_US +#NEWDAWG=1 +TARGET_TYPE=FRANK + +include ../Makefile.2to8 + +include ../Makefile.langcommon + +$(LANG)Main.dict.gz: CollegeEng.dict.gz + ln -s $< $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. + +clean: clean_common + rm -f $(LANG)Main.dict.gz *.bin $(LANG)*.pdb $(LANG)*.seb diff --git a/xwords4/xwords4/dawg/English/Makefile.OSPD b/xwords4/xwords4/dawg/English/Makefile.OSPD new file mode 100644 index 000000000..18283425e --- /dev/null +++ b/xwords4/xwords4/dawg/English/Makefile.OSPD @@ -0,0 +1,33 @@ +# -*- mode: makefile; compile-command: "make -f Makefile.OSPD TARGET_TYPE=PALM"; -*- +# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +LANG=OSPD +LANGCODE=en_US +NEWDAWG=1 + +include ../Makefile.2to8 + +include ../Makefile.langcommon + +$(LANG)Main.dict.gz: OSPD.dict.gz + ln -s $< $@ + +# Everything but creating of the Main.dict file is inherited from the +# "parent" Makefile.langcommon in the parent directory. 
+ +clean: clean_common + rm -f $(LANG)Main.dict.gz *.bin diff --git a/xwords4/xwords4/dawg/English/info.data b/xwords4/xwords4/dawg/English/info.data new file mode 100644 index 000000000..8bf3d67ab --- /dev/null +++ b/xwords4/xwords4/dawg/English/info.data @@ -0,0 +1,35 @@ +# I'm a comment. Any line beginning with this char will be dropped + +LANGCODE:en_US + + +2 0 {"_"} +9 1 'A' +2 3 'B' +2 3 'C' +4 2 'D' +12 1 'E' +2 4 'F' +3 2 'G' +2 4 'H' +9 1 'I' +1 8 'J' +1 5 'K' +4 1 'L' +2 3 'M' +6 1 'N' +8 1 'O' +2 3 'P' +1 10 'Q' +6 1 'R' +4 1 'S' +6 1 'T' +4 1 'U' +2 4 'V' +2 4 'W' +1 8 'X' +2 4 'Y' +1 10 'Z' + + +# should ignore all after the above diff --git a/xwords4/xwords4/dawg/English/info.txt b/xwords4/xwords4/dawg/English/info.txt new file mode 100644 index 000000000..40cf89fb3 --- /dev/null +++ b/xwords4/xwords4/dawg/English/info.txt @@ -0,0 +1,71 @@ +# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +LANGCODE:en_US + +# deal with DOS files +LANGFILTER_PRECLIP: tr -d '\r' | + +LANGFILTER_POSTCLIP: | tr [a-z] [A-Z] +LANGFILTER_POSTCLIP: | grep '^[A-Z]*$' +LANGFILTER_POSTCLIP: | tr -s '\n\r' '\000\000' +LANGFILTER_POSTCLIP: | sort -z + +# We can trust sort (above) to do the right thing since there's no +# high ascii. 
dict2dawg.pl is much faster if I can trust that its +# input is in sorted order. +NEEDSSORT:false + +LANGINFO:

English dictionaries can contain words with any of the 26 +LANGINFO: letters you think of as making up the alphabet: A-Z. At +LANGINFO: this point any word in your list containing anything else +LANGINFO: will simply be excluded from the dictionary.

+ +# High bit means "official". Next 7 bits are an enum where +# English==1. Low byte is padding +XLOC_HEADER:0x8100 + + +2 0 {"_"} +9 1 'A' +2 3 'B' +2 3 'C' +4 2 'D' +12 1 'E' +2 4 'F' +3 2 'G' +2 4 'H' +9 1 'I' +1 8 'J' +1 5 'K' +4 1 'L' +2 3 'M' +6 1 'N' +8 1 'O' +2 3 'P' +1 10 'Q' +6 1 'R' +4 1 'S' +6 1 'T' +4 1 'U' +2 4 'V' +2 4 'W' +1 8 'X' +2 4 'Y' +1 10 'Z' + + +# should ignore all after the above diff --git a/xwords4/xwords4/dawg/Makefile.2to8 b/xwords4/xwords4/dawg/Makefile.2to8 new file mode 100644 index 000000000..bb1777d27 --- /dev/null +++ b/xwords4/xwords4/dawg/Makefile.2to8 @@ -0,0 +1,7 @@ +# -*-mode: Makefile -*- + +# These are the targets that almost all language makefiles will want. + +SHORT_WORD = 2 +LONG_WORD = 8 + diff --git a/xwords4/xwords4/dawg/Makefile.langcommon b/xwords4/xwords4/dawg/Makefile.langcommon new file mode 100644 index 000000000..2de1fd109 --- /dev/null +++ b/xwords4/xwords4/dawg/Makefile.langcommon @@ -0,0 +1,249 @@ +# -*-mode: Makefile -*- + +# Copyright 2000-2002 by Eric House (fixin@peak.org) +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +FRANK_EXT = xwd + +# this will make all dicts the new, larger type +#FORCE_4 = -force4 + +PALM_DICT_TYPE = DAWG +PAR = ../par.pl +#PAR = par # available from djw.org + +LANGUAGE = $(shell basename $$(pwd)) + +#all: target_all + +# let languages set this first, but we always add blank to it. +BLANK_INFO = "_" /dev/null /dev/null + +# Supply a default so don't have to type so much; feel free to change +TARGET_TYPE ?= FRANK + +ifdef NEWDAWG + TABLE_ARG = -mn +else + TABLE_ARG = -m +endif + +############################################################################## +# PalmOS rules +############################################################################## +ifeq ($(TARGET_TYPE),PALM) + +ifdef NEWDAWG + PDBTYPE = Xwr4 +else + PDBTYPE = Xwr3 +endif + +all: $(LANG)2to8.pdb + +empty: $(LANG)0to0.pdb + +# Those languages that have bitmap files for custom glyphs will need to +# define BMPBINFILES and perhaps provide a rule for building the files +binfiles.stamp: $(BMPBINFILES) + touch binfiles.stamp + +palmspecials.bin: ../palm_mkspecials.pl $(BMPFILES) + $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@ + +# can't just use values.bin because the specials bitmap info is +# platform-specific +palmvalues.bin: values.bin palmspecials.bin + cat $^ > $@ + +# values.bin: palmspecials.bin ../xloc binfiles.stamp +# cd ../ && $(MAKE) xloc +# binfileparms=""; \ +# if [ "$(BMPBINFILES)" != "" ]; then \ +# for f in $(BMPBINFILES)""; \ +# do binfileparms="$$binfileparms -i $$f"; \ +# done; \ +# fi; \ +# ../xloc -l $(LANGCODE) $$binfileparms -T $@ +# cat palmspecials.bin >> $@ + +# header (first record) is node count (long) and 4 chars: +# unsigned char firstEdgeRecNum; +# unsigned char charTableRecNum; +# unsigned char valTableRecNum; +# unsigned char reserved[3]; // worst case this points to a new resource + +# include "flags" as used on the other platforms +palmheader%.bin: $(LANG)%_wordcount.bin $(LANG)%_flags.bin + rm -f $@ + touch $@ +ifdef NEWDAWG + cat $(LANG)$*_flags.bin >> $@ 
+endif + cat $< >> $@ + perl -e "print pack(\"C\",3)" >> $@ # first edge + perl -e "print pack(\"C\",1)" >> $@ # char table rec number + perl -e "print pack(\"C\",2)" >> $@ # valTable rec number + perl -e "print pack(\"CCC\",0)" >> $@ # reserved 3 bytes + perl -e "print pack(\"CC\",0)" >> $@ # c code added two more... + + +# This works, but leaves out the header info that the current version +# has. I'm not sure anybody cares, though... +$(LANG)%.pdb: dawg$(LANG)%.stamp table.bin palmvalues.bin palmheader%.bin + $(PAR) c -a backup $@ \ + $(basename $(@F)) $(PALM_DICT_TYPE) $(PDBTYPE) \ + palmheader$*.bin table.bin palmvalues.bin dawg$(LANG)$*_*.bin + +# start=$$(echo $@ | sed -e 's/$(LANG)\([0-9]*\)to[0-9]*.pdb/\1/'); \ +# end=$$(echo $@ | sed -e 's/$(LANG)[0-9]*to\([0-9]*\).pdb/\1/'); \ +# zcat $< | grep "^.\{$${start},$${end}\}$$" | \ +# ../dict2pdb -t table.bin -v values.bin -n $(basename $(@F)) \ +# > $@ + +# the files to export for byod +byodbins: table.bin values.bin palmvalues.bin + +#endif # TARGET_TYPE==PALM + +############################################################################## +# Franklin ebook rules +############################################################################## +else +ifeq ($(TARGET_TYPE),FRANK) + +all: $(LANG)2to8.seb + +empty: $(LANG)0to0.seb + +# get defn of ESDK_CREATESEB_EXE +include ${EBOOKMAN_SDK}/ebsdk.uses + +# a binary file (one byte) giving the number of tiles in the dict +charcount.bin: table.bin +ifdef NEWDAWG + siz=$$(wc -c $< | sed -e 's/$ $@ +else + siz=$$(wc -c $< | sed -e 's/$ $@ +endif + +# For each entry in the table whose face < 32, there needs to be a pair of +# pbitm files and a string giving the printing form +frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES) + $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@ + +$(LANG)%.$(FRANK_EXT): dawg$(LANG)%.stamp $(LANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin + cat $(LANG)$*_flags.bin charcount.bin table.bin values.bin \ + frankspecials.bin 
$(LANG)StartLoc.bin $$(ls dawg$(LANG)$*_*.bin) > $@ + cp $@ saveme.bin + +$(LANG)%.seb: $(LANG)%.$(FRANK_EXT) $(LANG)%.atts + ${ESDK_CREATESEB_EXE} $< + cp $< $<.saved + +$(LANG)%.atts: #recreate it each time based on params + echo '_PUB|global+read-only|"Eric_House"' >> $@ + echo "_NAME|global+read-only|\"$(LANG)2to8\"" >> $@ + echo "_EXT|global+read-only|\"$(FRANK_EXT)\"" >> $@ + echo '_LCAT|nosign+global|"CONTENT"' >> $@ + echo '_PERM|global+read-only|"r"' >> $@ + +# values.bin: ../xloc +# cd ../ && $(MAKE) xloc +# ../xloc -l $(LANGCODE) -T $@ + +# the files to export for byod +byodbins: table.bin values.bin frankspecials.bin + + +else + (Need to define TARGET_TYPE if get error poining to this line) +endif #ifeq ($(TARGET_TYPE),FRANK) +endif + +ifeq (s$(TARGET_TYPE),s) +It's an error not to specify a TARGET_TYPE +endif + +############################################################################## +# shared rules +############################################################################## + +# For some reason I can't fathom dawg$(LANG)% gets nuked every time +# the top-level rule fires (all: for whatever TARGET_TYPE.) It +# happens after the rule finishes.... + +# 16 bits worth of flags for the start of the eventual file. At this +# point, the flags mean this: +# 1: old-style DAWG. +# 2: new-style DAWG, three bytes per node. 
+# 3: new-style DAWG, four bytes per node +$(LANG)%_flags.bin: dawg$(LANG)%.stamp +ifdef NEWDAWG + if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \ + then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \ + else perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \ + fi +else + if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \ + then perl -e "print pack(\"n\",0x0001)" > $@; echo "flags=1"; \ + else echo "ERROR: old format can't handle 4-byte"; exit 1; \ + fi +endif + +dawg$(LANG)%.stamp: $(LANG)Main.dict.gz ../dict2dawg.pl table.bin ../Makefile.langcommon + start=$$(echo $@ | sed -e 's/dawg$(LANG)\([0-9]*\)to[0-9]*.stamp/\1/'); \ + end=$$(echo $@ | sed -e 's/dawg$(LANG)[0-9]*to\([0-9]*\).stamp/\1/'); \ + echo $${start} and $$end; \ + zcat $< | grep "^.\{$${start},$${end}\}$$" | tr '\n' '\0'| \ + sort -z | ../dict2dawg.pl $(TABLE_ARG) table.bin -b 28000 \ + -ob dawg$(LANG)$* \ + -sn $(LANG)StartLoc.bin -k -term 0 -wc $(LANG)$*_wordcount.bin \ + $(FORCE_4) -ns $(LANG)$*_nodesize.bin + touch $@ + +$(LANG)%_wordcount.bin: dawg$(LANG)%.stamp + @echo + +# the files to export for byod +allbins: + $(MAKE) TARGET_TYPE=PALM byodbins + $(MAKE) TARGET_TYPE=FRANK byodbins + +table.bin: ../xloc.pl +ifdef NEWDAWG + perl -I../ ../xloc.pl -tn > $@ +else + perl -I../ ../xloc.pl -t > $@ +endif + +values.bin: ../xloc.pl + perl -I../ ../xloc.pl -v > $@ + +%.dict: %.dict.gz + zcat $< > $@ + +clean_common: + rm -f $(LANG)Main.dict *.bin *.pdb *.seb dawg*.stamp *.$(FRANK_EXT) \ + $(LANG)*.pdb $(LANG)*.seb + +help: + @echo "make TARGET_TYPE=[FRANK|PALM]" + +test: + @echo $(LANGUAGE) diff --git a/xwords4/xwords4/dawg/allchars.pl b/xwords4/xwords4/dawg/allchars.pl new file mode 100755 index 000000000..2fb9fee37 --- /dev/null +++ b/xwords4/xwords4/dawg/allchars.pl @@ -0,0 +1,7 @@ +#!/usr/bin/perl + +use strict; + +for (my $i = 1; $i < 255; ++$i ) { + printf( "%d: %s (0x%x)\n", $i, chr($i), $i ); +} diff --git a/xwords4/xwords4/dawg/dawg.h b/xwords4/xwords4/dawg/dawg.h new file mode 
100644 index 000000000..431f3e80b --- /dev/null +++ b/xwords4/xwords4/dawg/dawg.h @@ -0,0 +1,47 @@ +/* + * Copyright 1998 by Eric House. All rights reserved. + * fixin@peak.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +typedef struct dawg_header { + unsigned long numWords; + unsigned char firstEdgeRecNum; + unsigned char charTableRecNum; + unsigned char valTableRecNum; + unsigned char reserved[3]; // worst case this points to a new resource +} dawg_header; + +typedef struct array_edge { + unsigned char highByte; + unsigned char lowByte; + unsigned char bits; +} array_edge; + +/* + * the bits field has five bits for the character (0-based rather than + * 'a'-based, of course; one bit each indicating whether the edge may + * be terminal and whether it's the last edge of a sub-array; and a final + * bit that's overflow from the highByte field allowing indices to be in + * the range 0-(2^^17)-1 + */ +#define LETTERMASK 0x1f +#define ACCEPTINGMASK 0x20 +#define LASTEDGEMASK 0x40 +#define LASTBITMASK 0x80 + +//#define ushort_byte_swap(d) ((unsigned short)(d<<8 | d>>8)) + diff --git a/xwords4/xwords4/dawg/dict2DAWG.c b/xwords4/xwords4/dawg/dict2DAWG.c new file mode 100644 index 000000000..39e4a62be --- /dev/null +++ b/xwords4/xwords4/dawg/dict2DAWG.c @@ -0,0 +1,685 @@ +// -*-mode: C; 
fill-column: 80; compile-command: "make dict2pdb"; -*- +/* + * Copyright 1997 by Eric House. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Converts a newline-separated list of words, in stdin, to a DAWG written + to stdout in PalmOS .pdb file format. + + Called like this: dict2dawg > dict.pdb <<. + car + cars + cat + does + dog + . + + Records in the database are of 48K length by default, except that + the last will likely be smaller and that they always end with the end + of a sub-array (so that iteration over a subarray doesn't have to + worry about boundaries.) + + Records ought to hold two parallel arrays (but don't yet): first the + index array, of shorts, and then the bits array of unsigned chars. + Remember that one bit of the bits entry is actually the 17th bit of + the index value... + + Ultimately we want to associate xloc-like data with each dictionary so + that languages whose relevant letters aren't all in an ascii sequence can + be accommodated. In most cases we'll be passed in a file containing a table + to be used for the mapping -- just a text file with one character per line + where A might be the 0th line, umlaut-A the first, etc. But we'll also + generate such a table ourselves when not given one, and output it when + asked.
+ + Bugs: It's currently necessary that input to this program be sorted + or some data may be lost. + + To do: + Make it two parallel arrays. + Some sort of hashing on pruning. */ + +#include +#include +#include +#include +#include +#include + +#include "swap.h" + +#define PRE_EDGE_RECORDCOUNT 3 + +/* #include "pdb.h" */ +#include "dawg.h" +/* #include "swap.c" */ + +typedef char boolean; +#define true 1 +#define false 0 + +typedef unsigned char Tile; + +typedef struct tree_edge { + unsigned char letter; + unsigned long index; + boolean terminal; + struct tree_edge* prev; + struct tree_edge* next; + struct tree_edge* children; +} tree_edge; + +#define MAXLENGTH 15 + + +////////////////////////////////////////////////////////////////////////////// +// prototypes +////////////////////////////////////////////////////////////////////////////// +static void addToTree( unsigned char* buf, short buflen, tree_edge* nodege ); +tree_edge* newNode( unsigned char letter, boolean terminal ); +static void remember( unsigned char* c ); +void readInTables( char* orderTableFile ); +void init_prune_data(); +void prune_tree( tree_edge* edge ); +/* unsigned short byte_swap( unsigned short d ); */ +void write_children( array_edge* mainArray, tree_edge* edge ); +int count_nodes( tree_edge* edge ); +unsigned long index_children( tree_edge* edge, unsigned long firstIndex ); +void write_as_pdb( array_edge* edges, unsigned long edgeCount ); +void usage( char* progName ); +void initTables( void ); +Tile CharToTile( unsigned char ch ); +static short fileSize( char* fileName ); +void write_as_files( array_edge* edges, unsigned long edgeCount, + char* fileNameBase ); + +unsigned long gWordCount = 0; + +////////////////////////////////////////////////////////////////////////////// +// globals +////////////////////////////////////////////////////////////////////////////// +boolean verbose = 0; +tree_edge* rootEdge; +int gNodeCount = 0; +long gNodesCreated; +long gPulled; +char gDictName[32]; 
+char* gOrderTableFileName = NULL; +/* char* gValueTableFileName = NULL; */ +short gNumUniqueTiles; +typedef struct OrderResEntry { +/* unsigned char count; */ +/* unsigned char value; */ + unsigned char ch; +} OrderResEntry; +static OrderResEntry gOrderTable[32]; +static signed short gLookupTable[256]; + +dawg_header gDawgHeader; + +////////////////////////////////////////////////////////////////////////////// +// main +////////////////////////////////////////////////////////////////////////////// +int main( int argc, char** argv ) { + char buf[MAXLENGTH+10]; + unsigned long edgeCount; + long maxWordLen = MAXLENGTH; + int got; + char* baseName = NULL; + array_edge* mainArray = NULL; + + initTables(); + memset( &gDawgHeader, 0, sizeof(gDawgHeader) ); + + gDictName[0] = '\0'; + + while ( (got = getopt(argc, argv, "t:vhn:")) != EOF ) { + switch ( got ) { + case 'm': + sscanf( optarg, "%ld", &maxWordLen ); + fprintf( stderr, "maxWordLen set to %ld\n", maxWordLen ); + break; + case 'n': + baseName = optarg; + break; + case 'v': + verbose = true; + fprintf( stderr, "verbose set\n" ); + break; + case 't': + gOrderTableFileName = optarg; + break; + case 'h': + default: + usage( argv[0] ); + break; + } + } + + if ( gOrderTableFileName != NULL ) { + readInTables( gOrderTableFileName ); + } + + assert( baseName ); + + rootEdge = newNode( '\0', 0 ); + gNodesCreated = 0; + gPulled = 0; + + while ( fgets( buf, MAXLENGTH+9, stdin ) ) { + unsigned char* cr = (unsigned char*)strchr( buf, '\n' ); + short wordlen; + if ( cr ) { + *cr = '\0'; + } + + wordlen = strlen( buf ); + + if ( (maxWordLen != MAXLENGTH) && (wordlen > maxWordLen) ) { + continue; + } else if ( wordlen > MAXLENGTH ) { + fprintf( stderr, "word %s too long\n", buf ); + exit(1); + } + + // remember that *cr may be 0 *after* the call to remember + for ( cr = buf; *cr; ++cr ) { + remember(cr); + } + + addToTree( buf, wordlen, rootEdge ); + ++gWordCount; + } + + fprintf( stderr, "done with addToTree (%ld nodes; %ld 
words)\n", + gNodesCreated, gWordCount ); + + init_prune_data(); + prune_tree( rootEdge ); + + fprintf( stderr, "done with prune_tree: %ld pulled\n", gPulled ); + + edgeCount = index_children( rootEdge, 0 ); + + if ( edgeCount >= 0x1FFFF ) { + fprintf( stderr, "ERROR: too many edges: %ld (max is %ld)\n", + edgeCount, (long)0x1FFFF ); + exit( 1 ); + } + + fprintf( stderr, "done with index_children; edgeCount = %ld\n", edgeCount); + mainArray = (array_edge*)malloc( edgeCount * sizeof(array_edge) ); + assert( mainArray ); + +/* largestDiff = smallestDiff = 0; */ + write_children( mainArray, rootEdge ); + fprintf( stderr, "done with write_children\n" ); +/* fprintf( stderr, "largestDiff = %ld, smallestDiff = %ld\n", */ +/* largestDiff, smallestDiff ); */ + + // Now we have a huge array in memory and need to write it to pdb + // format. + write_as_files( mainArray, edgeCount, baseName ); + +/* if ( verbose ) { */ +/* fprintf( stderr, "Writing %d nodes\n", edgeCount ); */ +/* fprintf( stderr, "{letter, next_index, terminal, lastEdge}\n" ); */ +/* for ( i = 0; i < edgeCount; ++i ) { */ +/* array_edge* edge = &gArray[i]; */ + // fprintf( stderr, "/*[%d]*/ {%c, %d, %s, %s}\n", */ +/* i, */ +/* (edge->bits & LETTERMASK) + 'a', */ +/* ushort_byte_swap(edge->first_child), */ +/* (edge->bits&TERMINALMASK)?"true":"false", */ +/* (edge->bits&LASTEDGEMASK)?"true":"false" ); */ +/* } */ +/* } */ + +/* fprintf( stderr, "writing %ld edges to file\n", edgeCount ); */ +/* for ( i = 0; i < edgeCount; ++i ) { */ +/* fwrite( &gArray[i], sizeof(array_edge), 1, stdout ); */ +/* } */ + + return 0; +} // main + +/* Given a node on the tree (not yet converted to a directed graph) + * walk down it using letters where they exist and adding them where + * they don't. + * + * The structure we're building here looks like this, for input "CAT" + * and "CAR": + * /T + * *-C-A + * \R + * That is, words beginning with the same letters share the same initial + * branches of the tree.
Thus on entering a given level of recursion + * there are these possibilities: + * a) There's nothing here: create a new node and recurse on it. + * b) We find a node that holds the letter we seek: recurse on it. + * c) We reach the end of the list of letters without finding what we + * seek: create a new node at the end and recurse on it. + * d) We reach a node before which ours should have been found: create a + * new node in the right place and recurse on it. + */ +static void addToTree( unsigned char* buf, short buflen, tree_edge* node ) { + unsigned char target = *buf; + boolean terminal = (buflen == 1); + tree_edge* child; + tree_edge* prev = NULL; + tree_edge* new_node; + +/* if ( !target ) { */ +/* assert( buflen == 0 ); */ +/* return; */ +/* } */ + if ( buflen == 0 ) return; + assert( buflen > 0 ); + + if ( node->children == NULL ) { + addToTree( buf+1, buflen-1, + node->children = newNode( target, terminal ) ); + return; + } + + for ( child = node->children; child != NULL; child = child->next ) { + if ( child->letter == target ) { + addToTree( buf+1, buflen-1, child ); + return; + } else if ( child->letter > target ) { // it's not in the tree yet. + new_node = newNode( target, terminal ); + new_node->next = child; + new_node->prev = child->prev; + if ( child->prev ) { + child->prev->next = new_node; + } else { // it's the first node! 
+ node->children = new_node; + } + child->prev = new_node; + + addToTree( buf+1, buflen-1, new_node ); + return; + } + prev = child; + } + + assert( prev != NULL ); + new_node = newNode( target, terminal ); + prev->next = new_node; + new_node->prev = prev; + addToTree( buf+1, buflen-1, new_node ); + return; +} + +tree_edge* newNode( unsigned char letter, boolean terminal ) { + tree_edge* result = (tree_edge*)malloc( sizeof(tree_edge )); + assert( result ); + ++gNodesCreated; + result->letter = letter; + result->index = 0xFFFF; + result->terminal = terminal; + result->children = result->next = result->prev = NULL; + + ++gNodeCount; + return result; +} + +////////////////////////////////////////////////////////////////////////////// +// prune_tree (and helpers) +////////////////////////////////////////////////////////////////////////////// +boolean sameStructure( tree_edge* node1, tree_edge* node2 ) { + // simple cases first. + if ( node1 == node2 ) return true; + else if ( node1 == NULL || node2 == NULL ) return false; + else if ( node1->letter != node2->letter ) return false; + else if ( node1->terminal != node2->terminal ) return false; + //else if ( count_nodes( node1 ) != count_nodes( node2 ) ) return false; + else { + tree_edge* children1; + tree_edge* children2; + for ( children1 = node1->children, children2 = node2->children; + children1 || children2; + children1 = children1->next, children2 = children2->next ) { + if ( !sameStructure( children1, children2 ) ) + return false; + } + for ( children1 = node1->next, children2 = node2->next; + children1 || children2; + children1 = children1->next, children2 = children2->next ) { + if ( !sameStructure( children1, children2 ) ) + return false; + } + return (children1 == NULL) && (children2 == NULL); + } +} + +typedef struct visited_edge { + tree_edge* theEdge; + struct visited_edge* next; +} visited_edge; +static visited_edge* visitedEdges[256]; + +void init_prune_data() { + short i; + for ( i = 0; i < 26; ++i ) { + 
visitedEdges[i] = NULL; + } +} + +tree_edge* visited( tree_edge* node ) { + short hash = node->letter;// - 'a'; + //assert( hash >=0 && hash < 26 ); + if ( visitedEdges[hash] == NULL ) { + visitedEdges[hash] = (visited_edge*)malloc(sizeof(visited_edge)); + assert( visitedEdges[hash] ); + visitedEdges[hash]->theEdge = node; + visitedEdges[hash]->next = NULL; + return node; + } else { + visited_edge* visited; + for ( visited = visitedEdges[hash]; visited; + visited = visited->next ) { + if ( verbose ) { + fprintf( stderr, "looking at %c and %c\n", + node->letter, visited->theEdge->letter ); + } + if ( sameStructure( node, visited->theEdge ) ) { + if ( verbose ) { + fprintf( stderr, "pruning tree beginning with %c\n", + node->letter ); + } + return visited->theEdge; + } + } + // didn't find it. Insert new entry at head of list. + visited = (visited_edge*)malloc(sizeof(visited_edge)); + assert( visited ); + visited->theEdge = node; + visited->next = visitedEdges[hash]; + visitedEdges[hash] = visited; + return node; + } +} // visited + +int count_nodes( tree_edge* edge ) { + short result = 0; + while ( edge ) { + result += count_nodes( edge->children ); + ++result; + edge = edge->next; + } + return result; +} + +/* Walk the tree. Starting at the lowest points, lookup each node to see + * if an equivalent one has already been visited. If so, replace it with + * (a ptr to) the first one seen. 
+ */ +void prune_tree( tree_edge* edge ) { + tree_edge* child = edge->children; + tree_edge* tmp; + + // fprintf( stderr, "prune_tree called\n" ); + + if ( !child ) { + return; + } + +/* if( edge->letter == 'c' ) { */ +/* fprintf( stderr, "C\n" ); */ +/* } */ + + while ( child ) { + prune_tree( child ); + child = child->next; + } + + tmp = visited( edge->children ); + if ( tmp != edge->children ) { + short pulled = count_nodes(edge->children); +/* fprintf( stderr, "Removing %d nodes\n", pulled ); */ + gPulled += pulled; + edge->children = tmp; + } +} + +////////////////////////////////////////////////////////////////////////////// +// write_edge (and helpers) +////////////////////////////////////////////////////////////////////////////// + +unsigned long index_children( tree_edge* edge, unsigned long firstIndex ) { + tree_edge* child; + for ( child = edge->children; child; child = child->next ) { + if ( child->index == 0xFFFF ) { + child->index = firstIndex++; +/* assert( firstIndex != 0xFFFF ); */ +/* fprintf( stderr, "set index of %c (%x) to %d\n", child->letter, */ +/* child, child->index ); */ + } + } + + for ( child = edge->children; child; child = child->next ) { + firstIndex = index_children( child, firstIndex ); + } + return firstIndex; +} + +void write_child( array_edge* mainArray, tree_edge* child ) { + if ( child ) { + array_edge* entry = &mainArray[child->index]; + unsigned char bits = 0; + unsigned long childIndex + = (child->children!=NULL)? 
child->children->index : 0; + assert( childIndex <= 0x0001FFFF ); + + entry->lowByte = childIndex & 0x000000FF; + entry->highByte = (childIndex>>8) & 0x000000FF; + + bits = CharToTile(child->letter) & LETTERMASK; + + if ( childIndex & 0x00010000 ) { + bits |= LASTBITMASK; + } + if ( child->terminal ) { + bits |= ACCEPTINGMASK; + } + if ( child->next == NULL ) { + bits |= LASTEDGEMASK; + } + entry->bits = bits; + } +} + +void write_children( array_edge* mainArray, tree_edge* edge ) { + tree_edge* child; + for ( child = edge->children; child; child = child->next ) { + write_child( mainArray, child ); + write_children( mainArray, child ); + + // gather some stats +/* if ( child->index != 0 ) { */ +/* diff = edge->index - child->index; */ +/* if ( diff > largestDiff ) { */ +/* largestDiff = diff; */ +/* } */ +/* if ( diff < smallestDiff ) { */ +/* smallestDiff = diff; */ +/* } */ +/* } */ + + } +} + +////////////////////////////////////////////////////////////////////////////// +// write_as_pdb and helpers +////////////////////////////////////////////////////////////////////////////// + +void write_pdb_record_data( array_edge* edges, unsigned long startCount, + unsigned long count ) { + unsigned long i; + for ( i = startCount; i < startCount + count; ++i ) { + fwrite( &edges[i], sizeof(array_edge), 1, stdout ); + } +} + +// I *think* that the upper bound on this is 0xFFFF/edgesize minus enough that +// I can add edges out to the end of the subarray in which the line falls +// can be accomodated -- which I guess is about 32-1-1 (minus one because +// blanks take up one of the 32 slots though they don't appear in DAWGs, +// and minus another because the boundary must appear after at least the +// first or we just leave it there.) + +#define EDGES_PER_RECORD 0x3FFF +#ifndef EDGES_PER_RECORD +# define EDGES_PER_RECORD 0x00005528 +#endif + +/* Write as binary files segmented appropriately in case the target is PalmOS or + * other platform with restricted-length databases. 
+ */ +void write_as_files( array_edge* edges, unsigned long edgeCount, + char* fileNameBase ) { + unsigned long firstUnhousedEdge = 0; + short numEdgesThisFile; + boolean exitNext = false; +/* unsigned long prevEdgeCount; */ +/* unsigned long curOffset = 0; */ + short fileNum; + + for ( fileNum = 0; !exitNext; ++fileNum ) { + unsigned long lastEdge; + char buf[40]; + FILE* dawgOutF; + unsigned long firstEdgeThisFile = 0; + + /* from the first edge not yet in a record, go forward EDGES_PER_RECORD + edges, and than march forward further until the current subarray is + finished. */ + lastEdge = firstUnhousedEdge + EDGES_PER_RECORD - 1; + if ( lastEdge + 1 >= edgeCount ) { + lastEdge = edgeCount - 1; + assert( (edges[lastEdge].bits & LASTEDGEMASK) ); + exitNext = true; + } + while ( (edges[lastEdge].bits & LASTEDGEMASK) == 0 ) { + ++lastEdge; + } + + numEdgesThisFile = lastEdge - firstUnhousedEdge + 1; + + sprintf( buf, "%s_%d.bin", fileNameBase, fileNum ); + dawgOutF = fopen( buf, "wb" ); + fwrite( &edges[firstUnhousedEdge], sizeof(array_edge), + numEdgesThisFile, dawgOutF ); + fclose( dawgOutF ); + + fprintf( stderr, "wrote edges from %ld to %ld to file %s\n", + firstUnhousedEdge, firstUnhousedEdge+numEdgesThisFile, buf ); + + firstUnhousedEdge = lastEdge + 1; + } + + fprintf( stderr, "%ld edges yielded %d records of up to %ld edges each\n", + edgeCount, fileNum, (long)EDGES_PER_RECORD ); + +} // write_as_files + +/****************************************************************************** + * Read in a file of letters, one per line, whose position in the file will + * determine the translation from char to Tile when the dawg is written out. + * If no such file is passed in, we'll create our own based on the ascii order + * of those chars we see in processing the dictionary. If one is passed in, + * we'll use it, but we'll fail if we encounter a letter not on the list. 
+ * + * Also, for faster lookup of Tile values we maintain a second table mapping + * chars to tiles. 'A' might map to 1, A-umlaut to 2, etc., if 0 is the blank + * char + *****************************************************************************/ +void initTables() { + memset( gOrderTable, 0, 32*sizeof(*gOrderTable) ); + memset( gLookupTable, -1, 256*sizeof(*gLookupTable) ); +} // initTables + +void readInTables( char* orderTableFile ) { + unsigned char ch = 0; + FILE* f = fopen( orderTableFile, "rb" ); + assert( f ); + + gNumUniqueTiles = 0; + while ( fscanf( f, "%c\n", &ch ) != EOF ) { + assert( gNumUniqueTiles <= 32 ); + assert( ch < 255 ); + assert( gOrderTable[gNumUniqueTiles].ch == 0 ); + gOrderTable[gNumUniqueTiles].ch = ch; + gLookupTable[ch] = gNumUniqueTiles; + ++gNumUniqueTiles; + } + fclose( f ); + +} // readInTables + +/****************************************************************************** + * + *****************************************************************************/ +static void remember( unsigned char* c ) { + signed short tile = gLookupTable[*c]; + assert( gOrderTableFileName != NULL ); + if ( tile == -1 ) { + fprintf( stderr, "ERROR: unexpected character '%c' (0x%x)\n", + *c, (short)*c ); + exit(1); + } + assert( tile < 32 ); + *c = tile; +} // remember + +/****************************************************************************** + * + *****************************************************************************/ +Tile CharToTile( unsigned char ch ) { + return ch; +/* assert( gLookupTable[ch] < 32 ); */ +/* return (Tile)gLookupTable[ch]; */ +} // CharToTile + +static short fileSize( char* fileName ) { + short result; + FILE* f = fopen( fileName, "rb" ); + assert( f ); + if ( fseek( f, 0L, SEEK_END ) != 0 ) { + fprintf( stderr, "error from fseek\n" ); + exit(1); + } + result = ftell( f ); + fclose( f ); + return (short)result; +} // fileSize + +////////////////////////////////////////////////////////////////////////////// +// 
usage +////////////////////////////////////////////////////////////////////////////// +void usage( char* progName ) { + fprintf( stderr, + "USAGE: %s\n" + " [-m]\n" + " [-v] (verbose) \n" +/* " [-t char-order-table-file] \n" */ +/* " [-n ] \n" */ + " dawg_file\n", + progName ); + exit( 1 ); +} + diff --git a/xwords4/xwords4/dawg/dict2dawg.pl b/xwords4/xwords4/dawg/dict2dawg.pl new file mode 100755 index 000000000..eb1729be2 --- /dev/null +++ b/xwords4/xwords4/dawg/dict2dawg.pl @@ -0,0 +1,798 @@ +#!/usr/bin/perl + +############################################################################## +# adapted from C++ code Copyright (C) 2000 Falk Hueffner +# This version Copyright (C) 2002 Eric House (fixin@peak.org) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +############################################################################## + +# inputs: 0. Name of file mapping letters to 0..31 values. In English +# case just contains A..Z. This will be used to translate the tries +# on output. +# 1. Max number of bytes per binary output file. +# +# 2. Basename of binary files for output. + +# 3. Name of file to which to write the number of the +# startNode, since I'm not rewriting a bunch of code to expect Falk's +# '*' node at the start. +# + +# In STDIN, the text file to be compressed. 
# It absolutely
# must be sorted.  The sort doesn't have to follow the order in the
# map file, however.

# This is meant eventually to be runnable as part of a cgi system for
# letting users generate Crosswords dicts online.



use strict;
use POSIX;

my $gFirstDiff;
my @gCurrentWord;
my $gCurrentWord;           # save so can check for sortedness
my $gDone = 0;
my @gInputStrings;
my $gNeedsSort = 1;         # read from cmd line eventually
my @gNodes;                 # final array of nodes
my $gNBytesPerOutfile = 0xFFFFFFFF;
my $gTableFile;
my $gOutFileBase;
my $gStartNodeOut;
my $gInFileName;
my $gKillIfMissing = 1;
# Word terminator.  This used to be the two-character string '/n', which can
# never equal the single character returned by getc(), so no word was ever
# terminated unless -term was given.
my $gTermChar = "\n";
my $gDumpText = 0;          # dump the dict as text after?
my $gCountFile;
my $gBytesPerNodeFile;      # where to write whether node size 3 or 4
my $gWordCount = 0;
my %gTableHash;
my @gRevMap;
my $debug = 0;
my %gSubsHash;
my $gForceFour = 0;         # use four bytes regardless of need?
my $gNBytesPerNode;
my $gUseUnicode;

main();

exit;

##############################################################################

# Top-level driver: parse the command line, load the letter map, read (and
# optionally sort) the word list, build the node array, then write whichever
# outputs were requested.  Any file that cannot be opened is a fatal error.
sub main() {

    if ( !parseARGV() ) {
        usage();
        exit();
    }

    makeTableHash();

    my $infile;

    if ( $gInFileName ) {
        open( $infile, '<', $gInFileName )
            or die "can't open $gInFileName for reading: $!\n";
    } else {
        $infile = \*STDIN;
    }

    @gInputStrings = parseAndSort( $infile );
    if ( $gInFileName ) {
        close $infile;
    }

    # Do I need this stupid thing?  Better to move the first row to
    # the front of the array and patch everything else.  Or fix the
    # non-palm dictionary format to include the offset of the first
    # node.

    my $dummyNode = 0xFFFFFFFF;
    @gNodes = ( $dummyNode );

    readNextWord();

    my $firstRootChildOffset = buildNode(0);

    moveTopToFront( \$firstRootChildOffset );

    if ( $gStartNodeOut ) {
        writeOutStartNode( $gStartNodeOut, $firstRootChildOffset );
    }

    print STDERR "\n... dumping table ...\n" if $debug;
    printNodes( \@gNodes, "done with main" ) if $debug;

    # write out the number of words if requested
    if ( $gCountFile ) {
        open( OFILE, '>', $gCountFile )
            or die "can't open $gCountFile for writing: $!\n";
        print OFILE pack( "N", $gWordCount );
        close OFILE;
        print STDERR "wrote out: got $gWordCount words\n";
    }

    if ( $gOutFileBase ) {
        emitNodes( $gNBytesPerOutfile, $gOutFileBase );
    }

    if ( $gDumpText && @gNodes > 0 ) {
        printOneLevel( $firstRootChildOffset, "" );
    }

    if ( $gBytesPerNodeFile ) {
        open( OFILE, '>', $gBytesPerNodeFile )
            or die "can't open $gBytesPerNodeFile for writing: $!\n";
        print OFILE $gNBytesPerNode;
        close OFILE;
    }
    print STDERR "Used $gNBytesPerNode per node.\n";
} # main

# We now have an array of nodes with the last subarray being the
# logical top of the tree.  Move them to the start, fixing all fco
# refs, so that legacy code like Palm can assume top==0.
#
# Note: It'd probably be a bit faster to integrate this with emitNodes
# -- unless I need to have an in-memory list that can be used for
# lookups.  But that's best for debugging, so keep it this way for now.
#
# Also Note: the first node is a dummy that can and should be tossed
# now.
#
# Takes a reference to the offset of the root subarray; on return the
# referenced value is 0.

sub moveTopToFront($) {
    my ( $firstRef ) = @_;

    my $firstChild = ${$firstRef};
    ${$firstRef} = 0;
    my @lastSub;

    if ( $firstChild > 0 ) {
        # remove the last (the root) subarray
        @lastSub = splice( @gNodes, $firstChild );
    } else {
        die "there should be no words!!" if $gWordCount != 0;
    }
    # remove the first (garbage) node
    shift @gNodes;

    my $diff = 0;
    if ( $firstChild > 0 ) {
        # -1 because all move down by 1; see the shift above
        $diff = @lastSub - 1;
        die "something wrong with len\n" if $diff < 0;
    }

    # stick it on the front
    splice( @gNodes, 0, 0, @lastSub );

    # We add $diff to everything.  There's no subtracting because
    # nobody had any refs to the top list.

    for ( my $i = 0; $i < @gNodes; ++$i ) {
        my $fco = TrieNodeGetFirstChildOffset( $gNodes[$i] );
        if ( $fco != 0 ) {      # 0 means NONE, not 0th!!
            TrieNodeSetFirstChildOffset( \$gNodes[$i], $fco+$diff );
        }
    }
} # moveTopToFront


# Build the sibling list for position $depth of the current word, consuming
# input words via readNextWord() for as long as they share the first $depth
# letters.  Returns the offset in @gNodes of the (possibly shared) list, or
# 0 when there is nothing below this point.
sub buildNode {
    my ( $depth ) = @_;

    if ( @gCurrentWord == $depth ) {
        # End of word reached.  If the next word isn't a continuation
        # of the current one, then we've reached the bottom of the
        # recursion tree.
        readNextWord();
        if ($gFirstDiff < $depth || $gDone) {
            return 0;
        }
    }

    my @newedges;

    do {
        my $letter = $gCurrentWord[$depth];
        my $isTerminal = @gCurrentWord - 1 == $depth ? 1:0;

        my $nodeOffset = buildNode($depth+1);
        my $newNode = MakeTrieNode($letter, $isTerminal, $nodeOffset);
        push( @newedges, $newNode );

    } while ( ($gFirstDiff == $depth) && !$gDone);

    # flag the final edge of the list
    TrieNodeSetIsLastSibling( \$newedges[$#newedges], 1 );

    return addNodes( \@newedges );
} # buildNode

# Return the offset in @gNodes of a sibling list equal to @{$newedgesR},
# appending and registering the list first if it has not been seen before.
sub addNodes {
    my ( $newedgesR ) = @_;

    my $found = findSubArray( $newedgesR );

    if ( $found >= 0 ) {
        # slot 0 holds the dummy node, so nothing can legitimately match it
        die "0 is an invalid match!!!" if $found == 0;
        return $found;
    }

    my $firstFreeIndex = @gNodes;

    print STDERR "adding...\n" if $debug;
    printNodes( $newedgesR ) if $debug;

    push @gNodes, (@{$newedgesR});

    registerSubArray( $newedgesR, $firstFreeIndex );
    return $firstFreeIndex;
} # addNodes

# Debugging aid: print one node's fields to STDERR.
sub printNode {
    my ( $index, $node ) = @_;

    print STDERR "[$index] ";

    printf( STDERR
            "letter=%d; isTerminal=%d; isLastSib=%d; fco=%d;\n",
            TrieNodeGetLetter($node),
            TrieNodeGetIsTerminal($node),
            TrieNodeGetIsLastSibling($node),
            TrieNodeGetFirstChildOffset($node));
} # printNode

# Debugging aid: print every node in the referenced array to STDERR.
# $name is accepted for the caller's benefit but is not printed.
sub printNodes {
    my ( $nodesR, $name ) = @_;

    my $len = @{$nodesR};

    for ( my $i = 0; $i < $len; ++$i ) {
        printNode( $i, ${$nodesR}[$i] );
    }

}


# Hashing.
# We'll keep a hash of offsets into the existing nodes
# array, and as the key use a string that represents the entire sub
# array.  Since the key is what we're matching for, there should never
# be more than one value per hash and so we don't need buckets.
# Return -1 if there's no match.

# Build the hash key for a subarray.  The node values are joined with a
# separator: joining with '' made different arrays collide, e.g. (12,3)
# and (1,23) both produced "123".
sub _subArrayKey {
    my ( $edgesR ) = @_;
    return join( ',', @{$edgesR} );
}

sub findSubArray {
    my ( $newedgesR ) = @_;

    my $key = _subArrayKey( $newedgesR );

    return exists( $gSubsHash{$key} ) ? $gSubsHash{$key} : -1;
} # findSubArray

# add to the hash
sub registerSubArray {
    my ( $edgesR, $nodeLoc ) = @_;

    my $key = _subArrayKey( $edgesR );

    die "entry for key shouldn't exist!!" if exists $gSubsHash{$key};
    $gSubsHash{$key} = $nodeLoc;

} # registerSubArray

# Advance to the next input word.  Sets $gDone once the input is exhausted,
# sets $gFirstDiff to the number of leading letters the new word shares with
# the previous one, and dies if the two words are out of order.
sub readNextWord() {
    my @word;

    if ( !$gDone ) {
        $gDone = @gInputStrings == 0;
        if ( !$gDone ) {
            @word = @{shift @gInputStrings};
        } else {
            print STDERR "gDone set to true\n" if $debug;
        }

        print STDERR "got word: ", join(',',@word), "\n" if $debug;
    }

    my $len = scalar(@word) < scalar(@gCurrentWord)
        ? scalar(@word) : scalar(@gCurrentWord);

    # Check the bound before indexing so we never read past either word.
    my $numCommonLetters = 0;
    while ( $numCommonLetters < $len
            && $gCurrentWord[$numCommonLetters] == $word[$numCommonLetters] ) {
        ++$numCommonLetters;
    }

    $gFirstDiff = $numCommonLetters;
    if ( @gCurrentWord > 0 && @word > 0
         && !firstBeforeSecond( \@gCurrentWord, \@word ) ) {
        die "words ", join(",",@gCurrentWord), " and ", join(",", @word),
            " out of order";
    }
    @gCurrentWord = @word;
} # readNextWord

# True if the first word (a ref to a list of tile values) sorts strictly
# before the second.  Dies if the two words are identical.
sub firstBeforeSecond {
    my ( $firstR, $secondR ) = @_;

    for ( my $i = 0; ; ++$i ) {

        # if we reach the end of the first word/list, we're done.
        if ( $i == @{$firstR} ) {
            die "duplicate!!!" if $i == @{$secondR};
            return 1;
        }
        # but if we reach the second end first, we've failed
        if ( $i == @{$secondR} ) {
            return 0;
        }

        my $diff = ${$firstR}[$i] <=> ${$secondR}[$i];
        return $diff < 0 if $diff != 0;
    }
} # firstBeforeSecond

# passed to sort.  Should remain unprototyped for efficiency's sake

sub cmpWords {

    my $lenA = @{$a};
    my $lenB = @{$b};
    my $min = $lenA > $lenB? $lenB: $lenA;

    for ( my $i = 0; $i < $min; ++$i ) {
        my $res = ${$a}[$i] <=> ${$b}[$i];
        return $res if $res != 0;       # we're done
    }

    # If we got here, they match up to their common length.  Longer is
    # greater.
    return $lenA <=> $lenB;
} # cmpWords

# Read $infile one character at a time, splitting it into words at
# $gTermChar and translating each letter through %gTableHash.  Returns the
# list of words (each a ref to a list of tile values), sorted unless
# sorting was disabled, and counts them in $gWordCount.
#
# A character missing from the map either kills the run ($gKillIfMissing)
# or causes the word containing it to be dropped.  Empty words are ignored.
# A final word with no terminator before EOF is kept.
sub parseAndSort {
    my ( $infile ) = @_;

    my @wordlist;
    my @word;

  WORDLOOP:
    for ( ; ; ) {

        my $dropWord = 0;
        @word = ();             # start each word empty

        # for each byte
        for ( ; ; ) {
            my $byt = getc($infile);

            if ( !defined $byt ) {
                # EOF: don't lose a last word that lacks a terminator
                if ( !$dropWord && @word > 0 ) {
                    push @wordlist, [ @word ];
                    ++$gWordCount;
                }
                last WORDLOOP;
            } elsif ( $byt eq $gTermChar ) {
                if ( !$dropWord && @word > 0 ) {
                    push @wordlist, [ @word ];
                    ++$gWordCount;
                }
                next WORDLOOP;
            } elsif ( exists( $gTableHash{$byt} ) ) {
                if ( !$dropWord ) {
                    push @word, $gTableHash{$byt};
                    die "word too long" if @word > 15;
                }
            } elsif ($gKillIfMissing) {
                die "char $byt (", ord($byt), ") not in map file $gTableFile\n";
            } else {
                $dropWord = 1;
                @word = ();     # lose anything we already have
            }
        }
    }

    if ( $gNeedsSort && ($gWordCount > 0) ) {
        @wordlist = sort cmpWords @wordlist;
    }

    print STDERR "length of list is ", @wordlist + 0, ".\n" if $debug;

    return @wordlist;
} # parseAndSort

# Print binary representation of trie array.  This isn't used yet, but
# eventually it'll want to dump to multiple files appropriate for Palm
# that can be catenated together on other platforms.  There'll need to
# be a file giving the offset of the first node too.
Also, might want +# to move to 4-byte representation when the input can't otherwise be +# handled. + +sub dumpNodes { + + for ( my $i = 0; $i < @gNodes; ++$i ) { + my $node = $gNodes[$i]; + my $bstr = pack( "I", $node ); + print STDOUT $bstr; + } +} + +############################################################################## +# Little node-field setters and getters to hide what bits represent +# what. +############################################################################## + +sub TrieNodeSetIsTerminal { + my ( $nodeR, $isTerminal ) = @_; + + if ( $isTerminal ) { + ${$nodeR} |= 1 << 31; + } else { + ${$nodeR} &= ~(1 << 31); + } +} + +sub TrieNodeGetIsTerminal { + my ( $node ) = @_; + return ($node & 1 << 31) != 0; +} + +sub TrieNodeSetIsLastSibling { + my ( $nodeR, $isLastSibling ) = @_; + if ( $isLastSibling ) { + ${$nodeR} |= 1 << 30; + } else { + ${$nodeR} &= ~(1 << 30); + } +} + +sub TrieNodeGetIsLastSibling { + my ( $node ) = @_; + return ($node & 1 << 30) != 0; +} + +sub TrieNodeSetLetter { + my ( $nodeR, $letter ) = @_; + + die "letter ", $letter, " too big" if $letter >= 32; + + my $mask = ~(0x1F << 25); + ${$nodeR} &= $mask; # clear all the bits + ${$nodeR} |= ($letter << 25); # set new ones +} + +sub TrieNodeGetLetter { + my ( $node ) = @_; + $node >>= 25; + $node &= 0x1F; + return $node; +} + +sub TrieNodeSetFirstChildOffset { + my ( $nodeR, $fco ) = @_; + + die "$fco larger than 25 bits" if ($fco & 0xFE000000) != 0; + + my $mask = ~0x01FFFFFF; + ${$nodeR} &= $mask; # clear all the bits + ${$nodeR} |= $fco; # set new ones +} + +sub TrieNodeGetFirstChildOffset { + my ( $node ) = @_; + $node &= 0x01FFFFFF; # 24 bits + return $node; +} + + +sub MakeTrieNode { + my ( $letter, $isTerminal, $firstChildOffset, $isLastSibling ) = @_; + my $result = 0; + + TrieNodeSetIsTerminal( \$result, $isTerminal ); + TrieNodeSetIsLastSibling( \$result, $isLastSibling ); + TrieNodeSetLetter( \$result, $letter ); + TrieNodeSetFirstChildOffset( \$result, 
$firstChildOffset ); + + return $result; +} # MakeTrieNode + +# Caller may need to know the offset of the first top-level node. +# Write it here. +sub writeOutStartNode { + my ( $startNodeOut, $firstRootChildOffset ) = @_; + + open NODEOUT, ">$startNodeOut"; + print NODEOUT pack( "N", $firstRootChildOffset ); + close NODEOUT; +} # writeOutStartNode + +# build the hash for translating. I'm using a hash assuming it'll be +# fast. Key is the letter; value is the 0..31 value to be output. +sub makeTableHash { + my $i; + open TABLEFILE, "< $gTableFile"; + + splice @gRevMap; # empty it + + for ( $i = 0; ; ++$i ) { + my $ch = getc(TABLEFILE); + if ( $ch eq undef ) { + last; + } + + if ( $gUseUnicode ) { # skip the first byte each time: tmp HACK!!! + $ch = getc(TABLEFILE); + } + if ( $ch eq undef ) { + last; + } + + push @gRevMap, $ch; + + if ( ord($ch) == 0 ) { # blank + next; # we want to increment i when blank seen since + # it is a tile value + } + + die "$gTableFile too large\n" if $i > 32; + die "only blank (0) can be 32nd char\n" if ($i == 32 && $ch != 0); + + $gTableHash{$ch} = $i; + } + + close TABLEFILE; +} # makeTableHash + +# emitNodes. "input" is $gNodes. From it we write up to +# $nBytesPerOutfile to files named $outFileBase0..n, mapping the +# letter field down to 5 bits with a hash built from $tableFile. If +# at any point we encounter a letter not in the hash we fail with an +# error. + +sub emitNodes($$) { + my ( $gNBytesPerOutfile, $outFileBase ) = @_; + + # now do the emit. + + # is 17 bits enough? + printf STDOUT ("There are %d (0x%x) nodes in this DAWG.\n", + 0 + @gNodes, 0 + @gNodes ); + if ( @gNodes > 0x1FFFF || $gForceFour ) { + $gNBytesPerNode = 4; + } else { + $gNBytesPerNode = 3; + } + + my $nextIndex = 0; + my $nextFileNum = 0; + + for ( $nextFileNum = 0; ; ++$nextFileNum ) { + + if ( $nextIndex >= @gNodes ) { + last; # we're done + } + + die "Too many outfiles; infinite loop?" 
if $nextFileNum > 99; + + my $outName = sprintf("${outFileBase}_%03d.bin", $nextFileNum); + open OUTFILE, "> $outName"; + my $curSize = 0; + + while ( $nextIndex < @gNodes ) { + + # scan to find the next terminal + my $i; + for ( $i = $nextIndex; + !TrieNodeGetIsLastSibling($gNodes[$i]); + ++$i ) { + + # do nothing but a sanity check + if ( $i >= @gNodes) { + die "bad trie format: last node not last sibling" ; + } + + } + ++$i; # move beyond the terminal + my $nextSize = ($i - $nextIndex) * $gNBytesPerNode; + if ($curSize + $nextSize > $gNBytesPerOutfile) { + last; + } else { + # emit the subarray + while ( $nextIndex < $i ) { + outputNode( $gNodes[$nextIndex], $gNBytesPerNode, + \*OUTFILE ); + ++$nextIndex; + } + $curSize += $nextSize; + } + } + + close OUTFILE; + } + +} # emitNodes + +sub printWord { + my ( $str ) = @_; + + print STDERR "$str\n"; +} + +# print out the entire dictionary, as text, to STDERR. + +sub printOneLevel { + + my ( $index, $str ) = @_; + + for ( ; ; ) { + + my $newStr = $str; + my $node = $gNodes[$index++]; + + my $lindx = $gRevMap[TrieNodeGetLetter($node)]; + + if ( ord($lindx) >= 0x20 ) { + $newStr .= "$lindx"; + } else { + print STDERR "sub space" if $debug; + $newStr .= "\\" . 
chr('0'+$lindx); + } + + if ( TrieNodeGetIsTerminal($node) ) { + printWord( $newStr ); + } + + my $fco = TrieNodeGetFirstChildOffset( $node ); + if ( $fco != 0 ) { + printOneLevel( $fco, $newStr ); + } + + if ( TrieNodeGetIsLastSibling($node) ) { + last; + } + } +} + +sub outputNode { + my ( $node, $nBytes, $outfile ) = @_; + + my $fco = TrieNodeGetFirstChildOffset($node); + my $fourthByte; + + if ( $nBytes == 4 ) { + $fourthByte = $fco >> 17; + die "fco too big" if $fourthByte > 0xFF; + $fco &= 0x1FFFF; + } + + # format according to dawg.h: + # typedef struct array_edge { + # unsigned char highByte; + # unsigned char lowByte; + # unsigned char bits; +#ifdef FOUR_BYTE + # unsigned char moreBits; +#endif + # } array_edge; + + # define LETTERMASK 0x1f + # define ACCEPTINGMASK 0x20 + # define LASTEDGEMASK 0x40 + # define LASTBITMASK 0x80 + + # write the fco (less that one bit). We want two bytes worth + # in three-byte mode, and three in four-byte mode (which is + # untested) + for ( my $i = 1; $i >= 0; --$i ) { + my $tmp = ($fco >> ($i * 8)) & 0xFF; + print $outfile pack( "C", $tmp ); + } + $fco >>= 16; # it should now be 1 or 0 + die "fco not 1 or 0" if $fco > 1; + + my $chIn5 = TrieNodeGetLetter($node); + my $bits = $chIn5; + + if ( TrieNodeGetIsLastSibling($node) ) { + $bits |= 0x40; + } + if ( TrieNodeGetIsTerminal($node) ) { + $bits |= 0x20; + } + if ( $fco != 0 ) { + $bits |= 0x80; + } + print $outfile pack( "C", $bits ); + + # the final byte, if in use + if ( $nBytes == 4 ) { + print $outfile pack( "C", $fourthByte ); + } +} # outputNode + +sub usage { + print STDERR "usage: $0 \n" + . "\t[-b bytesPerFile] (default = 0xFFFFFFFF)\n" + . "\t-m mapFile\n" + . "\t-ob outFileBase\n" + . "\t-sn start node out file\n" + . "\t[-if input file name] -- default = stdin\n" + . "\t[-term ch] (word terminator -- default = '\\0'\n" + . "\t[-nosort] (input already sorted in accord with -m; " . + " default=sort'\n" + . 
"\t[-dump] (write dictionary as text to STDERR for testing)\n" + . "\t[-force4](use 4 bytes per node regardless of need)\n" + . "\t[-r] (reject words with letters not in mapfile)\n" + . "\t[-k] (kill if any letters no in mapfile -- default)\n" + ; + +} # usage + +sub parseARGV { + + my $arg; + while ( my $arg = shift(@ARGV) ) { + + SWITCH: { + if ($arg =~ /-b/) {$gNBytesPerOutfile = shift(@ARGV), last SWITCH;} + if ($arg =~ /-mn/) {$gTableFile = shift(@ARGV); + $gUseUnicode = 1; + last SWITCH;} + if ($arg =~ /-m/) {$gTableFile = shift(@ARGV); last SWITCH;} + if ($arg =~ /-ob/) {$gOutFileBase = shift(@ARGV), last SWITCH;} + if ($arg =~ /-sn/) {$gStartNodeOut = shift(@ARGV), last SWITCH;} + if ($arg =~ /-if/) {$gInFileName = shift(@ARGV), last SWITCH;} + if ($arg =~ /-r/) {$gKillIfMissing = 0; last SWITCH;} + if ($arg =~ /-k/) {$gKillIfMissing = 1; last SWITCH;} + if ($arg =~ /-term/) {$gTermChar = chr(shift(@ARGV)); last SWITCH;} + if ($arg =~ /-dump/) {$gDumpText = 1; last SWITCH;} + if ($arg =~ /-nosort/) {$gNeedsSort = 0; last SWITCH;} + if ($arg =~ /-wc/) {$gCountFile = shift(@ARGV); last SWITCH;} + if ($arg =~ /-ns/) {$gBytesPerNodeFile = shift(@ARGV); last SWITCH;} + if ($arg =~ /-force4/) {$gForceFour = 1; last SWITCH;} + die "unexpected arg $arg\n"; + } + } + + + print STDERR "gNBytesPerOutfile=$gNBytesPerOutfile\n" if $debug; + print STDERR "gTableFile=$gTableFile\n" if $debug; + print STDERR "gOutFileBase=$gOutFileBase\n" if $debug; + print STDERR "gStartNodeOut=$gStartNodeOut\n" if $debug; + printf STDERR "gTermChar=%s(%d)\n", $gTermChar, ord($gTermChar) if $debug; + + return $gTableFile; + +} # parseARGV diff --git a/xwords4/xwords4/dawg/frank_mkspecials.pl b/xwords4/xwords4/dawg/frank_mkspecials.pl new file mode 100755 index 000000000..353727a74 --- /dev/null +++ b/xwords4/xwords4/dawg/frank_mkspecials.pl @@ -0,0 +1,45 @@ +#!/usr/bin/perl + +# Copyright 2001 by Eric House (fixin@peak.org) +# +# This program is free software; you can redistribute it 
and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# Given arguments consisting of triples, first a string and then pbitm +# files representing bitmaps. For each triple, print out the string and +# then the converted bitmaps. + +use strict; + +while ( @ARGV ) { + my $str = shift(); + my $largebmp = shift(); + my $smallbmp = shift(); + + doOne( $str, $largebmp, $smallbmp ); +} + +sub doOne { + my ( $str, $largebmp, $smallbmp ) = @_; + + print pack( "C", length($str) ); + print $str; + + print STDERR "looking at $largebmp", "\n"; + + print `cat $largebmp | ../pbitm2bin.pl`; + print `cat $smallbmp | ../pbitm2bin.pl`; +} + + diff --git a/xwords4/xwords4/dawg/gendict.c b/xwords4/xwords4/dawg/gendict.c new file mode 100644 index 000000000..3117d685a --- /dev/null +++ b/xwords4/xwords4/dawg/gendict.c @@ -0,0 +1,99 @@ +#include + +/* + * Copyright 1998 by Eric House. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +typedef char boolean; +#define true 1 +#define false 0 + +#define MAXWORDLEN 15 + +boolean writeOneWord( char* word, int len ); +void usage(); + +////////////////////////////////////////////////////////////////////////////// +// main +// This program generates all possible combinations of letters 'A'-'Z' of +// lengths between 2 and MAXWORDLEN, writing them to stdout. It's meant +// to be used to test the dawg dictionary and a state machine based on +// it: if the output of this program is used as input to that machine, the +// list of words accepted by the machine should be identical to the list +// from which the dawg was created -- provided sufficient length words +// are created. +////////////////////////////////////////////////////////////////////////////// +int main( int argc, char** argv ) { + int i, j; + long lowerbound = 0; + long upperbound = 0; + char buffer[MAXWORDLEN+1]; + + if ( argc ==3 ) { + sscanf( argv[1], "%d", &lowerbound ); + sscanf( argv[2], "%d", &upperbound ); + if ( lowerbound < 2 || upperbound < 2 ) { + usage(); + } + } else { + usage(); + } + + for ( i = lowerbound; i <= upperbound; ++i ) { + buffer[0] = '\0'; + for ( j = 0; j < i; ++j ) { + strcat( buffer, "A" ); + } + while ( !writeOneWord( buffer, i-1 ) ) { + // do nothing + } + } +} + +/* Increment the last letter if possible. Otherwise reset it and find + * the first letter above it that can be incremented, resetting along + * the way. If the *first* letter needs to be reset we're finished. 
+ */ +boolean writeOneWord( char* word, int len ) { + fprintf( stdout, "%s\n", word ); + + if ( word[len] != 'Z' ) { + word[len]++; + } else { + int i; + word[len] = 'A'; + for ( i = len-1; ; --i ) { + if ( word[i] != 'Z' ) { + word[i]++; + break; + } else if ( i == 0 ) { // they're *all* Zs... + return true; // we wrote all the words! + } else { + word[i] = 'A'; + } + } + } + return false; +} + +void usage() { + fprintf( stderr, + "USAGE: gendict upperbound lowerbound\n" + " (Both must be >= 2.)\n" ); + exit( 0 ); +} + diff --git a/xwords4/xwords4/dawg/listchars.pl b/xwords4/xwords4/dawg/listchars.pl new file mode 100755 index 000000000..185b29f7d --- /dev/null +++ b/xwords4/xwords4/dawg/listchars.pl @@ -0,0 +1,36 @@ +#!/usr/bin/perl + +# Copyright 2001 by Eric House +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ +use strict; + +my %lettersHash; + + +while ( <> ) { + chomp; + foreach my $byte (split //) { + ++$lettersHash{$byte}; + } +} + +foreach my $key (sort keys(%lettersHash)) { + my $count = $lettersHash{$key}; + if ( $count ) { + printf( "%.3d: %s: %.7d\n", ord($key), $key, $count ); + } +} diff --git a/xwords4/xwords4/dawg/palm_mkspecials.pl b/xwords4/xwords4/dawg/palm_mkspecials.pl new file mode 100755 index 000000000..d30f413d5 --- /dev/null +++ b/xwords4/xwords4/dawg/palm_mkspecials.pl @@ -0,0 +1,111 @@ +#!/usr/bin/perl + +# Copyright 2002 by Eric House +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# Given arguments consisting of triples, first a string and then pbitm +# files representing bitmaps. The format looks like this: + +# array [0-n] of { char len; +# char[3] alt txt; +# int16 offsetOfLarge; +# int16 offsetOfSmall; +# } +# array [0-n] of { +# bitmapLargeIfPresent; +# bitmapSmallIfPresent; +# } +# +# In addition, there's padding between bitmaps if needed to get the next +# one to a 2-byte boundary. And the input files are not in PalmOS bitmap +# format, so thay have to get converted into a tmp file before the sizes +# can be known and included in the eventual output. 
use strict;

# Bitmap data is accumulated in a scratch file while the fixed-size entry
# table is streamed to stdout; the scratch file is appended afterwards.
my $tmpfile = "/tmp/tmpout$$";

die "wrong number of args" if (@ARGV % 3) != 0;
my $nSpecials = @ARGV / 3;

# Offset of the first bitmap: it follows the table of 8-byte entries.
my $gOffset = $nSpecials * 8;   # sizeof(Xloc_specialEntry)

open TMPFILE, "> $tmpfile" or die "couldn't open $tmpfile for writing: $!";
binmode TMPFILE;
binmode STDOUT;

for ( my $i = 0; $i < $nSpecials; ++$i ) {

    # Each triple is: alternate text, large bitmap file, small bitmap file.
    my $str = shift( @ARGV );
    my $len = length($str);
    die "string $str too long" if $len > 3;

    # Emit the text NUL-padded to 4 bytes; together with the two 16-bit
    # offsets written by doOneFile that makes the 8-byte entry.
    print $str;
    while ( $len < 4 ) {
        ++$len;
        print pack("c", 0 );
    }

    doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
    doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
}

close TMPFILE or die "error closing $tmpfile: $!";

# now append the tempfile
open TMPFILE, "< $tmpfile" or die "couldn't reopen $tmpfile for reading: $!";
binmode TMPFILE;
while ( read( TMPFILE, my $buffer, 128 ) ) {
    print $buffer;
}
close TMPFILE;

unlink $tmpfile;

exit 0;


# Convert one bitmap file, append it (padded to an even length) to the
# filehandle $fh, and print its 16-bit big-endian offset to stdout -- or 0
# when there is no bitmap.  $offsetR is a reference to the running offset,
# which is advanced by the padded size.
sub doOneFile($$$) {
    my ( $fil, $fh, $offsetR ) = @_;

    my $size = convertBmp($fil, $fh );
    if ( ($size % 2) != 0 ) {
        # keep the next bitmap on a 2-byte boundary
        ++$size;
        print $fh pack( "c", 0 );
    }

    print pack( "n", $size > 0? ${$offsetR} : 0 );

    ${$offsetR} += $size;
} # doOneFile

# Run pilrc on $pbitmfile, write the resulting bitmap to $fhandle, and
# return its size in bytes.  "/dev/null" means "no bitmap" and returns 0
# without writing anything.  Dies if pilrc yields no output file, rather
# than silently recording the bitmap as absent.
sub convertBmp($$) {
    my ( $pbitmfile, $fhandle ) = @_;

    return 0 if $pbitmfile eq "/dev/null";

    # for some reason I can't get quote marks to print into tmp.rcp using
    # just `echo`
    open TMP, "> tmp.rcp" or die "couldn't open tmp.rcp for writing: $!";
    print TMP "BITMAP ID 1000 \"$pbitmfile\" AUTOCOMPRESS";
    close TMP;

    `pilrc tmp.rcp`;

    my $siz = -s "Tbmp03e8.bin";
    if ( !defined $siz ) {
        `rm -f tmp.rcp Tbmp03e8.bin`;
        die "pilrc produced no Tbmp03e8.bin for $pbitmfile";
    }

    print $fhandle `cat Tbmp03e8.bin`;
    `rm -f tmp.rcp Tbmp03e8.bin`;

    return $siz;
}
# Only enough of par's features to support building a crosswords dict
# pdb

use strict;

my $debug = 0;

# PRC/PDB attribute bits, stolen from par source
my $PRC_FLAGS_RESOURCE    = 1 << 0;
my $PRC_FLAGS_READONLY    = 1 << 1;
my $PRC_FLAGS_DIRTY       = 1 << 2;
my $PRC_FLAGS_BACKUP      = 1 << 3;
my $PRC_FLAGS_NEWER       = 1 << 4;
my $PRC_FLAGS_RESET       = 1 << 5;
my $PRC_FLAGS_COPYPREVENT = 1 << 6;
my $PRC_FLAGS_STREAM      = 1 << 7;
my $PRC_FLAGS_HIDDEN      = 1 << 8;
my $PRC_FLAGS_LAUNCHABLE  = 1 << 9;
my $PRC_FLAGS_RECYCLABLE  = 1 << 10;
my $PRC_FLAGS_BUNDLE      = 1 << 11;
my $PRC_FLAGS_OPEN        = 1 << 15;

# Header values adjustable through -a / -v (see readHOptions).
my $gAttrs = 0;
my $gVersion = 1;               # par defaults this to 1

# Command line: c [-a attrs] [-v version] dbfile name type cid file...
my $cmd = shift( @ARGV );
unless ( $cmd eq "c" || $cmd eq "-c" ) {
    die "only 'c' supported now";
}

readHOptions( \@ARGV );

my $dbfile = shift( @ARGV );

my $name = shift( @ARGV );
length($name) <= 31 or die "name $name too long";

my $type = shift( @ARGV );
length($type) == 4 or die "type $type must be of length 4";

my $cid = shift( @ARGV );
length($cid) == 4 or die "cid $cid must be of length 4";

# Everything left on the command line is a record file; remember each
# name together with its size on disk.
my @fileNames = splice( @ARGV );
my @fileLengths = map { scalar( -s $_ ) } @fileNames;
my $nFiles = scalar( @fileNames );
# Database header layout (prc_file_t in par's prcp.h), all big-endian:
#   name[32] flags[2] version[2] ctime[4] mtime[4] btime[4] modnum[4]
#   appinfo[4] sortinfo[4] type[4] cid[4] unique_id_seed[4]
#   next_record_list[4] nrecords[2]

my $offset = 0;

open OUTFILE, "> $dbfile" or die "couldn't open outfile $dbfile for writing";

# name, padded with 0s up to 32 bytes
my $nameField = $name . ( "\0" x ( 32 - length($name) ) );

my $time = time() + 2082844800;

# flags, version, ctime, mtime, btime, mod num, appinfo, sortinfo
my $front = pack( "nnNNNNNN", $gAttrs, $gVersion, $time, $time, 0, 0, 0, 0 );

# unique_id_seed, next_record_list, nrecords
my $back = pack( "NNn", 0, 0, $nFiles );

print OUTFILE $nameField, $front, $type, $cid, $back;

# type and cid are counted as 4 bytes each
$offset = length($nameField) + length($front) + 8 + length($back);

# Record list: one 8-byte entry (data offset, then a zero word) per file.
# The first record's data starts after the whole list plus 2 pad bytes.
$offset += $nFiles * 8;
$offset += 2;        # djw adds 2 bytes after size list; see below
foreach my $recLen ( @fileLengths ) {
    print OUTFILE pack( "NN", $offset, 0 );
    $offset += $recLen;
}

print OUTFILE pack( "n", 0 ); # djw does this sans comment: flush.c, line 87
# Consume leading "-x" options from the array referenced by $argvR.
# "-a ATTRS" ORs the named attributes into $gAttrs; "-v N" sets $gVersion.
# Stops (consuming nothing more) at the first argument that does not begin
# with '-'; dies on any other option.
sub readHOptions {

    my ( $argvR ) = @_;

    while ( $argvR->[0] =~ /^-/ ) {
        my $opt = shift @$argvR;

        if ( $opt eq "-a" ) {
            processAttrString( shift @$argvR );
        } elsif ( $opt eq "-v" ) {
            $gVersion = shift @$argvR;
        } else {
            die "what's with \"$opt\": -a and -v are the only hattrs supported";
        }
    }

} # readHOptions

# Split $attrs on '|' and OR the bit for each recognised attribute name
# into $gAttrs.  Names are matched as substrings, first match in the order
# below wins.  "resource" and "stream" are recognised but rejected; an
# unrecognised name is fatal.
sub processAttrString {

    my ( $attrs ) = @_;

    # [ pattern, bit, message to die with after setting the bit (if any) ]
    my @known = (
        [ qr/resource/,    $PRC_FLAGS_RESOURCE,    "resource attr not supported" ],
        [ qr/readonly/,    $PRC_FLAGS_READONLY ],
        [ qr/dirty/,       $PRC_FLAGS_DIRTY ],
        [ qr/backup/,      $PRC_FLAGS_BACKUP ],
        [ qr/newer/,       $PRC_FLAGS_NEWER ],
        [ qr/reset/,       $PRC_FLAGS_RESET ],
        [ qr/copyprevent/, $PRC_FLAGS_COPYPREVENT ],
        [ qr/stream/,      $PRC_FLAGS_STREAM,      "stream attr not supported" ],
        [ qr/hidden/,      $PRC_FLAGS_HIDDEN ],
        [ qr/launchable/,  $PRC_FLAGS_LAUNCHABLE ],
        [ qr/recyclable/,  $PRC_FLAGS_RECYCLABLE ],
        [ qr/bundle/,      $PRC_FLAGS_BUNDLE ],
        [ qr/open/,        $PRC_FLAGS_OPEN ],
    );

  FLAG:
    foreach my $flag ( split /\|/, $attrs ) {

        print STDERR "looking at flag $flag\n" if $debug;

        foreach my $entry ( @known ) {
            my ( $pattern, $bit, $fatal ) = @$entry;
            next unless $flag =~ $pattern;

            $gAttrs |= $bit;
            die $fatal if defined $fatal;
            next FLAG;
        }

        die "flag $flag not supportd";
    }
} # processAttrString
# Given a pbitm on stdin, a text bitmap file where '#' indicates a set
# bit and '-' indicates a clear bit, convert into binary form (on
# stdout) where there's one bit per bit plus a byte each for the width
# and height.  Nothing for bitdepth at this point.  And no padding: if
# the number of bits in a row isn't a multiple of 8 then one byte will
# hold the last bits of one row and the first of another.

use strict;

my $nRows = 0;
my $nCols = 0;
my $bits = "";  # save the chars in a single string to start

binmode STDOUT;

# first gather information and sanity-check the data

while (<>) {
    chomp;
    my $len = length();

    # The first line fixes the width; every later line must match it.
    if ( $nCols == 0 ) {
        $nCols = $len;
    } else {
        die "line of inconsistent length" if $nCols != $len ;
    }
    # An empty first line means an empty bitmap: stop reading.
    if ( $nCols == 0 ) {
        last;
    }

    $bits .= $_;
    ++$nRows;
}

my $len = length($bits);
print pack( "C", $nCols );

# if we've been given an empty file, print out a single null byte and
# be done.  That'll be the convention for "non-existent bitmap".
if ( $len == 0 ) {
    exit 0;
}
print pack( "C", $nRows );
printf STDERR "emitting %dx%d bitmap\n", $nCols, $nRows;


my @charlist = split( //,$bits);
my $byte = 0;

# Pack the characters MSB-first, eight to a byte; the final byte is
# emitted even when only partly filled (low bits left clear).
for ( my $count = 0; ; ++$count ) {

    my $ch = $charlist[$count];
    my $bitindex = $count % 8;

    # String comparison is required here: with numeric '==' both '-' and
    # '#' evaluate to 0, so nearly any character would be accepted.
    $ch eq '-' || $ch eq '#' || die "unknown char $ch";

    my $bit = ($ch eq '#')? 1:0;

    $byte |= $bit << (7 - $bitindex);

    my $lastPass = $count + 1 == $len;
    if ( $bitindex == 7 || $lastPass ) {
        print pack( "C", $byte );
        if ( $lastPass ) {
            last;
        }
        $byte = 0;
    }

} # for loop
/* Fixed-width integer aliases for the on-disk PDB layout.  DWORD must be
 * exactly 4 bytes and WORD exactly 2, or the field offsets noted below
 * (and DOCHEADSZ / RECHEADSZ) no longer match the structs.  "unsigned
 * long" is 8 bytes on LP64 systems, so "unsigned int" is used instead;
 * the typedefs after them fail to compile if either assumption is wrong.
 */
typedef unsigned int DWORD;
typedef unsigned short WORD;

typedef char pdb_dword_must_be_4_bytes[ (sizeof(DWORD) == 4) ? 1 : -1 ];
typedef char pdb_word_must_be_2_bytes[ (sizeof(WORD) == 2) ? 1 : -1 ];

// all numbers in these structs are big-endian, MAC format
typedef struct DocHeader {
    char sName[32];         // 0x00
    DWORD dwUnknown1;       // 0x20 bytes
    DWORD dwTime1;          // 0x24 bytes
    DWORD dwTime2;          // 0x28
    DWORD dwTime3;          // 0x2C
    DWORD dwLastSync;       // 0x30
    DWORD ofsSort;          // 0x34
    DWORD ofsCatagories;    // 0x38
    DWORD dwType;           // 0x3C
    DWORD dwCreator;        // 0x40
    DWORD dwUnknown2;       // 0x44
    DWORD dwUnknown3;       // 0x48
    WORD wNumRecs;          // 0x4C
} DocHeader;

// Bytes the header occupies in the file.  sizeof(DocHeader) may be larger
// because of trailing padding, so use this constant when reading.
#define DOCHEADSZ 78

typedef struct RecordHeader {
    // added type in experimentally! on 4/14
/*     char type[4]; */
    DWORD offset;           // file offset of the record's data
    DWORD bits;  // high byte is flags, remaining three are a unique id
} RecordHeader;

// Bytes one record-list entry occupies in the file.
#define RECHEADSZ 8
+ */ + +#include +#include +#include +#include + +#include "pdb.h" +#include "dawg.h" + +#include "swap.h" + +/////////////////////////////// prototypes ////////////////////////////////// +static void generate_dict( array_edge* memoryFile ); +static void write_words( array_edge* memoryFile, long edgeIndex, + short charIndex, char* wordBuffer ); +static void skipNHeaders( FILE* file, int n, RecordHeader* recHeader ); +static array_edge* loadEdgesArray( FILE* dictFile, unsigned char* charTable ); +static void* readNthRecord( FILE* files, void* where, int whereMaxSize, + int whichRec, int* foundSize ); +static void getNthOffset( FILE* file, int n, long* offset, + unsigned short* size ); +static void printWord( char* wordBuffer ); + +///////////////////////////////// globals /////////////////////////////////// +static unsigned char charTable[32]; + +/****************************************************************************** + * + *****************************************************************************/ +void usage() { + fprintf( stderr, "Usage: pdb2dict \n" ); + exit( 1 ); +} + +/****************************************************************************** + * + *****************************************************************************/ +int main( int argc, char** argv ) { + char* pdbName; + array_edge* memoryFile; + FILE* dictFile; + if ( argc < 2 ) { + usage(); + } + + // fprintf( stderr, "sizeof(WORD)=%d\n", sizeof(WORD) ); + + pdbName = argv[1]; + if ( (argv < 2) || (strchr( pdbName, '.' ) == NULL) || + strcmp( (char*)strchr( pdbName, '.' 
), ".pdb" ) ) { + usage(); + } + + dictFile = fopen( pdbName, "r" ); + if ( dictFile == NULL ) { + fprintf( stderr, "%s: No such file %s\n", argv[0], pdbName ); + exit(1); + } + memoryFile = loadEdgesArray( dictFile, charTable ); + fclose( dictFile ); + + generate_dict( memoryFile ); + free( memoryFile ); + return 0; +} // main + +/****************************************************************************** + * + *****************************************************************************/ +static array_edge* loadEdgesArray( FILE* dictFile, unsigned char* charTable ) { + DocHeader pdbHeader; + dawg_header dawgHeader; + array_edge* firstEdge = NULL; + fpos_t firstHeaderOffset; + int curSize, i; + + // read in the main pdb header + fread( &pdbHeader, DOCHEADSZ, 1, dictFile ); + assert( (strncmp( (char*)&pdbHeader.dwCreator, "Xwr3", 4) == 0) + && (strncmp( (char*)&pdbHeader.dwType, "DAWG", 4) == 0) ); + + (void)fgetpos( dictFile, &firstHeaderOffset ); + + (void)readNthRecord( dictFile, &dawgHeader, sizeof(dawgHeader), 0, NULL ); + //fprintf( stderr, "word count = %ld\n", swap_long(dawgHeader.numWords) ); + assert( dawgHeader.firstEdgeRecNum == 3 ); + + (void)readNthRecord( dictFile, charTable, 32, dawgHeader.charTableRecNum, + NULL ); + + firstEdge = (array_edge*)malloc(0); + curSize = 0; + for ( i = dawgHeader.firstEdgeRecNum; i < swap_short(pdbHeader.wNumRecs); + ++i ) { + int newSize; + void* rec = readNthRecord( dictFile, NULL, 0, i, &newSize ); + + firstEdge = (array_edge*)realloc( firstEdge, curSize + newSize ); + memcpy( ((char*)firstEdge) + curSize, rec, newSize ); + free( rec ); + curSize += newSize; + } + return firstEdge; +} // loadEdgesArray + +/****************************************************************************** + * + *****************************************************************************/ +static void* readNthRecord( FILE* file, void* where, int whereMaxSize, + int whichRec, int* foundSize ) { + void* result = NULL; + fpos_t pos; + 
long offset; + unsigned short size; + + (void)fgetpos( file, &pos); + + getNthOffset( file, whichRec, &offset, &size ); + fseek( file, offset, 0 ); + + if ( where == NULL ) { + result = malloc( size ); + fread( result, size, 1, file ); + } else { + assert( size <= whereMaxSize ); + fread( where, size, 1, file ); + } + if ( foundSize ) { + *foundSize = size; + } + + (void)fsetpos( file, &pos); + return result; +} // readNthRecord + +/****************************************************************************** + * Size is my offset subtracted from the one after me, unless I'm the last + * entry in which case it's file size minus my offset. + *****************************************************************************/ +static void getNthOffset( FILE* file, int n, long* offset, + unsigned short* size ) { + DocHeader pdbHeader; + RecordHeader recHeader; + fpos_t pos; + long sizeCalc; + + (void)fgetpos( file, &pos); + rewind( file ); + + //fprintf( stderr, "sizeof(pdbHeader)=%d\n", sizeof(pdbHeader) ); + fread( &pdbHeader, DOCHEADSZ, 1, file ); + assert( swap_short(pdbHeader.wNumRecs) > n ); + + skipNHeaders( file, n+1, &recHeader ); + *offset = sizeCalc = swap_long( recHeader.offset ); + if ( n+1 == swap_short(pdbHeader.wNumRecs) ) { // use file size + fseek( file, 0, SEEK_END ); + sizeCalc = ftell( file ) - sizeCalc; + } else { + skipNHeaders( file, 1, &recHeader ); + sizeCalc = swap_long( recHeader.offset ) - sizeCalc; + } + *size = sizeCalc; + + (void)fsetpos( file, &pos); +} // getNthOffset + +/****************************************************************************** + * Skip over the given number of headers, returning with the last one read + * into the supplied buffer. 
/******************************************************************************
 * Write the NUL-terminated word in wordBuffer to stdout followed by a
 * newline.  Bytes >= 0x20 are written as-is; the codes 1, 2 and 3 are
 * expanded to the digraphs "CH", "LL" and "RR".  Any other code below
 * 0x20 is reported on stderr and trips an assert.
 *
 * Output is streamed rather than assembled in a fixed-size buffer, so a
 * word full of digraph codes cannot overflow anything.
 *****************************************************************************/
static void printWord( char* wordBuffer ) {
    unsigned char ch;

    while ( (ch = (unsigned char)*wordBuffer++) != '\0' ) {
        if ( ch >= 0x20 ) {
            putc( ch, stdout );
        } else {
            const char* str = NULL;
            switch ( ch ) {
            case 1:
                str = "CH";
                break;
            case 2:
                str = "LL";
                break;
            case 3:
                str = "RR";
                break;
            default:
                fprintf( stderr, "Got %d\n", ch );
                assert( 0 );
            }
            /* str stays NULL only when asserts are compiled out; skip the
               unknown code rather than dereference it. */
            if ( str != NULL ) {
                fputs( str, stdout );
            }
        }
    }
    putc( '\n', stdout );
} // printWord
#ifndef _SWAP_H_
#define _SWAP_H_

/* Byte-order helpers for the big-endian fields of a pdb file.
 *
 * NOTE(review): if neither BYTE_ORDER nor LITTLE_ENDIAN is defined by an
 * earlier include, both evaluate to 0 in the #if below and the
 * little-endian (swapping) versions are selected -- confirm that is what
 * is wanted on every build host.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
/* Exchange the two bytes of a 16-bit value. */
static unsigned short swap_short( unsigned short s ) {
    return (unsigned short)( ((s >> 8) & 0x00FF) | ((s << 8) & 0xFF00) );
}

/* Reverse the four low-order bytes of l.  Every term is masked so the
 * result fits in 32 bits even where unsigned long is 64 bits wide; the
 * unmasked "l << 24" used previously leaked the upper input bytes into
 * bits 32 and above on such systems. */
static unsigned long swap_long( unsigned long l ) {
    return ((l >> 24) & 0x000000FFUL)
        | ((l >> 8) & 0x0000FF00UL)
        | ((l << 8) & 0x00FF0000UL)
        | ((l << 24) & 0xFF000000UL);
}
#else
# define swap_short(s) (s)
# define swap_long(l) (l)
#endif

#endif /* _SWAP_H_ */
(where the \0 will later + * be mapped to BLANK), and a binary file giving the values and counts + * of those tiles in table form *and* additional data for any tiles with + * non-printing faces mapping them to alternative printing values (e.g. "_" + * for BLANK) and (optionally) to custom pilot bitmaps for representation + * on-screen. + * + * This program comes after I've attempted to do something simpler with + * shell scripts in makefiles. There are too many problems with the null + * and sub-0x20 bytes for the non-printing characters' faces. Thus for now + * rather than pass in tables I'll just maintain one for each language here. + */ + +/* Here's the old comment: + * Build a resource for Crosswords representing the letters in the + * game, and the number and value of each letter. The arrays for + * each language for which a version of Crosswords exists are also + * stored in this file. + * + * This resource is meant to replace the gInitialLetterCounts and + * gTileValues arrays as well as to provide indirection aiding in + * localization + * + * In order to keep the size of an in-memory game down, letters are + * stored in five bits (and in null-terminated strings) so we have a + * range of 31 values available. Each will then be an index into a + * table built here. This extra level of abstraction permits + * non-contiguous ranges of characters as, for instance, is required + * by the German characters having umlauts. Note, however, that a + * language requiring *more* than 31 characters (including 1 for each + * blank) will require some redesign. + * + * An additional problem is created by the need to convert letters + * from lower case to upper on input to the blank-setting dialog as a + * courtesy to players (already present in the shipping version, + * alas). For now I'll simply search the array of printing values (col. 
/* Exchange the two bytes of a 16-bit value.  The work is done on an
 * unsigned copy so a negative input is never left-shifted. */
static short endian_short( short in ) {
    unsigned int v = (unsigned short)in;
    return (short)( ((v >> 8) & 0x00ff) | ((v << 8) & 0xFF00) );
}

/* Print msg to stderr and exit with status 1.  msg is written verbatim
 * (never used as a format string), so it may safely contain '%'. */
void errexit( char* msg ) {
    fputs( msg, stderr );
    exit(1);
}

/* Return the length in bytes of the named file, logging progress to
 * stdout.  Exits through errexit() if the file cannot be opened or
 * measured, or if its length does not fit in the short return type. */
static short fileSize( char* fileName ) {
    long length;
    FILE* f = fopen( fileName, "rb" );
    printf( "opening %s\n", fileName );
    if ( f == NULL ) {
        errexit( "fopen failed\n" );
    }
    if ( fseek( f, 0L, SEEK_END ) != 0 ) {
        fclose( f );
        errexit( "error from fseek" );
    }
    length = ftell( f );
    fclose( f );
    if ( length < 0 ) {
        errexit( "error from ftell\n" );
    }
    if ( length > 0x7FFF ) {    /* would be truncated by the short result */
        errexit( "file too large for a 16-bit size\n" );
    }
    printf( "length of file %s is %ld\n", fileName, length );
    return (short)length;
}
2 +> phi 1 8 +> chi 1 8 +> psi 1 10 +> omega 3 3 +> blank 2 0 +> +> Total 104 + + +#endif + +unsigned char finnish_table[] = { +#if 0 +from yarik@avalon.merikoski.fi +amount points letter +10 1 A + 1 8 B + 1 10 C + 1 7 D + 8 1 E + 1 8 F + 1 8 G + 2 4 H +10 1 I + 2 4 J + 5 2 K + 5 2 L + 3 3 M + 8 1 N + 5 2 O + 2 4 P + 2 4 R + 7 1 S + 9 1 T + 5 3 U + 2 4 V + 2 4 Y +5 2 D // an A with two dots above +1 7 V // an O with two dots above +2 ? ? // the 'wild card' +#endif +}; + +unsigned char US_english_table[] = { + // numTiles, tileValue ASCII value + MASH(9, 1), 'A', + MASH(2, 3), 'B', + MASH(2, 3), 'C', + MASH(4, 2), 'D', + MASH(12, 1), 'E', + MASH(2, 4), 'F', + MASH(3, 2), 'G', + MASH(2, 4), 'H', + MASH(9, 1), 'I', + MASH(1, 8), 'J', + MASH(1, 5), 'K', + MASH(4, 1), 'L', + MASH(2, 3), 'M', + MASH(6, 1), 'N', + MASH(8, 1), 'O', + MASH(2, 3), 'P', + MASH(1, 10), 'Q', + MASH(6, 1), 'R', + MASH(4, 1), 'S', + MASH(6, 1), 'T', + MASH(4, 1), 'U', + MASH(2, 4), 'V', + MASH(2, 4), 'W', + MASH(1, 8), 'X', + MASH(2, 4), 'Y', + MASH(1, 10), 'Z', + + MASH(2, 0), BLANK_FACE, /* BLANK1 */ + // 0 /* TERMINATES ARRAY */ +}; // US_english_table + +unsigned char norwegian_table[] = { + // numTiles, tileValue ASCII value + MASH(2, 0), BLANK_FACE, /* BLANK1 */ + MASH(7, 1), 'A', + MASH(3, 4), 'B', + MASH(1, 10), 'C', + MASH(5, 1), 'D', + MASH(9, 1), 'E', + MASH(4, 2), 'F', + MASH(4, 2), 'G', + MASH(3, 3), 'H', + MASH(5, 1), 'I', + MASH(2, 4), 'J', + MASH(4, 2), 'K', + MASH(5, 1), 'L', + MASH(3, 2), 'M', + MASH(6, 1), 'N', + MASH(4, 2), 'O', + MASH(2, 4), 'P', + MASH(6, 1), 'R', + MASH(6, 1), 'S', + MASH(6, 1), 'T', + MASH(3, 4), 'U', + MASH(3, 4), 'V', + MASH(1, 8), 'W', + MASH(1, 6), 'Y', + MASH(1, 6), 'Æ', + MASH(2, 5), 'Ø', + MASH(2, 4), 'Å', +}; + +unsigned char swedish_table[] = { + // numTiles, tileValue ASCII value + MASH(2, 0), BLANK_FACE, /* BLANK1 */ + MASH(8, 1), 'A', + MASH(2, 4), 'Å', // A with circle + MASH(2, 3), 'Ä', // A with two dots + MASH(2, 4), 'B', + MASH(1, 10), 
'C', + MASH(5, 1), 'D', + MASH(7, 1), 'E', // 15's the max.... + MASH(2, 3), 'F', + MASH(3, 2), 'G', + MASH(2, 2), 'H', + MASH(5, 1), 'I', + MASH(1, 7), 'J', + MASH(3, 2), 'K', + MASH(5, 1), 'L', + MASH(3, 2), 'M', + MASH(6, 1), 'N', + MASH(5, 2), 'O', + MASH(2, 4), 'Ö', // O with two dots + MASH(2, 4), 'P', + MASH(8, 1), 'R', + MASH(8, 1), 'S', + MASH(8, 1), 'T', + MASH(3, 4), 'U', + MASH(2, 3), 'V', + MASH(1, 8), 'X', + MASH(1, 7), 'Y', + MASH(1, 8), 'Z', + // 0 /* TERMINATES ARRAY */ +}; // swedish_table + +unsigned char polish_table[] = { + // numTiles, tileValue ASCII value + // NO BLANK; there are already 32 tiles.... + // MASH(2, 0), BLANK_FACE, /* BLANK1 */ + MASH(8, 1), 'A', + MASH(1, 5), '¡', + MASH(2, 3), 'B', + MASH(3, 2), 'C', + MASH(1, 6), 'Æ', + MASH(3, 2), 'D', + MASH(7, 1), 'E', + MASH(1, 5), 'Ê', + MASH(2, 4), 'F', + MASH(2, 3), 'G', + MASH(2, 3), 'H', + MASH(8, 1), 'I', + MASH(2, 3), 'J', + MASH(3, 2), 'K', + MASH(3, 2), 'L', + MASH(2, 3), '£', + MASH(3, 2), 'M', + MASH(5, 1), 'N', + MASH(1, 7), 'Ñ', + MASH(6, 1), 'O', + MASH(1, 5), 'Ó', + MASH(3, 2), 'P', + MASH(4, 1), 'R', + MASH(4, 1), 'S', + MASH(1, 5), '¦', + MASH(3, 2), 'T', + MASH(2, 3), 'U', + MASH(4, 1), 'W', + MASH(4, 2), 'Y', + MASH(5, 1), 'Z', + MASH(1, 7), '¬', + MASH(1, 5), '¯', + // 0 /* TERMINATES ARRAY */ +}; // polish_table + +unsigned char french_table[] = { + // numTiles, tileValue ASCII value + MASH(2, 0), BLANK_FACE, /* BLANK1 */ + MASH(9, 1), 'A', + MASH(2, 3), 'B', + MASH(2, 3), 'C', + MASH(3, 2), 'D', + MASH(15, 1), 'E', + MASH(2, 4), 'F', + MASH(2, 2), 'G', + MASH(2, 4), 'H', + MASH(8, 1), 'I', + MASH(1, 8), 'J', + MASH(1, 10), 'K', + MASH(5, 1), 'L', + MASH(3, 2), 'M', + MASH(6, 1), 'N', + MASH(6, 1), 'O', + MASH(2, 3), 'P', + MASH(1, 8), 'Q', + MASH(6, 1), 'R', + MASH(6, 1), 'S', + MASH(6, 1), 'T', + MASH(6, 1), 'U', + MASH(2, 4), 'V', + MASH(1, 10), 'W', + MASH(1, 10), 'X', + MASH(1, 10), 'Y', + MASH(1, 10), 'Z', + // 0 /* TERMINATES ARRAY */ +}; // french_table + 
/* Italian tile set.  Each entry is: number of tiles, tile value, face.
 * The blank comes first; there are no entries for J, K, W, X or Y. */
unsigned char italian_table[] = {
    // numTiles, tileValue    ASCII value
    MASH(2, 0), BLANK_FACE,   /* BLANK1 */
    MASH(13, 1), 'A',
    MASH(3, 5), 'B',
    MASH(4, 4), 'C',
    MASH(3, 5), 'D',
    MASH(13, 1), 'E',
    MASH(2, 8), 'F',
    MASH(2, 5), 'G',
    MASH(2, 8), 'H',
    MASH(13, 1), 'I',
    MASH(5, 3), 'L',
    MASH(5, 3), 'M',
    MASH(6, 2), 'N',
    MASH(13, 1), 'O',
    MASH(3, 5), 'P',
    MASH(1, 10), 'Q',
    MASH(6, 2), 'R',
    MASH(6, 2), 'S',
    MASH(6, 2), 'T',
    MASH(5, 3), 'U',
    MASH(4, 4), 'V',
    MASH(2, 8), 'Z',
    // 0  /* TERMINATES ARRAY */
}; // italian_table
char spanish_table[] = { // tile set main() selects for -l es_ES
+    // numTiles, tileValue ASCII value; faces 1, 2 and 3 are not characters but small integers standing for the digraph tiles named in the trailing comments
+    MASH( 12, 1), 'A',
+    MASH( 2, 3), 'B',
+    MASH( 4, 3), 'C',
+    MASH( 1, 5), 1, /*'CH'*/
+    MASH( 5, 2), 'D',
+    MASH( 12, 1), 'E',
+    MASH( 1, 4), 'F',
+    MASH( 2, 2), 'G',
+    MASH( 2, 4), 'H',
+    MASH( 6, 1), 'I',
+    MASH( 1, 8), 'J',
+    MASH( 4, 1), 'L',
+    MASH( 1, 8), 2, /*'LL'*/
+    MASH( 2, 3), 'M',
+    MASH( 5, 1), 'N',
+    MASH( 1, 8), 209, /*'N~'*/
+    MASH( 9, 1), 'O',
+    MASH( 2, 3), 'P',
+    MASH( 1, 5), 'Q',
+    MASH( 5, 1), 'R',
+    MASH( 1, 8), 3, /*'RR'*/
+    MASH( 6, 1), 'S',
+    MASH( 4, 1), 'T',
+    MASH( 5, 1), 'U',
+    MASH( 1, 4), 'V',
+    MASH( 1, 8), 'X',
+    MASH( 1, 4), 'Y',
+    MASH( 1, 10), 'Z',
+    MASH( 2, 0), BLANK_FACE, /* BLANK1 */
+}; // spanish_table
+
+/* Test case that reverses char order and puts blank at the end, violating the
+ * sometimes-assumption that blank==0.  NOTE(review): the faces below are listed A-F in ascending order, not reversed. */
+unsigned char hex_table[] = { // selected by main() for -l hex
+    // numTiles, tileValue ASCII value
+    MASH(9, 1), 'A',
+    MASH(2, 3), 'B',
+    MASH(2, 3), 'C',
+    MASH(4, 2), 'D',
+    MASH(12, 1), 'E',
+    MASH(2, 4), 'F',
+    MASH(4, 0), BLANK_FACE, /* BLANK1 */
+    //0 /* TERMINATES ARRAY */
+}; // hex_table
+
+unsigned char test_table[] = { // three-tile set selected by main() for -l test
+    // numTiles, tileValue ASCII value
+    MASH(1, 1), 'A',
+    MASH(1, 5), 'B',
+    MASH(1, 10), 'E',
+    //0 /* TERMINATES ARRAY */
+}; // test_table
+
+//#define NONFILEARGS 3
+#define MAXSPECIALS 20
+
+int main( int argc, char** argv ) { // picks the tile table named by -l, then writes its faces to the -O file and its header plus per-tile byte pairs to the -T file
+    char* lang = NULL; // NOTE(review): stays NULL when -l is omitted, yet is passed unchecked to fprintf("%s") and strcmp() below
+    // char* fileName;
+    char* facesFileName = NULL; // -O argument; faces output is skipped when NULL
+    char* binaryFileName = NULL; // -T argument; binary output is skipped when NULL
+
+    FILE* facesFile;
+    FILE* binFile;
+    Xloc_header header;
+    unsigned char* table;
+    short tableLength = 0; // byte length of the selected table (sizeof), not an entry count
+    //char* name = "";
+    short i;
+    short fileArgsUsed; // not referenced in the visible part of main
+    short offset; // not referenced in the visible part of main
+    int got;
+
+    while ( (got = getopt(argc, argv, "l:O:T:h")) != EOF ) {
+        switch ( got ) {
+        case 'l':
+            lang = optarg;
+            break;
+        case 'O':
+            facesFileName = optarg;
+            break;
+        case 'T':
+            binaryFileName = optarg;
+            break;
+        case 'h':
+        default: /* -h and any unrecognized option share the usage message; errexit is defined outside this view (presumably it exits - confirm) */
+            errexit( "Usage: xloc -l lang_code "
+                     "-O tableOutfile -T valCountOutfile\n" );
+        }
+    }
+
+    fprintf( stderr, "binoutfile = %s\n",
+             binaryFileName?binaryFileName:"null" );
+    fprintf( stderr, "facesFileName = %s\n",
+             facesFileName?facesFileName:"null" );
+    fprintf( stderr, "lang = %s\n", lang );
+
+    header.padding = 0;
+
+    if ( strcmp( lang, "en_US" ) == 0 ) { // each branch picks the table, its byte length and the language code for the header
+        table = US_english_table;
+        tableLength = sizeof(US_english_table);
+        header.langCodeFlags = US_ENGLISH;
+    } else if ( strcmp( lang, "sv_SE" ) == 0 ) {
+        table = swedish_table;
+        tableLength = sizeof(swedish_table);
+        header.langCodeFlags = SWEDISH_SWEDISH;
+    } else if ( strcmp( lang, "no_NO" ) == 0 ) {
+        table = norwegian_table;
+        tableLength = sizeof(norwegian_table);
+        header.langCodeFlags = NORWEGIAN_NORWEGIAN;
+    } else if ( strcmp( lang, "pl_PL" ) == 0 ) {
+        table = polish_table;
+        tableLength = sizeof(polish_table);
+        header.langCodeFlags = POLISH_POLISH;
+    } else if ( strcmp( lang, "fr_FR" ) == 0 ) {
+        table = french_table;
+        tableLength = sizeof(french_table);
+        header.langCodeFlags = FRENCH_FRENCH;
+    } else if ( strcmp( lang, "de_DE" ) == 0 ) {
+        table = german_table;
+        tableLength = sizeof(german_table);
+        header.langCodeFlags = GERMAN_GERMAN;
+    } else if ( strcmp( lang, "nl_NL" ) == 0 ) {
+        table = dutch_table;
+        tableLength = sizeof(dutch_table);
+        header.langCodeFlags = DUTCH_DUTCH;
+    } else if ( strcmp( lang, "it_IT" ) == 0 ) {
+        table = italian_table;
+        tableLength = sizeof(italian_table);
+        header.langCodeFlags = ITALIAN_ITALIAN;
+    } else if ( strcmp( lang, "es_ES" ) == 0 ) {
+        table = spanish_table;
+        tableLength = sizeof(spanish_table);
+        header.langCodeFlags = SPANISH_SPANISH;
+    } else if ( strcmp( lang, "hex" ) == 0 ) {
+        table = hex_table;
+        tableLength = sizeof(hex_table); // NOTE(review): header.langCodeFlags is never assigned in this branch, so the |= below reads an uninitialized byte
+    } else if ( strcmp( lang, "test" ) == 0 ) {
+        table = test_table;
+        tableLength = sizeof(test_table); // NOTE(review): same as "hex" - header.langCodeFlags is left unassigned here
+    } else {
+        fprintf( stderr, "unknown language code %s\n", lang );
+        exit(1);
+    }
+
+    header.langCodeFlags |= 1< 0 ); // NOTE(review): text is missing from this line (a "<...>" span looks stripped); the shift amount and the code that opens facesFile are not visible - restore from the original xloc.c
+
+        for ( i = 0; i < tableLength; i += BYTES_PER_LETTER ) {
+            fprintf( facesFile, "%c", table[i+2] ); // byte at offset 2 of each BYTES_PER_LETTER-sized entry, i.e. the face
+        }
+
+        fclose( facesFile );
+    }
+
+    ////////////////////////////////////////////////////
+    // now the binary file
+    ////////////////////////////////////////////////////
+    if ( binaryFileName != NULL ) {
+
+        binFile = fopen( binaryFileName, "w" ); // NOTE(review): opened in text mode although the data is binary; "wb" would be needed where the C library translates newlines
+        assert( binFile ); // NOTE(review): an open failure is only caught by this assert, which disappears when NDEBUG is defined
+
+        fwrite( &header, sizeof(header), 1, binFile );
+
+        // now write out the table, where header.specialCharStart is length (NOTE(review): specialCharStart is commented out of Xloc_header in xwcommon3.h, so this remark looks stale)
+        for ( i = 0; i < tableLength; i += BYTES_PER_LETTER ) {
+            fwrite( &table[i], sizeof(table[i])+sizeof(table[i+1]), 1, // the first two bytes of each entry; the face byte is not written here
+                    binFile );
+        }
+
+        // record file sizes
+
+        // write data with file size included
+
+        // append the files themselves
+
+        fclose( binFile );
+    }
+    return 0;
+} // main
diff --git a/xwords4/xwords4/dawg/xloc.pl b/xwords4/xwords4/dawg/xloc.pl
new file mode 100755
index 000000000..d811b51a1
--- /dev/null
+++ b/xwords4/xwords4/dawg/xloc.pl
@@ -0,0 +1,42 @@
+#!/usr/bin/perl
+
+# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# test and wrapper file for xloc.pm
+
+use strict;
+use xloc;
+
+
+
+my $arg = shift(@ARGV);
+my $lang = shift(@ARGV);
+my $path = "./$lang";
+my $infoFile = "$path/info.txt";
+
+die "info file $infoFile not found\n" if !
-s $infoFile;   # -s is true only for an existing, non-empty file, so an empty info.txt is also reported as "not found"
+
+
+my $xlocToken = xloc::ParseTileInfo($infoFile);
+
+if ( $arg eq "-t" ) {   # map file, one byte per tile (WriteMapFile with $unicode = 0)
+    xloc::WriteMapFile( $xlocToken, 0, \*STDOUT );
+} elsif ( $arg eq "-tn" ) {   # map file, 16 bits per tile (WriteMapFile with $unicode = 1)
+    xloc::WriteMapFile( $xlocToken, 1, \*STDOUT );
+} elsif ( $arg eq "-v" ) {   # values file: XLOC_HEADER followed by two bytes per tile
+    xloc::WriteValuesFile( $xlocToken, \*STDOUT );
+}   # NOTE(review): there is no else branch, so any other (or missing) first argument produces no output and no error
diff --git a/xwords4/xwords4/dawg/xloc.pm b/xwords4/xwords4/dawg/xloc.pm
new file mode 100644
index 000000000..729af26e4
--- /dev/null
+++ b/xwords4/xwords4/dawg/xloc.pm
@@ -0,0 +1,180 @@
+#!/usr/bin/perl
+
+# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+# The idea here is that all that matters about a language is stored in
+# one file (possibly excepting rules for prepping a dictionary).
+# There's a list of tile faces, counts and values, and also some
+# name-value pairs as needed. The pairs come first, and then a list
+# of tiles.
+
+package xloc;
+
+use strict;
+use warnings;
+
+BEGIN {
+    use Exporter ();
+    our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
+
+    $VERSION = 1.00;
+
+    @ISA = qw(Exporter);
+    @EXPORT = qw(&ParseTileInfo &GetNTiles &TileFace &TileValue
+                 &TileCount &GetValue &WriteMapFile &WriteValuesFile);   # GetNthTile is not in this list; it is only called from inside the package
+    %EXPORT_TAGS = ( );
+}
+
+# Returns what's meant to be an opaque object that can be passed back
+# for queries.
It's a hash with name-value pairs and a _TILES entry
+# containing a list of tile info lists ([ count, value, face ] per tile line).
+
+sub ParseTileInfo($) {   # parse the info file at $filePath; returns a hash ref, dies if the file cannot be opened
+    my ( $filePath ) = @_;
+    my %result;
+
+    open INPUT, "<$filePath" or die "couldn't open $filePath";
+
+    my $inTiles = 0;
+    my @tiles;
+    while ( <INPUT> ) {   # the handle read was missing from the reviewed copy; INPUT is the handle opened above and closed below
+
+        chomp;
+        s/\#.*$//;   # strip comments
+        s/^\s*$//; # nuke all-white-space lines
+        next if !length;
+
+        if ( $inTiles ) {
+            if ( /<END_TILES>/ ) {   # NOTE(review): marker text was missing from the reviewed copy; <END_TILES> is presumed - confirm against an info.txt
+                last;
+            } else {
+                my ( $count, $val, $face ) = m/^\s*(\w+)\s+(\w+)\s+(.*)\s*$/;   # two word fields, then the rest of the line as the face spec
+                push @tiles, [ $count, $val, $face ];
+            }
+        } elsif ( /\w:/ ) {
+            my ( $nam, $val ) = split ':', $_, 2;   # split at the first colon only
+            $result{$nam} .= $val;   # a name that appears more than once gets its values concatenated
+        } elsif ( /<BEGIN_TILES>/ ) {   # NOTE(review): marker text was missing from the reviewed copy; <BEGIN_TILES> is presumed - confirm against an info.txt
+            $inTiles = 1;
+        }
+
+    }
+
+    close INPUT;
+
+    $result{"_TILES"} = [ @tiles ];
+
+    return \%result;
+}
+
+sub GetNTiles($) {   # number of tile entries stored under _TILES
+    my ( $hashR ) = @_;
+
+    my $listR = ${$hashR}{"_TILES"};
+
+    return 0 + @{$listR};   # "0 +" forces the array into numeric (count) context
+}
+
+sub GetValue($$) {   # value of a name:value pair from the info file (undef when the name is absent)
+    my ( $hashR, $name ) = @_;
+    return ${$hashR}{$name};
+}
+
+sub WriteMapFile($$$) {   # write one packed code per tile to $fhr: 16-bit big-endian ("n") when $unicode is true, else one unsigned byte ("C")
+    my ( $hashR, $unicode, $fhr ) = @_;
+
+    my $packStr;
+    if ( $unicode ) {
+        $packStr = "n";
+    } else {
+        $packStr = "C";
+    }
+
+    my $count = GetNTiles($hashR);
+    my $specialCount = 0;
+    for ( my $i = 0; $i < $count; ++$i ) {
+        my $tileR = GetNthTile( $hashR, $i );
+        my $str = ${$tileR}[2];
+
+        if ( $str =~ /\'(.)\'/ ) {   # 'X': a single quoted character, written as its character code
+            print $fhr pack($packStr, ord($1) );
+        } elsif ( $str =~ /\"(.+)\"/ ) {   # "XX": a multi-character tile, written as a running index 0, 1, 2, ...
+            print $fhr pack($packStr, $specialCount++ );
+        } elsif ( $str =~ /(\d+)/ ) {   # bare number: already the code to write
+            print $fhr pack( $packStr, $1 );   # was chr($1): pack's C/n templates take a number, and a chr() string numifies to 0
+        } else {
+            die "WriteMapFile: unrecognized face format $str";
+        }
+    }
+} # WriteMapFile
+
+sub WriteValuesFile($$) {   # write the XLOC_HEADER value and then two bytes per tile to $fhr; dies when XLOC_HEADER is missing
+    my ( $hashR, $fhr ) = @_;
+
+    my $header = GetValue( $hashR,"XLOC_HEADER" );
+    die "no XLOC_HEADER found" if !
$header;
+
+    print STDERR "header is $header\n";
+
+    print $fhr pack( "n", hex($header) );   # header string is read as hex and written as a 16-bit big-endian value
+
+    my $count = GetNTiles($hashR);
+    for ( my $i = 0; $i < $count; ++$i ) {
+        my $tileR = GetNthTile( $hashR, $i );
+
+        print $fhr pack( "c", TileValue($tileR) );   # element 0 of the tile entry, as one signed byte
+        print $fhr pack( "c", TileCount($tileR) );   # element 1 of the tile entry, as one signed byte
+    }
+
+} # WriteValuesFile
+
+sub GetNthTile($$) {   # reference to the $n-th [ field0, field1, face ] entry under _TILES (undef past the end)
+    my ( $hashR, $n ) = @_;
+    my $listR = ${$hashR}{"_TILES"};
+
+    return ${$listR}[$n];
+}
+
+sub TileFace($) {   # decode a tile's face spec (element 2) into the string it stands for; dies on an unrecognized format
+    my ( $tileR ) = @_;
+
+    my $str = ${$tileR}[2];
+
+    if ( $str =~ /\'(.)\'/ ) {   # 'X' -> the single character
+        return $1;
+    } elsif ( $str =~ /\"(.+)\"/ ) {   # "XX" -> the text between the quotes
+        return $1;
+    } elsif ( $str =~ /(\d+)/ ) {   # number -> the character with that code
+        return chr($1);
+    } else {
+        die "TileFace: unrecognized face format: $str";
+    }
+}
+
+sub TileValue($) {   # returns element 0 of the tile entry. NOTE(review): ParseTileInfo stores its $count there, so this name and TileCount's look swapped - confirm before relying on either
+    my ( $tileR ) = @_;
+
+    return ${$tileR}[0];
+}
+
+sub TileCount($) {   # returns element 1 of the tile entry. NOTE(review): ParseTileInfo stores its $val there - see TileValue
+    my ( $tileR ) = @_;
+
+    return ${$tileR}[1];
+}
+
+1;
diff --git a/xwords4/xwords4/dawg/xwcommon3.h b/xwords4/xwords4/dawg/xwcommon3.h
new file mode 100644
index 000000000..2f8e1f24f
--- /dev/null
+++ b/xwords4/xwords4/dawg/xwcommon3.h
@@ -0,0 +1,132 @@
+// -*-mode: C; fill-column: 80; -*-
+
+/*
+ * Copyright 1997 by Eric House (fixin@peak.org). All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __XWCOMMON__
+#define __XWCOMMON__
+
+//#include
+// so sue me :-)
+//#include "/home/pilot/usr/local/gnu/m68k-palmos-coff/include/PalmOS2/Hardware/Hardware.h"
+
+enum { BLANK=0,
+       EMPTY = 32, // the four bonus-square codes below follow on as 33..36
+       DOUBLE_LETTER,
+       DOUBLE_WORD,
+       TRIPLE_LETTER,
+       TRIPLE_WORD
+};
+
+    // a single-width diagonal stripe
+#define PAT_DOUBLE_LETTER { 0x8844, 0x2211, 0x8844, 0x2211 }
+    // grey pattern (single-pixel checkerboard) - NOTE(review): this comment and the next were the other way round; swapped to match the bits when each word is read as two 8-pixel rows - confirm
+#define PAT_DOUBLE_WORD { 0xaa55, 0xaa55, 0xaa55, 0xaa55 }
+    // a double-width diagonal stripe
+#define PAT_TRIPLE_LETTER { 0xCC66, 0x3399, 0xCC66, 0x3399 }
+    // 2-pixel checkerboard
+#define PAT_TRIPLE_WORD { 0xCCCC, 0x3333, 0xCCCC, 0x3333 }
+
+
+#define BOARD_RES_TYPE 'Xbrd'
+#define TILES_RES_TYPE 'Xloc'
+#define STRL_RES_TYPE 'StrL'
+#define XW_STRL_RESOURCE_ID 1000
+// both the above resources use this ID
+#define XW_CONFIGABLE_RESOURCE_ID 1001
+
+#define BYTES_PER_LETTER 3 // size of one tile-table entry; xloc's main() steps through the tables by this amount and reads the face at offset 2
+
+#define BLANK_FACE '\0'
+//#define A_TILE 1
+#define A_TILE 0
+
+#define MAX_NUM_TILES 110
+#define MAX_UNIQUE_TILES (32-A_TILE)
+//#define NUM_BLANKS 2
+#define MAX_NUM_BLANKS 4
+
+/* language header:
+ * specialCharStart simply gives the number of bytes needed to skip beyond the
+ * standard tiles table to the first of the "special" entries.  (That field is
+ * currently commented out of Xloc_header below.)
+ * langCodeFlags is more ambitious. Each language I release will have an
+ * assigned code. The code has at least two purposes: to prevent viewing a game
+ * with the wrong language; and to tie a language to the dictionary that can be
+ * used by the computer player.
+ *
+ * For a dictionary and language (set of tile rules) to work together, the
+ * mapping of index to character must be in sync. In the German case, 0 must be
+ * A, 1 umlaut-A, etc. But the number of characters and values assigned each
+ * tile do not matter. Thus XWConfig can allow those aspects of a language to
+ * be edited. But if a user wants to add or delete a character in an "official"
+ * language XWConfig must disallow this, forcing him instead to "clone" the
+ * language to something whose official flag will be cleared.
+ */
+
+typedef struct Xloc_header {
+    //unsigned char specialCharStart;
+    unsigned char langCodeFlags; // can't do bitfields; gcc for pilot and x86
+                                 // seem to generate different code
+    unsigned char padding;       // ptrs to the shorts in Xloc_specialEntry
+                                 // will otherwise be odd
+} Xloc_header;
+
+#define XLOC_LANG_MASK 0x80 // high bit is "official" (NOTE(review): the name says LANG but per this comment 0x80 is the official bit - the two mask names look swapped)
+#define XLOC_LANG_OFFSET 7
+#define XLOC_OFFICIAL_MASK 0x7F // rest are for the enums below (NOTE(review): named OFFICIAL but covers the language-code bits)
+
+enum {
+    HOMEBREW = 0,
+    US_ENGLISH = 1,
+    FRENCH_FRENCH = 2,
+    GERMAN_GERMAN = 3,
+    DUTCH_DUTCH = 4,
+    ITALIAN_ITALIAN = 5,
+    SPANISH_SPANISH = 6,
+    SWEDISH_SWEDISH = 7,
+    POLISH_POLISH = 8,
+    NORWEGIAN_NORWEGIAN = 9,
+};
+
+/* "Special chars", added to support Spanish "LL" and "RR", replace
+ * the ascii character code in the Xloc charinfo array with an integer
+ * between 1 and 0X1F which is an index into a larger array appended
+ * to the charinfo array. Fields in the structs located in that array
+ * include the string to be used to represent the Tile when drawing in
+ * text (e.g. formatting for the Tile values dialog) and the IDs of
+ * resources holding bitmaps to be used when drawing tiles large
+ * (tray) and small (board).
+ */
+typedef struct Xloc_specialEntry {
+    unsigned char textVersion[4]; /* string can be up to 3 chars long */
+    short hasLarge;
+    short hasSmall;
+} Xloc_specialEntry;
+
+
+
+
+/* #define LARGE_CH_BMP_ID 2000 */
+/* #define SMALL_CH_BMP_ID 2001 */
+/* #define LARGE_LL_BMP_ID 2002 */
+/* #define SMALL_LL_BMP_ID 2003 */
+/* #define LARGE_RR_BMP_ID 2004 */
+/* #define SMALL_RR_BMP_ID 2005 */
+
+#endif