mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-28 07:58:08 +01:00
first checkin from personal project
This commit is contained in:
parent
4c17ed9705
commit
cec6a805e3
11 changed files with 1820 additions and 0 deletions
40
dawg/English/Makefile
Normal file
40
dawg/English/Makefile
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
# -*-mode: Makefile -*-
|
||||||
|
# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
TARGET_TYPE ?= FRANK
|
||||||
|
|
||||||
|
# This represents the default -- for now
|
||||||
|
COMMAND = -f Makefile.BasEnglish TARGET_TYPE=FRANK
|
||||||
|
|
||||||
|
alleng:
|
||||||
|
for mfile in Makefile.BasEnglish Makefile.OSW Makefile.TWL98 Makefile.CollegeEng; do \
|
||||||
|
$(MAKE) -f $$mfile TARGET_TYPE=$(TARGET_TYPE); \
|
||||||
|
done
|
||||||
|
|
||||||
|
%:
|
||||||
|
$(MAKE) $(COMMAND) $@
|
||||||
|
|
||||||
|
all:
|
||||||
|
$(MAKE) $(COMMAND)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(MAKE) $(COMMAND) clean
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "try make -f Makefile.[BasEnglish|CollegeEng] \\"
|
||||||
|
@echo " TARGET_TYPE=[PALM|FRANK]"
|
||||||
|
|
35
dawg/English/Makefile.BasEnglish
Normal file
35
dawg/English/Makefile.BasEnglish
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# -*-mode: Makefile -*-
|
||||||
|
# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
LANG=BasEnglish
|
||||||
|
LANGCODE=en_US
|
||||||
|
#NEWDAWG=1
|
||||||
|
|
||||||
|
TARGET_TYPE ?= FRANK
|
||||||
|
|
||||||
|
include ../Makefile.2to8
|
||||||
|
|
||||||
|
include ../Makefile.langcommon
|
||||||
|
|
||||||
|
$(LANG)Main.dict.gz: BasEnglish.dict.gz
|
||||||
|
ln -s $< $@
|
||||||
|
|
||||||
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
# "parent" Makefile.langcommon in the parent directory.
|
||||||
|
|
||||||
|
clean: clean_common
|
||||||
|
rm -f $(LANG)Main.dict.gz *.bin $(LANG)*.pdb $(LANG)*.seb
|
71
dawg/English/info.txt
Normal file
71
dawg/English/info.txt
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
LANGCODE:en_US
|
||||||
|
|
||||||
|
# deal with DOS files
|
||||||
|
LANGFILTER_PRECLIP: tr -d '\r' |
|
||||||
|
|
||||||
|
LANGFILTER_POSTCLIP: | tr [a-z] [A-Z]
|
||||||
|
LANGFILTER_POSTCLIP: | grep '^[A-Z]*$'
|
||||||
|
LANGFILTER_POSTCLIP: | tr -s '\n\r' '\000\000'
|
||||||
|
LANGFILTER_POSTCLIP: | sort -z
|
||||||
|
|
||||||
|
# We can trust sort (above) to do the right thing since there's no
|
||||||
|
# high ascii. dict2dawg.pl is much faster if I can trust that its
|
||||||
|
# input is in sorted order.
|
||||||
|
NEEDSSORT:false
|
||||||
|
|
||||||
|
LANGINFO: <p>English dictionaries can contain words with any of the 26
|
||||||
|
LANGINFO: letters you think of as making up the alphabet: A-Z. At
|
||||||
|
LANGINFO: this point any word in your list containing anything else
|
||||||
|
LANGINFO: will simply be excluded from the dictionary.</p>
|
||||||
|
|
||||||
|
# High bit means "official". Next 7 bits are an enum where
|
||||||
|
# English==1. Low byte is padding
|
||||||
|
XLOC_HEADER:0x8100
|
||||||
|
|
||||||
|
<BEGIN_TILES>
|
||||||
|
2 0 {"_"}
|
||||||
|
9 1 'A'
|
||||||
|
2 3 'B'
|
||||||
|
2 3 'C'
|
||||||
|
4 2 'D'
|
||||||
|
12 1 'E'
|
||||||
|
2 4 'F'
|
||||||
|
3 2 'G'
|
||||||
|
2 4 'H'
|
||||||
|
9 1 'I'
|
||||||
|
1 8 'J'
|
||||||
|
1 5 'K'
|
||||||
|
4 1 'L'
|
||||||
|
2 3 'M'
|
||||||
|
6 1 'N'
|
||||||
|
8 1 'O'
|
||||||
|
2 3 'P'
|
||||||
|
1 10 'Q'
|
||||||
|
6 1 'R'
|
||||||
|
4 1 'S'
|
||||||
|
6 1 'T'
|
||||||
|
4 1 'U'
|
||||||
|
2 4 'V'
|
||||||
|
2 4 'W'
|
||||||
|
1 8 'X'
|
||||||
|
2 4 'Y'
|
||||||
|
1 10 'Z'
|
||||||
|
<END_TILES>
|
||||||
|
|
||||||
|
# should ignore all after the <END> above
|
7
dawg/Makefile.2to8
Normal file
7
dawg/Makefile.2to8
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# -*-mode: Makefile -*-
|
||||||
|
|
||||||
|
# These are the targets that almost all language makefiles will want.
|
||||||
|
|
||||||
|
SHORT_WORD = 2
|
||||||
|
LONG_WORD = 8
|
||||||
|
|
257
dawg/Makefile.langcommon
Normal file
257
dawg/Makefile.langcommon
Normal file
|
@ -0,0 +1,257 @@
|
||||||
|
# -*-mode: Makefile -*-
|
||||||
|
|
||||||
|
# Copyright 2000-2002 by Eric House (fixin@peak.org)
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
FRANK_EXT = xwd
|
||||||
|
|
||||||
|
# this will make all dicts the new, larger type
|
||||||
|
#FORCE_4 = -force4
|
||||||
|
|
||||||
|
PALM_DICT_TYPE = DAWG
|
||||||
|
PAR = ../par.pl
|
||||||
|
|
||||||
|
LANGUAGE = $(shell basename $$(pwd))
|
||||||
|
|
||||||
|
#all: target_all
|
||||||
|
|
||||||
|
# let languages set this first, but we always add blank to it.
|
||||||
|
BLANK_INFO = "_" /dev/null /dev/null
|
||||||
|
|
||||||
|
# Supply a default so don't have to type so much; feel free to change
|
||||||
|
TARGET_TYPE ?= FRANK
|
||||||
|
|
||||||
|
ifdef NEWDAWG
|
||||||
|
TABLE_ARG = -mn
|
||||||
|
else
|
||||||
|
TABLE_ARG = -m
|
||||||
|
endif
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
# PalmOS rules
|
||||||
|
##############################################################################
|
||||||
|
ifeq ($(TARGET_TYPE),PALM)
|
||||||
|
|
||||||
|
ifdef NEWDAWG
|
||||||
|
PDBTYPE = Xwr4
|
||||||
|
else
|
||||||
|
PDBTYPE = Xwr3
|
||||||
|
endif
|
||||||
|
|
||||||
|
all: $(LANG)2to8.pdb
|
||||||
|
|
||||||
|
empty: $(LANG)0to0.pdb
|
||||||
|
|
||||||
|
# Those languages that have bitmap files for custom glyphs will need to
|
||||||
|
# define BMPBINFILES and perhaps provide a rule for building the files
|
||||||
|
binfiles.stamp: $(BMPBINFILES)
|
||||||
|
touch binfiles.stamp
|
||||||
|
|
||||||
|
palmspecials.bin: ../palm_mkspecials.pl $(BMPFILES)
|
||||||
|
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
|
||||||
|
|
||||||
|
# can't just use values.bin because the specials bitmap info is
|
||||||
|
# platform-specific
|
||||||
|
palmvalues.bin: values.bin palmspecials.bin
|
||||||
|
cat $^ > $@
|
||||||
|
|
||||||
|
# values.bin: palmspecials.bin ../xloc binfiles.stamp
|
||||||
|
# cd ../ && $(MAKE) xloc
|
||||||
|
# binfileparms=""; \
|
||||||
|
# if [ "$(BMPBINFILES)" != "" ]; then \
|
||||||
|
# for f in $(BMPBINFILES)""; \
|
||||||
|
# do binfileparms="$$binfileparms -i $$f"; \
|
||||||
|
# done; \
|
||||||
|
# fi; \
|
||||||
|
# ../xloc -l $(LANGCODE) $$binfileparms -T $@
|
||||||
|
# cat palmspecials.bin >> $@
|
||||||
|
|
||||||
|
# header (first record) is node count (long) and 4 chars:
|
||||||
|
# unsigned char firstEdgeRecNum;
|
||||||
|
# unsigned char charTableRecNum;
|
||||||
|
# unsigned char valTableRecNum;
|
||||||
|
# unsigned char reserved[3]; // worst case this points to a new resource
|
||||||
|
|
||||||
|
# include "flags" as used on the other platforms
|
||||||
|
palmheader%.bin: $(LANG)%_wordcount.bin $(LANG)%_flags.bin
|
||||||
|
rm -f $@
|
||||||
|
touch $@
|
||||||
|
ifdef NEWDAWG
|
||||||
|
cat $(LANG)$*_flags.bin >> $@
|
||||||
|
endif
|
||||||
|
cat $< >> $@
|
||||||
|
perl -e "print pack(\"C\",3)" >> $@ # first edge
|
||||||
|
perl -e "print pack(\"C\",1)" >> $@ # char table rec number
|
||||||
|
perl -e "print pack(\"C\",2)" >> $@ # valTable rec number
|
||||||
|
perl -e "print pack(\"CCC\",0)" >> $@ # reserved 3 bytes
|
||||||
|
perl -e "print pack(\"CC\",0)" >> $@ # c code added two more...
|
||||||
|
|
||||||
|
|
||||||
|
# This works, but leaves out the header info that the current version
|
||||||
|
# has. I'm not sure anybody cares, though...
|
||||||
|
$(LANG)%.pdb: dawg$(LANG)%.stamp table.bin palmvalues.bin palmheader%.bin
|
||||||
|
$(PAR) c -a backup $@ \
|
||||||
|
$(basename $(@F)) $(PALM_DICT_TYPE) $(PDBTYPE) \
|
||||||
|
palmheader$*.bin table.bin palmvalues.bin dawg$(LANG)$*_*.bin
|
||||||
|
|
||||||
|
# start=$$(echo $@ | sed -e 's/$(LANG)\([0-9]*\)to[0-9]*.pdb/\1/'); \
|
||||||
|
# end=$$(echo $@ | sed -e 's/$(LANG)[0-9]*to\([0-9]*\).pdb/\1/'); \
|
||||||
|
# zcat $< | grep "^.\{$${start},$${end}\}$$" | \
|
||||||
|
# ../dict2pdb -t table.bin -v values.bin -n $(basename $(@F)) \
|
||||||
|
# > $@
|
||||||
|
|
||||||
|
# the files to export for byod
|
||||||
|
byodbins: table.bin values.bin palmvalues.bin
|
||||||
|
|
||||||
|
#endif # TARGET_TYPE==PALM
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
# Franklin ebook rules
|
||||||
|
##############################################################################
|
||||||
|
else
|
||||||
|
ifeq ($(TARGET_TYPE),FRANK)
|
||||||
|
|
||||||
|
# If we're on a system that can build for Franklin, assume that's what
|
||||||
|
# we want to build (and the .xwd.saved [<-bug] file for other non-palm
|
||||||
|
# platforms is a by-product). But if the EBM tools aren't there, just
|
||||||
|
# build the .xwd file.
|
||||||
|
ifeq (x$(shell echo -n $$EBOOKMAN_SDK)x,xx)
|
||||||
|
all: $(LANG)2to8.xwd
|
||||||
|
else
|
||||||
|
all: $(LANG)2to8.seb
|
||||||
|
endif
|
||||||
|
|
||||||
|
empty: $(LANG)0to0.seb
|
||||||
|
|
||||||
|
ifneq (x$(shell echo -n $$EBOOKMAN_SDK)x,xx)
|
||||||
|
include ${EBOOKMAN_SDK}/ebsdk.uses
|
||||||
|
endif
|
||||||
|
|
||||||
|
# a binary file (one byte) giving the number of tiles in the dict
|
||||||
|
charcount.bin: table.bin
|
||||||
|
ifdef NEWDAWG
|
||||||
|
siz=$$(wc -c $< | sed -e 's/$<//'); \
|
||||||
|
perl -e "print pack(\"c\",$$siz/2)" > $@
|
||||||
|
else
|
||||||
|
siz=$$(wc -c $< | sed -e 's/$<//'); \
|
||||||
|
perl -e "print pack(\"c\",$$siz)" > $@
|
||||||
|
endif
|
||||||
|
|
||||||
|
# For each entry in the table whose face < 32, there needs to be a pair of
|
||||||
|
# pbitm files and a string giving the printing form
|
||||||
|
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
|
||||||
|
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
|
||||||
|
|
||||||
|
$(LANG)%.$(FRANK_EXT): dawg$(LANG)%.stamp $(LANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
|
||||||
|
cat $(LANG)$*_flags.bin charcount.bin table.bin values.bin \
|
||||||
|
frankspecials.bin $(LANG)StartLoc.bin $$(ls dawg$(LANG)$*_*.bin) > $@
|
||||||
|
cp $@ saveme.bin
|
||||||
|
|
||||||
|
$(LANG)%.seb: $(LANG)%.$(FRANK_EXT) $(LANG)%.atts
|
||||||
|
${ESDK_CREATESEB_EXE} $<
|
||||||
|
cp $< $<.saved
|
||||||
|
|
||||||
|
$(LANG)%.atts: #recreate it each time based on params
|
||||||
|
echo '_PUB|global+read-only|"Eric_House"' >> $@
|
||||||
|
echo "_NAME|global+read-only|\"$(LANG)2to8\"" >> $@
|
||||||
|
echo "_EXT|global+read-only|\"$(FRANK_EXT)\"" >> $@
|
||||||
|
echo '_LCAT|nosign+global|"CONTENT"' >> $@
|
||||||
|
echo '_PERM|global+read-only|"r"' >> $@
|
||||||
|
|
||||||
|
# values.bin: ../xloc
|
||||||
|
# cd ../ && $(MAKE) xloc
|
||||||
|
# ../xloc -l $(LANGCODE) -T $@
|
||||||
|
|
||||||
|
# the files to export for byod
|
||||||
|
byodbins: table.bin values.bin frankspecials.bin
|
||||||
|
|
||||||
|
|
||||||
|
else
|
||||||
|
(Need to define TARGET_TYPE if get error pointing to this line)
|
||||||
|
endif #ifeq ($(TARGET_TYPE),FRANK)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq (s$(TARGET_TYPE),s)
|
||||||
|
It's an error not to specify a TARGET_TYPE
|
||||||
|
endif
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
# shared rules
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# For some reason I can't fathom dawg$(LANG)% gets nuked every time
|
||||||
|
# the top-level rule fires (all: for whatever TARGET_TYPE.) It
|
||||||
|
# happens after the rule finishes....
|
||||||
|
|
||||||
|
# 16 bits worth of flags for the start of the eventual file. At this
|
||||||
|
# point, the flags mean this:
|
||||||
|
# 1: old-style DAWG.
|
||||||
|
# 2: new-style DAWG, three bytes per node.
|
||||||
|
# 3: new-style DAWG, four bytes per node
|
||||||
|
$(LANG)%_flags.bin: dawg$(LANG)%.stamp
|
||||||
|
ifdef NEWDAWG
|
||||||
|
if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \
|
||||||
|
then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \
|
||||||
|
else perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
if [ 3 == $$(cat $(LANG)$*_nodesize.bin) ] ; \
|
||||||
|
then perl -e "print pack(\"n\",0x0001)" > $@; echo "flags=1"; \
|
||||||
|
else echo "ERROR: old format can't handle 4-byte"; exit 1; \
|
||||||
|
fi
|
||||||
|
endif
|
||||||
|
|
||||||
|
dawg$(LANG)%.stamp: $(LANG)Main.dict.gz ../dict2dawg.pl table.bin ../Makefile.langcommon
|
||||||
|
start=$$(echo $@ | sed -e 's/dawg$(LANG)\([0-9]*\)to[0-9]*.stamp/\1/'); \
|
||||||
|
end=$$(echo $@ | sed -e 's/dawg$(LANG)[0-9]*to\([0-9]*\).stamp/\1/'); \
|
||||||
|
echo $${start} and $$end; \
|
||||||
|
zcat $< | grep "^.\{$${start},$${end}\}$$" | tr '\n' '\0'| \
|
||||||
|
sort -z | ../dict2dawg.pl $(TABLE_ARG) table.bin -b 28000 \
|
||||||
|
-ob dawg$(LANG)$* \
|
||||||
|
-sn $(LANG)StartLoc.bin -k -term 0 -wc $(LANG)$*_wordcount.bin \
|
||||||
|
$(FORCE_4) -ns $(LANG)$*_nodesize.bin
|
||||||
|
touch $@
|
||||||
|
|
||||||
|
$(LANG)%_wordcount.bin: dawg$(LANG)%.stamp
|
||||||
|
@echo
|
||||||
|
|
||||||
|
# the files to export for byod
|
||||||
|
allbins:
|
||||||
|
$(MAKE) TARGET_TYPE=PALM byodbins
|
||||||
|
$(MAKE) TARGET_TYPE=FRANK byodbins
|
||||||
|
|
||||||
|
table.bin: ../xloc.pl
|
||||||
|
ifdef NEWDAWG
|
||||||
|
perl -I../ ../xloc.pl -tn > $@
|
||||||
|
else
|
||||||
|
perl -I../ ../xloc.pl -t > $@
|
||||||
|
endif
|
||||||
|
|
||||||
|
values.bin: ../xloc.pl
|
||||||
|
perl -I../ ../xloc.pl -v > $@
|
||||||
|
|
||||||
|
%.dict: %.dict.gz
|
||||||
|
zcat $< > $@
|
||||||
|
|
||||||
|
clean_common:
|
||||||
|
rm -f $(LANG)Main.dict *.bin *.pdb *.seb dawg*.stamp *.$(FRANK_EXT) \
|
||||||
|
$(LANG)*.pdb $(LANG)*.seb
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "make TARGET_TYPE=[FRANK|PALM]"
|
||||||
|
|
||||||
|
test:
|
||||||
|
@echo $(LANGUAGE)
|
798
dawg/dict2dawg.pl
Executable file
798
dawg/dict2dawg.pl
Executable file
|
@ -0,0 +1,798 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
# adapted from C++ code Copyright (C) 2000 Falk Hueffner
|
||||||
|
# This version Copyright (C) 2002 Eric House (fixin@peak.org)
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
# General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
||||||
|
# USA
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# inputs: 0. Name of file mapping letters to 0..31 values. In English
|
||||||
|
# case just contains A..Z. This will be used to translate the tries
|
||||||
|
# on output.
|
||||||
|
# 1. Max number of bytes per binary output file.
|
||||||
|
#
|
||||||
|
# 2. Basename of binary files for output.
|
||||||
|
|
||||||
|
# 3. Name of file to which to write the number of the
|
||||||
|
# startNode, since I'm not rewriting a bunch of code to expect Falk's
|
||||||
|
# '*' node at the start.
|
||||||
|
#
|
||||||
|
|
||||||
|
# In STDIN, the text file to be compressed. It absolutely
|
||||||
|
# must be sorted. The sort doesn't have to follow the order in the
|
||||||
|
# map file, however.
|
||||||
|
|
||||||
|
# This is meant eventually to be runnable as part of a cgi system for
|
||||||
|
# letting users generating Crosswords dicts online.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use POSIX;
|
||||||
|
|
||||||
|
my $gFirstDiff;
|
||||||
|
my @gCurrentWord;
|
||||||
|
my $gCurrentWord; # save so can check for sortedness
|
||||||
|
my $gDone = 0;
|
||||||
|
my @gInputStrings;
|
||||||
|
my $gNeedsSort = 1; # read from cmd line eventually
|
||||||
|
my @gNodes; # final array of nodes
|
||||||
|
my $gNBytesPerOutfile = 0xFFFFFFFF;
|
||||||
|
my $gTableFile;
|
||||||
|
my $gOutFileBase;
|
||||||
|
my $gStartNodeOut;
|
||||||
|
my $gInFileName;
|
||||||
|
my $gKillIfMissing = 1;
|
||||||
|
my $gTermChar = '/n';
|
||||||
|
my $gDumpText = 0; # dump the dict as text after?
|
||||||
|
my $gCountFile;
|
||||||
|
my $gBytesPerNodeFile; # where to write whether node size 3 or 4
|
||||||
|
my $gWordCount = 0;
|
||||||
|
my %gTableHash;
|
||||||
|
my @gRevMap;
|
||||||
|
my $debug = 0;
|
||||||
|
my %gSubsHash;
|
||||||
|
my $gForceFour = 0; # use four bytes regardless of need?
|
||||||
|
my $gNBytesPerNode;
|
||||||
|
my $gUseUnicode;
|
||||||
|
|
||||||
|
main();
|
||||||
|
|
||||||
|
exit;
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
sub main() {
|
||||||
|
|
||||||
|
if ( !parseARGV() ) {
|
||||||
|
usage();
|
||||||
|
exit();
|
||||||
|
}
|
||||||
|
|
||||||
|
makeTableHash();
|
||||||
|
|
||||||
|
my $infile;
|
||||||
|
|
||||||
|
if ( $gInFileName ) {
|
||||||
|
open $infile, "<$gInFileName";
|
||||||
|
} else {
|
||||||
|
$infile = \*STDIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
@gInputStrings = parseAndSort( $infile );
|
||||||
|
if ( $gInFileName ) {
|
||||||
|
close $infile;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Do I need this stupid thing? Better to move the first row to
|
||||||
|
# the front of the array and patch everything else. Or fix the
|
||||||
|
# non-palm dictionary format to include the offset of the first
|
||||||
|
# node.
|
||||||
|
|
||||||
|
my $dummyNode = 0xFFFFFFFF;
|
||||||
|
@gNodes = ( $dummyNode );
|
||||||
|
|
||||||
|
readNextWord();
|
||||||
|
|
||||||
|
my $firstRootChildOffset = buildNode(0);
|
||||||
|
|
||||||
|
moveTopToFront( \$firstRootChildOffset );
|
||||||
|
|
||||||
|
if ( $gStartNodeOut ) {
|
||||||
|
writeOutStartNode( $gStartNodeOut, $firstRootChildOffset );
|
||||||
|
}
|
||||||
|
|
||||||
|
print STDERR "\n... dumping table ...\n" if $debug;
|
||||||
|
printNodes( \@gNodes, "done with main" ) if $debug;
|
||||||
|
|
||||||
|
# write out the number of nodes if requested
|
||||||
|
if ( $gCountFile ) {
|
||||||
|
open OFILE, "> $gCountFile";
|
||||||
|
print OFILE pack( "N", $gWordCount );
|
||||||
|
close OFILE;
|
||||||
|
print STDERR "wrote out: got $gWordCount words\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $gOutFileBase ) {
|
||||||
|
emitNodes( $gNBytesPerOutfile, $gOutFileBase );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $gDumpText && @gNodes > 0 ) {
|
||||||
|
printOneLevel( $firstRootChildOffset, "" );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $gBytesPerNodeFile ) {
|
||||||
|
open OFILE, "> $gBytesPerNodeFile";
|
||||||
|
print OFILE $gNBytesPerNode;
|
||||||
|
close OFILE;
|
||||||
|
}
|
||||||
|
print STDERR "Used $gNBytesPerNode per node.\n";
|
||||||
|
} # main
|
||||||
|
|
||||||
|
# We now have an array of nodes with the last subarray being the
|
||||||
|
# logical top of the tree. Move them to the start, fixing all fco
|
||||||
|
# refs, so that legacy code like Palm can assume top==0.
|
||||||
|
#
|
||||||
|
# Note: It'd probably be a bit faster to integrate this with emitNodes
|
||||||
|
# -- unless I need to have an in-memory list that can be used for
|
||||||
|
# lookups. But that's best for debugging, so keep it this way for now.
|
||||||
|
#
|
||||||
|
# Also Note: the first node is a dummy that can and should be tossed
|
||||||
|
# now.
|
||||||
|
|
||||||
|
sub moveTopToFront($) {
|
||||||
|
my ( $firstRef ) = @_;
|
||||||
|
|
||||||
|
my $firstChild = ${$firstRef};
|
||||||
|
${$firstRef} = 0;
|
||||||
|
my @lastSub;
|
||||||
|
|
||||||
|
if ( $firstChild > 0 ) {
|
||||||
|
# remove the last (the root) subarray
|
||||||
|
@lastSub = splice( @gNodes, $firstChild );
|
||||||
|
} else {
|
||||||
|
die "there should be no words!!" if $gWordCount != 0;
|
||||||
|
}
|
||||||
|
# remove the first (garbage) node
|
||||||
|
shift @gNodes;
|
||||||
|
|
||||||
|
my $diff;
|
||||||
|
if ( $firstChild > 0 ) {
|
||||||
|
# -1 because all move down by 1; see prev line
|
||||||
|
$diff = @lastSub - 1;
|
||||||
|
die "something wrong with len\n" if $diff < 0;
|
||||||
|
} else {
|
||||||
|
$diff = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
# stick it on the front
|
||||||
|
splice( @gNodes, 0, 0, @lastSub);
|
||||||
|
|
||||||
|
# We add $diff to everything. There's no subtracting because
|
||||||
|
# nobody had any refs to the top list.
|
||||||
|
|
||||||
|
for ( my $i = 0; $i < @gNodes; ++$i ) {
|
||||||
|
my $fco = TrieNodeGetFirstChildOffset( $gNodes[$i] );
|
||||||
|
if ( $fco != 0 ) { # 0 means NONE, not 0th!!
|
||||||
|
TrieNodeSetFirstChildOffset( \$gNodes[$i], $fco+$diff );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} # moveTopToFront
|
||||||
|
|
||||||
|
|
||||||
|
sub buildNode {
|
||||||
|
my ( $depth ) = @_;
|
||||||
|
|
||||||
|
if ( @gCurrentWord == $depth ) {
|
||||||
|
# End of word reached. If the next word isn't a continuation
|
||||||
|
# of the current one, then we've reached the bottom of the
|
||||||
|
# recursion tree.
|
||||||
|
readNextWord();
|
||||||
|
if ($gFirstDiff < $depth || $gDone) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
my @newedges;
|
||||||
|
|
||||||
|
do {
|
||||||
|
my $letter = $gCurrentWord[$depth];
|
||||||
|
my $isTerminal = @gCurrentWord - 1 == $depth ? 1:0;
|
||||||
|
|
||||||
|
my $nodeOffset = buildNode($depth+1);
|
||||||
|
my $newNode = MakeTrieNode($letter, $isTerminal, $nodeOffset);
|
||||||
|
push( @newedges, $newNode );
|
||||||
|
|
||||||
|
} while ( ($gFirstDiff == $depth) && !$gDone);
|
||||||
|
|
||||||
|
TrieNodeSetIsLastSibling( \@newedges[@newedges-1], 1 );
|
||||||
|
|
||||||
|
return addNodes( \@newedges );
|
||||||
|
} # buildNode
|
||||||
|
|
||||||
|
sub addNodes {
|
||||||
|
my ( $newedgesR ) = @_;
|
||||||
|
|
||||||
|
my $found = findSubArray( $newedgesR );
|
||||||
|
|
||||||
|
if ( $found >= 0 ) {
|
||||||
|
die "0 is an invalid match!!!" if $found == 0;
|
||||||
|
return $found;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
my $firstFreeIndex = @gNodes;
|
||||||
|
|
||||||
|
print STDERR "adding...\n" if $debug;
|
||||||
|
printNodes( $newedgesR ) if $debug;
|
||||||
|
|
||||||
|
push @gNodes, (@{$newedgesR});
|
||||||
|
|
||||||
|
registerSubArray( $newedgesR, $firstFreeIndex );
|
||||||
|
return $firstFreeIndex;
|
||||||
|
}
|
||||||
|
} # addNodes
|
||||||
|
|
||||||
|
sub printNode {
|
||||||
|
my ( $index, $node ) = @_;
|
||||||
|
|
||||||
|
print STDERR "[$index] ";
|
||||||
|
|
||||||
|
printf( STDERR
|
||||||
|
"letter=%d; isTerminal=%d; isLastSib=%d; fco=%d;\n",
|
||||||
|
TrieNodeGetLetter($node),
|
||||||
|
TrieNodeGetIsTerminal($node),
|
||||||
|
TrieNodeGetIsLastSibling($node),
|
||||||
|
TrieNodeGetFirstChildOffset($node));
|
||||||
|
} # printNode
|
||||||
|
|
||||||
|
sub printNodes {
|
||||||
|
my ( $nodesR, $name ) = @_;
|
||||||
|
|
||||||
|
my $len = @{$nodesR};
|
||||||
|
# print "printNodes($name): len = $len\n";
|
||||||
|
|
||||||
|
for ( my $i = 0; $i < $len; ++$i ) {
|
||||||
|
my $node = ${$nodesR}[$i];
|
||||||
|
printNode( $i, $node );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Hashing. We'll keep a hash of offsets into the existing nodes
|
||||||
|
# array, and as the key use a string that represents the entire sub
|
||||||
|
# array. Since the key is what we're matching for, there should never
|
||||||
|
# be more than one value per hash and so we don't need buckets.
|
||||||
|
# Return -1 if there's no match.
|
||||||
|
|
||||||
|
sub findSubArray {
|
||||||
|
my ( $newedgesR ) = @_;
|
||||||
|
|
||||||
|
my $key = join('', @{$newedgesR});
|
||||||
|
|
||||||
|
if ( exists( $gSubsHash{$key} ) ) {
|
||||||
|
return $gSubsHash{$key};
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} # findSubArray
|
||||||
|
|
||||||
|
# add to the hash
|
||||||
|
sub registerSubArray {
|
||||||
|
my ( $edgesR, $nodeLoc ) = @_;
|
||||||
|
|
||||||
|
my $key = join( '', @{$edgesR} );
|
||||||
|
|
||||||
|
if ( exists $gSubsHash{$key} ) {
|
||||||
|
die "entry for key shouldn't exist!!";
|
||||||
|
} else {
|
||||||
|
$gSubsHash{$key} = $nodeLoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
} # registerSubArray
|
||||||
|
|
||||||
|
sub readNextWord() {
|
||||||
|
my @word;
|
||||||
|
|
||||||
|
if ( !$gDone ) {
|
||||||
|
$gDone = @gInputStrings == 0;
|
||||||
|
if ( !$gDone ) {
|
||||||
|
@word = @{shift @gInputStrings};
|
||||||
|
} else {
|
||||||
|
print STDERR "gDone set to true\n" if $debug;
|
||||||
|
}
|
||||||
|
|
||||||
|
print STDERR "got word: ", join(',',@word), "\n" if $debug;
|
||||||
|
}
|
||||||
|
my $numCommonLetters = 0;
|
||||||
|
my $len = @word;
|
||||||
|
if ( @gCurrentWord < $len ) {
|
||||||
|
$len = @gCurrentWord;
|
||||||
|
}
|
||||||
|
|
||||||
|
while ( @gCurrentWord[$numCommonLetters] eq @word[$numCommonLetters]
|
||||||
|
&& $numCommonLetters < $len) {
|
||||||
|
++$numCommonLetters;
|
||||||
|
}
|
||||||
|
|
||||||
|
$gFirstDiff = $numCommonLetters;
|
||||||
|
die "words ", join(",",@gCurrentWord), " and ", join(",", @word), " out of order" if #$debug &&
|
||||||
|
@gCurrentWord > 0 && @word > 0
|
||||||
|
&& !firstBeforeSecond( \@gCurrentWord, \@word );
|
||||||
|
@gCurrentWord = @word;
|
||||||
|
} # readNextWord
|
||||||
|
|
||||||
|
sub firstBeforeSecond {
|
||||||
|
my ( $firstR, $secondR ) = @_;
|
||||||
|
|
||||||
|
for ( my $i = 0; ; ++$i ) {
|
||||||
|
|
||||||
|
# if we reach the end of the first word/list, we're done.
|
||||||
|
if ( $i == @{$firstR} ) {
|
||||||
|
die "duplicate!!!" if $i == @{$secondR};
|
||||||
|
return 1;
|
||||||
|
# but if we reach the second end first, we've failed
|
||||||
|
} elsif ( $i == @{$secondR} ) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $diff = ${$firstR}[$i] <=> ${$secondR}[$i];
|
||||||
|
|
||||||
|
if ( $diff == 0 ) {
|
||||||
|
next;
|
||||||
|
} else {
|
||||||
|
return $diff < 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} # firstBeforeSecond
|
||||||
|
|
||||||
|
# passed to sort. Should remain unprototyped for effeciency's sake
|
||||||
|
|
||||||
|
sub cmpWords {
|
||||||
|
|
||||||
|
my $lenA = @{$a};
|
||||||
|
my $lenB = @{$b};
|
||||||
|
my $min = $lenA > $lenB? $lenB: $lenA;
|
||||||
|
|
||||||
|
for ( my $i = 0; $i < $min; ++$i ) {
|
||||||
|
my $ac = ${$a}[$i];
|
||||||
|
my $bc = ${$b}[$i];
|
||||||
|
|
||||||
|
my $res = $ac <=> $bc;
|
||||||
|
|
||||||
|
if ( $res != 0 ) {
|
||||||
|
return $res; # we're done
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# If we got here, they match up to their common length. Longer is
|
||||||
|
# greater.
|
||||||
|
my $res = @{$a} <=> @{$b};
|
||||||
|
return $res; # which is longer?
|
||||||
|
} # cmpWords
|
||||||
|
|
||||||
|
sub parseAndSort() {
|
||||||
|
my ( $infile ) = @_;
|
||||||
|
|
||||||
|
my @wordlist;
|
||||||
|
my @word;
|
||||||
|
|
||||||
|
WORDLOOP:
|
||||||
|
for ( ; ; ) {
|
||||||
|
|
||||||
|
my $dropWord = 0;
|
||||||
|
splice @word; # empty it
|
||||||
|
|
||||||
|
# for each byte
|
||||||
|
for ( ; ; ) {
|
||||||
|
my $byt = getc($infile);
|
||||||
|
|
||||||
|
if ( $byt eq undef ) {
|
||||||
|
last WORDLOOP;
|
||||||
|
} elsif ( $byt eq $gTermChar ) {
|
||||||
|
if ( !$dropWord ) {
|
||||||
|
push @wordlist, [ @word ];
|
||||||
|
++$gWordCount;
|
||||||
|
}
|
||||||
|
next WORDLOOP;
|
||||||
|
} elsif ( exists( $gTableHash{$byt} ) ) {
|
||||||
|
if ( !$dropWord ) {
|
||||||
|
push @word, $gTableHash{$byt};
|
||||||
|
die "word too long" if @word > 15;
|
||||||
|
}
|
||||||
|
} elsif ($gKillIfMissing) {
|
||||||
|
die "char $byt (", $byt+0, ") not in map file $gTableFile\n";
|
||||||
|
} else {
|
||||||
|
$dropWord = 1;
|
||||||
|
splice @word; # lose anything we already have
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $gNeedsSort && ($gWordCount > 0) ) {
|
||||||
|
@wordlist = sort cmpWords @wordlist;
|
||||||
|
}
|
||||||
|
|
||||||
|
print STDERR "length of list is ", @wordlist + 0, ".\n" if $debug;
|
||||||
|
|
||||||
|
return @wordlist;
|
||||||
|
} # parseAndSort
|
||||||
|
|
||||||
|
# Print binary representation of trie array. This isn't used yet, but
|
||||||
|
# eventually it'll want to dump to multiple files appropriate for Palm
|
||||||
|
# that can be catenated together on other platforms. There'll need to
|
||||||
|
# be a file giving the offset of the first node too. Also, might want
|
||||||
|
# to move to 4-byte representation when the input can't otherwise be
|
||||||
|
# handled.
|
||||||
|
|
||||||
|
sub dumpNodes {
|
||||||
|
|
||||||
|
for ( my $i = 0; $i < @gNodes; ++$i ) {
|
||||||
|
my $node = $gNodes[$i];
|
||||||
|
my $bstr = pack( "I", $node );
|
||||||
|
print STDOUT $bstr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
# Little node-field setters and getters to hide what bits represent
|
||||||
|
# what.
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
sub TrieNodeSetIsTerminal {
|
||||||
|
my ( $nodeR, $isTerminal ) = @_;
|
||||||
|
|
||||||
|
if ( $isTerminal ) {
|
||||||
|
${$nodeR} |= 1 << 31;
|
||||||
|
} else {
|
||||||
|
${$nodeR} &= ~(1 << 31);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TrieNodeGetIsTerminal {
|
||||||
|
my ( $node ) = @_;
|
||||||
|
return ($node & 1 << 31) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TrieNodeSetIsLastSibling {
|
||||||
|
my ( $nodeR, $isLastSibling ) = @_;
|
||||||
|
if ( $isLastSibling ) {
|
||||||
|
${$nodeR} |= 1 << 30;
|
||||||
|
} else {
|
||||||
|
${$nodeR} &= ~(1 << 30);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TrieNodeGetIsLastSibling {
|
||||||
|
my ( $node ) = @_;
|
||||||
|
return ($node & 1 << 30) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TrieNodeSetLetter {
|
||||||
|
my ( $nodeR, $letter ) = @_;
|
||||||
|
|
||||||
|
die "letter ", $letter, " too big" if $letter >= 32;
|
||||||
|
|
||||||
|
my $mask = ~(0x1F << 25);
|
||||||
|
${$nodeR} &= $mask; # clear all the bits
|
||||||
|
${$nodeR} |= ($letter << 25); # set new ones
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TrieNodeGetLetter {
|
||||||
|
my ( $node ) = @_;
|
||||||
|
$node >>= 25;
|
||||||
|
$node &= 0x1F;
|
||||||
|
return $node;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TrieNodeSetFirstChildOffset {
|
||||||
|
my ( $nodeR, $fco ) = @_;
|
||||||
|
|
||||||
|
die "$fco larger than 25 bits" if ($fco & 0xFE000000) != 0;
|
||||||
|
|
||||||
|
my $mask = ~0x01FFFFFF;
|
||||||
|
${$nodeR} &= $mask; # clear all the bits
|
||||||
|
${$nodeR} |= $fco; # set new ones
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TrieNodeGetFirstChildOffset {
|
||||||
|
my ( $node ) = @_;
|
||||||
|
$node &= 0x01FFFFFF; # 24 bits
|
||||||
|
return $node;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
sub MakeTrieNode {
|
||||||
|
my ( $letter, $isTerminal, $firstChildOffset, $isLastSibling ) = @_;
|
||||||
|
my $result = 0;
|
||||||
|
|
||||||
|
TrieNodeSetIsTerminal( \$result, $isTerminal );
|
||||||
|
TrieNodeSetIsLastSibling( \$result, $isLastSibling );
|
||||||
|
TrieNodeSetLetter( \$result, $letter );
|
||||||
|
TrieNodeSetFirstChildOffset( \$result, $firstChildOffset );
|
||||||
|
|
||||||
|
return $result;
|
||||||
|
} # MakeTrieNode
|
||||||
|
|
||||||
|
# Caller may need to know the offset of the first top-level node.
|
||||||
|
# Write it here.
|
||||||
|
sub writeOutStartNode {
|
||||||
|
my ( $startNodeOut, $firstRootChildOffset ) = @_;
|
||||||
|
|
||||||
|
open NODEOUT, ">$startNodeOut";
|
||||||
|
print NODEOUT pack( "N", $firstRootChildOffset );
|
||||||
|
close NODEOUT;
|
||||||
|
} # writeOutStartNode
|
||||||
|
|
||||||
|
# build the hash for translating. I'm using a hash assuming it'll be
|
||||||
|
# fast. Key is the letter; value is the 0..31 value to be output.
|
||||||
|
sub makeTableHash {
|
||||||
|
my $i;
|
||||||
|
open TABLEFILE, "< $gTableFile";
|
||||||
|
|
||||||
|
splice @gRevMap; # empty it
|
||||||
|
|
||||||
|
for ( $i = 0; ; ++$i ) {
|
||||||
|
my $ch = getc(TABLEFILE);
|
||||||
|
if ( $ch eq undef ) {
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $gUseUnicode ) { # skip the first byte each time: tmp HACK!!!
|
||||||
|
$ch = getc(TABLEFILE);
|
||||||
|
}
|
||||||
|
if ( $ch eq undef ) {
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
|
||||||
|
push @gRevMap, $ch;
|
||||||
|
|
||||||
|
if ( ord($ch) == 0 ) { # blank
|
||||||
|
next; # we want to increment i when blank seen since
|
||||||
|
# it is a tile value
|
||||||
|
}
|
||||||
|
|
||||||
|
die "$gTableFile too large\n" if $i > 32;
|
||||||
|
die "only blank (0) can be 32nd char\n" if ($i == 32 && $ch != 0);
|
||||||
|
|
||||||
|
$gTableHash{$ch} = $i;
|
||||||
|
}
|
||||||
|
|
||||||
|
close TABLEFILE;
|
||||||
|
} # makeTableHash
|
||||||
|
|
||||||
|
# emitNodes. "input" is $gNodes. From it we write up to
|
||||||
|
# $nBytesPerOutfile to files named $outFileBase0..n, mapping the
|
||||||
|
# letter field down to 5 bits with a hash built from $tableFile. If
|
||||||
|
# at any point we encounter a letter not in the hash we fail with an
|
||||||
|
# error.
|
||||||
|
|
||||||
|
sub emitNodes($$) {
|
||||||
|
my ( $gNBytesPerOutfile, $outFileBase ) = @_;
|
||||||
|
|
||||||
|
# now do the emit.
|
||||||
|
|
||||||
|
# is 17 bits enough?
|
||||||
|
printf STDOUT ("There are %d (0x%x) nodes in this DAWG.\n",
|
||||||
|
0 + @gNodes, 0 + @gNodes );
|
||||||
|
if ( @gNodes > 0x1FFFF || $gForceFour ) {
|
||||||
|
$gNBytesPerNode = 4;
|
||||||
|
} else {
|
||||||
|
$gNBytesPerNode = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $nextIndex = 0;
|
||||||
|
my $nextFileNum = 0;
|
||||||
|
|
||||||
|
for ( $nextFileNum = 0; ; ++$nextFileNum ) {
|
||||||
|
|
||||||
|
if ( $nextIndex >= @gNodes ) {
|
||||||
|
last; # we're done
|
||||||
|
}
|
||||||
|
|
||||||
|
die "Too many outfiles; infinite loop?" if $nextFileNum > 99;
|
||||||
|
|
||||||
|
my $outName = sprintf("${outFileBase}_%03d.bin", $nextFileNum);
|
||||||
|
open OUTFILE, "> $outName";
|
||||||
|
my $curSize = 0;
|
||||||
|
|
||||||
|
while ( $nextIndex < @gNodes ) {
|
||||||
|
|
||||||
|
# scan to find the next terminal
|
||||||
|
my $i;
|
||||||
|
for ( $i = $nextIndex;
|
||||||
|
!TrieNodeGetIsLastSibling($gNodes[$i]);
|
||||||
|
++$i ) {
|
||||||
|
|
||||||
|
# do nothing but a sanity check
|
||||||
|
if ( $i >= @gNodes) {
|
||||||
|
die "bad trie format: last node not last sibling" ;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
++$i; # move beyond the terminal
|
||||||
|
my $nextSize = ($i - $nextIndex) * $gNBytesPerNode;
|
||||||
|
if ($curSize + $nextSize > $gNBytesPerOutfile) {
|
||||||
|
last;
|
||||||
|
} else {
|
||||||
|
# emit the subarray
|
||||||
|
while ( $nextIndex < $i ) {
|
||||||
|
outputNode( $gNodes[$nextIndex], $gNBytesPerNode,
|
||||||
|
\*OUTFILE );
|
||||||
|
++$nextIndex;
|
||||||
|
}
|
||||||
|
$curSize += $nextSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close OUTFILE;
|
||||||
|
}
|
||||||
|
|
||||||
|
} # emitNodes
|
||||||
|
|
||||||
|
sub printWord {
|
||||||
|
my ( $str ) = @_;
|
||||||
|
|
||||||
|
print STDERR "$str\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
# print out the entire dictionary, as text, to STDERR.
|
||||||
|
|
||||||
|
sub printOneLevel {
|
||||||
|
|
||||||
|
my ( $index, $str ) = @_;
|
||||||
|
|
||||||
|
for ( ; ; ) {
|
||||||
|
|
||||||
|
my $newStr = $str;
|
||||||
|
my $node = $gNodes[$index++];
|
||||||
|
|
||||||
|
my $lindx = $gRevMap[TrieNodeGetLetter($node)];
|
||||||
|
|
||||||
|
if ( ord($lindx) >= 0x20 ) {
|
||||||
|
$newStr .= "$lindx";
|
||||||
|
} else {
|
||||||
|
print STDERR "sub space" if $debug;
|
||||||
|
$newStr .= "\\" . chr('0'+$lindx);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( TrieNodeGetIsTerminal($node) ) {
|
||||||
|
printWord( $newStr );
|
||||||
|
}
|
||||||
|
|
||||||
|
my $fco = TrieNodeGetFirstChildOffset( $node );
|
||||||
|
if ( $fco != 0 ) {
|
||||||
|
printOneLevel( $fco, $newStr );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( TrieNodeGetIsLastSibling($node) ) {
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub outputNode {
|
||||||
|
my ( $node, $nBytes, $outfile ) = @_;
|
||||||
|
|
||||||
|
my $fco = TrieNodeGetFirstChildOffset($node);
|
||||||
|
my $fourthByte;
|
||||||
|
|
||||||
|
if ( $nBytes == 4 ) {
|
||||||
|
$fourthByte = $fco >> 17;
|
||||||
|
die "fco too big" if $fourthByte > 0xFF;
|
||||||
|
$fco &= 0x1FFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
# format according to dawg.h:
|
||||||
|
# typedef struct array_edge {
|
||||||
|
# unsigned char highByte;
|
||||||
|
# unsigned char lowByte;
|
||||||
|
# unsigned char bits;
|
||||||
|
#ifdef FOUR_BYTE
|
||||||
|
# unsigned char moreBits;
|
||||||
|
#endif
|
||||||
|
# } array_edge;
|
||||||
|
|
||||||
|
# define LETTERMASK 0x1f
|
||||||
|
# define ACCEPTINGMASK 0x20
|
||||||
|
# define LASTEDGEMASK 0x40
|
||||||
|
# define LASTBITMASK 0x80
|
||||||
|
|
||||||
|
# write the fco (less that one bit). We want two bytes worth
|
||||||
|
# in three-byte mode, and three in four-byte mode (which is
|
||||||
|
# untested)
|
||||||
|
for ( my $i = 1; $i >= 0; --$i ) {
|
||||||
|
my $tmp = ($fco >> ($i * 8)) & 0xFF;
|
||||||
|
print $outfile pack( "C", $tmp );
|
||||||
|
}
|
||||||
|
$fco >>= 16; # it should now be 1 or 0
|
||||||
|
die "fco not 1 or 0" if $fco > 1;
|
||||||
|
|
||||||
|
my $chIn5 = TrieNodeGetLetter($node);
|
||||||
|
my $bits = $chIn5;
|
||||||
|
|
||||||
|
if ( TrieNodeGetIsLastSibling($node) ) {
|
||||||
|
$bits |= 0x40;
|
||||||
|
}
|
||||||
|
if ( TrieNodeGetIsTerminal($node) ) {
|
||||||
|
$bits |= 0x20;
|
||||||
|
}
|
||||||
|
if ( $fco != 0 ) {
|
||||||
|
$bits |= 0x80;
|
||||||
|
}
|
||||||
|
print $outfile pack( "C", $bits );
|
||||||
|
|
||||||
|
# the final byte, if in use
|
||||||
|
if ( $nBytes == 4 ) {
|
||||||
|
print $outfile pack( "C", $fourthByte );
|
||||||
|
}
|
||||||
|
} # outputNode
|
||||||
|
|
||||||
|
sub usage {
|
||||||
|
print STDERR "usage: $0 \n"
|
||||||
|
. "\t[-b bytesPerFile] (default = 0xFFFFFFFF)\n"
|
||||||
|
. "\t-m mapFile\n"
|
||||||
|
. "\t-ob outFileBase\n"
|
||||||
|
. "\t-sn start node out file\n"
|
||||||
|
. "\t[-if input file name] -- default = stdin\n"
|
||||||
|
. "\t[-term ch] (word terminator -- default = '\\0'\n"
|
||||||
|
. "\t[-nosort] (input already sorted in accord with -m; " .
|
||||||
|
" default=sort'\n"
|
||||||
|
. "\t[-dump] (write dictionary as text to STDERR for testing)\n"
|
||||||
|
. "\t[-force4](use 4 bytes per node regardless of need)\n"
|
||||||
|
. "\t[-r] (reject words with letters not in mapfile)\n"
|
||||||
|
. "\t[-k] (kill if any letters no in mapfile -- default)\n"
|
||||||
|
;
|
||||||
|
|
||||||
|
} # usage
|
||||||
|
|
||||||
|
sub parseARGV {
|
||||||
|
|
||||||
|
my $arg;
|
||||||
|
while ( my $arg = shift(@ARGV) ) {
|
||||||
|
|
||||||
|
SWITCH: {
|
||||||
|
if ($arg =~ /-b/) {$gNBytesPerOutfile = shift(@ARGV), last SWITCH;}
|
||||||
|
if ($arg =~ /-mn/) {$gTableFile = shift(@ARGV);
|
||||||
|
$gUseUnicode = 1;
|
||||||
|
last SWITCH;}
|
||||||
|
if ($arg =~ /-m/) {$gTableFile = shift(@ARGV); last SWITCH;}
|
||||||
|
if ($arg =~ /-ob/) {$gOutFileBase = shift(@ARGV), last SWITCH;}
|
||||||
|
if ($arg =~ /-sn/) {$gStartNodeOut = shift(@ARGV), last SWITCH;}
|
||||||
|
if ($arg =~ /-if/) {$gInFileName = shift(@ARGV), last SWITCH;}
|
||||||
|
if ($arg =~ /-r/) {$gKillIfMissing = 0; last SWITCH;}
|
||||||
|
if ($arg =~ /-k/) {$gKillIfMissing = 1; last SWITCH;}
|
||||||
|
if ($arg =~ /-term/) {$gTermChar = chr(shift(@ARGV)); last SWITCH;}
|
||||||
|
if ($arg =~ /-dump/) {$gDumpText = 1; last SWITCH;}
|
||||||
|
if ($arg =~ /-nosort/) {$gNeedsSort = 0; last SWITCH;}
|
||||||
|
if ($arg =~ /-wc/) {$gCountFile = shift(@ARGV); last SWITCH;}
|
||||||
|
if ($arg =~ /-ns/) {$gBytesPerNodeFile = shift(@ARGV); last SWITCH;}
|
||||||
|
if ($arg =~ /-force4/) {$gForceFour = 1; last SWITCH;}
|
||||||
|
die "unexpected arg $arg\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
print STDERR "gNBytesPerOutfile=$gNBytesPerOutfile\n" if $debug;
|
||||||
|
print STDERR "gTableFile=$gTableFile\n" if $debug;
|
||||||
|
print STDERR "gOutFileBase=$gOutFileBase\n" if $debug;
|
||||||
|
print STDERR "gStartNodeOut=$gStartNodeOut\n" if $debug;
|
||||||
|
printf STDERR "gTermChar=%s(%d)\n", $gTermChar, ord($gTermChar) if $debug;
|
||||||
|
|
||||||
|
return $gTableFile;
|
||||||
|
|
||||||
|
} # parseARGV
|
45
dawg/frank_mkspecials.pl
Executable file
45
dawg/frank_mkspecials.pl
Executable file
|
@ -0,0 +1,45 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Copyright 2001 by Eric House (fixin@peak.org)
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
# Given arguments consisting of triples, first a string and then pbitm
|
||||||
|
# files representing bitmaps. For each triple, print out the string and
|
||||||
|
# then the converted bitmaps.
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
while ( @ARGV ) {
|
||||||
|
my $str = shift();
|
||||||
|
my $largebmp = shift();
|
||||||
|
my $smallbmp = shift();
|
||||||
|
|
||||||
|
doOne( $str, $largebmp, $smallbmp );
|
||||||
|
}
|
||||||
|
|
||||||
|
sub doOne {
|
||||||
|
my ( $str, $largebmp, $smallbmp ) = @_;
|
||||||
|
|
||||||
|
print pack( "C", length($str) );
|
||||||
|
print $str;
|
||||||
|
|
||||||
|
print STDERR "looking at $largebmp", "\n";
|
||||||
|
|
||||||
|
print `cat $largebmp | ../pbitm2bin.pl`;
|
||||||
|
print `cat $smallbmp | ../pbitm2bin.pl`;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
111
dawg/palm_mkspecials.pl
Executable file
111
dawg/palm_mkspecials.pl
Executable file
|
@ -0,0 +1,111 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Copyright 2002 by Eric House
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
# Given arguments consisting of triples, first a string and then pbitm
|
||||||
|
# files representing bitmaps. The format looks like this:
|
||||||
|
|
||||||
|
# array [0-n] of { char len;
|
||||||
|
# char[3] alt txt;
|
||||||
|
# int16 offsetOfLarge;
|
||||||
|
# int16 offsetOfSmall;
|
||||||
|
# }
|
||||||
|
# array [0-n] of {
|
||||||
|
# bitmapLargeIfPresent;
|
||||||
|
# bitmapSmallIfPresent;
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# In addition, there's padding between bitmaps if needed to get the next
|
||||||
|
# one to a 2-byte boundary. And the input files are not in PalmOS bitmap
|
||||||
|
# format, so thay have to get converted into a tmp file before the sizes
|
||||||
|
# can be known and included in the eventual output.
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
my $tmpfile = "/tmp/tmpout$$";
|
||||||
|
|
||||||
|
my $nSpecials = @ARGV / 3;
|
||||||
|
die "wrong number of args" if (@ARGV % 3) != 0;
|
||||||
|
my $gOffset = $nSpecials * 8; # sizeof(Xloc_specialEntry)
|
||||||
|
|
||||||
|
open TMPFILE, "> $tmpfile";
|
||||||
|
|
||||||
|
for ( my $i = 0; $i < $nSpecials; ++$i ) {
|
||||||
|
|
||||||
|
my $size;
|
||||||
|
|
||||||
|
my $str = shift( @ARGV );
|
||||||
|
my $len = length($str);
|
||||||
|
die "string $str too long" if $len > 3;
|
||||||
|
print $str;
|
||||||
|
while ( $len < 4 ) {
|
||||||
|
++$len;
|
||||||
|
print pack("c", 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
|
||||||
|
doOneFile( shift( @ARGV ), \*TMPFILE, \$gOffset );
|
||||||
|
}
|
||||||
|
|
||||||
|
close TMPFILE;
|
||||||
|
|
||||||
|
# now append the tempfile
|
||||||
|
open TMPFILE, "< $tmpfile";
|
||||||
|
while ( read( TMPFILE, my $buffer, 128 ) ) {
|
||||||
|
print $buffer;
|
||||||
|
}
|
||||||
|
close TMPFILE;
|
||||||
|
|
||||||
|
unlink $tmpfile;
|
||||||
|
|
||||||
|
exit 0;
|
||||||
|
|
||||||
|
|
||||||
|
sub doOneFile($$) {
|
||||||
|
my ( $fil, $fh, $offsetR ) = @_;
|
||||||
|
|
||||||
|
my $size = convertBmp($fil, $fh );
|
||||||
|
if ( ($size % 2) != 0 ) {
|
||||||
|
++$size;
|
||||||
|
print $fh pack( "c", 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
print pack( "n", $size > 0? ${$offsetR} : 0 );
|
||||||
|
|
||||||
|
${$offsetR} += $size;
|
||||||
|
} # doOneFile
|
||||||
|
|
||||||
|
sub convertBmp($$) {
|
||||||
|
my ( $pbitmfile, $fhandle ) = @_;
|
||||||
|
|
||||||
|
if ( $pbitmfile eq "/dev/null" ) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
# for some reason I can't get quote marks to print into tmp.rcp using just `echo`
|
||||||
|
open TMP, "> tmp.rcp";
|
||||||
|
print TMP "BITMAP ID 1000 \"$pbitmfile\" AUTOCOMPRESS";
|
||||||
|
close TMP;
|
||||||
|
|
||||||
|
`pilrc tmp.rcp`;
|
||||||
|
print $fhandle `cat Tbmp03e8.bin`;
|
||||||
|
my $siz = -s "Tbmp03e8.bin";
|
||||||
|
`rm -f tmp.rcp Tbmp03e8.bin`;
|
||||||
|
|
||||||
|
return $siz;
|
||||||
|
}
|
||||||
|
}
|
234
dawg/par.pl
Executable file
234
dawg/par.pl
Executable file
|
@ -0,0 +1,234 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Copyright 2002 by Eric House (fixin@peak.org) All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
# Only enough of par's features to support building a crosswords dict
|
||||||
|
# pdb
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
|
||||||
|
my $debug = 0;
|
||||||
|
|
||||||
|
|
||||||
|
# stolen from par source
|
||||||
|
my $PRC_FLAGS_RESOURCE = (0x1<<0);
|
||||||
|
my $PRC_FLAGS_READONLY = (0x1<<1);
|
||||||
|
my $PRC_FLAGS_DIRTY = (0x1<<2);
|
||||||
|
my $PRC_FLAGS_BACKUP = (0x1<<3);
|
||||||
|
my $PRC_FLAGS_NEWER = (0x1<<4);
|
||||||
|
my $PRC_FLAGS_RESET = (0x1<<5);
|
||||||
|
my $PRC_FLAGS_COPYPREVENT = (0x1<<6);
|
||||||
|
my $PRC_FLAGS_STREAM = (0x1<<7);
|
||||||
|
my $PRC_FLAGS_HIDDEN = (0x1<<8);
|
||||||
|
my $PRC_FLAGS_LAUNCHABLE = (0x1<<9);
|
||||||
|
my $PRC_FLAGS_RECYCLABLE = (0x1<<10);
|
||||||
|
my $PRC_FLAGS_BUNDLE = (0x1<<11);
|
||||||
|
my $PRC_FLAGS_OPEN = (0x1<<15);
|
||||||
|
|
||||||
|
|
||||||
|
my $gAttrs = 0;
|
||||||
|
my $gVersion = 1; # par defaults this to 1
|
||||||
|
|
||||||
|
my $cmd = shift( @ARGV );
|
||||||
|
die "only 'c' supported now" if $cmd ne "c" && $cmd ne "-c";
|
||||||
|
|
||||||
|
readHOptions( \@ARGV );
|
||||||
|
|
||||||
|
my $dbfile = shift( @ARGV );
|
||||||
|
my $name = shift( @ARGV );
|
||||||
|
die "name $name too long" if length($name) > 31;
|
||||||
|
my $type = shift( @ARGV );
|
||||||
|
die "type $type must be of length 4" if length($type) != 4;
|
||||||
|
my $cid = shift( @ARGV );
|
||||||
|
die "cid $cid must be of length 4" if length($cid) != 4;
|
||||||
|
|
||||||
|
my @fileNames;
|
||||||
|
my @fileLengths;
|
||||||
|
|
||||||
|
my $nFiles = 0;
|
||||||
|
|
||||||
|
while ( @ARGV > 0 ) {
|
||||||
|
my $filename = shift( @ARGV );
|
||||||
|
push @fileNames, $filename;
|
||||||
|
push @fileLengths, -s $filename;
|
||||||
|
++$nFiles;
|
||||||
|
}
|
||||||
|
|
||||||
|
# from par's prcp.h; thanks djw!
|
||||||
|
# typedef struct prc_file_t {
|
||||||
|
# prc_byte_t name[32];
|
||||||
|
# prc_byte_t flags[2];
|
||||||
|
# prc_byte_t version[2];
|
||||||
|
# prc_byte_t ctime[4];
|
||||||
|
# prc_byte_t mtime[4];
|
||||||
|
# prc_byte_t btime[4];
|
||||||
|
# prc_byte_t modnum[4];
|
||||||
|
# prc_byte_t appinfo[4];
|
||||||
|
# prc_byte_t sortinfo[4];
|
||||||
|
# prc_byte_t type[4];
|
||||||
|
# prc_byte_t cid[4];
|
||||||
|
# prc_byte_t unique_id_seed[4];
|
||||||
|
# prc_byte_t next_record_list[4];
|
||||||
|
# prc_byte_t nrecords[2];
|
||||||
|
# } prc_file_t;
|
||||||
|
|
||||||
|
my $str;
|
||||||
|
my $offset = 0;
|
||||||
|
|
||||||
|
open OUTFILE, "> $dbfile" or die "couldn't open outfile $dbfile for writing";
|
||||||
|
|
||||||
|
# print the string, then pad with 0s
|
||||||
|
$offset = length($name);
|
||||||
|
print OUTFILE $name;
|
||||||
|
while ( $offset < 32 ) {
|
||||||
|
print OUTFILE pack("c", 0);
|
||||||
|
++$offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
$str = pack("n", $gAttrs); # flags
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
$str = pack("n", $gVersion); # version
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
my $time = time() + 2082844800;
|
||||||
|
$str = pack("NNN", $time, $time, 0); # ctime, mtime, btime
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
$str = pack("N", 0 ); # mod num
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
$str = pack("N", 0 ); # appinfo
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
$str = pack("N", 0 ); # sortinfo
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
|
||||||
|
print OUTFILE $type; # type
|
||||||
|
print OUTFILE $cid; # cid
|
||||||
|
$offset += 8;
|
||||||
|
|
||||||
|
$str = pack("NN", 0, 0 ); # unique_id_seed, next_record_list
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
$str = pack("n", $nFiles ); # nrecords
|
||||||
|
print OUTFILE $str;
|
||||||
|
$offset += length($str);
|
||||||
|
|
||||||
|
$offset += $nFiles * 8;
|
||||||
|
$offset += 2; # djw adds 2 bytes after size list; see below
|
||||||
|
foreach my $len ( @fileLengths ) {
|
||||||
|
print OUTFILE pack( "N", $offset );
|
||||||
|
print OUTFILE pack( "N", 0 );
|
||||||
|
$offset += $len;
|
||||||
|
}
|
||||||
|
|
||||||
|
print OUTFILE pack( "n", 0 ); # djw does this sans comment: flush.c, line 87
|
||||||
|
|
||||||
|
foreach my $file ( @fileNames ) {
|
||||||
|
open INFILE, "<$file" or die "couldn't open infile $file\n";
|
||||||
|
my $buffer;
|
||||||
|
while ( read INFILE, $buffer, 1024 ) {
|
||||||
|
print OUTFILE $buffer;
|
||||||
|
}
|
||||||
|
close INFILE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
close OUTFILE;
|
||||||
|
|
||||||
|
exit 0;
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
# Subroutines
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
sub readHOptions {
|
||||||
|
|
||||||
|
my ( $argvR ) = @_;
|
||||||
|
|
||||||
|
for ( ; ; ) {
|
||||||
|
my $opt = ${$argvR}[0];
|
||||||
|
|
||||||
|
if ( $opt !~ /^-/ ) {
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
|
||||||
|
# it starts with a '-': use it; else don't consume anything
|
||||||
|
shift @{$argvR};
|
||||||
|
|
||||||
|
if ( $opt eq "-a" ) {
|
||||||
|
my $attrs = shift @{$argvR};
|
||||||
|
processAttrString( $attrs );
|
||||||
|
} elsif ( $opt eq "-v" ) {
|
||||||
|
$gVersion = shift @{$argvR};
|
||||||
|
} else {
|
||||||
|
die "what's with \"$opt\": -a and -v are the only hattrs supported";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} # readHOptions
|
||||||
|
|
||||||
|
sub processAttrString {
|
||||||
|
|
||||||
|
my ( $attrs ) = @_;
|
||||||
|
|
||||||
|
foreach my $flag ( split /\|/, $attrs ) {
|
||||||
|
|
||||||
|
print STDERR "looking at flag $flag\n" if $debug;
|
||||||
|
|
||||||
|
if ( $flag =~ /resource/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_RESOURCE;
|
||||||
|
die "resource attr not supported";
|
||||||
|
} elsif ( $flag =~ /readonly/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_READONLY;
|
||||||
|
} elsif ( $flag =~ /dirty/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_DIRTY;
|
||||||
|
} elsif ( $flag =~ /backup/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_BACKUP;
|
||||||
|
} elsif ( $flag =~ /newer/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_NEWER;
|
||||||
|
} elsif ( $flag =~ /reset/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_RESET;
|
||||||
|
} elsif ( $flag =~ /copyprevent/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_COPYPREVENT;
|
||||||
|
} elsif ( $flag =~ /stream/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_STREAM;
|
||||||
|
die "stream attr not supported";
|
||||||
|
} elsif ( $flag =~ /hidden/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_HIDDEN;
|
||||||
|
} elsif ( $flag =~ /launchable/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_LAUNCHABLE;
|
||||||
|
} elsif ( $flag =~ /recyclable/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_RECYCLABLE;
|
||||||
|
} elsif ( $flag =~ /bundle/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_BUNDLE;
|
||||||
|
} elsif ( $flag =~ /open/ ) {
|
||||||
|
$gAttrs |= $PRC_FLAGS_OPEN;
|
||||||
|
} else {
|
||||||
|
die "flag $flag not supportd";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} # processAttrString
|
42
dawg/xloc.pl
Executable file
42
dawg/xloc.pl
Executable file
|
@ -0,0 +1,42 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
# test and wrapper file for xloc.pm
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use xloc;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
my $arg = shift(@ARGV);
|
||||||
|
my $lang = shift(@ARGV);
|
||||||
|
my $path = "./$lang";
|
||||||
|
my $infoFile = "$path/info.txt";
|
||||||
|
|
||||||
|
die "info file $infoFile not found\n" if ! -s $infoFile;
|
||||||
|
|
||||||
|
|
||||||
|
my $xlocToken = xloc::ParseTileInfo($infoFile);
|
||||||
|
|
||||||
|
if ( $arg eq "-t" ) {
|
||||||
|
xloc::WriteMapFile( $xlocToken, 0, \*STDOUT );
|
||||||
|
} elsif ( $arg eq "-tn" ) {
|
||||||
|
xloc::WriteMapFile( $xlocToken, 1, \*STDOUT );
|
||||||
|
} elsif ( $arg eq "-v" ) {
|
||||||
|
xloc::WriteValuesFile( $xlocToken, \*STDOUT );
|
||||||
|
}
|
180
dawg/xloc.pm
Normal file
180
dawg/xloc.pm
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
# Copyright 2002 by Eric House (fixin@peak.org). All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
# The idea here is that all that matters about a language is stored in
|
||||||
|
# one file (possibly excepting rules for prepping a dictionary).
|
||||||
|
# There's a list of tile faces, counts and values, and also some
|
||||||
|
# name-value pairs as needed. The pairs come first, and then a list
|
||||||
|
# of tiles.
|
||||||
|
|
||||||
|
package xloc;
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
BEGIN {
|
||||||
|
use Exporter ();
|
||||||
|
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
|
||||||
|
|
||||||
|
$VERSION = 1.00;
|
||||||
|
|
||||||
|
@ISA = qw(Exporter);
|
||||||
|
@EXPORT = qw(&ParseTileInfo &GetNTiles &TileFace &TileValue
|
||||||
|
&TileCount &GetValue &WriteMapFile &WriteValuesFile);
|
||||||
|
%EXPORT_TAGS = ( );
|
||||||
|
}
|
||||||
|
|
||||||
|
# Returns what's meant to be an opaque object that can be passed back
|
||||||
|
# for queries. It's a hash with name-value pairs and an _INFO entry
|
||||||
|
# containing a list of tile info lists.
|
||||||
|
|
||||||
|
sub ParseTileInfo($) {
|
||||||
|
my ( $filePath ) = @_;
|
||||||
|
my %result;
|
||||||
|
|
||||||
|
open INPUT, "<$filePath" or die "couldn't open $filePath";
|
||||||
|
|
||||||
|
my $inTiles = 0;
|
||||||
|
my @tiles;
|
||||||
|
while ( <INPUT> ) {
|
||||||
|
|
||||||
|
chomp;
|
||||||
|
s/\#.*$//;
|
||||||
|
s/^\s*$//; # nuke all-white-space lines
|
||||||
|
next if !length;
|
||||||
|
|
||||||
|
if ( $inTiles ) {
|
||||||
|
if ( /<END_TILES>/ ) {
|
||||||
|
last;
|
||||||
|
} else {
|
||||||
|
my ( $count, $val, $face ) = m/^\s*(\w+)\s+(\w+)\s+(.*)\s*$/;
|
||||||
|
push @tiles, [ $count, $val, $face ];
|
||||||
|
}
|
||||||
|
} elsif ( /\w:/ ) {
|
||||||
|
my ( $nam, $val ) = split ':', $_, 2;
|
||||||
|
$result{$nam} .= $val;
|
||||||
|
} elsif ( /<BEGIN_TILES>/ ) {
|
||||||
|
$inTiles = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
close INPUT;
|
||||||
|
|
||||||
|
$result{"_TILES"} = [ @tiles ];
|
||||||
|
|
||||||
|
return \%result;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub GetNTiles($) {
|
||||||
|
my ( $hashR ) = @_;
|
||||||
|
|
||||||
|
my $listR = ${$hashR}{"_TILES"};
|
||||||
|
|
||||||
|
return 0 + @{$listR};
|
||||||
|
}
|
||||||
|
|
||||||
|
sub GetValue($$) {
|
||||||
|
my ( $hashR, $name ) = @_;
|
||||||
|
return ${$hashR}{$name};
|
||||||
|
}
|
||||||
|
|
||||||
|
sub WriteMapFile($$$) {
|
||||||
|
my ( $hashR, $unicode, $fhr ) = @_;
|
||||||
|
|
||||||
|
my $packStr;
|
||||||
|
if ( $unicode ) {
|
||||||
|
$packStr = "n";
|
||||||
|
} else {
|
||||||
|
$packStr = "C";
|
||||||
|
}
|
||||||
|
|
||||||
|
my $count = GetNTiles($hashR);
|
||||||
|
my $specialCount = 0;
|
||||||
|
for ( my $i = 0; $i < $count; ++$i ) {
|
||||||
|
my $tileR = GetNthTile( $hashR, $i );
|
||||||
|
my $str = ${$tileR}[2];
|
||||||
|
|
||||||
|
if ( $str =~ /\'(.)\'/ ) {
|
||||||
|
print $fhr pack($packStr, ord($1) );
|
||||||
|
} elsif ( $str =~ /\"(.+)\"/ ) {
|
||||||
|
print $fhr pack($packStr, $specialCount++ );
|
||||||
|
} elsif ( $str =~ /(\d+)/ ) {
|
||||||
|
print $fhr pack( $packStr, chr($1) );
|
||||||
|
} else {
|
||||||
|
die "WriteMapFile: unrecognized face format $str";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} # WriteMapFile
|
||||||
|
|
||||||
|
sub WriteValuesFile($$) {
|
||||||
|
my ( $hashR, $fhr ) = @_;
|
||||||
|
|
||||||
|
my $header = GetValue( $hashR,"XLOC_HEADER" );
|
||||||
|
die "no XLOC_HEADER found" if ! $header;
|
||||||
|
|
||||||
|
print STDERR "header is $header\n";
|
||||||
|
|
||||||
|
print $fhr pack( "n", hex($header) );
|
||||||
|
|
||||||
|
my $count = GetNTiles($hashR);
|
||||||
|
for ( my $i = 0; $i < $count; ++$i ) {
|
||||||
|
my $tileR = GetNthTile( $hashR, $i );
|
||||||
|
|
||||||
|
print $fhr pack( "c", TileValue($tileR) );
|
||||||
|
print $fhr pack( "c", TileCount($tileR) );
|
||||||
|
}
|
||||||
|
|
||||||
|
} # WriteValuesFile
|
||||||
|
|
||||||
|
sub GetNthTile($$) {
|
||||||
|
my ( $hashR, $n ) = @_;
|
||||||
|
my $listR = ${$hashR}{"_TILES"};
|
||||||
|
|
||||||
|
return ${$listR}[$n];
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TileFace($) {
|
||||||
|
my ( $tileR ) = @_;
|
||||||
|
|
||||||
|
my $str = ${$tileR}[2];
|
||||||
|
|
||||||
|
if ( $str =~ /\'(.)\'/ ) {
|
||||||
|
return $1;
|
||||||
|
} elsif ( $str =~ /\"(.+)\"/ ) {
|
||||||
|
return $1;
|
||||||
|
} elsif ( $str =~ /(\d+)/ ) {
|
||||||
|
return chr($1);
|
||||||
|
} else {
|
||||||
|
die "TileFace: unrecognized face format: $str";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TileValue($) {
|
||||||
|
my ( $tileR ) = @_;
|
||||||
|
|
||||||
|
return ${$tileR}[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
sub TileCount($) {
|
||||||
|
my ( $tileR ) = @_;
|
||||||
|
|
||||||
|
return ${$tileR}[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
1;
|
Loading…
Add table
Reference in a new issue