mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-13 08:01:33 +01:00
first set of changes formed by applying diff of android_branch's
dawg/ directory against unicode_branch's. The two branches seem to have no common ancestor -- probably didn't survive translation from svn -- so this is the best I can do. This checkin is all the files that were modified by the patch plus a couple of simple additions. Next I'll be adding directories that the patch created. It also reintroduced a bunch of .cvsignore files; I won't check those in.
This commit is contained in:
parent
78be732c64
commit
79990bc7b1
22 changed files with 423 additions and 226 deletions
|
@ -33,7 +33,7 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
LANG_SPECIAL_INFO = \
|
LANG_SPECIAL_INFO = \
|
||||||
"L-L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
|
"L·L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
|
||||||
"NY" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \
|
"NY" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \
|
||||||
"QU" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \
|
"QU" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \
|
||||||
|
|
||||||
|
|
|
@ -75,7 +75,7 @@ XLOC_HEADER:0x8C00
|
||||||
8 1 'I'
|
8 1 'I'
|
||||||
1 8 'J'
|
1 8 'J'
|
||||||
4 1 'L'
|
4 1 'L'
|
||||||
1 10 {"L-L"}
|
1 10 {"L·L"}
|
||||||
3 2 'M'
|
3 2 'M'
|
||||||
6 1 'N'
|
6 1 'N'
|
||||||
1 10 {"NY"}
|
1 10 {"NY"}
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, write to the Free Software
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
TARGET_TYPE ?= FRANK
|
TARGET_TYPE ?= WINCE
|
||||||
|
|
||||||
include ../Makefile.langcommon
|
include ../Makefile.langcommon
|
||||||
|
|
||||||
|
|
36
xwords4/dawg/English/Makefile.CollegeEng
Normal file
36
xwords4/dawg/English/Makefile.CollegeEng
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# -*- mode: makefile; compile-command: "make -f Makefile.CollegeEng"; -*-
|
||||||
|
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
XWLANG=CollegeEng
|
||||||
|
LANGCODE=en_US
|
||||||
|
TARGET_TYPE=WINCE
|
||||||
|
|
||||||
|
include ../Makefile.2to8
|
||||||
|
|
||||||
|
include ../Makefile.langcommon
|
||||||
|
|
||||||
|
SOURCEDICT ?= $(XWDICTPATH)/English/CollegeEng.dict.gz
|
||||||
|
|
||||||
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
||||||
|
zcat $< | tr -d '\r' | tr [a-z] [A-Z] | grep -e "^[A-Z]\{2,15\}$$" | \
|
||||||
|
gzip -c > $@
|
||||||
|
|
||||||
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
# "parent" Makefile.langcommon in the parent directory.
|
||||||
|
|
||||||
|
clean: clean_common
|
||||||
|
rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb
|
|
@ -15,9 +15,9 @@
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, write to the Free Software
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
XWLANG=OWL2_
|
XWLANG=OWL2
|
||||||
LANGCODE=en_US
|
LANGCODE=en_US
|
||||||
TARGET_TYPE=PALM
|
TARGET_TYPE?=PALM
|
||||||
|
|
||||||
include ../Makefile.2to8
|
include ../Makefile.2to8
|
||||||
|
|
||||||
|
|
37
xwords4/dawg/English/Makefile.TWL06
Normal file
37
xwords4/dawg/English/Makefile.TWL06
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
# -*- mode: makefile; compile-command: "make -f Makefile.TWL06"; -*-
|
||||||
|
# Copyright 2002-2010 by Eric House (xwords@eehouse.org). All rights
|
||||||
|
# reserved.
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
XWLANG=TWL06
|
||||||
|
LANGCODE=en_US
|
||||||
|
TARGET_TYPE=WINCE
|
||||||
|
|
||||||
|
include ../Makefile.2to8
|
||||||
|
|
||||||
|
include ../Makefile.langcommon
|
||||||
|
|
||||||
|
# from http://www.3zsoftware.com/en/wordmagic/lists.php
|
||||||
|
SOURCEDICT ?= $(XWDICTPATH)/English/twl06.zip
|
||||||
|
|
||||||
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
||||||
|
zcat $< | grep -e "^[A-Z]\{2,15\}$$" | gzip -c > $@
|
||||||
|
|
||||||
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
# "parent" Makefile.langcommon in the parent directory.
|
||||||
|
|
||||||
|
clean: clean_common
|
||||||
|
rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb
|
|
@ -1,5 +1,6 @@
|
||||||
# -*- mode: makefile; coding: iso-8859-1; -*-
|
# -*- mode: makefile; coding: utf-8; -*-
|
||||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org). All
|
||||||
|
# rights reserved.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU General Public License
|
# modify it under the terms of the GNU General Public License
|
||||||
|
@ -17,7 +18,7 @@
|
||||||
|
|
||||||
XWLANG = German
|
XWLANG = German
|
||||||
LANGCODE = de_DE
|
LANGCODE = de_DE
|
||||||
ENC = ISO-8859-1
|
ENC = UTF-8
|
||||||
|
|
||||||
TARGET_TYPE ?= WINCE
|
TARGET_TYPE ?= WINCE
|
||||||
|
|
||||||
|
@ -28,11 +29,11 @@ include ../Makefile.langcommon
|
||||||
SOURCEDICT ?= $(XWDICTPATH)/German/HansGerman.dict.gz
|
SOURCEDICT ?= $(XWDICTPATH)/German/HansGerman.dict.gz
|
||||||
|
|
||||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
||||||
zcat $< | tr [a-zäöü] [A-ZÄÖÜ] | \
|
zcat $< \
|
||||||
LANG=$(LANGCODE):$(ENC) sed -e 's/ß/SS/g' | \
|
| tr [a-zäöü] [A-ZÄÖÜ] \
|
||||||
LANG=$(LANGCODE):$(ENC) grep '[AEIOUÄÖÜ]' | \
|
| sed -e 's/ß/SS/g' \
|
||||||
LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÖÜ]\+$$' | \
|
| grep '^[A-ZÄÖÜ]*$$' \
|
||||||
gzip -c > $@
|
| gzip -c > $@
|
||||||
|
|
||||||
# Everything but creating of the Main.dict file is inherited from the
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
# "parent" Makefile.langcommon in the parent directory.
|
# "parent" Makefile.langcommon in the parent directory.
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
# -*- mode: conf; coding: utf-8; -*-
|
||||||
|
# Copyright 2002 - 2010 by Eric House (xwords@eehouse.org). All
|
||||||
|
# rights reserved.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU General Public License
|
# modify it under the terms of the GNU General Public License
|
||||||
|
@ -15,17 +17,18 @@
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
LANGCODE:de_DE
|
LANGCODE:de_DE
|
||||||
|
CHARSET: utf-8
|
||||||
|
|
||||||
# deal with DOS files
|
# deal with DOS files
|
||||||
LANGFILTER: tr -d '\r'
|
LANGFILTER: tr -d '\r'
|
||||||
# substitute for scharfes S (ß)
|
# substitute for scharfes S (ß)
|
||||||
LANGFILTER: | sed -e 's/ß/SS/g'
|
LANGFILTER: | sed -e 's/ß/SS/g'
|
||||||
# uppercase all
|
# uppercase all
|
||||||
LANGFILTER: | tr [a-zäöü] [A-ZÄÖÜ]
|
LANGFILTER: | tr [a-zäöü] [A-ZÄÖÜ]
|
||||||
# no words not containing a vowel
|
# no words not containing a vowel
|
||||||
LANGFILTER: | grep '[AEIOUÄÖÜ]'
|
LANGFILTER: | grep '[AEIOUÄÖÜ]'
|
||||||
# none with illegal chars
|
# none with illegal chars
|
||||||
LANGFILTER: | grep '^[A-ZÄÖÜ]\+$'
|
LANGFILTER: | grep '^[A-ZÄÖÜ]\+$'
|
||||||
|
|
||||||
# Until I can figure out how to force sort to use a locale's collation
|
# Until I can figure out how to force sort to use a locale's collation
|
||||||
# rules we can't trust sort in the filtering rules above and so must
|
# rules we can't trust sort in the filtering rules above and so must
|
||||||
|
@ -46,9 +49,8 @@ XLOC_HEADER:0x8300
|
||||||
|
|
||||||
<BEGIN_TILES>
|
<BEGIN_TILES>
|
||||||
2 0 {"_"}
|
2 0 {"_"}
|
||||||
5 1 'A'
|
5 1 'A'
|
||||||
# A mit umlaut
|
1 6 'Ä'
|
||||||
1 6 196
|
|
||||||
2 3 'B'
|
2 3 'B'
|
||||||
2 4 'C'
|
2 4 'C'
|
||||||
4 1 'D'
|
4 1 'D'
|
||||||
|
@ -63,16 +65,14 @@ XLOC_HEADER:0x8300
|
||||||
4 3 'M'
|
4 3 'M'
|
||||||
9 1 'N'
|
9 1 'N'
|
||||||
3 2 'O'
|
3 2 'O'
|
||||||
# O mit umlaut
|
1 8 'Ö'
|
||||||
1 8 214
|
|
||||||
1 4 'P'
|
1 4 'P'
|
||||||
1 10 'Q'
|
1 10 'Q'
|
||||||
6 1 'R'
|
6 1 'R'
|
||||||
7 1 'S'
|
7 1 'S'
|
||||||
6 1 'T'
|
6 1 'T'
|
||||||
6 1 'U'
|
6 1 'U'
|
||||||
# U mit umlaut
|
1 6 'Ü'
|
||||||
1 6 220
|
|
||||||
1 6 'V'
|
1 6 'V'
|
||||||
1 3 'W'
|
1 3 'W'
|
||||||
1 8 'X'
|
1 8 'X'
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
# -*- mode: makefile; -*-
|
||||||
|
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
|
||||||
|
# reserved.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU General Public License
|
# modify it under the terms of the GNU General Public License
|
||||||
|
@ -16,6 +18,7 @@
|
||||||
|
|
||||||
XWLANG = Hex
|
XWLANG = Hex
|
||||||
LANGCODE = hex
|
LANGCODE = hex
|
||||||
|
ENC = UTF-8
|
||||||
|
|
||||||
TARGET_TYPE = WINCE
|
TARGET_TYPE = WINCE
|
||||||
|
|
||||||
|
@ -24,14 +27,15 @@ include ../Makefile.2to8
|
||||||
include ../Makefile.langcommon
|
include ../Makefile.langcommon
|
||||||
|
|
||||||
# Pass in your own dict here by setting DICT
|
# Pass in your own dict here by setting DICT
|
||||||
DICT ?= $(XWDICTPATH)/English/SOWPODS_official.txt.gz
|
DICT ?= $(XWDICTPATH)/English/CSW.dict.gz
|
||||||
|
|
||||||
# Feel free to base this on whatever dictionary you have at hand. I'm
|
# tr 'AE' 'ÄË' doesn't work, so use sed.
|
||||||
# using CollegeEng for no particular reason.
|
|
||||||
$(XWLANG)Main.dict.gz: $(DICT)
|
$(XWLANG)Main.dict.gz: $(DICT)
|
||||||
@echo "building $@ from $<"
|
@echo "building $@ from $<"
|
||||||
zcat $< | tr [a-f] [A-F] | grep -e '^[A-F]\{2,8\}$$' | \
|
zcat $< | tr [a-f] [A-F] | grep -e '^[A-F]\{2,8\}$$' | \
|
||||||
echo CAFEBABE DEADBEEF $$(cat -) | \
|
echo CAFEBABE DEADBEEF $$(cat -) | \
|
||||||
|
sed 's/A/Ä/g' | \
|
||||||
|
sed 's/E/Ë/g' | \
|
||||||
tr ' ' '\n' | sort | gzip > $@
|
tr ' ' '\n' | sort | gzip > $@
|
||||||
|
|
||||||
# Everything but creating of the Main.dict file is inherited from the
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
# -*- mode: conf; -*-
|
||||||
|
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
|
||||||
|
# reserved.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU General Public License
|
# modify it under the terms of the GNU General Public License
|
||||||
|
@ -16,14 +18,14 @@
|
||||||
|
|
||||||
LANGCODE:HEX
|
LANGCODE:HEX
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# uppercase all
|
# uppercase all
|
||||||
LANGFILTER: tr [a-f] [A-F]
|
LANGFILTER: tr [a-f] [A-F]
|
||||||
LANGFILTER: | grep '^[A-F]*$'
|
LANGFILTER: | grep '^[A-F]*$'
|
||||||
|
LANGFILTER: | sed 's/A/Ä/'
|
||||||
|
LANGFILTER: | sed 's/E/Ë/'
|
||||||
LANGFILTER: | sort -u
|
LANGFILTER: | sort -u
|
||||||
|
|
||||||
D2DARGS: -nosort -term 10
|
D2DARGS: -term 10
|
||||||
|
|
||||||
LANGINFO: <p>The hex "language" is something of a programmers' joke.
|
LANGINFO: <p>The hex "language" is something of a programmers' joke.
|
||||||
LANGINFO: Hex is short for hexadecimal, a 16-base number system whose
|
LANGINFO: Hex is short for hexadecimal, a 16-base number system whose
|
||||||
|
@ -41,8 +43,6 @@ LANGINFO: tiles and games play quickly. That's also why the Hex
|
||||||
LANGINFO: tile set has four blanks; that's the largest number
|
LANGINFO: tile set has four blanks; that's the largest number
|
||||||
LANGINFO: Crosswords supports and I needed to test at the limit.</p>
|
LANGINFO: Crosswords supports and I needed to test at the limit.</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# High bit means "official". Next 7 bits are an enum where Hex==127
|
# High bit means "official". Next 7 bits are an enum where Hex==127
|
||||||
# (I just made that up; not sure what it was originally.) Low byte is
|
# (I just made that up; not sure what it was originally.) Low byte is
|
||||||
# padding
|
# padding
|
||||||
|
@ -51,11 +51,11 @@ XLOC_HEADER:0xFF00
|
||||||
|
|
||||||
<BEGIN_TILES>
|
<BEGIN_TILES>
|
||||||
4 0 {"_"}
|
4 0 {"_"}
|
||||||
9 1 'A'
|
9 1 'Ä'
|
||||||
2 3 'B'
|
2 3 'B'
|
||||||
2 3 'C'
|
2 3 'C'
|
||||||
4 2 'D'
|
4 2 'D'
|
||||||
12 1 'E'
|
12 1 'Ë'
|
||||||
2 4 'F'
|
2 4 'F'
|
||||||
<END_TILES>
|
<END_TILES>
|
||||||
# should ignore all after the <END_TILES> above
|
# should ignore all after the <END_TILES> above
|
||||||
|
|
|
@ -204,16 +204,6 @@ endif
|
||||||
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
|
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
|
||||||
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
|
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
|
||||||
|
|
||||||
# a binary file (one byte) giving the number of tiles in the dict
|
|
||||||
charcount.bin: table.bin
|
|
||||||
ifdef NEWDAWG
|
|
||||||
siz=$$(ls -l $< | awk '{print $$5}'); \
|
|
||||||
perl -e "print pack(\"c\",$$siz/2)" > $@
|
|
||||||
else
|
|
||||||
siz=$$(wc -c $< | sed -e 's/$<//'); \
|
|
||||||
perl -e "print pack(\"c\",$$siz)" > $@
|
|
||||||
endif
|
|
||||||
|
|
||||||
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
|
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
|
||||||
cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
|
cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
|
||||||
frankspecials.bin $(XWLANG)StartLoc.bin \
|
frankspecials.bin $(XWLANG)StartLoc.bin \
|
||||||
|
@ -233,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
|
||||||
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
|
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
|
||||||
ifdef NEWDAWG
|
ifdef NEWDAWG
|
||||||
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
||||||
then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \
|
then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
|
||||||
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
||||||
then perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \
|
then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
|
||||||
elif true; \
|
elif true; \
|
||||||
then echo "Unexpected node size"; exit 1; \
|
then echo "Unexpected node size"; exit 1; \
|
||||||
fi
|
fi
|
||||||
|
@ -272,8 +262,19 @@ else
|
||||||
perl -I../ ../xloc.pl -t -out $@
|
perl -I../ ../xloc.pl -t -out $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
values.bin: ../xloc.pl
|
values.bin: ../xloc.pl
|
||||||
perl -I../ ../xloc.pl -v -out $@ $(ENCP)
|
perl -I../ ../xloc.pl -v -out $@
|
||||||
|
|
||||||
|
# a binary file, two bytes, one giving the size of tiles data and the
|
||||||
|
# other the number of tiles in the dict. Tiles data is utf-8 and so
|
||||||
|
# number is not derivable from size.
|
||||||
|
charcount.bin: table.bin ../xloc.pl
|
||||||
|
SIZ=$$(ls -l $< | awk '{print $$5}'); \
|
||||||
|
perl -e "print pack(\"c\",$$SIZ)" > $@
|
||||||
|
TMP=/tmp/tmp$$$$; \
|
||||||
|
perl -I../ ../xloc.pl -s -out $$TMP; \
|
||||||
|
cat $$TMP >> $@; \
|
||||||
|
rm -f $$TMP
|
||||||
|
|
||||||
%.dict: %.dict.gz
|
%.dict: %.dict.gz
|
||||||
zcat $< > $@
|
zcat $< > $@
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# -*- coding: iso-8859-2; mode: Makefile; -*-
|
# -*- mode: Makefile; -*-
|
||||||
# Copyright 2002 - 2009 by Eric House (xwords@eehouse.org). All
|
# Copyright 2002 - 2009 by Eric House (xwords@eehouse.org). All
|
||||||
# rights reserved.
|
# rights reserved.
|
||||||
#
|
#
|
||||||
|
@ -16,9 +16,9 @@
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, write to the Free Software
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
XWLANG=Polish
|
XWLANG = Polish
|
||||||
LANGCODE=pl_PL
|
LANGCODE = pl_PL
|
||||||
ENC = ISO-8859-2
|
ENC = UTF-8
|
||||||
|
|
||||||
# DICT2DAWGARGS = -lang $(LANGCODE)
|
# DICT2DAWGARGS = -lang $(LANGCODE)
|
||||||
# DICT2DAWGARGS = -debug
|
# DICT2DAWGARGS = -debug
|
||||||
|
@ -29,12 +29,12 @@ include ../Makefile.2to8
|
||||||
|
|
||||||
include ../Makefile.langcommon
|
include ../Makefile.langcommon
|
||||||
|
|
||||||
SOURCEDICT ?= $(XWDICTPATH)/Polish/iso-8859-2/slowa.txt.gz
|
SOURCEDICT ?= $(XWDICTPATH)/Polish/slowa.txt.gz
|
||||||
|
|
||||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT)
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT)
|
||||||
zcat $< | tr -d '\r' \
|
zcat $< | tr -d '\r' \
|
||||||
| LANG=$(LANGCODE):$(ENC) tr [aąbcćdeęfghijklłmnńoóprsśtuwyzźż] [AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ] \
|
| tr [aąbcćdeęfghijklłmnńoóprsśtuwyzźż] [AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ] \
|
||||||
| LANG=$(LANGCODE):$(ENC) grep '^[AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ]*$$' \
|
| grep '^[AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ]*$$' \
|
||||||
| gzip > $@
|
| gzip > $@
|
||||||
|
|
||||||
# Everything but creating of the Main.dict file is inherited from the
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# -*- coding: iso-8859-2; mode: conf; -*-
|
# -*- mode: conf; -*-
|
||||||
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
|
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
|
||||||
# reserved.
|
# reserved.
|
||||||
#
|
#
|
||||||
|
@ -17,12 +17,12 @@
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
LANGCODE:pl_PL
|
LANGCODE:pl_PL
|
||||||
CHARSET:iso-8859-2
|
CHARSET:utf-8
|
||||||
|
|
||||||
# deal with DOS files
|
# deal with DOS files
|
||||||
LANGFILTER: tr -d '\r'
|
LANGFILTER: tr -d '\r'
|
||||||
LANGFILTER: | tr [a-pr-uwyząćęłńóśźż] [A-PR-UWYZĄĆĘŁŃÓŚŹŻ]
|
LANGFILTER: | tr [a-pr-uwyząćęłńóśźż] [A-PR-UWYZĄĆĘŁŃÓŚŹŻ]
|
||||||
LANGFILTER: | grep '^[A-PR-UWYZĄĆĘŁŃÓŚŹŻ]*$'
|
LANGFILTER: | grep '^[A-PR-UWYZĄĆĘŁŃÓŚŹŻ]*$'
|
||||||
LANGFILTER: | tr '\n' '\000'
|
LANGFILTER: | tr '\n' '\000'
|
||||||
|
|
||||||
D2DARGS: -r -term 0
|
D2DARGS: -r -term 0
|
||||||
|
@ -42,8 +42,8 @@ LANGINFO: this working.</p>
|
||||||
LANGINFO: <p>Note that the blank is the last tile here, while with all
|
LANGINFO: <p>Note that the blank is the last tile here, while with all
|
||||||
LANGINFO: other languages it's the first.</p>
|
LANGINFO: other languages it's the first.</p>
|
||||||
|
|
||||||
LANGINFO: <p>Also, please note that we currently require the files you
|
# LANGINFO: <p>Also, please note that we currently require the files you
|
||||||
LANGINFO: upload to use the iso-8859-2 character encoding.</p>
|
# LANGINFO: upload to use the iso-8859-2 character encoding.</p>
|
||||||
|
|
||||||
# High bit means "official". Next 7 bits are an enum where
|
# High bit means "official". Next 7 bits are an enum where
|
||||||
# Polish==8. Low byte is padding
|
# Polish==8. Low byte is padding
|
||||||
|
@ -51,13 +51,13 @@ XLOC_HEADER:0x8800
|
||||||
|
|
||||||
<BEGIN_TILES>
|
<BEGIN_TILES>
|
||||||
9 1 'A'
|
9 1 'A'
|
||||||
1 5 161 # 'Ą'
|
1 5 'Ą'
|
||||||
2 3 'B'
|
2 3 'B'
|
||||||
3 2 'C'
|
3 2 'C'
|
||||||
1 6 198 # 'Ć'
|
1 6 'Ć'
|
||||||
3 2 'D'
|
3 2 'D'
|
||||||
7 1 'E'
|
7 1 'E'
|
||||||
1 5 202 # 'Ę'
|
1 5 'Ę'
|
||||||
1 5 'F'
|
1 5 'F'
|
||||||
2 3 'G'
|
2 3 'G'
|
||||||
2 3 'H'
|
2 3 'H'
|
||||||
|
@ -65,23 +65,23 @@ XLOC_HEADER:0x8800
|
||||||
2 3 'J'
|
2 3 'J'
|
||||||
3 3 'K'
|
3 3 'K'
|
||||||
3 2 'L'
|
3 2 'L'
|
||||||
2 3 163 # 'Ł'
|
2 3 'Ł'
|
||||||
3 2 'M'
|
3 2 'M'
|
||||||
5 1 'N'
|
5 1 'N'
|
||||||
1 7 209 # 'Ń'
|
1 7 'Ń'
|
||||||
6 1 'O'
|
6 1 'O'
|
||||||
1 5 211 # 'Ó'
|
1 5 'Ó'
|
||||||
3 2 'P'
|
3 2 'P'
|
||||||
4 1 'R'
|
4 1 'R'
|
||||||
4 1 'S'
|
4 1 'S'
|
||||||
1 5 166 # 'Ś'
|
1 5 'Ś'
|
||||||
3 2 'T'
|
3 2 'T'
|
||||||
2 3 'U'
|
2 3 'U'
|
||||||
4 1 'W'
|
4 1 'W'
|
||||||
4 2 'Y'
|
4 2 'Y'
|
||||||
5 1 'Z'
|
5 1 'Z'
|
||||||
1 9 172 # 'Ź'
|
1 9 'Ź'
|
||||||
1 5 175 # 'Ż'
|
1 5 'Ż'
|
||||||
|
|
||||||
# the blank *must* be last here!!!
|
# the blank *must* be last here!!!
|
||||||
2 0 {"_"}
|
2 0 {"_"}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# -*-mode: Makefile; compile-command: "make all"; coding: iso-8859-1; -*-
|
# -*-mode: Makefile; compile-command: "make all"; coding: utf-8; -*-
|
||||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
|
@ -18,7 +18,7 @@
|
||||||
XWLANG = SpanishFAA41
|
XWLANG = SpanishFAA41
|
||||||
LANGCODE = es_ES
|
LANGCODE = es_ES
|
||||||
TARGET_TYPE ?= WINCE
|
TARGET_TYPE ?= WINCE
|
||||||
ENC = ISO-8859-1
|
ENC = UTF-8
|
||||||
|
|
||||||
ifeq ($(TARGET_TYPE),PALM)
|
ifeq ($(TARGET_TYPE),PALM)
|
||||||
PBITMS = ./bmps/palm
|
PBITMS = ./bmps/palm
|
||||||
|
@ -44,14 +44,13 @@ include ../Makefile.langcommon
|
||||||
#$(LANG)Main.dict.gz: SpanishMain.dict.gz
|
#$(LANG)Main.dict.gz: SpanishMain.dict.gz
|
||||||
# ln -s $< $@
|
# ln -s $< $@
|
||||||
|
|
||||||
SOURCEDICT ?= $(XWDICTPATH)/Spanish/FAA_4.1.txt.gz
|
SOURCEDICT ?= $(XWDICTPATH)/Spanish/FAA_4.1.utf8.gz
|
||||||
|
|
||||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
||||||
zcat $< \
|
zcat $< \
|
||||||
| tr -d '\r' \
|
| tr -d '\r' \
|
||||||
| tr '\207\216\222\227\234\237\226' 'aeiouu\321' \
|
| tr [a-zñ] [A-ZÑ] \
|
||||||
| tr [a-zñ] [A-ZÑ] \
|
| LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VX-ZÑ]*$$' \
|
||||||
| LANG=$(LANGCODE):$(ENC) grep '^[A-JL-VX-ZÑ]*$$' \
|
|
||||||
| sed 's/CH/1/g' \
|
| sed 's/CH/1/g' \
|
||||||
| sed 's/LL/2/g' \
|
| sed 's/LL/2/g' \
|
||||||
| sed 's/RR/3/g' \
|
| sed 's/RR/3/g' \
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# -*- mode: conf; coding: iso-8859-1; -*-
|
# -*- mode: conf; coding: utf-8; -*-
|
||||||
# Copyright 2002-2006 by Eric House (xwords@eehouse.org). All rights
|
# Copyright 2002-2006 by Eric House (xwords@eehouse.org). All rights
|
||||||
# reserved.
|
# reserved.
|
||||||
#
|
#
|
||||||
|
@ -20,6 +20,7 @@
|
||||||
# below
|
# below
|
||||||
|
|
||||||
NEEDSSORT:true
|
NEEDSSORT:true
|
||||||
|
CHARSET: utf-8
|
||||||
|
|
||||||
# MS-DOS CR chars go bye-bye
|
# MS-DOS CR chars go bye-bye
|
||||||
LANGFILTER: tr -d '\r'
|
LANGFILTER: tr -d '\r'
|
||||||
|
@ -27,9 +28,9 @@ LANGFILTER: tr -d '\r'
|
||||||
# convert accented vowels
|
# convert accented vowels
|
||||||
LANGFILTER: | tr '\207\216\222\227\234\237\226' 'aeiouu\321'
|
LANGFILTER: | tr '\207\216\222\227\234\237\226' 'aeiouu\321'
|
||||||
# uppercase
|
# uppercase
|
||||||
LANGFILTER: | tr [a-zñ] [A-ZÑ]
|
LANGFILTER: | tr [a-zñ] [A-ZÑ]
|
||||||
# remove words with illegal letters
|
# remove words with illegal letters
|
||||||
LANGFILTER: | grep '^[[A-JL-VX-ZÑ]*$'
|
LANGFILTER: | grep '^[[A-JL-VX-ZÑ]*$'
|
||||||
# substitute pairs (can't figure out how to use octal values)
|
# substitute pairs (can't figure out how to use octal values)
|
||||||
LANGFILTER: | sed 's/CH/1/g'
|
LANGFILTER: | sed 's/CH/1/g'
|
||||||
LANGFILTER: | sed 's/LL/2/g'
|
LANGFILTER: | sed 's/LL/2/g'
|
||||||
|
@ -43,7 +44,7 @@ LANGFILTER: | sort -u -z
|
||||||
D2DARGS: -r -term 0
|
D2DARGS: -r -term 0
|
||||||
|
|
||||||
LANGINFO: <p>Spanish words include all letters in the English alphabet
|
LANGINFO: <p>Spanish words include all letters in the English alphabet
|
||||||
LANGINFO: except "K" and "W", and with "Ñ" added. Since there are no
|
LANGINFO: except "K" and "W", and with "Ñ" added. Since there are no
|
||||||
LANGINFO: tiles for accented vowels, these are replaced by the
|
LANGINFO: tiles for accented vowels, these are replaced by the
|
||||||
LANGINFO: unaccented forms.</p>
|
LANGINFO: unaccented forms.</p>
|
||||||
|
|
||||||
|
@ -92,8 +93,7 @@ XLOC_HEADER:0x8600
|
||||||
1 8 {"LL", true, true}
|
1 8 {"LL", true, true}
|
||||||
2 3 'M'
|
2 3 'M'
|
||||||
5 1 'N'
|
5 1 'N'
|
||||||
# /*'N~'*/
|
1 8 'Ñ'
|
||||||
1 8 209
|
|
||||||
9 1 'O'
|
9 1 'O'
|
||||||
2 3 'P'
|
2 3 'P'
|
||||||
1 5 'Q'
|
1 5 'Q'
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# -*-mode: Makefile; coding: iso-8859-1; -*-
|
# -*-mode: Makefile; coding: utf-8; -*-
|
||||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
|
@ -17,7 +17,7 @@
|
||||||
|
|
||||||
XWLANG=Swedish
|
XWLANG=Swedish
|
||||||
LANGCODE=sv_SE
|
LANGCODE=sv_SE
|
||||||
ENC = ISO-8859-1
|
ENC = UTF-8
|
||||||
|
|
||||||
# Swedish has too many chars for the old format.
|
# Swedish has too many chars for the old format.
|
||||||
NEWDAWG=whatever
|
NEWDAWG=whatever
|
||||||
|
@ -28,14 +28,14 @@ include ../Makefile.2to8
|
||||||
|
|
||||||
include ../Makefile.langcommon
|
include ../Makefile.langcommon
|
||||||
|
|
||||||
SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.dict.gz
|
SOURCEDICT ?= $(XWDICTPATH)/Swedish/swedish15.utf8.gz
|
||||||
|
|
||||||
# Q and W are not available as tiles, but I'm told there's a custom in
|
# Q and W are not available as tiles, but I'm told there's a custom in
|
||||||
# Swedish play of allowing blanks to stand for those letters as well.
|
# Swedish play of allowing blanks to stand for those letters as well.
|
||||||
# So we don't exclude words with those letters from the dictionary.
|
# So we don't exclude words with those letters from the dictionary.
|
||||||
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
$(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
|
||||||
zcat $< | tr [a-zäåæöü] [A-ZÄÅÆÖÜ] | \
|
zcat $< | tr [a-zäåæöü] [A-ZÄÅÆÖÜ] | \
|
||||||
LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \
|
LANG=$(LANGCODE):$(ENC) grep '^[A-ZÄÅÆÖÜ]\{2,15\}$$' | \
|
||||||
gzip -c > $@
|
gzip -c > $@
|
||||||
|
|
||||||
# Everything but creating of the Main.dict file is inherited from the
|
# Everything but creating of the Main.dict file is inherited from the
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# -*- mode: conf; coding: iso-8859-1; -*-
|
# -*- mode: conf; coding: utf-8; -*-
|
||||||
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
|
@ -15,16 +15,17 @@
|
||||||
# along with this program; if not, write to the Free Software
|
# along with this program; if not, write to the Free Software
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
CHARSET: utf-8
|
||||||
LANGCODE:sv_SE
|
LANGCODE:sv_SE
|
||||||
|
|
||||||
LANGFILTER: tr -d '\r'
|
LANGFILTER: tr -d '\r'
|
||||||
LANGFILTER: | tr [a-zäåæöü] [A-ZÄÅÆÖÜ]
|
LANGFILTER: | tr [a-zäåæöü] [A-ZÄÅÆÖÜ]
|
||||||
LANGFILTER: | grep '^[A-ZÄÅÆÖÜ]*$'
|
LANGFILTER: | grep '^[A-ZÄÅÆÖÜ]*$'
|
||||||
|
|
||||||
D2DARGS: -r -term 10
|
D2DARGS: -r -term 10
|
||||||
|
|
||||||
LANGINFO: <p>From an English-speaker's perspective, Swedish drops Q
|
LANGINFO: <p>From an English-speaker's perspective, Swedish drops Q
|
||||||
LANGINFO: and W, and adds Ä, Å, Æ, Ö and Ü.</p>
|
LANGINFO: and W, and adds Ä, Å, Æ, Ö and Ü.</p>
|
||||||
|
|
||||||
# High bit means "official". Next 7 bits are an enum where
|
# High bit means "official". Next 7 bits are an enum where
|
||||||
# Swedish==7. Low byte is padding
|
# Swedish==7. Low byte is padding
|
||||||
|
@ -36,11 +37,11 @@ XLOC_HEADER:0x8700
|
||||||
2 0 {"_"}
|
2 0 {"_"}
|
||||||
8 1 'A'
|
8 1 'A'
|
||||||
# A with two dots
|
# A with two dots
|
||||||
2 3 'Ä'
|
2 3 'Ä'
|
||||||
# A with circle
|
# A with circle
|
||||||
2 4 'Å'
|
2 4 'Å'
|
||||||
# Æ tile only available for blanks
|
# Æ tile only available for blanks
|
||||||
0 1 'Æ'
|
0 1 'Æ'
|
||||||
2 4 'B'
|
2 4 'B'
|
||||||
1 8 'C'
|
1 8 'C'
|
||||||
5 1 'D'
|
5 1 'D'
|
||||||
|
@ -56,7 +57,7 @@ XLOC_HEADER:0x8700
|
||||||
6 1 'N'
|
6 1 'N'
|
||||||
5 2 'O'
|
5 2 'O'
|
||||||
# O with two dots
|
# O with two dots
|
||||||
2 4 'Ö'
|
2 4 'Ö'
|
||||||
2 4 'P'
|
2 4 'P'
|
||||||
# Q tile only available for blanks
|
# Q tile only available for blanks
|
||||||
0 1 'Q'
|
0 1 'Q'
|
||||||
|
@ -64,13 +65,12 @@ XLOC_HEADER:0x8700
|
||||||
8 1 'S'
|
8 1 'S'
|
||||||
8 1 'T'
|
8 1 'T'
|
||||||
3 4 'U'
|
3 4 'U'
|
||||||
# Ü tile only available for blanks
|
# Ü tile only available for blanks
|
||||||
0 1 'Ü'
|
0 1 'Ü'
|
||||||
2 3 'V'
|
2 3 'V'
|
||||||
# W tile only available for blanks
|
# W tile only available for blanks
|
||||||
0 1 'W'
|
0 1 'W'
|
||||||
1 8 'X'
|
1 8 'X'
|
||||||
1 7 'Y'
|
1 7 'Y'
|
||||||
1 10 'Z'
|
1 10 'Z'
|
||||||
|
|
||||||
<END_TILES>
|
<END_TILES>
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/perl
|
#!/usr/bin/perl -CS
|
||||||
#
|
#
|
||||||
# Copyright 2004 by Eric House (xwords@eehouse.org)
|
# Copyright 2004 - 2009 by Eric House (xwords@eehouse.org)
|
||||||
#
|
#
|
||||||
# This program is free software; you can redistribute it and/or
|
# This program is free software; you can redistribute it and/or
|
||||||
# modify it under the terms of the GNU General Public License
|
# modify it under the terms of the GNU General Public License
|
||||||
|
@ -22,9 +22,12 @@
|
||||||
|
|
||||||
use strict;
|
use strict;
|
||||||
use Fcntl;
|
use Fcntl;
|
||||||
|
use Encode 'from_to';
|
||||||
|
use Encode;
|
||||||
|
|
||||||
my $gInFile;
|
my $gInFile;
|
||||||
my $gDoRaw = 0;
|
my $gDoRaw = 0;
|
||||||
|
my $gDoJSON = 0;
|
||||||
my $gFileType;
|
my $gFileType;
|
||||||
my $gNodeSize;
|
my $gNodeSize;
|
||||||
|
|
||||||
|
@ -33,7 +36,7 @@ sub systell { sysseek($_[0], 0, SEEK_CUR) }
|
||||||
|
|
||||||
sub usage() {
|
sub usage() {
|
||||||
print STDERR "USAGE: $0 "
|
print STDERR "USAGE: $0 "
|
||||||
. "[-raw] "
|
. "[-raw | -json] "
|
||||||
. "-dict <xwdORpdb>"
|
. "-dict <xwdORpdb>"
|
||||||
. "\n"
|
. "\n"
|
||||||
. "\t(Takes a .pdb or .xwd and prints its words to stdout)\n";
|
. "\t(Takes a .pdb or .xwd and prints its words to stdout)\n";
|
||||||
|
@ -45,6 +48,8 @@ sub parseARGV() {
|
||||||
while ( my $parm = shift(@ARGV) ) {
|
while ( my $parm = shift(@ARGV) ) {
|
||||||
if ( $parm eq "-raw" ) {
|
if ( $parm eq "-raw" ) {
|
||||||
$gDoRaw = 1;
|
$gDoRaw = 1;
|
||||||
|
} elsif ( $parm eq "-json" ) {
|
||||||
|
$gDoJSON = 1;
|
||||||
} elsif ( $parm eq "-dict" ) {
|
} elsif ( $parm eq "-dict" ) {
|
||||||
$gInFile = shift(@ARGV);
|
$gInFile = shift(@ARGV);
|
||||||
} else {
|
} else {
|
||||||
|
@ -72,18 +77,32 @@ sub countSpecials($) {
|
||||||
sub readXWDFaces($$$) {
|
sub readXWDFaces($$$) {
|
||||||
my ( $fh, $facRef, $nSpecials ) = @_;
|
my ( $fh, $facRef, $nSpecials ) = @_;
|
||||||
|
|
||||||
my $buf;
|
my ( $buf, $nRead, $nChars, $nBytes );
|
||||||
my $nRead = sysread( $fh, $buf, 1 );
|
$nRead = sysread( $fh, $buf, 1 );
|
||||||
my $nChars = unpack( 'c', $buf );
|
$nBytes = unpack( 'c', $buf );
|
||||||
|
printf STDERR "nBytes of faces: %d\n", $nBytes;
|
||||||
|
$nRead = sysread( $fh, $buf, 1 );
|
||||||
|
$nChars = unpack( 'c', $buf );
|
||||||
|
printf STDERR "nChars of faces: %d\n", $nChars;
|
||||||
|
|
||||||
|
binmode( $fh, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
|
||||||
|
sysread( $fh, $buf, $nChars );
|
||||||
|
length($buf) == $nChars or die "didn't read expected number of bytes\n";
|
||||||
|
binmode( $fh ) or die "binmode failed\n";
|
||||||
|
|
||||||
|
print STDERR "string now: $buf\n";
|
||||||
my @faces;
|
my @faces;
|
||||||
for ( my $i = 0; $i < $nChars; ++$i ) {
|
for ( my $ii = 0; $ii < $nChars; ++$ii ) {
|
||||||
my $nRead = sysread( $fh, $buf, 2 );
|
my $chr = substr( $buf, $ii, 1 );
|
||||||
push( @faces, chr(unpack( "n", $buf ) ) );
|
print STDERR "pushing $chr \n";
|
||||||
|
push( @faces, $chr );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
printf STDERR "at 0x%x after reading faces\n", systell($fh);
|
||||||
|
|
||||||
${$nSpecials} = countSpecials( \@faces );
|
${$nSpecials} = countSpecials( \@faces );
|
||||||
@{$facRef} = @faces;
|
@{$facRef} = @faces;
|
||||||
|
printf STDERR "readXWDFaces=>%d\n", $nChars;
|
||||||
return $nChars;
|
return $nChars;
|
||||||
} # readXWDFaces
|
} # readXWDFaces
|
||||||
|
|
||||||
|
@ -99,6 +118,7 @@ sub skipBitmap($) {
|
||||||
|
|
||||||
sysread( $fh, $buf, $nBytes );
|
sysread( $fh, $buf, $nBytes );
|
||||||
}
|
}
|
||||||
|
printf STDERR "skipBitmap\n";
|
||||||
} # skipBitmap
|
} # skipBitmap
|
||||||
|
|
||||||
sub getSpecials($$$) {
|
sub getSpecials($$$) {
|
||||||
|
@ -138,9 +158,9 @@ sub readNodesToEnd($) {
|
||||||
|
|
||||||
sub nodeSizeFromFlags($) {
|
sub nodeSizeFromFlags($) {
|
||||||
my ( $flags ) = @_;
|
my ( $flags ) = @_;
|
||||||
if ( $flags == 2 ) {
|
if ( $flags == 4 ) {
|
||||||
return 3;
|
return 3;
|
||||||
} elsif ( $flags == 3 ) {
|
} elsif ( $flags == 5 ) {
|
||||||
return 4;
|
return 4;
|
||||||
} else {
|
} else {
|
||||||
die "invalid dict flags $flags";
|
die "invalid dict flags $flags";
|
||||||
|
@ -161,6 +181,7 @@ sub mergeSpecials($$) {
|
||||||
sub prepXWD($$$$) {
|
sub prepXWD($$$$) {
|
||||||
my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
|
my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
|
||||||
|
|
||||||
|
printf STDERR "at 0x%x at start\n", systell($fh);
|
||||||
my $buf;
|
my $buf;
|
||||||
my $nRead = sysread( $fh, $buf, 2 );
|
my $nRead = sysread( $fh, $buf, 2 );
|
||||||
my $flags = unpack( "n", $buf );
|
my $flags = unpack( "n", $buf );
|
||||||
|
@ -170,24 +191,30 @@ sub prepXWD($$$$) {
|
||||||
my $nSpecials;
|
my $nSpecials;
|
||||||
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
|
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
|
||||||
|
|
||||||
|
printf STDERR "at 0x%x before header read\n", systell($fh);
|
||||||
# skip xloc header
|
# skip xloc header
|
||||||
$nRead = sysread( $fh, $buf, 2 );
|
$nRead = sysread( $fh, $buf, 2 );
|
||||||
|
|
||||||
# skip values info.
|
# skip values info.
|
||||||
|
printf STDERR "at 0x%x before reading %d values\n", systell($fh), $faceCount;
|
||||||
sysread( $fh, $buf, $faceCount * 2 );
|
sysread( $fh, $buf, $faceCount * 2 );
|
||||||
|
printf STDERR "at 0x%x after values read\n", systell($fh);
|
||||||
|
|
||||||
|
printf STDERR "at 0x%x before specials read\n", systell($fh);
|
||||||
my @specials;
|
my @specials;
|
||||||
getSpecials( $fh, $nSpecials, \@specials );
|
getSpecials( $fh, $nSpecials, \@specials );
|
||||||
mergeSpecials( $facRef, \@specials );
|
mergeSpecials( $facRef, \@specials );
|
||||||
|
printf STDERR "at 0x%x after specials read\n", systell($fh);
|
||||||
|
|
||||||
# printf STDERR "at 0x%x before offset read\n", systell($fh);
|
printf STDERR "at 0x%x before offset read\n", systell($fh);
|
||||||
sysread( $fh, $buf, 4 );
|
sysread( $fh, $buf, 4 );
|
||||||
$$startRef = unpack( 'N', $buf );
|
$$startRef = unpack( 'N', $buf );
|
||||||
# print STDERR "startRef=$$startRef\n";
|
print STDERR "startRef=$$startRef\n";
|
||||||
|
|
||||||
my @nodes = readNodesToEnd( $fh );
|
my @nodes = readNodesToEnd( $fh );
|
||||||
|
|
||||||
@$nodesRef = @nodes;
|
@$nodesRef = @nodes;
|
||||||
|
print STDERR "prepXWD done\n";
|
||||||
} # prepXWD
|
} # prepXWD
|
||||||
|
|
||||||
sub readPDBSpecials($$$$$) {
|
sub readPDBSpecials($$$$$) {
|
||||||
|
@ -342,10 +369,52 @@ sub printNodes($$) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sub printStartJson($) {
|
||||||
|
my ( $startIndex ) = @_;
|
||||||
|
printf( " start: 0x%.8x,\n", $startIndex );
|
||||||
|
}
|
||||||
|
|
||||||
|
sub printCharsJson($) {
|
||||||
|
my ( $fr ) = @_;
|
||||||
|
print " chars: [ ";
|
||||||
|
foreach my $char (@$fr) {
|
||||||
|
print "\"$char\", "
|
||||||
|
}
|
||||||
|
print "],\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
sub printNodesJson($) {
|
||||||
|
my ( $nr ) = @_;
|
||||||
|
print " dawg: [\n";
|
||||||
|
|
||||||
|
my $len = @$nr;
|
||||||
|
my $newLine = 1;
|
||||||
|
for ( my $ii = 0; $ii < $len; ++$ii ) {
|
||||||
|
my $node = $$nr[$ii];
|
||||||
|
|
||||||
|
if ( $newLine ) {
|
||||||
|
printf( " /*%.6x*/ ", $ii );
|
||||||
|
$newLine = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf "0x%.8x, ", $node;
|
||||||
|
|
||||||
|
my ( $chrIndex, $nextEdge, $accepting, $lastEdge );
|
||||||
|
parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge );
|
||||||
|
if ( $lastEdge ) {
|
||||||
|
print "\n";
|
||||||
|
$newLine = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print "\n ],\n"
|
||||||
|
}
|
||||||
|
|
||||||
#################################################################
|
#################################################################
|
||||||
# main
|
# main
|
||||||
#################################################################
|
#################################################################
|
||||||
|
|
||||||
|
binmode( STDERR, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
|
||||||
|
|
||||||
parseARGV();
|
parseARGV();
|
||||||
|
|
||||||
|
@ -364,9 +433,17 @@ if ( $gFileType eq "xwd" ){
|
||||||
close INFILE;
|
close INFILE;
|
||||||
|
|
||||||
die "no nodes!!!" if 0 == @nodes;
|
die "no nodes!!!" if 0 == @nodes;
|
||||||
|
|
||||||
if ( $gDoRaw ) {
|
if ( $gDoRaw ) {
|
||||||
printNodes( \@nodes, \@faces );
|
printNodes( \@nodes, \@faces );
|
||||||
|
} elsif ( $gDoJSON ) {
|
||||||
|
print "dict = {\n";
|
||||||
|
printStartJson( $startIndex );
|
||||||
|
printCharsJson( \@faces );
|
||||||
|
printNodesJson( \@nodes );
|
||||||
|
print "}\n";
|
||||||
} else {
|
} else {
|
||||||
|
binmode( STDOUT, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
|
||||||
printDAWG( [], \@nodes, $startIndex, \@faces );
|
printDAWG( [], \@nodes, $startIndex, \@faces );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -78,7 +78,7 @@ static void (*gReadWordProc)(void) = NULL;
|
||||||
static NodeList gNodes; // final array of nodes
|
static NodeList gNodes; // final array of nodes
|
||||||
static unsigned int gNBytesPerOutfile = 0xFFFFFFFF;
|
static unsigned int gNBytesPerOutfile = 0xFFFFFFFF;
|
||||||
static char* gTableFile = NULL;
|
static char* gTableFile = NULL;
|
||||||
static bool gIsMultibyte = false;
|
static bool gIsMultibyte = true; // always true
|
||||||
static const char* gEncoding = NULL;
|
static const char* gEncoding = NULL;
|
||||||
static char* gOutFileBase = NULL;
|
static char* gOutFileBase = NULL;
|
||||||
static char* gStartNodeOut = NULL;
|
static char* gStartNodeOut = NULL;
|
||||||
|
@ -91,9 +91,9 @@ static const char* gLang = NULL;
|
||||||
static char* gBytesPerNodeFile = NULL; // where to write whether node
|
static char* gBytesPerNodeFile = NULL; // where to write whether node
|
||||||
// size 3 or 4
|
// size 3 or 4
|
||||||
int gWordCount = 0;
|
int gWordCount = 0;
|
||||||
std::map<Letter,wchar_t> gTableHash;
|
std::map<wchar_t,Letter> gTableHash;
|
||||||
int gBlankIndex;
|
int gBlankIndex;
|
||||||
std::vector<char> gRevMap;
|
std::vector<wchar_t> gRevMap;
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
bool gDebug = false;
|
bool gDebug = false;
|
||||||
#endif
|
#endif
|
||||||
|
@ -107,17 +107,19 @@ int gLimHigh = MAX_WORD_LEN;
|
||||||
|
|
||||||
|
|
||||||
// OWL is 1.7M
|
// OWL is 1.7M
|
||||||
#define MAX_POOL_SIZE (10 * 0x100000)
|
#define MAX_POOL_SIZE (10 * 0x100000 * sizeof(wchar_t))
|
||||||
#define ERROR_EXIT(...) error_exit( __LINE__, __VA_ARGS__ );
|
#define ERROR_EXIT(...) error_exit( __LINE__, __VA_ARGS__ );
|
||||||
|
#define VSIZE(a) (sizeof(a)/sizeof(a[0]))
|
||||||
|
|
||||||
static char* parseARGV( int argc, char** argv, const char** inFileName );
|
static char* parseARGV( int argc, char** argv, const char** inFileName );
|
||||||
static void usage( const char* name );
|
static void usage( const char* name );
|
||||||
static void error_exit( int line, const char* fmt, ... );
|
static void error_exit( int line, const char* fmt, ... );
|
||||||
static void makeTableHash( void );
|
static void makeTableHash( void );
|
||||||
|
static void printTableHash( void );
|
||||||
static WordList* parseAndSort( void );
|
static WordList* parseAndSort( void );
|
||||||
static void printWords( WordList* strings );
|
static void printWords( WordList* strings );
|
||||||
static bool firstBeforeSecond( const Letter* lhs, const Letter* rhs );
|
static bool firstBeforeSecond( const Letter* lhs, const Letter* rhs );
|
||||||
static char* tileToAscii( char* out, int outSize, const Letter* in );
|
static wchar_t* tilesToText( wchar_t* out, int outLen, const Letter* in );
|
||||||
static int buildNode( int depth );
|
static int buildNode( int depth );
|
||||||
static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling );
|
static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling );
|
||||||
static int addNodes( NodeList& newedgesR );
|
static int addNodes( NodeList& newedgesR );
|
||||||
|
@ -178,6 +180,7 @@ main( int argc, char** argv )
|
||||||
}
|
}
|
||||||
|
|
||||||
makeTableHash();
|
makeTableHash();
|
||||||
|
printTableHash();
|
||||||
|
|
||||||
// Do I need this stupid thing? Better to move the first row to
|
// Do I need this stupid thing? Better to move the first row to
|
||||||
// the front of the array and patch everything else. Or fix the
|
// the front of the array and patch everything else. Or fix the
|
||||||
|
@ -451,9 +454,9 @@ readFromSortedArray( void )
|
||||||
}
|
}
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if ( gDebug ) {
|
if ( gDebug ) {
|
||||||
char buf[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
fprintf( stderr, "%s: got word: %s\n", __func__,
|
fprintf( stderr, "%s: got word: %ls\n", __func__,
|
||||||
tileToAscii( buf, sizeof(buf), word ) );
|
tilesToText( buf, VSIZE(buf), word ) );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -473,13 +476,13 @@ readFromSortedArray( void )
|
||||||
&& !firstBeforeSecond( gCurrentWord, word ) ) {
|
&& !firstBeforeSecond( gCurrentWord, word ) ) {
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if ( gDebug ) {
|
if ( gDebug ) {
|
||||||
char buf1[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
char buf2[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
fprintf( stderr,
|
fprintf( stderr,
|
||||||
"%s: words %s and %s are the same or out of order\n",
|
"%s: words %ls and %ls are the same or out of order\n",
|
||||||
__func__,
|
__func__,
|
||||||
tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
|
tilesToText( buf1, VSIZE(buf1), gCurrentWord ),
|
||||||
tileToAscii( buf2, sizeof(buf2), word ) );
|
tilesToText( buf2, VSIZE(buf2), word ) );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
continue;
|
continue;
|
||||||
|
@ -492,9 +495,9 @@ readFromSortedArray( void )
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if ( gDebug ) {
|
if ( gDebug ) {
|
||||||
char buf[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
fprintf( stderr, "gCurrentWord now %s\n",
|
fprintf( stderr, "gCurrentWord now %ls\n",
|
||||||
tileToAscii( buf, sizeof(buf), gCurrentWord) );
|
tilesToText( buf, VSIZE(buf), gCurrentWord) );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
} // readFromSortedArray
|
} // readFromSortedArray
|
||||||
|
@ -516,6 +519,9 @@ getWideChar( FILE* file )
|
||||||
assert( 0 == ii );
|
assert( 0 == ii );
|
||||||
dest = byt;
|
dest = byt;
|
||||||
break;
|
break;
|
||||||
|
} else if ( byt < ' ' && 0 == ii ) {
|
||||||
|
dest = byt;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert( ii < 4 );
|
assert( ii < 4 );
|
||||||
|
@ -533,7 +539,7 @@ getWideChar( FILE* file )
|
||||||
} // getWideChar
|
} // getWideChar
|
||||||
|
|
||||||
static Letter*
|
static Letter*
|
||||||
readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
|
readOneWord( Letter* wordBuf, const int bufLen, int* lenp, bool* gotEOF )
|
||||||
{
|
{
|
||||||
Letter* result = NULL;
|
Letter* result = NULL;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
@ -545,7 +551,7 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
|
||||||
// return it. If no, start over ONLY IF the terminator was not
|
// return it. If no, start over ONLY IF the terminator was not
|
||||||
// EOF.
|
// EOF.
|
||||||
for ( ; ; ) {
|
for ( ; ; ) {
|
||||||
wchar_t byt = gIsMultibyte? getWideChar( gInFile ) : getc( gInFile );
|
wchar_t byt = getWideChar( gInFile );
|
||||||
|
|
||||||
// EOF is special: we don't try for another word even if
|
// EOF is special: we don't try for another word even if
|
||||||
// dropWord is true; we must leave now.
|
// dropWord is true; we must leave now.
|
||||||
|
@ -557,6 +563,13 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
|
||||||
if ( !dropWord && (count >= gLimLow) && (count <= gLimHigh) ) {
|
if ( !dropWord && (count >= gLimLow) && (count <= gLimHigh) ) {
|
||||||
assert( count < bufLen );
|
assert( count < bufLen );
|
||||||
wordBuf[count] = '\0';
|
wordBuf[count] = '\0';
|
||||||
|
#ifdef DEBUG
|
||||||
|
if ( gDebug ) {
|
||||||
|
wchar_t buf[T2ABUFLEN(count)];
|
||||||
|
fprintf( stderr, "%s: adding word: %ls\n",
|
||||||
|
__func__, tilesToText( buf, VSIZE(buf), wordBuf ) );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
result = wordBuf;
|
result = wordBuf;
|
||||||
*lenp = count;
|
*lenp = count;
|
||||||
++gWordCount;
|
++gWordCount;
|
||||||
|
@ -567,11 +580,12 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
|
||||||
}
|
}
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if ( gDebug ) {
|
if ( gDebug ) {
|
||||||
char buf[T2ABUFLEN(count)];
|
wchar_t buf[T2ABUFLEN(count)];
|
||||||
wordBuf[count] = '\0';
|
wordBuf[count] = '\0';
|
||||||
fprintf( stderr, "%s: dropping word (len %d>=%d): %s\n",
|
fprintf( stderr, "%s: dropping word (len %d >%d or <%d or "
|
||||||
__func__, count, gLimHigh,
|
"dropWord:%d): %ls\n", __func__, count, gLimHigh,
|
||||||
tileToAscii( buf, sizeof(buf), wordBuf ) );
|
gLimLow, (int)dropWord,
|
||||||
|
tilesToText( buf, VSIZE(buf), wordBuf ) );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
count = 0; // we'll start over
|
count = 0; // we'll start over
|
||||||
|
@ -579,43 +593,43 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
|
||||||
|
|
||||||
} else if ( count >= bufLen ) {
|
} else if ( count >= bufLen ) {
|
||||||
// Just drop it...
|
// Just drop it...
|
||||||
|
assert(0); // Fix this -- but need to warn when out of
|
||||||
|
// memory!!!
|
||||||
dropWord = true;
|
dropWord = true;
|
||||||
|
|
||||||
// Don't call into the hashtable twice here!!
|
// Don't call into the hashtable twice here!!
|
||||||
} else if ( gTableHash.find(byt) != gTableHash.end() ) {
|
} else {
|
||||||
assert( count < bufLen );
|
std::map<wchar_t,Letter>::iterator iter = gTableHash.find(byt);
|
||||||
wordBuf[count++] = gTableHash[byt];
|
if ( iter != gTableHash.end() ) {
|
||||||
if ( count >= bufLen ) {
|
assert( count < bufLen );
|
||||||
dropWord = true;
|
wordBuf[count++] = iter->second;
|
||||||
}
|
if ( count >= bufLen ) {
|
||||||
} else if ( gKillIfMissing || !dropWord ) {
|
dropWord = true;
|
||||||
char buf[T2ABUFLEN(count)];
|
|
||||||
wordBuf[count] = '\0';
|
|
||||||
|
|
||||||
tileToAscii( buf, sizeof(buf), wordBuf );
|
|
||||||
|
|
||||||
if ( gKillIfMissing ) {
|
|
||||||
ERROR_EXIT( "chr %lc (%d/0x%x) not in map file %s\n"
|
|
||||||
"last word was %s\n",
|
|
||||||
byt, (int)byt, (int)byt, gTableFile, buf );
|
|
||||||
} else if ( !dropWord ) {
|
|
||||||
#ifdef DEBUG
|
|
||||||
if ( gDebug ) {
|
|
||||||
fprintf( stderr, "%s: chr %c (%d) not in map file %s\n"
|
|
||||||
"dropping partial word %s\n", __func__,
|
|
||||||
(char)byt, (int)byt, gTableFile, buf );
|
|
||||||
}
|
}
|
||||||
|
} else if ( gKillIfMissing || !dropWord ) {
|
||||||
|
wchar_t buf[T2ABUFLEN(count)];
|
||||||
|
wordBuf[count] = '\0';
|
||||||
|
|
||||||
|
tilesToText( buf, VSIZE(buf), wordBuf );
|
||||||
|
|
||||||
|
if ( gKillIfMissing ) {
|
||||||
|
ERROR_EXIT( "chr %lc (%d/0x%x) not in map file %s\n"
|
||||||
|
"last word was %ls\n",
|
||||||
|
byt, (int)byt, (int)byt, gTableFile, buf );
|
||||||
|
} else if ( !dropWord ) {
|
||||||
|
#ifdef DEBUG
|
||||||
|
if ( gDebug ) {
|
||||||
|
fprintf( stderr, "%s: chr %lc (%d) not in map file %s\n"
|
||||||
|
"dropping partial word %ls\n", __func__,
|
||||||
|
byt, (int)byt, gTableFile, buf );
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
dropWord = true;
|
dropWord = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} // for
|
||||||
|
|
||||||
// if ( NULL != result ) {
|
|
||||||
// char buf[T2ABUFLEN(MAX_WORD_LEN)];
|
|
||||||
// fprintf( stderr, "%s returning %s\n", __func__,
|
|
||||||
// tileToAscii( buf, sizeof(buf), result ) );
|
|
||||||
// }
|
|
||||||
return result;
|
return result;
|
||||||
} // readOneWord
|
} // readOneWord
|
||||||
|
|
||||||
|
@ -635,7 +649,7 @@ readFromFile( void )
|
||||||
// during the sort. This seems easier.
|
// during the sort. This seems easier.
|
||||||
for ( ; ; ) {
|
for ( ; ; ) {
|
||||||
if ( !gDone ) {
|
if ( !gDone ) {
|
||||||
word = readOneWord( wordBuf, sizeof(wordBuf), &len, &s_eof );
|
word = readOneWord( wordBuf, VSIZE(wordBuf), &len, &s_eof );
|
||||||
gDone = NULL == word;
|
gDone = NULL == word;
|
||||||
}
|
}
|
||||||
if ( gDone ) {
|
if ( gDone ) {
|
||||||
|
@ -658,13 +672,13 @@ readFromFile( void )
|
||||||
&& !firstBeforeSecond( gCurrentWord, word ) ) {
|
&& !firstBeforeSecond( gCurrentWord, word ) ) {
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if ( gDebug ) {
|
if ( gDebug ) {
|
||||||
char buf1[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
char buf2[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
fprintf( stderr,
|
fprintf( stderr,
|
||||||
"%s: words %s and %s are the smae or out of order\n",
|
"%s: words %ls and %ls are the smae or out of order\n",
|
||||||
__func__,
|
__func__,
|
||||||
tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
|
tilesToText( buf1, VSIZE(buf1), gCurrentWord ),
|
||||||
tileToAscii( buf2, sizeof(buf2), word ) );
|
tilesToText( buf2, VSIZE(buf2), word ) );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
continue;
|
continue;
|
||||||
|
@ -676,9 +690,9 @@ readFromFile( void )
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
if ( gDebug ) {
|
if ( gDebug ) {
|
||||||
char buf[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
fprintf( stderr, "gCurrentWord now %s\n",
|
fprintf( stderr, "gCurrentWord now %ls\n",
|
||||||
tileToAscii( buf, sizeof(buf), gCurrentWord) );
|
tilesToText( buf, VSIZE(buf), gCurrentWord) );
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
} // readFromFile
|
} // readFromFile
|
||||||
|
@ -690,14 +704,15 @@ firstBeforeSecond( const Letter* lhs, const Letter* rhs )
|
||||||
return gt;
|
return gt;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char*
|
static wchar_t*
|
||||||
tileToAscii( char* out, int outSize, const Letter* in )
|
tilesToText( wchar_t* out, int outSize, const Letter* in )
|
||||||
{
|
{
|
||||||
char tiles[outSize];
|
wchar_t tiles[outSize];
|
||||||
int tilesLen = 1;
|
wchar_t* orig = out;
|
||||||
tiles[0] = '[';
|
int tilesLen = 0;
|
||||||
|
|
||||||
|
tiles[tilesLen++] = L'[';
|
||||||
|
|
||||||
char* orig = out;
|
|
||||||
for ( ; ; ) {
|
for ( ; ; ) {
|
||||||
Letter ch = *in++;
|
Letter ch = *in++;
|
||||||
if ( '\0' == ch ) {
|
if ( '\0' == ch ) {
|
||||||
|
@ -705,14 +720,15 @@ tileToAscii( char* out, int outSize, const Letter* in )
|
||||||
}
|
}
|
||||||
assert( ch < gRevMap.size() );
|
assert( ch < gRevMap.size() );
|
||||||
*out++ = gRevMap[ch];
|
*out++ = gRevMap[ch];
|
||||||
tilesLen += sprintf( &tiles[tilesLen], "%d,", ch );
|
|
||||||
|
tilesLen += swprintf( &tiles[tilesLen], outSize-tilesLen, L"%d,", ch );
|
||||||
assert( (out - orig) < outSize );
|
assert( (out - orig) < outSize );
|
||||||
}
|
}
|
||||||
|
|
||||||
assert( tilesLen+1 < outSize );
|
assert( tilesLen+1 < outSize );
|
||||||
tiles[tilesLen] = ']';
|
tiles[tilesLen] = L']';
|
||||||
tiles[tilesLen+1] = '\0';
|
tiles[tilesLen+1] = L'\0';
|
||||||
strcpy( out, tiles );
|
wcscpy( out, tiles );
|
||||||
|
|
||||||
return orig;
|
return orig;
|
||||||
}
|
}
|
||||||
|
@ -777,9 +793,9 @@ printWords( WordList* strings )
|
||||||
{
|
{
|
||||||
std::vector<Letter*>::iterator iter = strings->begin();
|
std::vector<Letter*>::iterator iter = strings->begin();
|
||||||
while ( iter != strings->end() ) {
|
while ( iter != strings->end() ) {
|
||||||
char buf[T2ABUFLEN(MAX_WORD_LEN)];
|
wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
|
||||||
tileToAscii( buf, sizeof(buf), *iter );
|
tilesToText( buf, VSIZE(buf), *iter );
|
||||||
fprintf( stderr, "%s\n", buf );
|
fprintf( stderr, "%ls\n", buf );
|
||||||
++iter;
|
++iter;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -906,18 +922,12 @@ makeTableHash( void )
|
||||||
gRevMap.push_back(0);
|
gRevMap.push_back(0);
|
||||||
|
|
||||||
for ( ii = 0; ; ++ii ) {
|
for ( ii = 0; ; ++ii ) {
|
||||||
int ch = getc(TABLEFILE);
|
wchar_t ch = getWideChar( TABLEFILE );
|
||||||
if ( ch == EOF ) {
|
if ( EOF == ch ) {
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( gUseUnicode ) { // skip the first byte each time: tmp HACK!!!
|
|
||||||
ch = getc(TABLEFILE);
|
|
||||||
}
|
|
||||||
if ( ch == EOF ) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf( stderr, "adding %lc/%x\n", ch, ch );
|
||||||
gRevMap.push_back(ch);
|
gRevMap.push_back(ch);
|
||||||
|
|
||||||
if ( ch == 0 ) { // blank
|
if ( ch == 0 ) { // blank
|
||||||
|
@ -940,6 +950,26 @@ makeTableHash( void )
|
||||||
fclose( TABLEFILE );
|
fclose( TABLEFILE );
|
||||||
} // makeTableHash
|
} // makeTableHash
|
||||||
|
|
||||||
|
static void
|
||||||
|
printTableHash( void )
|
||||||
|
{
|
||||||
|
if ( gDebug ) {
|
||||||
|
std::vector<wchar_t>::iterator iter = gRevMap.begin();
|
||||||
|
int count = 0; // 0th entry is 0
|
||||||
|
while ( iter != gRevMap.end() ) {
|
||||||
|
wchar_t ch = *iter;
|
||||||
|
if ( 0 != ch ) {
|
||||||
|
fprintf( stderr, "%s: gRevMap[%d]: %lc\n", __func__, count, ch );
|
||||||
|
fprintf( stderr, "%s: gTableHash[%lc]: %d\n", __func__, ch,
|
||||||
|
gTableHash[ch] );
|
||||||
|
assert( gTableHash[ch] == count );
|
||||||
|
}
|
||||||
|
++iter;
|
||||||
|
++count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// emitNodes. "input" is $gNodes. From it we write up to
|
// emitNodes. "input" is $gNodes. From it we write up to
|
||||||
// $nBytesPerOutfile to files named $outFileBase0..n, mapping the
|
// $nBytesPerOutfile to files named $outFileBase0..n, mapping the
|
||||||
// letter field down to 5 bits with a hash built from $tableFile. If
|
// letter field down to 5 bits with a hash built from $tableFile. If
|
||||||
|
@ -1065,6 +1095,9 @@ outputNode( Node node, int nBytes, FILE* outfile )
|
||||||
unsigned int fco = TrieNodeGetFirstChildOffset(node);
|
unsigned int fco = TrieNodeGetFirstChildOffset(node);
|
||||||
unsigned int fourthByte = 0;
|
unsigned int fourthByte = 0;
|
||||||
|
|
||||||
|
assert( ((3 == nBytes) && (fco < (1<<17)))
|
||||||
|
|| ((4 == nBytes) && (fco < (1<<24))) );
|
||||||
|
|
||||||
if ( nBytes == 4 ) {
|
if ( nBytes == 4 ) {
|
||||||
fourthByte = fco >> 16;
|
fourthByte = fco >> 16;
|
||||||
if ( fourthByte > 0xFF ) {
|
if ( fourthByte > 0xFF ) {
|
||||||
|
@ -1085,7 +1118,7 @@ outputNode( Node node, int nBytes, FILE* outfile )
|
||||||
// | | |
|
// | | |
|
||||||
// accepting bit ---+ | |
|
// accepting bit ---+ | |
|
||||||
// last edge bit ------+ |
|
// last edge bit ------+ |
|
||||||
// ---- last bit (17th on next node addr)---------+
|
// ---- last bit (17th of next node addr)---------+
|
||||||
|
|
||||||
// The four-byte format adds a byte at the right end for
|
// The four-byte format adds a byte at the right end for
|
||||||
// addressing, but removes the extra bit (5) in order to let the
|
// addressing, but removes the extra bit (5) in order to let the
|
||||||
|
@ -1247,13 +1280,13 @@ parseARGV( int argc, char** argv, const char** inFileName )
|
||||||
|
|
||||||
if ( !!enc ) {
|
if ( !!enc ) {
|
||||||
if ( !strcasecmp( enc, "UTF-8" ) ) {
|
if ( !strcasecmp( enc, "UTF-8" ) ) {
|
||||||
gIsMultibyte = true;
|
// gIsMultibyte = true;
|
||||||
} else if ( !strcasecmp( enc, "iso-8859-1" ) ) {
|
} else if ( !strcasecmp( enc, "iso-8859-1" ) ) {
|
||||||
gIsMultibyte = false;
|
// gIsMultibyte = false;
|
||||||
} else if ( !strcasecmp( enc, "iso-latin-1" ) ) {
|
} else if ( !strcasecmp( enc, "iso-latin-1" ) ) {
|
||||||
gIsMultibyte = false;
|
// gIsMultibyte = false;
|
||||||
} else if ( !strcasecmp( enc, "ISO-8859-2" ) ) {
|
} else if ( !strcasecmp( enc, "ISO-8859-2" ) ) {
|
||||||
gIsMultibyte = false;
|
// gIsMultibyte = false;
|
||||||
} else {
|
} else {
|
||||||
ERROR_EXIT( "%s: unknown encoding %s", __func__, enc );
|
ERROR_EXIT( "%s: unknown encoding %s", __func__, enc );
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,9 +15,15 @@
|
||||||
use strict;
|
use strict;
|
||||||
|
|
||||||
my @wordSizeCounts;
|
my @wordSizeCounts;
|
||||||
my @letterCounts;
|
my %letterCounts;
|
||||||
my $wordCount;
|
my $wordCount;
|
||||||
my $letterCount;
|
my $letterCount;
|
||||||
|
my $enc = "utf8"; # this could be a cmdline arg....
|
||||||
|
|
||||||
|
if ( $enc ) {
|
||||||
|
binmode( STDOUT, ":encoding($enc)" ) ;
|
||||||
|
binmode( STDIN, ":encoding($enc)" ) ;
|
||||||
|
}
|
||||||
|
|
||||||
while (<>) {
|
while (<>) {
|
||||||
|
|
||||||
|
@ -27,10 +33,10 @@ while (<>) {
|
||||||
++$wordCount;
|
++$wordCount;
|
||||||
|
|
||||||
foreach my $letter (split( / */ ) ) {
|
foreach my $letter (split( / */ ) ) {
|
||||||
my $i = ord($letter);
|
my $ii = ord($letter);
|
||||||
# special-case the bogus chars we add for "specials"
|
# special-case the bogus chars we add for "specials"
|
||||||
die "$0: this is a letter?: $i" if $i <= 32 && $i >= 4 && $i != 0;
|
die "$0: this is a letter?: $ii" if $ii <= 32 && $ii >= 4 && $ii != 0;
|
||||||
++$letterCounts[$i];
|
++$letterCounts{$letter};
|
||||||
++$letterCount;
|
++$letterCount;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -54,14 +60,12 @@ for ( my $i = 1 ; $i <= 99; ++$i ) {
|
||||||
print "\n\n**** Letter counts ****\n";
|
print "\n\n**** Letter counts ****\n";
|
||||||
print " ASCII ORD HEX PCT (of $letterCount)\n";
|
print " ASCII ORD HEX PCT (of $letterCount)\n";
|
||||||
my $lineNo = 1;
|
my $lineNo = 1;
|
||||||
for ( my $i = 0; $i < 255; ++$i ) {
|
foreach my $key (sort keys %letterCounts) {
|
||||||
my $count = $letterCounts[$i];
|
my $count = $letterCounts{$key};
|
||||||
if ( $count > 0 ) {
|
my $pct = (100.00 * $count) / $letterCount;
|
||||||
my $pct = (100.00 * $count) / $letterCount;
|
printf( "%2d: %3s %3d %x %5.2f (%d)\n",
|
||||||
printf( "%2d: %3s %3d %x %5.2f (%d)\n",
|
$lineNo, $key, ord($key), ord($key), $pct, $count );
|
||||||
$lineNo, chr($i), $i, $i, $pct, $count );
|
++$lineNo;
|
||||||
++$lineNo;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
print "\n";
|
print "\n";
|
||||||
|
|
|
@ -23,6 +23,7 @@ use xloc;
|
||||||
|
|
||||||
my $unicode = -1;
|
my $unicode = -1;
|
||||||
my $doval = 0;
|
my $doval = 0;
|
||||||
|
my $dosize = 0;
|
||||||
my $enc;
|
my $enc;
|
||||||
my $outfile;
|
my $outfile;
|
||||||
|
|
||||||
|
@ -37,6 +38,8 @@ while ( $arg = $ARGV[0] ) {
|
||||||
$unicode = 0;
|
$unicode = 0;
|
||||||
} elsif ( $arg eq "-v" ) {
|
} elsif ( $arg eq "-v" ) {
|
||||||
$doval = 1;
|
$doval = 1;
|
||||||
|
} elsif ( $arg eq "-s" ) {
|
||||||
|
$dosize = 1;
|
||||||
} elsif ( $arg eq '-out' ) {
|
} elsif ( $arg eq '-out' ) {
|
||||||
$outfile = $ARGV[1];
|
$outfile = $ARGV[1];
|
||||||
shift @ARGV;
|
shift @ARGV;
|
||||||
|
@ -52,12 +55,20 @@ die "info file $infoFile not found\n" if ! -s $infoFile;
|
||||||
|
|
||||||
my $xlocToken = xloc::ParseTileInfo($infoFile, $enc);
|
my $xlocToken = xloc::ParseTileInfo($infoFile, $enc);
|
||||||
|
|
||||||
open OUTFILE, "> $outfile";
|
if ( $enc ) {
|
||||||
|
open OUTFILE, ">:encoding($enc)", "$outfile"
|
||||||
|
or die "couldn't open $outfile";
|
||||||
|
} else {
|
||||||
|
open OUTFILE, ">$outfile" or die "couldn't open $outfile";
|
||||||
|
}
|
||||||
# For f*cking windoze linefeeds
|
# For f*cking windoze linefeeds
|
||||||
binmode( OUTFILE );
|
# binmode( OUTFILE );
|
||||||
|
|
||||||
if ( $unicode ne -1 ) {
|
if ( $unicode ne -1 ) {
|
||||||
xloc::WriteMapFile( $xlocToken, $unicode, \*OUTFILE );
|
xloc::WriteMapFile( $xlocToken, $unicode, \*OUTFILE );
|
||||||
|
} elsif ( $dosize ) {
|
||||||
|
my $count = xloc::GetNTiles( $xlocToken );
|
||||||
|
print OUTFILE pack("c", $count );
|
||||||
} elsif ( $doval ) {
|
} elsif ( $doval ) {
|
||||||
xloc::WriteValuesFile( $xlocToken, \*OUTFILE );
|
xloc::WriteValuesFile( $xlocToken, \*OUTFILE );
|
||||||
}
|
}
|
||||||
|
|
|
@ -103,13 +103,6 @@ sub GetValue($$) {
|
||||||
sub WriteMapFile($$$) {
|
sub WriteMapFile($$$) {
|
||||||
my ( $hashR, $unicode, $fhr ) = @_;
|
my ( $hashR, $unicode, $fhr ) = @_;
|
||||||
|
|
||||||
my $packStr;
|
|
||||||
if ( $unicode ) {
|
|
||||||
$packStr = "n";
|
|
||||||
} else {
|
|
||||||
$packStr = "C";
|
|
||||||
}
|
|
||||||
|
|
||||||
my $count = GetNTiles($hashR);
|
my $count = GetNTiles($hashR);
|
||||||
my $specialCount = 0;
|
my $specialCount = 0;
|
||||||
for ( my $i = 0; $i < $count; ++$i ) {
|
for ( my $i = 0; $i < $count; ++$i ) {
|
||||||
|
@ -117,11 +110,12 @@ sub WriteMapFile($$$) {
|
||||||
my $str = ${$tileR}[2];
|
my $str = ${$tileR}[2];
|
||||||
|
|
||||||
if ( $str =~ /\'(.)\'/ ) {
|
if ( $str =~ /\'(.)\'/ ) {
|
||||||
print $fhr pack($packStr, ord($1) );
|
print $fhr pack( "U", ord($1) );
|
||||||
|
# printf STDERR "ord: %x ($1)\n", ord($1);
|
||||||
} elsif ( $str =~ /\"(.+)\"/ ) {
|
} elsif ( $str =~ /\"(.+)\"/ ) {
|
||||||
print $fhr pack($packStr, $specialCount++ );
|
print $fhr pack( "c", $specialCount++ );
|
||||||
} elsif ( $str =~ /(\d+)/ ) {
|
} elsif ( $str =~ /(\d+)/ ) {
|
||||||
print $fhr pack( $packStr, $1 );
|
print $fhr pack( "n", $1 );
|
||||||
} else {
|
} else {
|
||||||
die "WriteMapFile: unrecognized face format $str, elem $i";
|
die "WriteMapFile: unrecognized face format $str, elem $i";
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue