First changes for building dictionaries (with tile faces in UTF-8).

Test cases are English and Polish. With these changes English seems
to build OK and decode via dawg2dict.pl; Polish builds and decodes
too, but a lot of words are missing. Building has only been done via
the Makefiles so far, not BYOD.
This commit is contained in:
ehouse 2009-03-18 04:16:26 +00:00
parent 31ec0f9235
commit eb3d97a334
10 changed files with 137 additions and 100 deletions

View file

@ -33,7 +33,7 @@ endif
endif endif
LANG_SPECIAL_INFO = \ LANG_SPECIAL_INFO = \
"L-L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \ "L·L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
"NY" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \ "NY" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \
"QU" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \ "QU" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \

View file

@ -75,7 +75,7 @@ XLOC_HEADER:0x8C00
8 1 'I' 8 1 'I'
1 8 'J' 1 8 'J'
4 1 'L' 4 1 'L'
1 10 {"L-L"} 1 10 {"L·L"}
3 2 'M' 3 2 'M'
6 1 'N' 6 1 'N'
1 10 {"NY"} 1 10 {"NY"}

View file

@ -15,7 +15,7 @@
# along with this program; if not, write to the Free Software # along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
TARGET_TYPE ?= FRANK TARGET_TYPE ?= WINCE
include ../Makefile.langcommon include ../Makefile.langcommon

View file

@ -204,16 +204,6 @@ endif
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES) frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@ $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
# a binary file (one byte) giving the number of tiles in the dict
charcount.bin: table.bin
ifdef NEWDAWG
siz=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"c\",$$siz/2)" > $@
else
siz=$$(wc -c $< | sed -e 's/$<//'); \
perl -e "print pack(\"c\",$$siz)" > $@
endif
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \ cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
frankspecials.bin $(XWLANG)StartLoc.bin \ frankspecials.bin $(XWLANG)StartLoc.bin \
@ -233,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
ifdef NEWDAWG ifdef NEWDAWG
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \ if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \ then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \ elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \ then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
elif true; \ elif true; \
then echo "Unexpected node size"; exit 1; \ then echo "Unexpected node size"; exit 1; \
fi fi
@ -272,8 +262,19 @@ else
perl -I../ ../xloc.pl -t -out $@ perl -I../ ../xloc.pl -t -out $@
endif endif
values.bin: ../xloc.pl values.bin: ../xloc.pl
perl -I../ ../xloc.pl -v -out $@ $(ENCP) perl -I../ ../xloc.pl -v -out $@
# a binary file, two bytes, one giving the size of tiles data and the
# other the number of tiles in the dict. Tiles data is utf-8 and so
# number is not derivable from size.
charcount.bin: table.bin ../xloc.pl
SIZ=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"c\",$$SIZ)" > $@
TMP=/tmp/tmp$$$$; \
perl -I../ ../xloc.pl -s -out $$TMP; \
cat $$TMP >> $@; \
rm -f $$TMP
%.dict: %.dict.gz %.dict: %.dict.gz
zcat $< > $@ zcat $< > $@

View file

@ -1,4 +1,4 @@
# -*- coding: iso-8859-2; mode: Makefile; -*- # -*- mode: Makefile; -*-
# Copyright 2002 - 2009 by Eric House (xwords@eehouse.org). All # Copyright 2002 - 2009 by Eric House (xwords@eehouse.org). All
# rights reserved. # rights reserved.
# #
@ -16,9 +16,9 @@
# along with this program; if not, write to the Free Software # along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
XWLANG=Polish XWLANG = Polish
LANGCODE=pl_PL LANGCODE = pl_PL
ENC = ISO-8859-2 ENC = UTF-8
# DICT2DAWGARGS = -lang $(LANGCODE) # DICT2DAWGARGS = -lang $(LANGCODE)
# DICT2DAWGARGS = -debug # DICT2DAWGARGS = -debug
@ -29,12 +29,12 @@ include ../Makefile.2to8
include ../Makefile.langcommon include ../Makefile.langcommon
SOURCEDICT ?= $(XWDICTPATH)/Polish/iso-8859-2/slowa.txt.gz SOURCEDICT ?= slowa.txt.gz
$(XWLANG)Main.dict.gz: $(SOURCEDICT) $(XWLANG)Main.dict.gz: $(SOURCEDICT)
zcat $< | tr -d '\r' \ zcat $< | tr -d '\r' \
| LANG=$(LANGCODE):$(ENC) tr [aąbcćdeęfghijklłmnńoóprsśtuwyzźż] [AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ] \ | tr [aąbcćdeęfghijklłmnńoóprsśtuwyzźż] [AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ] \
| LANG=$(LANGCODE):$(ENC) grep '^[AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ]*$$' \ | grep '^[AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ]*$$' \
| gzip > $@ | gzip > $@
# Everything but creating of the Main.dict file is inherited from the # Everything but creating of the Main.dict file is inherited from the

View file

@ -1,4 +1,4 @@
# -*- coding: iso-8859-2; mode: conf; -*- # -*- mode: conf; -*-
# Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights # Copyright 2002-2009 by Eric House (xwords@eehouse.org). All rights
# reserved. # reserved.
# #
@ -17,12 +17,12 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
LANGCODE:pl_PL LANGCODE:pl_PL
CHARSET:iso-8859-2 CHARSET:utf-8
# deal with DOS files # deal with DOS files
LANGFILTER: tr -d '\r' LANGFILTER: tr -d '\r'
LANGFILTER: | tr [a-pr-uwyząćęłńóśźż] [A-PR-UWYZĄĆĘŁŃÓŚŹŻ] LANGFILTER: | tr [a-pr-uwyząćęłńóśźż] [A-PR-UWYZĄĆĘŁŃÓŚŹŻ]
LANGFILTER: | grep '^[A-PR-UWYZĄĆĘŁŃÓŚŹŻ]*$' LANGFILTER: | grep '^[A-PR-UWYZĄĆĘŁŃÓŚŹŻ]*$'
LANGFILTER: | tr '\n' '\000' LANGFILTER: | tr '\n' '\000'
D2DARGS: -r -term 0 D2DARGS: -r -term 0
@ -42,8 +42,8 @@ LANGINFO: this working.</p>
LANGINFO: <p>Note that the blank is the last tile here, while with all LANGINFO: <p>Note that the blank is the last tile here, while with all
LANGINFO: other languages it&apos;s the first.</p> LANGINFO: other languages it&apos;s the first.</p>
LANGINFO: <p>Also, please note that we currently require the files you # LANGINFO: <p>Also, please note that we currently require the files you
LANGINFO: upload to use the iso-8859-2 character encoding.</p> # LANGINFO: upload to use the iso-8859-2 character encoding.</p>
# High bit means "official". Next 7 bits are an enum where # High bit means "official". Next 7 bits are an enum where
# Polish==8. Low byte is padding # Polish==8. Low byte is padding
@ -51,13 +51,13 @@ XLOC_HEADER:0x8800
<BEGIN_TILES> <BEGIN_TILES>
9 1 'A' 9 1 'A'
1 5 161 # 'Ą' 1 5 'Ą'
2 3 'B' 2 3 'B'
3 2 'C' 3 2 'C'
1 6 198 # 'Ć' 1 6 'Ć'
3 2 'D' 3 2 'D'
7 1 'E' 7 1 'E'
1 5 202 # 'Ę' 1 5 'Ę'
1 5 'F' 1 5 'F'
2 3 'G' 2 3 'G'
2 3 'H' 2 3 'H'
@ -65,23 +65,23 @@ XLOC_HEADER:0x8800
2 3 'J' 2 3 'J'
3 3 'K' 3 3 'K'
3 2 'L' 3 2 'L'
2 3 163 # 'Ł' 2 3 'Ł'
3 2 'M' 3 2 'M'
5 1 'N' 5 1 'N'
1 7 209 # 'Ń' 1 7 'Ń'
6 1 'O' 6 1 'O'
1 5 211 # 'Ó' 1 5 'Ó'
3 2 'P' 3 2 'P'
4 1 'R' 4 1 'R'
4 1 'S' 4 1 'S'
1 5 166 # 'Ś' 1 5 'Ś'
3 2 'T' 3 2 'T'
2 3 'U' 2 3 'U'
4 1 'W' 4 1 'W'
4 2 'Y' 4 2 'Y'
5 1 'Z' 5 1 'Z'
1 9 172 # 'Ź' 1 9 'Ź'
1 5 175 # 'Ż' 1 5 'Ż'
# the blank *must* be last here!!! # the blank *must* be last here!!!
2 0 {"_"} 2 0 {"_"}

View file

@ -22,12 +22,17 @@
use strict; use strict;
use Fcntl; use Fcntl;
use Encode 'from_to';
use Encode;
my $gInFile; my $gInFile;
my $gDoRaw = 0; my $gDoRaw = 0;
my $gFileType; my $gFileType;
my $gNodeSize; my $gNodeSize;
use Fcntl 'SEEK_CUR';
sub systell { sysseek($_[0], 0, SEEK_CUR) }
sub usage() { sub usage() {
print STDERR "USAGE: $0 " print STDERR "USAGE: $0 "
. "[-raw] " . "[-raw] "
@ -69,18 +74,32 @@ sub countSpecials($) {
sub readXWDFaces($$$) { sub readXWDFaces($$$) {
my ( $fh, $facRef, $nSpecials ) = @_; my ( $fh, $facRef, $nSpecials ) = @_;
my $buf; my ( $buf, $nRead, $nChars, $nBytes );
my $nRead = sysread( $fh, $buf, 1 ); $nRead = sysread( $fh, $buf, 1 );
my $nChars = unpack( 'c', $buf ); $nBytes = unpack( 'c', $buf );
printf STDERR "nBytes of faces: %d\n", $nBytes;
$nRead = sysread( $fh, $buf, 1 );
$nChars = unpack( 'c', $buf );
printf STDERR "nChars of faces: %d\n", $nChars;
binmode( $fh, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
sysread( $fh, $buf, $nChars );
length($buf) == $nChars or die "didn't read expected number of bytes\n";
binmode( $fh ) or die "binmode failed\n";
print STDERR "string now: $buf\n";
my @faces; my @faces;
for ( my $i = 0; $i < $nChars; ++$i ) { for ( my $ii = 0; $ii < $nChars; ++$ii ) {
my $nRead = sysread( $fh, $buf, 2 ); my $chr = substr( $buf, $ii, 1 );
push( @faces, chr(unpack( "n", $buf ) ) ); print STDERR "pushing $chr \n";
push( @faces, $chr );
} }
printf STDERR "at 0x%x after reading faces\n", systell($fh);
${$nSpecials} = countSpecials( \@faces ); ${$nSpecials} = countSpecials( \@faces );
@{$facRef} = @faces; @{$facRef} = @faces;
printf STDERR "readXWDFaces=>%d\n", $nChars;
return $nChars; return $nChars;
} # readXWDFaces } # readXWDFaces
@ -96,6 +115,7 @@ sub skipBitmap($) {
sysread( $fh, $buf, $nBytes ); sysread( $fh, $buf, $nBytes );
} }
printf STDERR "skipBitmap\n";
} # skipBitmap } # skipBitmap
sub getSpecials($$$) { sub getSpecials($$$) {
@ -135,9 +155,9 @@ sub readNodesToEnd($) {
sub nodeSizeFromFlags($) { sub nodeSizeFromFlags($) {
my ( $flags ) = @_; my ( $flags ) = @_;
if ( $flags == 2 ) { if ( $flags == 4 ) {
return 3; return 3;
} elsif ( $flags == 3 ) { } elsif ( $flags == 5 ) {
return 4; return 4;
} else { } else {
die "invalid dict flags $flags"; die "invalid dict flags $flags";
@ -158,6 +178,7 @@ sub mergeSpecials($$) {
sub prepXWD($$$$) { sub prepXWD($$$$) {
my ( $fh, $facRef, $nodesRef, $startRef ) = @_; my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
printf STDERR "at 0x%x at start\n", systell($fh);
my $buf; my $buf;
my $nRead = sysread( $fh, $buf, 2 ); my $nRead = sysread( $fh, $buf, 2 );
my $flags = unpack( "n", $buf ); my $flags = unpack( "n", $buf );
@ -167,22 +188,30 @@ sub prepXWD($$$$) {
my $nSpecials; my $nSpecials;
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials ); my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
printf STDERR "at 0x%x before header read\n", systell($fh);
# skip xloc header # skip xloc header
$nRead = sysread( $fh, $buf, 2 ); $nRead = sysread( $fh, $buf, 2 );
# skip values info. # skip values info.
printf STDERR "at 0x%x before reading %d values\n", systell($fh), $faceCount;
sysread( $fh, $buf, $faceCount * 2 ); sysread( $fh, $buf, $faceCount * 2 );
printf STDERR "at 0x%x after values read\n", systell($fh);
printf STDERR "at 0x%x before specials read\n", systell($fh);
my @specials; my @specials;
getSpecials( $fh, $nSpecials, \@specials ); getSpecials( $fh, $nSpecials, \@specials );
mergeSpecials( $facRef, \@specials ); mergeSpecials( $facRef, \@specials );
printf STDERR "at 0x%x after specials read\n", systell($fh);
printf STDERR "at 0x%x before offset read\n", systell($fh);
sysread( $fh, $buf, 4 ); sysread( $fh, $buf, 4 );
$$startRef = unpack( 'N', $buf ); $$startRef = unpack( 'N', $buf );
print STDERR "startRef=$$startRef\n";
my @nodes = readNodesToEnd( $fh ); my @nodes = readNodesToEnd( $fh );
@$nodesRef = @nodes; @$nodesRef = @nodes;
print STDERR "prepXWD done\n";
} # prepXWD } # prepXWD
sub readPDBSpecials($$$$$) { sub readPDBSpecials($$$$$) {
@ -341,6 +370,7 @@ sub printNodes($$) {
# main # main
################################################################# #################################################################
binmode( STDERR, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
parseARGV(); parseARGV();
@ -359,9 +389,11 @@ if ( $gFileType eq "xwd" ){
close INFILE; close INFILE;
die "no nodes!!!" if 0 == @nodes; die "no nodes!!!" if 0 == @nodes;
if ( $gDoRaw ) { if ( $gDoRaw ) {
printNodes( \@nodes, \@faces ); printNodes( \@nodes, \@faces );
} else { } else {
binmode( STDOUT, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
printDAWG( [], \@nodes, $startIndex, \@faces ); printDAWG( [], \@nodes, $startIndex, \@faces );
} }

View file

@ -93,7 +93,7 @@ static char* gBytesPerNodeFile = NULL; // where to write whether node
int gWordCount = 0; int gWordCount = 0;
std::map<Letter,wchar_t> gTableHash; std::map<Letter,wchar_t> gTableHash;
int gBlankIndex; int gBlankIndex;
std::vector<char> gRevMap; std::vector<wchar_t> gRevMap;
#ifdef DEBUG #ifdef DEBUG
bool gDebug = false; bool gDebug = false;
#endif #endif
@ -117,7 +117,7 @@ static void makeTableHash( void );
static WordList* parseAndSort( void ); static WordList* parseAndSort( void );
static void printWords( WordList* strings ); static void printWords( WordList* strings );
static bool firstBeforeSecond( const Letter* lhs, const Letter* rhs ); static bool firstBeforeSecond( const Letter* lhs, const Letter* rhs );
static char* tileToAscii( char* out, int outSize, const Letter* in ); static wchar_t* tileToAscii( wchar_t* out, int outSize, const Letter* in );
static int buildNode( int depth ); static int buildNode( int depth );
static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling ); static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling );
static int addNodes( NodeList& newedgesR ); static int addNodes( NodeList& newedgesR );
@ -451,8 +451,8 @@ readFromSortedArray( void )
} }
#ifdef DEBUG #ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
char buf[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
fprintf( stderr, "%s: got word: %s\n", __func__, fprintf( stderr, "%s: got word: %ls\n", __func__,
tileToAscii( buf, sizeof(buf), word ) ); tileToAscii( buf, sizeof(buf), word ) );
} }
#endif #endif
@ -473,10 +473,10 @@ readFromSortedArray( void )
&& !firstBeforeSecond( gCurrentWord, word ) ) { && !firstBeforeSecond( gCurrentWord, word ) ) {
#ifdef DEBUG #ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
char buf1[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
char buf2[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
fprintf( stderr, fprintf( stderr,
"%s: words %s and %s are the same or out of order\n", "%s: words %ls and %ls are the same or out of order\n",
__func__, __func__,
tileToAscii( buf1, sizeof(buf1), gCurrentWord ), tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
tileToAscii( buf2, sizeof(buf2), word ) ); tileToAscii( buf2, sizeof(buf2), word ) );
@ -492,8 +492,8 @@ readFromSortedArray( void )
#ifdef DEBUG #ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
char buf[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
fprintf( stderr, "gCurrentWord now %s\n", fprintf( stderr, "gCurrentWord now %ls\n",
tileToAscii( buf, sizeof(buf), gCurrentWord) ); tileToAscii( buf, sizeof(buf), gCurrentWord) );
} }
#endif #endif
@ -516,6 +516,9 @@ getWideChar( FILE* file )
assert( 0 == ii ); assert( 0 == ii );
dest = byt; dest = byt;
break; break;
} else if ( byt < ' ' && 0 == ii ) {
dest = byt;
break;
} }
assert( ii < 4 ); assert( ii < 4 );
@ -567,9 +570,9 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
} }
#ifdef DEBUG #ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
char buf[T2ABUFLEN(count)]; wchar_t buf[T2ABUFLEN(count)];
wordBuf[count] = '\0'; wordBuf[count] = '\0';
fprintf( stderr, "%s: dropping word (len %d>=%d): %s\n", fprintf( stderr, "%s: dropping word (len %d>=%d): %ls\n",
__func__, count, gLimHigh, __func__, count, gLimHigh,
tileToAscii( buf, sizeof(buf), wordBuf ) ); tileToAscii( buf, sizeof(buf), wordBuf ) );
} }
@ -589,21 +592,21 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
dropWord = true; dropWord = true;
} }
} else if ( gKillIfMissing || !dropWord ) { } else if ( gKillIfMissing || !dropWord ) {
char buf[T2ABUFLEN(count)]; wchar_t buf[T2ABUFLEN(count)];
wordBuf[count] = '\0'; wordBuf[count] = '\0';
tileToAscii( buf, sizeof(buf), wordBuf ); tileToAscii( buf, sizeof(buf), wordBuf );
if ( gKillIfMissing ) { if ( gKillIfMissing ) {
ERROR_EXIT( "chr %lc (%d/0x%x) not in map file %s\n" ERROR_EXIT( "chr %lc (%d/0x%x) not in map file %s\n"
"last word was %s\n", "last word was %ls\n",
byt, (int)byt, (int)byt, gTableFile, buf ); byt, (int)byt, (int)byt, gTableFile, buf );
} else if ( !dropWord ) { } else if ( !dropWord ) {
#ifdef DEBUG #ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "%s: chr %c (%d) not in map file %s\n" fprintf( stderr, "%s: chr %lc (%d) not in map file %s\n"
"dropping partial word %s\n", __func__, "dropping partial word %ls\n", __func__,
(char)byt, (int)byt, gTableFile, buf ); byt, (int)byt, gTableFile, buf );
} }
#endif #endif
dropWord = true; dropWord = true;
@ -658,10 +661,10 @@ readFromFile( void )
&& !firstBeforeSecond( gCurrentWord, word ) ) { && !firstBeforeSecond( gCurrentWord, word ) ) {
#ifdef DEBUG #ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
char buf1[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
char buf2[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
fprintf( stderr, fprintf( stderr,
"%s: words %s and %s are the smae or out of order\n", "%s: words %ls and %ls are the smae or out of order\n",
__func__, __func__,
tileToAscii( buf1, sizeof(buf1), gCurrentWord ), tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
tileToAscii( buf2, sizeof(buf2), word ) ); tileToAscii( buf2, sizeof(buf2), word ) );
@ -676,8 +679,8 @@ readFromFile( void )
#ifdef DEBUG #ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
char buf[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
fprintf( stderr, "gCurrentWord now %s\n", fprintf( stderr, "gCurrentWord now %ls\n",
tileToAscii( buf, sizeof(buf), gCurrentWord) ); tileToAscii( buf, sizeof(buf), gCurrentWord) );
} }
#endif #endif
@ -690,14 +693,16 @@ firstBeforeSecond( const Letter* lhs, const Letter* rhs )
return gt; return gt;
} }
static char* static wchar_t*
tileToAscii( char* out, int outSize, const Letter* in ) tileToAscii( wchar_t* out, int outSize, const Letter* in )
{ {
char tiles[outSize]; // FIX THIS! Pass actual size from callsite
outSize /= sizeof(wchar_t)/sizeof(char);
wchar_t tiles[outSize];
int tilesLen = 1; int tilesLen = 1;
tiles[0] = '['; tiles[0] = L'[';
char* orig = out; wchar_t* orig = out;
for ( ; ; ) { for ( ; ; ) {
Letter ch = *in++; Letter ch = *in++;
if ( '\0' == ch ) { if ( '\0' == ch ) {
@ -705,14 +710,14 @@ tileToAscii( char* out, int outSize, const Letter* in )
} }
assert( ch < gRevMap.size() ); assert( ch < gRevMap.size() );
*out++ = gRevMap[ch]; *out++ = gRevMap[ch];
tilesLen += sprintf( &tiles[tilesLen], "%d,", ch ); tilesLen += swprintf( &tiles[tilesLen], outSize-tilesLen, L"%d,", ch );
assert( (out - orig) < outSize ); assert( (out - orig) < outSize );
} }
assert( tilesLen+1 < outSize ); assert( tilesLen+1 < outSize );
tiles[tilesLen] = ']'; tiles[tilesLen] = L']';
tiles[tilesLen+1] = '\0'; tiles[tilesLen+1] = L'\0';
strcpy( out, tiles ); wcscpy( out, tiles );
return orig; return orig;
} }
@ -777,9 +782,9 @@ printWords( WordList* strings )
{ {
std::vector<Letter*>::iterator iter = strings->begin(); std::vector<Letter*>::iterator iter = strings->begin();
while ( iter != strings->end() ) { while ( iter != strings->end() ) {
char buf[T2ABUFLEN(MAX_WORD_LEN)]; wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
tileToAscii( buf, sizeof(buf), *iter ); tileToAscii( buf, sizeof(buf), *iter );
fprintf( stderr, "%s\n", buf ); fprintf( stderr, "%ls\n", buf );
++iter; ++iter;
} }
} }
@ -906,18 +911,12 @@ makeTableHash( void )
gRevMap.push_back(0); gRevMap.push_back(0);
for ( ii = 0; ; ++ii ) { for ( ii = 0; ; ++ii ) {
int ch = getc(TABLEFILE); wchar_t ch = getWideChar( TABLEFILE );
if ( ch == EOF ) { if ( EOF == ch ) {
break;
}
if ( gUseUnicode ) { // skip the first byte each time: tmp HACK!!!
ch = getc(TABLEFILE);
}
if ( ch == EOF ) {
break; break;
} }
fprintf( stderr, "adding %x\n", ch );
gRevMap.push_back(ch); gRevMap.push_back(ch);
if ( ch == 0 ) { // blank if ( ch == 0 ) { // blank

View file

@ -23,6 +23,7 @@ use xloc;
my $unicode = -1; my $unicode = -1;
my $doval = 0; my $doval = 0;
my $dosize = 0;
my $enc; my $enc;
my $outfile; my $outfile;
@ -37,6 +38,8 @@ while ( $arg = $ARGV[0] ) {
$unicode = 0; $unicode = 0;
} elsif ( $arg eq "-v" ) { } elsif ( $arg eq "-v" ) {
$doval = 1; $doval = 1;
} elsif ( $arg eq "-s" ) {
$dosize = 1;
} elsif ( $arg eq '-out' ) { } elsif ( $arg eq '-out' ) {
$outfile = $ARGV[1]; $outfile = $ARGV[1];
shift @ARGV; shift @ARGV;
@ -52,12 +55,20 @@ die "info file $infoFile not found\n" if ! -s $infoFile;
my $xlocToken = xloc::ParseTileInfo($infoFile, $enc); my $xlocToken = xloc::ParseTileInfo($infoFile, $enc);
open OUTFILE, "> $outfile"; if ( $enc ) {
open OUTFILE, ">:encoding($enc)", "$outfile"
or die "couldn't open $outfile";
} else {
open OUTFILE, ">$outfile" or die "couldn't open $outfile";
}
# For f*cking windoze linefeeds # For f*cking windoze linefeeds
binmode( OUTFILE ); # binmode( OUTFILE );
if ( $unicode ne -1 ) { if ( $unicode ne -1 ) {
xloc::WriteMapFile( $xlocToken, $unicode, \*OUTFILE ); xloc::WriteMapFile( $xlocToken, $unicode, \*OUTFILE );
} elsif ( $dosize ) {
my $count = xloc::GetNTiles( $xlocToken );
print OUTFILE pack("c", $count );
} elsif ( $doval ) { } elsif ( $doval ) {
xloc::WriteValuesFile( $xlocToken, \*OUTFILE ); xloc::WriteValuesFile( $xlocToken, \*OUTFILE );
} }

View file

@ -103,13 +103,6 @@ sub GetValue($$) {
sub WriteMapFile($$$) { sub WriteMapFile($$$) {
my ( $hashR, $unicode, $fhr ) = @_; my ( $hashR, $unicode, $fhr ) = @_;
my $packStr;
if ( $unicode ) {
$packStr = "n";
} else {
$packStr = "C";
}
my $count = GetNTiles($hashR); my $count = GetNTiles($hashR);
my $specialCount = 0; my $specialCount = 0;
for ( my $i = 0; $i < $count; ++$i ) { for ( my $i = 0; $i < $count; ++$i ) {
@ -117,11 +110,12 @@ sub WriteMapFile($$$) {
my $str = ${$tileR}[2]; my $str = ${$tileR}[2];
if ( $str =~ /\'(.)\'/ ) { if ( $str =~ /\'(.)\'/ ) {
print $fhr pack($packStr, ord($1) ); print $fhr pack( "W", ord($1) );
# printf STDERR "ord: %x ($1)\n", ord($1);
} elsif ( $str =~ /\"(.+)\"/ ) { } elsif ( $str =~ /\"(.+)\"/ ) {
print $fhr pack($packStr, $specialCount++ ); print $fhr pack( "c", $specialCount++ );
} elsif ( $str =~ /(\d+)/ ) { } elsif ( $str =~ /(\d+)/ ) {
print $fhr pack( $packStr, $1 ); print $fhr pack( "n", $1 );
} else { } else {
die "WriteMapFile: unrecognized face format $str, elem $i"; die "WriteMapFile: unrecognized face format $str, elem $i";
} }