First changes for building dictionaries (with tile faces in UTF-8).

Test cases are English and Polish. With these changes English seems to build OK and decodes via dawg2dict.pl; Polish builds and decodes too, but a lot of words are missing. So far this has been built using the Makefiles only, not BYOD.
2025-01-14 08:01:38 +01:00 · 2009-03-18 04:16:26 +00:00 · 2009-03-18 04:16:26 +00:00 · eb3d97a334
commit eb3d97a334
parent 31ec0f9235
10 changed files with 137 additions and 100 deletions
--- a/dawg/Catalan/Makefile
+++ b/dawg/Catalan/Makefile
@ -33,7 +33,7 @@ endif
 endif

 LANG_SPECIAL_INFO = \
-		"L-L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
+		"L·L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
 		"NY" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \
 		"QU" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \

--- a/dawg/Catalan/info.txt
+++ b/dawg/Catalan/info.txt
@ -75,7 +75,7 @@ XLOC_HEADER:0x8C00
 8	1     'I'
 1	8     'J'
 4	1     'L'
-1	10    {"L-L"}
+1	10    {"L·L"}
 3	2     'M'
 6	1     'N'
 1	10    {"NY"}
--- a/dawg/English/Makefile
+++ b/dawg/English/Makefile
@ -15,7 +15,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

-TARGET_TYPE ?= FRANK
+TARGET_TYPE ?= WINCE

 include ../Makefile.langcommon

--- a/dawg/Makefile.langcommon
+++ b/dawg/Makefile.langcommon
@ -204,16 +204,6 @@ endif
 frankspecials.bin: ../frank_mkspecials.pl  $(BMPFILES)
 	$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@

-# a binary file (one byte) giving the number of tiles in the dict
-charcount.bin: table.bin
-ifdef NEWDAWG
-	siz=$$(ls -l $< | awk '{print $$5}'); \
-	perl -e "print pack(\"c\",$$siz/2)" > $@
-else
-	siz=$$(wc -c $< | sed -e 's/$<//'); \
-	perl -e "print pack(\"c\",$$siz)" > $@
-endif
-
 $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin 
 	cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
 		frankspecials.bin $(XWLANG)StartLoc.bin  \
@ -233,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
 $(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
 ifdef NEWDAWG
 	if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0002)" > $@; echo "flags=2"; \
+		then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
 	elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0003)" > $@; echo "flags=3"; \
+		then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
 	elif true; \
 		then echo "Unexpected node size"; exit 1; \
 	fi
@ -272,8 +262,19 @@ else
 	perl -I../ ../xloc.pl -t -out $@
 endif

-values.bin:  ../xloc.pl 
-	perl -I../ ../xloc.pl -v -out $@ $(ENCP)
+values.bin: ../xloc.pl
+	perl -I../ ../xloc.pl -v -out $@
+
+# a binary file, two bytes, one giving the size of tiles data and the
+#  other the number of tiles in the dict.  Tiles data is utf-8 and so
+#  number is not derivable from size.
+charcount.bin: table.bin ../xloc.pl
+	SIZ=$$(ls -l $< | awk '{print $$5}'); \
+	perl -e "print pack(\"c\",$$SIZ)" > $@
+	TMP=/tmp/tmp$$$$; \
+	perl -I../ ../xloc.pl -s -out $$TMP; \
+	cat $$TMP >> $@; \
+	rm -f $$TMP

 %.dict: %.dict.gz
 	zcat $< > $@
--- a/dawg/Polish/Makefile
+++ b/dawg/Polish/Makefile
@ -1,4 +1,4 @@
-# -*- coding: iso-8859-2; mode: Makefile; -*-
+# -*- mode: Makefile; -*-
 # Copyright 2002 - 2009 by Eric House (xwords@eehouse.org).  All
 # rights reserved.
 #
@ -16,9 +16,9 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

-XWLANG=Polish
-LANGCODE=pl_PL
-ENC = ISO-8859-2
+XWLANG = Polish
+LANGCODE = pl_PL
+ENC = UTF-8

 # DICT2DAWGARGS = -lang $(LANGCODE)
 # DICT2DAWGARGS = -debug
@ -29,12 +29,12 @@ include ../Makefile.2to8

 include ../Makefile.langcommon

-SOURCEDICT ?= $(XWDICTPATH)/Polish/iso-8859-2/slowa.txt.gz
+SOURCEDICT ?= slowa.txt.gz

 $(XWLANG)Main.dict.gz: $(SOURCEDICT)
 	zcat $< | tr -d '\r' \
-	| LANG=$(LANGCODE):$(ENC) tr [aąbcćdeęfghijklłmnńoóprsśtuwyzźż] [AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ] \
-    | LANG=$(LANGCODE):$(ENC) grep '^[AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ]*$$' \
+	| tr [aąbcćdeęfghijklłmnńoóprsśtuwyzźż] [AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ] \
+    | grep '^[AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ]*$$' \
    | gzip > $@

 # Everything but creating of the Main.dict file is inherited from the
--- a/dawg/Polish/info.txt
+++ b/dawg/Polish/info.txt
@ -1,4 +1,4 @@
-# -*- coding: iso-8859-2; mode: conf; -*-
+# -*- mode: conf; -*-
 # Copyright 2002-2009 by Eric House (xwords@eehouse.org).  All rights
 # reserved.
 #
@ -17,12 +17,12 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

 LANGCODE:pl_PL
-CHARSET:iso-8859-2
+CHARSET:utf-8

 # deal with DOS files
 LANGFILTER: tr -d '\r'
-LANGFILTER: | tr [a-pr-uwyząćęłńóśźż] [A-PR-UWYZĄĆĘŁŃÓŚŹŻ]
-LANGFILTER: | grep '^[A-PR-UWYZĄĆĘŁŃÓŚŹŻ]*$'
+LANGFILTER: | tr [a-pr-uwyząćęłńóśźż] [A-PR-UWYZĄĆĘŁŃÓŚŹŻ]
+LANGFILTER: | grep '^[A-PR-UWYZĄĆĘŁŃÓŚŹŻ]*$'
 LANGFILTER: | tr '\n' '\000'

 D2DARGS: -r -term 0
@ -42,8 +42,8 @@ LANGINFO: this working.</p>
 LANGINFO: <p>Note that the blank is the last tile here, while with all
 LANGINFO: other languages it&apos;s the first.</p>

-LANGINFO: <p>Also, please note that we currently require the files you
-LANGINFO: upload to use the iso-8859-2 character encoding.</p>
+# LANGINFO: <p>Also, please note that we currently require the files you
+# LANGINFO: upload to use the iso-8859-2 character encoding.</p>

 # High bit means "official".  Next 7 bits are an enum where
 # Polish==8.  Low byte is padding
@ -51,13 +51,13 @@ XLOC_HEADER:0x8800

 <BEGIN_TILES>
 9           1       'A'
-1           5       161 # 'Ą'
+1           5       'Ą'
 2           3       'B'
 3           2       'C'
-1           6       198 # 'Ć'
+1           6       'Ć'
 3           2       'D'
 7           1       'E'
-1           5       202 # 'Ę'
+1           5       'Ę'
 1           5       'F'
 2           3       'G'
 2           3       'H'
@ -65,23 +65,23 @@ XLOC_HEADER:0x8800
 2           3       'J'
 3           3  	    'K'
 3           2       'L'
-2           3       163 # 'Ł'
+2           3       'Ł'
 3           2       'M'
 5           1       'N'
-1           7       209 # 'Ń'
+1           7       'Ń'
 6           1       'O'
-1           5       211 # 'Ó'
+1           5       'Ó'
 3           2       'P'
 4           1       'R'
 4           1       'S'
-1           5       166 # 'Ś'
+1           5       'Ś'
 3           2       'T'
 2           3       'U'
 4           1       'W'
 4           2  	    'Y'
 5           1       'Z'
-1           9       172 # 'Ź'
-1           5       175 # 'Ż'
+1           9       'Ź'
+1           5       'Ż'

 # the blank *must* be last here!!!
 2           0       {"_"}
--- a/dawg/dawg2dict.pl
+++ b/dawg/dawg2dict.pl
@ -22,12 +22,17 @@

 use strict;
 use Fcntl;
+use Encode 'from_to';
+use Encode;

 my $gInFile;
 my $gDoRaw = 0;
 my $gFileType;
 my $gNodeSize;

+use Fcntl 'SEEK_CUR';
+sub systell { sysseek($_[0], 0, SEEK_CUR) }
+
 sub usage() {
    print STDERR "USAGE: $0 "
        . "[-raw] "
@ -69,18 +74,32 @@ sub countSpecials($) {
 sub readXWDFaces($$$) {
    my ( $fh, $facRef, $nSpecials ) = @_;

-    my $buf;
-    my $nRead = sysread( $fh, $buf, 1 );
-    my $nChars = unpack( 'c', $buf );
+    my ( $buf, $nRead, $nChars, $nBytes );
+    $nRead = sysread( $fh, $buf, 1 );
+    $nBytes = unpack( 'c', $buf );
+    printf STDERR "nBytes of faces: %d\n", $nBytes;
+    $nRead = sysread( $fh, $buf, 1 );
+    $nChars = unpack( 'c', $buf );
+    printf STDERR "nChars of faces: %d\n", $nChars;

+    binmode( $fh, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
+    sysread( $fh, $buf, $nChars );
+    length($buf) == $nChars or die "didn't read expected number of bytes\n";
+    binmode( $fh ) or die "binmode failed\n";
+
+    print STDERR "string now: $buf\n";
    my @faces;
-    for ( my $i = 0; $i < $nChars; ++$i ) {
-        my $nRead = sysread( $fh, $buf, 2 );
-        push( @faces, chr(unpack( "n", $buf ) ) );
+    for ( my $ii = 0; $ii < $nChars; ++$ii ) {
+        my $chr = substr( $buf, $ii, 1 );
+        print STDERR "pushing $chr \n";
+        push( @faces, $chr );
    }

+    printf STDERR "at 0x%x after reading faces\n", systell($fh);
+
    ${$nSpecials} = countSpecials( \@faces );
    @{$facRef} = @faces;
+    printf STDERR "readXWDFaces=>%d\n", $nChars;
    return $nChars;
 } # readXWDFaces

@ -96,6 +115,7 @@ sub skipBitmap($) {

        sysread( $fh, $buf, $nBytes );
    }
+    printf STDERR "skipBitmap\n";
 } # skipBitmap

 sub getSpecials($$$) {
@ -135,9 +155,9 @@ sub readNodesToEnd($) {

 sub nodeSizeFromFlags($) {
    my ( $flags ) = @_;
-    if ( $flags == 2 ) {
+    if ( $flags == 4 ) {
        return 3;
-    } elsif ( $flags == 3 ) {
+    } elsif ( $flags == 5 ) {
        return 4;
    } else {
        die "invalid dict flags $flags";
@ -158,6 +178,7 @@ sub mergeSpecials($$) {
 sub prepXWD($$$$) {
    my ( $fh, $facRef, $nodesRef, $startRef ) = @_;

+    printf STDERR "at 0x%x at start\n", systell($fh);
    my $buf;
    my $nRead = sysread( $fh, $buf, 2 );
    my $flags = unpack( "n", $buf );
@ -167,22 +188,30 @@ sub prepXWD($$$$) {
    my $nSpecials;
    my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );

+    printf STDERR "at 0x%x before header read\n", systell($fh);
    # skip xloc header
    $nRead = sysread( $fh, $buf, 2 );

    # skip values info.
+    printf STDERR "at 0x%x before reading %d values\n", systell($fh), $faceCount;
    sysread( $fh, $buf, $faceCount * 2 );
+    printf STDERR "at 0x%x after values read\n", systell($fh);

+    printf STDERR "at 0x%x before specials read\n", systell($fh);
    my @specials;
    getSpecials( $fh, $nSpecials, \@specials );
    mergeSpecials( $facRef, \@specials );
+    printf STDERR "at 0x%x after specials read\n", systell($fh);

+    printf STDERR "at 0x%x before offset read\n", systell($fh);
    sysread( $fh, $buf, 4 );
    $$startRef = unpack( 'N', $buf );
+    print STDERR "startRef=$$startRef\n";

    my @nodes = readNodesToEnd( $fh );

    @$nodesRef = @nodes;
+    print STDERR "prepXWD done\n";
 } # prepXWD

 sub readPDBSpecials($$$$$) {
@ -341,6 +370,7 @@ sub printNodes($$) {
 # main
 #################################################################

+binmode( STDERR, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";

 parseARGV();

@ -359,9 +389,11 @@ if ( $gFileType eq "xwd" ){
 close INFILE;

 die "no nodes!!!" if 0 == @nodes;
+
 if ( $gDoRaw ) {
    printNodes( \@nodes, \@faces );
 } else {
+    binmode( STDOUT, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
    printDAWG( [], \@nodes, $startIndex, \@faces );
 }

--- a/dawg/dict2dawg.cpp
+++ b/dawg/dict2dawg.cpp
@ -93,7 +93,7 @@ static char* gBytesPerNodeFile = NULL;        // where to write whether node
 int gWordCount = 0;
 std::map<Letter,wchar_t> gTableHash;
 int gBlankIndex;
-std::vector<char> gRevMap;
+std::vector<wchar_t> gRevMap;
 #ifdef DEBUG
 bool gDebug = false;
 #endif
@ -117,7 +117,7 @@ static void makeTableHash( void );
 static WordList* parseAndSort( void );
 static void printWords( WordList* strings );
 static bool firstBeforeSecond( const Letter* lhs, const Letter* rhs );
-static char* tileToAscii( char* out, int outSize, const Letter* in );
+static wchar_t* tileToAscii( wchar_t* out, int outSize, const Letter* in );
 static int buildNode( int depth );
 static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling );
 static int addNodes( NodeList& newedgesR );
@ -451,8 +451,8 @@ readFromSortedArray( void )
            }
 #ifdef DEBUG
            if ( gDebug ) {
-                char buf[T2ABUFLEN(MAX_WORD_LEN)];
-                fprintf( stderr, "%s: got word: %s\n", __func__,
+                wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
+                fprintf( stderr, "%s: got word: %ls\n", __func__,
                         tileToAscii( buf, sizeof(buf), word ) );
            }
 #endif
@ -473,10 +473,10 @@ readFromSortedArray( void )
             && !firstBeforeSecond( gCurrentWord, word ) ) {
 #ifdef DEBUG
            if ( gDebug ) {
-                char buf1[T2ABUFLEN(MAX_WORD_LEN)];
-                char buf2[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
                fprintf( stderr,
-                         "%s: words %s and %s are the same or out of order\n",
+                         "%s: words %ls and %ls are the same or out of order\n",
                         __func__, 
                         tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
                         tileToAscii( buf2, sizeof(buf2), word ) );
@ -492,8 +492,8 @@ readFromSortedArray( void )

 #ifdef DEBUG
    if ( gDebug ) {
-        char buf[T2ABUFLEN(MAX_WORD_LEN)];
-        fprintf( stderr, "gCurrentWord now %s\n", 
+        wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
+        fprintf( stderr, "gCurrentWord now %ls\n", 
                 tileToAscii( buf, sizeof(buf), gCurrentWord) );
    }
 #endif
@ -516,6 +516,9 @@ getWideChar( FILE* file )
            assert( 0 == ii );
            dest = byt;
            break;
+        } else if ( byt < ' ' && 0 == ii ) {
+            dest = byt;
+            break;
        }

        assert( ii < 4 );
@ -567,9 +570,9 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
            } 
 #ifdef DEBUG
            if ( gDebug ) {
-                char buf[T2ABUFLEN(count)];
+                wchar_t buf[T2ABUFLEN(count)];
                wordBuf[count] = '\0';
-                fprintf( stderr, "%s: dropping word (len %d>=%d): %s\n", 
+                fprintf( stderr, "%s: dropping word (len %d>=%d): %ls\n", 
                         __func__, count, gLimHigh, 
                         tileToAscii( buf, sizeof(buf), wordBuf ) );
            }
@ -589,21 +592,21 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
                dropWord = true;
            }
        } else if ( gKillIfMissing || !dropWord ) {
-            char buf[T2ABUFLEN(count)];
+            wchar_t buf[T2ABUFLEN(count)];
            wordBuf[count] = '\0';

            tileToAscii( buf, sizeof(buf), wordBuf );

            if ( gKillIfMissing ) {
                ERROR_EXIT( "chr %lc (%d/0x%x) not in map file %s\n"
-                            "last word was %s\n",
+                            "last word was %ls\n",
                            byt, (int)byt, (int)byt, gTableFile, buf );
            } else if ( !dropWord ) {
 #ifdef DEBUG
                if ( gDebug ) {
-                    fprintf( stderr, "%s: chr %c (%d) not in map file %s\n"
-                             "dropping partial word %s\n", __func__,
-                             (char)byt, (int)byt, gTableFile, buf );
+                    fprintf( stderr, "%s: chr %lc (%d) not in map file %s\n"
+                             "dropping partial word %ls\n", __func__,
+                             byt, (int)byt, gTableFile, buf );
                }
 #endif
                dropWord = true;
@ -658,10 +661,10 @@ readFromFile( void )
             && !firstBeforeSecond( gCurrentWord, word ) ) {
 #ifdef DEBUG
            if ( gDebug ) {
-                char buf1[T2ABUFLEN(MAX_WORD_LEN)];
-                char buf2[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf1[T2ABUFLEN(MAX_WORD_LEN)];
+                wchar_t buf2[T2ABUFLEN(MAX_WORD_LEN)];
                fprintf( stderr,
-                         "%s: words %s and %s are the smae or out of order\n",
+                         "%s: words %ls and %ls are the smae or out of order\n",
                         __func__, 
                         tileToAscii( buf1, sizeof(buf1), gCurrentWord ),
                         tileToAscii( buf2, sizeof(buf2), word ) );
@ -676,8 +679,8 @@ readFromFile( void )

 #ifdef DEBUG
    if ( gDebug ) {
-        char buf[T2ABUFLEN(MAX_WORD_LEN)];
-        fprintf( stderr, "gCurrentWord now %s\n", 
+        wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
+        fprintf( stderr, "gCurrentWord now %ls\n", 
                 tileToAscii( buf, sizeof(buf), gCurrentWord) );
    }
 #endif
@ -690,14 +693,16 @@ firstBeforeSecond( const Letter* lhs, const Letter* rhs )
    return gt;
 }

-static char*
-tileToAscii( char* out, int outSize, const Letter* in )
+static wchar_t*
+tileToAscii( wchar_t* out, int outSize, const Letter* in )
 {
-    char tiles[outSize];
+    // FIX THIS!  Pass actual size from callsite
+    outSize /= sizeof(wchar_t)/sizeof(char);
+    wchar_t tiles[outSize];
    int tilesLen = 1;
-    tiles[0] = '[';
+    tiles[0] = L'[';

-    char* orig = out;
+    wchar_t* orig = out;
    for ( ; ; ) {
        Letter ch = *in++;
        if ( '\0' == ch ) {
@ -705,14 +710,14 @@ tileToAscii( char* out, int outSize, const Letter* in )
        }
        assert( ch < gRevMap.size() );
        *out++ = gRevMap[ch];
-        tilesLen += sprintf( &tiles[tilesLen], "%d,", ch );
+        tilesLen += swprintf( &tiles[tilesLen], outSize-tilesLen, L"%d,", ch );
        assert( (out - orig) < outSize );
    }

    assert( tilesLen+1 < outSize );
-    tiles[tilesLen] = ']';
-    tiles[tilesLen+1] = '\0';
-    strcpy( out, tiles );
+    tiles[tilesLen] = L']';
+    tiles[tilesLen+1] = L'\0';
+    wcscpy( out, tiles );

    return orig;
 }
@ -777,9 +782,9 @@ printWords( WordList* strings )
 {
    std::vector<Letter*>::iterator iter = strings->begin();
    while ( iter != strings->end() ) {
-        char buf[T2ABUFLEN(MAX_WORD_LEN)];
+        wchar_t buf[T2ABUFLEN(MAX_WORD_LEN)];
        tileToAscii( buf, sizeof(buf), *iter );
-        fprintf( stderr, "%s\n", buf );
+        fprintf( stderr, "%ls\n", buf );
        ++iter;
    }
 }
@ -906,18 +911,12 @@ makeTableHash( void )
    gRevMap.push_back(0);

    for ( ii = 0; ; ++ii ) {
-        int ch = getc(TABLEFILE);
-        if ( ch == EOF ) {
-            break;
-        }
-
-        if ( gUseUnicode ) {   // skip the first byte each time: tmp HACK!!!
-            ch = getc(TABLEFILE);
-        }
-        if ( ch == EOF ) {
+        wchar_t ch = getWideChar( TABLEFILE );
+        if ( EOF == ch ) {
            break;
        }

+        fprintf( stderr, "adding %x\n", ch );
        gRevMap.push_back(ch);

        if ( ch == 0 ) {	// blank
--- a/dawg/xloc.pl
+++ b/dawg/xloc.pl
@ -23,6 +23,7 @@ use xloc;

 my $unicode = -1;
 my $doval = 0;
+my $dosize = 0;
 my $enc;
 my $outfile;

@ -37,6 +38,8 @@ while ( $arg = $ARGV[0] ) {
        $unicode = 0;
    } elsif ( $arg eq "-v" ) {
        $doval = 1;
+    } elsif ( $arg eq "-s" ) {
+        $dosize = 1;
    } elsif ( $arg eq '-out' ) {
        $outfile = $ARGV[1];
        shift @ARGV;
@ -52,12 +55,20 @@ die "info file $infoFile not found\n" if ! -s $infoFile;

 my $xlocToken = xloc::ParseTileInfo($infoFile, $enc);

-open OUTFILE, "> $outfile";
+if ( $enc ) {
+    open OUTFILE, ">:encoding($enc)", "$outfile" 
+        or die "couldn't open $outfile";
+} else {
+    open OUTFILE, ">$outfile" or die "couldn't open $outfile";
+}
 # For f*cking windoze linefeeds
-binmode( OUTFILE );
+# binmode( OUTFILE );

 if ( $unicode ne -1 ) {
    xloc::WriteMapFile( $xlocToken, $unicode, \*OUTFILE );
+} elsif ( $dosize ) {
+    my $count = xloc::GetNTiles( $xlocToken );
+    print OUTFILE pack("c", $count );
 } elsif ( $doval ) {
    xloc::WriteValuesFile( $xlocToken, \*OUTFILE );
 }
--- a/dawg/xloc.pm
+++ b/dawg/xloc.pm
@ -103,13 +103,6 @@ sub GetValue($$) {
 sub WriteMapFile($$$) {
    my ( $hashR, $unicode, $fhr ) = @_;

-    my $packStr;
-    if ( $unicode ) {
-        $packStr = "n";
-    } else {
-        $packStr = "C";
-    }
-
    my $count = GetNTiles($hashR);
    my $specialCount = 0;
    for ( my $i = 0; $i < $count; ++$i ) {
@ -117,11 +110,12 @@ sub WriteMapFile($$$) {
        my $str = ${$tileR}[2];

        if ( $str =~ /\'(.)\'/ ) {
-            print $fhr pack($packStr, ord($1) );
+            print $fhr pack( "W", ord($1) );
+#            printf STDERR "ord: %x ($1)\n", ord($1);
        } elsif ( $str =~ /\"(.+)\"/ ) {
-            print $fhr pack($packStr, $specialCount++ );
+            print $fhr pack( "c", $specialCount++ );
        } elsif ( $str =~ /(\d+)/ ) {
-            print $fhr pack( $packStr, $1 );
+            print $fhr pack( "n", $1 );
        } else {
            die "WriteMapFile: unrecognized face format $str, elem $i";
        }