From c4cdc24b78e0e8e8a965e0c39dd65c45e306b9d3 Mon Sep 17 00:00:00 2001
From: Eric House <eehouse@eehouse.org>
Date: Sun, 5 Dec 2010 19:33:10 -0800
Subject: [PATCH] initial changes to add a header to xwd format so that stuff
 like number of words can be included.  Changed the dict build to emit the header
 and linux to read it.  Android still needs to learn.  Also, some of the tools in dawg/
 need to be fixed to read old-format (pre-utf8) .xwd files.

---
 xwords4/android/XWords4/jni/anddict.c    |  1 -
 xwords4/common/dictnry.h                 |  3 +++
 xwords4/dawg/English/Makefile.BasEnglish |  2 +-
 xwords4/dawg/Makefile.langcommon         | 17 ++++++++++------
 xwords4/dawg/dawg2dict.pl                | 22 +++++++++++++++-----
 xwords4/linux/linuxdict.c                | 26 +++++++++++++++++++++++-
 6 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/xwords4/android/XWords4/jni/anddict.c b/xwords4/android/XWords4/jni/anddict.c
index ef69fc942..3c7acee91 100644
--- a/xwords4/android/XWords4/jni/anddict.c
+++ b/xwords4/android/XWords4/jni/anddict.c
@@ -432,7 +432,6 @@ makeDict( MPFORMAL JNIEnv *env, JNIUtilCtxt* jniutil, jbyteArray jbytes,
     anddict->bytes = localBytes;
 
     parseDict( anddict, localBytes, len );
-    setBlankTile( &anddict->super );
 
     /* copy the name */
     if ( NULL != jname ) {
diff --git a/xwords4/common/dictnry.h b/xwords4/common/dictnry.h
index ff088b60d..fc4ff731d 100644
--- a/xwords4/common/dictnry.h
+++ b/xwords4/common/dictnry.h
@@ -35,6 +35,8 @@ extern "C" {
 /* cast to unsigned in case XP_UCHAR is signed */
 #define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020)
 
+#define DICT_HEADER_MASK 0x08
+
 typedef XP_U8 XP_LangCode;
 
 typedef enum {
@@ -78,6 +80,7 @@ struct DictionaryCtxt {
 
     SpecialBitmaps* bitmaps;
     XP_UCHAR** chars;
+    XP_U32 nWords;
 
     XP_LangCode langCode;
 
diff --git a/xwords4/dawg/English/Makefile.BasEnglish b/xwords4/dawg/English/Makefile.BasEnglish
index 03019e7a2..e44ba1f1b 100644
--- a/xwords4/dawg/English/Makefile.BasEnglish
+++ b/xwords4/dawg/English/Makefile.BasEnglish
@@ -19,7 +19,7 @@ XWLANG=BasEnglish
 LANGCODE=en_US
 DICT2DAWGARGS = -r -nosort
 
-TARGET_TYPE ?= PALM
+TARGET_TYPE ?= WINCE
 
 include ../Makefile.2to8
 
diff --git a/xwords4/dawg/Makefile.langcommon b/xwords4/dawg/Makefile.langcommon
index e5b5b64d8..db58afc67 100644
--- a/xwords4/dawg/Makefile.langcommon
+++ b/xwords4/dawg/Makefile.langcommon
@@ -204,8 +204,8 @@ endif
 frankspecials.bin: ../frank_mkspecials.pl  $(BMPFILES)
 	$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
 
-$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin 
-	cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
+$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin frankspecials.bin 
+	cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin \
 		frankspecials.bin $(XWLANG)StartLoc.bin  \
 		$$(ls dawg$(XWLANG)$*_*.bin) > $@
 	cp $@ saveme.bin
@@ -223,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
 $(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
 ifdef NEWDAWG
 	if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
+		then perl -e "print pack(\"n\",0x000C)" > $@; echo "flags=0xC"; \
 	elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
+		then perl -e "print pack(\"n\",0x000D)" > $@; echo "flags=0xD"; \
 	elif true; \
 		then echo "Unexpected node size"; exit 1; \
 	fi
@@ -247,7 +247,7 @@ dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.l
 	touch $@
 
 $(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
-	@echo
+	@echo "got this rule"
 
 # the files to export for byod
 allbins: 
@@ -268,7 +268,7 @@ values.bin: ../xloc.pl
 # a binary file, two bytes, one giving the size of tiles data and the
 #  other the number of tiles in the dict.  Tiles data is utf-8 and so
 #  number is not derivable from size.
-charcount.bin: table.bin ../xloc.pl
+$(XWLANG)_charcount.bin: table.bin ../xloc.pl
 	SIZ=$$(ls -l $< | awk '{print $$5}'); \
 	perl -e "print pack(\"c\",$$SIZ)" > $@
 	TMP=/tmp/tmp$$$$; \
@@ -276,6 +276,11 @@ charcount.bin: table.bin ../xloc.pl
 	cat $$TMP >> $@; \
 	rm -f $$TMP
 
+$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin
+	SIZ=$$(ls -l $< | awk '{print $$5}'); \
+	perl -e "print pack(\"n\",$$SIZ)" > $@
+	cat $< >> $@
+
 %.dict: %.dict.gz
 	zcat $< > $@
 
diff --git a/xwords4/dawg/dawg2dict.pl b/xwords4/dawg/dawg2dict.pl
index 70d93343a..7dc41e8f5 100755
--- a/xwords4/dawg/dawg2dict.pl
+++ b/xwords4/dawg/dawg2dict.pl
@@ -156,9 +156,21 @@ sub readNodesToEnd($) {
     return @nodes;
 } # readNodesToEnd
 
-sub nodeSizeFromFlags($) {
-    my ( $flags ) = @_;
-    if ( $flags == 4 ) {
+sub nodeSizeFromFlags($$) {
+    my ( $fh, $flags ) = @_;    # $fh is only read when the header bit is set
+
+    my $bitSet = $flags & 0x0008;    # 0x0008 is DICT_HEADER_MASK in common/dictnry.h
+    printf STDERR "checking flags 0x%x with 0x%x -> 0x%x\n", $flags, 0x0008, $bitSet;
+    if ( 0 != $bitSet ){
+        $flags = $flags & ~0x0008;    # strip the header bit before the node-size checks
+        # need to skip header: 2-byte big-endian length, then that many bytes
+        my $buf;
+        2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header";
+        my $len = unpack( "n", $buf );
+        $len == sysread( $fh, $buf, $len ) || die  "couldn't read header bytes";
+    }
+
+    if ( $flags == 2 || $flags == 4 ) {
         return 3;
     } elsif ( $flags == 5 ) {
         return 4;
@@ -186,7 +198,7 @@ sub prepXWD($$$$) {
     my $nRead = sysread( $fh, $buf, 2 );
     my $flags = unpack( "n", $buf );
 
-    $gNodeSize = nodeSizeFromFlags( $flags );
+    $gNodeSize = nodeSizeFromFlags( $fh, $flags );
 
     my $nSpecials;
     my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
@@ -270,7 +282,7 @@ sub prepPDB($$$$) {
     my $nChars = ($offsets[2] - $facesOffset) / 2;
     $nRead += sysread( $fh, $buf, $facesOffset - $nRead );
     my @tmp = unpack( 'Nc6n', $buf );
-    $gNodeSize = nodeSizeFromFlags( $tmp[7] );
+    $gNodeSize = nodeSizeFromFlags( 0, $tmp[7] );
 
     my @faces;
     for ( my $i = 0; $i < $nChars; ++$i ) {
diff --git a/xwords4/linux/linuxdict.c b/xwords4/linux/linuxdict.c
index 7c84428b6..6c94882ec 100644
--- a/xwords4/linux/linuxdict.c
+++ b/xwords4/linux/linuxdict.c
@@ -209,11 +209,17 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
     XP_U16 facesSize;
     XP_U16 charSize;
     XP_Bool isUTF8 = XP_FALSE;
+    XP_Bool hasHeader = XP_FALSE;
 
     XP_ASSERT( dictF );
     if ( 1 == fread( &flags, sizeof(flags), 1, dictF ) ) {
         flags = ntohs(flags);
-        XP_DEBUGF( "flags=0x%x", flags );
+        XP_DEBUGF( "flags=0X%X", flags );
+        hasHeader = 0 != (DICT_HEADER_MASK & flags);
+        if ( hasHeader ) {
+            flags &= ~DICT_HEADER_MASK;
+            XP_DEBUGF( "has header!" );
+        }
 #ifdef NODE_CAN_4
         if ( flags == 0x0001 ) {
             dctx->super.nodeSize = 3;
@@ -250,6 +256,24 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
 #endif
 
     if ( formatOk ) {
+
+        if ( hasHeader ) {
+            XP_U16 headerLen; /* header byte count; converted with ntohs() below */
+            if ( 1 != fread( &headerLen, sizeof(headerLen), 1, dictF ) ) {
+                goto closeAndExit;
+            }
+            headerLen = ntohs( headerLen );
+            XP_U32 wordCount;
+            if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */
+                goto closeAndExit;
+            }
+            if ( 1 != fread( &wordCount, sizeof(wordCount), 1, dictF ) ) {
+                goto closeAndExit;
+            }
+            dctx->super.nWords = ntohl( wordCount );
+            XP_DEBUGF( "dict contains %ld words", (long)dctx->super.nWords ); /* cast: %ld needs a long, nWords is XP_U32 */
+        }
+
         if ( isUTF8 ) {
             if ( 1 != fread( &numFaceBytes, sizeof(numFaceBytes), 1, dictF ) ) {
                 goto closeAndExit;