initial changes to add a header to xwd format so that stuff like

number of words can be included. Changed the dict-building tools to produce such files and the linux code to open them. Android still needs to learn how. Also, some of the tools in dawg/ still need to be fixed to read old-format (pre-utf8) .xwd files.
2024-12-28 09:58:30 +01:00 · 2010-12-05 19:33:10 -08:00 · 2010-12-05 19:33:10 -08:00 · c4cdc24b78
commit c4cdc24b78
parent eff2324950
6 changed files with 57 additions and 14 deletions
--- a/xwords4/android/XWords4/jni/anddict.c
+++ b/xwords4/android/XWords4/jni/anddict.c
@ -432,7 +432,6 @@ makeDict( MPFORMAL JNIEnv *env, JNIUtilCtxt* jniutil, jbyteArray jbytes,
    anddict->bytes = localBytes;

    parseDict( anddict, localBytes, len );
-    setBlankTile( &anddict->super );

    /* copy the name */
    if ( NULL != jname ) {
--- a/xwords4/common/dictnry.h
+++ b/xwords4/common/dictnry.h
@ -35,6 +35,8 @@ extern "C" {
 /* cast to unsigned in case XP_UCHAR is signed */
 #define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020)

+#define DICT_HEADER_MASK 0x08
+
 typedef XP_U8 XP_LangCode;

 typedef enum {
@ -78,6 +80,7 @@ struct DictionaryCtxt {

    SpecialBitmaps* bitmaps;
    XP_UCHAR** chars;
+    XP_U32 nWords;

    XP_LangCode langCode;

--- a/xwords4/dawg/English/Makefile.BasEnglish
+++ b/xwords4/dawg/English/Makefile.BasEnglish
@ -19,7 +19,7 @@ XWLANG=BasEnglish
 LANGCODE=en_US
 DICT2DAWGARGS = -r -nosort

-TARGET_TYPE ?= PALM
+TARGET_TYPE ?= WINCE

 include ../Makefile.2to8

--- a/xwords4/dawg/Makefile.langcommon
+++ b/xwords4/dawg/Makefile.langcommon
@ -204,8 +204,8 @@ endif
 frankspecials.bin: ../frank_mkspecials.pl  $(BMPFILES)
 	$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@

-$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin 
-	cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
+$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin frankspecials.bin 
+	cat $(XWLANG)$*_flags.bin $(XWLANG)*_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin \
 		frankspecials.bin $(XWLANG)StartLoc.bin  \
 		$$(ls dawg$(XWLANG)$*_*.bin) > $@
 	cp $@ saveme.bin
@ -223,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
 $(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
 ifdef NEWDAWG
 	if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
+		then perl -e "print pack(\"n\",0x000C)" > $@; echo "flags=4"; \
 	elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
-		then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
+		then perl -e "print pack(\"n\",0x000D)" > $@; echo "flags=5"; \
 	elif true; \
 		then echo "Unexpected node size"; exit 1; \
 	fi
@ -247,7 +247,7 @@ dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.l
 	touch $@

 $(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
-	@echo
+	@echo "got this rule"

 # the files to export for byod
 allbins: 
@ -268,7 +268,7 @@ values.bin: ../xloc.pl
 # a binary file, two bytes, one giving the size of tiles data and the
 #  other the number of tiles in the dict.  Tiles data is utf-8 and so
 #  number is not derivable from size.
-charcount.bin: table.bin ../xloc.pl
+$(XWLANG)_charcount.bin: table.bin ../xloc.pl
 	SIZ=$$(ls -l $< | awk '{print $$5}'); \
 	perl -e "print pack(\"c\",$$SIZ)" > $@
 	TMP=/tmp/tmp$$$$; \
@ -276,6 +276,11 @@ charcount.bin: table.bin ../xloc.pl
 	cat $$TMP >> $@; \
 	rm -f $$TMP

+$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin
+	SIZ=$$(ls -l $< | awk '{print $$5}'); \
+	perl -e "print pack(\"n\",$$SIZ)" > $@
+	cat $< >> $@
+
 %.dict: %.dict.gz
 	zcat $< > $@

--- a/xwords4/dawg/dawg2dict.pl
+++ b/xwords4/dawg/dawg2dict.pl
@ -156,9 +156,21 @@ sub readNodesToEnd($) {
    return @nodes;
 } # readNodesToEnd

-sub nodeSizeFromFlags($) {
-    my ( $flags ) = @_;
-    if ( $flags == 4 ) {
+sub nodeSizeFromFlags($$) {
+    my ( $fh, $flags ) = @_;
+
+    my $bitSet = $flags & 0x0008;
+    printf STDERR "checking flags 0x%x with 0x%x -> 0x%x\n", $flags, 0x0008, $bitSet;
+    if ( 0 != $bitSet ){
+        $flags = $flags & ~0x0008;
+        # need to skip header
+        my $buf;
+        2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header";
+        my $len = unpack( "n", $buf );
+        $len == sysread( $fh, $buf, $len ) || die  "couldn't read header bytes";
+    }
+
+    if ( $flags == 2 || $ flags == 4 ) {
        return 3;
    } elsif ( $flags == 5 ) {
        return 4;
@ -186,7 +198,7 @@ sub prepXWD($$$$) {
    my $nRead = sysread( $fh, $buf, 2 );
    my $flags = unpack( "n", $buf );

-    $gNodeSize = nodeSizeFromFlags( $flags );
+    $gNodeSize = nodeSizeFromFlags( $fh, $flags );

    my $nSpecials;
    my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
@ -270,7 +282,7 @@ sub prepPDB($$$$) {
    my $nChars = ($offsets[2] - $facesOffset) / 2;
    $nRead += sysread( $fh, $buf, $facesOffset - $nRead );
    my @tmp = unpack( 'Nc6n', $buf );
-    $gNodeSize = nodeSizeFromFlags( $tmp[7] );
+    $gNodeSize = nodeSizeFromFlags( 0, $tmp[7] );

    my @faces;
    for ( my $i = 0; $i < $nChars; ++$i ) {
--- a/xwords4/linux/linuxdict.c
+++ b/xwords4/linux/linuxdict.c
@ -209,11 +209,17 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
    XP_U16 facesSize;
    XP_U16 charSize;
    XP_Bool isUTF8 = XP_FALSE;
+    XP_Bool hasHeader = XP_FALSE;

    XP_ASSERT( dictF );
    if ( 1 == fread( &flags, sizeof(flags), 1, dictF ) ) {
        flags = ntohs(flags);
-        XP_DEBUGF( "flags=0x%x", flags );
+        XP_DEBUGF( "flags=0X%X", flags );
+        hasHeader = 0 != (DICT_HEADER_MASK & flags);
+        if ( hasHeader ) {
+            flags &= ~DICT_HEADER_MASK;
+            XP_DEBUGF( "has header!" );
+        }
 #ifdef NODE_CAN_4
        if ( flags == 0x0001 ) {
            dctx->super.nodeSize = 3;
@ -250,6 +256,24 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
 #endif

    if ( formatOk ) {
+
+        if ( hasHeader ) {
+            XP_U16 headerLen;
+            if ( 1 != fread( &headerLen, sizeof(headerLen), 1, dictF ) ) {
+                goto closeAndExit;
+            }
+            headerLen = ntohs( headerLen );
+            XP_U32 wordCount;
+            if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */
+                goto closeAndExit;
+            }
+            if ( 1 != fread( &wordCount, sizeof(wordCount), 1, dictF ) ) {
+                goto closeAndExit;
+            }
+            dctx->super.nWords = ntohl( wordCount );
+            XP_DEBUGF( "dict contains %ld words", dctx->super.nWords );
+        }
+
        if ( isUTF8 ) {
            if ( 1 != fread( &numFaceBytes, sizeof(numFaceBytes), 1, dictF ) ) {
                goto closeAndExit;