add dict sanity check that will, I hope, catch corrupt dicts.

2025-01-13 08:01:33 +01:00 · 2011-11-09 06:51:12 -08:00 · 2011-11-09 06:51:12 -08:00 · 268f018140
commit 268f018140
parent c591182d6c
5 changed files with 112 additions and 55 deletions
--- a/xwords4/common/dictnry.c
+++ b/xwords4/common/dictnry.c
@ -699,6 +699,47 @@ dict_getLangName( const DictionaryCtxt* ctxt )
    return ctxt->langName;
 }

+#ifdef XWFEATURE_DICTSANITY
+/* Sanity-check the numEdges edges starting at dict->base.  Fails on a tile
+ * lower than the preceding edge's (order restarts after each LAST_EDGE), an
+ * index >= numEdges, or a final edge that is not a LAST_EDGE. */
+XP_Bool
+checkSanity( DictionaryCtxt* dict, const XP_U32 numEdges )
+{
+    XP_U32 ii;
+    XP_Bool passed = XP_TRUE;
+    /* Explicit flag rather than 0 == prevTile: a non-last edge holding tile
+       0 also leaves prevTile at 0.  Starts true so zero edges still pass. */
+    XP_Bool sawLastEdge = XP_TRUE;
+    array_edge* edge = dict->base;
+    Tile prevTile = 0;
+    for ( ii = 0; ii < numEdges; ++ii, edge += dict->nodeSize ) {
+        Tile tile = EDGETILE( dict, edge );
+        if ( tile < prevTile ) {
+            /* casts make the args match %ld whatever XP_U32's width is */
+            XP_LOGF( "%s: node %ld of %ld has out-of-order tile", __func__,
+                     (long)ii, (long)numEdges );
+            passed = XP_FALSE;
+            break;
+        }
+        unsigned long index = dict_index_from( dict, edge );
+        if ( index >= numEdges ) {
+            XP_LOGF( "%s: node %ld of %ld has too-high index", __func__,
+                     (long)ii, (long)numEdges );
+            passed = XP_FALSE;
+            break;
+        }
+        /* tile order restarts after the edge flagged LAST_EDGE */
+        sawLastEdge = IS_LAST_EDGE( dict, edge ) ? XP_TRUE : XP_FALSE;
+        prevTile = sawLastEdge ? 0 : tile;
+    }
+
+    passed = passed && sawLastEdge; /* last edge seen was a LAST_EDGE */
+    XP_LOGF( "%s(numEdges=%ld)=>%d", __func__, (long)numEdges, passed );
+    return passed;
+} /* checkSanity */
+#endif
+
 #ifdef CPLUS
 }
 #endif
--- a/xwords4/common/dictnry.h
+++ b/xwords4/common/dictnry.h
@ -207,6 +207,8 @@ void dict_super_init( DictionaryCtxt* ctxt );
 void dict_splitFaces( DictionaryCtxt* dict, const XP_U8* bytes, 
                      XP_U16 nBytes, XP_U16 nFaces );

+XP_Bool checkSanity( DictionaryCtxt* dict, XP_U32 numEdges ); /* XP_FALSE if the edges fail validation; defined only under XWFEATURE_DICTSANITY */
+
 #ifdef CPLUS
 }
 #endif
--- a/xwords4/linux/Makefile
+++ b/xwords4/linux/Makefile
@ -92,6 +92,7 @@ DEFINES += -DDISABLE_TILE_SEL
 DEFINES += -DSET_GAMESEED
 DEFINES += -DTEXT_MODEL
 DEFINES += -DXWFEATURE_WALKDICT
+DEFINES += -DXWFEATURE_DICTSANITY

 ifdef CURSES_CELL_HT
 DEFINES += -DCURSES_CELL_HT=$(CURSES_CELL_HT)
--- a/xwords4/linux/linuxdict.c
+++ b/xwords4/linux/linuxdict.c
@ -225,17 +225,19 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
    dctx->dictLength = statbuf.st_size;

    {
-	FILE* dictF = fopen( fileName, "r" );
-	XP_ASSERT( !!dictF );
-	if ( dctx->useMMap ) {
-	    dctx->dictBase = mmap( NULL, dctx->dictLength, PROT_READ, MAP_PRIVATE, fileno(dictF), 0 );
-	} else {
-	    dctx->dictBase = XP_MALLOC( dctx->super.mpool, dctx->dictLength );
-	    if ( dctx->dictLength != fread( dctx->dictBase, 1, dctx->dictLength, dictF ) ) {
-		XP_ASSERT( 0 );
-	    }
-	}
-	fclose( dictF );
+        FILE* dictF = fopen( fileName, "r" );
+        XP_ASSERT( !!dictF );
+        if ( dctx->useMMap ) {
+            dctx->dictBase = mmap( NULL, dctx->dictLength, PROT_READ, 
+                                   MAP_PRIVATE, fileno(dictF), 0 );
+        } else {
+            dctx->dictBase = XP_MALLOC( dctx->super.mpool, dctx->dictLength );
+            if ( dctx->dictLength != fread( dctx->dictBase, 1, 
+                                            dctx->dictLength, dictF ) ) {
+                XP_ASSERT( 0 );
+            }
+        }
+        fclose( dictF );
    }

    ptr = dctx->dictBase;
@ -247,65 +249,65 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
    XP_DEBUGF( "flags=0X%X", flags );
    hasHeader = 0 != (DICT_HEADER_MASK & flags);
    if ( hasHeader ) {
-	flags &= ~DICT_HEADER_MASK;
-	XP_DEBUGF( "has header!" );
+        flags &= ~DICT_HEADER_MASK;
+        XP_DEBUGF( "has header!" );
    }
 #ifdef NODE_CAN_4
    if ( flags == 0x0001 ) {
-	dctx->super.nodeSize = 3;
-	charSize = 1;
-	dctx->super.is_4_byte = XP_FALSE;
+        dctx->super.nodeSize = 3;
+        charSize = 1;
+        dctx->super.is_4_byte = XP_FALSE;
    } else if ( flags == 0x0002 ) {
-	dctx->super.nodeSize = 3;
-	charSize = 2;
-	dctx->super.is_4_byte = XP_FALSE;
+        dctx->super.nodeSize = 3;
+        charSize = 2;
+        dctx->super.is_4_byte = XP_FALSE;
    } else if ( flags == 0x0003 ) {
-	dctx->super.nodeSize = 4;
-	charSize = 2;
-	dctx->super.is_4_byte = XP_TRUE;
+        dctx->super.nodeSize = 4;
+        charSize = 2;
+        dctx->super.is_4_byte = XP_TRUE;
    } else if ( flags == 0x0004 ) {
-	dctx->super.nodeSize = 3;
-	dctx->super.isUTF8 = XP_TRUE;
-	isUTF8 = XP_TRUE;
-	dctx->super.is_4_byte = XP_FALSE;
+        dctx->super.nodeSize = 3;
+        dctx->super.isUTF8 = XP_TRUE;
+        isUTF8 = XP_TRUE;
+        dctx->super.is_4_byte = XP_FALSE;
    } else if ( flags == 0x0005 ) {
-	dctx->super.nodeSize = 4;
-	dctx->super.isUTF8 = XP_TRUE;
-	isUTF8 = XP_TRUE;
-	dctx->super.is_4_byte = XP_TRUE;
+        dctx->super.nodeSize = 4;
+        dctx->super.isUTF8 = XP_TRUE;
+        isUTF8 = XP_TRUE;
+        dctx->super.is_4_byte = XP_TRUE;
    } else {
-	/* case I don't know how to deal with */
-	formatOk = XP_FALSE;
-	XP_ASSERT(0);
+        /* case I don't know how to deal with */
+        formatOk = XP_FALSE;
+        XP_ASSERT(0);
    }

 #else
-XP_ASSERT( flags == 0x0001 );
+    XP_ASSERT( flags == 0x0001 );
 #endif

    if ( formatOk ) {
-	XP_U8 numFaceBytes, numFaces;
+        XP_U8 numFaceBytes, numFaces;

        if ( hasHeader ) {
            XP_U16 headerLen;
            XP_U32 wordCount;

-	    memcpy( &headerLen, ptr, sizeof(headerLen) );
-	    ptr += sizeof(headerLen);
+            memcpy( &headerLen, ptr, sizeof(headerLen) );
+            ptr += sizeof(headerLen);
            headerLen = ntohs( headerLen );
            if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */
                goto closeAndExit;
            }
-	    memcpy( &wordCount, ptr, sizeof(wordCount) );
-	    ptr += sizeof(wordCount);
+            memcpy( &wordCount, ptr, sizeof(wordCount) );
+            ptr += sizeof(wordCount);
            dctx->super.nWords = ntohl( wordCount );
            XP_DEBUGF( "dict contains %ld words", dctx->super.nWords );
        }

        if ( isUTF8 ) {
-	    numFaceBytes = *ptr++;
+            numFaceBytes = *ptr++;
        }
-	numFaces = *ptr++;
+        numFaces = *ptr++;
        if ( !isUTF8 ) {
            numFaceBytes = numFaces * charSize;
        }
@ -320,42 +322,47 @@ XP_ASSERT( flags == 0x0001 );
        }

        XP_U8 tmp[numFaceBytes];
-	memcpy( tmp, ptr, numFaceBytes );
-	ptr += numFaceBytes;
+        memcpy( tmp, ptr, numFaceBytes );
+        ptr += numFaceBytes;

        dict_splitFaces( &dctx->super, tmp, numFaceBytes, numFaces );

-	memcpy( &xloc, ptr, sizeof(xloc) );
-	ptr += sizeof(xloc);
-	memcpy( dctx->super.countsAndValues, ptr, numFaces*2 );
-	ptr += numFaces*2;
+        memcpy( &xloc, ptr, sizeof(xloc) );
+        ptr += sizeof(xloc);
+        memcpy( dctx->super.countsAndValues, ptr, numFaces*2 );
+        ptr += numFaces*2;
    }
    
    dctx->super.langCode = xloc & 0x7F;

    if ( formatOk ) {
+        XP_U32 numEdges;
        skipBitmaps( dctx, &ptr );

        curPos = ptr - dctx->dictBase;
        dictLength = dctx->dictLength - curPos;

        if ( dictLength > 0 ) {
-	    memcpy( &topOffset, ptr, sizeof(topOffset) );
+            memcpy( &topOffset, ptr, sizeof(topOffset) );
            /* it's in big-endian order */
            topOffset = ntohl(topOffset);
            dictLength -= sizeof(topOffset); /* first four bytes are offset */
-	    ptr += sizeof(topOffset);
+            ptr += sizeof(topOffset);
        }

        if ( dictLength > 0 ) {
+# ifdef NODE_CAN_4
+            numEdges = dictLength / dctx->super.nodeSize;
+# else
+            numEdges = dictLength / 3;
+# endif
 #ifdef DEBUG
 # ifdef NODE_CAN_4
-            dctx->super.numEdges = dictLength / dctx->super.nodeSize;
            XP_ASSERT( (dictLength % dctx->super.nodeSize) == 0 );
 # else
-            dctx->super.numEdges = dictLength / 3;
            XP_ASSERT( (dictLength % 3) == 0 );
 # endif
+            dctx->super.numEdges = numEdges;
 #endif
            dctx->super.base = (array_edge*)ptr;

@ -366,6 +373,10 @@ XP_ASSERT( flags == 0x0001 );
        }

        dctx->super.name = copyString( dctx->super.mpool, fileName );
+
+        if ( ! checkSanity( &dctx->super, numEdges ) ) {
+            goto closeAndExit;
+        }
    }
    goto ok;

--- a/xwords4/linux/linuxmain.c
+++ b/xwords4/linux/linuxmain.c
@ -903,6 +903,7 @@ tmp_noop_sigintterm( int XP_UNUSED(sig) )
 }

 #ifdef XWFEATURE_WALKDICT
+//# define PRINT_ALL
 static void
 testGetNthWord( const DictionaryCtxt* dict, char** words,
                XP_U16 depth, IndexData* data )
@ -948,7 +949,6 @@ walk_dict_test( const LaunchParams* params, const DictionaryCtxt* dict,
    XP_ASSERT( count == dict_countWords( dict ) );
    char** words = g_malloc( count * sizeof(char*) );
    XP_ASSERT( !!words );
-    // # define PRINT_ALL

    /* if ( dict_firstWord( dict, &word ) */
    /*      && dict_getNextWord( dict, &word ) */
@ -1085,9 +1085,11 @@ walk_dict_test_all( const LaunchParams* params, GSList* testDicts,
        DictionaryCtxt* dict = 
            linux_dictionary_make( MPPARM(params->util->mpool) name,
                                   params->useMmap );
-        XP_LOGF( "walk_dict_test(%s)", name );
-        walk_dict_test( params, dict, testPrefixes );
-        dict_destroy( dict );
+        if ( NULL != dict ) {
+            XP_LOGF( "walk_dict_test(%s)", name );
+            walk_dict_test( params, dict, testPrefixes );
+            dict_destroy( dict );
+        }
    }
 }
 #endif