From 268f018140bac3b1cd4250b592cea23ac9778b9c Mon Sep 17 00:00:00 2001
From: Eric House
Date: Wed, 9 Nov 2011 06:51:12 -0800
Subject: [PATCH] add dict sanity check that will, I hope, catch corrupt dicts.

---
 xwords4/common/dictnry.c  |  64 ++++++++++++++
 xwords4/common/dictnry.h  |   2 +
 xwords4/linux/Makefile    |   1 +
 xwords4/linux/linuxdict.c | 113 +++++++++++++++++++++-----------------
 xwords4/linux/linuxmain.c |  10 ++--
 5 files changed, 135 insertions(+), 55 deletions(-)

diff --git a/xwords4/common/dictnry.c b/xwords4/common/dictnry.c
index 7c6237560..97d769f29 100644
--- a/xwords4/common/dictnry.c
+++ b/xwords4/common/dictnry.c
@@ -699,6 +699,70 @@ dict_getLangName( const DictionaryCtxt* ctxt )
     return ctxt->langName;
 }
 
+#ifdef XWFEATURE_DICTSANITY
+/* Check the structural invariants of the edge array that starts at
+ * dict->base and holds numEdges edges:
+ *  - within a node (a run of edges ending with one for which IS_LAST_EDGE()
+ *    is true) tiles never decrease from one edge to the next;
+ *  - the index dict_index_from() yields for every edge is below numEdges;
+ *  - the final edge of the array is one for which IS_LAST_EDGE() is true.
+ *
+ * The first violation found is logged and ends the scan; the overall result
+ * is always logged.  Returns XP_TRUE if every check passed (trivially so
+ * when numEdges is 0), XP_FALSE otherwise.
+ */
+XP_Bool
+checkSanity( DictionaryCtxt* dict, const XP_U32 numEdges )
+{
+    /* Bytes per edge, chosen the same way linuxdict.c chooses its divisor
+       when deriving numEdges from the file length. */
+# ifdef NODE_CAN_4
+    const XP_U32 edgeSize = dict->nodeSize;
+# else
+    const XP_U32 edgeSize = 3;
+# endif
+    XP_U32 ii;
+    XP_Bool passed = XP_TRUE;
+    XP_Bool atNodeEnd = XP_TRUE; /* no node is open before the first edge */
+    array_edge* edge = dict->base;
+    Tile prevTile = 0;
+
+    for ( ii = 0; passed && ii < numEdges; ++ii ) {
+        Tile tile = EDGETILE( dict, edge );
+        if ( tile < prevTile ) {
+            /* casts make the arguments match %lu whatever XP_U32 is */
+            XP_LOGF( "%s: node %lu of %lu has out-of-order tile", __func__,
+                     (unsigned long)ii, (unsigned long)numEdges );
+            passed = XP_FALSE;
+        } else {
+            unsigned long index = dict_index_from( dict, edge );
+            if ( index >= numEdges ) {
+                XP_LOGF( "%s: node %lu of %lu has too-high index", __func__,
+                         (unsigned long)ii, (unsigned long)numEdges );
+                passed = XP_FALSE;
+            } else {
+                atNodeEnd = IS_LAST_EDGE( dict, edge ) ? XP_TRUE : XP_FALSE;
+                /* tile ordering starts afresh with each node */
+                prevTile = atNodeEnd ? 0 : tile;
+                edge += edgeSize;
+            }
+        }
+    }
+
+    /* Tracked explicitly because prevTile is also 0 after a non-final edge
+       whose tile is 0, so it can't prove the last node was terminated. */
+    if ( passed && !atNodeEnd ) {
+        XP_LOGF( "%s: final edge of %lu is not a last edge", __func__,
+                 (unsigned long)numEdges );
+        passed = XP_FALSE;
+    }
+
+    XP_LOGF( "%s(numEdges=%lu)=>%d", __func__, (unsigned long)numEdges,
+             (int)passed );
+    return passed;
+} /* checkSanity */
+#endif
+
 #ifdef CPLUS
 }
 #endif
diff --git a/xwords4/common/dictnry.h b/xwords4/common/dictnry.h
index ed78effe1..16efe36e6 100644
--- a/xwords4/common/dictnry.h
+++ b/xwords4/common/dictnry.h
@@ -207,6 +207,8 @@ void dict_super_init( DictionaryCtxt* ctxt );
 void dict_splitFaces( DictionaryCtxt* dict, const XP_U8* bytes, XP_U16 nBytes,
XP_U16 nFaces ); +XP_Bool checkSanity( DictionaryCtxt* dict, XP_U32 numEdges ); + #ifdef CPLUS } #endif diff --git a/xwords4/linux/Makefile b/xwords4/linux/Makefile index ffb795b45..f7e34d4d1 100644 --- a/xwords4/linux/Makefile +++ b/xwords4/linux/Makefile @@ -92,6 +92,7 @@ DEFINES += -DDISABLE_TILE_SEL DEFINES += -DSET_GAMESEED DEFINES += -DTEXT_MODEL DEFINES += -DXWFEATURE_WALKDICT +DEFINES += -DXWFEATURE_DICTSANITY ifdef CURSES_CELL_HT DEFINES += -DCURSES_CELL_HT=$(CURSES_CELL_HT) diff --git a/xwords4/linux/linuxdict.c b/xwords4/linux/linuxdict.c index abe0f1ceb..3b9e48a76 100644 --- a/xwords4/linux/linuxdict.c +++ b/xwords4/linux/linuxdict.c @@ -225,17 +225,19 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName ) dctx->dictLength = statbuf.st_size; { - FILE* dictF = fopen( fileName, "r" ); - XP_ASSERT( !!dictF ); - if ( dctx->useMMap ) { - dctx->dictBase = mmap( NULL, dctx->dictLength, PROT_READ, MAP_PRIVATE, fileno(dictF), 0 ); - } else { - dctx->dictBase = XP_MALLOC( dctx->super.mpool, dctx->dictLength ); - if ( dctx->dictLength != fread( dctx->dictBase, 1, dctx->dictLength, dictF ) ) { - XP_ASSERT( 0 ); - } - } - fclose( dictF ); + FILE* dictF = fopen( fileName, "r" ); + XP_ASSERT( !!dictF ); + if ( dctx->useMMap ) { + dctx->dictBase = mmap( NULL, dctx->dictLength, PROT_READ, + MAP_PRIVATE, fileno(dictF), 0 ); + } else { + dctx->dictBase = XP_MALLOC( dctx->super.mpool, dctx->dictLength ); + if ( dctx->dictLength != fread( dctx->dictBase, 1, + dctx->dictLength, dictF ) ) { + XP_ASSERT( 0 ); + } + } + fclose( dictF ); } ptr = dctx->dictBase; @@ -247,65 +249,65 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName ) XP_DEBUGF( "flags=0X%X", flags ); hasHeader = 0 != (DICT_HEADER_MASK & flags); if ( hasHeader ) { - flags &= ~DICT_HEADER_MASK; - XP_DEBUGF( "has header!" ); + flags &= ~DICT_HEADER_MASK; + XP_DEBUGF( "has header!" 
); } #ifdef NODE_CAN_4 if ( flags == 0x0001 ) { - dctx->super.nodeSize = 3; - charSize = 1; - dctx->super.is_4_byte = XP_FALSE; + dctx->super.nodeSize = 3; + charSize = 1; + dctx->super.is_4_byte = XP_FALSE; } else if ( flags == 0x0002 ) { - dctx->super.nodeSize = 3; - charSize = 2; - dctx->super.is_4_byte = XP_FALSE; + dctx->super.nodeSize = 3; + charSize = 2; + dctx->super.is_4_byte = XP_FALSE; } else if ( flags == 0x0003 ) { - dctx->super.nodeSize = 4; - charSize = 2; - dctx->super.is_4_byte = XP_TRUE; + dctx->super.nodeSize = 4; + charSize = 2; + dctx->super.is_4_byte = XP_TRUE; } else if ( flags == 0x0004 ) { - dctx->super.nodeSize = 3; - dctx->super.isUTF8 = XP_TRUE; - isUTF8 = XP_TRUE; - dctx->super.is_4_byte = XP_FALSE; + dctx->super.nodeSize = 3; + dctx->super.isUTF8 = XP_TRUE; + isUTF8 = XP_TRUE; + dctx->super.is_4_byte = XP_FALSE; } else if ( flags == 0x0005 ) { - dctx->super.nodeSize = 4; - dctx->super.isUTF8 = XP_TRUE; - isUTF8 = XP_TRUE; - dctx->super.is_4_byte = XP_TRUE; + dctx->super.nodeSize = 4; + dctx->super.isUTF8 = XP_TRUE; + isUTF8 = XP_TRUE; + dctx->super.is_4_byte = XP_TRUE; } else { - /* case I don't know how to deal with */ - formatOk = XP_FALSE; - XP_ASSERT(0); + /* case I don't know how to deal with */ + formatOk = XP_FALSE; + XP_ASSERT(0); } #else -XP_ASSERT( flags == 0x0001 ); + XP_ASSERT( flags == 0x0001 ); #endif if ( formatOk ) { - XP_U8 numFaceBytes, numFaces; + XP_U8 numFaceBytes, numFaces; if ( hasHeader ) { XP_U16 headerLen; XP_U32 wordCount; - memcpy( &headerLen, ptr, sizeof(headerLen) ); - ptr += sizeof(headerLen); + memcpy( &headerLen, ptr, sizeof(headerLen) ); + ptr += sizeof(headerLen); headerLen = ntohs( headerLen ); if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */ goto closeAndExit; } - memcpy( &wordCount, ptr, sizeof(wordCount) ); - ptr += sizeof(wordCount); + memcpy( &wordCount, ptr, sizeof(wordCount) ); + ptr += sizeof(wordCount); dctx->super.nWords = ntohl( wordCount ); XP_DEBUGF( "dict 
contains %ld words", dctx->super.nWords ); } if ( isUTF8 ) { - numFaceBytes = *ptr++; + numFaceBytes = *ptr++; } - numFaces = *ptr++; + numFaces = *ptr++; if ( !isUTF8 ) { numFaceBytes = numFaces * charSize; } @@ -320,42 +322,47 @@ XP_ASSERT( flags == 0x0001 ); } XP_U8 tmp[numFaceBytes]; - memcpy( tmp, ptr, numFaceBytes ); - ptr += numFaceBytes; + memcpy( tmp, ptr, numFaceBytes ); + ptr += numFaceBytes; dict_splitFaces( &dctx->super, tmp, numFaceBytes, numFaces ); - memcpy( &xloc, ptr, sizeof(xloc) ); - ptr += sizeof(xloc); - memcpy( dctx->super.countsAndValues, ptr, numFaces*2 ); - ptr += numFaces*2; + memcpy( &xloc, ptr, sizeof(xloc) ); + ptr += sizeof(xloc); + memcpy( dctx->super.countsAndValues, ptr, numFaces*2 ); + ptr += numFaces*2; } dctx->super.langCode = xloc & 0x7F; if ( formatOk ) { + XP_U32 numEdges; skipBitmaps( dctx, &ptr ); curPos = ptr - dctx->dictBase; dictLength = dctx->dictLength - curPos; if ( dictLength > 0 ) { - memcpy( &topOffset, ptr, sizeof(topOffset) ); + memcpy( &topOffset, ptr, sizeof(topOffset) ); /* it's in big-endian order */ topOffset = ntohl(topOffset); dictLength -= sizeof(topOffset); /* first four bytes are offset */ - ptr += sizeof(topOffset); + ptr += sizeof(topOffset); } if ( dictLength > 0 ) { +# ifdef NODE_CAN_4 + numEdges = dictLength / dctx->super.nodeSize; +# else + numEdges = dictLength / 3; +# endif #ifdef DEBUG # ifdef NODE_CAN_4 - dctx->super.numEdges = dictLength / dctx->super.nodeSize; XP_ASSERT( (dictLength % dctx->super.nodeSize) == 0 ); # else - dctx->super.numEdges = dictLength / 3; XP_ASSERT( (dictLength % 3) == 0 ); # endif + dctx->super.numEdges = numEdges; #endif dctx->super.base = (array_edge*)ptr; @@ -366,6 +373,10 @@ XP_ASSERT( flags == 0x0001 ); } dctx->super.name = copyString( dctx->super.mpool, fileName ); + + if ( ! 
checkSanity( &dctx->super, numEdges ) ) { + goto closeAndExit; + } } goto ok; diff --git a/xwords4/linux/linuxmain.c b/xwords4/linux/linuxmain.c index 5b357f010..9b8af1c15 100644 --- a/xwords4/linux/linuxmain.c +++ b/xwords4/linux/linuxmain.c @@ -903,6 +903,7 @@ tmp_noop_sigintterm( int XP_UNUSED(sig) ) } #ifdef XWFEATURE_WALKDICT +//# define PRINT_ALL static void testGetNthWord( const DictionaryCtxt* dict, char** words, XP_U16 depth, IndexData* data ) @@ -948,7 +949,6 @@ walk_dict_test( const LaunchParams* params, const DictionaryCtxt* dict, XP_ASSERT( count == dict_countWords( dict ) ); char** words = g_malloc( count * sizeof(char*) ); XP_ASSERT( !!words ); - // # define PRINT_ALL /* if ( dict_firstWord( dict, &word ) */ /* && dict_getNextWord( dict, &word ) */ @@ -1085,9 +1085,11 @@ walk_dict_test_all( const LaunchParams* params, GSList* testDicts, DictionaryCtxt* dict = linux_dictionary_make( MPPARM(params->util->mpool) name, params->useMmap ); - XP_LOGF( "walk_dict_test(%s)", name ); - walk_dict_test( params, dict, testPrefixes ); - dict_destroy( dict ); + if ( NULL != dict ) { + XP_LOGF( "walk_dict_test(%s)", name ); + walk_dict_test( params, dict, testPrefixes ); + dict_destroy( dict ); + } } } #endif