add dict sanity check that will, I hope, catch corrupt dicts.

This commit is contained in:
Eric House 2011-11-09 06:51:12 -08:00
parent c591182d6c
commit 268f018140
5 changed files with 112 additions and 55 deletions

View file

@ -699,6 +699,47 @@ dict_getLangName( const DictionaryCtxt* ctxt )
return ctxt->langName;
}
#ifdef XWFEATURE_DICTSANITY
/* Walk the dictionary's edge array once and reject obviously corrupt data.
 *
 * For each of the numEdges edges, starting at dict->base:
 *  - within a run of edges (a run ends at an edge for which IS_LAST_EDGE()
 *    is true) the tile from EDGETILE() must never decrease;
 *  - the value dict_index_from() yields for the edge must be < numEdges.
 * After the walk, prevTile must be 0.  It is reset to 0 after every
 * LAST_EDGE, so a non-zero value means the array ended in the middle of a
 * run.
 *
 * dict:     dictionary whose base and nodeSize fields describe the array.
 * numEdges: number of edges to examine; 0 trivially passes.
 *
 * Returns XP_TRUE if every check passes, XP_FALSE otherwise.  The first
 * failure (if any) and the final result are logged with XP_LOGF.
 */
XP_Bool
checkSanity( DictionaryCtxt* dict, const XP_U32 numEdges )
{
    XP_U32 ii;
    XP_Bool passed = XP_TRUE;
    array_edge* edge = dict->base;
    Tile prevTile = 0;

    for ( ii = 0; ii < numEdges; ++ii ) {
        unsigned long edgeIndex;
        Tile tile = EDGETILE( dict, edge );

        if ( tile < prevTile ) {
            /* Cast so the arguments match the conversion specifiers
               whatever width XP_U32 has on this platform. */
            XP_LOGF( "%s: node %lu of %lu has out-of-order tile", __func__,
                     (unsigned long)ii, (unsigned long)numEdges );
            passed = XP_FALSE;
            break;
        }
        prevTile = tile;

        /* NOTE(review): presumably the index of the edge this one points
           at -- confirm against dict_index_from(). */
        edgeIndex = dict_index_from( dict, edge );
        if ( edgeIndex >= numEdges ) {
            XP_LOGF( "%s: node %lu of %lu has too-high index", __func__,
                     (unsigned long)ii, (unsigned long)numEdges );
            passed = XP_FALSE;
            break;
        }

        if ( IS_LAST_EDGE( dict, edge ) ) {
            prevTile = 0;       /* next edge starts a fresh run */
        }

        /* NOTE(review): assumes array_edge is byte-sized so that this
           advances exactly one node -- confirm. */
        edge += dict->nodeSize;
    }

    if ( passed ) {
        passed = 0 == prevTile; /* walk must not stop in mid-run */
    }

    XP_LOGF( "%s(numEdges=%lu)=>%d", __func__, (unsigned long)numEdges,
             passed );
    return passed;
} /* checkSanity */
#endif
#ifdef CPLUS
}
#endif

View file

@ -207,6 +207,8 @@ void dict_super_init( DictionaryCtxt* ctxt );
void dict_splitFaces( DictionaryCtxt* dict, const XP_U8* bytes,
XP_U16 nBytes, XP_U16 nFaces );
/* Sanity-check the first numEdges edges of dict's edge array; returns
 * XP_TRUE if they look consistent, XP_FALSE if corruption is detected.
 * NOTE(review): the definition appears to be compiled only when
 * XWFEATURE_DICTSANITY is defined -- confirm callers build without it. */
XP_Bool checkSanity( DictionaryCtxt* dict, XP_U32 numEdges );
#ifdef CPLUS
}
#endif

View file

@ -92,6 +92,7 @@ DEFINES += -DDISABLE_TILE_SEL
DEFINES += -DSET_GAMESEED
DEFINES += -DTEXT_MODEL
DEFINES += -DXWFEATURE_WALKDICT
DEFINES += -DXWFEATURE_DICTSANITY
ifdef CURSES_CELL_HT
DEFINES += -DCURSES_CELL_HT=$(CURSES_CELL_HT)

View file

@ -225,17 +225,19 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
dctx->dictLength = statbuf.st_size;
{
FILE* dictF = fopen( fileName, "r" );
XP_ASSERT( !!dictF );
if ( dctx->useMMap ) {
dctx->dictBase = mmap( NULL, dctx->dictLength, PROT_READ, MAP_PRIVATE, fileno(dictF), 0 );
} else {
dctx->dictBase = XP_MALLOC( dctx->super.mpool, dctx->dictLength );
if ( dctx->dictLength != fread( dctx->dictBase, 1, dctx->dictLength, dictF ) ) {
XP_ASSERT( 0 );
}
}
fclose( dictF );
FILE* dictF = fopen( fileName, "r" );
XP_ASSERT( !!dictF );
if ( dctx->useMMap ) {
dctx->dictBase = mmap( NULL, dctx->dictLength, PROT_READ,
MAP_PRIVATE, fileno(dictF), 0 );
} else {
dctx->dictBase = XP_MALLOC( dctx->super.mpool, dctx->dictLength );
if ( dctx->dictLength != fread( dctx->dictBase, 1,
dctx->dictLength, dictF ) ) {
XP_ASSERT( 0 );
}
}
fclose( dictF );
}
ptr = dctx->dictBase;
@ -247,65 +249,65 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
XP_DEBUGF( "flags=0X%X", flags );
hasHeader = 0 != (DICT_HEADER_MASK & flags);
if ( hasHeader ) {
flags &= ~DICT_HEADER_MASK;
XP_DEBUGF( "has header!" );
flags &= ~DICT_HEADER_MASK;
XP_DEBUGF( "has header!" );
}
#ifdef NODE_CAN_4
if ( flags == 0x0001 ) {
dctx->super.nodeSize = 3;
charSize = 1;
dctx->super.is_4_byte = XP_FALSE;
dctx->super.nodeSize = 3;
charSize = 1;
dctx->super.is_4_byte = XP_FALSE;
} else if ( flags == 0x0002 ) {
dctx->super.nodeSize = 3;
charSize = 2;
dctx->super.is_4_byte = XP_FALSE;
dctx->super.nodeSize = 3;
charSize = 2;
dctx->super.is_4_byte = XP_FALSE;
} else if ( flags == 0x0003 ) {
dctx->super.nodeSize = 4;
charSize = 2;
dctx->super.is_4_byte = XP_TRUE;
dctx->super.nodeSize = 4;
charSize = 2;
dctx->super.is_4_byte = XP_TRUE;
} else if ( flags == 0x0004 ) {
dctx->super.nodeSize = 3;
dctx->super.isUTF8 = XP_TRUE;
isUTF8 = XP_TRUE;
dctx->super.is_4_byte = XP_FALSE;
dctx->super.nodeSize = 3;
dctx->super.isUTF8 = XP_TRUE;
isUTF8 = XP_TRUE;
dctx->super.is_4_byte = XP_FALSE;
} else if ( flags == 0x0005 ) {
dctx->super.nodeSize = 4;
dctx->super.isUTF8 = XP_TRUE;
isUTF8 = XP_TRUE;
dctx->super.is_4_byte = XP_TRUE;
dctx->super.nodeSize = 4;
dctx->super.isUTF8 = XP_TRUE;
isUTF8 = XP_TRUE;
dctx->super.is_4_byte = XP_TRUE;
} else {
/* case I don't know how to deal with */
formatOk = XP_FALSE;
XP_ASSERT(0);
/* case I don't know how to deal with */
formatOk = XP_FALSE;
XP_ASSERT(0);
}
#else
XP_ASSERT( flags == 0x0001 );
XP_ASSERT( flags == 0x0001 );
#endif
if ( formatOk ) {
XP_U8 numFaceBytes, numFaces;
XP_U8 numFaceBytes, numFaces;
if ( hasHeader ) {
XP_U16 headerLen;
XP_U32 wordCount;
memcpy( &headerLen, ptr, sizeof(headerLen) );
ptr += sizeof(headerLen);
memcpy( &headerLen, ptr, sizeof(headerLen) );
ptr += sizeof(headerLen);
headerLen = ntohs( headerLen );
if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */
goto closeAndExit;
}
memcpy( &wordCount, ptr, sizeof(wordCount) );
ptr += sizeof(wordCount);
memcpy( &wordCount, ptr, sizeof(wordCount) );
ptr += sizeof(wordCount);
dctx->super.nWords = ntohl( wordCount );
XP_DEBUGF( "dict contains %ld words", dctx->super.nWords );
}
if ( isUTF8 ) {
numFaceBytes = *ptr++;
numFaceBytes = *ptr++;
}
numFaces = *ptr++;
numFaces = *ptr++;
if ( !isUTF8 ) {
numFaceBytes = numFaces * charSize;
}
@ -320,42 +322,47 @@ XP_ASSERT( flags == 0x0001 );
}
XP_U8 tmp[numFaceBytes];
memcpy( tmp, ptr, numFaceBytes );
ptr += numFaceBytes;
memcpy( tmp, ptr, numFaceBytes );
ptr += numFaceBytes;
dict_splitFaces( &dctx->super, tmp, numFaceBytes, numFaces );
memcpy( &xloc, ptr, sizeof(xloc) );
ptr += sizeof(xloc);
memcpy( dctx->super.countsAndValues, ptr, numFaces*2 );
ptr += numFaces*2;
memcpy( &xloc, ptr, sizeof(xloc) );
ptr += sizeof(xloc);
memcpy( dctx->super.countsAndValues, ptr, numFaces*2 );
ptr += numFaces*2;
}
dctx->super.langCode = xloc & 0x7F;
if ( formatOk ) {
XP_U32 numEdges;
skipBitmaps( dctx, &ptr );
curPos = ptr - dctx->dictBase;
dictLength = dctx->dictLength - curPos;
if ( dictLength > 0 ) {
memcpy( &topOffset, ptr, sizeof(topOffset) );
memcpy( &topOffset, ptr, sizeof(topOffset) );
/* it's in big-endian order */
topOffset = ntohl(topOffset);
dictLength -= sizeof(topOffset); /* first four bytes are offset */
ptr += sizeof(topOffset);
ptr += sizeof(topOffset);
}
if ( dictLength > 0 ) {
# ifdef NODE_CAN_4
numEdges = dictLength / dctx->super.nodeSize;
# else
numEdges = dictLength / 3;
# endif
#ifdef DEBUG
# ifdef NODE_CAN_4
dctx->super.numEdges = dictLength / dctx->super.nodeSize;
XP_ASSERT( (dictLength % dctx->super.nodeSize) == 0 );
# else
dctx->super.numEdges = dictLength / 3;
XP_ASSERT( (dictLength % 3) == 0 );
# endif
dctx->super.numEdges = numEdges;
#endif
dctx->super.base = (array_edge*)ptr;
@ -366,6 +373,10 @@ XP_ASSERT( flags == 0x0001 );
}
dctx->super.name = copyString( dctx->super.mpool, fileName );
if ( ! checkSanity( &dctx->super, numEdges ) ) {
goto closeAndExit;
}
}
goto ok;

View file

@ -903,6 +903,7 @@ tmp_noop_sigintterm( int XP_UNUSED(sig) )
}
#ifdef XWFEATURE_WALKDICT
//# define PRINT_ALL
static void
testGetNthWord( const DictionaryCtxt* dict, char** words,
XP_U16 depth, IndexData* data )
@ -948,7 +949,6 @@ walk_dict_test( const LaunchParams* params, const DictionaryCtxt* dict,
XP_ASSERT( count == dict_countWords( dict ) );
char** words = g_malloc( count * sizeof(char*) );
XP_ASSERT( !!words );
// # define PRINT_ALL
/* if ( dict_firstWord( dict, &word ) */
/* && dict_getNextWord( dict, &word ) */
@ -1085,9 +1085,11 @@ walk_dict_test_all( const LaunchParams* params, GSList* testDicts,
DictionaryCtxt* dict =
linux_dictionary_make( MPPARM(params->util->mpool) name,
params->useMmap );
XP_LOGF( "walk_dict_test(%s)", name );
walk_dict_test( params, dict, testPrefixes );
dict_destroy( dict );
if ( NULL != dict ) {
XP_LOGF( "walk_dict_test(%s)", name );
walk_dict_test( params, dict, testPrefixes );
dict_destroy( dict );
}
}
}
#endif