Add a param to dict_countWords in which it accumulates counts per word length.

Shouldn't hurt speed too much if NULL is passed.
This commit is contained in:
Andy2 2011-11-18 08:15:21 -08:00
parent 598be8b404
commit cc0105e14c
4 changed files with 28 additions and 7 deletions

View file

@ -244,16 +244,24 @@ firstWord( DictIter* iter )
} }
XP_U32 XP_U32
dict_countWords( const DictIter* iter ) dict_countWords( const DictIter* iter, LengthsArray* lens )
{ {
DictIter counter; DictIter counter;
dict_initIterFrom( &counter, iter ); dict_initIterFrom( &counter, iter );
if ( NULL != lens ) {
XP_MEMSET( lens, 0, sizeof(*lens) );
}
XP_U32 count; XP_U32 count;
XP_Bool ok; XP_Bool ok;
for ( count = 0, ok = firstWord( &counter ); for ( count = 0, ok = firstWord( &counter );
ok; ok = nextWord( &counter) ) { ok; ok = nextWord( &counter) ) {
++count; ++count;
if ( NULL != lens ) {
++lens->lens[counter.nEdges];
}
} }
return count; return count;
} }
@ -438,7 +446,7 @@ dict_makeIndex( const DictIter* iter, XP_U16 depth, IndexData* data )
static void static void
initWord( DictIter* iter ) initWord( DictIter* iter )
{ {
iter->nWords = dict_countWords( iter ); iter->nWords = dict_countWords( iter, NULL );
} }
XP_Bool XP_Bool
@ -505,7 +513,7 @@ dict_getNthWord( DictIter* iter, DictPosition position, XP_U16 depth,
XP_Bool validWord = 0 < iter->nEdges; XP_Bool validWord = 0 < iter->nEdges;
if ( validWord ) { /* uninitialized */ if ( validWord ) { /* uninitialized */
wordCount = iter->nWords; wordCount = iter->nWords;
XP_ASSERT( wordCount == dict_countWords( iter ) ); XP_ASSERT( wordCount == dict_countWords( iter, NULL ) );
} else { } else {
wordCount = dict_getWordCount( dict ); wordCount = dict_getWordCount( dict );
} }

View file

@ -59,9 +59,13 @@ typedef struct _IndexData {
XP_U16 count; /* in-out: must indicate others are large enough */ XP_U16 count; /* in-out: must indicate others are large enough */
} IndexData; } IndexData;
typedef struct _LengthsArray {
XP_U32 lens[MAX_COLS_DICT];
} LengthsArray;
void dict_initIter( DictIter* iter, const DictionaryCtxt* dict, void dict_initIter( DictIter* iter, const DictionaryCtxt* dict,
XP_U16 min, XP_U16 max ); XP_U16 min, XP_U16 max );
XP_U32 dict_countWords( const DictIter* iter ); XP_U32 dict_countWords( const DictIter* iter, LengthsArray* lens );
void dict_makeIndex( const DictIter* iter, XP_U16 depth, IndexData* data ); void dict_makeIndex( const DictIter* iter, XP_U16 depth, IndexData* data );
XP_Bool dict_firstWord( DictIter* iter ); XP_Bool dict_firstWord( DictIter* iter );
XP_Bool dict_lastWord( DictIter* iter ); XP_Bool dict_lastWord( DictIter* iter );

View file

@ -483,7 +483,7 @@ dict_getWordCount( const DictionaryCtxt* dict )
if ( 0 == nWords ) { if ( 0 == nWords ) {
DictIter iter; DictIter iter;
dict_initIter( &iter, dict, 0, MAX_COLS_DICT ); dict_initIter( &iter, dict, 0, MAX_COLS_DICT );
nWords = dict_countWords( &iter ); nWords = dict_countWords( &iter, NULL );
} }
#endif #endif
return nWords; return nWords;

View file

@ -915,7 +915,7 @@ testGetNthWord( const DictionaryCtxt* dict, char** words,
DictIter iter; DictIter iter;
dict_initIter( &iter, dict, min, max ); dict_initIter( &iter, dict, min, max );
XP_U32 half = dict_countWords( &iter ) / 2; XP_U32 half = dict_countWords( &iter, NULL ) / 2;
XP_U32 interval = half / 100; XP_U32 interval = half / 100;
if ( interval == 0 ) { if ( interval == 0 ) {
++interval; ++interval;
@ -960,7 +960,16 @@ walk_dict_test( const LaunchParams* params, const DictionaryCtxt* dict,
} }
dict_initIter( &iter, dict, min, max ); dict_initIter( &iter, dict, min, max );
XP_U32 count = dict_countWords( &iter ); LengthsArray lens;
XP_U32 count = dict_countWords( &iter, &lens );
XP_U32 sum = 0;
for ( jj = 0; jj < VSIZE(lens.lens); ++jj ) {
sum += lens.lens[jj];
XP_LOGF( "%ld words of length %ld", lens.lens[jj], jj );
}
XP_ASSERT( sum == count );
if ( count > 0 ) { if ( count > 0 ) {
char** words = g_malloc( count * sizeof(char*) ); char** words = g_malloc( count * sizeof(char*) );
XP_ASSERT( !!words ); XP_ASSERT( !!words );