debug: works now! Also ifdef out debug/verbose code.

This commit is contained in:
ehouse 2006-04-13 03:49:41 +00:00
parent 72532d72a8
commit 08557184a5

View file

@ -53,6 +53,8 @@ typedef unsigned int Node;
typedef std::vector<Node> NodeList; typedef std::vector<Node> NodeList;
typedef std::vector<char*> WordList; typedef std::vector<char*> WordList;
#define MAX_WORD_LEN 15
int gFirstDiff; int gFirstDiff;
char* gCurrentWord = ""; char* gCurrentWord = "";
int gCurrentWordLen; int gCurrentWordLen;
@ -76,7 +78,9 @@ int gWordCount = 0;
std::map<char,int> gTableHash; std::map<char,int> gTableHash;
int gBlankIndex; int gBlankIndex;
std::vector<char> gRevMap; std::vector<char> gRevMap;
#ifdef DEBUG
bool gDebug = false; bool gDebug = false;
#endif
std::map<NodeList, int> gSubsHash; std::map<NodeList, int> gSubsHash;
bool gForceFour = false; // use four bytes regardless of need? bool gForceFour = false; // use four bytes regardless of need?
int gNBytesPerNode; int gNBytesPerNode;
@ -95,7 +99,7 @@ static WordList* parseAndSort( FILE* file );
static void printWords( WordList* strings ); static void printWords( WordList* strings );
static void readNextWord( void ); static void readNextWord( void );
static bool firstBeforeSecond( const char* lhs, const char* rhs ); static bool firstBeforeSecond( const char* lhs, const char* rhs );
static char* tileToAscii( char* out, const char* in ); static char* tileToAscii( char* out, int outSize, const char* in );
static int buildNode( int depth ); static int buildNode( int depth );
static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling ); static void TrieNodeSetIsLastSibling( Node* nodeR, bool isLastSibling );
static int addNodes( NodeList& newedgesR ); static int addNodes( NodeList& newedgesR );
@ -142,11 +146,11 @@ main( int argc, char** argv )
if ( gInFileName ) { if ( gInFileName ) {
fclose( infile ); fclose( infile );
} }
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
printWords( gInputStrings ); printWords( gInputStrings );
} }
#endif
// Do I need this stupid thing? Better to move the first row to // Do I need this stupid thing? Better to move the first row to
// the front of the array and patch everything else. Or fix the // the front of the array and patch everything else. Or fix the
// non-palm dictionary format to include the offset of the first // non-palm dictionary format to include the offset of the first
@ -165,11 +169,12 @@ main( int argc, char** argv )
writeOutStartNode( gStartNodeOut, firstRootChildOffset ); writeOutStartNode( gStartNodeOut, firstRootChildOffset );
} }
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "\n... dumping table ...\n" ); fprintf( stderr, "\n... dumping table ...\n" );
printNodes( gNodes ); printNodes( gNodes );
} }
#endif
// write out the number of nodes if requested // write out the number of nodes if requested
if ( gCountFile ) { if ( gCountFile ) {
FILE* OFILE; FILE* OFILE;
@ -185,7 +190,7 @@ main( int argc, char** argv )
} }
if ( gDumpText && gNodes.size() > 0 ) { if ( gDumpText && gNodes.size() > 0 ) {
char buf[31]; char buf[(MAX_WORD_LEN*2)+1];
printOneLevel( firstRootChildOffset, buf, 0 ); printOneLevel( firstRootChildOffset, buf, 0 );
} }
@ -220,7 +225,7 @@ moveTopToFront( int* firstRef )
lastSub.assign( gNodes.begin() + firstChild, gNodes.end() ); lastSub.assign( gNodes.begin() + firstChild, gNodes.end() );
gNodes.erase( gNodes.begin() + firstChild, gNodes.end() ); gNodes.erase( gNodes.begin() + firstChild, gNodes.end() );
} else if ( gWordCount != 0 ) { } else if ( gWordCount != 0 ) {
error_exit( "there should be no words!!" ); ERROR_EXIT( "there should be no words!!" );
} }
// remove the first (garbage) node // remove the first (garbage) node
@ -231,7 +236,7 @@ moveTopToFront( int* firstRef )
// -1 because all move down by 1; see prev line // -1 because all move down by 1; see prev line
diff = lastSub.size() - 1; diff = lastSub.size() - 1;
if ( diff < 0 ) { if ( diff < 0 ) {
error_exit( "something wrong with lastSub.size()" ); ERROR_EXIT( "something wrong with lastSub.size()" );
} }
} else { } else {
diff = 0; diff = 0;
@ -291,23 +296,26 @@ addNodes( NodeList& newedgesR )
int found = findSubArray( newedgesR ); int found = findSubArray( newedgesR );
if ( found == 0 ) { if ( found == 0 ) {
error_exit( "0 is an invalid match!!!" ); ERROR_EXIT( "0 is an invalid match!!!" );
} }
if ( found < 0 ) { if ( found < 0 ) {
found = gNodes.size(); found = gNodes.size();
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "adding...\n" ); fprintf( stderr, "adding...\n" );
printNodes( newedgesR ); printNodes( newedgesR );
} }
#endif
gNodes.insert( gNodes.end(), newedgesR.begin(), newedgesR.end() ); gNodes.insert( gNodes.end(), newedgesR.begin(), newedgesR.end() );
registerSubArray( newedgesR, found ); registerSubArray( newedgesR, found );
} }
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "%s => %d\n", __FUNCTION__, found ); fprintf( stderr, "%s => %d\n", __FUNCTION__, found );
} }
#endif
return found; return found;
} // addNodes } // addNodes
@ -315,6 +323,7 @@ static void
printNode( int index, Node node ) printNode( int index, Node node )
{ {
int letter = TrieNodeGetLetter(node); int letter = TrieNodeGetLetter(node);
assert( letter < gRevMap.size() );
fprintf( stderr, fprintf( stderr,
"[%d] letter=%d(%c); isTerminal=%s; isLastSib=%s; fco=%d;\n", "[%d] letter=%d(%c); isTerminal=%s; isLastSib=%s; fco=%d;\n",
index, letter, gRevMap[letter], index, letter, gRevMap[letter],
@ -356,7 +365,7 @@ registerSubArray( NodeList& edgesR, int nodeLoc )
#ifdef DEBUG #ifdef DEBUG
std::map<NodeList, int>::iterator iter = gSubsHash.find( edgesR ); std::map<NodeList, int>::iterator iter = gSubsHash.find( edgesR );
if ( iter != gSubsHash.end() ) { if ( iter != gSubsHash.end() ) {
error_exit( "entry for key shouldn't exist!!" ); ERROR_EXIT( "entry for key shouldn't exist!!" );
} }
#endif #endif
gSubsHash[edgesR] = nodeLoc; gSubsHash[edgesR] = nodeLoc;
@ -389,12 +398,16 @@ readNextWord( void )
gDone = gNextWordIndex == gInputStrings->size(); gDone = gNextWordIndex == gInputStrings->size();
if ( !gDone ) { if ( !gDone ) {
word = gInputStrings->at(gNextWordIndex++); word = gInputStrings->at(gNextWordIndex++);
#ifdef DEBUG
} else if ( gDebug ) { } else if ( gDebug ) {
fprintf( stderr, "gDone set to true\n" ); fprintf( stderr, "gDone set to true\n" );
#endif
} }
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "got word: %s\n", word ); fprintf( stderr, "got word: %s\n", word );
} }
#endif
} }
int numCommonLetters = 0; int numCommonLetters = 0;
int len = strlen( word ); int len = strlen( word );
@ -410,20 +423,23 @@ readNextWord( void )
gFirstDiff = numCommonLetters; gFirstDiff = numCommonLetters;
if ( (gCurrentWordLen > 0) && (strlen(word) > 0) if ( (gCurrentWordLen > 0) && (strlen(word) > 0)
&& !firstBeforeSecond( gCurrentWord, word ) ) { && !firstBeforeSecond( gCurrentWord, word ) ) {
char buf1[16]; char buf1[MAX_WORD_LEN+1];
char buf2[16]; char buf2[MAX_WORD_LEN+1];
tileToAscii( buf1, gCurrentWord ); tileToAscii( buf1, sizeof(buf1), gCurrentWord );
tileToAscii( buf1, word ); tileToAscii( buf2, sizeof(buf2), word );
error_exit( "words %s and %s are out of order\n", ERROR_EXIT( "words %s and %s are out of order\n",
buf1, buf2 ); buf1, buf2 );
} }
gCurrentWord = word; gCurrentWord = word;
gCurrentWordLen = strlen(word); gCurrentWordLen = strlen(word);
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
char buf[16]; char buf[MAX_WORD_LEN+1];
fprintf( stderr, "gCurrentWord now %s\n", tileToAscii(buf, gCurrentWord) ); fprintf( stderr, "gCurrentWord now %s\n",
tileToAscii( buf, sizeof(buf), gCurrentWord) );
} }
#endif
} // readNextWord } // readNextWord
static bool static bool
@ -432,8 +448,8 @@ firstBeforeSecond( const char* lhs, const char* rhs )
char sl[16]; char sl[16];
char sr[16]; char sr[16];
tileToAscii( sl, lhs ); // tileToAscii( sl, lhs );
tileToAscii( sr, rhs ); // tileToAscii( sr, rhs );
bool gt = 0 > strcmp( lhs, rhs ); bool gt = 0 > strcmp( lhs, rhs );
// fprintf( stderr, "comparing %s, %s; returning %s\n", // fprintf( stderr, "comparing %s, %s; returning %s\n",
@ -470,7 +486,7 @@ sub cmpWords {
#endif #endif
static char* static char*
tileToAscii( char* out, const char* in ) tileToAscii( char* out, int outSize, const char* in )
{ {
char* orig = out; char* orig = out;
for ( ; ; ) { for ( ; ; ) {
@ -479,7 +495,9 @@ tileToAscii( char* out, const char* in )
*out = '\0'; *out = '\0';
break; break;
} }
assert( ch < gRevMap.size() );
*out++ = gRevMap[ch]; *out++ = gRevMap[ch];
assert( (out - orig) < outSize );
} }
return orig; return orig;
} }
@ -515,27 +533,36 @@ parseAndSort( FILE* infile )
int len = word.length() + 1; int len = word.length() + 1;
char* str = (char*)malloc( len ); char* str = (char*)malloc( len );
assert( str ); assert( str );
memcpy( str, word.c_str(), word.length()); memcpy( str, word.c_str(), len);
str[len] = '\0';
wordlist->push_back( str ); wordlist->push_back( str );
++gWordCount; ++gWordCount;
#ifdef DEBUG
if ( gDebug ) {
char buf[MAX_WORD_LEN+1];
fprintf( stderr, "loaded %s\n", asciiWord.c_str() );
fprintf( stderr, "from tiles: %s\n",
tileToAscii( buf, sizeof(buf), str ) );
}
#endif
} }
asciiWord = ""; asciiWord = "";
break; break;
} else if ( gTableHash.find(byt) != gTableHash.end() ) { } else if ( gTableHash.find(byt) != gTableHash.end() ) {
if ( !dropWord ) { if ( !dropWord ) {
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "adding %d for %c\n", fprintf( stderr, "adding %d for %c\n",
gTableHash[byt], (char)byt ); gTableHash[byt], (char)byt );
} }
#endif
word += (char)gTableHash[byt]; word += (char)gTableHash[byt];
assert( word.size() <= 15 ); assert( word.size() <= MAX_WORD_LEN );
if ( gKillIfMissing ) { if ( gKillIfMissing ) {
asciiWord += byt; asciiWord += byt;
} }
} }
} else if ( gKillIfMissing ) { } else if ( gKillIfMissing ) {
error_exit( "chr %c (%d) not in map file %s\n" ERROR_EXIT( "chr %c (%d) not in map file %s\n"
"last word was %s\n", "last word was %s\n",
byt, (int)byt, gTableFile, asciiWord.c_str() ); byt, (int)byt, gTableFile, asciiWord.c_str() );
} else { } else {
@ -546,17 +573,17 @@ parseAndSort( FILE* infile )
} }
done: done:
if ( gNeedsSort && (gWordCount > 1) ) { if ( gNeedsSort && (gWordCount > 1) ) {
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "starting sort...\n" ); fprintf( stderr, "starting sort...\n" );
} }
#endif
std::sort( wordlist->begin(), wordlist->end(), firstBeforeSecond ); std::sort( wordlist->begin(), wordlist->end(), firstBeforeSecond );
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "sort finished\n" ); fprintf( stderr, "sort finished\n" );
} }
} #endif
if ( gDebug ) {
fprintf( stderr, "length of list is %d.\n", wordlist->size() );
} }
return wordlist; return wordlist;
} // parseAndSort } // parseAndSort
@ -566,8 +593,8 @@ printWords( std::vector<char*>* strings )
{ {
std::vector<char*>::iterator iter = strings->begin(); std::vector<char*>::iterator iter = strings->begin();
while ( iter != strings->end() ) { while ( iter != strings->end() ) {
char buf[16]; char buf[MAX_WORD_LEN+1];
tileToAscii( buf, *iter ); tileToAscii( buf, sizeof(buf), *iter );
fprintf( stderr, "%s\n", buf ); fprintf( stderr, "%s\n", buf );
++iter; ++iter;
} }
@ -640,7 +667,7 @@ static void
TrieNodeSetLetter( Node* nodeR, int letter ) TrieNodeSetLetter( Node* nodeR, int letter )
{ {
if( letter >= 64 ) { if( letter >= 64 ) {
error_exit( "letter %d too big", letter ); ERROR_EXIT( "letter %d too big", letter );
} }
int mask = ~(0x3F << 24); int mask = ~(0x3F << 24);
@ -660,7 +687,7 @@ static void
TrieNodeSetFirstChildOffset( Node* nodeR, int fco ) TrieNodeSetFirstChildOffset( Node* nodeR, int fco )
{ {
if ( (fco & 0xFF000000) != 0 ) { if ( (fco & 0xFF000000) != 0 ) {
error_exit( "%x larger than 24 bits", fco ); ERROR_EXIT( "%x larger than 24 bits", fco );
} }
int mask = ~0x00FFFFFF; int mask = ~0x00FFFFFF;
@ -769,7 +796,7 @@ emitNodes( unsigned int nBytesPerOutfile, const char* outFileBase )
fprintf( stderr, "blank's at 32; 3-byte-nodes still ok\n" ); fprintf( stderr, "blank's at 32; 3-byte-nodes still ok\n" );
gNBytesPerNode = 3; gNBytesPerNode = 3;
} else { } else {
error_exit( "move blank to last position in info.txt " ERROR_EXIT( "move blank to last position in info.txt "
"for smaller DAWG." ); "for smaller DAWG." );
} }
} }
@ -784,7 +811,7 @@ emitNodes( unsigned int nBytesPerOutfile, const char* outFileBase )
} }
if ( nextFileNum > 99 ) { if ( nextFileNum > 99 ) {
error_exit( "Too many outfiles; infinite loop?" ); ERROR_EXIT( "Too many outfiles; infinite loop?" );
} }
char outName[256]; char outName[256];
@ -801,7 +828,7 @@ emitNodes( unsigned int nBytesPerOutfile, const char* outFileBase )
// do nothing but a sanity check // do nothing but a sanity check
if ( i >= gNodes.size() ) { if ( i >= gNodes.size() ) {
error_exit( "bad trie format: last node not last sibling" ); ERROR_EXIT( "bad trie format: last node not last sibling" );
} }
} }
@ -833,15 +860,18 @@ printOneLevel( int index, char* str, int curlen )
// char* newStr = str; // char* newStr = str;
Node node = gNodes[index++]; Node node = gNodes[index++];
assert( TrieNodeGetLetter(node) < gRevMap.size() );
char lindx = gRevMap[TrieNodeGetLetter(node)]; char lindx = gRevMap[TrieNodeGetLetter(node)];
if ( (int)lindx >= 0x20 ) { if ( (int)lindx >= 0x20 ) {
// newStr .= "$lindx"; // newStr .= "$lindx";
str[curlen++] = lindx; str[curlen++] = lindx;
} else { } else {
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "sub space\n" ); fprintf( stderr, "sub space\n" );
} }
#endif
// $newStr .= "\\" . chr('0'+$lindx); // $newStr .= "\\" . chr('0'+$lindx);
str[curlen++] = '\\'; str[curlen++] = '\\';
str[curlen++] = '0' + lindx; str[curlen++] = '0' + lindx;
@ -874,7 +904,7 @@ outputNode( Node node, int nBytes, FILE* outfile )
if ( nBytes == 4 ) { if ( nBytes == 4 ) {
fourthByte = fco >> 16; fourthByte = fco >> 16;
if ( fourthByte > 0xFF ) { if ( fourthByte > 0xFF ) {
error_exit( "fco too big" ); ERROR_EXIT( "fco too big" );
} }
fco &= 0xFFFF; fco &= 0xFFFF;
} }
@ -907,13 +937,13 @@ outputNode( Node node, int nBytes, FILE* outfile )
} }
fco >>= 16; // it should now be 1 or 0 fco >>= 16; // it should now be 1 or 0
if ( fco > 1 ) { if ( fco > 1 ) {
error_exit( "fco not 1 or 0" ); ERROR_EXIT( "fco not 1 or 0" );
} }
unsigned char chIn5 = TrieNodeGetLetter(node); unsigned char chIn5 = TrieNodeGetLetter(node);
unsigned char bits = chIn5; unsigned char bits = chIn5;
if ( bits > 0x1F && nBytes == 3 ) { if ( bits > 0x1F && nBytes == 3 ) {
error_exit( "char %d too big", bits ); ERROR_EXIT( "char %d too big", bits );
} }
if ( TrieNodeGetIsLastSibling(node) ) { if ( TrieNodeGetIsLastSibling(node) ) {
@ -951,7 +981,9 @@ usage( const char* name )
"\t[-nosort] (input already sorted in accord with -m; " "\t[-nosort] (input already sorted in accord with -m; "
" default=sort'\n" " default=sort'\n"
"\t[-dump] (write dictionary as text to STDERR for testing)\n" "\t[-dump] (write dictionary as text to STDERR for testing)\n"
#ifdef DEBUG
"\t[-debug] (turn on verbose output)\n" "\t[-debug] (turn on verbose output)\n"
#endif
"\t[-force4](use 4 bytes per node regardless of need)\n" "\t[-force4](use 4 bytes per node regardless of need)\n"
"\t[-r] (reject words with letters not in mapfile)\n" "\t[-r] (reject words with letters not in mapfile)\n"
"\t[-k] (kill if any letters not in mapfile -- default)\n", "\t[-k] (kill if any letters not in mapfile -- default)\n",
@ -962,7 +994,7 @@ usage( const char* name )
static void static void
error_exit( int line, const char* fmt, ... ) error_exit( int line, const char* fmt, ... )
{ {
fprintf( stderr, "Line %d: ", line ); fprintf( stderr, "Error on line %d: ", line );
va_list ap; va_list ap;
va_start( ap, fmt ); va_start( ap, fmt );
vfprintf( stderr, fmt, ap ); vfprintf( stderr, fmt, ap );
@ -1008,13 +1040,16 @@ parseARGV( int argc, char** argv )
gBytesPerNodeFile = argv[index++]; gBytesPerNodeFile = argv[index++];
} else if ( 0 == strcmp( arg, "-force4" ) ) { } else if ( 0 == strcmp( arg, "-force4" ) ) {
gForceFour = true; gForceFour = true;
#ifdef DEBUG
} else if ( 0 == strcmp( arg, "-debug" ) ) { } else if ( 0 == strcmp( arg, "-debug" ) ) {
gDebug = true; gDebug = true;
#endif
} else { } else {
error_exit( "unexpected arg %s", arg ); ERROR_EXIT( "unexpected arg %s", arg );
} }
} }
#ifdef DEBUG
if ( gDebug ) { if ( gDebug ) {
fprintf( stderr, "gNBytesPerOutfile=$gNBytesPerOutfile\n" ); fprintf( stderr, "gNBytesPerOutfile=$gNBytesPerOutfile\n" );
fprintf( stderr, "gTableFile=$gTableFile\n" ); fprintf( stderr, "gTableFile=$gTableFile\n" );
@ -1022,6 +1057,6 @@ parseARGV( int argc, char** argv )
fprintf( stderr, "gStartNodeOut=$gStartNodeOut\n" ); fprintf( stderr, "gStartNodeOut=$gStartNodeOut\n" );
fprintf( stderr, "gTermChar=%c(%d)\n", gTermChar, (int)gTermChar ); fprintf( stderr, "gTermChar=%c(%d)\n", gTermChar, (int)gTermChar );
} }
#endif
return gTableFile; return gTableFile;
} // parseARGV } // parseARGV