Set locale based on params passed in, only on ENV if not specified.

2025-01-18 22:26:30 +01:00 · 2009-01-25 20:13:36 +00:00 · 2009-01-25 20:13:36 +00:00 · b7fa674c28
commit b7fa674c28
parent 90f8a276e1
1 changed files with 60 additions and 35 deletions
--- a/xwords4/dawg/dict2dawg.cpp
+++ b/xwords4/dawg/dict2dawg.cpp
@ -47,6 +47,7 @@
 #include <string.h>
 #include <netinet/in.h>
 #include <assert.h>
+#include <errno.h>
 #include <algorithm>

 #include <string>
@ -71,22 +72,23 @@ static Letter gCurrentWordBuf[MAX_WORD_LEN+1] = { '\0' };
 static Letter* gCurrentWord = gCurrentWordBuf;
 static int gCurrentWordLen;

-Letter* gCurWord = NULL;                   // save so can check for sortedness
-bool gDone = false;
+static bool gDone = false;
 static unsigned int gNextWordIndex;
 static void (*gReadWordProc)(void) = NULL;
-NodeList gNodes;       // final array of nodes
-unsigned int gNBytesPerOutfile = 0xFFFFFFFF;
-char* gTableFile = NULL;
+static NodeList gNodes;       // final array of nodes
+static unsigned int gNBytesPerOutfile = 0xFFFFFFFF;
+static char* gTableFile = NULL;
 static bool gIsMultibyte = false;
-char* gOutFileBase = NULL;
-char* gStartNodeOut = NULL;
+static const char* gEncoding = NULL;
+static char* gOutFileBase = NULL;
+static char* gStartNodeOut = NULL;
 static FILE* gInFile = NULL;
-bool gKillIfMissing = true;
-char gTermChar = '\n';
-bool gDumpText = false;                // dump the dict as text after?
-char* gCountFile = NULL;
-char* gBytesPerNodeFile = NULL;        // where to write whether node
+static bool gKillIfMissing = true;
+static char gTermChar = '\n';
+static bool gDumpText = false;                // dump the dict as text after?
+static char* gCountFile = NULL;
+static const char* gLang = NULL;
+static char* gBytesPerNodeFile = NULL;        // where to write whether node
                                       // size 3 or 4
 int gWordCount = 0;
 std::map<Letter,wchar_t> gTableHash;
@ -144,8 +146,6 @@ static void readFromSortedArray( void );
 int 
 main( int argc, char** argv ) 
 { 
-    setlocale(LC_CTYPE, "");
-    
    gReadWordProc = readFromSortedArray;

    const char* inFileName;
@ -154,6 +154,20 @@ main( int argc, char** argv )
        exit(1);
    }

+    char buf[32];
+    const char* locale = "";
+    if ( !!gLang && !!gEncoding ) {
+        snprintf( buf, sizeof(buf), "%s.%s", gLang, gEncoding );
+        locale = buf;
+    }
+    char* oldloc = setlocale( LC_ALL, locale );
+    if ( !oldloc ) {
+        ERROR_EXIT( "setlocale(%s) failed, error: %s", locale, 
+                    strerror(errno) );
+    } else {
+        fprintf( stderr, "old locale: %s\n", oldloc );
+    }
+    
    makeTableHash();

    // Do I need this stupid thing?  Better to move the first row to
@ -545,8 +559,9 @@ readOneWord( Letter* wordBuf, int bufLen, int* lenp, bool* gotEOF )
            if ( gDebug ) {
                char buf[T2ABUFLEN(count)];
                wordBuf[count] = '\0';
-                fprintf( stderr, "%s: dropping word (len>=%d): %s\n", __func__,
-                         count, tileToAscii( buf, sizeof(buf), wordBuf ) );
+                fprintf( stderr, "%s: dropping word (len %d>=%d): %s\n", 
+                         __func__, count, gLimHigh, 
+                         tileToAscii( buf, sizeof(buf), wordBuf ) );
            }
 #endif
            count = 0;  // we'll start over
@ -1111,27 +1126,30 @@ static void
 usage( const char* name )
 {
    fprintf( stderr, "usage: %s \n"
-             "\t[-v]                 (print version and exit)\n"
-             "\t[-poolsize]          (print hardcoded size of pool and exit)\n"
-             "\t[-b    bytesPerFile] (default = 0xFFFFFFFF)\n"
-             "\t[-min   <num in 0..15>]\n"
-             "\t[-max   <num in 0..15>]\n"
-             "\t-m     mapFile\n"
-             "\t-mn    mapFile (unicode)\n"
-             "\t-ob    outFileBase\n"
-             "\t-sn    start node out file\n"
-             "\t[-if   input file name]  -- default = stdin\n"
-             "\t[-term ch] (word terminator -- default = '\\0'\n"
-             "\t[-nosort] (input already sorted in accord with -m; " 
-             " default=sort'\n"
-             "\t[-dump]  (write dictionary as text to STDERR for testing)\n"
+             "\t[-v]                # print version and exit\n"
+             "\t[-poolsize]         # print hardcoded size of pool and exit\n"
+             "\t[-b    bytesPerFile]# for Palm only (default = 0xFFFFFFFF)\n"
+             "\t[-min   <0<=num<=15># min length word to keep\n"
+             "\t[-max   <0<=num<=15># max length word to keep\n"
+             "\t-m      mapFile\n"
+             "\t-mn     mapFile     # 16 bits per entry\n"
+             "\t-ob     outFileBase\n"
+             "\t-sn                 # start node out file\n"
+             "\t[-if    input_file] # default = stdin\n"
+             "\t[-term  ch]         # word terminator; default = '\\0'\n"
+             "\t[-nosort]           # input already sorted in accord with -m\n"
+             "\t                    #     default=sort'\n"
+             "\t[-dump]             # write dictionary as text to STDERR \n"
+             "\t                    #     for testing\n"
 #ifdef DEBUG
-             "\t[-debug] (turn on verbose output)\n"
+             "\t[-debug]            # turn on verbose output\n"
 #endif
-             "\t[-force4](use 4 bytes per node regardless of need)\n"
-             "\t[-r]     (reject words with letters not in mapfile)\n"
-             "\t[-k]     (kill if any letters not in mapfile -- default)\n",
-             name
+             "\t[-force4]           # always use 4 bytes per node\n"
+             "\t[-lang  lang]       # e.g. en_US\n"
+             "\t[-fsize nBytes]     # max buffer [default %d]\n"
+             "\t[-r]                # drop words with letters not in mapfile\n"
+             "\t[-k]                # (default) exit on any letter not in mapfile \n",
+             name, MAX_POOL_SIZE
             );
 } // usage

@ -1201,6 +1219,8 @@ parseARGV( int argc, char** argv, const char** inFileName )
            gForceFour = true;
        } else if ( 0 == strcmp( arg, "-fsize" ) ) {
            gFileSize = atoi(argv[index++]);
+        } else if ( 0 == strcmp( arg, "-lang" ) ) {
+            gLang = argv[index++];
 #ifdef DEBUG
        } else if ( 0 == strcmp( arg, "-debug" ) ) {
            gDebug = true;
@ -1218,9 +1238,14 @@ parseARGV( int argc, char** argv, const char** inFileName )
    if ( !!enc ) {
        if ( !strcasecmp( enc, "UTF-8" ) ) {
            gIsMultibyte = true;
+        } else if ( !strcasecmp( enc, "iso-8859-1" ) ) {
+            gIsMultibyte = false;
+        } else if ( !strcasecmp( enc, "iso-latin-1" ) ) {
+            gIsMultibyte = false;
        } else {
            ERROR_EXIT( "%s: unknown encoding %s", __func__, enc );
        }
+        gEncoding = enc;
    }

 #ifdef DEBUG