diff --git a/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictLangCache.java b/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictLangCache.java index 94710aca3..1f0f8497c 100644 --- a/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictLangCache.java +++ b/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictLangCache.java @@ -34,6 +34,7 @@ import org.eehouse.android.xw4.jni.JNIUtilsImpl; import org.eehouse.android.xw4.jni.XwJNI; import org.eehouse.android.xw4.loc.LocUtils; +import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; @@ -299,6 +300,12 @@ public class DictLangCache { return result; } + public static long getFileLen( Context context, DictAndLoc dal ) + { + File path = dal.getPath( context ); + return path.length(); + } + public static int getDictLangCode( Context context, String dict ) { return getInfo( context, dict ).langCode; diff --git a/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictUtils.java b/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictUtils.java index 5f6bc27bd..ed7130bf2 100644 --- a/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictUtils.java +++ b/xwords4/android/app/src/main/java/org/eehouse/android/xw4/DictUtils.java @@ -117,6 +117,12 @@ public class DictUtils { public String name; public DictLoc loc; + public File getPath( Context context ) + { + File path = getDictFile( context, addDictExtn(name), loc ); + return path; + } + @Override public boolean equals( Object obj ) { diff --git a/xwords4/android/app/src/main/java/org/eehouse/android/xw4/UpdateCheckReceiver.java b/xwords4/android/app/src/main/java/org/eehouse/android/xw4/UpdateCheckReceiver.java index 9473b124e..27951ff81 100644 --- a/xwords4/android/app/src/main/java/org/eehouse/android/xw4/UpdateCheckReceiver.java +++ b/xwords4/android/app/src/main/java/org/eehouse/android/xw4/UpdateCheckReceiver.java @@ -39,6 +39,7 @@ import org.json.JSONObject; 
public class UpdateCheckReceiver extends BroadcastReceiver { private static final String TAG = UpdateCheckReceiver.class.getSimpleName(); + private static final boolean LOG_QUERIES = false; public static final String NEW_DICT_URL = "NEW_DICT_URL"; public static final String NEW_DICT_LOC = "NEW_DICT_LOC"; @@ -62,6 +63,7 @@ public class UpdateCheckReceiver extends BroadcastReceiver { private static final String k_LANG = "lang"; private static final String k_MD5SUM = "md5sum"; private static final String k_INDEX = "index"; + private static final String k_LEN = "len"; private static final String k_URL = "url"; private static final String k_DEVID = "did"; private static final String k_DEBUG = "dbg"; @@ -174,7 +176,9 @@ public class UpdateCheckReceiver extends BroadcastReceiver { params.put( k_STRINGSHASH, BuildConfig.STRINGS_HASH ); params.put( k_NAME, packageName ); params.put( k_AVERS, versionCode ); - // Log.d( TAG, "current update: %s", params ); + if ( LOG_QUERIES ) { + Log.d( TAG, "checkVersions(): sending: %s", params ); + } new UpdateQueryTask( context, params, fromUI, pm, packageName, dals ).execute(); } catch ( org.json.JSONException jse ) { @@ -215,11 +219,13 @@ public class UpdateCheckReceiver extends BroadcastReceiver { int lang = DictLangCache.getDictLangCode( context, dal ); String langStr = DictLangCache.getLangName( context, lang ); String sum = DictLangCache.getDictMD5Sum( context, dal.name ); + long len = DictLangCache.getFileLen( context, dal ); try { params.put( k_NAME, dal.name ); params.put( k_LANG, langStr ); params.put( k_MD5SUM, sum ); params.put( k_INDEX, index ); + params.put( k_LEN, len ); } catch( org.json.JSONException jse ) { Log.ex( TAG, jse ); } @@ -269,6 +275,9 @@ public class UpdateCheckReceiver extends BroadcastReceiver { protected void onPostExecute( String json ) { if ( null != json ) { + if ( LOG_QUERIES ) { + Log.d( TAG, "onPostExecute(): received: %s", json ); + } makeNotificationsIf( json, m_params ); 
XWPrefs.setHaveCheckedUpgrades( m_context, true ); } diff --git a/xwords4/android/jni/anddict.c b/xwords4/android/jni/anddict.c index 521aadc6c..7a43e680b 100644 --- a/xwords4/android/jni/anddict.c +++ b/xwords4/android/jni/anddict.c @@ -49,12 +49,6 @@ typedef struct _AndDictionaryCtxt { #endif } AndDictionaryCtxt; -#define CHECK_PTR(p,c,e) \ - if ( ((p)+(c)) > (e) ) { \ - XP_LOGF( "%s (line %d); out of bytes", __func__, __LINE__ ); \ - goto error; \ - } - static void splitFaces_via_java( JNIEnv* env, AndDictionaryCtxt* ctxt, const XP_U8* ptr, int nFaceBytes, int nFaces, XP_Bool isUTF8 ); @@ -80,152 +74,6 @@ n_ptr_tohl( XP_U8 const** inp ) return XP_NTOHL(t); } /* n_ptr_tohl */ -static XP_U16 -n_ptr_tohs( XP_U8 const ** inp ) -{ - XP_U16 t; - XP_MEMCPY( &t, *inp, sizeof(t) ); - - *inp += sizeof(t); - - return XP_NTOHS(t); -} /* n_ptr_tohs */ - -static XP_U16 -andCountSpecials( AndDictionaryCtxt* ctxt ) -{ - XP_U16 result = 0; - - for ( int ii = 0; ii < ctxt->super.nFaces; ++ii ) { - if ( IS_SPECIAL( ctxt->super.facePtrs[ii][0] ) ) { - ++result; - } - } - - return result; -} /* andCountSpecials */ - -static XP_Bool -andMakeBitmap( AndDictionaryCtxt* ctxt, XP_U8 const** ptrp, - const XP_U8* end, XP_Bitmap* result ) -{ - XP_Bool success = XP_TRUE; - XP_U8 const* ptr = *ptrp; - jobject bitmap = NULL; - CHECK_PTR( ptr, 1, end ); - XP_U8 nCols = *ptr++; - if ( nCols > 0 ) { - CHECK_PTR( ptr, 1, end ); - XP_U8 nRows = *ptr++; - CHECK_PTR( ptr, ((nRows*nCols)+7) / 8, end ); -#ifdef DROP_BITMAPS - ptr += ((nRows*nCols)+7) / 8; -#else - XP_U8 srcByte = 0; - XP_U8 nBits; - - jboolean* colors = (jboolean*)XP_CALLOC( ctxt->super.mpool, - nCols * nRows * sizeof(*colors) ); - jboolean* next = colors; - - nBits = nRows * nCols; - for ( int ii = 0; ii < nBits; ++ii ) { - XP_U8 srcBitIndex = ii % 8; - XP_U8 srcMask; - - if ( srcBitIndex == 0 ) { - srcByte = *ptr++; - } - - srcMask = 1 << (7 - srcBitIndex); - XP_ASSERT( next < (colors + (nRows * nCols)) ); - *next++ = ((srcByte & 
srcMask) == 0) ? JNI_FALSE : JNI_TRUE; - } - - JNIEnv* env = ctxt->env; - jobject tmp = and_util_makeJBitmap( ctxt->jniutil, nCols, nRows, colors ); - bitmap = (*env)->NewGlobalRef( env, tmp ); - deleteLocalRef( env, tmp ); - XP_FREE( ctxt->super.mpool, colors ); -#endif - } - goto done; - error: - success = XP_FALSE; - done: - *ptrp = ptr; - *result = bitmap; - return success; -} /* andMakeBitmap */ - -static XP_Bool -andLoadSpecialData( AndDictionaryCtxt* ctxt, XP_U8 const** ptrp, - const XP_U8* end ) -{ - XP_Bool success = XP_TRUE; - XP_U16 nSpecials = andCountSpecials( ctxt ); - XP_U8 const* ptr = *ptrp; - XP_UCHAR** texts; - XP_UCHAR** textEnds; - SpecialBitmaps* bitmaps; - - texts = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool, - nSpecials * sizeof(*texts) ); - textEnds = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool, - nSpecials * sizeof(*textEnds) ); - - bitmaps = (SpecialBitmaps*) - XP_CALLOC( ctxt->super.mpool, nSpecials * sizeof(*bitmaps) ); - - for ( Tile ii = 0; ii < ctxt->super.nFaces; ++ii ) { - - const XP_UCHAR* facep = ctxt->super.facePtrs[(short)ii]; - if ( IS_SPECIAL(*facep) ) { - /* get the string */ - CHECK_PTR( ptr, 1, end ); - XP_U8 txtlen = *ptr++; - CHECK_PTR( ptr, txtlen, end ); - XP_UCHAR* text = (XP_UCHAR*)XP_MALLOC(ctxt->super.mpool, txtlen+1); - texts[(int)*facep] = text; - textEnds[(int)*facep] = text + txtlen + 1; - XP_MEMCPY( text, ptr, txtlen ); - ptr += txtlen; - text[txtlen] = '\0'; - XP_ASSERT( *facep < nSpecials ); /* firing */ - - /* This little hack is safe because all bytes but the first in a - multi-byte utf-8 char have the high bit set. 
SYNONYM_DELIM - does not have its high bit set */ - XP_ASSERT( 0 == (SYNONYM_DELIM & 0x80) ); - for ( ; '\0' != *text; ++text ) { - if ( *text == SYNONYM_DELIM ) { - *text = '\0'; - } - } - - if ( !andMakeBitmap( ctxt, &ptr, end, - &bitmaps[(int)*facep].largeBM ) ) { - goto error; - } - if ( !andMakeBitmap( ctxt, &ptr, end, - &bitmaps[(int)*facep].smallBM ) ) { - goto error; - } - } - } - - goto done; - error: - success = XP_FALSE; - done: - ctxt->super.chars = texts; - ctxt->super.charEnds = textEnds; - ctxt->super.bitmaps = bitmaps; - - *ptrp = ptr; - return success; -} /* andLoadSpecialData */ - /** Android doesn't include iconv for C code to use, so we'll have java do it. * Cons up a string with all the tile faces (skipping the specials to make * things easier) and have java return an array of strings. Then load one at @@ -290,16 +138,18 @@ splitFaces_via_java( JNIEnv* env, AndDictionaryCtxt* ctxt, const XP_U8* ptr, ctxt->super.facePtrs = ptrs; } /* splitFaces_via_java */ -static XP_UCHAR* -getNullTermParam( AndDictionaryCtxt* dctx, const XP_U8** ptr, - XP_U16* headerLen ) +void +computeChecksum( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8* ptr, + XP_U32 len, XP_UCHAR* out ) { - XP_U16 len = 1 + XP_STRLEN( (XP_UCHAR*)*ptr ); - XP_UCHAR* result = XP_MALLOC( dctx->super.mpool, len ); - XP_MEMCPY( result, *ptr, len ); - *ptr += len; - *headerLen -= len; - return result; + AndDictionaryCtxt* ctxt = (AndDictionaryCtxt*)dctx; + JNIEnv* env = xwe; + jstring jsum = and_util_getMD5SumForDict( ctxt->jniutil, env, + ctxt->super.name, ptr, len ); + const char* sum = (*env)->GetStringUTFChars( env, jsum, NULL ); + XP_MEMCPY( out, sum, 1 + XP_STRLEN(sum) ); + (*env)->ReleaseStringUTFChars( env, jsum, sum ); + deleteLocalRef( env, jsum ); } static XP_Bool @@ -311,149 +161,15 @@ parseDict( AndDictionaryCtxt* ctxt, XWEnv xwe, XP_U8 const* ptr, ASSERT_ENV( ctxt->ti, xwe ); const XP_U8* end = ptr + dictLength; XP_U32 offset; - XP_U16 nFaces, numFaceBytes = 0; - XP_U16 flags; 
void* mappedBase = (void*)ptr; - XP_U8 nodeSize; - XP_Bool isUTF8 = XP_FALSE; - CHECK_PTR( ptr, sizeof(flags), end ); - flags = n_ptr_tohs( &ptr ); - if ( 0 != (DICT_HEADER_MASK & flags) ) { - XP_U16 headerLen; - flags &= ~DICT_HEADER_MASK; - CHECK_PTR( ptr, sizeof(headerLen), end ); - headerLen = n_ptr_tohs( &ptr ); - if ( 4 <= headerLen ) { /* have word count? */ - CHECK_PTR( ptr, sizeof(ctxt->super.nWords), end ); - ctxt->super.nWords = n_ptr_tohl( &ptr ); - headerLen -= 4; /* don't skip it */ - } - - if ( 1 <= headerLen ) { /* have description? */ - ctxt->super.desc = getNullTermParam( ctxt, &ptr, &headerLen ); - } - if ( 1 <= headerLen ) { /* have md5sum? */ - ctxt->super.md5Sum = getNullTermParam( ctxt, &ptr, &headerLen ); - } - - CHECK_PTR( ptr, headerLen, end ); - ptr += headerLen; - } - - flags &= ~DICT_SYNONYMS_MASK; - if ( flags == 0x0002 ) { - nodeSize = 3; - } else if ( flags == 0x0003 ) { - nodeSize = 4; - } else if ( flags == 0x0004 ) { - isUTF8 = XP_TRUE; - nodeSize = 3; - } else if ( flags == 0x0005 ) { - isUTF8 = XP_TRUE; - nodeSize = 4; - } else { - goto error; - } - - if ( isUTF8 ) { - CHECK_PTR( ptr, 1, end ); - numFaceBytes = (XP_U16)(*ptr++); - } - CHECK_PTR( ptr, 1, end ); - nFaces = (XP_U16)(*ptr++); - if ( nFaces > 64 ) { - goto error; - } - - JNIEnv* env = xwe; - if ( NULL == ctxt->super.md5Sum -#ifdef DEBUG - || XP_TRUE -#endif - ) { - jstring jsum = and_util_getMD5SumForDict( ctxt->jniutil, env, - ctxt->super.name, NULL, 0 ); - XP_UCHAR* md5Sum = NULL; - /* If we have a cached sum, check that it's correct. 
*/ - if ( NULL != jsum && NULL != ctxt->super.md5Sum ) { - md5Sum = getStringCopy( MPPARM(ctxt->super.mpool) env, jsum ); - if ( 0 != XP_STRCMP( ctxt->super.md5Sum, md5Sum ) ) { - deleteLocalRef( env, jsum ); - jsum = NULL; - XP_FREE( ctxt->super.mpool, md5Sum ); - md5Sum = NULL; - } - } - - if ( NULL == jsum ) { - jsum = and_util_getMD5SumForDict( ctxt->jniutil, env, - ctxt->super.name, ptr, end - ptr ); - } - if ( NULL == md5Sum ) { - md5Sum = getStringCopy( MPPARM(ctxt->super.mpool) env, jsum ); - } - deleteLocalRef( env, jsum ); - - if ( NULL == ctxt->super.md5Sum ) { - ctxt->super.md5Sum = md5Sum; - } else { - XP_FREE( ctxt->super.mpool, md5Sum ); - } - } - - ctxt->super.nodeSize = nodeSize; - - if ( !isUTF8 ) { - numFaceBytes = nFaces * 2; - } - - ctxt->super.nFaces = (XP_U8)nFaces; - ctxt->super.isUTF8 = isUTF8; - - if ( isUTF8 ) { - CHECK_PTR( ptr, numFaceBytes, end ); - splitFaces_via_java( env, ctxt, ptr, numFaceBytes, nFaces, - XP_TRUE ); - ptr += numFaceBytes; - } else { - XP_U8 tmp[nFaces*4]; /* should be enough... 
*/ - XP_U16 nBytes = 0; - /* Need to translate from iso-8859-n to utf8 */ - CHECK_PTR( ptr, 2 * nFaces, end ); - for ( int ii = 0; ii < nFaces; ++ii ) { - XP_UCHAR ch = ptr[1]; - - ptr += 2; - - tmp[nBytes] = ch; - nBytes += 1; - } - XP_ASSERT( nFaces == nBytes ); - splitFaces_via_java( env, ctxt, tmp, nBytes, nFaces, XP_FALSE ); - } - - ctxt->super.is_4_byte = (ctxt->super.nodeSize == 4); - - ctxt->super.countsAndValues = - (XP_U8*)XP_MALLOC(ctxt->super.mpool, nFaces*2); - - CHECK_PTR( ptr, 2, end ); - ctxt->super.langCode = ptr[0] & 0x7F; - ptr += 2; /* skip xloc header */ - CHECK_PTR( ptr, 2 * nFaces, end ); - for ( int ii = 0; ii < nFaces*2; ii += 2 ) { - ctxt->super.countsAndValues[ii] = *ptr++; - ctxt->super.countsAndValues[ii+1] = *ptr++; - } - - if ( !andLoadSpecialData( ctxt, &ptr, end ) ) { + if ( !parseCommon( &ctxt->super, xwe, &ptr, end ) ) { goto error; } dictLength -= ptr - (XP_U8*)mappedBase; if ( dictLength >= sizeof(offset) ) { - CHECK_PTR( ptr, sizeof(offset), end ); + CHECK_PTR( ptr, sizeof(offset), end, error ); offset = n_ptr_tohl( &ptr ); dictLength -= sizeof(offset); XP_ASSERT( dictLength % ctxt->super.nodeSize == 0 ); @@ -470,6 +186,8 @@ parseDict( AndDictionaryCtxt* ctxt, XWEnv xwe, XP_U8 const* ptr, ctxt->super.topEdge = ctxt->super.base + (offset * ctxt->super.nodeSize); } else { + XP_ASSERT( !ctxt->super.topEdge ); + XP_ASSERT( !ctxt->super.base ); ctxt->super.topEdge = (array_edge*)NULL; ctxt->super.base = (array_edge*)NULL; } @@ -489,7 +207,7 @@ and_dictionary_destroy( DictionaryCtxt* dict, XWEnv xwe ) AndDictionaryCtxt* ctxt = (AndDictionaryCtxt*)dict; ASSERT_ENV( ctxt->ti, xwe ); XP_LOGF( "%s(dict=%p); code=%x", __func__, ctxt, ctxt->dbgid ); - XP_U16 nSpecials = andCountSpecials( ctxt ); + XP_U16 nSpecials = countSpecials( &ctxt->super ); JNIEnv* env = xwe; if ( !!ctxt->super.chars ) { diff --git a/xwords4/android/jni/andutils.c b/xwords4/android/jni/andutils.c index 272787f9d..c04e3a0bf 100644 --- a/xwords4/android/jni/andutils.c 
+++ b/xwords4/android/jni/andutils.c @@ -26,6 +26,7 @@ #include "comtypes.h" #include "xwstream.h" +#include "strutils.h" void and_assert( const char* test, int line, const char* file, const char* func ) @@ -251,10 +252,8 @@ getStringCopy( MPFORMAL JNIEnv* env, jstring jstr ) { XP_UCHAR* result = NULL; if ( NULL != jstr ) { - jsize len = 1 + (*env)->GetStringUTFLength( env, jstr ); const char* chars = (*env)->GetStringUTFChars( env, jstr, NULL ); - result = XP_MALLOC( mpool, len ); - XP_MEMCPY( result, chars, len ); + result = copyString( mpool, chars ); (*env)->ReleaseStringUTFChars( env, jstr, chars ); } return result; diff --git a/xwords4/android/scripts/info.py b/xwords4/android/scripts/info.py index 387e396ed..14320f24c 100755 --- a/xwords4/android/scripts/info.py +++ b/xwords4/android/scripts/info.py @@ -15,9 +15,12 @@ except: from stat import ST_CTIME try: from mod_python import apache - apacheAvailable = True except ImportError: - apacheAvailable = False + apache = None + +def apache_log_error(msg): + if apache: + apache.log_error(msg) # constants that are also used in UpdateCheckReceiver.java VERBOSE = False @@ -50,11 +53,13 @@ k_PAIRS = 'pairs' k_LANG = 'lang' k_MD5SUM = 'md5sum' k_INDEX = 'index' +k_LEN = 'len' k_ISUM = 'isum' k_SUCCESS = 'success' k_URL = 'url' k_SUMS = 'sums' +k_LENS = 'lens' k_COUNT = 'count' k_LANGS = 'langs' k_LANGSVERS = 'lvers' @@ -102,11 +107,11 @@ def getInternalSum( filePath ): '--', k_filebase + 'xw4/dawg2dict.py', '--get-sum', - '--dict', filePath ], + '--dawg', filePath ], stdout = subprocess.PIPE, stderr = subprocess.PIPE) results = proc.communicate() - # apache.log_error(filePath + ': ' + results[1].strip()) + # apache_log_error(filePath + ': ' + results[1].strip()) return results[0].strip() def md5Checksums( sums, filePath ): @@ -125,11 +130,11 @@ def md5Checksums( sums, filePath ): sums[filePath] = [ md5.hexdigest(), getInternalSum( filePath ) ] - apache.log_error( "figured sum for %s: %s" % (filePath, + 
apache_log_error( "figured sum for %s: %s" % (filePath, sums[filePath] ) ) result = sums[filePath] except: - # apache.log_error( "Unexpected error: " + sys.exc_info()[0] ) + # apache_log_error( "Unexpected error: " + sys.exc_info()[0] ) result = None return result @@ -143,9 +148,10 @@ def openShelf(): except: s_shelf = {} if not k_SUMS in s_shelf: s_shelf[k_SUMS] = {} + if not k_LENS in s_shelf: s_shelf[k_LENS] = {} if not k_COUNT in s_shelf: s_shelf[k_COUNT] = 0 s_shelf[k_COUNT] += 1 - # apache.log_error( "Count now %d" % s_shelf[k_COUNT] ) + # apache_log_error( "Count now %d" % s_shelf[k_COUNT] ) def closeShelf(): global s_shelf @@ -156,9 +162,14 @@ def getDictSums(): openShelf() return s_shelf[k_SUMS] +def getDictLens(): + global s_shelf + openShelf() + return s_shelf[k_LENS] + def getGitRevFor(file, repo): result = None - zip = zipfile.ZipFile(file); + zip = zipfile.ZipFile(file) try: result = zip.read('assets/gitvers.txt').split("\n")[0] @@ -243,48 +254,47 @@ def getVariantDir( name ): splits = string.split( name, '.' 
) last = splits[-1] if not last == 'xw4': result = last + '/' - # apache.log_error( 'getVariantDir(' + name + ") => " + result ) + # apache_log_error( 'getVariantDir(' + name + ") => " + result ) return result # public, but deprecated def curVersion( req, name, avers = 41, gvers = None, installer = None ): global k_versions result = { k_SUCCESS : True } - if apacheAvailable: - apache.log_error( 'IP address of requester is %s' - % req.get_remote_host(apache.REMOTE_NAME) ) + apache_log_error( 'IP address of requester is %s' + % req.get_remote_host(apache.REMOTE_NAME) ) - apache.log_error( "name: %s; avers: %s; installer: %s; gvers: %s" + apache_log_error( "name: %s; avers: %s; installer: %s; gvers: %s" % (name, avers, installer, gvers) ) if name in k_versions: versions = k_versions[name] if versions[k_AVERS] > int(avers): - apache.log_error( avers + " is old" ) + apache_log_error( avers + " is old" ) result[k_URL] = k_urlbase + '/' + versions[k_URL] else: - apache.log_error(name + " is up-to-date") + apache_log_error(name + " is up-to-date") else: - apache.log_error( 'Error: bad name ' + name ) + apache_log_error( 'Error: bad name ' + name ) return json.dumps( result ) # public, but deprecated -def dictVersion( req, name, lang, md5sum ): - result = { k_SUCCESS : True } - if not name.endswith(k_suffix): name += k_suffix - dictSums = getDictSums() - path = lang + "/" + name - if not path in dictSums: - sums = md5Checksums( dictSums, path ) - if sums: - dictSums[path] = sums - s_shelf[k_SUMS] = dictSums - if path in dictSums: - if not md5sum in dictSums[path]: - result[k_URL] = k_urlbase + "/and_wordlists/" + path - else: - apache.log_error( path + " not known" ) - closeShelf() - return json.dumps( result ) +# def dictVersion( req, name, lang, md5sum ): +# result = { k_SUCCESS : True } +# if not name.endswith(k_suffix): name += k_suffix +# dictSums = getDictSums() +# path = lang + "/" + name +# if not path in dictSums: +# sums = md5Checksums( dictSums, path ) +# if 
sums: +# dictSums[path] = sums +# s_shelf[k_SUMS] = dictSums +# if path in dictSums: +# if not md5sum in dictSums[path]: +# result[k_URL] = k_urlbase + "/and_wordlists/" + path +# else: +# apache_log_error( path + " not known" ) +# closeShelf() +# return json.dumps( result ) def getApp( params, name = None, debug = False): result = None @@ -300,10 +310,10 @@ def getApp( params, name = None, debug = False): apk = apks[0] curApk = params[k_GVERS] + '.apk' if curApk in apk: - apache.log_error( "already have " + curApk ) + apache_log_error( "already have " + curApk ) else: url = k_urlbase + '/' + k_apkDir + variantDir + apk[len(dir):] - apache.log_error("url: " + url) + apache_log_error("url: " + url) result = {k_URL: url} elif k_DEVOK in params and params[k_DEVOK]: apks = getOrderedApks( k_filebase + k_apkDir, name, False ) @@ -312,18 +322,18 @@ def getApp( params, name = None, debug = False): # Does path NOT contain name of installed file curApk = params[k_GVERS] + '.apk' if curApk in apk: - apache.log_error( "already have " + curApk ) + apache_log_error( "already have " + curApk ) else: url = k_urlbase + '/' + apk[len(k_filebase):] result = {k_URL: url} - apache.log_error( result ) + apache_log_error( result ) elif k_AVERS in params: vers = params[k_AVERS] if k_INSTALLER in params: installer = params[k_INSTALLER] else: installer = '' - apache.log_error( "name: %s; installer: %s; gvers: %s" + apache_log_error( "name: %s; installer: %s; gvers: %s" % (name, installer, vers) ) print "name: %s; installer: %s; vers: %s" % (name, installer, vers) dir = k_filebase + k_apkDir @@ -332,11 +342,11 @@ def getApp( params, name = None, debug = False): apk = apk[len(k_filebase):] # strip fs path result = {k_URL: k_urlbase + '/' + apk} else: - apache.log_error(name + " is up-to-date") + apache_log_error(name + " is up-to-date") else: - apache.log_error( 'Error: bad name ' + name ) + apache_log_error( 'Error: bad name ' + name ) else: - apache.log_error( 'missing param' ) + 
apache_log_error( 'missing param' ) return result def getStats( path ): @@ -402,7 +412,7 @@ def listDicts( lc = None ): s_shelf[k_LANGSVERS] = langsVers result = { 'langs' : s_shelf[k_LANGS] } - closeShelf(); + closeShelf() print "looking for", lc if lc: @@ -413,13 +423,27 @@ def listDicts( lc = None ): def getDicts( params ): result = [] dictSums = getDictSums() + dictLens = getDictLens() for param in params: name = param[k_NAME] lang = param[k_LANG] md5sum = param[k_MD5SUM] index = param[k_INDEX] + if k_LEN in param: dictLen = int(param[k_LEN]) + else: dictLen = 0 + if not name.endswith(k_suffix): name += k_suffix path = lang + "/" + name + try: + fullPath = k_filebase + "and_wordlists/" + path + # Use this as an excuse to check for existence + dictLens[path] = int(os.stat( fullPath ).st_size) + except: + apache_log_error( 'dropping for non-existant file: {}'.format(fullPath) ) + continue + + needsUpgrade = False + if not path in dictSums: sums = md5Checksums( dictSums, path ) if sums: @@ -427,11 +451,19 @@ def getDicts( params ): s_shelf[k_SUMS] = dictSums if path in dictSums: if not md5sum in dictSums[path]: - cur = { k_URL : k_urlbase + "/and_wordlists/" + path, - k_INDEX : index, k_ISUM: dictSums[path][1] } - result.append( cur ) + needsUpgrade = True + + if not needsUpgrade and dictLen > 0: + if not dictLens[path] == dictLen: needsUpgrade = True + + if needsUpgrade: + cur = { k_URL : k_urlbase + "/and_wordlists/" + path, + k_INDEX : index, k_ISUM: dictSums[path][-1], + k_LEN : dictLens[path], + } + result.append( cur ) else: - apache.log_error( path + " not known" ) + apache_log_error( path + " not known" ) closeShelf() if 0 == len(result): result = None @@ -439,22 +471,22 @@ def getDicts( params ): def variantFor( name ): if name == 'xw4': result = 'XWords4' - apache.log_error( 'variantFor(%s)=>%s' % (name, result)) + apache_log_error( 'variantFor(%s)=>%s' % (name, result)) return result def getXlate( params, name, stringsHash ): result = [] path = 
xwconfig.k_REPOPATH - apache.log_error('creating repo with path ' + path) + apache_log_error('creating repo with path ' + path) repo = mygit.GitRepo( path ) - apache.log_error( "getXlate: %s, hash=%s" % (json.dumps(params), stringsHash) ) - # apache.log_error( 'status: ' + repo.status() ) + apache_log_error( "getXlate: %s, hash=%s" % (json.dumps(params), stringsHash) ) + # apache_log_error( 'status: ' + repo.status() ) # reduce org.eehouse.anroid.xxx to xxx, then turn it into a # variant and get the contents of the R.java file splits = name.split('.') name = splits[-1] - variant = variantFor( name ); + variant = variantFor( name ) rPath = '%s/archive/R.java' % variant rDotJava = repo.cat( rPath, stringsHash ) @@ -463,7 +495,7 @@ def getXlate( params, name, stringsHash ): # the revision BEFORE the revision that changed R.java head = repo.getHeadRev() - apache.log_error('head = %s' % head) + apache_log_error('head = %s' % head) rjavarevs = repo.getRevsBetween(head, stringsHash, rPath) if rjavarevs: assert( 1 >= len(rjavarevs) ) @@ -474,7 +506,7 @@ def getXlate( params, name, stringsHash ): firstPossible = rjavarevs[-2] + '^' # get actual number for rev^ firstPossible = repo.getRevsBetween( firstPossible, firstPossible )[0] - apache.log_error('firstPossible: %s' % firstPossible) + apache_log_error('firstPossible: %s' % firstPossible) for entry in params: curVers = entry[k_XLATEVERS] @@ -490,7 +522,7 @@ def getXlate( params, name, stringsHash ): } ) if 0 == len(result): result = None - apache.log_error( "getXlate=>%s" % (json.dumps(result)) ) + apache_log_error( "getXlate=>%s" % (json.dumps(result)) ) return result def init(): @@ -546,7 +578,7 @@ def opponentIDsFor( req, params ): def getUpdates( req, params ): result = { k_SUCCESS : True } appResult = None - apache.log_error( "getUpdates: got params: %s" % params ) + apache_log_error( "getUpdates: got params: %s" % params ) asJson = json.loads( params ) if k_APP in asJson: name = None @@ -561,15 +593,15 @@ def 
getUpdates( req, params ): # Let's not upgrade strings at the same time as we're upgrading the app # if appResult: - # apache.log_error( 'skipping xlation upgrade because app being updated' ) + # apache_log_error( 'skipping xlation upgrade because app being updated' ) # elif k_XLATEINFO in asJson and k_NAME in asJson and k_STRINGSHASH in asJson: # xlateResult = getXlate( asJson[k_XLATEINFO], asJson[k_NAME], asJson[k_STRINGSHASH] ) # if xlateResult: - # apache.log_error( xlateResult ) + # apache_log_error( xlateResult ) # result[k_XLATEINFO] = xlateResult; result = json.dumps( result ) - apache.log_error( 'getUpdates() => ' + result ) + apache_log_error( 'getUpdates() => ' + result ) return result def clearShelf(): @@ -581,7 +613,7 @@ def usage(msg=None): if msg: print "ERROR:", msg print "usage:", sys.argv[0], '--get-sums [lang/dict]*' print ' | --get-app --appID --vers --gvers [--debug]' - print ' | --test-get-dicts name lang curSum' + print ' | --test-get-dicts name lang curSum curLen/0' print ' | --list-apks [--path ] [--debug] --appID org.something' print ' | --list-dicts' print ' | --opponent-ids-for' @@ -625,10 +657,11 @@ def main(): } print getApp( params ) elif arg == '--test-get-dicts': - if not 5 == argc: usage() + if not 6 == argc: usage() params = { k_NAME: sys.argv[2], k_LANG : sys.argv[3], k_MD5SUM : sys.argv[4], + k_LEN : sys.argv[5], k_INDEX : 0, } print getDicts( [params] ) @@ -662,6 +695,8 @@ def main(): else: usage() + print("normal exit") + ############################################################################## if __name__ == '__main__': main() diff --git a/xwords4/common/dictiter.c b/xwords4/common/dictiter.c index 6cbc5f3e9..eacde18c5 100644 --- a/xwords4/common/dictiter.c +++ b/xwords4/common/dictiter.c @@ -33,6 +33,13 @@ #include "dictiter.h" #include "game.h" +/* Define DI_DEBUG in Makefile. It makes iteration really slow on Android */ +#ifdef DI_DEBUG +# define DI_ASSERT(...) XP_ASSERT(__VA_ARGS__) +#else +# define DI_ASSERT(...) 
+#endif + #ifdef CPLUS extern "C" { #endif @@ -455,7 +462,7 @@ dict_makeIndex( const DictIter* iter, XP_U16 depth, IndexData* data ) { ASSERT_INITED( iter ); const DictionaryCtxt* dict = iter->dict; - XP_ASSERT( depth < MAX_COLS_DICT ); + DI_ASSERT( depth < MAX_COLS_DICT ); XP_U16 ii, needCount; const XP_U16 nFaces = dict_numTileFaces( dict ); XP_U16 nNonBlankFaces = nFaces; @@ -466,7 +473,7 @@ dict_makeIndex( const DictIter* iter, XP_U16 depth, IndexData* data ) for ( ii = 1, needCount = nNonBlankFaces; ii < depth; ++ii ) { needCount *= nNonBlankFaces; } - XP_ASSERT( needCount <= data->count ); + DI_ASSERT( needCount <= data->count ); Tile allTiles[nNonBlankFaces]; XP_U16 nTiles = 0; @@ -491,10 +498,10 @@ dict_makeIndex( const DictIter* iter, XP_U16 depth, IndexData* data ) data, &prevIter, &prevIndex ); } -#ifdef DEBUG +#ifdef DI_DEBUG DictPosition pos; for ( pos = 1; pos < data->count; ++pos ) { - XP_ASSERT( data->indices[pos-1] < data->indices[pos] ); + DI_ASSERT( data->indices[pos-1] < data->indices[pos] ); } #endif } /* dict_makeIndex */ @@ -569,7 +576,7 @@ dict_getNthWord( DictIter* iter, DictPosition position, XP_U16 depth, XP_Bool validWord = 0 < iter->nEdges; if ( validWord ) { /* uninitialized */ wordCount = iter->nWords; - XP_ASSERT( wordCount == dict_countWords( iter, NULL ) ); + DI_ASSERT( wordCount == dict_countWords( iter, NULL ) ); } else { wordCount = dict_getWordCount( dict ); } diff --git a/xwords4/common/dictnry.c b/xwords4/common/dictnry.c index dd41ff19d..516dd2173 100644 --- a/xwords4/common/dictnry.c +++ b/xwords4/common/dictnry.c @@ -30,6 +30,7 @@ #include "strutils.h" #include "dictiter.h" #include "game.h" +#include "dbgutil.h" #ifdef CPLUS extern "C" { @@ -39,6 +40,8 @@ extern "C" { * ****************************************************************************/ +static XP_Bool makeBitmap( XP_U8 const** ptrp, const XP_U8* end ); + DictionaryCtxt* p_dict_ref( DictionaryCtxt* dict, XWEnv XP_UNUSED(xwe) #ifdef DEBUG_REF @@ -90,6 +93,266 @@ 
dict_unref_all( PlayerDicts* pd, XWEnv xwe ) } } +static XP_UCHAR* +getNullTermParam( DictionaryCtxt* XP_UNUSED_DBG(dctx), const XP_U8** ptr, + XP_U16* headerLen ) +{ + XP_U16 len = 1 + XP_STRLEN( (XP_UCHAR*)*ptr ); + XP_UCHAR* result = XP_MALLOC( dctx->mpool, len ); + XP_MEMCPY( result, *ptr, len ); + *ptr += len; + *headerLen -= len; + return result; +} + +static XP_Bool +loadSpecialData( DictionaryCtxt* ctxt, XP_U8 const** ptrp, + const XP_U8* end ) +{ + LOG_FUNC(); + XP_Bool success = XP_TRUE; + XP_U16 nSpecials = countSpecials( ctxt ); + XP_U8 const* ptr = *ptrp; + XP_UCHAR** texts; + XP_UCHAR** textEnds; + SpecialBitmaps* bitmaps; + + texts = (XP_UCHAR**)XP_MALLOC( ctxt->mpool, + nSpecials * sizeof(*texts) ); + textEnds = (XP_UCHAR**)XP_MALLOC( ctxt->mpool, + nSpecials * sizeof(*textEnds) ); + + bitmaps = (SpecialBitmaps*) + XP_CALLOC( ctxt->mpool, nSpecials * sizeof(*bitmaps) ); + + for ( Tile ii = 0; ii < ctxt->nFaces; ++ii ) { + const XP_UCHAR* facep = ctxt->facePtrs[(short)ii]; + if ( IS_SPECIAL(*facep) ) { + /* get the string */ + CHECK_PTR( ptr, 1, end, error ); + XP_U8 txtlen = *ptr++; + CHECK_PTR( ptr, txtlen, end, error ); + XP_UCHAR* text = (XP_UCHAR*)XP_MALLOC(ctxt->mpool, txtlen+1); + texts[(int)*facep] = text; + textEnds[(int)*facep] = text + txtlen + 1; + XP_MEMCPY( text, ptr, txtlen ); + ptr += txtlen; + text[txtlen] = '\0'; + XP_ASSERT( *facep < nSpecials ); /* firing */ + + /* This little hack is safe because all bytes but the first in a + multi-byte utf-8 char have the high bit set. 
SYNONYM_DELIM + does not have its high bit set */ + XP_ASSERT( 0 == (SYNONYM_DELIM & 0x80) ); + for ( ; '\0' != *text; ++text ) { + if ( *text == SYNONYM_DELIM ) { + *text = '\0'; + } + } + + if ( !makeBitmap( &ptr, end ) ) { + goto error; + } + if ( !makeBitmap( &ptr, end ) ) { + goto error; + } + } + } + + goto done; + error: + success = XP_FALSE; + done: + ctxt->chars = texts; + ctxt->charEnds = textEnds; + ctxt->bitmaps = bitmaps; + + *ptrp = ptr; + return success; +} /* loadSpecialData */ + +XP_Bool +parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* end ) +{ + const XP_U8* ptr = *ptrp; + XP_Bool hasHeader = XP_FALSE; + XP_Bool isUTF8 = XP_FALSE; + XP_U16 charSize; + + XP_U16 flags; + XP_Bool formatOk = sizeof(flags) <= end - ptr; + if ( formatOk ) { + XP_MEMCPY( &flags, ptr, sizeof(flags) ); + ptr += sizeof( flags ); + flags = XP_NTOHS(flags); + + XP_LOGFF( "flags=0X%X", flags ); + hasHeader = 0 != (DICT_HEADER_MASK & flags); + /* if ( hasHeader ) { */ + /* flags &= ~DICT_HEADER_MASK; */ + /* } */ + + XP_U8 nodeSize = 4; + switch ( flags & 0x0007 ) { + case 0x0001: + nodeSize = 3; + charSize = 1; + dctx->is_4_byte = XP_FALSE; + break; + case 0x0002: + nodeSize = 3; + charSize = 2; + dctx->is_4_byte = XP_FALSE; + break; + case 0x0003: + charSize = 2; + dctx->is_4_byte = XP_TRUE; + break; + case 0x0004: + nodeSize = 3; + isUTF8 = XP_TRUE; + dctx->is_4_byte = XP_FALSE; + break; + case 0x0005: + isUTF8 = XP_TRUE; + dctx->is_4_byte = XP_TRUE; + break; + default: + formatOk = XP_FALSE; + break; + } + dctx->isUTF8 = isUTF8; + dctx->nodeSize = nodeSize; + } + + if ( formatOk ) { + XP_U8 numFaceBytes, numFaces; + + if ( hasHeader ) { + XP_U16 headerLen; + XP_U32 wordCount; + + memcpy( &headerLen, ptr, sizeof(headerLen) ); + ptr += sizeof(headerLen); + headerLen = XP_NTOHS( headerLen ); + + memcpy( &wordCount, ptr, sizeof(wordCount) ); + ptr += sizeof(wordCount); + headerLen -= sizeof(wordCount); + dctx->nWords = XP_NTOHL( wordCount ); + 
XP_DEBUGF( "dict contains %d words", dctx->nWords ); + + if ( 0 < headerLen ) { + dctx->desc = getNullTermParam( dctx, &ptr, &headerLen ); + } else { + XP_LOGF( "%s: no note", __func__ ); + } + if ( 0 < headerLen ) { + dctx->md5Sum = getNullTermParam( dctx, &ptr, &headerLen ); + } else { + XP_LOGF( "%s: no md5Sum", __func__ ); + } + ptr += headerLen; + } + + if ( isUTF8 ) { + numFaceBytes = *ptr++; + } + numFaces = *ptr++; + if ( !isUTF8 ) { + numFaceBytes = numFaces * charSize; + } + + if ( NULL == dctx->md5Sum +#ifdef DEBUG + || XP_TRUE +#endif + ) { + XP_UCHAR checksum[256]; + // XP_LOGFF( "figuring checksum with len: %uz", end - ptr ); + computeChecksum( dctx, xwe, ptr, end - ptr, checksum ); + if ( NULL == dctx->md5Sum ) { + dctx->md5Sum = copyString( dctx->mpool, checksum ); + } else { + XP_ASSERT( 0 == XP_STRCMP( dctx->md5Sum, checksum ) ); + } + } + + dctx->nFaces = numFaces; + + dctx->countsAndValues = XP_MALLOC( dctx->mpool, numFaces * 2 ); + XP_U16 facesSize = numFaceBytes; + if ( !isUTF8 ) { + facesSize /= 2; + } + + XP_U8 tmp[numFaceBytes]; + XP_MEMCPY( tmp, ptr, numFaceBytes ); + ptr += numFaceBytes; + + dict_splitFaces( dctx, xwe, tmp, numFaceBytes, numFaces ); + + unsigned short xloc; + XP_MEMCPY( &xloc, ptr, sizeof(xloc) ); + ptr += sizeof(xloc); + XP_MEMCPY( dctx->countsAndValues, ptr, numFaces*2 ); + ptr += numFaces*2; + + dctx->langCode = xloc & 0x7F; + } + + if ( formatOk ) { + formatOk = loadSpecialData( dctx, &ptr, end ); + } + + if ( formatOk ) { + XP_ASSERT( ptr < end ); + *ptrp = ptr; + } + + LOG_RETURNF( "%s", boolToStr(formatOk) ); + return formatOk; +} + +static XP_Bool +makeBitmap( XP_U8 const** ptrp, const XP_U8* end ) +{ + XP_Bool success = XP_TRUE; + XP_U8 const* ptr = *ptrp; + CHECK_PTR( ptr, 1, end, error ); + XP_U8 nCols = *ptr++; + if ( nCols > 0 ) { + CHECK_PTR( ptr, 1, end, error ); + XP_U8 nRows = *ptr++; + CHECK_PTR( ptr, ((nRows*nCols)+7) / 8, end, error ); +#ifdef DROP_BITMAPS + ptr += ((nRows*nCols)+7) / 8; +#else + do 
not compile +#endif + } + goto done; + error: + success = XP_FALSE; + done: + *ptrp = ptr; + return success; +} + +XP_U16 +countSpecials( DictionaryCtxt* ctxt ) +{ + XP_U16 result = 0; + + for ( int ii = 0; ii < ctxt->nFaces; ++ii ) { + if ( IS_SPECIAL( ctxt->facePtrs[ii][0] ) ) { + ++result; + } + } + + return result; +} /* countSpecials */ + void setBlankTile( DictionaryCtxt* dict ) { diff --git a/xwords4/common/dictnry.h b/xwords4/common/dictnry.h index 87a088a89..4aa2a33f8 100644 --- a/xwords4/common/dictnry.h +++ b/xwords4/common/dictnry.h @@ -215,14 +215,25 @@ XP_U16 dict_getMaxWidth( const DictionaryCtxt* dict ); #ifdef STUBBED_DICT DictionaryCtxt* make_stubbed_dict( MPFORMAL_NOCOMMA ); #endif +XP_U16 countSpecials( DictionaryCtxt* ctxt ); +XP_Bool parseCommon( DictionaryCtxt* dict, XWEnv xwe, const XP_U8** ptrp, + const XP_U8* end ); +XP_Bool checkSanity( DictionaryCtxt* dict, XP_U32 numEdges ); /* To be called only by subclasses!!! */ void dict_super_init( DictionaryCtxt* ctxt ); -/* Must be implemented by subclass */ +/* Must be implemented by subclasses */ void dict_splitFaces( DictionaryCtxt* dict, XWEnv xwe, const XP_U8* bytes, XP_U16 nBytes, XP_U16 nFaceos ); +void computeChecksum( DictionaryCtxt* dctxt, XWEnv xwe, const XP_U8* ptr, + XP_U32 len, XP_UCHAR* out ); -XP_Bool checkSanity( DictionaryCtxt* dict, XP_U32 numEdges ); +/* Utility used only by dict-parsing code for now */ +#define CHECK_PTR(p,c,e,lab) \ + if ( ((p)+(c)) > (e) ) { \ + XP_LOGF( "%s (line %d); out of bytes", __func__, __LINE__ ); \ + goto lab; \ + } #ifdef CPLUS } diff --git a/xwords4/dawg/Hungarian/.gitignore b/xwords4/dawg/Hungarian/.gitignore index d0b2fc328..be22b5284 100644 --- a/xwords4/dawg/Hungarian/.gitignore +++ b/xwords4/dawg/Hungarian/.gitignore @@ -1 +1,2 @@ -magyarispell +magyarispell/ +tmp_wordlist.txt diff --git a/xwords4/dawg/Hungarian/Makefile b/xwords4/dawg/Hungarian/Makefile index 69d5f8744..6ab3ac7f5 100644 --- a/xwords4/dawg/Hungarian/Makefile +++ 
b/xwords4/dawg/Hungarian/Makefile @@ -44,6 +44,8 @@ SRC = \ ${GIT_DIR}/szotar/alap/ige_targy.1 \ ${GIT_DIR}/szotar/alap/ragozatlan.2 +TMP_LIST = tmp_wordlist.txt + PHONY: GIT_TREE $(SRC) : GIT_TREE @@ -55,7 +57,7 @@ GIT_TREE: fi (cd $(GIT_DIR) && git checkout $(SRC_COMMIT)) -hungarian_wordlist.txt: $(SRC) +$(TMP_LIST): $(SRC) cat $^ | \ sed -e 's/#.*$$//' -e 's/\[.*$$//' -e 's/ .*$$//' |\ grep -v '^$$' |\ @@ -65,7 +67,7 @@ hungarian_wordlist.txt: $(SRC) # double-letter tile or two single-letter tiles, it's in the list with # both spellings. That's what the longer sed expressions are doing # (emitting two words) -$(XWLANG)Main.dict.gz: hungarian_wordlist.txt +$(XWLANG)Main.dict.gz: $(TMP_LIST) cat $< \ | tr -d '\r' \ | tr [aábcdeéfghiíjklmnnyoóöőprtuúüűvzs] [AÁBCDEÉFGHIÍJKLMNNYOÓÖŐPRTUÚÜŰVZS] \ @@ -85,4 +87,4 @@ $(XWLANG)Main.dict.gz: hungarian_wordlist.txt # "parent" Makefile.langcommon in the parent directory. clean: clean_common - rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb hungarian_wordlist.txt + rm -f $(XWLANG)Main.dict.gz *.bin $(XWLANG)*.pdb $(XWLANG)*.seb $(TMP_LIST) diff --git a/xwords4/dawg/Spanish/Makefile b/xwords4/dawg/Spanish/Makefile index 0a2f4ed36..937a92c70 100644 --- a/xwords4/dawg/Spanish/Makefile +++ b/xwords4/dawg/Spanish/Makefile @@ -15,7 +15,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-XWLANG = SpanishFAA41 +XWLANG = FAA_4_1 LANGCODE = es_ES TARGET_TYPE ?= WINCE ENC = UTF-8 diff --git a/xwords4/dawg/dawg2dict.py b/xwords4/dawg/dawg2dict.py index bdff12f34..460ca8c33 100755 --- a/xwords4/dawg/dawg2dict.py +++ b/xwords4/dawg/dawg2dict.py @@ -222,7 +222,9 @@ def process(args): words = [] if nodes: expandDAWG( nodes, nodeSize, args.DELIM, offset, data, words ) - assert len(words) == nWords + if not len(words) == nWords: + print("loaded {} words but header claims {}".format(len(words), nWords), file=sys.stderr) + # assert len(words) == nWords if args.DUMP_WORDS: for word in words: print(word) diff --git a/xwords4/linux/Makefile b/xwords4/linux/Makefile index 9330bc0a8..cf09929b9 100644 --- a/xwords4/linux/Makefile +++ b/xwords4/linux/Makefile @@ -22,6 +22,7 @@ ifeq ($(MEMDEBUG),TRUE) DEFINES = -DMEM_DEBUG -DDEBUG -DENABLE_LOGGING -DNUMBER_KEY_AS_INDEX DEFINES += -DCOMMS_CHECKSUM DEFINES += -DLOG_COMMS_MSGNOS +DEFINES += -DDI_DEBUG CFLAGS += -g $(GPROFFLAG) -Wall -Wunused-parameter -Wcast-align -Werror -O0 DEFINES += -DDEBUG_HASHING CFLAGS += -DDEBUG_TS -rdynamic @@ -260,6 +261,7 @@ CFLAGS += `pkg-config --cflags glib-2.0` LIBS += `pkg-config --libs glib-2.0` CFLAGS += $(POINTER_SUPPORT) +CFLAGS += -DDROP_BITMAPS ifneq (,$(findstring DPLATFORM_NCURSES,$(DEFINES))) LIBS += $(OE_LIBDIR) -lncursesw diff --git a/xwords4/linux/linuxdict.c b/xwords4/linux/linuxdict.c index e477cf0da..fcf477140 100644 --- a/xwords4/linux/linuxdict.c +++ b/xwords4/linux/linuxdict.c @@ -98,122 +98,15 @@ linux_dictionary_make( MPFORMAL XWEnv xwe, const LaunchParams* params, return &result->super; } /* gtk_dictionary_make */ -static XP_UCHAR* -getNullTermParam( LinuxDictionaryCtxt* XP_UNUSED_DBG(dctx), const XP_U8** ptr, - XP_U16* headerLen ) +void +computeChecksum( DictionaryCtxt* XP_UNUSED(dctx), XWEnv XP_UNUSED(xwe), + const XP_U8* ptr, XP_U32 len, XP_UCHAR* out ) { - XP_U16 len = 1 + XP_STRLEN( (XP_UCHAR*)*ptr ); - XP_UCHAR* result = XP_MALLOC( dctx->super.mpool, len ); - 
XP_MEMCPY( result, *ptr, len ); - *ptr += len; - *headerLen -= len; - return result; + gchar* checksum = g_compute_checksum_for_data( G_CHECKSUM_MD5, ptr, len ); + XP_MEMCPY( out, checksum, XP_STRLEN(checksum) + 1 ); + g_free( checksum ); } -static XP_U16 -countSpecials( LinuxDictionaryCtxt* ctxt ) -{ - XP_U16 result = 0; - XP_U16 ii; - - for ( ii = 0; ii < ctxt->super.nFaces; ++ii ) { - if ( IS_SPECIAL(ctxt->super.facePtrs[ii][0]) ) { - ++result; - } - } - - return result; -} /* countSpecials */ - -static XP_Bitmap -skipBitmap( LinuxDictionaryCtxt* XP_UNUSED_DBG(ctxt), const XP_U8** ptrp ) -{ - XP_U8 nCols, nRows, nBytes; - LinuxBMStruct* lbs = NULL; - const XP_U8* ptr = *ptrp; - - nCols = *ptr++; - if ( nCols > 0 ) { - nRows = *ptr++; - - nBytes = ((nRows * nCols) + 7) / 8; - - lbs = XP_MALLOC( ctxt->super.mpool, sizeof(*lbs) + nBytes ); - lbs->nRows = nRows; - lbs->nCols = nCols; - lbs->nBytes = nBytes; - - memcpy( lbs + 1, ptr, nBytes ); - ptr += nBytes; - } - - *ptrp = ptr; - return lbs; -} /* skipBitmap */ - -static void -skipBitmaps( LinuxDictionaryCtxt* ctxt, const XP_U8** ptrp ) -{ - XP_U16 nSpecials; - XP_UCHAR* text; - XP_UCHAR** texts; - XP_UCHAR** textEnds; - SpecialBitmaps* bitmaps; - Tile tile; - const XP_U8* ptr = *ptrp; - - nSpecials = countSpecials( ctxt ); - - texts = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool, - nSpecials * sizeof(*texts) ); - textEnds = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool, - nSpecials * sizeof(*textEnds) ); - bitmaps = (SpecialBitmaps*)XP_MALLOC( ctxt->super.mpool, - nSpecials * sizeof(*bitmaps) ); - XP_MEMSET( bitmaps, 0, nSpecials * sizeof(*bitmaps) ); - - for ( tile = 0; tile < ctxt->super.nFaces; ++tile ) { - - const XP_UCHAR* facep = ctxt->super.facePtrs[(short)tile]; - if ( IS_SPECIAL(*facep) ) { - XP_U16 asIndex = (XP_U16)*facep; - XP_U8 txtlen; - XP_ASSERT( *facep < nSpecials ); - - /* get the string */ - txtlen = *ptr++; - text = (XP_UCHAR*)XP_MALLOC(ctxt->super.mpool, txtlen+1); - memcpy( text, ptr, txtlen ); - ptr 
+= txtlen; - - text[txtlen] = '\0'; - texts[(XP_U16)*facep] = text; - textEnds[(XP_U16)*facep] = text + txtlen + 1; - - /* Now replace the delimiter char with \0. It must be one byte in - length and of course equal to the delimiter */ - XP_ASSERT( 0 == (SYNONYM_DELIM & 0x80) ); - while ( '\0' != *text ) { - XP_UCHAR* cp = g_utf8_offset_to_pointer( text, 1 ); - if ( 1 == (cp - text) && *text == SYNONYM_DELIM ) { - *text = '\0'; - } - text = cp; - } - - XP_DEBUGF( "skipping bitmaps for " XP_S, texts[asIndex] ); - - bitmaps[asIndex].largeBM = skipBitmap( ctxt, &ptr ); - bitmaps[asIndex].smallBM = skipBitmap( ctxt, &ptr ); - } - } - *ptrp = ptr; - - ctxt->super.chars = texts; - ctxt->super.charEnds = textEnds; - ctxt->super.bitmaps = bitmaps; -} /* skipBitmaps */ - void dict_splitFaces( DictionaryCtxt* dict, XWEnv XP_UNUSED(xwe), const XP_U8* utf8, XP_U16 nBytes, XP_U16 nFaces ) @@ -263,13 +156,6 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const LaunchParams* params, XP_Bool formatOk = XP_TRUE; size_t dictLength; XP_U32 topOffset; - unsigned short xloc; - XP_U16 flags; - XP_U16 facesSize; - XP_U16 charSize; - XP_Bool isUTF8 = XP_FALSE; - XP_Bool hasHeader = XP_FALSE; - const XP_U8* ptr; char path[256]; if ( !getDictPath( params, fileName, path, VSIZE(path) ) ) { @@ -298,130 +184,12 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const LaunchParams* params, fclose( dictF ); } - ptr = dctx->dictBase; - - memcpy( &flags, ptr, sizeof(flags) ); - ptr += sizeof( flags ); - flags = ntohs(flags); - - XP_DEBUGF( "flags=0X%X", flags ); - hasHeader = 0 != (DICT_HEADER_MASK & flags); - if ( hasHeader ) { - flags &= ~DICT_HEADER_MASK; - } - - flags &= ~DICT_SYNONYMS_MASK; - - if ( flags == 0x0001 ) { - dctx->super.nodeSize = 3; - charSize = 1; - dctx->super.is_4_byte = XP_FALSE; - } else if ( flags == 0x0002 ) { - dctx->super.nodeSize = 3; - charSize = 2; - dctx->super.is_4_byte = XP_FALSE; - } else if ( flags == 0x0003 ) { - dctx->super.nodeSize = 4; - charSize = 2; - 
dctx->super.is_4_byte = XP_TRUE; - } else if ( flags == 0x0004 ) { - dctx->super.nodeSize = 3; - dctx->super.isUTF8 = XP_TRUE; - isUTF8 = XP_TRUE; - dctx->super.is_4_byte = XP_FALSE; - } else if ( flags == 0x0005 ) { - dctx->super.nodeSize = 4; - dctx->super.isUTF8 = XP_TRUE; - isUTF8 = XP_TRUE; - dctx->super.is_4_byte = XP_TRUE; - } else { - /* case I don't know how to deal with */ - formatOk = XP_FALSE; - XP_ASSERT(0); - } + const XP_U8* ptr = dctx->dictBase; + const XP_U8* end = ptr + dctx->dictLength; + formatOk = parseCommon( &dctx->super, NULL_XWE, &ptr, end ); + /* && loadSpecialData( &dctx->super, &ptr, end ); */ if ( formatOk ) { - XP_U8 numFaceBytes, numFaces; - - if ( hasHeader ) { - XP_U16 headerLen; - XP_U32 wordCount; - - memcpy( &headerLen, ptr, sizeof(headerLen) ); - ptr += sizeof(headerLen); - headerLen = ntohs( headerLen ); - - memcpy( &wordCount, ptr, sizeof(wordCount) ); - ptr += sizeof(wordCount); - headerLen -= sizeof(wordCount); - dctx->super.nWords = ntohl( wordCount ); - XP_DEBUGF( "dict contains %d words", dctx->super.nWords ); - - if ( 0 < headerLen ) { - dctx->super.desc = getNullTermParam( dctx, &ptr, &headerLen ); - } else { - XP_LOGF( "%s: no note", __func__ ); - } - if ( 0 < headerLen ) { - dctx->super.md5Sum = getNullTermParam( dctx, &ptr, &headerLen ); - } else { - XP_LOGF( "%s: no md5Sum", __func__ ); - } - ptr += headerLen; - } - - if ( isUTF8 ) { - numFaceBytes = *ptr++; - } - numFaces = *ptr++; - if ( !isUTF8 ) { - numFaceBytes = numFaces * charSize; - } - - if ( NULL == dctx->super.md5Sum -#ifdef DEBUG - || XP_TRUE -#endif - ) { - size_t curPos = ptr - dctx->dictBase; - gssize dictLength = dctx->dictLength - curPos; - - gchar* checksum = g_compute_checksum_for_data( G_CHECKSUM_MD5, ptr, dictLength ); - if ( NULL == dctx->super.md5Sum ) { - dctx->super.md5Sum = copyString( dctx->super.mpool, checksum ); - } else { - XP_ASSERT( 0 == XP_STRCMP( dctx->super.md5Sum, checksum ) ); - } - g_free( checksum ); - } - - dctx->super.nFaces 
= numFaces; - - dctx->super.countsAndValues = XP_MALLOC( dctx->super.mpool, - numFaces*2 ); - facesSize = numFaceBytes; - if ( !isUTF8 ) { - facesSize /= 2; - } - - XP_U8 tmp[numFaceBytes]; - memcpy( tmp, ptr, numFaceBytes ); - ptr += numFaceBytes; - - dict_splitFaces( &dctx->super, NULL, tmp, numFaceBytes, numFaces ); - - memcpy( &xloc, ptr, sizeof(xloc) ); - ptr += sizeof(xloc); - memcpy( dctx->super.countsAndValues, ptr, numFaces*2 ); - ptr += numFaces*2; - } - - dctx->super.langCode = xloc & 0x7F; - - if ( formatOk ) { - XP_U32 numEdges; - skipBitmaps( dctx, &ptr ); - size_t curPos = ptr - dctx->dictBase; dictLength = dctx->dictLength - curPos; @@ -433,6 +201,7 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const LaunchParams* params, ptr += sizeof(topOffset); } + XP_U32 numEdges; if ( dictLength > 0 ) { numEdges = dictLength / dctx->super.nodeSize; #ifdef DEBUG @@ -509,6 +278,7 @@ linux_dictionary_destroy( DictionaryCtxt* dict, XWEnv XP_UNUSED(xwe) ) } } + /* super's destructor should do this!!!! */ XP_FREEP( dict->mpool, &ctxt->super.desc ); XP_FREEP( dict->mpool, &ctxt->super.md5Sum ); XP_FREEP( dict->mpool, &ctxt->super.countsAndValues );