merge android_wordlists (local branch)

This commit is contained in:
Eric House 2013-05-01 06:39:31 -07:00
commit 5a026ffda3
29 changed files with 401 additions and 138 deletions

View file

@ -22,7 +22,7 @@
to come from a domain that you own or have control over. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="org.eehouse.android.xw4"
android:versionCode="51"
android:versionCode="52"
android:versionName="@string/app_version"
>

View file

@ -163,10 +163,14 @@ andLoadSpecialData( AndDictionaryCtxt* ctxt, XP_U8 const** ptrp,
XP_U8 const* ptr = *ptrp;
Tile ii;
XP_UCHAR** texts;
XP_UCHAR** textEnds;
SpecialBitmaps* bitmaps;
texts = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool,
nSpecials * sizeof(*texts) );
textEnds = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool,
nSpecials * sizeof(*textEnds) );
bitmaps = (SpecialBitmaps*)
XP_CALLOC( ctxt->super.mpool, nSpecials * sizeof(*bitmaps) );
@ -180,11 +184,22 @@ andLoadSpecialData( AndDictionaryCtxt* ctxt, XP_U8 const** ptrp,
CHECK_PTR( ptr, txtlen, end );
XP_UCHAR* text = (XP_UCHAR*)XP_MALLOC(ctxt->super.mpool, txtlen+1);
texts[(int)*facep] = text;
textEnds[(int)*facep] = text + txtlen + 1;
XP_MEMCPY( text, ptr, txtlen );
ptr += txtlen;
text[txtlen] = '\0';
XP_ASSERT( *facep < nSpecials ); /* firing */
/* This little hack is safe because every byte of a multi-byte
utf-8 char has the high bit set, while SYNONYM_DELIM does not, so
a byte equal to SYNONYM_DELIM can only be a real delimiter */
XP_ASSERT( 0 == (SYNONYM_DELIM & 0x80) );
for ( ; '\0' != *text; ++text ) {
if ( *text == SYNONYM_DELIM ) {
*text = '\0';
}
}
if ( !andMakeBitmap( ctxt, &ptr, end,
&bitmaps[(int)*facep].largeBM ) ) {
goto error;
@ -201,6 +216,7 @@ andLoadSpecialData( AndDictionaryCtxt* ctxt, XP_U8 const** ptrp,
success = XP_FALSE;
done:
ctxt->super.chars = texts;
ctxt->super.charEnds = textEnds;
ctxt->super.bitmaps = bitmaps;
*ptrp = ptr;
@ -216,36 +232,42 @@ static void
splitFaces_via_java( JNIEnv* env, AndDictionaryCtxt* ctxt, const XP_U8* ptr,
int nFaceBytes, int nFaces, XP_Bool isUTF8 )
{
XP_UCHAR facesBuf[nFaces*4]; /* seems a reasonable upper bound... */
XP_UCHAR facesBuf[nFaces*16]; /* seems a reasonable upper bound... */
int indx = 0;
int offsets[nFaces];
int nBytes;
int ii;
int ii, jj;
jobject jstrarr = and_util_splitFaces( ctxt->jniutil, ptr, nFaceBytes,
isUTF8 );
XP_ASSERT( (*env)->GetArrayLength( env, jstrarr ) == nFaces );
for ( ii = 0; ii < nFaces; ++ii ) {
jobject jstr = (*env)->GetObjectArrayElement( env, jstrarr, ii );
jobject jstrs = (*env)->GetObjectArrayElement( env, jstrarr, ii );
offsets[ii] = indx;
nBytes = (*env)->GetStringUTFLength( env, jstr );
int nAlternates = (*env)->GetArrayLength( env, jstrs );
for ( jj = 0; jj < nAlternates; ++jj ) {
jobject jstr = (*env)->GetObjectArrayElement( env, jstrs, jj );
nBytes = (*env)->GetStringUTFLength( env, jstr );
const char* bytes = (*env)->GetStringUTFChars( env, jstr, NULL );
char* end;
long numval = strtol( bytes, &end, 10 );
if ( end > bytes ) {
XP_ASSERT( numval < 32 );
nBytes = 1;
facesBuf[indx] = (XP_UCHAR)numval;
} else {
XP_MEMCPY( &facesBuf[indx], bytes, nBytes );
const char* bytes = (*env)->GetStringUTFChars( env, jstr, NULL );
char* end;
long numval = strtol( bytes, &end, 10 );
if ( end > bytes ) {
XP_ASSERT( numval < 32 );
XP_ASSERT( jj == 0 );
nBytes = 1;
facesBuf[indx] = (XP_UCHAR)numval;
} else {
XP_MEMCPY( &facesBuf[indx], bytes, nBytes );
}
(*env)->ReleaseStringUTFChars( env, jstr, bytes );
deleteLocalRef( env, jstr );
indx += nBytes;
facesBuf[indx++] = '\0';
}
(*env)->ReleaseStringUTFChars( env, jstr, bytes );
deleteLocalRef( env, jstr );
indx += nBytes;
facesBuf[indx++] = '\0';
deleteLocalRef( env, jstrs );
XP_ASSERT( indx < VSIZE(facesBuf) );
}
deleteLocalRef( env, jstrarr );
@ -261,6 +283,7 @@ splitFaces_via_java( JNIEnv* env, AndDictionaryCtxt* ctxt, const XP_U8* ptr,
XP_ASSERT( !ctxt->super.faces );
ctxt->super.faces = faces;
ctxt->super.facesEnd = faces + indx;
XP_ASSERT( !ctxt->super.facePtrs );
ctxt->super.facePtrs = ptrs;
} /* splitFaces_via_java */
@ -316,6 +339,7 @@ parseDict( AndDictionaryCtxt* ctxt, XP_U8 const* ptr, XP_U32 dictLength,
ptr += headerLen;
}
flags &= ~DICT_SYNONYMS_MASK;
if ( flags == 0x0002 ) {
nodeSize = 3;
} else if ( flags == 0x0003 ) {
@ -348,16 +372,30 @@ parseDict( AndDictionaryCtxt* ctxt, XP_U8 const* ptr, XP_U32 dictLength,
JNIEnv* env = ctxt->env;
jstring jsum = and_util_getMD5SumFor( ctxt->jniutil, ctxt->super.name,
NULL, 0 );
XP_UCHAR* md5Sum = NULL;
/* If we have a cached sum, check that it's correct. */
if ( NULL != jsum && NULL != ctxt->super.md5Sum ) {
md5Sum = getStringCopy( MPPARM(ctxt->super.mpool) env, jsum );
if ( 0 != XP_STRCMP( ctxt->super.md5Sum, md5Sum ) ) {
deleteLocalRef( env, jsum );
jsum = NULL;
XP_FREE( ctxt->super.mpool, md5Sum );
md5Sum = NULL;
}
}
if ( NULL == jsum ) {
jsum = and_util_getMD5SumFor( ctxt->jniutil, ctxt->super.name,
ptr, end - ptr );
}
XP_UCHAR* md5Sum = getStringCopy( MPPARM(ctxt->super.mpool) env, jsum );
if ( NULL == md5Sum ) {
md5Sum = getStringCopy( MPPARM(ctxt->super.mpool) env, jsum );
}
deleteLocalRef( env, jsum );
if ( NULL == ctxt->super.md5Sum ) {
ctxt->super.md5Sum = md5Sum;
} else {
XP_ASSERT( 0 == XP_STRCMP( ctxt->super.md5Sum, md5Sum ) );
XP_FREE( ctxt->super.mpool, md5Sum );
}
}
@ -462,6 +500,8 @@ and_dictionary_destroy( DictionaryCtxt* dict )
}
XP_FREE( ctxt->super.mpool, ctxt->super.chars );
}
XP_FREEP( ctxt->super.mpool, &ctxt->super.charEnds );
if ( !!ctxt->super.bitmaps ) {
for ( ii = 0; ii < nSpecials; ++ii ) {
jobject bitmap = ctxt->super.bitmaps[ii].largeBM;

View file

@ -82,7 +82,7 @@ and_util_splitFaces( JNIUtilCtxt* jniutil, const XP_U8* bytes, jsize len,
JNIEnv* env = *jniutil->envp;
jmethodID mid
= getMethodID( env, jniutil->jjniutil, "splitFaces",
"([BZ)[Ljava/lang/String;" );
"([BZ)[[Ljava/lang/String;" );
jbyteArray jbytes = makeByteArray( env, len, (jbyte*)bytes );
strarray =

View file

@ -5,14 +5,26 @@
</style>
</head>
<body>
<b>Crosswords 4.4 beta 59 release</b>
<b>Crosswords 4.4 beta 60 release</b>
<h3>New with this release</h3>
<ul>
<li>Allow alternate spellings for tiles in the Find field in the
wordlist browser, e.g. 'a' for 'A' and 'L-L' for 'L&middot;L' (in
Catalan). The new wordlist format requires this upgrade, so I will
wait a few weeks before releasing new wordlists. </li>
<li>Upgrade built-in English wordlists.</li>
<li>Don&apos;t run SMSService if play via SMS is disabled</li>
<li>Fix bug with invites to SMS games where invitee is missing
wordlist</li>
</ul>
<h3>Next up</h3>
<ul>
<li>Improve communication with relay</li>
</ul>
<p>(The full changelog

View file

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<string name="app_version">4.4 beta 59</string>
<string name="app_version">4.4 beta 60</string>
</resources>

View file

@ -20,17 +20,23 @@
package org.eehouse.android.xw4;
import java.lang.Thread;
import java.util.Formatter;
import android.content.Context;
import android.content.Intent;
import android.content.SharedPreferences;
import android.database.Cursor;
import android.database.DatabaseUtils;
import android.os.Bundle;
import android.preference.PreferenceManager;
import android.text.TextUtils;
import android.text.format.Time;
import android.util.Log;
import android.widget.Toast;
import java.lang.Thread;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.Iterator;
import java.util.Set;
public class DbgUtils {
private static final String TAG = "XW4";
@ -113,6 +119,22 @@ public class DbgUtils {
}
}
// public static void printIntent( Intent intent )
// {
// if ( s_doLog ) {
// Bundle bundle = intent.getExtras();
// ArrayList<String> al = new ArrayList<String>();
// if ( null != bundle ) {
// Set<String> keys = bundle.keySet();
// Iterator<String> iter = keys.iterator();
// while ( iter.hasNext() ) {
// al.add( iter.next() );
// }
// }
// DbgUtils.logf( "intent extras: %s", TextUtils.join( ", ", al ) );
// }
// }
public static void dumpCursor( Cursor cursor )
{
if ( s_doLog ) {

View file

@ -203,6 +203,15 @@ public class DictUtils {
}
}
if ( null == loc ) {
File file = getDownloadsPathFor( context, name );
if ( null != file && file.exists() ) {
loc = DictLoc.DOWNLOAD;
}
}
// DbgUtils.logf( "getDictLoc(%s)=>%h(%s)", name, loc,
// ((null != loc)?loc.toString():"UNKNOWN") );
return loc;
}

View file

@ -165,6 +165,7 @@ public class PrefsActivity extends PreferenceActivity
if ( sp.getBoolean( key, true ) ) {
SMSService.checkForInvites( this );
} else {
SMSService.stopService( this );
XWPrefs.setHaveCheckedSMS( this, false );
}
} else if ( key.equals( m_downloadPath ) ) {

View file

@ -69,6 +69,7 @@ public class SMSService extends Service {
private static final int MESG_GAMEGONE = 5;
private static final int CHECK_MSGDB = 6;
private static final int ADDED_MISSING = 7;
private static final int STOP_SELF = 8;
private static final String CMD_STR = "CMD";
private static final String BUFFER = "BUFFER";
@ -101,6 +102,12 @@ public class SMSService extends Service {
}
}
/**
 * Ask the service to shut itself down by starting it with a STOP_SELF
 * command intent; onStartCommand() reacts to that command by calling
 * stopSelf().
 */
public static void stopService( Context context )
{
    context.startService( getIntentTo( context, STOP_SELF ) );
}
public static void handleFrom( Context context, String buffer,
String phone )
{
@ -226,10 +233,13 @@ public class SMSService extends Service {
@Override
public int onStartCommand( Intent intent, int flags, int startId )
{
int result;
int result = Service.START_NOT_STICKY;
if ( XWApp.SMSSUPPORTED && null != intent ) {
int cmd = intent.getIntExtra( CMD_STR, -1 );
switch( cmd ) {
case STOP_SELF:
stopSelf();
break;
case CHECK_MSGDB:
if ( ! XWPrefs.getHaveCheckedSMS( this ) ) {
XWPrefs.setHaveCheckedSMS( this, true );
@ -283,9 +293,13 @@ public class SMSService extends Service {
}
result = Service.START_STICKY;
} else {
result = Service.START_STICKY_COMPATIBILITY;
}
if ( Service.START_NOT_STICKY == result
|| !XWPrefs.getSMSEnabled( this ) ) {
stopSelf( startId );
}
return result;
} // onStartCommand
@ -430,6 +444,7 @@ public class SMSService extends Service {
MultiService.OWNER_SMS );
intent.putExtra( MultiService.INVITER,
Utils.phoneToContact( this, phone, true ) );
intent.putExtra( MultiService.GAMEID, gameID );
MultiService.postMissingDictNotification( this, intent,
gameID );
}

View file

@ -25,6 +25,6 @@ import android.graphics.drawable.BitmapDrawable;
public interface JNIUtils {
// Stuff I can't do in C....
String[] splitFaces( byte[] chars, boolean isUTF8 );
String[][] splitFaces( byte[] chars, boolean isUTF8 );
String getMD5SumFor( String dictName, byte[] bytes );
}

View file

@ -28,10 +28,14 @@ import java.io.InputStreamReader;
import java.security.MessageDigest;
import java.util.ArrayList;
import junit.framework.Assert;
import org.eehouse.android.xw4.*;
public class JNIUtilsImpl implements JNIUtils {
private static final char SYNONYM_DELIM = ' ';
private static JNIUtilsImpl s_impl = null;
private Context m_context;
@ -51,10 +55,15 @@ public class JNIUtilsImpl implements JNIUtils {
* convert into individual strings. The 0 is the problem: it's
* not valid utf8. So turn it and the other nums into strings and
* catch them on the other side.
*
* Changes for "synonyms" (A and a being the same tile): return an
* array of Strings for each face. Each face is
* <letter>[<delim><letter>]*, so for each face, keep collecting
* letters until no delim follows.
*/
public String[] splitFaces( byte[] chars, boolean isUTF8 )
public String[][] splitFaces( byte[] chars, boolean isUTF8 )
{
ArrayList<String> al = new ArrayList<String>();
ArrayList<String[]> faces = new ArrayList<String[]>();
ByteArrayInputStream bais = new ByteArrayInputStream( chars );
InputStreamReader isr;
try {
@ -66,6 +75,9 @@ public class JNIUtilsImpl implements JNIUtils {
int[] codePoints = new int[1];
// "A aB bC c"
boolean lastWasDelim = false;
ArrayList<String> face = null;
for ( ; ; ) {
int chr = -1;
try {
@ -74,7 +86,12 @@ public class JNIUtilsImpl implements JNIUtils {
DbgUtils.logf( ioe.toString() );
}
if ( -1 == chr ) {
addFace( faces, face );
break;
} else if ( SYNONYM_DELIM == chr ) {
Assert.assertNotNull( face );
lastWasDelim = true;
continue;
} else {
String letter;
if ( chr < 32 ) {
@ -83,14 +100,35 @@ public class JNIUtilsImpl implements JNIUtils {
codePoints[0] = chr;
letter = new String( codePoints, 0, 1 );
}
al.add( letter );
// Ok, we have a letter. Is it part of an existing
// one or the start of a new? If the latter, insert
// what we have before starting over.
if ( null == face ) { // start of a new, clearly
// do nothing
} else {
Assert.assertTrue( 0 < face.size() );
if ( !lastWasDelim ) {
addFace( faces, face );
face = null;
}
}
lastWasDelim = false;
if ( null == face ) {
face = new ArrayList<String>();
}
face.add( letter );
}
}
String[] result = al.toArray( new String[al.size()] );
String[][] result = faces.toArray( new String[faces.size()][] );
return result;
}
/**
 * Append one completed face (its letter plus any synonyms) to faces as
 * a String[].
 *
 * face is null when the input ended before any letter was read (the
 * end-of-stream path calls this unconditionally); in that case there is
 * nothing to add, so the call is a no-op rather than a
 * NullPointerException.
 */
private void addFace( ArrayList<String[]> faces, ArrayList<String> face )
{
    if ( null != face ) {
        faces.add( face.toArray( new String[face.size()] ) );
    }
}
public String getMD5SumFor( String dictName, byte[] bytes )
{
String result = null;

View file

@ -1,3 +1,27 @@
#!/bin/sh
adb uninstall org.eehouse.android.xw4
# Stop at the first failing command (-e) and treat any use of an unset
# variable as an error (-u).
set -e -u
# Zero-based position of the target device among the "adb devices" lines
# that end in "device"; overridden by -n.
INDEX=0
# Print a two-line usage summary on stdout and exit with status 0.
usage() {
    cat <<EOF
usage: $0 [--help] [-n <index>]
uninstall crosswords from the <index>th device
EOF
    exit 0
}
# Parse the command line: "-n <index>" selects the device; anything else
# (including --help) prints the usage text and exits.
while [ $# -ge 1 ]; do
    case $1 in
        -n)
            # Without this check a trailing "-n" would abort under
            # "set -u" with an unhelpful "parameter not set" error.
            [ $# -ge 2 ] || usage
            shift
            INDEX=$1
            ;;
        *) usage
            ;;
    esac
    shift
done

# Pick the serial number from the (1+INDEX)th line of "adb devices" that
# ends in "device".
SERIAL="$(adb devices | grep 'device$' | sed -n "$((1+INDEX)) p" | awk '{print $1}')"

# An out-of-range index (or no attached device) leaves SERIAL empty; bail
# out instead of letting adb take "uninstall" as the serial number.
if [ -z "$SERIAL" ]; then
    echo "$0: no device found at index $INDEX" >&2
    exit 1
fi

adb -s "$SERIAL" uninstall org.eehouse.android.xw4

View file

@ -83,4 +83,6 @@ typedef struct Xloc_specialEntry {
short hasSmall;
} Xloc_specialEntry;
#define SYNONYM_DELIM ' '
#endif /* _DAWG_H_ */

View file

@ -206,21 +206,29 @@ findStartsWithChars( DictIter* iter, const XP_UCHAR* chars, XP_U16 charsOffset,
} else {
const DictionaryCtxt* dict = iter->dict;
XP_U16 nodeSize = dict->nodeSize;
for ( ; ; ) {
for ( ; ; ) { /* for all the tiles */
Tile tile = EDGETILE( dict, edge );
const XP_UCHAR* facep = dict_getTileString( dict, tile );
XP_U16 faceLen = XP_STRLEN( facep );
if ( faceLen > charsLen ) {
faceLen = charsLen;
}
if ( 0 == XP_STRNCMP( facep, &chars[charsOffset], faceLen ) ) {
XP_S16 newOffset = findStartsWithChars( iter, chars,
charsOffset + faceLen,
dict_follow( dict, edge ),
nTilesUsed + 1 );
if ( result < newOffset ) {
iter->edges[nTilesUsed] = edge;
result = newOffset;
const XP_UCHAR* facep = NULL;
for ( ; ; ) { /* for each string that tile can be */
facep = dict_getNextTileString( dict, tile, facep );
if ( NULL == facep ) {
break;
}
XP_U16 faceLen = XP_STRLEN( facep );
if ( faceLen > charsLen ) {
faceLen = charsLen;
}
if ( 0 == XP_STRNCMP( facep, &chars[charsOffset], faceLen ) ) {
XP_S16 newOffset =
findStartsWithChars( iter, chars,
charsOffset + faceLen,
dict_follow( dict, edge ),
nTilesUsed + 1 );
if ( result < newOffset ) {
iter->edges[nTilesUsed] = edge;
result = newOffset;
}
break;
}
}
if ( IS_LAST_EDGE( dict, edge ) ) {

View file

@ -100,6 +100,39 @@ dict_getTileString( const DictionaryCtxt* dict, Tile tile )
return facep;
}
/* Step through the strings a tile can be written as.
 *
 * Pass NULL as cur to get the tile's first string (whatever
 * dict_getTileString() returns); pass the string returned by the previous
 * call to get the one stored immediately after it.  Returns NULL when the
 * string after cur no longer belongs to this tile.
 */
const XP_UCHAR*
dict_getNextTileString( const DictionaryCtxt* dict, Tile tile,
                        const XP_UCHAR* cur )
{
    const XP_UCHAR* candidate;

    if ( NULL == cur ) {
        return dict_getTileString( dict, tile );
    }

    /* Skip over the current string and its terminating null byte */
    candidate = cur + XP_STRLEN( cur ) + 1;

    if ( dict_faceIsBitmap( dict, tile ) ) {
        /* Special tile: its strings are bounded by the matching
           charEnds entry, indexed by the tile's raw face value */
        const XP_UCHAR* rawFace = dict_getTileStringRaw( dict, tile );
        if ( candidate >= dict->charEnds[(XP_U16)*rawFace] ) {
            candidate = NULL;
        }
    } else {
        /* Ordinary tile: the candidate is usable only if it has neither
           run off the end of the faces storage nor reached the start of
           the following tile's string */
        Tile following = (Tile)(tile + 1);
        if ( following == dict->nFaces ) {
            if ( candidate >= dict->facesEnd ) {
                candidate = NULL;
            }
        } else if ( candidate == dict_getTileStringRaw( dict, following ) ) {
            candidate = NULL;
        }
    }

    return candidate;
}
XP_U16
dict_numTiles( const DictionaryCtxt* dict, Tile tile )
{

View file

@ -36,6 +36,7 @@ extern "C" {
#define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020)
#define DICT_HEADER_MASK 0x08
#define DICT_SYNONYMS_MASK 0x10
typedef XP_U8 XP_LangCode;
@ -72,14 +73,16 @@ struct DictionaryCtxt {
necessarily the entry point for search!! */
XP_UCHAR* name;
XP_UCHAR* langName;
XP_UCHAR* faces;
XP_UCHAR* faces; /* storage for faces */
XP_UCHAR* facesEnd;
XP_UCHAR* desc;
XP_UCHAR* md5Sum;
const XP_UCHAR** facePtrs;
const XP_UCHAR** facePtrs; /* elems point into faces, above */
XP_U8* countsAndValues;
SpecialBitmaps* bitmaps;
XP_UCHAR** chars;
XP_UCHAR** charEnds;
XP_U32 nWords;
XP_LangCode langCode;
@ -150,6 +153,8 @@ XP_U16 dict_numTileFaces( const DictionaryCtxt* ctxt );
XP_U16 dict_tilesToString( const DictionaryCtxt* ctxt, const Tile* tiles,
XP_U16 nTiles, XP_UCHAR* buf, XP_U16 bufSize );
const XP_UCHAR* dict_getTileString( const DictionaryCtxt* ctxt, Tile tile );
const XP_UCHAR* dict_getNextTileString( const DictionaryCtxt* ctxt, Tile tile,
const XP_UCHAR* cur );
const XP_UCHAR* dict_getName( const DictionaryCtxt* ctxt );
const XP_UCHAR* dict_getLangName(const DictionaryCtxt* ctxt );

View file

@ -35,10 +35,11 @@ endif
endif
endif
LANG_SPECIAL_INFO = \
"L·L" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
"NY" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \
"QU" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \
"L·L L·l l·L l·l L-L L-l l-L l-l ĿL ŀl" $(PBITMS)/large_ll.pbitm $(PBITMS)/small_ll.pbitm \
"NY Ny nY ny" $(PBITMS)/large_ny.pbitm $(PBITMS)/small_ny.pbitm \
"QU Qu qU qu" $(PBITMS)/large_qu.pbitm $(PBITMS)/small_qu.pbitm \
include ../Makefile.langcommon
@ -58,4 +59,4 @@ $(XWLANG)Main.dict.gz: $(SOURCEDICT) $(MAKEFILE)
| gzip - > $@
clean: clean_common
rm -rf *.saved
rm -rf *.saved

View file

@ -63,32 +63,32 @@ XLOC_HEADER:0x8C00
<BEGIN_TILES>
2 0 {"_"}
12 1 'A'
2 3 'B'
3 2 'C'
1 10 'Ç'
3 2 'D'
13 1 'E'
1 4 'F'
2 3 'G'
1 8 'H'
8 1 'I'
1 8 'J'
4 1 'L'
1 10 {"L·L"}
3 2 'M'
6 1 'N'
1 10 {"NY"}
5 1 'O'
2 3 'P'
1 8 {"QU"}
8 1 'R'
8 1 'S'
5 1 'T'
4 1 'U'
1 4 'V'
1 10 'X'
1 8 'Z'
12 1 'A|a'
2 3 'B|b'
3 2 'C|c'
1 10 'Ç'
3 2 'D|d'
13 1 'E|e'
1 4 'F|f'
2 3 'G|g'
1 8 'H|h'
8 1 'I|i'
1 8 'J|j'
4 1 'L|l'
1 10 {"L·L|L-L|ĿL|l·l|l-l|ŀl"}
3 2 'M|m'
6 1 'N|n'
1 10 {"NY|ny"}
5 1 'O|o'
2 3 'P|p'
1 8 {"QU|qu"}
8 1 'R|r'
8 1 'S|s'
5 1 'T|t'
4 1 'U|u'
1 4 'V|v'
1 10 'X|x'
1 8 'Z|z'
<END_TILES>
#
# NOTES:

View file

@ -40,32 +40,32 @@ XLOC_HEADER:0x8100
<BEGIN_TILES>
2 0 {"_"}
9 1 'A'
2 3 'B'
2 3 'C'
4 2 'D'
12 1 'E'
2 4 'F'
3 2 'G'
2 4 'H'
9 1 'I'
1 8 'J'
1 5 'K'
4 1 'L'
2 3 'M'
6 1 'N'
8 1 'O'
2 3 'P'
1 10 'Q'
6 1 'R'
4 1 'S'
6 1 'T'
4 1 'U'
2 4 'V'
2 4 'W'
1 8 'X'
2 4 'Y'
1 10 'Z'
9 1 'A|a'
2 3 'B|b'
2 3 'C|c'
4 2 'D|d'
12 1 'E|e'
2 4 'F|f'
3 2 'G|g'
2 4 'H|h'
9 1 'I|i'
1 8 'J|j'
1 5 'K|k'
4 1 'L|l'
2 3 'M|m'
6 1 'N|n'
8 1 'O|o'
2 3 'P|p'
1 10 'Q|q'
6 1 'R|r'
4 1 'S|s'
6 1 'T|t'
4 1 'U|u'
2 4 'V|v'
2 4 'W|w'
1 8 'X|x'
2 4 'Y|y'
1 10 'Z|z'
<END_TILES>
# should ignore all after the <END> above

View file

@ -220,12 +220,14 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_ne
# 1: old-style DAWG.
# 2: new-style DAWG, three bytes per node.
# 3: new-style DAWG, four bytes per node
# 4: has dict-header
# 5: has new (2013) synonyms feature, e.g. 'a' for 'A'
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
ifdef NEWDAWG
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x000C)" > $@; echo "flags=4"; \
then perl -e "print pack(\"n\",0x001C)" > $@; echo "flags=4"; \
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x000D)" > $@; echo "flags=5"; \
then perl -e "print pack(\"n\",0x001D)" > $@; echo "flags=5"; \
elif true; \
then echo "Unexpected node size"; exit 1; \
fi

View file

@ -109,16 +109,25 @@ sub readXWDFaces($$$) {
}
} else {
binmode( $fh, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
sysread( $fh, $buf, $nChars );
length($buf) == $nChars or die "didn't read expected number of bytes\n";
sysread( $fh, $buf, $nBytes );
length($buf) == $nBytes or die "didn't read expected number of bytes\n";
binmode( $fh ) or die "binmode failed\n";
print STDERR "string now: $buf\n";
my @faces;
for ( my $ii = 0; $ii < $nChars; ++$ii ) {
my $chr = substr( $buf, $ii, 1 );
my $index = 0;
for ( my $nFound = 0; $nFound < $nChars; ) {
my $chr = substr( $buf, $index++, 1 );
if ( $chr eq ' ' ) {
# For testing, this next line uses last rather than
# first alternate
# $faces[$#faces] = substr( $buf, $index, 1 );
++$index;
next;
}
print STDERR "pushing $chr \n";
push( @faces, $chr );
++$nFound;
}
printf STDERR "at 0x%x after reading faces\n", systell($fh);

View file

@ -909,8 +909,9 @@ writeOutStartNode( const char* startNodeOut, int firstRootChildOffset )
fclose( nodeout );
} // writeOutStartNode
// build the hash for translating. I'm using a hash assuming it'll be
// fast. Key is the letter; value is the 0..31 value to be output.
// build the hash for translating. I'm using a hash assuming it'll be fast.
// Key is the letter; value is the 0..31 value to be output. Note that input
// may be in the format "A a" rather than just "A"
static void
makeTableHash( void )
{
@ -923,19 +924,24 @@ makeTableHash( void )
// Fill the 0th space since references are one-based
gRevMap.push_back(0);
for ( ii = 0; ; ++ii ) {
for ( ii = 0; ; ) {
wchar_t ch = getWideChar( TABLEFILE );
if ( EOF == ch ) {
break;
}
if ( ' ' == ch ) {
// discard a synonym
(void)getWideChar( TABLEFILE );
continue;
}
fprintf( stderr, "adding %lc/%x\n", ch, ch );
gRevMap.push_back(ch);
if ( ch == 0 ) { // blank
gBlankIndex = ii;
// we want to increment i when blank seen since it is a
// tile value
gBlankIndex = ii++;
continue;
}
// die "$0: $gTableFile too large\n"
@ -946,7 +952,7 @@ makeTableHash( void )
// Add 1 to i so no tile-strings contain 0 and we can treat as
// null-terminated. The 1 is subtracted again in
// outputNode().
gTableHash[ch] = ii + 1;
gTableHash[ch] = ++ii;
}
fclose( TABLEFILE );

View file

@ -100,6 +100,16 @@ sub GetValue($$) {
return ${$hashR}{$name};
}
# Write one tile's letters to the filehandle $fhr.  $str holds the tile's
# alternate spellings separated by '|' (e.g. "A|a"); they are emitted
# separated by single spaces instead, each character written via
# pack("U").
sub printLetters($$) {
    my ( $str, $fhr ) = @_;
    my $spaced = join( " ", split( /\|/, $str ) );
    foreach my $chr ( split( //, $spaced ) ) {
        print $fhr pack( "U", ord($chr) );
    }
}
sub WriteMapFile($$$) {
my ( $hashR, $unicode, $fhr ) = @_;
@ -109,9 +119,8 @@ sub WriteMapFile($$$) {
my $tileR = GetNthTile( $hashR, $i );
my $str = ${$tileR}[2];
if ( $str =~ /\'(.)\'/ ) {
print $fhr pack( "U", ord($1) );
# printf STDERR "ord: %x ($1)\n", ord($1);
if ( $str =~ /\'(.(\|.)*)\'/ ) {
printLetters( $1, $fhr );
} elsif ( $str =~ /\"(.+)\"/ ) {
print $fhr pack( "c", $specialCount++ );
} elsif ( $str =~ /(\d+)/ ) {

View file

@ -146,6 +146,7 @@ skipBitmaps( LinuxDictionaryCtxt* ctxt, const XP_U8** ptrp )
XP_U16 nSpecials;
XP_UCHAR* text;
XP_UCHAR** texts;
XP_UCHAR** textEnds;
SpecialBitmaps* bitmaps;
Tile tile;
const XP_U8* ptr = *ptrp;
@ -154,6 +155,8 @@ skipBitmaps( LinuxDictionaryCtxt* ctxt, const XP_U8** ptrp )
texts = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool,
nSpecials * sizeof(*texts) );
textEnds = (XP_UCHAR**)XP_MALLOC( ctxt->super.mpool,
nSpecials * sizeof(*textEnds) );
bitmaps = (SpecialBitmaps*)XP_MALLOC( ctxt->super.mpool,
nSpecials * sizeof(*bitmaps) );
XP_MEMSET( bitmaps, 0, nSpecials * sizeof(*bitmaps) );
@ -167,23 +170,36 @@ skipBitmaps( LinuxDictionaryCtxt* ctxt, const XP_U8** ptrp )
XP_ASSERT( *facep < nSpecials );
/* get the string */
txtlen = *ptr++;
text = (XP_UCHAR*)XP_MALLOC(ctxt->super.mpool, txtlen+1);
memcpy( text, ptr, txtlen );
ptr += txtlen;
txtlen = *ptr++;
text = (XP_UCHAR*)XP_MALLOC(ctxt->super.mpool, txtlen+1);
memcpy( text, ptr, txtlen );
ptr += txtlen;
text[txtlen] = '\0';
texts[(XP_U16)*facep] = text;
text[txtlen] = '\0';
texts[(XP_U16)*facep] = text;
textEnds[(XP_U16)*facep] = text + txtlen + 1;
/* Now replace the delimiter char with \0. It must be one byte in
length and of course equal to the delimiter */
XP_ASSERT( 0 == (SYNONYM_DELIM & 0x80) );
while ( '\0' != *text ) {
XP_UCHAR* cp = g_utf8_offset_to_pointer( text, 1 );
if ( 1 == (cp - text) && *text == SYNONYM_DELIM ) {
*text = '\0';
}
text = cp;
}
XP_DEBUGF( "skipping bitmaps for " XP_S, texts[asIndex] );
XP_DEBUGF( "skipping bitmaps for " XP_S, texts[asIndex] );
bitmaps[asIndex].largeBM = skipBitmap( ctxt, &ptr );
bitmaps[asIndex].smallBM = skipBitmap( ctxt, &ptr );
bitmaps[asIndex].largeBM = skipBitmap( ctxt, &ptr );
bitmaps[asIndex].smallBM = skipBitmap( ctxt, &ptr );
}
}
*ptrp = ptr;
ctxt->super.chars = texts;
ctxt->super.charEnds = textEnds;
ctxt->super.bitmaps = bitmaps;
} /* skipBitmaps */
@ -201,11 +217,18 @@ dict_splitFaces( DictionaryCtxt* dict, const XP_U8* utf8,
for ( ii = 0; ii < nFaces; ++ii ) {
ptrs[ii] = next;
if ( isUTF8 ) {
gchar* cp = g_utf8_offset_to_pointer( bytes, 1 );
XP_U16 len = cp - bytes;
XP_MEMCPY( next, bytes, len );
next += len;
bytes += len;
for ( ; ; ) {
gchar* cp = g_utf8_offset_to_pointer( bytes, 1 );
XP_U16 len = cp - bytes;
XP_MEMCPY( next, bytes, len );
next += len;
bytes += len;
if ( SYNONYM_DELIM != bytes[0] ) {
break;
}
++bytes; /* skip delimiter */
*next++ = '\0';
}
} else {
XP_ASSERT( 0 == *bytes );
++bytes; /* skip empty */
@ -216,6 +239,7 @@ dict_splitFaces( DictionaryCtxt* dict, const XP_U8* utf8,
}
XP_ASSERT( !dict->faces );
dict->faces = faces;
dict->facesEnd = faces + nFaces + nBytes;
XP_ASSERT( !dict->facePtrs );
dict->facePtrs = ptrs;
} /* dict_splitFaces */
@ -274,6 +298,9 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const LaunchParams* params,
flags &= ~DICT_HEADER_MASK;
XP_DEBUGF( "has header!" );
}
flags &= ~DICT_SYNONYMS_MASK;
if ( flags == 0x0001 ) {
dctx->super.nodeSize = 3;
charSize = 1;
@ -454,9 +481,8 @@ freeSpecials( LinuxDictionaryCtxt* ctxt )
if ( !!ctxt->super.bitmaps ) {
XP_FREE( ctxt->super.mpool, ctxt->super.bitmaps );
}
if ( !!ctxt->super.chars ) {
XP_FREE( ctxt->super.mpool, ctxt->super.chars );
}
XP_FREEP( ctxt->super.mpool, &ctxt->super.chars );
XP_FREEP( ctxt->super.mpool, &ctxt->super.charEnds );
} /* freeSpecials */
static void

View file

@ -1377,7 +1377,8 @@ walk_dict_test( const LaunchParams* XP_UNUSED_DBG(params),
XP_UCHAR bufPrev[32] = {0};
dict_wordToString( &iter, buf, VSIZE(buf) );
XP_ASSERT( 0 == strncmp( buf, prefix, lenMatched ) );
/* This doesn't work with synonyms like "L-L" for "L·L" */
// XP_ASSERT( 0 == strncasecmp( buf, prefix, lenMatched ) );
DictPosition pos = dict_getPosition( &iter );
XP_ASSERT( 0 == strcmp( buf, words[pos] ) );