Pass boolean isUTF8 in from JNI and, if it is false, create the InputStreamReader

to decode ISO8859_1 rather than UTF-8.  Fixes the problem with Ñ in old
Spanish dicts.  Should actually fix all pre-UTF-8-format dicts.
This commit is contained in:
eehouse 2010-04-20 02:38:42 +00:00
parent 25ffcbec88
commit fbe46c1284
5 changed files with 25 additions and 12 deletions

View file

@ -185,7 +185,7 @@ andLoadSpecialData( AndDictionaryCtxt* ctxt, XP_U8** ptrp )
*/
static void
splitFaces_via_java( JNIEnv* env, AndDictionaryCtxt* ctxt, const XP_U8* ptr,
int nFaceBytes, int nFaces )
int nFaceBytes, int nFaces, XP_Bool isUTF8 )
{
XP_UCHAR facesBuf[nFaces*4]; /* seems a reasonable upper bound... */
int indx = 0;
@ -193,7 +193,8 @@ splitFaces_via_java( JNIEnv* env, AndDictionaryCtxt* ctxt, const XP_U8* ptr,
int nBytes;
int ii;
jobject jstrarr = and_util_splitFaces( ctxt->jniutil, ptr, nFaceBytes );
jobject jstrarr = and_util_splitFaces( ctxt->jniutil, ptr, nFaceBytes,
isUTF8 );
XP_ASSERT( (*env)->GetArrayLength( env, jstrarr ) == nFaces );
for ( ii = 0; ii < nFaces; ++ii ) {
@ -281,7 +282,8 @@ parseDict( AndDictionaryCtxt* ctxt, XP_U8* ptr, XP_U32 dictLength )
ctxt->super.isUTF8 = isUTF8;
if ( isUTF8 ) {
splitFaces_via_java( ctxt->env, ctxt, ptr, numFaceBytes, nFaces );
splitFaces_via_java( ctxt->env, ctxt, ptr, numFaceBytes, nFaces,
XP_TRUE );
ptr += numFaceBytes;
} else {
XP_U8 tmp[nFaces*4]; /* should be enough... */
@ -296,7 +298,9 @@ parseDict( AndDictionaryCtxt* ctxt, XP_U8* ptr, XP_U32 dictLength )
tmp[nBytes] = ch;
nBytes += 1;
}
dict_splitFaces( &ctxt->super, tmp, nBytes, nFaces );
XP_ASSERT( nFaces == nBytes );
splitFaces_via_java( ctxt->env, ctxt, tmp, nBytes, nFaces,
XP_FALSE );
}
ctxt->super.is_4_byte = (ctxt->super.nodeSize == 4);

View file

@ -73,13 +73,14 @@ and_util_makeJBitmap( JNIUtilCtxt* jniutil, int nCols, int nRows,
}
jobject
and_util_splitFaces( JNIUtilCtxt* jniutil, const XP_U8* bytes, jsize len )
and_util_splitFaces( JNIUtilCtxt* jniutil, const XP_U8* bytes, jsize len,
XP_Bool isUTF8 )
{
jobject strarray = NULL;
JNIEnv* env = *jniutil->envp;
jmethodID mid
= getMethodID( env, jniutil->jjniutil, "splitFaces",
"([B)[Ljava/lang/String;" );
"([BZ)[Ljava/lang/String;" );
jbyteArray jbytes = (*env)->NewByteArray( env, len );
@ -87,7 +88,8 @@ and_util_splitFaces( JNIUtilCtxt* jniutil, const XP_U8* bytes, jsize len )
XP_MEMCPY( jp, bytes, len );
(*env)->ReleaseByteArrayElements( env, jbytes, jp, 0 );
strarray = (*env)->CallObjectMethod( env, jniutil->jjniutil, mid, jbytes );
strarray = (*env)->CallObjectMethod( env, jniutil->jjniutil, mid, jbytes,
isUTF8 );
(*env)->DeleteLocalRef( env, jbytes );
return strarray;
}

View file

@ -33,6 +33,7 @@ void destroyJNIUtil( JNIUtilCtxt** jniu );
jobject and_util_makeJBitmap( JNIUtilCtxt* jniu, int nCols, int nRows,
const jboolean* colors );
jobject and_util_splitFaces( JNIUtilCtxt* jniu, const XP_U8* bytes, int len );
jobject and_util_splitFaces( JNIUtilCtxt* jniu, const XP_U8* bytes, int len,
XP_Bool isUTF8 );
#endif

View file

@ -26,5 +26,5 @@ public interface JNIUtils {
// Stuff I can't do in C....
BitmapDrawable makeBitmap( int width, int height, boolean[] colors );
String[] splitFaces( byte[] chars );
String[] splitFaces( byte[] chars, boolean isUTF8 );
}

View file

@ -66,12 +66,18 @@ public class JNIUtilsImpl implements JNIUtils {
* not valid utf8. So turn it and the other nums into strings and
* catch them on the other side.
*/
public String[] splitFaces( byte[] chars )
public String[] splitFaces( byte[] chars, boolean isUTF8 )
{
ArrayList<String> al = new ArrayList<String>();
ByteArrayInputStream bais = new ByteArrayInputStream( chars );
InputStreamReader isr = new InputStreamReader( bais );
InputStreamReader isr;
try {
isr = new InputStreamReader( bais, isUTF8? "UTF8" : "ISO8859_1" );
} catch( java.io.UnsupportedEncodingException uee ) {
Utils.logf( "splitFaces: %s", uee.toString() );
isr = new InputStreamReader( bais );
}
int[] codePoints = new int[1];
for ( ; ; ) {