Merge branch 'android_branch' into android_dictname

This commit is contained in:
Eric House 2012-09-08 13:20:59 -07:00
commit 9a72f252ed
16 changed files with 161 additions and 51 deletions

View file

@ -265,6 +265,18 @@ splitFaces_via_java( JNIEnv* env, AndDictionaryCtxt* ctxt, const XP_U8* ptr,
ctxt->super.facePtrs = ptrs; ctxt->super.facePtrs = ptrs;
} /* splitFaces_via_java */ } /* splitFaces_via_java */
/* Copy the next NUL-terminated string out of the dictionary header.
 *
 * dctx:      supplies the memory pool (dctx->super.mpool) used for the copy.
 * ptr:       in/out; points at the string on entry and is advanced past the
 *            bytes consumed.
 * headerLen: in/out; number of header bytes still unread, reduced by the
 *            bytes consumed.
 *
 * Returns a freshly XP_MALLOC'd, NUL-terminated copy of the string.
 *
 * The search for the terminator is limited to *headerLen bytes.  If no NUL
 * is found inside the header the string is truncated at the header's end,
 * so *headerLen bottoms out at 0 rather than wrapping and *ptr is never
 * moved beyond the header.
 */
static XP_UCHAR*
getNullTermParam( AndDictionaryCtxt* dctx, const XP_U8** ptr,
                  XP_U16* headerLen )
{
    const XP_U8* src = *ptr;
    XP_U16 avail = *headerLen;
    XP_U16 strLen = 0;
    XP_U16 consumed;
    XP_UCHAR* result;

    while ( strLen < avail && '\0' != src[strLen] ) {
        ++strLen;
    }
    /* Step over the terminator too, but only if it lies inside the header */
    consumed = strLen < avail ? (XP_U16)(strLen + 1) : avail;

    result = XP_MALLOC( dctx->super.mpool, strLen + 1 );
    XP_MEMCPY( result, src, strLen );
    result[strLen] = '\0';

    *ptr += consumed;
    *headerLen -= consumed;
    return result;
}
static XP_Bool static XP_Bool
parseDict( AndDictionaryCtxt* ctxt, XP_U8 const* ptr, XP_U32 dictLength, parseDict( AndDictionaryCtxt* ctxt, XP_U8 const* ptr, XP_U32 dictLength,
XP_U32* numEdges ) XP_U32* numEdges )
@ -294,12 +306,10 @@ parseDict( AndDictionaryCtxt* ctxt, XP_U8 const* ptr, XP_U32 dictLength,
} }
if ( 1 <= headerLen ) { /* have description? */ if ( 1 <= headerLen ) { /* have description? */
XP_U16 len = 1 + XP_STRLEN( (XP_UCHAR*)ptr ); ctxt->super.desc = getNullTermParam( ctxt, &ptr, &headerLen );
ctxt->super.desc = }
(XP_UCHAR*)XP_MALLOC(ctxt->super.mpool, len); if ( 1 <= headerLen ) { /* have md5sum? */
XP_MEMCPY( ctxt->super.desc, ptr, len ); ctxt->super.md5Sum = getNullTermParam( ctxt, &ptr, &headerLen );
ptr += len;
headerLen -= len;
} }
CHECK_PTR( ptr, headerLen, end ); CHECK_PTR( ptr, headerLen, end );
@ -440,6 +450,7 @@ and_dictionary_destroy( DictionaryCtxt* dict )
XP_FREE( ctxt->super.mpool, ctxt->super.bitmaps ); XP_FREE( ctxt->super.mpool, ctxt->super.bitmaps );
} }
XP_FREEP( ctxt->super.mpool, &ctxt->super.md5Sum );
XP_FREEP( ctxt->super.mpool, &ctxt->super.desc ); XP_FREEP( ctxt->super.mpool, &ctxt->super.desc );
XP_FREEP( ctxt->super.mpool, &ctxt->super.faces ); XP_FREEP( ctxt->super.mpool, &ctxt->super.faces );
XP_FREEP( ctxt->super.mpool, &ctxt->super.facePtrs ); XP_FREEP( ctxt->super.mpool, &ctxt->super.facePtrs );

View file

@ -299,6 +299,7 @@ Java_org_eehouse_android_xw4_jni_XwJNI_dict_1getInfo
if ( NULL != jinfo ) { if ( NULL != jinfo ) {
setInt( env, jinfo, "langCode", dict_getLangCode( dict ) ); setInt( env, jinfo, "langCode", dict_getLangCode( dict ) );
setInt( env, jinfo, "wordCount", dict_getWordCount( dict ) ); setInt( env, jinfo, "wordCount", dict_getWordCount( dict ) );
setString( env, jinfo, "md5Sum", dict_getMd5Sum( dict ) );
} }
dict_destroy( dict ); dict_destroy( dict );
result = true; result = true;

View file

@ -197,6 +197,11 @@ public class DictLangCache {
return getInfo( context, dal ).langCode; return getInfo( context, dal ).langCode;
} }
/**
 * Looks up the md5Sum recorded for a dictionary.
 *
 * @param context passed through unchanged to getInfo()
 * @param dict    identifies the dictionary; passed through to getInfo()
 * @return the md5Sum field of the object getInfo() returns
 */
public static String getDictMD5Sum( Context context, String dict )
{
return getInfo( context, dict ).md5Sum;
}
public static int getDictLangCode( Context context, String dict ) public static int getDictLangCode( Context context, String dict )
{ {
return getInfo( context, dict ).langCode; return getInfo( context, dict ).langCode;

View file

@ -21,7 +21,11 @@
package org.eehouse.android.xw4.jni; package org.eehouse.android.xw4.jni;
public class DictInfo { public class DictInfo {
// set in java code
public String name;
// set in jni code
public int langCode; public int langCode;
public int wordCount; public int wordCount;
public String name; public String md5Sum;
}; };

View file

@ -505,6 +505,12 @@ dict_getDesc( const DictionaryCtxt* dict )
return dict->desc; return dict->desc;
} }
/* Accessor for the dictionary's md5Sum field.  The stored pointer is
 * returned as-is; nothing is copied. */
const XP_UCHAR*
dict_getMd5Sum( const DictionaryCtxt* dict )
{
    const XP_UCHAR* sum = dict->md5Sum;
    return sum;
}
#ifdef STUBBED_DICT #ifdef STUBBED_DICT
#define BLANK_FACE '\0' #define BLANK_FACE '\0'

View file

@ -74,6 +74,7 @@ struct DictionaryCtxt {
XP_UCHAR* langName; XP_UCHAR* langName;
XP_UCHAR* faces; XP_UCHAR* faces;
XP_UCHAR* desc; XP_UCHAR* desc;
XP_UCHAR* md5Sum;
const XP_UCHAR** facePtrs; const XP_UCHAR** facePtrs;
XP_U8* countsAndValues; XP_U8* countsAndValues;
@ -165,6 +166,7 @@ XP_LangCode dict_getLangCode( const DictionaryCtxt* dict );
XP_U32 dict_getWordCount( const DictionaryCtxt* dict ); XP_U32 dict_getWordCount( const DictionaryCtxt* dict );
const XP_UCHAR* dict_getDesc( const DictionaryCtxt* dict ); const XP_UCHAR* dict_getDesc( const DictionaryCtxt* dict );
const XP_UCHAR* dict_getMd5Sum( const DictionaryCtxt* dict );
void dict_writeToStream( const DictionaryCtxt* ctxt, XWStreamCtxt* stream ); void dict_writeToStream( const DictionaryCtxt* ctxt, XWStreamCtxt* stream );
void dict_loadFromStream( DictionaryCtxt* dict, XWStreamCtxt* stream ); void dict_loadFromStream( DictionaryCtxt* dict, XWStreamCtxt* stream );

View file

@ -18,7 +18,7 @@
XWLANG=BasEnglish XWLANG=BasEnglish
LANGCODE=en_US LANGCODE=en_US
DICT2DAWGARGS = -r -nosort DICT2DAWGARGS = -r -nosort
DICTNOTE = "Built on $(shell date)" DICTNOTE = "Wordlist created in the 1930s for language learners"
TARGET_TYPE ?= WINCE TARGET_TYPE ?= WINCE

View file

@ -18,6 +18,7 @@
XWLANG=CollegeEng XWLANG=CollegeEng
LANGCODE=en_US LANGCODE=en_US
TARGET_TYPE=WINCE TARGET_TYPE=WINCE
DICTNOTE = "From the old PalmOS app Niggle"
include ../Makefile.langcommon include ../Makefile.langcommon

View file

@ -18,6 +18,7 @@
XWLANG = SOWPODS XWLANG = SOWPODS
LANGCODE = en_US LANGCODE = en_US
TARGET_TYPE = WINCE TARGET_TYPE = WINCE
DICTNOTE = "Used in Scrabble™ tournaments especially outside the US"
include ../Makefile.langcommon include ../Makefile.langcommon

View file

@ -280,7 +280,12 @@ $(XWLANG)%_note.bin:
echo -n $(DICTNOTE) > $@ echo -n $(DICTNOTE) > $@
perl -e "print pack(\"c\",0)" >> $@ perl -e "print pack(\"c\",0)" >> $@
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin $(XWLANG)%_md5sum.bin:
cat table.bin values.bin frankspecials.bin $(XWLANG)StartLoc.bin \
dawg$(XWLANG)$*_*.bin | md5sum | awk '{print $$1}' | tr -d '\n' > $@
perl -e "print pack(\"c\",0)" >> $@
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin $(XWLANG)%_md5sum.bin
SIZ=0; \ SIZ=0; \
for FILE in $+; do \ for FILE in $+; do \
SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \ SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \

View file

@ -26,6 +26,10 @@ use Encode 'from_to';
use Encode; use Encode;
my $gInFile; my $gInFile;
my $gSumOnly = 0;
my $gSum;
my $gDescOnly = 0;
my $gDesc;
my $gDoRaw = 0; my $gDoRaw = 0;
my $gDoJSON = 0; my $gDoJSON = 0;
my $gFileType; my $gFileType;
@ -35,9 +39,11 @@ use Fcntl 'SEEK_CUR';
sub systell { sysseek($_[0], 0, SEEK_CUR) } sub systell { sysseek($_[0], 0, SEEK_CUR) }
sub usage() { sub usage() {
print STDERR "USAGE: $0 " print STDERR "USAGE: $0"
. "[-raw | -json] " . " [-raw | -json] "
. "-dict <xwdORpdb>" . " [-get-sum]"
. " [-get-desc]"
. " -dict <xwdORpdb>"
. "\n" . "\n"
. "\t(Takes a .pdb or .xwd and prints its words to stdout)\n"; . "\t(Takes a .pdb or .xwd and prints its words to stdout)\n";
exit 1; exit 1;
@ -52,6 +58,10 @@ sub parseARGV() {
$gDoJSON = 1; $gDoJSON = 1;
} elsif ( $parm eq "-dict" ) { } elsif ( $parm eq "-dict" ) {
$gInFile = shift(@ARGV); $gInFile = shift(@ARGV);
} elsif ( $parm eq "-get-sum" ) {
$gSumOnly = 1;
} elsif ( $parm eq "-get-desc" ) {
$gDescOnly = 1;
} else { } else {
usage(); usage();
} }
@ -156,6 +166,22 @@ sub readNodesToEnd($) {
return @nodes; return @nodes;
} # readNodesToEnd } # readNodesToEnd
# Report the header that was just read and remember two of its strings.
#
# $buf holds the raw header bytes and $len their count; the byte total
# written to STDERR is $len + 2.  The buffer is decoded as UTF-8 and split
# on NUL bytes; elements 1 and 2 of the result are stored in $gDesc and
# $gSum, and every non-empty piece is echoed to STDERR.
sub printHeader($$) {
    my ( $buf, $len ) = @_;
    printf STDERR "skipped %d bytes of header:\n", $len + 2;

    my @strs = split( '\0', Encode::decode_utf8($buf) );

    # There are variable numbers of strings showing up in this thing.
    # Need to figure out the right way to unpack the thing.
    ( $gDesc, $gSum ) = @strs[ 1, 2 ];

    print STDERR 'Got: ', $_, "\n" for grep { 0 < length($_) } @strs;
}
sub nodeSizeFromFlags($$) { sub nodeSizeFromFlags($$) {
my ( $fh, $flags ) = @_; my ( $fh, $flags ) = @_;
@ -167,7 +193,7 @@ sub nodeSizeFromFlags($$) {
2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header"; 2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header";
my $len = unpack( "n", $buf ); my $len = unpack( "n", $buf );
$len == sysread( $fh, $buf, $len ) || die "couldn't read header bytes"; $len == sysread( $fh, $buf, $len ) || die "couldn't read header bytes";
printf STDERR "skipped %d bytes of header\n", $len + 2; printHeader( $buf, $len );
} }
if ( $flags == 2 || $ flags == 4 ) { if ( $flags == 2 || $ flags == 4 ) {
@ -192,6 +218,7 @@ sub mergeSpecials($$) {
sub prepXWD($$$$) { sub prepXWD($$$$) {
my ( $fh, $facRef, $nodesRef, $startRef ) = @_; my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
my $done = 1;
printf STDERR "at 0x%x at start\n", systell($fh); printf STDERR "at 0x%x at start\n", systell($fh);
my $buf; my $buf;
@ -200,33 +227,44 @@ sub prepXWD($$$$) {
$gNodeSize = nodeSizeFromFlags( $fh, $flags ); $gNodeSize = nodeSizeFromFlags( $fh, $flags );
my $nSpecials; if ( $gSumOnly ) {
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials ); print STDOUT $gSum, "\n";
} elsif( $gDescOnly ) {
print STDOUT $gDesc, "\n";
} else {
$done = 0;
}
printf STDERR "at 0x%x before header read\n", systell($fh); if ( !$done ) {
# skip xloc header my $nSpecials;
$nRead = sysread( $fh, $buf, 2 ); my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
# skip values info. printf STDERR "at 0x%x before header read\n", systell($fh);
printf STDERR "at 0x%x before reading %d values\n", systell($fh), $faceCount; # skip xloc header
sysread( $fh, $buf, $faceCount * 2 ); $nRead = sysread( $fh, $buf, 2 );
printf STDERR "at 0x%x after values read\n", systell($fh);
printf STDERR "at 0x%x before specials read\n", systell($fh); # skip values info.
my @specials; printf STDERR "at 0x%x before reading %d values\n", systell($fh), $faceCount;
getSpecials( $fh, $nSpecials, \@specials ); sysread( $fh, $buf, $faceCount * 2 );
mergeSpecials( $facRef, \@specials ); printf STDERR "at 0x%x after values read\n", systell($fh);
printf STDERR "at 0x%x after specials read\n", systell($fh);
printf STDERR "at 0x%x before offset read\n", systell($fh); printf STDERR "at 0x%x before specials read\n", systell($fh);
sysread( $fh, $buf, 4 ); my @specials;
$$startRef = unpack( 'N', $buf ); getSpecials( $fh, $nSpecials, \@specials );
print STDERR "startRef=$$startRef\n"; mergeSpecials( $facRef, \@specials );
printf STDERR "at 0x%x after specials read\n", systell($fh);
my @nodes = readNodesToEnd( $fh ); printf STDERR "at 0x%x before offset read\n", systell($fh);
sysread( $fh, $buf, 4 );
$$startRef = unpack( 'N', $buf );
print STDERR "startRef=$$startRef\n";
@$nodesRef = @nodes; my @nodes = readNodesToEnd( $fh );
@$nodesRef = @nodes;
}
print STDERR "prepXWD done\n"; print STDERR "prepXWD done\n";
return $done;
} # prepXWD } # prepXWD
sub readPDBSpecials($$$$$) { sub readPDBSpecials($$$$$) {
@ -436,17 +474,20 @@ binmode INFILE;
my @faces; my @faces;
my @nodes; my @nodes;
my $startIndex; my $startIndex;
my $done;
if ( $gFileType eq "xwd" ){ if ( $gFileType eq "xwd" ){
prepXWD( *INFILE, \@faces, \@nodes, \$startIndex ); $done = prepXWD( *INFILE, \@faces, \@nodes, \$startIndex );
} elsif ( $gFileType eq "pdb" ) { } elsif ( $gFileType eq "pdb" ) {
prepPDB( *INFILE, \@faces, \@nodes, \$startIndex ); $done = prepPDB( *INFILE, \@faces, \@nodes, \$startIndex );
} }
close INFILE; close INFILE;
die "no nodes!!!" if 0 == @nodes; die "no nodes!!!" if 0 == @nodes;
if ( $gDoRaw ) { if ( $done ) {
# we're done...
} elsif ( $gDoRaw ) {
printNodes( \@nodes, \@faces ); printNodes( \@nodes, \@faces );
} elsif ( $gDoJSON ) { } elsif ( $gDoJSON ) {
print "dict = {\n"; print "dict = {\n";

View file

@ -85,6 +85,20 @@ linux_dictionary_make( MPFORMAL const LaunchParams* params,
return (DictionaryCtxt*)result; return (DictionaryCtxt*)result;
} /* gtk_dictionary_make */ } /* gtk_dictionary_make */
/* Copy the next NUL-terminated string out of the dictionary header and log
 * it.
 *
 * dctx:      supplies the memory pool (dctx->super.mpool) used for the copy.
 * ptr:       in/out; points at the string on entry and is advanced past the
 *            bytes consumed.
 * headerLen: in/out; number of header bytes still unread, reduced by the
 *            bytes consumed.
 *
 * Returns a freshly XP_MALLOC'd, NUL-terminated copy of the string.
 *
 * The search for the terminator is limited to *headerLen bytes.  If no NUL
 * is found inside the header the string is truncated at the header's end,
 * so *headerLen bottoms out at 0 rather than wrapping and *ptr is never
 * moved beyond the header.
 */
static XP_UCHAR*
getNullTermParam( LinuxDictionaryCtxt* dctx, const XP_U8** ptr,
                  XP_U16* headerLen )
{
    const XP_U8* src = *ptr;
    XP_U16 avail = *headerLen;
    XP_U16 strLen = 0;
    XP_U16 consumed;
    XP_UCHAR* result;

    while ( strLen < avail && '\0' != src[strLen] ) {
        ++strLen;
    }
    /* Step over the terminator too, but only if it lies inside the header */
    consumed = strLen < avail ? (XP_U16)(strLen + 1) : avail;

    result = XP_MALLOC( dctx->super.mpool, strLen + 1 );
    XP_MEMCPY( result, src, strLen );
    result[strLen] = '\0';

    /* Logged length counts the terminator, i.e. the size of the copy */
    XP_LOGF( "%s: got param of len %d: \"%s\"", __func__,
             strLen + 1, result );

    *ptr += consumed;
    *headerLen -= consumed;
    return result;
}
static XP_U16 static XP_U16
countSpecials( LinuxDictionaryCtxt* ctxt ) countSpecials( LinuxDictionaryCtxt* ctxt )
{ {
@ -109,7 +123,7 @@ skipBitmap( LinuxDictionaryCtxt* XP_UNUSED_DBG(ctxt), const XP_U8** ptrp )
nCols = *ptr++; nCols = *ptr++;
if ( nCols > 0 ) { if ( nCols > 0 ) {
nRows = *ptr++; nRows = *ptr++;
nBytes = ((nRows * nCols) + 7) / 8; nBytes = ((nRows * nCols) + 7) / 8;
@ -118,8 +132,8 @@ skipBitmap( LinuxDictionaryCtxt* XP_UNUSED_DBG(ctxt), const XP_U8** ptrp )
lbs->nCols = nCols; lbs->nCols = nCols;
lbs->nBytes = nBytes; lbs->nBytes = nBytes;
memcpy( lbs + 1, ptr, nBytes ); memcpy( lbs + 1, ptr, nBytes );
ptr += nBytes; ptr += nBytes;
} }
*ptrp = ptr; *ptrp = ptr;
@ -306,16 +320,15 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const LaunchParams* params,
XP_DEBUGF( "dict contains %ld words", dctx->super.nWords ); XP_DEBUGF( "dict contains %ld words", dctx->super.nWords );
if ( 0 < headerLen ) { if ( 0 < headerLen ) {
XP_U16 len = 1 + XP_STRLEN( (XP_UCHAR*)ptr ); dctx->super.desc = getNullTermParam( dctx, &ptr, &headerLen );
dctx->super.desc = XP_MALLOC( dctx->super.mpool, len );
XP_MEMCPY( dctx->super.desc, ptr, len );
XP_LOGF( "%s: got note of len %d: \"%s\"", __func__,
headerLen-1, dctx->super.desc );
ptr += len;
headerLen -= len;
} else { } else {
XP_LOGF( "%s: no note", __func__ ); XP_LOGF( "%s: no note", __func__ );
} }
if ( 0 < headerLen ) {
dctx->super.md5Sum = getNullTermParam( dctx, &ptr, &headerLen );
} else {
XP_LOGF( "%s: no md5Sum", __func__ );
}
ptr += headerLen; ptr += headerLen;
} }
@ -327,6 +340,25 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const LaunchParams* params,
numFaceBytes = numFaces * charSize; numFaceBytes = numFaces * charSize;
} }
if ( NULL == dctx->super.md5Sum
#ifdef DEBUG
|| XP_TRUE
#endif
) {
XP_U32 curPos = ptr - dctx->dictBase;
gssize dictLength = dctx->dictLength - curPos;
GChecksum* cksum = g_checksum_new( G_CHECKSUM_MD5 );
g_checksum_update( cksum, ptr, dictLength );
const gchar* sum = g_checksum_get_string( cksum );
XP_LOGF( "calculated sum on %d bytes: %s", dictLength, sum );
if ( NULL == dctx->super.md5Sum ) {
dctx->super.md5Sum = copyString( dctx->super.mpool, sum );
} else {
XP_ASSERT( 0 == XP_STRCMP( dctx->super.md5Sum, sum ) );
}
g_checksum_free( cksum );
}
dctx->super.nFaces = numFaces; dctx->super.nFaces = numFaces;
dctx->super.countsAndValues = XP_MALLOC( dctx->super.mpool, dctx->super.countsAndValues = XP_MALLOC( dctx->super.mpool,
@ -435,14 +467,15 @@ linux_dictionary_destroy( DictionaryCtxt* dict )
freeSpecials( ctxt ); freeSpecials( ctxt );
if ( !!ctxt->dictBase ) { if ( !!ctxt->dictBase ) {
if ( ctxt->useMMap ) { if ( ctxt->useMMap ) {
(void)munmap( ctxt->dictBase, ctxt->dictLength ); (void)munmap( ctxt->dictBase, ctxt->dictLength );
} else { } else {
XP_FREE( dict->mpool, ctxt->dictBase ); XP_FREE( dict->mpool, ctxt->dictBase );
} }
} }
XP_FREEP( dict->mpool, &ctxt->super.desc ); XP_FREEP( dict->mpool, &ctxt->super.desc );
XP_FREEP( dict->mpool, &ctxt->super.md5Sum );
XP_FREE( dict->mpool, ctxt->super.countsAndValues ); XP_FREE( dict->mpool, ctxt->super.countsAndValues );
XP_FREE( dict->mpool, ctxt->super.faces ); XP_FREE( dict->mpool, ctxt->super.faces );
XP_FREE( dict->mpool, ctxt->super.facePtrs ); XP_FREE( dict->mpool, ctxt->super.facePtrs );