Initial changes to add a header to the xwd format so that stuff like

the number of words can be included.  Changed the dict build to write the
header and the linux client to read it.  Android still needs to learn how.
Also, some of the tools in dawg/ need to be fixed to read old-format (pre-utf8) .xwd files.
This commit is contained in:
Eric House 2010-12-05 19:33:10 -08:00
parent eff2324950
commit c4cdc24b78
6 changed files with 57 additions and 14 deletions

View file

@ -432,7 +432,6 @@ makeDict( MPFORMAL JNIEnv *env, JNIUtilCtxt* jniutil, jbyteArray jbytes,
anddict->bytes = localBytes;
parseDict( anddict, localBytes, len );
setBlankTile( &anddict->super );
/* copy the name */
if ( NULL != jname ) {

View file

@ -35,6 +35,8 @@ extern "C" {
/* cast to unsigned in case XP_UCHAR is signed */
#define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020)
#define DICT_HEADER_MASK 0x08
typedef XP_U8 XP_LangCode;
typedef enum {
@ -78,6 +80,7 @@ struct DictionaryCtxt {
SpecialBitmaps* bitmaps;
XP_UCHAR** chars;
XP_U32 nWords;
XP_LangCode langCode;

View file

@ -19,7 +19,7 @@ XWLANG=BasEnglish
LANGCODE=en_US
DICT2DAWGARGS = -r -nosort
TARGET_TYPE ?= PALM
TARGET_TYPE ?= WINCE
include ../Makefile.2to8

View file

@ -204,8 +204,8 @@ endif
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin frankspecials.bin
cat $(XWLANG)$*_flags.bin $(XWLANG)*_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin \
frankspecials.bin $(XWLANG)StartLoc.bin \
$$(ls dawg$(XWLANG)$*_*.bin) > $@
cp $@ saveme.bin
@ -223,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
ifdef NEWDAWG
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
then perl -e "print pack(\"n\",0x000C)" > $@; echo "flags=4"; \
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
then perl -e "print pack(\"n\",0x000D)" > $@; echo "flags=5"; \
elif true; \
then echo "Unexpected node size"; exit 1; \
fi
@ -247,7 +247,7 @@ dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.l
touch $@
$(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
@echo
@echo "got this rule"
# the files to export for byod
allbins:
@ -268,7 +268,7 @@ values.bin: ../xloc.pl
# a binary file, two bytes, one giving the size of tiles data and the
# other the number of tiles in the dict. Tiles data is utf-8 and so
# number is not derivable from size.
charcount.bin: table.bin ../xloc.pl
$(XWLANG)_charcount.bin: table.bin ../xloc.pl
SIZ=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"c\",$$SIZ)" > $@
TMP=/tmp/tmp$$$$; \
@ -276,6 +276,11 @@ charcount.bin: table.bin ../xloc.pl
cat $$TMP >> $@; \
rm -f $$TMP
# Build the header blob for one dictionary from its wordcount file.
# Output layout: a 16-bit big-endian length (perl pack "n") holding the
# byte size of the prerequisite as reported by `ls -l` (5th column),
# followed by the prerequisite's bytes appended unchanged.
# NOTE(review): the length comes from parsing `ls -l` output, so it relies
# on the size being the 5th whitespace-separated field -- confirm on the
# build hosts in use.
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin
SIZ=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"n\",$$SIZ)" > $@
cat $< >> $@
# Produce an uncompressed word list by running zcat on the matching
# .dict.gz prerequisite and redirecting its output to the target.
%.dict: %.dict.gz
zcat $< > $@

View file

@ -156,9 +156,21 @@ sub readNodesToEnd($) {
return @nodes;
} # readNodesToEnd
sub nodeSizeFromFlags($) {
my ( $flags ) = @_;
if ( $flags == 4 ) {
sub nodeSizeFromFlags($$) {
my ( $fh, $flags ) = @_;
my $bitSet = $flags & 0x0008;
printf STDERR "checking flags 0x%x with 0x%x -> 0x%x\n", $flags, 0x0008, $bitSet;
if ( 0 != $bitSet ){
$flags = $flags & ~0x0008;
# need to skip header
my $buf;
2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header";
my $len = unpack( "n", $buf );
$len == sysread( $fh, $buf, $len ) || die "couldn't read header bytes";
}
if ( $flags == 2 || $ flags == 4 ) {
return 3;
} elsif ( $flags == 5 ) {
return 4;
@ -186,7 +198,7 @@ sub prepXWD($$$$) {
my $nRead = sysread( $fh, $buf, 2 );
my $flags = unpack( "n", $buf );
$gNodeSize = nodeSizeFromFlags( $flags );
$gNodeSize = nodeSizeFromFlags( $fh, $flags );
my $nSpecials;
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
@ -270,7 +282,7 @@ sub prepPDB($$$$) {
my $nChars = ($offsets[2] - $facesOffset) / 2;
$nRead += sysread( $fh, $buf, $facesOffset - $nRead );
my @tmp = unpack( 'Nc6n', $buf );
$gNodeSize = nodeSizeFromFlags( $tmp[7] );
$gNodeSize = nodeSizeFromFlags( 0, $tmp[7] );
my @faces;
for ( my $i = 0; $i < $nChars; ++$i ) {

View file

@ -209,11 +209,17 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
XP_U16 facesSize;
XP_U16 charSize;
XP_Bool isUTF8 = XP_FALSE;
XP_Bool hasHeader = XP_FALSE;
XP_ASSERT( dictF );
if ( 1 == fread( &flags, sizeof(flags), 1, dictF ) ) {
flags = ntohs(flags);
XP_DEBUGF( "flags=0x%x", flags );
XP_DEBUGF( "flags=0X%X", flags );
hasHeader = 0 != (DICT_HEADER_MASK & flags);
if ( hasHeader ) {
flags &= ~DICT_HEADER_MASK;
XP_DEBUGF( "has header!" );
}
#ifdef NODE_CAN_4
if ( flags == 0x0001 ) {
dctx->super.nodeSize = 3;
@ -250,6 +256,24 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
#endif
if ( formatOk ) {
if ( hasHeader ) {
XP_U16 headerLen;
if ( 1 != fread( &headerLen, sizeof(headerLen), 1, dictF ) ) {
goto closeAndExit;
}
headerLen = ntohs( headerLen );
XP_U32 wordCount;
if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */
goto closeAndExit;
}
if ( 1 != fread( &wordCount, sizeof(wordCount), 1, dictF ) ) {
goto closeAndExit;
}
dctx->super.nWords = ntohl( wordCount );
XP_DEBUGF( "dict contains %ld words", dctx->super.nWords );
}
if ( isUTF8 ) {
if ( 1 != fread( &numFaceBytes, sizeof(numFaceBytes), 1, dictF ) ) {
goto closeAndExit;