Initial changes to add a header to the xwd format so that information such as the

number of words can be included.  Changed the dictionary build to emit the header and the Linux
code to open such dictionaries.  Android still needs to learn to read them.  Also, some of the tools in
dawg/ need to be fixed to read old-format (pre-utf8) .xwd files.
This commit is contained in:
Eric House 2010-12-05 19:33:10 -08:00
parent eff2324950
commit c4cdc24b78
6 changed files with 57 additions and 14 deletions

View file

@ -432,7 +432,6 @@ makeDict( MPFORMAL JNIEnv *env, JNIUtilCtxt* jniutil, jbyteArray jbytes,
anddict->bytes = localBytes; anddict->bytes = localBytes;
parseDict( anddict, localBytes, len ); parseDict( anddict, localBytes, len );
setBlankTile( &anddict->super );
/* copy the name */ /* copy the name */
if ( NULL != jname ) { if ( NULL != jname ) {

View file

@ -35,6 +35,8 @@ extern "C" {
/* cast to unsigned in case XP_UCHAR is signed */ /* cast to unsigned in case XP_UCHAR is signed */
#define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020) #define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020)
#define DICT_HEADER_MASK 0x08
typedef XP_U8 XP_LangCode; typedef XP_U8 XP_LangCode;
typedef enum { typedef enum {
@ -78,6 +80,7 @@ struct DictionaryCtxt {
SpecialBitmaps* bitmaps; SpecialBitmaps* bitmaps;
XP_UCHAR** chars; XP_UCHAR** chars;
XP_U32 nWords;
XP_LangCode langCode; XP_LangCode langCode;

View file

@ -19,7 +19,7 @@ XWLANG=BasEnglish
LANGCODE=en_US LANGCODE=en_US
DICT2DAWGARGS = -r -nosort DICT2DAWGARGS = -r -nosort
TARGET_TYPE ?= PALM TARGET_TYPE ?= WINCE
include ../Makefile.2to8 include ../Makefile.2to8

View file

@ -204,8 +204,8 @@ endif
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES) frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@ $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin frankspecials.bin
cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \ cat $(XWLANG)$*_flags.bin $(XWLANG)*_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin \
frankspecials.bin $(XWLANG)StartLoc.bin \ frankspecials.bin $(XWLANG)StartLoc.bin \
$$(ls dawg$(XWLANG)$*_*.bin) > $@ $$(ls dawg$(XWLANG)$*_*.bin) > $@
cp $@ saveme.bin cp $@ saveme.bin
@ -223,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
ifdef NEWDAWG ifdef NEWDAWG
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \ if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \ then perl -e "print pack(\"n\",0x000C)" > $@; echo "flags=4"; \
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \ elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \ then perl -e "print pack(\"n\",0x000D)" > $@; echo "flags=5"; \
elif true; \ elif true; \
then echo "Unexpected node size"; exit 1; \ then echo "Unexpected node size"; exit 1; \
fi fi
@ -247,7 +247,7 @@ dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.l
touch $@ touch $@
$(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp $(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
@echo @echo "got this rule"
# the files to export for byod # the files to export for byod
allbins: allbins:
@ -268,7 +268,7 @@ values.bin: ../xloc.pl
# a binary file, two bytes, one giving the size of tiles data and the # a binary file, two bytes, one giving the size of tiles data and the
# other the number of tiles in the dict. Tiles data is utf-8 and so # other the number of tiles in the dict. Tiles data is utf-8 and so
# number is not derivable from size. # number is not derivable from size.
charcount.bin: table.bin ../xloc.pl $(XWLANG)_charcount.bin: table.bin ../xloc.pl
SIZ=$$(ls -l $< | awk '{print $$5}'); \ SIZ=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"c\",$$SIZ)" > $@ perl -e "print pack(\"c\",$$SIZ)" > $@
TMP=/tmp/tmp$$$$; \ TMP=/tmp/tmp$$$$; \
@ -276,6 +276,11 @@ charcount.bin: table.bin ../xloc.pl
cat $$TMP >> $@; \ cat $$TMP >> $@; \
rm -f $$TMP rm -f $$TMP
# Assemble the header blob for a dictionary from its wordcount file:
# first the byte size of the prerequisite (column 5 of "ls -l"), written by
# perl's pack "n" as an unsigned 16-bit big-endian value, then the contents
# of the wordcount file itself appended after that length field.
# NOTE(review): SIZ is a shell variable, so the assignment and the perl
# invocation must stay joined by the backslash continuation.
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin
SIZ=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"n\",$$SIZ)" > $@
cat $< >> $@
%.dict: %.dict.gz %.dict: %.dict.gz
zcat $< > $@ zcat $< > $@

View file

@ -156,9 +156,21 @@ sub readNodesToEnd($) {
return @nodes; return @nodes;
} # readNodesToEnd } # readNodesToEnd
sub nodeSizeFromFlags($) { sub nodeSizeFromFlags($$) {
my ( $flags ) = @_; my ( $fh, $flags ) = @_;
if ( $flags == 4 ) {
my $bitSet = $flags & 0x0008;
printf STDERR "checking flags 0x%x with 0x%x -> 0x%x\n", $flags, 0x0008, $bitSet;
if ( 0 != $bitSet ){
$flags = $flags & ~0x0008;
# need to skip header
my $buf;
2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header";
my $len = unpack( "n", $buf );
$len == sysread( $fh, $buf, $len ) || die "couldn't read header bytes";
}
if ( $flags == 2 || $ flags == 4 ) {
return 3; return 3;
} elsif ( $flags == 5 ) { } elsif ( $flags == 5 ) {
return 4; return 4;
@ -186,7 +198,7 @@ sub prepXWD($$$$) {
my $nRead = sysread( $fh, $buf, 2 ); my $nRead = sysread( $fh, $buf, 2 );
my $flags = unpack( "n", $buf ); my $flags = unpack( "n", $buf );
$gNodeSize = nodeSizeFromFlags( $flags ); $gNodeSize = nodeSizeFromFlags( $fh, $flags );
my $nSpecials; my $nSpecials;
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials ); my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
@ -270,7 +282,7 @@ sub prepPDB($$$$) {
my $nChars = ($offsets[2] - $facesOffset) / 2; my $nChars = ($offsets[2] - $facesOffset) / 2;
$nRead += sysread( $fh, $buf, $facesOffset - $nRead ); $nRead += sysread( $fh, $buf, $facesOffset - $nRead );
my @tmp = unpack( 'Nc6n', $buf ); my @tmp = unpack( 'Nc6n', $buf );
$gNodeSize = nodeSizeFromFlags( $tmp[7] ); $gNodeSize = nodeSizeFromFlags( 0, $tmp[7] );
my @faces; my @faces;
for ( my $i = 0; $i < $nChars; ++$i ) { for ( my $i = 0; $i < $nChars; ++$i ) {

View file

@ -209,11 +209,17 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
XP_U16 facesSize; XP_U16 facesSize;
XP_U16 charSize; XP_U16 charSize;
XP_Bool isUTF8 = XP_FALSE; XP_Bool isUTF8 = XP_FALSE;
XP_Bool hasHeader = XP_FALSE;
XP_ASSERT( dictF ); XP_ASSERT( dictF );
if ( 1 == fread( &flags, sizeof(flags), 1, dictF ) ) { if ( 1 == fread( &flags, sizeof(flags), 1, dictF ) ) {
flags = ntohs(flags); flags = ntohs(flags);
XP_DEBUGF( "flags=0x%x", flags ); XP_DEBUGF( "flags=0X%X", flags );
hasHeader = 0 != (DICT_HEADER_MASK & flags);
if ( hasHeader ) {
flags &= ~DICT_HEADER_MASK;
XP_DEBUGF( "has header!" );
}
#ifdef NODE_CAN_4 #ifdef NODE_CAN_4
if ( flags == 0x0001 ) { if ( flags == 0x0001 ) {
dctx->super.nodeSize = 3; dctx->super.nodeSize = 3;
@ -250,6 +256,24 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
#endif #endif
if ( formatOk ) { if ( formatOk ) {
if ( hasHeader ) {
XP_U16 headerLen;
if ( 1 != fread( &headerLen, sizeof(headerLen), 1, dictF ) ) {
goto closeAndExit;
}
headerLen = ntohs( headerLen );
XP_U32 wordCount;
if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */
goto closeAndExit;
}
if ( 1 != fread( &wordCount, sizeof(wordCount), 1, dictF ) ) {
goto closeAndExit;
}
dctx->super.nWords = ntohl( wordCount );
XP_DEBUGF( "dict contains %ld words", dctx->super.nWords );
}
if ( isUTF8 ) { if ( isUTF8 ) {
if ( 1 != fread( &numFaceBytes, sizeof(numFaceBytes), 1, dictF ) ) { if ( 1 != fread( &numFaceBytes, sizeof(numFaceBytes), 1, dictF ) ) {
goto closeAndExit; goto closeAndExit;