mirror of
git://xwords.git.sourceforge.net/gitroot/xwords/xwords
synced 2025-01-03 23:04:08 +01:00
Initial changes to add a header to the xwd format so that metadata such as
the number of words can be included. Changed the dict build to produce dicts with the new header and the linux code to open them. Android still needs to learn to read the new format. Also, some of the tools in dawg/ still need to be fixed to read old-format (pre-utf8) .xwd files.
This commit is contained in:
parent
eff2324950
commit
c4cdc24b78
6 changed files with 57 additions and 14 deletions
|
@ -432,7 +432,6 @@ makeDict( MPFORMAL JNIEnv *env, JNIUtilCtxt* jniutil, jbyteArray jbytes,
|
||||||
anddict->bytes = localBytes;
|
anddict->bytes = localBytes;
|
||||||
|
|
||||||
parseDict( anddict, localBytes, len );
|
parseDict( anddict, localBytes, len );
|
||||||
setBlankTile( &anddict->super );
|
|
||||||
|
|
||||||
/* copy the name */
|
/* copy the name */
|
||||||
if ( NULL != jname ) {
|
if ( NULL != jname ) {
|
||||||
|
|
|
@ -35,6 +35,8 @@ extern "C" {
|
||||||
/* cast to unsigned in case XP_UCHAR is signed */
|
/* cast to unsigned in case XP_UCHAR is signed */
|
||||||
#define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020)
|
#define IS_SPECIAL(face) ((XP_U16)(face) < 0x0020)
|
||||||
|
|
||||||
|
#define DICT_HEADER_MASK 0x08
|
||||||
|
|
||||||
typedef XP_U8 XP_LangCode;
|
typedef XP_U8 XP_LangCode;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -78,6 +80,7 @@ struct DictionaryCtxt {
|
||||||
|
|
||||||
SpecialBitmaps* bitmaps;
|
SpecialBitmaps* bitmaps;
|
||||||
XP_UCHAR** chars;
|
XP_UCHAR** chars;
|
||||||
|
XP_U32 nWords;
|
||||||
|
|
||||||
XP_LangCode langCode;
|
XP_LangCode langCode;
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ XWLANG=BasEnglish
|
||||||
LANGCODE=en_US
|
LANGCODE=en_US
|
||||||
DICT2DAWGARGS = -r -nosort
|
DICT2DAWGARGS = -r -nosort
|
||||||
|
|
||||||
TARGET_TYPE ?= PALM
|
TARGET_TYPE ?= WINCE
|
||||||
|
|
||||||
include ../Makefile.2to8
|
include ../Makefile.2to8
|
||||||
|
|
||||||
|
|
|
@ -204,8 +204,8 @@ endif
|
||||||
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
|
frankspecials.bin: ../frank_mkspecials.pl $(BMPFILES)
|
||||||
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
|
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
|
||||||
|
|
||||||
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin table.bin values.bin frankspecials.bin
|
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin frankspecials.bin
|
||||||
cat $(XWLANG)$*_flags.bin charcount.bin table.bin values.bin \
|
cat $(XWLANG)$*_flags.bin $(XWLANG)*_newheader.bin $(XWLANG)_charcount.bin table.bin values.bin \
|
||||||
frankspecials.bin $(XWLANG)StartLoc.bin \
|
frankspecials.bin $(XWLANG)StartLoc.bin \
|
||||||
$$(ls dawg$(XWLANG)$*_*.bin) > $@
|
$$(ls dawg$(XWLANG)$*_*.bin) > $@
|
||||||
cp $@ saveme.bin
|
cp $@ saveme.bin
|
||||||
|
@ -223,9 +223,9 @@ $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin charcount.bin
|
||||||
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
|
$(XWLANG)%_flags.bin: dawg$(XWLANG)%.stamp
|
||||||
ifdef NEWDAWG
|
ifdef NEWDAWG
|
||||||
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
if [ 3 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
||||||
then perl -e "print pack(\"n\",0x0004)" > $@; echo "flags=4"; \
|
then perl -e "print pack(\"n\",0x000C)" > $@; echo "flags=4"; \
|
||||||
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
elif [ 4 = $$(cat $(XWLANG)$*_nodesize.bin) ] ; \
|
||||||
then perl -e "print pack(\"n\",0x0005)" > $@; echo "flags=5"; \
|
then perl -e "print pack(\"n\",0x000D)" > $@; echo "flags=5"; \
|
||||||
elif true; \
|
elif true; \
|
||||||
then echo "Unexpected node size"; exit 1; \
|
then echo "Unexpected node size"; exit 1; \
|
||||||
fi
|
fi
|
||||||
|
@ -247,7 +247,7 @@ dawg$(XWLANG)%.stamp: $(XWLANG)Main.dict.gz $(DICT2DAWG) table.bin ../Makefile.l
|
||||||
touch $@
|
touch $@
|
||||||
|
|
||||||
$(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
|
$(XWLANG)%_wordcount.bin: dawg$(XWLANG)%.stamp
|
||||||
@echo
|
@echo "got this rule"
|
||||||
|
|
||||||
# the files to export for byod
|
# the files to export for byod
|
||||||
allbins:
|
allbins:
|
||||||
|
@ -268,7 +268,7 @@ values.bin: ../xloc.pl
|
||||||
# a binary file, two bytes, one giving the size of tiles data and the
|
# a binary file, two bytes, one giving the size of tiles data and the
|
||||||
# other the number of tiles in the dict. Tiles data is utf-8 and so
|
# other the number of tiles in the dict. Tiles data is utf-8 and so
|
||||||
# number is not derivable from size.
|
# number is not derivable from size.
|
||||||
charcount.bin: table.bin ../xloc.pl
|
$(XWLANG)_charcount.bin: table.bin ../xloc.pl
|
||||||
SIZ=$$(ls -l $< | awk '{print $$5}'); \
|
SIZ=$$(ls -l $< | awk '{print $$5}'); \
|
||||||
perl -e "print pack(\"c\",$$SIZ)" > $@
|
perl -e "print pack(\"c\",$$SIZ)" > $@
|
||||||
TMP=/tmp/tmp$$$$; \
|
TMP=/tmp/tmp$$$$; \
|
||||||
|
@ -276,6 +276,11 @@ charcount.bin: table.bin ../xloc.pl
|
||||||
cat $$TMP >> $@; \
|
cat $$TMP >> $@; \
|
||||||
rm -f $$TMP
|
rm -f $$TMP
|
||||||
|
|
||||||
|
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin
|
||||||
|
SIZ=$$(ls -l $< | awk '{print $$5}'); \
|
||||||
|
perl -e "print pack(\"n\",$$SIZ)" > $@
|
||||||
|
cat $< >> $@
|
||||||
|
|
||||||
%.dict: %.dict.gz
|
%.dict: %.dict.gz
|
||||||
zcat $< > $@
|
zcat $< > $@
|
||||||
|
|
||||||
|
|
|
@ -156,9 +156,21 @@ sub readNodesToEnd($) {
|
||||||
return @nodes;
|
return @nodes;
|
||||||
} # readNodesToEnd
|
} # readNodesToEnd
|
||||||
|
|
||||||
sub nodeSizeFromFlags($) {
|
sub nodeSizeFromFlags($$) {
|
||||||
my ( $flags ) = @_;
|
my ( $fh, $flags ) = @_;
|
||||||
if ( $flags == 4 ) {
|
|
||||||
|
my $bitSet = $flags & 0x0008;
|
||||||
|
printf STDERR "checking flags 0x%x with 0x%x -> 0x%x\n", $flags, 0x0008, $bitSet;
|
||||||
|
if ( 0 != $bitSet ){
|
||||||
|
$flags = $flags & ~0x0008;
|
||||||
|
# need to skip header
|
||||||
|
my $buf;
|
||||||
|
2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header";
|
||||||
|
my $len = unpack( "n", $buf );
|
||||||
|
$len == sysread( $fh, $buf, $len ) || die "couldn't read header bytes";
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( $flags == 2 || $ flags == 4 ) {
|
||||||
return 3;
|
return 3;
|
||||||
} elsif ( $flags == 5 ) {
|
} elsif ( $flags == 5 ) {
|
||||||
return 4;
|
return 4;
|
||||||
|
@ -186,7 +198,7 @@ sub prepXWD($$$$) {
|
||||||
my $nRead = sysread( $fh, $buf, 2 );
|
my $nRead = sysread( $fh, $buf, 2 );
|
||||||
my $flags = unpack( "n", $buf );
|
my $flags = unpack( "n", $buf );
|
||||||
|
|
||||||
$gNodeSize = nodeSizeFromFlags( $flags );
|
$gNodeSize = nodeSizeFromFlags( $fh, $flags );
|
||||||
|
|
||||||
my $nSpecials;
|
my $nSpecials;
|
||||||
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
|
my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
|
||||||
|
@ -270,7 +282,7 @@ sub prepPDB($$$$) {
|
||||||
my $nChars = ($offsets[2] - $facesOffset) / 2;
|
my $nChars = ($offsets[2] - $facesOffset) / 2;
|
||||||
$nRead += sysread( $fh, $buf, $facesOffset - $nRead );
|
$nRead += sysread( $fh, $buf, $facesOffset - $nRead );
|
||||||
my @tmp = unpack( 'Nc6n', $buf );
|
my @tmp = unpack( 'Nc6n', $buf );
|
||||||
$gNodeSize = nodeSizeFromFlags( $tmp[7] );
|
$gNodeSize = nodeSizeFromFlags( 0, $tmp[7] );
|
||||||
|
|
||||||
my @faces;
|
my @faces;
|
||||||
for ( my $i = 0; $i < $nChars; ++$i ) {
|
for ( my $i = 0; $i < $nChars; ++$i ) {
|
||||||
|
|
|
@ -209,11 +209,17 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
|
||||||
XP_U16 facesSize;
|
XP_U16 facesSize;
|
||||||
XP_U16 charSize;
|
XP_U16 charSize;
|
||||||
XP_Bool isUTF8 = XP_FALSE;
|
XP_Bool isUTF8 = XP_FALSE;
|
||||||
|
XP_Bool hasHeader = XP_FALSE;
|
||||||
|
|
||||||
XP_ASSERT( dictF );
|
XP_ASSERT( dictF );
|
||||||
if ( 1 == fread( &flags, sizeof(flags), 1, dictF ) ) {
|
if ( 1 == fread( &flags, sizeof(flags), 1, dictF ) ) {
|
||||||
flags = ntohs(flags);
|
flags = ntohs(flags);
|
||||||
XP_DEBUGF( "flags=0x%x", flags );
|
XP_DEBUGF( "flags=0X%X", flags );
|
||||||
|
hasHeader = 0 != (DICT_HEADER_MASK & flags);
|
||||||
|
if ( hasHeader ) {
|
||||||
|
flags &= ~DICT_HEADER_MASK;
|
||||||
|
XP_DEBUGF( "has header!" );
|
||||||
|
}
|
||||||
#ifdef NODE_CAN_4
|
#ifdef NODE_CAN_4
|
||||||
if ( flags == 0x0001 ) {
|
if ( flags == 0x0001 ) {
|
||||||
dctx->super.nodeSize = 3;
|
dctx->super.nodeSize = 3;
|
||||||
|
@ -250,6 +256,24 @@ initFromDictFile( LinuxDictionaryCtxt* dctx, const char* fileName )
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( formatOk ) {
|
if ( formatOk ) {
|
||||||
|
|
||||||
|
if ( hasHeader ) {
|
||||||
|
XP_U16 headerLen;
|
||||||
|
if ( 1 != fread( &headerLen, sizeof(headerLen), 1, dictF ) ) {
|
||||||
|
goto closeAndExit;
|
||||||
|
}
|
||||||
|
headerLen = ntohs( headerLen );
|
||||||
|
XP_U32 wordCount;
|
||||||
|
if ( headerLen != sizeof(wordCount) ) { /* the only case we know right now */
|
||||||
|
goto closeAndExit;
|
||||||
|
}
|
||||||
|
if ( 1 != fread( &wordCount, sizeof(wordCount), 1, dictF ) ) {
|
||||||
|
goto closeAndExit;
|
||||||
|
}
|
||||||
|
dctx->super.nWords = ntohl( wordCount );
|
||||||
|
XP_DEBUGF( "dict contains %ld words", dctx->super.nWords );
|
||||||
|
}
|
||||||
|
|
||||||
if ( isUTF8 ) {
|
if ( isUTF8 ) {
|
||||||
if ( 1 != fread( &numFaceBytes, sizeof(numFaceBytes), 1, dictF ) ) {
|
if ( 1 != fread( &numFaceBytes, sizeof(numFaceBytes), 1, dictF ) ) {
|
||||||
goto closeAndExit;
|
goto closeAndExit;
|
||||||
|
|
Loading…
Reference in a new issue