add board sizes to wordlist format (English only)

Modify language metadata to allow different tile counts for different
board sizes. Make the necessary changes for loading such files. Works
on the Linux version at least. Only English will build for now because
of the changes to the info.txt format.
This commit is contained in:
Eric House 2022-03-14 18:28:08 +01:00
parent cd4fb88a4d
commit cdc77eaf68
6 changed files with 150 additions and 68 deletions

View file

@ -250,12 +250,12 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
if ( 0 < headerLen ) { if ( 0 < headerLen ) {
dctx->desc = getNullTermParam( dctx, &ptr, &headerLen ); dctx->desc = getNullTermParam( dctx, &ptr, &headerLen );
} else { } else {
XP_LOGF( "%s: no note", __func__ ); XP_LOGFF( "no note" );
} }
if ( 0 < headerLen ) { if ( 0 < headerLen ) {
dctx->md5Sum = getNullTermParam( dctx, &ptr, &headerLen ); dctx->md5Sum = getNullTermParam( dctx, &ptr, &headerLen );
} else { } else {
XP_LOGF( "%s: no md5Sum", __func__ ); XP_LOGFF( "no md5Sum" );
} }
XP_U16 headerFlags = 0; XP_U16 headerFlags = 0;
@ -268,12 +268,27 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
XP_LOGFF( "setting headerFlags: 0x%x", headerFlags ); XP_LOGFF( "setting headerFlags: 0x%x", headerFlags );
dctx->headerFlags = headerFlags; dctx->headerFlags = headerFlags;
if ( 0 < headerLen ) {
dctx->nBoardSizes = *ptr++;
XP_ASSERT( dctx->nBoardSizes <= VSIZE(dctx->boardSizes) );
for ( int ii = 0; ii < dctx->nBoardSizes; ++ii ) {
dctx->boardSizes[ii] = *ptr++;
}
headerLen -= 1 + dctx->nBoardSizes;
XP_ASSERT( 0 <= headerLen );
}
if ( 0 < headerLen ) { if ( 0 < headerLen ) {
XP_LOGFF( "skipping %d bytes of header", headerLen ); XP_LOGFF( "skipping %d bytes of header", headerLen );
} }
ptr += headerLen; ptr += headerLen;
} }
if ( 0 == dctx->nBoardSizes ) { /* wasn't provided */
dctx->boardSizes[0] = 15;
dctx->nBoardSizes = 1;
}
if ( isUTF8 ) { if ( isUTF8 ) {
numFaceBytes = *ptr++; numFaceBytes = *ptr++;
} }
@ -301,7 +316,8 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
dctx->nFaces = numFaces; dctx->nFaces = numFaces;
dctx->countsAndValues = XP_MALLOC( dctx->mpool, numFaces * 2 ); dctx->countsAndValues = XP_MALLOC( dctx->mpool,
numFaces * (1 + dctx->nBoardSizes) );
XP_U16 facesSize = numFaceBytes; XP_U16 facesSize = numFaceBytes;
if ( !isUTF8 ) { if ( !isUTF8 ) {
facesSize /= 2; facesSize /= 2;
@ -316,8 +332,9 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
unsigned short xloc; unsigned short xloc;
XP_MEMCPY( &xloc, ptr, sizeof(xloc) ); XP_MEMCPY( &xloc, ptr, sizeof(xloc) );
ptr += sizeof(xloc); ptr += sizeof(xloc);
XP_MEMCPY( dctx->countsAndValues, ptr, numFaces*2 ); size_t cvSize = numFaces * (1 + dctx->nBoardSizes);
ptr += numFaces*2; XP_MEMCPY( dctx->countsAndValues, ptr, cvSize );
ptr += cvSize;
dctx->langCode = xloc & 0x7F; dctx->langCode = xloc & 0x7F;
} }
@ -413,7 +430,7 @@ dict_getBlankTile( const DictionaryCtxt* dict )
} /* dict_getBlankTile */ } /* dict_getBlankTile */
XP_U16 XP_U16
dict_getTileValue( const DictionaryCtxt* dict, Tile tile ) dict_getTileValue( const DictionaryCtxt* dict, const Tile tile )
{ {
XP_ASSERT( !!dict ); XP_ASSERT( !!dict );
if ( (tile & TILE_VALUE_MASK) != tile ) { if ( (tile & TILE_VALUE_MASK) != tile ) {
@ -421,9 +438,11 @@ dict_getTileValue( const DictionaryCtxt* dict, Tile tile )
tile == dict_getBlankTile( dict ) ); tile == dict_getBlankTile( dict ) );
} }
XP_ASSERT( tile < dict->nFaces ); XP_ASSERT( tile < dict->nFaces );
tile *= 2; int offset = tile * (1 + dict->nBoardSizes);
XP_ASSERT( !!dict->countsAndValues ); XP_ASSERT( !!dict->countsAndValues );
return dict->countsAndValues[tile+1]; XP_U16 result = dict->countsAndValues[offset + dict->nBoardSizes];
/* XP_LOGFF( "(%d) => %d", tile, result ); */
return result;
} /* dict_getTileValue */ } /* dict_getTileValue */
static const XP_UCHAR* static const XP_UCHAR*
@ -479,22 +498,32 @@ dict_getNextTileString( const DictionaryCtxt* dict, Tile tile,
XP_U16 XP_U16
dict_numTilesForSize( const DictionaryCtxt* dict, Tile tile, XP_U16 nCols ) dict_numTilesForSize( const DictionaryCtxt* dict, Tile tile, XP_U16 nCols )
{ {
tile *= 2; XP_Bool matched = XP_FALSE;
XP_U16 count = dict->countsAndValues[tile]; int offset = tile * (1 + dict->nBoardSizes);
for ( int ii = 0; !matched && ii < dict->nBoardSizes; ++ii ) {
if ( nCols == dict->boardSizes[ii] ) { /* perfect match? */
offset += ii;
matched = XP_TRUE;
}
}
/* Wordlists are built assuming 15x15 boards. Different sized boards need XP_U16 count = dict->countsAndValues[offset];
different numbers of tiles. The wordlist might provide for the size we if ( !matched ) {
have. If not, let's adjust the count based on how many squares we have /* Older wordlists are built assuming 15x15 boards. Different sized
vs. 15x15. boards need different numbers of tiles. The wordlist might provide
for the size we have. If not, let's adjust the count based on how
many squares we have vs. 15x15.
*/ */
XP_U16 pct = (nCols * nCols * 100) / (15 * 15); XP_U16 pct = (nCols * nCols * 100) / (15 * 15);
XP_U16 newCount = count * pct / 100; XP_U16 newCount = count * pct / 100;
if ( 50 < (count * pct) % 100 ) { if ( 50 < (count * pct) % 100 ) {
++newCount; ++newCount;
} }
XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct ); // XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct );
count = newCount; count = newCount;
}
// XP_LOGFF( "(tile=%d, ncols=%d) => %d", tile, nCols, count );
return count; return count;
} /* dict_numTiles */ } /* dict_numTiles */
@ -702,6 +731,7 @@ dict_writeToStream( const DictionaryCtxt* dict, XWStreamCtxt* stream )
stream_putBits( stream, 6, dict->nFaces ); stream_putBits( stream, 6, dict->nFaces );
XP_ASSERT(0); /* if this fires, need to fix for per-boardSize counts */
for ( ii = 0; ii < dict->nFaces*2; ii+=2 ) { for ( ii = 0; ii < dict->nFaces*2; ii+=2 ) {
XP_U16 count, value; XP_U16 count, value;
@ -789,6 +819,7 @@ common_destructor( DictionaryCtxt* dict, XWEnv XP_UNUSED(xwe) )
void void
dict_loadFromStream( DictionaryCtxt* dict, XWEnv xwe, XWStreamCtxt* stream ) dict_loadFromStream( DictionaryCtxt* dict, XWEnv xwe, XWStreamCtxt* stream )
{ {
XP_ASSERT(0); /* if this fires, need to fix for per-boardSize counts */
XP_U8 nFaces, nFaceBytes; XP_U8 nFaces, nFaceBytes;
XP_U16 maxCountBits, maxValueBits; XP_U16 maxCountBits, maxValueBits;
XP_U16 ii, nSpecials; XP_U16 ii, nSpecials;
@ -1043,7 +1074,7 @@ make_stubbed_dict( MPFORMAL_NOCOMMA )
setBlankTile( dict ); setBlankTile( dict );
return dict; return dict;
} /* make_subbed_dict */ } /* make_stubbed_dict */
#endif /* STUBBED_DICT */ #endif /* STUBBED_DICT */

View file

@ -88,6 +88,9 @@ struct DictionaryCtxt {
const XP_UCHAR** facePtrs; /* elems point into faces, above */ const XP_UCHAR** facePtrs; /* elems point into faces, above */
XP_U8* countsAndValues; XP_U8* countsAndValues;
XP_U8 nBoardSizes;
XP_U8 boardSizes[2]; /* will be [15] or [15,21] for now */
SpecialBitmaps* bitmaps; SpecialBitmaps* bitmaps;
XP_UCHAR** chars; XP_UCHAR** chars;
XP_UCHAR** charEnds; XP_UCHAR** charEnds;

View file

@ -38,34 +38,40 @@ LANGINFO: will simply be excluded from the dictionary.</p>
# English==1. Low byte is padding # English==1. Low byte is padding
XLOC_HEADER:0x8100 XLOC_HEADER:0x8100
# COUNT_SIZES: The board sizes that the count columns in the
# BEGIN_TILES section (the columns after the face and the value) apply
# to, in order. 15 is the default, and COUNT_SIZES is not needed if
# there's only one count column and it's for a 15x15 board. Having
# only one count column that's for a size other than 15 is an error.
COUNT_SIZES: 15 21
<BEGIN_TILES> <BEGIN_TILES>
2 0 {"_"} {"_"} 0 2 4
9 1 'A|a' 'A|a' 1 9 16
2 3 'B|b' 'B|b' 3 2 4
2 3 'C|c' 'C|c' 3 2 6
4 2 'D|d' 'D|d' 2 4 8
12 1 'E|e' 'E|e' 1 12 24
2 4 'F|f' 'F|f' 4 2 4
3 2 'G|g' 'G|g' 2 3 5
2 4 'H|h' 'H|h' 4 2 5
9 1 'I|i' 'I|i' 1 9 13
1 8 'J|j' 'J|j' 8 1 2
1 5 'K|k' 'K|k' 5 1 2
4 1 'L|l' 'L|l' 1 4 7
2 3 'M|m' 'M|m' 3 2 6
6 1 'N|n' 'N|n' 1 6 13
8 1 'O|o' 'O|o' 1 8 15
2 3 'P|p' 'P|p' 3 2 4
1 10 'Q|q' 'Q|q' 10 1 2
6 1 'R|r' 'R|r' 1 6 13
4 1 'S|s' 'S|s' 1 4 10
6 1 'T|t' 'T|t' 1 6 15
4 1 'U|u' 'U|u' 1 4 7
2 4 'V|v' 'V|v' 4 2 3
2 4 'W|w' 'W|w' 4 2 4
1 8 'X|x' 'X|x' 8 1 2
2 4 'Y|y' 'Y|y' 4 2 4
1 10 'Z|z' 'Z|z' 10 1 2
<END_TILES> <END_TILES>
# should ignore all after the <END> above # should ignore all after the <END> above

View file

@ -220,7 +220,8 @@ endif
frankspecials.bin: ../frank_mkspecials.py $(BMPFILES) frankspecials.bin: ../frank_mkspecials.py $(BMPFILES)
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@ $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin charcount.bin table.bin values.bin frankspecials.bin $(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin \
charcount.bin table.bin values.bin frankspecials.bin
cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin charcount.bin table.bin values.bin \ cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin charcount.bin table.bin values.bin \
frankspecials.bin $(XWLANG)StartLoc.bin \ frankspecials.bin $(XWLANG)StartLoc.bin \
$$(ls dawg$(XWLANG)$*_*.bin) > $@ $$(ls dawg$(XWLANG)$*_*.bin) > $@
@ -273,6 +274,9 @@ allbins:
$(MAKE) TARGET_TYPE=FRANK byodbins $(MAKE) TARGET_TYPE=FRANK byodbins
rm palmspecials.bin rm palmspecials.bin
boardSizes.bin: ../xloc.py info.txt
../xloc.py -bs -out $@
table.bin: ../xloc.py table.bin: ../xloc.py
ifdef NEWDAWG ifdef NEWDAWG
../xloc.py $(ENCP) -tn -out $@ ../xloc.py $(ENCP) -tn -out $@
@ -308,7 +312,7 @@ $(XWLANG)%_headerFlags.bin:
perl -e "print pack(\"n\",$$FLAGS)" > $@ perl -e "print pack(\"n\",$$FLAGS)" > $@
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \ $(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \
$(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin boardSizes.bin
SIZ=0; \ SIZ=0; \
for FILE in $+; do \ for FILE in $+; do \
SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \ SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \

View file

@ -54,12 +54,13 @@ def splitFaces( buf ):
return faces return faces
def loadCountsAndValues( fh, numFaces, data ): def loadCountsAndValues( fh, numFaces, nSizes, data ):
twoBytesFmt = struct.Struct('BB')
for ii in range(numFaces): for ii in range(numFaces):
pair = twoBytesFmt.unpack(fh.read(twoBytesFmt.size)) counts = []
data[ii]['count'] = int(pair[0]) for jj in range(nSizes):
data[ii]['val'] = int(pair[1]) counts.append(int.from_bytes(fh.read(1), 'little'))
data[ii]['counts'] = counts
data[ii]['val'] = int.from_bytes(fh.read(1), 'little')
def eatBitmap( fh ): def eatBitmap( fh ):
nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0]) nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0])
@ -142,6 +143,7 @@ def process(args):
with open(args.DAWG, "rb") as dawg: with open(args.DAWG, "rb") as dawg:
nWords = 0 nWords = 0
boardSizes = [15]
headerFmt = struct.Struct('!HH') headerFmt = struct.Struct('!HH')
(flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size)) (flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size))
@ -162,7 +164,21 @@ def process(args):
sys.exit(0) sys.exit(0)
md5Sum = getNullTermParam(header) md5Sum = getNullTermParam(header)
print( 'header: read sum: {}'.format(md5Sum), file=sys.stderr ) print( 'header: read sum: {}'.format(md5Sum), file=sys.stderr )
except:
# skip header flags
header.read(2)
print( 'header: skipped flags', file=sys.stderr)
nBoardSizes = int.from_bytes(header.read(1), 'big')
print( 'header: nBoardSizes: {}'.format(nBoardSizes), file=sys.stderr )
boardSizes = []
for ii in range(nBoardSizes):
siz = int.from_bytes(header.read(1), 'big')
boardSizes.append(siz)
print( 'header: read sizes: {}'.format(boardSizes), file=sys.stderr)
except Exception as ex:
print( 'header: exception!! {} '.format(ex) )
md5Sum = None md5Sum = None
if args.GET_SUM: if args.GET_SUM:
@ -214,7 +230,7 @@ def process(args):
langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0] langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0]
dawg.read( oneByteFmt.size ) # skip byte dawg.read( oneByteFmt.size ) # skip byte
loadCountsAndValues( dawg, numFaces, data ) loadCountsAndValues( dawg, numFaces, len(boardSizes), data )
loadSpecialData( dawg, data ) loadSpecialData( dawg, data )
offsetStruct = struct.Struct('!L') offsetStruct = struct.Struct('!L')

View file

@ -8,7 +8,10 @@ def errorOut(msg):
def mkParser(): def mkParser():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' ) parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' )
parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', help = 'output table file' ) parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true',
help = 'output table file' )
parser.add_argument('-bs', dest = 'DO_BOARDSIZE', action = 'store_true',
help = 'output boardSizes file' )
# parser.add_argument('-tn', dest = 'UNICODE', default = False, # parser.add_argument('-tn', dest = 'UNICODE', default = False,
# action = 'store_true', help = 'assume unicode') # action = 'store_true', help = 'assume unicode')
@ -53,8 +56,10 @@ def parseTileInfo(infoFile, encoding):
if sEndTiles.match(line): if sEndTiles.match(line):
break break
else: else:
(count, val, face) = line.split(None, 2) (face, val, counts) = line.split(None, 2)
result['_TILES'].append((count, val, face)) result['_TILES'].append({'counts': counts,
'val': val,
'face': face})
elif sBeginTiles.match(line): elif sBeginTiles.match(line):
inTiles = True inTiles = True
else: else:
@ -72,11 +77,11 @@ def printLetters( letters, outfile ):
outfile.write(letters.encode('utf8')) outfile.write(letters.encode('utf8'))
def writeMapFile(xlocToken, outfile): def writeMapFile(xlocToken, outfile):
print('writeMapFile()') print('writeMapFile(out={})'.format(outfile))
tiles = xlocToken['_TILES'] tiles = xlocToken['_TILES']
specialCount = 0 specialCount = 0
for tile in tiles: for tile in tiles:
face = tile[2] face = tile['face']
match = sSingleCharMatch.match(face) match = sSingleCharMatch.match(face)
if match: if match:
printLetters( match.group(1), outfile ) printLetters( match.group(1), outfile )
@ -94,13 +99,25 @@ def writeMapFile(xlocToken, outfile):
def writeValuesFile(xlocToken, outfile): def writeValuesFile(xlocToken, outfile):
header = xlocToken.get('XLOC_HEADER') or errorOut('no XLOC_HEADER found') header = xlocToken.get('XLOC_HEADER') or errorOut('no XLOC_HEADER found')
print('writing header: {}'.format(header)) print('writeValuesFile(out={}): writing header: {}'.format(outfile, header))
outfile.write(struct.pack('!H', int(header, 16))) outfile.write(struct.pack('!H', int(header, 16)))
nCounts = 0
for tile in xlocToken['_TILES']: for tile in xlocToken['_TILES']:
val = int(tile[0]) counts = tile['counts'].split()
count = int(tile[1]) assert nCounts == 0 or nCounts == len(counts)
outfile.write(struct.pack('BB', val, count)) nCounts = len(counts)
for count in counts:
outfile.write(struct.pack('B', int(count)))
val = int(tile['val'])
outfile.write(struct.pack('B', val))
def writeBoardSizesFile(xlocToken, outfile):
cs = xlocToken.get('COUNT_SIZES', '15').split()
outfile.write(struct.pack('B', len(cs)))
for siz in cs:
outfile.write(struct.pack('B', int(siz)))
def main(): def main():
print('{}.main {} called'.format(sys.argv[0], sys.argv[1:])) print('{}.main {} called'.format(sys.argv[0], sys.argv[1:]))
@ -127,6 +144,11 @@ def main():
with open(path, 'wb') as outfile: with open(path, 'wb') as outfile:
writeValuesFile( xlocToken, outfile ) writeValuesFile( xlocToken, outfile )
if args.DO_BOARDSIZE and args.OUTFILE:
with open(args.OUTFILE, 'wb') as outfile:
writeBoardSizesFile( xlocToken, outfile )
############################################################################## ##############################################################################
if __name__ == '__main__': if __name__ == '__main__':
main() main()