add board sizes to wordlist format (English only)

Modify language metadata so that tile counts can differ by board
size, and make the changes needed to load such files. Works on the
Linux version at least. Only English will build for now, because of
changes to the info.txt format.
This commit is contained in:
Eric House 2022-03-14 18:28:08 +01:00
parent cd4fb88a4d
commit cdc77eaf68
6 changed files with 150 additions and 68 deletions

View file

@ -250,12 +250,12 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
if ( 0 < headerLen ) {
dctx->desc = getNullTermParam( dctx, &ptr, &headerLen );
} else {
XP_LOGF( "%s: no note", __func__ );
XP_LOGFF( "no note" );
}
if ( 0 < headerLen ) {
dctx->md5Sum = getNullTermParam( dctx, &ptr, &headerLen );
} else {
XP_LOGF( "%s: no md5Sum", __func__ );
XP_LOGFF( "no md5Sum" );
}
XP_U16 headerFlags = 0;
@ -268,12 +268,27 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
XP_LOGFF( "setting headerFlags: 0x%x", headerFlags );
dctx->headerFlags = headerFlags;
if ( 0 < headerLen ) {
dctx->nBoardSizes = *ptr++;
XP_ASSERT( dctx->nBoardSizes <= VSIZE(dctx->boardSizes) );
for ( int ii = 0; ii < dctx->nBoardSizes; ++ii ) {
dctx->boardSizes[ii] = *ptr++;
}
headerLen -= 1 + dctx->nBoardSizes;
XP_ASSERT( 0 <= headerLen );
}
if ( 0 < headerLen ) {
XP_LOGFF( "skipping %d bytes of header", headerLen );
}
ptr += headerLen;
}
if ( 0 == dctx->nBoardSizes ) { /* wasn't provided */
dctx->boardSizes[0] = 15;
dctx->nBoardSizes = 1;
}
if ( isUTF8 ) {
numFaceBytes = *ptr++;
}
@ -301,7 +316,8 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
dctx->nFaces = numFaces;
dctx->countsAndValues = XP_MALLOC( dctx->mpool, numFaces * 2 );
dctx->countsAndValues = XP_MALLOC( dctx->mpool,
numFaces * (1 + dctx->nBoardSizes) );
XP_U16 facesSize = numFaceBytes;
if ( !isUTF8 ) {
facesSize /= 2;
@ -316,8 +332,9 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
unsigned short xloc;
XP_MEMCPY( &xloc, ptr, sizeof(xloc) );
ptr += sizeof(xloc);
XP_MEMCPY( dctx->countsAndValues, ptr, numFaces*2 );
ptr += numFaces*2;
size_t cvSize = numFaces * (1 + dctx->nBoardSizes);
XP_MEMCPY( dctx->countsAndValues, ptr, cvSize );
ptr += cvSize;
dctx->langCode = xloc & 0x7F;
}
@ -413,7 +430,7 @@ dict_getBlankTile( const DictionaryCtxt* dict )
} /* dict_getBlankTile */
XP_U16
dict_getTileValue( const DictionaryCtxt* dict, Tile tile )
dict_getTileValue( const DictionaryCtxt* dict, const Tile tile )
{
XP_ASSERT( !!dict );
if ( (tile & TILE_VALUE_MASK) != tile ) {
@ -421,9 +438,11 @@ dict_getTileValue( const DictionaryCtxt* dict, Tile tile )
tile == dict_getBlankTile( dict ) );
}
XP_ASSERT( tile < dict->nFaces );
tile *= 2;
int offset = tile * (1 + dict->nBoardSizes);
XP_ASSERT( !!dict->countsAndValues );
return dict->countsAndValues[tile+1];
XP_U16 result = dict->countsAndValues[offset + dict->nBoardSizes];
/* XP_LOGFF( "(%d) => %d", tile, result ); */
return result;
} /* dict_getTileValue */
static const XP_UCHAR*
@ -479,22 +498,32 @@ dict_getNextTileString( const DictionaryCtxt* dict, Tile tile,
XP_U16
dict_numTilesForSize( const DictionaryCtxt* dict, Tile tile, XP_U16 nCols )
{
tile *= 2;
XP_U16 count = dict->countsAndValues[tile];
XP_Bool matched = XP_FALSE;
int offset = tile * (1 + dict->nBoardSizes);
for ( int ii = 0; !matched && ii < dict->nBoardSizes; ++ii ) {
if ( nCols == dict->boardSizes[ii] ) { /* perfect match? */
offset += ii;
matched = XP_TRUE;
}
}
/* Wordlists are built assuming 15x15 boards. Different sized boards need
different numbers of tiles. The wordlist might provide for the size we
have. If not, let's adjust the count based on how many squares we have
vs. 15x15.
XP_U16 count = dict->countsAndValues[offset];
if ( !matched ) {
/* Older wordlists are built assuming 15x15 boards. Different sized
boards need different numbers of tiles. The wordlist might provide
for the size we have. If not, let's adjust the count based on how
many squares we have vs. 15x15.
*/
XP_U16 pct = (nCols * nCols * 100) / (15 * 15);
XP_U16 newCount = count * pct / 100;
if ( 50 < (count * pct) % 100 ) {
++newCount;
}
XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct );
// XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct );
count = newCount;
}
// XP_LOGFF( "(tile=%d, ncols=%d) => %d", tile, nCols, count );
return count;
} /* dict_numTiles */
@ -702,6 +731,7 @@ dict_writeToStream( const DictionaryCtxt* dict, XWStreamCtxt* stream )
stream_putBits( stream, 6, dict->nFaces );
XP_ASSERT(0); /* if this fires, need to fix for per-boardSize counts */
for ( ii = 0; ii < dict->nFaces*2; ii+=2 ) {
XP_U16 count, value;
@ -789,6 +819,7 @@ common_destructor( DictionaryCtxt* dict, XWEnv XP_UNUSED(xwe) )
void
dict_loadFromStream( DictionaryCtxt* dict, XWEnv xwe, XWStreamCtxt* stream )
{
XP_ASSERT(0); /* if this fires, need to fix for per-boardSize counts */
XP_U8 nFaces, nFaceBytes;
XP_U16 maxCountBits, maxValueBits;
XP_U16 ii, nSpecials;
@ -1043,7 +1074,7 @@ make_stubbed_dict( MPFORMAL_NOCOMMA )
setBlankTile( dict );
return dict;
} /* make_subbed_dict */
} /* make_stubbed_dict */
#endif /* STUBBED_DICT */

View file

@ -88,6 +88,9 @@ struct DictionaryCtxt {
const XP_UCHAR** facePtrs; /* elems point into faces, above */
XP_U8* countsAndValues;
XP_U8 nBoardSizes;
XP_U8 boardSizes[2]; /* will be [15] or [15,21] for now */
SpecialBitmaps* bitmaps;
XP_UCHAR** chars;
XP_UCHAR** charEnds;

View file

@ -38,34 +38,40 @@ LANGINFO: will simply be excluded from the dictionary.</p>
# English==1. Low byte is padding
XLOC_HEADER:0x8100
# COUNT_SIZES: the board sizes that the count columns (columns 3-n) in
# the BEGIN_TILES section apply to, listed in column order. 15 is the
# default, and COUNT_SIZES is not needed if there's only one count
# column and it's for a 15x15 board. Having only one column that's for
# a size other than 15 is an error.
COUNT_SIZES: 15 21
<BEGIN_TILES>
2 0 {"_"}
9 1 'A|a'
2 3 'B|b'
2 3 'C|c'
4 2 'D|d'
12 1 'E|e'
2 4 'F|f'
3 2 'G|g'
2 4 'H|h'
9 1 'I|i'
1 8 'J|j'
1 5 'K|k'
4 1 'L|l'
2 3 'M|m'
6 1 'N|n'
8 1 'O|o'
2 3 'P|p'
1 10 'Q|q'
6 1 'R|r'
4 1 'S|s'
6 1 'T|t'
4 1 'U|u'
2 4 'V|v'
2 4 'W|w'
1 8 'X|x'
2 4 'Y|y'
1 10 'Z|z'
{"_"} 0 2 4
'A|a' 1 9 16
'B|b' 3 2 4
'C|c' 3 2 6
'D|d' 2 4 8
'E|e' 1 12 24
'F|f' 4 2 4
'G|g' 2 3 5
'H|h' 4 2 5
'I|i' 1 9 13
'J|j' 8 1 2
'K|k' 5 1 2
'L|l' 1 4 7
'M|m' 3 2 6
'N|n' 1 6 13
'O|o' 1 8 15
'P|p' 3 2 4
'Q|q' 10 1 2
'R|r' 1 6 13
'S|s' 1 4 10
'T|t' 1 6 15
'U|u' 1 4 7
'V|v' 4 2 3
'W|w' 4 2 4
'X|x' 8 1 2
'Y|y' 4 2 4
'Z|z' 10 1 2
<END_TILES>
# everything after the <END_TILES> above should be ignored

View file

@ -220,7 +220,8 @@ endif
frankspecials.bin: ../frank_mkspecials.py $(BMPFILES)
$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin charcount.bin table.bin values.bin frankspecials.bin
$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin \
charcount.bin table.bin values.bin frankspecials.bin
cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin charcount.bin table.bin values.bin \
frankspecials.bin $(XWLANG)StartLoc.bin \
$$(ls dawg$(XWLANG)$*_*.bin) > $@
@ -273,6 +274,9 @@ allbins:
$(MAKE) TARGET_TYPE=FRANK byodbins
rm palmspecials.bin
# Generate the board-sizes bytes via xloc.py's -bs ("output boardSizes
# file") option; the result is listed as a prerequisite of the
# %_newheader.bin rule.
boardSizes.bin: ../xloc.py info.txt
../xloc.py -bs -out $@
table.bin: ../xloc.py
ifdef NEWDAWG
../xloc.py $(ENCP) -tn -out $@
@ -308,7 +312,7 @@ $(XWLANG)%_headerFlags.bin:
perl -e "print pack(\"n\",$$FLAGS)" > $@
$(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \
$(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin
$(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin boardSizes.bin
SIZ=0; \
for FILE in $+; do \
SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \

View file

@ -54,12 +54,13 @@ def splitFaces( buf ):
return faces
def loadCountsAndValues( fh, numFaces, data ):
twoBytesFmt = struct.Struct('BB')
def loadCountsAndValues( fh, numFaces, nSizes, data ):
for ii in range(numFaces):
pair = twoBytesFmt.unpack(fh.read(twoBytesFmt.size))
data[ii]['count'] = int(pair[0])
data[ii]['val'] = int(pair[1])
counts = []
for jj in range(nSizes):
counts.append(int.from_bytes(fh.read(1), 'little'))
data[ii]['counts'] = counts
data[ii]['val'] = int.from_bytes(fh.read(1), 'little')
def eatBitmap( fh ):
nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0])
@ -142,6 +143,7 @@ def process(args):
with open(args.DAWG, "rb") as dawg:
nWords = 0
boardSizes = [15]
headerFmt = struct.Struct('!HH')
(flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size))
@ -162,7 +164,21 @@ def process(args):
sys.exit(0)
md5Sum = getNullTermParam(header)
print( 'header: read sum: {}'.format(md5Sum), file=sys.stderr )
except:
# skip header flags
header.read(2)
print( 'header: skipped flags', file=sys.stderr)
nBoardSizes = int.from_bytes(header.read(1), 'big')
print( 'header: nBoardSizes: {}'.format(nBoardSizes), file=sys.stderr )
boardSizes = []
for ii in range(nBoardSizes):
siz = int.from_bytes(header.read(1), 'big')
boardSizes.append(siz)
print( 'header: read sizes: {}'.format(boardSizes), file=sys.stderr)
except Exception as ex:
print( 'header: exception!! {} '.format(ex) )
md5Sum = None
if args.GET_SUM:
@ -214,7 +230,7 @@ def process(args):
langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0]
dawg.read( oneByteFmt.size ) # skip byte
loadCountsAndValues( dawg, numFaces, data )
loadCountsAndValues( dawg, numFaces, len(boardSizes), data )
loadSpecialData( dawg, data )
offsetStruct = struct.Struct('!L')

View file

@ -8,7 +8,10 @@ def errorOut(msg):
def mkParser():
parser = argparse.ArgumentParser()
parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' )
parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', help = 'output table file' )
parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true',
help = 'output table file' )
parser.add_argument('-bs', dest = 'DO_BOARDSIZE', action = 'store_true',
help = 'output boardSizes file' )
# parser.add_argument('-tn', dest = 'UNICODE', default = False,
# action = 'store_true', help = 'assume unicode')
@ -53,8 +56,10 @@ def parseTileInfo(infoFile, encoding):
if sEndTiles.match(line):
break
else:
(count, val, face) = line.split(None, 2)
result['_TILES'].append((count, val, face))
(face, val, counts) = line.split(None, 2)
result['_TILES'].append({'counts': counts,
'val': val,
'face': face})
elif sBeginTiles.match(line):
inTiles = True
else:
@ -72,11 +77,11 @@ def printLetters( letters, outfile ):
outfile.write(letters.encode('utf8'))
def writeMapFile(xlocToken, outfile):
print('writeMapFile()')
print('writeMapFile(out={})'.format(outfile))
tiles = xlocToken['_TILES']
specialCount = 0
for tile in tiles:
face = tile[2]
face = tile['face']
match = sSingleCharMatch.match(face)
if match:
printLetters( match.group(1), outfile )
@ -94,13 +99,25 @@ def writeMapFile(xlocToken, outfile):
def writeValuesFile(xlocToken, outfile):
header = xlocToken.get('XLOC_HEADER') or errorOut('no XLOC_HEADER found')
print('writing header: {}'.format(header))
print('writeValuesFile(out={}): writing header: {}'.format(outfile, header))
outfile.write(struct.pack('!H', int(header, 16)))
nCounts = 0
for tile in xlocToken['_TILES']:
val = int(tile[0])
count = int(tile[1])
outfile.write(struct.pack('BB', val, count))
counts = tile['counts'].split()
assert nCounts == 0 or nCounts == len(counts)
nCounts = len(counts)
for count in counts:
outfile.write(struct.pack('B', int(count)))
val = int(tile['val'])
outfile.write(struct.pack('B', val))
def writeBoardSizesFile(xlocToken, outfile):
    """Write the board-sizes record: a count byte, then one byte per size.

    xlocToken -- mapping of parsed tokens; the optional 'COUNT_SIZES'
                 entry is a whitespace-separated list of board sizes,
                 one per tile-count column.
    outfile   -- binary file object that receives the record.

    A missing or blank COUNT_SIZES means a single count column for a
    15x15 board. Raises ValueError if the only size listed is not 15,
    or if a size is not an integer. All validation happens before the
    first byte is written, so a bad value never leaves a partial file.
    """
    sizes = [int(siz) for siz in xlocToken.get('COUNT_SIZES', '15').split()]
    if not sizes:
        # A blank value would otherwise emit a count of zero even though
        # every tile row still carries at least one count column.
        sizes = [15]
    if len(sizes) == 1 and sizes[0] != 15:
        raise ValueError(
            'COUNT_SIZES: a single count column must be for a 15x15 board, '
            'not {}'.format(sizes[0]))
    # One unsigned byte for the number of sizes, then one per size.
    outfile.write(struct.pack('{}B'.format(1 + len(sizes)), len(sizes), *sizes))
def main():
print('{}.main {} called'.format(sys.argv[0], sys.argv[1:]))
@ -127,6 +144,11 @@ def main():
with open(path, 'wb') as outfile:
writeValuesFile( xlocToken, outfile )
if args.DO_BOARDSIZE and args.OUTFILE:
with open(args.OUTFILE, 'wb') as outfile:
writeBoardSizesFile( xlocToken, outfile )
##############################################################################
if __name__ == '__main__':
main()