add board sizes to wordlist format (English only)

Modify the language metadata so that tile counts can differ per board size, and make the changes needed to load such files. Works on the Linux version at least. Only English will build for now, because the info.txt format changed and only the English file has been updated.
2025-01-29 08:34:37 +01:00 · 2022-03-14 18:28:08 +01:00 · 2022-03-14 18:28:08 +01:00 · cdc77eaf68
commit cdc77eaf68
parent cd4fb88a4d
6 changed files with 150 additions and 68 deletions
--- a/xwords4/common/dictnry.c
+++ b/xwords4/common/dictnry.c
@ -250,12 +250,12 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
            if ( 0 < headerLen ) {
                dctx->desc = getNullTermParam( dctx, &ptr, &headerLen );
            } else {
-                XP_LOGF( "%s: no note", __func__ );
+                XP_LOGFF( "no note" );
            }
            if ( 0 < headerLen ) {
                dctx->md5Sum = getNullTermParam( dctx, &ptr, &headerLen );
            } else {
-                XP_LOGF( "%s: no md5Sum", __func__ );
+                XP_LOGFF( "no md5Sum" );
            }

            XP_U16 headerFlags = 0;
@ -268,12 +268,27 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
            XP_LOGFF( "setting headerFlags: 0x%x", headerFlags );
            dctx->headerFlags = headerFlags;

+            if ( 0 < headerLen ) {
+                dctx->nBoardSizes = *ptr++;
+                XP_ASSERT( dctx->nBoardSizes <= VSIZE(dctx->boardSizes) );
+                for ( int ii = 0; ii < dctx->nBoardSizes; ++ii ) {
+                    dctx->boardSizes[ii] = *ptr++;
+                }
+                headerLen -= 1 + dctx->nBoardSizes;
+                XP_ASSERT( 0 <= headerLen );
+            }
+
            if ( 0 < headerLen ) {
                XP_LOGFF( "skipping %d bytes of header", headerLen );
            }
            ptr += headerLen;
        }

+        if ( 0 == dctx->nBoardSizes ) { /* wasn't provided */
+            dctx->boardSizes[0] = 15;
+            dctx->nBoardSizes = 1;
+        }
+
        if ( isUTF8 ) {
            numFaceBytes = *ptr++;
        }
@ -301,7 +316,8 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e

        dctx->nFaces = numFaces;

-        dctx->countsAndValues = XP_MALLOC( dctx->mpool, numFaces * 2 );
+        dctx->countsAndValues = XP_MALLOC( dctx->mpool,
+                                           numFaces * (1 + dctx->nBoardSizes) );
        XP_U16 facesSize = numFaceBytes;
        if ( !isUTF8 ) {
            facesSize /= 2;
@ -316,8 +332,9 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e
        unsigned short xloc;
        XP_MEMCPY( &xloc, ptr, sizeof(xloc) );
        ptr += sizeof(xloc);
-        XP_MEMCPY( dctx->countsAndValues, ptr, numFaces*2 );
-        ptr += numFaces*2;
+        size_t cvSize = numFaces * (1 + dctx->nBoardSizes);
+        XP_MEMCPY( dctx->countsAndValues, ptr, cvSize );
+        ptr += cvSize;

        dctx->langCode = xloc & 0x7F;
    }
@ -413,7 +430,7 @@ dict_getBlankTile( const DictionaryCtxt* dict )
 } /* dict_getBlankTile */

 XP_U16
-dict_getTileValue( const DictionaryCtxt* dict, Tile tile )
+dict_getTileValue( const DictionaryCtxt* dict, const Tile tile )
 {
    XP_ASSERT( !!dict );
    if ( (tile & TILE_VALUE_MASK) != tile ) {
@ -421,9 +438,11 @@ dict_getTileValue( const DictionaryCtxt* dict, Tile tile )
                   tile == dict_getBlankTile( dict ) );
    }
    XP_ASSERT( tile < dict->nFaces );
-    tile *= 2;
+    int offset = tile * (1 + dict->nBoardSizes);
    XP_ASSERT( !!dict->countsAndValues );
-    return dict->countsAndValues[tile+1];    
+    XP_U16 result = dict->countsAndValues[offset + dict->nBoardSizes];
+    /* XP_LOGFF( "(%d) => %d", tile, result ); */
+    return result;
 } /* dict_getTileValue */

 static const XP_UCHAR*
@ -479,22 +498,32 @@ dict_getNextTileString( const DictionaryCtxt* dict, Tile tile,
 XP_U16
 dict_numTilesForSize( const DictionaryCtxt* dict, Tile tile, XP_U16 nCols )
 {
-    tile *= 2;
-    XP_U16 count = dict->countsAndValues[tile];
-
-    /* Wordlists are built assuming 15x15  boards. Different sized boards need
-       different numbers of tiles. The wordlist  might provide for the size we
-       have. If not, let's adjust the count based on how many squares we have
-       vs. 15x15.
-    */
-    XP_U16 pct = (nCols * nCols * 100) / (15 * 15);
-    XP_U16 newCount = count * pct / 100;
-    if ( 50 < (count * pct) % 100 ) {
-        ++newCount;
+    XP_Bool matched = XP_FALSE;
+    int offset = tile * (1 + dict->nBoardSizes);
+    for ( int ii = 0; !matched && ii < dict->nBoardSizes; ++ii ) {
+        if ( nCols == dict->boardSizes[ii] ) { /* perfect match? */
+            offset += ii;
+            matched = XP_TRUE;
+        }
    }
-    XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct );
-    count = newCount;

+    XP_U16 count = dict->countsAndValues[offset];
+    if ( !matched ) {
+        /* Older wordlists are built assuming 15x15 boards. Different sized
+           boards need different numbers of tiles. The wordlist might provide
+           for the size we have. If not, let's adjust the count based on how
+           many squares we have vs. 15x15.
+        */
+        XP_U16 pct = (nCols * nCols * 100) / (15 * 15);
+        XP_U16 newCount = count * pct / 100;
+        if ( 50 < (count * pct) % 100 ) {
+            ++newCount;
+        }
+        // XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct );
+        count = newCount;
+    }
+
+    // XP_LOGFF( "(tile=%d, ncols=%d) => %d", tile, nCols, count );
    return count;
 } /* dict_numTilesForSize */

@ -702,6 +731,7 @@ dict_writeToStream( const DictionaryCtxt* dict, XWStreamCtxt* stream )

    stream_putBits( stream, 6, dict->nFaces );

+    XP_ASSERT(0);       /* if this fires, need to fix for per-boardSize counts */
    for ( ii = 0; ii < dict->nFaces*2; ii+=2 ) {
        XP_U16 count, value;

@ -789,6 +819,7 @@ common_destructor( DictionaryCtxt* dict, XWEnv XP_UNUSED(xwe) )
 void
 dict_loadFromStream( DictionaryCtxt* dict, XWEnv xwe, XWStreamCtxt* stream )
 {
+    XP_ASSERT(0);       /* if this fires, need to fix for per-boardSize counts */
    XP_U8 nFaces, nFaceBytes;
    XP_U16 maxCountBits, maxValueBits;
    XP_U16 ii, nSpecials;
@ -1043,7 +1074,7 @@ make_stubbed_dict( MPFORMAL_NOCOMMA )
    setBlankTile( dict );

    return dict;
-} /* make_subbed_dict */
+} /* make_stubbed_dict */

 #endif /* STUBBED_DICT */

--- a/xwords4/common/dictnry.h
+++ b/xwords4/common/dictnry.h
@ -88,6 +88,9 @@ struct DictionaryCtxt {
    const XP_UCHAR** facePtrs;  /* elems point into faces, above */
    XP_U8* countsAndValues;

+    XP_U8 nBoardSizes;
+    XP_U8 boardSizes[2];        /* will be [15] or [15,21] for now */
+
    SpecialBitmaps* bitmaps;
    XP_UCHAR** chars;
    XP_UCHAR** charEnds;
--- a/xwords4/dawg/English/info.txt
+++ b/xwords4/dawg/English/info.txt
@ -38,34 +38,40 @@ LANGINFO: will simply be excluded from the dictionary.</p>
 # English==1.  Low byte is padding
 XLOC_HEADER:0x8100

+# COUNT_SIZES: the board sizes, in order, that the count columns (third
+# and later) in the BEGIN_TILES section are for. 15 is the default, and
+# COUNT_SIZES is not needed if there's only one count column and it's for
+# a 15x15 board. Having only one column that's for other than 15 is an error.
+COUNT_SIZES: 15 21
+
 <BEGIN_TILES>
-2			0		{"_"}
-9			1		'A|a'
-2			3		'B|b'
-2			3		'C|c'
-4			2		'D|d'
-12			1		'E|e'
-2			4		'F|f'
-3			2		'G|g'
-2			4		'H|h'
-9			1		'I|i'
-1			8		'J|j'
-1			5		'K|k'
-4			1		'L|l'
-2			3		'M|m'
-6			1		'N|n'
-8			1		'O|o'
-2			3		'P|p'
-1			10		'Q|q'
-6			1		'R|r'
-4			1		'S|s'
-6			1		'T|t'
-4			1		'U|u'
-2			4		'V|v'
-2			4		'W|w'
-1			8		'X|x'
-2			4		'Y|y'
-1			10		'Z|z'
+{"_"}		0		 2		4
+'A|a'		1		 9		16
+'B|b'		3		 2		4
+'C|c'		3		 2		6
+'D|d'		2		 4		8
+'E|e'		1		 12		24
+'F|f'		4		 2		4
+'G|g'		2		 3		5
+'H|h'		4		 2		5
+'I|i'		1		 9		13
+'J|j'		8		 1		2
+'K|k'		5		 1		2
+'L|l'		1		 4		7
+'M|m'		3		 2		6
+'N|n'		1		 6		13
+'O|o'		1		 8		15
+'P|p'		3		 2		4
+'Q|q'		10		 1		2
+'R|r'		1		 6		13
+'S|s'		1		 4		10
+'T|t'		1		 6		15
+'U|u'		1		 4		7
+'V|v'		4		 2		3
+'W|w'		4		 2		4
+'X|x'		8		 1		2
+'Y|y'		4		 2		4
+'Z|z'		10		 1		2
 <END_TILES>

 # should ignore all after the <END> above
--- a/xwords4/dawg/Makefile.langcommon
+++ b/xwords4/dawg/Makefile.langcommon
@ -220,7 +220,8 @@ endif
 frankspecials.bin: ../frank_mkspecials.py  $(BMPFILES)
 	$< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@

-$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin charcount.bin table.bin values.bin frankspecials.bin 
+$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin \
+		charcount.bin table.bin values.bin frankspecials.bin
 	cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin charcount.bin table.bin values.bin \
 		frankspecials.bin $(XWLANG)StartLoc.bin  \
 		$$(ls dawg$(XWLANG)$*_*.bin) > $@
@ -273,6 +274,9 @@ allbins:
 	$(MAKE) TARGET_TYPE=FRANK byodbins
 	rm palmspecials.bin

+boardSizes.bin: ../xloc.py info.txt
+	../xloc.py -bs -out $@
+
 table.bin:  ../xloc.py
 ifdef NEWDAWG
 	../xloc.py $(ENCP) -tn -out $@
@ -308,7 +312,7 @@ $(XWLANG)%_headerFlags.bin:
 	perl -e "print pack(\"n\",$$FLAGS)" > $@

 $(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \
-		$(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin
+		$(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin boardSizes.bin
 	SIZ=0; \
 	for FILE in $+; do \
 		SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \
--- a/xwords4/dawg/dawg2dict.py
+++ b/xwords4/dawg/dawg2dict.py
@ -54,12 +54,13 @@ def splitFaces( buf ):

    return faces

-def loadCountsAndValues( fh, numFaces, data ):
-    twoBytesFmt = struct.Struct('BB')
+def loadCountsAndValues( fh, numFaces, nSizes, data ):
    for ii in range(numFaces):
-        pair = twoBytesFmt.unpack(fh.read(twoBytesFmt.size))
-        data[ii]['count'] = int(pair[0])
-        data[ii]['val'] = int(pair[1])
+        counts = []
+        for jj in range(nSizes):
+            counts.append(int.from_bytes(fh.read(1), 'little'))
+        data[ii]['counts'] = counts
+        data[ii]['val'] = int.from_bytes(fh.read(1), 'little')

 def eatBitmap( fh ):
    nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0])
@ -142,6 +143,7 @@ def process(args):

    with open(args.DAWG, "rb") as dawg:
        nWords = 0
+        boardSizes = [15]

        headerFmt = struct.Struct('!HH')
        (flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size))
@ -162,7 +164,21 @@ def process(args):
                    sys.exit(0)
                md5Sum = getNullTermParam(header)
                print( 'header: read sum: {}'.format(md5Sum), file=sys.stderr )
-            except:
+
+                # skip header flags
+                header.read(2)
+                print( 'header: skipped flags', file=sys.stderr)
+
+                nBoardSizes = int.from_bytes(header.read(1), 'big')
+                print( 'header: nBoardSizes: {}'.format(nBoardSizes), file=sys.stderr )
+                boardSizes = []
+                for ii in range(nBoardSizes):
+                    siz = int.from_bytes(header.read(1), 'big')
+                    boardSizes.append(siz)
+                print( 'header: read sizes: {}'.format(boardSizes), file=sys.stderr)
+
+            except Exception as ex:
+                print( 'header: exception!! {} '.format(ex) )
                md5Sum = None

            if args.GET_SUM:
@ -214,7 +230,7 @@ def process(args):
        langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0]
        dawg.read( oneByteFmt.size ) # skip byte

-        loadCountsAndValues( dawg, numFaces, data )
+        loadCountsAndValues( dawg, numFaces, len(boardSizes), data )
        loadSpecialData( dawg, data )

        offsetStruct = struct.Struct('!L')
--- a/xwords4/dawg/xloc.py
+++ b/xwords4/dawg/xloc.py
@ -8,7 +8,10 @@ def errorOut(msg):
 def mkParser():
    parser = argparse.ArgumentParser()
    parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' )
-    parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', help = 'output table file' )
+    parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true',
+                        help = 'output table file' )
+    parser.add_argument('-bs', dest = 'DO_BOARDSIZE', action = 'store_true',
+                        help = 'output boardSizes file' )

    # parser.add_argument('-tn', dest = 'UNICODE', default = False,
    #                     action = 'store_true', help = 'assume unicode')
@ -53,8 +56,10 @@ def parseTileInfo(infoFile, encoding):
                if sEndTiles.match(line):
                    break
                else:
-                    (count, val, face) = line.split(None, 2)
-                    result['_TILES'].append((count, val, face))
+                    (face, val, counts) = line.split(None, 2)
+                    result['_TILES'].append({'counts': counts,
+                                             'val': val,
+                                             'face': face})
            elif sBeginTiles.match(line):
                inTiles = True
            else:
@ -72,11 +77,11 @@ def printLetters( letters, outfile ):
    outfile.write(letters.encode('utf8'))

 def writeMapFile(xlocToken, outfile):
-    print('writeMapFile()')
+    print('writeMapFile(out={})'.format(outfile))
    tiles = xlocToken['_TILES']
    specialCount = 0
    for tile in tiles:
-        face = tile[2]
+        face = tile['face']
        match = sSingleCharMatch.match(face)
        if match:
            printLetters( match.group(1), outfile )
@ -94,13 +99,25 @@ def writeMapFile(xlocToken, outfile):
 def writeValuesFile(xlocToken, outfile):
    header = xlocToken.get('XLOC_HEADER') or errorOut('no XLOC_HEADER found')

-    print('writing header: {}'.format(header))
+    print('writeValuesFile(out={}): writing header: {}'.format(outfile, header))
    outfile.write(struct.pack('!H', int(header, 16)))

+    nCounts = 0
    for tile in xlocToken['_TILES']:
-        val = int(tile[0])
-        count = int(tile[1])
-        outfile.write(struct.pack('BB', val, count))
+        counts = tile['counts'].split()
+        assert nCounts == 0 or nCounts == len(counts)
+        nCounts = len(counts)
+        for count in counts:
+            outfile.write(struct.pack('B', int(count)))
+
+        val = int(tile['val'])
+        outfile.write(struct.pack('B', val))
+
+def writeBoardSizesFile(xlocToken, outfile):
+    cs = xlocToken.get('COUNT_SIZES', '15').split()
+    outfile.write(struct.pack('B', len(cs)))
+    for siz in cs:
+        outfile.write(struct.pack('B', int(siz)))

 def main():
    print('{}.main {} called'.format(sys.argv[0], sys.argv[1:]))
@ -127,6 +144,11 @@ def main():
        with open(path, 'wb') as outfile:
            writeValuesFile( xlocToken, outfile )

+    if args.DO_BOARDSIZE and args.OUTFILE:
+        with open(args.OUTFILE, 'wb') as outfile:
+            writeBoardSizesFile( xlocToken, outfile )
+
+
 ##############################################################################
 if __name__ == '__main__':
    main()