diff --git a/xwords4/common/dictnry.c b/xwords4/common/dictnry.c index afaad90a3..8d00473a4 100644 --- a/xwords4/common/dictnry.c +++ b/xwords4/common/dictnry.c @@ -250,12 +250,12 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e if ( 0 < headerLen ) { dctx->desc = getNullTermParam( dctx, &ptr, &headerLen ); } else { - XP_LOGF( "%s: no note", __func__ ); + XP_LOGFF( "no note" ); } if ( 0 < headerLen ) { dctx->md5Sum = getNullTermParam( dctx, &ptr, &headerLen ); } else { - XP_LOGF( "%s: no md5Sum", __func__ ); + XP_LOGFF( "no md5Sum" ); } XP_U16 headerFlags = 0; @@ -268,12 +268,27 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e XP_LOGFF( "setting headerFlags: 0x%x", headerFlags ); dctx->headerFlags = headerFlags; + if ( 0 < headerLen ) { + dctx->nBoardSizes = *ptr++; + XP_ASSERT( dctx->nBoardSizes <= VSIZE(dctx->boardSizes) ); + for ( int ii = 0; ii < dctx->nBoardSizes; ++ii ) { + dctx->boardSizes[ii] = *ptr++; + } + headerLen -= 1 + dctx->nBoardSizes; + XP_ASSERT( 0 <= headerLen ); + } + if ( 0 < headerLen ) { XP_LOGFF( "skipping %d bytes of header", headerLen ); } ptr += headerLen; } + if ( 0 == dctx->nBoardSizes ) { /* wasn't provided */ + dctx->boardSizes[0] = 15; + dctx->nBoardSizes = 1; + } + if ( isUTF8 ) { numFaceBytes = *ptr++; } @@ -301,7 +316,8 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e dctx->nFaces = numFaces; - dctx->countsAndValues = XP_MALLOC( dctx->mpool, numFaces * 2 ); + dctx->countsAndValues = XP_MALLOC( dctx->mpool, + numFaces * (1 + dctx->nBoardSizes) ); XP_U16 facesSize = numFaceBytes; if ( !isUTF8 ) { facesSize /= 2; @@ -316,8 +332,9 @@ parseCommon( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8** ptrp, const XP_U8* e unsigned short xloc; XP_MEMCPY( &xloc, ptr, sizeof(xloc) ); ptr += sizeof(xloc); - XP_MEMCPY( dctx->countsAndValues, ptr, numFaces*2 ); - ptr += numFaces*2; + size_t cvSize = numFaces * (1 + dctx->nBoardSizes); + XP_MEMCPY( dctx->countsAndValues, ptr, cvSize ); + ptr += cvSize; dctx->langCode = xloc & 0x7F; } @@ -413,7 +430,7 @@ dict_getBlankTile( const DictionaryCtxt* dict ) } /* dict_getBlankTile */ XP_U16 -dict_getTileValue( const DictionaryCtxt* dict, Tile tile ) +dict_getTileValue( const DictionaryCtxt* dict, const Tile tile ) { XP_ASSERT( !!dict ); if ( (tile & TILE_VALUE_MASK) != tile ) { @@ -421,9 +438,11 @@ dict_getTileValue( const DictionaryCtxt* dict, Tile tile ) tile == dict_getBlankTile( dict ) ); } XP_ASSERT( tile < dict->nFaces ); - tile *= 2; + int offset = tile * (1 + dict->nBoardSizes); XP_ASSERT( !!dict->countsAndValues ); - return dict->countsAndValues[tile+1]; + XP_U16 result = dict->countsAndValues[offset + dict->nBoardSizes]; + /* XP_LOGFF( "(%d) => %d", tile, result ); */ + return result; } /* dict_getTileValue */ static const XP_UCHAR* @@ -479,22 +498,32 @@ dict_getNextTileString( const DictionaryCtxt* dict, Tile tile, XP_U16 dict_numTilesForSize( const DictionaryCtxt* dict, Tile tile, XP_U16 nCols ) { - tile *= 2; - XP_U16 count = dict->countsAndValues[tile]; - - /* Wordlists are built assuming 15x15 boards. Different sized boards need - different numbers of tiles. The wordlist might provide for the size we - have. If not, let's adjust the count based on how many squares we have - vs. 15x15. - */ - XP_U16 pct = (nCols * nCols * 100) / (15 * 15); - XP_U16 newCount = count * pct / 100; - if ( 50 < (count * pct) % 100 ) { - ++newCount; + XP_Bool matched = XP_FALSE; + int offset = tile * (1 + dict->nBoardSizes); + for ( int ii = 0; !matched && ii < dict->nBoardSizes; ++ii ) { + if ( nCols == dict->boardSizes[ii] ) { /* perfect match? */ + offset += ii; + matched = XP_TRUE; + } } - XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct ); - count = newCount; + XP_U16 count = dict->countsAndValues[offset]; + if ( !matched ) { + /* Older wordlists are built assuming 15x15 boards. Different sized + boards need different numbers of tiles. The wordlist might provide + for the size we have. If not, let's adjust the count based on how + many squares we have vs. 15x15. + */ + XP_U16 pct = (nCols * nCols * 100) / (15 * 15); + XP_U16 newCount = count * pct / 100; + if ( 50 < (count * pct) % 100 ) { + ++newCount; + } + // XP_LOGFF( "adjusted count %d to %d based on pct of %d", count, newCount, pct ); + count = newCount; + } + + // XP_LOGFF( "(tile=%d, ncols=%d) => %d", tile, nCols, count ); return count; } /* dict_numTiles */ @@ -702,6 +731,7 @@ dict_writeToStream( const DictionaryCtxt* dict, XWStreamCtxt* stream ) stream_putBits( stream, 6, dict->nFaces ); + XP_ASSERT(0); /* if this fires, need to fix for per-boardSize counts */ for ( ii = 0; ii < dict->nFaces*2; ii+=2 ) { XP_U16 count, value; @@ -789,6 +819,7 @@ common_destructor( DictionaryCtxt* dict, XWEnv XP_UNUSED(xwe) ) void dict_loadFromStream( DictionaryCtxt* dict, XWEnv xwe, XWStreamCtxt* stream ) { + XP_ASSERT(0); /* if this fires, need to fix for per-boardSize counts */ XP_U8 nFaces, nFaceBytes; XP_U16 maxCountBits, maxValueBits; XP_U16 ii, nSpecials; @@ -1043,7 +1074,7 @@ make_stubbed_dict( MPFORMAL_NOCOMMA ) setBlankTile( dict ); return dict; -} /* make_subbed_dict */ +} /* make_stubbed_dict */ #endif /* STUBBED_DICT */ diff --git a/xwords4/common/dictnry.h b/xwords4/common/dictnry.h index 7815423b1..e47e1b1b1 100644 --- a/xwords4/common/dictnry.h +++ b/xwords4/common/dictnry.h @@ -88,6 +88,9 @@ struct DictionaryCtxt { const XP_UCHAR** facePtrs; /* elems point into faces, above */ XP_U8* countsAndValues; + XP_U8 nBoardSizes; + XP_U8 boardSizes[2]; /* will be [15] or [15,21] for now */ + SpecialBitmaps* bitmaps; XP_UCHAR** chars; XP_UCHAR** charEnds; diff --git a/xwords4/dawg/English/info.txt b/xwords4/dawg/English/info.txt index 40593ae95..a99abd55e 100644 --- a/xwords4/dawg/English/info.txt +++ b/xwords4/dawg/English/info.txt @@ -38,34 +38,40 @@ LANGINFO: will simply be excluded from the dictionary.

# English==1. Low byte is padding XLOC_HEADER:0x8100 +# COUNT_SIZES: Columns 2-n in the BEGIN_TILES section are for boards +# of what sizes? 15 is the default, and COUNT_SIZES is not needed if +# there's only one sizes column and it's for a 15x15 board. Having +# only one column that's for other than 15 is an error. +COUNT_SIZES: 15 21 + -2 0 {"_"} -9 1 'A|a' -2 3 'B|b' -2 3 'C|c' -4 2 'D|d' -12 1 'E|e' -2 4 'F|f' -3 2 'G|g' -2 4 'H|h' -9 1 'I|i' -1 8 'J|j' -1 5 'K|k' -4 1 'L|l' -2 3 'M|m' -6 1 'N|n' -8 1 'O|o' -2 3 'P|p' -1 10 'Q|q' -6 1 'R|r' -4 1 'S|s' -6 1 'T|t' -4 1 'U|u' -2 4 'V|v' -2 4 'W|w' -1 8 'X|x' -2 4 'Y|y' -1 10 'Z|z' +{"_"} 0 2 4 +'A|a' 1 9 16 +'B|b' 3 2 4 +'C|c' 3 2 6 +'D|d' 2 4 8 +'E|e' 1 12 24 +'F|f' 4 2 4 +'G|g' 2 3 5 +'H|h' 4 2 5 +'I|i' 1 9 13 +'J|j' 8 1 2 +'K|k' 5 1 2 +'L|l' 1 4 7 +'M|m' 3 2 6 +'N|n' 1 6 13 +'O|o' 1 8 15 +'P|p' 3 2 4 +'Q|q' 10 1 2 +'R|r' 1 6 13 +'S|s' 1 4 10 +'T|t' 1 6 15 +'U|u' 1 4 7 +'V|v' 4 2 3 +'W|w' 4 2 4 +'X|x' 8 1 2 +'Y|y' 4 2 4 +'Z|z' 10 1 2 # should ignore all after the above diff --git a/xwords4/dawg/Makefile.langcommon b/xwords4/dawg/Makefile.langcommon index 45d26f033..432bbf118 100644 --- a/xwords4/dawg/Makefile.langcommon +++ b/xwords4/dawg/Makefile.langcommon @@ -220,7 +220,8 @@ endif frankspecials.bin: ../frank_mkspecials.py $(BMPFILES) $< $(BLANK_INFO) $(LANG_SPECIAL_INFO) > $@ -$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin charcount.bin table.bin values.bin frankspecials.bin +$(XWLANG)%.$(FRANK_EXT): dawg$(XWLANG)%.stamp $(XWLANG)%_flags.bin $(XWLANG)%_newheader.bin \ + charcount.bin table.bin values.bin frankspecials.bin cat $(XWLANG)$*_flags.bin $(XWLANG)$*_newheader.bin charcount.bin table.bin values.bin \ frankspecials.bin $(XWLANG)StartLoc.bin \ $$(ls dawg$(XWLANG)$*_*.bin) > $@ @@ -273,6 +274,9 @@ allbins: $(MAKE) TARGET_TYPE=FRANK byodbins rm palmspecials.bin +boardSizes.bin: ../xloc.py info.txt + ../xloc.py -bs -out $@ + table.bin: ../xloc.py ifdef NEWDAWG ../xloc.py $(ENCP) -tn -out $@ @@ -308,7 +312,7 @@ $(XWLANG)%_headerFlags.bin: perl -e "print pack(\"n\",$$FLAGS)" > $@ $(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \ - $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin + $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin boardSizes.bin SIZ=0; \ for FILE in $+; do \ SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \ diff --git a/xwords4/dawg/dawg2dict.py b/xwords4/dawg/dawg2dict.py index 3c325f188..9d7ae87b1 100755 --- a/xwords4/dawg/dawg2dict.py +++ b/xwords4/dawg/dawg2dict.py @@ -54,12 +54,13 @@ def splitFaces( buf ): return faces -def loadCountsAndValues( fh, numFaces, data ): - twoBytesFmt = struct.Struct('BB') +def loadCountsAndValues( fh, numFaces, nSizes, data ): for ii in range(numFaces): - pair = twoBytesFmt.unpack(fh.read(twoBytesFmt.size)) - data[ii]['count'] = int(pair[0]) - data[ii]['val'] = int(pair[1]) + counts = [] + for jj in range(nSizes): + counts.append(int.from_bytes(fh.read(1), 'little')) + data[ii]['counts'] = counts + data[ii]['val'] = int.from_bytes(fh.read(1), 'little') def eatBitmap( fh ): nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0]) @@ -142,6 +143,7 @@ def process(args): with open(args.DAWG, "rb") as dawg: nWords = 0 + boardSizes = [15] headerFmt = struct.Struct('!HH') (flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size)) @@ -162,7 +164,21 @@ def process(args): sys.exit(0) md5Sum = getNullTermParam(header) print( 'header: read sum: {}'.format(md5Sum), file=sys.stderr ) - except: + + # skip header flags + header.read(2) + print( 'header: skipped flags', file=sys.stderr) + + nBoardSizes = int.from_bytes(header.read(1), 'big') + print( 'header: nBoardSizes: {}'.format(nBoardSizes), file=sys.stderr ) + boardSizes = [] + for ii in range(nBoardSizes): + siz = int.from_bytes(header.read(1), 'big') + boardSizes.append(siz) + print( 'header: read sizes: {}'.format(boardSizes), file=sys.stderr) + + except Exception as ex: + print( 'header: exception!! {} '.format(ex) ) md5Sum = None if args.GET_SUM: @@ -214,7 +230,7 @@ def process(args): langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0] dawg.read( oneByteFmt.size ) # skip byte - loadCountsAndValues( dawg, numFaces, data ) + loadCountsAndValues( dawg, numFaces, len(boardSizes), data ) loadSpecialData( dawg, data ) offsetStruct = struct.Struct('!L') diff --git a/xwords4/dawg/xloc.py b/xwords4/dawg/xloc.py index c329ebd34..b3e651817 100755 --- a/xwords4/dawg/xloc.py +++ b/xwords4/dawg/xloc.py @@ -8,7 +8,10 @@ def errorOut(msg): def mkParser(): parser = argparse.ArgumentParser() parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' ) - parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', help = 'output table file' ) + parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', + help = 'output table file' ) + parser.add_argument('-bs', dest = 'DO_BOARDSIZE', action = 'store_true', + help = 'output boardSizes file' ) # parser.add_argument('-tn', dest = 'UNICODE', default = False, # action = 'store_true', help = 'assume unicode') @@ -53,8 +56,10 @@ def parseTileInfo(infoFile, encoding): if sEndTiles.match(line): break else: - (count, val, face) = line.split(None, 2) - result['_TILES'].append((count, val, face)) + (face, val, counts) = line.split(None, 2) + result['_TILES'].append({'counts': counts, + 'val': val, + 'face': face}) elif sBeginTiles.match(line): inTiles = True else: @@ -72,11 +77,11 @@ def printLetters( letters, outfile ): outfile.write(letters.encode('utf8')) def writeMapFile(xlocToken, outfile): - print('writeMapFile()') + print('writeMapFile(out={})'.format(outfile)) tiles = xlocToken['_TILES'] specialCount = 0 for tile in tiles: - face = tile[2] + face = tile['face'] match = sSingleCharMatch.match(face) if match: printLetters( match.group(1), outfile ) @@ -94,13 +99,25 @@ def writeMapFile(xlocToken, outfile): def writeValuesFile(xlocToken, outfile): header = xlocToken.get('XLOC_HEADER') or errorOut('no XLOC_HEADER found') - print('writing header: {}'.format(header)) + print('writeValuesFile(out={}): writing header: {}'.format(outfile, header)) outfile.write(struct.pack('!H', int(header, 16))) + nCounts = 0 for tile in xlocToken['_TILES']: - val = int(tile[0]) - count = int(tile[1]) - outfile.write(struct.pack('BB', val, count)) + counts = tile['counts'].split() + assert nCounts == 0 or nCounts == len(counts) + nCounts = len(counts) + for count in counts: + outfile.write(struct.pack('B', int(count))) + + val = int(tile['val']) + outfile.write(struct.pack('B', val)) + +def writeBoardSizesFile(xlocToken, outfile): + cs = xlocToken.get('COUNT_SIZES', '15').split() + outfile.write(struct.pack('B', len(cs))) + for siz in cs: + outfile.write(struct.pack('B', int(siz))) def main(): print('{}.main {} called'.format(sys.argv[0], sys.argv[1:])) @@ -127,6 +144,11 @@ def main(): with open(path, 'wb') as outfile: writeValuesFile( xlocToken, outfile ) + if args.DO_BOARDSIZE and args.OUTFILE: + with open(args.OUTFILE, 'wb') as outfile: + writeBoardSizesFile( xlocToken, outfile ) + + ############################################################################## if __name__ == '__main__': main()