From c970d397686532e982e7ab08cd79cc56add45ff1 Mon Sep 17 00:00:00 2001 From: Eric House Date: Tue, 15 Mar 2022 10:12:31 +0100 Subject: [PATCH] move extra counts to header Fix so old versions of the app will still be able to read the new format --- xwords4/dawg/English/Makefile.BasEnglish | 2 +- xwords4/dawg/English/info.txt | 3 +- xwords4/dawg/Makefile.langcommon | 16 ++++++-- xwords4/dawg/dawg2dict.py | 34 ++++++++++------- xwords4/dawg/xloc.py | 47 ++++++++++++++++++------ 5 files changed, 72 insertions(+), 30 deletions(-) diff --git a/xwords4/dawg/English/Makefile.BasEnglish b/xwords4/dawg/English/Makefile.BasEnglish index fa67d38a8..0041e8dd5 100644 --- a/xwords4/dawg/English/Makefile.BasEnglish +++ b/xwords4/dawg/English/Makefile.BasEnglish @@ -16,7 +16,7 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. XWLANG=BasEnglish -LANGCODE=en_US +LANGCODE=en DICT2DAWGARGS = -r -nosort DICTNOTE = "Wordlist created in the 1930s for language learners" diff --git a/xwords4/dawg/English/info.txt b/xwords4/dawg/English/info.txt index a99abd55e..ca95195f9 100644 --- a/xwords4/dawg/English/info.txt +++ b/xwords4/dawg/English/info.txt @@ -15,7 +15,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -LANGCODE:en_US +LANGCODE:en +LANGNAME:English # deal with DOS files LANGFILTER: tr -d '\r' diff --git a/xwords4/dawg/Makefile.langcommon b/xwords4/dawg/Makefile.langcommon index 432bbf118..16c48441a 100644 --- a/xwords4/dawg/Makefile.langcommon +++ b/xwords4/dawg/Makefile.langcommon @@ -274,8 +274,8 @@ allbins: $(MAKE) TARGET_TYPE=FRANK byodbins rm palmspecials.bin -boardSizes.bin: ../xloc.py info.txt - ../xloc.py -bs -out $@ +extraCounts.bin: ../xloc.py info.txt + ../xloc.py -ec -out $@ table.bin: ../xloc.py ifdef NEWDAWG @@ -302,6 +302,15 @@ $(XWLANG)%_note.bin: echo -n $(DICTNOTE) > $@ perl -e "print pack(\"c\",0)" >> $@ +langName.bin: ../xloc.py + ../xloc.py -info LANGNAME -out $@ + +langCode.bin: ../xloc.py + ../xloc.py -info LANGCODE -out $@ + +otherCounts.bin: ../xloc.py + ../xloc.py -oc -out otherCounts.bin + $(XWLANG)%_md5sum.bin: cat table.bin values.bin frankspecials.bin $(XWLANG)StartLoc.bin \ dawg$(XWLANG)$*_*.bin | md5sum | awk '{print $$1}' | tr -d '\n' > $@ @@ -312,7 +321,8 @@ $(XWLANG)%_headerFlags.bin: perl -e "print pack(\"n\",$$FLAGS)" > $@ $(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \ - $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin boardSizes.bin + $(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin langName.bin \ + langCode.bin otherCounts.bin SIZ=0; \ for FILE in $+; do \ SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \ diff --git a/xwords4/dawg/dawg2dict.py b/xwords4/dawg/dawg2dict.py index 9d7ae87b1..08248a317 100755 --- a/xwords4/dawg/dawg2dict.py +++ b/xwords4/dawg/dawg2dict.py @@ -54,13 +54,18 @@ def splitFaces( buf ): return faces -def loadCountsAndValues( fh, numFaces, nSizes, data ): +def loadCountsAndValues( fh, numFaces, extraData, data ): for ii in range(numFaces): - counts = [] - for jj in range(nSizes): - counts.append(int.from_bytes(fh.read(1), 'little')) - data[ii]['counts'] = counts + data[ii]['counts'] = {15: int.from_bytes(fh.read(1), 'little')} data[ii]['val'] = int.from_bytes(fh.read(1), 'little') + if extraData: + buf = io.BytesIO(extraData) + while True: + siz = int.from_bytes(buf.read(1), 'little') + if not siz: break + for ii in range(numFaces): + count = int.from_bytes(buf.read(1), 'little') + data[ii]['counts'][siz] = count def eatBitmap( fh ): nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0]) @@ -143,7 +148,7 @@ def process(args): with open(args.DAWG, "rb") as dawg: nWords = 0 - boardSizes = [15] + extraData = None headerFmt = struct.Struct('!HH') (flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size)) @@ -169,13 +174,14 @@ def process(args): header.read(2) print( 'header: skipped flags', file=sys.stderr) - nBoardSizes = int.from_bytes(header.read(1), 'big') - print( 'header: nBoardSizes: {}'.format(nBoardSizes), file=sys.stderr ) - boardSizes = [] - for ii in range(nBoardSizes): - siz = int.from_bytes(header.read(1), 'big') - boardSizes.append(siz) - print( 'header: read sizes: {}'.format(boardSizes), file=sys.stderr) + langName = getNullTermParam(header) + langCode = getNullTermParam(header) + print('header: langName: {}; langCode: {}'.format(langName, langCode), + file=sys.stderr) + + extraSize = int.from_bytes(header.read(1), 'little') + print( 'header: extraSize: {}'.format(extraSize), file=sys.stderr ) + extraData = header.read(extraSize) except Exception as ex: print( 'header: exception!! {} '.format(ex) ) @@ -230,7 +236,7 @@ def process(args): langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0] dawg.read( oneByteFmt.size ) # skip byte - loadCountsAndValues( dawg, numFaces, len(boardSizes), data ) + loadCountsAndValues( dawg, numFaces, extraData, data ) loadSpecialData( dawg, data ) offsetStruct = struct.Struct('!L') diff --git a/xwords4/dawg/xloc.py b/xwords4/dawg/xloc.py index b3e651817..ec895ffa8 100755 --- a/xwords4/dawg/xloc.py +++ b/xwords4/dawg/xloc.py @@ -10,8 +10,10 @@ def mkParser(): parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' ) parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true', help = 'output table file' ) - parser.add_argument('-bs', dest = 'DO_BOARDSIZE', action = 'store_true', - help = 'output boardSizes file' ) + parser.add_argument('-oc', dest = 'DO_OTHERCOUNTS', action = 'store_true', + help = 'write extra (non-15x15 board) counts' ) + parser.add_argument('-info', dest = 'INFO_KEY', type = str, + help = 'info.txt keyword to write null-terminated' ) # parser.add_argument('-tn', dest = 'UNICODE', default = False, # action = 'store_true', help = 'assume unicode') @@ -76,6 +78,12 @@ def printLetters( letters, outfile ): letters = ' '.join(letters) outfile.write(letters.encode('utf8')) +def writeInfoFile(xlocToken, key, outfile): + val = xlocToken[key] + assert val + outfile.write(val.encode('utf8')) + outfile.write(struct.pack('B', 0 )) + def writeMapFile(xlocToken, outfile): print('writeMapFile(out={})'.format(outfile)) tiles = xlocToken['_TILES'] @@ -102,22 +110,36 @@ def writeValuesFile(xlocToken, outfile): print('writeValuesFile(out={}): writing header: {}'.format(outfile, header)) outfile.write(struct.pack('!H', int(header, 16))) + cs = xlocToken.get('COUNT_SIZES', '15').split() + useOffset = cs.index('15') + nCounts = 0 for tile in xlocToken['_TILES']: counts = tile['counts'].split() assert nCounts == 0 or nCounts == len(counts) nCounts = len(counts) - for count in counts: - outfile.write(struct.pack('B', int(count))) + assert nCounts == len(cs) + outfile.write(struct.pack('B', int(counts[useOffset]))) val = int(tile['val']) outfile.write(struct.pack('B', val)) -def writeBoardSizesFile(xlocToken, outfile): +def writeOtherCounts(xlocToken, outfile): cs = xlocToken.get('COUNT_SIZES', '15').split() - outfile.write(struct.pack('B', len(cs))) - for siz in cs: - outfile.write(struct.pack('B', int(siz))) + + tiles = xlocToken['_TILES'] + # Write the size of the data so it can be skipped by the reader, + # which won't know how many faces the tile set has yet. + totalSiz = (len(cs) - 1) * (1 + len(tiles)) + outfile.write(struct.pack('B', totalSiz)) + + for useOffset in range(len(cs)): + siz = int(cs[useOffset]) + if siz == 15: continue + outfile.write(struct.pack('B', siz)) + for tile in tiles: + count = tile['counts'].split()[useOffset] + outfile.write(struct.pack('B', int(count))) def main(): print('{}.main {} called'.format(sys.argv[0], sys.argv[1:])) @@ -128,6 +150,10 @@ def main(): errorOut('{} not found'.format(infoFile)) xlocToken = parseTileInfo(infoFile, args.ENCODING) + if args.INFO_KEY and args.OUTFILE: + with open(args.OUTFILE, 'wb') as outfile: + writeInfoFile(xlocToken, args.INFO_KEY, outfile); + if args.DO_TABLE or args.TABLE_FILE: path = args.TABLE_FILE or args.OUTFILE with open(path, 'wb') as outfile: @@ -144,10 +170,9 @@ def main(): with open(path, 'wb') as outfile: writeValuesFile( xlocToken, outfile ) - if args.DO_BOARDSIZE and args.OUTFILE: + if args.DO_OTHERCOUNTS and args.OUTFILE: with open(args.OUTFILE, 'wb') as outfile: - writeBoardSizesFile( xlocToken, outfile ) - + writeOtherCounts(xlocToken, outfile) ############################################################################## if __name__ == '__main__':