From c970d397686532e982e7ab08cd79cc56add45ff1 Mon Sep 17 00:00:00 2001
From: Eric House <eehouse@eehouse.org>
Date: Tue, 15 Mar 2022 10:12:31 +0100
Subject: [PATCH] move extra counts to header

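Keep the tile table at one (15x15) count per tile and store the counts for
other board sizes in the header instead, so old versions of the app will
still be able to read the new format. Also add the language name and code
to the header.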
---
 xwords4/dawg/English/Makefile.BasEnglish |  2 +-
 xwords4/dawg/English/info.txt            |  3 +-
 xwords4/dawg/Makefile.langcommon         | 16 ++++++--
 xwords4/dawg/dawg2dict.py                | 34 ++++++++++-------
 xwords4/dawg/xloc.py                     | 47 ++++++++++++++++++------
 5 files changed, 72 insertions(+), 30 deletions(-)

diff --git a/xwords4/dawg/English/Makefile.BasEnglish b/xwords4/dawg/English/Makefile.BasEnglish
index fa67d38a8..0041e8dd5 100644
--- a/xwords4/dawg/English/Makefile.BasEnglish
+++ b/xwords4/dawg/English/Makefile.BasEnglish
@@ -16,7 +16,7 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
 XWLANG=BasEnglish
-LANGCODE=en_US
+LANGCODE=en
 DICT2DAWGARGS = -r -nosort
 DICTNOTE = "Wordlist created in the 1930s for language learners"
 
diff --git a/xwords4/dawg/English/info.txt b/xwords4/dawg/English/info.txt
index a99abd55e..ca95195f9 100644
--- a/xwords4/dawg/English/info.txt
+++ b/xwords4/dawg/English/info.txt
@@ -15,7 +15,8 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 
-LANGCODE:en_US
+LANGCODE:en
+LANGNAME:English
 
 # deal with DOS files
 LANGFILTER: tr -d '\r'
diff --git a/xwords4/dawg/Makefile.langcommon b/xwords4/dawg/Makefile.langcommon
index 432bbf118..16c48441a 100644
--- a/xwords4/dawg/Makefile.langcommon
+++ b/xwords4/dawg/Makefile.langcommon
@@ -274,8 +274,8 @@ allbins:
 	$(MAKE) TARGET_TYPE=FRANK byodbins
 	rm palmspecials.bin
 
-boardSizes.bin: ../xloc.py info.txt
-	../xloc.py -bs -out $@
+extraCounts.bin: ../xloc.py info.txt
+	../xloc.py -oc -out $@
 
 table.bin:  ../xloc.py
 ifdef NEWDAWG
@@ -302,6 +302,15 @@ $(XWLANG)%_note.bin:
 	echo -n $(DICTNOTE) > $@
 	perl -e "print pack(\"c\",0)" >> $@
 
+langName.bin: ../xloc.py info.txt
+	../xloc.py -info LANGNAME -out $@
+
+langCode.bin: ../xloc.py info.txt
+	../xloc.py -info LANGCODE -out $@
+
+otherCounts.bin: ../xloc.py info.txt
+	../xloc.py -oc -out $@
+
 $(XWLANG)%_md5sum.bin: 
 	cat table.bin values.bin frankspecials.bin $(XWLANG)StartLoc.bin \
 		dawg$(XWLANG)$*_*.bin | md5sum | awk '{print $$1}' | tr -d '\n' > $@
@@ -312,7 +321,8 @@ $(XWLANG)%_headerFlags.bin:
 	perl -e "print pack(\"n\",$$FLAGS)" > $@
 
 $(XWLANG)%_newheader.bin: $(XWLANG)%_wordcount.bin $(XWLANG)%_note.bin \
-		$(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin boardSizes.bin
+		$(XWLANG)%_md5sum.bin $(XWLANG)%_headerFlags.bin langName.bin \
+		langCode.bin otherCounts.bin
 	SIZ=0; \
 	for FILE in $+; do \
 		SIZ=$$(($$SIZ + $$(ls -l $$FILE | awk '{print $$5}'))); \
diff --git a/xwords4/dawg/dawg2dict.py b/xwords4/dawg/dawg2dict.py
index 9d7ae87b1..08248a317 100755
--- a/xwords4/dawg/dawg2dict.py
+++ b/xwords4/dawg/dawg2dict.py
@@ -54,13 +54,18 @@ def splitFaces( buf ):
 
     return faces
 
-def loadCountsAndValues( fh, numFaces, nSizes, data ):
+def loadCountsAndValues( fh, numFaces, extraData, data ):
     for ii in range(numFaces):
-        counts = []
-        for jj in range(nSizes):
-            counts.append(int.from_bytes(fh.read(1), 'little'))
-        data[ii]['counts'] = counts
+        data[ii]['counts'] = {15: int.from_bytes(fh.read(1), 'little')}  # one count per face here, kept under key 15
         data[ii]['val'] = int.from_bytes(fh.read(1), 'little')
+    if extraData:  # bytes process() read from the header, or None
+        buf = io.BytesIO(extraData)
+        while True:
+            siz = int.from_bytes(buf.read(1), 'little')  # key for the next numFaces counts
+            if not siz: break  # a zero byte, or b'' at end of data, ends the loop
+            for ii in range(numFaces):
+                count = int.from_bytes(buf.read(1), 'little')
+                data[ii]['counts'][siz] = count
 
 def eatBitmap( fh ):
     nCols = int(oneByteFmt.unpack(fh.read(oneByteFmt.size))[0])
@@ -143,7 +148,7 @@ def process(args):
 
     with open(args.DAWG, "rb") as dawg:
         nWords = 0
-        boardSizes = [15]
+        extraData = None
 
         headerFmt = struct.Struct('!HH')
         (flags, headerLen) = headerFmt.unpack(dawg.read(headerFmt.size))
@@ -169,13 +174,14 @@ def process(args):
                 header.read(2)
                 print( 'header: skipped flags', file=sys.stderr)
 
-                nBoardSizes = int.from_bytes(header.read(1), 'big')
-                print( 'header: nBoardSizes: {}'.format(nBoardSizes), file=sys.stderr )
-                boardSizes = []
-                for ii in range(nBoardSizes):
-                    siz = int.from_bytes(header.read(1), 'big')
-                    boardSizes.append(siz)
-                print( 'header: read sizes: {}'.format(boardSizes), file=sys.stderr)
+                langName = getNullTermParam(header)
+                langCode = getNullTermParam(header)
+                print('header: langName: {}; langCode: {}'.format(langName, langCode),
+                      file=sys.stderr)
+
+                extraSize = int.from_bytes(header.read(1), 'little')
+                print( 'header: extraSize: {}'.format(extraSize), file=sys.stderr )
+                extraData = header.read(extraSize)
 
             except Exception as ex:
                 print( 'header: exception!! {} '.format(ex) )
@@ -230,7 +236,7 @@ def process(args):
         langCode = 0x7F & oneByteFmt.unpack(dawg.read(oneByteFmt.size))[0]
         dawg.read( oneByteFmt.size ) # skip byte
 
-        loadCountsAndValues( dawg, numFaces, len(boardSizes), data )
+        loadCountsAndValues( dawg, numFaces, extraData, data )
         loadSpecialData( dawg, data )
 
         offsetStruct = struct.Struct('!L')
diff --git a/xwords4/dawg/xloc.py b/xwords4/dawg/xloc.py
index b3e651817..ec895ffa8 100755
--- a/xwords4/dawg/xloc.py
+++ b/xwords4/dawg/xloc.py
@@ -10,8 +10,10 @@ def mkParser():
     parser.add_argument('-enc', dest = 'ENCODING', type = str, help = 'use this encoding' )
     parser.add_argument('-tn', dest = 'DO_TABLE', action = 'store_true',
                         help = 'output table file' )
-    parser.add_argument('-bs', dest = 'DO_BOARDSIZE', action = 'store_true',
-                        help = 'output boardSizes file' )
+    parser.add_argument('-oc', dest = 'DO_OTHERCOUNTS', action = 'store_true',
+                        help = 'write extra (non-15x15 board) counts' )
+    parser.add_argument('-info', dest = 'INFO_KEY', type = str,
+                        help = 'info.txt keyword to write null-terminated' )
 
     # parser.add_argument('-tn', dest = 'UNICODE', default = False,
     #                     action = 'store_true', help = 'assume unicode')
@@ -76,6 +78,12 @@ def printLetters( letters, outfile ):
     letters = ' '.join(letters)
     outfile.write(letters.encode('utf8'))
 
+def writeInfoFile(xlocToken, key, outfile):
+    val = xlocToken.get(key)  # key is an info.txt keyword, e.g. LANGNAME
+    if not val: raise ValueError('{} missing or empty in info file'.format(key))
+    outfile.write(val.encode('utf8'))
+    outfile.write(struct.pack('B', 0 ))  # null terminator
+
 def writeMapFile(xlocToken, outfile):
     print('writeMapFile(out={})'.format(outfile))
     tiles = xlocToken['_TILES']
@@ -102,22 +110,36 @@ def writeValuesFile(xlocToken, outfile):
     print('writeValuesFile(out={}): writing header: {}'.format(outfile, header))
     outfile.write(struct.pack('!H', int(header, 16)))
 
+    cs = xlocToken.get('COUNT_SIZES', '15').split()
+    useOffset = cs.index('15')
+
     nCounts = 0
     for tile in xlocToken['_TILES']:
         counts = tile['counts'].split()
         assert nCounts == 0 or nCounts == len(counts)
         nCounts = len(counts)
-        for count in counts:
-            outfile.write(struct.pack('B', int(count)))
+        assert nCounts == len(cs)
+        outfile.write(struct.pack('B', int(counts[useOffset])))
 
         val = int(tile['val'])
         outfile.write(struct.pack('B', val))
 
-def writeBoardSizesFile(xlocToken, outfile):
+def writeOtherCounts(xlocToken, outfile):
     cs = xlocToken.get('COUNT_SIZES', '15').split()
-    outfile.write(struct.pack('B', len(cs)))
-    for siz in cs:
-        outfile.write(struct.pack('B', int(siz)))
+    # Column ii of every tile's 'counts' goes with board size cs[ii]. Size 15
+    # is skipped here: writeValuesFile() already writes that column.
+    extras = [(ii, int(siz)) for ii, siz in enumerate(cs) if int(siz) != 15]
+    counts = [tile['counts'].split() for tile in xlocToken['_TILES']]
+
+    # Write the size of the data so it can be skipped by the reader,
+    # which won't know how many faces the tile set has yet.
+    totalSiz = len(extras) * (1 + len(counts))
+    outfile.write(struct.pack('B', totalSiz))
+
+    for useOffset, siz in extras:
+        outfile.write(struct.pack('B', siz))
+        for tileCounts in counts:
+            outfile.write(struct.pack('B', int(tileCounts[useOffset])))
 
 def main():
     print('{}.main {} called'.format(sys.argv[0], sys.argv[1:]))
@@ -128,6 +150,10 @@ def main():
         errorOut('{} not found'.format(infoFile))
     xlocToken = parseTileInfo(infoFile, args.ENCODING)
 
+    if args.INFO_KEY and args.OUTFILE:
+        with open(args.OUTFILE, 'wb') as outfile:
+            writeInfoFile(xlocToken, args.INFO_KEY, outfile);
+
     if args.DO_TABLE or args.TABLE_FILE:
         path = args.TABLE_FILE or args.OUTFILE
         with open(path, 'wb') as outfile:
@@ -144,10 +170,9 @@ def main():
         with open(path, 'wb') as outfile:
             writeValuesFile( xlocToken, outfile )
 
-    if args.DO_BOARDSIZE and args.OUTFILE:
+    if args.DO_OTHERCOUNTS and args.OUTFILE:
         with open(args.OUTFILE, 'wb') as outfile:
-            writeBoardSizesFile( xlocToken, outfile )
-
+            writeOtherCounts(xlocToken, outfile)
 
 ##############################################################################
 if __name__ == '__main__':