From 1c0348dbf14effc0593cd0143c0fbdf71cb15478 Mon Sep 17 00:00:00 2001 From: Eric House Date: Fri, 24 Apr 2020 21:14:20 -0700 Subject: [PATCH] add option to print a delimiter between tiles For Hungarian, there are "duplicate" words because e.g. the string CS can be spelled with two tiles or one. If a delimiter is printed at tile boundaries the duplication goes away. --- xwords4/dawg/dawg2dict.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xwords4/dawg/dawg2dict.py b/xwords4/dawg/dawg2dict.py index c2ab89077..bdff12f34 100755 --- a/xwords4/dawg/dawg2dict.py +++ b/xwords4/dawg/dawg2dict.py @@ -116,7 +116,7 @@ def parseNode( node, nodeSize ): return (nextEdge, chrIndex, accepting, isLast ) -def expandDAWG( nodes, nodeSize, indx, data, words, letters = [] ): +def expandDAWG( nodes, nodeSize, delim, indx, data, words, letters = [] ): if len(letters) > 15: error( "infinite recursion???" ) while True: @@ -126,10 +126,10 @@ def expandDAWG( nodes, nodeSize, indx, data, words, letters = [] ): letters.append( data[chrIndex]['faces'][0] ) if accepting: - words.append( ''.join(letters) ) + words.append( delim.join(letters) ) if nextEdge != 0: - expandDAWG( nodes, nodeSize, nextEdge, data, words, letters ) + expandDAWG( nodes, nodeSize, delim, nextEdge, data, words, letters ) letters.pop() @@ -221,7 +221,7 @@ def process(args): nodes = loadNodes( dawg, nodeSize ) words = [] if nodes: - expandDAWG( nodes, nodeSize, offset, data, words ) + expandDAWG( nodes, nodeSize, args.DELIM, offset, data, words ) assert len(words) == nWords if args.DUMP_WORDS: for word in words: @@ -239,6 +239,7 @@ def mkParser(): action = 'store_true', help = 'write header user-visible message to stdout') parser.add_argument('--get-sum', dest = 'GET_SUM', default = False, action = 'store_true', help = 'write md5sum to stdout') + parser.add_argument('--separator', dest = 'DELIM', default = '', help = 'printed between tiles') # [-raw | -json] [-get-sum] [-get-desc] -dict