diff --git a/xwords4/dawg/Makefile b/xwords4/dawg/Makefile
index 8e0623956..736a65152 100644
--- a/xwords4/dawg/Makefile
+++ b/xwords4/dawg/Makefile
@@ -23,7 +23,7 @@ byodfiles.tgz: byodfiles.tar
byodfiles.tar: dict2dawg
rm -f $@ langinfo
- tar cvf $@ ./dict2dawg ./dict2dawg.cpp ./par.pl ./xloc.pl ./xloc.pm
+ tar cvf $@ ./dict2dawg ./dict2dawg.cpp ./par.pl ./xloc.py
for dir in $$(ls .); do \
if [ $$dir = "Hëx" ]; then \
:; \
diff --git a/xwords4/dawg/Makefile.langcommon b/xwords4/dawg/Makefile.langcommon
index 04471c625..5ae03dceb 100644
--- a/xwords4/dawg/Makefile.langcommon
+++ b/xwords4/dawg/Makefile.langcommon
@@ -269,24 +269,24 @@ allbins:
$(MAKE) TARGET_TYPE=FRANK byodbins
rm palmspecials.bin
-table.bin: ../xloc.pl
+table.bin: ../xloc.py
ifdef NEWDAWG
- perl -I../ ../xloc.pl $(ENCP) -tn -out $@
+ ../xloc.py $(ENCP) -tn -out $@
else
- perl -I../ ../xloc.pl -t -out $@
+	$(error table.bin: xloc.py has no -t mode, so NEWDAWG must be defined)
endif
-values.bin: ../xloc.pl
- perl -I../ ../xloc.pl -v -out $@
+values.bin: ../xloc.py
+ ../xloc.py -v -out $@
# a binary file, two bytes, one giving the size of tiles data and the
# other the number of tiles in the dict. Tiles data is utf-8 and so
# number is not derivable from size.
-$(XWLANG)_charcount.bin: table.bin ../xloc.pl
+$(XWLANG)_charcount.bin: table.bin ../xloc.py
SIZ=$$(ls -l $< | awk '{print $$5}'); \
perl -e "print pack(\"c\",$$SIZ)" > $@
TMP=/tmp/tmp$$$$; \
- perl -I../ ../xloc.pl -s -out $$TMP; \
+ ../xloc.py -s -out $$TMP; \
cat $$TMP >> $@; \
rm -f $$TMP
diff --git a/xwords4/dawg/dictstats.pl b/xwords4/dawg/dictstats.pl
deleted file mode 100755
index 819b82e35..000000000
--- a/xwords4/dawg/dictstats.pl
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/perl
-
-# print stats about in input stream that's assumed to be a dictionary.
-# Counts and percentages of each letter, as well as total numbers of
-# words. This is not part of the dictionary build process. I use it
-# for creating info.txt files for new languages and debugging the
-# creation of dictionaries from new wordlists.
-#
-# Something like this might form the basis for choosing counts and
-# values for tiles without using the conventions established by
-# Scrabble players. This isn't enough, though: the frequency of
-# letter tuples and triples -- how often letters appear together -- is
-# a better indicator than just letter count.
-
-use strict;
-
-my @wordSizeCounts;
-my %letterCounts;
-my $wordCount;
-my $letterCount;
-my $enc = "utf8"; # this could be a cmdline arg....
-
-if ( $enc ) {
- binmode( STDOUT, ":encoding($enc)" ) ;
- binmode( STDIN, ":encoding($enc)" ) ;
-}
-
-while (<>) {
-
- chomp;
-
- ++$wordSizeCounts[length];
- ++$wordCount;
-
- foreach my $letter (split( / */ ) ) {
- my $ii = ord($letter);
- # special-case the bogus chars we add for "specials"
- die "$0: this is a letter?: $ii" if $ii <= 32 && $ii >= 4 && $ii != 0;
- ++$letterCounts{$letter};
- ++$letterCount;
- }
-}
-
-print "Number of words: $wordCount\n";
-print "Number of letters: $letterCount\n\n";
-
-
-print "**** word sizes ****\n";
-print "SIZE COUNT PERCENT\n";
-my $pctTotal = 0.0;
-my $wordTotal = 0;
-for ( my $i = 1 ; $i <= 99; ++$i ) {
- my $count = $wordSizeCounts[$i];
- $wordTotal += $count;
- if ( $count > 0 ) {
- my $pct = (100.00 * $count)/$wordCount;
- $pctTotal += $pct;
- printf "%2d %6d %.2f\n", $i, $count, $pct;
- }
-}
-printf "-------------------------------\n";
-printf " %6d %.2f\n", $wordTotal, $pctTotal;
-
-
-print "\n\n**** Letter counts ****\n";
-print " ASCII ORD HEX PCT (of $letterCount)\n";
-my $lineNo = 1;
-foreach my $key (sort keys %letterCounts) {
- my $count = $letterCounts{$key};
- my $pct = (100.00 * $count) / $letterCount;
- printf( "%2d: %3s %3d %x %5.2f (%d)\n",
- $lineNo, $key, ord($key), ord($key), $pct, $count );
- ++$lineNo;
-}
-
-print "\n";
diff --git a/xwords4/dawg/dictstats.py b/xwords4/dawg/dictstats.py
new file mode 100755
index 000000000..28bfb631f
--- /dev/null
+++ b/xwords4/dawg/dictstats.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+
+import sys
+
+"""
+Print stats about an input stream that's assumed to be a dictionary.
+Counts and percentages of each letter, as well as total numbers of
+words. This is not part of the dictionary build process. I use it
+for creating info.txt files for new languages and debugging the
+creation of dictionaries from new wordlists.
+
+Something like this might form the basis for choosing counts and
+values for tiles without using the conventions established by
+Scrabble players. This isn't enough, though: the frequency of
+letter tuples and triples -- how often letters appear together -- is
+a better indicator than just letter count.
+"""
+
+
+
+def main():
+    """Print word-length and letter-frequency statistics for the word list on stdin."""
+    enc = 'utf8' # this could be a cmdline arg....
+    # The perl version applied this encoding to STDIN and STDOUT via binmode();
+    # do likewise so the result does not depend on the caller's locale.
+    for stream in (sys.stdin, sys.stdout):
+        if hasattr(stream, 'reconfigure'): # needs python 3.7; absent on e.g. StringIO
+            stream.reconfigure(encoding = enc)
+
+    wordSizeCounts = {}         # word length -> number of words of that length
+    letterCounts = {}           # letter -> number of occurrences
+    wordCount = 0
+    letterCount = 0
+    for line in sys.stdin:      # no readlines(): don't hold the whole list in memory
+        line = line.strip()
+        wordSizeCounts[len(line)] = wordSizeCounts.get(len(line), 0) + 1
+        wordCount += 1
+        for letter in line:
+            ii = ord(letter)
+            # perl did this: die "$0: this is a letter?: $ii" if $ii <= 32 && $ii >= 4 && $ii != 0;
+            assert ii > 32 or ii < 4 or ii == 0, 'letter {} out of range'.format(ii)
+            letterCounts[letter] = letterCounts.get(letter, 0) + 1
+            letterCount += 1
+
+    print( 'Number of words: {}'.format(wordCount))
+    print( 'Number of letters: {}'.format(letterCount))
+    print('')
+
+    print( '**** word sizes ****' )
+    print( 'SIZE COUNT PERCENT' )
+    pctTotal = 0.0
+    wordTotal = 0
+    for ii in sorted(wordSizeCounts):
+        count = wordSizeCounts[ii]
+        wordTotal += count
+        pct = (100.00 * count)/wordCount
+        pctTotal += pct
+        print( '{:2d} {:6d} {:02.2f}'.format(ii, count, pct))
+
+    print( '-------------------------------' )
+    print(' {:6d} {:.2f}'.format( wordTotal, pctTotal))
+    print('')
+
+    lineNo = 1
+    pctTotal = 0.0
+    print( '**** Letter counts ****' )
+    print( ' ASCII ORD HEX PCT (of {})'.format(letterCount))
+    for letter in sorted(letterCounts):
+        count = letterCounts[letter]
+        pct = (100.00 * count) / letterCount
+        pctTotal += pct
+        print( '{:2d}: {: >6s} {:2d} {:x} {:5.2f} ({:d})'.format(lineNo, letter, ord(letter), ord(letter), pct, count ) )
+        lineNo += 1
+
+    print('percent total {:.2f}'.format( pctTotal))
+    print('')
+
+##############################################################################
+if __name__ == '__main__':
+ main()
diff --git a/xwords4/dawg/frank_mkspecials.pl b/xwords4/dawg/frank_mkspecials.pl
deleted file mode 100755
index 5c0ed4465..000000000
--- a/xwords4/dawg/frank_mkspecials.pl
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright 2001 by Eric House (xwords@eehouse.org)
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-# Given arguments consisting of triples, first a string and then pbitm
-# files representing bitmaps. For each triple, print out the string and
-# then the converted bitmaps.
-
-use strict;
-
-while ( @ARGV ) {
- my $str = shift();
- my $largebmp = shift();
- my $smallbmp = shift();
-
- doOne( $str, $largebmp, $smallbmp );
-}
-
-sub doOne {
- my ( $str, $largebmp, $smallbmp ) = @_;
-
- print pack( "C", length($str) );
- print $str;
-
- print STDERR "looking at $largebmp", "\n";
-
- die "file $largebmp does not exist\n" unless -e $largebmp;
- print `cat $largebmp | ../pbitm2bin.pl`;
- die "file $smallbmp does not exist\n" unless -e $smallbmp;
- print `cat $smallbmp | ../pbitm2bin.pl`;
-}
-
-
diff --git a/xwords4/dawg/xloc.pl b/xwords4/dawg/xloc.pl
deleted file mode 100755
index 23ef0ca43..000000000
--- a/xwords4/dawg/xloc.pl
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright 2002 by Eric House (xwords@eehouse.org). All rights reserved.
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-# test and wrapper file for xloc.pm
-
-use strict;
-use xloc;
-
-my $unicode = -1;
-my $doval = 0;
-my $dosize = 0;
-my $enc;
-my $outfile;
-
-my $arg;
-while ( $arg = $ARGV[0] ) {
- if ( $arg eq '-enc' ) {
- $enc = $ARGV[1];
- shift @ARGV;
- } elsif ( $arg eq "-tn" ) {
- $unicode = 1;
- } elsif ( $arg eq "-t" ) {
- $unicode = 0;
- } elsif ( $arg eq "-v" ) {
- $doval = 1;
- } elsif ( $arg eq "-s" ) {
- $dosize = 1;
- } elsif ( $arg eq '-out' ) {
- $outfile = $ARGV[1];
- shift @ARGV;
- } else {
- die "unknown arg $arg\n";
- }
- shift @ARGV;
-}
-
-my $infoFile = "info.txt";
-
-die "info file $infoFile not found\n" if ! -s $infoFile;
-
-my $xlocToken = xloc::ParseTileInfo($infoFile, $enc);
-
-if ( $enc ) {
- open OUTFILE, ">:encoding($enc)", "$outfile"
- or die "couldn't open $outfile";
-} else {
- open OUTFILE, ">$outfile" or die "couldn't open $outfile";
-}
-# For f*cking windoze linefeeds
-# binmode( OUTFILE );
-
-if ( $unicode ne -1 ) {
- xloc::WriteMapFile( $xlocToken, $unicode, \*OUTFILE );
-} elsif ( $dosize ) {
- my $count = xloc::GetNTiles( $xlocToken );
- print OUTFILE pack("c", $count );
-} elsif ( $doval ) {
- xloc::WriteValuesFile( $xlocToken, \*OUTFILE );
-}
-
-close OUTFILE;
diff --git a/xwords4/dawg/xloc.pm b/xwords4/dawg/xloc.pm
deleted file mode 100644
index 2bcb624b5..000000000
--- a/xwords4/dawg/xloc.pm
+++ /dev/null
@@ -1,194 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright 2002-2014 by Eric House (xwords@eehouse.org). All rights
-# reserved.
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-# The idea here is that all that matters about a language is stored in
-# one file (possibly excepting rules for prepping a dictionary).
-# There's a list of tile faces, counts and values, and also some
-# name-value pairs as needed. The pairs come first, and then a list
-# of tiles.
-
-package xloc;
-
-use strict;
-use warnings;
-# force output in utf8
-use open qw/:std :utf8/;
-
-BEGIN {
- use Exporter ();
- our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
-
- $VERSION = 1.00;
-
- @ISA = qw(Exporter);
- @EXPORT = qw(&ParseTileInfo &GetNTiles &TileFace &TileValue
- &TileCount &GetValue &WriteMapFile &WriteValuesFile);
- %EXPORT_TAGS = ( );
-}
-
-# Returns what's meant to be an opaque object that can be passed back
-# for queries. It's a hash with name-value pairs and an _INFO entry
-# containing a list of tile info lists.
-
-sub ParseTileInfo($$) {
- my ( $filePath, $enc ) = @_;
- my %result;
-
- if ( $enc ) {
- open( INPUT, "<:encoding($enc)", "$filePath" )
- or die "couldn't open $filePath";
- } else {
- open( INPUT, "<$filePath" )
- or die "couldn't open $filePath";
- }
-
- my $inTiles = 0;
- my @tiles;
- while ( ) {
-
- chomp;
- s/\#.*$//;
- s/^\s*$//; # nuke all-white-space lines
- next if !length;
-
- if ( $inTiles ) {
- if ( // ) {
- last;
- } else {
- my ( $count, $val, $face ) = m/^\s*(\w+)\s+(\w+)\s+(.*)\s*$/;
- push @tiles, [ $count, $val, $face ];
- }
- } elsif ( /\w:/ ) {
- my ( $nam, $val ) = split ':', $_, 2;
- $result{$nam} .= $val;
- } elsif ( // ) {
- $inTiles = 1;
- }
-
- }
-
- close INPUT;
-
- $result{"_TILES"} = [ @tiles ];
-
- return \%result;
-}
-
-sub GetNTiles($) {
- my ( $hashR ) = @_;
-
- my $listR = ${$hashR}{"_TILES"};
-
- return 0 + @{$listR};
-}
-
-sub GetValue($$) {
- my ( $hashR, $name ) = @_;
- return ${$hashR}{$name};
-}
-
-sub printLetters($$) {
- my ( $str, $fhr ) = @_;
- my @letters = split( /\|/, $str );
- $str = join( " ", @letters );
- for ( my $key = 0; $key < length($str); ++$key ) {
- my $chr = substr( $str, $key, 1 );
- print $fhr pack( "U", ord($chr) );
- }
-}
-
-sub WriteMapFile($$$) {
- my ( $hashR, $unicode, $fhr ) = @_;
-
- my $count = GetNTiles($hashR);
- my $specialCount = 0;
- for ( my $i = 0; $i < $count; ++$i ) {
- my $tileR = GetNthTile( $hashR, $i );
- my $str = ${$tileR}[2];
-
- if ( $str =~ /\'(.(\|.)*)\'/ ) {
- printLetters( $1, $fhr );
- } elsif ( $str =~ /\"(.+)\"/ ) {
- print $fhr pack( "c", $specialCount++ );
- } elsif ( $str =~ /(\d+)/ ) {
- print $fhr pack( "n", $1 );
- } else {
- die "WriteMapFile: unrecognized face format $str, elem $i";
- }
- }
-} # WriteMapFile
-
-sub WriteValuesFile($$) {
- my ( $hashR, $fhr ) = @_;
-
- my $header = GetValue( $hashR,"XLOC_HEADER" );
- die "no XLOC_HEADER found" if ! $header;
-
- print STDERR "header is $header\n";
-
- print $fhr pack( "n", hex($header) );
-
- my $count = GetNTiles($hashR);
- for ( my $i = 0; $i < $count; ++$i ) {
- my $tileR = GetNthTile( $hashR, $i );
-
- print $fhr pack( "c", TileValue($tileR) );
- print $fhr pack( "c", TileCount($tileR) );
- }
-
-} # WriteValuesFile
-
-sub GetNthTile($$) {
- my ( $hashR, $n ) = @_;
- my $listR = ${$hashR}{"_TILES"};
-
- return ${$listR}[$n];
-}
-
-sub TileFace($) {
- my ( $tileR ) = @_;
- my $result;
-
- my $str = ${$tileR}[2];
-
- if ( $str =~ /\'(.(\|.)*)\'/ ) {
- $result = $1;
- } elsif ( $str =~ /\"(.+)\"/ ) {
- $result = $1;
- } elsif ( $str =~ /(\d+)/ ) {
- $result = chr($1);
- } else {
- die "TileFace: unrecognized face format: $str";
- }
- return $result;
-}
-
-sub TileValue($) {
- my ( $tileR ) = @_;
-
- return ${$tileR}[0];
-}
-
-sub TileCount($) {
- my ( $tileR ) = @_;
-
- return ${$tileR}[1];
-}
-
-1;
diff --git a/xwords4/dawg/xloc.py b/xwords4/dawg/xloc.py
new file mode 100755
index 000000000..1aef1ff0c
--- /dev/null
+++ b/xwords4/dawg/xloc.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+import argparse, os, re, struct, sys
+
+def errorOut(msg):  # report msg on stderr (where perl's die wrote) and exit with status 1
+    print('ERROR: {}'.format(msg), file = sys.stderr)
+    sys.exit(1)
+
+def mkParser():
+    """Build the argparse parser for this script's single-dash options."""
+    text, flag = { 'type' : str }, { 'action' : 'store_true' }
+    options = ( ('-enc', 'ENCODING', text, 'use this encoding'),
+                ('-tn', 'DO_TABLE', flag, 'output table file'),
+                ('-v', 'DO_VALS', flag, 'output values file'),
+                ('-s', 'DO_SIZE', flag, 'output size file'),
+                ('-out', 'OUTFILE', text, 'outfile path') )
+    parser = argparse.ArgumentParser()
+    for name, dest, kind, helpText in options:
+        parser.add_argument(name, dest = dest, help = helpText, **kind)
+    return parser
+
+sPreComment = re.compile(r'^([^#]*)#.*$') # cut at the FIRST '#', like perl's s/\#.*$//
+sVarAssign = re.compile(r'^(\w+):(.*)$')
+sBeginTiles = re.compile(r'^\s*<BEGIN_TILES>\s*$') # NOTE(review): marker text assumed
+sEndTiles = re.compile(r'^\s*<END_TILES>\s*$')     # from the info.txt format -- confirm
+sSingleCharMatch = re.compile(r"'(.(\|.)+)'")
+sSpecialsMatch = re.compile(r'{"(.+)"}')
+
+def parseTileInfo(infoFile, encoding):
+    """Parse a language's info.txt into a dict.
+
+    infoFile -- path of the file to read
+    encoding -- codec used to decode it; None means the platform default
+    Returns each NAME:value pair found before the tiles section (values of a
+    repeated NAME are concatenated) plus '_TILES', the list of (count, val,
+    face) string tuples from the lines between the tile markers.
+    """
+    result = {'_TILES' : []}
+    inTiles = False
+    with open(infoFile, 'rt', encoding = encoding) as file:
+        for line in file.read().split('\n'):
+            match = sPreComment.match(line)
+            if match:
+                line = match.group(1)
+            # perl nuked all-white-space lines too; they would break the unpack below
+            if not line.strip(): continue
+
+            if inTiles:
+                if sEndTiles.match(line):
+                    break
+                # maxsplit of 2 keeps a face that contains white space in one piece
+                (count, val, face) = line.split(None, 2)
+                result['_TILES'].append((count, val, face.rstrip()))
+            elif sBeginTiles.match(line):
+                inTiles = True
+            else:
+                match = sVarAssign.match(line)
+                if match:
+                    var = match.group(1)
+                    result[var] = result.get(var, '') + match.group(2)
+
+    return result
+
+class XLOC:
+    pass    # empty placeholder: no data or behaviour yet
+
+def readXLOC():
+    return XLOC()   # always a new, empty instance
+
+# sub WriteMapFile($$$) {
+# my ( $hashR, $unicode, $fhr ) = @_;
+
+# my $count = GetNTiles($hashR);
+# my $specialCount = 0;
+# for ( my $i = 0; $i < $count; ++$i ) {
+# my $tileR = GetNthTile( $hashR, $i );
+# my $str = ${$tileR}[2];
+
+# if ( $str =~ /\'(.(\|.)*)\'/ ) {
+# printLetters( $1, $fhr );
+# } elsif ( $str =~ /\"(.+)\"/ ) {
+# print $fhr pack( "c", $specialCount++ );
+# } elsif ( $str =~ /(\d+)/ ) {
+# print $fhr pack( "n", $1 );
+# } else {
+# die "WriteMapFile: unrecognized face format $str, elem $i";
+# }
+# }
+# } # WriteMapFile
+
+def printLetters( letters, outfile ):
+    """Write the '|'-separated letters to outfile, space-separated, as utf8 bytes."""
+    spaced = letters.replace('|', ' ')   # same result as split('|') then ' '.join()
+    outfile.write(spaced.encode('utf8'))
+
+def writeMapFile(xlocToken, outfile):
+    """Write the tile-face table for xlocToken['_TILES'] to the binary file outfile.
+    A quoted face such as 'A|a' is written as its letters (see printLetters); a
+    special such as {"_"} as one byte counting up from 0; anything else is fatal.
+    """
+    print('writeMapFile()')
+    specialCount = 0
+    for (_, _, face) in xlocToken['_TILES']:
+        single = sSingleCharMatch.match(face)
+        special = sSpecialsMatch.match(face)
+        if single:
+            print('single char: {}'.format(single.group(1)))
+            printLetters( single.group(1), outfile )
+        elif special:
+            print('specials char: {}'.format(special.group(1)))
+            outfile.write(struct.pack('B', specialCount ))
+            specialCount += 1
+        else:
+            # not an assert: those vanish under "python -O", silently dropping the tile
+            errorOut('bad/unmatched face: {}'.format(face))
+
+def writeValuesFile(xlocToken, outfile):
+    """Write XLOC_HEADER as a big-endian 16-bit word, then two bytes per tile."""
+    header = xlocToken.get('XLOC_HEADER')
+    if not header:
+        errorOut('no XLOC_HEADER found')
+    print('writing header: {}'.format(header))
+    outfile.write(struct.pack('!H', int(header, 16)))
+    # each tile contributes its first two fields, in file order, one byte apiece
+    for fields in xlocToken['_TILES']:
+        outfile.write(struct.pack('BB', int(fields[0]), int(fields[1])))
+
+def main():
+    """Entry point: read ./info.txt and write the file chosen by -tn, -s or -v to -out."""
+    print('{}.main {} called'.format(sys.argv[0], sys.argv[1:]))
+    args = mkParser().parse_args()
+    # Check arguments with errorOut(), not assert (asserts vanish under "python -O"),
+    # and before opening the output so a bad command line leaves no empty file behind.
+    if not args.OUTFILE: errorOut('-out is required')
+    if not (args.DO_TABLE or args.DO_SIZE or args.DO_VALS): errorOut('one of -tn, -s or -v is required')
+    if args.DO_SIZE and args.DO_VALS and not args.DO_TABLE: errorOut('-s and -v cannot be combined')
+
+    infoFile = 'info.txt'
+    if not os.path.exists(infoFile):
+        errorOut('{} not found'.format(infoFile))
+    xlocToken = parseTileInfo(infoFile, args.ENCODING)
+
+    with open(args.OUTFILE, 'wb') as outfile:
+        if args.DO_TABLE:
+            writeMapFile(xlocToken, outfile)
+        elif args.DO_SIZE:
+            outfile.write(struct.pack('!B', len(xlocToken['_TILES'])))
+        else:
+            writeValuesFile( xlocToken, outfile )
+
+
+##############################################################################
+if __name__ == '__main__':
+ main()