fix lower->upper translation: tr didn't like my strings

For whatever reason, though emacs thought the lower- and uppercase strings I was passing to tr were the same but for case two letters were getting dropped. This lets tr figure things out itself.
2025-01-30 08:34:16 +01:00 · 2019-11-19 22:27:41 -08:00 · 2019-11-19 22:27:41 -08:00 · 1e7ae2b2ec
commit 1e7ae2b2ec
parent 8da3c5fc68
2 changed files with 1 additions and 552 deletions
--- a/xwords4/dawg/Slovak/Makefile
+++ b/xwords4/dawg/Slovak/Makefile
@ -28,7 +28,7 @@ SOURCEDICT ?= $(XWDICTPATH)/Slovak/Scrabble_SK.dict

 $(XWLANG)Main.dict.gz: $(SOURCEDICT) Makefile
 	cat $< | tr -d '\r' | \
-		tr [aáäbcčdďeéfghiíjklĺľmnňoôóprŕsštťuúvxyýzž] [AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ] | \
+		tr '[:lower:]' '[:upper:]' | \
 		sed 's,.,\U\0,g' | \
 		grep '^[AÁÄBCČDĎEÉFGHIÍJKLĹĽMNŇOÔÓPRŔSŠTŤUÚVXYÝZŽ]*$$' | \
 		gzip -c > $@
--- a/xwords4/dawg/dawg2dict.pl
+++ b/xwords4/dawg/dawg2dict.pl
@ -1,551 +0,0 @@
-#!/usr/bin/perl
-#
-# Copyright 2004 - 2012 by Eric House (xwords@eehouse.org)
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-
-
-# Given a .pdb or .xwd file, print all the words in the DAWG to
-# stdout.
-
-use strict;
-use Fcntl;
-use Encode 'from_to';
-use Encode;
-use Digest::MD5;
-
-my $gInFile;
-my $gSumOnly = 0;
-my $gSum = '';
-my $gDescOnly = 0;
-my $gDesc;
-my $gDoRaw = 0;
-my $gDoJSON = 0;
-my $gFileType;
-my $gNodeSize;
-my $gIsUTF;
-
-use Fcntl 'SEEK_CUR';
-
-use constant {
-	DICT_HEADER_MASK => 0x0008,
-	DICT_SYNONYMS_MASK => 0x0010
-};
-
-sub systell { sysseek($_[0], 0, SEEK_CUR) }
-
-sub usage() {
-    print STDERR "USAGE: $0"
-        . " [-raw | -json] "
-        . " [-get-sum]"
-        . " [-get-desc]"
-        . " -dict <xwdORpdb>"
-        . "\n"
-        . "\t(Takes a .pdb or .xwd and prints its words to stdout)\n";
-    exit 1;
-}
-
-sub parseARGV() {
-
-    while ( my $parm = shift(@ARGV) ) {
-        if ( $parm eq "-raw" ) {
-            $gDoRaw = 1;
-        } elsif ( $parm eq "-json" ) {
-            $gDoJSON = 1;
-        } elsif ( $parm eq "-dict" ) {
-            $gInFile = shift(@ARGV);
-        } elsif ( $parm eq "-get-sum" ) {
-            $gSumOnly = 1;
-        } elsif ( $parm eq "-get-desc" ) {
-            $gDescOnly = 1;
-        } else {
-            usage();
-        }
-    }
-
-    if ( $gInFile =~ m|.xwd$| ) {
-        $gFileType = "xwd";
-    } elsif ( $gInFile =~ m|.pdb$| ) {
-        $gFileType = "pdb";
-    } else {
-        usage();
-    }
-} # parseARGV
-
-sub countSpecials($) {
-    my ( $facesRef ) = @_;
-    my $count = 0;
-
-    map { ++$count if ( ord($_) < 32 ); } @$facesRef;
-    return $count;
-} # countSpecials
-
-sub readXWDFaces($$$) {
-    my ( $fh, $facRef, $nSpecials ) = @_;
-
-    my ( $buf, $nRead, $nChars, $nBytes );
-    $nRead = sysread( $fh, $buf, 1 );
-    $nBytes = unpack( 'c', $buf );
-    printf STDERR "nBytes of faces: %d\n", $nBytes;
-
-	if ( $gIsUTF ) {
-		$nRead = sysread( $fh, $buf, 1 );
-		$nChars = unpack( 'c', $buf );
-		printf STDERR "nChars of faces: %d\n", $nChars;
-	}
-
-    # At this point $fh is pointing at the start of data
-    if ( $gSumOnly ) {
-        my $sum = sumRestOfFile( $fh );
-        if ( $sum eq $gSum ) {
-            print STDERR "got: $sum, $gSum twice!!!\n";
-        } elsif ( !$gSum ) {
-            $gSum = $sum;
-        } else {
-            print STDERR "disagreement: $sum vs $gSum\n";
-            exit( -1 );
-        }
-    } else {
-        binmode( $fh, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
-        sysread( $fh, $buf, $nBytes );
-        length($buf) == $nBytes or die "didn't read expected number of bytes\n";
-        binmode( $fh ) or die "binmode failed\n";
-
-        print STDERR "string now: $buf\n";
-        my @faces;
-        my $index = 0;
-        for ( my $nFound = 0; $nFound < $nChars; ) {
-            my $chr = substr( $buf, $index++, 1 );
-            if ( $chr eq ' ' ) {
-                # For testing, this next line uses last rather than
-                # first alternate
-                # $faces[$#faces] = substr( $buf, $index, 1 );
-                ++$index;
-                next;
-            }
-            print STDERR "pushing $chr \n";
-            push( @faces, $chr );
-            ++$nFound;
-        }
-
-        printf STDERR "at 0x%x after reading faces\n", systell($fh);
-
-        ${$nSpecials} = countSpecials( \@faces );
-        @{$facRef} = @faces;
-        printf STDERR "readXWDFaces=>%d\n", $nChars;
-    }
-    return $nChars;
-} # readXWDFaces
-
-sub skipBitmap($) {
-    my ( $fh ) = @_;
-    my $buf;
-    sysread( $fh, $buf, 1 );
-    my $nCols = unpack( 'C', $buf );
-    if ( $nCols > 0 ) {
-        sysread( $fh, $buf, 1 );
-        my $nRows = unpack( 'C', $buf );
-        my $nBytes = (($nRows * $nCols) + 7) / 8;
-
-        sysread( $fh, $buf, $nBytes );
-    }
-    printf STDERR "skipBitmap\n";
-} # skipBitmap
-
-sub getSpecials($$$) {
-    my ( $fh, $nSpecials, $specRef ) = @_;
-
-    my @specials;
-    for ( my $i = 0; $i < $nSpecials; ++$i ) {
-        my $buf;
-        sysread( $fh, $buf, 1 );
-        my $len = unpack( 'C', $buf );
-        sysread( $fh, $buf, $len );
-        push( @specials, $buf );
-        skipBitmap( $fh );
-        skipBitmap( $fh );
-    }
-
-    @{$specRef} = @specials;
-} # getSpecials
-
-sub readNodesToEnd($) {
-    my ( $fh ) = @_;
-    my @nodes;
-    my $count = 0;
-    my $offset = 4 - $gNodeSize;
-    my ( $buf, $nRead );
-
-    do {
-        $nRead = sysread( $fh, $buf, $gNodeSize, $offset );
-        $count += $nRead;
-        my $node = unpack( 'N', $buf );
-        push( @nodes, $node );
-    } while ( $nRead == $gNodeSize );
-    die "out of sync? nRead=$nRead, count=$count" if $nRead != 0;
-
-    return @nodes;
-} # readNodesToEnd
-
-sub printHeader($$) {
-    my ( $buf, $len ) = @_;
-    printf STDERR "skipped %d bytes of header:\n", $len + 2;
-    my $count;
-    if ( $len == 4 ) {
-        ($count) = unpack( 'N', $buf );
-    } else {
-        print STDERR 'unpacking...\n';
-        ($count, $gDesc, $gSum) = unpack( 'N Z* Z*', $buf );
-    }
-    printf STDERR "has %d words\n", $count;
-}
-
-sub nodeSizeFromFlags($$) {
-    my ( $fh, $flags ) = @_;
-
-    my $bitSet = $flags & DICT_HEADER_MASK;
-    if ( 0 != $bitSet ){
-        $flags = $flags & ~DICT_HEADER_MASK;
-        # need to skip header
-        my $buf;
-        2 == sysread( $fh, $buf, 2 ) || die "couldn't read length of header";
-        my $len = unpack( "n", $buf );
-        $len == sysread( $fh, $buf, $len ) || die  "couldn't read header bytes";
-        printHeader( $buf, $len );
-    }
-
-	$flags = $flags & ~DICT_SYNONYMS_MASK;
-
-	my $nodeSize;
-    if ( $flags == 2 || $flags == 4 ) {
-        $nodeSize = 3;
-    } elsif ( $flags == 3 || $flags == 5 ) {
-		$nodeSize = 4;
-    } else {
-        die "invalid dict flags $flags";
-    }
-	my $isUTF = $flags == 4 || $flags == 5;
-
-	return ( $nodeSize, $isUTF );
-} # nodeSizeFromFlags
-
-sub mergeSpecials($$) {
-    my ( $facesRef, $specialsRef ) = @_;
-    for ( my $i = 0; $i < @$facesRef; ++$i ) {
-        my $ref = ord($$facesRef[$i]);
-        if ( $ref < 32 ) {
-            $$facesRef[$i] = $$specialsRef[$ref];
-            #print STDERR "set $ref to $$specialsRef[$ref]\n";
-        }
-    }
-}
-
-sub sumRestOfFile($) {
-    my ( $fh ) = @_;
-    my $buf;
-    my $count = 0;
-    my $md5 = Digest::MD5->new;
-    for ( ; ; ) {
-        my $nRead = sysread( $fh, $buf, 128 );
-        0 == $nRead && last;
-        $count += $nRead;
-        $md5->add( $buf );
-    }
-    # print STDERR "read $count bytes\n";
-    return $md5->hexdigest();
-}
-
-sub prepXWD($$$$) {
-    my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
-    my $done = 1;
-
-    printf STDERR "at 0x%x at start\n", systell($fh);
-    my $buf;
-    my $nRead = sysread( $fh, $buf, 2 );
-    my $flags = unpack( "n", $buf );
-
-    ($gNodeSize, $gIsUTF) = nodeSizeFromFlags( $fh, $flags );
-
-    my $nSpecials;
-    my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials ); # does checksum
-
-    if ( $gSumOnly ) {
-        print STDOUT $gSum, "\n";
-    } elsif( $gDescOnly ) {
-        print STDOUT $gDesc, "\n";
-    } else {
-        $done = 0;
-        printf STDERR "at 0x%x before header read\n", systell($fh);
-        # skip xloc header
-        $nRead = sysread( $fh, $buf, 2 );
-
-        # skip values info.
-        printf STDERR "at 0x%x before reading %d values\n", systell($fh), $faceCount;
-        sysread( $fh, $buf, $faceCount * 2 );
-        printf STDERR "at 0x%x after values read\n", systell($fh);
-
-        printf STDERR "at 0x%x before specials read\n", systell($fh);
-        my @specials;
-        getSpecials( $fh, $nSpecials, \@specials );
-        mergeSpecials( $facRef, \@specials );
-        printf STDERR "at 0x%x after specials read\n", systell($fh);
-
-        printf STDERR "at 0x%x before offset read\n", systell($fh);
-        sysread( $fh, $buf, 4 );
-        $$startRef = unpack( 'N', $buf );
-        print STDERR "startRef=$$startRef\n";
-
-        my @nodes = readNodesToEnd( $fh );
-
-        @$nodesRef = @nodes;
-    }
-    print STDERR "prepXWD done\n";
-    return $done;
-} # prepXWD
-
-sub readPDBSpecials($$$$$) {
-    my ( $fh, $nChars, $nToRead, $nSpecials, $specRef ) = @_;
-
-    my ( $nRead, $buf );
-
-    # first skip counts and values, and xloc header
-    $nRead += sysread( $fh, $buf, ($nChars * 2) + 2 );
-
-    while ( $nSpecials-- ) {
-        $nRead += sysread( $fh, $buf, 8 ); # sizeof(Xloc_specialEntry)
-        my @chars = unpack( 'C8', $buf );
-        my $str;
-        foreach my $char (@chars) {
-            if ( $char == 0 ) { # null-terminated on palm
-                last;
-            }
-            $str .= chr($char);
-        }
-        push( @$specRef, $str );
-    }
-
-    $nRead += sysread( $fh, $buf, $nToRead - $nRead ); # skip bitmaps
-
-    return $nRead;
-} # readPDBSpecials
-
-sub prepPDB($$$$) {
-    my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
-
-    $$startRef = 0; # always for palm?
-
-    my $buf;
-    # skip header info
-    my $nRead = sysread( $fh, $buf, 76 );
-    $nRead += sysread( $fh, $buf, 2 );
-    my $nRecs = unpack( 'n', $buf );
-
-    my @offsets;
-    for ( my $i = 0; $i < $nRecs; ++$i ) {
-        $nRead += sysread( $fh, $buf, 4 );
-        push( @offsets, unpack( 'N', $buf ) );
-        $nRead += sysread( $fh, $buf, 4 ); # skip
-    }
-
-    die "too far" if $nRead > $offsets[0];
-    while ( $nRead < $offsets[0] ) {
-        $nRead += sysread( $fh, $buf, 1 );
-    }
-
-    my $facesOffset = $offsets[1];
-    my $nChars = ($offsets[2] - $facesOffset) / 2;
-    $nRead += sysread( $fh, $buf, $facesOffset - $nRead );
-    my @tmp = unpack( 'Nc6n', $buf );
-    $gNodeSize = nodeSizeFromFlags( 0, $tmp[7] );
-
-    my @faces;
-    for ( my $i = 0; $i < $nChars; ++$i ) {
-        $nRead += sysread( $fh, $buf, 2 );
-        push( @faces, chr(unpack( "n", $buf ) ) );
-    }
-    @{$facRef} = @faces;
-
-    die "out of sync: $nRead != $offsets[2]" if $nRead != $offsets[2];
-
-    my @specials;
-    $nRead += readPDBSpecials( $fh, $nChars, $offsets[3] - $nRead,
-                               countSpecials($facRef), \@specials );
-    mergeSpecials( $facRef, \@specials );
-
-    die "out of sync" if $nRead != $offsets[3];
-    my @nodes = readNodesToEnd( $fh );
-
-    @$nodesRef = @nodes;
-} # prepPDB
-
-sub parseNode($$$$$) {
-    my ( $node, $chrIndex, $nextEdge, $accepting, $last ) = @_;
-
-    if ( $gNodeSize == 4 ) {
-        $$accepting = ($node & 0x00008000) != 0;
-        $$last = ($node & 0x00004000) != 0;
-        $$chrIndex = ($node & 0x00003f00) >> 8;
-        $$nextEdge = ($node >> 16) + (($node & 0x000000FF) << 16);
-    } elsif( $gNodeSize == 3 ) {
-        $$accepting = ($node & 0x00000080) != 0;
-        $$last = ($node & 0x00000040) != 0;
-        $$chrIndex = $node & 0x0000001f;
-        $$nextEdge = ($node >> 8) + (($node & 0x00000020) << 11);
-    }
-
-   # printf "%x: acpt=$$accepting; last=$$last; "
-   #    . "next=$$nextEdge; ci=$$chrIndex\n", $node;
-} # parseNode
-
-sub printStr($$) {
-    my ( $strRef, $facesRef ) = @_;
-
-    print join( "", map {$$facesRef[$_]} @$strRef), "\n";
-} # printStr
-
-# Given an array of 4-byte nodes, a start index. and another array of
-# two-byte faces, print out all of the words in the nodes array.
-sub printDAWG($$$$) {
-    my ( $strRef, $arrRef, $start, $facesRef ) = @_;
-
-    die "infinite recursion???" if @$strRef > 15;
-
-    for ( ; ; ) {
-        my $node = $$arrRef[$start++];
-        my $nextEdge;
-        my $chrIndex;
-        my $accepting;
-        my $lastEdge;
-
-        parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge );
-
-        push( @$strRef, $chrIndex );
-        if ( $accepting ) {
-            printStr( $strRef, $facesRef );
-        }
-
-        if ( $nextEdge != 0 ) {
-            printDAWG( $strRef, $arrRef, $nextEdge, $facesRef );
-        }
-
-        pop( @$strRef );
-
-        if ( $lastEdge ) {
-            last;
-        }
-    }
-} # printDAWG
-
-sub printNodes($$) {
-    my ( $nr, $fr ) = @_;
-    
-    my $len = @$nr;
-    for ( my $i = 0; $i < $len; ++$i ) {
-        my $node = $$nr[$i];
-
-        my ( $chrIndex, $nextEdge, $accepting, $lastEdge );
-        parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge );
-
-        printf "%.8x: (%.8x) %2d(%s) %.8x ", $i, $node, $chrIndex, 
-        $$fr[$chrIndex], $nextEdge;
-        print ($accepting? "A":"a");
-        print " ";
-        print ($lastEdge? "L":"l");
-        print "\n";
-    }
-}
-
-sub printStartJson($) {
-    my ( $startIndex ) = @_;
-    printf( "  start: 0x%.8x,\n", $startIndex );
-}
-
-sub printCharsJson($) {
-    my ( $fr ) = @_;
-    print "  chars: [ ";
-    foreach my $char (@$fr) {
-        print "\"$char\", "
-    }
-    print "],\n"
-}
-
-sub printNodesJson($) {
-    my ( $nr ) = @_;
-    print "  dawg: [\n";
-
-    my $len = @$nr;
-    my $newLine = 1;
-    for ( my $ii = 0; $ii < $len; ++$ii ) {
-        my $node = $$nr[$ii];
-
-        if ( $newLine ) {
-            printf( "    /*%.6x*/ ", $ii );
-            $newLine = 0;
-        }
-
-        printf "0x%.8x, ", $node;
-
-        my ( $chrIndex, $nextEdge, $accepting, $lastEdge );
-        parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge );
-        if ( $lastEdge ) {
-            print "\n";
-            $newLine = 1;
-        }
-    }
-
-    print "\n  ],\n"
-}
-
-#################################################################
-# main
-#################################################################
-
-binmode( STDERR, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
-
-parseARGV();
-
-sysopen(INFILE, $gInFile, O_RDONLY) or die "couldn't open $gInFile: $!\n";;
-binmode INFILE;
-
-my @faces;
-my @nodes;
-my $startIndex;
-my $done;
-
-if ( $gFileType eq "xwd" ){ 
-    $done = prepXWD( *INFILE, \@faces, \@nodes, \$startIndex );
-} elsif ( $gFileType eq "pdb" ) {
-    $done = prepPDB( *INFILE, \@faces, \@nodes, \$startIndex );
-}
-close INFILE;
-
-die "no nodes!!!" if 0 == @nodes;
-
-if ( $done ) {
-    # we're done...
-} elsif ( $gDoRaw ) {
-    printNodes( \@nodes, \@faces );
-} elsif ( $gDoJSON ) {
-    print "dict = {\n";
-    printStartJson( $startIndex );
-    printCharsJson( \@faces );
-    printNodesJson( \@nodes );
-    print "}\n";
-} else {
-    binmode( STDOUT, ":encoding(utf8)" ) or die "binmode(:utf-8) failed\n";
-    printDAWG( [], \@nodes, $startIndex, \@faces );
-}
-
-exit 0;