cleanup. Everything works now AFAIK.

2025-01-30 08:34:16 +01:00 · 2004-06-10 03:27:40 +00:00 · 2004-06-10 03:27:40 +00:00 · 3cbe2f4b80
commit 3cbe2f4b80
parent 153d2ae34d
1 changed files with 99 additions and 106 deletions
--- a/dawg/dawg2dict.pl
+++ b/dawg/dawg2dict.pl
@ -1,49 +1,45 @@
 #!/usr/bin/perl
 #
 # Copyright 2004 by Eric House (fixin@peak.org)
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-# Given a .pdb or .xwd file, print all the words in the DAWG.
+
-# Optionally write the values and faces to files whose names are
+# Given a .pdb or .xwd file, print all the words in the DAWG to
-# provided.
+# stdout.
 use strict;
 use Fcntl;
 my $gValueFileName;
 my $gASCIIFacesFileName;
 my $gUTFFacesFileName;
 my $gInFile;
 my $gFileType;
 my $gNodeSize;
 sub usage() {
    print STDERR "USAGE: $0 "
-        . "[-vf valuesFileName] "
+        . "<xwdORpdb>"
-        . "[-fa asciiFacesFileName] "
+        . "\n"
-        . "[-fu unicodeFacesFileName] "
+        . "\t(Takes a .pdb or .xwd and prints its words to stdout)\n";
-        . "xwdORpdb"
+    exit 1;
        . "\n";
 }
-sub parseARGV {
+sub parseARGV() {
-    my $arg;
+    $gInFile = shift(@ARGV);
-    while ( my $arg = shift(@ARGV) ) {
+    if ( 0 != @ARGV ) {
-
+        usage();
      SWITCH: {
          if ($arg =~ /-vf/) {$gValueFileName = shift(@ARGV), last SWITCH;}
          if ($arg =~ /-fa/) {$gASCIIFacesFileName = shift(@ARGV);
                              last SWITCH;}
          if ($arg =~ /-fu/) {$gUTFFacesFileName = shift(@ARGV);
                              last SWITCH;}
          # Get here it must be the final arg, the input file name.
          $gInFile = $arg;
          if ( 0 != @ARGV ) {
              usage();
              exit 1;
          }
      }
    }
    if ( $gInFile =~ m|.xwd$| ) {
@ -52,24 +48,18 @@ sub parseARGV {
        $gFileType = "pdb";
    } else {
        usage();
        exit 1;
    }
    return 1;
 } # parseARGV
 sub countSpecials($) {
-    my ( $lref ) = @_;
+    my ( $facesRef ) = @_;
    my $count = 0;
-    foreach my $val (@$lref) {
+
-        if ( ord($val) < 32 ) {
+    map { ++$count if ( ord($_) < 32 ); } @$facesRef;
            ++$count;
        }
    }
    return $count;
 } # countSpecials
-sub readFaces($$$) {
+sub readXWDFaces($$$) {
    my ( $fh, $facRef, $nSpecials ) = @_;
    my $buf;
@ -85,7 +75,7 @@ sub readFaces($$$) {
    ${$nSpecials} = countSpecials( \@faces );
    @{$facRef} = @faces;
    return $nChars;
-} # readFaces
+} # readXWDFaces
 sub skipBitmap($) {
    my ( $fh ) = @_;
@ -143,7 +133,7 @@ sub nodeSizeFromFlags($) {
    } elsif ( $flags == 3 ) {
        return 4;
    } else {
-        die "invalid dict flags";
+        die "invalid dict flags $flags";
    }
 } # nodeSizeFromFlags
@ -153,94 +143,112 @@ sub mergeSpecials($$) {
        my $ref = ord($$facesRef[$i]);
        if ( $ref < 32 ) {
            $$facesRef[$i] = $$specialsRef[$ref];
-            print STDERR "set $ref to $$specialsRef[$ref]\n";
+            #print STDERR "set $ref to $$specialsRef[$ref]\n";
        }
    }
 }
 sub prepXWD($$$$) {
-    my ( $path, $facRef, $nodesRef, $startRef ) = @_;
+    my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
    sysopen(INFILE, $path, O_RDONLY) or die "couldn't open $path: $!\n";;
    binmode INFILE;
    my $buf;
-    my $nRead = sysread( INFILE, $buf, 2 );
+    my $nRead = sysread( $fh, $buf, 2 );
    my $flags = unpack( "n", $buf );
    $gNodeSize = nodeSizeFromFlags( $flags );
    my $nSpecials;
-    my $faceCount = readFaces( *INFILE, $facRef, \$nSpecials );
+    my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
    # skip xloc header
-    $nRead = sysread( INFILE, $buf, 2 );
+    $nRead = sysread( $fh, $buf, 2 );
    # skip values info.
-    sysread( INFILE, $buf, $faceCount * 2 );
+    sysread( $fh, $buf, $faceCount * 2 );
    my @specials;
-    getSpecials( *INFILE, $nSpecials, \@specials );
+    getSpecials( $fh, $nSpecials, \@specials );
    mergeSpecials( $facRef, \@specials );
-    sysread( INFILE, $buf, 4 );
+    sysread( $fh, $buf, 4 );
    $$startRef = unpack( 'N', $buf );
-    my @nodes = readNodesToEnd( *INFILE );
+    my @nodes = readNodesToEnd( $fh );
    close INFILE;
    @$nodesRef = @nodes;
 } # prepXWD
 sub readPDBSpecials($$$$$) {
    my ( $fh, $nChars, $nToRead, $nSpecials, $specRef ) = @_;
    my ( $nRead, $buf );
    # first skip counts and values, and xloc header
    $nRead += sysread( $fh, $buf, ($nChars * 2) + 2 );
    while ( $nSpecials-- ) {
        $nRead += sysread( $fh, $buf, 8 ); # sizeof(Xloc_specialEntry)
        my @chars = unpack( 'C8', $buf );
        my $str;
        foreach my $char (@chars) {
            if ( $char == 0 ) { # null-terminated on palm
                last;
            }
            $str .= chr($char);
        }
        push( @$specRef, $str );
    }
    $nRead += sysread( $fh, $buf, $nToRead - $nRead ); # skip bitmaps
    return $nRead;
 } # readPDBSpecials
 sub prepPDB($$$$) {
-    my ( $path, $facRef, $nodesRef, $startRef ) = @_;
+    my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
    $$startRef = 0; # always for palm?
    sysopen(INFILE, $path, O_RDONLY) or die "couldn't open $path: $!\n";;
    binmode INFILE;
    my $buf;
    # skip header info
-    my $nRead = sysread( INFILE, $buf, 76 );
+    my $nRead = sysread( $fh, $buf, 76 );
-    $nRead += sysread( INFILE, $buf, 2 );
+    $nRead += sysread( $fh, $buf, 2 );
    my $nRecs = unpack( 'n', $buf );
    my @offsets;
    for ( my $i = 0; $i < $nRecs; ++$i ) {
-        $nRead += sysread( INFILE, $buf, 4 );
+        $nRead += sysread( $fh, $buf, 4 );
        push( @offsets, unpack( 'N', $buf ) );
-        $nRead += sysread( INFILE, $buf, 4 ); # skip
+        $nRead += sysread( $fh, $buf, 4 ); # skip
    }
    die "too far" if $nRead > $offsets[0];
    while ( $nRead < $offsets[0] ) {
-        $nRead += sysread( INFILE, $buf, 1 );
+        $nRead += sysread( $fh, $buf, 1 );
    }
    my $facesOffset = $offsets[1];
    my $nChars = ($offsets[2] - $facesOffset) / 2;
-    $nRead += sysread( INFILE, $buf, $facesOffset - $nRead );
+    $nRead += sysread( $fh, $buf, $facesOffset - $nRead );
-    my @tmp = unpack( 'Nccccccn', $buf );
+    my @tmp = unpack( 'Nc6n', $buf );
    $gNodeSize = nodeSizeFromFlags( $tmp[7] );
    my @faces;
    for ( my $i = 0; $i < $nChars; ++$i ) {
-        $nRead += sysread( INFILE, $buf, 2 );
+        $nRead += sysread( $fh, $buf, 2 );
        push( @faces, chr(unpack( "n", $buf ) ) );
    }
    @{$facRef} = @faces;
    die "out of sync: $nRead != $offsets[2]" if $nRead != $offsets[2];
-    # now skip count and values.  We'll want to get the "specials"
+    my @specials;
-    # shortly.
+    $nRead += readPDBSpecials( $fh, $nChars, $offsets[3] - $nRead,
-    $nRead += sysread( INFILE, $buf, $offsets[3] - $nRead );
+                               countSpecials($facRef), \@specials );
    mergeSpecials( $facRef, \@specials );
    die "out of sync" if $nRead != $offsets[3];
-    my @nodes = readNodesToEnd( *INFILE );
+    my @nodes = readNodesToEnd( $fh );
    close INFILE;
    @$nodesRef = @nodes;
 } # prepPDB
@ -270,12 +278,12 @@ sub printStr($$) {
    print join( "", map {$$facesRef[$_]} @$strRef), "\n";
 } # printStr
 # Given an array of 4-byte nodes, a start index. and another array of
 # two-byte faces, print out all of the words in the nodes array.
 sub printDAWG($$$$) {
    my ( $strRef, $arrRef, $start, $facesRef ) = @_;
-sub printDAWGInternal($$$$) {
+    die "infinite recursion???" if @$strRef > 15;
    my ( $str, $arrRef, $start, $facesRef ) = @_;
    for ( ; ; ) {
        my $node = $$arrRef[$start++];
@ -286,60 +294,45 @@ sub printDAWGInternal($$$$) {
        parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge );
-        push( @$str, $chrIndex );
+        push( @$strRef, $chrIndex );
        if ( $accepting ) {
-            printStr( $str, $facesRef );
+            printStr( $strRef, $facesRef );
        }
        if ( $nextEdge != 0 ) {
-            printDAWGInternal( $str, $arrRef, $nextEdge, $facesRef );
+            printDAWG( $strRef, $arrRef, $nextEdge, $facesRef );
        }
-        pop( @$str );
+        pop( @$strRef );
        if ( $lastEdge ) {
            last;
        }
    }
-} # printDAWGInternal
+} # printDAWG
 sub printDAWG($$$) {
    my ( $arrRef, $start, $facesRef ) = @_;
    die "no nodes!!!" if 0 == @$arrRef;
    my @str;
    printDAWGInternal( \@str, $arrRef, $start, $facesRef );
 }
 #################################################################
 # main
 #################################################################
-if ( !parseARGV() ) {
+parseARGV();
-    usage();
+
-    exit 1;
+sysopen(INFILE, $gInFile, O_RDONLY) or die "couldn't open $gInFile: $!\n";;
-}
+binmode INFILE;
 my @faces;
 my @nodes;
 my $startIndex;
 if ( $gFileType eq "xwd" ){ 
-    prepXWD( $gInFile, \@faces, \@nodes, \$startIndex );
+    prepXWD( *INFILE, \@faces, \@nodes, \$startIndex );
 } elsif ( $gFileType eq "pdb" ) {
-    prepPDB( $gInFile, \@faces, \@nodes, \$startIndex );
+    prepPDB( *INFILE, \@faces, \@nodes, \$startIndex );
    print STDERR join( ",", @faces), "\n";
 }
 close INFILE;
-printDAWG( \@nodes, $startIndex, \@faces );
+die "no nodes!!!" if 0 == @nodes;
 printDAWG( [], \@nodes, $startIndex, \@faces );
-if ( $gASCIIFacesFileName ) {
+exit 0;
    open FACES, "> $gASCIIFacesFileName";
    foreach my $face (@faces) {
        print FACES pack('cc', 0, $face );
    }
    close FACES;
 }