cleanup. Everything works now AFAIK.

This commit is contained in:
ehouse 2004-06-10 03:27:40 +00:00
parent 153d2ae34d
commit 3cbe2f4b80

View file

@ -1,49 +1,45 @@
#!/usr/bin/perl #!/usr/bin/perl
# #
# Copyright 2004 by Eric House (fixin@peak.org)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Given a .pdb or .xwd file, print all the words in the DAWG.
# Optionally write the values and faces to files whose names are # Given a .pdb or .xwd file, print all the words in the DAWG to
# provided. # stdout.
use strict; use strict;
use Fcntl; use Fcntl;
my $gValueFileName;
my $gASCIIFacesFileName;
my $gUTFFacesFileName;
my $gInFile; my $gInFile;
my $gFileType; my $gFileType;
my $gNodeSize; my $gNodeSize;
sub usage() { sub usage() {
print STDERR "USAGE: $0 " print STDERR "USAGE: $0 "
. "[-vf valuesFileName] " . "<xwdORpdb>"
. "[-fa asciiFacesFileName] " . "\n"
. "[-fu unicodeFacesFileName] " . "\t(Takes a .pdb or .xwd and prints its words to stdout)\n";
. "xwdORpdb" exit 1;
. "\n";
} }
sub parseARGV { sub parseARGV() {
my $arg; $gInFile = shift(@ARGV);
while ( my $arg = shift(@ARGV) ) { if ( 0 != @ARGV ) {
usage();
SWITCH: {
if ($arg =~ /-vf/) {$gValueFileName = shift(@ARGV), last SWITCH;}
if ($arg =~ /-fa/) {$gASCIIFacesFileName = shift(@ARGV);
last SWITCH;}
if ($arg =~ /-fu/) {$gUTFFacesFileName = shift(@ARGV);
last SWITCH;}
# Get here it must be the final arg, the input file name.
$gInFile = $arg;
if ( 0 != @ARGV ) {
usage();
exit 1;
}
}
} }
if ( $gInFile =~ m|.xwd$| ) { if ( $gInFile =~ m|.xwd$| ) {
@ -52,24 +48,18 @@ sub parseARGV {
$gFileType = "pdb"; $gFileType = "pdb";
} else { } else {
usage(); usage();
exit 1;
} }
return 1;
} # parseARGV } # parseARGV
sub countSpecials($) { sub countSpecials($) {
my ( $lref ) = @_; my ( $facesRef ) = @_;
my $count = 0; my $count = 0;
foreach my $val (@$lref) {
if ( ord($val) < 32 ) { map { ++$count if ( ord($_) < 32 ); } @$facesRef;
++$count;
}
}
return $count; return $count;
} # countSpecials } # countSpecials
sub readFaces($$$) { sub readXWDFaces($$$) {
my ( $fh, $facRef, $nSpecials ) = @_; my ( $fh, $facRef, $nSpecials ) = @_;
my $buf; my $buf;
@ -85,7 +75,7 @@ sub readFaces($$$) {
${$nSpecials} = countSpecials( \@faces ); ${$nSpecials} = countSpecials( \@faces );
@{$facRef} = @faces; @{$facRef} = @faces;
return $nChars; return $nChars;
} # readFaces } # readXWDFaces
sub skipBitmap($) { sub skipBitmap($) {
my ( $fh ) = @_; my ( $fh ) = @_;
@ -143,7 +133,7 @@ sub nodeSizeFromFlags($) {
} elsif ( $flags == 3 ) { } elsif ( $flags == 3 ) {
return 4; return 4;
} else { } else {
die "invalid dict flags"; die "invalid dict flags $flags";
} }
} # nodeSizeFromFlags } # nodeSizeFromFlags
@ -153,94 +143,112 @@ sub mergeSpecials($$) {
my $ref = ord($$facesRef[$i]); my $ref = ord($$facesRef[$i]);
if ( $ref < 32 ) { if ( $ref < 32 ) {
$$facesRef[$i] = $$specialsRef[$ref]; $$facesRef[$i] = $$specialsRef[$ref];
print STDERR "set $ref to $$specialsRef[$ref]\n"; #print STDERR "set $ref to $$specialsRef[$ref]\n";
} }
} }
} }
sub prepXWD($$$$) { sub prepXWD($$$$) {
my ( $path, $facRef, $nodesRef, $startRef ) = @_; my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
sysopen(INFILE, $path, O_RDONLY) or die "couldn't open $path: $!\n";;
binmode INFILE;
my $buf; my $buf;
my $nRead = sysread( INFILE, $buf, 2 ); my $nRead = sysread( $fh, $buf, 2 );
my $flags = unpack( "n", $buf ); my $flags = unpack( "n", $buf );
$gNodeSize = nodeSizeFromFlags( $flags ); $gNodeSize = nodeSizeFromFlags( $flags );
my $nSpecials; my $nSpecials;
my $faceCount = readFaces( *INFILE, $facRef, \$nSpecials ); my $faceCount = readXWDFaces( $fh, $facRef, \$nSpecials );
# skip xloc header # skip xloc header
$nRead = sysread( INFILE, $buf, 2 ); $nRead = sysread( $fh, $buf, 2 );
# skip values info. # skip values info.
sysread( INFILE, $buf, $faceCount * 2 ); sysread( $fh, $buf, $faceCount * 2 );
my @specials; my @specials;
getSpecials( *INFILE, $nSpecials, \@specials ); getSpecials( $fh, $nSpecials, \@specials );
mergeSpecials( $facRef, \@specials ); mergeSpecials( $facRef, \@specials );
sysread( INFILE, $buf, 4 ); sysread( $fh, $buf, 4 );
$$startRef = unpack( 'N', $buf ); $$startRef = unpack( 'N', $buf );
my @nodes = readNodesToEnd( *INFILE ); my @nodes = readNodesToEnd( $fh );
close INFILE;
@$nodesRef = @nodes; @$nodesRef = @nodes;
} # prepXWD } # prepXWD
sub readPDBSpecials($$$$$) {
my ( $fh, $nChars, $nToRead, $nSpecials, $specRef ) = @_;
my ( $nRead, $buf );
# first skip counts and values, and xloc header
$nRead += sysread( $fh, $buf, ($nChars * 2) + 2 );
while ( $nSpecials-- ) {
$nRead += sysread( $fh, $buf, 8 ); # sizeof(Xloc_specialEntry)
my @chars = unpack( 'C8', $buf );
my $str;
foreach my $char (@chars) {
if ( $char == 0 ) { # null-terminated on palm
last;
}
$str .= chr($char);
}
push( @$specRef, $str );
}
$nRead += sysread( $fh, $buf, $nToRead - $nRead ); # skip bitmaps
return $nRead;
} # readPDBSpecials
sub prepPDB($$$$) { sub prepPDB($$$$) {
my ( $path, $facRef, $nodesRef, $startRef ) = @_; my ( $fh, $facRef, $nodesRef, $startRef ) = @_;
$$startRef = 0; # always for palm? $$startRef = 0; # always for palm?
sysopen(INFILE, $path, O_RDONLY) or die "couldn't open $path: $!\n";;
binmode INFILE;
my $buf; my $buf;
# skip header info # skip header info
my $nRead = sysread( INFILE, $buf, 76 ); my $nRead = sysread( $fh, $buf, 76 );
$nRead += sysread( INFILE, $buf, 2 ); $nRead += sysread( $fh, $buf, 2 );
my $nRecs = unpack( 'n', $buf ); my $nRecs = unpack( 'n', $buf );
my @offsets; my @offsets;
for ( my $i = 0; $i < $nRecs; ++$i ) { for ( my $i = 0; $i < $nRecs; ++$i ) {
$nRead += sysread( INFILE, $buf, 4 ); $nRead += sysread( $fh, $buf, 4 );
push( @offsets, unpack( 'N', $buf ) ); push( @offsets, unpack( 'N', $buf ) );
$nRead += sysread( INFILE, $buf, 4 ); # skip $nRead += sysread( $fh, $buf, 4 ); # skip
} }
die "too far" if $nRead > $offsets[0]; die "too far" if $nRead > $offsets[0];
while ( $nRead < $offsets[0] ) { while ( $nRead < $offsets[0] ) {
$nRead += sysread( INFILE, $buf, 1 ); $nRead += sysread( $fh, $buf, 1 );
} }
my $facesOffset = $offsets[1]; my $facesOffset = $offsets[1];
my $nChars = ($offsets[2] - $facesOffset) / 2; my $nChars = ($offsets[2] - $facesOffset) / 2;
$nRead += sysread( INFILE, $buf, $facesOffset - $nRead ); $nRead += sysread( $fh, $buf, $facesOffset - $nRead );
my @tmp = unpack( 'Nccccccn', $buf ); my @tmp = unpack( 'Nc6n', $buf );
$gNodeSize = nodeSizeFromFlags( $tmp[7] ); $gNodeSize = nodeSizeFromFlags( $tmp[7] );
my @faces; my @faces;
for ( my $i = 0; $i < $nChars; ++$i ) { for ( my $i = 0; $i < $nChars; ++$i ) {
$nRead += sysread( INFILE, $buf, 2 ); $nRead += sysread( $fh, $buf, 2 );
push( @faces, chr(unpack( "n", $buf ) ) ); push( @faces, chr(unpack( "n", $buf ) ) );
} }
@{$facRef} = @faces; @{$facRef} = @faces;
die "out of sync: $nRead != $offsets[2]" if $nRead != $offsets[2]; die "out of sync: $nRead != $offsets[2]" if $nRead != $offsets[2];
# now skip count and values. We'll want to get the "specials" my @specials;
# shortly. $nRead += readPDBSpecials( $fh, $nChars, $offsets[3] - $nRead,
$nRead += sysread( INFILE, $buf, $offsets[3] - $nRead ); countSpecials($facRef), \@specials );
mergeSpecials( $facRef, \@specials );
die "out of sync" if $nRead != $offsets[3]; die "out of sync" if $nRead != $offsets[3];
my @nodes = readNodesToEnd( *INFILE ); my @nodes = readNodesToEnd( $fh );
close INFILE;
@$nodesRef = @nodes; @$nodesRef = @nodes;
} # prepPDB } # prepPDB
@ -270,12 +278,12 @@ sub printStr($$) {
print join( "", map {$$facesRef[$_]} @$strRef), "\n"; print join( "", map {$$facesRef[$_]} @$strRef), "\n";
} # printStr } # printStr
# Given an array of 4-byte nodes, a start index. and another array of # Given an array of 4-byte nodes, a start index. and another array of
# two-byte faces, print out all of the words in the nodes array. # two-byte faces, print out all of the words in the nodes array.
sub printDAWG($$$$) {
my ( $strRef, $arrRef, $start, $facesRef ) = @_;
sub printDAWGInternal($$$$) { die "infinite recursion???" if @$strRef > 15;
my ( $str, $arrRef, $start, $facesRef ) = @_;
for ( ; ; ) { for ( ; ; ) {
my $node = $$arrRef[$start++]; my $node = $$arrRef[$start++];
@ -286,60 +294,45 @@ sub printDAWGInternal($$$$) {
parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge ); parseNode( $node, \$chrIndex, \$nextEdge, \$accepting, \$lastEdge );
push( @$str, $chrIndex ); push( @$strRef, $chrIndex );
if ( $accepting ) { if ( $accepting ) {
printStr( $str, $facesRef ); printStr( $strRef, $facesRef );
} }
if ( $nextEdge != 0 ) { if ( $nextEdge != 0 ) {
printDAWGInternal( $str, $arrRef, $nextEdge, $facesRef ); printDAWG( $strRef, $arrRef, $nextEdge, $facesRef );
} }
pop( @$str ); pop( @$strRef );
if ( $lastEdge ) { if ( $lastEdge ) {
last; last;
} }
} }
} # printDAWGInternal } # printDAWG
sub printDAWG($$$) {
my ( $arrRef, $start, $facesRef ) = @_;
die "no nodes!!!" if 0 == @$arrRef;
my @str;
printDAWGInternal( \@str, $arrRef, $start, $facesRef );
}
################################################################# #################################################################
# main # main
################################################################# #################################################################
if ( !parseARGV() ) { parseARGV();
usage();
exit 1; sysopen(INFILE, $gInFile, O_RDONLY) or die "couldn't open $gInFile: $!\n";;
} binmode INFILE;
my @faces; my @faces;
my @nodes; my @nodes;
my $startIndex; my $startIndex;
if ( $gFileType eq "xwd" ){ if ( $gFileType eq "xwd" ){
prepXWD( $gInFile, \@faces, \@nodes, \$startIndex ); prepXWD( *INFILE, \@faces, \@nodes, \$startIndex );
} elsif ( $gFileType eq "pdb" ) { } elsif ( $gFileType eq "pdb" ) {
prepPDB( $gInFile, \@faces, \@nodes, \$startIndex ); prepPDB( *INFILE, \@faces, \@nodes, \$startIndex );
print STDERR join( ",", @faces), "\n";
} }
close INFILE;
printDAWG( \@nodes, $startIndex, \@faces ); die "no nodes!!!" if 0 == @nodes;
printDAWG( [], \@nodes, $startIndex, \@faces );
if ( $gASCIIFacesFileName ) { exit 0;
open FACES, "> $gASCIIFacesFileName";
foreach my $face (@faces) {
print FACES pack('cc', 0, $face );
}
close FACES;
}