Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

Parsing program output snippets.

by shotgunefx (Parson)
on Jul 21, 2002 at 22:45 UTC ( #183888=snippet: print w/ replies, xml ) Need Help??

Description: Two handy snippets to help you parse program output. snippet 1.

findpositions()
Pass it an array of the headers you need and the header line.
Returns an array ref of results; each element is an anonymous array of ('headername', start offset, field width).

getfields()
Pass it the ref returned from findpositions() and the scalar containing a line of data.
Returns a hashref of name => value for each position.
#!/usr/bin/perl -w

use strict;

#####################################################################################
# This script contains two handy functions to assist in the parsing of
# program output.
# The nice thing about them is that they are fairly agnostic about field
# positions, so they won't be so fragile. As long as the header names are
# present and don't change, they will work, even if the columns have
# different widths or appear in different positions.
#
# There are two functions.
# findpositions():
#    Pass it an array of needed headers and the header line.
#    Returns an array ref of results; each element is
#    ['headername', start offset, field width].
#
# getfields():
#    Pass it the ref returned from findpositions and the scalar containing
#    the data.
#    Returns a hashref of name => value for each position.
#
# Caveats:
#    These snippets expect that columns NEVER overlap and that they are
#    left justified.
#    Trailing whitespace in values is removed.
#####################################################################################


# findpositions(@headers, $header_line)
#
# Locates each requested column header in the header line and records where
# its column lives.
#
# Arguments:
#   @headers     - header names to look for, listed in the order in which they
#                  appear in the header line (the prototype passes the array
#                  by reference).
#   $header_line - the line of program output that carries the column headers.
#
# Returns an array ref holding one [ name, start offset, width ] entry per
# header. The width covers the header text plus the whitespace following it,
# i.e. everything up to the next column. The triple is meant to be fed
# straight to substr().
#
# Returns nothing (undef in scalar context) as soon as any header is missing,
# whether or not warnings are enabled. The names of the missing headers are
# reported through warn() only when -w ($^W) is on.
sub findpositions(\@$) {
    my ($wanted, $line) = @_;
    my @results;
    my @errors;

    for my $name (@$wanted) {
        my $header = quotemeta $name;

        # A scalar-context //g match resumes where the previous one stopped,
        # so the headers are located strictly left to right.
        if ($line =~ /$header\s*/g) {
            # $-[0] / $+[0] are the offsets where this match starts and ends.
            # Using the real start (instead of the end of the previous match)
            # keeps fields from overlapping and lets callers skip columns.
            push @results, [ $name, $-[0], $+[0] - $-[0] ];
        }
        else {
            push @errors, "Couldn't find $name\n";
        }
    }

    if (@errors) {
        # Only call warn() when there is something to say: warn with an empty
        # list prints "Warning: something's wrong".
        warn @errors if $^W;
        return;
    }

    return \@results;
}

# getfields($layout, $line)
#
# Cuts one line of data into named fields.
#
# Arguments:
#   $layout - array ref of [ name, start offset, width ] entries, as returned
#             by findpositions().
#   $line   - the data line to split; an undefined line is treated as empty.
#
# Returns a hash ref mapping each field name to its value with trailing
# whitespace removed. A field whose column starts beyond the end of the line
# (a row that stops short of the last columns) yields the empty string
# instead of undef, so short rows do not trigger "substr outside of string"
# or "uninitialized value" warnings.
sub getfields {
    my ($layout, $line) = @_;
    $line = '' unless defined $line;

    my $line_length = length $line;
    my %res;

    for my $field (@$layout) {
        my ($name, $start, $width) = @$field;

        # substr() returns undef (and warns) when the start offset lies past
        # the end of the string, so guard that case explicitly.
        my $value = $start < $line_length ? substr($line, $start, $width) : '';
        $value =~ s/\s+$//;

        $res{$name} = $value;
    }

    return \%res;
}


###########################################################
# Sample                           
# Run netstat                            
###########################################################

# Column headers expected in the "Active Internet connections" table.
my @ACTIVE_INET_HEADERS = (
    'Proto',
    'Recv-Q',
    'Send-Q',
    'Local Address',
    'Foreign Address',
    'State',
    'User',
    'Inode',
    'PID/Program name',
);

# Column headers expected in the "Active UNIX domain sockets" table.
my @ACTIVE_UNIX_HEADERS = (
    'Proto',
    'RefCnt',
    'Flags',
    'Type',
    'State',
    'I-Node',
    'PID/Program name',
    'Path',
);

##################
# Run command
##################

my @output = `netstat -apve`;
die "Couldn't run netstat" unless @output;

# Column layout (array ref returned by findpositions) of the table that is
# currently being read; undefined until the first table header has been seen.
my $layout;

# Removes and returns the column-header line that must directly follow a
# section banner in the given array of output lines. Dies when the output
# ends right after the banner or when the next line does not start with
# "Proto".
sub _next_header_line {
    my ($lines) = @_;

    my $header = shift @$lines;
    die "Unexpected: end of netstat output where a header line was expected\n"
        unless defined $header;

    chomp $header;
    die "Unexpected: $header\n" unless $header =~ /^Proto/;

    return $header;
}

# Test definedness rather than truth so that a line that happens to be a
# false value does not end the loop early.
while (defined(my $line = shift @output)) {
    chomp $line;

    ###########################################################
    # Start of the Internet sockets table (only honoured while no
    # table has been started yet).
    ###########################################################
    if (!$layout && $line =~ /^Active Internet connections/) {
        print "Active Internet connections.\n", "#" x 30, "\n";
        $layout = findpositions(@ACTIVE_INET_HEADERS, _next_header_line(\@output));
        die "Couldn't parse headers!" unless $layout;
        next;
    }

    ###########################################################
    # Start of the UNIX sockets table.
    ###########################################################
    if ($line =~ /^Active UNIX domain sockets/) {
        print "\nActive UNIX sockets.\n", "#" x 30, "\n";
        $layout = findpositions(@ACTIVE_UNIX_HEADERS, _next_header_line(\@output));
        die "Couldn't parse headers!" unless $layout;
        next;
    }

    # Lines seen before any table header carry nothing we can parse.
    next unless $layout;

    my $fields = getfields($layout, $line);    # Hash with name => value

    # Two print statements on purpose. Writing
    #     print ( map {...} sort keys %$fields ), "#" x 40, "\n";
    # prints only the map output: the parentheses right after "print" are
    # taken as its complete argument list and the rest is discarded.
    # "=>" needs no backslash inside a double-quoted string; "\>" only earns
    # an "Unrecognized escape" warning under -w and prints the same text.
    print map { "\t$_ => $fields->{$_}\n" } sort keys %$fields;
    print "#" x 40, "\n";
}

print "Done!\n";




__END__
>netstat -apve sample data
Active Internet connections (servers and established)
Proto Recv-Q Send-Q Local Address           Foreign Address         St
+ate       User       Inode      PID/Program name   
tcp        0      0 somedomainher.net:www   trder102.stare.ya:46312 TI
+ME_WAIT   user       0          -                   
tcp        0      0 somedomainher.ne:tproxy trder104.stare.ya:25013 TI
+ME_WAIT   user       0          -                   
tcp        0      0 somedomainher.net:www   trder104.stare.ya:49319 TI
+ME_WAIT   user       0          -                   
tcp        0      0 somedomainher.net:www   trder104.stare.ya:28205 TI
+ME_WAIT   user       0          -                   
tcp        0      0 somedomainher.net:www   trder104.stare.ya:28202 TI
+ME_WAIT   user       0          -                   
tcp        0      0 *:www                   *:*                     LI
+STEN      user       1957270    16874/httpd         
tcp        0    424 somedomainher.net:ssh   209.X.XXX.XXX:1115      ES
+TABLISHED user       1582988    947/sshd2           
tcp        0      0 somedomains:netbios-ssn 209.6.XXX.XXX:1026      ES
+TABLISHED user       1582180    892/smbd            
tcp        0      0 *:https                 *:*                     LI
+STEN      user       714        607/httpd           
tcp        0      0 *:mysql                 *:*                     LI
+STEN      user       654        594/mysqld          
tcp        0      0 *:ssh                   *:*                     LI
+STEN      user       576        540/sshd2           
tcp        0      0 *:locker                *:*                     LI
+STEN      autobot    560        536/SERVER      
tcp        0      0 *:tproxy                *:*                     LI
+STEN      autobot    546        530/ShipTax S 
tcp        0      0 *:netbios-ssn           *:*                     LI
+STEN      user       456        482/                
tcp        0      0 *:616                   *:*                     LI
+STEN      user       400        433/                
tcp        0      0 *:npmp-gui              *:*                     LI
+STEN      user       392        433/                
udp        0      0 *:sunrpc                *:*                       
+          user       274        330/                
raw      208      0 *:icmp                  *:*                     7 
+          user       2017587    -                   
raw        0      0 *:icmp                  *:*                     7 
+          user       0          -                   
raw        0      0 *:tcp                   *:*                     7 
+          user       0          -                   
Active UNIX domain sockets (servers and established)
Proto RefCnt Flags       Type       State         I-Node PID/Program n
+ame    Path
unix  1      [ ]         STREAM     CONNECTED     2021255 16879/httpd 
+        @000044ac
unix  1      [ ]         STREAM     CONNECTED     2011633 16877/httpd 
+        @00004466
unix  1      [ ]         STREAM     CONNECTED     1969869 16883/httpd 
+        @00004325
unix  1      [ ]         STREAM     CONNECTED     1965032 16875/httpd 
+        @00004307
unix  1      [ ]         STREAM     CONNECTED     2021003 16878/httpd 
+        @000044ab

Comment on Parsing program output snippets.
Download Code

Back to Snippets Section

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: snippet [id://183888]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others wandering the Monastery: (4)
As of 2014-07-28 05:56 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    My favorite superfluous repetitious redundant duplicative phrase is:









    Results (187 votes), past polls