Clovis_Sangrail's scratchpad

Public Scratchpad
#!/usr/bin/perl
################################################
# Title: crunchrep.pl
# Author: Clovis Sangrail
# Description: The crunchrep.pl Perl script will process one or more input
# data files consisting of GT.M Journal extract log records. It produces a
# report of exceptional transactions for possible further scrutiny by audit
# or security personnel.
#
# Revision History
# Ver           Author        Date
# ---        ------        ----
# 1.0        Clovis Sangrail    Apr 2012
#    Initial port/rewrite of audit.gawk.
# 1.1        Clovis Sangrail    9-May-2012
#    Rewrite regex for Global Variable analysis to handle subscripts
# like """""ABU%"""" . Then change back.
################################################

#####################
# Calling Arguments #
#####################
# See usage function.

###########
# Modules #
###########
use strict;
use Getopt::Std;            # Process switches.
use File::Basename;

#############
# Variables #
#############
$| = 1;                    # Flush after every write.
my $NMF = "namefile";            # File of userids to report upon. (Default name.)
my $ALLF;                # Journal extract log file name.
my %rpt;                # rpt{uid}{pid}{global} => [ timestamp, global text, count ].
my %opts;                # Hash of commandline opts => vals.
my %rname;                # Hash of uid => real (GECOS) name, or "Name_Not_Found".
my @fields;                # Scratch array of split fields (journal record or passwd line).
my $PWLINE;                # Output of the /etc/passwd grep for one UID.
my ( $uid , $cnt);            # UID currently being processed; count of filename args.
my %matchpid;                # Hash of pid => uid, filled from '01' records.

#############
# Functions #
#############
sub usage                # Display program usage instructions.
{
    # Prints the help text to STDOUT, using the script's basename as the
    # program name. Takes no arguments; callers decide whether to die
    # afterwards.
    my $prg = basename($0);
    print <<EOF;

$prg usage:

$prg -f <Glbfld#> [ -F <skipregex> -k <keepregex> <names>] <data>

The $prg program reads input files of GT.M Journal Extract Log recs
generated by the MUPIP program and Unix UIDs and produces a report of
transactions performed by the users in the namefile. The <data> file
is required, the <names> file defaults to "namefile" in the current
directory.

Switches and Parameters:
    -f = Field number of Global Variable, currently 7 or 9 so far. This
         may change with new versions of GT.M and MUPIP. Required switch.
    -F = Perl regex of Global Variable names to omit from report. Optional.
    -k = Perl regex of Global Variable names for which subscripts will be
         preserved and reported on separately. Default is to aggregate all
         subscripts into single count. Optional.
<names>  File of User ID's (one per line) on which to report. Optional,
         defaults to "namefile" in current directory.
<data>   File of 01/04/05/10 mupip journal extract log records, sorted by
         PID and rectype within PID. Required parameter.

EOF
# Sorry no indentation allowed with EOF. (No in-line comments either.)
}

########
# Main #
########

#####################################################################
# Process the switch arguments. (See 'usage{}' function above for a #
# description of the switches and other commandline parameters.)    #
#####################################################################
# All three switches take a value. getopt() stores them in %opts and
# removes them from @ARGV, leaving only the filename arguments.
getopt('fFk', \%opts);            # Read switches into %opts hash.

my $gfld = $opts{"f"};            # This is global var field number.
if( !defined($gfld) || ( (7 != $gfld) && (9 != $gfld) ) ) {
    usage();                # -f missing, or not one of 7 / 9.
    die "Invalid logfile global field number\n\n";
}
$gfld--;                # Perl numbers from zero.

my $fltr = $opts{"F"};            # Get regex of globals to skip.
my $keepsub = $opts{"k"};        # and Globals to not aggregate.
                    # (Either may be undefined or empty.)

#####################################################################
# Check the -F and -k patterns now, so that a malformed regex is    #
# reported before any input is read instead of killing the run at   #
# the first journal record that reaches the pattern match.          #
#####################################################################
for my $chk ( [ "F", $fltr ], [ "k", $keepsub ] ) {
    my ( $switch, $pattern ) = @{$chk};
    next if !defined($pattern) || $pattern eq "";
    next if eval { my $compiled = qr/$pattern/; 1 };
    usage();
    die "Invalid regex for -$switch switch: $@\n";
}

##############################
#DBG#$fltr='^\^LOG\(?|^\^BCHLOG\(?|^\^ORSLOG\(?|^\^ZLOG\(?|^\^PROCID\(?|^\^SYSLOG\(?';
#DBG#$keepsub='^\^SCAU\(?';
#DBG#print "\n";
#DBG#print " 'f'  switch value: $opts{\"f\"} " . "\n";
#DBG#print " 'F'  switch value: $opts{\"F\"} " . "\n";
#DBG#print "fltr = $fltr \n";
#DBG#print " keepsub = $keepsub \n";
#DBG#print " 'k'  switch value: $opts{\"k\"} " . "\n";
#DBG#print "\n";
#DBG#exit(0);
##############################

###########################################################################
# Process filename arguments. Error if none. One arg is the data file of
# journal records, and the namefile keeps its default. If two args, then
# 1st is the namefile, 2nd is the data file. More than two is an error.
###########################################################################
$cnt = scalar @ARGV;            # Remaining arg cnt. (Switches already removed.)

if ( 0 == $cnt ) {            # No data file given.
    usage();
    die "Need a journal extract log file name\n\n";
} elsif ( 1 == $cnt ) {            # Just log file.
    $ALLF = $ARGV[0];
} elsif ( 2 == $cnt ) {            # Names file and log file.
    $NMF = $ARGV[0];
    $ALLF = $ARGV[1];
} else {
    # Previously extra arguments fell through silently, leaving $ALLF
    # undefined so the failure only surfaced as a confusing open error.
    usage();
    die "Too many file name arguments\n\n";
}

#################################################################
# Open the namefile (NAMES) and the journal extract file (ALL)  #
# for reading, err out on failure.                              #
# Three-argument open is used so that a filename beginning with #
# '<', '>' or '|' is opened as a plain file and cannot change   #
# the open mode or run a command. (A name of "-" is now a file  #
# called "-", not standard input.)                              #
#################################################################
open( NAMES, '<', $NMF ) or die "Cannot open $NMF : $!";
open( ALL, '<', $ALLF ) or die "Cannot open $ALLF : $!";

############################################################################
# Process NAMES file. This is the list of User IDs (UIDs) on which we wish
# to report. Read each UID and look up its password entry. If found set
# the rname{UID} hash entry to the real name (GECOS) field of that entry.
# If not found set that hash entry to "Name_Not_Found".
#
# The lookup uses Perl's built-in getpwnam() instead of running
# "grep UID /etc/passwd" through the shell:
#   - the UID is never handed to a shell, so odd characters in the
#     namefile cannot run commands;
#   - the login name must match exactly (grep matched any passwd line
#     that merely contained the text, and could return several lines);
#   - a blank line no longer makes grep wait on standard input.
############################################################################
while (<NAMES>) {            # Read till EOF.

    chomp;                # Trim trailing '\n'.
    s/^\s+//;                # Trim stray blanks (and a trailing CR)
    s/\s+$//;                # so the key matches the journal UID.
    next if $_ eq "";            # Ignore blank lines.

    my @pwent = getpwnam($_);        # Empty list if no such user.
    if( @pwent ) {
        $rname{$_} = $pwent[6];        # Element 6 is the GECOS field.
    } else {
        $rname{$_} = "Name_Not_Found";    # Or if not found say so.
    }
}

######################################
#DBG##my ( $c , $mcnt , $pmil );
#DBG#my $uid;
#DBG#foreach $uid ( sort keys %rname ) {
#DBG#    print "uid: $uid , name: \"$rname{$uid}\" \n";
#DBG#}
#DBG#exit(0);
#DBG#$c=0;
#DBG#$mcnt=0;
#DBG#print "\n.";
######################################

############################################################################
# This is the main part. Read each line of the input file of MUPIP journal
# extract log records. Input file is sorted by PID and by rectype within
# each PID. Skip all but '01', '04', '05', and '10' record types. Skip '01'
# recs if UID (6th field) was not among those in the namefile, else set
# the matchpid entry for that PID to be the UID. For 04/05/10 skip if the
# matchpid entry for that PID is not defined, or if the Global Var is in
# the ignore list. Replace any subscripts with "(..)" unless the Global is
# in the do-not-aggregate list. Translate the timestamp and either make a
# new [ timestamp, global text, count=1 ] array for this
# rpt{uid}{pid}{global} hash entry or increment the count of the existing
# entry.
############################################################################
while (<ALL>) {

    my $curpid;                # PID of the current record.
    my ( $wholeglob, $saveglob, $startglob );
                    # Parts of Global Var.
    my $stamp;                # Xlate of Horolog format.

    chomp;                # Remove trailing newline.

    # Split on backslashes. The limit keeps everything from the Global
    # Variable field onward together as the last element, so any
    # backslashes inside that field do not split it further.
    @fields = split /\\/ , $_ , $gfld+1;

    ##################################################################
    # If we find an '01' record, skip if username is not among those #
    # found when the namefile was processed. Otherwise remember      #
    # which UID owns this PID.                                       #
    ##################################################################
    if( "01" eq $fields[0] ) {        # Record type '01'?
        next unless defined( $rname{$fields[5]} );
                    # Skip if not read from namefile.
        $matchpid{ $fields[3] } = $fields[5];
                    # Save UID matching PID.
        next;                # Nothing more to do for an '01'.
    }

    #####################################################################
    # Global variables are altered by record types '04', '05', and '10'.
    # The test is anchored so that it is an exact match, like the '01'
    # test above; unanchored it also accepted any record type that
    # merely contained one of these pairs of digits (e.g. "104").
    #####################################################################
    next unless $fields[0] =~ /^(?:04|05|10)$/;

    next unless defined( $matchpid{$fields[3]} );
                    # Skip PIDs with no matching '01' rec.
    $curpid = $fields[3];
    $uid = $matchpid{$curpid};

    ##################################################################
    # Now extract the initial portion of the Global Variable being   #
    # affected by this record, and also extract the whole Variable   #
    # (including any subscripts present).                            #
    ##################################################################
    #if($fields[$gfld] =~ /^((\^[%A-Za-z\d]+)($|=|\(([^"]+|(("+)[^"]+\6)+)+\)))/ ) {
    if($fields[$gfld] =~ /^((\^[%A-Za-z\d]+)($|=|.*?\)))/ ) {
        $wholeglob = $1;        # Name plus '=' or subscripts, if any.
        $startglob = $2;        # Just the ^NAME part.
    } else {
        print "Warning: Cannot parse $fields[$gfld] for pid $curpid \n";
        next;                # Print error msg if regex fails.
    }

    #################################################################
    # If this Global's start matches the regex, input via switch on #
    # the commandline, of variables to skip, then skip it.          #
    #################################################################
    if( defined($fltr) && ($fltr ne "") && ($startglob =~ /$fltr/) ) {
        next;
    }

    ###############################################################
    # The GTM Journal timestamp is in HoroLog format: DDDDD,SSSSS #
    # where DDDDD = days since 12/31/1840 midnight and SSSSS =    #
    # seconds since midnight. We need to convert this to Timestamp#
    # format (secs since start of 1/1/1970). (Note: 47117 is the  #
    # number of days between midnight 12/31/1840 and 1/1/1970.)   #
    ###############################################################
    if( $fields[1] =~ /^(\d+),(\d+)/ ) {
                    # Extract DDDDD & SSSSS via regex.
        $stamp = ( $1 - 47117 ) * 86400 + $2 + 3600 * 6;
                    # Convert, incl CST timezone offset.
    } else {
        # NOTE(review): $stamp stays undefined here but the record is
        # still counted below - confirm that is the intended handling.
        print "Cannot parse horolog field $fields[1] \n";
    }

    #################################################################
    # Now match the start of the Global variable against the search #
    # regex of variables that we do not aggregate. These are vars   #
    # for which we keep separate counts of each different set of    #
    # subscripts that are modified. If we find a match, then leave  #
    # the variable alone. If no match, then replace any subscripts  #
    # with the string (..) .                                        #
    #################################################################
    $saveglob = "";            # Empty means "not aggregated".
    my $keep = defined($keepsub) && ($keepsub ne "")
            && ($startglob =~ /$keepsub/);
    if( !$keep && $wholeglob =~ /\(/ ) {    # Aggregate if have subscripts.
        $saveglob = $wholeglob;        # Remember the first full form.
        $wholeglob = $startglob . "(..)";
    }

    ##############################################################
    # Finally, if we already have an entry for this (possibly    #
    # aggregated) Global then increment its count. If this is a  #
    # new one then create the initial array                      #
    # [ timestamp, global text to show for a single update, 1 ]  #
    # for the newly-created hash element to reference.           #
    ##############################################################
    if( defined( $rpt{$uid}{$curpid}{$wholeglob} ) ) {
        $rpt{$uid}{$curpid}{$wholeglob}[2]++;
                    # Have entry, increment count.
    } else {            # Create new node, cnt=1.
        my $showglob = ( $saveglob eq "" ) ? $wholeglob : $saveglob;
        $rpt{$uid}{$curpid}{$wholeglob} = [ $stamp , $showglob, 1 ];
    }
}

####################################################################
# At this point the three-dimensional %rpt hash is loaded with the #
# report data, we're ready to print the report. The %rpt hash is   #
# dimensioned as rpt{uid}{pid}{global}, and each leaf is an array  #
# [ timestamp of 1st access, global text, count of accesses ].     #
####################################################################
for $uid ( sort keys %rpt ) {        # Outermost dimension is UIDs.

    my @rptblock;            # Report lines for current UID.

    #################################
    # print header for current UID. #
    #################################
    printf "%s: %s\n" , $uid , $rname{$uid};
    print "YYYYMMDD hh:mm       PID     Global\n";
    print "-------------- ------------- ------\n";

    ################################################
    # Within current UID 2nd index is Process IDs, #
    # and within each PID the 3rd index is each    #
    # affected GT.M Global.                        #
    ################################################
    for my $pid ( keys %{ $rpt{$uid} } ) {
        for my $glob ( keys %{ $rpt{$uid}{$pid} } ) {

            my ( $stamp, $firstglob, $updates ) = @{ $rpt{$uid}{$pid}{$glob} };

            # A single update shows the global exactly as first seen
            # (subscripts included); several updates show the hash key,
            # which is the "(..)" form unless subscripts were kept.
            my $prtglob = ( 1 == $updates ) ? $firstglob : $glob;

            my @tparts = localtime $stamp;    # Convert to list.
            my $baseline = sprintf "%4d%02d%02d %02d:%02d%13s  %s" ,
                   1900 + $tparts[5], 1 + $tparts[4], $tparts[3],
                   $tparts[2], $tparts[1], $pid, $prtglob;
                    # YYYYMMDD HH:MM  pid  Globalname

            ############################
            # One or multiple updates? #
            ############################
            if( 1 == $updates ) {    # Just add newline for single.
                push( @rptblock , "$baseline\n" );
            } else {            # Multiple updates, print count.
                push( @rptblock ,
                      sprintf( "%s\t(%d updates)\n" , $baseline, $updates ) );
            }
        }
    }

    #######################################################################
    # Finally, print out the formatted data lines for this UID. The lines #
    # start with the fixed-width date/time and right-aligned PID, so a    #
    # plain string sort orders them by those leading columns.             #
    #######################################################################
    print sort @rptblock;
    print "\n";                # Skip line before next user.
}
[download]