http://www.perlmonks.org?node_id=970458

#!/usr/bin/perl
################################################
# Title: crunchrep.pl
# Author: Clovis Sangrail
# Description: The crunchrep.pl Perl script will process one or more input
#   data files consisting of GT.M Journal extract log records. It produces a
#   report of exceptional transactions for possible further scrutiny by audit
#   or security personnel.
#
# Revision History
# Ver  Author           Date
# ---  ------           ----
# 1.0  Clovis Sangrail  Apr 2012
#      Initial port/rewrite of audit.gawk.
# 1.1  Clovis Sangrail  9-May-2012
#      Rewrite regex for Global Variable analysis to handle subscripts
#      like """""ABU%"""" . Then change back.
################################################

#####################
# Calling Arguments #
#####################
# See usage function.

###########
# Modules #
###########
use strict;
use warnings;
use Getopt::Std;        # Process switches.
use File::Basename;

#############
# Constants #
#############
# Number of days between midnight 12/31/1840 (the $HOROLOG origin) and
# 1/1/1970 (the Unix epoch).
use constant HOROLOG_EPOCH_DAYS => 47117;
use constant SECS_PER_DAY       => 86400;

# Fixed CST (UTC-6) offset applied when converting a journal timestamp.
# NOTE(review): the report later formats the value with localtime(), so the
# printed time equals the journal wall-clock time only when the local zone
# is UTC-6; during daylight saving it will presumably be one hour off.
# Behaviour is kept as in the original -- confirm before changing.
use constant TZ_OFFSET_SECS     => 6 * 3600;

#############
# Variables #
#############
$| = 1;                 # Flush after every write.

my %rpt;                # rpt{uid}{pid}{global} = [ timestamp, global, count ].
my %opts;               # Hash of commandline opts => vals.
my %rname;              # Hash of uid => GECOS name.
my %matchpid;           # Hash of pid => uid.

#############
# Functions #
#############

# Display program usage instructions on STDOUT.
sub usage {
    my $prg = basename($0);
    print <<"EOF";
$prg usage:

    $prg -f <Glbfld#> [ -F <skipregex> -k <keepregex> <names>] <data>

The $prg program reads input files of GT.M Journal Extract Log recs
generated by the MUPIP program and Unix UIDs and produces a report of
transactions performed by the users in the namefile. The <data> file is
required, the <names> file defaults to "namefile" in the current directory.

Switches and Parameters:

 -f = Field number of Global Variable, currently 7 or 9 so far. This
      may change with new versions of GT.M and MUPIP. Required switch.
 -F = Perl regex of Global Variable names to omit from report. Optional.
 -k = Perl regex of Global Variable names for which subscripts will be
      preserved and reported on separately. Default is to aggregate all
      subscripts into single count. Optional.
 <names> File of User ID's (one per line) on which to report. Optional,
      defaults to "namefile" in current directory.
 <data> File of 01/04/05/10 mupip journal extract log records, sorted
      by PID and rectype within PID. Required parameter.

EOF
    return;
}

# Compile an optional regex given on the command line.
#   $pattern - pattern text, possibly undef or empty
#   $switch  - switch name, used only in the error message
# Returns a compiled regex, or undef when no pattern was supplied. Dies with
# a readable message when the pattern does not compile, so that a bad
# switch is reported before any input is read.
sub compile_switch_regex {
    my ( $pattern, $switch ) = @_;
    return unless defined $pattern && length $pattern;
    my $compiled = eval { qr/$pattern/ };
    die "Invalid regex for -$switch switch: $@\n" unless defined $compiled;
    return $compiled;
}

# Look up the real-name (GECOS) field for one namefile entry.
#   $user - login name; an all-digit entry is also tried as a numeric uid
# Returns the GECOS field (possibly empty), or "Name_Not_Found" when the
# password database has no such entry. Uses getpwnam/getpwuid rather than
# a shell 'grep' so that the match is exact and no shell is involved.
sub lookup_real_name {
    my ($user) = @_;
    my @pw = getpwnam($user);
    if ( !@pw && $user =~ /\A\d+\z/ ) {
        @pw = getpwuid($user);
    }
    return "Name_Not_Found" unless @pw;
    return defined $pw[6] ? $pw[6] : "";
}

# Split a Global Variable field into its parts.
#   $field - text of the global-variable journal field
# Returns ( whole, start ): 'start' is the ^NAME portion, 'whole' is the
# name plus either nothing (end of field), a trailing '=', or everything up
# to the first ')'. Returns an empty list when the field does not start
# with a global name.
sub parse_global {
    my ($field) = @_;
    if ( $field =~ /^((\^[%A-Za-z\d]+)($|=|.*?\)))/ ) {
        return ( $1, $2 );
    }
    return;
}

# Convert a HoroLog timestamp "DDDDD,SSSSS" (days since 12/31/1840 and
# seconds since midnight) into seconds since 1/1/1970, including the fixed
# timezone offset. Returns undef when the text cannot be parsed.
sub horolog_to_epoch {
    my ($horolog) = @_;
    return unless defined $horolog;
    if ( $horolog =~ /^(\d+),(\d+)/ ) {
        return ( $1 - HOROLOG_EPOCH_DAYS ) * SECS_PER_DAY + $2
            + TZ_OFFSET_SECS;
    }
    return;
}

########
# Main #
########

#####################################################################
# Process the switch arguments. (See the 'usage' function above for #
# a description of the switches and other commandline parameters.)  #
#####################################################################
getopt( 'fFk', \%opts );            # Read switches into %opts hash.

my $gfld = $opts{"f"};              # This is global var field number.
unless ( defined $gfld
    && $gfld =~ /\A\d+\z/
    && ( 7 == $gfld || 9 == $gfld ) )
{                                   # Must be one of these.
    usage();
    die "Invalid logfile global field number\n\n";
}
$gfld--;                            # Perl numbers from zero.

my $skip_re = compile_switch_regex( $opts{"F"}, "F" );  # Globals to skip.
my $keep_re = compile_switch_regex( $opts{"k"}, "k" );  # Globals to not
                                                        # aggregate.

###########################################################################
# Process filename arguments. Error if none. One arg is the data file of  #
# journal records, and the namefile is the default. If two args, then 1st #
# is the namefile, 2nd is the data file. More than two is an error.       #
###########################################################################
my $argc = @ARGV;                   # Remaining arg cnt. (getopt shifts.)
if ( 0 == $argc ) {
    usage();
    die "Need a journal extract log file name\n\n";
}
if ( $argc > 2 ) {
    usage();
    die "Too many file name arguments\n\n";
}
my ( $NMF, $ALLF ) = ( 2 == $argc ) ? @ARGV : ( "namefile", $ARGV[0] );

#########################################################
# Open the names and journal files, err out on failure. #
#########################################################
open my $names_fh, '<', $NMF  or die "Cannot open $NMF : $!";
open my $log_fh,   '<', $ALLF or die "Cannot open $ALLF : $!";

############################################################################
# Process the names file. This is the list of User IDs (UIDs) on which we  #
# wish to report. Read each UID and look up its password entry. If found,  #
# set the rname{UID} hash entry to the real name field. If not found, set  #
# that hash entry to "Name_Not_Found".                                     #
############################################################################
while ( my $entry = <$names_fh> ) {
    $entry =~ s/\A\s+//;            # Trim surrounding whitespace, which
    $entry =~ s/\s+\z//;            # also removes the newline (or CRLF).
    next unless length $entry;      # Ignore blank lines.
    $rname{$entry} = lookup_real_name($entry);
}
close $names_fh;

############################################################################
# This is the main part. Read each line of the input file of MUPIP journal #
# extract log records. Input file is sorted by PID and by rectype within   #
# each PID. Skip all but '01', '04', '05', and '10' record types. Skip     #
# '01' recs if UID (6th field) was not among those in the namefile, else   #
# set the matchpid entry for that PID to be the UID. For 04/05/10 skip if  #
# matchpid entry for that PID not defined, or if Global Var is in the      #
# ignore list. Replace any subscripts with "(..)" unless Global is in the  #
# do-not-aggregate list. Translate timestamp and either make a new array   #
# entry of [ timestamp, global, cnt=1 ] for this rpt{uid}{pid}{global}     #
# hash or increment the count of the existing entry.                       #
############################################################################
RECORD:
while ( my $record = <$log_fh> ) {
    chomp $record;

    # Split on backslashes. The limit keeps the global-variable field (the
    # last one requested) intact even when it contains backslashes itself.
    my @fields  = split /\\/, $record, $gfld + 1;
    my $rectype = defined $fields[0] ? $fields[0] : "";
    my $pid     = $fields[3];

    ##################################################################
    # If we find an '01' record, skip if username is not among those #
    # found when the namefile was processed.                         #
    ##################################################################
    if ( "01" eq $rectype ) {
        my $user = $fields[5];
        next RECORD unless defined $user && defined $rname{$user};
        $matchpid{$pid} = $user;    # Save UID matching PID.
        next RECORD;
    }

    #####################################################################
    # Global variables are altered by record types '04', '05' and '10'. #
    #####################################################################
    next RECORD unless $rectype =~ /\A(?:04|05|10)\z/;

    # Only PIDs captured from an '01' record of a watched user count.
    next RECORD unless defined $pid && defined $matchpid{$pid};
    my $user = $matchpid{$pid};

    ##################################################################
    # Now extract the initial portion of the Global Variable being   #
    # affected by this record, and also extract the whole Variable   #
    # (include any subscripts present).                              #
    ##################################################################
    my $globfield = defined $fields[$gfld] ? $fields[$gfld] : "";
    my ( $wholeglob, $startglob ) = parse_global($globfield);
    unless ( defined $wholeglob ) {
        print "Warning: Cannot parse $globfield for pid $pid \n";
        next RECORD;
    }

    #################################################################
    # If this Global's start matches the regex, input via switch on #
    # the commandline, of variables to skip, then skip it.          #
    #################################################################
    next RECORD if defined $skip_re && $startglob =~ $skip_re;

    ###############################################################
    # Convert the HoroLog journal timestamp to Unix epoch secs.   #
    # An unparsable field is reported; the record is still        #
    # counted, with a zero timestamp.                             #
    ###############################################################
    my $stamp = horolog_to_epoch( $fields[1] );
    unless ( defined $stamp ) {
        my $shown = defined $fields[1] ? $fields[1] : "";
        print "Cannot parse horolog field $shown \n";
        $stamp = 0;
    }

    #################################################################
    # Now match the start of the Global variable against the regex  #
    # of variables that we do not aggregate. These are vars for     #
    # which we keep separate counts of each different set of        #
    # subscripts that are modified. If we find a match, then leave  #
    # the variable alone. If no match, then replace any subscripts  #
    # with the string (..) .                                        #
    #################################################################
    my $detail = $wholeglob;        # Always remember the full form.
    my $key    = $wholeglob;
    my $keep   = defined $keep_re && $startglob =~ $keep_re;
    if ( !$keep && $wholeglob =~ /\(/ ) {
        $key = $startglob . "(..)"; # Aggregate if have subscripts.
    }

    ##############################################################
    # Finally, if we already have an entry for this (possibly an #
    # aggregated) Global then increment its count. If this is a  #
    # new one then create the initial [ timestamp, global, 1 ]   #
    # array for the newly-created hash element to reference.     #
    ##############################################################
    if ( defined $rpt{$user}{$pid}{$key} ) {
        $rpt{$user}{$pid}{$key}[2]++;
    }
    else {
        $rpt{$user}{$pid}{$key} = [ $stamp, $detail, 1 ];
    }
}
close $log_fh;

####################################################################
# At this point the three-dimensional %rpt hash is loaded with the #
# report data, we're ready to print the report. The %rpt hash is   #
# dimensioned as rpt{uid}{pid}{global}.                            #
####################################################################
for my $user ( sort keys %rpt ) {   # Outermost dimension is UIDs.
    my @rptblock;                   # Report lines for current UID.

    #################################
    # Print header for current UID. #
    #################################
    printf "%s: %s\n", $user, $rname{$user};
    print "YYYYMMDD hh:mm PID Global\n";
    print "-------------- ------------- ------\n";

    ################################################
    # Within current UID 2nd index is Process IDs, #
    # and within a PID each affected GT.M Global.  #
    ################################################
    for my $pid ( keys %{ $rpt{$user} } ) {
        for my $glob ( keys %{ $rpt{$user}{$pid} } ) {
            my ( $stamp, $detail, $count ) = @{ $rpt{$user}{$pid}{$glob} };

            # Show the subscripts for a single update, else the hash key
            # (which carries "(..)" unless the global was kept verbatim).
            my $prtglob = ( 1 == $count ) ? $detail : $glob;

            my @tparts = localtime $stamp;      # Convert to list.
            my $baseline = sprintf "%4d%02d%02d %02d:%02d%13s %s",
                1900 + $tparts[5], 1 + $tparts[4], $tparts[3],
                $tparts[2], $tparts[1], $pid, $prtglob;
                                    # YYYYMMDD HH:MM pid Globalname

            # Multiple updates also print the count.
            my $rptline =
                ( 1 == $count )
                ? sprintf( "%s\n", $baseline )
                : sprintf( "%s\t(%d updates)\n", $baseline, $count );

            push @rptblock, $rptline;
        }
    }

    #######################################################################
    # Finally, print out the formatted data lines for this UID. Sort will #
    # put them in date/timestamp order, and PID order within that.        #
    #######################################################################
    print sort @rptblock;
    print "\n";                     # Skip line before next user.
}