#!/usr/bin/perl
################################################
# Title: crunchrep.pl
# Author: Clovis Sangrail
# Description: The crunchrep.pl Perl script will process one or more input
#   data files consisting of GT.M Journal extract log records. It produces a
#   report of exceptional transactions for possible further scrutiny by audit
#   or security personnel.
#
# Revision History
#  Ver  Author           Date
#  ---  ------           ----
#  1.0  Clovis Sangrail  Apr 2012
#       Initial port/rewrite of audit.gawk.
#  1.1  Clovis Sangrail  9-May-2012
#       Rewrite regex for Global Variable analysis to handle subscripts
#       like """""ABU%"""" . Then change back.
################################################

#####################
# Calling Arguments #
#####################
# See usage function.

###########
# Modules #
###########
use strict;
use warnings;
use Getopt::Std;                # Process switches.
use File::Basename;

#############
# Variables #
#############
$| = 1;                         # Flush after every write.

my $NMF = "namefile";           # Userids to report upon. (Default.)
my $ALLF;                       # Journal extract Log file.
my %rpt;                        # $rpt{uid}{pid}{global} = [ stamp, first-seen global, count ].
my %opts;                       # Hash of commandline opts => vals.
my %rname;                      # Hash of uid => GECOS name.
my %matchpid;                   # Hash of pid => uid.

#############
# Functions #
#############

# Display program usage instructions on STDOUT.  Takes no arguments and
# returns nothing useful; callers die() right after calling it.
# NOTE(review): the <placeholder> names and the line layout of this text were
# reconstructed; confirm them against the deployed copy of the script.
sub usage
{
    my $prg = basename($0);
    print <<EOF;

Usage: $prg -f <fieldnum> [ -F <regex> -k <regex> ] [ <namefile> ] <logfile>

The $prg program reads input files of GT.M Journal Extract Log recs
generated by the MUPIP program and Unix UIDs and produces a report of
transactions performed by the users in the namefile. The <logfile> file is
required, the <namefile> file defaults to "namefile" in the current directory.

Switches and Parameters:
  -f <fieldnum> = Field number of Global Variable, currently 7 or 9 so far.
                  This may change with new versions of GT.M and MUPIP.
                  Required switch.
  -F <regex>    = Perl regex of Global Variable names to omit from report.
                  Optional.
  -k <regex>    = Perl regex of Global Variable names for which subscripts
                  will be preserved and reported on separately. Default is to
                  aggregate all subscripts into single count. Optional.
  <namefile>      File of User ID's (one per line) on which to report.
                  Optional, defaults to "namefile" in current directory.
  <logfile>       File of 01/04/05/10 mupip journal extract log records,
                  sorted by PID and rectype within PID. Required parameter.

EOF
# Sorry no indentation allowed with EOF. (No in-line comments either.)
    return;
}

########
# Main #
########

#####################################################################
# Process the switch arguments. (See 'usage{}' function above for a #
# description of the switches and other commandline parameters.)    #
#####################################################################
getopt( 'fFk', \%opts );        # Read switches into %opts hash.

my $gfld = $opts{"f"};          # This is global var field number.
unless ( defined $gfld && $gfld =~ /^\d+$/ && ( 7 == $gfld || 9 == $gfld ) ) {
    usage();                    # Must be one of these (and must be given).
    die "Invalid logfile global field number\n\n";
}
$gfld--;                        # Perl numbers from zero.

my $fltr    = $opts{"F"};       # Get regex of globals to skip.
my $keepsub = $opts{"k"};       # and Globals to not aggregate.

# Both regexes are optional.  Compile each once; undef means "not supplied".
my $fltr_re    = ( defined $fltr    && $fltr    ne "" ) ? qr/$fltr/    : undef;
my $keepsub_re = ( defined $keepsub && $keepsub ne "" ) ? qr/$keepsub/ : undef;

###########################################################################
# Process filename arguments. Error if none. One arg is the data file of  #
# journal records, and the namefile is the default. If two args, then 1st #
# is the namefile, 2nd is the data file. More than two is an error.       #
###########################################################################
my $argcnt = scalar @ARGV;      # Remaining arg cnt. (getopt shifts.)
if ( 0 == $argcnt ) {
    usage();
    die "Need a journal extract log file name\n\n";
}
elsif ( 1 == $argcnt ) {
    $ALLF = $ARGV[0];           # Just log file.
}
elsif ( 2 == $argcnt ) {
    ( $NMF, $ALLF ) = @ARGV;    # Names file and log file.
}
else {
    usage();
    die "Too many file name arguments\n\n";
}

#######################################################
# Open the names and log files, err out on failure.   #
# Three-arg open: a file name can never be taken as a #
# mode or a command.  "-" as the log file still means #
# STDIN, as it did with the old two-arg open.         #
#######################################################
open( my $names_fh, '<', $NMF ) or die "Cannot open $NMF : $!";

my $all_fh;
if ( $ALLF eq '-' ) {
    $all_fh = \*STDIN;
}
else {
    open( $all_fh, '<', $ALLF ) or die "Cannot open $ALLF : $!";
}

############################################################################
# Process NAMES file. This is the list of User IDs (UIDs) on which we wish #
# to report. Look each one up in the password database. If found set the   #
# rname{UID} hash entry to the real name (GECOS) field. If not found set   #
# that hash entry to "Name_Not_Found".                                     #
# The lookup is an exact match via getpwnam (getpwuid for all-digit        #
# entries) rather than a shell "grep", so a name can neither match another #
# user's line by substring nor inject shell commands.                      #
############################################################################
while ( my $name = <$names_fh> ) {      # Read till EOF.
    chomp $name;                        # Trim trailing '\n'.
    next if $name eq "";                # Ignore blank lines.

    my @pw = getpwnam($name);
    @pw = getpwuid($name) if !@pw && $name =~ /^\d+$/;

    if (@pw) {                          # Element 6 is the GECOS field.
        $rname{$name} = defined $pw[6] ? $pw[6] : "";
    }
    else {
        $rname{$name} = "Name_Not_Found";
    }
}
close $names_fh;

############################################################################
# This is the main part. Read each line of the input file of MUPIP journal #
# extract log records. Input file is sorted by PID and by rectype within   #
# each PID. Skip all but '01', '04', '05', and '10' record types. Skip '01'#
# recs if UID (6th field) was not among those in the namefile, else save   #
# PID and set the matchpid entry for that PID to be the UID. For 04/05/10  #
# skip if matchpid entry for that PID not defined, or if Global Var is in  #
# the ignore list. Replace any subscripts with "(..)" unless Global is in  #
# the do-not-aggregate list. Translate timestamp and either make new array #
# entry of [ timestamp, global, cnt=1 ] for this rpt{uid}{pid}{global}     #
# hash or increment count of the existing entry.                           #
############################################################################
while ( my $line = <$all_fh> ) {
    chomp $line;                        # Remove trailing newline.

    # Split on backslashes.  The limit keeps everything from the Global
    # Variable field onward (which may itself hold backslashes) together.
    my @fields = split /\\/, $line, $gfld + 1;
    next unless @fields;                # Empty line.

    my $rectype = $fields[0];

    ##################################################################
    # If we find an '01' record, skip if username is not among those #
    # found when the namefile was processed.                         #
    ##################################################################
    if ( "01" eq $rectype ) {
        my ( $pid, $user ) = @fields[ 3, 5 ];
        next unless defined $user && defined $rname{$user};
        $matchpid{$pid} = $user;        # Save UID matching PID.
        next;
    }

    #####################################################################
    # Global variables are altered by record types '04, '05', and '10'. #
    # Anchored so that only exactly these record types qualify.         #
    #####################################################################
    next unless $rectype =~ /^(?:04|05|10)$/;

    my $curpid = $fields[3];            # Only PIDs seen in an '01' rec.
    next unless defined $curpid && defined $matchpid{$curpid};
    my $uid = $matchpid{$curpid};

    ##################################################################
    # Now extract the initial portion of the Global Variable being   #
    # affected by this record, and also extract the whole Variable   #
    # (include any subscripts present).                              #
    ##################################################################
    my $globfld = defined $fields[$gfld] ? $fields[$gfld] : "";
    my ( $wholeglob, $startglob );
    # Earlier (ver 1.1) pattern, kept for reference:
    #   /^((\^[%A-Za-z\d]+)($|=|\(([^"]+|(("+)[^"]+\6)+)+\)))/
    if ( $globfld =~ /^((\^[%A-Za-z\d]+)($|=|.*?\)))/ ) {
        ( $wholeglob, $startglob ) = ( $1, $2 );
    }
    else {                              # Print error msg if regex fails.
        print "Warning: Cannot parse $globfld for pid $curpid \n";
        next;
    }

    #################################################################
    # If this Global's start matches the regex, input via switch on #
    # the commandline, of variables to skip, then skip it.          #
    #################################################################
    next if $fltr_re && $startglob =~ $fltr_re;

    ###############################################################
    # The GTM Journal timestamp is in HoroLog format: DDDDD,SSSSS #
    # where DDDDD = days since 12/31/1840 midnight and SSSSS =    #
    # seconds since midnight. (Note: 47117 is the number of days  #
    # between midnight 12/31/1840 and 1/1/1970.)  The result is   #
    # the Horolog wall-clock time expressed as seconds since      #
    # 1/1/1970; the report formats it with gmtime, so the printed #
    # time is exactly the journal's time, with no fixed timezone  #
    # offset and no one-hour error during daylight saving time.   #
    # An unparsable field is reported and the record is kept with #
    # a zero timestamp rather than being dropped from the report. #
    ###############################################################
    my $stamp = 0;
    if ( defined $fields[1] && $fields[1] =~ /^(\d+),(\d+)/ ) {
        $stamp = ( $1 - 47117 ) * 86400 + $2;
    }
    else {
        my $horolog = defined $fields[1] ? $fields[1] : "";
        print "Cannot parse horolog field $horolog \n";
    }

    #################################################################
    # Now match the start of the Global variable against the search #
    # regex of variables that we do not aggregate. These are vars   #
    # for which we keep separate counts of each different set of    #
    # subscripts that are modified. If we find a match, then leave  #
    # the variable alone. If no match, then replace any subscripts  #
    # with the string (..) in the hash key.                         #
    #################################################################
    my $keyglob = $wholeglob;
    my $keep = $keepsub_re && $startglob =~ $keepsub_re;
    if ( !$keep && $wholeglob =~ /\(/ ) {       # Aggregate if have subscripts.
        $keyglob = $startglob . "(..)";
    }

    ##############################################################
    # Finally, if we already have an entry for this (possibly an #
    # aggregated) Global then increment its count. If this is a  #
    # new one then create the initial [ timestamp, global as     #
    # first seen (with subscripts), cnt=1 ] array.               #
    ##############################################################
    my $node = $rpt{$uid}{$curpid}{$keyglob};
    if ( defined $node ) {
        $node->[2]++;                   # Have entry, increment count.
    }
    else {                              # Create new node, cnt=1.
        $rpt{$uid}{$curpid}{$keyglob} = [ $stamp, $wholeglob, 1 ];
    }
}
close $all_fh;

####################################################################
# At this point the three-dimensional %rpt hash is loaded with the #
# report data, we're ready to print the report. The %rpt hash is   #
# dimensioned as rpt{uid}{pid}{global}.                            #
####################################################################
for my $uid ( sort keys %rpt ) {        # Outermost dimension is UIDs.
    my @rptblock;                       # Report lines for current UID.

    #################################
    # print header for current UID. #
    #################################
    printf "%s: %s\n", $uid, $rname{$uid};
    print "YYYYMMDD hh:mm PID Global\n";
    print "-------------- ------------- ------\n";

    ################################################
    # Within current UID 2nd index is Process IDs, #
    # and 3rd index is each affected GT.M Global.  #
    ################################################
    for my $pid ( keys %{ $rpt{$uid} } ) {
        for my $glob ( keys %{ $rpt{$uid}{$pid} } ) {
            my ( $stamp, $firstglob, $cnt ) = @{ $rpt{$uid}{$pid}{$glob} };

            # Show subscripts if cnt=1, else the hash key, which is "(..)"
            # aggregated unless the global matched the -k regex.
            my $prtglob = ( 1 == $cnt ) ? $firstglob : $glob;

            # gmtime, not localtime: $stamp already holds wall-clock time.
            my @tparts = gmtime $stamp;

            # YYYYMMDD HH:MM pid Globalname
            my $rptline = sprintf "%4d%02d%02d %02d:%02d%13s %s",
                1900 + $tparts[5], 1 + $tparts[4], $tparts[3],
                $tparts[2], $tparts[1], $pid, $prtglob;

            # Multiple updates, append the count.
            $rptline .= sprintf "\t(%d updates)", $cnt if 1 != $cnt;

            push @rptblock, "$rptline\n";
        }
    }

    #######################################################################
    # Finally, print out the formatted data lines for this UID. Sort will #
    # put them in date/timestamp order, and PID order within that.        #
    #######################################################################
    print sort @rptblock;
    print "\n";                         # Skip line before next user.
}