Beefy Boxes and Bandwidth Generously Provided by pair Networks
Don't ask to ask, just ask
 
PerlMonks  

Clovis_Sangrail's scratchpad

by Clovis_Sangrail (Beadle)
on May 14, 2012 at 15:55 UTC ( #970458=scratchpad: print w/ replies, xml ) Need Help??

#!/usr/bin/perl
################################################
# Title: crunchrep.pl
# Author: Clovis Sangrail
# Description: The crunchrep.pl Perl script will process one or more input
# data files consisting of GT.M Journal extract log records. It produces a
# report of exceptional transactions for possible further scrutiny by audit
# or security personnel.
#
# Revision History
# Ver  Author           Date
# ---  ------           ----
# 1.0  Clovis Sangrail  Apr 2012
#      Initial port/rewrite of audit.gawk.
# 1.1  Clovis Sangrail  9-May-2012
#      Rewrite regex for Global Variable analysis to handle subscripts
#      like """""ABU%"""" . Then change back.
################################################

#####################
# Calling Arguments #
#####################
# See usage function.

###########
# Modules #
###########
use strict;
use warnings;
use Getopt::Std;        # Process switches.
use File::Basename;

#############
# Constants #
#############
use constant PASSWD_FILE        => '/etc/passwd';
use constant NAME_NOT_FOUND     => 'Name_Not_Found';
use constant HOROLOG_EPOCH_DAYS => 47117;   # Days from 12/31/1840 to 1/1/1970.
use constant SECS_PER_DAY       => 86400;

$| = 1;                 # Flush after every write.

#############
# Functions #
#############

# Display program usage instructions on STDOUT.
# NOTE(review): the exact line layout of this text is a best-effort
# reconstruction; confirm against the intended help output.
sub usage {
    my $prg = basename($0);
    print <<"EOF";

$prg usage:

   $prg -f <Glbfld#> [ -F <skipregex> -k <keepregex> <names>] <data>

The $prg program reads input files of GT.M Journal Extract Log recs
generated by the MUPIP program and Unix UIDs and produces a report of
transactions performed by the users in the namefile. The <data> file is
required, the <names> file defaults to "namefile" in the current directory.

Switches and Parameters:

   -f = Field number of Global Variable, currently 7 or 9 so far. This
        may change with new versions of GT.M and MUPIP. Required switch.

   -F = Perl regex of Global Variable names to omit from report. Optional.

   -k = Perl regex of Global Variable names for which subscripts will be
        preserved and reported on separately. Default is to aggregate all
        subscripts into single count. Optional.

<names> File of User ID's (one per line) on which to report. Optional,
        defaults to "namefile" in current directory.

<data>  File of 01/04/05/10 mupip journal extract log records, sorted by
        PID and rectype within PID. Required parameter.

EOF
    return;
}

# Open $path for reading and return the filehandle; dies on failure.
# A path of "-" yields STDIN, which the former two-argument open() also
# did, so piping data in still works.
sub open_input {
    my ($path) = @_;
    return \*STDIN if $path eq '-';
    open my $fh, '<', $path or die "Cannot open $path : $!";
    return $fh;
}

# Compile a commandline regex once.  Returns undef when the switch was not
# given (or was empty), so callers can test with defined().  Dies with a
# readable message when the pattern is not a valid Perl regex.
#   $pattern - regex source text from the commandline (may be undef)
#   $switch  - switch name, used only in the error message
sub compile_filter {
    my ( $pattern, $switch ) = @_;
    return if !defined $pattern || $pattern eq '';
    my $re = eval { qr/$pattern/ };
    die "Invalid regex for $switch switch: $@" if !defined $re;
    return $re;
}

# Read the passwd file and return two hash refs: login name => GECOS and
# numeric uid => GECOS.  The first entry seen for a key wins.  An
# unreadable passwd file yields two empty hashes, so every user is then
# reported as not found.
sub load_passwd_gecos {
    my ($passwd_path) = @_;
    my ( %gecos_by_login, %gecos_by_uid );

    if ( open my $pw_fh, '<', $passwd_path ) {
        while ( my $pw_line = <$pw_fh> ) {
            chomp $pw_line;
            my @pw = split /:/, $pw_line, -1;
            next if @pw < 5;            # Malformed line, no GECOS field.
            $gecos_by_login{ $pw[0] } = $pw[4]
                unless exists $gecos_by_login{ $pw[0] };
            $gecos_by_uid{ $pw[2] } = $pw[4]
                unless exists $gecos_by_uid{ $pw[2] };
        }
        close $pw_fh;
    }
    return ( \%gecos_by_login, \%gecos_by_uid );
}

# Read the namefile (one user id per line) and return a hash of
# user id => real name.  Ids with no passwd entry map to "Name_Not_Found".
# Blank lines are ignored and surrounding whitespace is stripped.
# The lookup is an exact match on the login name, then on the numeric uid;
# it replaces a shell "grep" that matched the id anywhere on a passwd line.
sub load_report_names {
    my ($names_fh) = @_;
    my ( $by_login, $by_uid ) = load_passwd_gecos(PASSWD_FILE);
    my %real_name;

    while ( my $id = <$names_fh> ) {
        $id =~ s/^\s+//;
        $id =~ s/\s+$//;                # Also removes the newline / CR.
        next if $id eq '';
        $real_name{$id} =
              exists $by_login->{$id} ? $by_login->{$id}
            : exists $by_uid->{$id}   ? $by_uid->{$id}
            :                           NAME_NOT_FOUND;
    }
    return %real_name;
}

# Convert a GT.M HoroLog value "DDDDD,SSSSS" (days since 12/31/1840 and
# seconds since midnight) into seconds since 1/1/1970 00:00 of the SAME
# wall clock.  No timezone offset is applied; the report formats the result
# with gmtime() so the journal's clock time is printed unchanged, whatever
# the host timezone or daylight-saving state.
# NOTE(review): assumes the journal HOROLOG field is local time - confirm.
# Returns undef when the field cannot be parsed.
sub horolog_to_seconds {
    my ($horolog) = @_;
    return unless defined $horolog && $horolog =~ /^(\d+),(\d+)/;
    return ( $1 - HOROLOG_EPOCH_DAYS ) * SECS_PER_DAY + $2;
}

########
# Main #
########

#####################################################################
# Process the switch arguments. (See 'usage' function above for a   #
# description of the switches and other commandline parameters.)    #
#####################################################################
my %opts;                               # Hash of commandline opts => vals.
getopt( 'fFk', \%opts );                # Read switches into %opts hash.

my $gfld = $opts{f};                    # This is global var field number.
if ( !defined $gfld || $gfld !~ /\A\d+\z/ || ( $gfld != 7 && $gfld != 9 ) ) {
    usage();                            # Must be 7 or 9.
    die "Invalid logfile global field number\n\n";
}
$gfld--;                                # Perl numbers from zero.

# Regex of globals to skip, and of globals whose subscripts are kept.
# Either may be absent.  Example values:
#   -F '^\^LOG\(?|^\^BCHLOG\(?|^\^ORSLOG\(?|^\^ZLOG\(?|^\^PROCID\(?|^\^SYSLOG\(?'
#   -k '^\^SCAU\(?'
my $fltr    = compile_filter( $opts{F}, '-F' );
my $keepsub = compile_filter( $opts{k}, '-k' );

###########################################################################
# Process filename arguments. One arg is the data file of journal        #
# records, with the namefile taking its default. If two args, then 1st   #
# is the namefile, 2nd is the data file. Anything else is an error.      #
###########################################################################
my $NMF = "namefile";                   # Userids to report upon. (Default.)
my $ALLF;                               # Journal extract Log file.
my $argc = scalar @ARGV;                # Remaining arg cnt after switches.

if ( $argc == 1 ) {
    ($ALLF) = @ARGV;                    # Just log file.
}
elsif ( $argc == 2 ) {
    ( $NMF, $ALLF ) = @ARGV;            # Names file and log file.
}
else {
    usage();
    die "Need a journal extract log file name\n\n" if $argc == 0;
    die "Too many file arguments\n\n";
}

#####################################################
# Open the names and data files, err out on failure. #
#####################################################
my $names_fh = open_input($NMF);
my $all_fh   = open_input($ALLF);

############################################################################
# Process the names file: the list of User IDs on which we wish to report. #
############################################################################
my %rname = load_report_names($names_fh);   # Hash of uid => GECOS name.
close $names_fh;

############################################################################
# This is the main part. Read each line of the input file of MUPIP journal #
# extract log records. Input file is sorted by PID and by rectype within   #
# each PID. Skip all but '01', '04', '05', and '10' record types. Skip     #
# '01' recs if UID (6th field) was not among those in the namefile, else   #
# set the matchpid entry for that PID to be the UID. For 04/05/10 skip if  #
# matchpid entry for that PID is not defined, or if Global Var is in the   #
# ignore list. Replace any subscripts with "(..)" unless Global is in the  #
# do-not-aggregate list. Then either make a new array entry of             #
# [ timestamp, global, cnt=1 ] for this rpt{uid}{pid}{global} hash or      #
# increment the count of the existing entry.                               #
############################################################################
my %matchpid;                           # Hash of pid => uid.
my %rpt;                                # rpt{uid}{pid}{global} => [stamp, shown, cnt].

RECORD:
while ( my $rec = <$all_fh> ) {
    chomp $rec;

    # Split on backslashes; the limit keeps everything from the global
    # field onward (including any backslashes in data) in one element.
    my @fields  = split /\\/, $rec, $gfld + 1;
    my $rectype = $fields[0];
    next RECORD unless defined $rectype;

    ##################################################################
    # '01' record: remember PID => UID if the user is in the namefile. #
    ##################################################################
    if ( $rectype eq '01' ) {
        my ( $pid, $user ) = @fields[ 3, 5 ];
        next RECORD unless defined $pid && defined $user;
        next RECORD unless defined $rname{$user};
        $matchpid{$pid} = $user;
        next RECORD;
    }

    #####################################################################
    # Global variables are altered by record types '04', '05' and '10'. #
    #####################################################################
    next RECORD unless $rectype =~ /\A(?:04|05|10)\z/;

    my $curpid = $fields[3];
    next RECORD unless defined $curpid && defined $matchpid{$curpid};
    my $uid = $matchpid{$curpid};       # PID was seen in an '01' rec.

    ##################################################################
    # Extract the initial portion of the Global Variable affected by #
    # this record ($startglob), and the whole Variable including any #
    # subscripts present ($wholeglob).                                #
    # NOTE(review): for an unsubscripted "^NAME=value" field the     #
    # pattern's "=" alternative leaves a trailing "=" in $wholeglob; #
    # confirm whether that is wanted in the report.                  #
    ##################################################################
    my $globfld = defined $fields[$gfld] ? $fields[$gfld] : '';
    my ( $wholeglob, $startglob );
    if ( $globfld =~ /^((\^[%A-Za-z\d]+)($|=|.*?\)))/ ) {
        ( $wholeglob, $startglob ) = ( $1, $2 );
    }
    else {
        print "Warning: Cannot parse $globfld for pid $curpid \n";
        next RECORD;
    }

    # Skip globals matching the -F regex.
    next RECORD if defined $fltr && $startglob =~ $fltr;

    # Translate the HoroLog timestamp; report but carry on if unparsable.
    my $horolog = defined $fields[1] ? $fields[1] : '';
    my $stamp   = horolog_to_seconds($horolog);
    print "Cannot parse horolog field $horolog \n" unless defined $stamp;

    #################################################################
    # Globals matching the -k regex keep their subscripts and so get #
    # one count per distinct subscript set. All others have any      #
    # subscripts folded into "(..)" so they share a single count.    #
    #################################################################
    my $key = $wholeglob;
    if ( !( defined $keepsub && $startglob =~ $keepsub )
        && $wholeglob =~ /\(/ )
    {
        $key = $startglob . "(..)";
    }

    ##############################################################
    # Increment the count of an existing entry, or create it as  #
    # [ timestamp, global as first seen (with subscripts), 1 ].  #
    ##############################################################
    my $entry = $rpt{$uid}{$curpid}{$key};
    if ( defined $entry ) {
        $entry->[2]++;
    }
    else {
        $rpt{$uid}{$curpid}{$key} = [ $stamp, $wholeglob, 1 ];
    }
}
close $all_fh;

####################################################################
# At this point the three-dimensional %rpt hash is loaded with the #
# report data, we're ready to print the report. The %rpt hash is   #
# dimensioned as rpt{uid}{pid}{global}.                            #
####################################################################
for my $uid ( sort keys %rpt ) {        # Outermost dimension is UIDs.
    my @rptblock;                       # Report lines for current UID.

    # Header for current UID.
    # NOTE(review): confirm the column padding of these header literals
    # against the data line format below.
    printf "%s: %s\n", $uid, $rname{$uid};
    printf "YYYYMMDD hh:mm PID Global\n";
    printf "-------------- ------------- ------\n";

    for my $pid ( keys %{ $rpt{$uid} } ) {              # 2nd index: PIDs.
        for my $glob ( keys %{ $rpt{$uid}{$pid} } ) {   # 3rd index: Globals.
            my ( $stamp, $first_seen, $cnt ) = @{ $rpt{$uid}{$pid}{$glob} };

            # Show subscripts if there was a single update, otherwise the
            # hash key (which is "(..)"-aggregated unless kept by -k).
            my $prtglob = ( 1 == $cnt ) ? $first_seen : $glob;

            # gmtime, not localtime: $stamp carries the journal's wall
            # clock with no timezone offset (see horolog_to_seconds).
            my @tparts = gmtime( defined $stamp ? $stamp : 0 );
            my $baseline = sprintf "%4d%02d%02d %02d:%02d%13s %s",
                1900 + $tparts[5], 1 + $tparts[4], $tparts[3],
                $tparts[2], $tparts[1], $pid, $prtglob;
                                        # YYYYMMDD HH:MM pid Globalname

            # Single update: just the line. Multiple: append the count.
            push @rptblock,
                ( 1 == $cnt )
                ? sprintf( "%s\n", $baseline )
                : sprintf( "%s\t(%d updates)\n", $baseline, $cnt );
        }
    }

    # A plain string sort puts the lines in date/timestamp order, and
    # PID order within that.
    print sort @rptblock;
    print "\n";                         # Skip line before next user.
}
Log In?
Username:
Password:

What's my password?
Create A New User
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others cooling their heels in the Monastery: (6)
As of 2015-07-05 07:07 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (60 votes), past polls