#!/usr/bin/perl

#############################
#
# Version 2.0
#
# A simple multi-format log parser which is intended to
# be used as a filter.  Could be faster, but it does
# allow you to define a pretty output format.
#
# Author: Chris Jensen
#
# Update:
#
#  - If log format is unspecified, an attempt is
#    made to determine the closest matching format
#    by analyzing a log entry.
#
#  - Reduced amount of code; Sub-formats defined
#    similar to log formats; Minor changes.
#
# Operation:
#
#   Log entries are read from the files named on the command
#   line (or from STDIN).  Every entry that matches the selected
#   log format is broken into single-letter fields, and the
#   fields requested in the -p pattern are printed with printf.
#   Entries that do not match the format are skipped silently.
#

use strict;
use warnings;

use Getopt::Long;

my %optctl;
GetOptions(\%optctl, "type|t=s", "pattern|p=s");


# Each log format is a pair: [ regex, [ field letter per capture group ] ].
# The byte-count group accepts "-" as well as digits, because Apache
# writes "-" in CLF when no bytes were sent (e.g. 304 responses);
# a digits-only group would silently drop those entries.

my $log_formats = {
    'common'   => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)}, [qw(h l u t r c b)] ],
    'virtual'  => [ qr{(\S+) (\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-)}, [qw(v h l u t r c b)] ],
    'combined' => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\"}, [qw(h l u t r c b R A)] ],
    'referer'  => [ qr{(\S+) \-\> (\S+)}, [qw(R r)] ],
    'agent'    => [ qr{(\S+)}, [qw(A)] ],
    'extended' => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\" (\d+) (\d+)}, [qw(h l u t r c b R A P T)] ],
    'custom'   => [ qr{(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^\"]*)\" (\d+) (\d+|-) \"([^\"]*)\" \"([^\"]*)\" (\d+)}, [qw(h l u t r c b A R T)] ],
};


# 'unknown' means "auto-detect the format from the first log entry".
my $type = $optctl{type} || 'unknown';

my $pattern = $optctl{pattern};
usage() unless defined $pattern && length $pattern;

# Reject a mistyped -t value up front instead of silently printing
# blank lines for every entry.
if ($type ne 'unknown' && !exists $log_formats->{$type}) {
    die "Unknown log type '$type'; expected one of: "
      . join(', ', sort keys %{$log_formats}) . "\n";
}

my ($format, $control) = @{ $log_formats->{$type} || [] };


# Translate the user's pattern into a printf format plus the ordered
# list of field letters that feed it.  Both are produced by the same
# pass, so the number of "%s" conversions always equals the number of
# fields; literal text anywhere in the pattern (including before the
# first "%") is left alone, and "%%" stays a literal percent sign.

my @pats;

my $outpat = $pattern . "\n";
$outpat =~ s{
    (%%)                  # an escaped, literal percent sign
  | (%[^a-zA-Z_%]*)       # percent sign plus optional printf flags and width
    ([a-zA-Z_])           # the field letter
}{
    defined $1 ? $1 : do { push @pats, $3; $2 . 's' }
}gex;


# Formats and sub-formats are now defined similarly

my $sub_formats = {
    't' => [ qr{(\d+)\/(\w+)\/(\d+)\:(\d+)\:(\d+)\:(\d+)\s}, [qw(d m y H M S)] ],
    'r' => [ qr{(\w+)\s([^\?]*)\??([^\s]*)?\s(.*)}, [qw(a f q p)] ],
    'u' => [ qr{(\w*)\-(\w*)}, [qw(s i)] ],
    'R' => [ qr{.*\:\/\/([^\/]+)(\/.*)}, [qw(o F)] ]
};


while (my $line = <>) {

    # Attempt to automatically determine log type/format
    # Pick the matching format with the most control entities

    if ($type eq 'unknown') {
        my $most_fields = 0;

        # Sorted keys keep the detection order deterministic.
        for my $name (sort keys %{$log_formats}) {
            my ($regex, $fields) = @{ $log_formats->{$name} };

            next unless $line =~ $regex;
            next unless @{$fields} > $most_fields;

            $most_fields = @{$fields};
            $format      = $regex;
            $control     = $fields;
            $type        = $name;
        }

        die "Can't auto-determine log type\n" if ($type eq 'unknown');
    }

    # A list-context match returns the capture groups directly, which
    # avoids symbolic references to $1, $2, ... (illegal under strict).
    my @captures = ($line =~ $format);
    next unless @captures;

    my %info;

    for my $idx (0 .. $#{$control}) {
        my $ctl = $control->[$idx];
        $info{$ctl} = $captures[$idx];

        # Some fields (time, request, user, referrer) break down further.
        my $sub = $sub_formats->{$ctl};
        next unless defined $sub;

        my ($sfmt, $sctl) = @{$sub};
        my @parts = ($info{$ctl} =~ $sfmt);
        next unless @parts;

        @info{ @{$sctl} } = @parts;
    }

    # Fields that are absent from this format print as an empty string.
    my @vals = map { defined $info{$_} ? $info{$_} : '' } @pats;

    printf($outpat, @vals);
}


# Print the usage summary to STDOUT and terminate the program with
# exit status 0.  Takes no arguments and never returns.

sub usage {

    print qq{
usage: logparse [-t=<type>] -p=<pattern>

example: tail -50 access_log | logparse -t=extended -p="%H:%M %-15o %f"

Formatting characters:

    v - The virtual host name/address
    h - The host IP name/address
    l - The remote logname
    u - Remote User/Session
    t - The time of the request
    r - The full request
    c - The HTTP code (302, 200, etc)
    b - Bytes
    R - Referrer string
    A - User Agent string
    P - Process ID
    T - Time taken in seconds

Request string breakdown:

    a - Action/Method (GET, POST, etc)
    f - File path
    q - Query string
    p - HTTP protocol version

Time of request breakdown:

    d - Day of the month
    m - Month (Apr, May, etc)
    y - Year
    H - Hour
    M - Minute
    S - Second

User Session breakdown:

    s - Session ID
    i - User ID

Referrer string breakdown:

    o - Host of referrer
    F - File path of referrer

};

    exit(0);

}
|
---|
Replies are listed 'Best First'. | |
---|---|
Re: Multi-Format Log Parser - Version 2.0
by grinder (Bishop) on Jan 16, 2002 at 17:07 UTC | |
by cjensen (Sexton) on Jan 18, 2002 at 23:05 UTC | |
by cjensen (Sexton) on Jan 23, 2002 at 06:08 UTC |
Back to
Craft