Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl: the Markov chain saw
 
PerlMonks  

ALF: Apache Log Filter

by ciderpunx (Vicar)
on Apr 01, 2007 at 15:47 UTC ( [id://607707]=CUFP: print w/replies, xml ) Need Help??

ALF is a project I've been working on to learn some more about Gtk2. It uses the fantastic DBD::AnyData and allows you to run SQL queries on an Apache log. I've written a couple of extra parser plugins so that AnyData can read combined and vhost logs. You need to copy them into /path/to/AnyData/Format.

There's a demo video on the internet archive.

alf.pl

#!/usr/bin/perl
###################################################
# ALF: Apache Log Filter (c) 2007 Charlie Harvey  #
###################################################
#
# A small Gtk2 front end that lets you run SQL queries against an Apache
# access log through DBD::AnyData.  The menu bar is described in
# ./alf_menu.pl, which is loaded at start-up and refers to the callbacks
# defined in this file by name (open_log, export_results, close,
# set_log_type, about).
use strict;
use warnings;

use DBD::AnyData;
use AnyData::Format::Weblog;
use AnyData::Format::WeblogVhost;
use AnyData::Format::WeblogCombined;
use Gtk2 -init;
use Gtk2::Ex::Simple::Menu;
use Getopt::Std;

my %opts;
getopts('?cvl:', \%opts);

my $APP_NAME = 'ALF: Apache Log Filter';
my $VERSION  = '0.41';
my $USAGE    = "$APP_NAME, version $VERSION\nUsage: $0 [cv?][l filename]\n";

# Tweakable options #
my $TABLE        = 'log';
my $SEPERATOR    = ' | ';
my $QUERY_FONT   = 'FreeSans 12';
my $RESULTS_FONT = 'FreeMono 12';
my $LOG_DEFAULT  = '/home/charlie/access_log_test';
# Stop tweaking #

# GDK key value of the Return key (0xFF0D); used by filter_pressed().
my $KEY_RETURN = 65293;

# Maps the callback_action numbers used by the Format menu in alf_menu.pl
# to AnyData format names.
my %log_types = (
    4 => "WeblogCombined",
    5 => "WeblogVhost",
    6 => 'Weblog',
);
my $log_type = "WeblogCombined";

my $dbh = DBI->connect('dbi:AnyData:(RaiseError=>0)')
    or die "Can't connect to DBD::AnyData: $DBI::errstr\n";

my $window = Gtk2::Window->new('toplevel');

# Load the menu description.  `do FILE` returns undef both when the file
# cannot be read ($! set) and when it fails to compile ($@ set), so check
# before handing the result to the menu builder.
my $menu_tree = do './alf_menu.pl';
unless (ref $menu_tree eq 'ARRAY') {
    die "Can't load ./alf_menu.pl: "
        . ($@ || $! || 'file did not return a menu tree') . "\n";
}
my $menu = Gtk2::Ex::Simple::Menu->new(
    menu_tree        => $menu_tree,
    default_callback => \&not_implemented,
);

my $query          = Gtk2::Entry->new;
my $col_selecter   = Gtk2::ComboBox->new_text;
my $results_view   = Gtk2::TextView->new;
my $results        = $results_view->get_buffer;
my $scroll_results = Gtk2::ScrolledWindow->new;
my $select_all     = Gtk2::Button->new_with_label('Get everything');
my $hits           = Gtk2::Button->new_with_label('Total Hits');
my $_404           = Gtk2::Button->new_with_label('404s');
my $short          = Gtk2::RadioButton->new_with_label(undef, 'Short');
my $long           = Gtk2::RadioButton->new_with_label($short, 'Vertical');
my $top_box        = Gtk2::HBox->new;
my $main_box       = Gtk2::VBox->new(0, 0);
my $bottom_box     = Gtk2::HBox->new(0, 15);
my $status_bar     = Gtk2::Statusbar->new;
my $log_file       = $LOG_DEFAULT;

# Show error dialogue box.
# The message is passed as an argument to a literal '%s' format: the fifth
# parameter of Gtk2::MessageDialog->new is a printf-style format, and the
# messages shown here often contain SQL with '%' wildcards.
sub err {
    my $msg   = shift;
    my $error = Gtk2::MessageDialog->new(
        $window, 'modal', 'error', 'close', '%s', $msg
    );
    $error->signal_connect( response => \&dialog_close );
    $error->run;
    $error->destroy;
}

# Show informational dialogue box (same '%s' precaution as err()).
sub info {
    my $msg  = shift;
    my $info = Gtk2::MessageDialog->new(
        $window, 'modal', 'info', 'close', '%s', $msg
    );
    $info->signal_connect( response => \&dialog_close );
    $info->run;
    $info->destroy;
}

# Actually only used during development - pop up an info box warning that
# this feature isn't done.  Installed as the menu's default callback.
sub not_implemented {
    info("Not done yet :-(");
}

# Show about dialogue box
sub about {
    Gtk2->show_about_dialog(
        $window,
        name      => $APP_NAME,
        version   => $VERSION,
        copyright => "(C) 2007 Charlie Harvey",
        authors   => "Charlie Harvey",
    );
}

# Set status text to arg (replaces whatever message is currently shown).
sub status {
    my $status = shift;
    $status_bar->pop(0);
    $status_bar->push(0, $status);
}

# Set standard status text
sub changed_status {
    status("$APP_NAME, using $log_file as $TABLE ($log_type format)");
}

# Change the log type, call dbh->func for that type, update column selecter.
# Arguments: ($caller, $log_t, $widget).  $log_t is either a key of
# %log_types (as sent by the Format menu) or a format name used verbatim.
# When $widget is given, the call is ignored unless that widget is active,
# and a confirmation box is shown afterwards.
sub set_log_type {
    my ($caller, $log_t, $widget) = (@_);
    if ($widget) {
        return unless $widget->get_active;
    }
    $log_type = $log_types{$log_t};
    $log_type ||= $log_t;
    $dbh->func($TABLE, $log_type, $log_file, 'ad_catalog');

    # Empty the column selecter (no format here has more than 10 columns)
    # and refill it with the columns of the newly selected parser.
    $col_selecter->remove_text(0) for (1 .. 10);
    my $i    = 0;
    my @cols = col_names();
    $col_selecter->insert_text($i++, $_) for (@cols);

    info("Changed to $log_type format parser") if $widget;
    changed_status();
}

# Open a new log file and register it as $TABLE using the current parser.
sub open_log {
    my $chooser = Gtk2::FileChooserDialog->new(
        "Open apache log", $window, 'open',
        ('Open' => 'ok', 'Cancel' => 'cancel')
    );
    my $response = $chooser->run;
    if ($response eq 'ok') {
        $log_file = $chooser->get_filename;
        $dbh->func($TABLE, $log_type, $log_file, 'ad_catalog');
        changed_status();
    }
    $chooser->destroy;
}

# Save text from results text_view to a file chosen by the user.
# Existing files are never overwritten.  Failures to open or to flush the
# file are reported through err(); the success box is shown only when the
# text was written and the handle closed cleanly.
sub export_results {
    my $success = 0;
    my $chooser = Gtk2::FileChooserDialog->new(
        "Save filtered log", $window, 'save',
        ('Export' => 'ok', 'Cancel' => 'cancel')
    );
    my $response = $chooser->run;
    if ($response eq 'ok') {
        my $output_file = $chooser->get_filename();
        if (!defined $output_file || $output_file eq '') {
            err("No file name given");
        }
        elsif (-e $output_file) {
            err("Can't overwrite existing file $output_file");
        }
        else {
            my $text_to_export = $results->get_text(
                $results->get_start_iter, $results->get_end_iter, 0
            );
            if (open my $out, '>', $output_file) {
                print {$out} $text_to_export;
                # CORE::close, because this file defines its own close().
                if (CORE::close($out)) {
                    $success = 1;
                }
                else {
                    err("Can't write to file $output_file: $!");
                }
            }
            else {
                err("Can't write to file $output_file: $!");
            }
        }
    }
    $chooser->destroy;
    info("Filtered results exported") if ($success);
}

# Close a dialogue box
sub dialog_close {
    my ($self, $response) = @_;
    $self->destroy;
}

# Close application.  The name is referenced from alf_menu.pl (\&close).
sub close {
    Gtk2->main_quit;
}

# When user presses enter, call go with current sql.
# Returns a false value so the key press is still handled by the entry.
sub filter_pressed {
    my ($widget, $event, $data) = @_;
    return unless ($event->keyval == $KEY_RETURN);
    go();
    return 0;
}

# Run the sql currently in the query field and show the result.
sub go {
    my $sql = $query->get_text;
    status("Filtering...");
    my $res = query($sql);
    $results->set_text($res);
}

# Run select * query
sub select_all_query {
    my $sql = "SELECT * FROM $TABLE";
    $query->set_text($sql);
    $results->set_text(query($sql));
}

# Run hits query
sub hits_query {
    my $sql = "SELECT count(client) AS hits, status FROM $TABLE GROUP BY status ORDER BY status";
    $query->set_text($sql);
    $results->set_text(query($sql));
}

# Run 404 query
sub _404_query {
    my $sql = "SELECT * FROM $TABLE WHERE status = 404";
    $query->set_text($sql);
    $results->set_text(query($sql));
}

# Append column name to query field and move the cursor to the end.
sub append_col_to_qry {
    my $widget = shift;
    my @cols   = col_names();
    my $active = $widget->get_active;

    # 'changed' also fires when the selecter is emptied; get_active is then
    # negative and would otherwise pick the last column via $cols[-1].
    return 0 if $active < 0 || $active > $#cols;

    $col_selecter->popdown;
    $query->append_text(' ' . $cols[$active] . ' ');
    $query->grab_focus;
    my $pos = length $query->get_text;
    $query->set_position($pos);
    0;
}

# Run query.
# Takes an SQL string; a trailing "\g" (or the "Vertical" radio button)
# selects the vertical one-field-per-line layout.  Returns the formatted
# result text, or '' when the statement could not be prepared or executed
# (an error box is shown in that case).
sub query {
    my $sql = shift;
    my $vl  = 0;
    if ($sql =~ /\\g\s*$/i) {
        $vl = 1;
        $sql =~ s/\\g\s*$//i;
    }
    if ($long->get_active) {
        $vl = 1;
    }

    my $return    = '';
    my $row_count = 0;

    my $sth = $dbh->prepare($sql);
    unless ($sth) {
        err("Can't prepare $sql\nIs your SQL syntactically correct?");
        status("Query failed: $sql");
        return '';
    }
    unless ($sth->execute) {
        err("Can't execute $sql\n" . ($sth->errstr || ''));
        status("Query failed: $sql");
        return '';
    }

    while (my $x = $sth->fetchrow_hashref) {
        for my $col (sort keys %$x) {
            # NULL/missing fields are shown as empty strings.
            my $value = defined $x->{$col} ? $x->{$col} : '';
            if ($vl) {
                $return .= sprintf("%10s : %s\n", $col, $value);
            }
            else {
                $return .= $value . $SEPERATOR;
            }
        }
        $return .= "\n";
        $row_count++;
    }
    err("Couldn't run SQL.\nMaybe the file you opened wasn't a valid Apache log?\n\n"
        . ($sth->errstr || ''))
        if $sth->err;
    status("$row_count rows. Query: $sql");
    $return;
}

# Return column names for current parser (sorted, the same order in which
# query() prints the fields of a row).
sub col_names {
    my $parser_name = "AnyData::Format::$log_type";
    return (sort split /,/, $parser_name->new->{col_names});
}

## Process commandline options
if ($opts{'?'}) {
    print $USAGE;
    exit(0);
}
if ($opts{l}) {
    $log_file = $opts{l};
}
if ($opts{v}) {
    $log_type = "WeblogVhost";
}
elsif ($opts{c}) {
    $log_type = "Weblog";
}

# Connect signals up.
$window->signal_connect( destroy => \&close );
$query->signal_connect( key_press_event => \&filter_pressed );
$col_selecter->signal_connect( changed => \&append_col_to_qry );
$select_all->signal_connect( clicked => \&select_all_query );
$hits->signal_connect( clicked => \&hits_query );
$_404->signal_connect( clicked => \&_404_query );

# Initialise widgets
$window->set_border_width(5);
$window->set_default_size(1024, 768);
$window->set_title($APP_NAME);
$query->set_width_chars(80);
$query->set_has_frame(1);
$query->modify_font(Gtk2::Pango::FontDescription->from_string($QUERY_FONT));
$col_selecter->set_wrap_width(1);
$results_view->set_editable(0);
$results_view->set_wrap_mode('word-char');
$results_view->set_overwrite(1);
$results_view->modify_font(Gtk2::Pango::FontDescription->from_string($RESULTS_FONT));
$scroll_results->set_policy('never', 'always');

# Layout
$scroll_results->add($results_view);
$top_box->add($query);
$top_box->add($col_selecter);
$bottom_box->pack_start($short, 0, 1, 1);
$bottom_box->pack_start($long, 0, 0, 1);
$bottom_box->pack_start($select_all, 1, 1, 1);
$bottom_box->pack_start($hits, 1, 1, 1);
$bottom_box->pack_start($_404, 1, 1, 1);
$main_box->pack_start($menu->{widget}, 0, 0, 0);
$main_box->pack_start($top_box, 0, 0, 5);
$main_box->pack_start($scroll_results, 1, 1, 5);
$main_box->pack_start($bottom_box, 0, 0, 5);
$main_box->pack_start($status_bar, 0, 0, 5);
$window->add($main_box);
$window->show_all;

# Lets go
set_log_type(undef, $log_type);
Gtk2->main;
0;

__END__

=head1 NAME

ALF: Apache Log Filter. Or alf.pl if you want to be picky.

=head1 SYNOPSIS

./alf.pl [-l /path/to/access.log] [-cv]

=head1 DESCRIPTION

ALF is a filter for people using Apache logging to plain text logs, who
still want the flexibility of being able to search their logs like a
database. It relies heavily on Jeff Zucker's DBD::AnyData to do the clever
stuff, and overlays that with a simple GTK2 interface.

You can run SQL queries and see the results in horizontal or vertical
formats; open different logs; run a few everyday queries from buttons;
append column names to your SQL from a dropdown box; switch between parser
formats; and export your filtered results in vertical or horizontal formats.

You'll need to copy WeblogCombined.pm and WeblogVhost.pm into
/path/to/AnyData/Format before ALF will play with you. AFAICS WeblogVhost.pm
ought to work on all three types of logs but I don't have common format logs
available to test on. This is very much a learning project so all comments
are welcome.

=head1 OPTIONS

=head2 Commandline options

=over

=item -?

Print usage and exit

=item -l /path/to/access.log

A valid apache log

=item -c

Use common format parser

=item -v

Use vhost parser

=back

=head2 Set in script body

=over

=item $TABLE

The name which you wish to give the table you'll search. Default: 'log'.

=item $SEPERATOR

Field separator for use when viewing records in horizontal format.
Default: ' | '.

=item $QUERY_FONT

Font used in the query box. Default: 'FreeSans 12'.

=item $RESULTS_FONT

Font used to display your results. Default: 'FreeMono 12'.

=item $LOG_DEFAULT

Log that gets filtered if you don't specify one on the commandline.
Default: '/home/charlie/access_log_test'

=back

=head1 REQUIREMENTS

  perl 5.8.8
  DBD::AnyData
  AnyData::Format::WeblogVhost
  AnyData::Format::WeblogCombined
  Gtk2
  Gtk2::Ex::Simple::Menu
  Getopt::Std

=head1 LICENSE

Copyright (C)2007 Charlie Harvey

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA.

Also available on line: http://www.gnu.org/copyleft/gpl.html

=cut

alf_menu.pl

#!/usr/bin/perl
# Menu description for alf.pl.
#
# alf.pl loads this file with `do` and passes the value of its last
# statement (the array reference built below) to Gtk2::Ex::Simple::Menu as
# menu_tree.  The callbacks named here (open_log, export_results, close,
# set_log_type, about) are not defined in this file; they are taken by
# reference and are expected to exist in the loading script.
use strict;
use warnings;

# Entries of the File menu.
my @file_children = (
    _Open => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-open',
        callback        => \&open_log,
        callback_action => 0,
        accelerator     => '<ctrl>O',
    },
    _Save => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-save',
        callback_action => 1,
        callback        => \&export_results,
        accelerator     => '<ctrl>S',
    },
    _Quit => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-quit',
        callback        => \&close,
        callback_action => 3,
        accelerator     => '<ctrl>Q',
    },
);

# Entries of the Format menu: one radio group whose callback_action values
# (4, 5, 6) are handed to set_log_type.
my @format_children = (
    _Combined => {
        item_type       => '<RadioItem>',
        callback        => \&set_log_type,
        callback_action => 4,
        groupid         => 1,
    },
    _Vhost => {
        item_type       => '<RadioItem>',
        callback        => \&set_log_type,
        callback_action => 5,
        groupid         => 1,
    },
    _Common => {
        extra_data      => 1,
        item_type       => '<RadioItem>',
        callback        => \&set_log_type,
        callback_action => 6,
        groupid         => 1,
    },
);

# Entries of the Help menu.
my @help_children = (
    _About => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-about',
        callback        => \&about,
        callback_action => 7,
    },
);

# Keep this assignment as the final statement: its value is what `do`
# returns to the caller.
my $menu_tree = [
    _File => {
        item_type => '<Branch>',
        children  => \@file_children,
    },
    For_mat => {
        item_type => '<Branch>',
        children  => \@format_children,
    },
    _Help => {
        item_type => '<Branch>',
        children  => \@help_children,
    },
];

WeblogVhost.pm

#########################################################
package AnyData::Format::WeblogVhost;
#########################################################
# AnyData driver for "Vhost Log Format" web log files
# Also supports combined and common log formats.
# Copyright (c) 2007, Charlie <charlie@charlieharvey.com>
#########################################################

=head1 NAME

AnyData::Format::WeblogVhost - tiedhash & DBI/SQL access to HTTPD Logs

=head1 SYNOPSIS

 use AnyData;
 my $weblog = adTie( 'WeblogVhost', $filename );
 while (my $hit = each %$weblog) {
    print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
 }
 # ... other tied hash operations

 OR

 use DBI
 my $dbh = DBI->connect('dbi:AnyData:');
 $dbh->func('hits','WeblogVhost','access_log','ad_catalog');
 my $hits = $dbh->selectall_arrayref( qq{
    SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
 });
 # ... other DBI/SQL read operations

=head1 DESCRIPTION

This is a plug-in format parser for the AnyData and DBD::AnyData modules.
You can gain read access to Vhost Log Format web server log files (e.g. NCSA
or Apache) either through tied hashes or arrays or through SQL database
queries.

Fieldnames are taken from the W3 definitions found at the address below,
with the addition of client, referer and vhost fields

   http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format

         vhost
         remotehost
         username
         authuser
         date
         request
         status
         bytes
         client
         referer

Lines that do not match the expected layout are skipped.

This module does not currently support writing to weblog files.

Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.

=head1 LICENCE

Copyright (C)2007 Charlie Harvey

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA.

Also available on line: http://www.gnu.org/copyleft/gpl.html

=cut

use strict;
use warnings;
use AnyData::Format::Base;
use vars qw( @ISA $DEBUG $VERSION);
@AnyData::Format::WeblogVhost::ISA = qw( AnyData::Format::Base );

$DEBUG   = 0;
$VERSION = '0.02';

# Line with a leading vhost field: 9 captures
# (vhost, remotehost, username, authuser, date, request, status, bytes, rest).
my $vlog_re = qr/^(\S*) (\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# Line without a vhost field: 8 captures (the same minus vhost).
my $norm_re = qr/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# Splits the "rest" capture into its first two quoted strings; anything
# after them is ignored.
my $ref_client_re = qr/^"(.*?)" "(.*?)".*$/;

# Constructor.  Accepts an optional hash reference of settings, fills in
# the column list and record separator and blesses it into the class.
sub new {
    my $class = shift;
    my $self  = shift || {};
    $self->{col_names} =
        'vhost,remotehost,username,authuser,date,request,status,bytes,client,referer';
    $self->{record_sep} = "\n";
    # NOTE(review): 'datestamp' is not one of the col_names above - confirm
    # what AnyData expects in {key}.
    $self->{key}             = 'datestamp';
    $self->{keep_first_line} = 1;
    return bless $self, $class;
}

# Parse one log line into a list of 10 fields in col_names order.
# Returns undef for empty input and for lines that match neither layout.
# (`return undef` rather than a bare return is kept from the original so the
# value seen by the caller on skipped lines does not change.)
sub read_fields {
    my $self = shift;
    my $str  = shift || return undef;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    return undef unless $str;

    my @row;
    if ($str =~ /VLOG=-$/) {
        # Lines ending in "VLOG=-" carry the vhost as their first field
        # (presumably a marker added by the author's LogFormat - verify).
        @row = $str =~ $vlog_re;
    }
    else {
        # No vhost in the line: pad the vhost column with ''.  The padding
        # is only added on a successful match, otherwise an unparsable line
        # would come back as a row of undefined fields instead of being
        # skipped.
        my @fields = $str =~ $norm_re;
        @row = ('', @fields) if @fields;
    }
    return undef unless @row && defined $row[0];

    # $row[8] holds everything after the bytes field; the first quoted
    # string is the referer and the second the client.
    my ($referer, $client) = $row[8] =~ $ref_client_re;
    $client  ||= '';
    $referer ||= '';
    ($row[8], $row[9]) = ($client, $referer);

    # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
    return @row;
}
1;

WeblogCombined.pm

#########################################################
package AnyData::Format::WeblogCombined;
#########################################################
# AnyData driver for "Combined Log Format" web log files
# copyright (c) 2007, Charlie <charlie_ampersat_charlieharvey_fullstop_com>
#########################################################

=head1 NAME

AnyData::Format::WeblogCombined - tiedhash & DBI/SQL access to HTTPD Logs

=head1 SYNOPSIS

 use AnyData;
 my $weblog = adTie( 'WeblogCombined', $filename );
 while (my $hit = each %$weblog) {
    print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
 }
 # ... other tied hash operations

 OR

 use DBI
 my $dbh = DBI->connect('dbi:AnyData:');
 $dbh->func('hits','WeblogCombined','access_log','ad_catalog');
 my $hits = $dbh->selectall_arrayref( qq{
    SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
 });
 # ... other DBI/SQL read operations

=head1 DESCRIPTION

This is a plug-in format parser for the AnyData and DBD::AnyData modules.
You can gain read access to Combined Log Format web server log files (e.g.
NCSA or Apache) either through tied hashes or arrays or through SQL database
queries.

Fieldnames are taken from the W3 definitions found at

   http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format

         remotehost
         username
         authuser
         date
         request
         status
         bytes
         client
         referer

This module does not currently support writing to weblog files.

Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.

=head1 AUTHOR & COPYRIGHT

(C)Copyright 2007, Charlie <charlie_ampersat_charlieharvey_fullstop_com>

All rights reversed

=cut

use strict;
use AnyData::Format::Base;
use vars qw( @ISA $DEBUG $VERSION);
@AnyData::Format::WeblogCombined::ISA = qw( AnyData::Format::Base );

$DEBUG   = 0;
$VERSION = '0.01';

# One log line: 8 captures
# (remotehost, username, authuser, date, request, status, bytes, rest).
my $line_re = qr/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# The "rest" capture must be exactly two quoted strings.
my $ref_client_re = qr/^"(.*?)" "(.*?)"$/;

# Constructor.  Accepts an optional hash reference of settings, fills in
# the column list and record separator and blesses it into the class.
sub new {
    my $class = shift;
    my $self  = shift || {};
    $self->{col_names} =
        'remotehost,username,authuser,date,request,status,bytes,client,referer';
    $self->{record_sep} = "\n";
    # NOTE(review): 'datestamp' is not one of the col_names above - confirm
    # what AnyData expects in {key}.
    $self->{key}             = 'datestamp';
    $self->{keep_first_line} = 1;
    return bless $self, $class;
}

# Parse one log line into a list of 9 fields in col_names order.
# Returns undef for empty input and for lines that do not match.
# When the text after the bytes field is not two quoted strings, client and
# referer are both returned as ''.
sub read_fields {
    print "PARSE RECORD\n" if $DEBUG;
    my $self = shift;
    my $str  = shift || return undef;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    return undef unless $str;

    my (@row) = $str =~ $line_re;
    return undef unless defined $row[0];

    # The first quoted string is the referer and the second the client; they
    # are stored as (client, referer) to match col_names.
    my ($referer, $client) = $row[7] =~ $ref_client_re;
    $client  ||= '';
    $referer ||= '';
    ($row[7], $row[8]) = ($client, $referer);

    # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
    return @row;
}
1;
--
Linux, perl, punk rock, cider: charlieharvey.org.uk.

Replies are listed 'Best First'.
Re: ALF: Apache Log Filter
by zentara (Cardinal) on Apr 02, 2007 at 09:28 UTC
    Nice demo video. I guess with the rising bandwidth everyone is getting, the demo-video will eventually replace the screenshot. What did you use to make it?

    I'm not really a human, but I play one on earth. Cogito ergo sum a bum
      Thanks zentara. I used xvidcap, which seems to do pretty much everything I wanted it to do - though I also tried Istanbul, which can only produce ogg theoras.
      --
      Linux, perl, punk rock, cider: charlieharvey.org.uk.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: CUFP [id://607707]
Approved by valdez
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others perusing the Monastery: (3)
As of 2025-06-19 00:23 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found

    Notices?
    erzuuliAnonymous Monks are no longer allowed to use Super Search, due to an excessive use of this resource by robots.