Beefy Boxes and Bandwidth Generously Provided by pair Networks
Don't ask to ask, just ask
 
PerlMonks  

Scripted Actions upon Page Changes

by rob_au (Abbot)
on Dec 08, 2001 at 13:37 UTC ( [id://130402]=sourcecode: print w/replies, xml ) Need Help??
Category: Web Stuff
Author/Contact Info rob_au
Description: This code fragment was written with the intent to minimise some of my system administration overhead by providing a script framework that allowed arbitrary scripting actions to be performed should a web page be modified or updated. The code uses LWP::UserAgent to get a web page and then should the web page have changed since the last execution of the script, as measured by the last_modified header or should this be unavailable, an MD5 digest of the page contents, execute a script subroutine or method.

Independent subroutines can be specified for different URLs, in the single example provided, the subroutine virus_alert is executed should the Symantec web page have changed since the last execution of the script.

#!/usr/bin/perl -Tw

use Carp;
use DB_File;
use Digest::MD5 qw/md5_hex/;
use Fcntl qw/:flock/;
use HTTP::Request;
use LWP::UserAgent;
use URI;

use strict;

#   %despatch_table hash
#
#   The keys of this hash specify the web page to monitor,
#   with the corresponding hash values containing references
#   to the script method or subroutine to be executed upon
#   change in the web page.
#
my %despatch_table = (
    'http://securityresponse.symantec.com/avcenter/vinfodb.html'    =>
+  'virus_alert'
);

#   Create a DBM hash containing the last modification time
#   or page MD5 hash, indexed by the page URL

my %index;
my $db = tie %index, 'DB_File', "wpmon.dbm", O_CREAT | O_RDWR, 0666;
my $fd = $db->fd();
open (DF, "+<&=$fd") || croak $!;
flock (DF, LOCK_EX) || croak $!;

#   Add any new keys within %despatch_table to the array
#   of URLs to monitor

my @urls = keys %index;
foreach my $url (keys %despatch_table) {
    push (@urls, $url) unless (exists $index{$url});
}
my $ua = LWP::UserAgent->new;
foreach my $url (sort @urls) {
    my $uri = URI->new($url);
    my $request = HTTP::Request->new('GET', $uri->canonical->as_string
+);
    my $response = $ua->request($request);
    if ($response->is_success) {

        #   Update measure for page is last_modified header if
        #   defined or failing this, an MD5 digest of the page

        my $html = $response->content;
        my $unique = $response->last_modified || md5_hex($html);
        $index{$url} = $unique unless (exists $index{$url});
        if ($index{$url} ne $unique) {

            #   Ensure that a code or method reference still exists
            #   within the %despatch_table hash - If so, execute it
            #   via an eval statement

            if (exists $despatch_table{$url}) {
                $html = quotemeta $html;
                eval qq/
                    $despatch_table{$url}("$html")
                /;
                carp $@ if $@;
            }
            $index{$url} = $unique;
        }
    }
}
undef $db;
untie %index;
close DF;

exit 0;


sub virus_alert {
    #   ... response code to changed web page within subroutines
}

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: sourcecode [id://130402]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others pondering the Monastery: (5)
As of 2024-03-19 09:59 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found