Beefy Boxes and Bandwidth Generously Provided by pair Networks
good chemistry is complicated,
and a little bit messy -LW
 
PerlMonks  

Scripted Actions upon Page Changes

by rob_au (Abbot)
on Dec 08, 2001 at 13:37 UTC ( #130402=sourcecode: print w/ replies, xml ) Need Help??

Category: Web Stuff
Author/Contact Info rob_au
Description: This code fragment was written with the intent to minimise some of my system administration overhead by providing a script framework that allowed arbitrary scripting actions to be performed should a web page be modified or updated. The code uses LWP::UserAgent to get a web page and then should the web page have changed since the last execution of the script, as measured by the last_modified header or should this be unavailable, an MD5 digest of the page contents, execute a script subroutine or method.

Independent subroutines can be specified for different URLs, in the single example provided, the subroutine virus_alert is executed should the Symantec web page have changed since the last execution of the script.

#!/usr/bin/perl -Tw

use Carp;
use DB_File;
use Digest::MD5 qw/md5_hex/;
use Fcntl qw/:flock/;
use HTTP::Request;
use LWP::UserAgent;
use URI;

use strict;

#   %despatch_table hash
#
#   The keys of this hash specify the web page to monitor,
#   with the corresponding hash values containing references
#   to the script method or subroutine to be executed upon
#   change in the web page.
#
my %despatch_table = (
    'http://securityresponse.symantec.com/avcenter/vinfodb.html'    =>
+  'virus_alert'
);

#   Create a DBM hash containing the last modification time
#   or page MD5 hash, indexed by the page URL

my %index;
my $db = tie %index, 'DB_File', "wpmon.dbm", O_CREAT | O_RDWR, 0666;
my $fd = $db->fd();
open (DF, "+<&=$fd") || croak $!;
flock (DF, LOCK_EX) || croak $!;

#   Add any new keys within %despatch_table to the array
#   of URLs to monitor

my @urls = keys %index;
foreach my $url (keys %despatch_table) {
    push (@urls, $url) unless (exists $index{$url});
}
my $ua = LWP::UserAgent->new;
foreach my $url (sort @urls) {
    my $uri = URI->new($url);
    my $request = HTTP::Request->new('GET', $uri->canonical->as_string
+);
    my $response = $ua->request($request);
    if ($response->is_success) {

        #   Update measure for page is last_modified header if
        #   defined or failing this, an MD5 digest of the page

        my $html = $response->content;
        my $unique = $response->last_modified || md5_hex($html);
        $index{$url} = $unique unless (exists $index{$url});
        if ($index{$url} ne $unique) {

            #   Ensure that a code or method reference still exists
            #   within the %despatch_table hash - If so, execute it
            #   via an eval statement

            if (exists $despatch_table{$url}) {
                $html = quotemeta $html;
                eval qq/
                    $despatch_table{$url}("$html")
                /;
                carp $@ if $@;
            }
            $index{$url} = $unique;
        }
    }
}
undef $db;
untie %index;
close DF;

exit 0;


sub virus_alert {
    #   ... response code to changed web page within subroutines
}

Comment on Scripted Actions upon Page Changes
Download Code

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://130402]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others taking refuge in the Monastery: (4)
As of 2015-07-29 07:04 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (260 votes), past polls