Mainly it's an HTML::Parser exercise, done
during a heavy
research moment :)
The idea is very simple: randomly putting
together texts and images from your browser's
cache, you could get a snapshot of your
(or someone else's) behaviours. Like a statistician
in your garbage :)
I grabbed the idea from an old DOMUS issue, but I've lost
the original URL.
Update: I'm thinking of using it as a permanent installation in an Internet café. Two monitors: one used by the surfer, and one connected to a box that automatically refreshes a page generated by this script.
#!/usr/bin/perl
use strict;
# Digs in your browser's cache
# like a statistician in your trashcan...
package Lurker;
use File::Find;

# File-scoped store for everything harvested from the cache directory,
# keyed by content kind. Filled by lurk(), sampled by pick_random().
my $cache = {
    IMAGES => [],   # image file paths
    DOCS   => [],   # HTML document paths
};
# Walk the given cache directory and record image and HTML file paths
# in the shared $cache store. Side effects only; prints progress to STDERR.
sub lurk
{
    my $dir = shift;
    print STDERR "Reading cache...";
    find(
        sub
        {
            # $File::Find::name holds the full path of the current entry;
            # the for() aliases it to $_ for the matches below.
            for ( $File::Find::name ) {
                # BUG FIX: the alternation must be parenthesized. Perl's
                # && binds tighter than ||, so the original
                #   /gif/ || /png/ || /jpg/ && push ...
                # only ever pushed .jpg files; .gif/.png matches merely
                # short-circuited the || chain without pushing.
                ( /\.gif$/ ||
                  /\.png$/ ||
                  /\.jpg$/ ) && push @{ $cache->{ IMAGES }}, $_;
                /\.html$/    && push @{ $cache->{ DOCS }}, $_;
            }
        }, $dir
    );
    print STDERR "OK!\n";
}
# Return a uniformly random entry from the named cache bucket
# ('IMAGES' or 'DOCS'). Returns undef when the bucket is empty.
sub pick_random
{
    my ($what) = @_;
    my $bucket = $cache->{ $what };
    # rand @$bucket evaluates the array in scalar context (its size);
    # for an empty bucket this indexes element 0, yielding undef.
    return $bucket->[ rand @{ $bucket } ];
}
package My_HTML_Parser;
use base 'HTML::Parser';

# Start-tag handler: for <img> tags, replace the src attribute value
# inside the original tag text with a random image from the cache;
# every other tag is echoed unchanged.
sub start
{
    my $self = shift;
    my ($tag, $attr, $attrseq, $origtext) = @_;
    if ($tag eq 'img') {
        my $orig_src = $attr->{'src'};
        my $new_src  = Lurker::pick_random( 'IMAGES' );
        # BUG FIX: src is a URL and routinely contains regex
        # metacharacters (., ?, +, /) — quote it with \Q...\E so the
        # substitution matches the literal string instead of a pattern.
        # Also guard against <img> with no src, and an empty image cache.
        if (defined $orig_src && defined $new_src) {
            $origtext =~ s/\Q$orig_src\E/$new_src/;
        }
    }
    print $origtext;
}

# Text handler: pass document text through unchanged.
sub text
{
    my $self = shift;
    my ($text) = @_;
    print $text;
}

# End-tag handler: re-emit the closing tag.
sub end
{
    my $self = shift;
    my ($tag) = @_;
    print "</$tag>";
}
package main;

# Cache directory: first command-line argument if given, otherwise the
# original hard-coded default (backward compatible).
my $cache_directory = shift @ARGV || '/home/stefano/.netscape/cache';

Lurker::lurk( $cache_directory );

my $doc = Lurker::pick_random('DOCS');
print STDERR "Now parsing $doc...\n";

# Direct method call instead of the indirect-object "new Class" syntax,
# which is ambiguous and discouraged. Renamed $a as well: it is the
# special variable sort() uses and should not be a general-purpose name.
my $parser = My_HTML_Parser->new;
$parser->parse_file( $doc );