#! perl -w
use HTML::Parser;
use LWP::Simple;
use URI::Escape;
use strict;
# Driver: fetch the document named by the first command-line argument and feed
# it through HTML::Parser, routing every parser event to check_attrs().
my $fn = shift;
# Test definedness/length instead of truthiness so that an argument of "0" is
# not mistaken for a missing one.
defined $fn && length $fn or die "supply a url to parse\n";
# LWP::Simple::get returns undef on failure; an empty (or "0") body is still a
# successful fetch and must not be reported as an error.
my $f = get($fn);
defined $f or die "unable to get $fn\n";
# parse() returns the parser object itself, so there is no failure value to
# test and $! carries no information here.  eof() signals that the document is
# complete so the parser can flush whatever it is still buffering.
HTML::Parser->new(
    default_h => [\&check_attrs, 'text, tagname, attr'],
)->parse($f)->eof;
# Handler called by HTML::Parser for every event (it is registered as
# default_h with the argspec 'text, tagname, attr').  Prints one report for
# each scripting-related construct found on the element:
#   - a script element,
#   - an attribute whose name looks like an event handler (on...),
#   - an attribute value that uses one of the forbidden URL schemes,
#   - a JavaScript entity ("&{") inside an attribute value.
#
# Arguments (positional):
#   $line    - source text of the event; echoed in every report
#   $tagname - element name; events with a false name are ignored
#   $attr    - hash reference of attribute name => value; may be undef
#
# All results are printed to the currently selected output handle; the return
# value is not meaningful.
sub check_attrs {
    my ($line, $tagname, $attr) = @_;
    return unless $tagname;

    my @forbiddenprotos = qw(javascript mocha data);

    # Names and values are URI-unescaped before matching so that %xx-encoded
    # spellings are caught as well.
    $tagname = uri_unescape($tagname);
    print "found script tag.\n\t$line\n" if $tagname eq "script";

    # Events without attributes pass no hash; use an empty one explicitly
    # instead of relying on autovivification.
    $attr ||= {};

    # Sort the names so that the handler reported for an element carrying
    # several on* attributes does not depend on hash ordering.
    my $names  = uri_unescape(join " ", sort keys %$attr);
    my $values = uri_unescape(join " ", values %$attr);

    print "events $1 found.\n\t$line\n" if $names =~ /\b(on\w+)/;

    # URL schemes are case-insensitive and attribute values keep the case the
    # author wrote, so "JavaScript:" must match just like "javascript:".
    foreach my $proto (@forbiddenprotos) {
        print "$proto protocol found.\n\t$line\n"
            if $values =~ /\b\Q$proto\E:/i;
    }

    print "javascript entity found.\n\t$line\n" if $values =~ /\&\{/;
}