package HTML::TokeParser::Smart;

require 5.006;
use strict;
use warnings;

use Carp;
use LWP;
use base 'HTML::TokeParser';

our $VERSION = '0.2';

# new($source)
#
# Build a parser from a single argument whose kind is detected automatically.
# The checks are tried in this order:
#
#   1. an existing filesystem path  -> the file is parsed;
#   2. an http/https/ftp/file URL   -> the document is fetched with
#                                      LWP::UserAgent and its body is parsed;
#   3. a string containing a tag    -> the string itself is parsed as HTML.
#
# Returns the new parser object, blessed into the invoking class.
#
# Croaks when $source is undefined, when the file cannot be opened, when the
# HTTP request is not successful, or when $source matches none of the three
# kinds above.
sub new {
    my $proto  = shift;
    my $class  = ref($proto) || $proto;
    my $source = shift;

    croak "No URL, file, or HTML was given" unless defined $source;

    my $is_file = do {
        # Literal HTML normally contains newlines; a failed stat on such a
        # string would otherwise trigger the "Unsuccessful stat on filename
        # containing newline" warning.
        no warnings 'newline';
        -e $source;
    };

    my $self;
    if ($is_file) {
        # It's a file!  The parent constructor returns undef (with $! set)
        # when the file cannot be opened for reading.
        $self = $class->SUPER::new($source)
            or croak "Unable to open file: $source: $!";
    }
    elsif ($source =~ m{\A(?:https?|ftp|file):}i) {
        # It's a URL!  Require "scheme:" so that plain text which merely
        # starts with "http", "ftp" or "file" is not sent to the network.
        my $browser  = LWP::UserAgent->new;
        my $response = $browser->request(HTTP::Request->new(GET => $source));
        croak "Unable to get webpage: $source ", $response->status_line
            unless $response->is_success;
        $self = $class->SUPER::new($response->content_ref);
    }
    elsif ($source =~ m/<[^>]+>/) {
        # It's HTML!  Pass a reference so the parent parses the text itself.
        $self = $class->SUPER::new(\$source);
    }
    else {
        croak "'$source' is neither a valid URL, file, or HTML.";
    }

    # Guard against a parent constructor that failed without dying, so the
    # caller never receives an undefined "object".
    croak "Unable to create a parser for '$source'" unless $self;

    return $self;
}

1;