Category: | Web Stuff |
Author/Contact Info | lshatzer |
Description: | Detects whether the argument is a URL, a file, or HTML text, passes it to HTML::TokeParser accordingly, and returns the resulting HTML::TokeParser object. (This was my first venture into inheritance.)
Updated: Changed a few things based on Amoe's suggestions. |
# HTML::TokeParser subclass whose constructor accepts a filename, a URL, or
# a string of HTML and works out which one it was given.
package HTML::TokeParser::Smart;
# 'our' (used for $VERSION below) needs Perl 5.6 or later.
require 5.006;
use strict;
use warnings;
# croak() is used by new() to report errors.
use Carp;
# new() uses LWP::UserAgent and HTTP::Request to fetch URLs; presumably both
# are pulled in by loading LWP -- confirm against the installed libwww-perl.
use LWP;
# Inherit every parsing method from HTML::TokeParser; only new() is overridden.
use base 'HTML::TokeParser';
our $VERSION = '0.2';
# Constructor: build a parser from a filename, a URL, or a string of HTML.
#
# Arguments:
#   $url - despite the name, any one of:
#            * the path of an existing file,
#            * a URL whose scheme is http, https, ftp or file,
#            * a string containing at least one HTML tag.
#
# Returns the parser object, blessed into the invoking class.
#
# Croaks when no argument is given, when the file cannot be opened, when the
# URL cannot be fetched, or when the argument matches none of the three forms.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $url   = shift;

    # Fail early with a clear message instead of emitting "uninitialized"
    # warnings from the tests below.
    croak "No URL, file, or HTML given" unless defined $url;

    my $self;
    if (-e $url) {
        # It's a file!  The parent constructor returns undef when the file
        # cannot be opened (HTML::TokeParser documents $! as holding the
        # reason), so report that rather than handing back a non-object.
        $self = $class->SUPER::new($url)
            or croak "Unable to open file: $url $!";
    }
    elsif ($url =~ m{\A(?:https?|ftp|file):}i) {
        # It's a URL!  The scheme must be followed by a colon so that text
        # which merely starts with "http", "ftp" or "file" is not fetched.
        my $browser  = LWP::UserAgent->new;
        my $response = $browser->request(HTTP::Request->new(GET => $url));
        croak "Unable to get webpage: $url ", $response->status_line
            unless $response->is_success;
        $self = $class->SUPER::new($response->content_ref);
    }
    elsif ($url =~ m/<[^>]+>/) {
        # It's HTML!  Pass a reference so the parent treats it as a document
        # rather than as a filename.
        $self = $class->SUPER::new(\$url);
    }
    else {
        croak "'$url' is neither a valid URL, file, or HTML.";
    }

    # SUPER::new was invoked on $class, so the object already belongs to the
    # invoking class and no re-bless is needed.
    return $self;
}
1;
|
|
---|
Replies are listed 'Best First'. | |
---|---|
Re: HTML::TokeParser::Smart
by Amoe (Friar) on Apr 09, 2002 at 10:22 UTC | |
by lshatzer (Friar) on Apr 09, 2002 at 13:15 UTC | |
by Anonymous Monk on Jul 29, 2005 at 16:56 UTC |
Back to
Code Catacombs