#!/usr/bin/perl
#
# Print every distinct link URL found in one or more HTML documents,
# one URL per line, in the order the links are first encountered.
#
# Usage: <script> [-b base_url] [-t tag] [file ...]
#
#   -b base_url   handed to HTML::LinkExtor->new as its base argument
#   -t tag        only report links that come from this tag (the
#                 comparison is case-insensitive)
#   file ...      HTML files to scan; "-" (or no file arguments at all)
#                 reads standard input

use strict;
use warnings;

use HTML::LinkExtor;
use Getopt::Std;

# Command-line options, keyed by option letter ('b' and 't').
my %opt;
getopts('b:t:', \%opt);

# URLs already printed. Shared across all input files so that a URL is
# emitted only once per run, not once per file.
my %seen;

@ARGV = ('-') unless @ARGV;

for my $file (@ARGV) {
    extract($file);
}

# extract($file)
#
# Parse the HTML in $file and print each link URL that has not been
# printed before. A $file of "-" means standard input. If the file
# cannot be opened a warning is issued and the file is skipped.
# Returns nothing useful.
sub extract {
    my ($file) = @_;

    # Three-argument open treats the name literally, so the "-" => STDIN
    # convention has to be handled explicitly.
    my $fh;
    my $is_stdin = $file eq '-';
    if ($is_stdin) {
        $fh = \*STDIN;
    }
    elsif (!open $fh, '<', $file) {
        warn "Couldn't open file $file: $!; skipping\n";
        return;
    }

    my $parser = HTML::LinkExtor->new(undef, $opt{b});

    # Feed the document to the parser in fixed-size chunks.
    while (1) {
        my $got = read $fh, my $buf, 8192;
        if (!defined $got) {
            # read() returns undef on error; report it rather than
            # silently treating it as end of file. Links collected so
            # far are still printed below.
            warn "Error reading $file: $!\n";
            last;
        }
        last if $got == 0;    # end of file
        $parser->parse($buf);
    }

    # Tell the parser the document is complete so it can flush any
    # input it is still buffering.
    $parser->eof;

    close $fh unless $is_stdin;

    # Each link record is [ $tag, $attr_name => $url, ... ].
    for my $link ($parser->links) {
        my ($tag, @attr_pairs) = @$link;
        next if $opt{t} && lc($opt{t}) ne lc($tag);

        # Walk the name/URL pairs in order; only the URL is of interest.
        while (my ($attr_name, $url) = splice @attr_pairs, 0, 2) {
            print $url, "\n" unless $seen{$url}++;
        }
    }

    return;
}