This example uses HTML::TokeParser to locate all of the META tags in a document.
#!/usr/bin/perl
# List every <meta> start tag found in an HTML file, followed by the
# tag's attributes, one "name = value" pair per line.
# Usage: perl meta.pl FILE
use strict;
use warnings;
use HTML::TokeParser;

# get the file name to parse from the command line
my $html = shift or die 'Specify a file name';

# access the file with the parser; on failure report which file could
# not be opened along with the system error in $!
my $p = HTML::TokeParser->new($html) or die "Can't open $html: $!";

# loop through all tokens
while (my $r = $p->get_token) {
    # $r->[0] tells us if it's a Start tag, End tag, Text, Comment
    # for start tags, $r->[1] tells us the type of tag
    # only process META start tags
    next unless $r->[0] eq 'S' && $r->[1] eq 'meta';
    print "Found <meta> tag\n";

    # $r->[2] is a hash ref containing attributes and values.
    # Walk the keys in sorted order: hash iteration order is not stable
    # from one run to the next, so each() would list the attributes in an
    # unpredictable order.
    my $attr = $r->[2];
    for my $k (sort keys %$attr) {
        print "\t$k = $attr->{$k}\n";
    }
}
Here's the result of running it against an HTML file on my system.
$ perl meta.pl ~/public_html/cvsbook.html
Found <meta> tag
	content = text/html
	http-equiv = Content-Type
Found <meta> tag
	content = Open Source Development With CVS
	name = description
Found <meta> tag
	content = makeinfo 4.0
	name = generator
---
# Signature: turns a string of hex digits into text using a 16-character
# lookup table. Each hex digit selects one character of the table; a digit
# whose bit is set in the mask 11 (digits 0, 1 and 3, i.e. the table's
# 'A', 'H' and 'P') gets a single space in front of it.
{
    my $table = 'AHJPacehklnorstu';
    my $text  = '';
    for my $digit (split //, '2fde0abe76c36c914586c') {
        my $index = hex $digit;
        $text .= ' ' if (1 << $index) & 11;
        $text .= substr($table, $index, 1);
    }
    print $text;
}