use strict;
use warnings;
use Data::Dumper;
# Build a hash of known patterns for each known web site.
#
# Each site maps to two regex fragments kept as plain strings:
#   start  - matches the heading naming a section and the opening of its list
#   finish - matches the end of that section
# Wherever the token in $PLACEHOLDER occurs in a fragment, ExtractSection()
# swaps in the regex-escaped section name before matching.
#
# NOTE(review): the <h2>/<ul> shape of the 'www.foo.com' fragments is a
# reconstruction -- confirm it against the real markup of that site.
my $PLACEHOLDER = '__SECTION__';
my %patterns = (
    'www.foo.com' => {
        start  => '<h2[^>]*>\s*' . $PLACEHOLDER . '\s*</h2>\s*<ul[^>]*>',
        finish => '</ul>',
    },
    # Presumably a stub still to be filled in: as a regex, "..." simply
    # matches any three characters.
    'www.bar.com' => {
        start  => "...",
        finish => "...",
    },
);

# Slurp the whole sample document from the DATA handle in one read;
# $/ is undefined only inside the do-block.
my $html = do { local $/; <DATA> };

# Demo driver: print the requested sections, in this order.
for my $section ('Section 2', 'Section 1', 'Section 3') {
    my $text = ExtractSection($html, 'www.foo.com', $section);
    if (defined $text) {
        print $text, "\n\n";
    }
    else {
        # Report the miss instead of printing undef.
        warn "No '$section' block found for www.foo.com\n";
    }
}

# -----------------------------------------------------
# ExtractSection($html, $site, $section)
#
# Returns the first substring of $html that begins with the site's "start"
# pattern and ends with its "finish" pattern, after the placeholder token in
# both patterns has been replaced by $section.
#
#   $html    - document text to search
#   $site    - key into %patterns selecting the pattern pair
#   $section - section name, matched literally (regex metacharacters in it
#              are escaped)
#
# Always returns exactly one scalar: the matched text, or undef when any
# argument is undefined, the site is unknown or incomplete, or nothing
# matches.
sub ExtractSection
{
    my ($html, $site, $section) = @_;

    my $text;    # stays undef unless a match is found

    return $text unless defined $html && defined $site && defined $section;

    # Look the site up without "->{...}" on a missing key, so an unknown
    # site does not autovivify an empty entry in %patterns.
    my $site_patterns = $patterns{$site};
    return $text unless $site_patterns;

    my $start  = $site_patterns->{start};
    my $finish = $site_patterns->{finish};
    return $text unless defined $start && defined $finish;

    # Escape the section name so characters such as "." or "(" match
    # themselves, then substitute it for the explicit placeholder token.
    # (An empty search pattern must not be used here: in Perl it means
    # "reuse the last successful pattern".)
    my $literal = quotemeta $section;
    $start  =~ s/\Q$PLACEHOLDER\E/$literal/g;
    $finish =~ s/\Q$PLACEHOLDER\E/$literal/g;

    # /s lets ".*?" cross newlines; /m is kept for site patterns that
    # use ^ or $ as line anchors.
    ($text) = $html =~ /($start.*?$finish)/sm;
    return $text;
}
__DATA__
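<h2>Section 1</h2>
<ul>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
</ul>
<h2>Section 2</h2>
<ul>
<li>Item 4</li>
<li>Item 5</li>
<li>Item 6</li>
</ul>
<h2>Section 3</h2>
<ul>
<li>Item 7</li>
<li>Item 8</li>
<li>Item 9</li>
</ul>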