use strict;
use warnings;
use Data::Dumper;

# Known start/finish patterns for each supported web site.
#
# Each value is a regex fragment.  The literal token "<section>" is a
# placeholder that ExtractSection() replaces with the (regex-escaped)
# name of the section being looked up.
#
# NOTE(review): the 'www.foo.com' patterns arrived with their markup
# stripped and were reconstructed to match a heading followed by an
# unordered list -- confirm against the real site before relying on them.
my %patterns = (
    'www.foo.com' => {
        start  => '<h1[^>]*>\s*<section>\s*</h1>\s*<ul[^>]*>',
        finish => '</ul>',
    },
    'www.bar.com' => {
        start  => "...",
        finish => "...",
    },
);

# Slurp the whole embedded document in one read.
my $html = do { local $/; <DATA> };

for my $wanted ('Section 2', 'Section 1', 'Section 3') {
    my $found = ExtractSection($html, 'www.foo.com', $wanted);
    warn "No match for '$wanted'\n" unless defined $found;
    print defined $found ? $found : '', "\n\n";
}

# -----------------------------------------------------
# ExtractSection($html, $site, $section)
#
# Returns the first stretch of $html that begins with the site's start
# pattern and ends with its finish pattern, after the "<section>"
# placeholder in both patterns has been replaced by $section.
#
#   $html    - document text to search
#   $site    - key into %patterns
#   $section - section name, matched literally (metacharacters escaped)
#
# Returns undef when an argument is undefined, when the site is unknown
# or incompletely configured, or when nothing matches.  Dies if a
# configured pattern is not a valid regex.
sub ExtractSection {
    my ($html, $site, $section) = @_;

    my $text;    # stays undef unless a match is found

    # 'exists' keeps an unknown site from being autovivified into %patterns.
    if (   defined $html
        && defined $section
        && defined $site
        && exists $patterns{$site})
    {
        # Work on copies so the shared templates are never modified.
        my $ps = $patterns{$site}{start};
        my $pf = $patterns{$site}{finish};

        if (defined $ps && defined $pf) {
            # Escape the name so characters such as '.' or '(' in a
            # section title cannot change the meaning of the regex.
            my $quoted = quotemeta $section;
            $ps =~ s/<section>/$quoted/g;
            $pf =~ s/<section>/$quoted/g;

            # /s lets '.' cross line breaks; the non-greedy '.*?' stops
            # at the first finish pattern after the start pattern.
            ($text) = $html =~ /(${ps}.*?${pf})/sm;
        }
    }

    return $text;
}

# NOTE(review): the fixture below was reconstructed as minimal HTML from
# a copy whose tags had been stripped; section and item names are as
# found, the markup itself should be confirmed.
__DATA__
<h1>Section 1</h1>
<ul>
<li>Item 1</li>
<li>Item 2</li>
<li>Item 3</li>
</ul>
<h1>Section 2</h1>
<ul>
<li>Item 4</li>
<li>Item 5</li>
<li>Item 6</li>
</ul>
<h1>Section 3</h1>
<ul>
<li>Item 7</li>
<li>Item 8</li>
<li>Item 9</li>
</ul>