Hi,
I picked up Spidering Hacks and I've had a problem with hack #19.
I keep getting the errors:
"my" variable @perlbooks masks earlier declaration in same scope at treebuilder.pl line 50.
Bareword "parent" not allowed while "strict subs" in use at treebuilder.pl line 15.
syntax error at treebuilder.pl line 34, near ")
Here is the script:
#!/usr/bin/perl
use strict;
use LWP::Simple;
use HTML::TreeBuilder;
my $url = 'http:www.oreilly.com/catalog/prindex.html';
my $page = get( $url ) or die $!;
my $p = HTML::TreeBuilder->new_from_content( $page );
my @links = $p->look_down(
_tag => 'a',
href => qr{^ \Qhttp://www.oreilly.com/catalog/\E \w+$}x
);
my @rows = map { $_->parent-parent } @links;
my @books;
for my $row (@rows) {
my %book;
my @cells = $row->look_down( _tag => 'td' );
$book{title} = $cells[0]->as_trimmed_text;
$book{isbn} = $cells[1]->as_trimmed_text;
$book{price} = $cells[2]->as_trimmed_text;
$book{price} =~ s/^\$//;
$book{url} = get_url( $cells[0] );
$book{safari} = get_url( $cells[3] );
$book{examples} = get_url( $cells[4] );
push @books, \%book;
}
sub get_url {
my $node = shift;
my @hrefs = $node->look_down( _tag => 'a' )
return unless @hrefs;
my $url = $hrefs[0]->attr('href');
$url =~ s/\s+$//;
return $url;
}
$p = $p->delete;
{
my $count = 1;
my @perlbooks = sort { $a->{price} <=> $b->{price} }
grep { $_->{title} =~ /perl/i } @books;
print $count++, "\t", $_->{price}, "\t", $_->{title} for @perl
+books;
}
{
my @perlbooks = grep { $_->{title} =~ /perl/i } @books;
my @javabooks = grep { $_->{title} =~ /java/i } @books;
my $diff = @javabooks - @perlbooks;
print "There are " .@perlbooks." Perl books and ".@javabooks.
" Java books. $diff more java than Perl."
}