#!/usr/bin/perl -w

use strict;
use warnings;

use LWP::Simple;
use Perl6::Slurp;             # to load the page from the cache
use HTML::TreeBuilder::XPath; # easier to use than bare HTML::TreeBuilder

# during development we don't want to hit the real page, 
# so we'll have a -c switch to use a cache 
use Getopt::Std;
# Command-line switches. The only one is -c: when given, $opt{c} is true and
# the page is read from the local cache file instead of being downloaded.
my %opt;
getopts( 'c', \%opt);

# where the live page lives, and the local file it is saved to / loaded from
my $base='http://www.costacrociere.it';
my $url='/it/lista_crociere/capitali_nord_europa-201206.html';
my $cache= 'capitali_nord_europa-201206.html';

# output is written as UTF-8, which gets rid of the bad characters
# otherwise seen in the output
binmode( STDOUT, ':utf8');

# Without -c, refresh the cache from the live page. getstore returns the HTTP
# status code, which must be checked: on failure we would otherwise silently
# parse a stale cache file, or fail later with a less helpful message.
if( ! $opt{c}) {
    my $status= getstore( $base.$url, $cache);
    is_success( $status)
      or die "cannot fetch $base$url: HTTP status $status\n";
}

# with -c and no previous download there is nothing to parse
die "cache file '$cache' not found, run once without -c to create it\n"
  unless -e $cache;

# load the cached page, decoding it as UTF-8
my $page= slurp '<:utf8', $cache;

# build the tree that the XPath queries run against
my $p = HTML::TreeBuilder::XPath->new_from_content( $page );

# Each cruise is described by one <div class="info-cruise"> element. For every
# cruise print its title, its price, then one "label: value" line per
# itinerary detail, followed by a blank line.
my @trips= $p->findnodes( '//div[@class="info-cruise"]');
foreach my $trip (@trips){
   my $title = $trip->findvalue( './/div[@class="sx"]/h3');
   print "$title\n";

   my $price = $trip->findvalue( './/span[@class="new-price"]');
   print "price: $price\n";

   # this is very brittle, but it gives you a base on which you can build
   foreach my $info ( $trip->findnodes( './/p[@class="itinerari-info"]//span[@class != "note" and @class != "strike"]'))
     {
       # the label is the first <b> child of the span; in list context
       # findnodes returns the matching nodes, so this is undef if there is none
       my( $info_title)= $info->findnodes( './b');

       # a span without a <b> label cannot be split into label and value:
       # print its text as is instead of dying on an undefined node
       if( ! defined $info_title) {
           print $info->as_text, "\n";
           next;
       }

       print $info_title->as_text();

       # remove the label from the tree so that the text left in the span
       # is only the value
       $info_title->detach;
       my $info_value= $info->as_text;
       print ": ", $info_value, "\n";
    }
  print "\n";

}