#!/usr/bin/perl --
use strict; use warnings;
use Data::Dump;
use URI;
use Web::Scraper;


## Callback run for each node matched inside an itinerary block.
## Element nodes (the <b> labels) yield { key => text, raw => xml };
## everything else (the bare text nodes) yields { val => text }.
my $describe_node = sub {
    my ($node) = @_;

    my $is_element = $node->isa('HTML::Element');

    my $slot = 'val';
    $slot = 'key' if $is_element;

    my %entry = ( $slot => $node->getValue );
    if ($is_element) {
        $entry{raw} = $node->as_XML;
    }

    return \%entry;
};

## Scraper for the cruise listing page.  The result has the shape
##   { info => [ { title, price, span => [ { span => [ ... ] } ] }, ... ] }
my $soy = scraper {

    ## One record per element carrying the "info-cruise" class;
    ## all records are collected into the array stored under {info}.
    process '.info-cruise' => 'info[]' => scraper {

        ## Plain-text fields of the record.
        process './/div[@class="sx"]/h3' => 'title' => 'TEXT';
        process '.new-price' => 'price' => 'TEXT';

        ## Itinerary details: the <b> children of each <span> together with
        ## the spans' direct text nodes, each converted by $describe_node.
        ## (Alternative kept for reference: process '//span' => 'span[]' => 'RAW'
        ## would store the raw markup of each span instead.)
        ## NOTE(review): the XPath is written as absolute ('//span'); the sample
        ## output after __END__ suggests it only sees spans of the current
        ## itinerary block -- confirm against Web::Scraper's nested scraping.
        process '.itinerari-info' => 'span[]' => scraper {
            process '//span/b | //span/child::text()' => 'span[]' => $describe_node;
        };
    };
};

## The page location is handed to Web::Scraper as a URI object (here a
## saved copy of the page, addressed through a file: URI).
my $page_uri = URI->new('file:capitali_nord_europa-201206.html');

## Site the saved page originally came from, passed as scrape()'s second
## argument.  NOTE(review): presumably the base for relative links -- confirm
## it has any effect when the first argument is a URI object.
my $site_base = 'http://www.costacrociere.it';

my $scraped = $soy->scrape( $page_uri, $site_base );

## Dump only the first cruise record; to dump everything use:
#~ dd $scraped;
dd $scraped->{info}[0];

__END__
{
  price => "\x{20AC} 510,00",
  span  => [
             {
               span => [
                 { key => " Itinerario ", raw => "<b> Itinerario </b>\n" },
                 { val => " Danimarca, fiordi norvegesi, Germania" },
                 { val => " " },
                 { key => "Data partenza", raw => "<b>Data partenza</b>\n" },
                 { val => " 17\xA0giugno\xA02012 " },
                 { key => " Nave ", raw => "<b> Nave </b>\n" },
                 { val => " Costa Fortuna" },
                 {
                   key => " N.ro giorni crociera \xA0 ",
                   raw => "<b> N.ro giorni crociera \xA0 </b>\n",
                 },
                 { val => " 7" },
                 { key => " Porto di partenza ", raw => "<b> Porto di partenza </b>\n" },
                 { val => " Copenhagen" },
                 {
                   key => " Documenti di viaggio ",
                   raw => "<b> <a href=\"http://www.costacrociere.it/B2C/I/Before_you_go/documentation/travel.htm\" target=\"_blank\">Documenti di viaggio</a> </b>\n",
                 },
                 {
                   val => " Passaporto\xA0o\xA0Carta d'identit\xE0 valida per l'espatrio",
                 },
                 { val => " Possono essere disponibili le seguenti tariffe " },
               ],
             },
           ],
  title => "Le terre dei vichinghi",
}