<?xml version="1.0" encoding="windows-1252"?>
<node id="891181" title="Re^2: Help With Online Table Scraper" created="2011-03-03 04:40:47" updated="2011-03-03 04:40:47">
<type id="11">
note</type>
<author id="961">
Anonymous Monk</author>
<data>
<field name="doctext">
Sinistral is right: use a documented API whenever one is available; scraping is a fragile PITA :)
&lt;readmore&gt;&lt;C&gt;#!/usr/bin/perl --
use strict;
use warnings;

use Data::Dumper;
use Web::Scraper;

use File::Slurp qw' read_file ';
use URI;


# Script entry point: pass the command-line arguments to Main, then exit
# with a success status.
Main(@ARGV);
exit 0;

# Main( @args )
#
# Scrapes and prints the key statistics for the hard-coded Yahoo! Finance
# URL below.
#
# With no arguments, ScrapePrint is given the URI alone.  With arguments,
# they are passed to File::Slurp's read_file and the resulting content is
# scraped instead, with the URI supplied as the second argument.
sub Main {
    my $uri = URI-&gt;new('http://finance.yahoo.com/q/ks?s=MNDO+Key+Statistics');

    # Nothing on the command line: scrape straight from the URI.
    return ScrapePrint($uri) unless @_;

    # Otherwise scrape previously saved HTML; it is handed over by reference.
    my $html_content = read_file(@_);
    ScrapePrint( \$html_content, $uri );
}


# ScrapePrint( @scrape_args )
#
# Scrapes the statistics tables nested inside the table whose id is
# 'yfncsumtab' and prints the result to STDOUT as a Data::Dumper dump with
# sorted hash keys.
#
# All arguments are passed unchanged to the scraper's scrape() method.
# Main() calls this either with a URI object alone, or with a reference to
# an HTML string followed by the page URI.
#
# The dumped structure is a hash keyed by section name.  Each section is an
# array with one hash per matched table row, holding 'key' (text of the
# first cell) and 'value' (text of the second cell).  A row for which
# td[2] matches nothing, such as the heading row in the sample output after
# __END__, carries only 'key'.
sub ScrapePrint {
    # Every section is scraped with this one row scraper, so the field names
    # ('key' and 'value') and the cell XPaths are identical for all sections.
    my $row_scraper = scraper {
        process '/tr/td[1]', 'key',   'TEXT';
        process '/tr/td[2]', 'value', 'TEXT';
    };

    # Each entry: section name, then the XPath selecting that section's rows.
    my @sections = (
        [ 'Balance Sheet',
          q~//table[@id='yfncsumtab']/tr/td/table[8]//tr//tr~ ],

        # Earlier attempt, kept for reference:
        #~             q~//table[@id='yfncsumtab']/tr/td/table[9]//tr//tr~,
        [ 'Cash Flow Statement',
          q~//table[@id='yfncsumtab']//table[9]//table//tr~ ],

        [ 'Dividends &amp; Splits',
          q~//table[@id='yfncsumtab']/tr[2]/td[3]/table[4]//tr//tr~ ],
        [ 'Fiscal Year',
          q~//table[@id='yfncsumtab']/tr/td/table[4]//tr//tr~ ],
        [ 'Income Statement',
          q~//table[@id='yfncsumtab']/tr/td/table[7]//tr//tr~ ],
        [ 'Management Effectiveness',
          q~//table[@id='yfncsumtab']/tr/td/table[6]//tr//tr~ ],
        [ 'Profitability',
          q~//table[@id='yfncsumtab']/tr/td/table[5]//tr//tr~ ],
        [ 'Share Statistics',
          q~//table[@id='yfncsumtab']/tr/td[3]/table[3]//tr//tr~ ],
        [ 'Stock Price History',
          q~//table[@id='yfncsumtab']/tr[2]/td[3]/table[2]/tr//tr~ ],
        [ 'Valuation Measures',
          q~//table[@id='yfncsumtab']/tr[2]/td/table[2]/tr/td/table/tr~ ],
    );

    my $scraper = scraper {
        # One complete process() statement per section, so no call is passed
        # as an argument to another.
        for my $section (@sections) {
            my ( $name, $rows_xpath ) = @$section;

            # The '[]' suffix makes the section an array with one entry per
            # matched row instead of a single value.
            process( $rows_xpath, $name . '[]', $row_scraper );
        }
    };

    my $res = $scraper-&gt;scrape(@_);
    print Data::Dumper-&gt;new( [$res] )-&gt;Sortkeys(1)-&gt;Dump;
}


__END__

$VAR1 = {

...

'Cash Flow Statement' =&gt; [
                         {
                           'key' =&gt; 'Cash Flow Statement'
                         },
                         {
                           'key' =&gt; 'Operating Cash Flow (ttm):',
                           'value' =&gt; '6.30M'
                         },
                         {
                           'key' =&gt; 'Levered Free Cash Flow (ttm):',
                           'value' =&gt; '3.66M'
                         }
                       ],
                                   

...
&lt;/c&gt;&lt;/readmore&gt;
Come to think of it, Web::Scraper might also be a bit of a PITA, but I've only studied the trivial examples, not the [http://deps.cpantesters.org/depended-on-by.pl?dist=Web-Scraper|others]</field>
<field name="root_node">
890901</field>
<field name="parent_node">
890907</field>
<field name="reputation">
0</field>
</data>
</node>
