<?xml version="1.0" encoding="windows-1252"?>
<node id="558953" title="Re^2: Google Earth Monks" created="2006-07-03 04:19:22" updated="2006-07-03 00:19:22">
<type id="11">
note</type>
<author id="483556">
McDarren</author>
<data>
<field name="doctext">
Thanks :)

&lt;p&gt;Actually, I've never used [cpan://WWW::Mechanize], so it didn't occur to me to try that. The routine I use for scraping the data from the Monk homenodes is given below. I think the main performance hit is the fact that I need to issue a separate request for each Monk. Ideally, it would be good to be able to grab all this information in a single go. But I'm not aware of any way that this is currently possible.

&lt;code&gt;
# Fetch each monk's homenode and copy selected profile fields into $ref.
#
# Argument: a hash reference keyed by node id; each value is itself a hash
#           reference (its {name} entry is used in the progress messages).
# Returns:  the same reference. For every id whose page was fetched, each
#           label listed in %monk_fields that appears on the page is stored
#           as $ref-&gt;{$id}{LABEL}, with the trailing colon kept in the key.
# Ids whose request fails are reported on STDERR and left unchanged.
sub get_monk_stats {
    my $ref = shift;
    my $monk_url = 'http://www.perlmonks.org/?node_id=';

    # Text of the table cells whose following cell holds a value to keep.
    my %monk_fields = (
        'User since:'   =&gt; 1,
        'Last here:'    =&gt; 1,
        'Experience:'   =&gt; 1,
        'Level:'        =&gt; 1,
        'Writeups:'     =&gt; 1,
    );

    # One user agent serves every request; nothing about it varies per monk.
    my $ua = LWP::UserAgent-&gt;new();

    MONK:
    foreach my $id (keys %{$ref}) {
        print "Getting data for $ref-&gt;{$id}{name} ($id)\n";
        my $result = $ua-&gt;get("$monk_url$id");
        if (!$result-&gt;is_success) {
            # Say which monk was skipped and why, instead of dropping it silently.
            warn "Skipping $ref-&gt;{$id}{name} ($id): ", $result-&gt;status_line, "\n";
            next MONK;
        }
        my $content = $result-&gt;content;

        my $p = HTML::TokeParser-&gt;new(\$content);

        # Walk every td; when its text is a wanted label, the text of the
        # next td is stored as the value for that label.
        while ($p-&gt;get_tag("td")) {
            my $text = $p-&gt;get_trimmed_text("/td");
            if ($monk_fields{$text}) {
                $p-&gt;get_tag("td");
                $ref-&gt;{$id}{$text} = $p-&gt;get_trimmed_text("/td");
            }
        }
    }
    return $ref;
}
&lt;/code&gt;</field>
<field name="root_node">
558846</field>
<field name="parent_node">
558944</field>
</data>
</node>
