#!/usr/bin/perl
# -*- cperl -*-
#
# Search the text of every lang-8.com journal entry listed on the logged-in
# user's "joined" pages for one or more literal strings.
#
# Usage:  script STRING [STRING ...]
#
# The script logs in, walks http://lang-8.com/journals/joined?page=N for
# N = 1, 2, ... until a page yields no journal links, then fetches each
# collected entry and prints every run of text (between markup angle
# brackets) that contains one of the requested strings.

use strict;
use warnings;

use Data::Dumper;    # not needed by the scraping path any more; kept for ad-hoc debugging
use Encode qw(decode);
use WWW::Mechanize;
use HTML::TreeBuilder;

# NOTE(review): credentials are hard-coded and the login URL is plain http,
# so they travel unencrypted. Consider reading them from the environment or
# a config file and switching to https if the site supports it.
my $email = 'username' . '@' . 'example.net';
my $pass  = 'censored';

# $mech->content() hands back decoded character data, so the search terms
# must be decoded as well or non-ASCII terms can never match, and STDOUT
# needs an encoding layer to print matches without "Wide character" noise.
# Assumes command-line arguments are UTF-8 encoded.
binmode STDOUT, ':encoding(UTF-8)';

# An empty search term would match every text run, so drop those up front.
my @substring = grep { length } map { decode('UTF-8', $_) } @ARGV;

if (@substring) {
    print "Looking for " . @substring . " strings.\n";
}
else {
    die "You must specify one or more strings to look for.\n";
}

# autocheck makes every failed request die instead of silently yielding an
# error page that would then be scraped as if it were real content.
my $mech = WWW::Mechanize->new(autocheck => 1);
$mech->get('http://lang-8.com/login');
$mech->submit_form(
    form_number => 2,
    fields      => {
        username => $email,
        password => $pass,
    },
);

# Phase 1: collect the URL of every journal entry from the paginated index.
my @pagetosearch;
my $page = 1;

PAGE:
while (1) {
    print "Fetching page $page...\n";
    $mech->get("http://lang-8.com/journals/joined?page=$page");

    # Parse the HTML directly from memory; there is no need for a temp file.
    my $tree = HTML::TreeBuilder->new_from_content($mech->content());

    my @url;
    for my $heading ($tree->look_down('_tag' => 'h3', 'class' => 'journal_title')) {
        # Skip title headings that carry no link instead of dying on undef.
        my $anchor = $heading->look_down('_tag' => 'a') or next;
        my $href = $anchor->attr('href');
        push @url, $href if defined $href && length $href;
    }

    # Release the tree explicitly; older HTML::Element versions hold
    # circular references that are otherwise never freed.
    $tree->delete;

    # A page without journal links marks the end of the listing.
    last PAGE if !@url;

    print " * Found " . @url . " journal entries.\n";
    push @pagetosearch, @url;
    sleep 1;    # be polite to the server between index pages
    ++$page;
}

# Phase 2: fetch each entry and report the text runs containing a term.
# Patterns are compiled once; \Q...\E makes each term a literal string so
# that metacharacters such as "+" or "(" in a term are not a regex error.
my @search = map { [ $_, qr/([^<>]*\Q$_\E[^<>]*)/s ] } @substring;

for my $url (@pagetosearch) {
    print "Checking $url\n";
    $mech->get($url);
    my $content = $mech->content();

    for my $search (@search) {
        my ($str, $pattern) = @{$search};
        my @match = $content =~ /$pattern/g;
        print " * Found $str: $_\n" for @match;
    }

    # Four-argument select with no handles is a sub-second sleep (0.1 s).
    select undef, undef, undef, 0.1;
}