\s*(.*?)#s;
$desc =~ s/\r//g;
# any other cleanup needed?
{
id => $id,
date => $date,
author_id => $author_id,
author_name => $author_name,
desc => decode_entities($desc),
title => decode_entities($title),
}
}
my $snips = do $snippets_data_file; # load the snippets base (ref to AoH)
if ( $get_new )
{
my %snips = map { ( $_->{'id'} => $_ ) } @$snips; # key by id
warn scalar(keys %snips)." snippets already in local cache.\n";
my $last_date = do $last_date_file;
my( $year, $mon, $mday ) = @{$last_date}{qw( year mon mday )};
# back the date up by a day or three
# sure, we could use a module for this, but that would be
# way heavier-weight than we need. Fuzzy is fine.
if ( $mday > 1 ) { $mday--; }
elsif ( $mon > 1 ) { $mon--; $mday = $mon == 2 ? 28 : 30; }
else { $year--; $mon = 12; $mday = 31; }
my $query = sprintf "yr=%04d;mo=%02d;dy=%02d", $year, $mon, $mday;
warn "Querying for snippets posted since $query ...\n";
$query = "node_id=3989;nf=0;$query;re=N;Sn;go=Search;as_user=961"; # anonymonk
my $html = get( "http://www.perlmonks.org/bare/?".$query );
$html =~ /\(searched [.0-9]+% of DB\)/
or die "Failed to get good result from Super Search!";
my $n_added = 0;
for my $tr ( $html =~ m#.*?
#sg )
{
local $_ = $tr;
s/[\r\n]+//g;
/Snippet<\/a><\/td>/ or next;
my( $y, $m, $d ) = m/ | (\d\d\d\d)?(\d\d)?(\d\d)<\/td>/;
my( $author_id, $author_name, $id, $title, ) =
m/ | (.*?)<\/a><\/td>\s* | (.*?)<\/a><\/td>/;
if ( $author_id == 52855 )
{
warn "Skipping $id, owned by $author_name\n";
next;
}
if ( $snips{$id} )
{
warn "Skipping $id, already got it.\n";
next;
}
my $r = get_one_snippet( $id, $title );
unless ( defined $r )
{
warn "Bogus! No valid snippet data returned for $id ".
qq("$title" by $author_name ($author_id)\n);
next;
}
warn "Adding new snippet $id, posted on $y-$m-$d\n";
$snips{$id} = $r;
$n_added++;
}
print $n_added ? "Added $n_added new snippets.\n" : "No new snippets found.\n";
@$snips = values %snips;
open F, ">", $snippets_data_file or die "write $snippets_data_file - $!";
print F Dumper($snips);
close F;
my @now = gmtime;
$last_date->{'year'} = $now[5] + 1900;
$last_date->{'mon'} = $now[4] + 1;
$last_date->{'mday'} = $now[3];
open F, ">", $last_date_file or die "write $last_date_file - $!";
print F Dumper($last_date);
close F;
exit 0;
}
=pod
=begin comment
typical:
{
'id' => 248201,
'title' => 'Quick and Dirty Seti@home Server Status',
'author_name' => 'Mr. Muskrat',
'author_id' => 155876,
'date' => '2003-04-04 17:14:39',
'desc' => 'Perform a quick and dirty check of the Seti@home servers.',
}
=end comment
=cut
sub header
{
my( $title, $ref_id, $ref_title ) = @_;
my( $year, $mon, $day ) = (gmtime)[5,4,3];
my $date = sprintf "%04d-%02d-%02d", $year+1900, $mon+1, $day;
<$title
$title
See $ref_title.
Generated on $date
EOF
}
sub linkId
{
my( $id, $title ) = @_;
if (0)
{
return defined $title ? "[id://$id|$title]" : "[id://$id]"
}
defined $title or $title = $id;
qq($title)
}
# by title/id
if ( $by_titles )
{
print header('PerlMonks Snippets - By Title',619683,'New Snippets Index'), "\n";
for ( sort {
$a->{'title'} cmp $b->{'title'}
or
$a->{'id'} <=> $b->{'id'}
} @$snips )
{
my $d = $_->{'date'};
$d =~ s/ .*//; # strip off time part
print
'',
linkId( $_->{'id'}, $_->{'title'} ),
' | ',
linkId( $_->{'author_id'}, $_->{'author_name'} ),
" | $d | \n"
}
print " |