Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl: the Markov chain saw
 
PerlMonks  

CB60

by demerphq (Chancellor)
on Feb 17, 2006 at 20:56 UTC ( #531067=sourcecode: print w/ replies, xml ) Need Help??

Category: PerlMonks Related Scripts
Author/Contact Info Copyright demerphq -- Released under the Perl Artistic License.
Description:

A lightweight CB Mirror similar to cblast35.

No DB needed at all. Uses the 'modern' xml feed so it can provide properly parsed output.

Set up mini-cb60-fetch.pl on cron job (--sleeptime=0) or as an always running daemon. Set up mini-cb-render.pl as a CGI script for access.

If the CGI parameter 'domain' is provided and is one of 'org','com' or 'net' then that domain will be used for the PM specific links.

This is just a proof of concept for an incremental, low load, PM-parsed CBlast 60.

Update:

  • v0.01001: Removed superfluous use of DDS (hangover from initial dev). Added $VERSION to the render code. Trivial changes.
  • v0.02: Added cookie support, better defaults. Other minor changes.


Arunbear has kindly made this available online. Many thanks to Arunbear for doing so.
Version 0.02 as posted below is running on http://mini-cb60.datenzoo.de. Thanks to corion for support in getting this set up. The latest version will be available at this site until further notice.

mini-cb60-fetch.pl
#!/usr/bin/perl
#mini-cb60-fetch.pl
use strict;
use warnings;
use vars qw/$VERSION/;
use XML::Simple;
use File::Path;
use LWP::UserAgent;
use HTTP::Cookies;
use Getopt::Long;
use HTTP::Request::Common qw(POST);
use Data::Dumper;

# This is free Software.
# Released under the same terms as Perl.
# Copyright Yves (demerphq) 2006

$VERSION=0.02;

my $ua = LWP::UserAgent->new();
$ua->timeout(10);
$ua->agent("Mini-CB60 $VERSION ");
$ua->env_proxy;

$|++;
my ( $verbose, $lastid, $sleeptime )= (1, 0, 0);
my $site_url= 'http://perlmonks.org/index.pl';
my $ticker_url= $site_url.'?node_id=207304;xmlstyle=modern';
(my $dir= $0) =~ s![^\\\/]+$!messages/!;

my ($user,$pass)= ('','');

GetOptions(
    'msgdir=s'    => \$dir,
    'verbose=i'   => \$verbose,
    'sleeptime=i' => \$sleeptime,
    'lastid=i'    => \$lastid,
    'user=s'      => \$user,
    'pass=s'      => \$pass,
) or die <<EOFUSAGE;
mini-cb60-fetch.pl --msgdir=PATH --verbose=INT --sleeptime=SECS
                   --user=USER --pass=PASS
--sleeptime of 0 means a single fetch only. Otherwise never halts.
Defaults to --sleeptime of 0 seconds, and ./messages for --msgdir.
EOFUSAGE

if ($sleeptime && $sleeptime < 60) {
    warn "Theres no need to fetch more often than every 60 seconds!\n"
+;
}

if ( $user && $pass ) {
    my $req = POST $site_url, [
        op     => 'login',
        node_id=> 109, # login
        user   => $user,
        passwd => $pass,
        ticker =>'yes',
        displaytype=>'xml',
        xmlstyle=>'flat',
    ];
    my $jar=HTTP::Cookies->new(file => "$0.cookies", autosave => 1);
    $ua->cookie_jar($jar);
    my $response=$ua->request($req);
    die "Failed to login\n",$response->as_string
        if ! $response->is_success;

    my $xml=XMLin( $response->content );
    if (!$xml->{loggedin} ||
               $xml->{loggedin}{username} ne $user
    ) {
        die "Failed to login."
    }
    print "Logged in as $_->{username} ($_->{user_id})\n"
        for $xml->{loggedin};
} elsif ( $user || $pass ) {
    die "Must have both a username and a password to log in\n";
}

# cleanup the path for win32 users, just for aesthetic reasons
$dir =~ s![\\\/]+!\\!g
    if $^O=~/Win32/;

if ( ! -d $dir ) {
    mkpath $dir
        or die "Failed to create '$dir'";
}

if ($verbose) {
    print "Base Url: '$site_url'\n";
    print "Message directory is: '$dir'\n"
}
chdir $dir
    or die "Failed to chdir to '$dir':$!";

while ( 1 ) {
    # first we delete any old messages
    my $threshold= sprintf "%08x-%08x.msg",time() - 3600, 0;
    print "Threshold file is $threshold\n" if $verbose>1;
    my @files=sort glob "*.msg";
    foreach my $f (@files) {
        print "Found $f\n" if $verbose>1;
        next if $f ge $threshold;
        print "Unlinking $f\n" if $verbose;
        unlink $f or warn "Failed to unlink '$f':$!";
    }
    # and try to autodetect where we left off if this is the first run
    if ( ! $lastid && @files && -e $files[-1] ) {
        my ($t,$id)=split/[-.]/,$files[-1];
        $lastid= hex($id) if $lastid < hex($id);
    }

    # and now we do the fetch
    my $url= $ticker_url;
    $url .= ";fromid=$lastid" if $lastid;

    print "Fetching from id $lastid\n" if $verbose>1;
    my $response= $ua->get($url);
    if (!$response->is_success) {
        print "Fetch '$url' failed!\n";
        next;
    }

    # and now we process the results
    my $xml= XMLin( $response->content,
                    ForceArray=>['message'] );
    print Dumper($xml) if $verbose>2;

    my $msgs= $xml->{message}||[];
    my $count= $xml->{info}{count};
    $lastid= $xml->{info}{lastid}
        if $count;
    print "Got $count messages -- Last id: $lastid\n"
        if $verbose;
    next unless $count && $msgs && @$msgs;

    # each message is written as its own file.
    foreach my $msg (@$msgs) {
        my ($id,$epoch)= @{$msg}{qw(message_id createdepoch)};
        my $authlink= sprintf '<a href="?node_id=%d">%s</a></i>',
            $msg->{author_user},$msg->{author};
        (my $text= $msg->{parsed})=~s{^/me(\s+.+)?$}{<i>$authlink$1</i
+>};

        my $file= sprintf "%08x-%08x.msg",$epoch,$id;
        next if -e $file;
        open my $fh,">",$file
            or die "Failed to open '$file' for writing:$!";
        print "writing $file\n" if $verbose;
        print $fh "0\n"; # version
        print $fh join "\n",
              "<dt>$authlink&nbsp; &nbsp; <small>$msg->{createdgmtime}
+ GMT</small></dt>",
              "<dd>$text</dd>",
              "";
        close $fh;
    }
} continue {
    exit(0) if $sleeptime <= 0; # exit out if this is a single pass
    print "sleeping for $sleeptime seconds....\n" if $verbose>1;
    sleep $sleeptime;
}

mini-cb60-render.pl
#!/usr/bin/perl
#mini-cb60-render.pl
# This is free Software.
# Released under the same terms as Perl.
# Copyright Yves (demerphq) 2006

use strict;
use warnings;
use CGI ();
use Getopt::Long;
use vars qw/$VERSION/;
$VERSION= 0.02;

############### SET THESE AS APPROPRIATE ######################
my $SITE_DOMAIN= "mini-cb60.flux8.com";
my $SITE_PATH= "/";
my $PM_HOST_USERNAME= "demerphq";
my $PM_HOST_USERID= 108447;
############### OR SET FROM COMMANDLINE #######################

$CGI::POST_MAX= 1024;       # no need for more than 1k.
$CGI::DISABLE_UPLOADS = 1;  # no uploads
my $do_header= $ENV{REQUEST_METHOD} ? 1 : 0; # Default based on if run
+ from shell
my $q= CGI->new();
my @cookies;

sub get_user_param {
    my ( $name, @legal )= @_;
    my %screen= map { lc($_) => $_ } @legal;
    my $cval= $screen{lc($q->cookie($name))};
    my $pval= $screen{lc($q->param($name))};
    my $ret= $pval || $cval || $legal[0];
    if ($do_header && ($ret ne $legal[0] || ($cval && $cval ne $ret)))
+ {
        push @cookies,
            $q->cookie(
                -name => $name,
                -value => $ret,
                -expires => '+10y',
                $SITE_PATH ?  (-path => $SITE_PATH) : (),
                $SITE_DOMAIN ? (-domain => $SITE_DOMAIN) : (),
            );
    }
    return $ret;
}

(my $dir= $0) =~ s![^\\\/]+$!messages/!;
GetOptions(
    'msgdir=s'    => \$dir,
    'header!'     => \$do_header,
    'domain=s'    => sub { $q->param(@_) },
    'site_domain' => \$SITE_DOMAIN,
    'site_path'   => \$SITE_PATH,
    'hoster=s'    => \$PM_HOST_USERNAME,
    'hosterid=i'  => \$PM_HOST_USERID,
    'reverse!'    => sub { $q->param(@_) },
) or die <<EOFUSAGE;
mini-cb60-render.pl --msgdir=PATH --[no]header
                    --domain=(org|com|net)
                    --hoster=MONKNAME --hosterid=MONKID
EOFUSAGE

my @domains=qw(org com net);
my $domain= get_user_param('domain',@domains);
my $order= get_user_param('order',qw(desc asc));

chdir $dir;

print $do_header
        ? $q->header(@cookies ? ( -cookie => \@cookies ) : ())
        : "",
    $q->start_html(
        -title=> 'Perlmonks Mini-CB60',
        -xbase=> "http://perlmonks.$domain/index.pl",
        -meta=> { 'keywords'=>"Perlmonks Mini-CB60",},
        -head=>$q->Link({
            -rel=>'icon',
            -href=>"/favicon.ico"}
        ),
        -style=> { -code => "dt { background-color:#ddd }" },
    ),
    $q->center(
        $q->h1(
            $q->a( { href => '?node_id=131' },
                "Perlmonks")
          . " "
          . $q->a( { href => "http://$SITE_DOMAIN$SITE_PATH" },
                "Mini-CB60")
        )
    ),
    $q->start_dl();

my $threshold= sprintf "%08x-%08x.msg",time() - 3600, 0;
my @files=sort glob "*.msg";
@files=reverse @files if $order eq 'desc';
foreach my $file (@files) {
    if ($file lt $threshold) {
        unlink $file;
        next;
    }
    open my $fh,"<",$file or die "Failed to read '$file' :$!";
    chomp(my $version=<$fh>);
    if ( $version == 0 ) {
        print <$fh>;
    } else {
        die "Error '$file' is of an unknown version: '$version'";
    }
}

my $hostspec="";
if ( $PM_HOST_USERNAME ) {
    $hostspec.= "<br/>Hosted by:" .
        ( $PM_HOST_USERID
          ?  $q->a(
                { href => "?node_id=$PM_HOST_USERID" },
                $PM_HOST_USERNAME
            )
          : $PM_HOST_USERNAME
        )
}
my @form=!$do_header ? () : (
      $q->hr,
      $q->start_center,
      $q->start_form(-action=>"http://$SITE_DOMAIN"),
      "Preferred Domain for Perlmonks: ",
      $q->radio_group(
        -name=>'domain',
        -values=>\@domains,
        -default=>$domain,
        -force=>1,
      ),
      $q->br,
      "Order messages: ",
      $q->radio_group(
        -name=>'order',
        -values=>['desc','asc'],
        -default=>$order,
        -force=>1,
        -labels=>{ asc => 'Oldest First',
                   desc => 'Newest First'}
      ),
      $q->br,
      $q->submit(-name=>'stumbit',
                 -value=>'Update Preferences'),
      $q->endform(),
      $q->end_center,
);

print $q->end_dl(),
      @form,
      $q->hr,
      $q->p( { align => 'right' },
            $q->small(
                  $q->a({href=>'?node_id=531067'},"Version $VERSION")
                . "<br/>Coding By "
                . $q->a({ href => '?node_id=108447' },'demerphq')
                . $hostspec
            )
      ),
      $q->end_html(),
      "\n";

Comment on CB60
Select or Download Code
Re: CB60
by jdporter (Canon) on Mar 30, 2006 at 15:59 UTC

    Awesome, of course. I have two suggestions for changes that will make it easier to use it programmatically:

    1. For the form, make it use GET rather than POST
    2. Provide a 'render as xml' option

    We're building the house of the future together.
Re: CB60
by jdporter (Canon) on Jul 13, 2006 at 16:07 UTC

    I'd like to submit a patch for mini-cb60-fetch.pl.

    In line 150: s/<dt>/<dt id=$id>/.

    Thanks!

    We're building the house of the future together.

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://531067]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others imbibing at the Monastery: (7)
As of 2014-08-30 23:30 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The best computer themed movie is:











    Results (294 votes), past polls