Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl-Sensitive Sunglasses
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??
#!perl use strict; use warnings; use Cache::FileCache; # use Data::Dump::Streamer; use DBM::Deep; use HTML::FormatText; use Mail::Sendmail 'sendmail'; use Params::Validate ':all'; use WWW::Mechanize; use XML::RSS; use vars ( '$DB', '$CACHE', '%CONFIG' ); main( sendmail => { smtp => '...', From => '...', To => '...' } ); exit; sub main { my %p = @_; initialize( %p ); do_jobs_perl_org( category => 'MN perl', url => 'http://jobs.perl.org/rss/standard.rss?li +mit=20', constraints => [ sub { /minnesota/i } ] ); do_jobs_perl_org( category => 'Tele perl', url => 'http://jobs.perl.org/rss/telecommute.rss +?limit=20' ); do_jobs_perl_org( category => 'Domino perl', url => 'http://jobs.perl.org/rss/standard.rss?li +mit=20', constraints => [ sub { /\b(?:domino|lotus|notes) +\b/i } ] ); # do_mn_job_bank( category => 'MN perl', # constraints => [ sub { /\bperl\b/i } ], # username => '...', # password => '...', # miles => 25, # zip => ..., # keyword => 'perl' ); #do_mn_job_bank( category => 'MN Domino', # constraints => [ sub { /\b(?:domino|lotus\s+notes)\b/i + } ], # username => '...', # password => '...' ); return; } sub initialize { my %p = validate( @_, { test => { type => SCALAR, optional => 1, default => 0 }, cache_namespace => { type => SCALAR, optional => 1, default => '1 week' }, cache_autopurge_interval => { type => SCALAR, optional => 1, default => '1 ho +ur' }, db_filename => { type => SCALAR, optional => 1, default => "$ENV{HOME}/.jobwa +tcher.db" }, wget_expiration => { type => SCALAR, optional => 1, default => '1 hour' }, sendmail => { type => HASHREF } } ); $DB = DBM::Deep->new( $p{db_filename} ); $CACHE = Cache::FileCache->new( { namespace => delete $p{cache_nam +espace}, autopurge_interval => delete $p{ +cache_autopurge_interval} } ); @CONFIG{keys %p} = values %p; return; } sub do_mn_job_bank { my %p = validate( @_, { category => { type => SCALAR }, constraints => { type => ARRAYREF, default => [ sub { 1 } ] }, username => { type => SCALAR }, password => { type => SCALAR }, zip => { type => SCALAR }, miles => { type => SCALAR }, keyword => { type => SCALAR },} ); my $www = WWW::Mechanize->new; # Log in $www->get( 'http://www.mnworks.org/jsli.cfm' ); $www->submit_form( fields => { li_user_name => $p{username}, li_password => $p{password} }, button => 'Submit' ); # Search $www->get( 'http://www1.mnworks.org/jobseeker/js_srch_pg.cfm' ); $www->submit_form( fields => { i_geo_search_flag => 'zip_yes', zip_miles => $p{miles}, zip_code => $p{zip}, Submit => 'More Search Options' } ) +; print $www->content; $www->submit_form( fields => { # i_geo_search_flags => 'zip_yes', # zip_miles => $p{miles}, # zip_code => $p{zip}, i_job_description => $p{keyword} } +); } sub do_jobs_perl_org { my %p = validate( @_, { category => { type => SCALAR }, url => { type => SCALAR }, constraints => { type => ARRAYREF, default => [ sub { 1 } ] } } +); my $rssurl = $p{url}; my $feed = rss_feed_from_url( url => $rssurl ); for my $job ( @{$feed->{items}} ) { my $url = $job->{link}; my $title = $job->{title}; my $desc = html2text( html => get( url => $url ) ); # Remove spaces @ the start of lines { my %spaces; ++ $spaces{$_} for map length(), $desc =~ /^( +)/gm; my $spaces_to_remove = ( sort { $spaces{$b} <=> $spaces{$a} } keys %spaces )[0]; my $space_to_remove = " " x $spaces_to_remove; $desc =~ s/^$space_to_remove//mg; } # Remove boilerplate & navigation at the top $desc =~ s/ (?s:.+?) (.+)[\r\n]+ [ \t]*=+[ \t]*[\r\n]+ /$1\n/x; # Now collapse the description fields $desc =~ s((.+)(?=^Description:)){ # Now I have a handle on just the key/value area ahead of +the # description local $_ = $1; # Collapse field lines together s/^(.+:)[ \t]*[\r\n]+ (.+)/$1 $2/mgx; # Remove empty lines s/^[ \t]*[\r\n]+//gm; # Remove trailing / leading whitespace s/^\s+//; s/\s+$//; # Add in the \n I want. "$_\n"; }sem; # Finally remove the boilerplate from the botton. $desc = reverse $desc; $desc =~ s/(?s:.+?) ^ [ \t]* -+ [ \t]* [\r\n]+ ^ [ \t]* [\r\n]+ //xsm; # $desc =~ s/.+?^[ \t]*-{30,}[ \t]*[\r\n]+^\s+//s; $desc = reverse $desc; $desc =~ s/\s+$//; $desc .= "\n"; if ( passes_constraints( constraints => $p{constraints}, title => $title, desc => $desc ) ) { notify( id => $url, title => $title, desc => "$p{category} $desc" ); } } return; } sub passes_constraints { my %p = validate( @_, { constraints => { type => ARRAYREF }, title => { type => SCALAR }, desc => { type => SCALAR } } ); my @constraints = @{delete $p{constraints}}; if ( not @constraints ) { @constraints = sub { 1 }; } local $_ = "$p{title}\n$p{desc}"; # When if ( /Job no longer in database/ ) { return 0; } for my $constraint ( @constraints ) { if ( $constraint->() ) { return 1; } } return 0; } sub notify { my %p = validate( @_, { id => { type => SCALAR }, title => { type => SCALAR }, desc => { type => SCALAR } } ); return if $DB->{notified}{$p{id}}; if ( $CONFIG{test} ) { print "NOTIFY $p{id}\n $p{title}\n"; return; } $DB->{notified}{$p{id}} = 1; sendmail( %{$CONFIG{sendmail}}, Subject => $p{title}, Text => $p{desc} ); } sub rss_feed_from_url { my %p = validate( @_, { url => { type => SCALAR } } ); my $url = delete $p{url}; my $parser = XML::RSS->new; $parser->parse( get( url => $url ) ); return $parser; } sub html2text { my %p = validate( @_, { html => { type => SCALAR } } ); return HTML::FormatText->format_string( $p{html} ); } sub get { my %p = validate( @_, { url => { type => SCALAR } } ); my $url = delete $p{url}; my $content = $CACHE->get( $url ); if ( not defined $content ) { my $www = WWW::Mechanize->new; $www->get( $url ); $content = $www->content; $CACHE->set( $url, $content, $CONFIG{wget_expiration} ); } return $content; }

In reply to jobs.perl.org watcher by diotalevi

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others studying the Monastery: (7)
As of 2024-04-18 11:18 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found