Fellow monks,
Trying to simply grab the job postings from a corporate careers section and am getting into an unusual table construct. I don't quite see how to get the table rows to come back.
I am getting a basic error:
Can't call method "rows" on an undefined value at /opt/local/lib/perl5/site_perl/5.12.4/HTML/TableExtract.pm line 237.
I have tried dumping the table but I don't understand the results from Dumper.
If I can get as far as listing out the job postings I'll be in good shape. However, at this juncture I'm stumped. The code I'm using is below:
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Mechanize;
use Data::Dumper;
use HTML::TableExtract;
use XML::FeedPP;
use UTF8;
# initialize
my $cols;
my $url;
my $depth;
my $count;
my $data;
my $tracking_code;
my $location;
my $job_title;
my $date_posted;
my $out_fh;
# get the data from the web. Typically this is:
# https://commvault.silkroad.com/epostings/index.cfm?fuseaction=app.jo
+bsearch#
# Either pass this in as --url <page_url> when invoking or just set it
+.
$cols = 'tracking_code,job_title,location,date_posted';
$url = "https://commvault.silkroad.com/epostings/index.cfm?fuseaction=
+app.jobsearch";
my $input;
my $directory = "/Users/coblem/testing/";
my $outfile = "cvlt_jobs.csv";
open( $out_fh, '>', $ directory . $outfile)
or die("Unable to create output file \"$out_fh\": $!\n");
my $m = WWW::Mechanize->new();
$m->get($url);
$input = $m->content;
my $te;
if ( defined ($cols)) {
print ("columns ", $cols, "\n");
my @headers = split(/,/, $cols);
# $te = HTML::TableExtract->new( attribs => { border => 1 } );
$te = HTML::TableExtract->new(
headers => [qw( tracking_code job_titl
+e location date_posted )]
) or die qq{$!};
print Dumper($te);
}
else
{
$te = new HTML::TableExtract( depth => $depth, count=>$count);
};
$te->parse($input);
foreach my $row ($te->rows) {
$tracking_code = $ { $row }[0];
$job_title = $ { $row }[1];
$location = $ { $row }[2];
$date_posted = $ { $row }[3];
print "positions: $tracking_code $job_title $location $date_posted
+ \n";
}
The page source HTML seems straightforward enough - it has a table definition in it and looks like this:
<tbody><tr class="cssSearchResultsColHead">
<td align="center" class="cssSearchResultsColHead"
+><a id="header_trackingCode" href="index.cfm?fuseaction=app.jobsearch
+&newsort=1&tcorder=asc&thiscol=TRACKINGCODE&company_i
+d=15636&version=2&byBusinessUnit=NULL&bycountry=0&bys
+tate=0&byRegion=&bylocation=&keywords=&byCat=&pro
+ximityCountry=&postalCode=&radiusDistance=&isKilometers=&
+amp;tosearch=yes">Tracking Code</a></td>
<td align="center" class="cssSearchResultsColHead"
+><a id="header_jobTitle" href="index.cfm?fuseaction=app.jobsearch&
+;newsort=1&jtorder=asc&thiscol=job_title&company_id=15636
+&version=2&byBusinessUnit=NULL&bycountry=0&bystate=0&
+amp;byRegion=&bylocation=&keywords=&byCat=&proximityC
+ountry=&postalCode=&radiusDistance=&isKilometers=&tos
+earch=yes">Job Title</a></td>
<td align="center" class="cssSearchResultsColHead"
+><a id="header_location" href="index.cfm?fuseaction=app.jobsearch&
+;newsort=1&lorder=asc&thiscol=location&company_id=15636&a
+mp;version=2&byBusinessUnit=NULL&bycountry=0&bystate=0&am
+p;byRegion=&bylocation=&keywords=&byCat=&proximityCou
+ntry=&postalCode=&radiusDistance=&isKilometers=&tosea
+rch=yes">Location</a></td>
<td align="center" class="cssSearchResultsColHead"
+><a id="header_datePosted" href="index.cfm?fuseaction=app.jobsearch&a
+mp;newsort=1&dporder=asc&thiscol=postingdate&company_id=1
+5636&version=2&byBusinessUnit=NULL&bycountry=0&bystat
+e=0&byRegion=&bylocation=&keywords=&byCat=&proxim
+ityCountry=&postalCode=&radiusDistance=&isKilometers=&
+;tosearch=yes">Date Posted</a></td>
</tr>
<tr class="cssSearchResultsHighlight">
<td align="center" class="cssSearchResults
+Body">306145-636</td>
<td align="left" class="cssSearchResultsBo
+dy"><a id="jobTitle_306145" href="index.cfm?fuseaction=app.jobinfo&am
+p;jobid=306145&source=ONLINE&JobOwner=1013826&company_id=
+15636&version=2&byBusinessUnit=NULL&bycountry=0&bysta
+te=0&byRegion=&bylocation=&keywords=&byCat=&proxi
+mityCountry=&postalCode=&radiusDistance=&isKilometers=&am
+p;tosearch=yes" class="cssSearchResultsBody">Sales Account Manager -
+Enterprise</a></td>
<td align="left" class="cssSearchResultsBo
+dy">Seattle, Washington, United States</td>
<td align="center" class="cssSearchResults
+Body">10/31/2013</td>
</tr>
<tr class="cssSearchResultsLowlight">
<td align="center" class="cssSearchResults
+Body">306144-636</td>
<td align="left" class="cssSearchResultsBo
+dy"><a id="jobTitle_306144" href="index.cfm?fuseaction=app.jobinfo&am
+p;jobid=306144&source=ONLINE&JobOwner=1013767&company_id=
+15636&version=2&byBusinessUnit=NULL&bycountry=0&bysta
+te=0&byRegion=&bylocation=&keywords=&byCat=&proxi
+mityCountry=&postalCode=&radiusDistance=&isKilometers=&am
+p;tosearch=yes" class="cssSearchResultsBody">Inside Sales Administrat
+or</a></td>
<td align="left" class="cssSearchResultsBo
+dy">Madrid, Madrid, Spain</td>
<td align="center" class="cssSearchResults
+Body">10/30/2013</td>
</tr>
<tr class="cssSearchResultsHighlight">
<td align="center" class="cssSearchResults
+Body">306143-636</td>
<td align="left" class="cssSearchResultsBo
+dy"><a id="jobTitle_306143" href="index.cfm?fuseaction=app.jobinfo&am
+p;jobid=306143&source=ONLINE&JobOwner=1013767&company_id=
+15636&version=2&byBusinessUnit=NULL&bycountry=0&bysta
+te=0&byRegion=&bylocation=&keywords=&byCat=&proxi
+mityCountry=&postalCode=&radiusDistance=&isKilometers=&am
+p;tosearch=yes" class="cssSearchResultsBody">Inside Sales Administrat
+or</a></td>
<td align="left" class="cssSearchResultsBo
+dy">Milano, Lombardia, Italy</td>
<td align="center" class="cssSearchResults
+Body">10/30/2013</td>
</tr>
<tr class="cssSearchResultsLowlight">
<td align="center" class="cssSearchResults
+Body">306134-636</td>
<td align="left" class="cssSearchResultsBo
+dy"><a id="jobTitle_306134" href="index.cfm?fuseaction=app.jobinfo&am
+p;jobid=306134&source=ONLINE&JobOwner=1013767&company_id=
+15636&version=2&byBusinessUnit=NULL&bycountry=0&bysta
+te=0&byRegion=&bylocation=&keywords=&byCat=&proxi
+mityCountry=&postalCode=&radiusDistance=&isKilometers=&am
+p;tosearch=yes" class="cssSearchResultsBody">Senior Technical Consult
+ant / Enterprise Solutions Architect</a></td>
<td align="left" class="cssSearchResultsBo
+dy">Reading, West Berkshire, United Kingdom</td>
<td align="center" class="cssSearchResults
+Body">10/30/2013</td>
</tr>
<tr class="cssSearchResultsHighlight">
<td align="center" class="cssSearchResults
+Body">306142-636</td>
<td align="left" class="cssSearchResultsBo
+dy"><a id="jobTitle_306142" href="index.cfm?fuseaction=app.jobinfo&am
+p;jobid=306142&source=ONLINE&JobOwner=1013697&company_id=
+15636&version=2&byBusinessUnit=NULL&bycountry=0&bysta
+te=0&byRegion=&bylocation=&keywords=&byCat=&proxi
+mityCountry=&postalCode=&radiusDistance=&isKilometers=&am
+p;tosearch=yes" class="cssSearchResultsBody">Product Manager - Databa
+se</a></td>
<td align="left" class="cssSearchResultsBo
+dy">Oceanport, New Jersey, United States</td>
<td align="center" class="cssSearchResults
+Body">10/29/2013</td>
</tr>
</tbody>