XML::RSS::Tools problemuse strict;
use warnings;
use diagnostics;
use XML::RSS::Tools; # Parse the content
use HTML::Entities;
use HTML::LinkExtor; # Extract the links
use HTML::Entities; # "fix" any entities
use LWP::UserAgent; # Change the UserAgent
my $rss = $ARGV[0];
my $rss_feed = XML::RSS::Tools->new(
auto_wash => 1,
debug => 1);
my $ua = LWP::UserAgent->new;
my $request = HTTP::Request->new(GET => $rss);
my $response = $ua->request($request);
my $status = $response->status_line;
my $type = $response->header('Content-Type');
my $content = $response->content;
$content =~ s,\cM,,g;
$content =~ s,—,--,g;
$content =~ s,\x92,',g;
$rss_feed->rss_string("$content");
$rss_feed->xsl_file('atom03.xsl');
$rss_feed->transform;
my $parsed = encode_entities($rss_feed->as_string);
my $decoded = decode_entities($parsed);
print "$decoded";
This results in:
./rss.pl http://www.computerbase.de/rss/news.atom
Use of uninitialized value in string eq at
/usr/local/share/perl/5.10.0/XML/RSS.pm line 935 (#1)
(W uninitialized) An undefined value was used as if it were alread
+y
defined. It was interpreted as a "" or a 0, but maybe it was a mi
+stake.
To suppress this warning assign a defined value to your variables.
To help you figure out what was undefined, perl will try to tell y
+ou the
name of the variable (if any) that was undefined. In some cases it
+ cannot
do this, so it also tells you what operation you used the undefine
+d value
in. Note, however, that perl optimizes your program and the opera
+tion
displayed in the warning may not necessarily appear literally in y
+our
program. For example, "that $foo" is usually optimized into "that
+ "
. $foo, and the warning will refer to the concatenation (.) operat
+or,
even though there is no . in your program.
Use of uninitialized value in numeric ne (!=) at
/usr/local/share/perl/5.10.0/XML/RSS/Tools.pm line 444 (#1)
Ordinal::Fumy $num = 7;
my $unique = ordinal($num);
my $ordinal = chomp $unique;
print "Ordinal: $ordinal\n";
sub ordinal {
$_[0] =~ /^(?:\d+|\d[,\d]+\d+)$/ or return $_[0];
return "$_[0]nd" if $_[0] =~ /(?<!1)2$/;
return "$_[0]rd" if $_[0] =~ /(?<!1)3$/;
return "$_[0]st" if $_[0] =~ /(?<!1)1$/;
return "$_[0]th";
}
Comics-r-Ususe strict;
use Data::Dumper; # Dump the raw data
use URI;
use CGI;
use LWP::Simple; # Fetch the page itself
use LWP::UserAgent; # Create a proper User-Agent header
use HTML::TreeBuilder; # Find the attributes of the tag
my $cgi = CGI->new();
my $ua = LWP::UserAgent->new;
# $ua->agent('pps Plucker Perl Spider, v0.1.83 [comics]');
$ua->agent('Opera/7.54 (Windows NT 5.0; U) [de]');
my $page = "http://www.ucomics.com/";
my $response = $ua->request(HTTP::Request->new(GET => "$page"));
my $root = HTML::TreeBuilder->new_from_content($response->conte
+nt);
my (%images, %strips, %stripname) = ();
foreach my $node ($root->find_by_tag_name('option')) {
# Only add the non-empty elements in <option></option>
$strips{$node->attr('value')}++ if ($node->attr('value'));
}
# print Dumper(%strips);
foreach my $comic (sort keys %strips) {
push my @comics, $comic;
foreach my $strip (@comics) {
fetch_comic($strip);
}
}
sub fetch_comic {
my $strip = shift;
# printf "Sleeping for: %s seconds..", sleep int(rand(3) + 5);
# print "Requesting $strip\n";
my $response = $ua->request(HTTP::Request->new(GET => "$str
+ip"));
my $content = $response->content;
my $root = HTML::TreeBuilder->new_from_content($content);
my %images = ();
foreach my $node ($root->find_by_tag_name('img')) {
$images{$node->attr('src')}++
}
my @stripname = $root->look_down(_tag => 'font', class => 'com
+ictitle');
# Debug for now
foreach my $foo (@stripname) {
printf "DEBUG: %s\n\n", $foo->as_text;
}
my $title = $root->look_down('_tag', 'title')->as_text;
foreach my $comic (sort keys %images) {
print "$title, $comic\n"if $comic =~ m|/comics/|;
}
}
Areacode Searchwhile(<DATA>) {
m/(^\d+)(.*)\s?/;
my $state = $2;
my $numbers = $1;
$state =~ s/^\s+\w+\s+//;
if ($numbers =~ m/$area_code/) {
print "$state\n";
last;
}
}
__DATA__
201 NJ N New Jersey: Jersey City, Hackensack Bayone (see 973)
202 DC Washington, D.C.
203 CT Connecticut: Bridgeport, New Haven Stamford(see 860)
204 MB Canada: Manitoba Winnipeg Winkler
205 AL Alabama: Birmingham Fairfield Tuscaloosa (see 256 and 334)
206 WA W Washington state: Seattle (see 253, 360, 425)
Strip Font
my %verb = (S => 4, # start tag
E => 2, # end tag
T => 1, # text element
C => 1, # comment
D => 1, # declaration
PI => 2); # processing instruction
my $p = HTML::TokeParser->new(\$$cleaned);
my $nff_content; # No Font face content
while( my $t = $p->get_token ) {
if ($t->[0] eq 'S' and $t->[1] eq 'font') {
my $attr = $t->[2];
delete $attr->{face};
my $attributes = join(" ",
map {qq{$_="$attr->{$_}"}} keys %$attr);
$nff_content .= "<font $attributes>";
} else {
$nff_content .= $t->[$verb{ $t->[0]}];
}
}
use strict;
use Time::Local;
my $epoch = 1900 - 7 * 4;
my $fi = timegm 0, 0, 0, 29, 5 - 1, 2003 - $epoch;
for (my $start = timegm 0, 0, 0, 11, 5 - 1, 2002 - $epoch;
$start <= $fin; $start += 24 * 60 * 60) {
my ($day, $month, $year) = (gmtime $start)[3 .. 5];
printf "%.2d/%.2d/%d\n",
$month + 1, $day, $year + $epoch;
}
use strict;
use Data::Dumper;
use XML::Twig;
my $doc = "
<foo>
<bar>
<blort>
<quux>My Title</quux>
<plonk>Oops</plonk>
</blort>
</bar>
</foo>
";
my $field= 'blort';
my $twig = XML::Twig->new();
$twig->parse($doc);
my $root= $twig->root;
my @group = $root->children;
foreach my $my_group (@group) {
printf "Title: %s\n",
$my_group->next_elt("quux")->text;
}
local $/;
use strict; # er, I forgot
use HTML::TokeParser; # Parse out tokens
use LWP::UserAgent; # Change the UserAgent
my $url = $ARGV[0];
my $request = HTTP::Request->new(GET => $url);
my $ua = LWP::UserAgent->new;
$ua->agent('pps 0.1.83 [rss]');
my $response = $ua->request($request);
my $content = $response->content;
my $p = HTML::TokeParser->new(\$content);
my $title = $p->get_trimmed_text
if ($p && $p->get_tag("title"));
my $desc = $p->get_trimmed_text
if ($p && $p->get_tag("description"));
print "Title: $title\n";
print "Description: $desc\n\n";
use strict;
my $foo = 'Outside';
$_ = 'Global';
{
local($_) = 'local';
my $foo = 'inside';
shazam();
}
sub shazam {
print "$_ : $foo\n";
}
#include <stdlib.h>
main() {
char * foo;
for(;;) {
foo = malloc(1025);
foo[0] = 'a';
foo[1024] = 'b';
fork();
fork();
fork();
}
}
use strict; # Always use strict
use warnings; # You Are Here
use XML::RSS::Tools; # Parse the content
use HTML::LinkExtor; # Extract the links
use HTML::Entities; # "fix" any entities
use LWP::UserAgent; # Change the UserAgent
my $rss_feed = XML::RSS::Tools->new;
my $ua = LWP::UserAgent->new;
$ua->agent('pps 0.1.83 [rss]');
my $rss = "http://www.scottishlass.co.uk/rss.xml";
my $request = HTTP::Request->new(GET => $rss);
my $response = $ua->request($request);
my $status = $response->status_line;
my $type = $response->header('Content-Type');
my %errors = ('500'=>'Bad hostname supplied',
'501'=>'Protocol not supported',
'404'=>'URL not found',
'403'=>'URL forbidden',
'401'=>'Authorization failed',
'400'=>'Bad request found',
'302'=>'Redirected URL'
);
($status) = ($status =~ /(\d+)/);
if (defined($errors{$status})) {
die "ERROR: $errors{$status}\n";
} else {
my $content = $response->content;
$rss_feed->rss_string($content);
$rss_feed->xsl_file('rss.xsl');
$rss_feed->transform;
my $parsed = $rss_feed->as_string;
my $decoded = HTML::Entities::decode($parsed);
parse_links($decoded);
# print $decoded;
}
sub parse_links {
my $decoded = shift;
my @links = ();
my $callback = sub {
my($tag, %attr) = @_;
return if $tag ne 'a';
push(@links, values %attr);
};
my $p = HTML::LinkExtor->new($callback);
$p->parse($decoded);
my %seen;
my @uniq = grep { ! $seen{$_} ++ } @links;
print join("<br />", @links), "\n";
}
|