Beefy Boxes and Bandwidth Generously Provided by pair Networks
P is for Practical
 
PerlMonks  

hacker's scratchpad

by hacker (Priest)
on Jun 01, 2004 at 18:42 UTC ( #358376=scratchpad: print w/ replies, xml ) Need Help??

XML::RSS::Tools problem

use strict; use warnings; use diagnostics; use XML::RSS::Tools; # Parse the content use HTML::Entities; use HTML::LinkExtor; # Extract the links use HTML::Entities; # "fix" any entities use LWP::UserAgent; # Change the UserAgent my $rss = $ARGV[0]; my $rss_feed = XML::RSS::Tools->new( auto_wash => 1, debug => 1); my $ua = LWP::UserAgent->new; my $request = HTTP::Request->new(GET => $rss); my $response = $ua->request($request); my $status = $response->status_line; my $type = $response->header('Content-Type'); my $content = $response->content; $content =~ s,\cM,,g; $content =~ s,—,--,g; $content =~ s,\x92,',g; $rss_feed->rss_string("$content"); $rss_feed->xsl_file('atom03.xsl'); $rss_feed->transform; my $parsed = encode_entities($rss_feed->as_string); my $decoded = decode_entities($parsed); print "$decoded";
This results in:
./rss.pl http://www.computerbase.de/rss/news.atom Use of uninitialized value in string eq at /usr/local/share/perl/5.10.0/XML/RSS.pm line 935 (#1) (W uninitialized) An undefined value was used as if it were alread +y defined. It was interpreted as a "" or a 0, but maybe it was a mi +stake. To suppress this warning assign a defined value to your variables. To help you figure out what was undefined, perl will try to tell y +ou the name of the variable (if any) that was undefined. In some cases it + cannot do this, so it also tells you what operation you used the undefine +d value in. Note, however, that perl optimizes your program and the opera +tion displayed in the warning may not necessarily appear literally in y +our program. For example, "that $foo" is usually optimized into "that + " . $foo, and the warning will refer to the concatenation (.) operat +or, even though there is no . in your program. Use of uninitialized value in numeric ne (!=) at /usr/local/share/perl/5.10.0/XML/RSS/Tools.pm line 444 (#1)

Ordinal::Fu

my $num = 7; my $unique = ordinal($num); my $ordinal = chomp $unique; print "Ordinal: $ordinal\n"; sub ordinal { $_[0] =~ /^(?:\d+|\d[,\d]+\d+)$/ or return $_[0]; return "$_[0]nd" if $_[0] =~ /(?<!1)2$/; return "$_[0]rd" if $_[0] =~ /(?<!1)3$/; return "$_[0]st" if $_[0] =~ /(?<!1)1$/; return "$_[0]th"; }

Comics-r-Us

use strict; use Data::Dumper; # Dump the raw data use URI; use CGI; use LWP::Simple; # Fetch the page itself use LWP::UserAgent; # Create a proper User-Agent header use HTML::TreeBuilder; # Find the attributes of the tag my $cgi = CGI->new(); my $ua = LWP::UserAgent->new; # $ua->agent('pps Plucker Perl Spider, v0.1.83 [comics]'); $ua->agent('Opera/7.54 (Windows NT 5.0; U) [de]'); my $page = "http://www.ucomics.com/"; my $response = $ua->request(HTTP::Request->new(GET => "$page")); my $root = HTML::TreeBuilder->new_from_content($response->conte +nt); my (%images, %strips, %stripname) = (); foreach my $node ($root->find_by_tag_name('option')) { # Only add the non-empty elements in <option></option> $strips{$node->attr('value')}++ if ($node->attr('value')); } # print Dumper(%strips); foreach my $comic (sort keys %strips) { push my @comics, $comic; foreach my $strip (@comics) { fetch_comic($strip); } } sub fetch_comic { my $strip = shift; # printf "Sleeping for: %s seconds..", sleep int(rand(3) + 5); # print "Requesting $strip\n"; my $response = $ua->request(HTTP::Request->new(GET => "$str +ip")); my $content = $response->content; my $root = HTML::TreeBuilder->new_from_content($content); my %images = (); foreach my $node ($root->find_by_tag_name('img')) { $images{$node->attr('src')}++ } my @stripname = $root->look_down(_tag => 'font', class => 'com +ictitle'); # Debug for now foreach my $foo (@stripname) { printf "DEBUG: %s\n\n", $foo->as_text; } my $title = $root->look_down('_tag', 'title')->as_text; foreach my $comic (sort keys %images) { print "$title, $comic\n"if $comic =~ m|/comics/|; } }

Areacode Search

while(<DATA>) { m/(^\d+)(.*)\s?/; my $state = $2; my $numbers = $1; $state =~ s/^\s+\w+\s+//; if ($numbers =~ m/$area_code/) { print "$state\n"; last; } } __DATA__ 201 NJ N New Jersey: Jersey City, Hackensack Bayone (see 973) 202 DC Washington, D.C. 203 CT Connecticut: Bridgeport, New Haven Stamford(see 860) 204 MB Canada: Manitoba Winnipeg Winkler 205 AL Alabama: Birmingham Fairfield Tuscaloosa (see 256 and 334) 206 WA W Washington state: Seattle (see 253, 360, 425)

Strip Font

my %verb = (S => 4, # start tag E => 2, # end tag T => 1, # text element C => 1, # comment D => 1, # declaration PI => 2); # processing instruction my $p = HTML::TokeParser->new(\$$cleaned); my $nff_content; # No Font face content while( my $t = $p->get_token ) { if ($t->[0] eq 'S' and $t->[1] eq 'font') { my $attr = $t->[2]; delete $attr->{face}; my $attributes = join(" ", map {qq{$_="$attr->{$_}"}} keys %$attr); $nff_content .= "<font $attributes>"; } else { $nff_content .= $t->[$verb{ $t->[0]}]; } }

Generating Dates

use strict; use Time::Local; my $epoch = 1900 - 7 * 4; my $fi = timegm 0, 0, 0, 29, 5 - 1, 2003 - $epoch; for (my $start = timegm 0, 0, 0, 11, 5 - 1, 2002 - $epoch; $start <= $fin; $start += 24 * 60 * 60) { my ($day, $month, $year) = (gmtime $start)[3 .. 5]; printf "%.2d/%.2d/%d\n", $month + 1, $day, $year + $epoch; }

XML::Twig::Foo

use strict; use Data::Dumper; use XML::Twig; my $doc = " <foo> <bar> <blort> <quux>My Title</quux> <plonk>Oops</plonk> </blort> </bar> </foo> "; my $field= 'blort'; my $twig = XML::Twig->new(); $twig->parse($doc); my $root= $twig->root; my @group = $root->children; foreach my $my_group (@group) { printf "Title: %s\n", $my_group->next_elt("quux")->text; }

Token Parsing

local $/; use strict; # er, I forgot use HTML::TokeParser; # Parse out tokens use LWP::UserAgent; # Change the UserAgent my $url = $ARGV[0]; my $request = HTTP::Request->new(GET => $url); my $ua = LWP::UserAgent->new; $ua->agent('pps 0.1.83 [rss]'); my $response = $ua->request($request); my $content = $response->content; my $p = HTML::TokeParser->new(\$content); my $title = $p->get_trimmed_text if ($p && $p->get_tag("title")); my $desc = $p->get_trimmed_text if ($p && $p->get_tag("description")); print "Title: $title\n"; print "Description: $desc\n\n";

Lexical Foo

use strict; my $foo = 'Outside'; $_ = 'Global'; { local($_) = 'local'; my $foo = 'inside'; shazam(); } sub shazam { print "$_ : $foo\n"; }

Forking C

#include <stdlib.h> main() { char * foo; for(;;) { foo = malloc(1025); foo[0] = 'a'; foo[1024] = 'b'; fork(); fork(); fork(); } }

RSS/RDF/XML Parsing

use strict; # Always use strict use warnings; # You Are Here use XML::RSS::Tools; # Parse the content use HTML::LinkExtor; # Extract the links use HTML::Entities; # "fix" any entities use LWP::UserAgent; # Change the UserAgent my $rss_feed = XML::RSS::Tools->new; my $ua = LWP::UserAgent->new; $ua->agent('pps 0.1.83 [rss]'); my $rss = "http://www.scottishlass.co.uk/rss.xml"; my $request = HTTP::Request->new(GET => $rss); my $response = $ua->request($request); my $status = $response->status_line; my $type = $response->header('Content-Type'); my %errors = ('500'=>'Bad hostname supplied', '501'=>'Protocol not supported', '404'=>'URL not found', '403'=>'URL forbidden', '401'=>'Authorization failed', '400'=>'Bad request found', '302'=>'Redirected URL' ); ($status) = ($status =~ /(\d+)/); if (defined($errors{$status})) { die "ERROR: $errors{$status}\n"; } else { my $content = $response->content; $rss_feed->rss_string($content); $rss_feed->xsl_file('rss.xsl'); $rss_feed->transform; my $parsed = $rss_feed->as_string; my $decoded = HTML::Entities::decode($parsed); parse_links($decoded); # print $decoded; } sub parse_links { my $decoded = shift; my @links = (); my $callback = sub { my($tag, %attr) = @_; return if $tag ne 'a'; push(@links, values %attr); }; my $p = HTML::LinkExtor->new($callback); $p->parse($decoded); my %seen; my @uniq = grep { ! $seen{$_} ++ } @links; print join("<br />", @links), "\n"; }
Log In?
Username:
Password:

What's my password?
Create A New User
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others examining the Monastery: (7)
As of 2014-12-26 01:45 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (163 votes), past polls