Perl & LWP
HTTP Connections In Perl
####
What is HTTP?
A simple to read (and write) protocol...
lynx -source -mime_header http://www.cnet.com/bogus
telnet www.cnet.com 80
GET /bogus HTTP/1.0
Host: www.cnet.com
$ telnet www.cnet.com 80
Trying 216.239.115.141...
Connected to c10-ssa-xw-lb.cnet.com.
Escape character is '^]'.
GET /bogus HTTP/1.0
Host: www.cnet.com
HTTP/1.1 404 Not Found
Date: Fri, 04 Feb 2005 00:43:02 GMT
Server: Apache/2.0
Accept-Ranges: bytes
P3P: CP="CAO DSP COR CURa ADMa DEVa PSAa PSDa IVAi IVDi CONi OUR OTRi IND PHY ONL UNI FIN COM NAV INT DEM STA"
Connection: close
Content-Type: text/html
Expires: Fri, 04 Feb 2005 00:43:02 GMT
...
##
##
LWP is easy to use
#!/usr/local/bin/perl
# Fetch a single URL with LWP::UserAgent: print the body on success,
# otherwise print the HTTP status line.
use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Request;

my $target = 'http://www.cnet.com/bogus/';
my $agent  = LWP::UserAgent->new;

# Build the GET request inline and hand it straight to the user agent.
my $reply = $agent->request( HTTP::Request->new( GET => $target ) );

# Handle the failure case first; otherwise emit the document body.
if ( !$reply->is_success ) {
    print $reply->status_line, " \n";
}
else {
    print $reply->content;
}
##
##
LWP::Simple is even easier
#!/usr/local/bin/perl
# The shortest possible fetch: LWP::Simple's getprint() retrieves the URL
# and prints the document for us.
use strict;
use warnings;
use LWP::Simple;

my $url = 'http://www.cnet.com/bogus/';
getprint($url);
##
##
First, a Perl refresher
#!/usr/local/bin/perl
# ^ The "#!" line tells your OS this is a perl script.  Keep it to the
#   interpreter path (plus optional switches): on most systems any other
#   trailing text is handed to perl as an argument.
#
# Syntax refresher only: Module::Name, get_bat() and $name are placeholders,
# not things this snippet defines.
use strict;                      # If you write a script without these
use warnings;                    # ... two lines, you owe me 10 bucks
use Module::Name;                # "use" is like java's "import"
my $stuff = undef;               # "my" is how you declare variables
                                 # ... "undef" is like java's "null"
my $bat = get_bat($name);        # Function calls are just like java
my $bar = ['Hoss',2,3,$stuff];   # square brackets make an array ref
my $baz = {a => 1, b => 2};      # curly braces make a hash (i.e. Map) ref
my $first = $bar->[0];           # -> dereferences attributes/methods
$baz->{'c'} = 3;                 # ... like "." in java.
$bat->do_something();            #
if ($bar->[1]) {                 # "if" and "print" work the way you think
    print "$first is true\n";    # Double quotes evaluate variables
}                                # ... prints: Hoss is true
##
##
More fun with LWP::Simple
#!/usr/local/bin/perl
# Tour of the other LWP::Simple helpers: get, getstore, mirror and head.
# Each call is checked before its result is used.
use strict;
use warnings;
use LWP::Simple;

# get() hands back the document body, or undef when the fetch failed.
my $data = get('http://www.cnet.com/');
if (defined $data) {
    print $data;
}

# getstore() saves the document to a file and returns the HTTP status code.
if (is_success(getstore('http://mysimon.com/',
                        './mysimon.html')))
{
    print "fetched mysimon front door just fine\n";
}

# mirror() returns a status code too; anything other than "304 Not Modified"
# means the local copy was refreshed or the request failed.
if (RC_NOT_MODIFIED != mirror('http://mp3.com','./mp3.html')) {
    print "Something interesting happened with mp3.com\n";
}

# head() returns an empty list on failure.  A list assignment used as a
# condition yields the number of values returned, so the print only runs
# when the request succeeded (no "uninitialized value" warnings).
if (my ($type, $length, $mod, $exp, $server) = head('http://news.com')) {
    print "Content Type is $type, and the Server is $server\n";
}
##
##
LWP::UserAgent is more "full featured"
#!/usr/local/bin/perl
# Shows the configuration knobs on LWP::UserAgent and its get/head/post
# convenience methods.
use strict;
use warnings;
use LWP::UserAgent;

# Cookie jar, User-Agent string and timeout are handed to the constructor.
my $ua = LWP::UserAgent->new(
    cookie_jar => {},
    agent      => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7)',
    timeout    => 42,    # seconds
);

# Send http traffic through a proxy, but not for the com.com domain.
$ua->proxy( http => 'http://proxy.sn.no:8001/' );
$ua->no_proxy('com.com');

# Form fields for the POST below.
my %bug_report = (
    'descr'     => 'your bug description',
    'component' => 'your bug component',
    'priority'  => 'P1',
);

my $response1 = $ua->get('http://www.cnet.com/');
my $response2 = $ua->head('http://news.com/');
my $response3 = $ua->post( 'http://bugz.foo.com/postbug.cgi', \%bug_report );
##
##
HTTP::Request & HTTP::Response
#!/usr/local/bin/perl
# Shows that $ua->get() is shorthand for sending an HTTP::Request, then
# inspects the HTTP::Response and walks back through any earlier responses.
use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;

my $ua = LWP::UserAgent->new();
my $res;

# These two calls are equivalent.
$res = $ua->get('http://cnet.com/');
$res = $ua->request( HTTP::Request->new( 'GET', 'http://cnet.com/' ) );

# An explicit Request object lets you do more, e.g. set headers first.
my $req = HTTP::Request->new( 'GET', 'http://builder.com/foo' );
$req->header( Accept => 'text/x-dvi, text/html, text/*, */*' );
$res = $ua->request($req);

# The Response object also carries a lot of useful information.
print "The Status code was: ", $res->code(), "\n";
print "The Content Type was: ", $res->header('Content-Type'), "\n";
print "The URL was: ", $res->request()->uri(), "\n";

# Step back through the chain of previous responses until there are none.
for ( $res = $res->previous(); $res; $res = $res->previous() ) {
    print "Redirected from: ", $res->request()->uri(), "\n";
}
##
##
Previous script's output
$ ./example-script.pl
The Status code was: 200
The Content Type was: text/html
The URL was: http://builder.com.com/
Redirected from: http://builder.cnet.com/webbuilding/pages/foo
Redirected from: http://builder.com/foo
##
##
Other cool features worth looking at
* Use proxies
* POST complex requests (including file uploads)
* Authenticate using Basic Credentials
* Read/write cookies from disk (in Mozilla or Microsoft formats)
* Parse/Submit forms auto-magically
* LWP::RobotUA, LWP::Parallel::UserAgent, LWP::UserAgent::FramesReady
##
##
One caveat to LWP::Simple
#!/usr/local/bin/perl
use strict;
use warnings;

# Import $ua alongside the default exports so that every LWP::Simple
# function goes through a proper LWP::UserAgent object (one that speaks
# HTTP/1.1).
#
# Without it, some functions use a lightweight client that only speaks
# HTTP/1.0; your server might treat that differently and give you
# different results than you expect.
use LWP::Simple ( ':DEFAULT', '$ua' );

my $url  = 'http://cnet.com/bogus/';
my $data = get($url);

# http://www.perlmonks.org/?node=412544
##
##
More Info
perldoc LWP
perldoc LWP::Simple
perldoc LWP::UserAgent
perldoc HTTP::Request::Common
perldoc lwpcook
http://www.perlmonks.org/?node=168684
http://search.cpan.org/search?query=LWP
http://search.cpan.org/dist/WWW-Mechanize/