Perl & LWP HTTP Connections In Perl #### What is HTTP? A simple to read (and write) protocol... lynx -source -mime_header http://www.cnet.com/bogus telnet www.cnet.com 80 GET /bogus HTTP/1.0 Host: www.cnet.com $ telnet www.cnet.com 80 Trying 216.239.115.141... Connected to c10-ssa-xw-lb.cnet.com. Escape character is '^]'. GET /bogus HTTP/1.0 Host: www.cnet.com HTTP/1.1 404 Not Found Date: Fri, 04 Feb 2005 00:43:02 GMT Server: Apache/2.0 Accept-Ranges: bytes P3P: CP="CAO DSP COR CURa ADMa DEVa PSAa PSDa IVAi IVDi CONi OUR OTRi IND PHY ONL UNI FIN COM NAV INT DEM STA" Connection: close Content-Type: text/html Expires: Fri, 04 Feb 2005 00:43:02 GMT ... #### LWP is easy to use #!/usr/local/bin/perl use strict; use warnings; use LWP::UserAgent; use HTTP::Request; my $url = 'http://www.cnet.com/bogus/'; my $ua = LWP::UserAgent->new; my $request = HTTP::Request->new(GET => $url); my $response = $ua->request($request); if ($response->is_success) { print $response->content; } else { print $response->status_line, " \n"; } #### LWP::Simple is even easier #!/usr/local/bin/perl use strict; use warnings; use LWP::Simple; getprint('http://www.cnet.com/bogus/'); #### First, a Perl refresher #!/usr/local/bin/perl # Tell your OS this is a perl script use strict; # If you write a script without these use warnings; # ... two lines, you owe me 10 bucks use Module::Name; # "use" is like java's "import" my $stuff = undef; # "my" is how you declare variables # ... "undef" is like java's "null" my $bat = get_bat($name); # Function calls are just like java my $bar = ['Hoss',2,3,$stuff]; # square brackets make an array ref my $baz = {a => 1, b => 2}; # curly braces make a hash (ie: Map) ref my $first = $bar->[0]; # -> dereferences attributes/methods $baz->{'c'} = 3; # ... like "." in java. $bat->do_something(); # if ($bar->[1]) { # "if" and "print" work the way you think print "$first is true\n"; # Double quotes evaluate variables } # ... 
prints: Hoss is true #### More fun with LWP::Simple #!/usr/local/bin/perl use strict; use warnings; use LWP::Simple; my $data = get('http://www.cnet.com/'); if (defined $data) { print $data; } if (is_success(getstore('http://mysimon.com/', './mysimon.html'))) { print "fetched mysimon front door just fine\n"; } if (RC_NOT_MODIFIED != mirror('http://mp3.com','./mp3.html')) { print "Something interesting happened with mp3.com\n"; } my ($type, $length, $mod, $exp, $server) = head('http://news.com'); print "Content Type is $type, and the Server is $server\n"; #### LWP::UserAgent is more "full featured" #!/usr/local/bin/perl use strict; use warnings; use LWP::UserAgent; my $ua = LWP::UserAgent->new(); $ua->cookie_jar({}); $ua->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7)'); $ua->timeout(42); # seconds $ua->proxy('http', 'http://proxy.sn.no:8001/'); $ua->no_proxy('com.com'); my $response1 = $ua->get('http://www.cnet.com/'); my $response2 = $ua->head('http://news.com/'); my $response3 = $ua->post('http://bugz.foo.com/postbug.cgi', {'descr' => 'your bug description', 'component' => 'your bug component', 'priority' => 'P1' }); #### HTTP::Request & HTTP::Response #!/usr/local/bin/perl use strict; use warnings; use LWP::UserAgent; use HTTP::Request; use HTTP::Response; my ($ua, $res) = (LWP::UserAgent->new(), undef); # these two lines are equivalent $res = $ua->get('http://cnet.com/'); $res = $ua->request(HTTP::Request->new('GET','http://cnet.com/')); # but you can do a lot more with the Request object... my $req = HTTP::Request->new('GET','http://builder.com/foo'); $req->header(Accept => 'text/x-dvi, text/html, text/*, */*'); $res = $ua->request($req); # the Response object also contains a lot of useful info... 
print "The Status code was: ", $res->code(), "\n"; print "The Content Type was: ", $res->header('Content-Type'), "\n"; print "The URL was: ", $res->request()->uri(), "\n"; while ($res = $res->previous()) { print "Redirected from: ", $res->request()->uri(), "\n"; } #### Previous script's output $ ./example-script.pl The Status code was: 200 The Content Type was: text/html The URL was: http://builder.com.com/ Redirected from: http://builder.cnet.com/webbuilding/pages/foo Redirected from: http://builder.com/foo #### Other cool features worth looking at * Use proxies * POST complex requests (including file uploads) * Authenticate using Basic Credentials * Read/write cookies from disk (in Mozilla or Microsoft formats) * Parse/Submit forms auto-magically * LWP::RobotUA, LWP::Parallel::UserAgent, LWP::UserAgent::FramesReady #### One caveat to LWP::Simple #!/usr/local/bin/perl use strict; use warnings; # use LWP::Simple this way, to ensure a proper LWP::UserAgent object # (that speaks HTTP/1.1) is used by all methods. # # Without it some methods use a lightweight client that only # speaks HTTP/1.0, which your server might treat differently, # and might give you different results than you expect. use LWP::Simple qw(:DEFAULT $ua); my $data = get('http://cnet.com/bogus/'); # http://www.perlmonks.org/?node=412544 #### More Info perldoc LWP perldoc LWP::Simple perldoc LWP::UserAgent perldoc HTTP::Request::Common perldoc lwpcook http://www.perlmonks.org/?node=168684 http://search.cpan.org/search?query=LWP http://search.cpan.org/dist/WWW-Mechanize/