use LWP::Simple; $content = get("http://www.sn.no/"); die "Couldn't get it!" unless defined $content; #### #Download all the modules I used# use LWP::Simple; use HTML::TreeBuilder; use HTML::FormatText; use WWW::Mechanize; use Data::Dumper; #Download original webpage and acquire 500+ Links# $url = "http://wx.toronto.ca/festevents.nsf/all?openform"; my $mechanize = WWW::Mechanize->new(autocheck => 1); $mechanize->get($url); my $title = $mechanize->title; print "$title
"; my @links = $mechanize->links; ## THIS IS WHERE MY PROBLEM STARTS: I dont know how to use foreach loops. I thought if I put the "$link" variable as the "get ()" each time it would go through the loop it would "get" a different webpage. However it does not work even though no error shows## foreach my $link (@links) { # Retrieve the link URL my $href = $link->url; $URL1= get("$link"); die "Couldn't get '$link'" unless defined $URL1; $Format=HTML::FormatText->new; $TreeBuilder=HTML::TreeBuilder->new; $TreeBuilder->parse($URL1); $Parsed=$Format->format($TreeBuilder); open(FILE, ">TorontoParties.txt"); print FILE "$Parsed"; close (FILE); } ##
## Couldn't get 'WWW::Mechanize::Link=ARRAY(0x37c6e2c)' at test.pl line 34. Festival and event calendar - all
##
## #Download all the modules I used# use LWP::UserAgent; use HTML::TreeBuilder; use HTML::FormatText; use WWW::Mechanize; use Data::Dumper; #Download original webpage and acquire 500+ Links# $url = "http://wx.toronto.ca/festevents.nsf/all?openform"; my $mechanize = WWW::Mechanize->new(autocheck => 1); $mechanize->get($url); my $title = $mechanize->title; print "$title
"; my @links = $mechanize->links; ## THIS IS WHERE MY PROBLEM STARTS: I dont know how to use foreach loops. I thought if I put the "$link" variable as the "get ()" each time it would go through the loop it would "get" a different webpage. However it does not work even though no error shows## foreach my $link (@links) { # Retrieve the link URL my $href = $link->url; # # $URL1= get("$link"); # my $ua = LWP::UserAgent->new; my $response = $ua->get($link); unless($response->is_success) { die $response->status_line; } my $URL1 = $response->decoded_content; die Dumper($URL1); $Format=HTML::FormatText->new; $TreeBuilder=HTML::TreeBuilder->new; $TreeBuilder->parse($URL1); $Parsed=$Format->format($TreeBuilder); open(FILE, ">TorontoParties.txt"); print FILE "$Parsed"; close (FILE); } ##
## Can't use a WWW::Mechanize::Link object as a URI at C:/strawberry/perl/site/lib/HTTP/Request/Common.pm line 106 Festival and event calendar - all
##
## 400 URL must be absolute at test.pl line 39. Festival and event calendar - all
##
## #Download all the modules I used# use LWP::UserAgent; use HTML::TreeBuilder; use HTML::FormatText; use WWW::Mechanize; use Data::Dumper; #Download original webpage and acquire 500+ Links# $url = "http://wx.toronto.ca/festevents.nsf/all?openform"; my $mechanize = WWW::Mechanize->new(autocheck => 1); $mechanize->get($url); my $title = $mechanize->title; print "$title
"; my @links = $mechanize->links; ## THIS IS WHERE MY PROBLEM STARTS: I dont know how to use foreach loops. I thought if I put the "$link" variable as the "get ()" each time it would go through the loop it would "get" a different webpage. However it does not work even though no error shows## foreach my $link (@links) { # Retrieve the link URL my $href = $link->url_abs; # # $URL1= get("$link"); # my $ua = LWP::UserAgent->new; my $response = $ua->get($href); unless($response->is_success) { die $response->status_line; } my $URL1 = $response->decoded_content; die Dumper($URL1); $Format=HTML::FormatText->new; $TreeBuilder=HTML::TreeBuilder->new; $TreeBuilder->parse($URL1); $Parsed=$Format->format($TreeBuilder); open(FILE, ">TorontoParties.txt"); print FILE "$Parsed"; close (FILE); } ##
## $VAR1 = "\x{feff}/* Adjust default template */ #header001 {padding-bottom: 17px;} #background-nav{ width: 100%; float: left; overflow: hidden;} .wrapper{width: 100%; } #nav-side{} #nav-side h2{margin-bottom: 0em ! important;} #content{ width: 100%;float: right; margin: 0 -147px 0; } /**/ body,h1, h2, h3, h4, h5, h6, form,input {color: #000; font-family: Arial,Helveti ca,sans-serif; margin: 0px; padding: 0px; /*background-color: #fff; */} a:hover{color: #000; } h2{font-size: 1.3em;} ol li{ margin-left: 20px;} h2.icon-rss{ background:url(../images/rss14x14.gif) no-repeat 0px 2px; padding-l eft: 18px;} li.icon-rss{ background:url(../images/rss10x10.gif) no-repeat 0px 4px; list-sty le: none; margin-left: -15px; padding-left: 15px;} .general-text{line-height: 0em; line-height: 1em ! important; } .general-text.body{ float: left;width: 10%; background:#ccc;} .general-text h2{font-size: 1.3em; margin-bottom: 0.5em;} .bullet {background: url(../images/section1_bullet.gif) no-repeat 0 5px; padding -left: 10px;} .shade {color: #999;} .terms-of-use{} .terms-of-use li, .general-text ol li{margin-top: 1em;} .terms-of-use label{ font-weight: bold; font-size: 1.5em; margin-left: 3em;} #evt-feature{ border: 1px solid #ccc; float: left; clear: both; padding: 3px; wi dth: 396px; } #evt-feature .desc h2{ color: #000; font-size: 1.5em; font-weight: normal; margi n-bottom: 8px; margin-top: 8px;} #evt-feature .desc p{ color: #333; font-size: 0.965em;} #evt-feature .desc .highlight{ background: none; border: none; clear: both; floa t: left;} #evt-feature .two-column{ float: left;} #evt-feature .two-column .col0{ border-right: 1px solid #ccc; float: left; paddi ng-right: 10px; width: 260px; } #evt-feature .two-column .col1{ float: left; padding-left: 10px; width: 10px;} #evt-highlight{ clear: both; float: left;margin-top: 14px; width: 404px;} #evt-highlight .h{display: block; float: left; width: 129px;} #evt-highlight .h.spacing{margin-left: 7px; margin-right: 7px;} #evt-highlight p.img{border: 1px solid #ccc; padding: 3px; margin-bottom: 0.05em ;} #evt-highlight p{ font-size: 1em; color: #333; padding: 3px; padding-top: 0px;} #category-body{ float: left;} #banner{ border: 1px solid #ccc; clear: both; display: block; float: left; heig ht: 100px; padding: 3px; width: 82%; margin-bottom: 14px;} #banner h2 { display: block; float: left; font-size: 1.5em; font-weight: normal; margin-top: 5px; margin-bottom: 6px; height: 1.4em;} #banner .img{ display: block; float: left; height: 65px; width: 100%;} #evt-selection{ display: block; float: left; margin-left: 10px; width: 200px;} #evt-selection #calendar{border: none ! important; width: 200px; float:left; cl ear:both;} #evt-selection #calendar{margin-bottom: -24px;} #evt-selection form label{ margin-top: 12px;} #evt-selection input.textbox, #evt-selection select.textbox{ width: 189px;} #evt-selection input.button {margin-top: 14px;} #evt-listing{ clear: both; display: block; float: left;} #evt-listing h2{ font-size: 1.2em; font-weight: normal; margin-bottom: 14px; mar gin-top: 14px;} #evt-listing table{ width: 600px;} #evt-listing table th{ text-align: left; background: #ccc;} #evt-listing table td {padding-top: 0.5em;} #evt-listing table td.col0, #evt-listing table th.col0{ border-left: 1px #fff s olid; width: 7%; padding: 5px;} #evt-listing table td.col1, #evt-listing table th.col1{ border-left: 1px #fff s olid; width: 63%; padding: 5px; padding-left: 10px;} #evt-listing table td.col2, #evt-listing table th.col2{ border-left: 1px #fff s olid; width: 15%; padding: 5px;} #evt-listing table td.col3, #evt-listing table th.col3{ border-left: 1px #fff s olid; width: 25%; padding: 5px;}"; Festival and event calendar - all