#!/usr/bin/env perl
#
# Mirror the full-size images of the SI 2008 swimsuit feature into ./RESULTS.
#
# Flow:
#   1. Fetch the models index page.
#   2. Print any "index2.html" links that are NOT covered by the known gallery
#      sections (a diagnostic, so unhandled sections get noticed).
#   3. For every known gallery path found in the index, plus three fixed
#      galleries, call doit() to download that gallery's images.
#
use strict;
use warnings;
$|++;    # unbuffered STDOUT so progress is shown as it happens

use LWP::Simple;

my $SITE        = 'http://sportsillustrated.cnn.com';
my $RESULTS_DIR = 'RESULTS';

# Site-relative path of a single gallery inside one of the known sections.
# Capture group 1 is the whole path, including leading and trailing slash.
my $GALLERY_RE = qr{
    (
        /features/2008_swimsuit/
        (?:models|athleteswives|painting|cheerleaders|onlocation)/
        [-\w]+/
    )
}x;

# Quoted thumbnail URL as it appears in a gallery page. Group 1 is the image
# directory URL, group 2 the image name without the "_t.jpg" thumbnail suffix.
# Dots are escaped so they only match literal dots.
my $THUMB_RE = qr{\"(http://i\.a\.cnn\.net/si/pr/subs/swimsuit/images/)([\w.\-]+)_t\.jpg\"};

-d $RESULTS_DIR
    or mkdir($RESULTS_DIR, 0755)
    or die "cannot mkdir $RESULTS_DIR: $!";

my $all_model_index = get("$SITE/features/2008_swimsuit/models/");
unless (defined $all_model_index) {
    # get() returns undef on failure. Keep going so the fixed galleries
    # below are still processed, as they were before this check existed.
    warn "cannot fetch the models index; only the fixed galleries will be processed\n";
    $all_model_index = '';
}

# Diagnostic: list index2.html links that fall outside the known sections.
for my $link ($all_model_index =~ m{"/features/2008[^"]+/index2\.html"}g) {
    next if $link =~ $GALLERY_RE;
    print "$link\n";
}
# exit 0;

# The index may mention the same gallery more than once; handle each once.
my %seen;
while ($all_model_index =~ /$GALLERY_RE/g) {
    my $gallery = $1;    # copy now: doit() runs its own regex matches
    next if $seen{$gallery}++;
    doit($gallery);
}

doit("/features/2008_swimsuit/selfportraits/");
doit("/features/2008_swimsuit/heidi-klum/");
doit("/features/2008_swimsuit/danica-patrick/");

# doit($base)
#
# Download every full-size image referenced by one gallery.
#
#   $base - site-relative gallery path, e.g. "/features/2008_swimsuit/heidi-klum/"
#
# Fetches "<base>/index2.html", falling back to "<base>/" when that yields
# nothing. For each thumbnail URL ("..._t.jpg") found in the page, the
# corresponding full-size URL (same name without "_t") is mirrored to
# RESULTS/<name>.jpg. Files that already exist are skipped. One progress line
# is printed per image, ending in "skip" or the status code returned by
# mirror(). Returns nothing.
sub doit {
    my $base = shift;
    print "$base =>\n";

    # Strip the surrounding slashes so the URLs below do not contain "//".
    (my $path = $base) =~ s{\A/+|/+\z}{}g;

    my $model_index = get("$SITE/$path/index2.html");
    unless ($model_index) {
        $model_index = get("$SITE/$path/");
    }
    unless (defined $model_index) {
        warn "cannot fetch gallery page for $base\n";
        return;
    }

    while ($model_index =~ /$THUMB_RE/g) {
        my ($image_dir, $name) = ($1, $2);
        my $url  = "$image_dir$name.jpg";
        my $file = "$RESULTS_DIR/$name.jpg";

        print "$url => $file: ";
        if (-e $file) {
            print "skip\n";
        }
        else {
            print mirror($url, $file), "\n";
        }
    }

    return;
}