Beefy Boxes and Bandwidth Generously Provided by pair Networks
The stupid question is the question not asked
 
PerlMonks  

FlickrDownload.pl

by jfroebe (Vicar)
on Feb 20, 2008 at 16:02 UTC ( #669034=sourcecode: print w/ replies, xml ) Need Help??

Category: Web Stuff
Author/Contact Info
Description:

If you've ever tried to back up your Flickr account or download the public Flickr photos of other users, then your options were pretty limited on Linux. After trying and failing to get FlickrDown to work under wine, I decided to write my own... in perl :)

The various Flickr API modules on CPAN are incomplete with respect to obtaining the list of public/friends photos. I had to use XML::Parser::Lite::Tree::XPath to extract the photos from each Flickr page.

By default, we obtain the largest photo available (original -> large -> medium -> small) and populate the EXIF comments field with the Flickr photo title, description and URL. We verify the file type with File::MimeInfo::Magic and change/add the file extension as necessary.

During testing I received numerous duplicate files which were mostly resolved by adding a rudimentary duplicate file checker.

It should work on just about any platform that Perl does.

Thoughts?

FlickrDownload.pl:
#!/usr/bin/perl

use strict;
use warnings;

use Digest::MD5 qw(md5_hex);
use Config::Simple;
use Image::ExifTool;
use File::MimeInfo::Magic qw(mimetype extensions);
use Flickr::API::Photos;
use Flickr::Person;
use Getopt::Std;
use HTML::Entities ();
use File::MimeInfo::Magic;
use IO::Scalar;
use LWP;
use Tie::File::AsHash;
use XML::Parser::Lite::Tree::XPath;


##############################

# Load the settings file from the current working directory.  Config::Simple
#   returns undef when the file cannot be read or parsed, so fail here with
#   its own error text instead of crashing on the first ->param call.
my $cfg = Config::Simple->new('FlickrDownload.ini')
  or die 'Unable to load FlickrDownload.ini: ' . Config::Simple->error() . "\n";

my $user_name;
my $id;
my $email;
my %arg_options;
my $photo_directory;
my $found;

my $usage = "Usage: $0 -u username | -e email | -i flickr_id\n";

# -u, -e and -i each take a value; getopts returns false on an unknown
#   switch or a missing value.
getopts('e:i:u:', \%arg_options)
  or die $usage;

$user_name = $arg_options{u};
$id = $arg_options{i};
$email = $arg_options{e};

# Without at least one way to identify the Flickr account the rest of the
#   script has nothing to look up and would exit without saying why.
unless ($user_name || $email || $id) {
  die $usage;
}

# The API key and the download directory are used unconditionally further
#   down, so report a missing entry by name.
foreach my $required ('Flickr.API_KEY', 'Photos.directory') {
  defined $cfg->param($required)
    or die "Missing '$required' in FlickrDownload.ini\n";
}

my $flickr_api_key = $cfg->param('Flickr.API_KEY');
my $flickr_secret = $cfg->param('Flickr.API_SHARED_SECRET');
my $flickr_email = $cfg->param('Flickr.email');
my $flickr_password = $cfg->param('Flickr.password');

##############################
sub decode_html {
  # Decode HTML entities repeatedly until a pass changes nothing, so that
  #   text escaped more than once (e.g. "&amp;lt;") is fully unescaped.
  #   Takes the encoded string and returns the decoded copy.
  my ($current) = @_;

  while (1) {
    my $decoded = HTML::Entities::decode($current);

    # A pass that leaves the text unchanged means we have reached the end.
    return $decoded if $decoded eq $current;

    $current = $decoded;
  }
}
##############################

# Handle used for every account lookup below; built from the credentials
#   read out of the configuration file.
my $flickr_person = Flickr::Person->new(
  {
    api_key  => $flickr_api_key,
    email    => $flickr_email,
    password => $flickr_password,
  }
);

# Resolve the account named on the command line.  A user name is looked up
#   directly; an email address (checked first) or a Flickr id is used to
#   obtain the user name, which is then looked up the same way.
if ($user_name) {
  $found = $flickr_person->find( { username => $user_name } );
}
elsif ($email || $id) {
  $found = $email
    ? $flickr_person->find( { email => $email } )
    : $flickr_person->id( { id => $id } );

  $user_name = $flickr_person->username();
  $found = $flickr_person->find( { username => $user_name } );
}

# Download every public photo of the resolved Flickr user into
#   <Photos.directory>/<user name>, one API page (up to 500 photos) at a time.
#   For each photo the largest available size is fetched, the file extension
#   is corrected from the detected MIME type, and the title, description and
#   source URL are written into the image's Comment tag.
if ( $found ) {
  my $page_num = 1;

  # NOTE(review): this reuses the API handle stored inside the
  #   Flickr::Person object -- confirm these internal keys are stable.
  my $api = $flickr_person->{people_api}->{api};

  $photo_directory = $cfg->param('Photos.directory') . "/" . $user_name;
  $api->{api_secret} = $flickr_secret;

  my $flickr_photos = Flickr::API::Photos->new(
    $flickr_api_key,
    $flickr_email,
    $flickr_password);

  unless (-d $photo_directory) {
    mkdir $photo_directory
      or die ('Unable to create directory "' . $photo_directory . '": ' . $!);
  }

  # To determine whether we might be downloading a duplicate, we need a hash
  #   with the MD5 sum & filename.  We tie both hashes to a file in the
  #   flickr user's directory.
  # NOTE(review): ':' is the field separator in both files; a photo title
  #   containing ':' may not round-trip as a key in .files_md5s -- verify.
  tie( my %MD5_HASH, 'Tie::File::AsHash', $photo_directory . "/.md5s", split => ':' )
    or die "Problem tying %hash: $!\n";

  tie( my %FILES_HASH, 'Tie::File::AsHash', $photo_directory . "/.files_md5s", split => ':' )
    or die "Problem tying %hash: $!\n";

  # Largest first; the first label Flickr offers for a photo wins.
  my @size_preference = ('Original', 'Large', 'Medium', 'Small');

  PAGE:
  while (1) {
    my $response = $api->execute_method('flickr.people.getPublicPhotos', {
      api_key    => $flickr_api_key,
      user_id    => $flickr_person->id,
      per_page  => 500,
      page    => $page_num
      } );

    # NOTE(review): relies on the response object exposing 'success' and
    #   'error_message' next to 'tree' -- confirm against the installed
    #   Flickr::API version.
    unless ($response && $response->{success}) {
      my $reason = ($response && $response->{error_message})
        ? $response->{error_message}
        : 'unknown error';
      warn "Unable to list photos (page $page_num): $reason\n";
      last PAGE;
    }

    my $xpath = XML::Parser::Lite::Tree::XPath->new($response->{tree});
    my @nodes = $xpath->select_nodes('/photos/photo');

    # An empty page means we have walked past the last photo.  (Testing
    #   $#nodes > 0 here would also discard a page holding a single photo.)
    last PAGE unless @nodes;

    PHOTO:
    foreach my $node (@nodes) {
      my $photo_id = $node->{attributes}->{id};
      my $photo_hash = $flickr_photos->getInfo($photo_id);

      unless ($photo_hash) {
        warn "Unable to get info for photo $photo_id.  Skipping photo.\n";
        next PHOTO;
      }

      my $photo_title = $photo_hash->{'title'} || "";
      my $description =
        $photo_hash->{'description'}
          ? decode_html( $photo_hash->{'description'} )
          : "";

      # Index the available sizes by label; tolerate a failed getSizes call
      #   instead of dying on an undefined reference.
      my $sizes = $flickr_photos->getSizes($photo_id);
      my @size_list = ($sizes && $sizes->{sizes}) ? @{ $sizes->{sizes} } : ();
      my %photo_sizes = map { ( $_->{'label'} => $_ ) } @size_list;

      my ($best_label) = grep { exists $photo_sizes{$_} } @size_preference;
      unless (defined $best_label) {
        warn "Unable to find url.  Skipping photo.";
        next PHOTO;
      }
      my $original_photo = $photo_sizes{$best_label};

      my $flickr_id = defined $photo_hash->{'id'} ? $photo_hash->{'id'} : $photo_id;

      printf "name: %s id: %s description: %s\n",
        $photo_title,
        $flickr_id,
        $description;

      # The title is user-supplied text: path separators and control
      #   characters in it must not end up in the file path.
      my $safe_title = $photo_title;
      $safe_title =~ s{[/\\\x00-\x1f]}{_}g;
      $safe_title =~ s/\.\w+$//;

      # Prepopulating the file extension will allow us to eliminate
      #   the vast majority of duplicate images by not downloading
      #   them in the first place.  If we don't know the format without
      #   downloading the image, assume jpeg because the vast majority
      #   of the photos are jpeg.
      my $extension = exists $photo_hash->{'originalformat'}
        ? $photo_hash->{'originalformat'}
        : 'jpeg';
      my $photo_filename =
        $photo_directory . '/' . $safe_title . "_" . $flickr_id . "." . $extension;

      # Files of 2048 bytes or less are not trusted as complete downloads.
      if (-f $photo_filename && (-s _) > 2048) {
        printf "We already have photo %s .. Skipping\n", $photo_title;
        next PHOTO;
      }

      my $request = HTTP::Request->new(GET => $original_photo->{'source'});
      my $download = $api->request($request);

      # Never store an HTTP error page as if it were the photo.
      unless ($download->is_success) {
        warn "Unable to download " . $original_photo->{'source'} . ": "
          . $download->status_line . "\n";
        next PHOTO;
      }

      my $content = $download->content;
      my $md5_sum = md5_hex($content);

      # since we have downloaded the photo, let's put the proper file
      #   extension on it.
      my $mime_type = mimetype( IO::Scalar->new(\$content) );
      my $file_ext = $mime_type ? extensions($mime_type) : undef;
      if ($file_ext) {
        $photo_filename =~ s/\.\w+$//;
        $photo_filename .= "." . $file_ext;
      }

      if (exists $MD5_HASH{$md5_sum}) {
        printf "We already have photo %s .. Skipping\n", $photo_title;
        next PHOTO;
      }

      my $FH;
      unless (open($FH, ">", $photo_filename)) {
        warn "Unable to write to $photo_filename: $!\n";
        next PHOTO;
      }

      binmode $FH;
      my $written = print {$FH} $content;
      $written = close($FH) && $written;

      unless ($written) {
        warn "Unable to write to $photo_filename: $!\n";
        # Remove the partial file so a later run does not mistake it for a
        #   finished download.
        unlink $photo_filename;
        next PHOTO;
      }

      # Only remember the photo once it is safely on disk.
      $MD5_HASH{$md5_sum} = $photo_filename;
      $FILES_HASH{$photo_filename} = $md5_sum;

      # We're going to use Image::ExifTool instead of the built in
      #  exif extracted information from Flickr::API::Photos because
      #  we want to write to the file.
      my $exifTool = Image::ExifTool->new;
      my $info = $exifTool->ImageInfo($photo_filename);

      # Fill in the capture date from Flickr only when the file has none.
      if (!$info->{'DateTimeOriginal'} && $photo_hash->{'dates'}->{'taken'}) {
        my $taken_date = $photo_hash->{'dates'}->{'taken'};
        $taken_date =~ tr/-/:/;
        $exifTool->SetNewValue("DateTimeOriginal", $taken_date);
      }

      $exifTool->SetNewValue("Comment", $photo_title . ": " . $description . " " . $original_photo->{'source'});

      # WriteInfo returns 0 when the file could not be rewritten.
      my $result = $exifTool->WriteInfo($photo_filename);
      unless ($result) {
        my $exif_error = $exifTool->GetValue('Error');
        warn "Unable to update tags in $photo_filename"
          . (defined $exif_error ? ": $exif_error" : "") . "\n";
      }
    }

    $page_num++;
  }

  # Release the tied files only after every page has been processed, so
  #   the duplicate records of all pages are kept.
  untie %MD5_HASH;
  untie %FILES_HASH;
} else {
  die "Unable to find a matching Flickr user.\n";
}
FlickrDownload.ini
[Flickr]
email=jason_froebe@email.org
password=**SuperSecretPassword**
API_KEY=**YOUR Flickr API KEY**
API_SHARED_SECRET=**YOUR SHARED SECRET FROM Flickr**

[Photos]
#  where you want to put the photos
directory=/home/jason/flickr

Comment on FlickrDownload.pl
Select or Download Code
Re: FlickrDownload.pl
by STG (Initiate) on Aug 05, 2008 at 10:17 UTC
    excellent! works great here

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://669034]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others about the Monastery: (10)
As of 2014-12-25 20:43 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (163 votes), past polls