Beefy Boxes and Bandwidth Generously Provided by pair Networks
No such thing as a small change
 
PerlMonks  

Pic Thresher

by ichimunki (Priest)
on Feb 24, 2001 at 08:34 UTC ( #60607=sourcecode: print w/ replies, xml ) Need Help??

Category: GUI Programming
Author/Contact Info michael@andsoforth.com
Description: Assists in rapidly sorting and "threshing" a heap of JPGs. Requires an RDBMS (set to postgreSQL, but easily changed) and Tk.

Images and RDBMS not included. :)
#!/usr/bin/perl -w
use strict;
use Carp;

=head1 picthresher

C<picthresher.pl>

=head2 Info

Author: Michael Libby

Contact: michael@andsoforth.com

Copyright: 2001, And So Forth Internet Services

This is Free software under the GPL, see http://www.gnu.org/copyleft/ 
+for info.

=head2 Summary

This program takes jpg files from a specified directory and
assists the user in cataloging and storing the images using a SQL
database to keep everything tidy. The program assists in preventing
duplicates by assigning unique, but derived IDs to each image based on
an MD5 hash (it is possible that these will not be unique, but in
practice, a duplicate is unlikely).

The user can classify the image, designate it for archiving, or ban
the image. The program is built around the notion of harvesting images
from a variety of sources and needing to specify which ones should
stay "live" (i.e. on the hard-drive) and which ones should be set up
for writing to CD or other archive media. The program will
automatically delete duplicate and banned images (don't worry, it will
let you watch while it does this).

The program will also create the directories to correspond to the sort
categories if needed.

=head2 Preparation

You will need an RDBMS with a table for storing picture info. The
implementation here works with postgreSQL, but I imagine that any DB
for which there is a DBD::* module would work. The table needs to have
the following fields (you're on your own for creating this):

 hex_id  char[32]  * stores the unique key. recommend building 
                     an index on this field

 height  int       * the height of the image
 width   int       * the width of the image

 status  varchar   * if you want to use a more efficient char[x] 
                     type, make sure the code strips trailing spaces

 times_seen int    * not yet used, intended to store number of times
                     this image has been run through the thresher
 location  varchar * the path to the file
 names  varchar    * all names this file has had when found by
                     this program. not implemented.

You will need to customize a few global variables to work with your
particular installation.

=cut
;

############################################################
#
# External Modules

use Tk;
use Tk::JPEG;
use DBI;
use DBD::Pg;
use Digest::MD5;
use File::Find;
use File::Copy;
use File::Basename;

############################################################
#
# Prepare Globals

my $ARCH_DIR = '/home/user/images';
my $SRC_DIR = "$ARCH_DIR/thresh_pile";
my $PREP_DIR = "$ARCH_DIR/cd_prep";
my $TOP_DIR = "$ARCH_DIR/favorites";

my @CATEGORIES = ( '01_category_one',
                   '02_category_two',
                   '03_category_three',
                   '04_category_four'
                   );

my %DB_VAR = ( dbname     => 'database_name',
               host       => 'machine.domain',
               user       => 'username',
               pass       => 'password',
               table      => 'table_name',
               key        => 'hex_id',  #must match this column in DB
               );

my $DB_HANDLE = '';#_open_DB_connection();

my %IMAGE;

############################################################
#
# Prepare the main window

my $MW = MainWindow->new;
$MW->title( "Pic Thresher" );
_maximize_MW( $MW );

my $Menubar = $MW->Menu();
$MW->configure( -menu => $Menubar );
_fill_menubar( $Menubar );

my $Filename = get_next_jpg();
my $Filename_LINE = $MW->
    Label( -textvariable => \$Filename )->
    pack( -side => 'top', -anchor => 'n', -fill => 'x' );

my $Message = 'Initializing';
my $Message_Line = $MW->
    Label( -textvariable => \$Message )->
    pack( -side => 'top', -anchor => 'n', -fill => 'x' );

my $Image = $MW->
    Label()->
    pack( -side => 'top',
          -anchor => 'center',
          -fill => 'both',
          -expand => 1,
          );

my $Photo = $Image->
    Photo( '-format' => 'jpeg',
           -file    => $Filename );

my $Sized_Photo;
resize_image();

MainLoop();

############################################################
#
# Tk-related initialization routines

sub _maximize_MW {
    my $mw = shift;
    my $max_width = $mw->screenwidth()-10;
    my $max_height = $mw->screenheight()-55;
    my $geostring = join '', $max_width, 'x', $max_height, '+0+0';
    $mw->geometry($geostring);
}

sub _fill_menubar {
    my $menubar = shift;

    my $file_menu = $menubar->cascade( -label => '~Thresher', -tearoff
+ => 0 );

    $file_menu->command( -label => 'Create Archive Dirs', -command => 
+\&create_archives );
    $file_menu->command( -label => 'Quit', -command => \&quit );
    
    my $archive_menu = $menubar->cascade( -label => '~Archive', -tearo
+ff => 0 );
    my $highlight_menu = $menubar->cascade( -label => '~Favorites', -t
+earoff =>0 );
    foreach my $category ( @CATEGORIES ) {
        $archive_menu->command( -label => $category, -command => [\&ar
+chive_pic, "$category"] );
        $highlight_menu->command( -label => $category, -command => [\&
+top_pic, "$category"] );
    }
    
    my $ban_menu = $menubar->cascade( -label => '~Ban', -tearoff => 0 
+);
    $ban_menu->command( -label => 'Ban Image', -command => \&ban_pic )
+;
}

############################################################
#
# Program flow controls

sub _open_DB_connection {
    my $dbh = DBI->

        connect( "DBI:Pg:dbname=$DB_VAR{'dbname'};" .
                 "host=$DB_VAR{'host'};",
                 $DB_VAR{'user'}, $DB_VAR{'pass'},
                 { RaiseError => 1, AutoCommit => 1}
                 ) or
                     confess( "Unable to connect to DB: $!\n" );   
    
    return $dbh;
}

sub quit { 
    my $msg = shift || 'Done threshing.';
    print "$msg\n";

    $DB_HANDLE->disconnect() or
        confess( "Error disconnecting DB: $!\n" );

    exit;
}

sub ban_pic {
    populate_image_data();
    store_image_data();
    change_image_status( 'banned' );
    delete_file( $Filename );
    display_next_image();
}

sub top_pic {
    my $category = shift;
    archive_pic( $category, 'favorites' );
}

sub archive_pic {
    my $category = shift;
    my $status = shift || undef;

    my $newpath = store_pic( $category, $PREP_DIR );
    if( $status && $status eq 'favorites') {
        store_pic( $category, $TOP_DIR );
    }

    populate_image_data( $newpath );
    store_image_data();
    change_image_status( $status ) if $status;

    delete_file( $Filename );
    display_next_image();
}

sub store_pic {
    my $category = shift;
    my $store_dir = shift;

    my $filename = basename( $Filename );
    my $newpath = "$store_dir/$category/$filename";
    $newpath = check_filename( $newpath );

    copy( $Filename, $newpath ) or
        confess( "cannot copy $Filename" );
    return $newpath;
}


############################################################
#
# File Handlers

sub check_filename {
    my $newpath = shift;
    $newpath =~ s/'//g;
    while( stat $newpath ) {
        my( $base, $dir, $ext ) = fileparse( $newpath, '\..*?' );
        if( $base =~ m/^\d$/){
            $base = "thresher_$base";
            $newpath = "$dir/$base.$ext";
        }
        elsif ( $base =~ m/\w.+\d$/ ) {
            $base =~ m/(.+)(\d+)$/;
            my( $basetext, $basenum ) = ( $1, $2 );
            $basenum = $basenum + 1;
            $newpath = "$dir/$basetext$basenum.$ext";
        }
        else {
            $base = $base . "_001";
            $newpath = "$dir/$base.$ext";
        }
    }
    return $newpath;
}

sub get_next_jpg { 
    opendir( JPG, "$SRC_DIR" ) or
        confess( "Unable to open $SRC_DIR: $!\n" );

    my @filenames = readdir( JPG );

    closedir( JPG ) or 
        confess( "Unable to close $SRC_DIR: $!\n" );

    foreach my $filename (@filenames) {
        if( $filename =~ m;jpe*g$;i ) {
            #Exit as soon as a valid file is found
            return "$SRC_DIR/$filename";
        }
    }

    #else no JPG files in $SRC_DIR
    quit( "Out of images to thresh" );
} 

sub create_archives {
    #if these directories exist, this should not overwrite them
    mkdir( $ARCH_DIR );
    mkdir( $PREP_DIR );
    mkdir( $TOP_DIR );
    foreach my $new_dir ( @CATEGORIES ) {
        mkdir( "$PREP_DIR/$new_dir" );
        mkdir( "$TOP_DIR/$new_dir" );
    }
}

sub delete_file {
    #this routine needs to interact with the database someday
    my $filename = shift;
    unlink( $filename ) or
        confess( "Cannot unlink $filename: $!\n" );
    $Message = "Deleted $filename";
    $MW->update();
}

sub create_hex_id {
    my $filename = shift;

    my $md5 = Digest::MD5->new;

    open( IMAGE, "$filename" ) or
        confess( "Unable to open $filename: $!\n" );
    $md5->add( <IMAGE> );
    close( IMAGE ) or
        confess( "Unable to close $filename: $!\n" );

    my $hex = $md5->hexdigest;
    return $hex;
}

############################################################
#
# Image Handlers

sub clear_IMAGE_data {
    my %temp_hash = ( 'hex_id'     => '',
                      'height'     => '',
                      'width'      => '',
                      'status'     => '',
                      'times_seen' => '',
                      'location'   => '',
                      'names'      => ''
                      );
    return %temp_hash;
}

sub display_next_image {
    $Filename = get_next_jpg;

    $Photo->blank;
    $Photo->configure( -file => $Filename );
    $Photo->read( $Filename );

    resize_image();

    #$Message = 'Image Loaded';
    
    verify_image();
}

sub verify_image {
    my $hex_id = create_hex_id( $Filename );
    if( get_DB_entry( $hex_id ) ) {
        $Message = 'Deleting duplicate image';
        delete_file( $Filename );
        display_next_image();
    }
}

sub populate_image_data {
    my $location = shift || $Filename;

    # the ' will mess up the SQL otherwise
    # and should only be here if the file is being banned
    # as it should have been eliminated earlier for stored files
    $location =~ s/'//g;

    my $hex_id = create_hex_id( $Filename );

    %IMAGE = ( 'hex_id'     => $hex_id,
               'height'     => $Photo->height(),
               'width'      => $Photo->width(),
               'status'     => 'archived',
               'times_seen' => '1',
               'location'   => $location,
               'names'      => basename( $location )
               );
}

sub resize_image {
    my ($img_w, $img_h) = ($Photo->width, $Photo->height);  
    my $max_width = $MW->screenwidth();
    my $max_height = $MW->screenheight() - 20;
    my $xfactor = $img_w / $max_width;
    my $yfactor = $img_h / $max_height;
    my $intfactor = $xfactor > $yfactor ? int($xfactor) : int($yfactor
+);
    $intfactor += 1;

    unless( $Sized_Photo) {
        $Sized_Photo = $Image->
            Photo( '-format' => 'jpeg',
                   -file    => $Filename );
    }
    $Sized_Photo->blank;
    $Sized_Photo->copy( $Photo, -subsample => $intfactor, -shrink);

    $Message = ( $Sized_Photo->width == $Photo->width ) ? 
        'image loaded: full-size' : "image loaded: reduced by $intfact
+or";

    $Image->configure( -image => $Sized_Photo );
}

############################################################
#
# Database Handlers

sub exec_SQL {
    my $sql = shift;
    #print "Trying:\n$sql\n\n";
    my $sth = $DB_HANDLE->prepare( $sql );
    my $rv = $sth->execute() or
        confess( "Cannot execute SQL : $!\n" );
    return $sth;
}

sub get_DB_entry {
    my $key_id = shift or
        confess( "Missing parameter: $!\n" );
    my $sql = "SELECT * FROM $DB_VAR{'table'} WHERE $DB_VAR{'key'} = '
+$key_id';";
    my $sth = exec_SQL( $sql );
    my @record = $sth->fetchrow_array;
    return @record if $record[0];
    return 0;
}

sub store_image_data {
    my $sql = "INSERT INTO $DB_VAR{'table'} values ( " .
              "'$IMAGE{'hex_id'}', " .
              "$IMAGE{'height'}, " .
              "$IMAGE{'width'}, "  .
              "'$IMAGE{'status'}', " .
              "$IMAGE{'times_seen'}, " .
              "'$IMAGE{'location'}', " .
              "'$IMAGE{'names'}' );";
    my $sth = exec_SQL( $sql );
}

sub change_image_status {
    my $status = shift;

    my $sql = "UPDATE $DB_VAR{'table'} SET status = '$status' " .
        "WHERE hex_id = '$IMAGE{'hex_id'}';";

    my $sth = exec_SQL( $sql );
}

Comment on Pic Thresher
Download Code

Back to Code Catacombs

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://60607]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others musing on the Monastery: (9)
As of 2014-12-27 19:42 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (177 votes), past polls