#!/usr/bin/env perl
#
# Walks the directories given on the command line, groups regular files that
# share the same size, computes md5 only for those candidates, and prints the
# groups whose md5 matches. Afterwards lists the objects flagged by the
# FileDups "big" and "unread" methods.
#
# References:
#   http://drdobbs.com/web-development/184416070

use strict;
use warnings;
use File::Find;
use lib qw(lib);
use FileDups;
use Data::Dumper;

my %dup;                     # md5 => [ [size, name, pathname], ... ]
my %sizes;                   # size => 1 if seen more than once, otherwise 0
my @object;                  # every FileDups object created by wanted()
my $number_files     = 0;    # regular files visited so far
my $number_size_dups = 0;    # files that share their size with another file

my $max_size = 99999999;     # Size above which md5 will not be calculated
my $return   = "Press return to continue \n\n";
my $line     = "-" x 70 . "\n";

# The progress counters end in "\r" (no newline); without autoflush they stay
# in the output buffer and are not shown while the scan is running.
$| = 1;

die "Usage: $0 DIRECTORY [DIRECTORY ...]\n" unless @ARGV;

# Validate every argument before scanning, so a bad argument is reported
# immediately instead of after a long traversal of the earlier ones.
for my $dir (@ARGV) {
    die "\"$dir\" is not a directory. Give me a directory to search\n"
        unless -d $dir;
}

# Find and classify files.
File::Find::find(\&wanted, @ARGV);
print "\n";

# Calculate md5 only for files whose size is shared with another file.
for my $file (@object) {
    next unless $sizes{ $file->size } == 1;
    $number_size_dups += 1;
    print "$number_size_dups Files with the same size \r";
    $file->dupe(1);    # The object has another object with the same size
    $file->md5;        # Calculates md5
}

# Group the same-size candidates by md5; push autovivifies the array ref.
for my $file (@object) {
    next unless $file->dupe == 1;
    push @{ $dup{ $file->md5 } },
        [ $file->size, $file->name, $file->pathname ];
}

print "\n\nDuplicated files\n $line $return";
my $pause = <STDIN>;    # wait for the user; @ARGV is not a list of input files

for my $md5 (sort keys %dup) {    # sort groups by md5sum
    my @group = @{ $dup{$md5} };
    next unless @group > 1;       # a single entry is not a duplicate
    printf("\n%8s %10.10s %s\n", "Size", "Name", "Pathname");
    for my $entry (@group) {      # $entry is [size, name, pathname]
        printf("%8d %10.10s %s\n", @{$entry});
    }
}

list_files("Big files",    "big",    @object);    # List big files
list_files("Unread files", "unread", @object);    # List unread files

# File::Find callback: wraps each regular file in a FileDups object, records
# whether its size has been seen before in %sizes, and appends it to @object.
sub wanted {
    return unless -f $_;

    my $file = FileDups->new(
        name     => $_,
        pathname => $File::Find::name,
        max_size => $max_size,
    );
    $number_files += 1;
    print "$number_files Files seen\r";

    my $size = $file->size;
    if ($size >= $max_size) {
        # Big file: never a candidate for md5.
        # NOTE(review): the original tested "== $max_size", which presumably
        # relies on FileDups clamping size to max_size - confirm. ">=" gives
        # the same result in that case and also covers an unclamped size.
        $sizes{$size} = 0;
    }
    elsif (exists $sizes{$size}) {
        $sizes{$size} = 1;    # There is more than one file with this size
    }
    else {
        $sizes{$size} = 0;    # This is a new size value, not duplicated
    }

    push @object, $file;
    return;
}

# Prints a titled listing of the objects for which the named method is true.
#   $title    - heading printed above the listing
#   $criteria - name of the method called on each object ("big" or "unread"
#               at the call sites in this script)
#   @object   - the objects to test
# Waits for a line on STDIN after the heading. Returns the result of the
# final print, as the original did.
sub list_files {
    my ($title, $criteria, @object) = @_;

    print "\n \n $title \n" . $line;
    my $pause = <STDIN>;

    for my $file (@object) {
        next unless $file->$criteria;
        printf(" %10.10s %s\n", $file->name, $file->pathname);
    }
    return print $line;
}