...
37,411,688 bytes in duplicated files
real 5m1.753s
user 1m48.760s
sys 0m31.010s
####
Duplicates: 11
Bytes: 37411688
real 17m1.262s
user 6m56.060s
sys 1m59.830s
##
##
real 6m25.131s
user 2m42.310s
sys 0m32.450s
##
##
37,411,688 bytes in duplicated files
real 0m48.545s
user 0m2.150s
sys 0m1.460s
##
##
15,073,517 bytes in duplicated files
real 0m9.745s
user 0m2.610s
sys 0m1.220s
##
##
Duplicates: 998
Bytes: 15073517
real 0m51.197s
user 0m17.520s
sys 0m6.700s
##
##
real 0m18.332s
user 0m8.080s
sys 0m5.270s
##
##
15,069,331 bytes in duplicated files
real 0m12.924s
user 0m3.110s
sys 0m2.510s
##
##
#!/usr/bin/perl -w
# Usage: ./fdupes.pl <directory>
use strict;
use Term::ReadKey;
use File::Find;
# Dry-run switch: 0 runs the interactive keep/delete prompts, 1 only lists
# each duplicate set and skips all prompting and deletion.
my $testing = 0;
# Size threshold in bytes: only files strictly larger than this are
# considered. Set to zero if you like...
my $minsize = 100;
# Running totals, all starting from zero.
my $filecount    = 0;    # files large enough to be compared
my $bytecount    = 0;    # total size of every plain file seen
my $fileschecked = 0;    # files already processed by the comparison pass
my $wasted       = 0;    # bytes occupied by redundant copies
# Maps a file size to the list of paths that have that size.
my %files;
# A directory argument is mandatory; without one, show usage and exit.
usage() unless @ARGV;
# File::Find callback, run with $_ set to the current directory entry.
# Every plain file adds its size to $bytecount; files strictly larger than
# $minsize are also counted in $filecount and filed in %files under their
# size, using the path from $File::Find::name.
sub wanted {
    -f $_ or return;                       # plain files only
    my @info  = stat($_);
    my $bytes = $info[7];                  # element 7 of stat() is the size
    $bytecount += $bytes;
    $bytes > $minsize or return;           # skip small files
    $filecount++;
    push @{ $files{$bytes} }, $File::Find::name;
}
# The progress line ends in "\r" rather than "\n", so a line-buffered
# STDOUT would hold it back; switch STDOUT to autoflush so every progress
# update appears immediately.
$| = 1;
# Walk the directory given on the command line (falling back to the
# current directory) and let wanted() collect the candidate files.
find(\&wanted, $ARGV[0] || ".");
# update progress display 1000 times maximum
my $update_period = int($filecount / 1000) + 1;
# $fileschecked is still zero here, so this prints the initial
# "0/<total>" progress line.
if ($fileschecked % $update_period == 0) {
    # note \r does carriage return, but NO LINE FEED, so the next
    # progress update overwrites this one
    print "Progress: $fileschecked/$filecount\r";
}
# List of duplicate sets. Each element is an array reference holding two
# or more paths whose contents compared equal, e.g.
# @{$dupesets[0]} = (file1, file2).
my @dupesets;
foreach my $size (keys %files) {
    # Work on a copy so the per-size list in %files is left untouched.
    my @entries = @{ $files{$size} };
    if (@entries == 1) {
        # A size shared with no other file cannot be a duplicate.
        $fileschecked++;
        next;
    }
    # Several files share this size: repeatedly take the first remaining
    # file as the reference and compare every other remaining file to it.
    while (my $base = shift @entries) {
        my @matches;      # files identical to $base
        my @unmatched;    # files to be retried against a later reference
        foreach my $candidate (@entries) {
            unless (same($base, $candidate)) {
                push @unmatched, $candidate;
                next;
            }
            # A match is finished with: record it and advance progress.
            push @matches, $candidate;
            $fileschecked++;
            print "Progress: $fileschecked/$filecount\r"
                unless $fileschecked % $update_period;
            # Every copy beyond the reference file is wasted space.
            $wasted += $size;
        }
        # Only the files that did not match stay in the pool.
        @entries = @unmatched;
        push @dupesets, [ $base, @matches ] if @matches;
        # The reference file itself is now done - update progress meter.
        $fileschecked++;
        print "Progress: $fileschecked/$filecount\r"
            unless $fileschecked % $update_period;
    }
}
# Interactive phase: for every duplicate set ask which copy to keep,
# collect the others in a deletion list, confirm, delete, and finally
# report how many bytes the duplicates occupy. Nothing is printed or
# deleted when no duplicate sets were found.
if (@dupesets) {
    my @deletelist = ();
    # number of sets of duplicates
    my $dupesetcount   = scalar(@dupesets);
    my $dupesetcounter = 0;
    foreach my $setref (@dupesets) {
        if ($testing) {
            # Test mode: just list the set (the names are printed
            # back-to-back with no separator) and never prompt.
            print @$setref, "\n";
            next;
        }
        $dupesetcounter++;
        my @dupes = @$setref;
        print "Duplicates found ($dupesetcounter / $dupesetcount)",
            "... Should I keep...\n";
        # print up list of options of which file to keep (numbered from 1)
        for my $index (0 .. $#dupes) {
            print $index + 1, " : $dupes[$index]\n";
        }
        # alternative options - keep all files, skip to end
        print "0: All\n";
        print "A: Skip all remaining duplicates\n";
        # NOTE(review): one keypress is read, so presumably only entries
        # 1-9 of a set can be selected - confirm.
        my $key = _read_keypress();
        # skip any remaining dupes and get to deletion bit
        last if $key eq 'A';
        # not a number or 'A' - default to zero (ie keep all files)
        $key = '0' unless ($key =~ /^\d+$/);
        # 0 keeps everything; a number beyond the end of the list is
        # ignored as well, so nothing is deleted in either case.
        if ($key != 0 && defined $dupes[$key-1]) {
            print "you chose: ", $dupes[$key-1], "\n";
            my @list_to_delete = @dupes;
            # remove file to keep from list
            splice(@list_to_delete, $key-1, 1);
            # add rest to deletelist
            push @deletelist, @list_to_delete;
        }
        print "\n";
    }
    # confirm deletion if any files are needing deleting
    if (@deletelist) {
        print "\n------------------------\n";
        print "list of files to delete:\n";
        foreach my $file (@deletelist) {
            print "$file\n";
        }
        print "\nAre you *sure* you want to delete all these files?",
            " (Y/N)\n";
        my $key = _read_keypress();
        if (lc($key) eq 'y') {
            print "deleting\n";
            # Delete one file at a time so each failure can be reported
            # instead of being silently ignored.
            foreach my $file (@deletelist) {
                unlink($file) or warn "could not delete $file: $!\n";
            }
        } else {
            print "wussing out\n";
        }
    }
    # Insert thousands separators into the byte total before printing.
    1 while $wasted =~ s/^([-+]?\d+)(\d{3})/$1,$2/;
    print "$wasted bytes in duplicated files\n";
}

# Read one keypress from the terminal with control keys turned off.
# Blocks until a key arrives rather than polling in a busy loop, and always
# resets the tty mode afterwards. Returns the key, or '' when nothing
# could be read (e.g. end of input).
sub _read_keypress {
    ReadMode(4);                # Turn off controls keys
    my $key = ReadKey(0);       # blocking read
    ReadMode(0);                # Reset tty mode
    return defined $key ? $key : '';
}
# Routine to check equivalence in files: reports whether two files have
# byte-for-byte identical contents.
#   arguments: the paths of the two files to compare
#   returns:   1 when the contents are identical, 0 otherwise
#   dies:      when either file cannot be opened or read
# The files are compared chunk by chunk, so a difference is found without
# loading either file fully into memory.
sub same {
    my ($path_a, $path_b) = @_;
    # Lexical handles are closed automatically when the sub returns.
    open(my $fh_a, '<', $path_a) or die "Cannot open $path_a: $!\n";
    open(my $fh_b, '<', $path_b) or die "Cannot open $path_b: $!\n";
    # Compare raw bytes, with no newline or encoding translation.
    binmode($fh_a);
    binmode($fh_b);
    my $chunk_size = 65536;
    while (1) {
        my $got_a = read($fh_a, my $chunk_a, $chunk_size);
        my $got_b = read($fh_b, my $chunk_b, $chunk_size);
        die "Cannot read $path_a: $!\n" unless defined $got_a;
        die "Cannot read $path_b: $!\n" unless defined $got_b;
        # A different length or different bytes means not duplicates.
        return 0 if $got_a != $got_b || $chunk_a ne $chunk_b;
        # Both files ended together without a difference.
        return 1 if $got_a == 0;
    }
}
# Print a one-line usage summary naming the required directory argument,
# then terminate the script.
sub usage {
    print "Usage: $0 <directory>\n";
    exit;
}