#! /usr/bin/perl -w
#
# Report duplicate files below the directories named on the command line.
#
# Every non-empty regular file is fingerprinted with MD5.  The first file
# seen with a given fingerprint is remembered; each later file with the same
# fingerprint is reported on STDOUT as one tab-separated line:
#
#     <size in bytes> <TAB> <first file seen> <TAB> <duplicate file>
#
# A summary (duplicate count and the space they occupy) is printed at the end.
# Files that cannot be opened are reported on STDERR and skipped.

use strict;
use warnings;

use File::Find;
use Digest::MD5;

# Binary MD5 digest => path of the first file seen with that content.
my %digest;

# Combined size, in bytes, of every file reported as a duplicate.
my $total_bytes = 0;

# Number of files reported as duplicates.
my $dups = 0;

# File::Find callback, invoked once per directory entry.
#
# File::Find sets $_ to the entry's name relative to the directory it has
# changed into, and $File::Find::name to the path including the starting
# directory.  Takes no arguments and returns nothing; results are recorded
# in %digest, $total_bytes and $dups, and duplicates are printed as found.
sub wanted {
    return unless -f $_;

    # "_" reuses the stat buffer filled by the -f test above.
    my $bytes = -s _;

    # Empty files all hash alike; they are deliberately not reported.
    return unless $bytes;

    # Three-argument open: the name comes straight from the file system, and
    # the two-argument form would strip surrounding whitespace and interpret
    # characters such as ">" or "|" in it as an open mode.
    my $in;
    if ( !open $in, '<', $_ ) {
        warn "Cannot open $_ for input: $!\n";
        return;
    }

    # Hash the raw bytes, without any platform line-ending translation.
    binmode $in;
    my $d = Digest::MD5->new->addfile($in)->digest;
    close $in;

    if ( exists $digest{$d} ) {
        print "$bytes\t$digest{$d}\t$File::Find::name\n";
        $total_bytes += $bytes;
        ++$dups;
    }
    else {
        $digest{$d} = $File::Find::name;
    }

    return;
}

# %digest is shared across all starting directories, so a file under one
# argument is reported as a duplicate of a file under an earlier argument.
foreach my $d (@ARGV) {
    print "=== directory $d\n";
    find \&wanted, $d;
}

printf "Statistics: Duplicates: %12d Bytes: %12d KBytes: %12d MBytes: %12d GBytes: %12d\n",
    $dups,
    $total_bytes,
    $total_bytes / ( 1024**1 ),
    $total_bytes / ( 1024**2 ),
    $total_bytes / ( 1024**3 );