#!/pro/bin/perl

# dups.pl - report files with identical content using cached MD5 checksums.
#
# Without options, everything matched by "*" in the current directory is
# traversed recursively.  Each plain file is checksummed (checksums are
# cached by name in the DB_File database "dups.md5" in the current
# directory) and every set of files sharing a checksum is written as one
# tab separated line, piped through sort(1).
#
# With -q nothing is scanned: each remaining argument is used as a case
# insensitive regular expression and the names stored in the database that
# match it are listed, one per line.
#
# Example session:
#
#   $ cp tshirt.jpg duplicate.image
#   $ dups.pl
#   I've MD5'd 191 files to 191 checksums
#   ./3.jpg ./image00111.jpg
#   ./4.jpg ./image00222.jpg
#   ./duplicate.image ./tshirt.jpg
#   $ dups.pl -q image
#   ./duplicate.image
#   ./image00111.jpg
#   ./image00222.jpg
#   ./image00222surf.jpg
#   ./image00333.jpg
#   ./image00554.jpg
#   ./image00665.jpg
#   ./image00776.jpg
#   ./image00887.jpg
#   ./image009.jpg
#   ./image00998.jpg
#   ./image010109.jpg
#   ./image011.jpg
#   ./image0121210.jpg
#   $

use strict;
use warnings;

use Digest::MD5 qw( md5_hex );
use DB_File;
use File::Find;
use Getopt::Long qw(:config bundling nopermute);

my $opt_q = 0;	# Query the database
GetOptions (
    "q"	=> \$opt_q,
    ) or die "usage: dups.pl [-q]\n";

# checksum => [ names of all files seen in this run with that checksum ]
my %sum;

# Persistent cache: file name => MD5 hex digest.
# Without the check a failing tie would go unnoticed and leave %md5 a plain
# hash, so nothing would ever be cached.
tie my %md5, "DB_File", "dups.md5"
    or die "dups.md5: cannot open checksum database: $!\n";

if ($opt_q) {
    my @db = sort keys %md5;
    untie %md5;
    foreach my $pat (@ARGV) {
	# Compile once per pattern and report a broken pattern by name
	# instead of dying with a bare regex compilation error.
	my $rx = eval { qr/$pat/i }
	    or die "dups.pl: invalid pattern '$pat': $@";
	print "$_\n" for grep { $_ =~ $rx } @db;
	}
    exit;
    }

my $nfile = 0;
find (sub {
    # A directory that carries its own dups.md5 contributes its cached
    # checksums (keys prefixed with the directory's path); entries already
    # present in the main database win.
    if (-d and -f "$_/dups.md5") {
	if (tie my %d5, "DB_File", "$_/dups.md5") {
	    foreach my $f (keys %d5) {
		$md5{"$File::Find::name/$f"} //= $d5{$f};
		}
	    untie %d5;
	    }
	else {
	    warn "$File::Find::name/dups.md5: $!\n";
	    }
	}

    -f or return;

    # Names are recorded without a leading "_new/"
    (my $f = $File::Find::name) =~ s:^_new/::;
    printf STDERR " %6d %-70.70s\r", ++$nfile, $f;

    # Known name: trust the cached checksum, do not read the file again
    if (exists $md5{$f}) {
	push @{$sum{$md5{$f}}}, $f;
	return;
	}

    # Three-arg open: the name is taken literally (two-arg open strips
    # surrounding whitespace from it). :raw makes sure the digest is taken
    # over the unmodified bytes, and addfile streams the content instead of
    # slurping the complete file into memory.
    open my $fh, "<:raw", $_ or die "$f: $!\n";
    my $sum = Digest::MD5->new->addfile ($fh)->hexdigest;
    close $fh;
    push @{$sum{$md5{$f} = $sum}}, $f;
    }, sort glob "*");

print STDERR "I've MD5'd $nfile files to ", scalar keys %md5, " checksums\n";

# The report lines are ordered by an external sort. List-form pipe open
# bypasses the shell, and a missing sort is now reported instead of
# silently losing all output.
open STDOUT, "|-", "sort" or die "sort: $!\n";
foreach my $r (values %sum) {
    my @p = @$r;
    @p > 1 or next;	# Unique content is not reported

    # When the first name has an all-digit directory component, order the
    # set by the first two numbers found in each name (a missing number
    # counts as 0), then by the name itself.
    $p[0] =~ m{(?:^|/)\d+/} and @p =
	map  { $_->[0] }
	sort { $a->[1] <=> $b->[1] or
	       $a->[2] <=> $b->[2] or
	       $a->[0] cmp $b->[0] }
	map  { [ $_, (m/(\d+)\b/g), 0, 0, 0 ] }
	@p;

    print join ("\t", @p), "\n";
    }
# Buffered write errors and a failing sort only surface on close
close STDOUT or die "sort: ", ($! || "exit status $?"), "\n";

untie %md5;