use strict;
use warnings;

use Digest::MD5 qw( md5_hex );
use MLDBM;
use Fcntl;

# Report every line of the second input file whose first ten
# comma-terminated fields are identical to those of a line in the first
# input file.
#
# The first file is indexed into an on-disk MLDBM hash (files 'tempdb*')
# keyed by the MD5 hex digest of that ten-field prefix; each value is
# [ line number, chomped line ].  The second file is then checked against
# that index and every match is printed to STDOUT.

my $first_file  = 'infile1.txt';
my $second_file = 'infile2.txt';

tie my %record_for, 'MLDBM', 'tempdb', O_CREAT|O_RDWR, 0640
    or die "Cannot tie 'tempdb': $!";

# First file: populate the hash.  A whole array ref is assigned to the
# top-level key, so a later line with the same prefix replaces the earlier
# record.
process_file(
    $first_file,
    \%record_for,
    sub {
        my ( $tied, $hash, $line, $line_no ) = @_;
        $tied->{$hash} = [ $line_no, $line ];
        return;
    },
);

# Second file: check each line for a collision with the first file.
process_file(
    $second_file,
    \%record_for,
    sub {
        my ( $tied, $hash, $line, $line_no ) = @_;
        return unless exists $tied->{$hash};

        # Fetch the stored record once; every read of a tied MLDBM value
        # goes through the tie and would otherwise be repeated.
        my $first = $tied->{$hash};

        print "\nCollision: $first_file line $first->[0]:\n",
              "\t($first->[1])\n",
              "-- collides with: $second_file line $line_no:\n",
              "\t($line)\n\n";
        return;
    },
);

# Clean up at program exit: release the tie, then delete the temporary
# database files.  Note that the glob removes every file matching
# 'tempdb.*' in the current directory.
END {
    untie %record_for;
    unlink glob 'tempdb.*';
}

# process_file( $filename, $tied, $code )
#
# Reads $filename line by line.  For each line that contains at least ten
# commas, computes the MD5 hex digest of the leading ten comma-terminated
# fields and calls
#
#     $code->( $tied, $hash, $line, $line_no )
#
# where $tied is passed through untouched, $line is the chomped line and
# $line_no is its 1-based line number within $filename.  Lines with fewer
# than ten commas are skipped.  The callback's return value is ignored.
#
# Dies if $filename cannot be opened.
sub process_file {
    my ( $filename, $tied, $code ) = @_;

    open my $infh, '<', $filename
        or die "Cannot open '$filename' for reading: $!";

    while ( my $line = <$infh> ) {

        # Capture the line number now so callbacks do not have to rely on
        # the global $. still referring to this handle.
        my $line_no = $.;

        # Anchored at the start: ten fields can only ever match from
        # offset 0, so \A just avoids pointless retries at later offsets.
        my ( $wanted ) = $line =~ m/\A((?:[^,]*,){10})/;
        next unless defined $wanted;

        chomp $line;
        my $hash = md5_hex( $wanted );
        $code->( $tied, $hash, $line, $line_no );
    }

    close $infh;
    return;
}