in reply to
file delta detection
All of this assumes that the key value (the third |-separated field of each record) is unique within each file!
Sorry about the long lines ...
#!
# Just For A Lark
use strict;
use warnings;
use Digest::MD5;
# Overall flow:
#   1. Read X and remember a digest (see SizeAndMD5) of every record,
#      keyed by the record's third '|'-separated field.
#   2. Read Y, remember its digests the same way and write Y.new, where
#      every record gets one extra trailing field:
#        D - the key does not occur in X
#        U - the key occurs in X but the record differs
#        (empty) - the record is identical in X
#   3. Read X again and write X.new with the same scheme, using I for
#      keys that do not occur in Y.
# Keys are assumed to be unique within each file; with duplicate keys the
# last record read wins in the lookup hashes.

# Hash for X
my %X_h;
{
    open(my $IN, '<', 'X')
        or die "Can't open 'X'! $!";
    # Populate hash with third field and SizeAndMD5
    while (my $record = <$IN>) {
        chomp $record;
        $X_h{ _record_key($record) } = SizeAndMD5($record);
    }
    close($IN)
        or die "Can't close 'X'! $!";
}
### %X_h
# Processing Y
# Hash for Y
my %Y_h;
{
    open(my $IN, '<', 'Y')
        or die "Can't open 'Y'! $!";
    open(my $OUT, '>', 'Y.new')
        or die "Can't open 'Y.new'! $!";
    # Populate hash with third field and SizeAndMD5, flagging each record
    # against X as we go
    while (my $record = <$IN>) {
        chomp $record;
        my $key = _record_key($record);
        $Y_h{$key} = SizeAndMD5($record);
        my $flag = _delta_flag($key, $Y_h{$key}, \%X_h, 'D');
        print {$OUT} "$record|$flag\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($OUT)
        or die "Can't close 'Y.new'! $!";
    close($IN)
        or die "Can't close 'Y'! $!";
}
### %Y_h
# Re-processing X
{
    open(my $IN, '<', 'X')
        or die "Can't open 'X'! $!";
    open(my $OUT, '>', 'X.new')
        or die "Can't open 'X.new'! $!";
    # %X_h is already populated; only flag each record against Y
    while (my $record = <$IN>) {
        chomp $record;
        my $key = _record_key($record);
        my $flag = _delta_flag($key, $X_h{$key}, \%Y_h, 'I');
        print {$OUT} "$record|$flag\n";
    }
    close($OUT)
        or die "Can't close 'X.new'! $!";
    close($IN)
        or die "Can't close 'X'! $!";
}
exit;

# Returns the third '|'-separated field of a record, or '' when the record
# has fewer than three fields.
sub _record_key {
    my ($record) = @_;
    # The separator must be written as an escaped regex: a plain '|' is an
    # empty alternation and would split the record into single characters.
    # The limit of 4 stops splitting once the third field is isolated and
    # keeps an empty third field instead of dropping it.
    my $key = (split /\|/, $record, 4)[2];
    return defined $key ? $key : '';
}

# Chooses the flag to append to a record.
#   $key          - key of the record being written
#   $own_digest   - digest of that record
#   $other_for    - hash ref: key => digest for the file compared against
#   $missing_flag - flag to use when the key is absent from the other file
# Returns $missing_flag, 'U' (digests differ) or '' (identical).
sub _delta_flag {
    my ($key, $own_digest, $other_for, $missing_flag) = @_;
    return $missing_flag unless exists $other_for->{$key};
    return $own_digest eq $other_for->{$key} ? '' : 'U';
}
# Returns Size and MD5 of the string as "%8.8lx%32.32s"
#
# Argument: the string to fingerprint.
# Returns:  a 40-character string: the length of the argument as 8
#           zero-padded hex digits followed by its 32-digit hex MD5.
# Combining length and MD5 gives a compact value that can be compared with
# eq/ne to decide whether two records are identical.
# NOTE(review): Digest::MD5 is documented to croak on strings containing
# wide characters; callers are presumably passing raw bytes - confirm.
sub SizeAndMD5 {
    my ($string) = @_;
    return sprintf(
        "%8.8lx%32.32s",
        length($string),
        Digest::MD5::md5_hex($string),
    );
}