I finally came up with a solution: you can choose which field to diff, and the script then prints a diff report based on that field.
use strict;
use warnings;
# At least two input files are required: the script exists to compare
# files against each other. The message is kept on a single line so that
# it is printed exactly as written.
if (@ARGV < 2) {
    die "usage csvout_compare.pl <csvoutout1> <csvout2> <csvoutout3>.....\n";
}
# State shared by the load, compare and report phases of the script.
my $f=0;                 # count of files read so far; doubles as the 1-based file id
my %allhash;             # every row key seen in any input file
my @indexes_to_print;    # row keys selected for the report (may hold duplicates)
my %data;                # $data{file id}{row key} = raw, chomped CSV line
my @line;                # fields of the line currently being parsed
my $index;               # row key: first two CSV fields joined with "."

# Load phase: read every file named on the command line and remember each
# row under its key. If a key occurs more than once in the same file, the
# last occurrence wins.
foreach my $files (@ARGV) {
    $f++;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle is not a package global.
    # $! is included so the user sees why the open failed.
    open(my $fh, '<', $files) or die "Cannot open $files: $!\n";
    print "Opened $files \n";

    while (my $row = <$fh>) {
        next if $row =~ /csvout_ID/;    # presumably the header row; never treated as data
        chomp $row;

        # A blank line has no fields; it would otherwise be stored under the
        # bogus key "." and trigger "uninitialized value" warnings.
        next unless $row =~ /\S/;

        @line  = split(/,/, $row);
        $index = $line[0] . "." . $line[1];
        $allhash{$index}  = 1;
        $data{$f}{$index} = $row;
    }
    close $fh;
}
# Compare phase: decide which row keys go into the report.
# A key is selected when either
#   (a) it is missing from at least one input file, or
#   (b) it is present in every file but its "final result" field (the third
#       comma-separated field, index 2) is not identical in all of them.
foreach my $key (keys %allhash) {

    # (a) Keys that are not in all files.
    # Walk the file ids 1..$f rather than keys %data: a file that contributed
    # no rows has no entry in %data at all, yet it is still missing the key.
    my $missing = 0;
    foreach my $file_id (1 .. $f) {
        # Test the outer level first so the check does not autovivify
        # an empty $data{$file_id} entry.
        unless (exists $data{$file_id} && exists $data{$file_id}{$key}) {
            $missing = 1;
            last;
        }
    }
    if ($missing) {
        push @indexes_to_print, $key;
        next;
    }

    # (b) Keys whose final result differs between files.
    # Comparing each file with the one before it is sufficient: if every
    # adjacent pair is equal, all values are equal.
    my $previous;
    foreach my $file_id (1 .. $f) {
        my @fields = split(/,/, $data{$file_id}{$key});

        # split drops trailing empty fields, so a row with an empty or
        # absent result field yields undef here; treat that as "".
        my $result = defined $fields[2] ? $fields[2] : '';

        # String inequality, not a regex match: with !~ the previous value
        # would be used as a pattern, so "PASS" vs "PAS" would count as
        # equal and regex metacharacters in the data would be interpreted.
        if (defined $previous && $result ne $previous) {
            push @indexes_to_print, $key;
            last;
        }
        $previous = $result;
    }
}
# Report phase: print one CSV row per selected key.
# Row layout: the key, then for every input file its 1-based file id
# followed by fields 2..10 of that file's line for the key. Fields the line
# does not have are printed empty; a file without the key gets "NA" in the
# first field position and empty fields after it.

# Uniquify @indexes_to_print. The keys are sorted so the report comes out
# in the same order on every run instead of in hash order.
my %seen = map { $_ => 1 } @indexes_to_print;
my @unique_indexes = sort keys %seen;

my $first_col = 2;     # first field index included in the report
my $last_col  = 10;    # last field index included in the report

foreach my $index (@unique_indexes) {
    my @cells = ($index);

    for my $i (1 .. $f) {
        push @cells, $i;

        # Test the outer level first so a file that contributed no rows is
        # not autovivified into %data by the lookup.
        if (exists $data{$i} && exists $data{$i}{$index}) {
            my @temp = split(/,/, $data{$i}{$index});
            #print ",$i,$temp[2]";
            push @cells,
                map { defined $temp[$_] ? $temp[$_] : '' } $first_col .. $last_col;
        }
        else {
            # Same number of cells as a present row: "NA" plus empty padding.
            push @cells, 'NA', ('') x ($last_col - $first_col);
        }
    }

    print join(',', @cells), "\n";
}