in reply to
Doing pair-exclusivity analysis and building a matrix
This is probably a bit long-winded but does seem to work:
# Read "<name> <comma-separated items>" records from DATA, then build
# %matrix: for each name (in input order) a row holding get_diff() of that
# name's items against every name's items, in the same order as @names.
my (@names,%hash,%matrix);
while (my $line = <DATA>) {
    chomp($line);

    # split ' ' (awk mode) skips leading whitespace, so an indented record
    # still yields the real name rather than an empty first field.
    my ($name,$list) = split(' ',$line);

    # Blank or whitespace-only lines produce no fields; skip them instead of
    # recording an undefined name and a bogus row.
    next unless defined $name;

    push(@names,$name);

    # A record with a name but no list gets an empty item list.
    $hash{$name} = [ split(',', defined $list ? $list : '') ];
}
for my $name (@names) {
    my $countries = $hash{$name};

    # Assign the whole row at once so it always has one entry per column,
    # even if the same name appears more than once in the input.
    $matrix{$name} = [ map { get_diff($countries,$hash{$_}) } @names ];
}
# get_diff(\@x, \@y)
#
# Returns the number of elements of @$x that do not occur in @$y.
# Both arguments are array references; neither array is modified.
# Repeated values in @$y have no effect on the result, and every element
# of @$x is tested individually (a repeated value in @$x that is absent
# from @$y is counted each time it appears).
sub get_diff {
    my ($x,$y) = @_;

    # Membership table for the second list only. Counting sightings across
    # both lists would mistake a value repeated inside one list for a value
    # shared between the two.
    my %in_y = map { $_ => 1 } @$y;

    # grep in scalar context yields the number of matching elements.
    my $only_in_x = grep { !$in_y{$_} } @$x;
    return $only_in_x;
}
# Emit the matrix as tab-separated text: a header line starting with "ID"
# followed by every name, then one line per name with that name's row.
my $header_cells = join("\t",@names);
print "ID\t$header_cells\n";
foreach my $row_name (@names) {
    my $row_cells = join("\t", @{ $matrix{$row_name} });
    print "$row_name\t$row_cells\n";
}
__DATA__
Name1 USA,Canada,Yemen
Name2 Canada,Portugal,India
Name3 China,HongKong,Canada
Name4 London,Amsterdam,Ireland,USA
Name5 India,USA,Canada
Output:
ID Name1 Name2 Name3 Name4 Name5
Name1 0 2 2 2 1
Name2 2 0 2 3 1
Name3 2 2 0 3 2
Name4 3 4 4 0 3
Name5 1 1 2 2 0