use strict;
use warnings;
use Data::Dumper;

# Annotate every row of a variants file with how it overlaps a validations
# file.
#
# Usage: perl <this script> VALIDATIONS_FILE VARIANTS_FILE
#
# Both inputs are read as tab-separated text. Only three columns are used
# (0-based): column 0 (treated as the sample), column 2 (the position /
# amino acid) and column 5 (the gene name).
#
# For each row of VARIANTS_FILE the original row is printed to STDOUT with
# two extra tab-separated columns appended:
#   1. the number of distinct samples in VALIDATIONS_FILE that carry the
#      same gene (0 if the gene does not occur there);
#   2. the number of VALIDATIONS_FILE rows with the same gene AND the same
#      position (0 if that combination does not occur there).

die "Usage: $0 VALIDATIONS_FILE VARIANTS_FILE\n" if @ARGV < 2;
my ($validations_path, $variants_path) = @ARGV;

my %samples_of;    # gene => { sample   => 1 }
my %times_seen;    # gene => { position => number of validation rows }

# Pass 1: index the validations file.
open(my $validations_fh, '<', $validations_path)
    or die "Cannot open '$validations_path': $!\n";
while (my $line = <$validations_fh>) {
    chomp $line;
    my @fields = split /\t/, $line;

    # A row without a gene column cannot contribute to either index.
    next if @fields < 6;

    my ($sample, $position, $gene) = @fields[0, 2, 5];
    $samples_of{$gene}{$sample} = 1;

    # Positions are keyed directly under the gene (not under the sample), so
    # the gene/position lookup in pass 2 does not depend on sample identity.
    $times_seen{$gene}{$position}++;
}
close($validations_fh);

# Pass 2: annotate the variants file, one output row per input row.
open(my $variants_fh, '<', $variants_path)
    or die "Cannot open '$variants_path': $!\n";
while (my $line = <$variants_fh>) {
    chomp $line;
    my @fields = split /\t/, $line;

    # The slice yields undef for missing columns instead of dying, so short
    # rows simply fall through to the "no overlap" result.
    my ($position, $gene) = @fields[2, 5];

    my $sample_count  = 0;
    my $variant_count = 0;

    if (defined $gene && exists $samples_of{$gene}) {
        $sample_count = scalar keys %{ $samples_of{$gene} };

        # Only probe the nested hash after the gene is known to exist, so
        # the lookup never autovivifies new gene entries.
        if (defined $position && exists $times_seen{$gene}{$position}) {
            $variant_count = $times_seen{$gene}{$position};
        }
    }

    print join("\t", $line, $sample_count, $variant_count), "\n";
}
close($variants_fh);