# Load the taxid/locus table into %tax2loc, keyed by locus id with the taxid as
# value. Each line is tab-separated with the taxid first and the locus id second.
open (my $tax_fh,'<',$tax2locus_file) or die "Cannot open $tax2locus_file for reading: $!\n";
while(<$tax_fh>){
s/\r?\n\z//; # strip the line ending, otherwise a 2-column line leaves "\n" glued to the locus id
my($taxid,$locus)=split(/\t/,$_);
next unless (defined $locus and length $locus); # skip blank or single-column lines
$tax2loc{$locus}=$taxid;
}
close($tax_fh);
print "there are\t".scalar(keys %tax2loc)."\tlocus_ids as key in hash\n";

############### Now read in sharedTab file with pairwise overlap info

# The sharedTab file is the first command-line argument; results are written
# next to it as "<sharedTab_file>.hostinfo".
my $sharedTab_file=$ARGV[0];
die "No sharedTab file given as first command-line argument\n" unless (defined $sharedTab_file and length $sharedTab_file);
my @columns; my $prophageA; my $prophageB;
my $outfile="$sharedTab_file.hostinfo";
my $hostA; my $PFnumA;
my $hostB; my $PFnumB;
my $regex; my $matching_key;
my $taxidA; my $taxidB;

# Open the input first so a missing or unreadable input does not leave an empty
# output file behind. Three-argument open keeps odd characters in the file name
# from being interpreted as an open mode.
open(IN,'<',$sharedTab_file) or die "Cannot open $sharedTab_file for reading: $!\n";
open (OUT,'>',$outfile) or die "Cannot open $outfile for writing: $!\n";

# Header line of the output table.
print OUT "#prophageA\tprophageB\thostA\ttaxidA\thostB\ttaxidB\tjacc\n";
while(<IN>){
chomp;
next if (/^#/); # ignore comments
next unless (/\S/); # ignore blank lines
@columns=split(/\t/,$_);

# Columns 1 and 2 hold the two prophage ids; the part before the first "." is
# the host sequence id, the part after it the prophage number.
$prophageA=$columns[0];
($hostA,$PFnumA)=split(/\./,$prophageA);
$prophageB=$columns[1];
($hostB,$PFnumB)=split(/\./,$prophageB);

# Resolve each host id to its taxid via %tax2loc (locus id => taxid).
# The full host id is still what gets written to the output; only the search
# string is shortened for NZ records.
($taxidA,$taxidB)=map {
        my $host=defined $_ ? $_ : '';
        ## for wgs genomes just match first 7 characters as only NZ_XXXX000000 are in tax2locus
        my $search=($host =~ /^NZ/) ? substr($host,0,7) : $host;
        my $taxid='';
        if (length $search){ # an empty search string would match every key
                if (exists $tax2loc{$search}){
                        # exact locus id present: no need to scan all keys
                        $taxid=$tax2loc{$search};
                }
                else {
                        # Literal substring search over the locus ids. grep must be
                        # evaluated in list context: in scalar context it returns the
                        # number of matches rather than the matching key.
                        my @hits=grep { index($_,$search) >= 0 } keys %tax2loc;
                        # sort so the choice is reproducible when several keys match
                        my ($hit)=sort @hits;
                        $taxid=$tax2loc{$hit} if (defined $hit and defined $tax2loc{$hit});
                }
        }
        $taxid; # empty string when no locus id matches
} ($hostA,$hostB);

my $jacc=$columns[5]; # 6th column of the sharedTab line (presumably the Jaccard value -- confirm)
print OUT join("\t",$prophageA,$prophageB,$hostA,$taxidA,$hostB,$taxidB,$jacc)."\n";