The code lost me a bit...
I would try to avoid this $1, $2 stuff..
Use match global or split and list assignment..
Anyway, here are some hints on how to post runnable code: the easier your code is to run, and the more clearly you explain why it is not producing the right result, the better the answers you will get.
#!/usr/bin/perl -w
use strict;
use Data::Dumper;
# In-line sample data.  Each scalar below holds the text of one short
# "file", so the whole example runs without any external input files.
# Single-quoted heredocs keep the text exactly as written.
my $uniprot = <<'END_UNIPROT';
Q6GZX4 ORFNames=FV3-001R ;PF04947
Q6GZX3 ORFNames=FV3-002L ;PF03003
Q197F8 ORFNames=IIV3-002R
Q197F7 ORFNames=IIV3-003L
Q6GZX2 ORFNames=FV3-003R
Q6GZX1 ORFNames=FV3-004R
Q197F5 ORFNames=IIV3-005L
END_UNIPROT

my $Activator = <<'END_ACTIVATOR';
Q6GZX4 | PF04947.9
Q96355 | PF01486.12 PF00319.13
Q96356 | PF01486.12 PF00319.13
Q39371 | PF01486.12 PF00319.13
END_ACTIVATOR

my $anti_oxidant = <<'END_ANTI_OXIDANT';
0EYG3 | PF10262.4
E7QVU5 | PF10417.4 PF00578.16
D1JAS4 | PF10417.4 PF00578.16
END_ANTI_OXIDANT

my $toxin = <<'END_TOXIN';
C7T183 | PF02950.12
C7T1P5 | PF02950.12
E2E4E4 | PF00918.12
A2PU44 | PF01375.12
END_TOXIN
# Open the input "files".
# Passing a reference to a scalar as the third argument of open() makes
# Perl read from that scalar's contents as if it were a file on disk.
open my $UNIPROT_IN, '<', \$uniprot
    or die "cannot open uniprot for reading ", "$!\n";

open my $ACTIVATOR_IN, '<', \$Activator
    or die "cannot open activator for reading ", "$!\n";

open my $ANTIOX_IN, '<', \$anti_oxidant
    or die "cannot open anti_oxidant for reading ", "$!\n";

open my $TOXIN_IN, '<', \$toxin
    or die "cannot open toxin for reading ", "$!\n";
# In-memory output "files"; the real application would write to actual
# files on disk instead.
my ( $activator_out, $antiox_out, $toxin_out );

# Open the output "files".
# A reference to a scalar can also be opened for writing: everything
# printed to the handle is stored in that scalar.
open my $ACTIVATOR_OUT, '>', \$activator_out
    or die "cannot open activator_out for write! ", "$!\n";

open my $ANTIOX_OUT, '>', \$antiox_out
    or die "cannot open antiox_out for write! ", "$!\n";

open my $TOXIN_OUT, '>', \$toxin_out
    or die "cannot open toxin_out for write! ", "$!\n";
# I personally prefer all caps for file handles
# If you don't, then I'm fine with that.
# This part seems odd, because you only get the first PF value of each
# line... I am just guessing that the other PF values do mean something?

# Read "ID | PFxxxxx.v ..." lines from the open handle $fh and return a
# flat (ID => PF) list suitable for assigning to a hash.  Only the first
# PF accession of a line is kept, truncated to "PF" plus five characters
# (so the ".version" suffix is dropped).  If an ID occurs more than once,
# the last occurrence wins.
sub first_pf_by_id {
    my ($fh) = @_;

    my %pf_for;
    while ( my $line = <$fh> ) {

        # $1 and $2 are only meaningful after a successful match, so skip
        # blank or malformed lines instead of storing whatever the
        # capture variables happened to hold beforehand.
        next unless $line =~ /(.+)\s+\|\s+(PF.{5})/;
        $pf_for{$1} = $2;
    }
    return %pf_for;
}

my %activ  = first_pf_by_id($ACTIVATOR_IN);
my %antiox = first_pf_by_id($ANTIOX_IN);
my %toxin  = first_pf_by_id($TOXIN_IN);
#
# Data::Dumper (a standard part of Perl) is an easy way to print the
# contents of a Perl data structure.  Each hash is dumped in turn,
# preceded by its banner line.
#
for my $report (
    [ "dumping the active hash...\n", \%activ ],
    [ "dumping the anitox hash...\n", \%antiox ],
    [ "dumping the toxin hash...\n",  \%toxin ],
) {
    my ( $banner, $hash_ref ) = @$report;
    print $banner, Dumper($hash_ref);
}
print "###########\n";
# Perhaps you meant something like this instead: keep *every* PF value
# of a line (as an array reference) rather than only the first one?
print "\nAnother Possible Data Structure...\n";

# Rewind each input handle so that the same data can be read again.
seek( $ACTIVATOR_IN, 0, 0 );
seek( $ANTIOX_IN,    0, 0 );
seek( $TOXIN_IN,     0, 0 );

# Read "ID | PF... PF..." lines from the open handle $fh and return a
# flat (ID => [PF values]) list suitable for assigning to a hash.
# There are many ways to do the regex, or to use split; this one simply
# collects every run of word characters and dots on the line: the first
# token is the ID and all remaining tokens are its PF values.  If an ID
# occurs more than once, the last occurrence wins.
sub all_pf_by_id {
    my ($fh) = @_;

    my %pfs_for;
    while ( my $line = <$fh> ) {
        my ( $id, @pf_values ) = $line =~ /([\w\.]+)/g;

        # A blank line (or one without any token) yields no ID; skip it
        # rather than creating an entry under an undefined key.
        next unless defined $id;

        # @pf_values is a fresh lexical on every pass, so taking a
        # reference to it is safe.
        $pfs_for{$id} = \@pf_values;
    }
    return %pfs_for;
}

my %activ_2  = all_pf_by_id($ACTIVATOR_IN);
my %antiox_2 = all_pf_by_id($ANTIOX_IN);
my %toxin_2  = all_pf_by_id($TOXIN_IN);

print Dumper \%activ_2;
print Dumper \%antiox_2;
print Dumper \%toxin_2;
# this part here:
# /.{6})\s+.+=([^\s]+)/
# just completely lost me, sorry about that
__END__
Prints:
dumping the active hash...
$VAR1 = {
'Q96355' => 'PF01486',
'Q96356' => 'PF01486',
'Q6GZX4' => 'PF04947',
'Q39371' => 'PF01486'
};
dumping the anitox hash...
$VAR1 = {
'E7QVU5' => 'PF10417',
'D1JAS4' => 'PF10417',
'0EYG3' => 'PF10262'
};
dumping the toxin hash...
$VAR1 = {
'E2E4E4' => 'PF00918',
'A2PU44' => 'PF01375',
'C7T183' => 'PF02950',
'C7T1P5' => 'PF02950'
};
###########
Another Possible Data Structure...
$VAR1 = {
'Q96355' => [
'PF01486.12',
'PF00319.13'
],
'Q96356' => [
'PF01486.12',
'PF00319.13'
],
'Q6GZX4' => [
'PF04947.9'
],
'Q39371' => [
'PF01486.12',
'PF00319.13'
]
};
$VAR1 = {
'E7QVU5' => [
'PF10417.4',
'PF00578.16'
],
'D1JAS4' => [
'PF10417.4',
'PF00578.16'
],
'0EYG3' => [
'PF10262.4'
]
};
$VAR1 = {
'E2E4E4' => [
'PF00918.12'
],
'A2PU44' => [
'PF01375.12'
],
'C7T183' => [
'PF02950.12'
],
'C7T1P5' => [
'PF02950.12'
]
};