#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

# Demonstrates two things:
#   1. how to treat a Perl scalar as a "file" (in-memory filehandles), and
#   2. two ways of turning "ACCESSION | PFxxxxx.v [PFxxxxx.v ...]" lines
#      into a hash keyed by accession.

# Sort hash keys when dumping so the output is the same on every run
# (Perl's hash order is otherwise unspecified).
$Data::Dumper::Sortkeys = 1;

# first_pf_by_accession($fh)
#
# Reads "ACCESSION | PFxxxxx.v ..." lines from $fh and returns a flat
# key/value list (suitable for assigning to a hash) that maps each
# accession to its FIRST PF identifier, truncated to "PF" plus five
# characters (i.e. without the ".version" suffix).
#
# Lines that do not match are skipped; using $1/$2 after a failed match
# would otherwise insert a bogus entry.
sub first_pf_by_accession {
    my ($fh) = @_;

    my %pf_of;
    while ( my $line = <$fh> ) {
        next unless $line =~ /(.+)\s+\|\s+(PF.{5})/;
        $pf_of{$1} = $2;
    }
    return %pf_of;
}

# all_pf_by_accession($fh)
#
# Reads the same kind of lines from $fh and returns a flat key/value list
# that maps each accession to an array reference holding ALL of the PF
# identifiers on that line (version suffix included).
#
# Lines without any token (e.g. blank lines) are skipped.
sub all_pf_by_accession {
    my ($fh) = @_;

    my %pfs_of;
    while ( my $line = <$fh> ) {
        # The first token is the accession; every later token is a PF id.
        # The "|" separator is not matched by [\w.] and so drops out.
        my ( $accession, @pf_ids ) = $line =~ /([\w\.]+)/g;
        next unless defined $accession;
        $pfs_of{$accession} = \@pf_ids;
    }
    return %pfs_of;
}

# This is a way to specify short files within a Perl program.

my $uniprot = <<'END_UNIPROT';
Q6GZX4 ORFNames=FV3-001R ;PF04947
Q6GZX3 ORFNames=FV3-002L ;PF03003
Q197F8 ORFNames=IIV3-002R
Q197F7 ORFNames=IIV3-003L
Q6GZX2 ORFNames=FV3-003R
Q6GZX1 ORFNames=FV3-004R
Q197F5 ORFNames=IIV3-005L
END_UNIPROT

my $activator = <<'END_ACTIVATOR';
Q6GZX4 | PF04947.9
Q96355 | PF01486.12 PF00319.13
Q96356 | PF01486.12 PF00319.13
Q39371 | PF01486.12 PF00319.13
END_ACTIVATOR

my $anti_oxidant = <<'END_ANTIOX';
0EYG3 | PF10262.4
E7QVU5 | PF10417.4 PF00578.16
D1JAS4 | PF10417.4 PF00578.16
END_ANTIOX

my $toxin = <<'END_TOXIN';
C7T183 | PF02950.12
C7T1P5 | PF02950.12
E2E4E4 | PF00918.12
A2PU44 | PF01375.12
END_TOXIN

# Open the input "files".
# A Perl scalar can be opened for reading by passing a reference to it,
# and this is how to do it...
open( my $UNIPROT_IN, '<', \$uniprot )
    or die "cannot open uniprot for reading $!\n";
open( my $ACTIVATOR_IN, '<', \$activator )
    or die "cannot open activator for reading $!\n";
open( my $ANTIOX_IN, '<', \$anti_oxidant )
    or die "cannot open anti_oxidant for reading $!\n";
open( my $TOXIN_IN, '<', \$toxin )
    or die "cannot open toxin for reading $!\n";

# Some local output "files"...
# Use "real files" in the actual application.
my $activator_out;
my $antiox_out;
my $toxin_out;

# Open the output "files".
# A Perl scalar can be opened for write in the same way,
# and this is how to do that...
open( my $ACTIVATOR_OUT, '>', \$activator_out )
    or die "cannot open activator_out for write! $!\n";
open( my $ANTIOX_OUT, '>', \$antiox_out )
    or die "cannot open antiox_out for write! $!\n";
open( my $TOXIN_OUT, '>', \$toxin_out )
    or die "cannot open toxin_out for write! $!\n";

# I personally prefer all caps for file handles.
# If you don't, then I'm fine with that.

# This part seems odd, because you only get the first PF value...
# I'm just guessing that these other PF values do mean something?
my %activ  = first_pf_by_accession($ACTIVATOR_IN);
my %antiox = first_pf_by_accession($ANTIOX_IN);
my %toxin  = first_pf_by_accession($TOXIN_IN);

# This is a way to print the contents of a Perl data structure.
# Data::Dumper is a standard part of Perl.
print "dumping the active hash...\n";
print Dumper \%activ;
print "dumping the anitox hash...\n";
print Dumper \%antiox;
print "dumping the toxin hash...\n";
print Dumper \%toxin;

print "###########\n";

# Perhaps you meant something like this???
print "\nAnother Possible Data Structure...\n";

# The handles are at end-of-file after the first pass, so each one has
# to be rewound before it can be read again.
seek( $ACTIVATOR_IN, 0, 0 ) or die "cannot rewind activator: $!\n";
seek( $ANTIOX_IN,    0, 0 ) or die "cannot rewind anti_oxidant: $!\n";
seek( $TOXIN_IN,     0, 0 ) or die "cannot rewind toxin: $!\n";

# There are many ways to do the regex or use split...
# but it seems to me that somehow you need all of the PF values,
# not just the first one???
my %activ_2  = all_pf_by_accession($ACTIVATOR_IN);
my %antiox_2 = all_pf_by_accession($ANTIOX_IN);
my %toxin_2  = all_pf_by_accession($TOXIN_IN);

# Nothing is read or written after this point, so release every handle.
# (The uniprot input and the three outputs are only opened to show the
# technique; this demo never reads or writes them.)
for my $fh (
    $UNIPROT_IN,    $ACTIVATOR_IN, $ANTIOX_IN, $TOXIN_IN,
    $ACTIVATOR_OUT, $ANTIOX_OUT,   $TOXIN_OUT,
    )
{
    close $fh or die "cannot close filehandle: $!\n";
}

print Dumper \%activ_2;
print Dumper \%antiox_2;
print Dumper \%toxin_2;

# This part here:
#     /.{6})\s+.+=([^\s]+)/
# just completely lost me, sorry about that.

__END__
Prints:
dumping the active hash...
$VAR1 = {
          'Q39371' => 'PF01486',
          'Q6GZX4' => 'PF04947',
          'Q96355' => 'PF01486',
          'Q96356' => 'PF01486'
        };
dumping the anitox hash...
$VAR1 = {
          '0EYG3' => 'PF10262',
          'D1JAS4' => 'PF10417',
          'E7QVU5' => 'PF10417'
        };
dumping the toxin hash...
$VAR1 = {
          'A2PU44' => 'PF01375',
          'C7T183' => 'PF02950',
          'C7T1P5' => 'PF02950',
          'E2E4E4' => 'PF00918'
        };
###########

Another Possible Data Structure...
$VAR1 = {
          'Q39371' => [
                        'PF01486.12',
                        'PF00319.13'
                      ],
          'Q6GZX4' => [
                        'PF04947.9'
                      ],
          'Q96355' => [
                        'PF01486.12',
                        'PF00319.13'
                      ],
          'Q96356' => [
                        'PF01486.12',
                        'PF00319.13'
                      ]
        };
$VAR1 = {
          '0EYG3' => [
                       'PF10262.4'
                     ],
          'D1JAS4' => [
                        'PF10417.4',
                        'PF00578.16'
                      ],
          'E7QVU5' => [
                        'PF10417.4',
                        'PF00578.16'
                      ]
        };
$VAR1 = {
          'A2PU44' => [
                        'PF01375.12'
                      ],
          'C7T183' => [
                        'PF02950.12'
                      ],
          'C7T1P5' => [
                        'PF02950.12'
                      ],
          'E2E4E4' => [
                        'PF00918.12'
                      ]
        };