#!/usr/bin/perl
use strict;
use warnings;

use Bio::Seq;
use Bio::SeqIO;

## Extract the protein sequences that belong to a list of gene IDs.
##
## Usage: <script> <uniqueFile> <goodProteinsFile>
##   uniqueFile        text file; each line is expected to END with a gene ID
##   goodProteinsFile  FASTA file of protein sequences
##
## Every FASTA record whose display ID is found (literally) at the end of a
## line of uniqueFile is written, in FASTA format, to
## strainSpecific_seqData.protein.fasta in the current directory.

die "Usage: $0 <uniqueFile> <goodProteinsFile>\n" if @ARGV < 2;

my ($uniqueFile, $goodProteinsFile) = @ARGV;
my $file_out = "strainSpecific_seqData.protein.fasta";

## import the gene IDs (one line per entry)
open my $unique_fh, '<', $uniqueFile
    or die "Cannot open '$uniqueFile' for reading: $!\n";
my @data_in = <$unique_fh>;
close $unique_fh;

## create lookup hash; key = ID, val = sequence
my %goodProteins_hash;
my $in = Bio::SeqIO->new(-file => $goodProteinsFile, -format => 'Fasta');
while (my $seq = $in->next_seq()) {
    $goodProteins_hash{ $seq->display_id() } = $seq->seq();
}

## Collect every protein whose ID is a suffix of an input line.
##
## Rather than testing each line against every protein ID with a regex
## (slow, and wrong when an ID contains regex metacharacters such as '|'
## or '.'), each suffix of the line is looked up directly in the hash.
## This keeps the "line ends with ID" rule but compares IDs literally.
my %strSpec;
for my $line (@data_in) {
    $line =~ s/\r?\n\z//;    # drop the line ending, including CRLF

    for my $offset (0 .. length($line) - 1) {
        my $candidate = substr $line, $offset;
        next unless exists $goodProteins_hash{$candidate};
        $strSpec{$candidate} = $goodProteins_hash{$candidate};
    }
}

## print to file; IDs are sorted so repeated runs give identical output
open my $out_fh, '>', $file_out
    or die "Cannot open '$file_out' for writing: $!\n";
for my $id (sort keys %strSpec) {
    print {$out_fh} ">$id\n$strSpec{$id}\n";
}
close $out_fh
    or die "Cannot close '$file_out': $!\n";

print "- Finished\n";