toolic expertly provided the solution to your script's concatenation issue. However, consider using Bio::SeqIO for your FASTA-parsing needs:
use strict;
use warnings;
use Bio::SeqIO;
# Read FASTA records through Perl's magic ARGV handle (the files named on the
# command line, or STDIN when none are given) and print each record's
# translation in FASTA layout: a '>' header with the record id, then the
# protein string on the following line.
my $in = Bio::SeqIO->new( -fh => \*ARGV, -format => 'Fasta' );
while ( my $seq = $in->next_seq() ) {

    # /.../g in list context yields consecutive 3-character chunks; a trailing
    # 1-2 character remainder is silently dropped.  Building the string with
    # join keeps $protein defined (as '') even when no complete codon exists,
    # so the print below never interpolates undef.
    my $protein = join '', map { codon2aa($_) } $seq->seq =~ /.../g;

    print '>' . $seq->id . "\n$protein\n";
}
# codon2aa
#
# Translate a DNA 3-character codon to a one-letter amino acid code.
#
# Parameters:
#   $codon - the codon string to classify (e.g. 'ATG').
#
# Returns:
#   The one-letter amino acid code, '_' for a stop codon (TAA, TAG, TGA),
#   or '*' when no pattern matches.
#
# Matching is case-insensitive.  The patterns are not anchored and '.' accepts
# any character except newline, so a third-position wildcard such as 'GCN'
# still resolves to 'A', and a string longer than three characters is
# classified by the first branch whose pattern occurs anywhere in it.
sub codon2aa {
    my ($codon) = @_;

    if    ( $codon =~ /GC./i )        { return 'A' }    # Alanine
    elsif ( $codon =~ /TG[TC]/i )     { return 'C' }    # Cysteine
    elsif ( $codon =~ /GA[TC]/i )     { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /GA[AG]/i )     { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /TT[TC]/i )     { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /GG./i )        { return 'G' }    # Glycine
    elsif ( $codon =~ /CA[TC]/i )     { return 'H' }    # Histidine
    elsif ( $codon =~ /AT[TCA]/i )    { return 'I' }    # Isoleucine
    elsif ( $codon =~ /AA[AG]/i )     { return 'K' }    # Lysine
    elsif ( $codon =~ /TT[AG]|CT./i ) { return 'L' }    # Leucine
    elsif ( $codon =~ /ATG/i )        { return 'M' }    # Methionine
    elsif ( $codon =~ /AA[TC]/i )     { return 'N' }    # Asparagine
    elsif ( $codon =~ /CC./i )        { return 'P' }    # Proline
    elsif ( $codon =~ /CA[AG]/i )     { return 'Q' }    # Glutamine
    elsif ( $codon =~ /CG.|AG[AG]/i ) { return 'R' }    # Arginine
    elsif ( $codon =~ /TC.|AG[TC]/i ) { return 'S' }    # Serine
    elsif ( $codon =~ /AC./i )        { return 'T' }    # Threonine
    elsif ( $codon =~ /GT./i )        { return 'V' }    # Valine
    elsif ( $codon =~ /TGG/i )        { return 'W' }    # Tryptophan
    elsif ( $codon =~ /TA[TC]/i )     { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /TA[AG]|TGA/i ) { return '_' }    # Stop
    else {
        return '*';    # Not a recognised codon
    }
}
Output on your data set:
>M01096:4:000000000-A23M1:1:1101:15974:1529
VTECGWQTAGCRMY
>M01096:4:000000000-A23M1:1:1101:16525:1548
LSHCDVKDWMCWLL
>M01096:4:000000000-A23M1:1:1101:13838:1554
AWTCVEIDGHFSMN