Hi
Not as polished as the example above, but it outputs the same results.
use strict;
use warnings;
# Output lines in input order: FASTA header lines are kept verbatim and each
# DNA sequence line is replaced by its protein translation.
my @processed;

while ( my $a_line = <DATA> ) {

    # Strip the line terminator. chomp would only remove "\n"; with CRLF data
    # the leftover "\r" is counted by length() as if it were a base and is
    # echoed at the end of every header line.
    $a_line =~ s/[\r\n]+\z//;

    if ( $a_line =~ /^>/ ) {

        # Header line: keep as is.
        push @processed, $a_line;
        next;
    }

    # DNA sequence line: translate each complete three-base codon into an
    # amino acid and append it to the protein. Trailing bases that do not
    # fill a whole codon are ignored.
    my $protein    = '';
    my $last_start = length($a_line) - 3;    # last offset with 3 chars left
    for ( my $i = 0 ; $i <= $last_start ; $i += 3 ) {
        $protein .= codon2aa( substr( $a_line, $i, 3 ) );
    }
    push @processed, $protein;
}

# Now display what we have processed, one entry per line.
print $_, "\n" for @processed;
# codon2aa
#
# Translate a DNA three-character codon into a one-letter amino acid code.
#
# Parameters:
#   $codon - the codon to translate; matching is case-insensitive.
#
# Returns:
#   The one-letter amino acid code, '_' for a stop codon, or '*' when the
#   argument is undefined or does not match any codon pattern.
#
# Every pattern is anchored with \A and \z so that only the whole argument
# can match. Without the anchors a longer string that merely contains a
# codon-like substring (e.g. 'AGCT' containing 'GCT') would be translated.
# A '.' in the third position accepts any single character other than a
# newline, so the fully degenerate codon families match whatever the third
# character is.
sub codon2aa {
    my ($codon) = @_;

    # An undefined argument cannot be a codon; report it as unknown instead
    # of triggering "uninitialized value" warnings in every match below.
    return '*' if !defined $codon;

    if    ( $codon =~ /\AGC.\z/i )              { return 'A' }    # Alanine
    elsif ( $codon =~ /\ATG[TC]\z/i )           { return 'C' }    # Cysteine
    elsif ( $codon =~ /\AGA[TC]\z/i )           { return 'D' }    # Aspartic Acid
    elsif ( $codon =~ /\AGA[AG]\z/i )           { return 'E' }    # Glutamic Acid
    elsif ( $codon =~ /\ATT[TC]\z/i )           { return 'F' }    # Phenylalanine
    elsif ( $codon =~ /\AGG.\z/i )              { return 'G' }    # Glycine
    elsif ( $codon =~ /\ACA[TC]\z/i )           { return 'H' }    # Histidine
    elsif ( $codon =~ /\AAT[TCA]\z/i )          { return 'I' }    # Isoleucine
    elsif ( $codon =~ /\AAA[AG]\z/i )           { return 'K' }    # Lysine
    elsif ( $codon =~ /\A(?:TT[AG]|CT.)\z/i )   { return 'L' }    # Leucine
    elsif ( $codon =~ /\AATG\z/i )              { return 'M' }    # Methionine
    elsif ( $codon =~ /\AAA[TC]\z/i )           { return 'N' }    # Asparagine
    elsif ( $codon =~ /\ACC.\z/i )              { return 'P' }    # Proline
    elsif ( $codon =~ /\ACA[AG]\z/i )           { return 'Q' }    # Glutamine
    elsif ( $codon =~ /\A(?:CG.|AG[AG])\z/i )   { return 'R' }    # Arginine
    elsif ( $codon =~ /\A(?:TC.|AG[TC])\z/i )   { return 'S' }    # Serine
    elsif ( $codon =~ /\AAC.\z/i )              { return 'T' }    # Threonine
    elsif ( $codon =~ /\AGT.\z/i )              { return 'V' }    # Valine
    elsif ( $codon =~ /\ATGG\z/i )              { return 'W' }    # Tryptophan
    elsif ( $codon =~ /\ATA[TC]\z/i )           { return 'Y' }    # Tyrosine
    elsif ( $codon =~ /\A(?:TA[AG]|TGA)\z/i )   { return '_' }    # Stop
    else {
        # Not a recognised codon.
        return '*';
    }
}
__DATA__
>M01096:4:000000000-A23M1:1:1101:15974:1529 1:N:0:16
GTTACTGAGTGTGGTTGGCAGACTGCTGGTTGCCGTATGTAT
>M01096:4:000000000-A23M1:1:1101:16525:1548 1:N:0:16
CTTTCTCATTGTGATGTTAAGGATTGGATGTGCTGGCTTCTG
>M01096:4:000000000-A23M1:1:1101:13838:1554 1:N:0:16
GCTTGGACTTGTGTTGAGATTGATGGTCATTTCTCTATGAAT
Output
>M01096:4:000000000-A23M1:1:1101:15974:1529 1:N:0:16
VTECGWQTAGCRMY
>M01096:4:000000000-A23M1:1:1101:16525:1548 1:N:0:16
LSHCDVKDWMCWLL
>M01096:4:000000000-A23M1:1:1101:13838:1554 1:N:0:16
AWTCVEIDGHFSMN
Arnaud
Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
Read Where should I post X? if you're not absolutely sure you're posting in the right place.
Please read these before you post! —
Posts may use any of the Perl Monks Approved HTML tags:
- a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
Outside of code tags, you may need to use entities for some characters:
| For: | Use: |
| & | &amp;amp; |
| < | &amp;lt; |
| > | &amp;gt; |
| [ | &amp;#91; |
| ] | &amp;#93; |
Link using PerlMonks shortcuts! What shortcuts can I use for linking?
See Writeup Formatting Tips and other pages linked from there for more info.
|
|