in reply to
Need help with code
You should use the 'g' regex modifier imho (see perlretut).
#!/usr/bin/perl -w
use strict;
use warnings;
use feature qw(say);
use constant GENETIC_CODE => {
TCA => 'S', # Serine
TCC => 'S', # Serine
TCG => 'S', # Serine
TCT => 'S', # Serine
TTC => 'F', # Phenylalanine
TTT => 'F', # Phenylalanine
TTA => 'L', # Leucine
TTG => 'L', # Leucine
TAC => 'Y', # Tyrosine
TAT => 'Y', # Tyrosine
TAA => '_', # exit
TAG => '_', # exit
TGC => 'C', # Cysteine
TGT => 'C', # Cysteine
TGA => '_', # exit
TGG => 'W', # Tryptophan
CTA => 'L', # Leucine
CTC => 'L', # Leucine
CTG => 'L', # Leucine
CTT => 'L', # Leucine
GTT => 'V', # Valine
GTC => 'V', # Valine
GTA => 'V', # Valine
GTG => 'V', # Valine
GCT => 'A', # Alanine
GCC => 'A', # Alanine
GCA => 'A', # Alanine
GCG => 'A', # Alanine
GAT => 'D', # Aspartic Acid
GAC => 'D', # Aspartic Acid
GAA => 'E', # Glutamate
GAG => 'E', # Glutamate
GGT => 'G', # Glycine
GGC => 'G', # Glycine
GGA => 'G', # Glycine
GGG => 'G', # Glycine
CCA => 'P', # Phenylalanine
CCC => 'P', # Phenylalanine
CCG => 'P', # Phenylalanine
CCT => 'P', # Phenylalanine
CAC => 'H', # Histidine
CAT => 'H', # Histidine
CAA => 'Q', # Glutamine
CAG => 'Q', # Glutamine
CGA => 'R', # Arginine
CGC => 'R', # Arginine
CGG => 'R', # Arginine
CGT => 'R', # Arginine
ATA => 'I', # Isoleucine
ATC => 'I', # Isoleucine
ATT => 'I', # Isoleucine
ATG => 'M', # Methionine
ACA => 'T', # Threonine
ACC => 'T', # Threonine
ACG => 'T', # Threonine
ACT => 'T', # Threonine
AAC => 'N', # Asparagine
AAT => 'N', # Asparagine
AAA => 'K', # Lysine
AAG => 'K', # Lysine
AGC => 'S', # Serine
AGT => 'S', # Serine
AGA => 'R', # Arginine
AGG => 'R', # Arginine
};
my $s = 'AGCCATGTAGCTAACTCAGGTTACATGGGGATGACCCCGCGACTTGGATTAGAGTCTCTTT
+TGGAATAAGCCTGAATGATCCGAGTAGCATCTCAG';
for ($s =~ /ATG (?:[ACGT]{3})*? (?: TAA | TAG | TGA )/gx) {
my @codons = /[ACGT]{3}/g;
say join '', map GENETIC_CODE->{$_}, @codons;
}
This gives me the following output:
M_
MGMTPRLGLESLLE_
PS. Thanks anyway for telling us about this site, it's pretty cool. It's kind like a game, except that you're doing some actual scientific work.
I solved the problem with some perl6 code:
use v6;
constant DNA-codon = Hash.new: <
TTT F CTT L ATT I GTT V
TTC F CTC L ATC I GTC V
TTA L CTA L ATA I GTA V
TTG L CTG L ATG M GTG V
TCT S CCT P ACT T GCT A
TCC S CCC P ACC T GCC A
TCA S CCA P ACA T GCA A
TCG S CCG P ACG T GCG A
TAT Y CAT H AAT N GAT D
TAC Y CAC H AAC N GAC D
TAA Stop CAA Q AAA K GAA E
TAG Stop CAG Q AAG K GAG E
TGT C CGT R AGT S GGT G
TGC C CGC R AGC S GGC G
TGA Stop CGA R AGA R GGA G
TGG W CGG R AGG R GGG G
>;
sub revc($dna) {
$dna.comb.reverse.join.trans:
[<A C T G>] => [<T G A C>]
}
sub orf($dna) {
my %match;
my @match = gather for $dna, revc $dna {
take .match: rx/ ATG [ <[ACGT]>**3 ]*? <before TAA|TAG|TGA> /, :ov
+erlap;
};
%match{
[~] map { DNA-codon{$_} }, .match: rx/ <[ACGT]>**3 /, :g
}++ for @match;
return %match.keys;
}
.say for orf open('rosalind_orf.txt').get;
The "overlap" option was much useful.