use warnings; use strict; use feature qw(state); use Test::More # tests => ?? + 1 # Test::NoWarnings add 1 test 'no_plan' ; use Test::NoWarnings; use Data::Dump; my @codons = qw(/GC./ /TG[TC]/ /GA[TC]/ /GA[AG]/); my @aminos = qw( A C D E); push @codons, qw(/TT[TC]/ /GG./ /CA[TC]/ /AT[TCA]/); push @aminos, qw( F G H I); push @codons, qw(/AA[AG]/ /TT[AG]|CT./ /ATG/ /AA[TC]/); push @aminos, qw( K L M N); push @codons, qw(/CC./ /CA[AG]/ /CG.|AG[AG]/ /TC.|AG[TC]/); push @aminos, qw( P Q R S); push @codons, qw(/AC./ /GT./ /TGG/ /TA[TC]/ /TA[AG]|TGA/); push @aminos, qw( T V W Y _); die qq{lists of codons and aminos not the same length} if @codons != @aminos; my %xlate; # translate base triplets to amino for (0 .. $#codons) { add_to_table(\%xlate, codons_to_amino($codons[$_], $aminos[$_])), } # dd \%xlate; # FOR DEBUG - see what we got for my $ar_vector ( # base sequence expected protein [ qw(GTTACTGAGTGTGGTTGGCAGACTGCTGGTTGCCGTATGTAT VTECGWQTAGCRMY) ], [ qw(CTTTCTCATTGTGATGTTAAGGATTGGATGTGCTGGCTTCTG LSHCDVKDWMCWLL) ], [ qw(GCTTGGACTTGTGTTGAGATTGATGGTCATTTCTCTATGAAT AWTCVEIDGHFSMN) ], ) { my ($bases, $protein) = @$ar_vector; is encode(\%xlate, $bases), $protein, qq{'$bases' -> '$protein'}; } # subroutines ###################################################### sub encode { my ($hr_xlate, $bases, ) = @_; $bases = canonicalize($bases); die qq{number of bases not multiple of 3} if length($bases) % 3; # translate NON-overlapping base triplets to aminos. $bases =~ s{ (...) }{ $hr_xlate->{$1} // '*' }xmsge; return $bases; # bases now translated to protein sequence } # subroutines for building translation table. sub canonicalize { return uc $_[0]; } sub codons_to_amino { my ($codon_pat, # pattern of codon(s) $amino, # amino codon(s) map to ) = @_; $codon_pat = canonicalize($codon_pat); $amino = canonicalize($amino); # translate regex to glob form. state $bases = canonicalize('ATCG'); state $base = qr{ [$bases] }xms; state $any = qr{ \. }xms; state $some = qr{ \[ $base{2,3} \] }xms; state $codon = qr{ $base $base (?: $base | $any | $some) }xms; state $triplet = qr{ \A $base{3} \z }xms; state $any_glob = '{' . join(q{,}, split '', $bases) . '}'; my @codon_groups = $codon_pat =~ m{ $codon }xmsg; for (@codon_groups) { s{ $any }{$any_glob}xmsg; s{ ($some) }{ some_glob($1) }xmsge; } my @triplets = glob(qq{@codon_groups}); for (@triplets) { die qq{'$_' not a triplet} if $_ !~ $triplet; } return $amino, @triplets; } sub some_glob { my ($some_pat, # 2-3 bases in regex class: [ATC] [AT] ) = @_; my $bases = join q{,}, $some_pat =~ m{ \w }xmsg; return qq{{$bases}}; } sub add_to_table { my ($hr_table, $amino, @triplets, ) = @_; for my $triplet (@triplets) { die qq{'$triplet' already in table} if exists $hr_table->{$triplet}; $hr_table->{$triplet} = $amino; } }