#!/usr/bin/perl
#
# Reads a DNA sequence from file1.txt, transcribes it to mRNA (T -> U),
# prints the three forward reading frames split into codons, lists every
# open reading frame (AUG ... in-frame stop codon), and prints the protein
# translation of the longest one.
#
# The file can also be loaded with `require` to reuse translate(),
# translate_codon() and find_orfs(); the driver only runs when the file is
# executed directly.

use strict;
use warnings;

# Standard genetic code, keyed by upper-case RNA codon.  Stop codons map to
# "***", the marker the script has always emitted for them.
my %AMINO_ACID_FOR = (
    GCA => 'A', GCC => 'A', GCG => 'A', GCU => 'A',    # Alanine
    UGC => 'C', UGU => 'C',                            # Cysteine
    GAC => 'D', GAU => 'D',                            # Aspartic acid
    GAA => 'E', GAG => 'E',                            # Glutamic acid
    UUC => 'F', UUU => 'F',                            # Phenylalanine
    GGA => 'G', GGC => 'G', GGG => 'G', GGU => 'G',    # Glycine
    CAC => 'H', CAU => 'H',                            # Histidine
    AUA => 'I', AUC => 'I', AUU => 'I',                # Isoleucine
    AAA => 'K', AAG => 'K',                            # Lysine
    UUA => 'L', UUG => 'L',                            # Leucine
    CUA => 'L', CUC => 'L', CUG => 'L', CUU => 'L',    # Leucine
    AUG => 'M',                                        # Methionine (start)
    AAC => 'N', AAU => 'N',                            # Asparagine
    CCA => 'P', CCC => 'P', CCG => 'P', CCU => 'P',    # Proline
    CAA => 'Q', CAG => 'Q',                            # Glutamine
    AGA => 'R', AGG => 'R',                            # Arginine
    CGA => 'R', CGC => 'R', CGG => 'R', CGU => 'R',    # Arginine
    AGC => 'S', AGU => 'S',                            # Serine
    UCA => 'S', UCC => 'S', UCG => 'S', UCU => 'S',    # Serine
    ACA => 'T', ACC => 'T', ACG => 'T', ACU => 'T',    # Threonine
    GUA => 'V', GUC => 'V', GUG => 'V', GUU => 'V',    # Valine
    UGG => 'W',                                        # Tryptophan
    UAC => 'Y', UAU => 'Y',                            # Tyrosine
    UAA => '***', UAG => '***', UGA => '***',          # Stop codons
);

# translate_codon($codon)
#   Returns the one-letter amino acid for a three-base codon, "***" for a
#   stop codon, or "X" for anything not in the table (ambiguous bases, wrong
#   length).  Matching is case-insensitive and DNA-style T is read as U.
sub translate_codon {
    my ($codon) = @_;
    return 'X' unless defined $codon;

    ( my $key = uc $codon ) =~ tr/T/U/;
    return exists $AMINO_ACID_FOR{$key} ? $AMINO_ACID_FOR{$key} : 'X';
}

# translate($gene, $reading_frame)
#   Translates $gene codon by codon, starting at offset $reading_frame
#   (defaults to 0).  Returns a hash: each amino-acid code maps to the number
#   of times it occurred, and the key "gene" holds the translated sequence as
#   one string.  A trailing partial codon (fewer than three bases) is ignored.
sub translate {
    my ( $gene, $reading_frame ) = @_;
    $gene          = '' unless defined $gene;
    $reading_frame = 0  unless defined $reading_frame;

    my %protein = ( gene => '' );
    for ( my $i = $reading_frame; $i + 3 <= length $gene; $i += 3 ) {
        my $amino_acid = translate_codon( substr $gene, $i, 3 );
        $protein{$amino_acid}++;
        $protein{gene} .= $amino_acid;
    }
    return %protein;
}

# find_orfs($rna)
#   Returns every open reading frame in $rna, in order of start position.
#   An ORF runs from an AUG through the first stop codon (UAA, UAG, UGA) that
#   is in frame with it, stop codon included.  An AUG nested inside an ORF
#   already reported for the same stop codon is not reported again.
sub find_orfs {
    my ($rna) = @_;
    my $rna_length = length $rna;
    my @orfs;
    my %stop_claimed;    # stop-codon offset => already paired with an AUG

    my $start = 0;
    while ( ( $start = index $rna, 'AUG', $start ) != -1 ) {
        for ( my $i = $start + 3; $i + 3 <= $rna_length; $i += 3 ) {
            next unless substr( $rna, $i, 3 ) =~ /\A(?:UAA|UAG|UGA)\z/;
            push @orfs, substr( $rna, $start, $i + 3 - $start )
                unless $stop_claimed{$i}++;
            last;
        }
        $start++;    # resume one base on so overlapping frames are seen
    }
    return @orfs;
}

# print_codons($label, $frame)
#   Prints $label, then $frame split into space-separated complete codons.
sub print_codons {
    my ( $label, $frame ) = @_;
    print $label;
    for ( my $i = 0; $i + 3 <= length $frame; $i += 3 ) {
        print substr( $frame, $i, 3 ), " ";
    }
    print "\n";
    return;
}

# main()
#   Script driver: reads the sequence and prints the full report.
sub main {
    my $sequence_file = 'file1.txt';

    open my $fh, '<', $sequence_file
        or die "Cannot open $sequence_file: $!\n";
    my $seq = <$fh>;    # the sequence is taken from the first line of the file
    close $fh;

    die "$sequence_file is empty\n" unless defined $seq;
    $seq =~ s/\s+//g;    # drop the line ending and any stray whitespace
    die "$sequence_file contains no sequence\n" unless length $seq;
    print $seq, "\n";

    ( my $rna = uc $seq ) =~ tr/T/U/;
    print "\n here is mRNA $rna \n";

    # Reading frames 1-3 start at offsets 0, 1 and 2 of the mRNA; a frame
    # whose offset lies past the end of a very short sequence is empty.
    my ( $rna1, $rna2, $rna3 ) =
        map { $_ <= length $rna ? substr( $rna, $_ ) : '' } 0 .. 2;
    print "\n Here is the 1st frame $rna1 \n";
    print " Here is the 2nd frame $rna2 \n";
    print "Here is the 3rd frame $rna3 \n";

    print_codons( "1st line ORFs\n", $rna1 );
    print_codons( "2nd line ORFs\n", $rna2 );
    print_codons( "3rd line ORFs\n", $rna3 );

    my @orfs = find_orfs($rna);
    if ( !@orfs ) {
        print "\nNo open reading frame found\n";
        return;
    }
    print "$_\n" for @orfs;

    # Translate the longest ORF found above into an amino acid sequence.
    my $longest = $orfs[0];
    for my $orf (@orfs) {
        $longest = $orf if length $orf > length $longest;
    }
    my %protein = translate( $longest, 0 );
    print "\nThe largest reading Frame is:\t\t\t" . $protein{"gene"} . "\n";
    return;
}

# Run the driver only when executed as a script, not when loaded via require.
unless (caller) {
    main();
    exit;
}

1;