Here's the code that I tried. Please let me know if it is fine or not. Basically I need to find the ORFs in the thread in three reading frames. and then translate. I tried this code:
#!/usr/bin/perl -w
$sequence1='file1.txt';
open(SEQUENCE,$sequence1);
$seq=<SEQUENCE>;
print $seq, "\n";
$RNA=$seq;
$RNA=~s/T/U/g;
print "\n here is mRNA $RNA \n";
close SEQUENCE;
$rna1=$RNA;
print "\n Here is the 1st frame $rna1 \n" ;
$rna2=substr($RNA,1) ;
print " Here is the 2nd frame $rna2 \n";
$rna3=substr($RNA,2) ;
print "Here is the 3rd frame $rna3 \n";
$length1= length$rna1;
$length2= length$rna2;
$length3= length$rna3;
print "1st line ORFs\n";
for ($i = 0; $i <= ($length1 - 3); $i = $i + 3)
{ $codon1 = substr($rna1, $i, 3);
print $codon1," ";
}
print "2nd line ORFs\n";
for ($i = 0; $i <= ($length2 - 3); $i = $i + 3)
{ $codon2 = substr($rna2, $i, 3);
print $codon2," ";
}
print "\n 3rd line ORFs\n";
for ($i = 0; $i <= ($length3 - 3); $i = $i + 3)
{ $codon3 = substr($rna3, $i, 3);
print $codon3," ";
}
local $_ = $RNA ;
while ( / AUG /g ) {
my $start = pos () - 2 ;
if ( / UGA|UAA|UAG /g ) {
my $stop = pos ;
$gene = substr ( $_ , $start - 1 , $stop - $start + 1 ), $/ ;
print "$gene" ;
}
# The next set of commands translates the ORF found above for an amino
+ acid seq.
print "\nThe largest reading Frame is:\t\t\t" . $protein { "gene" } .
+"\n" ;
sub translate {
my ( $gene , $reading_frame ) = @_ ;
my %protein = ();
for ( $i = $reading_frame ; $i < length ( $gene ); $i += 3 ) {
$codon = substr ( $gene , $i , 3 );
$amino_acid = translate_codon( $codon );
$protein { $amino_acid }++;
$protein { "gene" } .= $amino_acid ;
}
return %protein ;
}
sub translate_codon {
if ( $_ [ 0 ] =~ / GC[AGCU] /i ) { return A;} # Alanine;
if ( $_ [ 0 ] =~ / UGC|UGU /i ) { return C;} # Cysteine
if ( $_ [ 0 ] =~ / GAC|GAU /i ) { return D;} # Aspartic A
+cid;
if ( $_ [ 0 ] =~ / GAA|GAG /i ) { return Q;} # Glutamine;
if ( $_ [ 0 ] =~ / UUC|UUU /i ) { return F;} # Phenylalan
+ine;
if ( $_ [ 0 ] =~ / GG[AGCU] /i ) { return G;} # Glycine;
if ( $_ [ 0 ] =~ / CAC|CAU /i ) { return His;} # Histine
+(start codon);
if ( $_ [ 0 ] =~ / AU[AUC] /i ) { return I;} # Isoleucine
+;
if ( $_ [ 0 ] =~ / AAA|AAG /i ) { return K;} # Lysine;
if ( $_ [ 0 ] =~ / UUA|UUG|CU[AGCU] /i ) { return Leu;} # Leucine;
if ( $_ [ 0 ] =~ / AUG /i ) { return M;} # Methionine
+;
if ( $_ [ 0 ] =~ / AAC|AAU /i ) { return N;} # Asparagine
+;
if ( $_ [ 0 ] =~ / CC[AGCU] /i ) { return P;} # Proline;
if ( $_ [ 0 ] =~ / CAA|CAG /i ) { return G;} # Glutamine;
if ( $_ [ 0 ] =~ / AGA|AGG|CG[AGCU] /i ) { return R;} # Arginine;
if ( $_ [ 0 ] =~ / AGC|AGU|UC[AGCU] /i ) { return S;} # Serine;
if ( $_ [ 0 ] =~ / AC[AGCU] /i ) { return T;} # Threonine;
if ( $_ [ 0 ] =~ / GU[AGCU] /i ) { return V;} # Valine;
if ( $_ [ 0 ] =~ / UGG /i ) { return W;} # Tryptophan
+;
if ( $_ [ 0 ] =~ / UAC|UAU /i ) { return Y;} # Tyrosine;
if ( $_ [ 0 ] =~ / UAA|UGA|UAG /i ) { return "***" ;} # Stop
+Codons;
}
}
exit;
Please give me a detailed answer. I need the total program URGENTLY. Please Help!!!