Hey, I have some code (given below) that counts A, T, G, C, and GC, and the GC percentage, in the regions 0-200, 1-201, and 2-202. But I have to process a lengthy sequence of up to 10,000 base pairs, so this approach would be very long and inefficient. What modification can I make to this code so that it automatically counts over all 10,000 bases?
#!/usr/bin/perl
# Interactive entry point: prints a menu of regions, reads the user's choice
# from STDIN, and runs the matching region sub (region1, region2 or region3).
use warnings;

# File-scoped variables declared by the original script. They are kept so that
# any code elsewhere in the file that refers to them keeps working.
my $seq;
my $file;
my $line;
my $len;
my @seq;

print "Select the region where you want to find CpG Island\n";
print "1. Region 1-200\n";
print "2. Region 2-202\n";
print "3. Region 3-203\n";

# <STDIN> returns undef at end of input (e.g. closed or empty pipe).
my $choice = <STDIN>;
die "No selection was entered\n" unless defined $choice;
chomp $choice;
$choice =~ s/\A\s+|\s+\z//g;    # tolerate stray whitespace around the number

# Menu number -> handler. References are used instead of the `&name;` call
# form, which would implicitly pass the caller's @_ to the sub.
my %region_for = (
    1 => \&region1,
    2 => \&region2,
    3 => \&region3,
);

# Only look the choice up when it is a plain integer; adding 0 normalizes
# inputs such as "01" to the key "1" without a non-numeric warning.
my $handler = $choice =~ /\A\d+\z/ ? $region_for{ $choice + 0 } : undef;

if ($handler) {
    $handler->();
}
else {
    warn "Invalid choice '$choice': expected 1, 2 or 3\n";
}
# Settings shared by the region subs. `use constant` takes effect at compile
# time, so the values are already defined when code earlier in the file calls
# a region sub (a plain `my` assignment placed here would not have run yet).
use constant WINDOW_LENGTH    => 200;               # bases per window
use constant DEFAULT_SEQ_FILE => 'd:\perl\1.txt';   # input used when no path is given
use constant GC_THRESHOLD     => 60;                # percent at or above which "CpG present" is printed

# NOTE(review): the original listing had a line of post text inside region2
# ("regarding perl, so I need your help for the same."). It is not Perl and
# stopped the file from compiling, so it is omitted here.

# _report_window($offset, $length, $path)
#
# Reads the FIRST line of $path, takes the window of up to $length characters
# starting at zero-based $offset, and prints to STDOUT:
#   - the counts of A, T, G and C (upper-case only),
#   - "CG", the combined count of C and G,
#   - the total number of A/T/G/C characters,
#   - the percentage of C+G among that total,
#   - "CpG present" when the percentage is >= GC_THRESHOLD, else "CpG absent".
#
# $path is optional and defaults to DEFAULT_SEQ_FILE.
# Dies if the file cannot be opened. Returns 1.
#
# Because the offset is a parameter, a caller can examine any window of a
# long sequence by passing a different start offset.
sub _report_window {
    my ($offset, $length, $path) = @_;
    $path = DEFAULT_SEQ_FILE unless defined $path;

    open my $in, '<', $path or die "Cannot open '$path': $!\n";
    my $line = <$in>;
    close $in;

    $line = '' unless defined $line;    # empty file: treat as an empty sequence
    chomp $line;

    # substr() warns and returns undef when the offset lies beyond the end of
    # the string, so a too-short line yields an empty window instead.
    my $window = $offset < length($line) ? substr($line, $offset, $length) : '';

    # tr/X// with an empty replacement list only counts; $window is unchanged.
    my %count_of = (
        A => ($window =~ tr/A//),
        T => ($window =~ tr/T//),
        G => ($window =~ tr/G//),
        C => ($window =~ tr/C//),
    );
    my $gc    = $count_of{G} + $count_of{C};
    my $total = $gc + $count_of{A} + $count_of{T};

    # A window with no A/T/G/C at all would otherwise divide by zero.
    my $percent = $total ? 100 * $gc / $total : 0;

    print "Count of A = $count_of{A}\n";
    print "Count of T = $count_of{T}\n";
    print "Count of G = $count_of{G}\n";
    print "Count of C = $count_of{C}\n";
    print "CG = $gc\n";
    print "total count of ATGC = $total\n";
    print "CpG percentage = $percent\n";

    if ($percent >= GC_THRESHOLD) {
        print "CpG present";
    }
    else {
        print "CpG absent";
    }
    return 1;
}

# NOTE(review): the three subs below assume every region is a fixed
# WINDOW_LENGTH-base window that slides by one base. The original passed 201
# and 203 as substr()'s third argument, which is a LENGTH rather than an end
# position, so regions 2 and 3 were longer than 200 characters. Confirm the
# intended window size.

# Reports on the window at zero-based offsets 0..199. Optional argument: path
# of the sequence file (defaults to DEFAULT_SEQ_FILE).
sub region1 {
    my ($path) = @_;
    return _report_window(0, WINDOW_LENGTH, $path);
}

# Reports on the window at zero-based offsets 1..200. Optional argument: path
# of the sequence file (defaults to DEFAULT_SEQ_FILE).
sub region2 {
    my ($path) = @_;
    return _report_window(1, WINDOW_LENGTH, $path);
}

# Reports on the window at zero-based offsets 2..201. Optional argument: path
# of the sequence file (defaults to DEFAULT_SEQ_FILE).
sub region3 {
    my ($path) = @_;
    return _report_window(2, WINDOW_LENGTH, $path);
}