#!/usr/bin/perl
use strict;
use warnings;
# Compute GC%, GC-skew and the Purine Loading Index (PLI) for every
# complete, non-overlapping 4-base window of a DNA sequence.  The PLI of
# each window is printed to STDOUT; the GC-skew and GC% values of all
# windows are written together, once, to the result file.

# clean_sequence($raw)
#   Returns a copy of $raw with every 'N'/'n' and every whitespace
#   character removed.
sub clean_sequence {
    my ($raw) = @_;
    ( my $cleaned = $raw ) =~ s/[Nn\s]//g;
    return $cleaned;
}

# split_windows($sequence, $size)
#   Returns the list of consecutive, non-overlapping substrings of
#   exactly $size characters.  A trailing remainder shorter than $size is
#   not returned, so every window has the same length.
#   Dies when $size is not a positive integer (a zero step would never
#   advance through the sequence).
sub split_windows {
    my ( $sequence, $size ) = @_;
    die "Window size must be a positive integer\n"
        if !defined $size || $size !~ /\A[1-9][0-9]*\z/;

    my @windows;
    for ( my $offset = 0; $offset + $size <= length($sequence); $offset += $size ) {
        push @windows, substr( $sequence, $offset, $size );
    }
    return @windows;
}

# window_stats($window)
#   Counts A, T, G and C (case-insensitively) in $window and returns a
#   hash reference of formatted strings:
#     gc_percent  100 * (G + C) / (A + T + G + C),             "%.2f"
#     gc_skew     (G - C) / (G + C),                           "%.4f"
#     pli         1000 * ((G - C) + (A - T)) / (A + T + G + C), "%.0f"
#   A value whose denominator is zero is reported as the string 'NA'
#   instead of dividing by zero.
sub window_stats {
    my ($window) = @_;

    # tr/// with an empty replacement list counts without modifying.
    my $adenine  = ( $window =~ tr/Aa// );
    my $thymine  = ( $window =~ tr/Tt// );
    my $guanine  = ( $window =~ tr/Gg// );
    my $cytosine = ( $window =~ tr/Cc// );

    my $base_total = $adenine + $thymine + $guanine + $cytosine;
    my $gc_diff    = $guanine - $cytosine;
    my $gc_sum     = $guanine + $cytosine;
    my $at_diff    = $adenine - $thymine;

    my %stats = ( gc_percent => 'NA', gc_skew => 'NA', pli => 'NA' );

    if ( $base_total > 0 ) {
        $stats{gc_percent} = sprintf( "%.2f", 100 * ( $gc_sum / $base_total ) );
        $stats{pli} =
            sprintf( "%.0f", 1000 * ( ( $gc_diff + $at_diff ) / $base_total ) );
    }
    if ( $gc_sum > 0 ) {
        $stats{gc_skew} = sprintf( "%.4f", $gc_diff / $gc_sum );
    }
    return \%stats;
}

# format_report(\@gc_skews, \@gc_percents)
#   Returns the text of the result file: a heading, then the GC-skew
#   values one per line, then the GC% values one per line.
sub format_report {
    my ( $gc_skews, $gc_percents ) = @_;
    return "\n RESULTS for substrings:\n\n"
        . "GC-Skew values of substrings:\n\n"
        . join( "\n", @{$gc_skews} ) . "\n\n\n"
        . "Percent GC Content of substrings:\n\n"
        . join( "\n", @{$gc_percents} ) . "\n\n";
}

# main()
#   Cleans the built-in sequence, prints the PLI of each 4-base window
#   and writes the collected GC-skew / GC% values to the result file.
#   Dies with the system error when the file cannot be written.
sub main {
    my $window_size = 4;
    my $sequence    = clean_sequence("GGCT CTGCGCGGNN");

    # NOTE(review): the space before ".txt" looks unintended -- confirm
    # before renaming, since other tooling may read this exact path.
    my $output = "GC-SkewResult .txt";

    my @gc_skews;
    my @gc_percents;
    for my $window ( split_windows( $sequence, $window_size ) ) {
        my $stats = window_stats($window);

        # The PLI is scaled by 1000 (see window_stats).
        print " Purine Loading Index of each 1Kb Window=$stats->{pli} bases/4-base.\n";
        push @gc_skews,    $stats->{gc_skew};
        push @gc_percents, $stats->{gc_percent};
    }

    # Open once, after the loop, so the values of every window end up in
    # the file rather than only those of the last window processed.
    open( my $result_fh, '>', $output )
        or die "Cannot open file \"$output\": $!\n";
    print {$result_fh} format_report( \@gc_skews, \@gc_percents )
        or die "Cannot write to file \"$output\": $!\n";
    close($result_fh)
        or die "Cannot close file \"$output\": $!\n";
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without producing output or writing the result file.
main() unless caller;

1;
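__END__

Everything below is sample output kept for reference; it is not Perl code.

Sample 1 -- a single value per metric:

RESULTS for substrings:
GC-Skew values of substrings:
0.3333
Percent GC Content of substrings:
75.00

Sample 2 -- several values per metric (presumably one per 4-base window):

RESULTS for substrings:
GC-Skew values of substrings:
0.3333
1.0000
0.5000
Percent GC Content of substrings:
75.00
50.00
100.00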