#!/usr/bin/perl
use strict;
use warnings;

# Splits a DNA sequence into consecutive, non-overlapping 4-base windows and
# reports, for every window:
#   * percent GC content  = 100  * (G + C) / (A + T + G + C)
#   * GC skew             = (G - C) / (G + C)
#   * Purine Loading Index (PLI)
#                         = 1000 * ((G - C) + (A - T)) / (A + T + G + C)
#
# The PLI of each window is printed to STDOUT; the GC-skew and GC% values of
# all windows are written, space separated, to the result file.

# Return a copy of the raw sequence with every 'N'/'n' base and every
# whitespace character removed.
sub clean_sequence {
    my ($raw) = @_;
    ( my $seq = $raw ) =~ s/[Nn\s]+//g;
    return $seq;
}

# Return the list of consecutive, non-overlapping windows of $size bases.
# The last window is shorter when the sequence length is not a multiple of
# $size. Dies on a non-positive window size (it would never advance).
sub split_windows {
    my ( $seq, $size ) = @_;
    die "Window size must be a positive integer\n"
        unless defined $size && $size =~ /\A[1-9][0-9]*\z/;

    my @windows;
    # Advance the offset on every pass so that each window is visited once.
    for ( my $pos = 0; $pos < length $seq; $pos += $size ) {
        push @windows, substr( $seq, $pos, $size );
    }
    return @windows;
}

# Compute the statistics of a single window. Bases are counted
# case-insensitively; characters other than A/C/G/T are ignored.
# Returns a hash reference with the formatted values:
#   gc_percent - "%.2f", gc_skew - "%.4f", pli - "%.0f"
# A ratio whose denominator is zero (no G/C, or no countable base at all)
# is reported as 0 instead of dying with a division-by-zero error.
sub window_stats {
    my ($window) = @_;

    # tr/// without a replacement list only counts; $window is not modified.
    my $n_a = ( $window =~ tr/Aa// );
    my $n_t = ( $window =~ tr/Tt// );
    my $n_g = ( $window =~ tr/Gg// );
    my $n_c = ( $window =~ tr/Cc// );

    my $total   = $n_a + $n_t + $n_g + $n_c;
    my $gc_sum  = $n_g + $n_c;
    my $gc_diff = $n_g - $n_c;
    my $at_diff = $n_a - $n_t;

    my $gc_percent = $total  ? 100 * $gc_sum / $total                  : 0;
    my $gc_skew    = $gc_sum ? $gc_diff / $gc_sum                      : 0;
    my $pli        = $total  ? 1000 * ( $gc_diff + $at_diff ) / $total : 0;

    return {
        gc_percent => sprintf( "%.2f", $gc_percent ),
        gc_skew    => sprintf( "%.4f", $gc_skew ),
        pli        => sprintf( "%.0f", $pli ),
    };
}

# Entry point: analyse the built-in sequence and write the report.
# Dies (non-zero exit status, message on STDERR) when the result file cannot
# be opened or closed.
sub main {
    my $dna         = "GGCT CTGCGCGGNN";
    my $window_size = 4;
    my $output      = "GC-SkewResult .txt";

    my $sequence = clean_sequence($dna);
    my @stats    = map { window_stats($_) }
        split_windows( $sequence, $window_size );

    for my $stat (@stats) {
        print " Purine Loading Index of each 1Kb Window=$stat->{pli} bases/4-base.\n";
    }

    # Open the file once, after all windows are processed, so that the
    # results of every window end up in it (not just those of the last one).
    open( my $result, '>', $output )
        or die "Cannot open file \"$output\": $!\n";

    my $skews    = join ' ', map { $_->{gc_skew} } @stats;
    my $contents = join ' ', map { $_->{gc_percent} } @stats;
    print {$result}
        "\n RESULTS for substrings:\n GC-Skew values of substrings:\n $skews\n\n Percent GC Content of substrings:\n $contents\n\n";

    # Buffered write errors only surface when the handle is closed.
    close($result)
        or die "Cannot close file \"$output\": $!\n";

    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. by a test) without writing the result file.
main() unless caller;

1;

# Sample outputs recorded with the original version of this script
# (kept for reference).
#
#### RESULTS for substrings:
#    GC-Skew values of substrings:
#    0.3333
#    Percent GC Content of substrings:
#    75.00
#
#### RESULTS for substrings:
#    GC-Skew values of substrings:
#    0.3333 1.0000 0.5000
#    Percent GC Content of substrings:
#    75.00 50.00 100.00
#
# NOTE(review): the second block looks like the intended multi-window layout
# rather than real output. For the built-in sequence (windows GGCT, CTGC,
# GCGG) the formulas above give GC skew 0.3333 -0.3333 0.5000 and
# GC content 75.00 75.00 100.00 -- confirm which values are expected.