#!/usr/bin/perl
#
# Splits a DNA sequence into consecutive, non-overlapping 4-base windows and
# computes for every window:
#
#   percent GC content    100 * (G + C) / (A + T + G + C)
#   GC skew               (G - C) / (G + C)
#   purine loading index  PLI_SCALE * ((G - C) + (A - T)) / (A + T + G + C)
#
# The purine loading index (PLI) of each window is printed to STDOUT.  The
# GC-skew and percent-GC values of all windows are written, one value per
# line, to the result file named in $output.
use strict;
use warnings;

use constant {
    WINDOW_SIZE => 4,

    # NOTE(review): the original multiplied by an undefined $thousand, so the
    # PLI was always 0.  1000 is assumed from that variable name and the
    # "1Kb" wording of the report line -- confirm the intended scale.
    PLI_SCALE => 1000,

    # Reported instead of a number when a ratio has a zero denominator.
    NOT_AVAILABLE => 'NA',
};

my $DNA1 = "GGCT CTGCGCGGNN";

# Remove N from sequence
$DNA1 =~ s/N//ig;

# Remove whitespace
$DNA1 =~ s/\s//g;

# Compute the statistics of every complete window up front so the result
# file can be written once, instead of being truncated for each window.
my @stats = map { _window_stats($_) } _split_windows( $DNA1, WINDOW_SIZE );

for my $stat (@stats) {
    print " Purine Loading Index of each 1Kb Window=$stat->{pli} bases/4-base.\n";
}

my $output = "GC-SkewResult .txt";
open( my $result_fh, '>', $output )
    or die "Cannot open file \"$output\": $!\n";

print {$result_fh} "\n RESULTS for substrings:\n GC-Skew values of substrings:\n";
print {$result_fh} " $_->{gc_skew}\n" for @stats;
print {$result_fh} "\n Percent GC Content of substrings:\n";
print {$result_fh} " $_->{gc_percent}\n" for @stats;
print {$result_fh} "\n";

# Buffered write errors only surface when the handle is closed.
close($result_fh)
    or die "Cannot close file \"$output\": $!\n";

# Return the consecutive, non-overlapping windows of $size characters found
# in $sequence.  A trailing fragment shorter than $size is not returned.
sub _split_windows {
    my ( $sequence, $size ) = @_;

    my $window_count = int( length($sequence) / $size );

    # When $window_count is 0 the range 0 .. -1 is empty: no windows.
    return map { substr( $sequence, $_ * $size, $size ) }
        0 .. $window_count - 1;
}

# Compute the statistics of one window (bases are counted case-insensitively).
# Returns a hash reference with these keys, each holding a formatted string:
#   gc_percent - percent GC content with two decimals
#   gc_skew    - GC skew with four decimals
#   pli        - purine loading index rounded to a whole number
# A value is NOT_AVAILABLE when its denominator is zero: gc_skew for a
# window without G or C, gc_percent and pli for a window without any of
# A, C, G or T.
sub _window_stats {
    my ($window) = @_;

    # tr/// with an empty replacement list only counts; it changes nothing.
    my $adenine  = ( $window =~ tr/Aa// );
    my $thymine  = ( $window =~ tr/Tt// );
    my $guanine  = ( $window =~ tr/Gg// );
    my $cytosine = ( $window =~ tr/Cc// );

    my $base_total = $adenine + $thymine + $guanine + $cytosine;
    my $gc_sum     = $guanine + $cytosine;
    my $gc_diff    = $guanine - $cytosine;
    my $at_diff    = $adenine - $thymine;

    my %stat = (
        gc_percent => NOT_AVAILABLE,
        gc_skew    => NOT_AVAILABLE,
        pli        => NOT_AVAILABLE,
    );

    if ( $base_total > 0 ) {
        $stat{gc_percent} = sprintf( "%.2f", 100 * $gc_sum / $base_total );
        $stat{pli}        = sprintf( "%.0f",
            PLI_SCALE * ( $gc_diff + $at_diff ) / $base_total );
    }

    if ( $gc_sum > 0 ) {
        $stat{gc_skew} = sprintf( "%.4f", $gc_diff / $gc_sum );
    }

    return \%stat;
}