#!/usr/bin/perl use strict; use warnings; my (@origin, $y); my $N_Sequences = 100; my @Alphabet = split(//,'ACGT'); my $P_Consensus = 0.85; # This is the probability of dominant letter # ====== Globals ========================== my @Probabilities; # Stores the probability of each character # ====== Program ========================== open (ORIGIN, "< origin8.txt"); # This file holds 200 sequences used for motif template chomp (@origin = ); close ORIGIN; for ($y=0; $y<=$#origin; $y++) { my @Motif = split(//,'$origin[$y]'); # This is a loop to get the motif template from origin8 open (OUT_NORM, ">short_sequences8_[$y].txt") or die "Unable to open file :$!"; for (my $i=0; $i < $N_Sequences; $i++) { for (my $j=0; $j < scalar(@Motif); $j++) { loadConsensusCharacter($Motif[$j]); addNoiseToDistribution(); convertToIntervals(); print OUT_NORM (getRandomCharacter(rand(1.0))); } print OUT_NORM "\n"; make_files(); } } exit(); # ====== Subroutines ======================= # sub loadConsensusCharacter { my ($char) = @_; my $Found = 'FALSE'; for (my $i=0; $i < scalar(@Alphabet); $i++) { if ( $char eq $Alphabet[$i]) { $Probabilities[$i] = 1.0; $Found = 'TRUE'; } else { $Probabilities[$i] = 0.0; } } if ($Found eq 'FALSE') { die("Panic: Motif-Character\"$char\" was not found in Alphabet. Aborting.\n"); } return(); } # ========================================== sub addNoiseToDistribution { my $P_NonConsensus = ( 1.0-$P_Consensus) / (scalar(@Alphabet) - 1); for (my $i=0; $i < scalar(@Probabilities); $i++) { if ( $Probabilities[$i] == 1.0 ) { $Probabilities[$i] = $P_Consensus; } else { $Probabilities[$i] = $P_NonConsensus; } } return(); } # ========================================== sub convertToIntervals { my $Sum = 0; for (my $i=1; $i < scalar(@Probabilities); $i++) { $Probabilities[$i] += $Probabilities[$i-1]; } return(); } # ========================================== sub getRandomCharacter { my ($RandomNumber) = @_; my $i=0; for ($i=0; $i < scalar(@Probabilities); $i++) { if ($Probabilities[$i] > $RandomNumber) { last; } } return($Alphabet[$i]); } # ========================================== sub make_files { my (@short, @long,$x,$r, $output_norm); open (SHORT, "< short_sequences8_[$y].txt"); chomp (@short = ); close SHORT; open (LONG, "< long_sequences.txt"); chomp (@long = ); close LONG; open (OUT_INITIAL, "> output8_[$y]1.txt"); open (OUT_REPLACED, "> output8_[$y]2.txt"); for ($x=0; $x<=$#short; $x++) { $r=2; print OUT_INITIAL ">SeqName$x\n$long[$x]\n"; print OUT_REPLACED "SeqName$x\n" . substr($long[$x], $r, length $short[$x]) . "\n";} close OUT_INITIAL; close OUT_REPLACED; }