#!/usr/bin/perl
use strict;
use warnings;

# Slides a fixed-size window over the concatenated sequence of a FASTA file
# and writes, in FASTA format, every window that
#   (a) ends in ten A/C/G/T bases followed by "GG", and
#   (b) occurs exactly once among all matching windows.
#
# NOTE(review): the window is 21 bases wide while the pattern only constrains
# the last 12 bases and the output file is called "...12Kmers..."; the full
# 21-base window is what gets counted and printed. Confirm this is intended.

# Run the pipeline only when executed as a script, so the subs below can be
# loaded (require/do) without touching the hard-coded input path.
main() unless caller;

# Entry point: load the genome, count candidate windows, write unique ones.
sub main {
    my $input_file  = '/scratch/Drosophila/dmel-all-chromosome-r6.02.fasta';
    my $output_file = 'unique12KmersEndingGG.fasta';
    my $windowSize  = 21;
    my $stepSize    = 1;

    my $sequenceRef = loadSequence($input_file);

    # Open the output before the scan so an unwritable destination fails
    # early rather than after the whole sequence has been processed.
    open( my $kmers, '>', $output_file )
        or die "Cannot open '$output_file' for writing: $!";

    my $windowSeqScore = countWindows( $sequenceRef, $windowSize, $stepSize );

    # $count advances for every distinct window, not only the unique ones,
    # so the crispr_N identifiers are not contiguous; they also depend on
    # Perl's hash iteration order.
    my $count = 0;
    for my $windowSeq ( keys %{$windowSeqScore} ) {
        $count++;
        if ( $windowSeqScore->{$windowSeq} == 1 ) {
            print {$kmers} ">crispr_$count", "\n", $windowSeq, "\n";
        }
    }

    # Buffered write errors only surface at close, so check it.
    close($kmers) or die "Cannot close '$output_file': $!";
    return;
}

# countWindows(\$sequence, $windowSize, $stepSize)
#
# Walks the referenced string in steps of $stepSize, taking substrings of
# $windowSize characters, and counts each window whose last twelve characters
# are ten of A/T/C/G followed by "GG".
#
# Returns a hash reference mapping window string => number of occurrences.
# A sequence shorter than $windowSize yields an empty hash.
sub countWindows {
    my ( $sequenceRef, $windowSize, $stepSize ) = @_;

    my %windowSeqScore;
    my $lastStart = length( ${$sequenceRef} ) - $windowSize;

    for ( my $windowStart = 0 ; $windowStart <= $lastStart ; $windowStart += $stepSize ) {
        my $windowSeq = substr( ${$sequenceRef}, $windowStart, $windowSize );
        if ( $windowSeq =~ /[ATCG]{10}GG\z/ ) {
            $windowSeqScore{$windowSeq}++;
        }
    }

    return \%windowSeqScore;
}

# loadSequence($sequenceFile)
#
# Reads a FASTA file and returns a reference to one string holding every
# non-header line concatenated in file order. Header lines (starting with
# ">") are skipped, so all records are joined into a single sequence and
# downstream windows can span record boundaries.
#
# Dies if the file cannot be opened or closed.
sub loadSequence {
    my ($sequenceFile) = @_;

    my $sequence = "";

    open( my $fasta, '<', $sequenceFile )
        or die "Cannot open '$sequenceFile' for reading: $!";

    while ( my $line = <$fasta> ) {
        chomp $line;
        $line =~ s/\r\z//;    # tolerate CRLF line endings
        next if $line =~ /^>/;
        $sequence .= $line;
    }

    close($fasta) or die "Cannot close '$sequenceFile': $!";

    return \$sequence;
}