#! perl -slw
# Fuzzy-match every key in a key file against every sequence in a sequence
# file and report each aligned window that differs from the key in at most
# $FUZZY characters.
#
# Usage: script [-FUZZY=n] keyfile seqfile
#
#   keyfile   one key per line
#   seqfile   one sequence per line
#   -FUZZY=n  maximum number of mismatching characters per match (default 2);
#             parsed by perl's -s switch on the #! line
#
# Matches are printed to STDOUT; progress and summary go to STDERR.
use strict;
use warnings;
use bytes;

# Set from the command line by -s.  Default only when absent or empty so that
# an explicit -FUZZY=0 (exact matching) is honoured.
our $FUZZY;
$FUZZY = 2 unless defined $FUZZY && length $FUZZY;
$FUZZY =~ /\A\d+\z/
    or die "FUZZY must be a non-negative integer, got '$FUZZY'\n";

# find_fuzzy_matches( $seq, $key, $fuzzy )
#
# Returns a list of array refs [ $offset1, $offset2, $fuz ], one for each
# position $offset1 + $offset2 in $seq at which $key matches with $fuz
# (<= $fuzzy) mismatching characters.  $offset1 is the shift applied to the
# sequence (0 .. length($key) - 1) and $offset2 is the start of the
# key-length window within the shifted sequence.  Returns an empty list when
# $key is empty or longer than $seq.
sub find_fuzzy_matches {
    my ( $seq, $key, $fuzzy ) = @_;

    my $seq_len = length $seq;
    my $key_len = length $key;
    return if $key_len == 0 or $key_len > $seq_len;

    # The key repeated often enough to cover the whole sequence.
    my $mask     = $key x ( int( $seq_len / $key_len ) + 1 );
    my $mask_len = length $mask;

    # With at most $fuzzy mismatches spread over a key-length window, at
    # least one run of this many consecutive matching characters must exist.
    # Such a run shows up as NUL bytes after the XOR, so searching for it
    # finds candidate windows cheaply.
    my $min_zeros = "\0" x int( $key_len / ( $fuzzy + 1 ) );

    my @matches;
    for my $offset1 ( 0 .. $key_len - 1 ) {

        # No window can fit once the shifted key runs past the sequence end.
        last if $offset1 + $key_len > $seq_len;

        # Equal characters XOR to NUL; differing characters do not.
        my $masked = $mask ^ substr( $seq, $offset1, $mask_len );

        my $pos = 0;
        while ( ( my $found = index $masked, $min_zeros, $pos ) >= 0 ) {

            # Snap back to the start of the key-aligned window holding the run.
            my $offset2 = $found - ( $found % $key_len );
            last if $offset1 + $offset2 + $key_len > $seq_len;

            # Mismatches = window size minus the number of NUL bytes in it.
            my $fuz = $key_len
                - ( substr( $masked, $offset2, $key_len ) =~ tr/\0// );
            push @matches, [ $offset1, $offset2, $fuz ] if $fuz <= $fuzzy;

            # Resume the search at the next window.
            $pos = $offset2 + $key_len;
        }
    }
    return @matches;
}

@ARGV >= 2 or die "Usage: $0 [-FUZZY=n] keyfile seqfile\n";
my ( $key_file, $seq_file ) = @ARGV;

open my $key_fh, '<', $key_file or die "$key_file : $!";
my @keys = <$key_fh>;
close $key_fh;
chomp @keys;

# A zero-length key cannot be matched and would divide by zero.
@keys = grep { length } @keys;
warn 'Loaded ', scalar @keys, ' keys';

open my $seq_fh, '<', $seq_file or die "$seq_file : $!";

my $total_len = 0;
my $seq_count = 0;
while ( my $seq = <$seq_fh> ) {
    chomp $seq;
    $seq_count++;
    $total_len += length $seq;

    for my $key (@keys) {
        for my $match ( find_fuzzy_matches( $seq, $key, $FUZZY ) ) {
            my ( $offset1, $offset2, $fuz ) = @{$match};

            # The key is passed as an argument, not interpolated into the
            # format, so a '%' inside a key cannot corrupt the output.
            printf "\tFuzzy matched key:'%s' -v- '%s' in line:"
                . "%2d @ %6d (%6d+%6d) with fuzziness: %d\n",
                $key,
                substr( $seq, $offset1 + $offset2, length $key ),
                $seq_count,
                $offset1 + $offset2,
                $offset1,
                $offset2,
                $fuz;
        }
    }
}
close $seq_fh;

warn "\n\nProcessed $seq_count sequences";
warn 'Average length: ', ( $seq_count ? $total_len / $seq_count : 0 );