#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use constant MIN_LENGTH => 2;
use constant MIN_REPEATS => 2; # Must have at least this many repeats
use Benchmark qw/:all :hireswallclock/;
my $str='aacdabcabcecdecd aabcdabcabcecdecd aabcdabcabcecdecd aabcdabc
+abcecdecd ';
#my $str = <<EOT;
#Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam no
+nummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat vol
+utpat. Ut wisi enim ad minim veniam, quis nostrud exerci tation ullam
+corper suscipit lobortis nisl ut aliquip ex ea commodo consequat. Dui
+s autem vel eum iriure dolor in hendrerit in vulputate velit esse mol
+estie consequat, vel illum dolore eu feugiat nulla facilisis at vero
+eros et accumsan et iusto odio dignissim qui blandit praesent luptatu
+m zzril delenit augue duis dolore te feugait nulla facilisi.
#EOT
sub blazar {
local $_=shift;
my $l=length;
my %count;
for my $off (0..$l-1) {
for my $len (MIN_LENGTH .. $l-$off) {
my $s = substr $_, $off, $len;
$count{ $s } ||= ()= /$s/g;
}
$count{$_} < MIN_REPEATS and
delete $count{$_} for keys %count;
}
\%count;
}
sub oha {
my $s=shift;
my %saw;
while($s =~ /(..+)(?=.*?\1)/g) {
for my $x (0..length $1) {
@saw{ map {substr $1, $x, $_} $x+2..length $1 } = ();
}
}
$saw{$_} =()= $s =~ /\Q$_/g for keys %saw;
\%saw;
}
sub ikegami {
my ($str) = @_;
local our %counts;
$str =~ /(.{2,})(?{ ++$counts{$1} })(?!)/;
delete @counts{ grep $counts{$_}<MIN_REPEATS, keys %counts };
return \%counts;
}
sub lodin {
my ($str) = @_;
#my $min_len = 2; # Substring is at least two chars long.
#my $min_count = 3; # Substring occures at least three times.
my $min_len = MIN_LENGTH;
my $min_count = MIN_REPEATS;
local our %count;
use re 'eval';
$str =~ / (.{$min_len,})
(?(?{ $count{$1} })
(?!)
)
(?>
.*?
\1
){@{[ $min_count - 2 ]}}
.*
\1
(?{ ($count{$1} ||= $min_count-1)++ })
(?!)
/x;
return \%count;
}
{
my %count;
sub kramba {
my( $string) = @_;
my $length = length( $string );
if ($length < MIN_LENGTH) {
for (keys %count) {
delete $count{$_}
if $count{$_} < MIN_REPEATS;
}
return \%count;
}
for my $l (MIN_LENGTH..$length) {
my $s = substr( $string, 0, $l );
$count{ $s } += 1;
}
kramba( substr( $string, 1 ) );
};
}
for my $multiplier (1) {
my $work_str = "$str " x $multiplier;
my $x1 = blazar $work_str;
my $x2 = oha $work_str;
my $x3 = kramba $work_str;
my $x4 = ikegami $work_str;
my $x5 = lodin $work_str;
#print Dumper( [$x1, $x2, $x3, $x4, $x5] );
print "Number of substrings found: \n";
printf( "%9s ", $_ ) for ('blazar', 'oha', 'kramba', 'ikegami', 'lodin
+' );
print "\n";
printf( "%9s ", $_ ) for map { scalar keys %$_ } ($x1, $x2, $x3, $x4,
+$x5);
print "\n";
#exit;
for my $key (keys %$x3) {
if (not exists $x4->{$key}) {
print ("\"$key\": ", $x3->{$key}, "\n");
my $index = index( $work_str, $key );
if ($index < 0) {
print "\tNot found...!!\n";
}
else {
print "\t$work_str\n",
"\t" . '.' x ($index) . $key . "\n";
}
}
}
print "Results for string:\n\n\"$work_str\"\n\n";
cmpthese 500/$multiplier => {
blazar => sub { blazar $work_str },
oha => sub { oha $work_str },
kramba => sub { kramba $work_str },
ikegami => sub { ikegami $work_str },
lodin => sub { lodin $work_str },
}
}
Thank you!