#!/usr/bin/perl use strict; use warnings; use Test::More 'no_plan'; use Benchmark qw/:all :hireswallclock/; my $str='aabcdabcabcecdecd'; sub blazar { my ($str, $min_len, $min_rep)=@_; my $l=length($str); my %count; for my $off (0..$l-1) { for my $len ($min_len .. $l-$off) { my $s = substr $str, $off, $len; $count{ $s } ||= ()= $str =~ /$s/g; } $count{$_} < $min_rep and delete $count{$_} for keys %count; } \%count; } sub kramba { my ($str, $min_len, $min_rep)=@_; my %count; no warnings 'recursion'; local *count = sub { my( $string) = @_; my $length = length( $string ); if ($length < $min_len) { for (keys %count) { delete $count{$_} if $count{$_} < $min_rep; } return \%count; } for my $l ($min_len..$length) { my $s = substr( $string, 0, $l ); $count{ $s } += 1; } count( substr( $string, 1 ) ); }; count($str); \%count; } sub ikegami { my ($str, $min_len, $min_rep)=@_; local our %counts; use re 'eval'; $str =~ / (.{$min_len,}) # or (.+) (?(?{ !$counts{$1} })(?=.*\1)) (?{ ++$counts{$1} }) (?!) /x; for (keys %counts) { delete $counts{$_} if $counts{$_} < $min_rep; } \%counts; } sub lodin { my ($str, $min_len, $min_rep)=@_; local our %count; use re 'eval'; $str =~ / (.{$min_len,}) (?(?{ $count{$1} }) (?!) ) (?> .*? \1 ){@{[ $min_rep - 2 ]}} .* \1 (?{ ($count{$1} ||= $min_rep-1)++ }) (?!) /x; \%count; } { my %cache; sub _reference { $cache{"@_"} ||= blazar @_ } } for my $s ( map {$str x $_} 1,3,42) { for my $len (1..2) { for my $rep (2..3) { my $strlen=length $s; print "\nstring length=$strlen, len=$len, rep=$rep\n\n"; cmpthese +($strlen < 100 ? 10_000 : -60) => { kramba => sub { kramba($s,$len,$rep) }, ikegami => sub { ikegami($s,$len,$rep) }, lodin => sub { lodin($s,$len,$rep) }, }; } } } print "\n"; for my $len (1..3) { for my $rep (2..3) { my $tag="len=$len, rep=$rep"; is_deeply kramba($str,$len,$rep), _reference($str,$len,$rep), "kramba $tag"; is_deeply ikegami($str,$len,$rep), _reference($str,$len,$rep), "ikegami $tag"; is_deeply lodin($str,$len,$rep), _reference($str,$len,$rep), "lodin $tag"; } } __END__