This is an interesting problem and that's an intruiging algorithm. It would be really nice to see these all implemented in C and compared. I suspect that yours would fair much better.
Which set of data you consider to be more realistic, will determine which algorithm/implementation is better suited to your application I guess. It's not very often that the choice of best algorithm varies so wildly with the input data.
#! perl -slw
use strict;
use Benchmark qw[ cmpthese ];
sub findlcs {
my $substr = $_[0];
my $len = length $_[0];
my $off = 0;
while ($substr) {
my @matches = grep /\Q$substr/, @_;
last if @matches == @_;
$off++;
$len-- and $off=0 if $off+$len > length $_[0];
$substr = substr $_[0], $off, $len;
}
return $substr;
}
sub LCSIndex {
my $substr = $_[0];
my $len = length $_[0];
my $off = 0;
while ($substr) {
my @matches = grep { -1 != index $_, $substr } @_;
last if @matches == @_;
$off++;
$len-- and $off=0 if $off+$len > length $_[0];
$substr = substr $_[0], $off, $len;
}
return $substr;
}
sub find_lcs {
my @index_info;
foreach (@_) {
my $string = $_;
push @index_info, [
\$string,
[ 0..length($string) ],
];
}
my ($length, @pos) = _find_lcs(0, @index_info);
return $length
? substr(${$index_info[0]->[0]}, $pos[0], $length)
: undef;
}
sub _find_lcs {
my ($offset, @index_info) = @_;
my $last_chars;
foreach my $data (@index_info) {
my %chars;
foreach my $pos (@{$data->[1]}) {
my $char = substr(${$data->[0]}, $pos + $offset, 1);
next unless length($char);
next if $last_chars and not exists $last_chars->{$char};
push @{$chars{$char}}, $pos;
}
return 0 unless %chars;
$last_chars = $data->[2] = \%chars;
}
my $best_length = 0;
my @best_pos;
foreach my $char (keys %$last_chars) {
my @index_info_for_char = map {
[ $_->[0], $_->[2]->{$char} ]
} @index_info;
my ($length, @pos) = _find_lcs($offset + 1, @index_info_for_char);
next if $length < $best_length;
$best_length = $length + 1;
if (0 == $length++) {
@best_pos = map {$_->[1]->[0]} @index_info_for_char;
}
else {
@best_pos = @pos;
}
}
return $best_length, @best_pos;
}
sub lcs{
my $strings = join "\0", @_;
study $strings;
my $re = '.*?\0.*?\1' x ( @_ - 1 );
my $lcs;
for ( 1 .. length $strings ) {
last unless $strings =~ "(.{$_})$re";
$lcs = $1
}
return $lcs;
}
our @arrays = (
[ qw(fooabc123 fooabc321 foobca232) ],
[ qw(abcfoo123 bcafoo321 foo123abc) ],
[ qw(foo bor boz bzo) ],
[
'The quick brown fox jump over the lazy dog',
'The quick brown fox jumps over the lazy',
'jumps over the lazy dog The quick brown fox',
'quick brown fox jumps over the lazy dog',
],
[
'The quick brown fox jump over the lazy dog',
'The quick brown fox jumps over the lazy',
'jumps over the lazy dog The quick brown fox',
'quick brown fox jumps over the lazy dog',
'x',
],
[
'The quick brown fox jump over the lazy dog',
'xThe quick brown fox jump over the lazy dog',
'xxThe quick brown fox jump over the lazy dog',
'xxxThe quick brown fox jump over the lazy dog',
'xxxxThe quick brown fox jump over the lazy dog',
],
);
for ( @arrays ) {
print "R: => ", findlcs @$_;
print "C: => ", LCSIndex @$_;
print "B: => ", lcs @$_;
print "T: => ", find_lcs @$_;
}
for our $array ( @arrays ) {
print for $/, @$array;
cmpthese( -5, {
revdiablo => q[ my $lcs = findlcs @$array ],
CombatSqu => q[ my $lcs = LCSIndex @$array ],
BrowserUk => q[ my $lcs = lcs @$array ],
Tilly => q[ my $lcs = find_lcs @$array ],
});
}
__END__
P:\test>junk2
R: => foo
C: => foo
B: => foo
T: => foo
R: => foo
C: => foo
B: => foo
T: => foo
R: => o
C: => o
B: => o
T: => o
R: => quick brown fox
C: => quick brown fox
B: => quick brown fox
T: => quick brown fox
R: => x
C: => x
B: => x
T: => x
R: => The quick brown fox jump over the lazy dog
C: => The quick brown fox jump over the lazy dog
B: => The quick brown fox jump over the lazy dog
T: => The quick brown fox jump over the lazy dog
fooabc123
fooabc321
foobca232
Rate Tilly revdiablo CombatSqu BrowserUk
Tilly 89.6/s -- -45% -85% -85%
revdiablo 163/s 81% -- -72% -72%
CombatSqu 581/s 549% 257% -- -1%
BrowserUk 587/s 554% 261% 1% --
abcfoo123
bcafoo321
foo123abc
Rate Tilly revdiablo CombatSqu BrowserUk
Tilly 91.7/s -- -37% -82% -84%
revdiablo 145/s 58% -- -72% -74%
CombatSqu 514/s 460% 255% -- -9%
BrowserUk 563/s 514% 289% 10% --
foo
bor
boz
bzo
Rate Tilly revdiablo BrowserUk CombatSqu
Tilly 408/s -- -43% -59% -82%
revdiablo 719/s 76% -- -28% -68%
BrowserUk 995/s 144% 38% -- -56%
CombatSqu 2236/s 449% 211% 125% --
The quick brown fox jump over the lazy dog
The quick brown fox jumps over the lazy
jumps over the lazy dog The quick brown fox
quick brown fox jumps over the lazy dog
Rate Tilly revdiablo CombatSqu BrowserUk
Tilly 3.59/s -- -59% -89% -95%
revdiablo 8.75/s 143% -- -74% -87%
CombatSqu 33.4/s 829% 282% -- -51%
BrowserUk 68.6/s 1807% 683% 105% --
The quick brown fox jump over the lazy dog
The quick brown fox jumps over the lazy
jumps over the lazy dog The quick brown fox
quick brown fox jumps over the lazy dog
x
Rate revdiablo CombatSqu BrowserUk Tilly
revdiablo 3.79/s -- -71% -91% -95%
CombatSqu 12.9/s 240% -- -69% -85%
BrowserUk 41.0/s 984% 219% -- -51%
Tilly 83.2/s 2098% 546% 103% --
The quick brown fox jump over the lazy dog
xThe quick brown fox jump over the lazy dog
xxThe quick brown fox jump over the lazy dog
xxxThe quick brown fox jump over the lazy dog
xxxxThe quick brown fox jump over the lazy dog
Rate Tilly BrowserUk revdiablo CombatSqu
Tilly 0.761/s -- -98% -100% -100%
BrowserUk 44.2/s 5714% -- -99% -99%
revdiablo 4728/s 621405% 10589% -- -25%
CombatSqu 6305/s 828641% 14153% 33% --