package String::LCSS::Fast;
use 5.006;
use strict;
use warnings;
use Algorithm::Diff qw(traverse_sequences);
require Exporter;
use vars qw( @ISA @EXPORT_OK $VERSION );
our @ISA = qw(Exporter);
@EXPORT_OK = qw( LCSS CSS CSS_Sorted );
$VERSION = '0.01';
sub _tokenize { [split //, $_[0]] }
sub CSS {
my $is_array = ref $_[0] eq 'ARRAY' ? 1 : 0;
my $sort = $_[2];
my ( $seq1, $seq2, @match, $from_match );
my $i = 0;
if ( $is_array ) {
$seq1 = $_[0];
$seq2 = $_[1];
traverse_sequences( $seq1, $seq2, {
MATCH => sub { push @{$match[$i]}, $seq1->[$_[0]]; $from_m
+atch = 1 },
DISCARD_A => sub { do{$i++; $from_match = 0} if $from_matc
+h },
DISCARD_B => sub { do{$i++; $from_match = 0} if $from_matc
+h },
});
}
else {
$seq1 = _tokenize($_[0]);
$seq2 = _tokenize($_[1]);
traverse_sequences( $seq1, $seq2, {
MATCH => sub { $match[$i] .= $seq1->[$_[0]]; $from_match =
+ 1 },
DISCARD_A => sub { do{$i++; $from_match = 0} if $from_matc
+h },
DISCARD_B => sub { do{$i++; $from_match = 0} if $from_matc
+h },
});
}
return \@match;
}
sub CSS_Sorted {
my $match = CSS(@_);
if ( ref $_[0] eq 'ARRAY' ) {
@$match = map{$_->[0]}sort{$b->[1]<=>$a->[1]}map{[$_,scalar(@$_
+)]}@$match
}
else {
@$match = map{$_->[0]}sort{$b->[1]<=>$a->[1]}map{[$_,length($_)
+]}@$match
}
return $match;
}
sub LCSS {
my $is_array = ref $_[0] eq 'ARRAY' ? 1 : 0;
my $css = CSS(@_);
my $index;
my $length = 0;
if ( $is_array ) {
for( my $i = 0; $i < @$css; $i++ ) {
next unless @{$css->[$i]}>$length;
$index = $i;
$length = @{$css->[$i]};
}
}
else {
for( my $i = 0; $i < @$css; $i++ ) {
next unless length($css->[$i])>$length;
$index = $i;
$length = length($css->[$i]);
}
}
return $css->[$index];
}
1;
__END__
=head1 NAME
String::LCSS::Fast - Perl extension for getting the Longest Common Sub
+String
=head1 SYNOPSIS
use String::LCSS::Fast qw( LCSS CSS CSS_Sorted );
=head1 DESCRIPTION
This module uses Algoritm::Diff to implement LCSS and is orders of mag
+nitude
faster than String::LCSS.
=head1 METHODS
=head2 LCSS
Returns the longest common sub string. If there may be more than one a
+nd
it matters use CSS instead.
my $lcss_ary_ref = LCSS( \@SEQ1, \@SEQ2 ); # ref to array
my $lcss_string = LCSS( $STR1, $STR2 ); # string
=head2 CSS
Returns all the common sub strings, unsorted.
my $css_ary_ref = CSS( \@SEQ1, \@SEQ2 ); # ref to array of arrays
my $css_str_ref = CSS( $STR1, $STR2 ); # ref to array of string
+s
=head2 CSS_Sorted
Returns all the common sub strings, sorted from longest to shortest
my $css_ary_ref = CSS( \@SEQ1, \@SEQ2 ); # ref to array of arrays
my $css_str_ref = CSS( $STR1, $STR2 ); # ref to array of string
+s
=head1 EXPORT
None by default.
=head1 AUTHOR
Dr James Freeman <james.freeman@id3.org.uk>
=head1 SEE ALSO
L<perl>.
=cut
cheers
tachyon
s&&rsenoyhcatreve&&&s&n.+t&"$'$`$\"$\&"&ee&&y&srve&&d&&print
|