#!/usr/bin/perl use Modern::Perl '2011'; use autodie; # pattern searching using suffix arrays and binary search open my $fh, '<', shift; chomp( my( $pattern, $str ) = <$fh> ); $str .= '$'; my @suff; # hold suffixes while ( 1 < length $str ) { # condition excludes '$' from @suff push @suff, $str; substr ( $str, 0, 1, '' ); } # lexically ordered suffix array my @indices = sort { $suff[$a] cmp $suff[$b] } 0 .. $#suff; #for ( 0 .. $#indices ) { # say $indices[$_]+1, ": ", $suff[ $indices[$_] ]; #} my $start = bsearch( \ @indices, $pattern, \ @suff ); # get consecutive positions, if any, # where pattern matches first n chars of suffix. my @positions; for my $index ( $start .. $#indices ) { last if $suff[ $indices[ $index ] ] !~ /^$pattern/; push @positions, $indices[ $index ] + 1; # omit +1 if 0-based indexing } # print results if ( @positions ) { # check if a match exists at all say "\nPattern \"$pattern\" found at position:"; for my $pos ( sort{ $a <=> $b } @positions ) { #chop( my $s = $suff[ $pos - 1] ); #suffix print "$pos "; } } else { say "\nPattern \"$pattern\" not found in string"; } print "\n"; # binary search. # find first potential match, ie first suffix after or equal to pattern, # such that pattern potentially matches first n characters of suffix. sub bsearch { my ( $indref, $pat, $sufref ) = @_; my $mid; my ( $lo, $hi ) = ( 0, $#$indref ); while ( 1 ) { $mid = int( ( $lo + $hi ) / 2 ); return $mid if $hi == $lo; if( ( $pattern cmp $$sufref[ $$indref[ $mid ] ] ) < 0 ) { $hi = $mid; } else { $lo = $mid + 1; } } }