in reply to
Re^2: Locating a specified number of contiguous 0 bits within a large bitstring efficiently.

in thread Locating a specified number of contiguous 0 bits within a large bitstring efficiently.

Using index looking for contiguous NULL bytes in the vector seems to be much faster.

`use strict;
use warnings;
use 5.014;
use List::Util qw{ max };
use Time::HiRes qw{ gettimeofday tv_interval };
my $t0 = [ gettimeofday() ];
srand 1234567;
my $vec = q{};
vec( $vec, 536_870_911, 1 ) = 0;
vec( $vec, $_ , 1 ) = 1 for 1 .. 21143;
vec( $vec, int rand 536_870_911, 1 ) = 1 for 1 .. 1e7;
my $t1 = [ gettimeofday() ];
say qq{Creating vector - @{ [ tv_interval( $t0, $t1 ) ] }};
my $bitStr = unpack q{b*}, $vec;
my $t2 = [ gettimeofday() ];
say qq{Unpacking bitstring - @{ [ tv_interval( $t1, $t2 ) ] }};
say
qq{Longest contiguous block of zeros is },
max map length, $bitStr =~ m{(0+)}g,
q{ bits long};
my $t3 = [ gettimeofday() ];
say qq{Finding longest block - @{ [ tv_interval( $t2, $t3 ) ] }};
say qq{\nSearch using regex};
for my $numZeros ( 25, 10, 78, 3, 943, 307, 5, 599, 19, 345 )
{
my $ts = [ gettimeofday() ];
say
qq{At least $numZeros contiguous 0s },
$bitStr =~ m{(0{$numZeros,})}
? qq{found at offset $-[ 0 ], length @{ [ length $1 ] }}
: q{could not be found};
say qq{ Search took - @{ [ tv_interval( $ts, [ gettimeofday()
+] ) ] }};
}
say qq{\nSearch using index};
for my $numZeros ( 25, 10, 78, 3, 943, 307, 5, 599, 19, 345 )
{
my $ts = [ gettimeofday() ];
if ( $numZeros < 23 )
{
my $buffer = q{};
my $offset = -1;
my $bufStart = 0;
my $lookFor = q{0} x $numZeros;
while ( $bufStart < length $vec )
{
$buffer = unpack q{b*}, substr $vec, $bufStart, 131;
do {
say
qq{At least $numZeros contiguous 0s found at },
$bufStart * 8 + $offset;
last;
} unless ( $offset = index $buffer, $lookFor ) == -1;
$bufStart += 128;
}
say qq{At least $numZeros contiguous 0s could not be found}
if $offset == -1;
}
else
{
my $wholeBytes = int( ( $numZeros - 7 ) / 8 );
my $lookFor = qq{\0} x $wholeBytes;
my $offset = -1;
my $zerosToTheLeft = 0;
my $zerosToTheRight = 0;
while ( ( $offset = index $vec, $lookFor, $offset ) > -1 )
{
$zerosToTheLeft = zerosToTheLeft( $offset );
$zerosToTheRight = zerosToTheRight( $offset, $wholeBytes )
+;
last if ( $wholeBytes * 8 + $zerosToTheLeft + $zerosToTheR
+ight )
>= $numZeros;
$offset += $wholeBytes;
}
if ( $offset == -1 )
{
say qq{At least $numZeros contiguous 0s could not be found
+};
}
else
{
say
qq{At least $numZeros contiguous 0s found at },
$offset * 8 - $zerosToTheLeft;
}
}
say qq{ Search took - @{ [ tv_interval( $ts, [ gettimeofday()
+] ) ] }};
}
sub zerosToTheLeft
{
my $offset = shift;
return 0 unless $offset;
my $byteStr = unpack q{b*}, substr $vec, $offset - 1, 1;
return 0 unless $byteStr =~ m{(0+)$};
return length $1;
}
sub zerosToTheRight
{
my( $offset, $wholeBytes ) = @_;
return 0 if ( $offset + $wholeBytes ) == length $vec;
my $byteStr = unpack q{b*}, substr $vec, $offset + $wholeBytes, 2;
return 0 unless $byteStr =~ m{^(0+)};
return length $1;
}
`

The output.

`Creating vector - 6.651795
Unpacking bitstring - 2.116776
Longest contiguous block of zeros is 843
Finding longest block - 9.871085
Search using regex
At least 25 contiguous 0s found at offset 21144, length 65
Search took - 1.684111
At least 10 contiguous 0s found at offset 21144, length 65
Search took - 0.737168
At least 78 contiguous 0s found at offset 21302, length 94
Search took - 0.701232
At least 3 contiguous 0s found at offset 21144, length 65
Search took - 0.704558
At least 943 contiguous 0s could not be found
Search took - 12.084963
At least 307 contiguous 0s found at offset 31289, length 343
Search took - 0.658849
At least 5 contiguous 0s found at offset 21144, length 65
Search took - 0.702935
At least 599 contiguous 0s found at offset 5476471, length 625
Search took - 0.822874
At least 19 contiguous 0s found at offset 21144, length 65
Search took - 0.702927
At least 345 contiguous 0s found at offset 70112, length 351
Search took - 0.703438
Search using index
At least 25 contiguous 0s found at 21144
Search took - 6.5e-05
At least 10 contiguous 0s found at 21144
Search took - 0.000149
At least 78 contiguous 0s found at 21302
Search took - 4.1e-05
At least 3 contiguous 0s found at 21144
Search took - 0.00014
At least 943 contiguous 0s could not be found
Search took - 0.959815
At least 307 contiguous 0s found at 31289
Search took - 8e-05
At least 5 contiguous 0s found at 21144
Search took - 0.00015
At least 599 contiguous 0s found at 5476471
Search took - 0.009874
At least 19 contiguous 0s found at 21144
Search took - 0.000154
At least 345 contiguous 0s found at 70112
Search took - 0.000122
`

This looks a lot more encouraging, I hope it can be adapted for your needs.