in reply to Re^2: Locating a specified number of contiguous 0 bits within a large bitstring efficiently. in thread Locating a specified number of contiguous 0 bits within a large bitstring efficiently.
Using index looking for contiguous NULL bytes in the vector seems to be much faster.
use strict;
use warnings;
use 5.014;
use List::Util qw{ max };
use Time::HiRes qw{ gettimeofday tv_interval };
my $t0 = [ gettimeofday() ];
srand 1234567;
my $vec = q{};
vec( $vec, 536_870_911, 1 ) = 0;
vec( $vec, $_ , 1 ) = 1 for 1 .. 21143;
vec( $vec, int rand 536_870_911, 1 ) = 1 for 1 .. 1e7;
my $t1 = [ gettimeofday() ];
say qq{Creating vector  @{ [ tv_interval( $t0, $t1 ) ] }};
my $bitStr = unpack q{b*}, $vec;
my $t2 = [ gettimeofday() ];
say qq{Unpacking bitstring  @{ [ tv_interval( $t1, $t2 ) ] }};
say
qq{Longest contiguous block of zeros is },
max map length, $bitStr =~ m{(0+)}g,
q{ bits long};
my $t3 = [ gettimeofday() ];
say qq{Finding longest block  @{ [ tv_interval( $t2, $t3 ) ] }};
say qq{\nSearch using regex};
for my $numZeros ( 25, 10, 78, 3, 943, 307, 5, 599, 19, 345 )
{
my $ts = [ gettimeofday() ];
say
qq{At least $numZeros contiguous 0s },
$bitStr =~ m{(0{$numZeros,})}
? qq{found at offset $[ 0 ], length @{ [ length $1 ] }}
: q{could not be found};
say qq{ Search took  @{ [ tv_interval( $ts, [ gettimeofday()
+] ) ] }};
}
say qq{\nSearch using index};
for my $numZeros ( 25, 10, 78, 3, 943, 307, 5, 599, 19, 345 )
{
my $ts = [ gettimeofday() ];
if ( $numZeros < 23 )
{
my $buffer = q{};
my $offset = 1;
my $bufStart = 0;
my $lookFor = q{0} x $numZeros;
while ( $bufStart < length $vec )
{
$buffer = unpack q{b*}, substr $vec, $bufStart, 131;
do {
say
qq{At least $numZeros contiguous 0s found at },
$bufStart * 8 + $offset;
last;
} unless ( $offset = index $buffer, $lookFor ) == 1;
$bufStart += 128;
}
say qq{At least $numZeros contiguous 0s could not be found}
if $offset == 1;
}
else
{
my $wholeBytes = int( ( $numZeros  7 ) / 8 );
my $lookFor = qq{\0} x $wholeBytes;
my $offset = 1;
my $zerosToTheLeft = 0;
my $zerosToTheRight = 0;
while ( ( $offset = index $vec, $lookFor, $offset ) > 1 )
{
$zerosToTheLeft = zerosToTheLeft( $offset );
$zerosToTheRight = zerosToTheRight( $offset, $wholeBytes )
+;
last if ( $wholeBytes * 8 + $zerosToTheLeft + $zerosToTheR
+ight )
>= $numZeros;
$offset += $wholeBytes;
}
if ( $offset == 1 )
{
say qq{At least $numZeros contiguous 0s could not be found
+};
}
else
{
say
qq{At least $numZeros contiguous 0s found at },
$offset * 8  $zerosToTheLeft;
}
}
say qq{ Search took  @{ [ tv_interval( $ts, [ gettimeofday()
+] ) ] }};
}
sub zerosToTheLeft
{
my $offset = shift;
return 0 unless $offset;
my $byteStr = unpack q{b*}, substr $vec, $offset  1, 1;
return 0 unless $byteStr =~ m{(0+)$};
return length $1;
}
sub zerosToTheRight
{
my( $offset, $wholeBytes ) = @_;
return 0 if ( $offset + $wholeBytes ) == length $vec;
my $byteStr = unpack q{b*}, substr $vec, $offset + $wholeBytes, 2;
return 0 unless $byteStr =~ m{^(0+)};
return length $1;
}
The output.
Creating vector  6.651795
Unpacking bitstring  2.116776
Longest contiguous block of zeros is 843
Finding longest block  9.871085
Search using regex
At least 25 contiguous 0s found at offset 21144, length 65
Search took  1.684111
At least 10 contiguous 0s found at offset 21144, length 65
Search took  0.737168
At least 78 contiguous 0s found at offset 21302, length 94
Search took  0.701232
At least 3 contiguous 0s found at offset 21144, length 65
Search took  0.704558
At least 943 contiguous 0s could not be found
Search took  12.084963
At least 307 contiguous 0s found at offset 31289, length 343
Search took  0.658849
At least 5 contiguous 0s found at offset 21144, length 65
Search took  0.702935
At least 599 contiguous 0s found at offset 5476471, length 625
Search took  0.822874
At least 19 contiguous 0s found at offset 21144, length 65
Search took  0.702927
At least 345 contiguous 0s found at offset 70112, length 351
Search took  0.703438
Search using index
At least 25 contiguous 0s found at 21144
Search took  6.5e05
At least 10 contiguous 0s found at 21144
Search took  0.000149
At least 78 contiguous 0s found at 21302
Search took  4.1e05
At least 3 contiguous 0s found at 21144
Search took  0.00014
At least 943 contiguous 0s could not be found
Search took  0.959815
At least 307 contiguous 0s found at 31289
Search took  8e05
At least 5 contiguous 0s found at 21144
Search took  0.00015
At least 599 contiguous 0s found at 5476471
Search took  0.009874
At least 19 contiguous 0s found at 21144
Search took  0.000154
At least 345 contiguous 0s found at 70112
Search took  0.000122
This looks a lot more encouraging, I hope it can be adapted for your needs.
Re^4: Locating a specified number of contiguous 0 bits within a large bitstring efficiently. by BrowserUk (Pope) on Jun 07, 2013 at 20:34 UTC 
This looks a lot more encouraging, I hope it can be adapted for your needs.
I wholeheartedly agree. Those timings make this a much more viable approach. Thank you. I will add it to my benchmarking.
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks  Silence betokens consent  Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
 [reply] 
