#!/usr/bin/perl use strict; use warnings; use File::Slurp qw(slurp); use Benchmark qw(cmpthese); sub binary_search { my ($str, $a) = @_; my $l = 0; my $h = @$a; while ($l < $h) { my $p = int (($l + $h) / 2); if ($a->[$p] lt $str) { $l = $p + 1; } else { $h = $p; } } $l } sub make_start { my $a = shift; my $last = $a->[0]; my @start = (0); for my $ix (1..$#$a) { my $current = $a->[$ix]; if ($current ne $last) { push @start, $ix; $last = $current; } } return \@start; } chomp (my @words = slurp '/usr/share/dict/words'); @words = grep /^\w+$/, @words; for my $size (100, 1000, 100_000, 1_000_000) { for my $dups (3, 10, 100) { next unless $size > $dups; for my $reps (100, 100_000, 1_000_000) { print "size: $size, dups: $dups, reps: $reps\n"; # generate data: my @a = map $words[rand @words], 1..1+($size/$dups); push @a, $a[rand @a] while @a < $size; @a = sort @a; cmpthese(-30, { naive => sub { my $ix; $ix = binary_search($a[rand @a], \@a) for (1..$reps); }, salva => sub { my $ix; my $start = make_start(\@a); my @a_start = @a[@$start]; $ix = $start->[binary_search($a[rand @a], \@a_start)] for (1..$reps); } }); print "\n"; } } }