The least bias would be
%a = 1..2e6; %b = 1..2e6;
cmpthese -1, {
A=>q[ scalar keys %a, ++$i for 1 .. 10000 ],
B=>q[ scalar keys %b and ++$i for 1 .. 10000 ],
C=>q[ my $x = keys %a, ++$i for 1 .. 10000 ],
D=>q[ my $x = keys %b and ++$i for 1 .. 10000 ],
}
__END__
Rate D C A B
D 280/s -- -4% -31% -31%
C 290/s 4% -- -28% -28%
A 404/s 44% 39% -- -1%
B 406/s 45% 40% 1% --
Testing with empty %b gives
%a = 1..2e6; %b = ();
cmpthese -1, {
A=>q[ scalar keys %a, ++$i for 1 .. 10000 ],
B=>q[ scalar keys %b, ++$i for 1 .. 10000 ],
C=>q[ my $x = keys %a, ++$i for 1 .. 10000 ],
D=>q[ my $x = keys %b, ++$i for 1 .. 10000 ],
}
__END__
Rate C D A B
C 288/s -- -2% -30% -31%
D 293/s 2% -- -28% -29%
A 410/s 43% 40% -- -1%
B 415/s 44% 41% 1% --
from which I would conclude that there's a minimal difference between testing an empty and a full hash.
update: moritz code using these instead of sub {die}:
use strict;
use warnings;
use Benchmark qw(cmpthese);
for my $len (2..6) {
our (%full, %empty);
my $a = 'A';
$full{$a++} = $a while length($a) < $len;
print "\nLength $len with ", scalar(%full), "items\n";
cmpthese -1, {
full => q[scalar keys %full, ++$i for 1 .. 10000],
empty => q[scalar keys %empty, ++$i for 1 .. 10000],
}
}
__END__
Length 2 with 22/32items
Rate empty full
empty 350/s -- -15%
full 411/s 17% --
Length 3 with 510/1024items
Rate full empty
full 406/s -- -2%
empty 415/s 2% --
Length 4 with 13961/32768items
Rate full empty
full 409/s -- -1%
empty 414/s 1% --
Length 5 with 310721/524288items
Rate empty full
empty 414/s -- 0%
full 414/s 0% --
Length 6 with 8655839/16777216items
Rate empty full
empty 407/s -- -4%
full 426/s 5% --
But! If I introduce a third test condition which also tests for %full, I get:
...
cmpthese -1, {
full => q[scalar keys %full, ++$i for 1 .. 10000],
empty => q[scalar keys %empty, ++$i for 1 .. 10000],
fake => q[scalar keys %full, ++$i for 1 .. 10000],
}
}
__END__
Length 2 with 22/32items
Rate fake empty full
fake 403/s -- -2% -3%
empty 411/s 2% -- -2%
full 417/s 4% 2% --
Length 3 with 510/1024items
Rate fake full empty
fake 415/s -- -1% -1%
full 418/s 1% -- -0%
empty 419/s 1% 0% --
Length 4 with 13961/32768items
Rate full empty fake
full 406/s -- -2% -2%
empty 414/s 2% -- 0%
fake 414/s 2% 0% --
Length 5 with 310721/524288items
Rate fake full empty
fake 406/s -- -0% -1%
full 407/s 0% -- -1%
empty 411/s 1% 1% --
Length 6 with 8655839/16777216items
Rate fake full empty
fake 400/s -- -3% -3%
full 411/s 3% -- -0%
empty 411/s 3% 0% --
using a sub instead of a quoted string
...
cmpthese -1, {
full => sub{scalar keys %full, ++$i },
empty => sub{scalar keys %empty, ++$i },
fake => sub{scalar keys %full, ++$i },
}
}
__END__
Length 2 with 22/32items
Rate empty full fake
empty 4955017/s -- -4% -5%
full 5167154/s 4% -- -1%
fake 5215901/s 5% 1% --
Length 3 with 510/1024items
Rate full empty fake
full 5038923/s -- -4% -5%
empty 5265576/s 4% -- -1%
fake 5293291/s 5% 1% --
Length 4 with 13961/32768items
Rate empty full fake
empty 4633858/s -- -1% -5%
full 4677165/s 1% -- -4%
fake 4858803/s 5% 4% --
Length 5 with 310721/524288items
Rate full fake empty
full 4812084/s -- -5% -7%
fake 5041231/s 5% -- -3%
empty 5193418/s 8% 3% --
Length 6 with 8655839/16777216items
Rate fake full empty
fake 4689731/s -- -4% -16%
full 4906438/s 5% -- -12%
empty 5604762/s 20% 14% --
Hmm... let's use moritz's code with strings instead of hashes:
for my $len (2..6) {
our ($full, $empty);
my $a = 'A';
$full .= $a while length($full) < $len;
print "\nLength $len with ", scalar($full), " items\n";
cmpthese -1, {
full => sub { die unless $full },
empty => sub { die if $empty },
fake => sub { die unless $full },
}
}
__END__
Length 2 with AA items
Rate empty full fake
empty 29399327/s -- -5% -35%
full 31036076/s 6% -- -31%
fake 44964665/s 53% 45% --
...
It looks like the iteration/sub call overhead is shadowing the scalar %hash lookup, and moritz has been testing something else...
I'm re-reading No More Meaningless Benchmarks! ;-)