Your benchmark is not very realistic. As $n doesn't vary, you are completely ignoring the effect of the CPU cache misses that the lookup table may introduce.

If you use a random $n, you will see that the table approach actually becomes considerably slower than the simple one.

I have tried encoding the table in other ways, but have not been able to find one that is good enough:

`#! perl -slw
use strict;
use Time::HiRes qw[ time ];
# Build the lookup tables that the timed loops compare against plain
# shift-and-mask extraction. An 18-bit value is treated as three 6-bit
# fields: bits 17..12, bits 11..6 and bits 5..0.

# @lookup: one array ref [ high, middle, low ] per 18-bit value.
# The array is pre-extended to its final size before it is filled.
my @lookup; $#lookup = 0x3ffff;
$lookup[ $_ ] = [ ( $_ & 0x3f000 ) >> 12, ( $_ & 0xfc0 ) >> 6, $_ & 0x3f ]
    for 0 .. 0x3ffff;

# @nxt / @mid / @bot: the same three fields held in parallel arrays.
# (No timed loop in the visible part of this script reads them.)
my( @nxt, @mid, @bot );
for my $i ( 0 .. 0x3ffff ) {
    $nxt[ $i ] = ( $i & 0x3f000 ) >> 12;
    $mid[ $i ] = ( $i & 0xfc0 ) >> 6;
    $bot[ $i ] = $i & 0x3f;
}

# @lookup3: sparse table indexed by the value with its low 6 bits cleared
# (so only every 64th slot is populated); each entry is [ high, middle ].
my (@lookup3);
$#lookup3 = 0x3ffff;
$lookup3[$_ << 6] = [$_ >> 6, $_ & 0x3f] for 0 .. 0xfff;

# $lookup4: packed string holding three bytes (high, middle, low) per
# 18-bit value. The string is first created at full length and then
# emptied -- presumably so the buffer is already allocated before the
# appends start.
my $lookup4 = 'x' x (3 * (1<<18));
$lookup4 = '';
$lookup4 .= pack CCC => $_ >> 12, ($_>>6) & 0x3f, $_ & 0x3f for 0..0x3ffff;

# $lookup6: packed string holding two bytes (high, middle) per 12-bit
# value, i.e. per 18-bit value with its low 6 bits dropped.
my $lookup6 = 'x' x (2 * (1<<12));
$lookup6 = '';
$lookup6 .= pack CC => $_ >> 6, $_ & 0x3f for 0..0xfff;

print "tables generated";
# Number of timed iterations. It is a package variable so that a value
# supplied from the command line (the -s switch on the #! line enables
# -ITERS=N) is kept; otherwise it defaults to ten million.
our $ITERS //= 10e6;

# One random 18-bit integer per iteration, generated up front so that
# every timed loop walks the same inputs.
my @n;
push @n, int rand 0x40000 for 1 .. $ITERS;

print "sample data generated";
# Deliberately empty sink: every timed loop passes its extracted fields
# here so that each variant performs the same sub call. Uncomment the
# print to inspect the arguments.
sub stuff{
# print "@_";
}
# Baseline: split each value into its fields with shifts and masks only,
# without any lookup table.
my $start = time;
for my $n (@n) {
    stuff(
        ( $n ) >> 18,
        ( $n & 0x0003f000 ) >> 12,
        ( $n & 0x00000fc0 ) >> 6,
        ( $n & 0x0000003f )
    );
}
# Report the mean wall-clock time per iteration.
printf "Shift&and took: %.12f seconds\n", ( time() - $start ) / $ITERS;
# Lookup: fetch all three 6-bit fields from @lookup, indexed by the low
# 18 bits of the value.
$start = time;
for my $n (@n) {
    stuff( $n >> 18, @{ $lookup[ $n & 0x3ffff ] } );
}
# Report the mean wall-clock time per iteration.
printf " Lookup took: %.12f seconds\n", ( time() - $start ) / $ITERS;
# Lookup3: fetch the two upper 6-bit fields from the sparse @lookup3
# (indexed by the value with its low 6 bits cleared) and mask out the
# lowest field directly.
$start = time;
for my $n (@n) {
    stuff( $n >> 18, @{ $lookup3[$n & 0x3ffc0] }, $n & 0x3f );
}
# Report the mean wall-clock time per iteration.
printf " Lookup3 took: %.12f seconds\n", ( time() - $start ) / $ITERS;
# Lookup4: cut the three-byte record for the value out of the packed
# string $lookup4 with substr, then unpack it into three fields.
$start = time;
for my $n (@n) {
    stuff( $n >> 18, unpack CCC => substr($lookup4, 3 * ($n & 0x3ffff), 3));
}
# Report the mean wall-clock time per iteration.
printf " Lookup4 took: %.12f seconds\n", ( time() - $start ) / $ITERS;
# Lookup5: same packed string as Lookup4, but the record is reached by
# building an unpack template that skips ('x<count>') to its offset
# instead of calling substr.
$start = time;
for my $n (@n) {
    stuff( $n >> 18, unpack 'x'.(3 * ($n & 0x3ffff)).'CCC' => $lookup4);
}
# Report the mean wall-clock time per iteration.
printf " Lookup5 took: %.12f seconds\n", ( time() - $start ) / $ITERS;
# Lookup6: fetch the two upper 6-bit fields from the packed string
# $lookup6 via substr and mask out the lowest field directly.
# The record index is ($n & 0x3ffc0) >> 6 and each record is two bytes,
# hence the byte offset ($n & 0x3ffc0) >> 5 and a substr length of 2.
$start = time;
for my $n (@n) {
    stuff( $n >> 18, unpack(CC => substr($lookup6, ($n & 0x3ffc0) >> 5, 2)), $n & 0x3f);
}
# Report the mean wall-clock time per iteration.
printf " Lookup6 took: %.12f seconds\n", ( time() - $start ) / $ITERS;
# Lookup7: same packed string as Lookup6, but the two-byte record is
# reached by building an unpack template that skips ('x<count>') to its
# byte offset instead of calling substr.
$start = time;
for my $n (@n) {
    stuff( $n >> 18, unpack('x'.(($n & 0x3ffc0) >> 5).'CC', $lookup6), $n & 0x3f);
}
# Report the mean wall-clock time per iteration.
printf " Lookup7 took: %.12f seconds\n", ( time() - $start ) / $ITERS;
__END__
Shift&and took: 0.000000783860 seconds
Lookup took: 0.000001267049 seconds
Lookup3 took: 0.000001018672 seconds
Lookup4 took: 0.000001903985 seconds
Lookup5 took: 0.000002110766 seconds
Lookup6 took: 0.000001607903 seconds
Lookup7 took: 0.000001791258 seconds
`

