# Pure-Perl Elias-gamma-style packing of positive integers into a bit string.
#
# Each value N >= 1 is written as:
#   * floor(log2(N)) zero bits                (length prefix),
#   * a single 1 bit                          (terminator / implicit top bit),
#   * the low floor(log2(N)) bits of N, least-significant bit first.
# Bits are addressed with vec(..., 1), so the final byte is zero-padded.

# log2( $n ) -> floor( log2( $n ) ) for $n >= 1.
#
# For values that fit a native unsigned integer the answer is derived from
# the length of the binary rendering, which is exact.  A floating-point
# log() quotient rounds up just below large powers of two (for example
# 2**53 - 1), which would make the packer emit a code one bit too long.
# Magnitudes too large for a native unsigned integer cannot be rendered
# with %b, so they keep the legacy floating-point estimate.
# Dies when the argument is undefined or less than 1.
sub log2 {
    my ($n) = @_;

    die "log2: argument must be a number >= 1"
        unless defined $n && $n >= 1;

    return int( ( log($n) + 1e-15 ) / log(2) ) if $n > ~0;

    return length( sprintf '%b', $n ) - 1;
}

# PP_EliasPack( @numbers ) -> packed byte string.
#
# Every element must be >= 1 (log2 dies otherwise); an empty list yields ''.
sub PP_EliasPack {
    my $packed = '';
    my $out    = 0;    # index of the next bit to be written

    for my $num (@_) {
        my $len = log2($num);

        # Skipping ahead leaves $len zero bits: vec() zero-fills whatever it
        # has to add when the string is extended by the assignment below.
        $out += $len;
        vec( $packed, $out++, 1 ) = 1;

        # Payload, least-significant bit first.  Zero bits are assigned too,
        # so the string always covers every bit of the code.
        for my $bit ( 0 .. $len - 1 ) {
            vec( $packed, $out++, 1 ) = ( $num >> $bit ) & 1;
        }
    }

    return $packed;
}

# PP_EliasUnpack( $packed ) -> list of the numbers stored by PP_EliasPack.
#
# Trailing zero bits with no terminator are treated as padding and ignored.
# If the string ends in the middle of a payload, the missing bits read as 0
# (vec() returns 0 for elements past the end of the string).
sub PP_EliasUnpack {
    my $packed = shift;
    my $bits   = length($packed) * 8;
    my $in     = 0;    # index of the next bit to be read
    my @unpacked;

    CODE:
    while ( $in < $bits ) {

        # Count the zero-bit prefix and remember whether a terminator was
        # really seen.  Checking $in == $bits afterwards is not enough: a
        # trailing value of 1 has its terminator on the very last bit, and
        # would be mistaken for padding and dropped.
        my $len        = 0;
        my $terminated = 0;
        while ( $in < $bits ) {
            if ( vec( $packed, $in++, 1 ) ) {
                $terminated = 1;
                last;
            }
            $len++;
        }
        last CODE unless $terminated;

        my $num = 1 << $len;    # implicit top bit
        for my $bit ( 0 .. $len - 1 ) {
            $num |= 1 << $bit if vec( $packed, $in++, 1 );
        }
        push @unpacked, $num;
    }

    return @unpacked;
}

#### Recorded output kept from the original file (presumably a benchmark of
#### these routines against other encodings; the timings predate any later
#### changes to the code above).
# C:\test>678848
# Run with 15000 unique words in 1000 documents (Ave: 554 words/doc)
# ASCII uses 4755336 bytes
# W-BER uses 3196819 bytes
# Binary uses 3279128 bytes
# Elias uses 3980063 bytes
# PP_Elias uses 3980063 bytes
# 1 trial of Packing ascii (10.203s total)
# 1 trial of Unpacking ascii (3.159s total)
# 1 trial of Packing W-BER (18.159s total)
# 1 trial of Unpacking W-BER (1.516s total)
# 1 trial of Packing binary (9.910s total)
# 1 trial of Unpacking binary (1.455s total)
# 1 trial of Packing Elias (13.613s total)
# 1 trial of Unpacking Elias (2.739s total)
# 1 trial of Packing PP_Elias (31.128s total)
# 1 trial of Unpacking PP_Elias (18.094s total)