#! /bin/sh
#
# Emit unique "symbol value" pairs on stdout, one pair per line.
#
# Pipeline stages:
#   1. Read /dev/urandom in 36-byte records and, for each of 200e6 loop
#      iterations, print "<index> <symbol> <value>".  <symbol> is the string
#      part of the record with every character except a-z/A-Z deleted; a
#      record that leaves an empty symbol is retried with the next record.
#   2. Sort on field 2 onward, then drop every line whose symbol equals the
#      symbol of the previous line.
#   3. Sort on field 3 onward, then drop every line whose value equals the
#      value of the previous line.
#   4. sort -n orders the survivors numerically, i.e. by the leading index.
#   5. Strip the leading index column, leaving "<symbol> <value>".
#
# NOTE(review): the transcripts at the end of this file read a file named
# "data"; presumably that is this script's stdout redirected -- confirm.

perl -E '
    $/ = \36;    # fixed-size reads: each <> returns one 36-byte record
    for (1 .. 200e6) {
        # NOTE(review): the unpack template was missing from the source
        # (it read "unpack q,<>;").  "QA*" (8-byte unsigned value followed
        # by the remaining 28 bytes as the string) is a reconstruction that
        # fits the 36-byte record size -- confirm against the original.
        ($v, $s) = unpack q[QA*], <>;
        say qq($_ @{[$s =~ y/a-zA-Z//cdr || redo]} $v);
    }
' /dev/urandom |
    sort -k2 |
    perl -ane 'print if $prev ne $F[1]; $prev = $F[1]' |
    sort -k3 |
    perl -ane 'print if $prev ne $F[2]; $prev = $F[2]' |
    sort -n |
    perl -pe 's/\S*\s*//'

####
# Recorded transcript: count of symbols per symbol length in the data file.
#
# $ perl -anE '$h[length$F[0]]++ }{ say "@h";' data
# 52 2704 140608 7190883 35010259 35855576 28751420 19240344 10899542 5278104 2202814 795438 249757 68269 16155 3388 640 89 12 1

####
# Recorded transcript: benchmark log (10000000 pairs).
#
# [ 1.303844] data ALLOCATED; tab = 160002048, ss = 14680064 (10000000 pairs)
# [ 5.478031] built BDZ for syms
# [ 2.873171] inplace REORDER
# [ 20.015568] built CHM for vals
# [ 0.000028] mph size when packed: syms = 3459398, vals = 83600028
# [ 0.522367] fgets loop; lines=10000000
# [ 1.195339] fgets+strtoul; lines=10000000
# [ 2.235220] SYMS fetch; found=10000000
# [ 2.940386] VALS fetch; found=10000000
# [ 2.709484] VRFY sym to val; matched=10000000
# [ 4.258673] VRFY two-way; matched=10000000

####
# Recorded transcript: second benchmark log.
#
# $ ./a.out 10000000 data
# [0000000000.000000] begin
# [0000000001.299480] data READ and ALLOCATED N=10000000; tab = 160002048, ss = 14680064
# [0000000008.945925] built BDZ for syms
# [0000000000.003773] mph dump & reload
# [0000000005.915059] inplace REORDER done
# [0000000036.457684] built CHM for vals
# [0000000000.086798] mph dump & reload
# mph size when packed: syms = 3459398, vals = 83600028
# [0000000006.374049] completed VERIFY
# [0000000000.518565] fgets loop
# [0000000001.192738] fgets+strtoul loop
# [0000000003.706227] SYMS search and compare cksum=49999995000000
# [0000000004.366025] VALS search and compare cksum=49999995000000
# [0000000000.000012] done.