#!/usr/bin/perl # # pm_sample_histo_test.pl # # Make a brief histogram to see if there's any obvious bias in the sampler. # Assumes you're sampling from a file containing a million lines, each of # which contains just the line number. # use 5.10.1; use strict; use warnings; my %H; for my $FName (@ARGV) { print "-- $FName --\n"; open my $FH, '<', $FName or die "can't open $FName\n"; while (<$FH>) { next if /^random/; my $I = int $_/10000; $H{$I}++; } } my $max=0; for (keys %H) { $max = $H{$_} if $H{$_} > $max; } for my $I (0 .. 99) { my $bar = substr('*' x int(50 * ($H{$I}/$max)) . ' 'x50, 0, 50); if ($I > 1 and $I < 98) { my $avg; my $sum = 0; $sum += $H{$_} for $I-2 .. $I+2; $avg = $sum/5; substr($bar,int(50 * ($avg/$max)),1)='+'; } printf "% 3u: (% 3u) %s\n", $I, $H{$I} // 0, $bar; }