OK, this isn't golfing, but it's kind of cool anyway. I thought it would be nice to have a generalized number formatter that automagically recognizes columns of numbers and reformats them to KB, MB, etc. (BTW, systems that intermix unit scales are very unfriendly. Outlook does this, for example -- it intermixes bytes, KB and MB -- which makes it almost impossible to compare relative sizes by quickly scanning a list.)
Here are a few examples of the automagical number formatter:
conch:~% du -sk * | perl -MFoo -e k2m
0.01M Foo.pm
6.88M Mail
0.18M Sent
0.00M bin
0.79M public_html
conch:~% ls -l | perl -MFoo -e b2k
total 394
-rw-r--r-- 1 fox guest 6.25K Oct 31 12:00 Foo.pm
drwx------ 2 fox guest 0.50K Oct 31 12:00 Mail
-rw------- 1 fox guest 169.05K Oct 30 15:33 Sent
drwx------ 2 fox guest 0.50K Aug 3 2000 bin
drwxr-xr-x 4 fox guest 0.50K Jul 17 17:38 public_html
conch:~% df -k | perl -MFoo -e k2g
Filesystem 1024-blocks Used Avail Capacity Mounted on
/dev/sd0a 0.06G 0.02G 0.04G 35% /
/dev/sd0h 1.89G 1.52G 0.28G 85% /usr
/dev/sd0g 1.89G 0.87G 0.93G 48% /var
/dev/sd0f 1.89G 0.87G 0.93G 48% /usr/guest1
/dev/sd0e 0.71G 0.38G 0.29G 56% /usr/msen
mfs:25 63471 116 60181 0% /tmp
Here's the Foo.pm module itself. It's too long and messy. Sorry about that. It's also not smart enough to handle columns of numbers that don't quite line up. (OT: Wouldn't it be fun if all our tools could generate XML? And our terminals could automatically format it?)
# Foo - filter that finds columns of integers in text read via <> and
# rewrites them in a larger unit (K, M, G) or simply with thousands
# separators.
package Foo;
use strict;
require Exporter;
use vars qw(@ISA @EXPORT);
@ISA = qw(Exporter);
# Everything below is exported by default so the filters can be run as
# one-liners such as: perl -MFoo -e b2k
@EXPORT = qw(f
b2k k2m m2g
b2m k2g
b2g);
# Text::Tabs supplies expand(), used to turn tabs into spaces before
# column positions are measured.
use Text::Tabs;
# Render a number for display: optionally scale it, insert thousands
# separators, and append a unit suffix.
#
#   $n      - the raw numeric value
#   $scale  - divisor; a value greater than 1 yields a two-decimal result,
#             anything else prints $n as an integer
#   $suffix - text appended to the formatted number (e.g. 'K', 'M')
#
# Returns the formatted string.
sub format_number {
    my ($n, $scale, $suffix) = @_;

    my $shown;
    if ($scale > 1) {
        $shown = sprintf('%.2f', $n / $scale);
    }
    else {
        $shown = sprintf('%d', $n);
    }

    # Each pass inserts one comma in front of the last three digits of the
    # leading digit run; repeat until that run is three digits or shorter.
    1 while $shown =~ s/^(\d+)(\d{3})/$1,$2/;

    return $shown . $suffix;
}
# Pad one cell of a column whose numbers share a common start position.
#
#   $text   - array ref of the row's text pieces (modified in place)
#   $nums   - array ref parallel to $text; an entry is a hash ref with a
#             -start key for numeric pieces and undef for filler text
#   $i      - index of the cell to pad
#   $format - hash ref describing the column; -pos and -width are read
#
# The cell is only touched when it is numeric and starts at the column's
# -pos. It is then padded on the left to -width minus one characters and
# a single separating space is appended.
sub pad_left_justified {
    my ($text, $nums, $i, $format) = @_;

    if (defined $nums->[$i] && $nums->[$i]{-start} == $format->{-pos}) {
        # One character of the column width is reserved for the trailing
        # separator added below.
        my $padding = $format->{-width} - length($text->[$i]) - 1;
        if ($padding > 0) {
            $text->[$i] = (' ' x $padding) . $text->[$i];
        }
        $text->[$i] .= ' ';
    }
    return;
}
# Reformat one cell of a column whose numbers share a common start position.
#
#   $text   - array ref of the row's text pieces (modified in place)
#   $nums   - array ref parallel to $text; an entry is a hash ref with a
#             -start key for numeric pieces and undef for filler text
#   $i      - index of the cell to format
#   $format - hash ref describing the column; -pos is read, -width may grow
#   $scale  - divisor passed through to format_number
#   $suffix - unit suffix passed through to format_number
#
# The cell is only touched when it is numeric and starts at the column's
# -pos. The whitespace that followed the number is removed from the next
# piece, and -width is raised so it covers the original number plus that
# whitespace.
sub format_left_justified {
    my ($text, $nums, $i, $format, $scale, $suffix) = @_;

    if (defined $nums->[$i] && $nums->[$i]{-start} == $format->{-pos}) {
        if ($i < @{$text} - 1) {
            # Strip the gap after the number and remember how wide it was.
            my $gap = $text->[$i + 1] =~ s/^(\s*)// ? length($1) : 0;

            # Compare the number's printed length (not its value) plus the
            # gap against the widest cell seen so far.
            my $needed = length($text->[$i]) + $gap;
            if ($needed > $format->{-width}) {
                $format->{-width} = $needed;
            }
        }
        $text->[$i] = format_number($text->[$i], $scale, $suffix);
    }
    return;
}
# Pad one cell of a column whose numbers share a common end position.
#
#   $text   - array ref of the row's text pieces (modified in place)
#   $nums   - array ref parallel to $text; an entry is a hash ref with an
#             -end key for numeric pieces and undef for filler text
#   $i      - index of the cell to pad
#   $format - hash ref describing the column; -pos and -width are read
#
# Cells that are not numeric, or that do not end at the column's -pos,
# are left alone. Matching cells shorter than -width get leading spaces.
sub pad_right_justified {
    my ($text, $nums, $i, $format) = @_;

    my $cell = $nums->[$i];
    return unless defined $cell;
    return unless $cell->{-end} == $format->{-pos};

    my $shortfall = $format->{-width} - length($text->[$i]);

    # Insert the missing spaces at offset 0, i.e. in front of the number.
    substr($text->[$i], 0, 0, ' ' x $shortfall) if $shortfall > 0;
    return;
}
# Reformat one cell of a column whose numbers share a common end position.
#
#   $text   - array ref of the row's text pieces (modified in place)
#   $nums   - array ref parallel to $text; an entry is a hash ref with an
#             -end key for numeric pieces and undef for filler text
#   $i      - index of the cell to format
#   $format - hash ref describing the column; -pos is read, -width may grow
#   $scale  - divisor passed through to format_number
#   $suffix - unit suffix passed through to format_number
#
# The cell is only touched when it is numeric and ends at the column's
# -pos. The whitespace that preceded the number is removed from the
# previous piece, and -width is raised so it covers the original number
# plus that whitespace.
sub format_right_justified {
    my ($text, $nums, $i, $format, $scale, $suffix) = @_;

    if (defined $nums->[$i] && $nums->[$i]{-end} == $format->{-pos}) {
        if ($i > 0) {
            # Strip the gap before the number and remember how wide it was.
            my $gap = $text->[$i - 1] =~ s/(\s*)$// ? length($1) : 0;

            # Compare the number's printed length (not its value) plus the
            # gap against the widest cell seen so far.
            my $needed = length($text->[$i]) + $gap;
            if ($needed > $format->{-width}) {
                $format->{-width} = $needed;
            }
        }
        $text->[$i] = format_number($text->[$i], $scale, $suffix);
    }
    return;
}
# Pick the entry with the highest vote count from a tally hash.
#
#   %votes - hash mapping a candidate to its count (the prototype makes
#            the caller pass the hash itself; a reference is received)
#
# Returns the two-element list (candidate, count) of the winner, or an
# empty list when the tally is empty. Candidates with equal counts are
# ordered by string comparison of their keys so the result does not
# depend on hash ordering.
sub most_popular(\%) {
    my ($votes) = @_;

    # An empty tally has no winner; without this guard the dereference
    # below would be applied to an undefined value.
    return unless %{$votes};

    my @ranked =
        sort { $b->[1] <=> $a->[1] or $a->[0] cmp $b->[0] }
        map  { [ $_ => $votes->{$_} ] }
        keys %{$votes};

    return @{ $ranked[0] };
}
# Read all input lines via <>, detect columns of integers, rescale the
# numbers in those columns and print the re-aligned lines to the
# currently selected output handle.
#
#   $scale  - divisor handed to format_number for every column value
#   $suffix - unit suffix handed to format_number (e.g. 'K')
#
# A column is only reformatted when its widest number has more than three
# digits and at least half of all input rows have a number at the
# column's most common start or end position. Rows that do not match
# (for example headers) are printed unchanged apart from tab expansion.
# Nothing is printed for empty input.
sub filter_columns {
    my ($scale, $suffix) = @_;

    my @row_text          = ();
    my @row_nums          = ();
    my %field_count_votes = ();

    # Find all the numbers in the input and group
    # them into columns. Non-numeric text is treated
    # as filler between the numeric columns.
    # A lexical is used for the line so the caller's $_ is left alone.
    while (defined(my $line = <>)) {
        chomp $line;
        $line = expand($line);

        # The capturing group makes split return the digit runs as
        # list elements of their own, interleaved with the filler.
        my @text        = split(/\b(\d+)\b/, $line);
        my $field_count = 0;
        my $pos         = 0;
        my @nums        = ();
        foreach my $text (@text) {
            my $length = length($text);
            my $end    = $pos + $length;
            if ($text =~ /^\d+$/) {
                ++$field_count;
                push @nums, { -start  => $pos,
                              -end    => $end,
                              -length => $length };
            }
            else {
                push @nums, undef;
            }
            $pos = $end;
        }
        push @row_text, [ @text ];
        push @row_nums, [ @nums ];
        ++$field_count_votes{$field_count};
    }

    # No input at all: there is nothing to analyse or print.
    return unless @row_text;

    # Reverse engineer the sprintf formats and put the
    # column re-formatting subs into @format.
    # Only rows with the most common number of numeric fields contribute
    # to the per-column statistics.
    my @format = ( );
    my ($popular_field_count) = most_popular(%field_count_votes);
    foreach my $nums (@row_nums) {
        my $field_count = grep { defined $_ } @{$nums};
        next unless ($field_count == $popular_field_count);

        my $i = 0;
        foreach my $cell (@{$nums}) {
            if (defined $cell) {
                ++$format[$i]{-start}{$cell->{-start}};
                ++$format[$i]{-end}{$cell->{-end}};
                if (!$format[$i]{-max_length} ||
                    $cell->{-length} > $format[$i]{-max_length})
                {
                    $format[$i]{-max_length} = $cell->{-length};
                }
            }
            ++$i;
        }
    }

    # Turn each column's statistics into a formatting description, or
    # undef when the column should be left alone. $col aliases the
    # element of @format, so assigning to it replaces the entry.
    foreach my $col (@format) {
        next unless (defined $col);
        if ($col->{-max_length} > 3) {
            my ($start, $start_count) = most_popular(%{$col->{-start}});
            my ($end,   $end_count)   = most_popular(%{$col->{-end}});

            # Whichever edge lines up more often decides the alignment;
            # a tie is treated as right-justified.
            if ($end_count >= $start_count) {
                $col = { -format => \&format_right_justified,
                         -pad    => \&pad_right_justified,
                         -type   => 'right',
                         -pos    => $end,
                         -count  => $end_count,
                         -width  => $col->{-max_length} };
            }
            else {
                $col = { -format => \&format_left_justified,
                         -pad    => \&pad_left_justified,
                         -type   => 'left',
                         -pos    => $start,
                         -count  => $start_count,
                         -width  => $col->{-max_length} };
            }
        }

        # Columns with only short numbers never got a -format above;
        # columns aligned in fewer than half the rows are dropped too.
        if (!$col->{-format} ||
            $col->{-count} < @row_nums / 2)
        {
            $col = undef;
        }
    }

    # Scale and format the columns if the row matches
    # the format, otherwise leave it alone. (Avoid formatting
    # things like column headers.)
    # This pass must finish for every row before padding starts, because
    # it is also what grows each column's -width.
    for my $row (0 .. $#row_text) {
        my $i = 0;
        foreach my $col (@format) {
            if (defined $col) {
                $col->{-format}->($row_text[$row], $row_nums[$row], $i,
                                  $col, $scale, $suffix);
            }
            ++$i;
        }
    }

    # Pad columns and print all the rows.
    for my $row (0 .. $#row_text) {
        my $i = 0;
        foreach my $col (@format) {
            if (defined $col) {
                $col->{-pad}->($row_text[$row], $row_nums[$row], $i,
                               $col);
            }
            ++$i;
        }
        print join('', @{$row_text[$row]}), "\n";
    }
    return;
}
# Exported entry points. Each one runs filter_columns with a fixed divisor
# and unit suffix; the name reads as "<input unit>2<output unit>".

# f: no rescaling and no suffix.
sub f {
    return filter_columns(1, '');
}

# One step up: divide by 1024.
sub b2k {
    return filter_columns(1024, 'K');
}

sub k2m {
    return filter_columns(1024, 'M');
}

sub m2g {
    return filter_columns(1024, 'G');
}

# Two steps up: divide by 1024 squared.
sub b2m {
    return filter_columns(1024 ** 2, 'M');
}

sub k2g {
    return filter_columns(1024 ** 2, 'G');
}

# Three steps up: divide by 1024 cubed.
sub b2g {
    return filter_columns(1024 ** 3, 'G');
}
1;