package Foo;

# Filters that read text from <>, locate aligned columns of integers,
# optionally scale them (bytes -> K, K -> M, ...), insert thousands
# separators, and print the text back out with the columns re-aligned.

use strict;
use warnings;

require Exporter;
use Text::Tabs;

our @ISA    = qw(Exporter);
our @EXPORT = qw(f b2k k2m m2g b2m k2g b2g);

# Render one number as text.
#   $n      - the integer to format
#   $scale  - divisor; when greater than 1 the result is $n / $scale
#             with two decimal places, otherwise $n is printed as an integer
#   $suffix - string appended to the result (e.g. 'K', 'M', or '')
# Returns the formatted string with commas between groups of three digits
# in the integer part.
sub format_number {
    my ($n, $scale, $suffix) = @_;

    $n = ($scale > 1)
        ? sprintf('%.2f', $n / $scale)
        : sprintf('%d', $n);

    # Repeatedly split the last three digits off the leading digit run.
    1 while $n =~ s/^(\d+)(\d{3})/$1,$2/;

    return $n . $suffix;
}

# Pad cell $i of a row that belongs to a left-justified column.
#   $text   - array ref of the row's text pieces (modified in place)
#   $nums   - array ref parallel to $text; a hash ref for numeric pieces,
#             undef for filler
#   $i      - index of the piece to pad
#   $format - column description with -pos and -width
# Only pieces whose -start equals the column's -pos are touched.  The piece
# is padded with leading spaces to -width minus one and a single trailing
# space is appended.
sub pad_left_justified {
    my ($text, $nums, $i, $format) = @_;

    return unless defined $nums->[$i]
        && $nums->[$i]{-start} == $format->{-pos};

    my $padding = $format->{-width} - length($text->[$i]) - 1;
    if ($padding > 0) {
        $text->[$i] = (' ' x $padding) . $text->[$i];
    }
    $text->[$i] .= ' ';

    return;
}

# Format cell $i of a row that belongs to a left-justified column.
#   $text, $nums, $i, $format - as for pad_left_justified
#   $scale, $suffix           - passed through to format_number
# Leading whitespace is removed from the following piece and the column's
# -width is grown, if necessary, to the length of the number plus that
# whitespace.  The piece is then replaced by its formatted form.
sub format_left_justified {
    my ($text, $nums, $i, $format, $scale, $suffix) = @_;

    return unless defined $nums->[$i]
        && $nums->[$i]{-start} == $format->{-pos};

    if ($i < @{$text} - 1) {
        # This substitution always matches, so $1 is always defined.
        $text->[$i + 1] =~ s/^(\s*)//;

        # length() must wrap only the text; the comparison is between the
        # space the number originally occupied and the current width.
        my $needed = length($text->[$i]) + length($1);
        if ($needed > $format->{-width}) {
            $format->{-width} = $needed;
        }
    }

    $text->[$i] = format_number($text->[$i], $scale, $suffix);

    return;
}

# Pad cell $i of a row that belongs to a right-justified column.
# Arguments are as for pad_left_justified.  Only pieces whose -end equals
# the column's -pos are touched; they are padded with leading spaces up to
# the column's -width.
sub pad_right_justified {
    my ($text, $nums, $i, $format) = @_;

    return unless defined $nums->[$i]
        && $nums->[$i]{-end} == $format->{-pos};

    my $padding = $format->{-width} - length($text->[$i]);
    if ($padding > 0) {
        $text->[$i] = (' ' x $padding) . $text->[$i];
    }

    return;
}

# Format cell $i of a row that belongs to a right-justified column.
# Arguments are as for format_left_justified.  Trailing whitespace is
# removed from the preceding piece and the column's -width is grown, if
# necessary, to the length of the number plus that whitespace.  The piece
# is then replaced by its formatted form.
sub format_right_justified {
    my ($text, $nums, $i, $format, $scale, $suffix) = @_;

    return unless defined $nums->[$i]
        && $nums->[$i]{-end} == $format->{-pos};

    if ($i > 0) {
        # This substitution always matches, so $1 is always defined.
        $text->[$i - 1] =~ s/(\s*)$//;

        # length() must wrap only the text; the comparison is between the
        # space the number originally occupied and the current width.
        my $needed = length($text->[$i]) + length($1);
        if ($needed > $format->{-width}) {
            $format->{-width} = $needed;
        }
    }

    $text->[$i] = format_number($text->[$i], $scale, $suffix);

    return;
}

# Given a hash of key => count, return the two-element list (key, count)
# for the entry with the highest count.  Returns an empty list when the
# hash is empty.  The (\%) prototype lets callers pass a hash directly.
sub most_popular(\%) {
    my ($votes) = @_;

    my @ranked =
        sort { $b->[1] <=> $a->[1] }
        map  { [ $_ => $votes->{$_} ] }
        keys %{$votes};

    # Nothing to rank (e.g. no input lines were read).
    return unless @ranked;

    return @{ $ranked[0] };
}

# Read all lines from <>, reformat the numeric columns, and print the
# result to the currently selected output handle.
#   $scale  - divisor handed to format_number
#   $suffix - suffix handed to format_number
sub filter_columns {
    my ($scale, $suffix) = @_;

    my @row_text          = ();
    my @row_nums          = ();
    my %field_count_votes = ();

    # Find all the numbers in the input and group them into columns.
    # Non-numeric text is treated as filler between the numeric columns.
    while (my $line = <>) {
        chomp $line;
        $line = expand($line);

        my @text        = split(/\b(\d+)\b/, $line);
        my $field_count = 0;
        my $pos         = 0;
        my @nums        = ();

        foreach my $piece (@text) {
            my $length = length($piece);
            my $end    = $pos + $length;

            if ($piece =~ /^\d+$/) {
                ++$field_count;
                push @nums,
                    { -start => $pos, -end => $end, -length => $length };
            }
            else {
                push @nums, undef;
            }

            $pos = $end;
        }

        push @row_text, [@text];
        push @row_nums, [@nums];
        ++$field_count_votes{$field_count};
    }

    # Reverse engineer the sprintf formats and put the column
    # re-formatting subs into @format.
    my @format = ();
    my ($popular_field_count) = most_popular(%field_count_votes);

    foreach my $nums (@row_nums) {
        my $field_count = 0;
        foreach my $cell (@{$nums}) {
            ++$field_count if defined $cell;
        }

        # Only rows with the most common number of fields vote on layout.
        next unless ($field_count == $popular_field_count);

        my $i = 0;
        foreach my $cell (@{$nums}) {
            if (defined $cell) {
                ++$format[$i]{-start}{ $cell->{-start} };
                ++$format[$i]{-end}{ $cell->{-end} };
                if (  !$format[$i]{-max_length}
                    || $cell->{-length} > $format[$i]{-max_length})
                {
                    $format[$i]{-max_length} = $cell->{-length};
                }
            }
            ++$i;
        }
    }

    # $col aliases the element of @format, so assigning to it replaces
    # the vote tallies with the final column description (or undef).
    foreach my $col (@format) {
        next unless (defined $col);

        # Columns whose numbers never exceed three digits are left alone.
        if ($col->{-max_length} > 3) {
            my ($start, $start_count) = most_popular(%{ $col->{-start} });
            my ($end,   $end_count)   = most_popular(%{ $col->{-end} });

            if ($end_count >= $start_count) {
                $col = {
                    -format => \&format_right_justified,
                    -pad    => \&pad_right_justified,
                    -type   => 'right',
                    -pos    => $end,
                    -count  => $end_count,
                    -width  => $col->{-max_length},
                };
            }
            else {
                $col = {
                    -format => \&format_left_justified,
                    -pad    => \&pad_left_justified,
                    -type   => 'left',
                    -pos    => $start,
                    -count  => $start_count,
                    -width  => $col->{-max_length},
                };
            }
        }

        # Drop columns that were not classified, or whose chosen position
        # is shared by fewer than half of all rows.
        if (!$col->{-format} || $col->{-count} < scalar(@row_nums) / 2) {
            $col = undef;
        }
    }

    # Scale and format the columns if the row matches the format,
    # otherwise leave it alone.  (Avoid formatting things like column
    # headers.)
    for (my $row = 0; $row < @row_text; ++$row) {
        my $i = 0;
        foreach my $col (@format) {
            if (defined $col) {
                $col->{-format}->(
                    $row_text[$row], $row_nums[$row], $i, $col,
                    $scale, $suffix
                );
            }
            ++$i;
        }
    }

    # Pad columns and print all the rows.  This is a second pass because
    # the column widths are only final once every row has been formatted.
    for (my $row = 0; $row < @row_text; ++$row) {
        my $i = 0;
        foreach my $col (@format) {
            if (defined $col) {
                $col->{-pad}->($row_text[$row], $row_nums[$row], $i, $col);
            }
            ++$i;
        }
        print join('', @{ $row_text[$row] }), "\n";
    }

    return;
}

# Exported entry points: each runs filter_columns with a fixed divisor
# and suffix.
sub f   { filter_columns(1,                  '')  }
sub b2k { filter_columns(1024,               'K') }
sub k2m { filter_columns(1024,               'M') }
sub m2g { filter_columns(1024,               'G') }
sub b2m { filter_columns(1024 * 1024,        'M') }
sub k2g { filter_columns(1024 * 1024,        'G') }
sub b2g { filter_columns(1024 * 1024 * 1024, 'G') }

1;