perlquestion
ibm1620
I am writing a tool to expand a CSV file to a columnar format, with each column sized to accommodate the max width encountered in the file. That's easily done with two passes. But now I want to narrow the columns so that each record fits on one line in the terminal, at the expense of truncating some wide values. For example:
<code>
Input:
Date|Amount|Category|Description
2022-06-23|123.45|Software & Tech|BACKBLAZE HTTPSWWW.BACKCA|
2022-06-24|63.45|Internet|RECURRING PAYMENT AUTHORIZED ON 06/11 SPECTRUM TX|
2022-06-24|69.34|Phone|RECURRING PAYMENT AUTHORIZED ON 06/02 VZWRLSS*APOCC VISE|
(Max widths 10,6,15,55)
Simple expansion
Date |Amount|Category |Description |
2022-06-23|123.45|Software & Tech|BACKBLAZE HTTPSWWW.BACKCA |
2022-06-24|63.45 |Internet |RECURRING PAYMENT AUTHORIZED ON 06/11 SPECTRUM TX |
2022-06-24|69.34 |Phone |RECURRING PAYMENT AUTHORIZED ON 06/02 VZWRLSS*APOCC VISE|
(Col widths 10,6,15,55)
Shrunk to fit 52-char-wide window
Date |Amount|Category |Description |
2022-06-23|123.45|Software & Tech|BACKBLAZE HTTPSWW|
2022-06-24|63.45 |Internet |RECURRING PAYMENT|
2022-06-24|69.34 |Phone |RECURRING PAYMENT|
(Col widths 10,6,15,17)
Shrunk to fit 46-char-wide window
Date |Amount|Category |Description |
2022-06-23|123.45|Software & Te|BACKBLAZE HTT|
2022-06-24|63.45 |Internet |RECURRING PAY|
2022-06-24|69.34 |Phone |RECURRING PAY|
(Col widths 10,6,13,13)
</code>
I recast the problem as an ordered set of bags whose contents vary in weight, and removing enough from the bags so they don't exceed some total weight. Furthermore, I want to penalize the heaviest bags first. I coded up a working solution (trying to use as many v5.36 features as I could). But I can't get over the feeling that there is a much simpler solution that's eluded me.
<p>
I'd appreciate any comments or suggestions for a simpler algorithm (for one thing, I don't think making it recursive helped any). I'd be particularly intrested in solutions that exeercise v5.36 features.
<code>
#!/usr/bin/env perl
use v5.36; # implies use warnings
my $target_weight = shift // die 'need target_weight';
# Starting weights
my @weights = ( 20, 3, 25, 10, 3, 24, 25 );
say "Before:\n" . display( \@weights );
shrink( \@weights, $target_weight );
say "After:\n" . display( \@weights );
# shrink($bags, $target_weight)
#
# $bags = ref. to array of bag weights
# $target_weight = maximum allowed weight of all bags
#
# If bags exceed target_weight, lighten the bags to achieve target by
# lightening the heaviest bags first.
no warnings q/experimental::for_list/;
no warnings q/experimental::builtin/;
use builtin qw/indexed/;
use List::Util qw/sum/;
sub shrink ( $bags, $target_weight, $curr_weight = undef ) {
# Outer call only:
if ( not defined $curr_weight ) {
$curr_weight = sum @$bags;
# quick exit if no shrink req'd
return if ( $curr_weight <= $target_weight );
# copy input array and sort by weight, descending
my @indexed_weights;
for my ( $i, $wt ) ( indexed @$bags ){
push @indexed_weights, [ $i, $wt ];
}
@indexed_weights = sort { $b->[1] <=> $a->[1] }
@indexed_weights;
# split indexes and weights into two arrays
my @sorted_indexes = map { $_->[0] } @indexed_weights;
my @sorted_weights = map { $_->[1] } @indexed_weights;
say "Sorted:\n" . display( \@sorted_weights );
shrink( \@sorted_weights, $target_weight, $curr_weight );
# Deliver de-sorted result to caller
for my ( $i, $wt ) ( indexed @sorted_weights ) {
$bags->[ $sorted_indexes[$i] ] = $wt;
}
return;
}
# For inner call:
return if ( $curr_weight <= $target_weight );
my $nbags = scalar @$bags;
my $heaviest = $bags->[0]; # weight of heaviest bag
# Count the heaviest bags and also find the next-heaviest
my $n_of_heaviest;
my $next_heaviest;
COUNT:
for ( 1 .. $nbags - 1 ) {
if ( $bags->[$_] < $heaviest ) {
$n_of_heaviest = $_;
$next_heaviest = $bags->[$_];
last COUNT;
}
}
$n_of_heaviest //= $nbags;
$next_heaviest //= 0;
my $loss = $heaviest - $next_heaviest;
my $total_loss = $loss * $n_of_heaviest;
if ( $curr_weight - $total_loss >= $target_weight ) {
$curr_weight -= $total_loss;
$bags->[$_] -= $loss for ( 0 .. $n_of_heaviest - 1 );
say "Reduce bags #0-#"
. ( $n_of_heaviest - 1 )
. " by $loss to weight of next_heaviest, "
. "$next_heaviest:\n"
. display($bags);
shrink( $bags, $target_weight, $curr_weight );
}
else {
# Need to do an equally-distributed shrink of the heaviest
# bags to hit the target
use integer;
my $target_loss = $curr_weight - $target_weight;
my $div = $target_loss / $n_of_heaviest;
my $rem = $target_loss % $n_of_heaviest;
for my $i ( -( $n_of_heaviest - 1 ) .. 0 ) {
$loss = $div + ( $rem-- > 0 ? 1 : 0 );
$bags->[ -$i ] -= $loss;
}
say "Finally, reduce bags #0-#"
. ( $n_of_heaviest - 1 )
. " to target weight of $target_weight:\n"
. display($bags);
}
}
sub display ($aref) {
my $r = '';
for my ( $i, $wt ) ( indexed @$aref ) {
$r .= sprintf "%2s: %s (%d)\n", "#$i", ( '-' x $wt ), $wt;
}
$r .= sprintf "Weight %d, target %d\n",
sum(@$aref), $target_weight;
return $r;
}
</code>
Shrink to 100:
<code>
~/private/perl$ shrink 100
Before:
#0: -------------------- (20)
#1: --- (3)
#2: ------------------------- (25)
#3: ---------- (10)
#4: --- (3)
#5: ------------------------ (24)
#6: ------------------------- (25)
Weight 110, target 100
Sorted:
#0: ------------------------- (25)
#1: ------------------------- (25)
#2: ------------------------ (24)
#3: -------------------- (20)
#4: ---------- (10)
#5: --- (3)
#6: --- (3)
Weight 110, target 100
Reduce bags #0-#1 by 1 to weight of next_heaviest, 24:
#0: ------------------------ (24)
#1: ------------------------ (24)
#2: ------------------------ (24)
#3: -------------------- (20)
#4: ---------- (10)
#5: --- (3)
#6: --- (3)
Weight 108, target 100
Finally, reduce bags #0-#2 to target weight of 100:
#0: ---------------------- (22)
#1: --------------------- (21)
#2: --------------------- (21)
#3: -------------------- (20)
#4: ---------- (10)
#5: --- (3)
#6: --- (3)
Weight 100, target 100
After:
#0: -------------------- (20)
#1: --- (3)
#2: ---------------------- (22)
#3: ---------- (10)
#4: --- (3)
#5: --------------------- (21)
#6: --------------------- (21)
Weight 100, target 100
</code>