#!/usr/bin/perl
use strict;
use warnings;

my @lines = <DATA>;
## Pass 1. OR the records with a mask of spaces
my $mask = chr(32) x length $lines[0];

$mask |= $_ for @lines;

## Detect the spaces that remain and build the template
my $templ = '';
$templ .= 'a' . length($1) . 'x' . length($2) . ' '
  while $mask =~ m[([^ ]+)( +|$)]g;
$templ        =~ s[x\d+\s+$][];       ## Strip redundant last 'xN'
print "original template: " . $templ . "\n\n";

########################
## BEGIN false cols code
########################

## setup the variables
my $count_empty = -1;
my (@fields,@empty,%empty_hash,@AoAfields,@right_align,@left_align);
## create array of unpacked lines
foreach my $line (@lines) {
    push @fields, join '|', unpack($templ, $line);
}
## create AoA so we can process cols
for my $i ( 0 .. $#fields ) {
	$AoAfields[$i] = [ split /\|/, $fields[$i] ];
    }
## loop through AoA finding alignment and empty cols
for my $i ( 0 .. $#AoAfields ) { # for every row in AoA
    my $aref = $AoAfields[$i];
    my $col_numbers = @$aref - 1;
	for my $j ( 0 .. $col_numbers ) { # for every col in AoA
            if ($AoAfields[$i][$j]=~ /^\s+\S/) { # find right-aligned
                push @right_align, $j; 
            }
            if ($AoAfields[$i][$j]=~ /\S\s+$/) { # find left-aligned
                push @left_align, $j; 
            }
            if ($AoAfields[$i][$j]=~ /^\s+$/) { # find fields w/ only spaces
                $count_empty++;
                $empty_hash{$j} = $count_empty;
            }
	}
}
## first remove duplicates in arrays 
&remove_duplicates(\@left_align);
&remove_duplicates(\@right_align);
my ($key, $val);
while (($key, $val) = each(%empty_hash)){
    if ($val/($#AoAfields+1)>.5) { #if column more than 50% empty
        push @empty, $key;
    }
}
## create array from template string
my @templs = split(/\s+/,$templ);
# create hashes of left-aligned & right-aligned for grep
my %left_temp;
my %right_temp;
@left_temp{@left_align} = @left_align;
@right_temp{@right_align} = @right_align;
## find out if col to the left/right of empty col is left/ right-aligned, rewrite template
foreach my $empty (@empty) {
    if ( grep {  exists $left_temp{$_} } $empty-1) {
        #add column width of empty to the column width of col to left of empty, splice out empty
        my $prev_col =  $templs[$empty-1];
        my $empty_col_width = $templs[$empty]; #get empty col value
        $empty_col_width =~ s/(a)(\d{1,2})(x\d)/$2/; #extract width from col value
        $prev_col =~ m/(a)(\d{1,2})(x\d)/; #match width into $2
        my $newwidth = $2+$empty_col_width+1;
        $prev_col =~ s/(a)(\d{1,2})(x\d)/$1$newwidth$3/; #replacement for the previous column
        splice(@templs,$empty-1,1,$prev_col);
        splice(@templs,$empty,1);
    }
    if ( grep {  exists $right_temp{$_} } $empty+1) {
        #add column width of empty to the column width of col to right of empty, splice out empty
        my $post_col =  $templs[$empty+1];
        my $empty_col_width = $templs[$empty]; 
        $empty_col_width =~ s/(a)(\d{1,2})(x\d)/$2/; 
        $post_col =~ m/(a)(\d{1,2})(x\d)/;
        my $newwidth = $2+$empty_col_width+1;
        $post_col =~ s/(a)(\d{1,2})(x\d)/$1$newwidth$3/; 
        splice(@templs,$empty+1,1,$post_col);
        splice(@templs,$empty,1);
    }
	
}    
$templ = join ' ', @templs;
print "new template:      " . $templ . "\n\n";

## PM jdporter
sub remove_duplicates(\@)
{
    my $ar = shift;
    my %seen;
    for ( my $i = 0; $i <= $#{$ar} ; )
    {
        splice @$ar, --$i, 1
            if $seen{$ar->[$i++]}++;
    }
}
#####################
# END false cols code
#####################

## Split the records and output delimited by '|'
print join '|', unpack $templ, $_ for @lines;

__DATA__
The First One Here Is Longer. Collie SN      262287630  77312    93871  MVP
A  Second (PART) here         First In 20 MT 169287655  506666   61066  RTD
3rd Person "Something"        X&Y No SH      564287705  34529    52443  RTE
The Fourth Person 20          MLP 4000       360505504  2237     72201  VRE
The Fifth Name OR Something   Twin 200 SH    469505179  3530     72201  VR
The Sixth Person OR Item      MLP            260505174  3,530   72,201  VRE
70 The Seventh Record         MLP            764205122  3530     72201  VRE
The Eighth Person MLP         MLP            160545154  3530      7220  VRE