#! perl -sw
use strict;
use warnings;

## Remove duplicate records from a pipe-delimited file, keeping the first
## occurrence of each record and preserving the original line order.
##
## Usage:  perl thisscript.pl INPUT_FILE > OUTPUT_FILE
##
## Two records are duplicates when their first 7 '|'-separated fields match.
## To limit memory use, the file is scanned once per candidate first
## character; the keys seen are only held in memory for the duration of one
## pass. Only the byte offsets of the lines to discard are kept across passes.
##
## Progress messages go to STDERR; the de-duplicated records go to STDOUT.
##
## Notes:
##  * Lines whose first character is not in the pass list below are never
##    examined, so they are always written out unchanged.
##  * A line with fewer than 7 fields yields undefined slice elements when
##    the key is built; they join as empty strings (with a warning under -w).

my $path = $ARGV[ 0 ];
die "Usage: $0 INPUT_FILE > OUTPUT_FILE\n" unless defined $path;

open my $fh, '<', $path or die "Cannot open '$path': $!";

my @discards; ## Start offsets of the lines to discard
my %hash;     ## Remembers the record keys seen during the current pass

## IMPORTANT: Tailor the following list to your data!
## If all your records start with a digit (0-9), then
## as shown, this would make 52 redundant passes!

## During each pass, only consider lines that start with this char
for my $firstChar ( 0 .. 9, 'A' .. 'Z', 'a' .. 'z' ) {
    warn "Pass $firstChar\n";

    my $offset = 0; ## Start offset of the line about to be read

    while ( my $line = <$fh> ) {
        ## Only lines that start with this pass's char are considered
        if ( $line =~ m[^$firstChar] ) {
            ## Form the key from the first 7 fields
            my $key = join '|', ( split '\|', $line, 8 )[ 0 .. 6 ];

            if ( exists $hash{ $key } ) {
                ## Seen before in this pass: remember where this line starts
                push @discards, $offset;
            }
            else {
                ## First occurrence: remember the key, keep the line
                $hash{ $key } = undef;
            }
        }
        $offset = tell $fh; ## Where the next line starts
    }

    undef %hash; ## Release the keys before the next pass

    ## Rewind to the start of the file after each pass
    seek $fh, 0, 0 or die "Cannot rewind '$path': $!";
}

printf STDERR "Sorting %d discards\n", scalar @discards;

## The offsets were collected pass by pass; put them into file order
@discards = sort { $a <=> $b } @discards;

## Final pass: copy every line to STDOUT except those whose start offset
## is in the discard list. Both the file and the list are in ascending
## offset order, so only the head of the list needs checking.
my $lineStart = 0;
while ( my $line = <$fh> ) {
    if ( @discards and $discards[ 0 ] == $lineStart ) {
        shift @discards; ## Duplicate: drop the line and its offset
    }
    else {
        print $line;     ## Good line (for redirection)
    }
    $lineStart = tell $fh;
}

close $fh;