use strict;
use warnings;
use Data::Dumper;

# Dump hash keys in sorted order so the report is stable from run to run.
$Data::Dumper::Sortkeys = 1;

# Settings that control how an input line is turned into a symbol.
my $max          = 10;     # keep at most this many leading characters (false = no limit)
my $ws_replace   = '_';    # replacement for each run of whitespace (false = leave as is)
my $letters_only = 1;      # true = delete every \W character (digits and '_' survive)

# make_symbol($text, $line_no, $seen)
#
# Derives a symbol from $text by, in this order: truncating to $max
# characters, replacing whitespace runs with $ws_replace, and deleting
# non-word characters.  $seen is a hash ref holding every symbol handed out
# so far.  If the derived symbol is already taken, $line_no is appended to
# make it distinct.  The returned symbol is recorded in $seen.
sub make_symbol {
    my ($text, $line_no, $seen) = @_;

    my $name = $text;
    $name = substr $name, 0, $max if $max;
    $name =~ s/\s+/$ws_replace/g  if $ws_replace;
    $name =~ s/\W+//g             if $letters_only;

    # NOTE(review): the suffix is added after truncation, so a de-duplicated
    # symbol can be longer than $max characters -- confirm that $max is not
    # meant to be a hard limit on the final symbol.
    $name .= $line_no if $seen->{$name};

    # The suffixed form can itself coincide with an earlier symbol (for
    # example inputs "foo", "foo", "foo2"), so keep extending until it is free.
    $name .= '_' while $seen->{$name};

    $seen->{$name}++;
    return $name;
}

my (%symbol, @old, @new);

# Read the sample records from the DATA section.  An empty "while ()"
# condition is always true in Perl, so the handle must be read explicitly.
while (my $line = <DATA>) {
    chomp $line;
    push @old, $line;
    push @new, make_symbol($line, $., \%symbol);
}

print Dumper \@old, \@new;

# Map every original line to the symbol generated for it.
my %compare;
@compare{@old} = @new;
print Dumper \%compare;

# NOTE(review): the record boundaries in the DATA section below were
# reconstructed from a copy in which all line breaks had been lost -- confirm
# them against the original sample data.
__DATA__
Line one
Line two
xxxxxxxxxxx Another line
xxxxxxxxxxx Lines end here not
xxxxxxxxxxx Fourth line (used to be)
Line five
Lines end here
Lines end here too