use warnings; use strict; my @translate = ( # insert... for... except in... [ 'TWENTY', 'score', qw(twoscore unscored? score[srd]) ], [ 'CENTER', 'core', qw(encore[sd]? score[sd]? core[srd]) ], [ 'CENTERS', 'cores', qw(encores scores) ], [ 'JOHN', 'Johann', qw(Johannesburg) ], [ 'CENTER', 'centre', ], [ 'TRAVELED', 'travelled', ], [ 'HAS NOT', 'hasn\'t', ], ); my %replace = map @{ $_ }[1, 0], @translate; my $search = join ' | ', map word_regex(@{ $_ }[1 .. $#{$_}]), sort { $b->[1] cmp $a->[1] } # longest words first @translate ; while (defined(my $line = )) { print $line; $line =~ s{ ($search) }{$replace{$1}}xmsg; print $line; print "\n"; } sub word_regex { my ($word, @stops, ) = @_; my $not_stopped = join ' ', map not_stopped(@$_), map [ m{ \A (.*) ($word) (.*) \z }xms ], @stops ; return "$not_stopped $word"; } sub not_stopped { my ($stop_prefix, # always defined if word defined, maybe empty $word, # word embedded in stop word $stop_suffix, # always defined if word defined, maybe empty ) = @_; return '' unless defined $word and length $word; # convert word to placeholder (faster match?) $word = sprintf '.{%d}', length $word; # convert stop prefix, if any, to POSITIVE assertion. $stop_prefix = "(?<= $stop_prefix)" if length $stop_prefix; # NEGATIVE assert of stop prefix, word placeholder, stop suffix. return "(?! $stop_prefix $word $stop_suffix)"; } __DATA__ a score and twoscore of unscored scorer won't unscore scored scores at the core of the encore that was scored and cored for many scores of encores Johann travelled to Johannesburg for a score of cores hasn't a stercorean centre