use warnings; use strict; use List::MoreUtils qw(uniq); my @trans = ( # substitute... for... except in... [ 'TWENTY', 'score', qw(fourscore scored scores) ], [ 'CENTER', 'core', qw(encore encores coregent score) ], # [ 'CENTERS', 'cores', qw(encores scores) ], [ 'JOHN', 'Johann', qw(Johannesburg) ], [ 'CENTER', 'centre', ], [ 'TRAVELED', 'travelled', ], [ 'HAS NOT', 'hasn\'t', ], ); my %trans = map @{ $_ }[1, 0], @trans; my ($translate) = map qr{ $_ }xms, join ' | ', map word_regex(@{ $_ }[1 .. $#{$_}]), @trans ; while (defined(my $line = )) { print $line; $line =~ s{ ($translate) }{$trans{$1}}xmsg; print $line; print "\n"; } sub word_regex { my ($word, @stops, ) = @_; my $not_prefix = # 5. conjunction join ') (?## >perl selective_trans_1.pl the core of the coregents encores scored fourscore score of the CENTER of the coregents encores scored fourscore TWENTY of scores when Johann travelled to Johannesburg scores when JOHN TRAVELED to Johannesburg for a score of cores hasn't a centre encoregent for a TWENTY of cores HAS NOT a CENTER encoregent #### use warnings; use strict; my @translate = ( # insert... for... except in... [ 'TWENTY', 'score', qw(twoscore unscored? score[srd]) ], [ 'CENTER', 'core', qw(encore[sd]? score[sd]? core[srd]) ], [ 'CENTERS', 'cores', qw(encores scores) ], [ 'JOHN', 'Johann', qw(Johannesburg) ], [ 'CENTER', 'centre', ], [ 'TRAVELED', 'travelled', ], [ 'HAS NOT', 'hasn\'t', ], ); my %replace = map @{ $_ }[1, 0], @translate; my $search = join ' | ', map word_regex(@{ $_ }[1 .. $#{$_}]), sort { $b->[1] cmp $a->[1] } # longest words first @translate ; while (defined(my $line = )) { print $line; $line =~ s{ ($search) }{$replace{$1}}xmsg; print $line; print "\n"; } sub word_regex { my ($word, @stops, ) = @_; my $not_stopped = join ' ', map not_stopped(@$_), map [ m{ \A (.*) ($word) (.*) \z }xms ], @stops ; return "$not_stopped $word"; } sub not_stopped { my ($stop_prefix, # always defined if word defined, maybe empty $word, # word embedded in stop word $stop_suffix, # always defined if word defined, maybe empty ) = @_; return '' unless defined $word and length $word; # convert word to placeholder (faster match?) $word = sprintf '.{%d}', length $word; # convert stop prefix, if any, to POSITIVE assertion. $stop_prefix = "(?<= $stop_prefix)" if length $stop_prefix; # NEGATIVE assert of stop prefix, word placeholder, stop suffix. return "(?! $stop_prefix $word $stop_suffix)"; } __DATA__ a score and twoscore of unscored scorer won't unscore scored scores at the core of the encore that was scored and cored for many scores of encores Johann travelled to Johannesburg for a score of cores hasn't a stercorean centre #### >perl selective_trans_1.pl a score and twoscore of unscored scorer won't unscore scored scores a TWENTY and twoscore of unscored scorer won't unscore scored scores at the core of the encore that was scored and cored at the CENTER of the encore that was scored and cored for many scores of encores Johann travelled to Johannesburg for many scores of encores JOHN TRAVELED to Johannesburg for a score of cores hasn't a stercorean centre for a TWENTY of CENTERS HAS NOT a sterCENTERan CENTER