# Report words that are immediately repeated (case-insensitively) within a
# paragraph of the input, e.g. "the the" or "This this".  Input is read via
# the <> operator: the files named on the command line, or STDIN when none
# are given.  One line is printed per run of repeated words.

$/ = '';    # paragraph mode: each read returns one blank-line-separated paragraph

while ( my $paragraph = <> ) {

    # The g modifier in scalar context resumes after the previous match, so
    # the inner loop visits every run of repeated words in this paragraph.
    while (
        $paragraph =~ m{
            \b          # start at a word boundary
            ( \S+ )     # first capture group: a chunk of non-whitespace
            \b          # ending at a word boundary
            (?:         # group without capturing
                \s+     # separating whitespace, which may include a newline
                \1      # the same chunk again
                \b      # ending at a word boundary
            )+          # one or more repetitions of the chunk
        }xig
      )
    {
        # With the record separator set as above, $. counts the paragraphs
        # read so far rather than physical lines.
        print "dup word '$1' at paragraph $.\n";
    }
}