use strict;
use warnings;
use Data::Dumper ();

# Keyword-in-context extractor.
#
# Usage: perl context.pl WORD [WORD ...]
#
# Slurps the text stored after __DATA__, looks for every search word given
# on the command line (whole words, case-insensitive) and dumps each hit
# together with a little surrounding context.  Hits that are close enough
# to share their context are returned as one snippet, and every search word
# inside a snippet is wrapped in [brackets].

# Ignore empty arguments: an empty alternative in the word pattern would
# match at every word boundary.
my @words = grep { length } @ARGV;
die "No search terms supplied!" unless @words;

# Slurp the complete sample text; $/ is only undefined inside this block.
my $text = do { local $/ = undef; <DATA> };

my $blen = 20;             # (max) chars before a matching word to take with us
my $alen = 20;             # (max) chars after a matching word to take with us
my $jlen = $blen + $alen;  # (max) chars between 2 matching words captured together

my $strwords = join "|" => map quotemeta, @words;  # Words to highlight
my $rxwords  = qr/\b(?i:$strwords)\b/;             # ... compiled highlight word match

# One snippet: left context, one or more search words that are at most
# jlen chars apart, right context extended to the end of the current word.
# $rxwords is a compiled pattern and keeps its own flags, so /x only affects
# the layout written here.
my $expr = qr{
    \b                  # a word boundary
    (?!\s)              # following char is not a white space
    (?s:                # . matches newline in rest of regex
        .{0,$blen}      # up to blen chars (left context)
        $rxwords        # followed by a word we search for
        (?:             # group for repeatedly matching
            .{0,$jlen}  # up to jlen = blen + alen chars
            $rxwords    # followed by a searched word
        )*              # repeatedly match
        .{0,$alen}      # up to alen chars (right context)
        (?:             # group for disjunction
            (?<=\s)     # last matched char was white space
          |             # or
            [^\s]*      # non white space chars
            \b          # up to the next word boundary
        )
    )
}x;

# Collect all snippets first, then tag the search words inside each of them.
my @snippets = $text =~ /($expr)/g;
s/($rxwords)/[$1]/g for @snippets;

my $D = Data::Dumper->new( [ \@snippets ], ['matched'] )->Indent(1);
print $D->Dump();

# Sample run as recorded in the original post.  The exact snippet edges
# depend on the whitespace of the DATA text, so a run against the text as
# laid out below may differ slightly.
#
#   perl -Mstrict -Mwarnings context.pl is and the have
#
#   $matched = [
#     'Regular expressions [have] always been a weak spot for me, [and] I\'ve got a question',
#     'me stumped. Here\'s [the] problem I\'m trying to solve. I [have] somewhat large articles',
#     'what I\'d like to do [is] capture [the] word [and] X number of words before [and] after it while tagging [the] matching word in [the] captured text. My ',
#     'like this. 
#   [The] problem I [have] [is] that if there [is] more than one term [and] they overlap, [the] nth term will not be',
#     'So my next thought [is] lookahead/lookbehind',
#     'don\'t capture. [Is] there a way to do this',
#     'a single regex? [Is] a regex even [the] best way to do this'
#   ];

# reformatted the DATA to look nicer in the post
__DATA__
Regular expressions have always been a weak spot for me, and I've got a
question that's got me stumped.
Here's the problem I'm trying to solve. I have somewhat large articles of
text (returned from a search), what I'd like to do is capture the word and
X number of words before and after it while tagging the matching word in
the captured text.
My inital thought was to try something like this.
The problem I have is that if there is more than one term and they overlap,
the nth term will not be annotated. So my next thought is
lookahead/lookbehind, but they don't capture.
Is there a way to do this with a single regex? Is a regex even the best way
to do this?
Thanks,
-Lee