# Read the input line by line, pull out the text that precedes the closing
# `]]></editorialtext` marker (or the whole line when the marker is absent),
# and strip markup tags from the editorial copy.
#
# NOTE(review): the original loop condition was empty (`while ()`), which Perl
# treats as always true, so the loop never read any input. `<>` (files named
# on the command line, else STDIN) is assumed here -- confirm the intended
# input handle.
# NOTE(review): the original pattern began with an empty capture group `()`,
# which suggests an opening-tag pattern was lost; restore it if it is known.
while (my $line = <>) {
    # The capture is non-greedy so the closing marker, when present, ends the
    # captured text; a greedy `.*` would always run to end of line and make
    # the marker alternative unreachable.
    if ($line =~ m{(.*?)(?:\]\]></editorialtext|$)}) {
        # Copy the capture immediately: the substitution below resets the
        # capture variables.
        my $body = $1;

        my $editorial = $body;
        # NOTE(review): $headline receives the same text as $editorial and
        # keeps the unstripped version -- verify this is intended.
        my $headline = $body;

        # Remove anything that looks like a markup tag, allowing quoted
        # attribute values to contain `>` characters.
        $editorial =~ s/<(?:[^>'"]*|(['"]).*?\1)*>//gs;

        # NOTE(review): neither $editorial nor $headline is used after this
        # point in the visible code; presumably further processing belongs
        # here -- confirm.
    }
}