use strict; use warnings; use 5.012; use Parse::RecDescent; $::RD_ERRORS = 1; #Parser dies when it encounters an error $::RD_WARN = 1; #Enable warnings - warn on unused rules &c. $::RD_HINT = 1; #Give out hints to help fix problems. #$::RD_TRACE = 1; #Trace parsers' behaviour my $text = <<'END_OF_TEXT'; {{Infobox aaa bbb ccc {{ddd eee fff ggg {{ hhh iii}} jjj}} {{{kkk {{lll}} mmm }}} }} no no no no no no no no no {{Infobox aaa2 bbb2 ccc2 {{ddd2 eee2 fff2 ggg2 {{hhh2 iii2}} jjj2}} {{{kkk2 {{lll2 }} mmm2 }}} }} {{Infobox 111}} END_OF_TEXT #Declare a global variable that can be loaded with #data from inside the parser: our @infobox_offsets; my $grammar = <<'END_OF_GRAMMAR'; { use 5.012; #enable say() use Data::Dumper; } startrule: paragraph(s) paragraph: infobox | word(s) infobox: '{{Infobox' inner_block(s) '}}' { push @main::infobox_offsets, $itempos[1]->{offset}{from}, $itempos[3]->{offset}{to}, ; } inner_block: brace_block | word(s) #Declare some my variables ('rulevars') for this rule: brace_block: brace_block: lbrace(2..) { $lbraces = join '', @{$item[1]}; $rbraces = "}" x length $lbraces; } inner_block(s) "$rbraces" word: m{ [^{}]+ }xms lbrace: / [{] /xms END_OF_GRAMMAR my $parser = Parse::RecDescent->new($grammar) or die "Bad grammar!\n"; defined $parser->startrule($text) or die "Can't match text"; #Using the recorded offsets for the infoboxes #print out the infobox substr()'s: my ( $start_infobox, $end_infobox, $length_infobox ); while (@infobox_offsets) { $start_infobox = shift @infobox_offsets; $end_infobox = shift @infobox_offsets; $length_infobox = 1 + $end_infobox - $start_infobox; say '*' x 20; say substr $text, $start_infobox, $length_infobox, ; say '*' x 20; } --output:-- ******************** {{Infobox aaa bbb ccc {{ddd eee fff ggg {{ hhh iii}} jjj}} {{{kkk {{lll}} mmm }}} }} ******************** ******************** {{Infobox aaa2 bbb2 ccc2 {{ddd2 eee2 fff2 ggg2 {{hhh2 iii2}} jjj2}} {{{kkk2 {{lll2 }} mmm2 }}} }} ******************** ******************** {{Infobox 111}} ********************