use warnings FATAL => 'all' ; use strict; use constant DEBUG => 0; my $book = <<'ENDBOOK'; pg. 1 one two pg. 2 two three four pg. 4 four five pg. 5 five six pg. 6 six seven eight nine pg. 9 nine ten pg. 10 ten eleven twelve thirteen fourteen pg. 14 fourteen fifteen pg. 15 fifteen sixteen seventeen pg. 17 seventeen eighteen nineteen pg. 19 nineteen and out ENDBOOK print qq{[[$book]] \n\n}; # all these solutions use \K of 5.10+ # # works # # this solution works (insofar as i understand what Polyglot # # wants), but is 'inefficient' in that it involves substitution # # of a substring with an identical substring in most cases # # (assuming sequential page numbers are the most common case). # # my $pn = qr{ pg[.] \s+ }xms; # $book =~ # s{ $pn (\d+) \K (.*?) (?= $pn (\d+)) } # { my $m = missing($1, $3); $m ? qq{$2$m } : $2; }xmsge; # # works. extracts/classifies pg. number/matter ok. subst. ok. # # this solution works (with caveat given above), but in the case # # of sequential page numbers will insert an empty string into # # the target string, which may or may not be 'efficient'. # my $pn = qr{ pg[.] \s+ (\d+) }xms; # CAUTION: embedded capture # $book =~ s{ # $pn # capture pg. number to $1 # .*? \K # ignore pg. number/matter in replace # (?= $pn) # overlap capture next pg. number to $2 # } # { my $m = missing($1, $2); # print "rr'$1' '$2' s/${^MATCH}/$m/rr \n" if DEBUG; # $m; # }xmspge; # use exotic 5.10+ regex constructs to avoid 'useless' substitution. # # works. extracts/classifies pg. number/matter ok. subst. ok. # $book =~ s{ # pg[.] \s+ (\d+) # capture pg. number to $1 # .*? \K # ignore pg. number/matter in replace # (?= pg[.] \s+ (\d+)) # overlap capture next pg. number to $2 # (?(?{ $2 - $1 == 1 }) # sequential pages? # # sequential: no replacement, advance to next pg. # (?{ print "++'$1' '$2'++ \n" if DEBUG; }) # (*SKIP) (*FAIL) # | # # non-sequential: replace/insert missing pg(s)., advance # (?{ print "--'$1' '$2'-- \n" if DEBUG; }) # # null regex always true # ) # } # { my $m = missing($1, $2); # print "rr'$1' '$2' s/${^MATCH}/$m/rr \n" if DEBUG; # $m; # }xmspge; # # works. extracts/classifies pg. number/matter ok. subst. ok. # my $pn = qr{ pg[.] \s+ (\d+) }xms; # CAUTION: embedded capture # use re 'eval'; # $book =~ s{ # $pn # capture pg. number to $1 # .*? \K # ignore pg. number/matter in replace # (?= $pn) # overlap capture next pg. number to $2 # (?(?{ $2 - $1 == 1 }) # sequential pages? # # sequential: no replacement, advance to next pg. # (?{ print "++'$1' '$2'++ \n" if DEBUG; }) # (*SKIP) (*FAIL) # | # # non-sequential: replace/insert missing pg(s)., advance # (?{ print "--'$1' '$2'-- \n" if DEBUG; }) # # null regex always true # ) # } # { my $m = missing($1, $2); # print "rr'$1' '$2' s/${^MATCH}/$m/rr \n" if DEBUG; # $m; # }xmspge; # works. extracts/classifies pg. number/matter ok. subst. ok. my $pn = qr{ pg[.] \s+ (\d+) }xms; # CAUTION: embedded capture use re 'eval'; $book =~ s{ $pn # capture pg. number to $1 .*? \K # ignore pg. number/matter in replace # advance (i.e., skip) matching to this point if pages sequential (?= $pn) # overlap capture next pg. number to $2 (?(?{ $2 - $1 == 1 }) # sequential pages? # sequential: no replacement, advance to next pg. (?{ print "++'$1' '$2'++ \n" if DEBUG; }) (*SKIP) # skip past current page on failure (*FAIL) # fail the match: no replacement ) } { my $m = missing($1, $2); print "rr'$1' '$2' s/${^MATCH}/$m/rr \n" if DEBUG; $m; }xmspge; print "\n"; print "(($book)) \n"; sub missing { my ($i, $j) = @_; die "bad page sequence $i-$j" if $i >= $j; return '' if $j - $i < 2; # no missing page(s) my ($ii, $jj) = ($i + 1, $j - 1); # figure the gap return $ii == $jj ? qq{(PAGE $ii MISSING) } # just one page missing : qq{(PAGES $ii - $jj MISSING) } # multiple pages missing ; }