use warnings; use strict;

# Demo: tokenize an amsrefs \bib entry with a table of \G-anchored regex
# rules and dump the resulting token stream.

# Example input: a single amsrefs bibliography entry.
$_ = q(
\bib{ref0}{article}{
    author={Y. Bartal},
    volume={37},
    pages={184},
    date={1996},
    issn={0272-5428},
}
);

# Tokenizer rules, tried in order at the current match position; the first
# rule that matches wins.  Each entry is [compiled pattern, token id].
#
# IMPORTANT: every rule must consume at least one character.  A rule that can
# match the empty string would emit empty tokens and, because Perl forbids two
# consecutive zero-length //g matches at the same position, would also make
# the end-of-input test (\G\z) fail after the last real token.
our @toktab = (
    [qr/\\bib(?![A-Za-z])/,                       "bib"],      # the \bib command itself
    [qr/\\(?:[A-Za-z]+|.)/s,                      "text"],     # any other control sequence
    [qr/\%.*\n\s*/,                               "comment"],  # % comment through following whitespace
    [qr/\=/,                                      "equal"],
    [qr/\{/,                                      "begin"],
    [qr/\}/,                                      "end"],
    [qr/\s+/,                                     "space"],
    [qr/[A-Za-z0-9_\-\.]+/,                       "word"],     # was '*': matched the empty string
    [qr/[^\\\%\=\{\}\sA-Za-z0-9_\-\.]/,           "text"],     # any single remaining character
);

# Token buffer: list of [token id, matched text] pairs in input order.
our(@tokfd);

# Tokenize the amsrefs input held in $_.  The /c flag keeps pos($_) in place
# when a rule fails, so the next rule is tried at the same position.
TOK: while (1) {
    # Stop once the whole input has been consumed.
    last TOK if /\G\z/gc;

    for my $tokrul (@toktab) {
        my($re, $id) = @$tokrul;
        if (/\G($re)/gc) {
            push @tokfd, [$id, $1];
            next TOK;
        }
    }

    # No rule matched at a position that is not the end of input (for example
    # a lone trailing backslash, or a % comment without a final newline).
    # pos($_) is undef if nothing has matched yet, so default it to 0.
    my $at = pos($_) || 0;
    die "internal error: amsref reader tokenizer cannot match input line: ($_) at " . $at;
}

# Dump tokens for debugging, one per line, with newlines shown as \n.
for my $t (@tokfd) {
    my($i, $c) = @$t;
    $c =~ s/\n/\\n/g;
    printf qq(%-8s "%s"\n), $i, $c;
}

__END__