use strict;
use warnings;
use 5.010;    # named captures, %+ and the // operator all need Perl 5.10+

# Demo script: split a sample amsrefs \bib entry into lexical tokens and
# print one line per token in the form:  KIND     "TEXT"
# (newlines inside TEXT are shown as a literal \n).

# Sample input to tokenize.
my $input = q( \bib{ref0}{article}{ author={Y. Bartal}, volume={37}, pages={184}, date={1996}, issn={0272-5428}, } );

# One alternative per token kind.  Each alternative is a *named* capture so
# that %+ reports which kind matched.  Order matters: earlier alternatives
# win.  NOTE(review): the group names were missing from the file as found
# (the pattern read "(? ... )", which Perl refuses to compile); the names
# below were chosen during repair -- adjust if other code expects
# different labels.
my $TOKEN_RE = qr{
      (?<bib>     \\bib(?![A-Za-z])              )  # the bib command, not a longer control word
    | (?<cs>      (?s: \\(?:[A-Za-z]+|.) )       )  # any other control word or control symbol
    | (?<comment> \%.*\n\s*                      )  # percent comment, its newline and following blanks
    | (?<equals>  \=                             )
    | (?<lbrace>  \{                             )
    | (?<rbrace>  \}                             )
    | (?<space>   \s+                            )
    | (?<atom>    [A-Za-z0-9_\-\.]+              )  # run of letters, digits, underscore, dash, dot
    | (?<other>   [^\\\%\=\{\}\sA-Za-z0-9_\-\.]  )  # any single character not covered above
}x;

# tokenize($text)
#   Splits $text into consecutive tokens, starting at offset 0.
#   Returns: a list of array refs [ KIND, TEXT ], in input order; joining
#            all TEXT values reproduces $text.  Empty input gives an
#            empty list.
#   Dies:    if some position cannot be matched by any alternative (for
#            example a trailing lone backslash, or a percent comment with
#            no terminating newline); the message carries the offset.
sub tokenize {
    my ($text) = @_;
    my @tokens;

    # \G anchors each match where the previous one ended; /c keeps pos()
    # intact when the loop finally fails, so it can be inspected below.
    while ( $text =~ /\G$TOKEN_RE/gc ) {
        # Exactly one named group takes part in a successful match, and
        # %+ only lists groups that captured something.
        my ($kind) = keys %+;
        push @tokens, [ $kind, $+{$kind} ];
    }

    # pos() is undef when not even the first match succeeded.
    my $stopped_at = pos($text) // 0;
    die "internal error: amsref reader tokenizer cannot match input line: ($text) at $stopped_at"
        if $stopped_at != length $text;

    return @tokens;
}

for my $token ( tokenize($input) ) {
    my ( $kind, $text ) = @$token;
    $text =~ s/\n/\\n/g;    # keep every token on a single output line
    printf qq(%-8s "%s"\n), $kind, $text;
}