term -> atom operator term
term -> '(' term ')'
term -> atom
operator -> '*' | '/'| '+' | '-'
atom -> \d+
####
term ->
atom operator term
'2' operator term
'2' '*' term
'2' '*' (atom operator term)
'2' '*' ('3' operator term)
'2' '*' ('3' '+' term)
'2' '*' ('3' '+' (atom))
'2' '*' ('3' '+' ('4'))
##
##
expression -> (term add_op)* term
term -> (factor mul_op)* factor
factor -> atom | '(' expression ')'
add_op -> '+' | '-'
mul_op -> '*' | '/'
atom -> \d+
##
##
expression
term add_op term
term '+' (factor)
term '+' ('4')
(factor mulop factor) '+' ('4')
((atom) mulop (atom)) '+' ('4')
(('2') '*' ('3') ) '+' ('4')
##
##
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
my @token_def = (
[Whitespace => qr{\s+}, 1],
[Comment => qr{#.*\n?$}m, 1],
[AddOp => qr{[+-]} ],
[MulOp => qr{[*/]} ],
[Number => qr{\d+} ],
[OpenParen => qr{\(} ],
[CloseParen => qr{\)} ],
);
my $input = $ARGV[0] || "2 * 3 + 4 # and a comment\n";
my @tokens;
pos($input) = 0;
while(pos($input) < length $input){
my $matched = 0;
for my $t (@token_def){
my ($name, $re, $ignore_flag) = @$t;
if ($input =~ m/\G($re)/gc){
$matched = 1;
next if $ignore_flag;
push @tokens, [$name, $1];
next;
}
}
die "Syntax error at postion " . pos($input) unless $matched
}
print Dumper \@tokens;
__END__
$VAR1 = [
[
'Number',
'2'
],
[
'MulOp',
'*'
],
[
'Number',
'3'
],
[
'AddOp',
'+'
],
[
'Number',
'4'
]
];
##
##
sub match {
my $expected_token = shift;
if ($tokens[0][0] eq $expected_token){
my $current = shift @tokens;
return $current->[1];
} else {
die "Syntax error: expected $expected_token, got $tokens[0][0]\n";
}
}
sub lookahead {
my @expected = @_;
no warnings 'uninitialized';
for (0 .. $#expected){
return 0 if $tokens[$_][0] ne $expected[$_];
}
return 1;
}
##
##
sub expression {
my @res = (term());
while (lookahead('AddOp')){
push @res, match('AddOp');
push @res, term();
}
return \@res;
}
sub term {
my @res = (factor());
while (lookahead('MulOp')){
push @res, match('MulOp');
push @res, factor();
}
return \@res;
}
sub factor {
if (lookahead('OpenParen')){
match('OpenParen');
my $res = expression();
match('CloseParen');
return $res;
} else {
atom();
}
}
sub atom {
match('Number');
}
##
##
my $parse_tree = expression();
print Dumper $parse_tree;
__END__
$VAR1 = [
[
'2',
'*',
'3'
],
'+',
[
'4'
]
];
##
##
return @res > 1 ? \@res :
$res[0];
##
##
# parse tree for '2 * (3+4)':
[
'2',
'*',
[ '3', '+', '4' ],
];
##
##
sub execute {
my $tree = shift;
return $tree unless ref $tree;
my %ops = (
'*' => sub { $_[0] * $_[1] },
'/' => sub { $_[0] / $_[1] },
'+' => sub { $_[0] + $_[1] },
'-' => sub { $_[0] - $_[1] },
);
my ($val, @rest) = @$tree;
$val = execute($val);
while (@rest){
my $op = shift @rest;
my $next = execute(shift @rest);
$val = $ops{$op}->($val, $next);
}
return $val;
}
print execute($parse_tree), "\n";
##
##
sub make_rule {
my ($operator, $next) = @_;
return sub {
my @res = ($next->());
while (lookahead($operator)){
push @res, match($operator);
push @res, $next->();
}
return @res > 1 ? \@res : $res[0];
};
}
*term = make_rule('MulOp', \&factor);
*expression = make_rule('AddOp', \&term);
##
##
grammar Arithmetic {
# expression -> (term add_op)* term
rule expression {
[ ]*
}
# term -> (factor mul_op)* factor
rule term {
[ ]*
}
# factor -> atom | '(' expression ')'
rule factor {
|
| '(' ')'
}
# add_op -> '+' | '-'
token add_op {
<[+-]> # that's a char class
}
# mul_op -> '*' | '/'
token mul_op {
<[*/]>
}
token atom {
\d+
}
}
# match it:
$input ~~ Grammar.expression;
##
##
rule factor {
|
| '(' [ ')' || ]
}
##
##
# this example mixes official Perl 6 syntax an PGE syntax,
# I don't really know if they are 100% compatible
# see https://svn.perl.org/parrot/trunk/docs/pct/pct_optable_guide.pod
# and http://perlcabal.org/syn/S05.html
rule factor {
|
| '(' ')'
}
token infix:<*> { }; # stands for a literal * here
token infix:> is equiv('infix:*') { };
token infix:<+> is looser('infix:*') { };
token infix:<-> is equiv('inifx:+') { };
# put parser in bottom-up mode:
rule expression is optable { ... }; # yes, '...' is valid Perl 6 ;-)
proto 'term:' is tighter('infix:*')
is parsed(&factor) { ... };
##
##
ModernArithmetic is Arithmetc {
token infix:> { '÷' } # different symbol, same rule name
}