#!/usr/bin/perl use strict; use warnings FATAL => 'all'; use Data::Dump::Streamer; my %Parsers = ( 'NUM' => \&_parse_literal, 'LITERAL' => \&_parse_literal, 'ASSIGNMENT' => \&_parse_assignment, 'VAR' => \&_parse_var, 'METHOD' => \&_parse_method, 'FUNC' => \&_parse_func, 'FILTER' => \&_parse_filter, 'PARAM_LIST' => \&_parse_param_list, 'OP' => \&_parse_op, 'NOT' => \&_parse_not, 'TERM' => \&_parse_term, 'OPEN' => \&_parse_deref, 'EXPRESSION' => \&_parse_expression, 'CLAUSE' => \&_parse_clause, ); my %OPs = ( '*' => [ 6, sub { $_[0]->() * $_[1]->() } ], '/' => [ 6, sub { $_[0]->() / $_[1]->() } ], '+' => [ 5, sub { $_[0]->() + $_[1]->() } ], '-' => [ 5, sub { $_[0]->() - $_[1]->() } ], '&&' => [ 3, sub { $_[0]->() && $_[1]->() } ], '||' => [ 2, sub { $_[0]->() || $_[1]->() } ], ); my %Terms = ( map {$_ => 1} qw(NUM NOT LITERAL ASSIGNMENT VAR FUNC) ); my %Derefs = ( '{' => [ 'HASH', 'key', '}', sub { $_[0]->{ $_[1] } } ], '[' => [ 'ARRAY', 'index', ']', sub { $_[0]->[ $_[1] ] } ], ); ## parse("statement",{vars},{funcs}) #=================================== sub parse { #=================================== my $text = shift; my %state = ( vars => shift @_, funcs => shift @_, tokens => tokenise($text), ); my $sub = $Parsers{EXPRESSION}->( \%state ) || sub { }; if ( @{ $state{tokens} } ) { die "Still have unprocessed tokens :" . Dump( @{ $state{tokens} } )->Out; } return ( $sub, \%state ); } ## EXPRSSSION [OP|FILTER EXPRESSION|{HASH DEREF}|[ARRAY DEREF]] #=================================== sub _parse_expression { #=================================== my $state = shift; my $next_token = $state->{tokens}[0]; my $expression = $Parsers{CLAUSE}->($state) || return; # OP EXPRESSION while ( $next_token = $state->{tokens}[0] ) { return $expression unless defined $next_token && $next_token->[0] =~ /^OP|FILTER|OPEN/; $expression = $Parsers{ $next_token->[0] }->( $state, $expression ); } return $expression; } ## (EXPRESSION) | TERM #=================================== sub _parse_clause { #=================================== my $state = shift; my $next_token = $state->{tokens}[0]; # ( EXPRESSION ) if ( defined $next_token && $next_token->[0] eq 'PAREN' && $next_token->[1] eq '(' ) { shift @{ $state->{tokens} }; my $expression = $Parsers{EXPRESSION}->($state); $next_token = $state->{tokens}[0]; die "Missing closing parenthesis" unless defined $next_token && $next_token->[0] eq 'PAREN' && $next_token->[1] eq ')'; shift @{ $state->{tokens} }; return $expression; } return $Parsers{TERM}->($state); } ## OP CLAUSE #=================================== sub _parse_op { #=================================== my $state = shift; my $expression = shift; my $token = shift @{ $state->{tokens} }; my ( $precedence, $op ) = @{ $OPs{ $token->[1] } }; my $clause = $Parsers{CLAUSE}->($state) || die "Missing clause after $token->[1]"; my $next_token = $state->{tokens}[0]; # Next token has higher precedence $clause = $Parsers{OP}->( $state, $clause ) if defined $next_token && $next_token->[0] eq 'OP' && $OPs{ $next_token->[1] }[0] > $precedence; return sub { $op->( $expression, $clause ) }; } ## DEREF VAR #=================================== sub _parse_deref { #=================================== my $state = shift; my $var_expression = shift; my $token = shift @{ $state->{tokens} }; die "Unmatched '$token->[1]'" unless exists $Derefs{ $token->[1] }; my $deref = $Derefs{ $token->[1] }; my $key_expression = $Parsers{EXPRESSION}->($state) || die "Missing $deref->[1] when dereferencing $deref->[0]"; $token = shift @{ $state->{tokens} }; die "Missing closing $deref->[2]" unless defined $token && $token->[0] eq 'CLOSE' && $token->[1] eq $deref->[2]; my $ref_type = $deref->[0]; my $deref_code = $deref->[3]; return sub { my $var = $var_expression->(); die "Var is not of type '$ref_type'" unless ref $var && ref $var eq $ref_type; my $key = $key_expression->(); return $deref_code->( $var, $key ); }; } ## ! CLAUSE #=================================== sub _parse_not { #=================================== my $state = shift; my $token = shift @{ $state->{tokens} }; my $clause = $Parsers{CLAUSE}->($state) || die "Missing clause after !"; return sub { !$clause->() }; } ## NUM | NOT | LITERAL | ASSIGNMENT | VAR | FUNC #=================================== sub _parse_term { #=================================== my $state = shift; my $next_token = $state->{tokens}[0]; return unless defined $next_token && exists $Terms{$next_token->[0]}; return $Parsers{ $next_token->[0] }->($state); } ## $ | $VAR [.METHOD] #=================================== sub _parse_var { #=================================== my $state = shift; my $vars = $state->{vars}; my $token = shift @{ $state->{tokens} }; my $var_name = $token->[1] || ''; my $next_token = $state->{tokens}[0]; my $var = sub { die "Undefined var '\$$var_name'" unless exists $vars->{$var_name}; $vars->{$var_name}; }; return $var unless defined $next_token && $next_token->[0] eq 'METHOD'; return $Parsers{ $next_token->[0] }->( $state, $var ); } ## $ | $VAR = EXPRSSSION #=================================== sub _parse_assignment { #=================================== my $state = shift; my $vars = $state->{vars}; my $token = shift @{ $state->{tokens} }; my $var_name = $token->[1] || ''; my $expression = $Parsers{EXPRESSION}->($state) || die "No expression passed to assignment"; return sub { $vars->{$var_name} = $expression->(); } } ## .METHOD(PARAM_LIST) [.METHOD] #=================================== sub _parse_method { #=================================== my $state = shift; my $object = shift; my $token = shift @{ $state->{tokens} }; my $method_name = $token->[1]; my @param_list = @{ $Parsers{PARAM_LIST}->($state) }; my $method = sub { return $object->()->$method_name( map { $_->() } @param_list ); }; my $next_token = $state->{tokens}[0]; if ( defined $next_token && $next_token->[0] eq 'METHOD' ) { return $Parsers{METHOD}->( $state, $method ); } return $method; } ## | FILTER(PARAM_LIST) #=================================== sub _parse_filter { #=================================== my $state = shift; my $expression = shift; my $token = shift @{ $state->{tokens} }; my $funcs = $state->{funcs}; my $filter_name = $token->[1]; my @param_list = @{ $Parsers{PARAM_LIST}->($state) }; return sub { die "Undefined filter '$filter_name'" unless exists $funcs->{$filter_name}; return $funcs->{$filter_name} ->( map { $_->() } ( @param_list, $expression ) ); }; } ## FUNC(PARAM_LIST) [.METHOD] #=================================== sub _parse_func { #=================================== my $state = shift; my $token = shift @{ $state->{tokens} }; my $funcs = $state->{funcs}; my $func_name = $token->[1]; my @param_list = @{ $Parsers{PARAM_LIST}->($state) }; my $next_token = $state->{tokens}[0]; my $func = sub { die "Undefined function '$func_name'" unless exists $funcs->{$func_name}; return $funcs->{$func_name}->( map { $_->() } @param_list ); }; return $func unless defined $next_token && $next_token->[0] eq 'METHOD'; return $Parsers{ $next_token->[0] }->( $state, $func ); } ## ([PARAM[,PARAM]]) #=================================== sub _parse_param_list { #=================================== my $state = shift; my $next_token = $state->{tokens}[0]; return [] unless defined $next_token && $next_token->[0] eq 'PAREN' && $next_token->[1] eq '('; shift @{ $state->{tokens} }; my @param_list; while ( my $next_token = $state->{tokens}[0] ) { die "Missing closing parenthesis on param list" unless defined $next_token; if ( $next_token->[0] eq 'COMMA' ) { shift @{ $state->{tokens} }; next; } last if $next_token->[0] eq 'PAREN' && $next_token->[1] eq ')'; push @param_list, $Parsers{EXPRESSION}->($state); } shift @{ $state->{tokens} }; return \@param_list; } ## LITERAL #=================================== sub _parse_literal { #=================================== my $state = shift; my $token = shift @{ $state->{tokens} }; my $literal = $token->[1]; return sub {$literal}; } # Breaks up the text into an array of tokens #=================================== sub tokenise { #=================================== my $tokeniser = _get_tokeniser(@_); my @tokens; while ( my $token = $tokeniser->() ) { # Reassemble ['OP','-'], ['NUM',123] # into ['NUM','-123'] where appropriate if ( $token->[0] eq 'NUM' && defined $tokens[-1] && $tokens[-1][0] eq 'OP' && $tokens[-1][1] =~ /[-+]/ && ( !defined $tokens[-2] || $tokens[-2][0] =~ /^OP|NOT$/ ) ) { $token->[1] = $tokens[-1][1] eq '+' ? $token->[1] : 0 - $token->[1]; pop @tokens; } push @tokens, $token; } return \@tokens; } # Returns a stream of tokens #=================================== sub _get_tokeniser { #=================================== my $text = shift; $text = '' unless defined $text; my $atom = qr/[a-zA-Z_]\w*/o; return sub { TOKEN: { return [ 'NUM', $1 ] if $text =~ /\G ((?:\d+(?:\.\d*)?)) /gcx; return [ 'LITERAL', $2 ] if $text =~ /\G (['"]) (.*?) (? '; die "Syntax error ' $1 ' : ' $text ' "; } return; } }; } ############################# END PARSER #################################### # Dummy class for method testing package Foo; sub new { bless( {}, 'Foo' ); } sub bar { shift; return 'Bar', @_; } ######### TESTS ############# package main; run_tests(@ARGV); sub run_tests { my $test_no = shift; my @tests = ( 'BLANK', [ q{}, [] ], 'INTS & LITERALS', [ q{0}, 0 ], [ q{1}, 1 ], [ q{2}, 2 ], [ q{''}, '' ], [ q{'abc'}, 'abc' ], [ qq{'abc\ndef'}, "abc\ndef" ], 'INTS & LITERALS in ()', [ q{('abc')}, 'abc' ], [ q{(0)}, 0 ], [ q{(2)}, 2 ], 'VARS', [ q{$lang}, 'English' ], [ q{($lang)}, 'English' ], 'FUNCS', [ q{caps('lower')}, 'LOWER' ], [ q{caps('lower',$lang)}, [ 'LOWER', 'ENGLISH' ] ], 'FILTER', [ q{$lang | caps}, 'ENGLISH' ], [ q{$lang | caps('lower')}, [ 'LOWER', 'ENGLISH' ] ], 'METHODS', [ q{new_foo.bar}, 'Bar' ], [ q{new_foo.bar('Foo')}, [ 'Bar', 'Foo' ] ], [ q{new_foo.bar('Foo') | caps('other')}, [ 'OTHER', 'BAR', 'FOO' ] ], 'DEREF', [ q($hash{'name'}{'first'}), 'John' ], [ q($hash{'name'}{'third'}), undef ], [ q{$array[2]}, 'third' ], [ q{$array[-3]}, 'second' ], 'LOGIC', [ q{2 || 3}, 2 ], [ q{2 || 0}, 2 ], [ q{0 || 3}, 3 ], [ q{0 || 0}, 0 ], [ q{2 && 3}, 3 ], [ q{2 && 0}, 0 ], [ q{0 && 3}, 0 ], [ q{0 && 0}, 0 ], [ q{2 || 3 && 4}, 2 ], [ q{2 || 0 && 4}, 2 ], [ q{2 || 3 && 0}, 2 ], [ q{2 || 0 && 0}, 2 ], [ q{0 || 3 && 4}, 4 ], [ q{0 || 0 && 4}, 0 ], [ q{0 || 3 && 0}, 0 ], [ q{0 || 0 && 0}, 0 ], [ q{2 && 3 || 4}, 3 ], [ q{2 && 0 || 4}, 4 ], [ q{2 && 3 || 0}, 3 ], [ q{2 && 0 || 0}, 0 ], [ q{0 && 3 || 4}, 4 ], [ q{0 && 0 || 4}, 4 ], [ q{0 && 3 || 0}, 0 ], [ q{0 && 0 || 0}, 0 ], [ q{(2 || 3) && 4}, 4 ], [ q{(2 || 0) && 4}, 4 ], [ q{(2 || 3) && 0}, 0 ], [ q{(2 || 0) && 0}, 0 ], [ q{(0 || 3) && 4}, 4 ], [ q{(0 || 0) && 4}, 0 ], [ q{(0 || 3) && 0}, 0 ], [ q{(0 || 0) && 0}, 0 ], [ q{2 && (3 || 4)}, 3 ], [ q{2 && (0 || 4)}, 4 ], [ q{2 && (3 || 0)}, 3 ], [ q{2 && (0 || 0)}, 0 ], [ q{0 && (3 || 4)}, 0 ], [ q{0 && (0 || 4)}, 0 ], [ q{0 && (3 || 0)}, 0 ], [ q{0 && (0 || 0)}, 0 ], [ q{(0 || 0) && (0 || 0)}, 0 ], [ q{(0 || 0) && (0 || 5)}, 0 ], [ q{(0 || 0) && (4 || 0)}, 0 ], [ q{(0 || 0) && (4 || 5)}, 0 ], [ q{(0 || 3) && (0 || 0)}, 0 ], [ q{(0 || 3) && (0 || 5)}, 5 ], [ q{(0 || 3) && (4 || 0)}, 4 ], [ q{(0 || 3) && (4 || 5)}, 4 ], [ q{(2 || 0) && (0 || 0)}, 0 ], [ q{(2 || 0) && (0 || 5)}, 5 ], [ q{(2 || 0) && (4 || 0)}, 4 ], [ q{(2 || 0) && (4 || 5)}, 4 ], [ q{(2 || 3) && (0 || 0)}, 0 ], [ q{(2 || 3) && (0 || 5)}, 5 ], [ q{(2 || 3) && (4 || 0)}, 4 ], [ q{(2 || 3) && (4 || 5)}, 4 ], [ q{(0 && (0 && (0 || 0)))}, 0 ], [ q{(0 && (0 && (0 || 5)))}, 0 ], [ q{(0 && (0 && (4 || 0)))}, 0 ], [ q{(0 && (0 && (4 || 5)))}, 0 ], [ q{(0 && (3 && (0 || 0)))}, 0 ], [ q{(0 && (3 && (0 || 5)))}, 0 ], [ q{(0 && (3 && (4 || 0)))}, 0 ], [ q{(0 && (3 && (4 || 5)))}, 0 ], [ q{(2 && (0 && (0 || 0)))}, 0 ], [ q{(2 && (0 && (0 || 5)))}, 0 ], [ q{(2 && (0 && (4 || 0)))}, 0 ], [ q{(2 && (0 && (4 || 5)))}, 0 ], [ q{(2 && (3 && (0 || 0)))}, 0 ], [ q{(2 && (3 && (0 || 5)))}, 5 ], [ q{(2 && (3 && (4 || 0)))}, 4 ], [ q{(2 && (3 && (4 || 5)))}, 4 ], [ q{2 && 3 && 4}, 4 ], [ q{0 && 3 && 4}, 0 ], [ q{2 && 0 && 4}, 0 ], [ q{2 && 3 && 0}, 0 ], [ q{2 || 3 || 4}, 2 ], [ q{0 || 3 || 4}, 3 ], [ q{2 || 3 || 0}, 2 ], [ q{0 || 0 || 4}, 4 ], 'ARITHMETIC', [ q{1+2}, 3 ], [ q{5-2}, 3 ], [ q{3*4}, 12 ], [ q{18/3}, 6 ], [ q{10-2-3}, 5 ], [ q{27/3/2}, 4.5 ], [ q{2+3*4-12/3}, 10 ], [ q{(2+3)*4-12/3}, 16 ], 'SIGNED NUMBERS', [ q{-3}, -3 ], [ q{+3}, 3 ], [ q{3+3}, 6 ], [ q{-2-4}, -6 ], [ q{+3 + -4}, -1 ], 'NOT', [ q{ !3}, '' ], [ q{ !0}, 1 ], [ q{ !'abc'}, '' ], [ q{ !''}, 1 ], [ q{ !3 && 4}, '' ], [ q{ !0 && 4}, 4 ], 'CR in statement', [ qq{(2+3)\n*4-\n12/3}, 16 ], 'ASSIGNMENT', [q{$abc = 123},'123'], [q{$abc},'123'], [q{$abc = 123 + 24},'147'], [q{$abc},'147'], ); my @errors = ( [ q{,}, 'unprocessed' ], [ q{$lang,$lang}, 'unprocessed' ], [ q{.caps()}, 'unprocessed' ], [ q{$?}, 'Syntax error' ], [ q{3 && 4 || && 5}, 'Missing clause' ], [ q{$def}, 'Undefined var' ], [ q{func}, 'Undefined function' ], [ q{$lang | xyz}, 'Undefined filter' ], [ q{$lang.abc}, 'locate object method' ], [ q{$hash.name.third}, 'unblessed' ], [ q($array{2}), "not of type 'HASH'" ], [ q{$hash[first]}, "not of type 'ARRAY'" ], [ q({'deref'}), 'unprocessed tokens' ], ); if ( defined $test_no ) { @tests = ( $tests[ $test_no - 1 ] ); $test_no--; } else { $test_no = 0 } my $vars = { lang => 'English', hash => { name => { first => 'John', second => 'Smith', '3' => 'three' } }, array => [ 'first', 'second', 'third', 'fourth' ], }; my $funcs = { caps => sub { return map {uc} @_; }, new_foo => sub { return Foo->new(@_); }, }; my $i = 1; foreach my $test (@tests) { unless ( ref $test ) { print "\n$test\n"; $i++; next; } eval { my ( $sub, $state ) = parse( $test->[0], $vars, $funcs ); my @expect = map { defined $_ ? $_ : '' } ref $test->[1] ? @{ $test->[1] } : $test->[1]; my @return = map { defined $_ ? $_ : '' } $sub->(); eval { die if @expect != @return; for ( my $j = 0; $j < @expect; $j++ ) { die if $expect[$j] ne $return[$j]; } }; if ($@) { die "Expected [" . join( ',', @expect ) . "], got [" . join( ',', @return ) . "] instead\n" . ( Dump($sub)->Out ); } print( $i+ $test_no, " : SUCCESS : $test->[0]\n" ); }; if ($@) { die( "Running test " . ( $i + $test_no ) . " : $test->[0]:\n $@\n" ); } $i++; } $i = 1; foreach my $test (@errors) { my ( $sub, $state ); eval { ( $sub, $state ) = parse( $test->[0], $vars, $funcs ); $sub->(); }; my $error = $@ || 'ERROR: No error thrown!'; my $regex = $test->[1]; if ( $error =~ m/$regex/ ) { print "Error test $i: SUCCESS : $test->[0]\n"; $i++; } else { die $test->[0] . " : $error" } } }