#
# Proof of concept for a Parser::RecDescent parser for the diagrams fo
+und at:
# http://publib.boulder.ibm.com/infocenter/db2v8luw/index.jsp?topic=/c
+om.ibm.db2.udb.doc/admin/r0006726.htm
#
# Implementation Notes
# ====================
# - Printing out the result as a grammar is left as an
# excersise for the reader. It should be extremely simple.
# - "parameter-block" and "large bullets" (*) that denote
# unordered sequences are not supported. Those are
# relatively simple or similar to implement, and therefore
# they are not needed in this proof of concept program.
# - "--text.*--" (a replaceable immediately followed by
# literals) is handled differently than "--text--.*--"
# in case the user wishes to forbid whitespace between
# the replaceable and the literals in the former.
#
# Grammar Notes
# =============
# - Can't use // and '' in rules. Which also means I had to
# reimplement whitespace. MATCH provides the functionality
# of // and '', while DO_SET_SKIP provides the functionality
# of <skip>.
# - The '$_[0] = ...' junk commits changed made by a function.
# Not doing so undoes any change to the state made by the
# function, provide backtracking within functions.
# - "production(s)", "production(s?)" and the like won't
# work for us without the change to _parserepeat().
#
# Bugs
# ====
# - This is not a validator. The input is assumed to be
# valid. Adding validation would be simple, but it would
# be time consuming to code, and it would kill what little
# readability this program has.
# - No meaningful error reporting.
# - Didn't look into whether <commit> could be used to
# improve efficiency in this grammar.
# - Something tells me the regexp for sql_literal is
# expensive because of the backtracking necessary to
# locate a dash. The LHS of the | should probably have
# a lookahead.
# - Definitions for "sql_literal" and "sql_replaceable"
# are guesswork and may require tweaking.
# - Any features of Parser::RecDescent not currently used
# should be considered incompatible until known otherwise.
#
use strict;
use warnings;
use Data::Dumper ();
use Parse::RecDescent ();
package Parse::RecDescent::Hack;
use vars qw(@ISA);
BEGIN { @ISA = 'Parse::RecDescent'; }
sub _parserepeat($$$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES
{
my ($parser, $text, $prod, $min, $max, $_noactions, $expectation, $
+argcode) = @_;
my @tokens = ();
my $reps;
for ($reps=0; $reps<$max;)
{
$_[6]->at($text); # $_[6] IS $expectation FROM CALLER
my $_savetext = $text;
my $prevtextlen = length $text;
my $_tok;
if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$argco
+de)))
{
$text = $_savetext;
last;
}
push @tokens, $_tok if defined $_tok;
# $text isn't getting smaller.
# ---v
# last if ++$reps >= $min and $prevtextlen == length $text;
# ---
last if ++$reps >= $min and $_savetext eq $text;
# ---^
}
do { $_[6]->failed(); return undef} if $reps<$min;
$_[1] = $text;
return [@tokens];
}
package main;
{
my $grammar = <<'__EOI__';
{
use Algorithm::Loops ();
# State
sub IDX_TEXT () { 0 } # constant
sub IDX_TXET () { 1 } # constant
sub IDX_SKIPF () { 2 }
sub IDX_Y () { 3 }
sub IDX_X () { 4 }
sub IDX_D () { 5 }
sub IDX_STACK () { 6 }
sub STACK_IDX_ID () { 0 }
sub STACK_NEXT_IDX () { 1 }
sub CHOICE_IDX_Y () { STACK_NEXT_IDX+0 }
sub CHOICE_IDX_X () { STACK_NEXT_IDX+1 }
sub CHOICE_IDX_D () { STACK_NEXT_IDX+2 }
sub LOOP_IDX_END_Y () { STACK_NEXT_IDX+0 }
sub LOOP_IDX_END_X () { STACK_NEXT_IDX+1 }
sub DIR_N () { 0 }
sub DIR_E () { 1 }
sub DIR_S () { 2 }
sub DIR_W () { 3 }
sub DIR_C () { 4 }
use vars qw(
@DIR_X
@DIR_Y
@DIR_DASH
);
BEGIN
{
# N E S W .
@DIR_X = ( 0, +1, 0, -1, 0 );
@DIR_Y = ( -1, 0, +1, 0, 0 );
@DIR_DASH = qw( | - | - );
}
sub clone {
my ($self, @args) = @_;
my $new = bless([ @$self ]);
# Stack not cloned. Must be done explicitely by caller.
#$new->[IDX_STACK] = [ @{$new->[IDX_STACK]} ];
while (scalar(@args))
{
my $idx = shift(@args);
my $val = shift(@args);
$new->[$idx] = $val;
}
return $new;
}
# [ [
# 'aeh',
# 'abcd', 'bfi',
# 'efg', ==> 'cgj',
# 'hijkl', 'd k',
# ' l',
# ] ]
sub transpose_AoS {
return [ Algorithm::Loops::MapCarU { join('', map { defined($_) ? $
+_ : ' ' } @_) } map { [ /(.)/gs ] } @{$_[0]} ];
}
sub get_char_at {
my ($text, $y, $x, $d) = @_;
$d = DIR_C unless (defined($d));
$y += $DIR_Y[$d] - 1; # Make zero-based.
$x += $DIR_X[$d] - 1; # Make zero-based.
return undef unless ($y >= 0);
return undef unless ($y < @$text);
my $line = $text->[$y];
return undef if ($x < 0 || $x >= length($line));
return substr($line, $x, 1);
}
sub is_loop_return {
my ($text, $y, $x) = @_;
my $ch;
$ch = get_char_at($text, $y + $DIR_Y[DIR_N]*0, $x + $DIR_X[DIR_N]*0
+);
return undef unless (defined($ch) && $ch eq '-');
$ch = get_char_at($text, $y + $DIR_Y[DIR_N]*1, $x + $DIR_X[DIR_N]*1
+);
return undef unless (defined($ch) && $ch eq 'V');
$ch = get_char_at($text, $y + $DIR_Y[DIR_N]*2, $x + $DIR_X[DIR_N]*2
+);
return undef unless (defined($ch) && ($ch eq '.' || $ch eq '|'));
return 1;
}
sub is_dash {
my ($text, $y, $x, $d) = @_;
my $ch = get_char_at($text, $y, $x);
return undef unless (defined($ch));
return undef unless ($ch eq $DIR_DASH[$d]);
return !(is_loop_return($text, $y, $x));
}
sub skip {
my ($self) = @_;
my ($skipf) = @$self[IDX_SKIPF];
$_[0]->$skipf();
}
sub DO_INIT {
my @text = $_[0] =~ /([^\n]*)\n/gs;
my $text = \@text;
my $txet = transpose_AoS($text);
my $self = bless([
$text, # IDX_TEXT
$txet, # IDX_TXET
'SKIP_NOTHING', # IDX_SKIPF
0, # IDX_Y
1, # IDX_X
0, # IDX_D
[], # IDX_STACK
]);
$self->DO_MOVE_TO_START();
$_[0] = $self;
return 1;
}
sub DO_MOVE_TO_START {
my ($self) = @_;
my ($text, $y) = @$self[IDX_TEXT, IDX_Y];
my $num_lines = scalar(@$text);
my $x = 0; # 0-based.
--$y; # Make 0-based.
while (++$y < $num_lines)
{
last if (substr($text->[$y], $x) =~ /^>/s);
}
++$y; # Make 1-based.
++$x; # Make 1-based.
$_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x, (IDX_D) => DIR_E
+);
return 1;
}
# Skips whitespace like a MATCH.
sub DO_SET_DIR {
my ($self, $new_d) = @_;
$self->skip();
my ($old_d) = @$self[IDX_D];
$_[0] = $self->clone((IDX_D) => $new_d);
return $old_d;
}
sub DO_SET_SKIP {
my ($self, $new_skipf) = @_;
my ($old_skipf) = @$self[IDX_SKIPF];
$_[0] = $self->clone((IDX_SKIPF) => $new_skipf);
return $old_skipf;
}
sub SKIP_NOTHING {}
sub SKIP_LINE {
my ($self) = @_;
my ($text, $y, $x, $d) = @$self[IDX_TEXT, IDX_Y, IDX_X, IDX_D];
my $moved;
while (is_dash($text, $y, $x, $d))
{
$y += $DIR_Y[$d];
$x += $DIR_X[$d];
$moved = 1;
}
$_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x) if ($moved);
}
# Replacement for //.
sub MATCH {
my ($self, $regexp) = @_;
$self->skip();
my ($text, $txet, $y, $x, $d) = @$self[IDX_TEXT, IDX_TXET, IDX_Y, I
+DX_X, IDX_D];
--$y; # Make 0-based.
--$x; # Make 0-based.
my $line;
my $idx;
my $pos;
my $match;
my $sign;
if ($DIR_X[$d]) {
$line = $text->[$y];
return undef unless (defined($line));
$idx = IDX_X;
$pos = $x;
} else {
$line = $txet->[$x];
return undef unless (defined($line));
$idx = IDX_Y;
$pos = $y;
}
if ($DIR_X[$d] >= 0 && $DIR_Y[$d] >= 0) {
$line = substr($line, $pos);
($match) = $line =~ /^($regexp)/s;
$sign = +1;
} else {
$line = substr($line, 0, $pos+1);
($match) = $line =~ /($regexp)$/s;
$sign = -1;
}
return undef unless (defined($match));
$pos += length($match) * $sign;
++$pos; # Make 1-based.
$_[0] = $self->clone($idx => $pos);
return $match;
}
sub MATCH_LOOP_RET {
my ($self) = @_;
$self->skip();
my ($text, $y, $x) = @$self[IDX_TEXT, IDX_Y, IDX_X];
return undef unless (is_loop_return($text, $y, $x));
$y += $DIR_Y[DIR_E];
$x += $DIR_X[DIR_E];
$_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x);
return 1;
}
sub MATCH_EOF {
my ($self) = @_;
my ($text, $y) = @$self[IDX_TEXT, IDX_Y];
--$y; # Make 0-based.
return ($y >= scalar(@$text) ? 1 : undef);
}
# Skips whitespace like a MATCH.
sub DO_CHOICE_START {
my ($self) = @_;
$self->skip();
my ($y, $x, $d, $stack) = @$self[IDX_Y, IDX_X, IDX_D, IDX_STACK];
$stack = [ @$stack ];
push(@$stack,
[
'choice', # STACK_IDX_ID
$y, # CHOICE_IDX_Y
$x, # CHOICE_IDX_X
$d, # CHOICE_IDX_D
],
);
$_[0] = $self->clone((IDX_STACK) => $stack);
return 1;
}
sub DO_GO_CHOICE_START {
my ($self) = @_;
my ($stack) = @$self[IDX_STACK];
my $record = $stack->[$#$stack];
my ($y, $x, $d) = @$record[CHOICE_IDX_Y, CHOICE_IDX_X, CHOICE_IDX_D
+];
$_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x, (IDX_D) => $d);
return 1;
}
sub DO_CHOICE_END {
my ($self) = @_;
my ($stack) = @$self[IDX_STACK];
$stack = [ @$stack ];
pop(@$stack);
$_[0] = $self->clone((IDX_STACK) => $stack);
return 1;
}
# Skips whitespace like a MATCH.
sub DO_LOOP_START {
my ($self) = @_;
$self->skip();
my ($y, $x, $d, $stack) = @$self[IDX_Y, IDX_X, IDX_D, IDX_STACK];
$stack = [ @$stack ];
push(@$stack,
[
'loop', # STACK_IDX_ID
undef, # LOOP_IDX_END_Y
undef, # LOOP_IDX_END_X
],
);
$_[0] = $self->clone((IDX_STACK) => $stack);
return 1;
}
# Skips whitespace like a MATCH.
sub DO_LOOP_BACK {
my ($self) = @_;
$self->skip();
my ($y, $x, $stack) = @$self[IDX_Y, IDX_X, IDX_STACK];
$stack = [ @$stack ];
my $record = $stack->[$#$stack];
@$record[LOOP_IDX_END_Y, LOOP_IDX_END_X] = ($y, $x);
$_[0] = $self->clone((IDX_STACK) => $stack);
return 1;
}
sub DO_GO_LOOP_BACK {
my ($self) = @_;
my ($stack) = @$self[IDX_STACK];
my $record = $stack->[$#$stack];
my ($y, $x) = @$record[LOOP_IDX_END_Y, LOOP_IDX_END_X];
$_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x, (IDX_D) => DIR_E
+);
return 1;
}
sub DO_LOOP_END {
my ($self) = @_;
my ($stack) = @$self[IDX_STACK];
$stack = [ @$stack ];
pop(@$stack);
$_[0] = $self->clone((IDX_STACK) => $stack);
return 1;
}
}
# --- Basics ---
parse : {DO_INIT $text}
stmt(s?)
{$text->MATCH_EOF()}
{ $item[2] }
stmt : {$text->DO_SET_SKIP('SKIP_NOTHING')}
{$text->MATCH('>>')}
{$text->DO_SET_SKIP('SKIP_LINE')}
expr(s?)
stmt_extention(s?)
{$text->MATCH('><')}
{$text->DO_MOVE_TO_START()}
{ [ @{$item[4]}, map { @{$_} } @{$item[5]} ]
+}
stmt_extention : {$text->MATCH('>(?!<)')}
{$text->DO_MOVE_TO_START()}
{$text->MATCH('>(?!>)')}
expr(s?)
{ $item[4] }
expr : sql_text { $item[1] }
| choice { $item[1] }
| loop { $item[1] }
# --- Literals ---
sql_text : sql_component
{$text->DO_SET_SKIP('SKIP_NOTHING')}
sql_component(s?)
{$text->DO_SET_SKIP($item[2])}
{ [ $item[0], $item[1], @{$item[3]} ] }
sql_component : sql_literal { $item[1] }
| sql_replaceable { $item[1] }
sql_literal : {$text->MATCH(qr/(?:[+<>'.])*[^a-z-+<>'.\s](?
+:[^a-z-+<>]|[A-Z]-[A-Z])*/)} { [ @item ] }
sql_replaceable : {$text->MATCH(qr/[a-z](?:[a-z]|-[a-z])*/
+ )} { [ @item ] }
# --- Choices ---
choice : {$text->DO_CHOICE_START()}
choice_branches
{$text->DO_GO_CHOICE_START()}
{$text->DO_SET_DIR(DIR_E)}
{$text->MATCH('\\+')}
expr(s?)
{$text->MATCH('\\+')}
{$text->DO_CHOICE_END()}
{ [ $item[0], @{$item[2]}, $item[6] ] }
choice_branches : north_branches south_branches(?) { [ $item[1
+], @{$item[2]} ] }
| south_branches { [ $item[1
+] ] }
north_branches : {$text->DO_GO_CHOICE_START()} {$text->DO_SET_
+DIR(DIR_N)} {$text->MATCH('\\+')} north_branch { $item[4] }
south_branches : {$text->DO_GO_CHOICE_START()} {$text->DO_SET_
+DIR(DIR_S)} {$text->MATCH('\\+')} south_branch { $item[4] }
north_branch : {$text->DO_CHOICE_START()}
{$text->DO_SET_DIR(DIR_N)}
{$text->MATCH('\\+')}
north_branch
{$text->DO_GO_CHOICE_START()}
{$text->DO_SET_DIR(DIR_E)}
{$text->MATCH('\\+')}
expr(s?)
{$text->MATCH('\\+')}
{$text->DO_CHOICE_END()}
{ [ @{$item[4]}, $item[8] ] }
| {$text->DO_SET_DIR(DIR_E)}
{$text->MATCH('\\.')}
expr(s?)
{$text->MATCH('\\.')}
{ $item[3] }
south_branch : {$text->DO_CHOICE_START()}
{$text->DO_SET_DIR(DIR_S)}
{$text->MATCH('\\+')}
south_branch
{$text->DO_GO_CHOICE_START()}
{$text->DO_SET_DIR(DIR_E)}
{$text->MATCH('\\+')}
expr(s?)
{$text->MATCH('\\+')}
{$text->DO_CHOICE_END()}
{ [ @{$item[4]}, $item[8] ] }
| {$text->DO_SET_DIR(DIR_E)}
{$text->MATCH("'")}
expr(s?)
{$text->MATCH("'")}
{ $item[3] }
# --- Loops ---
loop : {$text->DO_LOOP_START()}
{$text->MATCH_LOOP_RET()}
expr(s?)
{$text->DO_LOOP_BACK()}
{$text->DO_SET_DIR(DIR_N)}
{$text->MATCH('\\+')}
{$text->DO_SET_DIR(DIR_W)}
{$text->MATCH('\\.')}
loop_seperator
{$text->DO_SET_DIR(DIR_S)}
{$text->MATCH('\\.')}
{$text->MATCH('V')}
{$text->DO_GO_LOOP_BACK()}
{$text->MATCH('\\+')}
{$text->DO_LOOP_END()}
{ [ $item[0], $item[3], $item[9] ] }
loop_seperator : {$text->MATCH(',')} { $item[1] }
| { '' }
__EOI__
$::RD_HINT = 1;
# $::RD_TRACE = 1;
# Parse::RecDescent::Hack->Precompile($grammar, "Three");
my $parser = Parse::RecDescent::Hack->new($grammar);
die("Bad grammar.\n") unless defined($parser);
my $text = <<'__EOI__';
>>-ACTIVATE--+-DATABASE-+--database-alias----------------------->
'-DB-------'
>--+-------------------------------------+---------------------><
'-USER--username--+-----------------+-'
'-USING--password-'
>>-select-clause--from-clause--+--------------+----------------->
'-where-clause-'
>--+-----------------+--+---------------+----------------------><
'-group-by-clause-' '-having-clause-'
.-ALL------.
>>-SELECT--+----------+----------------------------------------->
'-DISTINCT-'
>--+-*-----------------------------------------------+---------><
| .-,-------------------------------------------. |
| V | |
'---+-expression--+-------------------------+-+-+-'
| | .-AS-. | |
| '-+----+--new-column-name-' |
'-exposed-name.*--------------------------'
__EOI__
my $result = $parser->parse(\$text);
die("Bad text.\n") unless (defined($result));
print(Data::Dumper->Dump([ $result ],[qw( $result )]));
}
__END__
Must Parse
==========
>>-ACTIVATE--+-DATABASE-+--database-alias----------------------->
'-DB-------'
>--+-------------------------------------+---------------------><
'-USER--username--+-----------------+-'
'-USING--password-'
>>-select-clause--from-clause--+--------------+----------------->
'-where-clause-'
>--+-----------------+--+---------------+----------------------><
'-group-by-clause-' '-having-clause-'
.-ALL------.
>>-SELECT--+----------+----------------------------------------->
'-DISTINCT-'
>--+-*-----------------------------------------------+---------><
| .-,-------------------------------------------. |
| V | |
'---+-expression--+-------------------------+-+-+-'
| | .-AS-. | |
| '-+----+--new-column-name-' |
'-exposed-name.*--------------------------'
__END__
Must Parse (Simpler)
====================
>>-------------------------------------------------------------->
>--------------------------------------------------------------><
>>-ACTIVATE----------------database-alias----------------------><
>>-ACTIVATE--+-DATABASE-+--database-alias----------------------><
'-DB-------'
.-ALL------.
>>-SELECT--+----------+----------------------------------------><
'-DISTINCT-'
>>-+-*-----------------------------------------------+--word2--><
| .-,-----------------------------------. |
| V | |
'-----expression------------------------+--word1--'
__END__
Output
======
$result = [
[
[
'sql_text',
[
'sql_literal',
'ACTIVATE'
]
],
[
'choice',
[
[
'sql_text',
[
'sql_literal',
'DB'
]
]
],
[
[
'sql_text',
[
'sql_literal',
'DATABASE'
]
]
]
],
[
'sql_text',
[
'sql_replaceable',
'database-alias'
]
],
[
'choice',
[
[
'sql_text',
[
'sql_literal',
'USER'
]
],
[
'sql_text',
[
'sql_replaceable',
'username'
]
],
[
'choice',
[
[
'sql_text',
[
'sql_literal',
'USING'
]
],
[
'sql_text',
[
'sql_replaceable',
'password'
]
]
],
[]
]
],
[]
]
],
[
[
'sql_text',
[
'sql_replaceable',
'select-clause'
]
],
[
'sql_text',
[
'sql_replaceable',
'from-clause'
]
],
[
'choice',
[
[
'sql_text',
[
'sql_replaceable',
'where-clause'
]
]
],
[]
],
[
'choice',
[
[
'sql_text',
[
'sql_replaceable',
'group-by-clause'
]
]
],
[]
],
[
'choice',
[
[
'sql_text',
[
'sql_replaceable',
'having-clause'
]
]
],
[]
]
],
[
[
'sql_text',
[
'sql_literal',
'SELECT'
]
],
[
'choice',
[
[
'sql_text',
[
'sql_literal',
'ALL'
]
]
],
[
[
'sql_text',
[
'sql_literal',
'DISTINCT'
]
]
],
[]
],
[
'choice',
[
[
'loop',
[
[
'choice',
[
[
'sql_text',
[
'sql_replaceable',
'exposed-name'
],
[
'sql_literal',
'.*'
]
]
],
[
[
'sql_text',
[
'sql_replaceable',
'expression'
]
],
[
'choice',
[
[
'choice',
[
[
'sql_text',
[
'sql_literal',
'AS'
]
]
],
[]
],
[
'sql_text',
[
'sql_replaceable',
'new-column-name'
]
]
],
[]
]
]
]
],
','
]
],
[
[
'sql_text',
[
'sql_literal',
'*'
]
]
]
]
]
];
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.