>>-ACTIVATE--+-DATABASE-+--database-alias----------------------->
'-DB-------'
>--+-------------------------------------+---------------------><
'-USER--username--+-----------------+-'
'-USING--password-'
####
>>-select-clause--from-clause--+--------------+----------------->
'-where-clause-'
>--+-----------------+--+---------------+----------------------><
'-group-by-clause-' '-having-clause-'
##
##
.-ALL------.
>>-SELECT--+----------+----------------------------------------->
'-DISTINCT-'
>--+-*-----------------------------------------------+---------><
| .-,-------------------------------------------. |
| V | |
'---+-expression--+-------------------------+-+-+-'
| | .-AS-. | |
| '-+----+--new-column-name-' |
'-exposed-name.*--------------------------'
##
##
rule1: 'ACTIVATE' ('DATABASE' | 'DB') database-alias ('' | 'USER' username ( '' | 'USING' password ) )
rule2: select-clause from-clause ('' | where-clause) ('' | group-by-clause) ('' | having-clause)
rule3: 'SELECT' ('ALL' | '' | 'DISTINCT') ( '*' | ( expression ( '' | ( ('' | 'AS' ) new-column-name ) ) | exposed-name '.*' )(/,/ *) )
##
##
rule2: select-clause from-clause where-clause(?) group-by-clause(?) having-clause(?)
..
##
##
#!/usr/bin/perl
use warnings;
use strict;
use Data::Dump::Streamer 'Dumper', 'Dump';
use List::Util 'max';
$|++;
my $DEBUG = 1;
# $x -- virtual line number.
# $y -- position along the virtual line of the leftmost point in the physical line.
my ($x, $y)=(0, 0);
# The position along the virtual line of the last EOL-continuation.
my $y_saved;
# The line that has the start/end markers on it.
my $mainline;
# NOTE: (y, x) order, because it makes it easier to iterate over.
my @info;
while (<>) {
chomp;
s/\cM//; # Deal with DOSish newlines.
print "Init : $_\n";
next if /^#/;
if ($_ =~ /^\s*$/) {
print "Empty line\n";
$x=0;
if (not defined $y_saved) {
# warn "Blank line with no continuation";
}
$y=$y_saved;
$y_saved=undef;
next;
}
if (not defined $y) {
warn "Nonempty line at undefined location?";
warn "$_\n";
last;
}
s(^>>){
my $y=$y+pos($_);
$mainline=$x;
$info[$x][$y] = ['START', $x, $y];
'--';
}gex;
s(^>){
if ($x != $mainline) {
print "???: $_\n" if $DEBUG;
die "Line mismatch at >--. x=$x, mainline=$mainline";
}
# if (not defined $y_saved) {
# die ">- with no matching ->";
# }
# $y=$y_saved;
'-';
}gex;
print "($x, $y): $_\n" if $DEBUG;
s(>$){
$y_saved=$y+pos($_);
'-';
}gex;
s(><$){
my $y=$y+pos($_);
$info[$x][$y] = ['END', $x, $y];
'--';
}gex;
# Quoted literals. Parse these early, but not before we've
# determined our position.
s( \xA0(.*?)\xA0 ){
my $y=$y+pos($_);
# print "Normal literal: $1\n";
$info[$x][$y] = ['LITERAL', $x, $y, $1];
'-' x (length($1)+2);
}gex;
# s(V([ ]*)\|){
# my $y=$y+pos($_);
# $info[$x][$y] = ['REPEAT', $x, $y, length($1)];
# }gex;
# # Ugly as fuck, but neccessary -- check if there's a repeat on the
# # previous line at this position, and take care of the + at it's
# # right-hand-side, to avoid the choice processor becomming confused.
# if ($info[$x-1][$y][0] eq 'REPEAT') {
# substr($_,
# }
# 1 while because it's vital that we get all of these before
# continuing, and they can nest.
# Collect all possible combinations (eg 1st '+' to 2nd '+' AND 1st '+'
# to 3rd '+' AND 2nd '+' to 3rd etc, where contents are not \s
# and throw away the ones that dont make sense later?
1 while (s( ([+\'.]) ([^\s]*) \1 ) {
my $inside = $2;
my $y=$y+pos($_);
$info[$x][$y] = ['CHOICE', $x, $y, length($inside)];
# print "Choice, inside=$inside\n";
if ($inside =~ /^-+$/) {
# print "Emptystring choice\n";
$info[$x][$y+1] = ['LITERAL', $x, $y+1, ''];
}
'-' . $inside . '-';
}gex);
# Quoted variables. These are actually "blocks" that are supposed to be
# defined later in the same file, but we treat them the same.
s(\| ([^ |]*) \|){
my $y=$y+pos($_);
$info[$x][$y] = ['VARIABLE', $x, $y, $1];
'--'.('-' x length($1)).'--';
}ge;
# Footnotes. I'm really not sure how to deal with them, but parse them
# here to prevent them showing up later as literals and parse failures.
# Note that we can't use () delimiters here, or \) is just an end-paren,
# not a literal end-paren.
s{\((\d+)\)}{
my $y=$y+pos($_);
$info[$x][$y] = ['FOOTNOTE', $x, $y, $1];
'-' x (length($1)+2);
}gex;
# Thanks, [tye], for the regex.
# We include . and * as valid chars for the inside of variables, because of
# "exposed-name.*", in select-clause. Uck.
# : is used as part of a variable in ch2host, but only at the beginning, so
# religate it to that position until we see more examples.
# More examples: psmbod
s( ([a-z:0-9\[\]]+(?:-[:a-z.*0-9\[\]]+)*) ){
my $y=$y+pos($_);
$info[$x][$y] = ['VARIABLE', $x, $y, $1];
'-' x length ($1)
}gex;
# Variable then literal, since both can include *s.
# = -- psmasn
s( ([A-Z*,()_=]+) ){
my $y=$y+pos($_);
# print "Normal literal: $1\n";
$info[$x][$y] = ['LITERAL', $x, $y, $1];
'-' x length($1);
}gex;
die "Unparsed bits? Final: $_" if (m/[^- |]/);
# print "Final: $_\n";
$x++;
}
exit if !@info;
Dump \@info if $DEBUG>=2;
sub item2atom {
my $item = shift;
my $linenumber = shift;
my $world = shift;
return '' if !$item;
my $type = $item->[0];
die "Line number passed wrong $linenumber / $item->[1]"
if $linenumber != $item->[1];
# print "item2atom(".Dumper($item).")";
if ($type eq 'START' or
$type eq 'END') {
return '';
} elsif ($type eq 'LITERAL') {
return '"' . quotemeta($item->[3]) . '"';
} elsif ($type eq 'CHOICE') {
my @subrules;
# Fixme: Should look up and down from the source line, until it finds a position
# which is not part of the choice.
# But watch out for:
# ---+--A--+---
# | |
# '--B--'
my $choicesfound=0;
foreach my $linenumber (0..$#{$world}) {
my $line = $world->[$linenumber];
print "Choice, checking $linenumber\n" if $DEBUG>=2;
# Keep going unless this line also begins a choice here.
next if not defined $line->[$item->[2]];
next unless $line->[$item->[2]][0] eq 'CHOICE';
# Make sure it has the same length.
next unless $line->[$item->[2]][3] == $item->[3];
print "$linenumber is choice, following...\n" if $DEBUG>=2;
$choicesfound++;
push @subrules, line2rule($linenumber, $item->[2]+1, $item->[3]-1, $world);
# Replace bit we just iterated over with an equal number of
# undefs, so we don't see it twice.
splice(@$line, $item->[2]+1, $item->[3]-1, (undef)x($item->[3]-1));
}
if ($choicesfound < 2) {
warn "Choice with one ($choicesfound) option at ($item->[1], $item->[2])";
$world->[$linenumber]->[$item->[2]] = undef;
}
return "(".join(' | ', @subrules).")";
} elsif ($type eq 'VARIABLE') {
return $item->[3];
} else {
die "Unknown item type: $type" . Dumper($item);
}
}
sub line2rule {
my ($linenumber, $colnumber, $length, $world) = @_;
my @result;
for my $i ( $colnumber .. $colnumber+$length-1 ) {
# print "line2rule: ", Dumper $world->[$linenumber][$i];
my $result = item2atom($world->[$linenumber][$i], $linenumber, $world);
next unless $result;
push @result, $result;
}
return join ' ', @result;
}
# print "Mainline: $mainline\n";
print line2rule($mainline, 0, 0+@{$info[$mainline]}, \@info), "\n";