Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??
# # Proof of concept for a Parser::RecDescent parser for the diagrams fo +und at: # http://publib.boulder.ibm.com/infocenter/db2v8luw/index.jsp?topic=/c +om.ibm.db2.udb.doc/admin/r0006726.htm # # Implementation Notes # ==================== # - Printing out the result as a grammar is left as an # excersise for the reader. It should be extremely simple. # - "parameter-block" and "large bullets" (*) that denote # unordered sequences are not supported. Those are # relatively simple or similar to implement, and therefore # they are not needed in this proof of concept program. # - "--text.*--" (a replaceable immediately followed by # literals) is handled differently than "--text--.*--" # in case the user wishes to forbid whitespace between # the replaceable and the literals in the former. # # Grammar Notes # ============= # - Can't use // and '' in rules. Which also means I had to # reimplement whitespace. MATCH provides the functionality # of // and '', while DO_SET_SKIP provides the functionality # of <skip>. # - The '$_[0] = ...' junk commits changed made by a function. # Not doing so undoes any change to the state made by the # function, provide backtracking within functions. # - "production(s)", "production(s?)" and the like won't # work for us without the change to _parserepeat(). # # Bugs # ==== # - This is not a validator. The input is assumed to be # valid. Adding validation would be simple, but it would # be time consuming to code, and it would kill what little # readability this program has. # - No meaningful error reporting. # - Didn't look into whether <commit> could be used to # improve efficiency in this grammar. # - Something tells me the regexp for sql_literal is # expensive because of the backtracking necessary to # locate a dash. The LHS of the | should probably have # a lookahead. # - Definitions for "sql_literal" and "sql_replaceable" # are guesswork and may require tweaking. # - Any features of Parser::RecDescent not currently used # should be considered incompatible until known otherwise. # use strict; use warnings; use Data::Dumper (); use Parse::RecDescent (); package Parse::RecDescent::Hack; use vars qw(@ISA); BEGIN { @ISA = 'Parse::RecDescent'; } sub _parserepeat($$$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES { my ($parser, $text, $prod, $min, $max, $_noactions, $expectation, $ +argcode) = @_; my @tokens = (); my $reps; for ($reps=0; $reps<$max;) { $_[6]->at($text); # $_[6] IS $expectation FROM CALLER my $_savetext = $text; my $prevtextlen = length $text; my $_tok; if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$argco +de))) { $text = $_savetext; last; } push @tokens, $_tok if defined $_tok; # $text isn't getting smaller. # ---v # last if ++$reps >= $min and $prevtextlen == length $text; # --- last if ++$reps >= $min and $_savetext eq $text; # ---^ } do { $_[6]->failed(); return undef} if $reps<$min; $_[1] = $text; return [@tokens]; } package main; { my $grammar = <<'__EOI__'; { use Algorithm::Loops (); # State sub IDX_TEXT () { 0 } # constant sub IDX_TXET () { 1 } # constant sub IDX_SKIPF () { 2 } sub IDX_Y () { 3 } sub IDX_X () { 4 } sub IDX_D () { 5 } sub IDX_STACK () { 6 } sub STACK_IDX_ID () { 0 } sub STACK_NEXT_IDX () { 1 } sub CHOICE_IDX_Y () { STACK_NEXT_IDX+0 } sub CHOICE_IDX_X () { STACK_NEXT_IDX+1 } sub CHOICE_IDX_D () { STACK_NEXT_IDX+2 } sub LOOP_IDX_END_Y () { STACK_NEXT_IDX+0 } sub LOOP_IDX_END_X () { STACK_NEXT_IDX+1 } sub DIR_N () { 0 } sub DIR_E () { 1 } sub DIR_S () { 2 } sub DIR_W () { 3 } sub DIR_C () { 4 } use vars qw( @DIR_X @DIR_Y @DIR_DASH ); BEGIN { # N E S W . @DIR_X = ( 0, +1, 0, -1, 0 ); @DIR_Y = ( -1, 0, +1, 0, 0 ); @DIR_DASH = qw( | - | - ); } sub clone { my ($self, @args) = @_; my $new = bless([ @$self ]); # Stack not cloned. Must be done explicitely by caller. #$new->[IDX_STACK] = [ @{$new->[IDX_STACK]} ]; while (scalar(@args)) { my $idx = shift(@args); my $val = shift(@args); $new->[$idx] = $val; } return $new; } # [ [ # 'aeh', # 'abcd', 'bfi', # 'efg', ==> 'cgj', # 'hijkl', 'd k', # ' l', # ] ] sub transpose_AoS { return [ Algorithm::Loops::MapCarU { join('', map { defined($_) ? $ +_ : ' ' } @_) } map { [ /(.)/gs ] } @{$_[0]} ]; } sub get_char_at { my ($text, $y, $x, $d) = @_; $d = DIR_C unless (defined($d)); $y += $DIR_Y[$d] - 1; # Make zero-based. $x += $DIR_X[$d] - 1; # Make zero-based. return undef unless ($y >= 0); return undef unless ($y < @$text); my $line = $text->[$y]; return undef if ($x < 0 || $x >= length($line)); return substr($line, $x, 1); } sub is_loop_return { my ($text, $y, $x) = @_; my $ch; $ch = get_char_at($text, $y + $DIR_Y[DIR_N]*0, $x + $DIR_X[DIR_N]*0 +); return undef unless (defined($ch) && $ch eq '-'); $ch = get_char_at($text, $y + $DIR_Y[DIR_N]*1, $x + $DIR_X[DIR_N]*1 +); return undef unless (defined($ch) && $ch eq 'V'); $ch = get_char_at($text, $y + $DIR_Y[DIR_N]*2, $x + $DIR_X[DIR_N]*2 +); return undef unless (defined($ch) && ($ch eq '.' || $ch eq '|')); return 1; } sub is_dash { my ($text, $y, $x, $d) = @_; my $ch = get_char_at($text, $y, $x); return undef unless (defined($ch)); return undef unless ($ch eq $DIR_DASH[$d]); return !(is_loop_return($text, $y, $x)); } sub skip { my ($self) = @_; my ($skipf) = @$self[IDX_SKIPF]; $_[0]->$skipf(); } sub DO_INIT { my @text = $_[0] =~ /([^\n]*)\n/gs; my $text = \@text; my $txet = transpose_AoS($text); my $self = bless([ $text, # IDX_TEXT $txet, # IDX_TXET 'SKIP_NOTHING', # IDX_SKIPF 0, # IDX_Y 1, # IDX_X 0, # IDX_D [], # IDX_STACK ]); $self->DO_MOVE_TO_START(); $_[0] = $self; return 1; } sub DO_MOVE_TO_START { my ($self) = @_; my ($text, $y) = @$self[IDX_TEXT, IDX_Y]; my $num_lines = scalar(@$text); my $x = 0; # 0-based. --$y; # Make 0-based. while (++$y < $num_lines) { last if (substr($text->[$y], $x) =~ /^>/s); } ++$y; # Make 1-based. ++$x; # Make 1-based. $_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x, (IDX_D) => DIR_E +); return 1; } # Skips whitespace like a MATCH. sub DO_SET_DIR { my ($self, $new_d) = @_; $self->skip(); my ($old_d) = @$self[IDX_D]; $_[0] = $self->clone((IDX_D) => $new_d); return $old_d; } sub DO_SET_SKIP { my ($self, $new_skipf) = @_; my ($old_skipf) = @$self[IDX_SKIPF]; $_[0] = $self->clone((IDX_SKIPF) => $new_skipf); return $old_skipf; } sub SKIP_NOTHING {} sub SKIP_LINE { my ($self) = @_; my ($text, $y, $x, $d) = @$self[IDX_TEXT, IDX_Y, IDX_X, IDX_D]; my $moved; while (is_dash($text, $y, $x, $d)) { $y += $DIR_Y[$d]; $x += $DIR_X[$d]; $moved = 1; } $_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x) if ($moved); } # Replacement for //. sub MATCH { my ($self, $regexp) = @_; $self->skip(); my ($text, $txet, $y, $x, $d) = @$self[IDX_TEXT, IDX_TXET, IDX_Y, I +DX_X, IDX_D]; --$y; # Make 0-based. --$x; # Make 0-based. my $line; my $idx; my $pos; my $match; my $sign; if ($DIR_X[$d]) { $line = $text->[$y]; return undef unless (defined($line)); $idx = IDX_X; $pos = $x; } else { $line = $txet->[$x]; return undef unless (defined($line)); $idx = IDX_Y; $pos = $y; } if ($DIR_X[$d] >= 0 && $DIR_Y[$d] >= 0) { $line = substr($line, $pos); ($match) = $line =~ /^($regexp)/s; $sign = +1; } else { $line = substr($line, 0, $pos+1); ($match) = $line =~ /($regexp)$/s; $sign = -1; } return undef unless (defined($match)); $pos += length($match) * $sign; ++$pos; # Make 1-based. $_[0] = $self->clone($idx => $pos); return $match; } sub MATCH_LOOP_RET { my ($self) = @_; $self->skip(); my ($text, $y, $x) = @$self[IDX_TEXT, IDX_Y, IDX_X]; return undef unless (is_loop_return($text, $y, $x)); $y += $DIR_Y[DIR_E]; $x += $DIR_X[DIR_E]; $_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x); return 1; } sub MATCH_EOF { my ($self) = @_; my ($text, $y) = @$self[IDX_TEXT, IDX_Y]; --$y; # Make 0-based. return ($y >= scalar(@$text) ? 1 : undef); } # Skips whitespace like a MATCH. sub DO_CHOICE_START { my ($self) = @_; $self->skip(); my ($y, $x, $d, $stack) = @$self[IDX_Y, IDX_X, IDX_D, IDX_STACK]; $stack = [ @$stack ]; push(@$stack, [ 'choice', # STACK_IDX_ID $y, # CHOICE_IDX_Y $x, # CHOICE_IDX_X $d, # CHOICE_IDX_D ], ); $_[0] = $self->clone((IDX_STACK) => $stack); return 1; } sub DO_GO_CHOICE_START { my ($self) = @_; my ($stack) = @$self[IDX_STACK]; my $record = $stack->[$#$stack]; my ($y, $x, $d) = @$record[CHOICE_IDX_Y, CHOICE_IDX_X, CHOICE_IDX_D +]; $_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x, (IDX_D) => $d); return 1; } sub DO_CHOICE_END { my ($self) = @_; my ($stack) = @$self[IDX_STACK]; $stack = [ @$stack ]; pop(@$stack); $_[0] = $self->clone((IDX_STACK) => $stack); return 1; } # Skips whitespace like a MATCH. sub DO_LOOP_START { my ($self) = @_; $self->skip(); my ($y, $x, $d, $stack) = @$self[IDX_Y, IDX_X, IDX_D, IDX_STACK]; $stack = [ @$stack ]; push(@$stack, [ 'loop', # STACK_IDX_ID undef, # LOOP_IDX_END_Y undef, # LOOP_IDX_END_X ], ); $_[0] = $self->clone((IDX_STACK) => $stack); return 1; } # Skips whitespace like a MATCH. sub DO_LOOP_BACK { my ($self) = @_; $self->skip(); my ($y, $x, $stack) = @$self[IDX_Y, IDX_X, IDX_STACK]; $stack = [ @$stack ]; my $record = $stack->[$#$stack]; @$record[LOOP_IDX_END_Y, LOOP_IDX_END_X] = ($y, $x); $_[0] = $self->clone((IDX_STACK) => $stack); return 1; } sub DO_GO_LOOP_BACK { my ($self) = @_; my ($stack) = @$self[IDX_STACK]; my $record = $stack->[$#$stack]; my ($y, $x) = @$record[LOOP_IDX_END_Y, LOOP_IDX_END_X]; $_[0] = $self->clone((IDX_Y) => $y, (IDX_X) => $x, (IDX_D) => DIR_E +); return 1; } sub DO_LOOP_END { my ($self) = @_; my ($stack) = @$self[IDX_STACK]; $stack = [ @$stack ]; pop(@$stack); $_[0] = $self->clone((IDX_STACK) => $stack); return 1; } } # --- Basics --- parse : {DO_INIT $text} stmt(s?) {$text->MATCH_EOF()} { $item[2] } stmt : {$text->DO_SET_SKIP('SKIP_NOTHING')} {$text->MATCH('>>')} {$text->DO_SET_SKIP('SKIP_LINE')} expr(s?) stmt_extention(s?) {$text->MATCH('><')} {$text->DO_MOVE_TO_START()} { [ @{$item[4]}, map { @{$_} } @{$item[5]} ] +} stmt_extention : {$text->MATCH('>(?!<)')} {$text->DO_MOVE_TO_START()} {$text->MATCH('>(?!>)')} expr(s?) { $item[4] } expr : sql_text { $item[1] } | choice { $item[1] } | loop { $item[1] } # --- Literals --- sql_text : sql_component {$text->DO_SET_SKIP('SKIP_NOTHING')} sql_component(s?) {$text->DO_SET_SKIP($item[2])} { [ $item[0], $item[1], @{$item[3]} ] } sql_component : sql_literal { $item[1] } | sql_replaceable { $item[1] } sql_literal : {$text->MATCH(qr/(?:[+<>'.])*[^a-z-+<>'.\s](? +:[^a-z-+<>]|[A-Z]-[A-Z])*/)} { [ @item ] } sql_replaceable : {$text->MATCH(qr/[a-z](?:[a-z]|-[a-z])*/ + )} { [ @item ] } # --- Choices --- choice : {$text->DO_CHOICE_START()} choice_branches {$text->DO_GO_CHOICE_START()} {$text->DO_SET_DIR(DIR_E)} {$text->MATCH('\\+')} expr(s?) {$text->MATCH('\\+')} {$text->DO_CHOICE_END()} { [ $item[0], @{$item[2]}, $item[6] ] } choice_branches : north_branches south_branches(?) { [ $item[1 +], @{$item[2]} ] } | south_branches { [ $item[1 +] ] } north_branches : {$text->DO_GO_CHOICE_START()} {$text->DO_SET_ +DIR(DIR_N)} {$text->MATCH('\\+')} north_branch { $item[4] } south_branches : {$text->DO_GO_CHOICE_START()} {$text->DO_SET_ +DIR(DIR_S)} {$text->MATCH('\\+')} south_branch { $item[4] } north_branch : {$text->DO_CHOICE_START()} {$text->DO_SET_DIR(DIR_N)} {$text->MATCH('\\+')} north_branch {$text->DO_GO_CHOICE_START()} {$text->DO_SET_DIR(DIR_E)} {$text->MATCH('\\+')} expr(s?) {$text->MATCH('\\+')} {$text->DO_CHOICE_END()} { [ @{$item[4]}, $item[8] ] } | {$text->DO_SET_DIR(DIR_E)} {$text->MATCH('\\.')} expr(s?) {$text->MATCH('\\.')} { $item[3] } south_branch : {$text->DO_CHOICE_START()} {$text->DO_SET_DIR(DIR_S)} {$text->MATCH('\\+')} south_branch {$text->DO_GO_CHOICE_START()} {$text->DO_SET_DIR(DIR_E)} {$text->MATCH('\\+')} expr(s?) {$text->MATCH('\\+')} {$text->DO_CHOICE_END()} { [ @{$item[4]}, $item[8] ] } | {$text->DO_SET_DIR(DIR_E)} {$text->MATCH("'")} expr(s?) {$text->MATCH("'")} { $item[3] } # --- Loops --- loop : {$text->DO_LOOP_START()} {$text->MATCH_LOOP_RET()} expr(s?) {$text->DO_LOOP_BACK()} {$text->DO_SET_DIR(DIR_N)} {$text->MATCH('\\+')} {$text->DO_SET_DIR(DIR_W)} {$text->MATCH('\\.')} loop_seperator {$text->DO_SET_DIR(DIR_S)} {$text->MATCH('\\.')} {$text->MATCH('V')} {$text->DO_GO_LOOP_BACK()} {$text->MATCH('\\+')} {$text->DO_LOOP_END()} { [ $item[0], $item[3], $item[9] ] } loop_seperator : {$text->MATCH(',')} { $item[1] } | { '' } __EOI__ $::RD_HINT = 1; # $::RD_TRACE = 1; # Parse::RecDescent::Hack->Precompile($grammar, "Three"); my $parser = Parse::RecDescent::Hack->new($grammar); die("Bad grammar.\n") unless defined($parser); my $text = <<'__EOI__'; >>-ACTIVATE--+-DATABASE-+--database-alias-----------------------> '-DB-------' >--+-------------------------------------+--------------------->< '-USER--username--+-----------------+-' '-USING--password-' >>-select-clause--from-clause--+--------------+-----------------> '-where-clause-' >--+-----------------+--+---------------+---------------------->< '-group-by-clause-' '-having-clause-' .-ALL------. >>-SELECT--+----------+-----------------------------------------> '-DISTINCT-' >--+-*-----------------------------------------------+--------->< | .-,-------------------------------------------. | | V | | '---+-expression--+-------------------------+-+-+-' | | .-AS-. | | | '-+----+--new-column-name-' | '-exposed-name.*--------------------------' __EOI__ my $result = $parser->parse(\$text); die("Bad text.\n") unless (defined($result)); print(Data::Dumper->Dump([ $result ],[qw( $result )])); } __END__ Must Parse ========== >>-ACTIVATE--+-DATABASE-+--database-alias-----------------------> '-DB-------' >--+-------------------------------------+--------------------->< '-USER--username--+-----------------+-' '-USING--password-' >>-select-clause--from-clause--+--------------+-----------------> '-where-clause-' >--+-----------------+--+---------------+---------------------->< '-group-by-clause-' '-having-clause-' .-ALL------. >>-SELECT--+----------+-----------------------------------------> '-DISTINCT-' >--+-*-----------------------------------------------+--------->< | .-,-------------------------------------------. | | V | | '---+-expression--+-------------------------+-+-+-' | | .-AS-. | | | '-+----+--new-column-name-' | '-exposed-name.*--------------------------' __END__ Must Parse (Simpler) ==================== >>--------------------------------------------------------------> >-------------------------------------------------------------->< >>-ACTIVATE----------------database-alias---------------------->< >>-ACTIVATE--+-DATABASE-+--database-alias---------------------->< '-DB-------' .-ALL------. >>-SELECT--+----------+---------------------------------------->< '-DISTINCT-' >>-+-*-----------------------------------------------+--word2-->< | .-,-----------------------------------. | | V | | '-----expression------------------------+--word1--' __END__ Output ====== $result = [ [ [ 'sql_text', [ 'sql_literal', 'ACTIVATE' ] ], [ 'choice', [ [ 'sql_text', [ 'sql_literal', 'DB' ] ] ], [ [ 'sql_text', [ 'sql_literal', 'DATABASE' ] ] ] ], [ 'sql_text', [ 'sql_replaceable', 'database-alias' ] ], [ 'choice', [ [ 'sql_text', [ 'sql_literal', 'USER' ] ], [ 'sql_text', [ 'sql_replaceable', 'username' ] ], [ 'choice', [ [ 'sql_text', [ 'sql_literal', 'USING' ] ], [ 'sql_text', [ 'sql_replaceable', 'password' ] ] ], [] ] ], [] ] ], [ [ 'sql_text', [ 'sql_replaceable', 'select-clause' ] ], [ 'sql_text', [ 'sql_replaceable', 'from-clause' ] ], [ 'choice', [ [ 'sql_text', [ 'sql_replaceable', 'where-clause' ] ] ], [] ], [ 'choice', [ [ 'sql_text', [ 'sql_replaceable', 'group-by-clause' ] ] ], [] ], [ 'choice', [ [ 'sql_text', [ 'sql_replaceable', 'having-clause' ] ] ], [] ] ], [ [ 'sql_text', [ 'sql_literal', 'SELECT' ] ], [ 'choice', [ [ 'sql_text', [ 'sql_literal', 'ALL' ] ] ], [ [ 'sql_text', [ 'sql_literal', 'DISTINCT' ] ] ], [] ], [ 'choice', [ [ 'loop', [ [ 'choice', [ [ 'sql_text', [ 'sql_replaceable', 'exposed-name' ], [ 'sql_literal', '.*' ] ] ], [ [ 'sql_text', [ 'sql_replaceable', 'expression' ] ], [ 'choice', [ [ 'choice', [ [ 'sql_text', [ 'sql_literal', 'AS' ] ] ], [] ], [ 'sql_text', [ 'sql_replaceable', 'new-column-name' ] ] ], [] ] ] ] ], ',' ] ], [ [ 'sql_text', [ 'sql_literal', '*' ] ] ] ] ] ];

In reply to Parsing 2D Graphs using Parse::RecDescent by ikegami

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others imbibing at the Monastery: (6)
As of 2024-03-28 14:13 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found