Beefy Boxes and Bandwidth Generously Provided by pair Networks
laziness, impatience, and hubris
 
PerlMonks  

Re: Help with regular expression

by choroba (Canon)
on Oct 11, 2012 at 09:59 UTC ( #998407=note: print w/ replies, xml ) Need Help??


in reply to Help with regular expression

A solution using the Marpa::XS parser:

#!/usr/bin/perl
# Parse a nested "(Name=Value)" / "(Name=(...)(...))" structure read from the
# DATA section with Marpa::XS and dump every parse result with Data::Dumper.
# The tree is built from array references ([ name => value ]) rather than
# hashes, so a name that occurs more than once at the same level is preserved.
use warnings;
use strict;

use Marpa::XS;
use Data::Dumper;

main();

{
    # Semantic actions referenced by name from the grammar in init_grammar().
    # Every action ignores its first argument; the remaining arguments are the
    # values of the rule's right-hand-side symbols, in order.
    # NOTE(review): the first argument is presumably Marpa's per-parse
    # variable -- confirm against the Marpa::XS semantics documentation.
    package MyParser;

    # Default action named in the grammar: returns its first argument.
    sub pass_tree { shift }

    # Action for "( Name = Value )": returns [ name => value ].
    sub create_node {
        my (undef, undef, $name, undef, $value) = @_;
        return [ $name => $value ];
    }

    # Action for the NodeSeq sequence rule: returns a reference to the list
    # of child nodes.
    sub create_seq {
        my (undef, @nodes) = @_;
        return \@nodes;
    }

    # Action for "( Name = NodeSeq )": returns [ name => \@children ].
    sub add_nodes {
        my (undef, undef, $name, undef, $nodes) = @_;
        return [ $name => $nodes ];
    }

    # Named after the start symbol; returns the value of its single child.
    sub Tree {
        return $_[1];
    }
}

# Entry point: build the grammar and recognizer, slurp DATA, feed the tokens
# to the recognizer and print each value it yields.
sub main {
    my $grammar = init_grammar();
    my $rec     = init_recognizer($grammar);

    my $input = do { local $/ = undef; <DATA> };
    lex($input, $rec);

    while (my $val = $rec->value) {
        print Dumper $val;
    }
    return;
}

# Build and precompute the grammar.
# Returns the precomputed Marpa::XS::Grammar object.
sub init_grammar {
    my $grammar = Marpa::XS::Grammar->new({
        start          => 'Tree',
        actions        => 'MyParser',
        default_action => 'pass_tree',
        rules          => [
            [ 'Tree', ['Node'] ],
            [ 'Tree', ['Leave'] ],
            [ 'Node', [qw/( Name = Value )/],   'create_node' ],
            [ 'Node', [qw/( Name = NodeSeq )/], 'add_nodes' ],
            {   lhs    => 'NodeSeq',
                rhs    => [qw/Node/],
                min    => 1,
                action => 'create_seq',
            },
        ],
    });

    # Return the grammar object itself rather than relying on whatever
    # precompute() happens to return.
    $grammar->precompute;
    return $grammar;
}

# Create a recognizer for the given precomputed grammar.
# Uncomment the trace_* options to debug the parse.
sub init_recognizer {
    my $grammar = shift;
    return Marpa::XS::Recognizer->new({
        grammar => $grammar,
        # trace_terminals => 1,
        # trace_actions   => 1,
        # trace_values    => 1
    });
}

# Tokenize $input and feed the tokens to the recognizer $rec.
# Tokens are consumed from the front of a private copy of the input.
# Dies on input that cannot be tokenized, and on non-whitespace input left
# over after the recognizer stops accepting tokens.
sub lex {
    my ($input, $rec) = @_;

    while (length $input) {
        if ($input =~ s/^([()])//) {
            # A parenthesis is its own token type.  Stop feeding tokens once
            # read() returns a false value.
            my $success = $rec->read($1);
            last if !$success;
        }
        elsif ($input =~ s/^([^()=]+)=([^()=]+)//) {
            # Name=Value where the value is a plain string.
            $rec->read('Name', $1);
            $rec->read('=');
            $rec->read('Value', $2);
        }
        elsif ($input =~ s/^([^()=]+)=//) {
            # Name= whose value did not match as a plain string
            # (e.g. a nested "(" follows).
            $rec->read('Name', $1);
            $rec->read('=');
        }
        else {
            # Only whitespace may appear between tokens.
            $input =~ s/^\s+// or die "Invalid input at $input\n";
        }
    }

    die "Cannot parse: $input.\n" if $input =~ /\S/;
    return;
}

__DATA__
(S=(SN=ac2.bd)
(I1=(IN=s%1)(NM=1)
(HL=(HLD=kkk kjkjk)(ST=abdc)(HI=REM SSS)(H_M=9)(HL=72)(EB=0)(ER=0)(HI=E043-93A-DF0-0AB63E)(PE=aaa)(HN=DEE)(SS=NS)(SED=(APR=(PAD=kkk)(PN=9905)(HH=llkjk))(DD=(LLL=kkk))))
(ppp=1)(RAW=kkk)(DN=kkk)(RIN=ppp))
(PPP=1)
(AA=LLI))
Update: Hashes cannot be used easily, because the "key" can be repeated (as in HI). Switched to arrays.
Update 2: Simplified the code.
لսႽ ᥲᥒ⚪⟊Ⴙᘓᖇ Ꮅᘓᖇ⎱ Ⴙᥲ𝇋ƙᘓᖇ


Comment on Re: Help with regular expression
Download Code

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://998407]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others studying the Monastery: (10)
As of 2015-07-29 11:09 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (263 votes), past polls