Beefy Boxes and Bandwidth Generously Provided by pair Networks
Think about Loose Coupling
 
PerlMonks  

Re: Help with regular expression

by choroba (Abbot)
on Oct 10, 2012 at 23:25 UTC ( #998342=note: print w/ replies, xml ) Need Help??


in reply to Help with regular expression

Parsing from leaves to root using regular expressions:

#!/usr/bin/perl use Data::Dumper; use warnings; use strict; undef $/; my $input = <DATA>; $input =~ s/[ \n]//g; my %tree; while ($input =~ /[()]/) { if (my ($parent, $son) = $input =~ /\(([^()]+)=\(([^()]+)\)/) { my ($name, $value) = split /=/, $son; if (length $value) { push @{ $tree{$parent} }, {$name => $value}; } else { push @{ $tree{$parent} }, { $name => $tree{$name} }; delete $tree{$name}; } $input =~ s/\($son\)//; } elsif (my ($root, $value) = $input =~ /\(([^()]+)=([^()]+)\)/) { $tree{$root} = $value; $input =~ s/\($root=$value\)//; } else { die "Invalid input\n" unless $input =~ /^\([^()]+=\)$/; last; } } if (keys %tree > 1) { die "More than one root\n"; } print Dumper \%tree; __DATA__ (S=(SN=ac2.bd) (I1=(IN=s%1)(NM=1) (HL=(HLD=kkk kjkjk)(ST=abdc)(HI=REM SSS)(H_M=9)(HL=72)(EB=0) +(ER=0)(HI=E043-93A-DF0-0AB63E)(PE=aaa)(HN=DEE)(SS=NS)(SED=(APR=(PAD=k +kk)(PN=9905)(HH=llkjk))(DD=(LLL=kkk)))) (ppp=1)(RAW=kkk)(DN=kkk)(RIN=ppp)) (PPP=1) (AA=LLI))
لսႽ ᥲᥒ⚪⟊Ⴙᘓᖇ Ꮅᘓᖇ⎱ Ⴙᥲ𝇋ƙᘓᖇ


Comment on Re: Help with regular expression
Download Code
Re^2: Help with regular expression
by Anonymous Monk on Oct 11, 2012 at 08:42 UTC

    Whoa, modifying and matching the same string? That can get tricky :) You appear to collapse whitespace ( "REM SSS" becomes "REMSSS" )

    Here is a familiar pattern I've used before, jazzed up with perlfaq6#What good is \G in a regular expression?

    #!/usr/bin/perl -- use strict; use warnings; use Data::Dump; Main( @ARGV ); exit( 0 ); sub Main { my $pp = q{ (S= (SN=ac2.bd) (I1= (IN=s%1) (NM=1) (HL=(HLD=kkk kjkjk) (ST=abdc) (HI=REM SSS) (H_M=9) (HL=72) (EB=0) (ER=0) (HI=E043-93A-DF0-0AB63E) (PE=aaa) (HN=DEE) (SS=NS) (SED= ( APR=(PAD=kkk) (PN=9905) (HH=llkjk) ) (DD=(LLL=kkk)) ) ) (ppp=1) (RAW=kkk) (DN=kkk) (RIN=ppp) ) (PPP=1) (AA=LLI) )}; dd blahs2aoa(""); dd blahs2aoa( $pp ); } #~ What good is \G in a regular expression? #~ http://perldoc.perl.org/perlfaq6.html#What-good-is-\G-in-a-regular- +expression%3f sub blahs2aoa { my $curr_stack = my $root = [] ; my $prev_stack; local $_ = $_[0]; pos = 0; while( length > pos ){ m/\G\s++/gcx and next; m/\G\s++/gcx and next; # ignore space "greedily" # ignore space without backtracki +ng # Match 1 or more times and give +nothing back # ?? ignore space while ratchetin +g # ?? ratchet and ignore space m/\G=/gcx and next; # ignore equals m/\G\(/gcx and do { # open push @$curr_stack, [] ; push @$prev_stack, $curr_stack; $curr_stack = $$curr_stack[-1]; next; }; m/\G\)/gcx and do { # close if( @$prev_stack ){ $curr_stack = pop @$prev_stack; } else { warn join ' ', "error extra ) close at pos ", pos, "\n +"; } next; }; m/\G([^\(\)=]++)/gcx and do { # key or value push @$curr_stack, $1; next; }; } if( $prev_stack and @$prev_stack ){ warn "Trouble!\nprev_stack ", int @$prev_stack , "\n", 'curr_stack ', int @$curr_stack , "\n", Data::Dump::pp({ prev => $prev_stack, curr => $curr_stack }), +"\n"; } return $root; } __END__ [] [ [ "S", ["SN", "ac2.bd"], [ "I1", ["IN", "s%1"], ["NM", 1], [ "HL", ["HLD", "kkk kjkjk"], ["ST", "abdc"], ["HI", "REM SSS"], ["H_M", 9], ["HL", 72], ["EB", 0], ["ER", 0], ["HI", "E043-93A-DF0-0AB63E"], ["PE", "aaa"], ["HN", "DEE"], ["SS", "NS"], [ "SED", ["APR", ["PAD", "kkk"], ["PN", 9905], ["HH", "llkjk"]], ["DD", ["LLL", "kkk"]], ], ], ["ppp", 1], ["RAW", "kkk"], ["DN", "kkk"], ["RIN", "ppp"], ], ["PPP", 1], ["AA", "LLI"], ], ]

      You can turn that into choroba-s hashy structure using Data::Rmap

      #!/usr/bin/perl -- use strict; use warnings; use Data::Dump; my $blah = ( [ "S", ["SN", "ac2.bd"], [ "I1", ["IN", "s%1"], ["NM", 1], [ "HL", ["HLD", "kkk kjkjk"], ["ST", "abdc"], ["HI", "REM SSS"], ["H_M", 9], ["HL", 72], ["EB", 0], ["ER", 0], ["HI", "E043-93A-DF0-0AB63E"], ["PE", "aaa"], ["HN", "DEE"], ["SS", "NS"], [ "SED", ["APR", ["PAD", "kkk"], ["PN", 9905], ["HH", "llkjk"]], ["DD", ["LLL", "kkk"]], ], ], ["ppp", 1], ["RAW", "kkk"], ["DN", "kkk"], ["RIN", "ppp"], ], ["PPP", 1], ["AA", "LLI"], ], ); use Data::Rmap qw/ rmap_array /; rmap_array { my( $key, $val, @rest ) = @$_; if( not ref $key ){ if( @rest ){ $_ = { $key, [ $val, @rest ] }; } else { $_ = { $key, $val }; } } return; } $blah; dd $blah; __END__ { S => [ { SN => "ac2.bd" }, { I1 => [ { IN => "s%1" }, { NM => 1 }, { HL => [ { HLD => "kkk kjkjk" }, { ST => "abdc" }, { HI => "REM SSS" }, { H_M => 9 }, { HL => 72 }, { EB => 0 }, { ER => 0 }, { HI => "E043-93A-DF0-0AB63E" }, { PE => "aaa" }, { HN => "DEE" }, { SS => "NS" }, { SED => [ { APR => [{ PAD => "kkk" }, { +PN => 9905 }, { HH => "llkjk" }] }, { DD => { LLL => "kkk" } }, ], }, ], }, { ppp => 1 }, { RAW => "kkk" }, { DN => "kkk" }, { RIN => "ppp" }, ], }, { PPP => 1 }, { AA => "LLI" }, ], }

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://998342]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others cooling their heels in the Monastery: (15)
As of 2014-10-23 19:13 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    For retirement, I am banking on:










    Results (128 votes), past polls