Beefy Boxes and Bandwidth Generously Provided by pair Networks
Do you know where your variables are?
 
PerlMonks  

Re: POD style regex for inline HTML elements

by Lady_Aleena (Priest)
on Apr 15, 2013 at 05:52 UTC ( [id://1028680]=note: print w/replies, xml ) Need Help??


in reply to POD style regex for inline HTML elements

Here is another sample, with a mix.

SPAN<text I<italic text>|class="span_class"> some more text B<bold tex +t>.

Here is the desired output:

<span class="span_class">text <i>italic text</i></span> some more text + <b>bold text</b>.
Have a cookie and a very nice day!
Lady Aleena

Replies are listed 'Best First'.
Re^2: POD style regex for inline HTML elements
by Anonymous Monk on Apr 15, 2013 at 08:23 UTC

    *shakes head * marpa+family are so much less work :)

    #!/usr/bin/perl -- use strict; use warnings; use Test::More qw' no_plan '; use Regexp::Common qw/ balanced /; use Data::Dump qw/ dd pp /; sub TRACE; sub DEBUG; *TRACE = *DEBUG = sub { print STDERR @_,"\n" }; #~ *TRACE = *DEBUG = sub { }; our $allowed = join '|', qw[ ABBR ACRONYM B BIG CITE CODE DFN EM I KBD SAMP SMALL SPAN STRONG SUB SUP TT VAR ]; { my $in = 'SPAN<text I<italic text>|class="span_class"> some more t +ext B<bold text>.'; my $out = shabba($in); my $wanted = '<span class="span_class">text <i>italic text</i></sp +an> some more text <b>bold text</b>.'; is( $out, $wanted , 'shabba'); } { my $in = 'SPAN<text I<italic B<and bold> text>|class="span_class"> + some more text B<bold text>.'; my $out = shabba($in); my $wanted = '<span class="span_class">text <i>italic <b>and bold< +/b> text</i></span> some more text <b>bold text</b>.'; is( $out, $wanted , 'shabba'); } exit( 0 ); sub shabba { local $_ = $_[0]; my $dent = $_[1] || 0; pos = 0; my $ret = ""; SHABBALOOP: while( length > pos ){ m{\G(\s+)}gcsx and do { $ret .= $1; next SHABBALOOP; }; m{ \G( $allowed )( $RE{balanced}{-parens=>'<>'} ) }gcsx and do { TRACE "# $dent allowed<> { $1 ( $2 ) }"; $ret .= shabba_allowed( "$1" , "$2" , $dent ); next SHABBALOOP; }; #~ confusion :) #~ \G(\w+\b) #~ fail #~ \G([^<]+)(?!:$allowed)\b #~ \w+\b #~ \G([^<]+?)(?!:$allowed\<)\b #~ fail #~ \G([^<]+?)(?!:$allowed\<) #~ fail #~ \G([^<]+)(?!:$allowed\<) #~ fail \G([^<]+)(?!:\<) #~ inch #~ \G([^<]+?)(?!:\<) #~ \G([^<]+?\b[^<]) #~ \G([^<]+?[^<]) #~ FAIL #~ \G([^<]+[^<]) #~ \G([^<]+[^<]\b) #~ 2same#~ \G( (?!:$allowed\<) .+ ) #~ 2same#~ \G( .+(?!:$allowed\<) ) m{ \G([^<]+\s) }gcmx and do { TRACE "# $dent text { $1 }"; $ret .= shabba_text( "$1" ); next SHABBALOOP; };;; m{ \G([\<\>]) }gcmx and do { TRACE "## $dent error-stray<> { $1 } at pos(@{[pos]})"; last SHABBALOOP; };;; m{ \G(\S) }gcmx and do { TRACE "# $dent inch-forward { $1 }"; $ret .= shabba_text( "$1" ); next SHABBALOOP; };;; } $ret; } #~ sub shabba_allowed { join'',@_ } #~ confusion :) #~ use Text::Balanced qw' :ALL '; #~ dd([ extract_multiple( $stuff,[\&extract_bracketed, ],)]); #~ my $extract_allowed = gen_extract_tagged(qw/$allowed</,'>');dd( +[ extract_multiple( $stuff,[ $extract_allowed , $extract_allowed , ], +)]); #~ #~ 0 and $stuff =~ s{ #~ ( $allowed )( $RE{balanced}{-parens=>'<>'} ) #~ | #~ (.) #~ }{ #~ if( defined $2 ){ #~ $ret .= $2; #~ } else { #~ $ret .= shabba_allowed( "$1" ); #~ } #~ ""; #~ }gsex; sub shabba_allowed { my( $tag , $stuff, $dent ) = @_; $stuff = $1 if $stuff =~ m{^<(.*)>$}gs; my $ret = ""; $ret .= "<\L$tag\E" if $tag; $stuff =~ s{\|([^<>]+)$}{ $ret .= " $1"; ## shabba_allowed_atts($tag,$1); ""; }gsex if defined $stuff ; $ret .= ">" if $tag; if( defined $stuff and length $stuff and $stuff =~ m{[<>]}g ){ $ret .= shabba( $stuff , $dent+1) ; ## recurse } else { $ret .= $stuff; } $ret .= "</\L$tag\E>" if $tag; $ret; } sub shabba_text { join'',@_ } __END__ $ prove -vb lady.alena.balanced.podlike.pl lady.alena.balanced.podlike.pl .. # 0 allowed<> { SPAN ( <text I<itali +c text>|class="span_class"> ) } # 1 text { text } # 1 allowed<> { I ( <italic text> ) } # 0 text { some more text } # 0 allowed<> { B ( <bold text> ) } # 0 inch-forward { . } # 0 allowed<> { SPAN ( <text I<italic B<and bold> text>|class="span_cl +ass"> ) } ok 1 - shabba # 1 text { text } # 1 allowed<> { I ( <italic B<and bold> text> ) } # 2 text { italic } # 2 allowed<> { B ( <and bold> ) } # 2 inch-forward { t } # 2 inch-forward { e } # 2 inch-forward { x } # 2 inch-forward { t } # 0 text { some more text } # 0 allowed<> { B ( <bold text> ) } # 0 inch-forward { . } ok 2 - shabba 1..2 ok All tests successful. Files=1, Tests=2, 0 wallclock secs ( 0.11 usr + 0.02 sys = 0.12 CPU +) Result: PASS

      This would be beautiful as a module. I am having problems wading through it. There are a lot of things in this I do not understand, and I am cargo culting it as best I can. Thank you for your work, Anonymous Monk (whoever you are).

      PS. If you see this, would you please tell me what shabba stands for? :)

      Have a cookie and a very nice day!
      Lady Aleena

        PS. If you see this, would you please tell me what shabba stands for? :)

        shabba is excited utterance -- heard it on the radio decades ago ; could have been the Jamaican dancehall musician, could have been fresh prince/jazzy jeff, or some local dj or ...

        but it took the place of "woo hoo" or "dang it" or "booyah" ... "mahalo" (hello and goodbye)

        and now I use it in place of Frobnicate / NotDemoMeaningfulName / YouChangeIt / iAmSleepyWhenIcopyFAQ

        DUDE:) http://cpansearch.perl.org/src/ABIGAIL/Regexp-Common-2013031301/Changes
        Version 2013030901 Sat Mar 9 14:51:42 CET 2013 + Use (?-1) instead of (??{ }) for the recursive balanced pattern. This makes the pattern unavailable for pre-5.010 perls.

        So solution is simple, on any computer you have, get the old version, print out the pattern and save it to a file

        cpanm -n http://cpan.metacpan.org/authors/id/A/AB/ABIGAIL/Regexp-Common-2011121001.tar.gz

        $ perl -MRegexp::Common -le " print $RE{balanced}{-parens=>'<>'} " (?^:(?^:(?:\<(?:(?>[^\<\>]+)|(??{$Regexp::Common::balanced [0]}))*\>)) +)

        So to inline into above program you'd write something like

        use vars qw/ $re_balanced_angles /; our $re_balanced_angles = qr{(?^:(?^:(?:\<(?:(?>[^\<\>]+)|(??{ $re_bal +anced_angles }))*\>)))}; ... m{ ### no more ## \G( $allowed )( $RE{balanced}{-parens=>'<>'} ) \G( $allowed )( $re_balanced_angles ) }gcsx and do {

        Or downgrade the version of Regexp::Common you upload to your website

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://1028680]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others taking refuge in the Monastery: (7)
As of 2024-04-23 13:10 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found