Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

Re: Proper nesting of HTML to be enforced (the code)

by tye (Cardinal)
on Feb 03, 2004 at 20:49 UTC ( #326310=note: print w/ replies, xml ) Need Help??


in reply to Proper nesting of HTML to be enforced

Here is the code that does this. $q is an Everything::CGI object <which is just like a CGI object except that escapeHTML() also turns ] and [ into entities>. Otherwise, there is very little Everything-specific code so it should be easy to adapt for use in your favorite chatterbox client.

###################################################################### # sub # tagApprove # # purpose # determines whether or not a tag (and it's specified attributes +) # are approved or not. If not, returns a false value. # Otherwise, cleans the arguments in-place and returns a true # value. Used by htmlScreen. # sub tagApprove { my( $close, $tag, $attr, $APPROVED )= @_; if( exists $APPROVED->{lc($tag)} ) { $tag = lc($tag); } elsif( exists $APPROVED->{uc($tag)} ) { $tag = uc($tag); } else { return !1; } if( $close ) { $_[2]= ''; return 1; } my $cleanattr= ""; $attr .= " "; foreach ( split ",", $APPROVED->{$tag} ) { next if "1" eq $_; if( "/" eq $_ ) { $cleanattr .= " ".$_; last; } elsif( $attr =~ /\b$_\s*(=\s*('[^'<>]*'|"[^"<>]*"|([^<>'"\s\[\] +]+)\s))?/i ) { $cleanattr .= " ".$_; if( $3 ) { $cleanattr .= "='$3'"; } elsif( $1 ) { $cleanattr .= "=".$2; } } } for( $cleanattr ) { s/\[/&#91;/g; s/]/&#93;/g; } $_[2]= $cleanattr; return 1; } ###################################################################### +####### # sub # htmlScreen # # purpose # screen out html tags from a chunk of text # returns the text with any unapproved tags escaped. # # params # text -- the text to filter # APPROVED -- ref to hash where approved tags are keys. Null me +ans # all HTML will be escaped out. # BEGIN { my %block; # Block-level tags my %nonest; # Tags that form linear siblings rather than nest. { my @list= ( 'h1'..'h6', qw[ dl ul ol pre p div blockquote form hr table ] ); @block{ @list }= (1) x @list; @list= qw( li tr td th p ); @nonest{ @list }= (1) x @list; } sub htmlScreen { my( $html, $APPROVED )= @_; $APPROVED ||= {}; my $htmlNest= $VARS->{htmlnest} || ($q->param('htmlnest'))[-1]; my %depth; my $block= 1; my @nesting; my $closeTil= sub { my( $name, $all )= @_; my $html= ''; my $add= ''; my $extra= !$name; while( @nesting && $extra ne $name ) { $extra= pop @nesting; $add= $html; $html .= "</$extra>"; pop @{$depth{$extra}}; $block-- if $block{$extra}; } $add= $html if $all; if( $add && ($q->param('htmlerror'))[-1] ) { $html= qq(<font color="#808080" class="htmlerror">) . $q->escapeHTML($add) . "</font>" . $html; } return $html; }; ## $html =~ s#<\s*(/?)(\w+)(.*?)\># tagApprove($1,$2,$3,$APPROVED) + #gse; $html =~ s{ < ( # $1: whole of "tag" !-- (.*?-) # $2: comment body; split "--"s - (?= > ) | \s* (/?) # $3: "" or "/" (for end tag) \s* (\w+) # $4: tag name ( # $5: rest of tag contents (?: [^<>'"\[\]]+ | "[^"<>]*" | '[^'<>]*' )* ) (?= > ) | ) (>?) # $6: "" or ">", closing of tag }{ my( $tag, $cmnt, $close, $name, $attrs, $gt )= ( $1, $2, $3, lc($4), $5, $6 ); if( defined($cmnt) ) { $cmnt =~ s/-(?=-)/- /g if $htmlNest; "<!--$cmnt->"; } elsif( ! $gt || ! tagApprove($close,$name,$attrs,$APPROVED) + ) { $q->escapeHTML( "<$1$gt" ); } elsif( ! $htmlNest || $attrs =~ m#/$# ) { "<$close$name$attrs>"; } elsif( ! $close ) { my $html= ''; my $clean= "<$name$attrs>"; if( $nonest{$name} && $depth{$name} && $block == $depth{$name}[-1] ) { $html .= $closeTil->( $name, 1 ); } $block++ if $block{$name}; $html .= $clean; push @{$depth{$name}}, $block; push @nesting, $name; $html; } else { if( $block{$name} && $depth{$name} && @{$depth{$name}} or $depth{$name} && $block == $depth{$name}[-1] ) { $closeTil->( $name ); } else { $q->escapeHTML( "<$tag$gt" ); } } }gsex; $html .= $closeTil->('',1) if @nesting; return $html; } }

Updated: Bug fix applied ($block++ used to be in an elsif instead of just if).

- tye        


Comment on Re: Proper nesting of HTML to be enforced (the code)
Select or Download Code

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://326310]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others pondering the Monastery: (3)
As of 2014-09-19 06:19 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    How do you remember the number of days in each month?











    Results (132 votes), past polls