Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

Re: Proper nesting of HTML to be enforced (the code)

by tye (Cardinal)
on Feb 03, 2004 at 20:49 UTC ( #326310=note: print w/ replies, xml ) Need Help??


in reply to Proper nesting of HTML to be enforced

Here is the code that does this. $q is an Everything::CGI object <which is just like a CGI object except that escapeHTML() also turns ] and [ into entities>. Otherwise, there is very little Everything-specific code so it should be easy to adapt for use in your favorite chatterbox client.

###################################################################### # sub # tagApprove # # purpose # determines whether or not a tag (and it's specified attributes +) # are approved or not. If not, returns a false value. # Otherwise, cleans the arguments in-place and returns a true # value. Used by htmlScreen. # sub tagApprove { my( $close, $tag, $attr, $APPROVED )= @_; if( exists $APPROVED->{lc($tag)} ) { $tag = lc($tag); } elsif( exists $APPROVED->{uc($tag)} ) { $tag = uc($tag); } else { return !1; } if( $close ) { $_[2]= ''; return 1; } my $cleanattr= ""; $attr .= " "; foreach ( split ",", $APPROVED->{$tag} ) { next if "1" eq $_; if( "/" eq $_ ) { $cleanattr .= " ".$_; last; } elsif( $attr =~ /\b$_\s*(=\s*('[^'<>]*'|"[^"<>]*"|([^<>'"\s\[\] +]+)\s))?/i ) { $cleanattr .= " ".$_; if( $3 ) { $cleanattr .= "='$3'"; } elsif( $1 ) { $cleanattr .= "=".$2; } } } for( $cleanattr ) { s/\[/&#91;/g; s/]/&#93;/g; } $_[2]= $cleanattr; return 1; } ###################################################################### +####### # sub # htmlScreen # # purpose # screen out html tags from a chunk of text # returns the text with any unapproved tags escaped. # # params # text -- the text to filter # APPROVED -- ref to hash where approved tags are keys. Null me +ans # all HTML will be escaped out. # BEGIN { my %block; # Block-level tags my %nonest; # Tags that form linear siblings rather than nest. { my @list= ( 'h1'..'h6', qw[ dl ul ol pre p div blockquote form hr table ] ); @block{ @list }= (1) x @list; @list= qw( li tr td th p ); @nonest{ @list }= (1) x @list; } sub htmlScreen { my( $html, $APPROVED )= @_; $APPROVED ||= {}; my $htmlNest= $VARS->{htmlnest} || ($q->param('htmlnest'))[-1]; my %depth; my $block= 1; my @nesting; my $closeTil= sub { my( $name, $all )= @_; my $html= ''; my $add= ''; my $extra= !$name; while( @nesting && $extra ne $name ) { $extra= pop @nesting; $add= $html; $html .= "</$extra>"; pop @{$depth{$extra}}; $block-- if $block{$extra}; } $add= $html if $all; if( $add && ($q->param('htmlerror'))[-1] ) { $html= qq(<font color="#808080" class="htmlerror">) . $q->escapeHTML($add) . "</font>" . $html; } return $html; }; ## $html =~ s#<\s*(/?)(\w+)(.*?)\># tagApprove($1,$2,$3,$APPROVED) + #gse; $html =~ s{ < ( # $1: whole of "tag" !-- (.*?-) # $2: comment body; split "--"s - (?= > ) | \s* (/?) # $3: "" or "/" (for end tag) \s* (\w+) # $4: tag name ( # $5: rest of tag contents (?: [^<>'"\[\]]+ | "[^"<>]*" | '[^'<>]*' )* ) (?= > ) | ) (>?) # $6: "" or ">", closing of tag }{ my( $tag, $cmnt, $close, $name, $attrs, $gt )= ( $1, $2, $3, lc($4), $5, $6 ); if( defined($cmnt) ) { $cmnt =~ s/-(?=-)/- /g if $htmlNest; "<!--$cmnt->"; } elsif( ! $gt || ! tagApprove($close,$name,$attrs,$APPROVED) + ) { $q->escapeHTML( "<$1$gt" ); } elsif( ! $htmlNest || $attrs =~ m#/$# ) { "<$close$name$attrs>"; } elsif( ! $close ) { my $html= ''; my $clean= "<$name$attrs>"; if( $nonest{$name} && $depth{$name} && $block == $depth{$name}[-1] ) { $html .= $closeTil->( $name, 1 ); } $block++ if $block{$name}; $html .= $clean; push @{$depth{$name}}, $block; push @nesting, $name; $html; } else { if( $block{$name} && $depth{$name} && @{$depth{$name}} or $depth{$name} && $block == $depth{$name}[-1] ) { $closeTil->( $name ); } else { $q->escapeHTML( "<$tag$gt" ); } } }gsex; $html .= $closeTil->('',1) if @nesting; return $html; } }

Updated: Bug fix applied ($block++ used to be in an elsif instead of just if).

- tye        


Comment on Re: Proper nesting of HTML to be enforced (the code)
Select or Download Code

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://326310]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (9)
As of 2015-07-08 07:49 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (96 votes), past polls