Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl: the Markov chain saw
 
PerlMonks  

How to use Regular Expressions with HTML

by Ovid (Cardinal)
on Aug 16, 2003 at 02:45 UTC ( #284298=perlmeditation: print w/replies, xml ) Need Help??

Help for this page

Select Code to Download


  1. or download this
    $data =~ s{(<a\s(?:[^>](?!href))*href\s*)(&(&[^;]+;)?(?:.(?!\3))+(?:\3)?)([^>]+>)}
     {$1.decode_entities($2).$4}gsei;
    
  2. or download this
    <p class="foo" name="bar">
    <p name="bar" CLASS="ovid has no class">
    <p NAME="bar" class="ovid has no class">
    <p name="bar" class="ovid has no class">
    
  3. or download this
    package HTML::Token;
    
    ...
            return $tag;
        }
    }
    
  4. or download this
    my $html = <<END_HTML;
    <h1>This is a test</h1>
    ...
        push @tokens => HTML::Token->new($token);
    }
    push @tokens => (qw[* . *]); # make the (p) tag zero or more, followed by anything
    
  5. or download this
    use Token::Regex;
    my $regex = Token::Regex->new('HTML::Token');
    $regex->parse(\@tokens);
    
  6. or download this
    my $tokens = html_tokens(<<END_HTML);
    <h1>This is html</h1>
    ...
        }
        return \@tokens;
    }
    

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlmeditation [id://284298]
Front-paged by TStanley
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (11)
As of 2019-02-19 15:03 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    I use postfix dereferencing ...









    Results (104 votes). Check out past polls.

    Notices?
    • (Sep 10, 2018 at 22:53 UTC) Welcome new users!