<?xml version="1.0" encoding="windows-1252"?>
<node id="107246" title="Re: How to strip HTML using latest module" created="2001-08-23 06:06:59" updated="2005-07-21 10:32:59">
<type id="11">
note</type>
<author id="16834">
OeufMayo</author>
<data>
<field name="doctext">
&lt;p&gt;Here's a version using the HTML::Parser v.2 interface:&lt;/p&gt;

&lt;code&gt;#!/usr/bin/perl -w
use strict;
use LWP::Simple qw(get);
use HTML::Parser;

my $parser = Example-&gt;new();
my $html   = get("http://www.perlmonks.org")
	or die "Cannot fetch the HTML\n";

$parser-&gt;parse($html);

package Example;
use base qw(HTML::Parser);
sub text {
	my ($self,$text) = @_;
	print $text;
}
&lt;/code&gt;

&lt;p&gt;And here's the same script, but using the HTML::Parser 
version 3 interface. This one is easier to use because you 
generally don't have to make a new package to parse the 
HTML (though you can, if you really want to!).&lt;/p&gt;

&lt;code&gt;#!/usr/bin/perl -w
use strict;
use LWP::Simple qw(get);
use HTML::Parser;

my $html = get("http://www.perlmonks.org");

my $parser = HTML::Parser-&gt;new(
	text_h =&gt; [ sub { print shift }, 'dtext' ]
);
$parser-&gt;parse($html);
&lt;/code&gt;



&lt;kbd&gt;-- &lt;br&gt;

my $&lt;a href="/index.pl?node=OeufMayo&amp;lastnode_id=1072"&gt;OeufMayo&lt;/a&gt; = new PerlMonger::Paris({http =&gt; '&lt;a href="http://paris.mongueurs.net"&gt;paris.mongueurs.net&lt;/a&gt;'});&lt;/kbd&gt;</field>
<field name="root_node">
107175</field>
<field name="parent_node">
107175</field>
</data>
</node>
