#!/usr/bin/perl
#You could use HTML::TokeParser::Simple and print only the text tokens.
#This is almost straight from the HTML::TokeParser::Simple POD.

use HTML::TokeParser::Simple;
# Print only the text tokens of the HTML file named by $somefile (expected to
# be set by the surrounding code); tags and other markup tokens are skipped.
#
# With a single file-name argument the constructor returns undef (and sets $!)
# when the file cannot be opened, so fail here with a useful message instead of
# dying later on a method call against an undefined value.
my $p = HTML::TokeParser::Simple->new( $somefile )
    or die "Cannot open '$somefile' for parsing: $!";

while ( my $token = $p->get_token ) {
    next unless $token->is_text;

    # as_is gives the token exactly as it appeared in the source document.
    print $token->as_is;
}

###################################################################
#HTML::Strip - Perl extension for stripping HTML markup from text.

use HTML::Strip;

# Build a stripper, feed it the markup held in $raw_html (expected to be set
# by the surrounding code) and keep the tag-free result in $clean_text.
my $hs         = HTML::Strip->new;
my $clean_text = $hs->parse($raw_html);

# Tell the stripper the input is complete; per the HTML::Strip documentation
# this is the step to perform before the object is used on another document.
$hs->eof;

###################################################################


# strip($html) - return the text of an HTML string with the markup removed.
#
# Parameters:
#   $html - the HTML document itself, as a string.
# Returns:
#   The concatenation of every text token reported by HTML::PullParser.  The
#   parser is asked for 'text' (not 'dtext'), so the text is returned as it
#   appears in the source and entities such as "&amp;" are not decoded.
#   Returns the empty string when $html is undefined or empty.
sub strip {
    my ($html) = @_;

    # Nothing to parse.  This also avoids handing an undefined document to
    # the parser constructor, which rejects it.
    return '' unless defined $html && length $html;

    # Load the parser explicitly so this sub does not depend on some other
    # module having pulled HTML::PullParser in already.
    require HTML::PullParser;

    my $parser = HTML::PullParser->new(
        doc  => $html,
        text => 'text',    # only report text events, one field per token
    );

    my $result = '';
    while ( my $token = $parser->get_token ) {
        # Each token is an array ref holding the single requested field.
        $result .= $token->[0];
    }
    return $result;
}

##############################################################

#If you just need to strip all the html tags from a page, 
#and are on a platform with lynx, you can use:

#! /usr/bin/perl
use strict;
use warnings;
# Render htmlDocument.html as plain text with the external lynx browser and
# print the result.  The backticks capture lynx's standard output.
my $text = `lynx -dump htmlDocument.html`;

# $? holds the wait status of the command: -1 means it could not be started
# (for example lynx is not installed); any other non-zero value means the
# command itself failed.  Stop instead of printing missing or partial output.
if ( $? == -1 ) {
    die "Could not run lynx: $!\n";
}
elsif ( $? != 0 ) {
    die "lynx -dump htmlDocument.html failed (wait status $?)\n";
}

print $text;

##################################################################
#or, run lynx directly from the shell (the next line is a shell command, not Perl):
lynx -dump htmlDocument.html > htmlDocument.txt