#!perl -l
# ex: set ts=4:
#
# Reads an HTML fragment from __DATA__ and prints it back with every bare
# http:// or https:// URL found in text wrapped in an <a href="..."> element.
# URLs that already sit inside an <a href> element are left untouched.

use strict;
use warnings;

use HTML::Parser;
use URI;

# Currently open elements, outermost first and innermost last.  An <a> start
# tag that carries an href attribute is recorded as "a href" so unlinked()
# can tell real links apart from plain named anchors.
my @tagstack;

# The rewritten document, accumulated by output().
my $BUF = '';

# Start-tag handler.
#   $tag  - tag name as reported by the parser
#   $attr - hash reference of the tag's attributes
#   $text - original source text of the tag, copied to the output unchanged
sub start {
	my ($tag, $attr, $text) = @_;

	$tag .= " href" if ($tag eq "a" && defined $attr->{"href"});
	push @tagstack, $tag;
	output($text);
	return;
}

# End-tag handler.
#   $tag  - tag name as reported by the parser
#   $text - original source text of the tag, copied to the output unchanged
#
# Closes the innermost open element named $tag together with everything that
# was opened after it (elements such as <br> never get an end tag of their
# own).  An end tag with no matching open element leaves the stack alone.
sub end {
	my ($tag, $text) = @_;

	# Search from the innermost element outwards: start() pushes onto the
	# tail, so the tail is the top of the stack.
	for my $i (reverse 0 .. $#tagstack) {
		my $open = $tagstack[$i];

		# A linking anchor is stored as "a href" but is closed by plain "a".
		next unless $open eq $tag || ($tag eq "a" && $open eq "a href");

		splice @tagstack, $i;    # drop the match and everything above it
		last;
	}

	output($text);
	return;
}

# Text handler: receives the source text found between tags.
#   $text - text exactly as it appears in the input (entities not decoded)
sub text {
	my ($text) = @_;

	if (unlinked()) {
		# replace URLs with their linked equivalent if we're not within a link
		$text =~ s{ \b (https?://\S+) }
		          { '<a href="' . URI->new($1)->canonical . '">' . $1 . '</a>' }gex;
	}

	output($text);
	return;
}

# Returns true when no enclosing open element is an <a href> link.
sub unlinked {
	return !grep { $_ eq "a href" } @tagstack;
}

# Appends its first argument to the output buffer.
sub output {
	my ($chunk) = @_;

	$BUF .= $chunk if defined $chunk;
	return;
}

# start code
my $p = HTML::Parser->new(
	 "start_h" => [ \&start, "tagname, attr, text" ]
	,"end_h"   => [ \&end,   "tagname, text" ]
	# "text" rather than "dtext": decoded entities would be written back
	# unescaped and corrupt the markup (e.g. &lt; turning into a literal <).
	,"text_h"  => [ \&text,  "text" ]
);

# Slurp the whole sample document in one read.
my $html = do { local $/; <DATA> };

$p->parse(defined $html ? $html : '');
$p->eof;    # flush any text the parser is still holding back

print $BUF;

# NOTE(review): the sample below had its markup stripped in the copy under
# review; the anchors around the "linkedN" entries are reconstructed from the
# naming and should be confirmed against the original file.
__DATA__
<a href="http://linked1.com">http://linked1.com</a>
<a href="http://linked2.com">http://linked2.com</a>
<a href="http://linked3.com">http://linked3.com</a>
<a href="http://linked4.com">http://linked4.com</a>
http://unlinked1
http://unlinked2.com