#!/usr/bin/perl
use warnings;
use strict;

use Perl6::Slurp;
use XML::SAX::Machines qw(Pipeline);
#use XML::SAX::ParserFactory;

# Read the XML document named by the first command-line argument, push it
# through the MySAXHandler filter defined below, and print whatever the
# pipeline wrote into $output.
my $output = "";

my $machine = Pipeline(MySAXHandler => \$output);
$machine->parse_string( join "", slurp $ARGV[0] );
print "$output\n";

package MySAXHandler;
use base qw(XML::SAX::Base);

# SAX filter that lets only a fixed whitelist of elements and attributes
# through.  start/end events for any other element are dropped; character
# data is always forwarded.  An <img> is additionally forced to be empty:
# it is closed as soon as any other element or character data follows it.

# Install the element/attribute whitelist and forward the event.
# Keys of $self->{_supported} are allowed element local names; each value
# is a hash whose keys are the attribute names allowed on that element.
sub start_document {
    my $self = shift;

    $self->{_supported} = {
        img => {
            alt    => 1,
            width  => 1,
            height => 1,
            src    => 1,
            title  => 1,
        },
        a => {
            href  => 1,
            title => 1,
        },
        p      => {},
        h3     => {},
        em     => {},
        strong => {},
        div    => {},
    };

    return $self->SUPER::start_document(shift);
}

# Emit the end_element event for an <img> that is still open, if any.
# The event is built from a shallow copy of the stored start-element data
# with its Attributes entry removed.
sub _close_pending_img {
    my ($self) = @_;

    return if not exists $self->{_pending_img};

    my %el = %{ delete $self->{_pending_img} };
    delete $el{Attributes};
    $self->SUPER::end_element(\%el);

    return;
}

# Forward a start tag only when the element is whitelisted, after deleting
# every attribute that is not whitelisted for it.  Any still-open <img> is
# closed first, whether or not the new element is itself supported.
sub start_element {
    my ($self, $el) = @_;
    my $localName = $el->{LocalName};

    $self->_close_pending_img();

    return if not exists $self->{_supported}->{$localName};

    my $attributes = $self->{_supported}->{$localName};
    foreach my $attr (keys %{ $el->{Attributes} }) {
        # Drop a leading literal "{}" from the attribute key before the
        # lookup (presumably the empty-namespace prefix of a "{ns}name"
        # key -- confirm against the parser in use).  The braces are
        # escaped because newer perls warn about or reject a bare "{".
        my $key = $attr;
        $key =~ s[\A\{\}][]xms;

        if (not exists $attributes->{$key}) {
            delete $el->{Attributes}->{$attr};
        }
    }

    # Remember the open <img> so it can be closed before any content.
    if ($localName eq 'img') {
        $self->{_pending_img} = $el;
    }

    return $self->SUPER::start_element($el);
}

# Forward an end tag only when the element is whitelisted.
#
# For </img>: if the element is still pending, this is its one and only
# end event, so the pending marker is cleared and the event forwarded.  If
# nothing is pending, the <img> was already closed early (content followed
# it) and this end tag is swallowed, so downstream handlers never see two
# end events for one start event.
sub end_element {
    my ($self, $el) = @_;
    my $localName = $el->{LocalName};

    if ($localName eq 'img') {
        return if not exists $self->{_pending_img};
        delete $self->{_pending_img};
    }
    else {
        $self->_close_pending_img();
    }

    return if not exists $self->{_supported}->{$localName};

    return $self->SUPER::end_element($el);
}

# Forward character data, closing any still-open <img> first.  The SAX
# event is named "characters"; under the old name "character" this handler
# was never invoked by the parser.
sub characters {
    my ($self, $el) = @_;

    $self->_close_pending_img();

    return $self->SUPER::characters($el);
}

# Backward-compatible alias for code that called the old, misnamed method.
sub character {
    my $self = shift;
    return $self->characters(@_);
}

1;