use strict;
use warnings;
use XML::Rules;

# Stream filter for OpenDocument text content: every text:line-break found
# inside a text:p ends the paragraph at that point and reopens it, so one
# paragraph with N line breaks is emitted as N+1 paragraphs. Everything else
# is copied through unchanged. Input is read from the DATA handle and the
# result is written to STDOUT.

# Prefixed name of the paragraph element, as mapped by the namespaces below.
my $PARAGRAPH = 'text:p';

my $filter = XML::Rules->new(
    style      => 'filter',
    namespaces => {
        'urn:oasis:names:tc:opendocument:xmlns:text:1.0'   => 'text',
        'urn:oasis:names:tc:opendocument:xmlns:office:1.0' => 'office',
    },
    rules => {
        # We do not care what is inside the tags; we just want to preserve
        # everything.
        _default => 'raw',

        # This looks like a no-op, but it is necessary: in filter mode,
        # everything outside tags that have their own rule is sent straight
        # to the output, so the paragraph needs a rule for its content to be
        # buffered and available to the line-break handler.
        $PARAGRAPH => sub { return $_[0] => $_[1] },

        # Split the enclosing paragraph at this line break.
        #   $tag         - name of the current element ('text:line-break')
        #   $attrs       - its attributes (and content) hash
        #   $parents     - array ref of the names of the enclosing elements
        #   $parentAttrs - array ref of the enclosing elements' data hashes
        #   $parser      - the XML::Rules object; $parser->{FH} is the
        #                  handle the filter writes to
        # Returns the element unchanged when it is not inside a paragraph,
        # otherwise returns nothing so the line break itself is dropped.
        'text:line-break' => sub {
            my ($tag, $attrs, $parents, $parentAttrs, $parser) = @_;

            # Walk outwards to the nearest enclosing paragraph.
            my $idx = $#$parents;
            $idx-- while $idx >= 0 && $parents->[$idx] ne $PARAGRAPH;

            # Line break outside any paragraph: leave it alone. Testing the
            # index directly avoids reading $parents->[-1], which would wrap
            # around to the innermost parent (or undef with no parents).
            return $tag => $attrs if $idx < 0;

            # Number of open elements from the paragraph down to the
            # innermost parent of the line break.
            my $level = $#$parents - $idx + 1;
            my $out   = $parser->{FH};

            # Output the paragraph (and any nested open tags) with all the
            # content read so far ...
            print {$out} $parser->parentsToXML($level);
            # ... close those tags all the way up to the paragraph ...
            print {$out} $parser->closeParentsToXML($level);
            print {$out} "\n";

            # Forget the content that was just printed so it is not emitted
            # again; the attributes stay intact for the continued tags.
            for my $i ($idx .. $#$parents) {
                delete $parentAttrs->[$i]->{_content};
            }

            return;    # remove the text:line-break element itself
        },
    },
);

# NOTE(review): the sample below contains no markup as it stands, so it is
# not a well-formed document; it presumably lost its XML tags somewhere.
# Restore a real OpenDocument fragment before relying on this run.
$filter->filter(\*DATA, \*STDOUT);

__DATA__
Foo Ba r