Beefy Boxes and Bandwidth Generously Provided by pair Networks
Keep It Simple, Stupid
 
PerlMonks  

Comment on

( #3333=superdoc: print w/ replies, xml ) Need Help??

After xml_split, here is xml_merge. The tool is quite simple: just feed it the foo-00.xml file created by xml_split and it will happily output the entire file.

Note that, just like xml_split, it respects the encoding and indentation of the original XML file.

#!/usr/bin/perl -w
# $Id: xml_merge,v 1.2 2005/02/10 11:45:07 mrodrigu Exp $
#
# xml_merge - merge back XML files that were split with xml_split.
# See the POD after __END__ for usage.

use strict;

use XML::Twig;
use FindBin qw( $RealBin $RealScript);
use Getopt::Std;

$Getopt::Std::STANDARD_HELP_VERSION=1; # to stop processing after --help or --version

our( $VERSION, $USAGE);

$VERSION= "0.01";
$USAGE= "xml_merge [-o <output_file>] [-v] [-h] [-m] [-V] [file]\n";

{ # main block

    my $opt={};

    # 'o:' declares -o as an option that takes a value (the output file);
    # an unknown switch makes getopts return false, which is a usage error
    getopts('o:vhmV', $opt) or die $USAGE;

    if( $opt->{h}) { die $USAGE, "\n"; }

    if( $opt->{m}) {
        # list form of exec: the script path is never interpreted by a shell
        exec( 'pod2text', "$RealBin/$RealScript")
            or die "cannot run pod2text: $!\n";
    }

    if( $opt->{V}) { print "xml_merge version $VERSION\n"; exit; }

    if( defined $opt->{o}) {
        open( my $out, '>', $opt->{o}) or die "cannot create $opt->{o}: $!";
        $opt->{fh}= $out;    # used to set twig_print_outside_roots
    }
    else {
        $opt->{fh}= 1;       # this way twig_print_outside_roots outputs to STDOUT
    }

    $opt->{subdocs} = 1;     # the top-level file is always parsed
    $opt->{file}    = $ARGV[0];

    merge( $opt);

    # buffered write errors only surface when the handle is closed
    if( ref $opt->{fh}) {
        close( $opt->{fh}) or die "cannot close $opt->{o}: $!";
    }

    if( $opt->{v}) { warn "done\n"; }
}

# merge( $opt)
#   Parses $opt->{file} (or STDIN when no file is set) with XML::Twig.
#   Everything outside the <?merge ...?> processing instructions is printed
#   to $opt->{fh}. For each merge PI, the PI data is decoded by parse(), and
#   the file it names is then either merged recursively (when the 'subdocs'
#   option is true) or copied verbatim by spit().
#   $opt is the shared option hash; parse() updates it in place.
sub merge {
    my( $opt)= @_;

    my $on_merge_pi= sub {
        $opt= parse( $_->data, $opt);
        if( $opt->{subdocs}) { merge( $opt); }
        else                 { spit( $opt);  }
    };

    my $t= XML::Twig->new( keep_encoding            => 1,
                           keep_spaces              => 1,
                           twig_roots               => { '?merge' => $on_merge_pi },
                           twig_print_outside_roots => $opt->{fh},
                         );

    if( $opt->{v} && $opt->{file}) { warn "merging $opt->{file} (parsing)\n"; }

    if( $opt->{file}) { $t->parsefile( $opt->{file}); }
    else              { $t->parse( \*STDIN);          }

    return;
}

# spit( $opt)
#   Copies the file named by $opt->{file} line by line, without parsing it,
#   to the output: $opt->{fh} when it is a real filehandle (-o was given),
#   STDOUT otherwise. Dies if the file cannot be opened.
sub spit {
    my( $opt)= @_;

    if( $opt->{v} && $opt->{file}) { warn "merging $opt->{file} (no parsing)\n"; }

    open( my $in, '<', $opt->{file})
        or die "cannot open sub document '$opt->{file}': $!";

    # $opt->{fh} is the plain value 1 (not a handle) when writing to STDOUT
    my $out= ref $opt->{fh} ? $opt->{fh} : \*STDOUT;

    while( my $line= <$in>) { print {$out} $line; }

    close $in;
    return;
}

# parse( $data, $opt)
#   $data is the pi data,
#   (ugly) format is keyword1 = val1 : keyword2 = val2 ... : filename
#   ex: subdoc = 1 : file-01.xml
#   Each 'keyword = value :' prefix is stripped from $data and stored in
#   $opt; whatever remains is stored as $opt->{file}. Returns $opt.
sub parse {
    my( $data, $opt)= @_;

    while( $data=~ s{^\s*(\S+)\s*=\s*(\S+)\s*:\s*}{}) { $opt->{$1}= $2; }
    $opt->{file}= $data;

    return $opt;
}

# for Getopt::Std
#   Getopt::Std calls these for --help and --version, passing the output
#   filehandle as first argument, and expects them to print the message
#   themselves. The text is also returned, as the previous versions did.
sub HELP_MESSAGE {
    my( $fh)= @_;
    $fh ||= \*STDOUT;
    print {$fh} $USAGE;
    return $USAGE;
}

sub VERSION_MESSAGE {
    my( $fh)= @_;
    $fh ||= \*STDOUT;
    print {$fh} "xml_merge version $VERSION\n";
    return $VERSION;
}

__END__

=head1 NAME

xml_merge - merge back XML files split with C<xml_split>

=head1 DESCRIPTION

C<xml_merge> takes several xml files that have been split using
C<xml_split> and recreates a single file.

=head1 OPTIONS

=over 4

=item -o <output_file>

unless this option is used the program output goes to STDOUT

=item -v

verbose output

=item -V

outputs version and exit

=item -h

short help

=item -m

man (requires pod2text to be in the path)

=back

=head1 EXAMPLES

  xml_merge foo-00.xml             # output to stdout
  xml_merge -o foo.xml foo-00.xml  # output to foo.xml

=head1 SEE ALSO

XML::Twig, xml_split

=head1 TODO/BUGS

=head1 AUTHOR

Michel Rodriguez <mirod@cpan.org>

=head1 LICENSE

This tool is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

In reply to xml_merge by mirod

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others surveying the Monastery: (15)
    As of 2015-07-29 13:30 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?

      The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









      Results (263 votes), past polls