Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Re: parse XML huge file using cpan modules

by Jenda (Abbot)
on Jul 29, 2019 at 12:12 UTC ( #11103566=note: print w/replies, xml ) Need Help??


in reply to parse XML huge file using cpan modules

To add to the list of options...

# Stream a ctgStatistics XML log and print one pipe-separated line per
# <resourceGroup> of each <statRecord>. In this variant the statistics are
# kept grouped by their "type" attribute (e.g. Lifetime, Startup), so the
# statRecord handler reads them as $group->{<type>}{<name>}.
#
# Rule behaviour (built-in 'content' / 'no content array' rules, the '%key'
# return convention, the stripspaces flags) is defined by XML::Rules; see the
# module documentation for the exact semantics.
use strict;
use warnings;
use XML::Rules;
use Data::Dumper qw(Dumper);    # only used by the commented-out debug line

my $parser = XML::Rules->new(
    stripspaces => 15,
    rules       => {
        # <name> and <value> contribute just their text to the parent tag.
        'name,value' => 'content',

        # Each <statistic> becomes { name => value }, returned under a key
        # built from '%' plus the statistic's type so that statistics of
        # the same type end up together in the enclosing resourceGroup.
        statistic => sub {
            my (undef, $stat) = @_;
            return '%' . $stat->{type} => { $stat->{name} => $stat->{value} };
        },

        # Collect the resource groups into an array on the statRecord.
        resourceGroup => 'no content array',

        # A complete <statRecord> has been parsed: print its groups and
        # return nothing, so the record's data is not kept once it has
        # been printed.
        statRecord => sub {
            my (undef, $record) = @_;

            #print Dumper($record);
            foreach my $group (@{ $record->{resourceGroup} }) {
                print join('|',
                    $record->{time},
                    $group->{name},
                    $group->{Lifetime}{LCONNFAIL},
                    $group->{Lifetime}{LLOSTCONN},
                    $group->{Lifetime}{LIDLETIMEOUT},
                    $group->{Startup}{SIPADDR},
                    $group->{Startup}{SIPPORT},
                ), "\n";
            }
            return;
        },
    },
);

print "time|resourceGroup name|LCONNFAIL|LLOSTCONN|LIDLETIMEOUT|SIPADDR|SIPPORT\n";
$parser->parse(\*DATA);

# NOTE: the sample document below is truncated ("...") in this excerpt;
# supply the complete XML before running.
__DATA__
<?xml version="1.0" encoding="UTF-8"?>
<ctgStatistics xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="ctgstatslog.xsd">
<statRecord type="interval" length="60" time="2019-07-16T08:23:59">
<resourceGroup name="CSCS1SVGM1">
<statistic type="Startup">
...
or
# Stream a ctgStatistics XML log and print one pipe-separated line per
# <resourceGroup> of each <statRecord>. In this variant the statistic types
# are ignored: every statistic is stored directly on its resourceGroup under
# its own name, so this assumes statistic names are unique within a group.
#
# Rule behaviour (built-in 'content' / 'no content array' rules, handler
# return conventions, the stripspaces flags) is defined by XML::Rules; see
# the module documentation for the exact semantics.
use strict;
use warnings;
use XML::Rules;
use Data::Dumper qw(Dumper);    # only used by the commented-out debug line

my $parser = XML::Rules->new(
    stripspaces => 15,
    rules       => {
        # <name> and <value> contribute just their text to the parent tag.
        'name,value' => 'content',

        # Each <statistic> is handed to the enclosing resourceGroup as a
        # plain name => value pair; its type attribute is dropped.
        statistic => sub {
            my (undef, $stat) = @_;
            return $stat->{name} => $stat->{value};
        },

        # Collect the resource groups into an array on the statRecord.
        resourceGroup => 'no content array',

        # A complete <statRecord> has been parsed: print its groups and
        # return nothing, so the record's data is not kept once it has
        # been printed.
        statRecord => sub {
            my (undef, $record) = @_;

            #print Dumper($record);
            foreach my $group (@{ $record->{resourceGroup} }) {
                print join('|',
                    $record->{time},
                    $group->{name},
                    $group->{LCONNFAIL},
                    $group->{LLOSTCONN},
                    $group->{LIDLETIMEOUT},
                    $group->{SIPADDR},
                    $group->{SIPPORT},
                ), "\n";
            }
            return;
        },
    },
);

print "time|resourceGroup name|LCONNFAIL|LLOSTCONN|LIDLETIMEOUT|SIPADDR|SIPPORT\n";
$parser->parse(\*DATA);

# NOTE: the sample document below is truncated ("...") in this excerpt;
# supply the complete XML before running.
__DATA__
<?xml version="1.0" encoding="UTF-8"?>
<ctgStatistics xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="ctgstatslog.xsd">
<statRecord type="interval" length="60" time="2019-07-16T08:23:59">
<resourceGroup name="CSCS1SVGM1">
<statistic type="Startup">
...

The first version preserves the statistic types in the data passed to the handler of the statRecord tag; the second assumes that no two statistics share a name and ignores the types.

There's only the data from one <statRecord> in memory at any time.

Jenda
Enoch was right!
Enjoy the last years of Rome.

Replies are listed 'Best First'.
Re^2: parse XML huge file using cpan modules
by nicopelle (Acolyte) on Jul 31, 2019 at 08:46 UTC
    Thanks Jenda!!

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://11103566]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others contemplating the Monastery: (5)
As of 2019-12-13 04:12 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found

    Notices?