Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Finding max value from a unique tag from XML

by Shaveta_Chawla (Acolyte)
on Nov 22, 2012 at 10:24 UTC ( #1005101=perlquestion: print w/ replies, xml ) Need Help??
Shaveta_Chawla has asked for the wisdom of the Perl Monks concerning the following question:

I have an XML file,
<doc> <date name="processingtime">2011-04-09T11:12:22.049Z</date> <str name="docuid">121422</str> <str name="title">ABC</str> <\doc> <doc> <date name="processingtime">2012-04-09T11:12:22.049Z</date> <str name="docuid">13427</str> <str name="title">CDE</str> <\doc> <date name="processingtime">2010-04-09T11:12:22.049Z</date> <str name="docuid">89822</str> <str name="title">LKK</str> <\doc>
where a tag (<str name="docuid">) appears multiple times. I need to fetch the data from all the docuid tags and find the maximum value among them. I used the following code:
use List::Util qw(max);

# Pull every docuid out of the document in a single pass.  A global match in
# list context returns all captures, so there is no need to pad <doc> with
# newlines and split: that approach modified $html and left undef holes in
# @docuid for chunks without a docuid (e.g. the text before the first <doc>).
my @docuid = $html =~ m{<str name="docuid">(.*?)</str>}g;

# max() compares numerically; it returns undef when no docuid was found.
my $max = max @docuid;
This code serves my purpose, but is there a better way to achieve this?

Comment on Finding max value from a unique tag from XML
Select or Download Code
Re: Finding max value from a unique tag from XML
by jethro (Monsignor) on Nov 22, 2012 at 10:41 UTC

    Yes, there are lots of XML modules around. For example, take a look at XML::Simple and read the Quick Start section of its manual page; you can adapt the sample code there to your application.

Re: Finding max value from a unique tag from XML
by remiah (Hermit) on Nov 22, 2012 at 10:45 UTC

    Hello Shaveta_Chawla.

    It seems your XML has some problems, such as '<\doc>' (a closing tag should be written '</doc>').
    If it is valid XML, XML::Twig will do it like this.

    #!/usr/bin/perl
    use strict;
    use warnings;
    use XML::Twig;

    # Slurp the sample document that follows __DATA__.
    my $xml = do { local $/; <DATA> };

    # Running maximum, updated by the handler below.
    my $max_docid = 0;

    # Only <str name="docuid"> elements are handed to the handler.
    my $twig = XML::Twig->new(
        twig_roots => {
            'str[@name="docuid"]' => \&set_max_docid,
        },
    );
    $twig->parse($xml);

    print "max docid=$max_docid\n";

    # Twig handler: called with the twig and the matched element; keeps the
    # largest docuid seen so far in $max_docid.
    sub set_max_docid {
        my ($t, $str_elt) = @_;
        my $candidate = $str_elt->first_child_trimmed_text;
        $max_docid = $candidate if $candidate > $max_docid;
    }

    __DATA__
    <all>
    <doc>
      <date name="processingtime">2011-04-09T11:12:22.049Z</date>
      <str name="docuid">121422</str>
      <str name="title">ABC</str>
    </doc>
    <doc>
      <date name="processingtime">2012-04-09T11:12:22.049Z</date>
      <str name="docuid">13427</str>
      <str name="title">CDE</str>
    </doc>
    <doc>
      <date name="processingtime">2010-04-09T11:12:22.049Z</date>
      <str name="docuid">89822</str>
      <str name="title">LKK</str>
    </doc>
    </all>
    XML::Twig has a nice tutorial.

Re: Finding max value from a unique tag from XML
by choroba (Canon) on Nov 22, 2012 at 13:19 UTC
    A single XPath expression can find a maximum. Here is an example using XML::XSH2, a wrapper around XML::LibXML:
    # Select the docuid element for which no other docuid is greater, i.e. the maximum.
    open 1.xml ;
    echo //doc/str[@name="docuid" and not(text() < //doc/str[@name="docuid"]/text())] ;
    لսႽ ᥲᥒ⚪⟊Ⴙᘓᖇ Ꮅᘓᖇ⎱ Ⴙᥲ𝇋ƙᘓᖇ

      That's more XPath wizardry than I'd normally manage. FWIW, here's the translation of the above to plain old XML::LibXML:

      use 5.010;
      use XML::LibXML 1.70;

      my $xml = XML::LibXML->load_xml(location => "1.xml");

      # Print the docuid element(s) for which no other docuid is greater,
      # i.e. the one(s) holding the maximum value.
      say $xml->findnodes(q{
          //doc/str[@name="docuid" and not(text() < //doc/str[@name="docuid"]/text())]
      });
      perl -E'sub Monkey::do{say$_,for@_,do{($monkey=[caller(0)]->[3])=~s{::}{ }and$monkey}}"Monkey say"->Monkey::do'
Re: Finding max value from a unique tag from XML
by vagabonding electron (Hermit) on Nov 24, 2012 at 16:35 UTC
    My 1 cent, using XML::Rules (as an exercise for myself). The XML is the version repaired by remiah.
    #!/usr/bin/perl
    use strict;
    use warnings;
    use XML::Rules;

    my $xml = <<'XML';
    <all>
    <doc>
      <date name="processingtime">2011-04-09T11:12:22.049Z</date>
      <str name="docuid">121422</str>
      <str name="title">ABC</str>
    </doc>
    <doc>
      <date name="processingtime">2012-04-09T11:12:22.049Z</date>
      <str name="docuid">13427</str>
      <str name="title">CDE</str>
    </doc>
    <doc>
      <date name="processingtime">2010-04-09T11:12:22.049Z</date>
      <str name="docuid">89822</str>
      <str name="title">LKK</str>
    </doc>
    </all>
    XML

    # Build a nested data structure: every <doc> becomes an array entry that
    # carries its <str> children as an array of attribute/content hashes.
    my @rules = (
        'str'  => 'as array',
        'date' => 'as is',
        'doc'  => 'as array no content',
        'all'  => 'no content',
    );

    my $parser = XML::Rules->new(rules => \@rules);
    my $data   = $parser->parse( $xml );

    # Walk every <str> of every <doc> and keep the largest docuid.
    my $max_value = 0;
    for my $doc ( @{ $data->{all}{doc} } ) {
        for my $entry ( @{ $doc->{str} } ) {
            next unless $entry->{name} eq 'docuid';
            next unless $entry->{_content} > $max_value;
            $max_value = $entry->{_content};
        }
    }

    print "The max value is: $max_value\n";

      If all you want from the document is the maximal docuid, you can set the rules to give you exactly that:

      #!/usr/bin/perl
      use strict;
      use warnings;
      no warnings qw(uninitialized);    # {pad} and {name} may be undef
      use XML::Rules;

      my $xml = <<'XML';
      <all>
      <doc>
        <date name="processingtime">2011-04-09T11:12:22.049Z</date>
        <str name="docuid">121422</str>
        <str name="title">ABC</str>
      </doc>
      <doc>
        <date name="processingtime">2012-04-09T11:12:22.049Z</date>
        <str name="docuid">13427</str>
        <str name="title">CDE</str>
      </doc>
      <doc>
        <date name="processingtime">2010-04-09T11:12:22.049Z</date>
        <str name="docuid">89822</str>
        <str name="title">LKK</str>
      </doc>
      </all>
      XML

      # Rule subs are called with (tag name, attributes, context, parent data,
      # parser) per the XML::Rules documentation, so $_[1] is the attribute
      # hash (including _content) and $_[4] is the parser object.  The parser's
      # {pad} slot is used here as scratch storage for the running maximum.
      my @rules = (
          'str' => sub {
              my ($attr, $parser) = @_[1, 4];
              return unless $attr->{name} eq 'docuid';

              my $id = $attr->{_content};
              if ($id > $parser->{pad}) {
                  $parser->{pad} = $id;
              }
              return;    # contribute nothing to the parent's data
          },
          # The root rule's return value becomes the result of parse().
          'all' => sub {
              my $parser = $_[4];
              return $parser->{pad};
          },
      );

      my $parser    = XML::Rules->new(rules => \@rules);
      my $max_value = $parser->parse( $xml );

      print "The max value is: $max_value\n";

      This assumes that you want the maximal value from any <str> tag with attribute name="docuid" as it doesn't check the "path" to the <str> tag!

      Update: With version 1.16 and later it's easy to give the specific parser a more readable interface:

      # Ask XML::Rules to export a max_docuid() function that runs the
      # 'parse' method with the rules below.
      use XML::Rules max_docuid => {
          method => 'parse',
          rules  => {
              # $_[1] is the attribute hash, $_[4] the parser object whose
              # {pad} slot holds the running maximum.
              'str' => sub {
                  my ($attr, $parser) = @_[1, 4];
                  return unless $attr->{name} eq 'docuid';

                  my $id = $attr->{_content};
                  if ($id > $parser->{pad}) {
                      $parser->{pad} = $id;
                  }
                  return;
              },
              # The root rule hands the collected maximum back to the caller.
              'all' => sub {
                  my $parser = $_[4];
                  return $parser->{pad};
              },
          },
      };

      #...

      print "The max value is: " . max_docuid($xml) . "\n";

      Jenda
      Enoch was right!
      Enjoy the last years of Rome.

        Thank you Jenda!
        Could you please point me to the documentation of the $_[4] approach?

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlquestion [id://1005101]
Front-paged by Arunbear
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others having an uproarious good time at the Monastery: (6)
As of 2015-07-07 08:13 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (87 votes), past polls