Beefy Boxes and Bandwidth Generously Provided by pair Networks
Don't ask to ask, just ask
 
PerlMonks  

XML::Twig traversing tree and storing in an array

by jccunning (Acolyte)
on Aug 03, 2012 at 19:05 UTC ( #985318=perlquestion: print w/ replies, xml ) Need Help??
jccunning has asked for the wisdom of the Perl Monks concerning the following question:

I seem to be spinning my wheels and coding myself into a corner. I am learning XML::Twig and trying to traverse an xml file and gather all children of root element into array along with all siblings and attributes; separated for each child of root. For example, in the following xml file I would like to place each class in a separate array along with all_members, private_members, public_methods, etc. and all their associated siblings and attributes. I can do it easily with XML::Parser using the Start method which passes my( $expat, $element, %attrs ) = @_; to my subroutine. Would like to do this with XML::Twig but have not mastered its syntactical usage, yet. If someone has already done this, please advise and provide sample.
<myroot> <classes name="Panoply::AccessLogic"> <all_members name="accessLogic" protection="public" scope="Panoply +::AccessLogic" virtualness="non_virtual" /> <all_members name="DBUS" protection="public" scope="Panoply::Acces +sLogic" virtualness="non_virtual" /> <all_members name="DDR_VIA_JTAG" protection="public" scope="Panopl +y::AccessLogic" virtualness="non_virtual" /> <all_members name="DEFAULT" protection="public" scope="Panoply::Ac +cessLogic" virtualness="non_virtual" /> <brief></brief> <detailed> <doc type="text">Models the access logic</doc> </detailed> <includes name="AccessLogic.hpp" local="no" /> <private_members> <members name="m_accessLogic" kind="variable" protection="privat +e" static="no" type="AccessLogicTypes" virtualness="non_virtual"> </members> <members name="m_handledErrors" kind="variable" protection="priv +ate" static="no" type="HandledErrors" virtualness="non_virtual"> </members> </private_members> <public_methods> <members name="accessLogic" const="yes" kind="function" protecti +on="public" static="no" type="AccessLogicTypes" virtualness="non_virt +ual" volatile="no"> <brief></brief> <detailed> <doc type="text">Gets the current access logic for...</doc> </detailed> </members> <members name="AccessLogic" const="no" kind="function" protectio +n="public" static="no" virtualness="non_virtual" volatile="no"> <parameters declaration_name="accessLogic" default_value="DEFA +ULT" type="AccessLogicTypes" /> <parameters declaration_name="handledErrors" default_value="Ha +ndledErrors::ALL" type="const HandledErrors &amp;" /> </members> </public_methods> <public_typedefs> <members name="AccessLogicTypes" kind="enum" protection="public" + static="no" virtualness="non_virtual"> <values name="DEFAULT"> </values> <values name="IO_CF8_CFC"> </values> </members> </public_typedefs> </classes> <classes name="Panoply::Details::ActionBatch"> <brief></brief> <detailed></detailed> <includes name="PlatformActionBatch.hpp" local="no" /> </classes> <files name="panoplydoc.hpp"> <detailed></detailed> </files> <files name="PanoplyVersion.hpp"> <brief></brief> <includes name="string" /> <variables> <members name="LIBRARY_VERSION" initializer="&quot;0.1.9&quot;" +kind="variable" static="no" type="const std::string"> </members> <members name="BUILD_DATE" initializer="&quot;00/00/0000&quot;" +kind="variable" static="no" type="const std::string"> </members> </variables> </files> </myroot>

Comment on XML::Twig traversing tree and storing in an array
Download Code
Re: XML::Twig traversing tree and storing in an array
by Anonymous Monk on Aug 03, 2012 at 19:34 UTC
      From another approach, is it possible to output sections of a tree structure to an array or something that I could use to compare against same section in older xml file. Following is code I am using to output class, file, and namespace information. Example, I am trying to output each class and all its related information into one array (or something) and second class into another array, etc. Is there an equivalent to $class->print such as $class->array.
      #!/bin/perl use strict; use warnings; use XML::Twig; my $twig_handlers = { 'panoply/classes' => \&class_info, 'panoply/files' => \&file_info, 'panoply/namespaces' => \&namespace_info, }; my $twig1; my @xfiles = @ARGV; foreach my $xfile (@xfiles) { $twig1 = new XML::Twig(ignore_elts => { brief => 'discard', detailed + => 'discard', includes => 'discard', included_by => 'discard', reimp +lemented_by => 'discard' }, TwigRoots => {panoply => 1}, TwigHandlers + => $twig_handlers); eval { $twig1->parsefile( $xfile ) }; die "can't parse file: $@" if $@; } sub class_info { my ($twig, $class) = @_; $class->set_pretty_print( 'indented' ); $class->print; } sub file_info { my ($twig, $file) = @_; $file->set_pretty_print( 'indented' ); $file->print; } sub namespace_info { my ($twig, $nsp) = @_; $nsp->set_pretty_print( 'indented' ); $nsp->print; }
      Sample xml file
      <panoply> <classes name="Panoply::AccessLogic"> <all_members name="accessLogic" protection="public"/> <all_members name="DDR_VIA_JTAG" protection="public"/> <brief></brief> <detailed> <doc type="text">Models the access logic</doc> </detailed> <includes name="AccessLogic.hpp" local="no" /> <public_methods> <members name="handledErrors" const="yes" kind="function" protec +tion="public" static="no"> </members> <members name="AccessLogic" const="no" kind="function" protectio +n="public" static="no"> <parameters declaration_name="accessLogic" type="AccessLogicTy +pes" /> <parameters declaration_name="handledErrors" type="const Handl +edErrors &amp;" /> </members> </public_methods> <public_typedefs> <members name="AccessLogicTypes" kind="enum" protection="public" +> <values name="IO_CF8_CFC"> </values> </members> <members name="MJTAG" kind="enumvalue"> </members> </public_typedefs> </classes> <classes name="Panoply::Details::ActionBatch"> <detailed></detailed> <includes name="PlatformActionBatch.hpp" local="no" /> </classes> <classes name="Panoply::Details::ActionResult"> <derived name="Panoply::Details::CPUIDActionResult"/> <includes name="PlatformAction.hpp" local="no" /> </classes> <classes name="Panoply::Details::AddressIndex"> <includes name="Panoply.hpp" local="no" /> </classes> <files name="panoplydoc.hpp"> </files> <files name="PanoplyExports.hpp"> <defines> <members name="NOMINMAX" kind="define" protection="public"> </members> <members name="_SCL_SECURE_NO_WARNINGS" kind="define"> </members> </defines> </files> <namespaces name="AMD"> <namespaces name="AMD::RegisterDef" /> </namespaces> <namespaces name="AMD::RegisterDef"> <classes name="AMD::RegisterDef::BaseDevice" /> <enums> <members name="PermissionLevel" kind="enum" protection="public"> <values name="PERMISSION_PUBLIC" initializer=" 10"> </values> <values name="PERMISSION_NDA" initializer=" 20"> </values> </members> <members name="RegisterType" kind="enum" protection="public"> <values name="REGISTER_PCI" initializer=" 0"> </values> <values name="REGISTER_MSR" initializer=" 1"> </values> </members> </enums> <functions> <members name="Compare" const="no" kind="function" protection="p +ublic"> <parameters declaration_name="first" type="T" /> <parameters declaration_name="second" type="T" /> </members> </functions> <namespaces name="AMD::RegisterDef::Buffalo" /> <variables> <members name="LIBRARY_VERSION"> </members> </variables> </namespaces> <namespaces name="AMD::RegisterDef::Buffalo"> <classes name="AMD::RegisterDef::Buffalo::BaseEntity" /> </namespaces> </panoply>
        Sure, sprint, aka outer_xml
        use strict; use warnings; use XML::Twig; my $twig = XML::Twig->new( pretty_print => 'indented', TwigHandlers => { ro => \&ssprint, sham => \&ssprint, bo => \&ssprint, } ); $twig->xparse('<hi> <ro><yo/></ro> <sham><yo/></sham> <bo><yo/></bo> </hi>' ); sub ssprint { print 'moo', $_->sprint, "\n"; } __END__ moo <ro> <yo/> </ro> moo <sham> <yo/> </sham> moo <bo> <yo/> </bo>

        So whatever it is you're trying to get done, might be written as

        #!/usr/bin/perl -- use strict; use warnings; use XML::Twig; use Data::Dump qw' dd '; Main( @ARGV ); exit( 0 ); sub Main { my %files; my $filename; my $ssprint = sub { # my( $twig, $elt ) = @_; # my( $twig, $_ ) = @_; # $_ is $elt push @{ $files{ $filename }{ $_->path } }, $_->sprint; return; }; my $twig = XML::Twig->new( pretty_print => 'indented', TwigHandlers => { 'panoply/classes' => $ssprint, 'panoply/files' => $ssprint, 'panoply/namespaces' => $ssprint, }, ); for my $file( @_ ){ dd $filename = $file; ## eval { $twig->parsefile( $file ); 1; } or warn "ERROR parsefile($file): $@ "; $twig->purge; } dd \%files; } __END__ "panolpy.985318.xml" "panolpy.985465.xml" { "panolpy.985318.xml" => { "/panoply/classes" => [ "\n <classes name +=\"Panoply::AccessLogic\">\n <all_members name=\"accessLogic\" pro +tection=\"public\" scope=\"Panoply::AccessLogic\" virtualness=\"non_v +irtual\"/>\n <all_members name=\"DBUS\" protection=\"public\" scop +e=\"Panoply::AccessLogic\" virtualness=\"non_virtual\"/>\n <all_me +mbers name=\"DDR_VIA_JTAG\" protection=\"public\" scope=\"Panoply::Ac +cessLogic\" virtualness=\"non_virtual\"/>\n <all_members name=\"DE +FAULT\" protection=\"public\" scope=\"Panoply::AccessLogic\" virtualn +ess=\"non_virtual\"/>\n <brief></brief>\n <detailed>\n <do +c type=\"text\">Models the access logic</doc>\n </detailed>\n < +includes local=\"no\" name=\"AccessLogic.hpp\"/>\n <private_member +s>\n <members kind=\"variable\" name=\"m_accessLogic\" protectio +n=\"private\" static=\"no\" type=\"AccessLogicTypes\" virtualness=\"n +on_virtual\"></members>\n <members kind=\"variable\" name=\"m_ha +ndledErrors\" protection=\"private\" static=\"no\" type=\"HandledErro +rs\" virtualness=\"non_virtual\"></members>\n </private_members>\n + <public_methods>\n <members const=\"yes\" kind=\"function\" +name=\"accessLogic\" protection=\"public\" static=\"no\" type=\"Acces +sLogicTypes\" virtualness=\"non_virtual\" volatile=\"no\">\n < +brief></brief>\n <detailed>\n <doc type=\"text\">Gets + the current access logic for...</doc>\n </detailed>\n </ +members>\n <members const=\"no\" kind=\"function\" name=\"Access +Logic\" protection=\"public\" static=\"no\" virtualness=\"non_virtual +\" volatile=\"no\">\n <parameters declaration_name=\"accessLog +ic\" default_value=\"DEFAULT\" type=\"AccessLogicTypes\"/>\n < +parameters declaration_name=\"handledErrors\" default_value=\"Handled +Errors::ALL\" type=\"const HandledErrors &amp;\"/>\n </members>\ +n </public_methods>\n <public_typedefs>\n <members kind=\" +enum\" name=\"AccessLogicTypes\" protection=\"public\" static=\"no\" +virtualness=\"non_virtual\">\n <values name=\"DEFAULT\"></valu +es>\n <values name=\"IO_CF8_CFC\"></values>\n </members>\ +n </public_typedefs>\n </classes>", "\n <classes name +=\"Panoply::Details::ActionBatch\">\n <brief></brief>\n <detail +ed></detailed>\n <includes local=\"no\" name=\"PlatformActionBatch +.hpp\"/>\n </classes>", ], "/panoply/files" => [ "\n <files name=\ +"panoplydoc.hpp\">\n <detailed></detailed>\n </files>", "\n <files name=\ +"PanoplyVersion.hpp\">\n <brief></brief>\n <includes name=\"str +ing\"/>\n <variables>\n <members initializer=\"&quot;0.1.9&qu +ot;\" kind=\"variable\" name=\"LIBRARY_VERSION\" static=\"no\" type=\ +"const std::string\"></members>\n <members initializer=\"&quot;0 +0/00/0000&quot;\" kind=\"variable\" name=\"BUILD_DATE\" static=\"no\" + type=\"const std::string\"></members>\n </variables>\n </files>" +, ], }, "panolpy.985465.xml" => { "/panoply/classes" => [ "\n <classes n +ame=\"Panoply::AccessLogic\">\n <all_members name=\"accessLogic\" +protection=\"public\"/>\n <all_members name=\"DDR_VIA_JTAG\" prote +ction=\"public\"/>\n <brief></brief>\n <detailed>\n <doc t +ype=\"text\">Models the access logic</doc>\n </detailed>\n <inc +ludes local=\"no\" name=\"AccessLogic.hpp\"/>\n <public_methods>\n + <members const=\"yes\" kind=\"function\" name=\"handledErrors\" + protection=\"public\" static=\"no\"></members>\n <members const +=\"no\" kind=\"function\" name=\"AccessLogic\" protection=\"public\" +static=\"no\">\n <parameters declaration_name=\"accessLogic\" +type=\"AccessLogicTypes\"/>\n <parameters declaration_name=\"h +andledErrors\" type=\"const HandledErrors &amp;\"/>\n </members> +\n </public_methods>\n <public_typedefs>\n <members kind=\ +"enum\" name=\"AccessLogicTypes\" protection=\"public\">\n <va +lues name=\"IO_CF8_CFC\"></values>\n </members>\n <members +kind=\"enumvalue\" name=\"MJTAG\"></members>\n </public_typedefs>\ +n </classes>", "\n <classes n +ame=\"Panoply::Details::ActionBatch\">\n <detailed></detailed>\n + <includes local=\"no\" name=\"PlatformActionBatch.hpp\"/>\n </clas +ses>", "\n <classes n +ame=\"Panoply::Details::ActionResult\">\n <derived name=\"Panoply: +:Details::CPUIDActionResult\"/>\n <includes local=\"no\" name=\"Pl +atformAction.hpp\"/>\n </classes>", "\n <classes n +ame=\"Panoply::Details::AddressIndex\">\n <includes local=\"no\" n +ame=\"Panoply.hpp\"/>\n </classes>", ], "/panoply/files" => [ "\n <files nam +e=\"panoplydoc.hpp\"></files>", "\n <files nam +e=\"PanoplyExports.hpp\">\n <defines>\n <members kind=\"defin +e\" name=\"NOMINMAX\" protection=\"public\"></members>\n <member +s kind=\"define\" name=\"_SCL_SECURE_NO_WARNINGS\"></members>\n </ +defines>\n </files>", ], "/panoply/namespaces" => [ "\n <namespace +s name=\"AMD\">\n <namespaces name=\"AMD::RegisterDef\"/>\n </nam +espaces>", "\n <namespace +s name=\"AMD::RegisterDef\">\n <classes name=\"AMD::RegisterDef::B +aseDevice\"/>\n <enums>\n <members kind=\"enum\" name=\"Permi +ssionLevel\" protection=\"public\">\n <values initializer=\" 1 +0\" name=\"PERMISSION_PUBLIC\"></values>\n <values initializer +=\" 20\" name=\"PERMISSION_NDA\"></values>\n </members>\n < +members kind=\"enum\" name=\"RegisterType\" protection=\"public\">\n + <values initializer=\" 0\" name=\"REGISTER_PCI\"></values>\n + <values initializer=\" 1\" name=\"REGISTER_MSR\"></values>\n + </members>\n </enums>\n <functions>\n <members const=\" +no\" kind=\"function\" name=\"Compare\" protection=\"public\">\n + <parameters declaration_name=\"first\" type=\"T\"/>\n <para +meters declaration_name=\"second\" type=\"T\"/>\n </members>\n + </functions>\n <namespaces name=\"AMD::RegisterDef::Buffalo\"/>\ +n <variables>\n <members name=\"LIBRARY_VERSION\"></members>\ +n </variables>\n </namespaces>", "\n <namespace +s name=\"AMD::RegisterDef::Buffalo\">\n <classes name=\"AMD::Regis +terDef::Buffalo::BaseEntity\"/>\n </namespaces>", ], }, }

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlquestion [id://985318]
Approved by jdtoronto
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others exploiting the Monastery: (15)
As of 2015-07-06 17:22 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (77 votes), past polls