Beefy Boxes and Bandwidth Generously Provided by pair Networks
more useful options
 
PerlMonks  

Nested XML Question

by zelet (Initiate)
on Jun 25, 2013 at 01:29 UTC ( #1040510=perlquestion: print w/ replies, xml ) Need Help??
zelet has asked for the wisdom of the Perl Monks concerning the following question:

I haven't written a line in Perl in 10 years but I was asked to turn this:
<?xml version="1.0" encoding="UTF-8"?>
<sub-group-tree>
  <host-group host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false" name="inside-hosts">
    <host-group id="65534" name="Catch All" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
      <ip-address-ranges>10.0.0.0/8</ip-address-ranges>
      <ip-address-ranges>192.168.0.0/16</ip-address-ranges>
    </host-group>
    <host-group id="23" name="By Function" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
      <host-group id="61" name="B2B" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <ip-address-ranges>192.168.0.0/16, 192.167.0.0/16, 192.166.0.0/16</ip-address-ranges>
      </host-group>
      <host-group id="24" name="DMZ" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <ip-address-ranges>192.168.0.1/16, 192.167.0.1/16, 192.166.0.1/16</ip-address-ranges>
      </host-group>
      <host-group id="139" name="Guest and Mobile Networks" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <host-group id="55" name="CAG_Mobile" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>10.126.0.0/16, 10.127.0.0/16, 10.128.0.0/16</ip-address-ranges>
        </host-group>
        <host-group id="59" name="Guest Catch-All" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>10.126.0.0/16, 10.127.0.0/16</ip-address-ranges>
        </host-group>
        <host-group id="45" name="Guest Wireless Networks" host-baselines="false" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>192.168.0.0/16, 192.167.0.0/16, 192.166.0.0/16</ip-address-ranges>
        </host-group>
      </host-group>
      <host-group id="35" name="Infrastructure" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <host-group id="8" name="Firewalls" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>192.168.0.1/16</ip-address-ranges>
          <ip-address-ranges>192.168.0.2/16</ip-address-ranges>
        </host-group>
        <host-group id="11" name="NAT Gateway" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false" />
        <host-group id="39" name="Network Devices" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>192.168.0.1/16</ip-address-ranges>
          <ip-address-ranges>192.168.0.2/16</ip-address-ranges>
          <ip-address-ranges>192.168.0.1/16</ip-address-ranges>
          <ip-address-ranges>192.168.0.2/16</ip-address-ranges>
          <ip-address-ranges>192.168.0.1/16</ip-address-ranges>
          <ip-address-ranges>192.168.0.2/16</ip-address-ranges>
        </host-group>
      </host-group>
      <host-group id="7" name="Internal 3rd Party Managed Devices" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false" />
      <host-group id="6" name="Other" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <host-group id="18" name="Broadcast" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>255.0.0.0/8</ip-address-ranges>
        </host-group>
        <host-group id="17" name="Link-Local" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>169.254.0.0/16</ip-address-ranges>
        </host-group>
        <host-group id="19" name="Localhost" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>127.</ip-address-ranges>
        </host-group>
        <host-group id="14" name="Multicast" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>192.0.0.0/24</ip-address-ranges>
        </host-group>
      </host-group>
      <host-group id="5" name="Users" host-baselines="false" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <host-group id="3" name="Desktops" host-baselines="false" suppress-excluded-services="true" inverse-suppression="false" host-trap="false" />
        <host-group id="33" name="Remote VPN IP Pool" host-baselines="false" suppress-excluded-services="true" inverse-suppression="false" host-trap="false" />
      </host-group>
      <host-group id="12" name="VoIP" host-baselines="false" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <host-group id="37" name="Endpoints" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false" />
        <host-group id="13" name="Gateways" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
          <ip-address-ranges>192.168.0.1/16</ip-address-ranges>
        </host-group>
      </host-group>
    </host-group>
    <host-group id="43" name="By Location" host-baselines="false" suppress-excluded-services="false" inverse-suppression="false" host-trap="false">
      <host-group id="77" name="Amhearst" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <ip-address-ranges>10.10.10.1/16</ip-address-ranges>
        <ip-address-ranges>192.168.0.0/16</ip-address-ranges>
      </host-group>
      <host-group id="79" name="Springfield" host-baselines="true" suppress-excluded-services="true" inverse-suppression="false" host-trap="false">
        <ip-address-ranges>192.168.0.1/24</ip-address-ranges>
      </host-group>
    </host-group>
  </host-group>
</sub-group-tree>
Into this:
Group.Servers.AllServers 10.100.10.0/24
Group.Servers.Mail 10.100.1.10/32
Group2.Development.Lab 172.16.0.0/22
Group2.Development.Linux 172.16.12.9/32
I've been working on this for 12 hours and have gotten just about nowhere. Could somebody provide me some guidance? Anything to get me going in the right direction? I'm completely swimming. Thanks!

Comment on Nested XML Question
Select or Download Code
Re: Nested XML Question (question_
by Anonymous Monk on Jun 25, 2013 at 01:55 UTC
      Thanks for the quick reply. I was so neck-deep in this stuff all day I didn't explain it well. For example, the first couple lines of the output file (as related to the original XML I posted) would be this:
      inside-hosts
      inside-hosts.catchall 10.0.0.0/8
      inside-hosts.catchall 192.168.0.0/16
      inside-hosts.By Function
      inside-hosts.By Function.B2B 192.168.0.0/16
      inside-hosts.By Function.B2B 192.167.0.0/16
      inside-hosts.By Function.B2B 192.166.0.0/16
      inside-hosts.By Function.DMZ 192.168.0.1/16
      ... etc ...
      inside-hosts.By Function.Infrastructure
      inside-hosts.By Function.Infrastructure.Firewalls 192.168.0.1/16
      inside-hosts.By Function.Infrastructure.Firewalls 192.168.0.2/16
      My biggest issue is figuring out how to loop through the XML and parse out the name and the IP ranges and create the  name.name.name <tab> IP/range structure.
Re: Nested XML Question
by Loops (Hermit) on Jun 25, 2013 at 05:51 UTC

    Think this is a start of what you're looking for

    use strict;
    use warnings;
    use XML::Parser;

    # Stream the document with XML::Parser and print one "group.path ip" line
    # for every address found inside a named element.

    # Names of the elements that are currently open, outermost first.  An
    # element without a "name" attribute (such as the <sub-group-tree> root)
    # is recorded as '' so that every end tag still pops exactly one entry.
    my @prefix;

    # Start-tag handler.  XML::Parser passes the expat object, the element
    # name, and then the attributes as a flat key/value list.  Named lexicals
    # are used here because `my $_` is rejected by perl 5.24 and later.
    sub start {
        my ($expat, $element, %attr) = @_;
        push @prefix, defined $attr{name} ? $attr{name} : '';
        return;
    }

    # Default handler: called for everything that has no dedicated handler,
    # which includes the character data between tags.  Whitespace is removed
    # first, so indentation between elements produces no output.
    # NOTE(review): expat may deliver one text node in several pieces; if a
    # range ever appears split across lines, buffer the text until the end tag.
    sub ips {
        my ($expat, $text) = @_;
        return unless @prefix;

        $text =~ s/\s//g;

        # Skip the '' placeholders so the path has no leading or doubled dot.
        my $path = join '.', grep { length } @prefix;
        for my $ip (split /,/, $text) {
            print "$path $ip\n";
        }
        return;
    }

    # End-tag handler: leave the current element.
    sub end {
        pop @prefix;
        return;
    }

    my $parser = XML::Parser->new;
    $parser->setHandlers(Start => \&start, Default => \&ips, End => \&end);
    $parser->parsefile('xml');
Re: Nested XML Question
by hdb (Parson) on Jun 25, 2013 at 06:04 UTC

    With XML::Simple you have to traverse the structure yourself. Recursion is what you need then.

    use strict;
    use warnings;
    use XML::Simple;

    # Print one host-group and, recursively, every group nested inside it.
    #
    #   $text - dotted path of the enclosing groups ('' at the top level)
    #   $href - the hash XML::Simple built for this <host-group> element
    #
    # Output: the group's own dotted path on a line by itself, then one
    # "path<TAB>range" line per address range, then the nested groups.
    sub findranges {
        my ($text, $href) = @_;

        # Drop empty parts so the top-level group gets no leading dot.
        my $path = join '.', grep { defined $_ && length $_ } $text, $href->{name};
        print "$path\n";

        for my $ranges (@{ $href->{'ip-address-ranges'} || [] }) {
            # XML::Simple represents an empty element as a hash ref, not text.
            next if ref $ranges;

            # A single element may carry several comma-separated ranges.
            for my $range (split /,/, $ranges) {
                $range =~ s/^\s+|\s+$//g;
                print "$path\t$range\n" if length $range;
            }
        }

        for my $child (@{ $href->{'host-group'} || [] }) {
            findranges($path, $child);
        }
        return;
    }

    # ForceArray keeps a lone child group or a lone range in an array like its
    # siblings; KeyAttr => [] turns off folding by name/id, so the groups stay
    # in document order and each hash keeps its own "name" attribute.
    my $xml = XMLin(
        'hosts.xml',
        ForceArray => [ 'host-group', 'ip-address-ranges' ],
        KeyAttr    => [],
    );

    for my $group (@{ $xml->{'host-group'} || [] }) {
        findranges('', $group);
    }
Re: Nested XML Question
by Jenda (Abbot) on Jun 25, 2013 at 07:43 UTC

    If you don't mind a little different order then this would work:

    use strict;
    use warnings;
    no warnings 'uninitialized';
    use XML::Rules;

    # Print "group.path<TAB>range" for every address range, or just the group
    # path when a group has no ranges of its own.  XML::Rules runs a tag's
    # rule when the tag closes, so nested groups are printed before the group
    # that contains them.
    my $parser = XML::Rules->new(
        stripspaces => 7,
        rules       => {
            # Collect the text of each <ip-address-ranges> into an array
            # stored under that key in the enclosing group's attribute hash.
            'ip-address-ranges' => 'content array',

            'host-group' => sub {
                my ($tag, $attr, $parents, $parent_data) = @_;

                # $parent_data->[0] belongs to the unnamed root element, so
                # the path starts from index 1.
                my @ancestors = map { $_->{name} } @{$parent_data}[ 1 .. $#$parent_data ];
                my $name      = join '.', @ancestors, $attr->{name};

                # One element may hold several comma-separated ranges.
                my @ranges = map { split /\s*,\s*/, $_ }
                    @{ $attr->{'ip-address-ranges'} || [] };

                if (@ranges) {
                    print "$name\t$_\n" for @ranges;
                }
                else {
                    print "$name\n";
                }
                return;
            },
        },
    );
    $parser->parse(\*DATA);

    # When pasting the document below, the XML declaration must start in the
    # first column of the line that follows __DATA__.
    __DATA__
    <?xml version="1.0" encoding="UTF-8"?>
    <sub-group-tree>
    ...

    If you do mind, the rules would have to be modified to keep the data, and you would have to traverse the returned data structure again, because with XML::Rules the child tags are processed before their parent tags. (Well, there is a way to do something once the opening tag is parsed, but at that time no data from any child tags is accessible, so you would not have access to the IP addresses.)

    Jenda
    Enoch was right!
    Enjoy the last years of Rome.

Re: Nested XML Question
by tobyink (Abbot) on Jun 25, 2013 at 09:26 UTC

    This seems to do the job:

    #!/usr/bin/env perl
    use strict;
    use warnings;
    use XML::LibXML 2;

    # Print "group.path<TAB>range" for every address range in input.xml.
    my $xml = "XML::LibXML"->load_xml(location => "input.xml");

    for my $hg ($xml->findnodes('//host-group')) {
        # Names of this group and all enclosing groups, joined with dots.
        my $path = join ".",
            $hg->findnodes('ancestor-or-self::host-group')->map(sub { $_->{name} });

        # Only the ranges directly under this group; one element may hold
        # several comma-separated ranges.
        my @ips = $hg->findnodes('ip-address-ranges')
                     ->map(sub { split /\s*,\s*/, $_->textContent });

        print "$path\t$_\n" for @ips;
    }
    use v5.14;    # enables `say` and the `package NAME { ... }` block form

    # Minimal Moo class with one lazily built, read-only attribute.
    package Cow {
        use Moo;
        has name => (is => 'lazy', default => sub { 'Mooington' });
    }

    say Cow->new->name;

      That's perfect and solved my issue! Thank you!

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlquestion [id://1040510]
Approved by greengaroo
Front-paged by greengaroo
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others surveying the Monastery: (7)
As of 2014-08-28 01:10 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The best computer themed movie is:











    Results (254 votes), past polls