Beefy Boxes and Bandwidth Generously Provided by pair Networks DiBona
XP is just a number
 
PerlMonks  

recursive alternative for nested array to JSON script?

by davepilbeam (Initiate)
on Sep 29, 2011 at 15:57 UTC ( #928605=perlquestion: print w/ replies, xml ) Need Help??
davepilbeam has asked for the wisdom of the Perl Monks concerning the following question:

Hi, I'm converting a set of html list tags to output a JSON string. The ugly, painfully-derived code below will produce the required data structure, but only if hard-coded to the correct depth.
It's well beyond my abilities to get this conversion to work recursively but, starting with the @path data below, how would the experts approach this?
1/ Html list example (from nested menu structure):

<li><a href="News-and-Media.html">News and Media</a></li> <li><a href="Partners.html">Partners</a> <ul> <li><a href="Partners_Subpage1.html">Subpage1</a> <ul> <li><a href="Partners_Subpage1_Page1.html">Page1</a> <ul> <li><a href="Partners_Subpage1_Page1_First.html">First</a></li> <li><a href="Partners_Subpage1_Page1_Second.html">Second</a></li> </ul> </li> <li><a href="Partners_Subpage1_Page2.html">Page2</a></li> <li><a href="Partners_Subpage1_Page3.html">Page3</a></li> </ul> </li> <li><a href="Partners_Subpage2.html">Subpage1</a> <ul> <li><a href="Partners_Subpage2_Page1.html">Page1</a> <ul> <li><a href="Partners_Subpage2_Page1_First2.html">First</a></li> <li><a href="Partners_Subpage2_Page1_Second2.html">Second</a></li> </ul> </li> </ul> </li> </ul> </li> etc etc


2/ Required result - JSON format, TITLE: {URL:PROPERTY} nested objects

{ "Site-Map.html":{ "Home":{ "Home.html":"" }, "About":{ "About.html":"" }, "Products":{ "Products.html":"" }, "Solutions":{ "Solutions.html":"" }, "Services":{ "Services.html":"pass","Subpage1":{"Services_Subpage1.htm +l":"" },"Subpage2":{"Services_Subpage2.html":"" } }, "Support":{ "Support.html":"pass" }, "News-and-Media":{ "News-and-Media.html":"" }, "Partners":{ "Partners.html":"","Subpage1":{ "Partners_Subpage1.html": +"","Page1":{ "Partners_Subpage1_Page1.html":"","First":{ "Partners_Su +bpage1_Page1_First.html":"" },"Second":{ "Partners_Subpage1_Page1_Sec +ond.html":"" } },"Page2":{ "Partners_Subpage1_Page2.html":"" },"Page3 +":{ "Partners_Subpage1_Page3.html":"" } },"Subpage2":{ "Partners_Subp +age2.html":"","Page1":{ "Partners_Subpage2_Page1.html":"","First2":{ +"Partners_Subpage2_Page1_First2.html":"" },"Second2":{ "Partners_Subp +age2_Page1_Second2.html":"" } } } }, "Blog2":{ "http://www.etc.uk/index.html":"" }, "Contact-Us":{ "Contact-Us.html":"" }, "Privacy":{ "Privacy.html":"hid" }, "Terms":{ "Terms.html":"hid" }, "Site-Map":{ "Site-Map.html":"hid" } } }


3/ Existing code:

use strict; use warnings; my %PAGES = (); my @path = ( ###get list input from html file into array [["Home"],"\"Home.html\":\"\""], [["About"],"\"About.html\":\"\""], [["Products"],"\"Products.html\":\"\""], [["Solutions"],"\"Solutions.html\":\"\""], [["Services"],"\"Services.html\":\"pass\""], [["Services","Subpage1"],"\"Services_Subpage1.html\":\"\""], [["Services","Subpage2"],"\"Services_Subpage2.html\":\"\""], [["Support"],"\"Support.html\":\"pass\""], [["News-and-Media"],"\"News-and-Media.html\":\"\""], [["Partners"],"\"Partners.html\":\"\""], [["Partners","Subpage1"],"\"Partners_Subpage1.html\":\"\""], [["Partners","Subpage1","Page1"],"\"Partners_Subpage1_Page1.html\":\"\ +""], [["Partners","Subpage1","Page1","First"],"\"Partners_Subpage1_Page1_Fi +rst.html\":\"\""], [["Partners","Subpage1","Page1","Second"],"\"Partners_Subpage1_Page1_S +econd.html\":\"\""], [["Partners","Subpage1","Page2"],"\"Partners_Subpage1_Page2.html\":\"\ +""], [["Partners","Subpage1","Page3"],"\"Partners_Subpage1_Page3.html\":\"\ +""], [["Partners","Subpage2"],"\"Partners_Subpage2.html\":\"\""], [["Partners","Subpage2","Page1"],"\"Partners_Subpage2_Page1.html\":\"\ +""], [["Partners","Subpage2","Page1","First2"],"\"Partners_Subpage2_Page1_F +irst2.html\":\"\""], [["Partners","Subpage2","Page1","Second2"],"\"Partners_Subpage2_Page1_ +Second2.html\":\"\""], [["Blog2"],"\"http://www.etc.uk/index.html\":\"\""], [["Contact-Us"],"\"Contact-Us.html\":\"\""], [["Privacy"],"\"Privacy.html\":\"hid\""], [["Terms"],"\"Terms.html\":\"hid\""], [["Site-Map"],"\"Site-Map.html\":\"hid\""] ); my $out = "{ \"Site-Map.html\":{ "; my %PAGES = (); my @SET = (); my @temp = (); my $c = 0; for my $i(0..$#path){ if( !$PAGES{$path[$i][0][0]} ){$PAGES{$path[$i][0][0]} = [$i];} if( $#{$path[$i][0]} == 0 ){ push @{ $PAGES{$path[$i][0][0]} },"\"$path[$i][0][0]\":{ ".$path[$i][1 +]; #%hh } elsif( $#{$path[$i][0]} == 1 ){ $PAGES{$path[$i][0][0]}[2]{$path[$i][0][1]} = [ $i,"\"$path[$i][0][1]\ +":{ ".$path[$i][1] ]; } elsif( $#{$path[$i][0]} == 2 ){ $PAGES{$path[$i][0][0]}[2]{$path[$i][0][1]}[2]{$path[$i][0][2]} = [ $i +,"\"$path[$i][0][2]\":{ ".$path[$i][1] ]; } elsif( $#{$path[$i][0]} == 3 ){ $PAGES{$path[$i][0][0]}[2]{$path[$i][0][1]}[2]{$path[$i][0][2]}[2]{$pa +th[$i][0][3]} = [ $i,"\"$path[$i][0][3]\":{ ".$path[$i][1] ]; } else { #descend to madness } } foreach my $s( sort { $PAGES{$a}[0] <=> $PAGES{$b}[0] } keys %PAGES ){ $out.= $PAGES{$s}[1]; if( $PAGES{$s}[2] ){ my @s1 = (); foreach my $t( sort { $PAGES{$s}[2]{$a}[0] <=> $PAGES{$s}[2]{$b}[0] } +keys %{ $PAGES{$s}[2] } ){ push @s1,$PAGES{$s}[2]{$t}[1]; if( $PAGES{$s}[2]{$t}[2] ){ my @s2 = (); my $o2 = ""; foreach my $u( sort { $PAGES{$s}[2]{$t}[2]{$a}[0] <=> $PAGES{$s}[2]{$t +}[2]{$b}[0] } keys %{ $PAGES{$s}[2]{$t}[2] } ){ push @s2,$PAGES{$s}[2]{$t}[2]{$u}[1]; if( $PAGES{$s}[2]{$t}[2]{$u}[2] ){ my @s3 = (); my $o3 = ""; foreach my $v( sort { $PAGES{$s}[2]{$t}[2]{$u}[2]{$a}[0] <=> $PAGES{$s +}[2]{$t}[2]{$u}[2]{$b}[0] } keys %{ $PAGES{$s}[2]{$t}[2]{$u}[2] } ){ push @s3,$PAGES{$s}[2]{$t}[2]{$u}[2]{$v}[1]; } if($s3[0]){$o3 = join " },",@s3;$o3.= " }";$s2[-1].= ",".$o3;} } } if($s2[0]){$o2 = join " },",@s2;$o2.= " }";$s1[-1].= ",".$o2;} } } if($s1[0]){$out.= ",".(join " },",@s1)." } },";} } else { $out.= " },"; } } $out =~ s/,$//; $out.= " } }"; print $out;

Comment on recursive alternative for nested array to JSON script?
Select or Download Code
Re: recursive alternative for nested array to JSON script?
by Anonymous Monk on Sep 29, 2011 at 16:10 UTC
    I put your JSON document into JSON::decode_json and it results in this Perl data structure:
    { "Site-Map.html" => { About => {"About.html" => ''}, Blog2 => {"http://www.etc.uk/index.html" => ''}, "Contact-Us" => {"Contact-Us.html" => ''}, Home => {"Home.html" => ''}, "News-and-Media" => {"News-and-Media.html" => ''}, Partners => { "Partners.html" => '', Subpage1 => { Page1 => { First => {"Partners_Subpa +ge1_Page1_First.html" => ''}, "Partners_Subpage1_Page1.html" => '', Second => {"Partners_Subpa +ge1_Page1_Second.html" => ''} }, Page2 => {"Partners_Subpage1_Page2. +html" => ''}, Page3 => {"Partners_Subpage1_Page3. +html" => ''}, "Partners_Subpage1.html" => '' }, Subpage2 => { Page1 => { First2 => {"Partners_Subpa +ge2_Page1_First2.html" => ''}, "Partners_Subpage2_Page1.html" => '', Second2 => {"Partners_Subpa +ge2_Page1_Second2.html" => ''} }, "Partners_Subpage2.html" => '' } }, Privacy => {"Privacy.html" => 'hid'}, Products => {"Products.html" => ''}, Services => { "Services.html" => 'pass', Subpage1 => {"Services_Subpage1.html" => ''}, Subpage2 => {"Services_Subpage2.html" => ''} }, "Site-Map" => {"Site-Map.html" => 'hid'}, Solutions => {"Solutions.html" => ''}, Support => {"Support.html" => 'pass'}, Terms => {"Terms.html" => 'hid'} } }
Re: recursive alternative for nested array to JSON script?
by Anonymous Monk on Sep 29, 2011 at 23:35 UTC

    how would the experts approach this?

    Go to cpan and look for JSON

      well yes, I currently use JSON to do a similar recursive directory list output, where I don't need to retain the order of the pages:

      use strict; use warnings; use JSON; sub rec_tree { my $n = $_[0] = {}; my @s; find( sub { $n = (pop @s)->[1] while @s and $File::Find::dir ne $s[-1][0]; return $n->{$_} = data_me($_) if -f; /$bandir/ and $File::Find::prune = 1; /$hidedir/ and $File::Find::prune = 1; push @s,[ $File::Find::name,$n ]; $n = $n->{$_} = {}; }, $_[1]); $_[0]{$_[1]} = delete $_[0]{'.'}; } my $out; rec_tree($out,shift); my $jsout = JSON->new->allow_nonref->encode($out);


      but as I mentioned, a similar recursive approach to parsing the @path data above defeats me, hence the hard coding, which at least retains the correct order in the output..

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: perlquestion [id://928605]
Approved by keszler
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others avoiding work at the Monastery: (10)
As of 2014-04-16 23:43 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    April first is:







    Results (436 votes), past polls