Beefy Boxes and Bandwidth Generously Provided by pair Networks
Think about Loose Coupling
 
PerlMonks  

Re: LibXml - Convert XML to CSV from API Feed

by Anonymous Monk
on Apr 10, 2013 at 11:03 UTC ( #1027948=note: print w/ replies, xml ) Need Help??


in reply to LibXml - Convert XML to CSV from API Feed

A twiggy start

#!/usr/bin/perl --
# Demo: pull dimension names, row names and measure values out of a
# "DimensionalReport" XML document with XML::Twig.  Two sample documents are
# embedded in myData(); Main() runs parseJmeter4() over both and dumps the
# results.  The older parseJmeter* variants are kept as earlier attempts.
use strict;
use warnings;
use autodie;
use utf8;
use XML::Twig;
use Data::Dump qw/ dd pp /;
use Text::CSV;

Main( @ARGV );
exit( 0 );

# Main(@args)
#   Entry point.  Arguments are accepted but not used; the input comes from
#   the two XML samples returned by myData().  Dumps the return values of
#   parseJmeter4() for each sample to STDOUT, separated by a row of '#'.
sub Main {
    #~ parseJmeter( \*DATA, \*STDOUT );
    # DEMO
    my( $in1, $in2 ) = myData();

    #~ my $csv = Text::CSV->new ( { auto_diag => 1, allow_loose_quotes => 1, allow_loose_escapes => 1,} );
    #~ dd parseJmeter( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter2( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter2( $$in2, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter3( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter3( $$in2, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";

    dd parseJmeter4( $$in1, \*STDOUT, [] );
    print "\n", '#' x 33, "\n";
    dd parseJmeter4( $$in2, \*STDOUT, ['WTDate'] );
    print "\n", '#' x 33, "\n";
} ## end sub Main

# parseJmeter4( $input, $outHandle, \@names_init )
#   $input       - anything XML::Twig's xparse() accepts (Main passes the
#                  XML text itself).
#   $outHandle   - accepted for symmetry with the other variants; not used.
#   \@names_init - column names to seed the name list with, ahead of the
#                  dimension names found in the document.
#
#   Returns the list ( \%stuff, \@dimension_names ): a name => value hash and
#   the ordered list of names (seed names, dimension names, then measure
#   names as they are encountered).  Each matched element's path and the
#   final name/value pairs are written to STDERR via warn.
#
#   NOTE(review): XML::Twig handlers run when an element has been fully
#   parsed, so a DataRow's handler presumably fires after the handlers of the
#   rows and floats nested inside it.  Row names are therefore assigned to
#   @dimension_names slots in closing order, and measure names may already
#   have been pushed by then -- confirm the mapping is what is intended.
sub parseJmeter4 {
    my( $inFilenameOrHandle, $outHandle, $names_init ) = @_;
    my %stuff;
    my @dimension_names = ( @$names_init );
    my $name_index      = 0;    # next @dimension_names slot to give a row name

    my $t = XML::Twig->new(
        twig_handlers => {
            # Declared dimension names from the report definition.
            q{/DimensionalReport/ReportDefinition/list[@name="Dimensions"]/Dimension/string[@name="name"]} => sub {
                warn $_->path, "\n";
                push @dimension_names, $_->trimmed_text;
                $stuff{ $dimension_names[-1] } ||= 'null';    ## initialize
            },

            # Top-level data row.
            q{/DimensionalReport/list[@name="data"]/DataRow} => sub {
                warn $_->path, "\n";
                my $tp_name = $dimension_names[ $name_index++ ];
                $stuff{$tp_name} = $_->att( 'name' );
            },

            # Data row below the "SubRows" lists.
            q{//list/[@name="SubRows"]/DataRow} => sub {
                warn $_->path, "\n";
                my $tp_name = $dimension_names[ $name_index++ ];
                $stuff{$tp_name} = $_->att( 'name' );
            },

            # Measure values; the float's name attribute becomes the key.
            q{//list[@name="measures"]/float} => sub {
                warn $_->path, "\n";
                push @dimension_names, $_->att( 'name' );
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
            },
        }
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;

    # values() aliases the hash elements, so this rewrites empty values in
    # place to the string "null".
    length $_ or $_ = "null" for values %stuff;    ## init not enough

    warn pp [ map { [ $_ => $stuff{$_} ] } @dimension_names ];
    return \%stuff, \@dimension_names;
} ## end sub parseJmeter4

# parseJmeter3( $input, $outHandle )
#   Earlier variant of parseJmeter4: the name list is always seeded with
#   'WTDate', every match is also printed to STDOUT as "name = value", and a
#   marker key 'giveup_input_data_too_cryptic' is added to the result.
#   $outHandle is not used.  Dumps the ordered name/value pairs with dd and
#   returns \%stuff.
sub parseJmeter3 {
    my( $inFilenameOrHandle, $outHandle ) = @_;
    my %stuff;
    my @dimension_names = ( 'WTDate' );
    my $name_index      = 0;

    my $t = XML::Twig->new(
        twig_handlers => {
            q{/DimensionalReport/ReportDefinition/list[@name="Dimensions"]/Dimension/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
                push @dimension_names, $_->trimmed_text;
                $stuff{ $dimension_names[-1] } ||= 'null';    ## initialize
            },
            q{/DimensionalReport/list[@name="data"]/DataRow} => sub {
                warn $_->path;    ## children
                my $tp_name = $dimension_names[ $name_index++ ];
                print $tp_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$tp_name} = $_->att( 'name' );
            },
            q{//list/[@name="SubRows"]/DataRow} => sub {
                warn $_->path;    ## children
                my $tp_name = $dimension_names[ $name_index++ ];
                print $tp_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$tp_name} = $_->att( 'name' );
            },
            q{//list[@name="measures"]/float} => sub {
                warn $_->path;    ## children
                print $_->att( 'name' ), ' = ', $_->trimmed_text, "\n";
                push @dimension_names, $_->att( 'name' );
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
            },
        }
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;

    $stuff{giveup_input_data_too_cryptic} = 11_000;
    length $_ or $_ = "null" for values %stuff;    ## init not enough

    dd [ map { [ $_ => $stuff{$_} ] } @dimension_names ];
    return \%stuff;
} ## end sub parseJmeter3

# parseJmeter2( $input, $outHandle )
#   Earlier variant: collects dimension names into a list, prints measure
#   names, stores float values found under SubRows data rows, and assigns
#   each SubRows data row's name attribute to the next collected dimension
#   name.  $outHandle is not used.  Returns \%stuff.
sub parseJmeter2 {
    my( $inFilenameOrHandle, $outHandle ) = @_;
    my %stuff;
    my $toilet_paper_name_index = 0;
    my @toilet_paper_name;

    my $t = XML::Twig->new(
        twig_handlers => {
            #~ '/DimensionalReport/ReportDefinition/list/Dimension' => sub {
            q{//Dimension/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
                push @toilet_paper_name, $_->trimmed_text;
            },

            #~ '/DimensionalReport/ReportDefinition/list/Measure' => sub {
            q{//Measure/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
            },
            q{//list/[@name="SubRows"]/DataRow/list[@name="measures"]/float} => sub {
                warn $_->path;    ## children
                print $_->att( 'name' ), ' = ', $_->trimmed_text, "\n";
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
                return;
            },

            #~ q{/DimensionalReport/list[@name="data"]/DataRow/list[@name="data"]/DataRow[@name]} => sub {
            #~ q{//list[@name="data"]/DataRow} => sub {
            #~ q{//list[@name]/DataRow} => sub {
            q{//list/[@name="SubRows"]/DataRow} => sub {
                warn $_->path;    ## children
                my $tp_name = $toilet_paper_name[ $toilet_paper_name_index++ ];
                print $tp_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$tp_name} = $_->att( 'name' );
            },
        },
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;
    return \%stuff;
} ## end sub parseJmeter2

# parseJmeter( $input, $outHandle )
#   First variant: remembers only the most recently seen dimension name and
#   stores each matching data row's name attribute under it; float values
#   are stored under their own name attribute.  $outHandle is not used.
#   Dumps the result with dd and returns \%stuff.
sub parseJmeter {
    my( $inFilenameOrHandle, $outHandle ) = @_;
    my %stuff;
    my $toilet_paper_name = '';

    my $t = XML::Twig->new(
        twig_handlers => {
            #~ '/DimensionalReport/ReportDefinition/list/Dimension' => sub {
            #~ '//Dimension/string' => sub {
                #~ warn $_->path;
                #~ print $_->att('name'), '=', $_->trimmed_text, "\n";
            #~ },
            q{//Dimension/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
                $toilet_paper_name = $_->trimmed_text;
            },

            #~ '/DimensionalReport/ReportDefinition/list/Measure' => sub {
            #~ '//Measure/string' => sub {
            q{//Measure/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
            },
            q{//list[@name="measures"]/float} => sub {
                warn $_->path;    ## children
                print $_->att( 'name' ), ' = ', $_->trimmed_text, "\n";
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
                return;
            },

            #~ q{/DimensionalReport/list[@name="data"]/DataRow/list[@name="data"]/DataRow[@name]} => sub {
            #~ q{//list[@name="data"]/DataRow} => sub {
            #~ q{//list[@name]/DataRow} => sub {
            q{//list/list[@name]/DataRow} => sub {
                warn $_->path;    ## children
                print $toilet_paper_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$toilet_paper_name} = $_->att( 'name' );
            },
        },
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;
    dd \%stuff;
    return \%stuff;
} ## end sub parseJmeter

# myData()
#   Returns references to two sample XML documents ( \$in1, \$in2 ), each
#   with leading and trailing whitespace removed.  The XML is indented here
#   for readability only.
sub myData {
    #~ https://ezcrypt.it/jk6n#4qRm2gc3F7f0RnOMqL5bPaYl
    my $in1 = <<'__IN1__';
<?xml version="1.0"?>
<DimensionalReport>
  <ReportDefinition>
    <decimal name="accountID">20510</decimal>
    <string name="profileID">zJt6RUMjXg6</string>
    <string name="ID">VSlaqtDP0P6</string>
    <string name="name">Key Metrics</string>
    <string name="Description">A10_RPT_Key_Metrics_Summary_SD</string>
    <string name="language"/>
    <list name="Dimensions">
      <Dimension>
        <string name="ID">timeperiod</string>
        <string name="name">Time Period</string>
      </Dimension>
    </list>
    <list name="measures">
      <Measure>
        <string name="name">Active Visits</string>
        <string name="ID">Visits</string>
        <decimal name="columnID">0</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Page Views</string>
        <string name="ID">CAoNYspmFb5</string>
        <decimal name="columnID">1</decimal>
        <string name="measureFormatType"/>
      </Measure>
    </list>
  </ReportDefinition>
  <list name="data">
    <DataRow name="2013-01-01">
      <list name="attributes"/>
      <list name="measures"/>
      <list name="SubRows">
        <list name="SubRows">
          <DataRow name="1/1/2013">
            <list name="attributes"/>
            <list name="measures">
              <float name="Active Visits">9609.00</float>
              <float name="Page Views">36456.00</float>
              <float name="Clickthroughs">604.00</float>
              <float name="Daily Visitors">9265.00</float>
              <float name="Weekly Visitors">8948.00</float>
              <float name="Monthly Visitors">9265.00</float>
              <float name="Quarterly Visitors">9265.00</float>
              <float name="Yearly Visitors">9265.00</float>
              <float name="Single Page View Visits">4444.00</float>
              <float name="Entry Page Visits">9608.00</float>
              <float name="Bounce Rate">46.25</float>
            </list>
          </DataRow>
        </list>
      </list>
    </DataRow>
  </list>
</DimensionalReport>
__IN1__

    #~ https://ezcrypt.it/kk6n#eVINakVb1teCRmxKvdbtirFi
    my $in2 = <<'__IN2__';
<?xml version="1.0"?>
<DimensionalReport>
  <ReportDefinition>
    <decimal name="accountID">some_id</decimal>
    <string name="profileID">some_id</string>
    <string name="ID">some_id</string>
    <string name="name">Campaigns by DMA</string>
    <string name="Description">This report shows campaign activity originating from different Designated Marketing Areas (DMAs) over the report time period. All activities are tracked and attributed to the last campaign to which visitors responded, even if this most recent campaign was seen prior to the current visit.</string>
    <string name="language"/>
    <list name="Dimensions">
      <Dimension>
        <string name="ID">dma</string>
        <string name="name">DMA</string>
      </Dimension>
      <Dimension>
        <string name="ID">5oTnPv0snj5</string>
        <string name="name">Most Recent Campaign Demand Channel</string>
      </Dimension>
      <Dimension>
        <string name="ID">KuU7oeAsnj5</string>
        <string name="name">Most Recent Campaign Partner</string>
      </Dimension>
      <Dimension>
        <string name="ID">BlpOxH5snj5</string>
        <string name="name">Most Recent Campaign Marketing Program</string>
      </Dimension>
      <Dimension>
        <string name="ID">Q2bjod3snj5</string>
        <string name="name">Most Recent Campaign Marketing Activity</string>
      </Dimension>
      <Dimension>
        <string name="ID">oda6Rji1Oj5</string>
        <string name="name">Most Recent Campaign Description</string>
      </Dimension>
      <Dimension>
        <string name="ID">9STq0Y0Snj5</string>
        <string name="name">Most Recent Campaign ID</string>
      </Dimension>
    </list>
    <list name="measures">
      <Measure>
        <string name="name">Visits</string>
        <string name="ID">Visits</string>
        <decimal name="columnID">0</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Page Views</string>
        <string name="ID">CAoNYspmFb5</string>
        <decimal name="columnID">1</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Clickthroughs</string>
        <string name="ID">6wSvdu0AOj5</string>
        <decimal name="columnID">2</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Orders</string>
        <string name="ID">lEo2K44n7l5</string>
        <decimal name="columnID">3</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Revenue</string>
        <string name="ID">qMKL354n7l5</string>
        <decimal name="columnID">4</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Revenue per Order</string>
        <string name="ID">qMKL354n7l5_Average</string>
        <decimal name="columnID">5</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Units</string>
        <string name="ID">pflHFpFmEL5</string>
        <decimal name="columnID">6</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Units per Order</string>
        <string name="ID">pflHFpFmEL5_Average</string>
        <decimal name="columnID">7</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Visit Duration (Minutes)</string>
        <string name="ID">9VUp0ikQJb5</string>
        <decimal name="columnID">9</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Visit Page Views</string>
        <string name="ID">RaAkw6wZzB5</string>
        <decimal name="columnID">10</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Hits</string>
        <string name="ID">Hits</string>
        <decimal name="columnID">11</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Daily Campaign Visitors</string>
        <string name="ID">JaLVXfZw4K5</string>
        <decimal name="columnID">12</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Weekly Campaign Visitors</string>
        <string name="ID">N2tZxjIX4K5</string>
        <decimal name="columnID">13</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Monthly Campaign Visitors</string>
        <string name="ID">Jdc329JX4K5</string>
        <decimal name="columnID">14</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Quarterly Campaign Visitors</string>
        <string name="ID">GozsrIjX4K5</string>
        <decimal name="columnID">15</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Yearly Campaign Visitors</string>
        <string name="ID">KrLEUoKX4K5</string>
        <decimal name="columnID">16</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">New Campaign Visitors</string>
        <string name="ID">TwdhsVkX4K5</string>
        <decimal name="columnID">17</decimal>
        <string name="measureFormatType"/>
      </Measure>
    </list>
  </ReportDefinition>
  <list name="data">
    <DataRow name="2013-01-01">
      <list name="attributes"/>
      <list name="measures"/>
      <list name="SubRows">
        <list name="SubRows">
          <DataRow name="500 (Portland-Auburn:ME-NH)">
            <list name="attributes"/>
            <list name="measures"/>
            <DataRow name="None">
              <list name="attributes"/>
              <list name="measures"/>
              <DataRow name="None">
                <list name="attributes"/>
                <list name="measures"/>
                <DataRow name="None">
                  <list name="attributes"/>
                  <list name="measures"/>
                  <DataRow name="None">
                    <list name="attributes"/>
                    <list name="measures"/>
                    <DataRow name="None">
                      <list name="attributes"/>
                      <list name="measures"/>
                      <DataRow name="958707">
                        <list name="attributes"/>
                        <list name="measures">
                          <float name="Visits">4.00</float>
                          <float name="Page Views">4.00</float>
                          <float name="Clickthroughs">4.00</float>
                          <float name="Orders">0.00</float>
                          <float name="Revenue">0.00</float>
                          <float name="Units">0.00</float>
                          <float name="Hits">4.00</float>
                          <float name="Daily Campaign Visitors">2.00</float>
                          <float name="Weekly Campaign Visitors">0.00</float>
                          <float name="Monthly Campaign Visitors">2.00</float>
                          <float name="Quarterly Campaign Visitors">2.00</float>
                          <float name="Yearly Campaign Visitors">2.00</float>
                          <float name="New Campaign Visitors">0.00</float>
                          <float name="Average Revenue per Order"/>
                          <float name="Average Units per Order"/>
                          <float name="Average Visit Duration (Minutes)">0.00</float>
                          <float name="Average Visit Page Views">1.00</float>
                        </list>
                      </DataRow>
                    </DataRow>
                  </DataRow>
                </DataRow>
              </DataRow>
            </DataRow>
          </DataRow>
        </list>
      </list>
    </DataRow>
  </list>
</DimensionalReport>
__IN2__

    # Strip leading/trailing whitespace so each document starts at "<?xml".
    s/^\s+//, s/\s+$// for $in1, $in2;
    return \$in1, \$in2;
} ## end sub myData


Comment on Re: LibXml - Convert XML to CSV from API Feed
Download Code
Replies are listed 'Best First'.
Re^2: LibXml - Convert XML to CSV from API Feed
by Devon (Acolyte) on Apr 22, 2013 at 01:58 UTC
    I haven't had a chance to work on this in a while, but I just wanted to say I appreciate the responses.

    I should have time to flesh it out this week and will post back with the final product.

    Thanks again

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://1027948]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (8)
As of 2015-07-31 05:06 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (274 votes), past polls