Beefy Boxes and Bandwidth Generously Provided by pair Networks
Keep It Simple, Stupid
 
PerlMonks  

Re: LibXml - Convert XML to CSV from API Feed

by Anonymous Monk
on Apr 10, 2013 at 11:03 UTC ( #1027948=note: print w/ replies, xml ) Need Help??


in reply to LibXml - Convert XML to CSV from API Feed

A twiggy start

#!/usr/bin/perl --
use strict;
use warnings;
use autodie;
use utf8;
use XML::Twig;
use Data::Dump qw/ dd pp /;
use Text::CSV;

Main( @ARGV );
exit( 0 );

# Entry point: runs parseJmeter4 over the two embedded sample reports and
# dumps what it returns to STDOUT, with a row of '#' after each dump.
# The "#~" lines are earlier experiments kept for reference.
sub Main {
    #~ parseJmeter( \*DATA, \*STDOUT ); # DEMO
    my( $in1, $in2 ) = myData();

    #~ my $csv = Text::CSV->new ( { auto_diag => 1, allow_loose_quotes => 1, allow_loose_escapes => 1,} );
    #~ dd parseJmeter( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter2( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter2( $$in2, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter3( $$in1, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    #~ dd parseJmeter3( $$in2, \*STDOUT );
    #~ print "\n", '#' x 33, "\n";
    dd parseJmeter4( $$in1, \*STDOUT, [] );
    print "\n", '#' x 33, "\n";
    dd parseJmeter4( $$in2, \*STDOUT, ['WTDate'] );
    print "\n", '#' x 33, "\n";
} ## end sub Main

# parseJmeter4( $inFilenameOrHandle, $outHandle, $names_init )
#
# Parses a DimensionalReport document with XML::Twig and collects one value
# per column name.
#
#   $inFilenameOrHandle  whatever XML::Twig's xparse accepts (Main passes the
#                        XML text itself)
#   $outHandle           accepted for symmetry with the other parsers; unused
#   $names_init          array ref of column names that precede the names
#                        declared in the report's Dimensions list
#
# Column names are gathered in document order: first $names_init, then each
# Dimension name, then each measure name.  DataRow "name" attributes are
# assigned to those names positionally through $name_index.
#
# Returns a two-element list: ( \%stuff, \@dimension_names ).
# Every handler also warns the path of the element it fired on.
sub parseJmeter4 {
    my( $inFilenameOrHandle, $outHandle, $names_init ) = @_;
    my %stuff;
    my @dimension_names = ( @$names_init );
    my $name_index      = 0;
    my $t               = XML::Twig->new(
        twig_handlers => {
            q{/DimensionalReport/ReportDefinition/list[@name="Dimensions"]/Dimension/string[@name="name"]} => sub {
                warn $_->path, "\n";
                push @dimension_names, $_->trimmed_text;
                $stuff{ $dimension_names[-1] } ||= 'null';    ## initialize
            },
            q{/DimensionalReport/list[@name="data"]/DataRow} => sub {
                warn $_->path, "\n";
                my $tp_name = $dimension_names[ $name_index++ ];
                $stuff{$tp_name} = $_->att( 'name' );
            },
            q{//list/[@name="SubRows"]/DataRow} => sub {
                warn $_->path, "\n";
                my $tp_name = $dimension_names[ $name_index++ ];
                $stuff{$tp_name} = $_->att( 'name' );
            },
            q{//list[@name="measures"]/float} => sub {
                warn $_->path, "\n";
                push @dimension_names, $_->att( 'name' );
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
            },
        }
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;

    # Empty values (e.g. <float name="..."/>) are reported as "null" too.
    length $_ or $_ = "null" for values %stuff;    ## init not enough
    warn pp [ map { [ $_ => $stuff{$_} ] } @dimension_names ];
    return \%stuff, \@dimension_names;
} ## end sub parseJmeter4

# parseJmeter3( $inFilenameOrHandle, $outHandle )
#
# Earlier variant of parseJmeter4: the leading column name 'WTDate' is
# hard-coded, every handler prints "name = value" to STDOUT, and the ordered
# name/value pairs are dumped with dd before returning.
# $outHandle is accepted but unused.
#
# Returns \%stuff (hash ref of column name => value).
sub parseJmeter3 {
    my( $inFilenameOrHandle, $outHandle ) = @_;
    my %stuff;
    my @dimension_names = ( 'WTDate' );
    my $name_index      = 0;
    my $t               = XML::Twig->new(
        twig_handlers => {
            q{/DimensionalReport/ReportDefinition/list[@name="Dimensions"]/Dimension/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
                push @dimension_names, $_->trimmed_text;
                $stuff{ $dimension_names[-1] } ||= 'null';    ## initialize
            },
            q{/DimensionalReport/list[@name="data"]/DataRow} => sub {
                warn $_->path;    ## children
                my $tp_name = $dimension_names[ $name_index++ ];
                print $tp_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$tp_name} = $_->att( 'name' );
            },
            q{//list/[@name="SubRows"]/DataRow} => sub {
                warn $_->path;    ## children
                my $tp_name = $dimension_names[ $name_index++ ];
                print $tp_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$tp_name} = $_->att( 'name' );
            },
            q{//list[@name="measures"]/float} => sub {
                warn $_->path;    ## children
                print $_->att( 'name' ), ' = ', $_->trimmed_text, "\n";
                push @dimension_names, $_->att( 'name' );
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
            },
        }
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;
    $stuff{giveup_input_data_too_cryptic} = 11_000;
    length $_ or $_ = "null" for values %stuff;    ## init not enough
    dd [ map { [ $_ => $stuff{$_} ] } @dimension_names ];
    return \%stuff;
} ## end sub parseJmeter3

# parseJmeter2( $inFilenameOrHandle, $outHandle )
#
# Second experiment: Dimension names are queued in @toilet_paper_name and
# handed out positionally to the DataRow elements matched by the SubRows
# handler; float values matched by the SubRows/.../measures handler are stored
# under their own "name" attribute.  Each handler prints what it saw.
# $outHandle is accepted but unused.
#
# Returns \%stuff (hash ref of name => value).
sub parseJmeter2 {
    my( $inFilenameOrHandle, $outHandle ) = @_;
    my %stuff;
    my $toilet_paper_name_index = 0;
    my @toilet_paper_name;
    my $t = XML::Twig->new(
        twig_handlers => {
            #~ '/DimensionalReport/ReportDefinition/list/Dimension' => sub {
            q{//Dimension/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
                push @toilet_paper_name, $_->trimmed_text;
            },
            #~ '/DimensionalReport/ReportDefinition/list/Measure' => sub {
            q{//Measure/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
            },
            q{//list/[@name="SubRows"]/DataRow/list[@name="measures"]/float} => sub {
                warn $_->path;    ## children
                print $_->att( 'name' ), ' = ', $_->trimmed_text, "\n";
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
                return;
            },
            #~ q{/DimensionalReport/list[@name="data"]/DataRow/list[@name="data"]/DataRow[@name]} => sub {
            #~ q{//list[@name="data"]/DataRow} => sub {
            #~ q{//list[@name]/DataRow} => sub {
            q{//list/[@name="SubRows"]/DataRow} => sub {
                warn $_->path;    ## children
                my $tp_name = $toilet_paper_name[ $toilet_paper_name_index++ ];
                print $tp_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$tp_name} = $_->att( 'name' );
            },
        },
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;
    return \%stuff;
} ## end sub parseJmeter2

# parseJmeter( $inFilenameOrHandle, $outHandle )
#
# First experiment: remembers only the most recently seen Dimension name and
# stores the matched DataRow's "name" attribute under it; every measures/float
# value is stored under its own "name" attribute.  Handlers print what they
# saw and the result is dumped with dd before returning.
# $outHandle is accepted but unused.
#
# Returns \%stuff (hash ref of name => value).
sub parseJmeter {
    my( $inFilenameOrHandle, $outHandle ) = @_;
    my %stuff;
    my $toilet_paper_name = '';
    my $t                 = XML::Twig->new(
        twig_handlers => {
            #~ '/DimensionalReport/ReportDefinition/list/Dimension' => sub {
            #~ '//Dimension/string' => sub {
            #~ warn $_->path;
            #~ print $_->att('name'), '=', $_->trimmed_text, "\n";
            #~ },
            q{//Dimension/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
                $toilet_paper_name = $_->trimmed_text;
            },
            #~ '/DimensionalReport/ReportDefinition/list/Measure' => sub {
            #~ '//Measure/string' => sub {
            q{//Measure/string[@name="name"]} => sub {
                warn $_->path;
                print $_->trimmed_text, " = \n";
            },
            q{//list[@name="measures"]/float} => sub {
                warn $_->path;    ## children
                print $_->att( 'name' ), ' = ', $_->trimmed_text, "\n";
                $stuff{ $_->att( 'name' ) } = $_->trimmed_text;
                return;
            },
            #~ q{/DimensionalReport/list[@name="data"]/DataRow/list[@name="data"]/DataRow[@name]} => sub {
            #~ q{//list[@name="data"]/DataRow} => sub {
            #~ q{//list[@name]/DataRow} => sub {
            q{//list/list[@name]/DataRow} => sub {
                warn $_->path;    ## children
                print $toilet_paper_name , ' = ', $_->att( 'name' ), "\n";
                $stuff{$toilet_paper_name} = $_->att( 'name' );
            },
        },
    );
    $t->xparse( $inFilenameOrHandle );
    $t->purge;
    dd \%stuff;
    return \%stuff;
} ## end sub parseJmeter

# myData()
#
# Returns references to the two embedded sample reports as a list
# ( \$in1, \$in2 ).  Leading and trailing whitespace is stripped from each
# document before it is returned.
sub myData {
    #~ https://ezcrypt.it/jk6n#4qRm2gc3F7f0RnOMqL5bPaYl
    my $in1 = <<'__IN1__';
<?xml version="1.0"?>
<DimensionalReport>
  <ReportDefinition>
    <decimal name="accountID">20510</decimal>
    <string name="profileID">zJt6RUMjXg6</string>
    <string name="ID">VSlaqtDP0P6</string>
    <string name="name">Key Metrics</string>
    <string name="Description">A10_RPT_Key_Metrics_Summary_SD</string>
    <string name="language"/>
    <list name="Dimensions">
      <Dimension>
        <string name="ID">timeperiod</string>
        <string name="name">Time Period</string>
      </Dimension>
    </list>
    <list name="measures">
      <Measure>
        <string name="name">Active Visits</string>
        <string name="ID">Visits</string>
        <decimal name="columnID">0</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Page Views</string>
        <string name="ID">CAoNYspmFb5</string>
        <decimal name="columnID">1</decimal>
        <string name="measureFormatType"/>
      </Measure>
    </list>
  </ReportDefinition>
  <list name="data">
    <DataRow name="2013-01-01">
      <list name="attributes"/>
      <list name="measures"/>
      <list name="SubRows">
        <list name="SubRows">
          <DataRow name="1/1/2013">
            <list name="attributes"/>
            <list name="measures">
              <float name="Active Visits">9609.00</float>
              <float name="Page Views">36456.00</float>
              <float name="Clickthroughs">604.00</float>
              <float name="Daily Visitors">9265.00</float>
              <float name="Weekly Visitors">8948.00</float>
              <float name="Monthly Visitors">9265.00</float>
              <float name="Quarterly Visitors">9265.00</float>
              <float name="Yearly Visitors">9265.00</float>
              <float name="Single Page View Visits">4444.00</float>
              <float name="Entry Page Visits">9608.00</float>
              <float name="Bounce Rate">46.25</float>
            </list>
          </DataRow>
        </list>
      </list>
    </DataRow>
  </list>
</DimensionalReport>
__IN1__

    #~ https://ezcrypt.it/kk6n#eVINakVb1teCRmxKvdbtirFi
    my $in2 = <<'__IN2__';
<?xml version="1.0"?>
<DimensionalReport>
  <ReportDefinition>
    <decimal name="accountID">some_id</decimal>
    <string name="profileID">some_id</string>
    <string name="ID">some_id</string>
    <string name="name">Campaigns by DMA</string>
    <string name="Description">This report shows campaign activity originating from different Designated Marketing Areas (DMAs) over the report time period. All activities are tracked and attributed to the last campaign to which visitors responded, even if this most recent campaign was seen prior to the current visit.</string>
    <string name="language"/>
    <list name="Dimensions">
      <Dimension>
        <string name="ID">dma</string>
        <string name="name">DMA</string>
      </Dimension>
      <Dimension>
        <string name="ID">5oTnPv0snj5</string>
        <string name="name">Most Recent Campaign Demand Channel</string>
      </Dimension>
      <Dimension>
        <string name="ID">KuU7oeAsnj5</string>
        <string name="name">Most Recent Campaign Partner</string>
      </Dimension>
      <Dimension>
        <string name="ID">BlpOxH5snj5</string>
        <string name="name">Most Recent Campaign Marketing Program</string>
      </Dimension>
      <Dimension>
        <string name="ID">Q2bjod3snj5</string>
        <string name="name">Most Recent Campaign Marketing Activity</string>
      </Dimension>
      <Dimension>
        <string name="ID">oda6Rji1Oj5</string>
        <string name="name">Most Recent Campaign Description</string>
      </Dimension>
      <Dimension>
        <string name="ID">9STq0Y0Snj5</string>
        <string name="name">Most Recent Campaign ID</string>
      </Dimension>
    </list>
    <list name="measures">
      <Measure>
        <string name="name">Visits</string>
        <string name="ID">Visits</string>
        <decimal name="columnID">0</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Page Views</string>
        <string name="ID">CAoNYspmFb5</string>
        <decimal name="columnID">1</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Clickthroughs</string>
        <string name="ID">6wSvdu0AOj5</string>
        <decimal name="columnID">2</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Orders</string>
        <string name="ID">lEo2K44n7l5</string>
        <decimal name="columnID">3</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Revenue</string>
        <string name="ID">qMKL354n7l5</string>
        <decimal name="columnID">4</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Revenue per Order</string>
        <string name="ID">qMKL354n7l5_Average</string>
        <decimal name="columnID">5</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Units</string>
        <string name="ID">pflHFpFmEL5</string>
        <decimal name="columnID">6</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Units per Order</string>
        <string name="ID">pflHFpFmEL5_Average</string>
        <decimal name="columnID">7</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Visit Duration (Minutes)</string>
        <string name="ID">9VUp0ikQJb5</string>
        <decimal name="columnID">9</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Average Visit Page Views</string>
        <string name="ID">RaAkw6wZzB5</string>
        <decimal name="columnID">10</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Hits</string>
        <string name="ID">Hits</string>
        <decimal name="columnID">11</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Daily Campaign Visitors</string>
        <string name="ID">JaLVXfZw4K5</string>
        <decimal name="columnID">12</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Weekly Campaign Visitors</string>
        <string name="ID">N2tZxjIX4K5</string>
        <decimal name="columnID">13</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Monthly Campaign Visitors</string>
        <string name="ID">Jdc329JX4K5</string>
        <decimal name="columnID">14</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Quarterly Campaign Visitors</string>
        <string name="ID">GozsrIjX4K5</string>
        <decimal name="columnID">15</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">Yearly Campaign Visitors</string>
        <string name="ID">KrLEUoKX4K5</string>
        <decimal name="columnID">16</decimal>
        <string name="measureFormatType"/>
      </Measure>
      <Measure>
        <string name="name">New Campaign Visitors</string>
        <string name="ID">TwdhsVkX4K5</string>
        <decimal name="columnID">17</decimal>
        <string name="measureFormatType"/>
      </Measure>
    </list>
  </ReportDefinition>
  <list name="data">
    <DataRow name="2013-01-01">
      <list name="attributes"/>
      <list name="measures"/>
      <list name="SubRows">
        <list name="SubRows">
          <DataRow name="500 (Portland-Auburn:ME-NH)">
            <list name="attributes"/>
            <list name="measures"/>
            <DataRow name="None">
              <list name="attributes"/>
              <list name="measures"/>
              <DataRow name="None">
                <list name="attributes"/>
                <list name="measures"/>
                <DataRow name="None">
                  <list name="attributes"/>
                  <list name="measures"/>
                  <DataRow name="None">
                    <list name="attributes"/>
                    <list name="measures"/>
                    <DataRow name="None">
                      <list name="attributes"/>
                      <list name="measures"/>
                      <DataRow name="958707">
                        <list name="attributes"/>
                        <list name="measures">
                          <float name="Visits">4.00</float>
                          <float name="Page Views">4.00</float>
                          <float name="Clickthroughs">4.00</float>
                          <float name="Orders">0.00</float>
                          <float name="Revenue">0.00</float>
                          <float name="Units">0.00</float>
                          <float name="Hits">4.00</float>
                          <float name="Daily Campaign Visitors">2.00</float>
                          <float name="Weekly Campaign Visitors">0.00</float>
                          <float name="Monthly Campaign Visitors">2.00</float>
                          <float name="Quarterly Campaign Visitors">2.00</float>
                          <float name="Yearly Campaign Visitors">2.00</float>
                          <float name="New Campaign Visitors">0.00</float>
                          <float name="Average Revenue per Order"/>
                          <float name="Average Units per Order"/>
                          <float name="Average Visit Duration (Minutes)">0.00</float>
                          <float name="Average Visit Page Views">1.00</float>
                        </list>
                      </DataRow>
                    </DataRow>
                  </DataRow>
                </DataRow>
              </DataRow>
            </DataRow>
          </DataRow>
        </list>
      </list>
    </DataRow>
  </list>
</DimensionalReport>
__IN2__

    s/^\s+//, s/\s+$// for $in1, $in2;
    return \$in1, \$in2;
} ## end sub myData


Comment on Re: LibXml - Convert XML to CSV from API Feed
Download Code
Re^2: LibXml - Convert XML to CSV from API Feed
by Devon (Acolyte) on Apr 22, 2013 at 01:58 UTC
    I haven't had a chance to work on this in a while, but I just wanted to say I appreciate the responses.

    I should have time to flesh it out this week and will post back with the final product.

    Thanks again

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://1027948]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others taking refuge in the Monastery: (13)
As of 2014-12-18 13:33 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (51 votes), past polls