These last months I spend, from time to time, some effort in creating a script that parses XML to Excel. I want to
all of you guys for the hints/tips/ideas/help etc. I want to put it online to give something back to the community.
Maybe someone can make it faster, less lines, or just use it, study it, change it, whatever you want.
The script uses the first 3 nested tags to get some general information. All the rest is divided into other spreadsheets with main and sub info data. Run it once, it's pretty cool. The code is 'quite dynamic' because there is almost nothing written in the code that is also in the xml file. I only do a check for an id, a number, or an enclosurenumber. If they exist, they are put in the first column of excel.
I use the script to get a nice overview of a storage boxes internal configuration
######################################################################
+#######################################
# Script that parses first 3 xml levels to an excel sheet "general inf
+ormation". All the level4 tags #
# get their own sheet. Every sheet has main information (title and hea
+der) and, if available deeper #
# levels, it also has subinformation on the spreadsheet. This subinfor
+mation is the name of the element #
# With all of the containing tags + values in a title/value layout (li
+ke the main information) #
# The script will parse all of the xml files it finds in your working
+directory. #
######################################################################
+#######################################
#!/usr/bin/perl --
use strict;
use warnings;
use Cwd;
use Win32::OLE;
use XML::Simple;
use Win32::OLE::Const 'Microsoft Excel';
##########################################
# Get all xml files in working directory #
##########################################
my $dir = cwd();
opendir(DIR, $dir) || die "can't opendir $dir: $!";
print "$dir\n";
my @files = grep(/\.xml$/,readdir(DIR));
closedir(DIR);
use Data::Dumper;
local $Data::Dumper::Indent=1;
#print Dumper($doc);
########################
# Ask for confirmation #
########################
my $input = "x";
while ($input ne "y")
{
print "\n\nThis script will try to parse all the xml files in the
+scripts folder. \nThis might take up to 5 minutes depending on the xm
+l file(s) size. \nYou can see what is being parsed at a certain time.
+\n\nDo you want to run the script? [y/n]\n";
$input = <STDIN>;
chomp $input;
}
unless( scalar @files ){
print "\n\nNo Files Found in $dir,\nMake sure your script is in a di
+rectory with the XML files\n\n";
exit;
}
parseFile($_) for @files;
sub writeNonArray {
my ($worksheet, $rowCount, $colCount, $header, $value) = @_;
$worksheet->Cells($rowCount, $colCount)->{Value} = $header;
$worksheet->Cells($rowCount, $colCount)->Borders() -> {Weight} = 2;
$colCount++;
$worksheet->Cells($rowCount, $colCount)->{Value} = $value;
$worksheet->Cells($rowCount, $colCount)->Borders() -> {Weight} = 2;
$rowCount++;
$colCount--;
return ($rowCount, $colCount);
}
sub parseFile {
print "\n#############################################\n\nInitializing
+ script ...\n \nAnalysing file $_[0]\n\n#############################
+################\n\n";
#######################
# Excel Initialisation#
#######################
my $xs1 = XML::Simple->new();
my $doc = $xs1->XMLin($_[0], keyattr=>[], ForceContent=>1, ForceArray=
+>1);
my $application = Win32::OLE->new("Excel.Application");
$application->{DisplayAlerts} = 0;
my $workbook = $application->Workbooks->Add();
for (my $count = 3; $count >= 0; $count--) {
if ($workbook->WorkSheets($count)) {$workbook->WorkSheets($count)->Del
+ete();}
}
my $worksheet = $workbook->Worksheets->Add({After=>$workbook->Workshee
+ts($workbook->Worksheets->{Count})});
############################################
# First worksheet with general information #
############################################
$worksheet -> {Name} = "General Information";
$worksheet -> Range("A1") -> Font -> {Size}= 14;
$worksheet -> Range("A1") -> Font -> {ColorIndex}= 2;
$worksheet -> Range("A1") -> {Value} = "General Information";
$worksheet -> Range("A1:E1") -> Merge;
$worksheet -> Range("A1:E1") -> Interior -> {ColorIndex} = 25;
$worksheet -> Range("A1:E1") -> Borders() -> {Weight} = 3;
####################################################################
# Excel index variables for writing to column and row with headers #
####################################################################
my $LastRow = 0;
my $new = 0;
my $mainHeaders = 3; #main header row
my $rowCount=4; #main info row
my $colCount = 2; #main info column
my $subHeaderRow = 0; #additional info header row
my $subDataRow = 50; #additional info data row
print "Collecting general information...";
foreach my $Level1 ( sort keys %{ $doc } ){
my $aoh1 = $doc->{$Level1};
unless (ref($aoh1) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray($worksheet, $rowCount, $
+colCount, $Level1, $aoh1);
next;
}
my $count1 = $#{ $aoh1 };
foreach my $i(0 .. $count1) {
foreach my $Level2 ( sort keys %{ $doc->{$Level1}[$i] } ){
my $aoh2 = $doc->{$Level1}[$i]{$Level2};
unless (ref($aoh2) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray($worksheet, $row
+Count, $colCount, $Level2, $aoh2);
next;
}
my $count2 = $#{ $aoh2 };
foreach my $j(0 .. $count2) {
foreach my $Level3 ( sort keys %{ $doc->{$Level1}[$i]{
+$Level2}[$j] } ){
my $aoh3 = $doc->{$Level1}[$i]{$Level2}[$j]{$Level
+3};
unless (ref($aoh3) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray($workshe
+et, $rowCount, $colCount, $Level3, $aoh3);
next;
}
my $count3 = $#{ $aoh3 };
foreach my $k(0 .. $count3) {
foreach my $Level4 ( sort keys %{ $doc->{$Leve
+l1}[$i]{$Level2}[$j]{$Level3}[$k] } ){
my $aoh4 = $doc->{$Level1}[$i]{$Level2}[$j
+]{$Level3}[$k]{$Level4};
unless (ref($aoh4) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray(
+$worksheet, $rowCount, $colCount, $Level4, $aoh4);
+
next;
}
print "Completed\nCollecting $Level4 ...";
my $count4 = $#{ $aoh4 };
$subDataRow = ($count4 + 1) + ($rowCount +
+ 1);
my $worksheet = $workbook->Worksheets->Add
+({After=>$workbook->Worksheets($workbook->Worksheets->{Count})});
$worksheet -> {Name} = $Level4;
$worksheet -> Range("A1") -> Font -> {Size
+}= 14;
$worksheet -> Range("A1") -> Font -> {Colo
+rIndex}= 2;
$worksheet -> Range("A1") -> {Value} = "$L
+evel4";
$worksheet -> Range("A1:E1") -> Merge;
$worksheet -> Range("A1:E1") -> Interior -
+> {ColorIndex} = 25;
$worksheet -> Range("A1:E1") -> Borders()
+-> {Weight} = 3;
my $subDataCol = 1;
foreach my $l(0 .. $count4) {
my $temp = "";
foreach my $Level5 ( sort keys %{ $doc
+->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l] } ){
my $aoh5 = $doc->{$Level1}[$i]{$Le
+vel2}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5};
my $curID = 0;
my $nextup = "";
if($doc->{$Level1
+}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{number}){ $curID = $doc-
+>{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{number};}
if($doc->{$Level1
+}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{enclosureNumber}){ $curI
+D = $doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{enclos
+ureNumber};}
if(ref($aoh5)){
#################
+#######
# EXCEL SUBINFORM
+ATION #
#################
+#######
my $count5 =
+$#{ $aoh5 };
foreach m
+y $m(0 .. $count5) {
if($n
+extup ne $Level5){
$
+new = 1;
$
+subDataRow++;
$
+subHeaderRow = $subDataRow;
$
+subDataCol = 1 ;
$
+nextup = $Level5;
}
else
+{$new = 0;}
$subDataRow++;
foreach my $Level6
+ ( sort keys %{ $doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4
+}[$l]{$Level5}[$m]} ){
+ # print "\n$Level1->$Level2->$Level3->$Level4->$Level5->$Level6 :
+$doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5}[$
+m]{$Level6}";
# subinfo titl
+e for new element eg "Rank : 1" in colors
if ($temp ne $
+curID){
$temp = $c
+urID;
$worksheet
+->Cells($subDataRow, $subDataCol)->{Value} = "$Level4 : $curID";
$worksheet
+->Cells($subDataRow, $subDataCol)->Interior->{ColorIndex} = 33;
$worksheet
+->Cells($subDataRow, $subDataCol)->{Font}->{Bold} = 1;
$worksheet
+ ->Cells($subDataRow, $subDataCol)-> Borders() -> {Weight} = 1;
$subDataRo
+w+=2;
$subHeader
+Row = $subDataRow;
$subDataRo
+w++;
}
# We should wr
+ite the header only once
if($new==1){
$worksheet
+->Cells($subHeaderRow, $subDataCol)->{Value} = $Level6;
$worksheet
+->Cells($subHeaderRow, $subDataCol)->{Font}->{Bold} = 1;
$worksheet
+ ->Cells($subHeaderRow, $subDataCol)-> Borders() -> {Weight} = 2;
$worksheet
+ ->Cells($subHeaderRow, $subDataCol)-> Borders() -> {ColorIndex} = 25
+;
}
$worksheet
+->Cells($subDataRow, $subDataCol)->{Value} = $doc->{$Level1}[$i]{$Lev
+el2}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5}[$m]{$Level6};
$subDataCol++;
}
$subDataCol=1;
}
}
else{
#################
+#########
# EXCEL MAIN INFO
+RMATION #
#################
+#########
# Index, if a
+ny of those, will be placed in first column
if ($Level5 e
+q "number" | $Level5 eq "id" | $Level5 eq "enclosureNumber"){
$workshee
+t->Cells($mainHeaders, 1)->{Value} = $Level5;
$worksheet->Cells($mainHea
+ders, 1)->{Font} -> {ColorIndex} = 25;
$worksheet->Cells($mainHea
+ders, 1)->{Interior} -> {ColorIndex} = 15;
$worksheet->Cells($mainHea
+ders, 1)->Borders() -> {Weight} = 2;
$worksheet->Cells($mainHea
+ders, 1)->{Font}->{Bold} = 1;
$worksheet->Cell
+s($rowCount, 1)->{Value} = $doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[
+$k]{$Level4}[$l]{$Level5};
$worksheet->Cells($rowCoun
+t, 1)->{Font}->{Bold} = 1;
$worksheet->Cells($rowCoun
+t, 1)->Borders() -> {Weight} = 2;
}
else{
unless ($
+worksheet->Cells($mainHeaders, $colCount)->{Value}){
$work
+sheet->Cells($mainHeaders, $colCount)->{Value} = $Level5;
}
if ($Level5 eq "ww
+pn"){
$worksheet->Cells($row
+Count, $colCount)->{NumberFormat} = "0";
$worksheet->Cells($row
+Count, $colCount)->Borders() -> {Weight} = 2;
+
}
$workshee
+t->Cells($rowCount, $colCount)->Borders() -> {Weight} = 2;
$workshee
+t->Cells($rowCount, $colCount)->{Value} = $doc->{$Level1}[$i]{$Level2
+}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5};
$workshee
+t->Cells($mainHeaders, $colCount)->Borders() -> {Weight} = 2;
$worksheet->Cells($mainHea
+ders, $colCount)->{Font}->{Bold} = 1;
$worksheet->Cells($mainHea
+ders, $colCount)->{Font} -> {ColorIndex} = 25;
$worksheet->Cells($mainHea
+ders, $colCount)->{Interior} -> {ColorIndex} = 15;
$colCount++;
}
}
} $rowCount++;
$subDataRow++;
$colCount=2;
$worksheet -> Range("A:X") -> {Colum
+ns} -> Autofit;
}
$subDataRow=$rowCount+2;;
$colCount=2;
$rowCount=4;
#Seperate general information on first sheet.
} $rowCount=4; $colCount+=3;
}
} $rowCount=4; $colCount+=3;
}
} $rowCount=4; $colCount+=3;
}
}
$LastRow = $worksheet->UsedRange->Find({What=>"*", SearchDirection=>xl
+Previous, SearchOrder=>xlByRows})->{Row};
+
$worksheet -> Range("1:$LastRow") -> {Columns} -> Autofit;
$workbook->SaveAs($dir . '/'.$_[0].'.xls');
$workbook->Close;
print "\n\n$_[0].xls generated in $dir\n" ;
#print "\a";
}
Thank you all again for the help, new suggestions are welcome. And no, I don't know how to use referencing. I wasn't allowed to spend much time studying perl.