I added the 10 comments you made. Thanks again!
You oversaw a few things in the suggestions. I added an variable to the function because the sublevel wasn't enough, it was the attributes title, not the value
into level, and some other variables, I renamed more obvious. Documented the code aswell.
Maybe you can see some other shortcuts now the code looks very nice.
######################################################################
+#######################################
# Script that parses first 3 xml levels to an excel sheet "general inf
+ormation". All the level4 tags #
# get their own sheet. Every sheet has main information (title and hea
+der) and, if available deeper #
# levels, it also has subinformation on the spreadsheet. This subinfor
+mation is the name of the element #
# With all of the containing tags + values in a title/value layout (li
+ke the main information) #
# The script will parse all of the xml files it finds in your working
+directory. #
######################################################################
+#######################################
#!/usr/bin/perl --
use strict;
use warnings;
use Cwd;
use Win32::OLE;
use XML::Simple;
use Win32::OLE::Const 'Microsoft Excel';
##########################################
# Get all xml files in working directory #
##########################################
my $dir = cwd();
opendir(DIR, $dir) || die "can't opendir $dir: $!";
print "$dir\n";
my @files = grep(/\.xml$/,readdir(DIR));
closedir(DIR);
use Data::Dumper;
local $Data::Dumper::Indent=1;
#print Dumper($doc);
########################
# Ask for confirmation #
########################
my $input = "x";
while ($input ne "y")
{
print "\n\nThis script will try to parse all the xml files in the
+scripts folder. \nThis might take up to 5 minutes depending on the xm
+l file(s) size. \nYou can see what is being parsed at a certain time.
+\n\nDo you want to run the script? [y/n]\n";
$input = <STDIN>;
chomp $input;
}
unless( scalar @files ){
print "\n\nNo Files Found in $dir,\nMake sure your script is in a di
+rectory with the XML files\n\n";
exit;
}
parseFile($_) for @files;
sub writeNonArray {
my ($worksheet, $rowCount, $colCount, $header, $value) = @_;
$worksheet->Cells($rowCount, $colCount)->{Value} = $header;
$worksheet->Cells($rowCount, $colCount)->Borders() -> {Weight} = 2;
$colCount++;
$worksheet->Cells($rowCount, $colCount)->{Value} = $value;
$worksheet->Cells($rowCount, $colCount)->Borders() -> {Weight} = 2;
$rowCount++;
$colCount--;
return ($rowCount, $colCount);
}
sub parseFile {
print "\n#############################################\n\nInitializing
+ script ...\n \nAnalysing file $_[0]\n\n#############################
+################\n\n";
#######################
# Excel Initialisation#
#######################
my $xs1 = XML::Simple->new();
my $doc = $xs1->XMLin($_[0], keyattr=>[], ForceContent=>1, ForceArray=
+>1);
my $application = Win32::OLE->new("Excel.Application");
$application->{DisplayAlerts} = 0;
my $workbook = $application->Workbooks->Add();
for (my $count = 3; $count >= 0; $count--) {
if ($workbook->WorkSheets($count)) {$workbook->WorkSheets($count)->Del
+ete();}
}
my $worksheet = $workbook->Worksheets->Add({After=>$workbook->Workshee
+ts($workbook->Worksheets->{Count})});
############################################
# First worksheet with general information #
############################################
$worksheet -> {Name} = "General Information";
$worksheet -> Range("A1") -> Font -> {Size}= 14;
$worksheet -> Range("A1") -> Font -> {ColorIndex}= 2;
$worksheet -> Range("A1") -> {Value} = "General Information";
$worksheet -> Range("A1:E1") -> Merge;
$worksheet -> Range("A1:E1") -> Interior -> {ColorIndex} = 25;
$worksheet -> Range("A1:E1") -> Borders() -> {Weight} = 3;
####################################################################
# Excel index variables for writing to column and row with headers #
####################################################################
my $LastRow = 0;
my $new = 0;
my $mainHeaders = 3; #main header row
my $rowCount=4; #main info row
my $colCount = 2; #main info column
my $subHeaderRow = 0; #additional info header row
my $subDataRow = 50; #additional info data row
print "Collecting general information...";
foreach my $Level1 ( sort keys %{ $doc } ){
my $aoh1 = $doc->{$Level1};
unless (ref($aoh1) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray($worksheet, $rowCount, $
+colCount, $Level1, $aoh1);
next;
}
my $count1 = $#{ $aoh1 };
foreach my $i(0 .. $count1) {
foreach my $Level2 ( sort keys %{ $doc->{$Level1}[$i] } ){
my $aoh2 = $doc->{$Level1}[$i]{$Level2};
unless (ref($aoh2) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray($worksheet, $row
+Count, $colCount, $Level2, $aoh2);
next;
}
my $count2 = $#{ $aoh2 };
foreach my $j(0 .. $count2) {
foreach my $Level3 ( sort keys %{ $doc->{$Level1}[$i]{
+$Level2}[$j] } ){
my $aoh3 = $doc->{$Level1}[$i]{$Level2}[$j]{$Level
+3};
unless (ref($aoh3) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray($workshe
+et, $rowCount, $colCount, $Level3, $aoh3);
next;
}
my $count3 = $#{ $aoh3 };
foreach my $k(0 .. $count3) {
foreach my $Level4 ( sort keys %{ $doc->{$Leve
+l1}[$i]{$Level2}[$j]{$Level3}[$k] } ){
my $aoh4 = $doc->{$Level1}[$i]{$Level2}[$j
+]{$Level3}[$k]{$Level4};
unless (ref($aoh4) eq 'ARRAY') {
($rowCount,$colCount) = writeNonArray(
+$worksheet, $rowCount, $colCount, $Level4, $aoh4);
+
next;
}
print "Completed\nCollecting $Level4 ...";
my $count4 = $#{ $aoh4 };
$subDataRow = ($count4 + 1) + ($rowCount +
+ 1);
my $worksheet = $workbook->Worksheets->Add
+({After=>$workbook->Worksheets($workbook->Worksheets->{Count})});
$worksheet -> {Name} = $Level4;
$worksheet -> Range("A1") -> Font -> {Size
+}= 14;
$worksheet -> Range("A1") -> Font -> {Colo
+rIndex}= 2;
$worksheet -> Range("A1") -> {Value} = "$L
+evel4";
$worksheet -> Range("A1:E1") -> Merge;
$worksheet -> Range("A1:E1") -> Interior -
+> {ColorIndex} = 25;
$worksheet -> Range("A1:E1") -> Borders()
+-> {Weight} = 3;
my $subDataCol = 1;
foreach my $l(0 .. $count4) {
my $temp = "";
foreach my $Level5 ( sort keys %{ $doc
+->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l] } ){
my $aoh5 = $doc->{$Level1}[$i]{$Le
+vel2}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5};
my $curID = 0;
my $nextup = "";
if($doc->{$Level1
+}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{number}){ $curID = $doc-
+>{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{number};}
if($doc->{$Level1
+}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{enclosureNumber}){ $curI
+D = $doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{enclos
+ureNumber};}
if(ref($aoh5)){
#################
+#######
# EXCEL SUBINFORM
+ATION #
#################
+#######
my $count5 =
+$#{ $aoh5 };
foreach m
+y $m(0 .. $count5) {
if($n
+extup ne $Level5){
$
+new = 1;
$
+subDataRow++;
$
+subHeaderRow = $subDataRow;
$
+subDataCol = 1 ;
$
+nextup = $Level5;
}
else
+{$new = 0;}
$subDataRow++;
foreach my $Level6
+ ( sort keys %{ $doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4
+}[$l]{$Level5}[$m]} ){
+ # print "\n$Level1->$Level2->$Level3->$Level4->$Level5->$Level6 :
+$doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5}[$
+m]{$Level6}";
# subinfo titl
+e for new element eg "Rank : 1" in colors
if ($temp ne $
+curID){
$temp = $c
+urID;
$worksheet
+->Cells($subDataRow, $subDataCol)->{Value} = "$Level4 : $curID";
$worksheet
+->Cells($subDataRow, $subDataCol)->Interior->{ColorIndex} = 33;
$worksheet
+->Cells($subDataRow, $subDataCol)->{Font}->{Bold} = 1;
$worksheet
+ ->Cells($subDataRow, $subDataCol)-> Borders() -> {Weight} = 1;
$subDataRo
+w+=2;
$subHeader
+Row = $subDataRow;
$subDataRo
+w++;
}
# We should wr
+ite the header only once
if($new==1){
$worksheet
+->Cells($subHeaderRow, $subDataCol)->{Value} = $Level6;
$worksheet
+->Cells($subHeaderRow, $subDataCol)->{Font}->{Bold} = 1;
$worksheet
+ ->Cells($subHeaderRow, $subDataCol)-> Borders() -> {Weight} = 2;
$worksheet
+ ->Cells($subHeaderRow, $subDataCol)-> Borders() -> {ColorIndex} = 25
+;
}
$worksheet
+->Cells($subDataRow, $subDataCol)->{Value} = $doc->{$Level1}[$i]{$Lev
+el2}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5}[$m]{$Level6};
$subDataCol++;
}
$subDataCol=1;
}
}
else{
#################
+#########
# EXCEL MAIN INFO
+RMATION #
#################
+#########
# Index, if a
+ny of those, will be placed in first column
if ($Level5 e
+q "number" | $Level5 eq "id" | $Level5 eq "enclosureNumber"){
$workshee
+t->Cells($mainHeaders, 1)->{Value} = $Level5;
$worksheet->Cells($mainHea
+ders, 1)->{Font} -> {ColorIndex} = 25;
$worksheet->Cells($mainHea
+ders, 1)->{Interior} -> {ColorIndex} = 15;
$worksheet->Cells($mainHea
+ders, 1)->Borders() -> {Weight} = 2;
$worksheet->Cells($mainHea
+ders, 1)->{Font}->{Bold} = 1;
$worksheet->Cell
+s($rowCount, 1)->{Value} = $doc->{$Level1}[$i]{$Level2}[$j]{$Level3}[
+$k]{$Level4}[$l]{$Level5};
$worksheet->Cells($rowCoun
+t, 1)->{Font}->{Bold} = 1;
$worksheet->Cells($rowCoun
+t, 1)->Borders() -> {Weight} = 2;
}
else{
unless ($
+worksheet->Cells($mainHeaders, $colCount)->{Value}){
$work
+sheet->Cells($mainHeaders, $colCount)->{Value} = $Level5;
}
if ($Level5 eq "ww
+pn"){
$worksheet->Cells($row
+Count, $colCount)->{NumberFormat} = "0";
$worksheet->Cells($row
+Count, $colCount)->Borders() -> {Weight} = 2;
+
}
$workshee
+t->Cells($rowCount, $colCount)->Borders() -> {Weight} = 2;
$workshee
+t->Cells($rowCount, $colCount)->{Value} = $doc->{$Level1}[$i]{$Level2
+}[$j]{$Level3}[$k]{$Level4}[$l]{$Level5};
$workshee
+t->Cells($mainHeaders, $colCount)->Borders() -> {Weight} = 2;
$worksheet->Cells($mainHea
+ders, $colCount)->{Font}->{Bold} = 1;
$worksheet->Cells($mainHea
+ders, $colCount)->{Font} -> {ColorIndex} = 25;
$worksheet->Cells($mainHea
+ders, $colCount)->{Interior} -> {ColorIndex} = 15;
$colCount++;
}
}
} $rowCount++;
$subDataRow++;
$colCount=2;
$worksheet -> Range("A:X") -> {Colum
+ns} -> Autofit;
}
$subDataRow=$rowCount+2;;
$colCount=2;
$rowCount=4;
#Seperate general information on first sheet.
} $rowCount=4; $colCount+=3;
}
} $rowCount=4; $colCount+=3;
}
} $rowCount=4; $colCount+=3;
}
}
$LastRow = $worksheet->UsedRange->Find({What=>"*", SearchDirection=>xl
+Previous, SearchOrder=>xlByRows})->{Row};
+
$worksheet -> Range("1:$LastRow") -> {Columns} -> Autofit;
$workbook->SaveAs($dir . '/'.$_[0].'.xls');
$workbook->Close;
print "\n\n$_[0].xls generated in $dir\n" ;
#print "\a";
}