http://www.perlmonks.org?node_id=986563


in reply to Re^2: delete duplicated xml lines
in thread delete duplicated xml lines

Please provide all of your code.

Replies are listed 'Best First'.
Re^4: delete duplicated xml lines
by cibiena (Initiate) on Aug 09, 2012 at 22:30 UTC
    #!/usr/bin/perl -w ######################### # # Materialmapping generator # Updated 30.07.2012 # my $version="0.0.3"; # 2012.04.12 v0.0.1 DM: Creation # 2012.04.23 v0.0.2 DM: Added support for active column and comma s +epareted families # 2012.05.30 v0.0.3 DM: Added support for comma separated propriety + - only one for line #Excel format rules: #-a merged cell named "Configurator mapping" on the first line identif +ies the mapping data (internal+properties) #-a merged cell named "Brand" on the first line identifies the Brand d +ata (optional) #-a merged cell named "Market" on the first line identifies the Market + data (optional) #-a merged cell named "Customer" on the first line identifies the Cust +omer data (optional) #-the first column of "Configurator mapping" should have an "internal" + as cell(line number is not important), the "internal" define the tit +le bar line; the cell must be on the same line where all the properti +es/markets/brand/customer names are #-the "active" column must be exactly left of the "internal" column #-the commercial code column must be exacly left of the "active" colum +n #-the "pr_family" column should be exactly right of the "internal" col +umn #-the tecnical description must be exactly right of the last "Configur +ator mapping" column #-the colorzones must be exactly right of the last "tecnical descripti +on" column #Notes: #-in the field "pr_family" the user can concatenate the family value l +ike 'TL1_1,TL1_4' , every value generate a line in the xml file with +the right family #-in the colums "propriety" the user can concatenate the propriety val +ue like 'value1,value2' (but only for one pr colum for line), every v +alue add a line in the xml file with the right propriety #-if the "active" column is not empty, the line is not evaluated #-if "internal" or "pr_family" field is empty, the line is not evaluat +ed # ######################### use Encode; use utf8; use XML::LibXML; use Spreadsheet::ParseExcel; system("cls"); # Read command line arguments #--------------------------------------------------------------------- +------------------- my $materialmapping_file = shift; my $data_folder = shift; print "OPTIONS\nmaterialmapping_file:$materialmapping_file\ndata_folde +r:$data_folder\n"; # subroutine sub getSQLTimeStamp { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localt +ime(time); return sprintf "%4d-%02d-%02d %02d:%02d:%02d",$year+1900,$mon+1 +,$mday,$hour,$min,$sec; } # Main #--------------------------------------------------------------------- +------------------- if(length($materialmapping_file) gt 0 and length($data_folder) gt 0) { opendir ( DIR, $data_folder ) || die "Error in opening dir $data_f +older\n"; my $materialmapping_table_xml = XML::LibXML->createDocument( "1.0" +, "UTF-8"); my $materialmapping_table_xml_root = $materialmapping_table_xml->c +reateElement("masterdata"); $materialmapping_table_xml_root->setAttribute('version',getSQLTime +Stamp()); $materialmapping_table_xml->setDocumentElement($materialmapping_ta +ble_xml_root); print "Material mapping generator v.0.0.3 - Parsing files...\n"; while( ($filename = readdir(DIR))){ if($filename =~ /\.xls$/i) { my $parser = Spreadsheet::ParseExcel->new(); my $oBook = $parser->parse($data_folder.$filename); if (defined $oBook ) { for(my $iSheet=0; $iSheet < $oBook->{SheetCount} ; $iS +heet++) { $oWkS = $oBook->{Worksheet}[$iSheet]; # find the needed columns my $map_Cmin = -1; # the first is internal my $map_Cmax = -1; my $artno_col = -1; my $active_col = -1; my $colorzones_col = -1; my $family_col = -1; my $tec_desc_col = -1; my $brand_Cmin = -1; my $brand_Cmax = -1; my $customer_Cmin = -1; my $customer_Cmax = -1; my $market_Cmin = -1; my $market_Cmax = -1; my $title_row = -1; for (my $iC = $oWkS->{MinCol}; defined $oWkS->{Max +Col} && $iC <= $oWkS->{MaxCol} ; $iC++) { $oWkC = $oWkS->{Cells}[$oWkS->{MinRow}][$iC]; if(defined $oWkC) { if(decode('cp1252',$oWkC->{Val}) eq "Confi +gurator mapping") { for (my $iR = $oWkS->{MinRow} +1; defi +ned $oWkS->{MaxRow} && $iR <= $oWkS->{MaxRow} ; $iR++) { $oWkC_int = $oWkS->{Cells}[$iR][$i +C]; if(defined $oWkC_int and decode('c +p1252',$oWkC_int->{Val}) eq "internal") { $title_row = $iR; } } if($title_row > 0) { foreach my $area ( @{ $oWkS->{Merg +edArea} } ) { if($area->[1] eq $iC and $area +->[0] eq $oWkS->{MinRow}){ $map_Cmax = $area->[3]; } } $map_Cmin = $iC; $artno_col = $map_Cmin -2; $active_col = $map_Cmin -1; $family_col = $map_Cmin +1; $colorzones_col = $map_Cmax +2; $tec_desc_col = $map_Cmax +1; } } elsif(decode('cp1252',$oWkC->{Val}) eq " +Market"){ foreach my $area ( @{ $oWkS->{MergedAr +ea} } ) { if($area->[1] eq $iC and $area->[0 +] eq $oWkS->{MinRow}){ $market_Cmax = $area->[3]; } } $market_Cmin = $iC; } elsif(decode('cp1252',$oWkC->{Val}) eq " +Customer"){ foreach my $area ( @{ $oWkS->{MergedAr +ea} } ) { if($area->[1] eq $iC and $area->[0 +] eq $oWkS->{MinRow}){ $customer_Cmax = $area->[3]; } } $customer_Cmin = $iC; } elsif(decode('cp1252',$oWkC->{Val}) eq " +Brand"){ foreach my $area ( @{ $oWkS->{MergedAr +ea} } ) { if($area->[1] eq $iC and $area->[0 +] eq $oWkS->{MinRow}){ $brand_Cmax = $area->[3]; } } $brand_Cmin = $iC; } } } if($map_Cmin >= 0 and $map_Cmin >= 0 and $title_ro +w >= 0){ for(my $iR = $title_row +1; defined $oWkS->{Ma +xRow} && $iR <= $oWkS->{MaxRow} ; $iR++){ $internal_cell = $oWkS->{Cells}[$iR][$map_ +Cmin]; $active_cell = $oWkS->{Cells}[$iR][$active +_col]; $family_cell = $oWkS->{Cells}[$iR][$family +_col]; if(defined $internal_cell and defined $act +ive_cell and defined $family_cell and length(decode('cp1252',$interna +l_cell->{Val})) gt 0 and !(length(decode('cp1252',$active_cell->{Val} +)) gt 0) and length(decode('cp1252',$family_cell->{Val})) gt 0) { #fo every family print a line my @families = split(/[,.]+/, decode(' +cp1252',$family_cell->{Val})); foreach $family(@families){ + my $materialmapping_item = $materialmapping_table_xml->createElemen +t("item"); + + #add all propriety for(my $prC = $map_Cmin+2; $prC <= + $map_Cmax ; $prC++) { $pr_cell_name = $oWkS->{Ce +lls}[$title_row][$prC]; $pr_cell = $oWkS->{Cells}[ +$iR][$prC]; if(defined $pr_cell and de +fined $pr_cell_name and length(decode('cp1252',$pr_cell->{Val})) gt 0 + and length(decode('cp1252',$pr_cell_name->{Val})) gt 0) { + my @pr = split(/[,.]+/, de +code('cp1252',$pr_cell->{Val})); + foreach $pr(@pr){ + + $materialmapping_item->setAttribute(decode( +'cp1252',$pr_cell_name->{Val}),$pr); + } + } + } + + + #for every multiple less 2 propriety print a line + for(my $prC2 = $map_Cmin+2; $prC2 <= $map_Cmax ; $prC2++) { $pr_cell_name2 = $oWkS->{C +ells}[$title_row][$prC2]; $pr_cell2 = $oWkS->{Cells} +[$iR][$prC2]; if(defined $pr_cell2 and d +efined $pr_cell_name2 and length(decode('cp1252',$pr_cell2->{Val})) g +t 1 and length(decode('cp1252',$pr_cell_name2->{Val})) gt 1) { my @pr2 = split(/[,.]+/, d +ecode('cp1252',$pr_cell2->{Val})); + foreach $pr2(@pr2){ + if(@pr2 gt 1) + { + my $materialmapping_item2 = $materialmapp +ing_table_xml->createElement("item3"); + $materialmapping_item2->setAttribute(deco +de('cp1252',$pr_cell_name2->{Val}),$pr2); + + for(my $prC3 = $map_Cmin+2; $prC3 <= $map +_Cmax ; $prC3++) + { + $pr_cell_name3 = $oWkS->{C +ells}[$title_row][$prC3]; + $pr_cell3 = $oWkS->{Cells} +[$iR][$prC3]; + if($prC3 ne $prC2) + { + my @pr3 = spl +it(/[,.]+/, decode('cp1252',$pr_cell3->{Val})); + foreach $pr3 +(@pr3) + { + + $materialmapping_item2->setAttribute(decode('cp1252',$pr_cell_name3 +->{Val}),$pr3); + } + } + } + + + #set internal $materialmapping_item2->setAttribu +te("internal",decode('cp1252',$internal_cell->{Val})); #set family $materialmapping_item2->setAttribu +te("pr_family",$family); #set item_number $item_number_cell = $oWkS->{Cells} +[$iR][$artno_col]; if(defined $item_number_cell and l +ength(decode('cp1252',$item_number_cell->{Val})) gt 0) { $materialmapping_item2->setAtt +ribute("item_number",decode('cp1252',$item_number_cell->{Val})); } #set colorzones $colorzones_cell = $oWkS->{Cells}[ +$iR][$colorzones_col]; if(defined $colorzones_cell and le +ngth(decode('cp1252',$colorzones_cell->{Val})) gt 0) { $materialmapping_item2->setAtt +ribute("colorzone",decode('cp1252',$colorzones_cell->{Val})); } #set description $description_cell = $oWkS->{Cells} +[$iR][$tec_desc_col]; if(defined $description_cell and l +ength(decode('cp1252',$description_cell->{Val})) gt 0) { $materialmapping_item2->setAtt +ribute("description",decode('cp1252',$description_cell->{Val})); } #set Brands if($brand_Cmin gt 0 and $brand_Cma +x gt 0) { my $brand_string = ""; my $ignored_brand_string = ""; for(my $brandC = $brand_Cmin; +$brandC <= $brand_Cmax ; $brandC++) { $brand_cell_name = $oWkS-> +{Cells}[$title_row][$brandC]; if(defined $brand_cell_nam +e and decode('cp1252',$brand_cell_name->{Val}) ne "Epta std") { $brand_cell = $oWkS->{ +Cells}[$iR][$brandC]; if(defined $brand_cell + and length(decode('cp1252',$brand_cell->{Val})) gt 0) { if(decode('cp1252' +,$brand_cell->{Val}) eq '0') { $ignored_brand +_string = $ignored_brand_string . decode('cp1252',$brand_cell_name->{ +Val}) . ","; } else { $brand_string += $brand_string . decode('cp1252',$brand_cell_name->{Val}) . ","; } } } } if(length($brand_string) gt 0) +{ $materialmapping_item2->se +tAttribute('brand',$brand_string); } if(length($ignored_brand_strin +g) gt 0){ $materialmapping_item2->se +tAttribute('ignore_brand',$ignored_brand_string); } } #set Customer if($customer_Cmin gt 0 and $custom +er_Cmax gt 0) { my $customer_string = ""; my $ignored_customer_string = +""; for(my $customerC = $customer_ +Cmin; $customerC <= $customer_Cmax ; $customerC++) { $customer_cell_name = $oWk +S->{Cells}[$title_row][$customerC]; $customer_cell = $oWkS->{C +ells}[$iR][$customerC]; if(defined $customer_c +ell_name and defined $customer_cell and length(decode('cp1252',$custo +mer_cell->{Val})) gt 0 and length(decode('cp1252',$customer_cell_name +->{Val})) gt 0) { if(decode('cp1252' +,$customer_cell->{Val}) eq '0') { $ignored_custo +mer_string = $ignored_customer_string . decode('cp1252',$customer_cel +l_name->{Val}) . ","; } else { $customer_stri +ng = $customer_string . decode('cp1252',$customer_cell_name->{Val}) . + ","; } } } if(length($customer_string) gt + 0){ $materialmapping_item2->se +tAttribute('customer',$customer_string); } if(length($ignored_customer_st +ring) gt 0){ $materialmapping_item2->se +tAttribute('ignore_customer',$ignored_customer_string); } } #set Market if($market_Cmin gt 0 and $market_C +max gt 0) { my $market_string = ""; my $ignored_market_string = "" +; for(my $marketC = $market_Cmin +; $marketC <= $market_Cmax ; $marketC++) { $market_cell_name = $oWkS- +>{Cells}[$title_row][$marketC]; $market_cell = $oWkS->{Cel +ls}[$iR][$marketC]; if(defined $market_cel +l_name and defined $market_cell and length(decode('cp1252',$market_ce +ll->{Val})) gt 0 and length(decode('cp1252',$market_cell_name->{Val}) +) gt 0) { if(decode('cp1252' +,$market_cell->{Val}) eq '0') { $ignored_marke +t_string = $ignored_market_string . decode('cp1252',$market_cell_name +->{Val}) . ","; } else { $market_string + = $market_string . decode('cp1252',$market_cell_name->{Val}) . ","; } } } if(length($market_string) gt 0 +){ $materialmapping_item2->se +tAttribute('market',$market_string); } if(length($ignored_market_stri +ng) gt 0){ $materialmapping_item2->se +tAttribute('ignore_market',$ignored_market_string); + } } + + + $materialmapping_item = $materialmapping_item2; + $materialmapping_table_xml_root->addChild($materialmapping_item2); + + +} + } } } + + + + + + + + + + + #for every multiple propriety print a line + for(my $prC2 = $map_Cmin+2; $prC2 <= $map_Cmax ; $prC2++) { $pr_cell_name2 = $oWkS->{C +ells}[$title_row][$prC2]; $pr_cell2 = $oWkS->{Cells} +[$iR][$prC2]; if(defined $pr_cell2 and d +efined $pr_cell_name2 and length(decode('cp1252',$pr_cell2->{Val})) g +t 0 and length(decode('cp1252',$pr_cell_name2->{Val})) gt 0) { my @pr2 = split(/[,.]+/, d +ecode('cp1252',$pr_cell2->{Val})); + foreach $pr2(@pr2){ + if(@pr2 gt 1) + { + + + for(my $prC3 = $map_Cmin+2; $prC3 <= $map +_Cmax ; $prC3++) + { + $pr_cell_name3 = $oWkS->{C +ells}[$title_row][$prC3]; + $pr_cell3 = $oWkS->{Cells} +[$iR][$prC3]; + if($prC3 gt $prC2) + { + my @pr3 = spl +it(/[,.]+/, decode('cp1252',$pr_cell3->{Val})); + foreach $pr3 +(@pr3) + { + + + if(@p +r3 gt 1) + { + + my $materialmapping_item3 = $materialmapping_table_xml->createEleme +nt("item2"); + + + $mater +ialmapping_item3->setAttribute(decode('cp1252',$pr_cell_name3->{Val}) +,$pr3); + + + for(my $prC +4 = $map_Cmin+2; $prC4 <= $map_Cmax ; $prC4++) + { + $pr_cell_name4 = $oWkS->{C +ells}[$title_row][$prC4]; + $pr_cell4 = $oWkS->{Cells} +[$iR][$prC4]; + if($prC4 ne $prC3) + { + my @pr4 = spl +it(/[,.]+/, decode('cp1252',$pr_cell4->{Val})); + foreach $pr4 +(@pr4) + { + + + $materialmapping_item3->setAttribute(dec +ode('cp1252',$pr_cell_name2->{Val}),$pr2); + $materialmapping_item3->setAttribute(deco +de('cp1252',$pr_cell_name4->{Val}),$pr4); + + } + }} + + #set internal $materialmapping_item3->setAttribu +te("internal",decode('cp1252',$internal_cell->{Val})); #set family $materialmapping_item3->setAttribu +te("pr_family",$family); #set item_number $item_number_cell = $oWkS->{Cells} +[$iR][$artno_col]; if(defined $item_number_cell and l +ength(decode('cp1252',$item_number_cell->{Val})) gt 0) { $materialmapping_item3->setAtt +ribute("item_number",decode('cp1252',$item_number_cell->{Val})); } #set colorzones $colorzones_cell = $oWkS->{Cells}[ +$iR][$colorzones_col]; if(defined $colorzones_cell and le +ngth(decode('cp1252',$colorzones_cell->{Val})) gt 0) { $materialmapping_item3->setAtt +ribute("colorzone",decode('cp1252',$colorzones_cell->{Val})); } #set description $description_cell = $oWkS->{Cells} +[$iR][$tec_desc_col]; if(defined $description_cell and l +ength(decode('cp1252',$description_cell->{Val})) gt 0) { $materialmapping_item3->setAtt +ribute("description",decode('cp1252',$description_cell->{Val})); } #set Brands if($brand_Cmin gt 0 and $brand_Cma +x gt 0) { my $brand_string = ""; my $ignored_brand_string = ""; for(my $brandC = $brand_Cmin; +$brandC <= $brand_Cmax ; $brandC++) { $brand_cell_name = $oWkS-> +{Cells}[$title_row][$brandC]; if(defined $brand_cell_nam +e and decode('cp1252',$brand_cell_name->{Val}) ne "Epta std") { $brand_cell = $oWkS->{ +Cells}[$iR][$brandC]; if(defined $brand_cell + and length(decode('cp1252',$brand_cell->{Val})) gt 0) { if(decode('cp1252' +,$brand_cell->{Val}) eq '0') { $ignored_brand +_string = $ignored_brand_string . decode('cp1252',$brand_cell_name->{ +Val}) . ","; } else { $brand_string += $brand_string . decode('cp1252',$brand_cell_name->{Val}) . ","; } } } } if(length($brand_string) gt 0) +{ $materialmapping_item3->se +tAttribute('brand',$brand_string); } if(length($ignored_brand_strin +g) gt 0){ $materialmapping_item3->se +tAttribute('ignore_brand',$ignored_brand_string); } } #set Customer if($customer_Cmin gt 0 and $custom +er_Cmax gt 0) { my $customer_string = ""; my $ignored_customer_string = +""; for(my $customerC = $customer_ +Cmin; $customerC <= $customer_Cmax ; $customerC++) { $customer_cell_name = $oWk +S->{Cells}[$title_row][$customerC]; $customer_cell = $oWkS->{C +ells}[$iR][$customerC]; if(defined $customer_c +ell_name and defined $customer_cell and length(decode('cp1252',$custo +mer_cell->{Val})) gt 0 and length(decode('cp1252',$customer_cell_name +->{Val})) gt 0) { if(decode('cp1252' +,$customer_cell->{Val}) eq '0') { $ignored_custo +mer_string = $ignored_customer_string . decode('cp1252',$customer_cel +l_name->{Val}) . ","; } else { $customer_stri +ng = $customer_string . decode('cp1252',$customer_cell_name->{Val}) . + ","; } } } if(length($customer_string) gt + 0){ $materialmapping_item3->se +tAttribute('customer',$customer_string); } if(length($ignored_customer_st +ring) gt 0){ $materialmapping_item3->se +tAttribute('ignore_customer',$ignored_customer_string); } } #set Market if($market_Cmin gt 0 and $market_C +max gt 0) { my $market_string = ""; my $ignored_market_string = "" +; for(my $marketC = $market_Cmin +; $marketC <= $market_Cmax ; $marketC++) { $market_cell_name = $oWkS- +>{Cells}[$title_row][$marketC]; $market_cell = $oWkS->{Cel +ls}[$iR][$marketC]; if(defined $market_cel +l_name and defined $market_cell and length(decode('cp1252',$market_ce +ll->{Val})) gt 0 and length(decode('cp1252',$market_cell_name->{Val}) +) gt 0) { if(decode('cp1252' +,$market_cell->{Val}) eq '0') { $ignored_marke +t_string = $ignored_market_string . decode('cp1252',$market_cell_name +->{Val}) . ","; } else { $market_string + = $market_string . decode('cp1252',$market_cell_name->{Val}) . ","; } } } if(length($market_string) gt 0 +){ $materialmapping_item3->se +tAttribute('market',$market_string); } if(length($ignored_market_stri +ng) gt 0){ $materialmapping_item3->se +tAttribute('ignore_market',$ignored_market_string); + } } + + + + + + + $materialmapping_item = $materialmapping_item3; + $materialmapping_table_xml_root->addChild($materialmapping_ +item3); + } + + }}} + + + + + }} } } + + #set internal $materialmapping_item->setAttribut +e("internal",decode('cp1252',$internal_cell->{Val})); #set family $materialmapping_item->setAttribut +e("pr_family",$family); #set item_number $item_number_cell = $oWkS->{Cells} +[$iR][$artno_col]; if(defined $item_number_cell and l +ength(decode('cp1252',$item_number_cell->{Val})) gt 0) { $materialmapping_item->setAttr +ibute("item_number",decode('cp1252',$item_number_cell->{Val})); } #set colorzones $colorzones_cell = $oWkS->{Cells}[ +$iR][$colorzones_col]; if(defined $colorzones_cell and le +ngth(decode('cp1252',$colorzones_cell->{Val})) gt 0) { $materialmapping_item->setAttr +ibute("colorzone",decode('cp1252',$colorzones_cell->{Val})); } #set description $description_cell = $oWkS->{Cells} +[$iR][$tec_desc_col]; if(defined $description_cell and l +ength(decode('cp1252',$description_cell->{Val})) gt 0) { $materialmapping_item->setAttr +ibute("description",decode('cp1252',$description_cell->{Val})); } #set Brands if($brand_Cmin gt 0 and $brand_Cma +x gt 0) { my $brand_string = ""; my $ignored_brand_string = ""; for(my $brandC = $brand_Cmin; +$brandC <= $brand_Cmax ; $brandC++) { $brand_cell_name = $oWkS-> +{Cells}[$title_row][$brandC]; if(defined $brand_cell_nam +e and decode('cp1252',$brand_cell_name->{Val}) ne "Epta std") { $brand_cell = $oWkS->{ +Cells}[$iR][$brandC]; if(defined $brand_cell + and length(decode('cp1252',$brand_cell->{Val})) gt 0) { if(decode('cp1252' +,$brand_cell->{Val}) eq '0') { $ignored_brand +_string = $ignored_brand_string . decode('cp1252',$brand_cell_name->{ +Val}) . ","; } else { $brand_string += $brand_string . decode('cp1252',$brand_cell_name->{Val}) . ","; } } } } if(length($brand_string) gt 0) +{ $materialmapping_item->set +Attribute('brand',$brand_string); } if(length($ignored_brand_strin +g) gt 0){ $materialmapping_item->set +Attribute('ignore_brand',$ignored_brand_string); } } #set Customer if($customer_Cmin gt 0 and $custom +er_Cmax gt 0) { my $customer_string = ""; my $ignored_customer_string = +""; for(my $customerC = $customer_ +Cmin; $customerC <= $customer_Cmax ; $customerC++) { $customer_cell_name = $oWk +S->{Cells}[$title_row][$customerC]; $customer_cell = $oWkS->{C +ells}[$iR][$customerC]; if(defined $customer_c +ell_name and defined $customer_cell and length(decode('cp1252',$custo +mer_cell->{Val})) gt 0 and length(decode('cp1252',$customer_cell_name +->{Val})) gt 0) { if(decode('cp1252' +,$customer_cell->{Val}) eq '0') { $ignored_custo +mer_string = $ignored_customer_string . decode('cp1252',$customer_cel +l_name->{Val}) . ","; } else { $customer_stri +ng = $customer_string . decode('cp1252',$customer_cell_name->{Val}) . + ","; } } } if(length($customer_string) gt + 0){ $materialmapping_item->set +Attribute('customer',$customer_string); } if(length($ignored_customer_st +ring) gt 0){ $materialmapping_item->set +Attribute('ignore_customer',$ignored_customer_string); } } #set Market if($market_Cmin gt 0 and $market_C +max gt 0) { my $market_string = ""; my $ignored_market_string = "" +; for(my $marketC = $market_Cmin +; $marketC <= $market_Cmax ; $marketC++) { $market_cell_name = $oWkS- +>{Cells}[$title_row][$marketC]; $market_cell = $oWkS->{Cel +ls}[$iR][$marketC]; if(defined $market_cel +l_name and defined $market_cell and length(decode('cp1252',$market_ce +ll->{Val})) gt 0 and length(decode('cp1252',$market_cell_name->{Val}) +) gt 0) { if(decode('cp1252' +,$market_cell->{Val}) eq '0') { $ignored_marke +t_string = $ignored_market_string . decode('cp1252',$market_cell_name +->{Val}) . ","; } else { $market_string + = $market_string . decode('cp1252',$market_cell_name->{Val}) . ","; } } } if(length($market_string) gt 0 +){ $materialmapping_item->set +Attribute('market',$market_string); } if(length($ignored_market_stri +ng) gt 0){ $materialmapping_item->set +Attribute('ignore_market',$ignored_market_string); + } } + + $materialmapping_table_xml_root->addChild($materialmapping_item); + }; } } } } } print "Parsed $filename \n"; } } $materialmapping_table_xml->toFile($materialmapping_file,2); }
    example: read this excel table:
    | A | B | C | D | E | ===+========+==========+===========+=========+=========+ 1 | | Configurator mapping | ---+--------+----------+-----------+---------+---------+ 2 | active | internal | pr_family | pr1 | pr2 | ---+--------+----------+-----------+---------+---------+ 3 | | item 1 | f1 | 1,2,3 | a,b | ---+--------+----------+-----------+---------+---------+ 4 | | item 2 | f1,f2 | 1 | a | ---+--------+----------+-----------+---------+---------+ 5 | | item 3 | f1 | 2 | b | ---+--------+----------+-----------+---------+---------+
    When this spreadsheet is processed by the script, the output must be:
    <?xml version="1.0" encoding="UTF-8"?> <masterdata version="2012-07-27 01:51:57"> <item internal="item 1 " pr_family="f1" pr1="1" pr2="a"/> <item internal="item 1 " pr_family="f1" pr1="1" pr2="b"/> <item internal="item 1 " pr_family="f1" pr1="2" pr2="a"/> <item internal="item 1 " pr_family="f1" pr1="2" pr2="b"/> <item internal="item 1 " pr_family="f1" pr1="3" pr2="a"/> <item internal="item 1 " pr_family="f1" pr1="3" pr2="b"/> <item internal="item 2 " pr_family="f1" pr1="1" pr2="a"/> <item internal="item 2 " pr_family="f2" pr1="1" pr2="a"/> <item internal="item 3 " pr_family="f1" pr1="2" pr2="b"/>
    The script do this but generate a lot of duplicate lines.. the solution is filter duplicate line or modify the logic.. thankyou very much for your answer

      Phew, your indentation is overwhelming! Consider these examples to help:

      #move this line up (and simplify it): #if(length($materialmapping_file) gt 0 or length($data_folder) gt 0 +) unless (defined $materialmapping_file and defined $data_folder) { print "Usage: $0 <materialmapping_file> <data_folder>\n"; exit; } # Now, everything that follows can be outdented one level.

      Similarly:

      while( ($filename = readdir(DIR))){ #if($filename =~ /\.xls$/i) { next unless $filename =~ /\.xls$/i; # everything below can be outdented my $parser = Spreadsheet::ParseExcel->new(); my $oBook = $parser->parse($data_folder.$filename); #if (defined $oBook ) { next unless defined $oBook; # everything below can be outdented ...

      For your duplication problem, I think this might do:

      my %seen; # a new global variable ... # then, whenever you 'createElement' and/or 'setAttribute', first a +ssemble the string: #my $materialmapping_item2 = $materialmapping_table_xml->createElem +ent("item3"); #$materialmapping_item2->setAttribute(decode('cp1252',$pr_cell_name +2->{Val}),$pr2); my $entry = join',', decode('cp1252',$pr_cell_name2->{Val}),$pr2; # and create it if it doesn't already exist: if (!exists $seen{$entry}++){ my $materialmapping_item2 = $materialmapping_table_xml->createEl +ement("item3"); $materialmapping_item2->setAttribute($entry); }
        Thankyou very much for your help hbm!!! I try your duplication solution but give me a error: "exists argument is not a HASH or ARRAY element or a subroutine" Perl is too much difficult for me :(