#!C:\strawberry\perl\bin\perl ####Load Packages use strict; use warnings; use Win32::OLE qw(in with); use Win32::OLE::Const 'Microsoft Excel'; use Win32::OLE::Variant; use Win32::OLE::NLS qw(:LOCALE :DATE); use List::MoreUtils qw(natatime); use Excel::Writer::XLSX; ############################################################ ####Initialize Excel Object and Count Files to Process $Win32::OLE::Warn = 3; #die on errors... #get already active Excel application or open new my $Excel = Win32::OLE->GetActiveObject('Excel.Application') || Win32::OLE->new( 'Excel.Application', 'Quit' ); #Get the file names of the production files and prints a status of the results my $ProdDirect = 'C:/Users/McLovin/Documents/Thesis/Data/'; opendir DH, $ProdDirect or die "Cannot open $ProdDirect: $!"; my @files = grep { !-d } readdir DH; print "@files\n"; closedir DH; my $NumberofFiles = @files; print "Number of files is: $NumberofFiles\n"; ################################################################################ ################################################################# ################Start Processing the Data######################## ################################################################# my $counti; #used to count the excelfiles my $usefile; #The file that is being used my @recID; #an array of ID's for the worksheets my $ID; #The actual ID being pushed to the array my $val; #A test value that is used to test whether a row should be included my @right ; #The first of the arrays that will be filled during the process and will be further disected later my @prod; #The second array, same as above my @legal; #The third ####declare a lot of variables used below when getting information from the excel sheets my $c1; my $c4; my $c5; my $c6; my $c7; my $c8; my $c9; my $c10; my $c11; my $c12; my $c13; my $c14; my $c15; my $c17; my $c18; my $c20; my $c21; my $c22; my $c23; my $c24; my $c25; my $c26; my $c27; my $c32; my $c33; my $c34; my $c35; my $c36; my $c37; my $c38; my $c39; #####Loop over all the files for ( $counti = 0 ; $counti < $NumberofFiles ; $counti++ ) { ##### open Excel file -- This is the input data files print "Working on file $counti of $NumberofFiles\n"; $usefile = $files[$counti]; ###This points to the folder where the excel files are placed my $workfile = "C:/Users/McLovin/Documents/Thesis/Datatal/" . "$usefile"; print "$workfile\n"; my $Book = $Excel->Workbooks->Open($workfile); #####Work on each sheet in the workbook #####This is the number of worksheets in the workbook my $sheetcnt = $Book->Worksheets->Count(); foreach my $r (1) { my $Sheet = $Book->Worksheets($r); $ID = $Sheet->{Name}; push @recID, $ID; print "Worksheet name is $Sheet->{Name}\n"; ##work on each row and column my $Tot_Rows = $Sheet->UsedRange->Rows->{'Count'}; my $Tot_Cols = $Sheet->UsedRange->Columns->{'Count'}; #Extract the necessary information from the rows and columns and places them in three arrays @right @legal and @prod foreach my $row ( 1 .. $Tot_Rows ) { foreach my $col (1) { # skip empty cells next unless defined $Sheet->Cells( $row, $col )->{'Value'}; $val = $Sheet->Cells( $row, $col )->{'Value'}; if ( $val =~ /^\d{10}$/ ) { #Gets column one information foreach my $col1 (1) { next unless defined $Sheet->Cells( $row, $col1 )->{'Value'}; $c1 = $Sheet->Cells( $row, $col1 )->{'Value'}; push @right, $c1; push @legal, $c1; push @prod, $c1; #Gets region from column 2 foreach my $col4 (4) { next unless defined $Sheet->Cells( $row, $col4 )->{'Value'}; $c4 = $Sheet->Cells( $row, $col4 )->{'Value'}; push @right, $c4; push @legal, $c4; push @prod, $c4; } #Gets column 3 information foreach my $col5 (5) { next unless defined $Sheet->Cells( $row, $col5 )->{'Value'}; $c5 = $Sheet->Cells( $row, $col5 )->{'Value'}; push @right, $c5; push @legal, $c5; push @prod, $c5; } #Get the description that needs to be processed later foreach my $col6 (6) { next unless defined $Sheet->Cells( $row, $col6 )->{'Value'}; $c6 = $Sheet->Cells( $row, $col6 )->{'Value'}; push @legal, $c6; } #Gets area foreach my $col7 (7) { next unless defined $Sheet->Cells( $row, $col7 )->{'Value'}; $c7 = $Sheet->Cells( $row, $col7 )->{'Value'}; push @right, $c7; } #Gets column 8 information foreach my $col8 (8) { next unless defined $Sheet->Cells( $row, $col8 )->{'Value'}; $c8 = $Sheet->Cells( $row, $col8 )->{'Value'}; push @right, $c8; } #Gets column 9 foreach my $col9 (9) { next unless defined $Sheet->Cells( $row, $col9 )->{'Value'}; $c9 = $Sheet->Cells( $row, $col9 )->{'Value'}; push @right, $c9; } #and so on.... foreach my $col10 (10) { next unless defined $Sheet->Cells( $row, $col10 )->{'Value'}; $c10 = $Sheet->Cells( $row, $col10 )->{'Value'}; push @right, $c10; push @legal, $c10; } #same foreach my $col11 (11) { next unless defined $Sheet->Cells( $row, $col11 )->{'Value'}; $c11 = $Sheet->Cells( $row, $col11 )->{'Value'}; push @right, $c11; push @legal, $c11; } #same foreach my $col12 (12) { $c12 = $Sheet->Cells( $row, $col12 )->{'Value'}; push @right, $c12; push @prod, $c12; } #same foreach my $col13 (13) { $c13 = $Sheet->Cells( $row, $col13 )->{'Value'}; push @prod, $c13; } #same foreach my $col14 (14) { $c14 = $Sheet->Cells( $row, $col14 )->{'Value'}; push @prod, $c14; } #same foreach my $col15 (15) { $c15 = $Sheet->Cells( $row, $col15 )->{'Value'}; push @prod, $c15; } #same foreach my $col17 (17) { $c17 = $Sheet->Cells( $row, $col17 )->{'Value'}; push @prod, $c17; } #same foreach my $col18 (18) { $c18 = $Sheet->Cells( $row, $col18 )->{'Value'}; push @prod, $c18; } #same foreach my $col20 (20) { $c20 = $Sheet->Cells( $row, $col20 )->{'Value'}; push @prod, $c20; } #same foreach my $col21 (21) { $c21 = $Sheet->Cells( $row, $col21 )->{'Value'}; push @prod, $c21; } #same foreach my $col22 (22) { $c22 = $Sheet->Cells( $row, $col22 )->{'Value'}; push @prod, $c22; } #same foreach my $col23 (23) { $c23 = $Sheet->Cells( $row, $col23 )->{'Value'}; push @prod, $c23; } #same foreach my $col24 (24) { $c24 = $Sheet->Cells( $row, $col24 )->{'Value'}; push @prod, $c24; } #same foreach my $col25 (25) { $c25 = $Sheet->Cells( $row, $col25 )->{'Value'}; push @prod, $c25; } #same foreach my $col26 (26) { $c26 = $Sheet->Cells( $row, $col26 )->{'Value'}; push @prod, $c26; } #same foreach my $col27 (27) { $c27 = $Sheet->Cells( $row, $col27 )->{'Value'}; push @prod, $c27; } #32-39 production data foreach my $col32 (32) { $c32 = $Sheet->Cells( $row, $col32 )->{'Value'}; push @prod, $c32; } foreach my $col33 (33) { $c33 = $Sheet->Cells( $row, $col33 )->{'Value'}; push @prod, $c33; } foreach my $col34 (34) { $c34 = $Sheet->Cells( $row, $col34 )->{'Value'}; push @prod, $c34; } foreach my $col35 (35) { $c35 = $Sheet->Cells( $row, $col35 )->{'Value'}; push @prod, $c35; } foreach my $col36 (36) { $c36 = $Sheet->Cells( $row, $col36 )->{'Value'}; push @prod, $c36; } foreach my $col37 (37) { $c37 = $Sheet->Cells( $row, $col37 )->{'Value'}; push @prod, $c37; } foreach my $col38 (38) { $c38 = $Sheet->Cells( $row, $col38 )->{'Value'}; push @prod, $c38; } foreach my $col39 (39) { $c39 = $Sheet->Cells( $row, $col39 )->{'Value'}; push @prod, $c39; } } } } } } $Book->Close; } ################################################################################ #Prints of the created arrays #print "@right\n"; #print "@legal\n"; #print "@prod\n"; ################################################################################ #################################################################################### #the process that eliminates duplicates in the @right array based on criteria #makes a copy of the array as it is destroyed in the next while statement, This is maybe one of the problems my @rig = @right; #copy of array my @righ; # a new array that is the result of the while statement below my %seen; # a hash that stores agreementnumbers for unique entries #The array is in exact sets of 9 strings and i want it spliced a intervals of exactly those intervals while ( my ( $m, $n, $o, $p, $q, $r, $s, $t, $y ) = splice( @right, 0, 9 ) ) { last if $m !~ /^\d{10}$/; next if $seen{$m}++; if ( $n =~ /Specific_region/ ) { if ( $o =~ /NG/ ) { push @righ, $m, $o, $p, $q, $r, $s, $t; } } } # print "@righ\n"; my @leg; #an array that holds the result for the next while statement my @spli; #used as a container for certain entries in the while statement my @joi; #another middle of equation array for picking up results %seen = () ; #emties the previous hash as the uniqueness of entries is also important here my @tes; #yet another array for picking up results ################################################################################ #the process that insures that each returned value is printed in the correct #form and coupled with the ten digit number #again the array is organized in 6 values in a row that need to be seperated out into rows. while ( my ( $h, $aa, $rr, $j, $k, $l ) = splice( @legal, 0, 6 ) ) { last if $h !~ /^\d{10}$/; if ( $aa =~ /Specific_region/ ) { if ( $rr =~ /NG/ ) { if ( $j =~ /\n/ ) { next if $seen{$h}++; my @spli = split( /\n/, $j ); foreach my $n (@spli) { if ( $n =~ /LSD/ ) { my @tes = split( /LSD/, $n ); foreach my $lon (@tes) { if ( $lon =~ /SEC/ ) { my @joi = split( /-|W|:|\s|,|\(/, $lon ); my $chans = @joi; my $eleg = join( "", @joi[ 0, 2, 1, $chans - 1 ] ); push @leg, $eleg, $h, $k, $l; } } } elsif ( $n =~ /\(/ ) { my @joi = split( /-|W|:|\s|\(/, $n ); my $chans = @joi; my $eleg = join( "", @joi[ 0, 2, 1, $chans - 2 ] ); push @leg, $eleg, $h, $k, $l; } else { my @joi = split( /-|W|:|\s/, $n ); my $chans = @joi; my $eleg = join( "", @joi[ 0, 2, 1, $chans - 1 ] ); push @leg, $eleg, $h, $k, $l; } } } else { next if $seen{$h}++; my @joi = split( /-|W|:|\s/, $j ); my $chans = @joi; my $eleg = join( "", @joi[ 0, 2, 1, $chans - 1 ] ); push @leg, $eleg, $h, $k, $l; } } } } #print "@leg\n"; my @peg; foreach my $loma (@leg) { if ( $loma =~ /^\d{7}$/ ) { substr( $loma, 6, 0, 0 ); push @peg, $loma; } else { push @peg, $loma; } } #################################################################################### #the process that creates the production array for the entries my @produ; while ( my ( $cp, $aaa, $rrr, $dp, $ep, $fp, $gp, $hp, $ip, $jp, $kp, $lp, $mp, $np, $op, $pp, $qp, $rp, $sp, $tp, $up, $vp, $wp, $yp, $xp ) = splice( @prod, 0, 25 ) ) { last if $cp !~ /^\d{10}$/; if ( $aaa =~ /Specific_region/ ) { if ( $rrr =~ /NG/ ) { unless ( $dp =~ /a specific repeated text for all relevant entries/ ) { #eliminate this if statement for option two, where entries with no actual production is included if ( defined($qp) && $qp =~ /\d\d-\d\d-\d{4}/ ) { push @produ, $dp, $ep, $cp, $fp, $gp, $hp, $ip, $jp, $kp, $lp, $mp, $np, $op, $pp, $qp, $rp, $sp, $tp, $up, $vp, $wp, $yp, $xp; } } } } } # print "@produ\n"; my @nwells; #an array that collects the results #creates the 4.2 entries "agreements with no wells while ( my ( $mn, $nn, $on, $pn, $qn, $rn, $sn, $tn, $yn ) = splice( @rig, 0, 9 ) ) { last if $mn !~ /^\d{10}$/; if ( $nn =~ /Specific_region/ ) { if ( $on =~ /NG/ ) { if ( $yn =~ /a specific repeated text for all relevant entries/ ) { push @nwells, $mn, $on, $pn, $qn, $rn, $sn, $tn, $yn; } } } } # print "@nwells\n"; #Places results into arrays of arrays for easy computation in excel. uses natatime again the arrays # are of a specific size. per row. my @AAR; { my $iter = natatime 7, @righ; while ( my @tmp = $iter->() ) { push @AAR, \@tmp; } } my @BAR; { my $iter = natatime 4, @peg; while ( my @tmp = $iter->() ) { push @BAR, \@tmp; } } my @CAR; { my $iter = natatime 23, @produ; while ( my @tmp = $iter->() ) { push @CAR, \@tmp; } } my @DAR; { my $iter = natatime 8, @nwells; while ( my @tmp = $iter->() ) { push @DAR, \@tmp; } } #####The new excel sheets that should contain the results my $workbooknew = Excel::Writer::XLSX->new('re1.xlsx'); my $worksheetnew = $workbooknew->add_worksheet(); $worksheetnew->keep_leading_zeros(); $worksheetnew->set_column( 'A:G', 30 ); $worksheetnew->write_col( 'A2', \@AAR ); ####This is the data on the legalnumber - introduce keep_leading zeroes for correct legal form my $workbooknew1 = Excel::Writer::XLSX->new('re2.xlsx'); my $worksheetnew1 = $workbooknew1->add_worksheet(); $worksheetnew1->keep_leading_zeros(); $worksheetnew1->set_column( 'A:D', 15 ); $worksheetnew1->write_col( 'A2', \@BAR ); # # ####This is the data on the production of the wells - introduce keep_leading zeroes for correct legal form my $workbooknew2 = Excel::Writer::XLSX->new('re3.xlsx'); my $worksheetnew2 = $workbooknew2->add_worksheet(); my $worksheetnew3 = $workbooknew2->add_worksheet(); $worksheetnew2->set_column( 'A:W', 50 ); $worksheetnew2->write_col( 'A2', \@CAR ); $worksheetnew3->set_column( 'A:H', 30 ); $worksheetnew3->write_col( 'A2', \@DAR );