Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Searching all Excel files in a directory

by starchild (Sexton)
on Feb 21, 2005 at 23:43 UTC ( #433192=snippet: print w/replies, xml ) Need Help??
Description: This is a snippet of code that searches all Excel files in a directory for certain pieces of data. If any matches are found, their location is saved in a log file. It uses the technique described here to reduce memory consumption. This has come in quite handy at work, where we receive Excel files on a daily basis and occasionally need to find out which file some piece of information resides in.

Update: Based on a suggestion, I changed the code to use the qr// operator, so that the regular expressions are compiled only once, instead of every time a cell is searched.

use strict;
use warnings;
use File::Basename;
use Spreadsheet::ParseExcel;

$| = 1;

my $excelDir = "c:/documents and settings/administrator/my documents/m
+iddle_earth";

my @searchTerms = ( qr/FRODO/, qr/SAM/, qr/ARWEN/, qr/ARAGORN/ );

#maps column number to column letter as used by Excel
my @columnMap = ('A'..'ZZ');

#any matches found are printed to log file
my $logName = "excel_search.log";
open(my $fh, ">$logName") or die "can't open $logName: $!";

my $parse_excel = Spreadsheet::ParseExcel->new(
  CellHandler => \&cell_handler,
  NotSetCell => 1
);

#escape blanks in the excel directory path
$excelDir =~ s/ /\\ /g;

my $fileMatches;
my $totalMatches = 0;

foreach my $file (glob("$excelDir/*.xls")) {
  $fileMatches = 0;
  print "Searching " . basename($file) . "... ";

  $parse_excel->Parse($file);

  print $fileMatches, " matches found.\n\n";
  $totalMatches += $fileMatches;
}

print $totalMatches, " total matches were found.  See $logName for det
+ails\n";

close($fh);


sub cell_handler {
  my ($workbook, $sheet_index, $row, $col, $cell) = @_;

  foreach my $re (@searchTerms) {
    if ($cell->Value =~ $re) {
      #found a match; print out details
      print $fh "File:  ", basename($workbook->{File}), "\n",
        "Sheet: ", $workbook->{Worksheet}[$sheet_index]->{Name}, "\n",
        "Cell: ", $columnMap[$col], ":", $row, "\n",
        "Cell Contents: ", $cell->Value, "\n\n";

      $fileMatches++;
    } #end if
  } #end foreach

}
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: snippet [id://433192]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others chanting in the Monastery: (5)
As of 2021-08-04 10:26 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    My primary motivation for participating at PerlMonks is: (Choices in context)








    Results (41 votes). Check out past polls.

    Notices?