Here's my refactored code. My intention was to make it readily adaptable to threading. The intended 'worker' subroutine is probe_volume(). I've probably missed the mark entirely, but with guidance from you and other kind monks, I'm hoping I can finally write my first truly useful parallel program.
#!perl
#
# CountFilesRecords.pl
use strict;
use warnings;
use Capture::Tiny qw( capture_stdout );
use English qw( -no_match_vars );
use File::Glob qw( bsd_glob );
use Text::CSV_XS;
# Require at least one export volume folder (or glob pattern) on the
# command line.
@ARGV or die "Usage: perl $PROGRAM_NAME <export volume folder> ...\n";

# Expand globs...
# Backslashes are converted to forward slashes first; the rest of the script
# splits paths on '/' to find the volume name.
local @ARGV = map { $ARG =~ tr{\\}{/}; bsd_glob($ARG) } @ARGV;

# A wildcard pattern that matches nothing expands to an empty list. Report
# that explicitly rather than ending without any message; exit status 1 is
# the same status the script uses when it finds no complete volume.
if (!@ARGV) {
    warn "No export volume folders matched the given arguments\n";
    exit 1;
}

# Every print gets a trailing newline, and the selected output handle is
# flushed after each write.
local $OUTPUT_RECORD_SEPARATOR = "\n";
local $OUTPUT_AUTOFLUSH = 1;

# Column headings of the CSV report, in the same order as the values in the
# array reference returned by probe_volume().
my @CSV_FIELD_LABELS = qw(
    ExportVolumeFolder
    TotalDATRecords
    TotalTextFiles
    TotalLFPRecords
    TotalImageFiles
);

# Refuse to start unless every expanded argument is an existing directory.
for my $volume_folder (@ARGV) {
    -d $volume_folder
        or die "Export volume folder $volume_folder doesn't exist\n";
}
# Folders that passed the completeness check, in command-line order.
my @volume_folders;

# Work description per volume, keyed by volume folder path. Each entry holds
# the folder name and, for each of the four things to count, the shell
# command that counts it plus a COUNT slot that probe_volume() fills in.
my %stuff_by;

VOLUME_FOLDER:
for my $volume_folder (@ARGV) {
    # The volume name is the last component of the folder path; the DAT and
    # LFP files inside the folder carry that same name.
    my $volume_name   = (split m{/}, $volume_folder)[-1];
    my $text_folder   = "$volume_folder/TEXT";
    my $images_folder = "$volume_folder/IMAGES";
    my $dat_file      = "$volume_folder/$volume_name.dat";
    my $lfp_file      = "$volume_folder/$volume_name.lfp";

    # Check for completed export volumes, report incomplete ones...
    my $is_complete = (
           -d $text_folder
        && -d $images_folder
        && -f $dat_file
        && -f $lfp_file
    );
    if (!$is_complete) {
        # Incomplete volumes are named on STDERR so they stay out of the CSV
        # written to STDOUT. The newline comes from $OUTPUT_RECORD_SEPARATOR.
        print {*STDERR} $volume_folder;
        next VOLUME_FOLDER;
    }

    push @volume_folders, $volume_folder;

    # NOTE(review): the paths are interpolated into shell commands inside
    # double quotes only. A folder name containing a double quote, '$' or a
    # backtick would change the command that is run -- confirm that such
    # names cannot occur, or switch to a shell-free way of counting.
    $stuff_by{$volume_folder} = {
        FOLDER_NAME => $volume_folder,
        TEXT_FILES  => {
            COMMAND => qq( find "$text_folder" -type f -name "*.txt" | wc -l ),
            COUNT   => 0,
        },
        IMAGE_FILES => {
            # Thumbs.db files are not counted as images.
            COMMAND => qq( find "$images_folder" -type f ! -name Thumbs.db | wc -l ),
            COUNT   => 0,
        },
        DAT_RECORDS => {
            COMMAND => qq( wc -l "$dat_file" ),
            COUNT   => 0,
        },
        LFP_RECORDS => {
            COMMAND => qq( wc -l "$lfp_file" ),
            COUNT   => 0,
        },
    };
}
# Quit if there are no completed export volume folders...
exit 1 unless @volume_folders;

# binary => 1 allows fields (folder names) that contain bytes outside the
# printable ASCII range; without it such a record cannot be written.
# NOTE(review): no eol is configured, so record termination presumably comes
# from $OUTPUT_RECORD_SEPARATOR -- confirm before changing either setting.
my $csv = Text::CSV_XS->new({ binary => 1 })
    or die 'Cannot create CSV writer: ' . Text::CSV_XS->error_diag() . "\n";

# Print CSV header...
$csv->print(\*STDOUT, \@CSV_FIELD_LABELS)
    or die 'Cannot write CSV header: ' . $csv->error_diag() . "\n";

for my $volume_folder (@volume_folders) {
    # Print CSV record...
    my $record = probe_volume($stuff_by{$volume_folder});

    # A record that cannot be written is reported and skipped, so one bad
    # volume does not abort the report for the remaining ones.
    $csv->print(\*STDOUT, $record)
        or warn "Cannot write CSV record for $volume_folder: "
              . $csv->error_diag() . "\n";
}

exit 0;
# probe_volume( $vol )
#
# Runs the four counting commands stored in a volume description and returns
# the results as one report row.
#
# $vol is a hash reference with a FOLDER_NAME entry and, under each of the
# keys TEXT_FILES, IMAGE_FILES, DAT_RECORDS and LFP_RECORDS, a hash holding
# the shell COMMAND to run and a COUNT slot. The COUNT slots are overwritten
# in place.
#
# Returns an array reference:
#   [ folder name, DAT records, text files, LFP records, image files ]
# A count is undef when its command produced no number.
sub probe_volume {
    my $vol = shift;

    for my $stuff (qw( TEXT_FILES IMAGE_FILES DAT_RECORDS LFP_RECORDS )) {
        # count_stuff() collects the command's standard output itself via
        # qx//, so the call needs no additional output capturing around it.
        $vol->{$stuff}{COUNT} = count_stuff($vol->{$stuff}{COMMAND});
    }

    # The first line of every DAT file is a header
    # Only subtract it when there is a positive count: an empty DAT file or
    # a failed count must not show up as -1 records.
    $vol->{DAT_RECORDS}{COUNT}-- if $vol->{DAT_RECORDS}{COUNT};

    return [
        $vol->{FOLDER_NAME},
        $vol->{DAT_RECORDS}{COUNT},
        $vol->{TEXT_FILES}{COUNT},
        $vol->{LFP_RECORDS}{COUNT},
        $vol->{IMAGE_FILES}{COUNT},
    ];
}
# count_stuff( $command )
#
# Runs $command through the shell and returns the first run of digits found
# in its standard output (for "wc -l" style output that is the count).
#
# Returns undef, after emitting a warning, when the command could not be run
# or its output contains no digits. Only standard output is inspected; what
# the command writes to standard error is not captured here.
sub count_stuff {
    my $command = shift;

    my $output = qx( $command );

    # Remember the child status right away, before anything can overwrite it.
    my $status = $CHILD_ERROR;

    # qx// gives undef when the command could not be started; skip the match
    # in that case instead of matching against an undefined value.
    my $count;
    if (defined $output) {
        ($count) = $output =~ m/(\d+)/;
    }

    if (!defined $count) {
        warn "No count obtained from command '$command' (wait status $status)\n";
    }

    return $count;
}