use strict;
use Benchmark;
use File::Find ();
# find2perl emits these typeglob aliases for File::Find's $name, $dir and
# $prune as boilerplate; the code shown in this script never reads them.
use vars qw(*name *dir *prune);
*name  = *File::Find::name;
*dir   = *File::Find::dir;
*prune = *File::Find::prune;

# Time both directory-listing strategies over the same number of runs.
my $iterations = 10;
my %contenders = (
    'File::Find' => \&find2perl,
    'Shell:Find' => \&shellfind,
);
timethese( $iterations, \%contenders );

# Result list filled by both contenders; nothing reads it at present.
my @found;
# Benchmark contender: walk the tree under "." with File::Find, letting
# wanted() collect the directories (the find() call was generated by
# find2perl).
sub find2perl {
    @found = ();    # every timed run starts from an empty result list
    my %options = ( wanted => \&wanted );
    File::Find::find( \%options, '.' );
}
# File::Find callback (originally generated by find2perl): append the
# current entry to @found when it is a directory.
sub wanted {
    # lstat() does not follow symlinks, so a symlink that points at a
    # directory is not counted.  Only its success matters here, so it is
    # called in scalar context instead of unpacking fields nobody reads;
    # the "_" filehandle then reuses that lstat result for the -d test.
    # With File::Find's default settings $_ holds the entry's name within
    # its containing directory, not the full path from ".".
    lstat($_) && -d _ && push @found, $_;
}
# Benchmark contender: let the external find(1) command list every
# directory under ".".
sub shellfind {
    # In list context the command output is split into one element per
    # line, with the trailing newline left on each element.
    @found = qx{find . -type d};
}
__END__
# OUTPUT:
Benchmark: timing 10 iterations of File::Find, Shell:Find...
File::Find: 27 wallclock secs (20.87 usr 4.01 sys + 0.01 cusr 0.00 csys = 24.89 CPU) @ 0.40/s (n=10)
Shell:Find: 5 wallclock secs ( 0.21 usr 0.01 sys + 1.28 cusr 3.36 csys = 4.86 CPU) @ 45.45/s (n=10)
# In one test run I printed scalar(@found); the timings above were
# obtained in a tree with over 6K directories under "."
####
# don't use File::Find;
##
##
#!/usr/bin/perl
# Program: find-new-files.perl
# Purpose: initialize and maintain a record of files in a
# directory tree
# Written by: dave graff
# If a file called "paths.logged" does not exist in the cwd, we create
# one, and treat all contents under cwd as "new". If "paths.logged"
# already exists, we find directories with modification dates more
# recent than this file, and treat only these as "new".
# For each "new" directory, assume a file.manifest is there (create an
# empty one if there isn't one), and diff that file against the current
# inventory of data files, storing all new files to an array.
# Of course, this will fail in all paths where the current user does
# not have write permission, but such paths can be avoided by adding
# a suitable condition to the first "find" command.
use strict;
use warnings;

my $path_log = "paths.logged";
my ( $list_name, $new_list ) = ( "file.manifest", "new.manifest" );
my $out_name = "new-file-path.list";

# On the first run (no log file yet) every directory is treated as "new";
# afterwards only directories modified more recently than the log file are.
# Add "-user uname" and/or "-group gname" to the find command to avoid
# directories where the current user might not have write permission.
my $new_flag = ( -e $path_log ) ? "-newer $path_log" : "";
my @new_dirs = `find . -type d $new_flag`;
chomp @new_dirs;

# Build the shell pipeline that reports the new files of one directory.
# The shell functions in the returned command will:
#  - chdir to the given path,
#  - create file.manifest there if it does not yet exist,
#  - find data files in that path (not subdirs, not files in subdirs, and
#    not the two manifest files themselves, which would otherwise show up
#    as "new" data -- new.manifest unpredictably so, because tee creates
#    it while find is still scanning),
#  - create a "new.manifest" file containing this current file list,
#  - diff the new list of files against the existing file.manifest,
#  - return only current files not found in the existing manifest.
# Since it runs in a sub-shell, the chdir is forgotten when it is done.
sub diff_command_for {
    my ($dir) = @_;

    # The path goes inside single quotes; an embedded single quote has to
    # be written as '\'' or it would end the quoting early.
    ( my $quoted = $dir ) =~ s/'/'\\''/g;

    # -maxdepth comes first: GNU find warns when it follows a test such as
    # -type.  grep is anchored so that only diff's "< " lines pass, not
    # "> " lines whose file name happens to contain a "<".
    return "cd '$quoted' && touch $list_name && "
        . "find . -maxdepth 1 -type f ! -name $list_name ! -name $new_list"
        . " | tee $new_list | diff - $list_name | grep '^<'";
}

open( my $out, '>', $out_name )
    or die "cannot open $out_name for writing: $!\n";

foreach my $path (@new_dirs) {
    my $cmd = diff_command_for($path);

    # Each line of output looks like "< ./name\n"; replace the leading
    # "< ." with the directory path and keep the newline in place.
    for my $line (`$cmd`) {
        $line =~ s{^< \.}{$path};
        print {$out} $line;
    }

    # replace the old manifest:
    rename "$path/$new_list", "$path/$list_name"
        or warn "failed to update $path/$list_name: $!\n";
}

# Buffered write errors only surface at close, so check it.
close $out or die "error closing $out_name: $!\n";

# Done last, so the log's timestamp is later than the manifest updates
# made above.
system( 'touch', $path_log ) == 0
    or warn "failed to touch $path_log\n";