#! /usr/local/bin/perl -w ## author: parv, parv UNDERSCORE fm AT emailuser DOT net ## ## date: jan 09 2003 ## ## version: 0.012 ## ## license: ## this software is free to be used in any form only if proper ## credit is given. i am not responsible for any kind of damage or ## loss. use it at your own risk. ## ## name: sanefilename.perl ## ## purpose: ## change characters in file names which are not composed of ## [-_.a-zA-Z0-9] characters. and do other changes; see ## sanitize() sub. ## ## usage: ## sanefilename.perl [-options] file [file2 file3 ...] ## ## for details, issue this command... ## ## sanefilename.perl -usage ## use strict; # pod module to generate, well, usage messages use Pod::Usage; # module to parse options use Getopt::Long qw(:config default require_order); # modules to move & parse file name use File::Basename; use File::Copy; # existent file name handler - sub dispatch table # my %existent = ( # option given to handle existent file names # 'opt' => [ 'notexist' , qr/^(?:ask|force|notexist)$/ ] # based on $existent{opt}->[0], will contain reference # to one of the following subs , 'handle' => undef # TO BE IMPLEMENTED , 'ask' => sub { return; } # force file move , 'force' => sub { my ($path , $old, $new , $exist) = @_; verbose(" '${path}${new}' already exists, will be overwritten...\n") if -e "${path}$new"; move_file($path , $old , $new , $exist); return; } # new file does't exist, so move , 'notexist' => sub { my ($path , $old, $new , $exist) = @_; unless ( -e "${path}$new" ) { move_file($path , $old , $new , $exist); return; } verbose(" '${path}${new}' already exists, '${path}${old}' skipped ...\n"); return; } ); # get & check options # ---- # save existent handling option separately so that it will be easy # to update valid options, $existent{opt}->[1], and related sub # references # ---- my $opt = get_opt( $existent{'opt'} ); # check if any arguments remain which will be file names # pod2usage( '-msg' => " give file name(s) to change to sane version(s)\n" , '-exitval' => 0 , '-verbose' => 0 ) unless scalar @ARGV; # save the appropriate sub reference based on dup option # to save time in foreach() due to double indirection # $existent{'handle'} = $existent{ $existent{'opt'}->[0] }; # regex to specify valid characters # my $char_re = valid_char( $opt->{'lower'} ); foreach (@ARGV) { my ($old_file , $path) = fileparse($_ , ''); # sanity check die " '$path' - not a writable directory, exiting... \n" unless -d $path || -w $path; unless ( -e $path . $old_file ) { verbose(" '${path}${old_file}' doesn't exist, skipped...\n"); next; } # if the old name is OK, skip # if ( $old_file !~ m/$char_re->[1]/ ) { print " '${path}${old_file}' - okay, skipped...\n" if $opt->{'verbose'} >= 3; next; } # calculate new file name my $new_file = sanitize( \$old_file , $char_re , $opt->{lower} ); # very unlikely scenario that _current_ file name will be same # as the calculated one from this _current_ name # if ( $old_file eq $new_file ) { verbose(<<_MSG_); sanitized new name is same as the old. ${path}${old_file} is not moved... _MSG_ next; } # call appropriate sub depending on the -exist option given # $existent{handle}->($path , $old_file , $new_file , $existent{'opt'}->[0] ); } print " ...done\n" if $opt->{verbose}; ## subroutines # calculate new file name from the old sub sanitize { # $old - old name # $char - regex (array ref) specifying valid character set # $lower - option if to exclude upper case letters # my ($old , $char , $lower) = @_; die "sanitize: expecting \$old and\$char to be defined\n" unless defined $old and ref $char eq 'ARRAY'; local $_ = $$old; tr [A-Z] [a-z] if $lower; # remove end non [-_.${alpha}\d] characters # #s/(?: $char->[1] $ | ^ $char->[1] )//x; # change all the "wrong" characters to - # s/$char->[1]/-/g; # prefer - to _ s/(?:_-|-_)/-/go; # prefer . to - or _ s/(?:\.[-_]|[-_]\.)/./go; # minimize the consecutive occurrence of . - _ to one of each # s/([-._]){2,}/$1/go; return $_; } sub move_file { my ($path , $old , $new , $exist) = @_; unless (defined $path and defined $old and defined $new) { verbose("move_file: \$path, \$old, \$new is/are undefined"); return; } $old = $path . $old; $new = $path . $new; my $old_new = sub { printf " '%s' -> '%s'\n", ($old , $new); }; $old_new->() if $opt->{'nomove'} or $opt->{'verbose'} >= 2 or ($opt->{'verbose'} && ($exist eq 'force')); return if $opt->{'nomove'}; move("$old" , "$new") || die "couldn't move '$old' to '$new': $!\n" ; return; } sub valid_char { my $low = shift; # create list to be used as regex in file renaming # my $char = '-_.0-9a-z'; $char .= 'A-Z' unless $low; # return two regexen: latter is the complement of the former # return [ qr/[$char]/ , qr/[^$char]/ ]; } sub get_opt { # existent handling option is saved in this array at [0] my $exist = shift; my %opt = ( # supersedes any option, shows program usage 'usage' => 0 # control amount of output generated , 'verbose' => 2 # check option if names needed to be lowercased , 'lower' => 0 # instead of actual move, only show the new name , 'nomove' => 0 ); # get options GetOptions( 'usage|help' => \$opt{'usage'} , 'exist=s' => \$exist->[0] , 'ask' => sub { $exist->[0] = 'ask' } , 'force' => sub { $exist->[0] = 'force' } , 'notexist|skip' => sub { $exist->[0] = 'notexist' } , 'quiet' => sub { $opt{'verbose'} = 0 } , 'verbose=i' => \$opt{'verbose'} , 'lower' => \$opt{'lower'} , 'nomove' => \$opt{'nomove'} ) || die pod2usage('-exitval' => 2 , '-verbose' => 1); # exit normally is asked for usage # pod2usage('-exitval' => 0 , '-verbose' => 3) if $opt{'usage'}; # die horribly due to wrong option given # pod2usage( '-msg' => " incorrect (existent and/or verbose options were given\n" , '-exitval' => 1 , '-verbose' => 0 ) unless check_opt($exist , $opt{'verbose'}); return \%opt; } sub check_opt { my ($exist , $verbose) = @_; return ( $verbose =~ m/^\d+$/ && $exist->[0] =~ m/$exist->[1]/ ); } sub verbose { warn(@_) if $opt->{'verbose'}; return; } __DATA__ ## start usage documentation =head1 NAME sanefilename.perl - undo odd characters in file names =head1 SYNOPSIS B [-options] F [F F ...] B -usage =head1 DESCRIPTION sanefilename.perl changes characters in file names which are not composed of C<[-_.a-zA-Z0-9]> characters. all the characters not matching C<[-_.a-zA-Z0-9]> are converted to I<->. I<-_> or I<_-> sequence is changed to single I<->. Any sequence of I<.->, I<._>, I<-.>, I<_.> is changed to single I<.>. multiple occurrences of [-_.] are changed to one. =head1 OPTIONS =over =item B<-usage> shows the whole program documentation; supersedes any other option =item B<-nomove> do not actually move the files, just print the old given names and new sanitized ones. =item B<-lower> excludes [A-Z] characters from the [-_.a-zA-Z0-9] character set. =item B<-quiet> same as -verbose=0 =item B<-verbose>=I<0>|I<1>|I<2>|I<3> controls how much output is generated; larger is the number, more the output be. =over =item 0 only fatal error messages are generated =item 1 as 0, plus non-fatal error messages are shown =item 2 default. as 1, plus converted file names are shown =item 3 as 2, plus OK/unchanged old file names are shown =back =item B<-ask> (unimplemented) =item B<-force> =item B<-notexist> | B<-skip> =item B<-exist>=I|I|I|I controls the behaviour when sanitized new file name is as an existing file. if the new sanitized name is same as old given name, that is not considered to be an already existent file. consider there are two files: I and I. the sanitized name for the first file, without the B<-lower> option, would be I. since this soon-to-be file already exists, it creates a good situtation for providing an option. the file name I, without the B<-lower> option, produces the same new name. any further processing or file moving is skipped, except for any messages requested via B<-verbose> option. =over =item ask (unimplemented) an alternative name is asked to enter. =item force old given file is moved to the new calculated name, obliterating an already existing file. =item notexist | skip default. move old file to new name only if it does not exist. =back =back =head1 See Also see these fine perlpods too: =over =item * File::Basename =item * File::Copy =back =head1 Distribution and such parv, parv UNDERSCORE fm AT emailuser DOT net jan 09 2003 version 0.012 this software is free to be used in any form only if proper credit is given. i am not responsible for any kind of damage or loss. use it at your own risk. =cut