use strict;
use warnings;
use diagnostics;
use Text::CSV_XS;
use File::Find;
use File::Slurp qw(edit_file);
use List::Util qw(uniq);

my $spreadsheet = Text::CSV_XS->new({ binary => 1, auto_diag => 1 });

# Consume the first argument (the spreadsheet input) and push its
# contents to an array of arrays.
open my $fh, "<", $ARGV[0]
    or die "Cannot open spreadsheet '$ARGV[0]': $!\n";
my @table;
while (my $row = $spreadsheet->getline($fh)) {
    push @table, $row;
}
close $fh;

# Sort the array-of-arrays by the length of the first element of each
# subarray (our 'keys') such that the longest key comes first and the
# shortest comes last; ties broken alphabetically.  Longest-first order
# matters later: longer keys must be tried before their substrings.
@table = sort {
    length $b->[0] <=> length $a->[0]
        || $a->[0] cmp $b->[0]
} @table;

# CSVs are received from another department with three columns, where
# the second denotes part of speech.  This isn't needed for this
# operation, so discard it.  Keys are also escaped with quotemeta so
# they can be interpolated safely into match/substitution patterns.
for my $row (@table) {
    splice @$row, 1, 1;
    $row->[0] = quotemeta $row->[0];
}

my @rows_to_cull;

# Pairwise comparison of rows to detect duplicate keys.  The sort above
# guarantees duplicates are adjacent, so we only walk forward from each
# row until the key changes.  All values for a duplicated key are
# collected into a single arrayref on the first row; the later rows are
# remembered for removal.
for my $row_a (0 .. $#table) {
    # Start the candidate list with this row's own (scalar) value.
    my @values = ( $table[$row_a]->[1] );
    for my $row_b ($row_a + 1 .. $#table) {
        last if $table[$row_a]->[0] ne $table[$row_b]->[0];
        push @values, $table[$row_b]->[1];
        push @rows_to_cull, $row_b;
    }
    # Only rows that actually had duplicates get an arrayref value;
    # unique rows keep their plain scalar (downstream code relies on
    # ref() to tell the two cases apart).
    $table[$row_a]->[1] = \@values if @values > 1;
}

# De-duplicate the cull list via a hash, then remove rows in descending
# index order so earlier splices don't shift the indices of later ones.
my %cull_hash = map { $_ => 1 } @rows_to_cull;
for my $idx (sort { $b <=> $a } keys %cull_hash) {
    splice @table, $idx, 1;
}
# Ensure the uniqueness of elements in the replacement-candidate
# arrays.  BUGFIX: the original tested `length $replacement > 1`, which
# measures the stringified arrayref ("ARRAY(0x...)"), not the element
# count; use the array in scalar context instead.
for my $row (0 .. $#table) {
    my $replacement = $table[$row]->[1];
    if (ref($replacement) eq 'ARRAY' && @$replacement > 1) {
        my @unique = uniq(@$replacement);
        $table[$row]->[1] = \@unique;
    }
}

# The following takes the second argument from the command line as the
# directory to operate on.  `preprocess` excludes non-documentation
# files from the checks; `wanted` does the interactive
# search-and-replace.
find({ preprocess => \&filter, wanted => \&search_and_replace }, $ARGV[1]);

# Keep directories (so find can descend) and plain files with
# documentation extensions; drop everything else.
sub filter {
    return grep { -d or (-f and (/\.txt$/ or /\.rst$/ or /\.yaml$/)) } @_;
}

# Main loop: scan each file line by line; for every key that matches,
# prompt the user and apply the chosen replacement across the file.
sub search_and_replace {
    my $filename = $_;
    # BUGFIX: check the open; skip unreadable files rather than
    # silently reading from an invalid handle.
    open my $target_file, "<", $filename
        or do { warn "Cannot open '$filename': $!\n"; return };
    $| = 1;    # unbuffer STDOUT so prompts appear before <STDIN> blocks
    while (my $target_string = <$target_file>) {
        for my $row (@table) {
            my ($search, $replacement) = @$row;
            # Keys were quotemeta'd at load time, so $search is safe to
            # interpolate; /i gives the same case-blind match the
            # original `lc` dance intended.
            next unless $target_string =~ /$search/i;
            print "Found $search in $filename in the following context:\n";
            print "$target_string\n";
            if (ref($replacement) eq 'ARRAY' && @$replacement > 1) {
                print "Choose a replacement candidate by typing the appropriate number, or else type '0' to skip.\n";
                my $count = 1;
                for my $value (@$replacement) {
                    print "$count\) $value\n";
                    $count++;
                }
                # BUGFIX: the original `my $choice = ;` was a syntax
                # error — read from STDIN, and chomp the newline.
                chomp(my $choice = <STDIN>);
                # BUGFIX: bounds check against the element count, not
                # `length $replacement`; reject non-numeric input.
                if ($choice =~ /\A\d+\z/ && $choice >= 1 && $choice <= @$replacement) {
                    # BUGFIX: the menu is 1-based but the array is
                    # 0-based, so subtract one.
                    my $replace_choice = $replacement->[$choice - 1];
                    edit_file { s/$search/$replace_choice/gi } $filename;
                }
                else {
                    print "Skipping this occurrence without replacing.\n";
                }
            }
            else {
                print "Confirm replacement with $replacement by typing y, or skip by typing any other key.\n";
                chomp(my $choice = <STDIN>);
                # BUGFIX: `$choice eq 'y' || 'Y'` was always true
                # ('Y' is a true constant); accept y/Y explicitly.
                if (lc($choice) eq 'y') {
                    edit_file { s/$search/$replacement/gi } $filename;
                }
                else {
                    print "Skipping this occurrence without replacing.\n";
                }
            }
        }
    }
    close $target_file;
}