#!/usr/bin/env perl # vowel_count - count all the vowels in each word, both total and distinct # # reads from DATA if no input supplied # # Tom Christiansen # Sun Jun 12 22:23:02 MDT 2011 use v5.12; use utf8; use strict; use autodie; use warnings; use warnings qw< FATAL utf8 >; use open qw< :std :utf8 >; use charnames qw< :full >; use feature qw< unicode_strings >; use File::Basename qw< basename >; use Carp qw< carp croak confess cluck >; use Unicode::Normalize qw< NFD NFC NFKD NFKC >; ####################################### sub main ( ) ; sub ARGCOUNT ( ) ; sub clear_traps ( ) ; sub count_distinct_vowels ( _ ) ; sub count_total_vowels ( _ ) ; sub deQ ( $ ) ; sub deQQ ( $ ) ; sub dequeue ( $$ ) ; sub filter ( ) ; sub inits ( ) ; sub NOT_REACHED ( ) ; sub oops ( @ ) ; sub panic ( @ ) ; sub set_traps ( ) ; ####################################### our $Errors = 0; ####################################### main(); exit($Errors > 0 ? 1 : 0); ####################################### sub main() { set_traps(); inits(); run_filter(); } ####################################### sub run_filter() { local *next_line = (@ARGV == 0 && -t STDIN) ? sub { scalar } : sub { scalar }; while (my $line = next_line()) { for my $word (split " ", $line) { printf "%-26s : ", NFC($word); printf "%2d vowels total, ", count_total_vowels($word); printf " %d distinct\n", count_distinct_vowels($word); } } } ####################################### sub oops(@) { @_ > 0 || ARGCOUNT(); our $Errors; my $msg = "@_"; $msg =~ s/[^\n]\K\z/\n/; print STDERR "$0: $msg"; $Errors++; } sub panic(@) { confess "$0: INTERNAL ERROR: @_"; } sub NOT_REACHED() { panic("NOT REACHED"); } sub ARGCOUNT() { panic("wrong arguments to function"); } sub inits() { END { eval { close STDOUT || oops "couldn't close STDOUT: $!"; } } $SIG{PIPE} = sub { exit }; ## if (grep /\P{ASCII}/ => @ARGV) { ## @ARGV = map { decode("UTF-8", $_) } @ARGV; ## } $0 = basename($0); # shorter messages $| = 1; binmode(DATA, ":utf8"); } sub set_traps() { $SIG{__DIE__} = sub { confess "Uncaught exception: @_" unless $^S; }; $SIG{__WARN__} = sub { if ($^S) { cluck "Trapped warning: @_" } else { confess "Deadly warning: @_" } }; } sub clear_traps() { @SIG{ <__{WARN,DIE}__> } = ( ); } sub dequeue($$) { my($leader, $body) = @_; $body =~ s/^\s*\Q$leader\E ?//gm; return $body; } sub deQ($) { my $text = $_[0]; return dequeue q<|Q|>, $text; } sub deQQ($) { my $text = $_[0]; return dequeue qq<|QQ|>, $text; } UNITCHECK { my @vowels = ( "\N{LATIN SMALL LETTER A}", "\N{LATIN SMALL LETTER AE}", # no decomposition "\N{LATIN SMALL LETTER E}", "\N{LATIN SMALL LETTER I}", "\N{LATIN SMALL LETTER O}", "\N{LATIN SMALL LETTER O WITH STROKE}", # no decomposition "\N{LATIN SMALL LIGATURE OE}", # no decomposition "\N{LATIN SMALL LETTER U}", "\N{LATIN SMALL LETTER Y}", ); my %vowel_counter; for my $vowel (@vowels) { my $code = deQ(<<'START') . deQQ(<<"COUNT") . deQ(<<'FINISH') ; |Q| |Q| sub (_) { |Q| @_ == 1 || ARGCOUNT(); |Q| my $string = shift; |Q| my $count = $string =~ START |QQ| |QQ| tr/$vowel//; |QQ| COUNT |Q| return $count; |Q| } |Q| FINISH $vowel_counter{ $vowel } = eval $code || die "compile failed: $code $@"; } sub count_total_vowels(_) { local $_ = lc NFKD(shift()); state $vowels = join(q(), @vowels); return eval qq{ y/$vowels// }; } sub count_distinct_vowels(_) { my $string = lc NFKD(shift()); my $sum = 0; for my $code (values %vowel_counter) { $sum++ if $code->($string); } return $sum; } } # end UNITCHECK __END__