This doesn't help this questioner, but here's what I use for removing Unicode accents:
use 5.008;
use charnames ();
# deaccent($text)
#
# Returns a copy of $text in which every character whose Unicode name
# contains " WITH " (e.g. "LATIN SMALL LETTER O WITH TILDE") is replaced
# by the character named by the part before the last " WITH "
# ("LATIN SMALL LETTER O"), provided that shorter name looks up
# successfully. All other characters are copied through unchanged.
#
# An undefined or empty argument yields the empty string.
sub deaccent {
    my ($text) = @_;

    # Nothing to convert; joining zero characters would give '' anyway.
    return '' unless defined $text && length $text;

    # Split into characters, then convert them one by one. The loop
    # variable aliases each element of @chars, so assigning to it
    # updates the array in place.
    my @chars = split //, $text;
    for my $char (@chars) {
        # Look up the name (e.g. "LATIN SMALL LETTER O WITH TILDE").
        my $name = charnames::viacode(ord $char);

        # Only try to convert if the character has a name containing
        # " WITH ".
        next unless $name;
        my ($base_name) = $name =~ m/\A(.*) WITH /;
        next unless defined $base_name;

        # Take off the " WITH foo" part and see whether what remains
        # names a character; leave $char alone when the lookup does not
        # give a usable code point.
        my $base_ord = charnames::vianame($base_name);
        $char = chr $base_ord if $base_ord;
    }

    return join '', @chars;
}