#!/usr/bin/perl
#
# Fetch a page over HTTP, decode it as UTF-8, and report the words that
# contain at least one non-ASCII character.  Two subsets of those words are
# then listed:
#   * words that consist entirely of \w characters, and
#   * words that contain a separate (combining) diacritic mark.
#
# Set $url below to a page that is served as UTF-8 before running.

use strict;
use warnings;

use LWP::UserAgent;
use HTTP::Request;
use Encode;    # provides decode()

# Decoded (character) strings are printed, so STDOUT must encode them.
binmode STDOUT, ":utf8";

my $ua  = LWP::UserAgent->new;
my $url = "some_url_that_works_for_you";
my $req = HTTP::Request->new( GET => $url );
my $res = $ua->request( $req );

# Without this check a failed request would have its error body analysed
# as if it were the requested page.
die "GET $url failed: ", $res->status_line, "\n"
    unless $res->is_success;

# Decode the "external" UTF-8 octets into Perl's "internal" character
# string so that \w, [:ascii:] and \p{...} operate on characters, not bytes.
my $txt = decode( 'utf-8', $res->content );

# Each match is a run of word characters around one non-ASCII character.
# The non-ASCII character itself need not be a \w character.
my @accented = ( $txt =~ /(\w*?[^[:ascii:]]\w*)/g );

if ( @accented ) {
    printf( "found %d words with non-ascii characters.\n", scalar @accented );

    # Matches in which the non-ASCII character is itself a word character.
    my @alphanumerics = grep /^\w+$/, @accented;
    printf( "of those, %d words match ^\\w+\$:\n ", scalar @alphanumerics );
    print join( "\n ", @alphanumerics ), "\n";

    # Matches that carry a combining mark (Unicode general category Mn).
    my @diacritic_marks = grep /\p{NonspacingMark}/, @accented;
    printf( "and %d used separate diacritic marks:\n ", scalar @diacritic_marks );
    print join( "\n ", @diacritic_marks ), "\n";
}