#!/usr/bin/perl
#
# Survey the item_name column of table1 and report how many values are
# pure ASCII, how many are valid multi-byte UTF-8, and how many are not
# valid UTF-8 at all.  NULL values are tallied separately.

use strict;
use warnings;

use DBI;
use Encode;

# Connection parameters.
# TODO: replace these placeholders with the values for your environment.
my $dbname   = 'your_database';
my $host     = 'localhost';
my $user     = 'your_user';
my $password = 'your_password';

# The DSN has the form "DBI:<driver>:<options>" with single colons.
# RaiseError makes connect/prepare/execute/fetch failures die with a
# message instead of being silently ignored.
my $dbh = DBI->connect(
    "DBI:mysql:database=$dbname;host=$host",
    $user, $password,
    { RaiseError => 1, PrintError => 0 },
);

my $sth = $dbh->prepare("select item_name from table1");
$sth->execute;

# Start every counter at zero so it can be incremented and compared
# without "uninitialized value" warnings.
my ( $is_wide_utf8, $is_not_utf8, $is_ascii, $is_null ) = ( 0, 0, 0, 0 );

ITEM:
while ( my ($item) = $sth->fetchrow_array ) {

    # A NULL column value arrives as undef; there is nothing to decode.
    if ( !defined $item ) {
        $is_null++;
        next ITEM;
    }

    my $nbytes = length $item;

    # Block eval, not string eval: the fetched text must never be
    # interpolated into Perl source, where it would be parsed as code.
    # FB_CROAK makes decode() die on malformed input, in which case the
    # eval yields undef.  LEAVE_SRC stops decode() from modifying $item.
    # 'UTF-8' selects the strict decoder rather than Perl's lax 'utf8'.
    my $nchars = eval {
        length decode( 'UTF-8', $item, Encode::FB_CROAK | Encode::LEAVE_SRC );
    };

    if ( !defined $nchars ) {
        # decode croaked -- text string is not utf8
        $is_not_utf8++;
        # figure out something else to try (maybe shiftjis?)
    }
    elsif ( $nchars == $nbytes ) {
        # One character per byte in valid UTF-8 means ASCII only.
        $is_ascii++;
    }
    else {
        $is_wide_utf8++;
    }
}

$dbh->disconnect;

print "$is_ascii entries found to be ascii-only\n"  if $is_ascii;
print "$is_wide_utf8 entries found to be utf8\n"    if $is_wide_utf8;
print "$is_not_utf8 entries found to be non-utf8\n" if $is_not_utf8;
print "$is_null entries found to be NULL\n"         if $is_null;