# Decode one UTF-8 multi-byte sequence (2, 3 or 4 bytes, as matched by
# utf8_escape_2) and return its \uXXXX escape text.
#
# Code points up to U+FFFF become a single \uXXXX escape (lowercase hex).
# Code points U+10000..U+10FFFF become a UTF-16 surrogate pair
# (\uXXXX\uXXXX). A 4-byte sequence that decodes outside that range
# (overlong or beyond U+10FFFF) becomes \ufffd.
sub _utf8_escape_2_sequence {
    my ($seq) = @_;

    my @c = map { ord($_) } split //, $seq;

    if ( @c == 2 ) {
        # 110xxxxx 10xxxxxx
        return sprintf '\\u%04x',
            ( ( 0b00011111 & $c[0] ) << 6 ) | ( 0b00111111 & $c[1] );
    }

    if ( @c == 3 ) {
        # 1110xxxx 10xxxxxx 10xxxxxx
        return sprintf '\\u%04x',
              ( ( 0b00001111 & $c[0] ) << 12 )
            | ( ( 0b00111111 & $c[1] ) << 6 )
            |   ( 0b00111111 & $c[2] );
    }

    # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    my $cp
        = ( ( 0b00000111 & $c[0] ) << 18 )
        | ( ( 0b00111111 & $c[1] ) << 12 )
        | ( ( 0b00111111 & $c[2] ) << 6 )
        |   ( 0b00111111 & $c[3] );

    # Only U+10000..U+10FFFF is a legal result for a 4-byte sequence.
    return '\\ufffd' if $cp < 0x10000 || $cp > 0x10FFFF;

    # Split into a UTF-16 high/low surrogate pair.
    my $offset = $cp - 0x10000;
    return sprintf '\\u%04x\\u%04x',
        0xD800 | ( $offset >> 10 ),
        0xDC00 | ( $offset & 0x3FF );
}

# Replace every UTF-8 multi-byte sequence in a string with \uXXXX escapes
# and drop any remaining bytes in the range 0x80-0xFF.
#
# Argument: the string to escape. The pattern matches on raw byte values,
# so the input is presumably an undecoded byte string -- confirm with
# callers.
# Returns:  a copy of the string containing only characters below 0x80;
#           the caller's variable is not modified.
#
# NOTE(review): the 3-byte pattern also accepts overlong forms and encoded
# surrogates (e.g. "\xe0\x80\x80" is emitted as \u0000); this is unchanged
# from the original behaviour.
sub utf8_escape_2 {
    my $s = shift;

    $s =~ s{
        (
            [\xc2-\xdf] [\x80-\xbf]
          | [\xe0-\xef] [\x80-\xbf]{2}
          | [\xf0-\xf4] [\x80-\xbf]{3}
        )
    }{ _utf8_escape_2_sequence($1) }xmsge;

    # Whatever high bytes are left were not part of a recognised sequence,
    # i.e. invalid UTF-8: remove them.
    $s =~ tr/\x80-\xff//d;

    return $s;
}