use 5.020;

# Heuristically "fix" a broken string: a character string into which raw
# UTF-8 octets were pasted without being decoded first.  Every run of
# characters in the range U+0080..U+00FF that looks like a well-formed UTF-8
# byte sequence is replaced by the character it encodes.
#
# This is a heuristic: genuine Latin-1 text that happens to look like UTF-8
# (for example "Ã" followed by "©") is converted as well.
use strict;
use warnings;
use utf8;
use Encode qw/encode decode/;

# Matches exactly one well-formed UTF-8 byte sequence of 2 to 4 bytes.
# Overlong forms, UTF-16 surrogates and values above U+10FFFF are excluded
# on purpose, so that stray look-alikes are skipped rather than handed to
# the strict decoder.
my $utf8_decodable_regex = qr{
      [\xC2-\xDF]         [\x80-\xBF]                # 2 bytes
    | \xE0                [\xA0-\xBF] [\x80-\xBF]    # 3 bytes, no overlongs
    | [\xE1-\xEC\xEE\xEF] [\x80-\xBF]{2}             # 3 bytes, general case
    | \xED                [\x80-\x9F] [\x80-\xBF]    # 3 bytes, no surrogates
    | \xF0                [\x90-\xBF] [\x80-\xBF]{2} # 4 bytes, no overlongs
    | [\xF1-\xF3]         [\x80-\xBF]{3}             # 4 bytes, general case
    | \xF4                [\x80-\x8F] [\x80-\xBF]{2} # 4 bytes, max U+10FFFF
}x;

# Decode one candidate byte sequence as strict UTF-8.
# Returns the decoded character, or the argument unchanged when the decoder
# rejects it, so that a single odd sequence never aborts the whole repair.
sub _decode_or_keep {
    my ($octets) = @_;

    local $@;    # do not clobber the caller's error state
    my $decoded = eval {
        decode('UTF-8', $octets, Encode::FB_CROAK | Encode::LEAVE_SRC);
    };

    return $decoded // $octets;
}

# Return a copy of $text in which every embedded well-formed UTF-8 byte
# sequence has been replaced by the character it encodes.  Characters that
# are not part of such a sequence are left untouched.
sub fix_mixed_utf8 {
    my ($text) = @_;

    $text =~ s{($utf8_decodable_regex)}{ _decode_or_keep($1) }gex;

    return $text;
}

# Demonstration: build a string that mixes decoded characters with their
# raw UTF-8 octets, then repair it.
my $chars         = 'абвд';
my $bytes         = encode('UTF-8', $chars);
my $mixed_pickles = "$chars $bytes $chars $bytes";

say "Before: ", encode('UTF-8', $mixed_pickles);

$mixed_pickles = fix_mixed_utf8($mixed_pickles);

say "After: ", encode('UTF-8', $mixed_pickles);