use strict; use Encode; binmode STDOUT, ":utf8"; my %test_sets = ( normal => [ 0x40 .. 0x7f ], # normal ascii range of 64 characters # puta some surrogate data on a record boundary: oksplit => [ 0x40 .. 0x5e, 0xd801, 0xdc01, 0x61 .. 0x7f ], # good surrog. pair danglehi => [ 0x40 .. 0x5e, 0xd801, 0x60 .. 0x7f ], # bad: missing Lo surrog. danglelo => [ 0x40 .. 0x5e, 0xdc01, 0x60 .. 0x7f ], # bad: missing Hi surrog. invsplit => [ 0x40 .. 0x5e, 0xdc01, 0xd801, 0x61 .. 0x7f ], # two surrog. errors # same as above, but not on a record boundary: okmid => [ 0x40 .. 0x4e, 0xd801, 0xdc01, 0x51 .. 0x7f ], # good surrog. pair strandhi => [ 0x40 .. 0x4e, 0xd801, 0x50 .. 0x7f ], # bad: missing Lo surrog. strandlo => [ 0x40 .. 0x4e, 0xdc01, 0x50 .. 0x7f ], # bad: missing Hi surrog. invmid => [ 0x40 .. 0x4e, 0xdc01, 0xd801, 0x51 .. 0x7f ], # two surrog. errors ); for my $type ( qw/normal okmid oksplit strandhi strandlo invmid danglehi danglelo invsplit/ ) { warn "\nRunning test on $type;\n"; print "\nRunning test on $type:\n"; my $string = pack( 'v*', @{$test_sets{$type}} ); my $u = ''; { open my $fh, "<", \$string or die $!; my $pass = 1; $_ = ''; while ( read( $fh, $_, 64, length())) { eval { $u .= decode( "UTF-16LE", $_, Encode::FB_WARN ) }; if ( $@ ) { warn sprintf( "on pass %d: %s; leaving %d bytes: \n", $pass, $@, length(), join( " ", unpack( "v*", $_ ))); } $pass++; } } print "\n$u\n"; }