use strict;
use warnings;

use Carp qw( croak );

# Demonstration: pack("V/a*", ...) applied to the same text held in both of
# Perl's internal string representations (downgraded and upgraded), plus a
# longer ASCII string.  Each packed result is written to packed1, packed2,
# ... in the current directory so the bytes can be inspected with a hex
# dumper.

# Return a copy of $s stored in the downgraded (non-UTF-8) internal format.
# Croaks if $s contains a character that cannot be stored that way
# (utf8::downgrade is called with FAIL_OK set, so it returns false instead
# of dying and the croak below reports the problem from the caller's side).
sub avoid_utf8_internally {
    my ($s) = @_;
    utf8::downgrade($s, 1)
        or croak("Non-bytes found in input");
    return $s;
}

# Return a copy of $s stored in the upgraded (UTF-8) internal format.
# The characters of the string are unchanged; only the storage differs.
sub use_utf8_internally {
    my ($s) = @_;
    utf8::upgrade($s);
    return $s;
}

# Counter used to number the output files (packed1, packed2, ...).
my $file_num = 0;

for my $s (
    avoid_utf8_internally("bj\x{f6}rk"),
    use_utf8_internally("bj\x{f6}rk"),
    "b" x 1000,
) {
    # "V/a*": a 32-bit little-endian length prefix followed by the string.
    my $packed = pack("V/a*", $s);
    printf("%s -> %s\n", length($s), length($packed));

    my $path = 'packed' . ++$file_num;
    open(my $fh, '>', $path)
        or die "Can't create $path: $!";
    binmode($fh)    # No crlf mucking.
        or die "Can't binmode $path: $!";
    print {$fh} $packed
        or die "Can't write to $path: $!";
    # Buffered write errors only surface at close, so check it.
    close($fh)
        or die "Can't close $path: $!";
}

####
# Recorded run of the script, followed by hex dumps of the three output
# files taken with `debug`:
#
# >perl script.pl
# 5 -> 9
# 5 -> 9
# 1000 -> 1004
#
# >debug packed1
# -rcx
# CX 0009
# :
# -d100 l9
# 0B14:0100  05 00 00 00 62 6A F6 72-6B                        ....bj.rk
# -q
#
# >debug packed2
# -rcx
# CX 0009
# :
# -d100 l9
# 0B14:0100  05 00 00 00 62 6A F6 72-6B                        ....bj.rk
# -q
#
# >debug packed3
# -rcx
# CX 03EC
# :
# -d100 3EC
# 0B14:0100  E8 03 00 00 62 62 62 62-62 62 62 62 62 62 62 62   ....bbbbbbbbbbbb
# 0B14:0110  62 62 62 62 62 62 62 62-62 62 62 62 62 62 62 62   bbbbbbbbbbbbbbbb
# ...
# 0B14:04D0  62 62 62 62 62 62 62 62-62 62 62 62 62 62 62 62   bbbbbbbbbbbbbbbb
# 0B14:04E0  62 62 62 62 62 62 62 62-62 62 62 62               bbbbbbbbbbbb
# -q
####
# A second recorded pair of dumps.  Here packed2 holds C3 B6 (the UTF-8
# encoding of U+00F6) where the first run shows F6, and its 5-byte payload
# stops after "r".
# NOTE(review): the conditions that produced this second set are not
# recorded here -- presumably it is the faulty result the script was written
# to check for; confirm against the original discussion.
#
# >debug packed1
# -rcx
# CX 0009
# :
# -d100 l9
# 0B14:0100  05 00 00 00 62 6A F6 72-6B                        ....bj.rk
# -q
#
# >debug packed2
# -rcx
# CX 0009
# :
# -d100 l9
# 0B14:0100  05 00 00 00 62 6A C3 B6-72                        ....bj..r
# -q