use strict;
use warnings;
use Devel::Peek qw( Dump );
use Inline C => <<'__EOI__';
/* Copy the value of ssv into dsv using SvSetSV_nosteal().
 *
 * As the name says, this is the faulty version kept for demonstration:
 * it never triggers set magic on dsv after the copy.
 */
void buggy_assign(SV* dsv, SV* ssv) {
SvSetSV_nosteal(dsv, ssv);
/* BUG (intentional): set magic is not invoked on dsv, so a character
* length already cached in dsv's utf8 magic is left stale and a later
* length() returns the old value.
* Should call SvSetMagicSV_nosteal(dsv, ssv); instead
* or should call SvSETMAGIC(dsv); afterwards.
*/
}
/* Invoke set magic on sv via SvSETMAGIC().
 *
 * The script calls this after buggy_assign() to supply the step that
 * buggy_assign() omits; afterwards length() reports the correct value.
 */
void set_magic(SV* sv) {
SvSETMAGIC(sv);
}
__EOI__
# Two test strings with different character counts: 4 and 6.
my $txt_de = "K\N{U+00E4}se";
my $txt_ru = "\N{U+041C}\N{U+043E}\N{U+0441}\N{U+043A}\N{U+0432}\N{U+0430}";

# \N should have done this already.
utf8::upgrade($_) for $txt_de, $txt_ru;

my $txt;

# First assignment: $txt carries no utf8 magic yet, so length() computes
# the character count from scratch and prints 4.
buggy_assign($txt, $txt_de);
print(length($txt), "\n");
Dump($txt);

# Second assignment: the length cached by the previous length() call is
# not invalidated by buggy_assign(), so this prints the stale 4, not 6.
buggy_assign($txt, $txt_ru);
print(length($txt), "\n");
Dump($txt);

# Invoking set magic by hand clears the cached length; length() now
# recalculates and prints 6.
set_magic($txt);
print(length($txt), "\n");
Dump($txt);
4
SV = PVMG(0x81af530) at 0x817bca0
REFCNT = 1
FLAGS = (PADMY,SMG,POK,pPOK,UTF8)
IV = 0
NV = 0
PV = 0x8177b98 "K\303\244se"\0 [UTF8 "K\x{e4}se"]
CUR = 5
LEN = 8
MAGIC = 0x82f0d98
MG_VIRTUAL = &PL_vtbl_utf8
MG_TYPE = PERL_MAGIC_utf8(w)
MG_LEN = 4
4
SV = PVMG(0x81af530) at 0x817bca0
REFCNT = 1
FLAGS = (PADMY,SMG,POK,pPOK,UTF8)
IV = 0
NV = 0
PV = 0x844b718 "\320\234\320\276\321\201\320\272\320\262\320\260"\0 [UTF8 "\x{41c}\x{43e}\x{441}\x{43a}\x{432}\x{430}"]
CUR = 12
LEN = 16
MAGIC = 0x82f0d98
MG_VIRTUAL = &PL_vtbl_utf8
MG_TYPE = PERL_MAGIC_utf8(w)
MG_LEN = 4
6
SV = PVMG(0x81af530) at 0x817bca0
REFCNT = 1
FLAGS = (PADMY,SMG,POK,pPOK,UTF8)
IV = 0
NV = 0
PV = 0x844b718 "\320\234\320\276\321\201\320\272\320\262\320\260"\0 [UTF8 "\x{41c}\x{43e}\x{441}\x{43a}\x{432}\x{430}"]
CUR = 12
LEN = 16
MAGIC = 0x82f0d98
MG_VIRTUAL = &PL_vtbl_utf8
MG_TYPE = PERL_MAGIC_utf8(w)
MG_LEN = 6
The first call to buggy_assign works because length hasn't attached the utf8 magic to $txt yet, so there is no cached length to go stale:
SV = PV(0x816a0c0) at 0x817bca0
REFCNT = 1
FLAGS = (PADMY,POK,pPOK,UTF8)
PV = 0x8177b98 "K\303\244se"\0 [UTF8 "K\x{e4}se"]
CUR = 5
LEN = 8
Assigning to $txt — your workaround — works because assignment properly calls SvSETMAGIC, which clears the precomputed length of the string, thus forcing the next call to length to recalculate it.
SV = PVMG(0x81af570) at 0x817bca0
REFCNT = 1
FLAGS = (PADMY,SMG,POK,pPOK)
IV = 0
NV = 0
PV = 0x8177b98 ""\0
CUR = 0
LEN = 8
MAGIC = 0x831d6a0
MG_VIRTUAL = &PL_vtbl_utf8
MG_TYPE = PERL_MAGIC_utf8(w)
MG_LEN = -1
|