<?xml version="1.0" encoding="windows-1252"?>
<node id="860243" title="Re^5: length() returns wrong result - suspicious magic" created="2010-09-15 12:19:19" updated="2010-09-15 12:19:19">
<type id="11">
note</type>
<author id="381608">
ikegami</author>
<data>
<field name="doctext">
&lt;p&gt;It is indeed a missing &lt;c&gt;SvSETMAGIC&lt;/c&gt;.

&lt;c&gt;
use strict;
use warnings;

use Devel::Peek qw( Dump );

use Inline C =&gt; &lt;&lt;'__EOI__';

/* Copies the value of ssv into dsv without invoking set-magic on dsv.
 * Deliberately buggy: it reproduces the stale length() result that the
 * script below demonstrates.
 */
void buggy_assign(SV* dsv, SV* ssv) {
   SvSetSV_nosteal(dsv, ssv);
   /* Should call SvSetMagicSV_nosteal(dsv, ssv); instead
    * or should call SvSETMAGIC(dsv); afterwards.
    */
}

/* Invokes set-magic on sv. Exposed to Perl so the script can apply, by
 * hand, the step that buggy_assign() leaves out.
 */
void set_magic(SV* sv) {
   SvSETMAGIC(sv);
}

__EOI__

# Two test strings with different character counts: 4 and 6.
my $txt_de = "K\N{U+00E4}se";
my $txt_ru = "\N{U+041C}\N{U+043E}\N{U+0441}\N{U+043A}\N{U+0432}\N{U+0430}";

# \N should have done this already.
# (Kept as a safety net so both strings are certain to be UTF8-flagged.)
utf8::upgrade($_) for $txt_de, $txt_ru;

my $txt;

# Step 1: first assignment. No utf8 magic is attached to $txt yet, so
# length() counts the characters itself and prints the correct 4.
buggy_assign($txt, $txt_de);
print(length($txt), "\n"); Dump($txt);

# Step 2: second assignment through the buggy path. The length cached by
# the previous length() call is not invalidated, so this still prints 4
# although the string now holds 6 characters.
buggy_assign($txt, $txt_ru);
print(length($txt), "\n"); Dump($txt);

# Step 3: fire set-magic by hand. The cached length is discarded and
# length() recomputes it, printing the correct 6.
set_magic($txt);
print(length($txt), "\n"); Dump($txt);
&lt;/c&gt;

&lt;c&gt;
4
SV = PVMG(0x81af530) at 0x817bca0
  REFCNT = 1
  FLAGS = (PADMY,SMG,POK,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x8177b98 "K\303\244se"\0 [UTF8 "K\x{e4}se"]
  CUR = 5
  LEN = 8
  MAGIC = 0x82f0d98
    MG_VIRTUAL = &amp;PL_vtbl_utf8
    MG_TYPE = PERL_MAGIC_utf8(w)
    MG_LEN = 4
4
SV = PVMG(0x81af530) at 0x817bca0
  REFCNT = 1
  FLAGS = (PADMY,SMG,POK,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x844b718 "\320\234\320\276\321\201\320\272\320\262\320\260"\0 [UTF8 "\x{41c}\x{43e}\x{441}\x{43a}\x{432}\x{430}"]
  CUR = 12
  LEN = 16
  MAGIC = 0x82f0d98
    MG_VIRTUAL = &amp;PL_vtbl_utf8
    MG_TYPE = PERL_MAGIC_utf8(w)
    MG_LEN = 4
6
SV = PVMG(0x81af530) at 0x817bca0
  REFCNT = 1
  FLAGS = (PADMY,SMG,POK,pPOK,UTF8)
  IV = 0
  NV = 0
  PV = 0x844b718 "\320\234\320\276\321\201\320\272\320\262\320\260"\0 [UTF8 "\x{41c}\x{43e}\x{441}\x{43a}\x{432}\x{430}"]
  CUR = 12
  LEN = 16
  MAGIC = 0x82f0d98
    MG_VIRTUAL = &amp;PL_vtbl_utf8
    MG_TYPE = PERL_MAGIC_utf8(w)
    MG_LEN = 6
&lt;/c&gt;

&lt;p&gt;The first call works because &lt;c&gt;length&lt;/c&gt; hasn't yet attached the utf8 magic (which caches the character count) to &lt;c&gt;$txt&lt;/c&gt;.

&lt;c&gt;
SV = PV(0x816a0c0) at 0x817bca0
  REFCNT = 1
  FLAGS = (PADMY,POK,pPOK,UTF8)
  PV = 0x8177b98 "K\303\244se"\0 [UTF8 "K\x{e4}se"]
  CUR = 5
  LEN = 8
&lt;/c&gt;

&lt;p&gt;Assigning to &lt;c&gt;$txt&lt;/c&gt; &amp;mdash; your workaround &amp;mdash; works because assignment properly calls &lt;c&gt;SvSETMAGIC&lt;/c&gt;, which clears the precomputed length of the string, thus forcing the next call to &lt;c&gt;length&lt;/c&gt; to recalculate it.

&lt;c&gt;
SV = PVMG(0x81af570) at 0x817bca0
  REFCNT = 1
  FLAGS = (PADMY,SMG,POK,pPOK)
  IV = 0
  NV = 0
  PV = 0x8177b98 ""\0
  CUR = 0
  LEN = 8
  MAGIC = 0x831d6a0
    MG_VIRTUAL = &amp;PL_vtbl_utf8
    MG_TYPE = PERL_MAGIC_utf8(w)
    MG_LEN = -1
&lt;/c&gt;</field>
<field name="root_node">
860211</field>
<field name="parent_node">
860232</field>
</data>
</node>
