Replace
/* "Before" template: obtains the argument's string buffer with SvPVbyte.
 * The "..." lines are placeholders for the caller's own logic. */
SV* my_c_function(SV* sv) {
STRLEN len;
const char* s = SvPVbyte(sv, len);
...
return newSV(...);
}
with
/* "After" template: obtains the argument's string buffer with SvPVutf8 and
 * builds the result with newSVpvn_utf8, passing 1 as the last argument.
 * The "..." parts are placeholders for the caller's own logic. */
SV* my_c_function(SV* sv) {
STRLEN len;
const char* s = SvPVutf8(sv, len);
...
return newSVpvn_utf8(..., 1);
}
Example:
use strict;
use warnings;
use feature qw( say );
use open ":std", ":encoding(UTF-8)";
use Inline C => <<'__EOS__';
/* Uppercase hexadecimal digits, indexed by nibble value (0-15). */
static const char hex_syms[16] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};

/*
 * Formats the n bytes at src as dot-separated pairs of uppercase hex
 * digits (e.g. "C3.89.72") and stores the result in dst as a
 * NUL-terminated string.
 *
 * dst must have room for 3 * n bytes: two digits plus one separator per
 * input byte, with the final separator overwritten by the terminating NUL.
 * When n is 0 nothing is written, because there is no final separator to
 * replace and dst[-1] would lie outside the buffer.
 */
static void my_c_function(char* dst, const char* src, STRLEN n) {
    if (n == 0)
        return;

    while (n--) {
        /* Go through unsigned char so bytes >= 0x80 cannot index negatively. */
        const unsigned char byte = (unsigned char)*src++;

        *(dst++) = hex_syms[ byte >> 4 ];
        *(dst++) = hex_syms[ byte & 0xF ];
        *(dst++) = '.';
    }

    /* Turn the trailing '.' into the string terminator. */
    dst[-1] = 0;
}
/*
 * Returns a new string SV containing the UTF-8 encoding of buf_sv rendered
 * as dot-separated hex byte pairs (e.g. "C3.89.72.69.63"), or an empty
 * string when buf_sv is empty.
 */
SV* my_xs_function(SV* buf_sv) {
    STRLEN buf_len;
    const char* buf = SvPVutf8(buf_sv, buf_len);
    STRLEN hex_len;
    char* hex;
    SV* hex_sv;

    if (buf_len == 0)
        return newSVpvs("");

    /* Two digits per byte plus a '.' between bytes. */
    hex_len = buf_len * 3 - 1;

    /* my_c_function() writes 3 * buf_len bytes: the hex_len visible
     * characters followed by a terminating NUL, so reserve one extra byte. */
    Newx(hex, hex_len + 1, char);
    my_c_function(hex, buf, buf_len);

    /* newSVpvn_utf8() copies the text, so the scratch buffer can be freed. */
    hex_sv = newSVpvn_utf8(hex, hex_len, 1);
    Safefree(hex);

    return hex_sv;
}
__EOS__
# Build the same text in both of Perl's internal string storage formats.
my $text = "\x{C9}ric";

my $downgraded = $text;
utf8::downgrade($downgraded);

my $upgraded = $text;
utf8::upgrade($upgraded);

# Report whether the two copies compare equal as strings.
if ($upgraded eq $downgraded) {
    say "Same";
}
else {
    say "Different";
}

# Show the code points and the XS function's output for each copy.
foreach my $str ($downgraded, $upgraded) {
    my $code_points = sprintf "%vX", $str;
    say "UCP: ", $code_points;             # C9.72.69.63
    say "UTF-8: ", my_xs_function($str);   # C3.89.72.69.63
}
Optimized: (Avoids creating two buffers and copying one into the other. Also protects against memory leaks from long jumps in the C code by mortalizing the allocated memory sooner.)
/*
 * Returns a string SV containing the UTF-8 encoding of buf_sv rendered as
 * dot-separated hex byte pairs (e.g. "C3.89.72.69.63"), or an empty string
 * when buf_sv is empty.
 *
 * The result is formatted directly into the returned SV's own buffer, so
 * no intermediate buffer or copy is needed.
 */
SV* my_xs_function(SV* buf_sv) {
    STRLEN buf_len;
    const char* buf = SvPVutf8(buf_sv, buf_len);
    STRLEN hex_len;
    SV* hex_sv;

    if (buf_len == 0)
        return newSVpvs("");

    /* Two digits per byte plus a '.' between bytes. */
    hex_len = buf_len * 3 - 1;

    /* newSV(len) reserves len bytes plus one for the trailing NUL, which is
     * exactly the 3 * buf_len bytes my_c_function() writes. Mortalizing
     * right away means the SV is reclaimed even if the code below croaks. */
    hex_sv = sv_2mortal(newSV(hex_len));
    SvPOK_on(hex_sv);
    SvCUR_set(hex_sv, hex_len);
    SvUTF8_on(hex_sv);
    my_c_function(SvPVX(hex_sv), buf, buf_len);

    /* The XS glue mortalizes an SV* return value itself. Hand it its own
     * reference so the mortal created above does not release the only
     * reference a second time. */
    return SvREFCNT_inc(hex_sv);
}