comment on

I'm a really sore^Hthirsty loser. So I wrote an XS function that did the replacement in-place by operating directly on the SV'S char vector. It's faster than your swap function but still slower than my s///-pos stuff, I don't really know why.

use Inline C => q[
void avar_cee(SV *sv1, SV *sv2)
{
    char *sv1p, *sv2p;
    STRLEN sv1len, sv2len;
    SV *sv1_sv;
    STRLEN i;

    if (!SvROK(sv1) || !SvPOK((SV*)SvRV(sv1)) || !SvPOK(sv2)) {
        croak("Usage: avar_cee(\$s1, $s2)");
    }

    sv1_sv = (SV*)SvRV(sv1);
    sv1p = SvPV(sv1_sv, sv1len);
    sv2p = SvPV(sv2,    sv2len);

    if (sv1len != sv2len) {
       croak("The given strings must be of the same length");
    }

    for (i = 0; i < sv2len; i++) {
        if (sv1p[i] == '\0') {
            sv1p[i] = sv2p[i];
        }
    }
}
[download]

It could be sped up with something like OpenMP by putting #pragma omp paralell for before the for loop but I didn't have gcc 4.2 to test it. I changed the test script so that it wouldn't be unfair to the in-place functions. Previously I was copying the data anyway so that testing would work which defeated the purpose of doing it in-place. Results:

split1            5.07/s     --    -82%      -87% -99%  -99%   -99%   
+    -99%     -100%  -100% -100%          -100%             -100%
substr1           27.8/s   449%      --      -31% -96%  -96%   -97%   
+    -97%      -98%   -98%  -99%           -99%              -99%
ikegami_s         40.3/s   695%     45%        -- -94%  -95%   -95%   
+    -96%      -97%   -98%  -98%           -99%              -99%
avar               700/s 13700%   2414%     1636%   --  -10%   -18%   
+    -30%      -42%   -62%  -70%           -80%              -86%
avar2              778/s 15243%   2695%     1830%  11%    --    -8%   
+    -22%      -35%   -58%  -67%           -78%              -85%
corion             848/s 16635%   2948%     2005%  21%    9%     --   
+    -15%      -29%   -54%  -63%           -76%              -83%
ikegami_tr         999/s 19609%   3490%     2379%  43%   28%    18%   
+      --      -17%   -45%  -57%           -72%              -80%
avar2_pos         1201/s 23591%   4215%     2880%  72%   54%    42%   
+     20%        --   -34%  -48%           -66%              -76%
moritz            1832/s 36032%   6481%     4445% 162%  135%   116%   
+     83%       53%     --  -21%           -49%              -64%
swap              2324/s 45742%   8250%     5666% 232%  199%   174%   
+    133%       93%    27%    --           -35%              -54%
avar_c_inplace    3567/s 70267%  12717%     8751% 410%  359%   320%   
+    257%      197%    95%   53%             --              -30%
avar2_pos_inplace 5063/s 99781%  18093%    12464% 624%  551%   497%   
+    407%      322%   176%  118%            42%                --
[download]

benchmark.pl:

#!/usr/bin/perl

use 5.6.0;

use strict;
use warnings FATAL => 'all';

use Benchmark qw( cmpthese );

use Inline C => q[
void avar_c_inplace(SV *sv1, SV *sv2)
{
    char *sv1p, *sv2p;
    STRLEN sv1len, sv2len;
    SV *sv1_sv;
    STRLEN i;

    if (!SvROK(sv1) || !SvPOK((SV*)SvRV(sv1)) || !SvPOK(sv2)) {
        croak("Usage: avar_cee(\$s1, $s2)");
    }

    sv1_sv = (SV*)SvRV(sv1);
    sv1p = SvPV(sv1_sv, sv1len);
    sv2p = SvPV(sv2,    sv2len);

    if (sv1len != sv2len) {
       croak("The given strings must be of the same length");
    }

    for (i = 0; i < sv2len; i++) {
        if (sv1p[i] == '\0') {
            sv1p[i] = sv2p[i];
        }
    }
}

SV* swap(SV* one, SV* two) {
    char *buf1, *buf2, *buf3;
    STRLEN idx, len;
    SV* ret;

    if (!SvPOK(one) || !SvPOK(two) || sv_len(one) > sv_len(two))
        return newSVsv(&PL_sv_undef);

    len = sv_len(one);
    buf1 = SvPVX(one);
    buf2 = SvPVX(two);
    buf3 = malloc(len);

    for (idx=0; idx < len; idx++) {
        buf3[idx] = buf1[idx] ? buf1[idx] : buf2[idx];
    }

    ret = newSVpv(buf3, len);
    free(buf3);
    return ret;
}
];

my $s1 = do_rand(0, 100_000);
my $s2 = do_rand(1, 100_000);
#my $s1 = "a\0b\0c\0d\0";
#my $s2 = "aXbYcZdA";

my @z = $s1 =~ m/(\0)/g;
my $num = @z;
warn "\$s1 has $num zeroes";

my $cp0 = $s1;
my $cp1 = $s1;

my %subs = (
    'split1'     => sub { my $s3 = split1( $s1, $s2 ) },
    'substr1'    => sub { my $s3 = substr1( $s1, $s2 ) },
#    'kyle'       => sub { my $s3 = kyle( $s1, $s2 ) },
    'moritz'     => sub { my $s3 = moritz( $s1, $s2 ) },
    'corion'     => sub { my $s3 = corion( $s1, $s2 ) },
    'ikegami_s'  => sub { my $s3 = ikegami_s( $s1, $s2 ) },
    'ikegami_tr' => sub { my $s3 = ikegami_tr( $s1, $s2 ) },
    'avar'       => sub { my $s3 = avar( $s1, $s2 ) },
    'avar2'      => sub { my $s3 = avar2( $s1, $s2 ) },
    'avar2_pos'  => sub { my $s3 = avar2_pos( $s1, $s2 ) },
    'avar2_pos_inplace'  => sub { avar2_pos_inplace( \$cp0, $s2 ); $cp
+0 },
    'avar_c_inplace'  => sub { avar_c_inplace( \$cp1, $s2 ); $cp1 },
    'swap'  => sub { swap( $s1, $s2 ) },
#    'bogus'     => sub { "oh noes" },
);

cmpthese( -2, \%subs );

use Test::More;

plan 'tests' => scalar keys %subs;

my $correct;
{
    my $tmp = $s1;
    avar2_pos_inplace(\$tmp, $s2);
    $correct = $tmp;
}

foreach my $subname ( keys %subs ) {
    my $sub = $subs{$subname};
    ok($sub->() eq $correct, "$subname returned the correct value");
}

sub split1 {
    my ($s1, $s2) = @_;

    my @s1 = split //, $s1;
    my @s2 = split //, $s2;

    foreach my $idx ( 0 .. $#s1 ) { 
    if ( $s1[$idx] eq chr(0) ) { 
        $s1[$idx] = $s2[$idx];
    } 
    } 

    return join '', @s1;
}

sub substr1 {
    my ($s1, $s2) = @_;

    for my $idx ( 0 .. length($s1) ) {
    if ( substr($s1,$idx,1) eq chr(0) ) {
        substr($s1, $idx, 1) = substr($s2, $idx, 1);
    }
    } 

    return $s1;
} 

sub kyle {
    my ($s1, $s2) = @_;

    my $out = $s1;
    while ( $s1 =~ m/\000/g ) {
    my $pos = pos;
    substr( $out, $pos, 1 ) = substr( $s2, $pos, 1 );
    }

    return $out;
}

sub moritz {
    my ($s1, $s2) = @_;

    my $pos = 0;
    while ( 0 < ( $pos = index $s1, "\000", $pos ) ) {
    substr( $s1, $pos, 1 ) = substr( $s2, $pos, 1 );
    }

    return $s1;
}

sub ikegami_s {
    my ($s1, $s2) = @_;
    (my $mask = $s1) =~ s/[^\x00]/\xFF/g;
    return ($s1 & $mask) | ($s2 & ~$mask);
}

sub ikegami_tr {
   my ($s1, $s2) = @_;
   (my $mask = $s1) =~ tr/\x00/\xFF/c;
   return ($s1 & $mask) | ($s2 & ~$mask);
}

sub corion {
   my ($s1, $s2) = @_;

   my $ofs = 0;
   return join "", map { $ofs += length; $_ => substr $s2, $ofs++, 1 }
+ split /\0/, $s1, -1;
}

sub avar {
    my ($s1, $s2) = @_;

    my $s3 = $s1;

    {
        use bytes;
        $s3 =~ s/(\0)/substr $s2, $+[0]-1, 1/eg;
    }

    $s3;
}


sub avar2 {
    my ($s1, $s2) = @_;

    use bytes;
    $s1 =~ s/(\0)/substr $s2, $+[0]-1, 1/eg;

    return $s1;
}

sub avar2_pos {
    my ($s1, $s2) = @_;

    use bytes;
    $s1 =~ s/\0/substr $s2, pos($s1), 1/eg;

    return $s1;
}

sub avar2_pos_inplace {
    my ($s1, $s2) = @_;

    use bytes;
    $$s1 =~ s/\0/substr $s2, pos($$s1), 1/eg;
}

sub avar2_pos_inplace2 {
    my ($s1, $s2) = @_;

    use bytes;
    $$s1 =~ s/\0/substr $$s2, pos($$s1), 1/eg;
}

# This makes sure that $s1 has chr(0)'s in it and $s2 does not.
sub do_rand {
    my $min = shift;
    my $len = shift;
    my $n = "";
    for (1 .. $len)
    {
        $n .= chr( rand(255-$min)+$min );
    }
    return $n;
}

#sub do_rand {
#    my $n = (shift) ? int(rand(255)) : int(rand(254)) + 1;
#    return chr( $n );
#}

__END__
[download]

In reply to Re^2: Challenge: CPU-optimized byte-wise or-equals (for a meter of beer) by avar
in thread Challenge: CPU-optimized byte-wise or-equals (for a meter of beer) by dragonchild

Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.


Don't ask to ask, just ask
	PerlMonks