Beefy Boxes and Bandwidth Generously Provided by pair Networks
Don't ask to ask, just ask
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??
I'm a really sore^Hthirsty loser. So I wrote an XS function that did the replacement in-place by operating directly on the SV'S char vector. It's faster than your swap function but still slower than my s///-pos stuff, I don't really know why.
use Inline C => q[ void avar_cee(SV *sv1, SV *sv2) { char *sv1p, *sv2p; STRLEN sv1len, sv2len; SV *sv1_sv; STRLEN i; if (!SvROK(sv1) || !SvPOK((SV*)SvRV(sv1)) || !SvPOK(sv2)) { croak("Usage: avar_cee(\$s1, $s2)"); } sv1_sv = (SV*)SvRV(sv1); sv1p = SvPV(sv1_sv, sv1len); sv2p = SvPV(sv2, sv2len); if (sv1len != sv2len) { croak("The given strings must be of the same length"); } for (i = 0; i < sv2len; i++) { if (sv1p[i] == '\0') { sv1p[i] = sv2p[i]; } } }
It could be sped up with something like OpenMP by putting #pragma omp paralell for before the for loop but I didn't have gcc 4.2 to test it. I changed the test script so that it wouldn't be unfair to the in-place functions. Previously I was copying the data anyway so that testing would work which defeated the purpose of doing it in-place. Results:
split1 5.07/s -- -82% -87% -99% -99% -99% + -99% -100% -100% -100% -100% -100% substr1 27.8/s 449% -- -31% -96% -96% -97% + -97% -98% -98% -99% -99% -99% ikegami_s 40.3/s 695% 45% -- -94% -95% -95% + -96% -97% -98% -98% -99% -99% avar 700/s 13700% 2414% 1636% -- -10% -18% + -30% -42% -62% -70% -80% -86% avar2 778/s 15243% 2695% 1830% 11% -- -8% + -22% -35% -58% -67% -78% -85% corion 848/s 16635% 2948% 2005% 21% 9% -- + -15% -29% -54% -63% -76% -83% ikegami_tr 999/s 19609% 3490% 2379% 43% 28% 18% + -- -17% -45% -57% -72% -80% avar2_pos 1201/s 23591% 4215% 2880% 72% 54% 42% + 20% -- -34% -48% -66% -76% moritz 1832/s 36032% 6481% 4445% 162% 135% 116% + 83% 53% -- -21% -49% -64% swap 2324/s 45742% 8250% 5666% 232% 199% 174% + 133% 93% 27% -- -35% -54% avar_c_inplace 3567/s 70267% 12717% 8751% 410% 359% 320% + 257% 197% 95% 53% -- -30% avar2_pos_inplace 5063/s 99781% 18093% 12464% 624% 551% 497% + 407% 322% 176% 118% 42% --
benchmark.pl:
#!/usr/bin/perl use 5.6.0; use strict; use warnings FATAL => 'all'; use Benchmark qw( cmpthese ); use Inline C => q[ void avar_c_inplace(SV *sv1, SV *sv2) { char *sv1p, *sv2p; STRLEN sv1len, sv2len; SV *sv1_sv; STRLEN i; if (!SvROK(sv1) || !SvPOK((SV*)SvRV(sv1)) || !SvPOK(sv2)) { croak("Usage: avar_cee(\$s1, $s2)"); } sv1_sv = (SV*)SvRV(sv1); sv1p = SvPV(sv1_sv, sv1len); sv2p = SvPV(sv2, sv2len); if (sv1len != sv2len) { croak("The given strings must be of the same length"); } for (i = 0; i < sv2len; i++) { if (sv1p[i] == '\0') { sv1p[i] = sv2p[i]; } } } SV* swap(SV* one, SV* two) { char *buf1, *buf2, *buf3; STRLEN idx, len; SV* ret; if (!SvPOK(one) || !SvPOK(two) || sv_len(one) > sv_len(two)) return newSVsv(&PL_sv_undef); len = sv_len(one); buf1 = SvPVX(one); buf2 = SvPVX(two); buf3 = malloc(len); for (idx=0; idx < len; idx++) { buf3[idx] = buf1[idx] ? buf1[idx] : buf2[idx]; } ret = newSVpv(buf3, len); free(buf3); return ret; } ]; my $s1 = do_rand(0, 100_000); my $s2 = do_rand(1, 100_000); #my $s1 = "a\0b\0c\0d\0"; #my $s2 = "aXbYcZdA"; my @z = $s1 =~ m/(\0)/g; my $num = @z; warn "\$s1 has $num zeroes"; my $cp0 = $s1; my $cp1 = $s1; my %subs = ( 'split1' => sub { my $s3 = split1( $s1, $s2 ) }, 'substr1' => sub { my $s3 = substr1( $s1, $s2 ) }, # 'kyle' => sub { my $s3 = kyle( $s1, $s2 ) }, 'moritz' => sub { my $s3 = moritz( $s1, $s2 ) }, 'corion' => sub { my $s3 = corion( $s1, $s2 ) }, 'ikegami_s' => sub { my $s3 = ikegami_s( $s1, $s2 ) }, 'ikegami_tr' => sub { my $s3 = ikegami_tr( $s1, $s2 ) }, 'avar' => sub { my $s3 = avar( $s1, $s2 ) }, 'avar2' => sub { my $s3 = avar2( $s1, $s2 ) }, 'avar2_pos' => sub { my $s3 = avar2_pos( $s1, $s2 ) }, 'avar2_pos_inplace' => sub { avar2_pos_inplace( \$cp0, $s2 ); $cp +0 }, 'avar_c_inplace' => sub { avar_c_inplace( \$cp1, $s2 ); $cp1 }, 'swap' => sub { swap( $s1, $s2 ) }, # 'bogus' => sub { "oh noes" }, ); cmpthese( -2, \%subs ); use Test::More; plan 'tests' => scalar keys %subs; my $correct; { my $tmp = $s1; avar2_pos_inplace(\$tmp, $s2); $correct = $tmp; } foreach my $subname ( keys %subs ) { my $sub = $subs{$subname}; ok($sub->() eq $correct, "$subname returned the correct value"); } sub split1 { my ($s1, $s2) = @_; my @s1 = split //, $s1; my @s2 = split //, $s2; foreach my $idx ( 0 .. $#s1 ) { if ( $s1[$idx] eq chr(0) ) { $s1[$idx] = $s2[$idx]; } } return join '', @s1; } sub substr1 { my ($s1, $s2) = @_; for my $idx ( 0 .. length($s1) ) { if ( substr($s1,$idx,1) eq chr(0) ) { substr($s1, $idx, 1) = substr($s2, $idx, 1); } } return $s1; } sub kyle { my ($s1, $s2) = @_; my $out = $s1; while ( $s1 =~ m/\000/g ) { my $pos = pos; substr( $out, $pos, 1 ) = substr( $s2, $pos, 1 ); } return $out; } sub moritz { my ($s1, $s2) = @_; my $pos = 0; while ( 0 < ( $pos = index $s1, "\000", $pos ) ) { substr( $s1, $pos, 1 ) = substr( $s2, $pos, 1 ); } return $s1; } sub ikegami_s { my ($s1, $s2) = @_; (my $mask = $s1) =~ s/[^\x00]/\xFF/g; return ($s1 & $mask) | ($s2 & ~$mask); } sub ikegami_tr { my ($s1, $s2) = @_; (my $mask = $s1) =~ tr/\x00/\xFF/c; return ($s1 & $mask) | ($s2 & ~$mask); } sub corion { my ($s1, $s2) = @_; my $ofs = 0; return join "", map { $ofs += length; $_ => substr $s2, $ofs++, 1 } + split /\0/, $s1, -1; } sub avar { my ($s1, $s2) = @_; my $s3 = $s1; { use bytes; $s3 =~ s/(\0)/substr $s2, $+[0]-1, 1/eg; } $s3; } sub avar2 { my ($s1, $s2) = @_; use bytes; $s1 =~ s/(\0)/substr $s2, $+[0]-1, 1/eg; return $s1; } sub avar2_pos { my ($s1, $s2) = @_; use bytes; $s1 =~ s/\0/substr $s2, pos($s1), 1/eg; return $s1; } sub avar2_pos_inplace { my ($s1, $s2) = @_; use bytes; $$s1 =~ s/\0/substr $s2, pos($$s1), 1/eg; } sub avar2_pos_inplace2 { my ($s1, $s2) = @_; use bytes; $$s1 =~ s/\0/substr $$s2, pos($$s1), 1/eg; } # This makes sure that $s1 has chr(0)'s in it and $s2 does not. sub do_rand { my $min = shift; my $len = shift; my $n = ""; for (1 .. $len) { $n .= chr( rand(255-$min)+$min ); } return $n; } #sub do_rand { # my $n = (shift) ? int(rand(255)) : int(rand(254)) + 1; # return chr( $n ); #} __END__

In reply to Re^2: Challenge: CPU-optimized byte-wise or-equals (for a meter of beer) by avar
in thread Challenge: CPU-optimized byte-wise or-equals (for a meter of beer) by dragonchild

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others browsing the Monastery: (6)
As of 2024-04-23 10:25 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found