I've benchmarked all of the promising-looking solutions here. If you don't see yours and you think it's a contender, let me know, and ideally post your benchmark code and results as a followup to this (or stick it in a scratchpad and /msg me and I'll add it).
#!/usr/bin/perl
use Benchmark;
use Inline C => 'DATA',
VERSION => 0.0,
NAME => 'SimpleTest',
OPTIMIZE => '-O3';
Inline->init;
sub simple3
{
my($a,$b)=@_;
my(@seen);
return undef if (length($a) != length($b));
foreach my $i (0..length($a))
{
my($ac,$bc)=(substr($a,$i,1),substr($b,$i,1));
if ($ac eq $bc)
{
; # Do nothing
}
elsif ($ac eq '_')
{
return undef if (++$seen[$bc] > 1);
}
elsif ($bc eq '_')
{
return undef if (++$seen[$ac] > 1);
}
else { return undef }
}
1;
}
# Represent each string as two strings and two masks.
sub sgifford_clever2
{
(my $a = $_[0]) =~ tr/_/\0/;
(my $b = $_[1]) =~ tr/_/\0/;
# Data transformations; could be done beforehand in linear time.
my($a3,$b3)=("\0"x10,"\0"x10);
foreach my $i (0..(length($a)-1))
{
my $c = substr($a,$i,1);
next if $c eq "\0";
substr($a3,$c,1)=$i;
}
foreach my $i (0..(length($b)-1))
{
my $c = substr($b,$i,1);
next if $c eq "\0";
substr($b3,$c,1)=$i;
}
my $a_new = $a . $a3;
my $b_new = $b . $b3;
(my $a_mask = $a_new) =~ tr/\0/\xff/c;
(my $b_mask = $b_new) =~ tr/\0/\xff/c;
# (my $print = "\t$a_new\n\t$b_mask\nvs.\t$b_new\n\t$a_mask\n\n") =~
+tr/\0\xff/_!/;
# print $print;
# Comparisons; must be done for each comparison.
return (($a_new & $b_mask) eq ($b_new & $a_mask));
}
sub sgifford_clever3
{
# a and mask[b] eq b and mask[a]
($_[0][1] & $_[1][2]) eq ($_[1][1] & $_[0][2]);
}
sub sgifford_clever3_xform
{
(my $a = $_[0]) =~ tr/_/\0/;
# Data transformations; could be done beforehand in linear time.
my($a3)="\0"x10;
foreach my $i (0..(length($a)-1))
{
my $c = substr($a,$i,1);
next if $c eq "\0";
substr($a3,$c,1)=$i;
}
my $a_new = $a . $a3;
(my $a_mask = $a_new) =~ tr/\0/\xff/c;
return [$_[0],$a_new,$a_mask];
}
# From [ccn]
sub ccn
{
local $_ = $_[0] ^ $_[1];
return not (/[\001-\017]/ or /([\020-\031]).*?\1/s);
}
# From [ambrus]
sub ambrus
{
my ($s1, $s2) = @_;
my $m = length($s1) - 1;
my($n, $p) = ($m - 1, $m + 1);
($s1 . $s2) !~ /^.{0,$m}?([^_]).{$m}(?!\1)[^_]/ and
($s1 . $s2) !~ /^.{0,$m}?([^_])(?:.{0,$n}|.{$p,})\1/;
}
# From [aristotle]
sub aristotle
{
my( $l, $r ) = @_;
# underscores are insignificant
tr/_/\0/ for $l, $r;
# cancel out identical values
my $xor = $l ^ $r;
# convert to bitmasks
tr/\0/\377/c for $l, $r;
my $mask = $l & $r;
# masked chars must be identical
return !1 if ( $xor & $mask ) =~ tr/\0//c;
# and there may not be dupes of non-identical characters
return 0 == grep {
my $char = substr( $xor, $_, 1 );
$char ne "\0" and index( $xor, $char, $_ + 1 ) !
+= -1
} 0 .. length( $xor ) - 1;
}
sub aristotle2_xform
{
my($a)=@_;
# underscores are insignificant
$a =~ tr/_/\0/;
(my $mask = $a) =~ tr/\0/\377/c;
return [$_[0],$a,$mask];
}
sub aristotle2
{
my( $l, $r ) = @_;
# cancel out identical values
my $xor = $l->[1] ^ $r->[1];
my $mask = $l->[2] & $r->[1];
# masked chars must be identical
return !1 if ( $xor & $mask ) =~ tr/\0//c;
# and there may not be dupes of non-identical characters
return 0 == grep {
my $char = substr( $xor, $_, 1 );
$char ne "\0" and index( $xor, $char, $_ + 1 ) !
+= -1
} 0 .. length( $xor ) - 1;
}
my @tests =
(qw/
_8__3__19
48____7__
_8__3__19
4_2___7__
_8__3__19
4_8___7__
__8_3__19
48____7__
__8_3__19
84____7__
_8__3__19
48_____7_
/
);
sub run_tests
{
my($func,$verbose,@tests)=@_;
my ($s1, $s2);
while (defined($s1 = shift(@tests)))
{
$s2 = shift(@tests);
my $result = $func->($s1, $s2);
if ($verbose)
{
if (ref($s1) && ref($s2))
{
$s1 = $s1->[0];
$s2 = $s2->[0];
}
print "$s1\n$s2: ",$result?"compatible":"not compatible","\n";
}
}
}
my @tests_clever3 = map { sgifford_clever3_xform($_) } @tests;
my @tests_ccn = map { my $tmp = $_; $tmp =~ tr/_/ /; $tmp } @tests;
my @tests_aristotle2 = map { aristotle2_xform($_) } @tests;
#run_tests(\&simple3, 1, @tests);
#run_tests(\&ambrus, 1, @tests);
#run_tests(\&aristotle,1,@tests);
#run_tests(\&aristotle2,1,@tests_aristotle2);
#run_tests(\&clever3, 1, @tests_clever3);
#run_tests(\&cclever3, 1, @tests_clever3);
timethese(25_000, {
simple3 => sub { run_tests(\&simple3, 0, @tests) },
sgifford_csimple3 => sub { run_tests(\&sgifford_csimp
+le3, 0, @tests) },
sgifford_clever2 => sub { run_tests(\&sgifford_clever
+2, 0, @tests) },
sgifford_clever3 => sub { run_tests(\&sgifford_clever
+3, 0, @tests_clever3) },
sgifford_cclever3 => sub { run_tests(\&sgifford_cclev
+er3, 0, @tests_clever3) },
ccn => sub { run_tests(\&ccn, 0, @tests_ccn) },
ambrus => sub { run_tests(\&ambrus, 0, @tests) },
aristotle => sub { run_tests(\&aristotle, 0, @tests)
+},
aristotle2 => sub { run_tests(\&aristotle2, 0, @tests
+_aristotle2) },
});
__DATA__
__C__
int sgifford_csimple3(const char *a, const char *b)
{
int i;
int l;
unsigned char seen[256];
memset(seen,0,256);
if ((l=strlen(a)) != strlen(b))
return 0;
for(i=0;i<l;i++)
{
if (a[i] == b[i])
{
; /* Do nothing */
}
else if (a[i] == '_')
{
if (++seen[b[i]] > 1)
return 0;
}
else if (b[i] == '_')
{
if (++seen[a[i]] > 1)
return 0;
}
else
return 0;
}
return 1;
}
int sgifford_cclever3(SV *a, SV *b)
{
AV *a_arr, *b_arr;
SV **tmp;
char *a_val, *a_mask, *b_val, *b_mask;
int i;
/* First get the arrays from the references */
if (!SvROK(a) || !SvROK(b))
croak("a or b not arrayrefs!");
a_arr = (AV*)SvRV(a);
b_arr = (AV*)SvRV(b);
/* Now pull out the data */
if ( (tmp = av_fetch(a_arr, 1, 0)) == NULL)
croak("a[1] is undef");
if ((a_val = SvPV(*tmp, PL_na)) == NULL)
croak("a[1] contains NULL pointer?");
if ( (tmp = av_fetch(a_arr, 2, 0)) == NULL)
croak("a[2] is undef");
if ((a_mask = SvPV(*tmp, PL_na)) == NULL)
croak("a[2] contains NULL pointer?");
if ( (tmp = av_fetch(b_arr, 1, 0)) == NULL)
croak("b[1] is undef");
if ((b_val = SvPV(*tmp, PL_na)) == NULL)
croak("b[1] contains NULL pointer?");
if ( (tmp = av_fetch(b_arr, 2, 0)) == NULL)
croak("b[2] is undef");
if ((b_mask = SvPV(*tmp, PL_na)) == NULL)
croak("b[2] contains NULL pointer?");
/* OK, finally we have all of the data! */
for(i=0;i<20;i++)
{
if ((a_val[i] & b_mask[i]) != (b_val[i] & a_mask[i]))
return 0;
}
return 1;
}