My end result is not to know, for each (x,y) array how many 1's they share just to know what are the top 10 y arrays that share the most 1' with each x array.
Convert your arrays of 0s 1s to bit-strings, then use bitwise-& and unpack '%32b*' to count the equivalences and you can do this 300+ times faster than comparing the arrays:
#! perl -slw
use strict;
use Benchmark qw[ cmpthese ];
use Data::Dump qw[ pp ]; $Data::Dump::WIDTH = 500;
our $I //= -1;
our $N //= 1000;
our @xArrays = map[ map int( rand 2 ), 1 .. 15_000 ], 1 .. $N;
our @yArrays = map[ map int( rand 2 ), 1 .. 15_000 ], 1 .. $N;
our @xStrings = map{ join '', @$_ } @xArrays;
our @yStrings = map{ join '', @$_ } @yArrays;
our @xBits = map{ pack 'b*', $_ } @xStrings;
our @yBits = map{ pack 'b*', $_ } @yStrings;
cmpthese $I, {
array => q[
my %top10s;
for my $x ( 0 .. $#xArrays ) {
for my $y ( 0 .. $#yArrays ) {
my $count = 0;
$xArrays[$x][$_] == 1 && $yArrays[$y][$_] == 1 and ++$
+count for 0 .. $#{ $xArrays[ 0 ] };
$top10s{"$x:$y"} = $count;
my $discard = ( sort{ $top10s{$a} <=> $top10s{$b} } ke
+ys %top10s )[ 0 ];
keys( %top10s ) > 10 and delete $top10s{$discard};
}
}
$I == 1 and pp ' arrays: ', %top10s;
],
strings => q[
my %top10s;
for my $x ( 0 .. $#xStrings ) {
for my $y ( 0 .. $#yStrings ) {
my $count = ( $xStrings[$x] & $yStrings[$y] ) =~ tr[1]
+[];
$top10s{"$x:$y"} = $count;
my $discard = ( sort{ $top10s{$a} <=> $top10s{$b} } ke
+ys %top10s )[ 0 ];
keys( %top10s ) > 10 and delete $top10s{$discard};
}
}
$I == 1 and pp 'strings: ', %top10s;
],
bits => q[
my %top10s;
for my $x ( 0 .. $#xBits ) {
for my $y ( 0 .. $#yBits ) {
my $count = unpack '%32b*', ( $xBits[$x] & $yBits[$y]
+);
$top10s{"$x:$y"} = $count;
my $discard = ( sort{ $top10s{$a} <=> $top10s{$b} } ke
+ys %top10s )[ 0 ];
keys( %top10s ) > 10 and delete $top10s{$discard};
}
}
$I == 1 and pp ' bits: ', %top10s;
],
};
__END__
C:\test>1067218 -N=100
Rate array strings bits
array 1.95e-002/s -- -98% -100%
strings 1.08/s 5417% -- -82%
bits 5.97/s 30510% 455% --
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.