#! perl -slw use strict; use Benchmark qw[ cmpthese ]; use Data::Dump qw[ pp ]; $Data::Dump::WIDTH = 500; our $I //= -1; our $N //= 1000; our $W //= 15_000; # OP mentions that 1 bits are sparse (1 :: 500) It doesn't make a difference for straight # bit comparison techniques, but it *does* for mine. our $P //= 0.002; our @xArrays = map[ map { rand()<$P ? 1 : 0 } 1 .. $W ], 1 .. $N; our @yArrays = map[ map { rand()<$P ? 1 : 0 } 1 .. $W ], 1 .. $N; our @xStrings = map{ join '', @$_ } @xArrays; our @yStrings = map{ join '', @$_ } @yArrays; our @xBits = map{ pack 'b*', $_ } @xStrings; our @yBits = map{ pack 'b*', $_ } @yStrings; # Because the bit vectors are sparse, I'm storing them as lists of bit positions, rather # than bit vectors, so I'm converting Y my $start = time; our @yArrays2; for my $y (0 .. $#yArrays) { for my $bitcol (0 .. $W-1) { push @{$yArrays2[$y]}, $bitcol if $yArrays[$y][$bitcol]; } } my $cur = time - $start; print "Converting yArrays format took $cur sec.\n"; # I bin the X arrays after which I never use them again, so I'll bin them directly from the # originally generated vectors. $start = time; our @bitcols; for my $x (0 .. $#xArrays) { for my $bitcol (0 .. 14_999) { push @{$bitcols[$bitcol]}, $x if $xArrays[$x][$bitcol]; } } $cur = time - $start; print "Binning xArrays took $cur sec.\n"; cmpthese $I, { array => q[ my %top10s; for my $x ( 0 .. $#xArrays ) { for my $y ( 0 .. $#yArrays ) { my $count = 0; $xArrays[$x][$_] == 1 && $yArrays[$y][$_] == 1 and ++$count for 0 .. $#{ $xArrays[ 0 ] }; $top10s{"$x:$y"} = $count; my $discard = ( sort{ $top10s{$a} <=> $top10s{$b} || $a cmp $b} keys %top10s )[ 0 ]; keys( %top10s ) > 10 and delete $top10s{$discard}; } } $I == 1 and pp ' arrays: ', %top10s; ], strings => q[ my %top10s; for my $x ( 0 .. $#xStrings ) { for my $y ( 0 .. $#yStrings ) { my $count = ( $xStrings[$x] & $yStrings[$y] ) =~ tr[1][]; $top10s{"$x:$y"} = $count; my $discard = ( sort{ $top10s{$a} <=> $top10s{$b} || $a cmp $b} keys %top10s )[ 0 ]; keys( %top10s ) > 10 and delete $top10s{$discard}; } } $I == 1 and pp 'strings: ', %top10s; ], bits => q[ my %top10s; for my $x ( 0 .. $#xBits ) { for my $y ( 0 .. $#yBits ) { my $count = unpack '%32b*', ( $xBits[$x] & $yBits[$y] ); $top10s{"$x:$y"} = $count; my $discard = ( sort{ $top10s{$a} <=> $top10s{$b} || $a cmp $b} keys %top10s )[ 0 ]; keys( %top10s ) > 10 and delete $top10s{$discard}; } } $I == 1 and pp ' bits: ', %top10s; ], robo => sub { my %top10s; my $cur_min=0; for my $y ( 0 .. $#yArrays2 ) { my %x; for my $bitcol (@{$yArrays2[$y]}) { $x{$_}++ for @{$bitcols[$bitcol]}; } for my $k (keys %x) { next unless $x{$k} >= $cur_min; $top10s{"$k:$y"} = $x{$k}; my $discard = ( sort{ $top10s{$a} <=> $top10s{$b} || $a cmp $b } keys %top10s )[ 0 ]; if (keys %top10s > 10) { $cur_min = $top10s{$discard}; delete $top10s{$discard}; } } } $I == 1 and pp ' bits: ', %top10s; }, };