I am dumping my own code here which is for finding the union/intersection of arrays
in-place. It uses
mergesort to order the lists(The
Heap module from CPAN is of low-quality so I had to use the
built-in mergesort for sorting..) and then a
divide-and-conquer to find the intersection and union. It is about 2 times slower than
uni_int_2 but it is as much efficient in
space requirements because it does
not allocate any more memory than the input arrays (of course I can't compare it with the
buk(). For arrays of
500,000 elements it takes
4-5 seconds in my machine, and
RAM consumption is
50MB which is what Perl allocates for the initial arrays (and of course the
runtime).
I didn't try to optimize it much, perhaps someone could throw an idea here. Sorry for the big file, but I have some comments inline that might help anyone interested.
use warnings;
use strict;
use List::Util 'shuffle';
use sort '_mergesort';
#create the
!$ARGV[0] and print "No input for array size" and exit;
my @array1 = shuffle 1..$ARGV[0];
my @array2 = shuffle 1..$ARGV[0];
my @sortedA = sort {$a<=>$b} (@array1);
my @sortedB = sort {$a<=> $b } (@array2);
our $pivot; #used in binary search..
my ($uni, $insc)= p_uni(0,$#sortedA,0,$#sortedB);
print "\n|U| = ",$uni," |I|=",$insc;
#>>>>> UNION and INTERSECTION <<<<<<<<
#The function p_uni() will return an ARRAY for which
#ARRAY[0] = cardinality of union
#ARRAY[1]=cardinality of intersection
sub p_uni {
#The function process 2 ordered lists, say A and B as follows
#We split A = A1+A2 and B=B1+B2 around the middle
#Since they are ordered we know that INTERSECTION(A1,B2)=0 and INTERSE
+CTION (A2,B1)=0, so we process them independently...
#The split array Ai is A[aLeft,aRight] - a subset of A with those indi
+ces. The same holds for bLeft and bRight.
my ($aLeft,$aRight,$bLeft, $bRight) = @_;
my ($tmp_uni,$tmp_insc)=(0)x 2;
#If A array is too small then run a linear search and match
if($aRight-$aLeft<100)
{
my $search_result;
for (my $i=$aLeft, my $bLeft_h = $bLeft;$i<=$aRight;$i++)
{
$search_result = bin_search(\@sortedB, $sortedA[$i],$bL
+eft_h,$bRight);
$tmp_uni++;
$tmp_insc++ if($search_result->[0]);
$bLeft_h = $search_result->[1];
}
return ($aRight-$aLeft+1-$tmp_insc+$bRight-$bLeft+1, $tmp_insc)
+;
}
#If B array is too small run a linear search and match
elsif($bRight-$bLeft<100)
{
my ($tmp_uni,$tmp_insc)=(0)x 2;
my $search_result;
for my $i($bLeft..$bRight)
{
if(defined($sortedB[$i-1]) && $sortedB[$i-1] != $so
+rtedB[$i]) {
my $search_result = bin_search(\@sortedA, $sortedB[$i]
+,$aLeft,$aRight);
if($search_result->[0]) {$tmp_uni++; $tmp_insc++;}
else { $tmp_uni+=1; }
$aLeft = $search_result->[1];
}
}
return ($aRight-$aLeft+1-$tmp_insc+$bRight-$bLeft+1, $tmp_insc
+);
}
my $bPos = int(($bLeft+$bRight)/2);
my $aPos = bin_search(\@sortedA, $sortedB[$bPos],$aLeft,$aRight);
my ($a_insc,$a_uni) = p_uni($aLeft, $aPos->[1], $bLeft,$bPos);
my ($b_insc,$b_uni) = p_uni($aPos->[1]+1,$aRight,$bPos+1,$bRight);
return($a_insc+$b_insc,$a_uni+$b_uni);
}
#This is a binary search- Searches in the ordered list reference AR_RE
+F for WHAT and between and including indices cLEFT cRIGHT..
sub bin_search {
my ($ar_ref, $what,$cLeft,$cRight) =@_;
my ($left,$right,$value)=($cLeft,$cRight,0);
while($left<=$right )
{
$pivot = int (($left+$right)/2);
$value = $ar_ref->[$pivot];
if($value>$what)
{
$right=$pivot-1;
}
elsif($value<$what)
{
$left=$pivot+1;
}
else
{
return [1,$pivot];
}
}
return [0,$pivot];
}