# From Knuth, The Art of Computer Programming
# Algorithm S (3.4.2)
# Select n records at random from a set of N records where
# 0($num-@result)); push @result,$array->[$pos++]; } return \@result }
#
# NOTE(review): the Algorithm S description and implementation above are
# garbled in this copy -- the text between "0" and "($num-@result))" is
# missing -- so the fragment is preserved verbatim as a comment rather than
# reconstructed. Restore it from the original source before relying on it.

# From Knuth, The Art of Computer Programming
# Algorithm R (3.4.2) -- reservoir sampling.
#
# Arguments:
#   $file - an open, readable filehandle; one record per line.
#   $num  - the desired number of records in the sample.
#
# Returns a reference to an array holding $num chomped records. A $num of
# zero (or less) yields an empty sample.
#
# Dies with "Insufficient records\n" if the handle supplies fewer than $num
# records. The handle is always read through to end-of-file.
sub reservoir_sample {
    my ($file, $num) = @_;

    # Fill the reservoir with the first $num records. The size is tested
    # before each read so that a request for zero records consumes nothing
    # here and the reservoir stays empty.
    my @buffer;
    while (@buffer < $num) {
        my $line = <$file>;
        last if !defined $line;
        chomp $line;
        push @buffer, $line;
    }
    die "Insufficient records\n" if @buffer < $num;

    # $pos counts the records seen so far. Each later record replaces a
    # random reservoir slot with probability @buffer / $pos.
    # A lexical $line is used so the caller's $_ is left untouched.
    my $pos = @buffer;
    while (defined(my $line = <$file>)) {
        $pos++;
        my $slot = int(rand($pos));
        if ($slot < @buffer) {
            chomp $line;
            $buffer[$slot] = $line;
        }
    }

    return \@buffer;
}