Hi Monks,
I am posting an updated version of the code, which significantly reduces the time needed to assign target status (it takes less than half the time of the original version).
use strict;
use Data::Dumper;
use List::Util qw/max/;
# Target regions, each encoded as "start_end". The lookup below halves the
# list around a pivot, so the regions are listed in ascending order.
my @target_regions = qw(100_200 210_310 400_450 475_600 680_900);
my $query_snp      = 205;

# quicksort() takes both arguments by reference and hands back a reference
# to the result string, so it is dereferenced before printing.
my $result_ref = quicksort( \@target_regions, \$query_snp );
print ${$result_ref};
# loosely based on the quicksort algorithm (Mastering Algorithms with Perl)
# Report which target region, if any, contains a query SNP position.
#
# Despite its name this is a binary search, not a sort: the window of
# candidate regions is halved around a pivot region until at most two
# regions remain, and those are then checked directly.
#
# Arguments:
#   $array - reference to an array of "start_end" region strings. The
#            halving only works if the regions are sorted ascending and do
#            not overlap.
#   $snp   - reference to a scalar holding the query position.
#
# Returns a reference to a tab-separated result string:
#   "<snp>\t<start>\t<end>"  the query lies in a pivot region that was
#                            examined while three or more candidates remained
#   "<snp>\tyes"             the query lies in one of the last one or two
#                            candidate regions
#   "<snp>\tNOT_IN_TARGET"   the query lies in no region (also returned for
#                            an empty region list)
#   "<snp>\terror\n"         defensive fallback when neither of the final
#                            checks matches
sub quicksort
{
    my ( $array, $snp ) = @_;
    my $query = $$snp;

    # The candidate window is tracked as a pair of indexes into the caller's
    # array, so no slice is copied while narrowing.
    my $lo = 0;
    my $hi = $#{$array};

    # An empty region list cannot contain the query. Without this guard the
    # search would never reach a base case.
    if ( $hi < $lo )
    {
        my $out_line = "$query\tNOT_IN_TARGET";
        return \$out_line;
    }

    # Narrow the window while three or more candidate regions remain.
    while ( $hi - $lo > 1 )
    {
        my $pivot = $lo + int( ( $hi - $lo ) / 2 );
        my ( $pivot_start, $pivot_end ) = split /_/, $array->[$pivot];

        # Check whether the query falls in the pivot region.
        if ( $query >= $pivot_start && $query <= $pivot_end )
        {
            my $out_line = "$query\t$pivot_start\t$pivot_end";
            return \$out_line;
        }

        # The pivot stays inside the window on purpose: it becomes one edge
        # of the final pair, so a query sitting in the gap next to it is
        # still reported as NOT_IN_TARGET by the checks below.
        if ( $query < $pivot_start )
        {
            $hi = $pivot;
        }
        else
        {
            $lo = $pivot;
        }
    }

    # One or two candidates remain. A single region is treated as a pair
    # made of the same region twice, which lets one set of checks serve both
    # cases (the "gap between the two" test can then never be true).
    my ( $first_start, $first_end ) = split /_/, $array->[$lo];
    my ( $last_start, $last_end ) =
        $hi > $lo
        ? split( /_/, $array->[$hi] )
        : ( $first_start, $first_end );

    my $out_line;

    # case 1: the query lies in the gap between the two regions, after the
    # last one, or before the first one.
    if (   $query > $first_end && $query < $last_start
        || $query > $last_end
        || $query < $first_start )
    {
        $out_line = "$query\tNOT_IN_TARGET";
    }

    # case 2: the query lies inside one of the remaining regions.
    elsif ($query >= $first_start && $query <= $first_end
        || $query >= $last_start && $query <= $last_end )
    {
        $out_line = "$query\tyes";
    }

    # case 3: satisfies neither of the above and hence is an error.
    else
    {
        $out_line = "$query\terror\n";
    }

    return \$out_line;
}
Any input is greatly appreciated!