http://www.perlmonks.org?node_id=998890

linseyr has asked for the wisdom of the Perl Monks concerning the following question:

Hi, I want to find the overlapping regions from 4 files which look like this:
file: 148N chr1 10 50 file: 162N chr1 9 40 file: 174N chr1 12 60 file: 175N chr1 30 45
If the fist column (chr) is the same in ALL files, i want to compare column 3 and 4 (start and end). If in ALL files the regions are overlapping, I want to return the smallest start position and the largest end position to get the complete region in which there are overlapping reads. So for this input the output would be:
chr1 9 60
Because there is a region that is overlapping in all files. I want to do this, ONLY if ALL 4 files are overlapping. I wrote a script now to check for all possible combination for overlapping reads, but because of all the many possible combinations I think it wont be finished in this week... This is my script now:
open(N148,'<',"148Nsorted.bed") or die $!; my @Sample148N = <N148>; close(N148); open(N162,'<',"162Nsorted.bed") or die $!; my @Sample162N = <N162>; close(N162); open(N174,'<',"174Nsorted.bed") or die $!; my @Sample174N = <N174>; close(N174); open(N175,'<',"175Nsorted.bed") or die $!; my @Sample175N = <N175>; close(N175); my @results; my $start; my $end; my $overlap = "no-overlap"; my $option1 = ($end148-$start162 >= 1 && $end162 > $end148) && ($end17 +4 - $start148 > 1 && $start174 < $start148) && ($start175 > $start148 + && $end148 > $end175); my $option2 = ($end148-$start162 >= 1 && $end162 > $end148) && ($end17 +5 - $start148 > 1 && $start175 < $start148) && ($start174 > $start148 + && $end148 > $end174); my $option3 = ($end148-$start174 >= 1 && $end174 > $end148) && ($end17 +5 - $start148 > 1 && $start175 < $start148) && ($start162 > $start148 + && $end148 > $end162); my $option4 = ($end148-$start175 >= 1 && $end175 > $end148) && ($end17 +4 - $start148 > 1 && $start174 < $start148) && ($start162 > $start148 + && $end148 > $end162); my $option5 = ($end148-$start174 >= 1 && $end174 > $end148) && ($end16 +2 - $start148 > 1 && $start162 < $start148) && ($start175 > $start148 + && $end148 > $end175); my $option6 = ($end148-$start175 >= 1 && $end175 > $end148) && ($end16 +2 - $start148 > 1 && $start162 < $start148) && ($start174 > $start148 + && $end148 > $end174); my $option7 = ($end162-$start148 >= 1 && $end148 > $end162) && ($end17 +4 - $start162 > 1 && $start174 < $start162) && ($start175 > $start162 + && $end162 > $end175); my $option8 = ($end162-$start148 >= 1 && $end148 > $end162) && ($end17 +5 - $start162 > 1 && $start175 < $start162) && ($start174 > $start162 + && $end162 > $end174); my $option9 = ($end162-$start174 >= 1 && $end174 > $end162) && ($end17 +5 - $start162 > 1 && $start175 < $start162) && ($start148 > $start162 + && $end162 > $end148); my $option10 = ($end162-$start175 >= 1 && $end175 > $end162) && ($end1 +74 - $start162 > 1 && $start174 < $start162) && ($start148 > $start16 +2 && $end162 > $end148); my $option11 = ($end162-$start174 >= 1 && $end174 > $end162) && ($end1 +48 - $start162 > 1 && $start148 < $start162) && ($start175 > $start16 +2 && $end162 > $end175); my $option12 = ($end162-$start175 >= 1 && $end175 > $end162) && ($end1 +48 - $start162 > 1 && $start148 < $start162) && ($start174 > $start16 +2 && $end162 > $end174); my $option13 = ($end174-$start162 >= 1 && $end162 > $end174) && ($end1 +48 - $start174 > 1 && $start148 < $start174) && ($start175 > $start17 +4 && $end174 > $end175); my $option14 = ($end174-$start162 >= 1 && $end162 > $end174) && ($end1 +75 - $start174 > 1 && $start175 < $start174) && ($start148 > $start17 +4 && $end174 > $end148); my $option15 = ($end174-$start148 >= 1 && $end148 > $end174) && ($end1 +75 - $start174 > 1 && $start175 < $start174) && ($start162 > $start17 +4 && $end174 > $end162); my $option16 = ($end174-$start175 >= 1 && $end175 > $end174) && ($end1 +48 - $start174 > 1 && $start148 < $start174) && ($start162 > $start17 +4 && $end174 > $end162); my $option17 = ($end174-$start148 >= 1 && $end148 > $end174) && ($end1 +62 - $start174 > 1 && $start162 < $start174) && ($start175 > $start17 +4 && $end174 > $end175); my $option18 = ($end174-$start175 >= 1 && $end175 > $end174) && ($end1 +62 - $start174 > 1 && $start162 < $start174) && ($start148 > $start17 +4 && $end174 > $end148); my $option19 = ($end175-$start162 >= 1 && $end162 > $end175) && ($end1 +74 - $start175 > 1 && $start174 < $start175) && ($start148 > $start17 +5 && $end175 > $end148); my $option20 = ($end175-$start162 >= 1 && $end162 > $end175) && ($end1 +48 - $start175 > 1 && $start148 < $start175) && ($start174 > $start17 +5 && $end175 > $end174); my $option21 = ($end175-$start174 >= 1 && $end174 > $end175) && ($end1 +48 - $start175 > 1 && $start148 < $start175) && ($start162 > $start17 +5 && $end175 > $end162); my $option22 = ($end175-$start148 >= 1 && $end148 > $end175) && ($end1 +74 - $start175 > 1 && $start174 < $start175) && ($start162 > $start17 +5 && $end175 > $end162); my $option23 = ($end175-$start174 >= 1 && $end174 > $end175) && ($end1 +62 - $start175 > 1 && $start162 < $start175) && ($start148 > $start17 +5 && $end175 > $end148); my $option24 = ($end175-$start148 >= 1 && $end148 > $end175) && ($end1 +62 - $start175 > 1 && $start162 < $start175) && ($start174 > $start17 +5 && $end175 > $end174); for my $line(@Sample148N){ my($chr148,$start148,$end148) = split("\t",$line); for my $line2(@Sample162N){ my($chr162,$start162,$end162) = split("\t",$line2); for my $line3(@Sample174N){ my($chr174,$start174,$end174) = split("\t",$line3); for my $line4(@Sample175N){ my($chr175,$start175,$end175) = split("\t",$line2); if(($chr148 eq $chr162 && $chr148 eq $chr174 && $chr14 +8 eq $chr175) && ($option1 || $option2 || $option3 || $option4 || $op +tion5 || $option6 || $option7 || $option8 || $option9 || $option10 || + $option11 || $option12 || $option13 || $option14 || $option15 || $op +tion16 || $option17 || $option18 || $option19 || $option20 || $option +21 || $option22 || $option23 || $option24){ $overlap = "overlap"; $start = $start148 if ($start148 < $start162 && $s +tart148 <$start174 && $start148 < $start175); $end = $end148 if ($end148 > $end162 && $end148 > +$end174 && $end148 > $end175); $start = $start162 if ($start162 < $start148 && $s +tart162 <$start174 && $start162 < $start175); $end = $end162 if ($end162 > $end148 && $end162 > +$end174 && $end162 > $end175); $start = $start174 if ($start174 < $start162 && $s +tart174 <$start148 && $start174 < $start175); $end = $end174 if ($end174 > $end162 && $end174 > +$end148 && $end174 > $end175); $start = $start175 if ($start175 < $start148 && $s +tart175 <$start174 && $start175 < $start162); $end = $end175 if ($end175 > $end148 && $end175 > +$end174 && $end175 > $end162); } } print "$chr148 $start $end"; }
Could somebody help me find an easier way to do this?\ Thanks a lot!