#! perl
use strict;
use warnings;
use constant MIN_DEPTH => 5;
my ($chromosome, $position, undef, $coverage_depth) = split /\s+/, <D
+ATA>;
my %series =
(
name => $chromosome,
start => $position,
end => $position,
depth => $coverage_depth,
);
while (<DATA>)
{
($chromosome, $position, undef, $coverage_depth) = split /\s+/;
if ($series{name} eq $chromosome &&
$series{end} == $position - 1 &&
$series{depth} >= MIN_DEPTH &&
$coverage_depth >= MIN_DEPTH)
{
$series{end} = $position;
}
else
{
display_series();
%series =
(
name => $chromosome,
start => $position,
end => $position,
depth => $coverage_depth,
);
}
}
display_series();
sub display_series
{
if ($series{depth} >= MIN_DEPTH)
{
print join(',', $series{name}, $series{start}, $series{end},
$series{end} - $series{start} + 1), "\n";
}
}
__DATA__
C10000035 12 C 4 ....^>. HHFCC
C10000035 13 C 6 .....^>. HHFFCC
C10000035 14 C 6 ...... JHFFCC
C10000035 15 C 6 ...... IHFFFC
C10000035 16 A 4 .GG...^>G JGHFFFC
C10000035 17 C 7 ....... JGHFFFC
C10000035 18 C 8 .......^]. JIHHFFC@
C10000035 19 A 8 ........ IJHHFFFC
C10000035 20 C 9 ..T...T.^]. JIHGHFF@C
C10000035 21 G 10 A........^]. AJJHHHFDCC
C10000040 30 C 5 ....^>. HHFCC
C10000040 31 C 6 .....^>. HHFFCC
C10000040 32 C 6 ...... JHFFCC
C10000040 33 C 6 ...... IHFFFC
C10000040 34 C 4 ...... IHFFFC
C10000040 35 C 4 ...... IHFFFC
C10000040 36 C 4 ...... IHFFFC
C10000040 37 C 6 ...... IHFFFC
C10000040 38 C 6 ...... IHFFFC
Output:
16:48 >perl 1157_SoPW.pl
C10000035,13,15,3
C10000035,17,21,5
C10000040,30,33,4
C10000040,37,38,2
16:50 >
Hope that helps,
|