#!/usr/bin/perl -w use strict; use Bio::Tools::BPbl2seq; my $AFILE = 'a.seq'; my $BFILE = 'b.seq'; my $OUTFILE = 'bl2seq.out'; system('blast/bl2seq', '-i', $AFILE, '-j', $BFILE, '-o', $OUTFILE, '-p', 'blastn', '-e', '1e-4'); my $report = Bio::Tools::BPbl2seq->new(-file => $OUTFILE, -queryname => 'a.seq'); my (@Aranges,@Branges); while (my $hsp = $report->next_feature) { push @Aranges, [$hsp->query->start, $hsp->query->end]; push @Branges, [$hsp->sbjct->start, $hsp->sbjct->end]; } my $Arange_ref = \@Aranges; my $Brange_ref = \@Branges; my $sortedA = sort_array($Arange_ref); my $sortedB = sort_array($Brange_ref); my $Aseq = get_seq($AFILE); my $Bseq = get_seq($BFILE); my $Agaps = find_gaps(length $Aseq, $sortedA); my $Bgaps = find_gaps(length $Bseq, $sortedB); print_gaps('A',$Aseq,$Agaps); print_gaps('B',$Bseq,$Bgaps); #------------- sub sort_array { my $array_ref = shift; my @array = @{$array_ref}; for (my $i = 0; $i<((scalar @array)-1); $i++) { if ($array[$i][0] > $array[$i+1][0]) { ($array[$i], $array[$i+1]) = ($array[$i+1], $array[$i]); } } my $return_ref = \@array; return $return_ref; } #------------- sub find_gaps { my $len = shift; my $array_ref = shift; my @array = @{$array_ref}; my @gaps; if ($array[0][0] > 1) { push @gaps, (1, ($array[0]->[0]-1)); } for (my $i = 1; $i<(scalar @array); $i++) { if ($array[$i-1][1] < $array[$i][0]) { push @gaps, [ ($array[$i-1][1]+1), ($array[$i][0]-1) ]; } } if ($array[-1][1] < $len) { push @gaps, [ ($array[-1][1]+1), $len]; } my $return_ref = \@gaps; return $return_ref; } #------------ sub get_seq { my $filename = shift; my $seq = ''; open FILE, $filename or die "couldn't open $filename: $!\n"; while () { chomp; if (/^>/) { next; } $seq .= $_; } close FILE; $seq; } #------------ sub print_gaps { my $seqname = shift; my $seqin = shift; my $array_ref = shift; my @array = @{$array_ref}; if (scalar @array > 0) { print "$seqname gaps:\n"; foreach my $gap (@array) { my $start = ${$gap}[0]; my $end = ${$gap}[1]; my $seq = substr ($seqin, $start, ($end-$start) ); print "$start - $end:$seq\n"; } } } Which results in this output: A gaps: 320 - 338:ggctcctatcagcgggtc 361 - 418:atgcagtccagagaaaagtgcctcatttttcagtaaagtgacatattcctggtttag 892 - 3389:gtccctggcaaccttgtgtgtctatttcttactggatgaaggaaacattttaacagccactaaagtgtt cacatcgatgtctttgtttaatattttaaggattcctctgtttgagttaccaaccgtgatctcagctgtggtccagacaa agatatccctgggccgtttggaagactttctcaacactgaggagcttcttcctcaaagtattgaaacgaactatacagga gatcatgctattgggtttacagatgcttctttctcctgggataaaacaggaatgccagttctaaaagaggctctgtggct tatgtttctcagcaggcctggattcagaattgcattttgcaagaaaacattctctttggctccatcatgaagaaagagtt ttatgagcaagtattggaagcctgtgctctccttccagatttggagcagttgccaaagggagatcaaactgagattggag aaagagtaaggaaacggctgtgaatataagtgggggccagcagcatcgagtaagcctggccagagctgtctacagtgggg ccgacgtctacctcctggatgatcccttgtctgctattgatgttcatgttggaaagcagctttttgaaaaagtgataggg tccttgggccttttgaaaaaccgggtagccatagtttcatttatgctttcttgtggggcaggggatatatctagcactgt gcctaaaattccatttattttgtttatttgttgaagcaaagtagtcacatttcacatgaaaaaaatgggatgctcagatt gcaatttcacaggcaaatgtcaatgggtaatttttgaagttttacttacaagtttactctggtgacaatttatgtgattg ttatctcttcatctaataccatatcctaaggggatgtatgtgccataatgaattgttgttgactcaagtgatttttacaa tggtattttggtgcacaccaaaatattaataaaattttatattattaacatagcaatttgacattattatcacagcaatc atagtgattcagctgttttgttaaagggccaagaagatgaaaattagtgtttaccagtacatgccatatttcacagggta aattagttcaatccctatttcacacataaaggagctcagggctcacaatgacttgatataggttgctcaacttaagaaca atagagatttttaaaagtatgtctgactccaaagccacattcttcataatatgtgtttatggaattattaatatagagaa ctttgaaaatatatgtataaaattataaaatatatgtctcagtaaaaataaacaaacatctttatatattgacataaacc caagattaaaaatgtcacacttaattgtatgtatatgtataaaataatatgaatgatttaaaaataaggactatgaatta tgacttggattaaacactttttttgtaatggtactgtatatttaaaaataagcaagaatgacatatatgaaattgtttta aatttgcaattaaattgattcatttctaatgaacgaaaaaatacccgaactaaagtgttaaaaatcacatttagaaacta ttttctcatctagcagcatgtcattattccttgtaaaatacatagatgctgccttaaactggcacctaccaaaaatcata ctctgaaatagtaaaggaaaaaagtcattgtagctttttttaaaaagccatttttcgtaatctctgagataaaacacttt gtttttcagactcatattttagtgacacacaatctcacacttctgccacaaatgaatcttattgttgtaatgaaaagtgg cagaatagctcaaatgggaatataccaggagctgctgtgtaaaaccaaaaatctcactaatttgcaccaagtcatcagtg aacaagaaaaagctcatgctctaaagcaagtcagtgcaatcaactccagaactagaccaaaagacaaaatcctggagcaa aaacatagtgggatatgagtccccttctgagagtcatgcaaggatttctaggcactgttaagaacccacgtagaaagaag atgttaagtgacggtagaaccagttttcagggttgcctgattttttgaagcagcacttcacagaagtcgaggtgaccagg attgacatgcaagttctaagagacaggccctcattggatcaaggaaaacagctctcaatgaaaaaagaaaagatccctgt tggtgggttgaagttctccatcattctgcagtacctccaagcctttggctggctctgggtgtggctgactgtggtcactt acttagggcagaatttggtgagcattggacagaatctatggcttagtgcatgggcaaaagaagcaaaaaacatgaatgag ttcacagaatggaagcaaataagaagcaataaacttaatatctatggattattgggattaataaaaggtaaaagaaaatg taaaaaaaaaaaaaaaaaaaaaaaaaaa B gaps: 805 - 1073:cttacttttatcaaatgtttctcgacaaaagttttccactggggaaattattaacttgatgtcagcaac tcatggacttgacagcaaacctcaatctcctctggtctgccccttttcaaatcctaatggccgtatatctcctttggcaa gagctgggtccagcagtgttagcaggggtggcagtccttgtgtttgttataccaataaatgctttagctgcaactaaaat aaaaaagttaaaggtaagaaaaaaaaaaaaaaaaaaaaa