#!/usr/local/bin/perl

# Usage information
# Usage: $0 -i <blast_xml> -o <good_file> -t <bad_file> [-b <min_bits>] [-n <max_hits>]
#
#   -i  BLAST report in XML format (read through Bio::SearchIO, format 'blastxml')
#   -o  "good" file: tab-delimited rows for hits at or above the -b bit score;
#       the query description of each such hit is also written, one per line,
#       to <good_file>.header
#   -t  "bad" file: queries with no hits, and hits below the -b bit score
#   -b  bit-score threshold (treated as 0 when omitted)
#   -n  stop after this many hits per query (all hits when omitted or 0)

use strict;
use warnings;
use Bio::SearchIO;
use Getopt::Std;    # needed for flagging parameters

# Returns the one-line usage message shown when the command line is invalid.
sub usage {
    return "Usage: $0 -i <blast_xml> -o <good_file> -t <bad_file> "
         . "[-b <min_bits>] [-n <max_hits>]\n";
}

# Parses the command line, reads the BLAST XML report and splits the results
# into the "good" (-o), "bad" (-t) and "<good>.header" files.
# Dies if options are invalid or if any file cannot be opened or closed.
sub main {
    my %opt;

    # note: colons after letter mean the flag expects an argument
    # (this is getopts() syntax; getopt() would treat ':' as a switch too)
    getopts('i:o:n:b:t:', \%opt) or die usage();
    for my $flag (qw(i o t)) {
        die "Missing required option -$flag\n" . usage()
            unless defined $opt{$flag};
    }

    # Optional flags: fall back to values that reproduce what an undefined
    # value meant numerically, without the "uninitialized" warnings.
    my $min_bits = defined $opt{b} ? $opt{b} : 0;
    my $max_hits = $opt{n};

    print "Parsing the BLAST result ...\n";
    my $in = Bio::SearchIO->new(-format => 'blastxml', -file => $opt{i});

    # Three-argument open with lexical handles: the file name can never be
    # interpreted as an open mode.
    open(my $good_fh, '>', $opt{o})
        or die "Cannot open $opt{o}: $!";
    open(my $bad_fh, '>', $opt{t})
        or die "Cannot open $opt{t}: $!";
    open(my $header_fh, '>', "$opt{o}.header")
        or die "Cannot open $opt{o}.header: $!";

    # print the header info for tab-delimited columns (same line in both files)
    my $column_line = join(
        "\t",
        'query_name',          'query_length', 'accession_number',
        'subject_length',      'subject_description',
        'E value',             'bit score',    'frame',
        'query_start',         'query_end',    'hit_start',
        'hit_end',             '%_conserved',  '%_identical',
    ) . "\n";
    print {$good_fh} $column_line;
    print {$bad_fh} $column_line;

    # extraction of information for each result in turn
    RESULT:
    while (my $result = $in->next_result) {
        my $query_name   = $result->query_description;
        my $query_length = $result->query_length;

        # query info for reads WITHOUT hits goes into the -t ("bad") file
        if ($result->num_hits == 0) {
            print {$bad_fh} "$query_name\t$query_length\tNo hits found\n";
            next RESULT;
        }

        my $count = 0;

        # process each hit in turn
        HIT:
        while (my $hit = $result->next_hit) {
            my $bits = $hit->bits;

            if ($bits < $min_bits) {
                # hits BELOW the bit-score threshold go into the "bad" file
                print {$bad_fh} "$query_name\t$query_length\tbelow bit score\n";
            }
            else {
                # hits AT or ABOVE the threshold go into the "good" file,
                # and the query description into the .header file
                print {$header_fh} "$query_name\n";

                # scalar() keeps each accessor in scalar context, as the
                # original string concatenation did.
                print {$good_fh} join(
                    "\t",
                    $query_name,
                    $query_length,
                    scalar $hit->accession,
                    scalar $hit->length,
                    scalar $hit->description,
                    scalar $hit->significance,
                    $bits,
                ) . "\t";

                # one output line per HSP; the first completes the hit row,
                # later ones are padded past the seven hit-level columns
                my $hspcount = 0;
                while (my $hsp = $hit->next_hsp) {
                    my $frame       = $hsp->query->frame;
                    my $query_start = $hsp->start('query');
                    my $query_end   = $hsp->end('query');
                    my $hit_start   = $hsp->start('hit');
                    my $hit_end     = $hsp->end('hit');

                    print {$good_fh} "\t" x 7 if $hspcount > 0;
                    print {$good_fh} "$frame\t$query_start\t$query_end\t"
                                   . "$hit_start\t$hit_end\t";
                    printf {$good_fh} "%.1f%%\t%.1f%%\n",
                        $hsp->frac_conserved * 100,
                        $hsp->frac_identical * 100;
                    $hspcount++;
                }

                # A hit without any HSP would otherwise leave its row
                # unterminated and glue the next row onto it.
                print {$good_fh} "\n" if $hspcount == 0;
            }

            # flow control for the number of hits needed; every examined hit
            # counts, including those written to the "bad" file
            $count++;
            last HIT if defined $max_hits && $count == $max_hits;
        }
    }

    # Buffered write errors only surface at close, so check every handle.
    close($good_fh)   or die "Cannot close $opt{o}: $!";
    close($bad_fh)    or die "Cannot close $opt{t}: $!";
    close($header_fh) or die "Cannot close $opt{o}.header: $!";

    return;
}

main();
print " DONE!!!\n";