We use Platform Computing's LSF for load sharing on our compute
cluster, but we can't justify buying licenses for machines
that would only be in the cluster part of the time. Enter my
new P4 with a gig of RAM. It runs Win2k during the day, but
now boots to Linux and runs a BLAST server at night. That lets me
run the following script in an LSF queue that only runs on nights
and weekends, and I can roll the same method out to others in the
company who have fast processors and lots of disk.
#!/usr/bin/perl -w
use strict;
use Getopt::Std;
use LWP::UserAgent;
use HTTP::Request::Common qw(POST);
use HTML::TreeBuilder;
use HTML::FormatText;
use vars qw($ua);
# Parse the command-line switches into %opts. In the spec string a letter
# followed by ":" takes a value; "h" is a plain flag.
# NOTE(review): -F is accepted by the parser but is not read anywhere in the
# visible script.
my %opts;
getopts('hp:d:i:o:e:v:b:F:', \%opts);

# -h: print the usage summary and stop. Asking for help is not an error, so
# leave with exit status 0 rather than dying with "Died at ...".
if (defined $opts{'h'}) {
    print <<"HERE";
Usage: Very much like commandline blast:
-p program
-d database (though it has to be the name the webserver knows
ie, NOT /data/db/ncbi/something.)
-i input file (must be fasta with empty line between seqs)
-o output file
-e escore cutoff (default 0.01)
-v and -b oneline descriptions and alignments (default 10)
-h this message
Those are currently the only arguments it knows.
Note that you must specify all of -p, -d, -i, and -o
And I am not going to do any sanity checking--arguments better
be spelled right
HERE
    exit 0;
}
# Refuse to run unless every mandatory switch (-p, -d, -i, -o) was given.
die "you must specify all of -p, -d, -i, and -o\n"
    if grep { !defined $opts{$_} } qw(p d i o);
# Mandatory settings, taken verbatim from the command line:
# -p program, -d database, -i input file, -o output file.
my ($PROGRAM, $DB, $INFILE, $OUTFILE) = @opts{qw(p d i o)};

# Optional settings fall back to a default when the switch was not given.
my $ESCORE = defined $opts{'e'} ? $opts{'e'} : 0.01;   # -e cutoff
my $V      = defined $opts{'v'} ? $opts{'v'} : 10;     # -v count
my $B      = defined $opts{'b'} ? $opts{'b'} : 10;     # -b count
#--------------------------------------------------------------
# Main: read FASTA records from $INFILE, post each one to the BLAST web form
# and write the plain-text rendering of every reply to $OUTFILE.
#
# A record starts at a ">" header line and is extended by lines consisting
# only of the letters A, T, G, C, N and X (case-insensitive). The record is
# submitted when the first line that is neither a header nor such a sequence
# line is read (normally the blank separator), when the next header arrives,
# or at end of file -- so a missing separator or a missing trailing blank
# line no longer causes a sequence to be dropped.

# URL of the BLAST CGI form the records are posted to.
my $BLAST_URL = 'http://scain/blast/blast.cgi';

# Seconds to wait between attempts while the request keeps failing.
my $RETRY_DELAY = 600;

# run_blast($ua, $out_fh, $record)
#   $ua      LWP::UserAgent used for the POST
#   $out_fh  filehandle the formatted result is printed to
#   $record  FASTA text (header plus sequence lines) to submit
# Posts the record together with the command-line settings, retrying until
# the response is successful, then prints the response converted from HTML
# to plain text. Returns nothing.
sub run_blast {
    my ($ua, $out_fh, $record) = @_;

    my $form = [
        PROGRAM      => $PROGRAM,
        DATALIB      => $DB,
        SEQUENCE     => $record,
        EXPECT       => $ESCORE,
        DESCRIPTIONS => $V,
        ALIGNMENTS   => $B,
    ];

    my $response = $ua->request(POST($BLAST_URL, $form));

    # If it isn't available now, wait and try again. Note that this retries
    # indefinitely on ANY unsuccessful response, not only on outages.
    while (!$response->is_success) {
        print "sleeping...\n";
        sleep $RETRY_DELAY;
        $response = $ua->request(POST($BLAST_URL, $form));
    }

    my $tree = HTML::TreeBuilder->new_from_content($response->content);
    my $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 80);
    print {$out_fh} $formatter->format($tree);

    # Release the parse tree explicitly so memory does not build up over a
    # long run with many sequences.
    $tree->delete;

    return;
}

my $ua = LWP::UserAgent->new;

# Open the input first so a bad input path cannot truncate an existing
# output file. Three-argument open keeps odd file names from being read as
# open modes.
open my $in_fh,  '<', $INFILE  or die "couldn't open $INFILE: $!\n";
open my $out_fh, '>', $OUTFILE or die "couldn't open $OUTFILE: $!\n";

my $record  = "";   # FASTA text collected for the current record
my $pending = 1;    # true until the current record has been submitted

while (my $line = <$in_fh>) {
    if ($line =~ /^>/) {
        # A new header with an unsubmitted record still buffered means the
        # separator line was missing: submit the old record first.
        run_blast($ua, $out_fh, $record) if $pending and $record ne "";
        $record  = $line;
        $pending = 1;
    } elsif ($line =~ /^[ATGCNX]+$/i) {
        $record .= $line;
    } elsif ($pending and $record ne "") {
        # Any other line (normally the blank separator) ends the record.
        run_blast($ua, $out_fh, $record);
        $record  = "";
        $pending = 0;
    }
}

# The file may end without a separator line after the last record.
run_blast($ua, $out_fh, $record) if $pending and $record ne "";

close $in_fh;
# Buffered write errors only surface at close, so check it.
close $out_fh or die "couldn't close $OUTFILE: $!\n";