use warnings;
use XML::Simple;
use LWP::UserAgent;
use HTTP::Request::Common;
use URI::Escape;
use Data::Dumper;
use Text::CSV;
use strict;

# Scan PubMed abstracts for protein keywords.
#
# Inputs:
#   proteinlist.csv - one keyword per row; only the first column is used.
#   pmid.csv        - header row (skipped), then rows whose first column is a
#                     drug name and whose second column holds one or more
#                     PubMed ID groups separated by '+'.
# Output:
#   result1.csv     - one row per ID group: the ID, the drug name, the matching
#                     keywords joined with '+', and a Yes/No flag. Each row is
#                     also echoed to STDOUT.
my @keywords;
my $file        = "proteinlist.csv";
my $pmid_file   = "pmid.csv";
my $result_file = "result1.csv";
my $efetch_url  = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';

my $ua  = LWP::UserAgent->new;
my $csv = Text::CSV->new({ sep_char => ',' });

# Load the keyword list once up front; it does not change between abstracts.
open(my $protein_fh, '<', $file) or die "Could not open '$file' $!\n";
while (my $row = <$protein_fh>) {
    $row =~ s/\r?\n\z//;    # tolerate CRLF line endings
    my ($keyword) = split /,/, $row;

    # An empty keyword would "match" every abstract (index() returns 0 for
    # an empty needle), so blank rows are skipped.
    next if !defined $keyword || $keyword eq '';
    push @keywords, $keyword;
}
close($protein_fh);

# Open the CSV file containing the list of PubMed IDs. Both inputs are opened
# before the result file so a missing input does not truncate earlier results.
open(my $data, '<', $pmid_file) or die "Could not open '$pmid_file' $!\n";

# Open the result CSV file.
open(my $fh, '>', $result_file) or die "Could not open '$result_file' $!\n";
print $fh "Pubmed ID, Drug name, Keyword(s) that matches, List of proteins in the abstract\n";

while (my $line = <$data>) {
    next if $. == 1;        # skip the header row
    $line =~ s/\r?\n\z//;   # tolerate CRLF line endings

    if (!$csv->parse($line)) {
        warn "Skipping unparsable line $. of '$pmid_file'\n";
        next;
    }

    my ($drug_name, $id_list) = $csv->fields();
    next if !defined $id_list;    # no ID column, nothing to fetch

    # Rows are written without CSV quoting, so commas in the drug name are
    # turned into dashes to keep the output columns aligned.
    $drug_name = '' if !defined $drug_name;
    $drug_name =~ tr/,/-/;

    # Within one '+'-separated group, IDs are dash-separated in the input;
    # they are sent to efetch as a comma-separated UID list.
    $id_list =~ tr/-/,/;

    for my $id (split /[+]/, $id_list) {

        # Request the plain-text abstract(s) for this ID group.
        my $request = HTTP::Request->new(POST => $efetch_url);
        $request->content_type('application/x-www-form-urlencoded');
        $request->content(
            'db=pubmed&id=' . uri_escape($id) . '&retmode=text&rettype=abstract');

        my $response = $ua->request($request);
        if (!$response->is_success) {

            # Without this check an HTTP error body would be searched as if
            # it were an abstract and reported as "No keyword matches".
            warn "Could not fetch PubMed ID '$id': "
                . $response->status_line . "\n";
            next;
        }

        # Case-insensitive substring search; index() returns -1 when the
        # keyword is absent.
        my $abstract = lc $response->content;
        my @matches  = grep { index($abstract, lc $_) != -1 } @keywords;

        my $verdict = @matches
            ? join('+', @matches) . ',Yes'
            : 'No keyword matches,No';
        my $row = "$id,$drug_name,$verdict\n";

        print $fh $row;
        print $row;
    }
}

close($data);

# Buffered write errors only surface at close, so check it.
close($fh) or die "Could not close '$result_file' $!\n";