shabird has a number of posts (ead a file which has three columns and store the content in a hash, Query of multi dimentional array, storing a file in 2d array) that are somewhat similar. Hence "script.pl" which makes use of DBI, DBD::CSV, Getopt::Long::Descriptive, and Text::Table
#!/usr/bin/env perl
use strict;
use warnings;
use Carp;
use Data::Dumper;
$Data::Dumper::Deepcopy=1;
$Data::Dumper::Indent=1;
$Data::Dumper::Sortkeys=1;
use DBI;
use Getopt::Long::Descriptive('describe_options');
use Text::Table;
use 5.01800;
(our $opts,my $usage)=describe_options(
$0.' %o <some-arg>',
,['directory|d=s' ,'the working directory </Users/Desktop>' ,{
+ default => '/Users/Desktop' }]
,['extension|e=s' ,'the file extension <.txt/r>' ,
+{ default => '.txt/r' }]
,['separator|s=s' ,'the separating character<"\t">' ,
+{ default => "\t" }]
,['sql=s', ,'the sql' ,
+{ required => 1 }]
,[]
,['verbose|v' ,'print extra stuff' ]
,['help' ,'print usage message and exit' ,
+{ shortcircuit => 1 }]
);
warn Data::Dumper->Dump([\$opts],[qw(*opts)]),' ' if ($opts->{verbose}
+);
if ($opts->help()) { # MAN! MAN!
say <<"_HELP_";
@{[$usage->text]}
_HELP_
exit;
}
else { # No MAN required.
};
# Get a connection to the database tables
# ... as this is DBD::CSV a table is a file
my $dbh=DBI->connect ("dbi:CSV:", undef, undef, {
f_dir => $opts->directory(),
f_ext => $opts->extension(),
csv_sep_char => $opts->separator(),
RaiseError => 1,
}) or die "Cannot connect: $DBI::errstr";
eval {
# Prepare and execute the sql
my $sth=$dbh->prepare($opts->sql());
$sth->execute();
# get the names of the fields returned by the select
my $field_aref=$sth->{NAME};
my $table=Text::Table->new(\'|',
map {( { title => $_ }, \'|') } @{$field_aref}
)
if ($#{$field_aref});
# and dump them ...
warn Data::Dumper->Dump([\$field_aref],[qw(*field_aref)]),' ' if (
+$opts->verbose());
# Get the selection one row at a time
while (my $value_aref=$sth->fetchrow_arrayref()) {
# dump the values from the select
warn Data::Dumper->Dump([\$value_aref],[qw(*value_aref)]),' '
+if ($opts->verbose());
# For simplicity we will make a hash where the keys are the fi
+eld names and the values are the values of those fields
my %_h;
@_h{@$field_aref}=@$value_aref;
# since everything looks reasonable ...
if (defined &with_each_row) { # have a &with_each_row so ...
with_each_row(\%_h);
}
elsif (@$field_aref > 1) { # select has multiple fields so ...
# dump the hash to confirm all is what we expect
warn Data::Dumper->Dump([\%_h],[qw(*_h)]),' ' if ($opts->v
+erbose());
$table->load($value_aref)
if (defined $table);
}
else { # only one field
say $value_aref->[0];
}
}
if (defined &in_summary) {
in_summary();
}
elsif (defined $table) {
print $table->title(),
$table->rule('-','|'),
$table->body(),
$table->body_rule('-','-');
};
};
$@ and Carp::croak "SQL database error: $@";
__END__
Yes, I'm guilty of heresy - I confess I'm on Windows.
perl script.pl --help
script.pl [-desv] [long options...] <some-arg>
-d STR --directory STR the working directory </Users/Desktop>
-e STR --extension STR the file extension <.txt/r>
-s STR --separator STR the separating character<"\t">
--sql STR the sql
-v --verbose print extra stuff
--help print usage message and exit
Let us assume that we have stored the data from the nodes as x<node number>.txt in the local directory, we have "x11114659.txt", "x11115466.txt" and "x11116298.txt" so for ead a file which has three columns and store the content in a hash:
.>perl script.pl -d . --sql "select regulation from x11115466"
up
down
NA
up
up
up
down
down
down
up
up
down
up
NA
NA
up
up
or as a nice table (when the select returns more than one field ... we get a table)
.>perl script.pl -d . --sql "select genename, regulation from x1111546
+6"
|genename |regulation|
|----------|----------|
|APOL4 |up |
|CYP2C8 |down |
|NAALADL2 |NA |
|NANOS3 |up |
|C20orf204 |up |
|MIR429 |up |
|MIR200A |down |
|MIR200B |down |
|CFL1P4 |down |
|AC091607.1|up |
|RPL19P20 |up |
|SREK1IP1P1|down |
|CCT5P2 |up |
|CHTF8P1 |NA |
|FAR1P1 |NA |
|AC067940.1|up |
|AL662791.1|up |
-----------------------
For Query of multi dimentional array:
..>perl script.pl -d . --sql "select concat(GeneID,' ',(Tp1+tp2+tp3))
+from x11114659 order by GeneId"
ALA1 33
THR8 168
HUA4 476
ABA5 17
or again as table
..> perl script.pl -d . --sql "select GeneID, (Tp1+tp2+tp3) as sum fr
+om x11114659 order by GeneId"
|GeneID|sum|
|------|---|
|ABA5 | 17|
|ALA1 | 33|
|HUA4 |476|
|THR8 |168|
------------
And finally for storing a file in 2d array:
..>perl script.pl -d . --sql "select concat(ProteinName,'; ',MF1,'; ',
+MF2,'; ',MF3) as whatever from x11116298"
GH1; Growth factor activity; Growth hormone receptor binding; Hormone
+activity
POMC; G protein-coupled receptor binding; Hormone activity; Signaling
+receptor binding
THRAP3; ATP binding Source; Nuclear receptor transcription coactivator
+ activity; Phosphoprotein binding
Now the count function doesn't seem to be behaving itself so this "select regulation, count(*) from x11115466 group by regulation" throws an error. But there's a simple work-around. Supply a module that exports two subs "with_each_row" and "in_summary" - "with_each_row" is fed the reference to a hash of field names and their values, and "in_summary" is called once the select is exhausted.
package Example;
use strict;
use warnings;
use Exporter;
our @ISA=qw(Exporter);
our @EXPORT=qw(with_each_row in_summary);
use Data::Dumper;
use 5.01800;
my %_H;
sub with_each_row {
my ($_HREF)=@_;
warn Data::Dumper->Dump([\$_HREF],[qw(*_HREF)]),' ' if ($main:
+:opts->verbose());
$_H{$_HREF->{regulation}}++;
};
sub in_summary {
for my $key (sort keys %_H) {
printf "%10s:%-10s\n",$key,$_H{$key};
};
};
1;
Fortunately, for us, there is no need to change any code in script.pl ... we simply make use ot the -M option and get
..>perl -MExample script.pl -d . --sql "select genename, regulation fr
+om x11115466"
NA:3
down:5
up:9