thank you everyone. I tried to put together a couple of examples on how to do this simply. (bulk insert may or may not be appropriate, because if a record already exists, I want to add to it. this means I need to lookup first, and then decide.)
the results are actually quite interesting:
method | File Size | Speed |
DBM::deep | 20,191,941 | 83 seconds |
Berkeley | 1,323,008 | 1 second |
DBI SQLite Synchronous=Off | 950,272 | 21 seconds |
DBI SQLite | 950,272 | 118 seconds |
Of course, there may be better ways to tweak the routines--I am an amateur at this. if I have made mistakes, or I should try something simpler, please correct me. for now, these are nice examples and should work as simple skeletons and guides for others, that show how to read and write to embedded data bases under perl. (the timing in the table of course does not reflect changing records; this is rare. I also have no idea how well or poorly the data bases scale.)
#!/usr/bin/perl -w
use strict;
use warnings FATAL => qw{ uninitialized };
################################################################
my $numofwords= 50000;
my @randomword;
################################################################
## not really a data base
################################################################
sub memhash {
my %randomword;
for (my $i=0; $i<$numofwords; ++$i) {
if (defined($randomword{$i})) {
$randomword{ $randomword[$i] }.=",$i"; }
else { $randomword{ $randomword[$i] }.="$i"; }
}
}
################################################################
## the perl module
################################################################
sub dbmdeep {
use DBM::Deep;
my $db = DBM::Deep->new( "dbm.db" );
for my $i (0 .. $numofwords - 1) {
my $rin= $randomword[$i];
if (defined $db->{$rin}) { $db->{$rin} .= ",$i"; } else { $db->{$r
+in} = $i; }
}
}
################################################################
## the standard berkeley data base
################################################################
sub berkeley {
use DB_File;
my $filename= "berkeley.db";
my %randomword;
tie %randomword, "DB_File", $filename
or die "cannot open $filename: $!\n";
for my $i (0 .. $numofwords - 1) {
my $rin= $randomword[$i];
if (defined $randomword{$rin}) { $randomword{$rin} .= ",$i"; }
else { $randomword{$rin} = $i; }
}
untie %randomword;
}
################################################################
## sql is treated differently. allows multiple keys, so we
## won't combine just yet, to give it an advantage.
################################################################
sub dbisqlite_nopragma {
use DBI;
my $dbfile = 'sqlite_nopragma.db'; # your database file
system("rm -rf $dbfile");
my $dbh = DBI->connect( # connect to your database, create
+if needed
"dbi:SQLite:dbname=$dbfile", # DSN: dbi, driver, database
+ file
"", # no user
"", # no password
{ RaiseError => 1 }, # complain if something goes
+ wrong
) or die $DBI::errstr;
$dbh->do( "CREATE TABLE allwords ( word, record )" );
# $dbh->do( "PRAGMA synchronous=OFF" );
for (my $i=0; $i<$numofwords; ++$i) {
$dbh->do( "INSERT INTO allwords VALUES ( '$randomword[$i]', '$i' )
+" );
}
$dbh->disconnect();
}
################################################################
sub dbisqlite_pragma {
use DBI;
my $dbfile = 'sqlite_pragma.db'; # your database file
system("rm -rf $dbfile");
my $dbh = DBI->connect( # connect to your database, create
+if needed
"dbi:SQLite:dbname=$dbfile", # DSN: dbi, driver, database
+ file
"", # no user
"", # no password
{ RaiseError => 1 }, # complain if something goes
+ wrong
) or die $DBI::errstr;
$dbh->do( "CREATE TABLE allwords ( word, record )" );
$dbh->do( "PRAGMA synchronous=OFF" );
for (my $i=0; $i<$numofwords; ++$i) {
$dbh->do( "INSERT INTO allwords VALUES ( '$randomword[$i]', '$i' )
+" );
}
$dbh->disconnect();
}
################################################################
for (my $i=0; $i<$numofwords; ++$i) {
my @chars = ( "A" .. "Z", "a" .. "z", 0 .. 9, qw(! @ $ % ^ & *) );
my $rin = join("", @chars[ map { rand @chars } ( 1 .. 5 ) ]);
push(@randomword, $rin);
}
print STDERR "[$0: created $numofwords words for testing]\n";
system("rm -rf *.db"); # kill all prio test files
use Benchmark ':hireswallclock'; ## unfortunately, this fails under O
+SX Mac Pro. always gives 0 secs.
print time()."\t".localtime()."\n";
print "memhash\n";
Benchmark::timethis( 1, &memhash );
print time()."\t".localtime()."\n";
print "dbmdeep\n";
Benchmark::timethis( 1, &dbmdeep);
print time()."\t".localtime()."\n";
print "berkeley\n";
Benchmark::timethis( 1, &berkeley);
print time()."\t".localtime()."\n";
print "sqlite nopragma\n";
Benchmark::timethis( 1, &dbisqlite_nopragma);
print time()."\t".localtime()."\n";
print "sqlite pragma\n";
Benchmark::timethis( 1, &dbisqlite_pragma);
print time()."\t".localtime()."\n";
PS: Under OSX, the timer seems broken.
I thus tried to install DBM::Deep under ubuntu linux, but here I got
cpan[1]> install DBM::Deep
CPAN: Storable loaded ok (v2.21)
Going to read '/home/ivo/.cpan/Metadata'
Database was generated on Tue, 26 Jan 2010 12:38:07 GMT
CPAN: YAML loaded ok (v0.71)
Going to read 122 yaml files from /home/ivo/.cpan/build/
CPAN: Time::HiRes loaded ok (v1.9719)
DONE
Restored the state of none (in 1.2153 secs)
Running install for module 'DBM::Deep'
Running Build for R/RK/RKINYON/DBM-Deep-1.0014.tar.gz
Has already been unwrapped into directory /home/ivo/.cpan/build/DBM-
+Deep-1.0014-7f188H
Has already been made
Running Build test
* WARNING: Configuration was initially created with Module::Build
version '0.280801' but we are now using version '0.3603'.
If errors occur, you must re-run the Build.PL or Makefile.PL script.
default not defined in test_types! at ./Build line 61
RKINYON/DBM-Deep-1.0014.tar.gz
./Build test -- NOT OK
//hint// to see the cpan-testers results for installing this module, t
+ry:
reports RKINYON/DBM-Deep-1.0014.tar.gz
Running Build install
make test had returned bad status, won't install without force
Failed during this command:
RKINYON/DBM-Deep-1.0014.tar.gz : make_test NO
so I ended up resorting to timing it with printed seconds under OSX. not pretty.
|