comment on

Dear monks,
I would like to compress SQLite FTS databases (utf-8 text data) to reduce file sizes and perhaps even improve performance. Based on documentation here and here and a lot of guesswork, I came up with some partially working code. It creates the db, and it seems to compress it as the file is smaller than the uncompressed db with the same entries. When I run a SELECT query on the compressed db, I get a correct report about the number of hits but the text content itself doesn't show up. What am I doing wrong?

use strict;
use warnings;

use utf8;

use DBI;
use IO::Compress::Gzip qw(gzip $GzipError) ;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ;

sub compressor;
sub uncompressor;

print "\nTesting 'compressor' and 'uncompressor' subs: " . uncompresso
+r(compressor("test passed")) . "\n";

my $dbh = DBI->connect( "dbi:SQLite:dbname=test.db",
                            "",
                            "", {
                                # RaiseError => 1,        # we'll hand
+le the errors ourselves
                                sqlite_unicode => 1,
                            }
                        ) or die "Can't connect to database: $DBI::err
+str";;                    # database connection var


$dbh->sqlite_create_function( 'compressme', 1, sub { compressor } );
$dbh->sqlite_create_function( 'uncompressme', 1, sub { uncompressor } 
+);

$dbh->do("DROP TABLE IF EXISTS EnHu") or print "drop table failed";
# $dbh->do("CREATE VIRTUAL TABLE EnHu USING fts4 (English TEXT, Hungar
+ian TEXT)") or print "failed to create fts4 table";        # uncompre
+ssed
$dbh->do("CREATE VIRTUAL TABLE EnHu USING fts4 (English TEXT, Hungaria
+n TEXT, compress=compressme, uncompress=uncompressme)") or print "fai
+led to create fts4 table"; # compressed


while (<DATA>) {
    chomp;
    /^([^\t]*)\t([^\t]*)/;
    my $lang1 = $1;
    my $lang2 = $2;
    # print "\n\ninserting:\n$lang1\n$lang2";
    
    my $insert = $dbh->prepare('INSERT INTO EnHu (English, Hungarian) 
+VALUES(?, ?)');
    # my $insert = $dbh->prepare('INSERT INTO EnHu (English, Hungarian
+) VALUES(compressme(?), compressme(?))');    #no need to call the fun
+ctions, SQLite seems to do it on its own
    $insert->execute("$lang1", "$lang2");
    
    
    
}

my $countentries = $dbh->prepare( "SELECT Count(*) FROM EnHu" ); # OK
$countentries->execute();
my $size = $countentries->fetch();

my $searchterm = "some";
print "\n\nImport done, the db contains @$size records.\nEnter search 
+term to run query\n";
chomp($searchterm = <STDIN>);



my $q_1 = $dbh->prepare( "SELECT * FROM EnHu WHERE English MATCH ?" ) 
+or print "prepare failed";    # works on uncompressed db, kinda works
+ on compressed db
# my $q_1 = $dbh->prepare( "SELECT uncompressme(English) FROM EnHu WHE
+RE English MATCH ?" ) or print "prepare failed";    # doesn't work
# my $q_1 = $dbh->prepare( "SELECT uncompressme(English),uncompressme(
+Hungarian) FROM EnHu WHERE English MATCH ?" ) or print "prepare faile
+d";
# my $q_1 = $dbh->prepare( "SELECT uncompressme(English) FROM EnHu WHE
+RE English MATCH ?" ) or print "prepare failed";
# my $q_1 = $dbh->prepare( "SELECT * FROM EnHu WHERE uncompressme(Engl
+ish) MATCH ?" );    # doesn't work
# my $q_1 = $dbh->prepare( "SELECT * FROM EnHu WHERE English MATCH com
+pressme(?)" );    # nope


$q_1->execute($searchterm) or print "execute failed";

# my $q_2 = $dbh->prepare( "SELECT * FROM EnHu LIMIT 4" );
# $q_2->execute() or print "execute failed";
# my $all = $q_2->fetchall_arrayref() or print "fetch failed";

my $all = $q_1->fetchall_arrayref() or print "fetch failed";

my $i = @$all;
foreach my $row (@$all) {
        print "\n--------------------\n";
        foreach my $item (@$row) {
            print $item;
            print "\t";
            
        };
        
    }
    print "\n\n";
print "\n$i matches found.";


<STDIN>;

    sub compressor {
        my $in = shift;
        my $out;
        gzip \$in => \$out;
        return ($out);
    }
    sub uncompressor {
        my $in = shift;
        my $out;
        gunzip \$in => \$out;
        return ($out);
    }
    



__DATA__
this is some text    némi szöveg
some more text here    na még egy kicsi
just for testing you know    nalássuk
fourth row of text    még némi szöveg
and a little more text for good measure    dobjuk hozzá még ezt is
[download]

In reply to How do I read from a compressed SQLite FTS4 database with DBD::SQLite? by elef

Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.


Come for the quick hacks, stay for the epiphanies.
	PerlMonks