#!/usr/bin/perl
use HTML::TableContentParser;
use HTML::Strip;
use DBI;
use strict;
use warnings;
# Import the newsBMS HTML reports into MySQL.
#
# For each of the 'modified' and 'deleted' directories, every file is read,
# its HTML tables are parsed, the markup is stripped from each cell, and one
# INSERT is issued per table row into the table named after the directory.
# A row that collides with an existing key only bumps that row's
# `duplicates` counter (ON DUPLICATE KEY UPDATE).
#
# Any database error is fatal because the connection uses RaiseError.

# Connect to the database
my $db = DBI->connect(
    "DBI:mysql:newsbms", "newsbms", "newsbms",
    { RaiseError => 1, PrintError => 0 },
);

# Destination table => column list.  Cell values are bound positionally, so
# the column order here must match the order of the cells in each HTML row.
my %columns_for = (
    modified => [qw(id name title duration library modified user rev)],
    deleted  => [qw(name title duration deleted library)],
);

for my $path ('modified', 'deleted') {
    print "\nProcessing the '$path' entries...\n\n";

    # Per-directory counters
    my $counter       = 0;    # files processed
    my $query_counter = 0;    # INSERT statements issued

    # Open the directory (lexical handle, so it cannot clash with other code)
    my $dirname = "/home/martinn/monitoring/newsBMS/$path/";
    opendir(my $dir, $dirname) or die "Could not open $dirname: $!";

    # Prepare the MySQL statement once; it is re-executed for every row
    my @columns = @{ $columns_for{$path} };
    my $query   = sprintf(
        "INSERT INTO %s (%s) VALUES (%s)"
            . " ON DUPLICATE KEY UPDATE duplicates=duplicates+1",
        $path,
        join(', ', @columns),
        join(', ', ('?') x @columns),
    );
    my $statement = $db->prepare($query);

    # Loop through all files in the directory
    while (defined(my $filename = readdir($dir))) {
        # Skip special "files": '.' and '..'
        next if $filename =~ /^\.\.?$/;
        $counter++;

        # Read the whole html file into a single string.  Three-argument
        # open keeps odd file names from being interpreted as an open mode;
        # ':raw' reads the bytes untranslated, like binmode.
        open(my $fh, '<:raw', $dirname . $filename)
            or die "Couldn't open $filename: $!";
        my $html = do { local $/; <$fh> };
        close($fh);
        $html = '' unless defined $html;

        # Parse the html table
        my $tcp    = HTML::TableContentParser->new;
        my $tables = $tcp->parse($html);

        # Issue the MySQL queries, one per table row
        for my $t (@$tables) {
            for my $r (@{ $t->{rows} || [] }) {
                my @values;
                for my $c (@{ $r->{cells} || [] }) {
                    # Remove the html tags from the cell
                    my $stripper = HTML::Strip->new();
                    $c->{data} = $stripper->parse($c->{data});

                    # Add cell to the end of the array
                    push(@values, $c->{data});
                }

                # A row without any cells carries no data, and executing the
                # statement with no bind values would be a fatal DBI error.
                next unless @values;

                $statement->execute(@values);
                $query_counter++;

                # Basic activity monitor
                if ($query_counter % 5000 == 0) {
                    print "Issued $query_counter MySQL queries.\n";
                }
            }
        }
    }

    # Close the directory
    closedir($dir);

    # Finish the MySQL statement
    $statement->finish();

    print "\nDone the '$path' table.\n";
    print "Processed $counter files and issued $query_counter MySQL queries.\n";
}

# Disconnect from the database
$db->disconnect();
print "\nProgram Finished.\n";