#!/usr/bin/perl
use HTML::TableContentParser;
use HTML::Parse;
use HTML::FormatText;
use DBI;
use strict;
use warnings;
# Import the HTML reports found under the 'modified' and 'deleted'
# directories into the MySQL table of the same name.
#
# For every file in a directory the HTML tables are parsed, each cell is
# reduced to plain text, and each table row becomes one
# INSERT ... ON DUPLICATE KEY UPDATE statement. Cell values are bound to the
# listed columns positionally, so a row is expected to carry one cell per
# column.

# Column list used in the INSERT statement for each target table.
my %columns_for = (
    modified => 'id, name, title, duration,library, modified, user, rev',
    deleted  => 'name, title, duration,deleted, library',
);

# Connect to the database; RaiseError makes every failed DBI call die.
my $db = DBI->connect(
    "DBI:mysql:newsbms", "newsbms", "newsbms",
    { RaiseError => 1, PrintError => 0 },
);

# One formatter is enough: format() starts from a clean state on every call.
my $formatter = HTML::FormatText->new;

# Process the two tables one after the other.
for my $tablename (qw(modified deleted)) {
    print "\nProcessing the '$tablename' entries...\n\n";

    # Counters for the number of files and queries processed.
    my $counter       = 0;
    my $query_counter = 0;

    # Open the directory that holds the reports for this table.
    my $dirname = "/home/martin/monitoring/newsBMS/$tablename/";
    opendir(my $dir, $dirname) or die "Could not open $dirname: $!";

    # Loop through all files in the directory.
    while (defined(my $filename = readdir($dir))) {

        # Skip the special entries '.' and '..'.
        next if $filename eq '.' or $filename eq '..';
        $counter++;

        # Read the whole HTML file into a single string.
        open(my $fh, '<', $dirname . $filename)
            or die "Could not open $filename: $!";
        binmode $fh;
        my $html = do { local $/; <$fh> };
        close($fh);
        $html = '' unless defined $html;

        # Parse the HTML tables and insert one database row per table row.
        my $tables = HTML::TableContentParser->new->parse($html);
        for my $t (@$tables) {
            for my $r (@{ $t->{rows} }) {
                my $cells = $r->{cells} || [];

                # A row without data cells (presumably a header-only row)
                # has nothing to insert and would yield invalid SQL.
                next unless @$cells;

                my @values =
                    map { _cell_text($formatter, $_->{data}) } @$cells;

                # Bind the values through placeholders instead of pasting
                # them into the SQL text. prepare_cached reuses the handle
                # for every row with the same number of cells.
                my $placeholders = join(',', ('?') x @values);
                my $insert = $db->prepare_cached(
                    "INSERT INTO $tablename ($columns_for{$tablename}) "
                  . "VALUES ($placeholders) "
                  . "ON DUPLICATE KEY UPDATE duplicates=duplicates+1"
                );
                $insert->execute(@values);

                $query_counter++;
                if ($query_counter % 1000 == 0) {
                    print "Issued $query_counter MySQL queries.\n";
                }
            }
        }
    }

    # Close the directory.
    closedir($dir);
    print "\nDone the '$tablename' table.\nProcessed $counter files and issued $query_counter MySQL queries.\n";
}

# Disconnect from the database.
$db->disconnect();
print "\nProgram Finished.\n";

# Convert the HTML content of one table cell into the plain text that is
# stored in the database.
#
# Parameters:
#   $formatter - an HTML::FormatText object used to render the text
#   $cell_html - raw HTML of the cell; undef is treated as an empty string
# Returns the rendered text with every single quote, colon and backslash
# replaced by '-' and one trailing newline removed.
sub _cell_text {
    my ($formatter, $cell_html) = @_;

    my $tree = parse_html(defined $cell_html ? $cell_html : '');
    my $text = $formatter->format($tree);

    # HTML::Element trees should be deleted explicitly once they are no
    # longer needed, otherwise they may never be freed.
    $tree->delete;

    # These replacements are kept even though the values are now bound via
    # placeholders, so new rows are stored exactly like earlier ones.
    $text =~ s/'/-/g;
    $text =~ s/[:\\]/-/g;

    # chomp, not chop: only strip the final character when it is a newline.
    chomp $text;

    return $text;
}