Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

Re^2: DBI/load data infile issue

by jimbus (Friar)
on Jul 08, 2005 at 16:16 UTC ( #473469=note: print w/ replies, xml ) Need Help??


in reply to Re: DBI/load data infile issue
in thread DBI/load data infile issue

Code w/o DBI

use DBI; use Logs; use Net::FTP; use Time::Local; use Time::Timezone; use POSIX; use File::Temp qw/ tempfile /; open (IN, "/home/reports/ftp/WSB/test.txt"); #open (IN, "gzip -dc /tmp/arc/0620.gz |"); ($cntfh, $cntfn) = tempfile('WSB_count_XXXX', DIR => '/tmp'); ($urlfh, $urlfn) = tempfile('WSB_urls_XXXX', DIR => '/tmp'); chmod 0666, $urlfn; chmod 0666, $cntfn; my $url_str = "mysql -u root -prep0rts reports -e\"load data infile '" + . $urlfn . "' into table tmpurl fields terminated by '|' (tst amp,url,count)\""; my $cnt_str = "mysql -u root -prep0rts reports -e\"load data infile '" + . $cntfn . "' into table tmpwsb fields terminated by '|' (tst amp,op100,op700,total)\""; print "$url_str\n"; print "$cnt_str\n"; @lines = <IN>; $lines = @lines; print "lines: $lines\n"; # parse records and increment counters foreach $line (@lines) { ($time, $opcode, $url) = parse($line); if ($opcode == "100") { $tstamp{$time}++; $op100{$time}++; if (length $url > 0) { my $tmpkey = $url . "<::>" . $time; $urls{$tmpkey}++; } } elsif ($opcode == "700") { $tstamp{$time}++; $op700{$time}++; if (length $url > 0) { my $tmpkey = $url . "::" . $time; $urls{$tmpkey}++; } } } @time = sort keys %tstamp; print "Done parsing: @time[0]: @time[$#time]\n"; for ($ii = @time[0]; $ii <= @time[$#time]; $ii++) { $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($ii); $tmp100 = $op100{$ii} || 0; $tmp700 = $op700{$ii} || 0; $tmpTotal = $tstamp{$ii} || 0; print $cntfh "$tmpTime|$tmp100|$tmp700|$tmpTotal\n"; } print "Done writing counts\n"; # enter URLS into database foreach $key (sort keys %urls) { my ($url,$tstamp) = split (/<::>/, $key); $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($tstamp); print $urlfh "$tmpTime|$url|$urls{$key}\n"; } print "Done writing urls\n"; `$cnt_str`; print "Done writing count DB\n"; `$url_str`; print "Done writing url DB\n";

Code with DBI:

use Logs; use Net::FTP; use Time::Local; use Time::Timezone; use POSIX; use File::Temp qw/ tempfile /; open (IN, "/home/reports/ftp/WSB/test.txt"); #open (IN, "gzip -dc /tmp/arc/0620.gz |"); ($cntfh, $cntfn) = tempfile('WSB_count_XXXX', DIR => '/tmp'); ($urlfh, $urlfn) = tempfile('WSB_urls_XXXX', DIR => '/tmp'); chmod 0666, $urlfn; chmod 0666, $cntfn; my $dbh = DBI->connect('dbi:mysql:reports', 'root','rep0rts', {RaiseEr +ror => 1,PrintError => 0}) or Die ("load data","WSB","Could n t connect to database" . $dbh->errstr); my $url_str = "load data infile '" . $urlfn . "' into table tmpurl fie +lds terminated by '|' (tstamp,url,count)"; my $cnt_str = "load data infile '" . $cntfn . "' into table tmpwsb fie +lds terminated by '|' (tstamp,op100,op700,total)"; print "$url_str\n"; print "$cnt_str\n"; @lines = <IN>; $lines = @lines; print "lines: $lines\n"; # parse records and increment counters foreach $line (@lines) { ($time, $opcode, $url) = parse($line); if ($opcode == "100") { $tstamp{$time}++; $op100{$time}++; if (length $url > 0) { my $tmpkey = $url . "<::>" . $time; $urls{$tmpkey}++; } } elsif ($opcode == "700") { $tstamp{$time}++; $op700{$time}++; if (length $url > 0) { my $tmpkey = $url . "::" . $time; $urls{$tmpkey}++; } } } @time = sort keys %tstamp; print "Done parsing: @time[0]: @time[$#time]\n"; for ($ii = @time[0]; $ii <= @time[$#time]; $ii++) { $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($ii); $tmp100 = $op100{$ii} || 0; $tmp700 = $op700{$ii} || 0; $tmpTotal = $tstamp{$ii} || 0; print $cntfh "$tmpTime|$tmp100|$tmp700|$tmpTotal\n"; } print "Done writing counts\n"; # enter URLS into database foreach $key (sort keys %urls) { my ($url,$tstamp) = split (/<::>/, $key); $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($tstamp); print $urlfh "$tmpTime|$url|$urls{$key}\n"; } print "Done writing urls\n"; $dbh->do($cnt_str); print "Done writing count DB\n"; $dbh->do($url_str); print "Done writing url DB\n"; $dbh->disconnect(); print "just done\n";
Never moon a werewolf!


Comment on Re^2: DBI/load data infile issue
Select or Download Code
Re^3: DBI/load data infile issue
by raafschild (Novice) on Jul 08, 2005 at 16:46 UTC
    Make sure to close your tmp file before starting the loader. This will flush all data to the file.

    Raafschild

      We have a winner!

      THANK YOU THANK YOU THANK YOU

      Closing the files made it work. Thats one of those detail things that I can be lazy about and it finally bit me in the butt.

      Thank you again,

      Jimbus

      Never moon a werewolf!
        That's what i thought. See my previous post below (very end). I did not do a re:re:re

        :) -SK

Re^3: DBI/load data infile issue
by bofh_of_oz (Hermit) on Jul 08, 2005 at 17:47 UTC
    Disclaimer: Whatever errors I find may be unrelated to the problem itself as I repeatedly got distracted while writing this comment...

    - (A trait of senior monks that's rubbed off on me): use strict - it helps...

    - Shouldn't

    for ($ii = @time[0]; $ii <= @time[$#time]; $ii++)
    be using $time[0] and $time[$#time]? There are also other lines using that notation...

    - In your "Code with DBI" make PrintError=>1 and redirect the output for late night reading ;)

    - Check those output files to make sure that there are no "character combinations" that have a special meaning in SQL... It could be where you're losing the data...

    --------------------------------
    An idea is not responsible for the people who believe in it...

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://473469]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others rifling through the Monastery: (7)
As of 2015-07-03 21:10 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (56 votes), past polls