Beefy Boxes and Bandwidth Generously Provided by pair Networks
Just another Perl shrine
 
PerlMonks  

Re: DBI/load data infile issue

by bofh_of_oz (Hermit)
on Jul 08, 2005 at 16:04 UTC ( #473464=note: print w/ replies, xml ) Need Help??


in reply to DBI/load data infile issue

A code example, anyone?

Seriously, it is nice to discuss ideas, but it helps to see a piece of code that produces error to actually debug that error... Even though the error might not be induced by the code

--------------------------------
An idea is not responsible for the people who believe in it...


Comment on Re: DBI/load data infile issue
Re^2: DBI/load data infile issue
by jimbus (Friar) on Jul 08, 2005 at 16:16 UTC
    Code w/o DBI

    use DBI; use Logs; use Net::FTP; use Time::Local; use Time::Timezone; use POSIX; use File::Temp qw/ tempfile /; open (IN, "/home/reports/ftp/WSB/test.txt"); #open (IN, "gzip -dc /tmp/arc/0620.gz |"); ($cntfh, $cntfn) = tempfile('WSB_count_XXXX', DIR => '/tmp'); ($urlfh, $urlfn) = tempfile('WSB_urls_XXXX', DIR => '/tmp'); chmod 0666, $urlfn; chmod 0666, $cntfn; my $url_str = "mysql -u root -prep0rts reports -e\"load data infile '" + . $urlfn . "' into table tmpurl fields terminated by '|' (tst amp,url,count)\""; my $cnt_str = "mysql -u root -prep0rts reports -e\"load data infile '" + . $cntfn . "' into table tmpwsb fields terminated by '|' (tst amp,op100,op700,total)\""; print "$url_str\n"; print "$cnt_str\n"; @lines = <IN>; $lines = @lines; print "lines: $lines\n"; # parse records and increment counters foreach $line (@lines) { ($time, $opcode, $url) = parse($line); if ($opcode == "100") { $tstamp{$time}++; $op100{$time}++; if (length $url > 0) { my $tmpkey = $url . "<::>" . $time; $urls{$tmpkey}++; } } elsif ($opcode == "700") { $tstamp{$time}++; $op700{$time}++; if (length $url > 0) { my $tmpkey = $url . "::" . $time; $urls{$tmpkey}++; } } } @time = sort keys %tstamp; print "Done parsing: @time[0]: @time[$#time]\n"; for ($ii = @time[0]; $ii <= @time[$#time]; $ii++) { $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($ii); $tmp100 = $op100{$ii} || 0; $tmp700 = $op700{$ii} || 0; $tmpTotal = $tstamp{$ii} || 0; print $cntfh "$tmpTime|$tmp100|$tmp700|$tmpTotal\n"; } print "Done writing counts\n"; # enter URLS into database foreach $key (sort keys %urls) { my ($url,$tstamp) = split (/<::>/, $key); $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($tstamp); print $urlfh "$tmpTime|$url|$urls{$key}\n"; } print "Done writing urls\n"; `$cnt_str`; print "Done writing count DB\n"; `$url_str`; print "Done writing url DB\n";

    Code with DBI:

    use Logs; use Net::FTP; use Time::Local; use Time::Timezone; use POSIX; use File::Temp qw/ tempfile /; open (IN, "/home/reports/ftp/WSB/test.txt"); #open (IN, "gzip -dc /tmp/arc/0620.gz |"); ($cntfh, $cntfn) = tempfile('WSB_count_XXXX', DIR => '/tmp'); ($urlfh, $urlfn) = tempfile('WSB_urls_XXXX', DIR => '/tmp'); chmod 0666, $urlfn; chmod 0666, $cntfn; my $dbh = DBI->connect('dbi:mysql:reports', 'root','rep0rts', {RaiseEr +ror => 1,PrintError => 0}) or Die ("load data","WSB","Could n t connect to database" . $dbh->errstr); my $url_str = "load data infile '" . $urlfn . "' into table tmpurl fie +lds terminated by '|' (tstamp,url,count)"; my $cnt_str = "load data infile '" . $cntfn . "' into table tmpwsb fie +lds terminated by '|' (tstamp,op100,op700,total)"; print "$url_str\n"; print "$cnt_str\n"; @lines = <IN>; $lines = @lines; print "lines: $lines\n"; # parse records and increment counters foreach $line (@lines) { ($time, $opcode, $url) = parse($line); if ($opcode == "100") { $tstamp{$time}++; $op100{$time}++; if (length $url > 0) { my $tmpkey = $url . "<::>" . $time; $urls{$tmpkey}++; } } elsif ($opcode == "700") { $tstamp{$time}++; $op700{$time}++; if (length $url > 0) { my $tmpkey = $url . "::" . $time; $urls{$tmpkey}++; } } } @time = sort keys %tstamp; print "Done parsing: @time[0]: @time[$#time]\n"; for ($ii = @time[0]; $ii <= @time[$#time]; $ii++) { $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($ii); $tmp100 = $op100{$ii} || 0; $tmp700 = $op700{$ii} || 0; $tmpTotal = $tstamp{$ii} || 0; print $cntfh "$tmpTime|$tmp100|$tmp700|$tmpTotal\n"; } print "Done writing counts\n"; # enter URLS into database foreach $key (sort keys %urls) { my ($url,$tstamp) = split (/<::>/, $key); $tmpTime = strftime "%Y-%m-%d %H:%M:%S", localtime($tstamp); print $urlfh "$tmpTime|$url|$urls{$key}\n"; } print "Done writing urls\n"; $dbh->do($cnt_str); print "Done writing count DB\n"; $dbh->do($url_str); print "Done writing url DB\n"; $dbh->disconnect(); print "just done\n";
    Never moon a werewolf!
      Make sure to close your tmp file before starting the loader. This will flush all data to the file.

      Raafschild

        We have a winner!

        THANK YOU THANK YOU THANK YOU

        Closing the files made it work. Thats one of those detail things that I can be lazy about and it finally bit me in the butt.

        Thank you again,

        Jimbus

        Never moon a werewolf!
      Disclaimer: Whatever errors I find may be unrelated to the problem itself as I repeatedly got distracted while writing this comment...

      - (A trait of senior monks that's rubbed off on me): use strict - it helps...

      - Shouldn't

      for ($ii = @time[0]; $ii <= @time[$#time]; $ii++)
      be using $time[0] and $time[$#time]? There are also other lines using that notation...

      - In your "Code with DBI" make PrintError=>1 and redirect the output for late night reading ;)

      - Check those output files to make sure that there are no "character combinations" that have a special meaning in SQL... It could be where you're losing the data...

      --------------------------------
      An idea is not responsible for the people who believe in it...

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://473464]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others exploiting the Monastery: (5)
As of 2015-07-04 21:39 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (60 votes), past polls