Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl Monk, Perl Meditation
 
PerlMonks  

Re^2: Parsing cli output into .csv

by erix (Vicar)
on Apr 26, 2014 at 06:09 UTC ( #1083899=note: print w/ replies, xml ) Need Help??


in reply to Re: Parsing cli output into .csv
in thread Parsing cli output into .csv

Nobody will read your post.

That turned out not to be the case :)

I was curious how much time it would take me to move such data in a postgres table (with apologies to the OP but I just can't be bothered with the minor oracle stuff).

It took me 13 minutes.

Obviously the parsing is guesswork on my part.

use strict; use warnings; use DBI; my $t = "pm1083865_ivo"; # table name my $dbh = DBI->connect or die "no database connection - $!\n "; $dbh->{AutoCommit} = 0; $dbh->{RaiseError} = 1; $dbh->do("drop table if exists $t"); $dbh->do("create table $t(col0 timestamptz, col1 text, col2 int, col3 +text, col4 int)"); $dbh->commit; my $sth1 = $dbh->prepare("insert into $t values(cast(? as timestamptz) +,?,?)" ); my $sth2 = $dbh->prepare("insert into $t values(cast(? as timestamptz) +,?,?,?,?)"); my $date_string ; while (<>) { chomp; $date_string = $_; last; } while (<>) { no warnings 'uninitialized'; next if (/^\s*$/); my ($col1, $col2, $col3, $col4); s{\r}{}; chomp; if ( /^([^:]+):\s*(\d+)\s*$/ ) { $col1 = $1; $col2 = $2; $col3 = ''; $col4 = ''; $sth1->execute($date_string, $col1,$col2); } elsif ( /^([^:]+):\s*(\d+)\s*([^:]+):\s*(\d+)\s*$/ ) { $col1 = $1; $col2 = $2; $col3 = $3; $col4 = $4; $sth2->execute($date_string,$col1,$col2,$col3,$col4); } else { print ">>>", $_ ,"<<<\n"; die "oops - unprocessed line!\n"; } # print join("\t", $col1, $col2, $col3, $col4), "\n"; } $dbh->commit; $dbh->disconnect; $ wc -l pm1083865_ivo.txt 72 pm1083865_ivo.txt $ perl ivo.pl pm1083865_ivo.txt && echo "table pm1083865_ivo limit 5; +select count(*) from pm1083865_ivo" | psql Timing is on. col0 | col1 | col2 | col3 + | col4 ------------------------+-------------------+--------+---------------- +--+-------- 2014-04-25 23:06:25+02 | Total Subscribers | 970260 | + | 2014-04-25 23:06:25+02 | Active | 86806 | Dormant + | 883454 2014-04-25 23:06:25+02 | pdsn-simple-ipv4 | 212 | pdsn-simple-ipv +6 | 0 2014-04-25 23:06:25+02 | pdsn-mobile-ip | 539792 | ha-mobile-ipv6 + | 0 2014-04-25 23:06:25+02 | hsgw-ipv6 | 234943 | hsgw-ipv4 + | 1977 (5 rows) Time: 1.845 ms count ------- 70 (1 row) Time: 0.820 ms

UPDATE: change col0 to timestamptz; added a commit;


Comment on Re^2: Parsing cli output into .csv
Download Code
Re^3: Parsing cli output into .csv
by Ivo (Initiate) on Apr 26, 2014 at 10:42 UTC

    Thanks Eric!

    apology for the format, just first time user

      That small typo 'fileds' (fields) obscured the meaning of your question for me, and rereading it I think that what you wanted is really more like this:

      (again, I've implemented against postgres, but if you take out the DB stuff you can use CSV output (the outcommented lines))

      use strict; use warnings; use DBI; my $t = "pm1083865_ivo2"; # table name my $date_string ; my @hdr = (); my @val = (); while (<>) { # reading STDIN chomp; $date_string = $_; last; } while (<>) { # reading STDIN next if (/^\s*$/); chomp; if ( /^([^:]+):\s*(\d+)\s*$/ ) { push(@hdr, $1); push(@val, $2); } elsif ( /^([^:]+):\s*(\d+)\s*([^:]+):\s*(\d+)\s*$/ ) { push(@hdr, $1); push(@val, $2); push(@hdr, $3); push(@val, $4); } else { print ">>>", $_ ,"<<<\n"; die "oops - unprocessed line!\n"; } } # clean up header names for database use: for (my$i=0;$i<scalar(@hdr);$i++) { $hdr[$i] = lc($hdr[$i]); # lower case $hdr[$i] =~ s{[-]}{_}g; # change dash to underscore $hdr[$i] =~ s{[ ]}{_}g; # change space to underscore $hdr[$i] =~ s{_[_]+}{_}g; # } my @all_hdrs = (); push(@all_hdrs, "date_line" ); push(@all_hdrs, @hdr +); my @all_vals = (); push(@all_vals, $date_string); push(@all_vals, @val +); # use only this line to get a output csv # print join("\t", @all_hdrs), "\n", join("\t", @all_vals), "\n"; # exit; my $dbh = DBI->connect or die "no database connection - $!\n "; $dbh->{AutoCommit} = 0; $dbh->{RaiseError} = 1; my $arf = $dbh->selectrow_arrayref(" select count(*) from information_schema.tables where table_schema = 'public' and table_name = '$t'"); if ($arf->[0] == 0) { my $sql_create = join("\n" , "create table $t (" , " date_line timestamptz\n, " , join ("\n, ", map { $_ .= ' integer' } @hdr ) , ");" ); $dbh->do( $sql_create ); } my $sql_insert = join("\n" , "insert into $t values (" , join( ", ", map {'?'} @all_hdrs) , ");" ); my $sth = $dbh->prepare($sql_insert); my $rc = $sth->execute(@all_vals); $dbh->commit; $dbh->disconnect;

      After running that (showing record vertically):

      $ echo "table pm1083865_ivo2" | psql -x | head -n 20 Timing is on. -[ RECORD 1 ]-----------------+----------------------- date_line | 2014-04-25 23:06:25+02 total_subscribers | 970260 active | 86806 dormant | 883454 pdsn_simple_ipv4 | 212 pdsn_simple_ipv6 | 0 pdsn_mobile_ip | 539792 ha_mobile_ipv6 | 0 hsgw_ipv6 | 234943 hsgw_ipv4 | 1977 hsgw_ipv4_ipv6 | 192328 pgw_pmip_ipv6 | 0 pgw_pmip_ipv4 | 0 pgw_pmip_ipv4_ipv6 | 0 pgw_gtp_ipv6 | 0 pgw_gtp_ipv4 | 0 pgw_gtp_ipv4_ipv6 | 0 sgw_gtp_ipv6 | 0 [...] etc., there are 126 columns

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://1083899]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others examining the Monastery: (6)
As of 2015-07-06 03:39 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (70 votes), past polls