Anonymous Monk has asked for the wisdom of the Perl Monks concerning the following question:
Hi there,
I am working with some data in which I need to take an average every 12 lines (every 5 seconds) and print that average out along with the time that the data was recorded (so one reading every minute).
I have done this in 2 ways but neither are particularly neat or speedy. I am relatively new to Perl so I apologise to anyone who thinks I have butchered the code.
The data looks as follows:
The following is my first method - which as you can see is very chunky and just a bit of a bodge job.
My second code is a bit more streamline but it takes longer to run.
I know that is probably a lot to read and probably doesn't make sense, and I apologise. The scripts get the job done but I am looking for help in improving my skills and making everything clearer. The 1st code takes 1.8 seconds to run (file has 120960 lines) and second takes 9 seconds to run for the same file. Any help would be greatly appreciated, as this needs to be run for about 100,000 files.
acceleration (mg) - 2013-10-09 10:00:00 - 2013-10-16 09:59:55 - sample +Rate = 5 seconds, imputed 46.3, 0 17.1, 0 30.1, 0 38.4, 0 97.1, 0 87.3, 0 84, 0 78.5, 0 67.9, 0 83.5, 0 155, 0 103.5, 0
#!/usr/bin/perl use strict; use warnings; use Getopt::Long; use Date::Calc qw(:all); my $input_file = undef; GetOptions ( "input=s" => \$input_file, ); open INPUT, $input_file or die "Can't open $input_file for input: $!"; my @files = undef; #list of files to run script on while(my $line = <INPUT>){ chomp($line); push @files, $line; } for(my $i=1; $i < scalar @files; $i++) { if(-e $files[$i]){ open IN, "gunzip -c $files[$i]|" or die "Can't open $files[$i] for input: $!"; open OUT, "> $files[$i]_out.txt" or die "Can't open $files[$i]_out.txt for output: $!"; my $firstline = 1; my @dt = undef; # date and time my @date1 = undef; my @date2 = undef; if($firstline==1){ #extract header line and get start a +nd end date from line chomp(my $line = <IN>); my @header = split(/,/, $line); @dt= $header[0] =~ /(\d+)/g; @date1 = ($dt[0], $dt[1], $dt[2], $dt[3], $dt[4], $dt[5]); @date2 = ($dt[6], $dt[7], $dt[8], $dt[9], $dt[10], $dt[11] +); $firstline = 0; } else { <IN> for 1..1 } print OUT "Date\tTime\tDay\tmg\n"; #print title my $count = 0; while(my $line1 = <IN>){ #whilst reading f +ile get 12 lines and print time and print average of 12 lines chomp($line1); my @mg1 = split(/,/, $line1); my $line2 = <IN>; chomp($line2); my @mg2 = split(/,/, $line2); my $line3 = <IN>; chomp($line3); my @mg3 = split(/,/, $line3); my $line4 = <IN>; chomp($line4); my @mg4 = split(/,/, $line4); my $line5 = <IN>; chomp($line5); my @mg5 = split(/,/, $line5); my $line6 = <IN>; chomp($line6); my @mg6 = split(/,/, $line6); my $line7 = <IN>; chomp($line7); my @mg7 = split(/,/, $line7); my $line8 = <IN>; chomp($line8); my @mg8 = split(/,/, $line8); my $line9 = <IN>; chomp($line9); my @mg9 = split(/,/, $line9); my $line10 = <IN>; chomp($line10); my @mg10 = split(/,/, $line10); my $line11 = <IN>; chomp($line11); my @mg11 = split(/,/, $line11); my $line12 = <IN>; chomp($line12); my @mg12 = split(/,/, $line12); my ($y, $mo, $d, $h, $m, $s) = Add_Delta_DHMS(@date1, 0, 0 +, $count, 0); printf OUT qq(%d-%02d-%02d %02d:%02d:%02d), $y, $mo, $d, $ +h, $m, $s; print OUT "\t" . Day_of_Week($y, $mo, $d); print OUT "\t" . (($mg1[0]+$mg2[0]+$mg3[0]+$mg4[0]+$mg5[0] ++$mg6[0]+$mg7[0]+$mg8[0]+$mg9[0]+$mg10[0]+$mg11[0]+$mg12[0])/12) . "\ +n"; $count +=1 } close IN; close OUT } }
for(my $i=1; $i < scalar @files; $i++) { #everything same as before un +til we get to reading the files if(-e $files[$i]){ open IN, "gunzip -c $files[$i]|" or die "Can't open $files[$i] for input: $!"; open OUT, "> $files[$i]_out.txt" or die "Can't open $files[$i]_out.txt for output: $!"; my $firstline = 1; my @dt = undef; # date and time my @date1 = undef; my @date2 = undef; if($firstline==1){ chomp(my $line = <IN>); my @header = split(/,/, $line); @dt= $header[0] =~ /(\d+)/g; @date1 = ($dt[0], $dt[1], $dt[2], $dt[3], $dt[4], $dt[5]); @date2 = ($dt[6], $dt[7], $dt[8], $dt[9], $dt[10], $dt[11] +); $firstline = 0; } else { <IN> for 1..1 } print OUT "Date\tTime\tDay\tmg\n"; my $count = 0; my $avg_count = 0; while(<IN>){ #this is w +here it changes, instead of doing the same thing 12 times I say if th +e line number (-1 due to header) modulos 12 is 0 then print the avera +ge and set it back to 0 my ($y, $mo, $d, $h, $m, $s) = Add_Delta_DHMS(@date1, 0, 0 +, $count, 0); chomp($_); my @line = split(/,/, $_); $avg_count += $line[0]; if((($.)-1)%12 ==0){ printf OUT qq(%d-%02d-%02d %02d:%02d:%02d), $y, $mo, $ +d, $h, $m, $s; print OUT "\t" . Day_of_Week($y, $mo, $d); print OUT "\t" . ($avg_count/12) . "\n"; $count +=1; $avg_count =0; } } close IN; close OUT } }
|
---|
Back to
Seekers of Perl Wisdom