I created a script to read in generic Spacecraft data files in a CSV format. I wanted to be able to validate some of our other software's calculation of Min/Mean/Max/StdDev/NumOfPoints. I wanted the script to be able to work generically on any of our data files without knowing ahead of time how many columns etc there are going to be.
The script below does exactly what I want it to; all of the data validates correctly. On most of my computers (Linux and WinXP), the script processes a 30-megabyte version of the sample data (included below) in under a minute. However, I have one machine that only has 128 MB of RAM, and there the script doesn't complete even after 30 minutes.
Any suggestions on how to make it more efficient in its use of memory? I know my method of populating my hash of arrays seems long-winded, but I couldn't think of another way to do it...
Script
#!/usr/bin/perl -w
use strict;
use Statistics::Basic::Mean;
use Statistics::Basic::StdDev;
use List::Util qw(min max);

# Name of the CSV report whose per-column statistics are printed.
my $filename='224_APID003_report.csv';

# %hash_keys maps a 1-based column position to that column's header name.
# %HoA maps a header name to the array of values read for that column.
# Both are filled in by create_hash() and pop_hash() below.
my (%HoA,%hash_keys);

# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and $! reports why the open failed.
open(my $read_in, '<', $filename)
    or die "I can't open $filename to read: $!\n";
while (my $line = <$read_in>)
{
    # Strip LF as well as CRLF endings so a file written on Windows does not
    # leave a stray "\r" glued to the last field of every row.
    $line =~ s/\r?\n\z//;
    create_hash($line) if $line =~ /^Year/;      # header row
    pop_hash($line)    if $line =~ /^\d{4},/;    # data row (starts with a 4-digit year)
}
close($read_in);

# Report the statistics column by column, in original column order.
foreach my $column (sort { $a <=> $b } keys %hash_keys)
{
    my $name = $hash_keys{$column};

    # Time stamp and year columns are not numeric measurements.
    next if $name =~ m/(?:TIME|YEAR)/i;

    my $values = $HoA{$name};
    my $count  = $values ? scalar @{$values} : 0;
    if ($count == 0)
    {
        # A header without data rows leaves nothing to summarise.
        warn "${name}: no data points, skipping\n";
        next;
    }

    # min/max scan the column in place. Sorting a full copy of the column
    # just to take its two ends doubled the memory needed per column, and
    # the shift/pop pair left MAX undefined for a single-value column.
    my $low = min(@{$values});
    my $hi  = max(@{$values});

    my $mean   = Statistics::Basic::Mean->new($values)->query;
    my $stddev = Statistics::Basic::StdDev->new($values)->query;
    print "${name}: MIN:($low) MEAN:($mean) MAX:($hi) STDEV:($stddev) POINTS:($count)\n";
}
################################
#Subroutines
################################
# create_hash($header_line)
#
# Splits the comma-separated header row into column names and records them
# in the file-level hashes:
#   %hash_keys  1-based column position => column name
#   %HoA        column name             => fresh, empty array reference
#
# Each column starts with a real empty array reference (not undef), so a
# column that never receives data can still be dereferenced and counted.
# NOTE: two columns with the same header name share a single %HoA entry.
sub create_hash
{
    my ($header_line) = @_;
    my @columns = split(/,/, $header_line);
    for my $index (0 .. $#columns)
    {
        my $name = $columns[$index];
        $hash_keys{$index + 1} = $name;   # positions are numbered from 1
        $HoA{$name} = [];
    }
    return;
}
# pop_hash($data_line)
#
# Splits one comma-separated data row and appends each field to the array
# in %HoA that belongs to the column at the same position, using the
# position => name mapping held in %hash_keys.
#
# A row with fewer fields than the header pushes undef for the missing
# columns; fields beyond the last known column are ignored.
# NOTE: columns that share a header name append to the same array.
sub pop_hash
{
    my ($data_line) = @_;
    my @values = split(/,/, $data_line);

    # The keys of %hash_keys are always the integers 1..N, so a plain range
    # visits them in column order without re-sorting the keys on every row.
    my $column_count = keys %hash_keys;
    for my $column (1 .. $column_count)
    {
        push @{ $HoA{ $hash_keys{$column} } }, $values[$column - 1];
    }
    return;
}
Sample Data
Year,S/C Time,224_P003STIME,224_P003PVNO,224_P003PCKT,224_P003SHDF,224
+_P003ID,224_P003SEGF,224_P003SCNT,224_P003PLEN,224_P003STIME,224_P003
+MTIME,224_MCDHANSGND,224_MCDH5VSVOLT,224_MCDH5VSCUR,224_MCDH33VSVOLT,
+224_MCDH33VSCUR,224_MCDH25VSVOLT,224_MCDH25VSCUR,224_PBUSURBVOLT,224_
+PEPURLCUR,224_PBATV1,224_PBATFIVOLT,224_PBATCUR,224_PBATCPOL,224_PBAT
+CPOLV,224_PEPSAVOLT,224_PEPSACUR,224_PBATHV1,224_PBATHV2,224_PBATHV3,
+224_PBATHV4,224_PBATHV5,224_PBATHV6,224_PEP5VBM,224_PBATV2,224_AMAGCU
+R,224_MCDH21VRVOLT,224_XRCVAGCGS,224_XRCVCLSTR,224_XRCVRFPS
2005,115-00:00:00.095,05-115-00:00:00.095,0,0,1,3,3,11466,35,05-115-00
+:00:00.095,04-115-23:59:50.095,0.000000,4.961763,0.496248,3.320780,0.
+094080,2.519886,0.037647,6.983401,1.290000,0.247917,5.830000,0.045000
+,49,0.735000,7.443989,1.575000,0.125373,0.125373,0.125373,0.125373,0.
+125373,0.125373,5.273542,0.000000,0.138108,2.090138,0.019593,0.019593
+,20.753000
2005,115-00:00:01.028,05-115-00:00:01.028,0,0,1,3,3,11467,35,05-115-00
+:00:01.028,04-115-23:59:51.028,0.000000,4.961763,0.496248,3.320780,0.
+094080,2.519886,0.037647,7.018670,1.320000,0.247917,5.830000,0.045000
+,49,0.735000,7.443989,1.575000,0.125373,0.125373,0.125373,0.125373,0.
+125373,0.125373,5.273542,0.000000,0.137260,2.090138,0.019593,0.019593
+,20.753000
2005,115-00:00:02.028,05-115-00:00:02.028,0,0,1,3,3,11468,35,05-115-00
+:00:02.028,04-115-23:59:52.028,0.000000,4.961763,0.496248,3.320780,0.
+094080,2.519886,0.037647,7.018670,1.290000,0.247917,5.830000,0.045000
+,49,0.735000,7.443989,1.575000,0.125373,0.125373,0.125373,0.125373,0.
+125373,0.125373,5.273542,0.000000,0.137260,2.090138,0.019593,0.019593
+,20.753000
2005,115-00:00:03.036,05-115-00:00:03.036,0,0,1,3,3,11469,35,05-115-00
+:00:03.036,04-115-23:59:53.036,0.000000,4.961763,0.496248,3.320780,0.
+094080,2.519886,0.037647,7.018670,1.275000,0.247917,5.830000,0.045000
+,49,0.735000,7.443989,1.575000,0.125373,0.125373,0.125373,0.125373,0.
+125373,0.125373,5.273542,0.000000,0.138108,2.090138,0.019593,0.019593
+,20.753000
2005,115-00:00:04.094,05-115-00:00:04.094,0,0,1,3,3,11470,35,05-115-00
+:00:04.094,04-115-23:59:54.094,0.000000,4.961763,0.496248,3.320780,0.
+094080,2.519886,0.037647,7.018670,1.290000,0.247917,5.830000,0.045000
+,49,0.735000,7.443989,1.575000,0.125373,0.125373,0.125373,0.125373,0.
+125373,0.125373,5.273542,0.000000,0.137260,2.090138,0.019593,0.019593
+,20.753000