sub correlate { my (\$p1,\$p2,\$v) = @_; return if \$p1 eq \$p2; my (\$xy,\$x2,\$y2); for my \$date (sort keys %data){ my \$x = \$data{\$date}{\$p1}{\$v} - \$total{\$p1}{\$v}{'mean'} ; my \$y = \$data{\$date}{\$p2}{\$v} - \$total{\$p2}{\$v}{'mean'} ; \$xy += \$x * \$y; \$x2 += \$x * \$x; \$y2 += \$y * \$y; } my \$cor = \$xy / sqrt(\$x2 * \$y2); print "\$p1 \$p2 \$v \$cor\n"; } ##```## #!/usr/bin/perl use strict; use warnings; my \$infile = 'products.txt'; my %data = (); my %total = (); my \$records = 0; my \$t0 = time(); open IN,'<',\$infile or die "Could not open \$infile \$!"; while (){ my (\$date, \$product, \$price, \$qu) = split /\s+/,\$_; \$data{\$date}{\$product}{'price'} = \$price; \$data{\$date}{\$product}{'qu'} = \$qu; \$total{\$product}{'count'} += 1; \$total{\$product}{'price'}{'sum'} += \$price; \$total{\$product}{'qu'}{'sum'} += \$qu; ++\$records; } close IN; # calculate stats my \$outfile = 'report.txt'; open OUT,'>',\$outfile or die "Could not open \$outfile"; for my \$prod (keys %total){ my \$count = \$total{\$prod}{'count'}; # mean \$total{\$prod}{'price'}{'mean'} = \$total{\$prod}{'price'}{'sum'}/\$count; \$total{\$prod}{'qu'}{'mean'} = \$total{\$prod}{'qu'}{'sum'}/\$count; # std dev squared my (\$sum_x2,\$sum_y2); for my \$date (keys %data){ my \$x = \$data{\$date}{\$prod}{'price'} - \$total{\$prod}{'price'}{'mean'}; \$sum_x2 += (\$x*\$x); my \$y = \$data{\$date}{\$prod}{'qu'} - \$total{\$prod}{'qu'}{'mean'}; \$sum_y2 += (\$y*\$y); } \$total{\$prod}{'price'}{'stddev'} = sqrt(\$sum_x2/\$count); \$total{\$prod}{'qu'}{'stddev'} = sqrt(\$sum_y2/\$count); my \$line = join "\t",\$prod, \$total{\$prod}{'price'}{'mean'}, \$total{\$prod}{'price'}{'stddev'}, \$total{\$prod}{'qu'}{'mean'}, \$total{\$prod}{'qu'}{'stddev'}; print OUT \$line."\n"; } close OUT; # correlate first 5 products against the rest \$t0 = time; for (1..5){ my \$p1 = sprintf 'product_%04d',\$_; print "Correlating \$p1\n"; open OUT,'>',"correlate_\${p1}.txt" or die "\$!"; for my \$p2 (sort keys %total){ correlate(\$p1,\$p2,'price'); } close OUT; } # summary my \$dur = time - \$t0; printf " Products : %d Dates : %d Records : %d Run Time : %d s",0+keys %total, 0+keys %data, \$records, \$dur; sub correlate { my (\$p1,\$p2,\$v) = @_; return if \$p1 eq \$p2; my \$sigma; my \$count = 0; for my \$date (sort keys %data){ # skip missing dataset next unless (defined \$data{\$date}{\$p1} && defined \$data{\$date}{\$p2}); my \$zee1 = ( \$data{\$date}{\$p1}{\$v} - \$total{\$p1}{\$v}{'mean'} ) / \$total{\$p1}{\$v}{'stddev'} ; my \$zee2 = ( \$data{\$date}{\$p2}{\$v} - \$total{\$p2}{\$v}{'mean'} ) / \$total{\$p2}{\$v}{'stddev'} ; \$sigma += ( \$zee1 * \$zee2 ); ++\$count; } my \$cor = \$sigma / ( \$count - 1 ) ; print OUT "\$p1 \$p2 \$v \$cor\n"; } ```