http://www.perlmonks.org?node_id=995286


in reply to Re^4: Loading 283600 records (Updated)
in thread Loading 283600 records (WordNet)

I tried the substr alias trick from Re^5: Working with fixed length files, and it revealed a problem with your benchmark: the first subs to run are always slower, because the disk cache hasn't been filled yet and perl.exe hasn't yet requested all the memory it needs.

My messy junk :)

#!/usr/bin/perl --
# Benchmark of several ways to split fixed-width records read from "04.txt"
# into a hash of arrays:  { first 10 chars => [ [ next 10 chars, next 5 ], ... ] }.
#
# Most variants use the "substr alias" trick: take references to lvalue
# substr() windows of a buffer once, then refill the buffer for every record
# and read the fields through the references.  test1 is the plain baseline
# that calls substr() on every line.
#
# Every sub resets the shared $href first, so each run starts from empty.
use strict;
use warnings;
use Time::HiRes;
use Benchmark qw/cmpthese/;
use Data::Dump;    # not called below; kept from the original post

my $href;

# Alias variant on the *global* $_ (no local), right-hand fields kept in an
# array of references and copied out with map.
sub substraliasSa {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    $_ = " " x 25;
    my $left  = \substr($_, 0, 10);
    my @right = ( \substr($_, 10, 10), \substr($_, 20, 5) );
    push @{ $href->{$$left} }, [ map $$_, @right ] while <$fh>;
    close $fh;
}

# Same as substraliasSa, except that $_ is localized for the duration of
# the sub.
sub substraliasSu {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    local $_ = " " x 25;
    my $left  = \substr($_, 0, 10);
    my @right = ( \substr($_, 10, 10), \substr($_, 20, 5) );
    push @{ $href->{$$left} }, [ map $$_, @right ] while <$fh>;
    close $fh;
}

# Alias variant on the global $_ (no local) with one scalar reference per
# field, so no map is needed to copy the fields out.
sub substraliasR4 {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    $_ = " " x 25;
    my $left   = \substr($_, 0, 10);
    my $right1 = \substr($_, 10, 10);
    my $right2 = \substr($_, 20, 5);
    push @{ $href->{$$left} }, [ $$right1, $$right2, ] while <$fh>;
    close $fh;
}

# Same as substraliasR4, except that $_ is localized.
sub substraliasR3 {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    local $_ = " " x 25;
    my $left   = \substr($_, 0, 10);
    my $right1 = \substr($_, 10, 10);
    my $right2 = \substr($_, 20, 5);
    push @{ $href->{$$left} }, [ $$right1, $$right2, ] while <$fh>;
    close $fh;
}

# Alias variant on a lexical buffer ($record) instead of $_, with one scalar
# reference per field; each line is read straight into the buffer.
sub substraliasR1 {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    my $record = " " x 25;
    my $left   = \substr($record, 0, 10);
    my $right1 = \substr($record, 10, 10);
    my $right2 = \substr($record, 20, 5);
    push @{ $href->{$$left} }, [ $$right1, $$right2, ] while ( $record = <$fh> );
    close $fh;
}

# Like substraliasR1, but the three field references live in one array.
sub substraliasR2 {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    my $record = " " x 25;
    my @rec = (
        \substr($record, 0, 10),
        \substr($record, 10, 10),
        \substr($record, 20, 5),
    );
    push @{ $href->{ ${ $rec[0] } } }, [ ${ $rec[1] }, ${ $rec[2] } ]
        while ( $record = <$fh> );
    close $fh;
}

# Like substraliasR2, but written as a block-form while loop instead of a
# statement modifier.
sub substraliasCo {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    my $record = " " x 25;
    my @rec = (
        \substr($record, 0, 10),
        \substr($record, 10, 10),
        \substr($record, 20, 5),
    );
    while ( $record = <$fh> ) {
        push @{ $href->{ ${ $rec[0] } } }, [ ${ $rec[1] }, ${ $rec[2] } ];
    }
    close $fh;
}

# Deliberately identical to substraliasCo: a control that shows how much two
# copies of the same code can differ in the benchmark results.
sub substraliasYo {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    my $record = " " x 25;
    my @rec = (
        \substr($record, 0, 10),
        \substr($record, 10, 10),
        \substr($record, 20, 5),
    );
    while ( $record = <$fh> ) {
        push @{ $href->{ ${ $rec[0] } } }, [ ${ $rec[1] }, ${ $rec[2] } ];
    }
    close $fh;
}

# Reads each line into $_ and then overwrites the lexical buffer through an
# lvalue substr() covering the whole string, rather than assigning the line
# to $record directly.
sub substraliasBa {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    my $record = " " x 25;
    my @rec = (
        \substr($record, 0, 10),
        \substr($record, 10, 10),
        \substr($record, 20, 5),
    );
    while (<$fh>) {
        substr($record, 0) = $_;
        push @{ $href->{ ${ $rec[0] } } }, [ ${ $rec[1] }, ${ $rec[2] } ];
    }
    close $fh;
}

# Like substraliasBa, but reads fixed-size records instead of lines: $/ is
# set to a reference to 27, so every read returns up to 27 bytes.
# NOTE(review): 27 is presumably 25 data characters plus a 2-byte line
# ending -- confirm against the layout of 04.txt.
sub substraliasFo {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    my $record = " " x 25;
    local $/ = \( 25 + 2 );
    my @rec = (
        \substr($record, 0, 10),
        \substr($record, 10, 10),
        \substr($record, 20, 5),
    );
    ( substr($record, 0) = $_ ),
        push @{ $href->{ ${ $rec[0] } } }, [ ${ $rec[1] }, ${ $rec[2] } ]
        while <$fh>;
    close $fh;
}

# Baseline without aliases: chomp each line and copy the fields out with
# plain substr() calls.  The last field is "everything from offset 20 on"
# rather than a fixed 5 characters.
sub test1 {
    $href = {};
    open(my $fh, "<", "04.txt") or die $!;
    while (<$fh>) {
        chomp;
        push @{ $href->{ substr($_, 0, 10) } },
            [ substr($_, 10, 10), substr($_, 20) ];
    }
    close $fh;
}

# Short labels used in the cmpthese() result tables.
my %tests = (
    T1 => \&test1,
    R1 => \&substraliasR1,
    R2 => \&substraliasR2,
    R3 => \&substraliasR3,
    R4 => \&substraliasR4,
    Co => \&substraliasCo,
    Yo => \&substraliasYo,
    Ba => \&substraliasBa,
    Fo => \&substraliasFo,
    Su => \&substraliasSu,
    Sa => \&substraliasSa,
);

## spin-up, ready the disk cache, cause sYo and sCo are identical but
## the bench results aren't
test1();
substraliasR1();
substraliasR2();
substraliasR3();
substraliasR4();
substraliasCo();
substraliasYo();
substraliasBa();
substraliasFo();
substraliasSu();
substraliasSa();

sleep 10;

# Timestamps bracket the benchmark run; each test is run 10 times.
print scalar gmtime, "\n";
cmpthese( 10, \%tests );
print scalar gmtime, "\n";

__END__

before spin-up
#~
#~ Mon Sep 24 01:40:12 2012
#~    s/iter   T1   Yo   Sa   Su   Ba   Fo   R2   R4   R1   R3   Co
#~ T1   1.58   --  -1%  -2%  -3% -12% -15% -27% -28% -28% -28% -40%
#~ Yo   1.57   1%   --  -2%  -2% -11% -14% -27% -28% -28% -28% -39%
#~ Sa   1.55   2%   2%   --  -0% -10% -13% -26% -26% -27% -27% -38%
#~ Su   1.54   3%   2%   0%   --  -9% -12% -25% -26% -26% -27% -38%
#~ Ba   1.40  13%  13%  11%  10%   --  -4% -18% -19% -19% -19% -32%
#~ Fo   1.35  17%  17%  15%  14%   4%   -- -15% -16% -16% -16% -29%
#~ R2   1.15  37%  37%  34%  34%  21%  17%   --  -1%  -1%  -2% -17%
#~ R4   1.14  39%  38%  36%  35%  23%  18%   1%   --  -0%  -1% -16%
#~ R1   1.13  39%  39%  36%  36%  23%  19%   2%   0%   --  -0% -16%
#~ R3   1.13  40%  39%  37%  36%  23%  19%   2%   1%   0%   -- -16%
#~ Co  0.952  66%  65%  62%  62%  47%  42%  21%  20%  19%  19%   --
#~ Mon Sep 24 01:42:37 2012
#~
#~
#~ Mon Sep 24 01:54:29 2012
#~    s/iter   Yo   T1   Su   Sa   Ba   Fo   R2   R3   R1   R4   Co
#~ Yo   1.61   --  -1%  -4%  -4% -16% -16% -28% -29% -29% -29% -41%
#~ T1   1.59   1%   --  -2%  -3% -15% -15% -27% -28% -28% -28% -40%
#~ Su   1.55   4%   3%   --  -1% -13% -13% -26% -26% -27% -27% -38%
#~ Sa   1.54   4%   3%   1%   -- -12% -12% -25% -26% -26% -26% -38%
#~ Ba   1.35  19%  17%  15%  14%   --  -0% -15% -16% -16% -16% -29%
#~ Fo   1.35  19%  17%  15%  14%   0%   -- -15% -16% -16% -16% -29%
#~ R2   1.15  39%  38%  34%  34%  17%  17%   --  -1%  -1%  -1% -17%
#~ R3   1.14  41%  39%  36%  35%  19%  19%   1%   --  -0%  -0% -16%
#~ R1   1.14  41%  40%  36%  35%  19%  19%   1%   0%   --  -0% -16%
#~ R4   1.14  41%  40%  36%  35%  19%  19%   1%   0%   0%   -- -16%
#~ Co  0.953  69%  66%  62%  61%  42%  42%  21%  20%  19%  19%   --
#~ Mon Sep 24 01:56:54 2012

after spin-up
Mon Sep 24 03:43:30 2012
   s/iter   Sa   Su   Ba   Fo   R2   Yo   R3   R4   R1   Co   T1
Sa   1.56   --  -0%  -1%  -9% -25% -26% -26% -26% -27% -28% -31%
Su   1.55   0%   --  -1%  -9% -25% -25% -26% -26% -26% -28% -31%
Ba   1.54   1%   1%   --  -8% -24% -25% -25% -25% -26% -27% -30%
Fo   1.42  10%   9%   8%   -- -18% -18% -19% -19% -20% -21% -25%
R2   1.16  34%  34%  32%  22%   --  -0%  -1%  -1%  -2%  -4%  -8%
Yo   1.16  35%  34%  33%  23%   0%   --  -0%  -1%  -1%  -4%  -8%
R3   1.15  35%  35%  33%  23%   1%   0%   --  -0%  -1%  -3%  -7%
R4   1.15  35%  35%  34%  24%   1%   1%   0%   --  -1%  -3%  -7%
R1   1.14  36%  36%  35%  24%   2%   1%   1%   1%   --  -2%  -6%
Co   1.11  40%  39%  38%  27%   4%   4%   4%   3%   2%   --  -4%
T1   1.07  46%  45%  44%  33%   9%   8%   8%   7%   7%   4%   --
Mon Sep 24 03:45:52 2012

Mon Sep 24 03:46:20 2012
   s/iter   Su   Sa   Ba   Fo   R2   Yo   R4   R3   R1   Co   T1
Su   1.55   --  -0%  -5% -11% -25% -26% -26% -27% -27% -30% -31%
Sa   1.54   0%   --  -4% -11% -25% -25% -26% -26% -27% -30% -31%
Ba   1.48   5%   4%   --  -7% -22% -22% -23% -23% -23% -27% -27%
Fo   1.38  12%  12%   7%   -- -16% -16% -17% -17% -18% -22% -22%
R2   1.16  34%  34%  28%  19%   --  -0%  -1%  -1%  -2%  -7%  -7%
Yo   1.15  34%  34%  28%  19%   0%   --  -1%  -1%  -2%  -6%  -7%
R4   1.14  36%  35%  30%  21%   1%   1%   --  -0%  -1%  -5%  -6%
R3   1.14  36%  36%  30%  21%   2%   1%   0%   --  -0%  -5%  -6%
R1   1.13  37%  36%  30%  21%   2%   2%   1%   0%   --  -5%  -6%
Co   1.08  44%  43%  37%  28%   7%   7%   6%   6%   5%   --  -1%
T1   1.07  45%  44%  38%  29%   8%   8%   6%   6%   6%   1%   --
Mon Sep 24 03:48:39 2012

with sleep
Mon Sep 24 03:54:28 2012
   s/iter   Su   Sa   Ba   Fo   Yo   R2   R3   R4   R1   Co   T1
Su   1.56   --  -0%  -4% -11% -25% -25% -26% -26% -26% -30% -31%
Sa   1.56   0%   --  -4% -11% -25% -25% -26% -26% -26% -30% -31%
Ba   1.50   4%   4%   --  -8% -22% -22% -23% -23% -23% -27% -28%
Fo   1.39  13%  13%   8%   -- -16% -16% -17% -17% -17% -21% -22%
Yo   1.17  33%  33%  28%  18%   --  -0%  -1%  -2%  -2%  -6%  -7%
R2   1.17  34%  34%  29%  19%   0%   --  -1%  -1%  -1%  -6%  -7%
R3   1.15  35%  35%  30%  20%   1%   1%   --  -0%  -0%  -5%  -6%
R4   1.15  36%  36%  30%  20%   2%   1%   0%   --  -0%  -5%  -6%
R1   1.15  36%  36%  30%  20%   2%   1%   0%   0%   --  -5%  -6%
Co   1.10  42%  42%  37%  26%   7%   6%   5%   5%   5%   --  -1%
T1   1.08  44%  44%  39%  28%   8%   8%   7%   6%   6%   1%   --
Mon Sep 24 03:56:48 2012