Beefy Boxes and Bandwidth Generously Provided by pair Networks
"be consistent"
 
PerlMonks  

Re: finding number of contiguous letters

by PipTigger (Friar)
on May 24, 2007 at 10:20 UTC ( #617200=note: print w/ replies, xml ) Need Help??


in reply to finding number of contiguous letters

So I've gotten silly drawing all of the above together. =)

#!/usr/bin/perl
# 75O0K8S - Benchmark all the Trigram-finders!
#
# Collects the trigram-extraction snippets posted in this thread (with
# liberty taken to reformat aggressively), prints what each one returns for
# every test string, and then compares their speed with Benchmark::cmpthese.
#
# Every contender is a sub named do_<CodeRefName> that takes no arguments
# and reads its input from the file-scoped $str.
use strict;
use warnings;
use Benchmark qw(cmpthese);

# Text::Ngram is the only non-core module used here, and only by do_hngramG.
# Load it at run time so that the other contenders can still be compared on a
# machine where it is not installed.
my $HAVE_TEXT_NGRAM = eval { require Text::Ngram; 1 } ? 1 : 0;

my $str;    # current input string, shared by all do_* subs

my @test_strings = qw(computer CheesyNachoz JustAnotherPerlHacker);
push(@test_strings, 'Just Another Perl Hacker');    # add a test string w/ spaces too

my %author_of = (    # CodeRefName => PerlMonkAuthor (in post order)
    'whileZ'   => 'Zaxo',
    'recurseO' => 'otto',    # not sure how to do magic blazar goto Tail-Recursion
    'whileM'   => 'Marknel',
    'mapB'     => 'blazar',
    'hngramG'  => 'graff',
    'substrGF' => 'GrandFather',
    'unpackLG' => 'Limbic~Region && GrandFather',
    'regexGF'  => 'GrandFather',
    'mapstrS'  => 'Skeeve',
    'substrS'  => 'Skeeve',
    'regexS'   => 'Skeeve',
    'counmapA' => 'andreas1234567',
    'mapstrS2' => 'Skeeve',
    'whileAB'  => 'Anonymous Monk && blazar',
    'hmapB'    => 'blazar',
    'regexS2'  => 'Skeeve',
    'whileP'   => 'Pip',
    'regexP'   => 'Pip',
);

# Run the benchmark only when executed as a script, so the contenders can
# also be loaded and called from another file without starting it.
main() unless caller;

# Print every contender's result for each test string, then benchmark them.
sub main {
    my %code_for;     # CodeRefName => the do_* sub itself (used for printing)
    my %timed_for;    # CodeRefName => list-context wrapper (used for timing)

    for my $name (keys %author_of) {
        if ($name eq 'hngramG' and not $HAVE_TEXT_NGRAM) {
            warn "Text::Ngram is not installed; skipping hngramG\n";
            next;
        }
        my $code = __PACKAGE__->can("do_$name")
            or die "no sub do_$name for contender '$name'\n";
        $code_for{$name} = $code;

        # Benchmark invokes its code refs in void context.  A contender that
        # returns an expression directly (e.g. "return $str =~ /.../g") would
        # then perform a single match instead of collecting every trigram and
        # look far faster than one that fills an array first.  Forcing list
        # context makes every contender do the full job.
        $timed_for{$name} = sub { my @tris = $code->(); return; };
    }

    for my $test_string (@test_strings) {
        $str = $test_string;    # assign shared $str for all subs
        for my $name (sort keys %code_for) {
            printf("%-8s:%s\n", $name, join ' ', $code_for{$name}->());
        }
        cmpthese(-1, \%timed_for);
    }
    return;
}

# Set the shared input to $input and call contender $name (a key of
# %author_of, without the "do_" prefix) in the caller's context.
# Dies if no such contender sub exists.
sub run_contender {
    my ($name, $input) = @_;
    my $code = __PACKAGE__->can("do_$name")
        or die "unknown contender '$name'\n";
    $str = $input;
    return $code->();
}

# Zaxo: match three characters, then step pos() back two to overlap.
sub do_whileZ {
    local $_ = $str;
    my @tris;
    pos() -= 2, push @tris, $1 while /(...)/g;
    return @tris;
}

# Helper for do_recurseO.  $cnt is the highest index of a full window,
# $ra_out collects the joined windows, $ra_val is the sliding window and
# $ra_in holds the letters not yet consumed (it is emptied as a side effect).
sub recurse {
    my ($cnt, $ra_out, $ra_val, $ra_in) = @_;
    if ($#$ra_val != $cnt) {    # working array not full
        if ($#$ra_in > -1) {    # fill it up
            push(@$ra_val, shift(@$ra_in));
            recurse($cnt, $ra_out, $ra_val, $ra_in);
        }
        else {
            return;             # done with list
        }
    }
    else {
        # add to output array, joined ltr string, rip off first ltr
        push(@$ra_out, join('', @$ra_val));
        shift(@$ra_val);
        # add new ltr, if any remaining
        push(@$ra_val, shift(@$ra_in)) if ($#$ra_in > -1);
        recurse($cnt, $ra_out, $ra_val, $ra_in);
    }
}

# otto: input letter array, output trigram string array, && values workspace
sub do_recurseO {
    my @in = split('', $str);
    my @tris;
    my @val;
    recurse(2, \@tris, \@val, \@in);
    return @tris;
}

# Marknel: index loop with substr.
sub do_whileM {
    my $strlen   = length($str);
    my $loop_num = $strlen - 2;
    my @tris;
    my $ndx = 0;
    while ($ndx < $loop_num) {
        my $tri = substr $str, $ndx, 3;
        push @tris, $tri;
        $ndx++;
    }
    return @tris;
}

# blazar: map over start offsets.
sub do_mapB {
    my @tris = map { substr $str, $_, 3 } 0 .. length($str) - 3;
    return @tris;
}

# graff: Text::Ngram count hash; returns its keys sorted, so duplicates
# collapse and the order differs from the other contenders.
sub do_hngramG {
    my $href = Text::Ngram::ngram_counts($str, 3);
    return sort { $a cmp $b } keys %{$href};
}

# GrandFather: push substr for each start offset.
sub do_substrGF {
    my @tris;
    push @tris, substr $str, $_, 3 for 0 .. length($str) - 3;
    return @tris;
}

# Limbic~Region && GrandFather: read three bytes, back up two, repeat.
# HTTP://Perl.Com/doc/manual/html/pod/perlfunc/unpack.html
sub do_unpackLG {
    my $mats = length($str) - 2;
    my $tmpl = 'a3XX' x $mats;
    my @tris = unpack($tmpl, $str);
    return @tris;
}

# GrandFather: capture inside a zero-width lookahead.
sub do_regexGF {
    my @tris = $str =~ /(?=(...))/g;
    return @tris;
}

# Skeeve: map over end offsets.
sub do_mapstrS {
    return map substr($str, $_ - 3, 3), (3 .. length $str);
}

# Skeeve: push substr for each start offset.
sub do_substrS {
    my @tris;
    push @tris, substr $str, $_, 3 for 0 .. length($str) - 3;
    return @tris;
}

# Skeeve: lookahead capture, returned directly.
sub do_regexS {
    return $str =~ /(?=(...))/g;
}

# andreas1234567: returns (length - 2) for each whitespace-separated word,
# i.e. counts rather than the trigrams themselves.
sub do_counmapA {
    return map { (length($_) - 2) } split('\s+', $str);
}

# Skeeve: as do_mapstrS, with the n-gram length in a variable.
sub do_mapstrS2 {
    my $len  = 3;
    my @tris = map substr($str, $_ - $len, $len), ($len .. length $str);
    return @tris;
}

# Anonymous Monk && blazar: take the leading trigram, chop one character.
sub do_whileAB {
    my @tris;
    my $stri = $str;
    while ($stri) {
        $stri =~ /(...)/ and push @tris, $1;
        $stri =~ s/^.//;
    }
    return @tris;
}

# blazar: de-duplicate through a hash; returns the unique trigrams sorted.
sub do_hmapB {
    my %saw;
    my @tris = map {
        my $s = substr $str, $_, 3;
        $saw{$s}++ ? () : $s
    } 0 .. length($str) - 3;
    return sort { $a cmp $b } keys %saw;
}

# Skeeve: rewrite the string with s///g, then split it into chunks of three.
sub do_regexS2 {
    local $_ = $str;    # localized so the caller's $_ is left untouched
    s/(.)(.)(.)(?=(.)(.))/$1$2$3$2$3$4$3$4$5/g;
    return /(...)/g;
}

# Pip: repeatedly replace the leading trigram with its last two characters.
sub do_whileP {
    my @tris;
    local $_ = $str;    # localized so the caller's $_ is left untouched
    # s/// is slow!
    push @tris, $1 while (s/^(.(..))/$2/);
    return @tris;
}

# Pip: variation on do_regexS2 using nested captures.
sub do_regexP {
    local $_ = $str;    # localized so the caller's $_ is left untouched
    s/((?:.)((?:.)(.)))(?=(.)(.))/$1$2$4$3$4$5/g;
    return /(...)/g;
}

Results:

counmapA:6 hmapB :com mpu omp put ter ute hngramG :com mpu omp put ter ute mapB :com omp mpu put ute ter mapstrS :com omp mpu put ute ter mapstrS2:com omp mpu put ute ter recurseO:com omp mpu put ute ter regexGF :com omp mpu put ute ter regexP :com omp mpu put ute ter regexS :com omp mpu put ute ter regexS2 :com omp mpu put ute ter substrGF:com omp mpu put ute ter substrS :com omp mpu put ute ter unpackLG:com omp mpu put ute ter whileAB :com omp mpu put ute ter whileM :com omp mpu put ute ter whileP :com omp mpu put ute ter whileZ :com omp mpu put ute ter Rate recurseO hngramG whileP hmapB whileAB whileZ regexS2 + regexP whileM mapstrS2 regexGF mapB substrGF substrS unpackLG mapstr +S counmapA regexS recurseO 14490/s -- -48% -57% -63% -68% -70% -74% + -80% -82% -83% -83% -84% -86% -87% -87% -92 +% -97% -98% hngramG 27837/s 92% -- -18% -29% -39% -43% -51% + -61% -66% -67% -67% -70% -74% -75% -75% -84 +% -95% -97% whileP 33810/s 133% 21% -- -14% -26% -31% -40% + -52% -58% -60% -60% -64% -68% -69% -70% -81 +% -94% -96% hmapB 39456/s 172% 42% 17% -- -14% -19% -31% + -44% -51% -53% -53% -58% -63% -64% -65% -78 +% -93% -95% whileAB 45948/s 217% 65% 36% 16% -- -6% -19% + -35% -43% -45% -45% -51% -57% -58% -59% -74 +% -92% -94% whileZ 48651/s 236% 75% 44% 23% 6% -- -14% + -32% -40% -42% -42% -48% -55% -56% -57% -73 +% -91% -94% regexS2 56775/s 292% 104% 68% 44% 24% 17% -- + -20% -30% -32% -32% -39% -47% -48% -50% -68 +% -90% -93% regexP 71087/s 391% 155% 110% 80% 55% 46% 25% + -- -12% -15% -15% -24% -34% -35% -37% -60 +% -87% -91% whileM 80736/s 457% 190% 139% 105% 76% 66% 42% + 14% -- -3% -4% -13% -25% -26% -28% -55 +% -85% -90% mapstrS2 83510/s 476% 200% 147% 112% 82% 72% 47% + 17% 3% -- -1% -10% -22% -24% -26% -53 +% -85% -90% regexGF 84020/s 480% 202% 149% 113% 83% 73% 48% + 18% 4% 1% -- -10% -22% -23% -25% -53 +% -85% -90% mapB 92980/s 542% 234% 175% 136% 102% 91% 64% + 31% 15% 11% 11% -- -13% -15% -18% -48 +% -83% -89% substrGF 107184/s 640% 285% 217% 172% 133% 120% 
89% + 51% 33% 28% 28% 15% -- -2% -5% -40 +% -81% -87% substrS 109713/s 657% 294% 224% 178% 139% 126% 93% + 54% 36% 31% 31% 18% 2% -- -3% -38 +% -80% -86% unpackLG 112733/s 678% 305% 233% 186% 145% 132% 99% + 59% 40% 35% 34% 21% 5% 3% -- -37 +% -80% -86% mapstrS 177535/s 1125% 538% 425% 350% 286% 265% 213% + 150% 120% 113% 111% 91% 66% 62% 57% - +- -68% -78% counmapA 550801/s 3701% 1879% 1529% 1296% 1099% 1032% 870% + 675% 582% 560% 556% 492% 414% 402% 389% 210 +% -- -32% regexS 810891/s 5496% 2813% 2298% 1955% 1665% 1567% 1328% + 1041% 904% 871% 865% 772% 657% 639% 619% 357 +% 47% -- counmapA:10 hmapB :Che Nac ach cho ees esy hee hoz syN yNa hngramG :ach che cho ees esy hee hoz nac syn yna mapB :Che hee ees esy syN yNa Nac ach cho hoz mapstrS :Che hee ees esy syN yNa Nac ach cho hoz mapstrS2:Che hee ees esy syN yNa Nac ach cho hoz recurseO:Che hee ees esy syN yNa Nac ach cho hoz regexGF :Che hee ees esy syN yNa Nac ach cho hoz regexP :Che hee ees esy syN yNa Nac ach cho hoz regexS :Che hee ees esy syN yNa Nac ach cho hoz regexS2 :Che hee ees esy syN yNa Nac ach cho hoz substrGF:Che hee ees esy syN yNa Nac ach cho hoz substrS :Che hee ees esy syN yNa Nac ach cho hoz unpackLG:Che hee ees esy syN yNa Nac ach cho hoz whileAB :Che hee ees esy syN yNa Nac ach cho hoz whileM :Che hee ees esy syN yNa Nac ach cho hoz whileP :Che hee ees esy syN yNa Nac ach cho hoz whileZ :Che hee ees esy syN yNa Nac ach cho hoz Rate recurseO whileP hngramG hmapB whileAB whileZ regexS2 + regexP whileM regexGF mapstrS2 mapB substrS substrGF unpackLG mapst +rS counmapA regexS recurseO 9955/s -- -51% -55% -60% -67% -68% -75% + -81% -81% -82% -82% -83% -86% -87% -87% -9 +2% -98% -99% whileP 20287/s 104% -- -9% -19% -33% -34% -50% + -61% -61% -63% -63% -66% -72% -72% -74% -8 +3% -96% -97% hngramG 22330/s 124% 10% -- -11% -26% -28% -44% + -57% -57% -59% -60% -63% -69% -70% -71% -8 +1% -96% -97% hmapB 25121/s 152% 24% 12% -- -17% -18% -38% + -51% -52% -54% -54% -58% -65% -66% -67% -7 +9% -96% -97% 
whileAB 30117/s 203% 48% 35% 20% -- -2% -25% + -42% -42% -45% -45% -50% -58% -59% -61% -7 +5% -95% -96% whileZ 30811/s 210% 52% 38% 23% 2% -- -23% + -40% -41% -44% -44% -49% -57% -58% -60% -7 +4% -95% -96% regexS2 40193/s 304% 98% 80% 60% 33% 30% -- + -22% -23% -26% -27% -33% -44% -46% -48% -6 +7% -93% -95% regexP 51569/s 418% 154% 131% 105% 71% 67% 28% + -- -1% -6% -6% -14% -28% -30% -33% -5 +7% -91% -93% whileM 52194/s 424% 157% 134% 108% 73% 69% 30% + 1% -- -4% -5% -13% -27% -29% -32% -5 +7% -91% -93% regexGF 54613/s 449% 169% 145% 117% 81% 77% 36% + 6% 5% -- -1% -9% -24% -26% -29% -5 +5% -90% -93% mapstrS2 55137/s 454% 172% 147% 119% 83% 79% 37% + 7% 6% 1% -- -8% -23% -25% -28% -5 +4% -90% -93% mapB 60124/s 504% 196% 169% 139% 100% 95% 50% + 17% 15% 10% 9% -- -16% -18% -22% -5 +0% -89% -92% substrS 71957/s 623% 255% 222% 186% 139% 134% 79% + 40% 38% 32% 31% 20% -- -2% -6% -4 +0% -87% -91% substrGF 73770/s 641% 264% 230% 194% 145% 139% 84% + 43% 41% 35% 34% 23% 3% -- -4% -3 +9% -87% -91% unpackLG 76663/s 670% 278% 243% 205% 155% 149% 91% + 49% 47% 40% 39% 28% 7% 4% -- -3 +6% -86% -90% mapstrS 120470/s 1110% 494% 439% 380% 300% 291% 200% + 134% 131% 121% 118% 100% 67% 63% 57% +-- -79% -85% counmapA 562196/s 5548% 2671% 2418% 2138% 1767% 1725% 1299% + 990% 977% 929% 920% 835% 681% 662% 633% 36 +7% -- -28% regexS 777546/s 7711% 3733% 3382% 2995% 2482% 2424% 1835% + 1408% 1390% 1324% 1310% 1193% 981% 954% 914% 54 +5% 38% -- counmapA:19 hmapB :Ano Hac Jus Per ack cke erP erl her ker lHa not oth rPe rlH s +tA tAn the ust hngramG :ack ano cke erl erp hac her jus ker lha not oth per rlh rpe s +ta tan the ust mapB :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker mapstrS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker mapstrS2:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker recurseO:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexGF :Jus ust stA 
tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexP :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexS2 :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker substrGF:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker substrS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker unpackLG:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileAB :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileM :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileP :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileZ :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker Rate recurseO whileP hmapB hngramG whileAB whileZ regexS2 + whileM regexP regexGF mapstrS2 mapB substrGF substrS unpackLG mapst +rS counmapA regexS recurseO 5910/s -- -44% -55% -61% -65% -66% -74% + -80% -80% -80% -81% -83% -86% -86% -87% -9 +1% -99% -99% whileP 10645/s 80% -- -20% -29% -36% -38% -53% + -63% -64% -65% -65% -69% -74% -75% -76% -8 +4% -98% -99% hmapB 13274/s 125% 25% -- -12% -21% -23% -41% + -54% -55% -56% -56% -61% -68% -69% -70% -8 +0% -98% -98% hngramG 15058/s 155% 41% 13% -- -10% -13% -33% + -48% -50% -50% -50% -56% -63% -64% -66% -7 +7% -97% -98% whileAB 16748/s 183% 57% 26% 11% -- -3% -26% + -42% -44% -44% -45% -51% -59% -60% -63% -7 +4% -97% -98% whileZ 17230/s 192% 62% 30% 14% 3% -- -24% + -41% -42% -43% -43% -50% -58% -59% -62% -7 +4% -97% -98% regexS2 22541/s 281% 112% 70% 50% 35% 31% -- + -22% -24% -25% -26% -34% -45% -47% -50% -6 +5% -96% -97% whileM 28980/s 390% 172% 118% 92% 73% 68% 29% + -- -3% -4% -5% -15% -29% -31% -35% -5 +6% -95% -96% regexP 29824/s 405% 180% 125% 98% 78% 73% 32% + 3% -- -1% -2% -13% -27% -29% -33% -5 
+4% -95% -96% regexGF 30118/s 410% 183% 127% 100% 80% 75% 34% + 4% 1% -- -1% -12% -26% -29% -33% -5 +4% -95% -96% mapstrS2 30416/s 415% 186% 129% 102% 82% 77% 35% + 5% 2% 1% -- -11% -26% -28% -32% -5 +3% -94% -96% mapB 34132/s 478% 221% 157% 127% 104% 98% 51% + 18% 14% 13% 12% -- -17% -19% -24% -4 +8% -94% -96% substrGF 40959/s 593% 285% 209% 172% 145% 138% 82% + 41% 37% 36% 35% 20% -- -3% -9% -3 +7% -93% -95% substrS 42164/s 613% 296% 218% 180% 152% 145% 87% + 45% 41% 40% 39% 24% 3% -- -6% -3 +5% -92% -95% unpackLG 44776/s 658% 321% 237% 197% 167% 160% 99% + 55% 50% 49% 47% 31% 9% 6% -- -3 +1% -92% -94% mapstrS 65163/s 1003% 512% 391% 333% 289% 278% 189% + 125% 118% 116% 114% 91% 59% 55% 46% +-- -88% -92% counmapA 548746/s 9185% 5055% 4034% 3544% 3177% 3085% 2334% + 1794% 1740% 1722% 1704% 1508% 1240% 1201% 1126% 74 +2% -- -32% regexS 803888/s 13502% 7452% 5956% 5239% 4700% 4566% 3466% + 2674% 2595% 2569% 2543% 2255% 1863% 1807% 1695% 113 +4% 46% -- counmapA:2 5 2 4 hmapB : An Ha Pe Ano Hac Jus Per ack cke er erl her ker l H not o +th r P rl st t A the ust hngramG : an ha pe ack ano cke er erl hac her jus ker l h not oth p +er r p rl st t a the ust mapB :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker mapstrS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker mapstrS2:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker recurseO:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexGF :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexP :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexS2 :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker substrGF:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker 
substrS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker unpackLG:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileAB :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileM :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileP :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileZ :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker Rate recurseO whileP hmapB hngramG whileAB whileZ regexS2 + whileM regexP regexGF mapstrS2 mapB substrGF substrS unpackLG mapst +rS counmapA regexS recurseO 5218/s -- -44% -55% -61% -64% -65% -74% + -79% -80% -80% -80% -83% -85% -86% -86% -9 +1% -98% -99% whileP 9309/s 78% -- -21% -31% -36% -38% -54% + -63% -64% -65% -65% -69% -74% -74% -76% -8 +4% -96% -99% hmapB 11712/s 124% 26% -- -13% -20% -21% -42% + -53% -55% -55% -55% -61% -67% -68% -69% -7 +9% -95% -99% hngramG 13524/s 159% 45% 15% -- -8% -9% -33% + -46% -48% -49% -49% -55% -62% -63% -65% -7 +6% -94% -98% whileAB 14629/s 180% 57% 25% 8% -- -2% -27% + -42% -44% -44% -44% -52% -59% -60% -62% -7 +4% -94% -98% whileZ 14913/s 186% 60% 27% 10% 2% -- -26% + -41% -43% -43% -43% -51% -58% -59% -61% -7 +4% -94% -98% regexS2 20096/s 285% 116% 72% 49% 37% 35% -- + -20% -23% -24% -24% -34% -44% -44% -48% -6 +5% -91% -98% whileM 25121/s 381% 170% 114% 86% 72% 68% 25% + -- -4% -4% -4% -17% -30% -31% -34% -5 +6% -89% -97% regexP 26065/s 399% 180% 123% 93% 78% 75% 30% + 4% -- -1% -1% -14% -27% -28% -32% -5 +4% -89% -97% regexGF 26304/s 404% 183% 125% 95% 80% 76% 31% + 5% 1% -- -0% -13% -27% -27% -31% -5 +4% -89% -97% mapstrS2 26305/s 404% 183% 125% 95% 80% 76% 31% + 5% 1% 0% -- -13% -27% -27% -31% -5 +4% -89% -97% mapB 30340/s 481% 226% 159% 124% 107% 103% 51% + 21% 16% 15% 15% -- -15% -16% -21% -4 +7% -87% -96% substrGF 35870/s 587% 285% 206% 165% 145% 141% 78% + 43% 38% 36% 36% 
18% -- -1% -6% -3 +7% -85% -96% substrS 36202/s 594% 289% 209% 168% 147% 143% 80% + 44% 39% 38% 38% 19% 1% -- -6% -3 +6% -84% -96% unpackLG 38331/s 635% 312% 227% 183% 162% 157% 91% + 53% 47% 46% 46% 26% 7% 6% -- -3 +2% -83% -95% mapstrS 56776/s 988% 510% 385% 320% 288% 281% 183% + 126% 118% 116% 116% 87% 58% 57% 48% +-- -76% -93% counmapA 231848/s 4343% 2391% 1880% 1614% 1485% 1455% 1054% + 823% 790% 781% 781% 664% 546% 540% 505% 30 +8% -- -73% regexS 847418/s 16139% 9004% 7136% 6166% 5693% 5583% 4117% + 3273% 3151% 3122% 3122% 2693% 2262% 2241% 2111% 139 +3% 266% --

Whee! ;)

Updated to include Skeeve's newest regex silliness && one of my own due to such inspiration.

-Pip@CPAN.Org
PipForPresident.Org


Comment on Re: finding number of contiguous letters
Select or Download Code
Re^2: finding number of contiguous letters
by otto (Beadle) on May 25, 2007 at 00:31 UTC
    very nice :)

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://617200]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others taking refuge in the Monastery: (9)
As of 2014-12-29 12:45 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    Is guessing a good strategy for surviving in the IT business?





    Results (187 votes), past polls