Beefy Boxes and Bandwidth Generously Provided by pair Networks
No such thing as a small change
 
PerlMonks  

Re: finding number of contiguous letters

by PipTigger (Hermit)
on May 24, 2007 at 10:20 UTC ( #617200=note: print w/ replies, xml ) Need Help??


in reply to finding number of contiguous letters

So I've gotten silly drawing all of the above together. =)

#!/usr/bin/perl
# 75O0K8S - Benchmark all the Trigram-finders!
# (with liberty taken to reformat the contributed snippets aggressively)
#
# For every test string, each candidate's output is printed once so the
# results can be compared by eye, and then all candidates are timed against
# each other with Benchmark::cmpthese.
use strict;
use warnings;
use Benchmark qw(cmpthese);
use Text::Ngram qw(ngram_counts add_to_counts);

my @strs = qw(computer CheesyNachoz JustAnotherPerlHacker);
push(@strs, 'Just Another Perl Hacker');    # add a test string w/ spaces too

# The string currently under test.  Every do_* candidate reads this
# file-scoped variable instead of taking an argument.
my $str;

# CodeRefName => PerlMonkAuthor (in post order)
my %cnms = (
    'whileZ'   => 'Zaxo',
    'recurseO' => 'otto',    # not sure how to do magic blazar goto Tail-Recursion
    'whileM'   => 'Marknel',
    'mapB'     => 'blazar',
    'hngramG'  => 'graff',
    'substrGF' => 'GrandFather',
    'unpackLG' => 'Limbic~Region && GrandFather',
    'regexGF'  => 'GrandFather',
    'mapstrS'  => 'Skeeve',
    'substrS'  => 'Skeeve',
    'regexS'   => 'Skeeve',
    'counmapA' => 'andreas1234567',
    'mapstrS2' => 'Skeeve',
    'whileAB'  => 'Anonymous Monk && blazar',
    'hmapB'    => 'blazar',
    'regexS2'  => 'Skeeve',
    'whileP'   => 'Pip',
    'regexP'   => 'Pip',
);

# Name => code ref of the matching do_<Name> sub.  Taking a reference with
# \&{"string"} is the one symbolic-reference form that "use strict" permits.
my $crfs;
$crfs->{$_} = \&{"do_$_"} for (keys(%cnms));

# Benchmark invokes the code it times in void context.  A candidate that
# ends in "return <list expression>" would then evaluate that expression in
# non-list context (a single m//g step, a map that builds no list, ...) and
# look far faster than candidates that fill an array first.  Timing these
# wrappers forces every candidate to produce its complete list.
my %timed;
for my $cnam (keys(%{$crfs})) {
    my $code = $crfs->{$cnam};
    $timed{$cnam} = sub { my @tris = $code->(); return; };
}

for my $strn (@strs) {
    $str = $strn;    # assign global $str for all subs
    for my $cnam (sort { $a cmp $b } keys(%{$crfs})) {
        printf("%-8s:%s\n", $cnam, join ' ', $crfs->{$cnam}->());
    }
    cmpthese(-1, \%timed);
}

# Zaxo: match three characters, then step pos() back by two so that the
# next match overlaps the previous one.
sub do_whileZ {
    local $_ = $str;
    my @tris;
    pos() -= 2, push @tris, $1 while /(...)/g;
    return @tris;
}

# Recursive helper for do_recurseO.
#   $cnt    - highest index of a full working array (2 for trigrams)
#   $ra_out - array ref collecting the joined n-grams
#   $ra_val - array ref used as the sliding working window
#   $ra_in  - array ref of remaining input letters (consumed)
sub recurse {
    my ($cnt, $ra_out, $ra_val, $ra_in) = @_;
    if ($#$ra_val != $cnt) {    # working array not full
        if ($#$ra_in > -1) {    # fill it up
            push(@$ra_val, shift(@$ra_in));
            recurse($cnt, $ra_out, $ra_val, $ra_in);
        }
        else {
            return;             # done with list
        }
    }
    else {
        # add to output array, joined ltr string, rip off first ltr
        push(@$ra_out, join('', @$ra_val));
        shift(@$ra_val);
        # add new ltr, if any remaining
        push(@$ra_val, shift(@$ra_in)) if ($#$ra_in > -1);
        recurse($cnt, $ra_out, $ra_val, $ra_in);
    }
}

# otto: input letter array, output trigram string array, && values workspace
sub do_recurseO {
    my @in = split('', $str);
    my @tris;
    my @val;
    recurse(2, \@tris, \@val, \@in);
    return @tris;
}

# Marknel: index-driven while loop over substr.
sub do_whileM {
    my $strlen   = length($str);
    my $loop_num = $strlen - 2;
    my @tris;
    my $ndx = 0;
    while ($ndx < $loop_num) {
        my $tri = substr $str, $ndx, 3;
        push @tris, $tri;
        $ndx++;
    }
    return @tris;
}

# blazar: map over every start offset.
sub do_mapB {
    my @tris = map { substr $str, $_, 3 } 0 .. length($str) - 3;
    return @tris;
}

# graff: Text::Ngram; returns the sorted keys of the hash that
# ngram_counts() returns.
sub do_hngramG {
    my $href = ngram_counts($str, 3);
    return sort { $a cmp $b } keys %{$href};
}

# GrandFather: statement-modifier for loop over substr.
sub do_substrGF {
    my @tris;
    push @tris, substr $str, $_, 3 for 0 .. length($str) - 3;
    return @tris;
}

# Limbic~Region && GrandFather: unpack three chars, back up two bytes, repeat.
# HTTP://Perl.Com/doc/manual/html/pod/perlfunc/unpack.html
sub do_unpackLG {
    my $mats = length($str) - 2;
    my $tmpl = 'a3XX' x $mats;
    my @tris = unpack($tmpl, $str);
    return @tris;
}

# GrandFather: capture inside a zero-width look-ahead so matches overlap.
sub do_regexGF {
    my @tris = $str =~ /(?=(...))/g;
    return @tris;
}

# Skeeve: map over end offsets, returning the list directly.
sub do_mapstrS {
    return map substr($str, $_ - 3, 3), (3 .. length $str);
}

# Skeeve: same loop as do_substrGF.
sub do_substrS {
    my @tris;
    push @tris, substr $str, $_, 3 for 0 .. length($str) - 3;
    return @tris;
}

# Skeeve: same regex as do_regexGF, returned without a temporary array.
sub do_regexS {
    return $str =~ /(?=(...))/g;
}

# andreas1234567: returns, per whitespace-separated word, the number of
# trigrams it contains (length - 2) rather than the trigrams themselves.
sub do_counmapA {
    return map { (length($_) - 2) } split('\s+', $str);
}

# Skeeve: do_mapstrS with the n-gram length held in a variable.
sub do_mapstrS2 {
    my $len  = 3;
    my @tris = map substr($str, $_ - $len, $len), ($len .. length $str);
    return @tris;
}

# Anonymous Monk && blazar: take the leading trigram, then chop one
# character off the front, until the working copy is false.
sub do_whileAB {
    my @tris;
    my $stri = $str;
    while ($stri) {
        $stri =~ /(...)/ and push @tris, $1;
        $stri =~ s/^.//;
    }
    return @tris;
}

# blazar: unique trigrams, returned sorted.  @tris is filled but the
# return value comes from the keys of %saw.
sub do_hmapB {
    my %saw;
    my @tris = map { my $s = substr $str, $_, 3; $saw{$s}++ ? () : $s }
        0 .. length($str) - 3;
    return sort { $a cmp $b } keys %saw;
}

# Skeeve: expand every group of three characters (that has two more
# characters after it) into its three overlapping trigrams, then cut the
# expanded string into threes.
# NOTE(review): when length($str) is 4 or more and length % 3 == 1 the
# last group has only one character of look-ahead, so the final trigram
# appears to be dropped.  None of the test strings has such a length.
sub do_regexS2 {
    local $_ = $str;    # keep the caller's $_ intact, as do_whileZ does
    s/(.)(.)(.)(?=(.)(.))/$1$2$3$2$3$4$3$4$5/g;
    return /(...)/g;
}

# Pip: repeatedly replace the leading trigram with its last two characters.
sub do_whileP {
    my @tris;
    local $_ = $str;    # s/// is slow!
    push @tris, $1 while (s/^(.(..))/$2/);
    return @tris;
}

# Pip: variation of do_regexS2 using nested captures.
# NOTE(review): same apparent length % 3 == 1 limitation as do_regexS2.
sub do_regexP {
    local $_ = $str;    # keep the caller's $_ intact, as do_whileZ does
    s/((?:.)((?:.)(.)))(?=(.)(.))/$1$2$4$3$4$5/g;
    return /(...)/g;
}

Results:

counmapA:6 hmapB :com mpu omp put ter ute hngramG :com mpu omp put ter ute mapB :com omp mpu put ute ter mapstrS :com omp mpu put ute ter mapstrS2:com omp mpu put ute ter recurseO:com omp mpu put ute ter regexGF :com omp mpu put ute ter regexP :com omp mpu put ute ter regexS :com omp mpu put ute ter regexS2 :com omp mpu put ute ter substrGF:com omp mpu put ute ter substrS :com omp mpu put ute ter unpackLG:com omp mpu put ute ter whileAB :com omp mpu put ute ter whileM :com omp mpu put ute ter whileP :com omp mpu put ute ter whileZ :com omp mpu put ute ter Rate recurseO hngramG whileP hmapB whileAB whileZ regexS2 + regexP whileM mapstrS2 regexGF mapB substrGF substrS unpackLG mapstr +S counmapA regexS recurseO 14490/s -- -48% -57% -63% -68% -70% -74% + -80% -82% -83% -83% -84% -86% -87% -87% -92 +% -97% -98% hngramG 27837/s 92% -- -18% -29% -39% -43% -51% + -61% -66% -67% -67% -70% -74% -75% -75% -84 +% -95% -97% whileP 33810/s 133% 21% -- -14% -26% -31% -40% + -52% -58% -60% -60% -64% -68% -69% -70% -81 +% -94% -96% hmapB 39456/s 172% 42% 17% -- -14% -19% -31% + -44% -51% -53% -53% -58% -63% -64% -65% -78 +% -93% -95% whileAB 45948/s 217% 65% 36% 16% -- -6% -19% + -35% -43% -45% -45% -51% -57% -58% -59% -74 +% -92% -94% whileZ 48651/s 236% 75% 44% 23% 6% -- -14% + -32% -40% -42% -42% -48% -55% -56% -57% -73 +% -91% -94% regexS2 56775/s 292% 104% 68% 44% 24% 17% -- + -20% -30% -32% -32% -39% -47% -48% -50% -68 +% -90% -93% regexP 71087/s 391% 155% 110% 80% 55% 46% 25% + -- -12% -15% -15% -24% -34% -35% -37% -60 +% -87% -91% whileM 80736/s 457% 190% 139% 105% 76% 66% 42% + 14% -- -3% -4% -13% -25% -26% -28% -55 +% -85% -90% mapstrS2 83510/s 476% 200% 147% 112% 82% 72% 47% + 17% 3% -- -1% -10% -22% -24% -26% -53 +% -85% -90% regexGF 84020/s 480% 202% 149% 113% 83% 73% 48% + 18% 4% 1% -- -10% -22% -23% -25% -53 +% -85% -90% mapB 92980/s 542% 234% 175% 136% 102% 91% 64% + 31% 15% 11% 11% -- -13% -15% -18% -48 +% -83% -89% substrGF 107184/s 640% 285% 217% 172% 133% 120% 
89% + 51% 33% 28% 28% 15% -- -2% -5% -40 +% -81% -87% substrS 109713/s 657% 294% 224% 178% 139% 126% 93% + 54% 36% 31% 31% 18% 2% -- -3% -38 +% -80% -86% unpackLG 112733/s 678% 305% 233% 186% 145% 132% 99% + 59% 40% 35% 34% 21% 5% 3% -- -37 +% -80% -86% mapstrS 177535/s 1125% 538% 425% 350% 286% 265% 213% + 150% 120% 113% 111% 91% 66% 62% 57% - +- -68% -78% counmapA 550801/s 3701% 1879% 1529% 1296% 1099% 1032% 870% + 675% 582% 560% 556% 492% 414% 402% 389% 210 +% -- -32% regexS 810891/s 5496% 2813% 2298% 1955% 1665% 1567% 1328% + 1041% 904% 871% 865% 772% 657% 639% 619% 357 +% 47% -- counmapA:10 hmapB :Che Nac ach cho ees esy hee hoz syN yNa hngramG :ach che cho ees esy hee hoz nac syn yna mapB :Che hee ees esy syN yNa Nac ach cho hoz mapstrS :Che hee ees esy syN yNa Nac ach cho hoz mapstrS2:Che hee ees esy syN yNa Nac ach cho hoz recurseO:Che hee ees esy syN yNa Nac ach cho hoz regexGF :Che hee ees esy syN yNa Nac ach cho hoz regexP :Che hee ees esy syN yNa Nac ach cho hoz regexS :Che hee ees esy syN yNa Nac ach cho hoz regexS2 :Che hee ees esy syN yNa Nac ach cho hoz substrGF:Che hee ees esy syN yNa Nac ach cho hoz substrS :Che hee ees esy syN yNa Nac ach cho hoz unpackLG:Che hee ees esy syN yNa Nac ach cho hoz whileAB :Che hee ees esy syN yNa Nac ach cho hoz whileM :Che hee ees esy syN yNa Nac ach cho hoz whileP :Che hee ees esy syN yNa Nac ach cho hoz whileZ :Che hee ees esy syN yNa Nac ach cho hoz Rate recurseO whileP hngramG hmapB whileAB whileZ regexS2 + regexP whileM regexGF mapstrS2 mapB substrS substrGF unpackLG mapst +rS counmapA regexS recurseO 9955/s -- -51% -55% -60% -67% -68% -75% + -81% -81% -82% -82% -83% -86% -87% -87% -9 +2% -98% -99% whileP 20287/s 104% -- -9% -19% -33% -34% -50% + -61% -61% -63% -63% -66% -72% -72% -74% -8 +3% -96% -97% hngramG 22330/s 124% 10% -- -11% -26% -28% -44% + -57% -57% -59% -60% -63% -69% -70% -71% -8 +1% -96% -97% hmapB 25121/s 152% 24% 12% -- -17% -18% -38% + -51% -52% -54% -54% -58% -65% -66% -67% -7 +9% -96% -97% 
whileAB 30117/s 203% 48% 35% 20% -- -2% -25% + -42% -42% -45% -45% -50% -58% -59% -61% -7 +5% -95% -96% whileZ 30811/s 210% 52% 38% 23% 2% -- -23% + -40% -41% -44% -44% -49% -57% -58% -60% -7 +4% -95% -96% regexS2 40193/s 304% 98% 80% 60% 33% 30% -- + -22% -23% -26% -27% -33% -44% -46% -48% -6 +7% -93% -95% regexP 51569/s 418% 154% 131% 105% 71% 67% 28% + -- -1% -6% -6% -14% -28% -30% -33% -5 +7% -91% -93% whileM 52194/s 424% 157% 134% 108% 73% 69% 30% + 1% -- -4% -5% -13% -27% -29% -32% -5 +7% -91% -93% regexGF 54613/s 449% 169% 145% 117% 81% 77% 36% + 6% 5% -- -1% -9% -24% -26% -29% -5 +5% -90% -93% mapstrS2 55137/s 454% 172% 147% 119% 83% 79% 37% + 7% 6% 1% -- -8% -23% -25% -28% -5 +4% -90% -93% mapB 60124/s 504% 196% 169% 139% 100% 95% 50% + 17% 15% 10% 9% -- -16% -18% -22% -5 +0% -89% -92% substrS 71957/s 623% 255% 222% 186% 139% 134% 79% + 40% 38% 32% 31% 20% -- -2% -6% -4 +0% -87% -91% substrGF 73770/s 641% 264% 230% 194% 145% 139% 84% + 43% 41% 35% 34% 23% 3% -- -4% -3 +9% -87% -91% unpackLG 76663/s 670% 278% 243% 205% 155% 149% 91% + 49% 47% 40% 39% 28% 7% 4% -- -3 +6% -86% -90% mapstrS 120470/s 1110% 494% 439% 380% 300% 291% 200% + 134% 131% 121% 118% 100% 67% 63% 57% +-- -79% -85% counmapA 562196/s 5548% 2671% 2418% 2138% 1767% 1725% 1299% + 990% 977% 929% 920% 835% 681% 662% 633% 36 +7% -- -28% regexS 777546/s 7711% 3733% 3382% 2995% 2482% 2424% 1835% + 1408% 1390% 1324% 1310% 1193% 981% 954% 914% 54 +5% 38% -- counmapA:19 hmapB :Ano Hac Jus Per ack cke erP erl her ker lHa not oth rPe rlH s +tA tAn the ust hngramG :ack ano cke erl erp hac her jus ker lha not oth per rlh rpe s +ta tan the ust mapB :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker mapstrS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker mapstrS2:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker recurseO:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexGF :Jus ust stA 
tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexP :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker regexS2 :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker substrGF:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker substrS :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker unpackLG:Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileAB :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileM :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileP :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker whileZ :Jus ust stA tAn Ano not oth the her erP rPe Per erl rlH lHa H +ac ack cke ker Rate recurseO whileP hmapB hngramG whileAB whileZ regexS2 + whileM regexP regexGF mapstrS2 mapB substrGF substrS unpackLG mapst +rS counmapA regexS recurseO 5910/s -- -44% -55% -61% -65% -66% -74% + -80% -80% -80% -81% -83% -86% -86% -87% -9 +1% -99% -99% whileP 10645/s 80% -- -20% -29% -36% -38% -53% + -63% -64% -65% -65% -69% -74% -75% -76% -8 +4% -98% -99% hmapB 13274/s 125% 25% -- -12% -21% -23% -41% + -54% -55% -56% -56% -61% -68% -69% -70% -8 +0% -98% -98% hngramG 15058/s 155% 41% 13% -- -10% -13% -33% + -48% -50% -50% -50% -56% -63% -64% -66% -7 +7% -97% -98% whileAB 16748/s 183% 57% 26% 11% -- -3% -26% + -42% -44% -44% -45% -51% -59% -60% -63% -7 +4% -97% -98% whileZ 17230/s 192% 62% 30% 14% 3% -- -24% + -41% -42% -43% -43% -50% -58% -59% -62% -7 +4% -97% -98% regexS2 22541/s 281% 112% 70% 50% 35% 31% -- + -22% -24% -25% -26% -34% -45% -47% -50% -6 +5% -96% -97% whileM 28980/s 390% 172% 118% 92% 73% 68% 29% + -- -3% -4% -5% -15% -29% -31% -35% -5 +6% -95% -96% regexP 29824/s 405% 180% 125% 98% 78% 73% 32% + 3% -- -1% -2% -13% -27% -29% -33% -5 
+4% -95% -96% regexGF 30118/s 410% 183% 127% 100% 80% 75% 34% + 4% 1% -- -1% -12% -26% -29% -33% -5 +4% -95% -96% mapstrS2 30416/s 415% 186% 129% 102% 82% 77% 35% + 5% 2% 1% -- -11% -26% -28% -32% -5 +3% -94% -96% mapB 34132/s 478% 221% 157% 127% 104% 98% 51% + 18% 14% 13% 12% -- -17% -19% -24% -4 +8% -94% -96% substrGF 40959/s 593% 285% 209% 172% 145% 138% 82% + 41% 37% 36% 35% 20% -- -3% -9% -3 +7% -93% -95% substrS 42164/s 613% 296% 218% 180% 152% 145% 87% + 45% 41% 40% 39% 24% 3% -- -6% -3 +5% -92% -95% unpackLG 44776/s 658% 321% 237% 197% 167% 160% 99% + 55% 50% 49% 47% 31% 9% 6% -- -3 +1% -92% -94% mapstrS 65163/s 1003% 512% 391% 333% 289% 278% 189% + 125% 118% 116% 114% 91% 59% 55% 46% +-- -88% -92% counmapA 548746/s 9185% 5055% 4034% 3544% 3177% 3085% 2334% + 1794% 1740% 1722% 1704% 1508% 1240% 1201% 1126% 74 +2% -- -32% regexS 803888/s 13502% 7452% 5956% 5239% 4700% 4566% 3466% + 2674% 2595% 2569% 2543% 2255% 1863% 1807% 1695% 113 +4% 46% -- counmapA:2 5 2 4 hmapB : An Ha Pe Ano Hac Jus Per ack cke er erl her ker l H not o +th r P rl st t A the ust hngramG : an ha pe ack ano cke er erl hac her jus ker l h not oth p +er r p rl st t a the ust mapB :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker mapstrS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker mapstrS2:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker recurseO:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexGF :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexP :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker regexS2 :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker substrGF:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker 
substrS :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker unpackLG:Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileAB :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileM :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileP :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker whileZ :Jus ust st t A An Ano not oth the her er r P Pe Per erl r +l l H Ha Hac ack cke ker Rate recurseO whileP hmapB hngramG whileAB whileZ regexS2 + whileM regexP regexGF mapstrS2 mapB substrGF substrS unpackLG mapst +rS counmapA regexS recurseO 5218/s -- -44% -55% -61% -64% -65% -74% + -79% -80% -80% -80% -83% -85% -86% -86% -9 +1% -98% -99% whileP 9309/s 78% -- -21% -31% -36% -38% -54% + -63% -64% -65% -65% -69% -74% -74% -76% -8 +4% -96% -99% hmapB 11712/s 124% 26% -- -13% -20% -21% -42% + -53% -55% -55% -55% -61% -67% -68% -69% -7 +9% -95% -99% hngramG 13524/s 159% 45% 15% -- -8% -9% -33% + -46% -48% -49% -49% -55% -62% -63% -65% -7 +6% -94% -98% whileAB 14629/s 180% 57% 25% 8% -- -2% -27% + -42% -44% -44% -44% -52% -59% -60% -62% -7 +4% -94% -98% whileZ 14913/s 186% 60% 27% 10% 2% -- -26% + -41% -43% -43% -43% -51% -58% -59% -61% -7 +4% -94% -98% regexS2 20096/s 285% 116% 72% 49% 37% 35% -- + -20% -23% -24% -24% -34% -44% -44% -48% -6 +5% -91% -98% whileM 25121/s 381% 170% 114% 86% 72% 68% 25% + -- -4% -4% -4% -17% -30% -31% -34% -5 +6% -89% -97% regexP 26065/s 399% 180% 123% 93% 78% 75% 30% + 4% -- -1% -1% -14% -27% -28% -32% -5 +4% -89% -97% regexGF 26304/s 404% 183% 125% 95% 80% 76% 31% + 5% 1% -- -0% -13% -27% -27% -31% -5 +4% -89% -97% mapstrS2 26305/s 404% 183% 125% 95% 80% 76% 31% + 5% 1% 0% -- -13% -27% -27% -31% -5 +4% -89% -97% mapB 30340/s 481% 226% 159% 124% 107% 103% 51% + 21% 16% 15% 15% -- -15% -16% -21% -4 +7% -87% -96% substrGF 35870/s 587% 285% 206% 165% 145% 141% 78% + 43% 38% 36% 36% 
18% -- -1% -6% -3 +7% -85% -96% substrS 36202/s 594% 289% 209% 168% 147% 143% 80% + 44% 39% 38% 38% 19% 1% -- -6% -3 +6% -84% -96% unpackLG 38331/s 635% 312% 227% 183% 162% 157% 91% + 53% 47% 46% 46% 26% 7% 6% -- -3 +2% -83% -95% mapstrS 56776/s 988% 510% 385% 320% 288% 281% 183% + 126% 118% 116% 116% 87% 58% 57% 48% +-- -76% -93% counmapA 231848/s 4343% 2391% 1880% 1614% 1485% 1455% 1054% + 823% 790% 781% 781% 664% 546% 540% 505% 30 +8% -- -73% regexS 847418/s 16139% 9004% 7136% 6166% 5693% 5583% 4117% + 3273% 3151% 3122% 3122% 2693% 2262% 2241% 2111% 139 +3% 266% --

Whee! ;)

Updated to include Skeeve's newest regex silliness && one of my own due to such inspiration.

-Pip@CPAN.Org
PipForPresident.Org


Comment on Re: finding number of contiguous letters
Select or Download Code
Replies are listed 'Best First'.
Re^2: finding number of contiguous letters
by otto (Beadle) on May 25, 2007 at 00:31 UTC
    very nice :)

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://617200]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others meditating upon the Monastery: (5)
As of 2015-07-28 05:17 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    The top three priorities of my open tasks are (in descending order of likelihood to be worked on) ...









    Results (252 votes), past polls