I am getting somewhat similar results. The difference is, I guess, that I initialize the count with the number of occurrences of the combination before adding the shorter words. The script runs for 3min26s; the output ends with:
adeimrst: 310
aeilnrst: 312
adeinrst: 313
aeloprst: 313
aeilnpst: 314
adeoprst: 316
adeilrst: 319
aeimprst: 328
adeiprst: 332
aeilprst: 344
aeinprst: 346
The script:
#!/usr/bin/perl
use warnings;
use strict;
use feature qw(say);
# Collect the distinct lower-cased words of at most 8 characters.
# The word list is read from the file named by the first command-line
# argument.  The sample words after __DATA__ are used when no file name
# is given or when the named file cannot be opened.
my %lc_words;
my $dict = shift;
my $FH;
if (defined $dict) {
    if (!open $FH, '<', $dict) {
        # Keep the fallback, but report it: silently analysing the sample
        # instead of a mistyped dictionary path gives misleading output.
        warn "Cannot open '$dict': $!; using built-in sample data\n";
        $FH = *DATA;
    }
}
else {
    # No file requested: avoid calling open() on an undefined name,
    # which would only trigger an "uninitialized value" warning.
    $FH = *DATA;
}
while (my $line = <$FH>) {
    chomp $line;
    next if $line eq q();         # a blank line is not a word
    next if length($line) > 8;    # only words of up to 8 letters matter
    $lc_words{ lc $line } = 1;    # hash keys de-duplicate case variants
}
# Report how many distinct words were kept.
say scalar keys %lc_words;
# Count how many distinct words share each multiset of letters.  The key
# is the word's letters in sorted order, so words that are anagrams of
# each other are counted under the same key.
my %sorted_count;
for my $word (keys %lc_words) {
    my @chars     = split //, $word;
    my $signature = join q(), sort @chars;
    $sorted_count{$signature}++;
}

# Print every letter combination with its word count, smallest count
# first, then a separator line before the next report.
my @by_count = sort { $sorted_count{$a} <=> $sorted_count{$b} }
               keys %sorted_count;
for my $signature (@by_count) {
    say "$signature: $sorted_count{$signature}";
}
say '-' x 78;
# For every letter combination, add the counts of all shorter
# combinations whose letters can be picked out of it.  Both strings hold
# their letters in sorted order, so "shorter is a sub-multiset of longer"
# is the same as "shorter is a subsequence of longer", which the regex
# l1.*l2.*l3... tests.  Each total starts from the combination's own
# word count.
my %summed = %sorted_count;
my @combinations = keys %sorted_count;
for my $length (1 .. 7) {
    warn $length;    # progress indicator on STDERR
    # Both partitions depend only on $length, so compute them once per
    # length rather than once per shorter combination.
    my @shorter = grep { length($_) == $length } @combinations;
    my @longer  = grep { length($_) >  $length } @combinations;
    for my $sorted (@shorter) {
        # quotemeta keeps characters such as '.' or '+' in a word from
        # being interpreted as regex syntax; qr// compiles the pattern
        # once for the whole inner loop.
        my $pattern = join '.*', map { quotemeta $_ } split //, $sorted;
        my $regex   = qr/$pattern/;
        for my $longer (@longer) {
            $summed{$longer} += $sorted_count{$sorted}
                if $longer =~ $regex;
        }
    }
}
# Print the totals in ascending order; ties are broken alphabetically so
# the output is reproducible from run to run.
say "$_: $summed{$_}"
    for sort { $summed{$a} <=> $summed{$b} or $a cmp $b }
    keys %summed;
__DATA__
ffffffff
fffffffa
afffffff
bfffffff
fffffffb
ffffbfff
a
aa
aaa
aaaa
aaaaa
aaaaaa
Update: Checking the result:
$ grep -E '^[aeinprst]{1,8}$' 2of12inf.txt | grep -vE '(.).*\1' | wc -l
346
Update 2: Testing duplicate characters:
$ grep -E '(.).*\1.*:' 1056884.out | tail -n1
aeiprsst: 279
$ grep -E '^[aeiprsst]{1,8}$' 2of12inf.txt | grep -Ev '([aeiprt]).*\1|s.*s.*s' | wc -l
279