I have an idea for an exhaustive solution. I haven't convinced myself it will work yet but it will require multiple phases and at least one of the phases will need to be written in C. I will update when I can play even if it turns out not to be viable.
Perl script. Used to filter the dictionary as well as determine the maximum number of times each letter is repeated in a word.
The C program. It uses a high water mark algorithm. To ensure it was working, I had it output every time the high water mark was reached or exceeded rather than waiting all the way until the end.
#include <stdio.h>
#include <stdlib.h>
// Program to count how many 8 character combinations are necessary fo
+r brute force
// Constants to remove magic numbers
#define LINE_LEN 11
#define LAST_LETTER 25
#define CONDENSED 16
#define ASCII 97 // 'a' is 97 in ASCII
#define DICTIONARY 40933
// Prototypes
inline short int is_subset(short int *entry, short int *have);
inline void populate_dictionary(short int *entry, char *line);
int main (void) {
short int best = 0;
short int word[DICTIONARY][CONDENSED];
int idx = 0;
static const char filename[] = "reduced.txt";
FILE *file = fopen (filename, "r");
if (file != NULL) {
char line[LINE_LEN];
while (fgets(line, sizeof line, file) != NULL) {
populate_dictionary(word[idx], line);
++idx;
}
fclose(file);
}
else {
perror(filename);
}
//exit(0);
static const short int max[26] = {4, 3, 3, 4, 4, 4, 4, 3, 3, 2, 3,
+ 4, 3, 4, 4, 3, 1, 4, 5, 3, 4, 2, 3, 2, 3, 4};
short int have[26] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
int i, j, k, l, m, n, o, p, q;
short int subsets;
// Evaluate all 12,461,993 possible 8 letter combinations
for (i = 0; i <= LAST_LETTER; i++) {
if (++have[i] > max[i]) {
--have[i];
continue;
}
for (j = i; j <= LAST_LETTER; j++) {
if (++have[j] > max[j]) {
--have[j];
continue;
}
for (k = j; k <= LAST_LETTER; k++) {
if (++have[k] > max[k]) {
--have[k];
continue;
}
for (l = k; l <= LAST_LETTER; l++) {
if (++have[l] > max[l]) {
--have[l];
continue;
}
for (m = l; m <= LAST_LETTER; m++) {
if (++have[m] > max[m]) {
--have[m];
continue;
}
for (n = m; n <= LAST_LETTER; n++) {
if (++have[n] > max[n]) {
--have[n];
continue;
}
for (o = n; o <= LAST_LETTER; o++) {
if (++have[o] > max[o]) {
--have[o];
continue;
}
for (p = o; p <= LAST_LETTER; p++) {
if (++have[p] > max[p]) {
--have[p];
continue;
}
subsets = 0;
for (idx = 0; idx < DICTIONARY; id
+x++) {
if (is_subset(word[idx], have)
+) {
++subsets;
}
}
if (subsets >= best) {
best = subsets;
for (idx = 0; idx <= LAST_LETT
+ER; idx++) {
for (q = 1; q <= have[idx]
+; q++) {
printf("%c", idx + ASC
+II);
}
}
printf(" = %i\n", best);
}
--have[p];
}
--have[o];
}
--have[n];
}
--have[m];
}
--have[l];
}
--have[k];
}
--have[j];
}
--have[i];
}
return 0;
}
inline short int is_subset(short int *entry, short int *have) {
short int i;
for (i = 0; i < CONDENSED; i = i + 2) {
if (entry[i] == -1) {
return 1;
}
if (entry[i + 1] > have[entry[i]]) {
return 0;
}
}
return 1;
}
inline void populate_dictionary(short int *entry, char *line) {
short int letter, i, j;
// The max word length is 8 and there are 2 pairs for each possibl
+e letter
// This makes 16 slots
// letter,count letter, count letter, count, etc
for (i = 0; i < CONDENSED; i = i + 2) {
entry[i] = -1; // letter
entry[i + 1] = 0; // count
}
// Count how many of each letter are in the word
for (i = 0; line[i] != '\n'; i++) {
letter = line[i] - ASCII;
for (j = 0; j < CONDENSED; j = j + 2) {
if (entry[j] == -1) {
entry[j] = letter;
++entry[j + 1];
break;
}
else if (entry[j] == letter) {
++entry[j + 1];
break;
}
}
}
}
I am sure there is a much better way of doing this. I will need to achieve over 5 million loops per second to achieve my goal of being completed within 24 hours. I kicked it off at 10:28PM EST. I will update again when it is completed.