To avoid cluttering the main node with too much code,
I omitted the program to generate the lookup table,
mentioned above in the "Memoization" section of the root node.
I include a standalone program here (built with a 64-bit
compile, with 32-bit int and 64-bit size_t) that generates the
"bytevecM" 4GB lookup table in case anyone wants to duplicate
what I did (this is not pretty code, only needs to be run once).
// Generate the "M" lookup table (aka bytemap).
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#define XmItemOf(array) (sizeof(array) / sizeof(array[0]))
// Start at one because '\0' needs escaping in Python.
#define MIN_SEARCH_CHAR 1
#define MAX_SEARCH_CHAR 128
// Next one is used to check for no hit
#define INVALID_SEARCH_CHAR 0
// Change this line for different mod
#define MOD_TARG 1001
// Change this line for different length
#define HASH_LEN 11
// Change these two lines to generate different bytemaps for C,L,X,V,I
+.
#define ZPD_BYTE "lookzpM.byte"
#define D_TARG 1000
// Maximum of all hits, including terminating zero.
#define MAX_ELT 256
#define MAXI 2147483647
#define MAXU 4294967295U
#define BYTE_MAP_SIZE_BYTES ((size_t)MAXU+1)
#define ARR_SIZE MAXU
static inline int rubymod(int x, int y) {
int xmody = x - (x / y) * y;
if (xmody && ((y ^ xmody) < 0))
{
xmody += y;
}
return xmody;
}
static int hitval[MAX_ELT];
static int nhitval;
static unsigned int histo[MAX_ELT];
int main()
{
int ii = 0;
int jj = 0;
unsigned int uu = 0;
unsigned int ucnt = 0;
unsigned char hitchar; // the (presumed unique) hit char
int k = 0;
int z = 0;
int q = 0;
int qq = 0;
int maxpos = 0;
int posind = 0;
printf("sizeof int=%d\n", (int)sizeof(int));
printf("sizeof long=%d\n", (int)sizeof(long));
printf("sizeof size_t=%d\n", (int)sizeof(size_t));
printf("sizeof void*=%d\n", (int)sizeof(void*));
if (sizeof(int) != 4) {
fprintf(stderr, "oops sizeof int != 4 (%d)\n", sizeof(int));
exit(1);
}
if (sizeof(hitval) != sizeof(int) * MAX_ELT) {
fprintf(stderr, "oops array hitval is padded (%d v %d)\n",
sizeof(hitval), sizeof(int) * MAX_ELT);
exit(1);
}
memset(histo, 0, sizeof(histo));
size_t siQuarter = BYTE_MAP_SIZE_BYTES/4;
if (siQuarter + siQuarter + siQuarter + siQuarter != BYTE_MAP_SIZE_
+BYTES) {
fprintf(stderr, "oops quarter is not even, quarter=%lu\n", (unsi
+gned long)siQuarter);
exit(1);
}
unsigned int uiQuarter = (unsigned int)siQuarter;
fprintf(stderr, "uiQuarter=%u\n", uiQuarter);
char* fnames[4];
FILE* fhs[4];
int i;
for (i = 0; i < 4; ++i) {
fnames[i] = (char*)malloc(64);
if (fnames[i] == NULL) {
fprintf(stderr, "out of memory\n");
exit(1);
}
sprintf(fnames[i], "%s-%d", ZPD_BYTE, i);
fprintf(stderr, "%d: %s\n", i, fnames[i]);
fhs[i] = fopen(fnames[i], "wb");
if (fhs[i] == NULL) {
fprintf(stderr, "oops open '%s' failed: errno=%d\n", fhs[i],
+errno);
exit(1);
}
}
int ifh = 0;
for (;;) {
ii = (int)uu;
if (uu % 10000000 == 0) {
printf("uu=%u ucnt=%u ii=%d: maxpos=%d posind=%d\n",
uu, ucnt, ii, maxpos, posind);
}
nhitval = 0;
memset(hitval, 0, sizeof(hitval));
qq = -1;
hitchar = INVALID_SEARCH_CHAR;
// We ignore q==0, 10, 13 because these three require escaping i
+n Python.
for (q = MIN_SEARCH_CHAR; q < MAX_SEARCH_CHAR; ++q) {
if (q == 10 || q == 13) continue;
jj = (1000003 * ii) ^ q;
jj ^= HASH_LEN;
if (jj == -1) jj = -2;
k = rubymod(jj, MOD_TARG);
if (k == D_TARG) {
hitval[nhitval] = MOD_TARG;
++nhitval;
if (nhitval >= MAX_ELT) {
fprintf(stderr, "oops nhitval=%d uu=%u ii=%d jj=%d (%d)\
+n",
nhitval, uu, ii, jj, MOD_TARG);
exit(1);
}
qq = q;
}
}
if (nhitval > maxpos) {
maxpos = nhitval;
posind = (int)uu;
}
if (nhitval > 0) {
if (nhitval != 1 || qq < 0) {
fprintf(stderr, "oops nhitval != 1 is =%d uu=%u ii=%d qq=%
+d (%d)\n",
nhitval, uu, ii, qq, MOD_TARG);
exit(1);
}
++ucnt;
hitchar = (unsigned char)qq;
}
++histo[nhitval];
if (uu == uiQuarter) {
printf("closing first file handle %d: uu=%u\n", ifh, uu);
fclose(fhs[ifh]);
++ifh;
}
else if (uu == uiQuarter * 2) {
printf("closing second file handle %d: uu=%u\n", ifh, uu);
fclose(fhs[ifh]);
++ifh;
}
else if (uu == uiQuarter * 3) {
printf("closing third file handle %d: uu=%u\n", ifh, uu);
fclose(fhs[ifh]);
++ifh;
}
if (fputc(hitchar, fhs[ifh]) == EOF) {
fprintf(stderr, "oops fputs '%s' failed: errno=%d\n", ZPD_BYT
+E, errno);
exit(1);
}
if (uu == ARR_SIZE) {
printf("breaking: uu=%u ii=%d\n", uu, ii);
break;
}
++uu;
}
printf("closing last file handle %d: uu=%u\n", ifh, uu);
fclose(fhs[ifh]);
printf("ucnt=%u maxpos=%d (ind=%d)\n", ucnt, maxpos, posind);
printf("\n");
printf("histo:\n");
for (z = 0; z < XmItemOf(histo); ++z) {
printf("%d: %u\n", z, histo[z]);
}
printf("\n");
return 0;
}