Beefy Boxes and Bandwidth Generously Provided by pair Networks
There's more than one way to do things
 
PerlMonks  

Re: The 10**21 Problem (Part 2)

by eyepopslikeamosquito (Canon)
on May 18, 2014 at 04:07 UTC ( #1086481=note: print w/ replies, xml ) Need Help??


in reply to The 10**21 Problem (Part 2)

To avoid cluttering the main node with too much code, I omitted the program to generate the lookup table, mentioned above in the "Memoization" section of the root node.

I include a standalone program here (built with a 64-bit compile, with 32-bit int and 64-bit size_t) that generates the "bytevecM" 4GB lookup table in case anyone wants to duplicate what I did (this is not pretty code, only needs to be run once).

// Generate the "M" lookup table (aka bytemap). #include <stdio.h> #include <stdlib.h> #include <string.h> #include <errno.h> #define XmItemOf(array) (sizeof(array) / sizeof(array[0])) // Start at one because '\0' needs escaping in Python. #define MIN_SEARCH_CHAR 1 #define MAX_SEARCH_CHAR 128 // Next one is used to check for no hit #define INVALID_SEARCH_CHAR 0 // Change this line for different mod #define MOD_TARG 1001 // Change this line for different length #define HASH_LEN 11 // Change these two lines to generate different bytemaps for C,L,X,V,I +. #define ZPD_BYTE "lookzpM.byte" #define D_TARG 1000 // Maximum of all hits, including terminating zero. #define MAX_ELT 256 #define MAXI 2147483647 #define MAXU 4294967295U #define BYTE_MAP_SIZE_BYTES ((size_t)MAXU+1) #define ARR_SIZE MAXU static inline int rubymod(int x, int y) { int xmody = x - (x / y) * y; if (xmody && ((y ^ xmody) < 0)) { xmody += y; } return xmody; } static int hitval[MAX_ELT]; static int nhitval; static unsigned int histo[MAX_ELT]; int main() { int ii = 0; int jj = 0; unsigned int uu = 0; unsigned int ucnt = 0; unsigned char hitchar; // the (presumed unique) hit char int k = 0; int z = 0; int q = 0; int qq = 0; int maxpos = 0; int posind = 0; printf("sizeof int=%d\n", (int)sizeof(int)); printf("sizeof long=%d\n", (int)sizeof(long)); printf("sizeof size_t=%d\n", (int)sizeof(size_t)); printf("sizeof void*=%d\n", (int)sizeof(void*)); if (sizeof(int) != 4) { fprintf(stderr, "oops sizeof int != 4 (%d)\n", sizeof(int)); exit(1); } if (sizeof(hitval) != sizeof(int) * MAX_ELT) { fprintf(stderr, "oops array hitval is padded (%d v %d)\n", sizeof(hitval), sizeof(int) * MAX_ELT); exit(1); } memset(histo, 0, sizeof(histo)); size_t siQuarter = BYTE_MAP_SIZE_BYTES/4; if (siQuarter + siQuarter + siQuarter + siQuarter != BYTE_MAP_SIZE_ +BYTES) { fprintf(stderr, "oops quarter is not even, quarter=%lu\n", (unsi +gned long)siQuarter); exit(1); } unsigned int uiQuarter = (unsigned int)siQuarter; fprintf(stderr, "uiQuarter=%u\n", uiQuarter); char* fnames[4]; FILE* fhs[4]; int i; for (i = 0; i < 4; ++i) { fnames[i] = (char*)malloc(64); if (fnames[i] == NULL) { fprintf(stderr, "out of memory\n"); exit(1); } sprintf(fnames[i], "%s-%d", ZPD_BYTE, i); fprintf(stderr, "%d: %s\n", i, fnames[i]); fhs[i] = fopen(fnames[i], "wb"); if (fhs[i] == NULL) { fprintf(stderr, "oops open '%s' failed: errno=%d\n", fhs[i], +errno); exit(1); } } int ifh = 0; for (;;) { ii = (int)uu; if (uu % 10000000 == 0) { printf("uu=%u ucnt=%u ii=%d: maxpos=%d posind=%d\n", uu, ucnt, ii, maxpos, posind); } nhitval = 0; memset(hitval, 0, sizeof(hitval)); qq = -1; hitchar = INVALID_SEARCH_CHAR; // We ignore q==0, 10, 13 because these three require escaping i +n Python. for (q = MIN_SEARCH_CHAR; q < MAX_SEARCH_CHAR; ++q) { if (q == 10 || q == 13) continue; jj = (1000003 * ii) ^ q; jj ^= HASH_LEN; if (jj == -1) jj = -2; k = rubymod(jj, MOD_TARG); if (k == D_TARG) { hitval[nhitval] = MOD_TARG; ++nhitval; if (nhitval >= MAX_ELT) { fprintf(stderr, "oops nhitval=%d uu=%u ii=%d jj=%d (%d)\ +n", nhitval, uu, ii, jj, MOD_TARG); exit(1); } qq = q; } } if (nhitval > maxpos) { maxpos = nhitval; posind = (int)uu; } if (nhitval > 0) { if (nhitval != 1 || qq < 0) { fprintf(stderr, "oops nhitval != 1 is =%d uu=%u ii=%d qq=% +d (%d)\n", nhitval, uu, ii, qq, MOD_TARG); exit(1); } ++ucnt; hitchar = (unsigned char)qq; } ++histo[nhitval]; if (uu == uiQuarter) { printf("closing first file handle %d: uu=%u\n", ifh, uu); fclose(fhs[ifh]); ++ifh; } else if (uu == uiQuarter * 2) { printf("closing second file handle %d: uu=%u\n", ifh, uu); fclose(fhs[ifh]); ++ifh; } else if (uu == uiQuarter * 3) { printf("closing third file handle %d: uu=%u\n", ifh, uu); fclose(fhs[ifh]); ++ifh; } if (fputc(hitchar, fhs[ifh]) == EOF) { fprintf(stderr, "oops fputs '%s' failed: errno=%d\n", ZPD_BYT +E, errno); exit(1); } if (uu == ARR_SIZE) { printf("breaking: uu=%u ii=%d\n", uu, ii); break; } ++uu; } printf("closing last file handle %d: uu=%u\n", ifh, uu); fclose(fhs[ifh]); printf("ucnt=%u maxpos=%d (ind=%d)\n", ucnt, maxpos, posind); printf("\n"); printf("histo:\n"); for (z = 0; z < XmItemOf(histo); ++z) { printf("%d: %u\n", z, histo[z]); } printf("\n"); return 0; }


Comment on Re: The 10**21 Problem (Part 2)
Download Code
Re^2: The 10**21 Problem (Part 2)
by oiskuu (Pilgrim) on May 18, 2014 at 09:21 UTC

    $ perl -lwe '$/ = \128; ++$hist[tr/\0//c] while <>; print "$_: $hist[$ +_]" for 0..$#hist;' lookzpM.byte-* 0: 12003438 1: 148200 2: 147874 ... 42: 330481 43: 33028

    This means you have plenty of room to pack the lines as: {N} ({i}{x})* {pad}.
    Compute q8 = (i ^ m7) & 127; q9 = (x ^ HASH_LEN);

    Average 15.984 elements per line. Lots of looping and branches can be avoided this way.

    Update. One more thing occurs to me. Indeed, those files compress rather well: lzma packs them 1:2500!

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://1086481]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others having an uproarious good time at the Monastery: (6)
As of 2014-09-03 02:44 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    My favorite cookbook is:










    Results (35 votes), past polls