C:\test\Judy>cl /W3 /Ot /favor:INTEL64 /MT /LD Judy.c Microsoft (R) C/C++ Optimizing Compiler Version 15.00.21022.08 for x64 Copyright (C) Microsoft Corporation. All rights reserved. Judy.c Microsoft (R) Incremental Linker Version 9.00.21022.08 Copyright (C) Microsoft Corporation. All rights reserved. /out:Judy.dll /dll /implib:Judy.lib Judy.obj Creating library Judy.lib and object Judy.exp #### #include #include #include #include "..\mytypes.h" #include "Judy.h" #define TAG printf( "%s:%u\n", __FILE__, __LINE__ ) char *odo( char *odo ) { I32 wheel = (I32)strlen( odo )-1; while( odo[ wheel ] == 'z' && wheel >= 0 ) odo[ wheel-- ] = 'a'; return wheel < 0 ? NULL : ( ++odo[ wheel ], odo ); } int main( int argc, char **argv ) { char *name = argc > 1 ? argv[ 1 ] : "aaaa"; U32 size = (U32)strlen( name ); U64 addr = 1ull; void *addrByName = judy_open( size, 0 ); // size of keys (+1 for nulls added internally) void *nameByAddr = judy_open( 0, 1 ); // size calculated internally as depth * 8 (or 4 for 32-bit) ULONG64 start, end, hz = 2394046161; int cpuInfo[4]; do { JudySlot *addrSlot = judy_cell( addrByName, name, size ); JudySlot *nameSlot = judy_cell( nameByAddr, (void*)&addr, 1 ); *addrSlot = addr; *nameSlot = (U64)_strdup( name ); ++addr; } while( odo( name ) ); printf( "Check memory: " ); getc( stdin ); __cpuid( cpuInfo, 0 ); start = __rdtsc(); do { JudySlot *addrSlot = judy_slot( addrByName, name, size ); U64 gotAddr = *addrSlot; JudySlot *nameSlot = judy_slot( nameByAddr, (void*)&gotAddr, 1 ); char *gotName = (char*)*nameSlot; if( strcmp( name, gotName ) ) fprintf( stderr, "name:'%s' != gotName:'%s'\n", name, gotName ), exit( -__LINE__ ); } while( odo( name ) ); __cpuid( cpuInfo, 0 ); end = __rdtsc(); printf( "Bidi lookup of %u pairs took: %.3f seconds\n", (int)pow( (double)26, (double)size ), (double)( end - start) / (double)hz ) ; printf( "Check memory: " ); getc( stdin ); return 1; } #### C:\test\Judy>cl /W3 /Ot JudyTest.c Judy.lib Microsoft (R) C/C++ Optimizing Compiler Version 15.00.21022.08 for x64 Copyright (C) Microsoft Corporation. All rights reserved. JudyTest.c Microsoft (R) Incremental Linker Version 9.00.21022.08 Copyright (C) Microsoft Corporation. All rights reserved. /out:JudyTest.exe JudyTest.obj Judy.lib #### C:\test\Judy>JudyTest.exe aaaaa Check memory: Bidi lookup of 11881376 pairs took: 6.325 seconds Check memory: 524,332k #### #! perl -slw package Judy; use strict; use Inline C => Config => BUILD_NOISY => 1; use Inline C => <<'END_C', NAME => 'Judy', CLEAN_AFTER_BUILD =>0, TYPEMAPS => '/test/Judy/Judy.typemap'; #undef malloc #undef calloc #undef free #define CLASS "Judy" // Judy arrays 23 NOV 2012 // Author Karl Malbrain, malbrain@yahoo.com // with assistance from Jan Weiss. // Simplified judy arrays for strings and integers // Adapted from the ideas of Douglas Baskins of HP. // Map a set of keys to corresponding memory cells (uints). // Each cell must be set to a non-zero value by the caller. // String mappings are denoted by calling judy_open with zero as // the second argument. Integer mappings are denoted by calling // judy_open with the Integer depth of the Judy Trie as the second // argument. // functions: // judy_open: open a new judy array returning a judy object. // judy_close: close an open judy array, freeing all memory. // judy_clone: clone an open judy array, duplicating the stack. // judy_data: allocate data memory within judy array for external use. // judy_cell: insert a string into the judy array, return cell pointer. // judy_strt: retrieve the cell pointer greater than or equal to given key // judy_slot: retrieve the cell pointer, or return NULL for a given key. // judy_key: retrieve the string value for the most recent judy query. // judy_end: retrieve the cell pointer for the last string in the array. // judy_nxt: retrieve the cell pointer for the next string in the array. // judy_prv: retrieve the cell pointer for the prev string in the array. // judy_del: delete the key and cell for the current stack entry. #include "Judy.h" typedef unsigned __int64 U64; Judy *new( uint max, uint depth ) { return judy_open( max, depth ); } __forceinline U32 addNum( Judy *judy, uchar *buff, uint max, U64 value ) { JudySlot *slot = judy_cell( judy, buff, max ); if( !slot ) return 0; *slot = value; return 1; } __forceinline U64 getNum( Judy *judy, uchar *buff, uint max ) { JudySlot *slot = judy_slot( judy, buff, max ); return *slot; } __forceinline U32 addStr( Judy *judy, U64 buff, char *value ) { JudySlot *slot = judy_cell( judy, (void*)&buff, 1 ); if( !slot ) return 0; *slot = (U64)value; return 1; } __forceinline char *getStr( Judy *judy, U64 buff ) { JudySlot *slot = judy_slot( judy, (void*)&buff, 1 ); if( !slot ) return 0; return (char*)*slot; } // open judy object call with max key size and Integer tree depth. __forceinline Judy *judy_open( uint max, uint depth ) { JudySeg *seg; Judy *judy; uint amt; if( depth ) max = JUDY_key_size * depth; else max++; // allow for zero terminator on keys if( (seg = malloc(JUDY_seg)) ) { seg->seg = NULL; seg->next = JUDY_seg; } else { return NULL; } amt = sizeof(Judy) + max * sizeof(JudyStack); if( amt & (JUDY_cache_line - 1) ) amt |= JUDY_cache_line - 1, amt++; seg->next -= (JudySlot)seg & (JUDY_cache_line - 1); seg->next -= amt; judy = (Judy *)((uchar *)seg + seg->next); memset(judy, 0, amt); judy->depth = depth; judy->seg = seg; judy->max = max; return judy; } __forceinline void judy_close( Judy *judy ) { JudySeg *seg, *nxt = judy->seg; while( (seg = nxt) ) nxt = seg->seg, free( seg ); } // allocate judy node __forceinline void *judy_alloc( Judy *judy, uint type ) { uint amt, idx, min; JudySeg *seg; void **block, **rtn; if( !judy->seg ) return NULL; if( type == JUDY_radix ) type = JUDY_radix_equiv; if( type == JUDY_span ) type = JUDY_span_equiv; amt = JudySize[type]; if( amt & 0x07 ) amt |= 0x07, amt += 1; // see if free block is already available if( (block = judy->reuse[type]) ) { judy->reuse[type] = *block; memset (block, 0, amt); return (void *)block; } // break down available larger block for reuse into smaller blocks if( type >= JUDY_1 ) for( idx = type; idx++ < JUDY_max; ) if( block = judy->reuse[idx] ) { judy->reuse[idx] = *block; while( idx-- > type) { judy->reuse[idx] = block + JudySize[idx] / sizeof(void *); block[JudySize[idx] / sizeof(void *)] = 0; } memset (block, 0, amt); return (void *)block; } min = amt < JUDY_cache_line ? JUDY_cache_line : amt; if( judy->seg->next < min + sizeof(*seg) ) { if( (seg = malloc (JUDY_seg)) ) { seg->next = JUDY_seg; seg->seg = judy->seg; judy->seg = seg; seg->next -= (JudySlot)seg & (JUDY_cache_line - 1); } else { return NULL; } } // generate additional free blocks to fill up to cache line size rtn = (void **)((uchar *)judy->seg + judy->seg->next - amt); for( idx = type; amt & (JUDY_cache_line - 1); amt <<= 1 ) { block = (void **)((uchar *)judy->seg + judy->seg->next - 2 * amt); judy->reuse[idx++] = block; *block = 0; } judy->seg->next -= amt; memset (rtn, 0, JudySize[type]); return (void *)rtn; } __forceinline void *judy_data( Judy *judy, uint amt ) { JudySeg *seg; void *block; if( !judy->seg ) return NULL; if( amt & (JUDY_cache_line - 1)) amt |= (JUDY_cache_line - 1), amt += 1; if( judy->seg->next < amt + sizeof(*seg) ) { if( (seg = malloc (JUDY_seg)) ) { seg->next = JUDY_seg; seg->seg = judy->seg; judy->seg = seg; seg->next -= (JudySlot)seg & (JUDY_cache_line - 1); } else { return NULL; } } judy->seg->next -= amt; block = (void *)((uchar *)judy->seg + judy->seg->next); memset (block, 0, amt); return block; } __forceinline void *judy_clone( Judy *judy ) { Judy *clone; uint amt; amt = sizeof(Judy) + judy->max * sizeof(JudyStack); clone = judy_data (judy, amt); memcpy (clone, judy, amt); clone->seg = NULL; // stop allocations from cloned array return clone; } __forceinline void judy_free( Judy *judy, void *block, int type ) { if( type == JUDY_radix ) type = JUDY_radix_equiv; if( type == JUDY_span ) type = JUDY_span_equiv; *((void **)(block)) = judy->reuse[type]; judy->reuse[type] = (void **)block; return; } // assemble key from current path __forceinline int judy_key( Judy *judy, uchar *buff, uint max ) { judyvalue *dest = (judyvalue *)buff; uint len = 0, idx = 0, depth; int slot, off, type; judyvalue value; uchar *base; int keysize; if( judy->depth ) max = judy->depth * JUDY_key_size; else max--; // leave room for zero terminator while( len < max && ++idx <= judy->level ) { type = judy->stack[idx].next & 0x07; slot = judy->stack[idx].slot; depth = len / JUDY_key_size; if( judy->depth && !(len & JUDY_key_mask) ) dest[depth] = 0; switch( type ) { case JUDY_1: case JUDY_2: case JUDY_4: case JUDY_8: case JUDY_16: case JUDY_32: keysize = JUDY_key_size - ( judy->stack[idx].off & JUDY_key_mask ); base = (uchar *)( judy->stack[idx].next & JUDY_mask ); if( judy->depth ) { value = *(judyvalue *)(base + slot * keysize); value &= JudyMask[keysize]; dest[depth++] |= value; len += keysize; if( depth < judy->depth ) continue; return len; } off = keysize; while( off-- && len < max ) if( buff[len] = base[slot * keysize + off] ) len++; else break; continue; case JUDY_radix: if( judy->depth ) { dest[depth] |= (judyvalue)slot << (JUDY_key_size - (++len & JUDY_key_mask)) * 8; if( !(len & JUDY_key_mask) ) depth++; if( depth < judy->depth ) continue; return len; } if( !slot ) break; buff[len++] = (uchar)slot; continue; } } buff[len] = 0; return len; } // find slot & setup cursor __forceinline JudySlot *judy_slot( Judy *judy, uchar *buff, uint max ) { judyvalue *src = (judyvalue *)buff, value, test = 0; JudySlot next = *judy->root, *table, *node; int slot, size, keysize, tst, cnt; uint depth = 0, off = 0; uchar *base; judy->level = 0; while( next ) { if( judy->level < judy->max ) judy->level++; judy->stack[judy->level].next = next; judy->stack[judy->level].off = off; size = JudySize[next & 0x07]; switch( next & 0x07 ) { case JUDY_1: case JUDY_2: case JUDY_4: case JUDY_8: case JUDY_16: case JUDY_32: base = (uchar *)(next & JUDY_mask); node = (JudySlot *)((next & JUDY_mask) + size); keysize = JUDY_key_size - (off & JUDY_key_mask); cnt = size / (sizeof(JudySlot) + keysize); slot = cnt; value = 0; if( judy->depth ) { value = src[depth++]; off |= JUDY_key_mask; off++; value &= JudyMask[keysize]; } else do { value <<= 8; if( off < max ) value |= buff[off]; } while( ++off & JUDY_key_mask ); // find slot > key while( slot-- ) { test = *(judyvalue *)(base + slot * keysize); test &= JudyMask[keysize]; if( test <= value ) break; } judy->stack[judy->level].slot = slot; if( test == value ) { // is this a leaf? if( !judy->depth && !(value & 0xFF) || judy->depth && depth == judy->depth ) return &node[-slot-1]; next = node[-slot-1]; continue; } return NULL; case JUDY_radix: table = (JudySlot *)(next & JUDY_mask); // outer radix if( judy->depth ) slot = (src[depth] >> ((JUDY_key_size - ++off & JUDY_key_mask) * 8)) & 0xff; else if( off < max ) slot = buff[off++]; else slot = 0; // put radix slot on judy stack judy->stack[judy->level].slot = slot; if( (next = table[slot >> 4]) ) table = (JudySlot *)(next & JUDY_mask); // inner radix else return NULL; if( judy->depth && !(off & JUDY_key_mask) ) depth++; if( !judy->depth && !slot || judy->depth && depth == judy->depth ) // leaf? if( table[slot & 0x0F] ) return &table[slot & 0x0F]; // occupied? else return NULL; next = table[slot & 0x0F]; continue; case JUDY_span: node = (JudySlot *)((next & JUDY_mask) + JudySize[JUDY_span]); base = (uchar *)(next & JUDY_mask); cnt = tst = JUDY_span_bytes; if( tst > (int)(max - off) ) tst = max - off; value = strncmp((const char *)base, (const char *)(buff + off), tst); if( !value && tst < cnt && !base[tst] ) return &node[-1]; // leaf? if( !value && tst == cnt ) { next = node[-1]; off += cnt; continue; } return NULL; } } return NULL; } // promote full nodes to next larger size __forceinline JudySlot *judy_promote( Judy *judy, JudySlot *next, int idx, judyvalue value, int keysize ) { uchar *base = (uchar *)(*next & JUDY_mask); int oldcnt, newcnt, slot; JudySlot *newnode, *node, *result; uchar *newbase; uint type; type = (*next & 0x07) + 1; node = (JudySlot *)((*next & JUDY_mask) + JudySize[type-1]); oldcnt = JudySize[type-1] / (sizeof(JudySlot) + keysize); newcnt = JudySize[type] / (sizeof(JudySlot) + keysize); // promote node to next larger size newbase = judy_alloc (judy, type); newnode = (JudySlot *)(newbase + JudySize[type]); *next = (JudySlot)newbase | type; // open up slot at idx memcpy(newbase + (newcnt - oldcnt - 1) * keysize, base, idx * keysize); // copy keys for( slot = 0; slot < idx; slot++ ) newnode[-(slot + newcnt - oldcnt)] = node[-(slot + 1)]; // copy ptr // fill in new node memcpy(newbase + (idx + newcnt - oldcnt - 1) * keysize, &value, keysize); // copy key result = &newnode[-(idx + newcnt - oldcnt)]; // copy rest of old node memcpy(newbase + (idx + newcnt - oldcnt) * keysize, base + (idx * keysize), (oldcnt - slot) * keysize); // copy keys for( ; slot < oldcnt; slot++ ) newnode[-(slot + newcnt - oldcnt + 1)] = node[-(slot + 1)]; // copy ptr judy->stack[judy->level].next = *next; judy->stack[judy->level].slot = idx + newcnt - oldcnt - 1; judy_free (judy, (void **)base, type - 1); return result; } // construct new node for JUDY_radix entry make node with slot - start entries moving key over one offset __forceinline void judy_radix( Judy *judy, JudySlot *radix, uchar *old, int start, int slot, int keysize, uchar key, uint depth ) { int size, idx, cnt = slot - start, newcnt; JudySlot *node, *oldnode; uint type = JUDY_1 - 1; JudySlot *table; uchar *base; // if necessary, setup inner radix node if( !(table = (JudySlot *)(radix[key >> 4] & JUDY_mask)) ) { table = judy_alloc (judy, JUDY_radix); radix[key >> 4] = (JudySlot)table | JUDY_radix; } oldnode = (JudySlot *)(old + JudySize[JUDY_max]); // is this slot a leaf? if( !judy->depth && (!key || !keysize) || judy->depth && !keysize && depth == judy->depth) { table[key & 0x0F] = oldnode[-start-1]; return; } // calculate new node big enough to contain slots do { type++; size = JudySize[type]; newcnt = size / (sizeof(JudySlot) + keysize); } while( cnt > newcnt && type < JUDY_max ); // store new node pointer in inner table base = judy_alloc (judy, type); node = (JudySlot *)(base + size); table[key & 0x0F] = (JudySlot)base | type; // allocate node and copy old contents shorten keys by 1 byte during copy for( idx = 0; idx < cnt; idx++ ) { memcpy (base + (newcnt - idx - 1) * keysize, old + (start + cnt - idx - 1) * (keysize + 1), keysize); node[-(newcnt - idx)] = oldnode[-(start + cnt - idx)]; } } // decompose full node to radix nodes __forceinline void judy_splitnode( Judy *judy, JudySlot *next, uint size, uint keysize, uint depth ) { int cnt, slot, start = 0; uint key = 0x0100, nxt; JudySlot *newradix; uchar *base; base = (uchar *)(*next & JUDY_mask); cnt = size / (sizeof(JudySlot) + keysize); // allocate outer judy_radix node newradix = judy_alloc (judy, JUDY_radix); *next = (JudySlot)newradix | JUDY_radix; for( slot = 0; slot < cnt; slot++ ) { nxt = base[slot * keysize + keysize - 1]; if( key > 0xFF ) key = nxt; if( nxt == key ) continue; // decompose portion of old node into radix nodes judy_radix (judy, newradix, base, start, slot, keysize - 1, (uchar)key, depth); start = slot; key = nxt; } judy_radix (judy, newradix, base, start, slot, keysize - 1, (uchar)key, depth); judy_free (judy, (void **)base, JUDY_max); } // return first leaf __forceinline JudySlot *judy_first( Judy *judy, JudySlot next, uint off, uint depth ) { JudySlot *table, *inner, *node; uint keysize, size; int slot, cnt; uchar *base; while( next ) { if( judy->level < judy->max ) judy->level++; judy->stack[judy->level].off = off; judy->stack[judy->level].next = next; size = JudySize[next & 0x07]; switch( next & 0x07 ) { case JUDY_1: case JUDY_2: case JUDY_4: case JUDY_8: case JUDY_16: case JUDY_32: keysize = JUDY_key_size - (off & JUDY_key_mask); node = (JudySlot *)((next & JUDY_mask) + size); base = (uchar *)(next & JUDY_mask); cnt = size / (sizeof(JudySlot) + keysize); for( slot = 0; slot < cnt; slot++ ) if( node[-slot-1] ) break; judy->stack[judy->level].slot = slot; if( !judy->depth && !base[slot * keysize] || judy->depth && ++depth == judy->depth ) return &node[-slot-1]; next = node[-slot - 1]; off = (off | JUDY_key_mask) + 1; continue; case JUDY_radix: off++; if( judy->depth && !(off & JUDY_key_mask) ) depth++; table = (JudySlot *)(next & JUDY_mask); for( slot = 0; slot < 256; slot++ ) if( (inner = (JudySlot *)(table[slot >> 4] & JUDY_mask)) ) { if( (next = inner[slot & 0x0F]) ) { judy->stack[judy->level].slot = slot; if( !judy->depth && !slot || judy->depth && depth == judy->depth ) return &inner[slot & 0x0F]; else break; } } else slot |= 0x0F; continue; case JUDY_span: node = (JudySlot *)((next & JUDY_mask) + JudySize[JUDY_span]); base = (uchar *)(next & JUDY_mask); cnt = JUDY_span_bytes; if( !base[cnt - 1] ) // leaf node? return &node[-1]; next = node[-1]; off += cnt; continue; } } return NULL; } // return last leaf cell pointer __forceinline JudySlot *judy_last( Judy *judy, JudySlot next, uint off, uint depth ) { JudySlot *table, *inner, *node; uint keysize, size; int slot, cnt; uchar *base; while( next ) { if( judy->level < judy->max ) judy->level++; judy->stack[judy->level].next = next; judy->stack[judy->level].off = off; size = JudySize[next & 0x07]; switch( next & 0x07 ) { case JUDY_1: case JUDY_2: case JUDY_4: case JUDY_8: case JUDY_16: case JUDY_32: keysize = JUDY_key_size - (off & JUDY_key_mask); slot = size / (sizeof(JudySlot) + keysize); base = (uchar *)(next & JUDY_mask); node = (JudySlot *)((next & JUDY_mask) + size); judy->stack[judy->level].slot = --slot; if( !judy->depth && !base[slot * keysize] || judy->depth && ++depth == judy->depth ) return &node[-slot-1]; next = node[-slot-1]; off += keysize; continue; case JUDY_radix: table = (JudySlot *)(next & JUDY_mask); off++; if( judy->depth && !(off & JUDY_key_mask) ) depth++; for( slot = 256; slot--; ) { judy->stack[judy->level].slot = slot; if( (inner = (JudySlot *)(table[slot >> 4] & JUDY_mask)) ) { if( (next = inner[slot & 0x0F]) ) if( !judy->depth && !slot || judy->depth && depth == judy->depth ) return &inner[0]; else break; } else slot &= 0xF0; } continue; case JUDY_span: node = (JudySlot *)((next & JUDY_mask) + JudySize[JUDY_span]); base = (uchar *)(next & JUDY_mask); cnt = JUDY_span_bytes; if( !base[cnt - 1] ) // leaf node? return &node[-1]; next = node[-1]; off += cnt; continue; } } return NULL; } // judy_end: return last entry __forceinline JudySlot *judy_end( Judy *judy ) { judy->level = 0; return judy_last( judy, *judy->root, 0, 0 ); } // judy_nxt: return next entry __forceinline JudySlot *judy_nxt( Judy *judy ) { JudySlot *table, *inner, *node, next; int slot, size, cnt; uint keysize, depth, off; uchar *base; if( !judy->level ) return judy_first (judy, *judy->root, 0, 0); while( judy->level ) { next = judy->stack[judy->level].next; slot = judy->stack[judy->level].slot; off = judy->stack[judy->level].off; keysize = JUDY_key_size - (off & JUDY_key_mask); size = JudySize[next & 0x07]; depth = off / JUDY_key_size; switch( next & 0x07 ) { case JUDY_1: case JUDY_2: case JUDY_4: case JUDY_8: case JUDY_16: case JUDY_32: cnt = size / (sizeof(JudySlot) + keysize); node = (JudySlot *)((next & JUDY_mask) + size); base = (uchar *)(next & JUDY_mask); if( ++slot < cnt ) if( !judy->depth && !base[slot * keysize] || judy->depth && ++depth == judy->depth ) { judy->stack[judy->level].slot = slot; return &node[-slot - 1]; } else { judy->stack[judy->level].slot = slot; return judy_first (judy, node[-slot-1], (off | JUDY_key_mask) + 1, depth); } judy->level--; continue; case JUDY_radix: table = (JudySlot *)(next & JUDY_mask); if( judy->depth && !((off+1) & JUDY_key_mask) ) depth++; while( ++slot < 256 ) if( (inner = (JudySlot *)(table[slot >> 4] & JUDY_mask)) ) { if( inner[slot & 0x0F] ) { judy->stack[judy->level].slot = slot; if( !judy->depth || depth < judy->depth ) return judy_first(judy, inner[slot & 0x0F], off + 1, depth); return &inner[slot & 0x0F]; } } else slot |= 0x0F; judy->level--; continue; case JUDY_span: judy->level--; continue; } } return NULL; } // judy_prv: return ptr to previous entry __forceinline JudySlot *judy_prv( Judy *judy ) { int slot, size, keysize; JudySlot *table, *inner, *node, next; uint depth, off; uchar *base; if( !judy->level ) return judy_last (judy, *judy->root, 0, 0); while( judy->level ) { next = judy->stack[judy->level].next; slot = judy->stack[judy->level].slot; off = judy->stack[judy->level].off; size = JudySize[next & 0x07]; depth = off / JUDY_key_size; switch( next & 0x07 ) { case JUDY_1: case JUDY_2: case JUDY_4: case JUDY_8: case JUDY_16: case JUDY_32: node = (JudySlot *)((next & JUDY_mask) + size); if( !slot || !node[-slot] ) { judy->level--; continue; } base = (uchar *)(next & JUDY_mask); judy->stack[judy->level].slot--; keysize = JUDY_key_size - (off & JUDY_key_mask); if( !judy->depth && !base[(slot - 1) * keysize] || judy->depth && ++depth == judy->depth ) return &node[-slot]; return judy_last (judy, node[-slot], (off | JUDY_key_mask) + 1, depth); case JUDY_radix: table = (JudySlot *)(next & JUDY_mask); if( judy->depth && !((off + 1) & JUDY_key_mask) ) depth++; while( slot-- ) { judy->stack[judy->level].slot--; if( (inner = (JudySlot *)(table[slot >> 4] & JUDY_mask)) ) if( inner[slot & 0x0F] ) if( !judy->depth && !slot || judy->depth && depth == judy->depth ) return &inner[0]; else return judy_last(judy, inner[slot & 0x0F], off + 1, depth); } judy->level--; continue; case JUDY_span: judy->level--; continue; } } return NULL; } // judy_del: delete string from judy array returning previous entry. __forceinline JudySlot *judy_del ( Judy *judy ) { int slot, off, size, type, high; JudySlot *table, *inner, *node, next; int keysize, cnt; uchar *base; while( judy->level ) { next = judy->stack[judy->level].next; slot = judy->stack[judy->level].slot; off = judy->stack[judy->level].off; size = JudySize[next & 0x07]; switch( type = next & 0x07 ) { case JUDY_1: case JUDY_2: case JUDY_4: case JUDY_8: case JUDY_16: case JUDY_32: keysize = JUDY_key_size - (off & JUDY_key_mask); cnt = size / (sizeof(JudySlot) + keysize); node = (JudySlot *)((next & JUDY_mask) + size); base = (uchar *)(next & JUDY_mask); // move deleted slot to first slot while( slot ) { node[-slot-1] = node[-slot]; memcpy (base + slot * keysize, base + (slot - 1) * keysize, keysize); slot--; } // zero out first slot node[-1] = 0; memset (base, 0, keysize); if( node[-cnt] ) { // does node have any slots left? judy->stack[judy->level].slot++; return judy_prv (judy); } judy_free (judy, base, type); judy->level--; continue; case JUDY_radix: table = (JudySlot *)(next & JUDY_mask); inner = (JudySlot *)(table[slot >> 4] & JUDY_mask); inner[slot & 0x0F] = 0; high = slot & 0xF0; for( cnt = 16; cnt--; ) if( inner[cnt] ) return judy_prv (judy); judy_free (judy, inner, JUDY_radix); table[slot >> 4] = 0; for( cnt = 16; cnt--; ) if( table[cnt] ) return judy_prv (judy); judy_free (judy, table, JUDY_radix); judy->level--; continue; case JUDY_span: base = (uchar *)(next & JUDY_mask); judy_free (judy, base, type); judy->level--; continue; } } // tree is now empty *judy->root = 0; return NULL; } // return cell for first key greater than or equal to given key __forceinline JudySlot *judy_strt( Judy *judy, uchar *buff, uint max ) { JudySlot *cell; judy->level = 0; if( !max ) return judy_first( judy, *judy->root, 0, 0 ); if( (cell = judy_slot( judy, buff, max)) ) return cell; return judy_nxt( judy ); } // split open span node __forceinline void judy_splitspan (Judy *judy, JudySlot *next, uchar *base) { JudySlot *node = (JudySlot *)(base + JudySize[JUDY_span]); uint cnt = JUDY_span_bytes, off = 0; uchar *newbase; int i; do { newbase = judy_alloc (judy, JUDY_1); *next = (JudySlot)newbase | JUDY_1; i = JUDY_key_size; while( i-- ) *newbase++ = base[off + i]; next = (JudySlot *)newbase; off += JUDY_key_size; cnt -= JUDY_key_size; } while( cnt && base[off - 1] ); *next = node[-1]; judy_free( judy, base, JUDY_span ); } // judy_cell: add string to judy array __forceinline JudySlot *judy_cell( Judy *judy, uchar *buff, uint max ) { judyvalue *src = (judyvalue *)buff, test, value; int size, idx, slot, cnt, tst; JudySlot *next = judy->root, *table, *node; uint off = 0, start, depth = 0, keysize; uchar *base; judy->level = 0; while( *next ) { if( judy->level < judy->max ) judy->level++; judy->stack[judy->level].next = *next; judy->stack[judy->level].off = off; switch( *next & 0x07 ) { default: size = JudySize[*next & 0x07]; keysize = JUDY_key_size - (off & JUDY_key_mask); cnt = size / (sizeof(JudySlot) + keysize); base = (uchar *)(*next & JUDY_mask); node = (JudySlot *)((*next & JUDY_mask) + size); start = off; slot = cnt; value = 0; if( judy->depth ) { value = src[depth++]; off |= JUDY_key_mask; off++; value &= JudyMask[keysize]; } else do { value <<= 8; if( off < max ) value |= buff[off]; } while( ++off & JUDY_key_mask ); // find slot > key while( slot-- ) { test = *(judyvalue *)(base + slot * keysize); test &= JudyMask[keysize]; if( test <= value ) break; } judy->stack[judy->level].slot = slot; if( test == value ) { // new key is equal to slot key next = &node[-slot-1]; if( !judy->depth && !(value & 0xFF) || judy->depth && depth == judy->depth ) return next; // is this a leaf? continue; } // if this node is not full open up cell after slot if( !node[-1] ) { memmove(base, base + keysize, slot * keysize); // move keys less than new key down one slot memcpy(base + slot * keysize, &value, keysize); // copy new key into slot for( idx = 0; idx < slot; idx++ ) node[-idx-1] = node[-idx-2];// copy tree ptrs/cells down one slot node[-slot-1] = 0; // set new tree ptr/cell next = &node[-slot-1]; if( !judy->depth && !(value & 0xFF) || judy->depth && depth == judy->depth ) return JUDY_key_mask); cnt = size / (sizeof(JudySlot) + keysize); node = (JudySlot *)((next nCheck memory: ) return-1level--; continue; } base = (uchar *)(next judy_prv (judy); judy_free (judy, inner, JUDY_radix); tableext; continue; } if( size < JudySize[JUDY_max] ) { next = judy_promote (judy, next, slot+1, value, keysize); if( !judy->depth && !(value & 0xFF) || judy->depth && depth == judy->depth ) return next; continue; } // split full maximal node into JUDY_radix nodes loop to reprocess new insert judy_splitnode (judy, next, size, keysize, depth); judy->level--; off = start; if( judy->depth ) depth--; continue; case JUDY_radix: table = (JudySlot *)(*next & JUDY_mask); // outer radix if( judy->depth ) slot = (src[depth] >> ((JUDY_key_size - ++off & JUDY_key_mask) * 8)) & 0xff; else if( off < max ) slot = buff[off++]; else slot = 0, off++; if( judy->depth && !(off & JUDY_key_mask) ) depth++; // allocate inner radix if empty if( !table[slot >> 4] ) table[slot >> 4] = (JudySlot)judy_alloc (judy, JUDY_radix) | JUDY_radix; table = (JudySlot *)(table[slot >> 4] & JUDY_mask); judy->stack[judy->level].slot = slot; next = &table[slot & 0x0F]; if( !judy->depth && !slot || judy->depth && depth == judy->depth ) return next; // leaf? continue; case JUDY_span: base = (uchar *)(*next & JUDY_mask); node = (JudySlot *)((*next & JUDY_mask) + JudySize[JUDY_span]); cnt = JUDY_span_bytes; tst = cnt; if( tst > (int)(max - off) ) tst = max - off; value = strncmp((const char *)base, (const char *)(buff + off), tst); if( !value && tst < cnt && !base[tst] ) return &node[-1]; if( !value && tst == cnt ) { next = &node[-1]; off += cnt; continue; } // bust up JUDY_span node and produce JUDY_1 nodes then loop to reprocess insert judy_splitspan (judy, next, base); judy->level--; continue; } } // place JUDY_1 node under JUDY_radix node(s) if( off & JUDY_key_mask ) if( judy->depth || off <= max ) { base = judy_alloc (judy, JUDY_1); keysize = JUDY_key_size - (off & JUDY_key_mask); node = (JudySlot *)(base + JudySize[JUDY_1]); *next = (JudySlot)base | JUDY_1; // fill in slot 0 with bytes of key if( judy->depth ) { value = src[depth]; memcpy(base, &value, keysize); // copy new key into slot } else { while( keysize ) if( off + keysize <= max ) *base++ = buff[off + --keysize]; else base++, --keysize; } if( judy->level < judy->max ) judy->level++; judy->stack[judy->level].next = *next; judy->stack[judy->level].slot = 0; judy->stack[judy->level].off = off; next = &node[-1]; off |= JUDY_key_mask; depth++; off++; } // produce span nodes to consume rest of key or judy_1 nodes if not string tree if( !judy->depth ) while( off <= max ) { base = judy_alloc (judy, JUDY_span); *next = (JudySlot)base | JUDY_span; node = (JudySlot *)(base + JudySize[JUDY_span]); cnt = tst = JUDY_span_bytes; if( tst > (int)(max - off) ) tst = max - off; memcpy (base, buff + off, tst); if( judy->level < judy->max ) judy->level++; judy->stack[judy->level].next = *next; judy->stack[judy->level].slot = 0; judy->stack[judy->level].off = off; next = &node[-1]; off += tst; depth++; if( !base[cnt-1] ) break; } else while( depth < judy->depth ) { base = judy_alloc (judy, JUDY_1); node = (JudySlot *)(base + JudySize[JUDY_1]); *next = (JudySlot)base | JUDY_1; // fill in slot 0 with bytes of key *(judyvalue *)base = src[depth]; if( judy->level < judy->max ) judy->level++; judy->stack[judy->level].next = *next; judy->stack[judy->level].slot = 0; judy->stack[judy->level].off = off; next = &node[-1]; off |= JUDY_key_mask; depth++; off++; } return next; } __forceinline char *odo( char *s ) { I32 wheel = (I32)strlen( s )-1; while( s[ wheel ] == 'z' && wheel >= 0 ) s[ wheel-- ] = 'a'; return wheel < 0 ? NULL : ( ++s[ wheel ], s ); } END_C package main; use Time::HiRes qw[ time ]; use Data::Dump qw[ pp ]; sub mem{ `tasklist /nh /fi "PID eq $$"` =~ m[(\S+ K)$] } our $ODO //= 'aa'; my $addrByName = Judy::new( length( $ODO ), 0 );; my $nameByAddr = Judy::new( 0, 1 ); print 'Memory before building Judy: ', mem; my $n = 1; do { $addrByName->addNum( $ODO, length( $ODO ), $n ); $nameByAddr->addStr( $n++, $ODO ); my $gotNum = $addrByName->getNum( $ODO, length( $ODO ) ); } while Judy::odo( $ODO ); print 'Memory after building Judy: ', mem; mem; my $start = time; $n = 1; do { my $gotNum = $addrByName->getNum( $ODO, length( $ODO ) ); my $gotName = $nameByAddr->getStr( $n++); die "'$gotName' ne '$ODO'" unless $gotName eq $ODO; } while Judy::odo( $ODO ); printf "Bidi lookups of %u pairs took:%.9f seconds\n", $n-1, time() - $start; print 'Memory after lookups: ', mem; <>; #### C:\test\Judy>perl Judy.pm -ODO=aaaaa Memory before building Judy: 10,760 K Memory after building Judy: 347,660 K Bidi lookups of 11881376 pairs took:25.197204113 seconds Memory after lookups: 347,680 K