Fix the hash function so that it does a better job on long lists.

This commit is contained in:
JeffBezanson 2009-05-20 04:30:00 +00:00
parent bfbbf051c9
commit ff650e3049
2 changed files with 43 additions and 37 deletions

View File

@ -259,9 +259,10 @@ value_t equal(value_t a, value_t b)
#define doublehash(a) int64to32hash(a) #define doublehash(a) int64to32hash(a)
#endif #endif
// *ut means we had to start using the table // *oob: output argument, means we hit the limit specified by 'bound'
static uptrint_t bounded_hash(value_t a, int bound, int *ut) static uptrint_t bounded_hash(value_t a, int bound, int *oob)
{ {
*oob = 0;
double d; double d;
numerictype_t nt; numerictype_t nt;
size_t i, len; size_t i, len;
@ -269,12 +270,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
cprim_t *cp; cprim_t *cp;
void *data; void *data;
uptrint_t h = 0; uptrint_t h = 0;
if (*ut) { int oob2, tg = tag(a);
h = (uptrint_t)ptrhash_get(&equal_eq_hashtable, (void*)a);
if (h != (uptrint_t)HT_NOTFOUND)
return h;
}
int tg = tag(a);
switch(tg) { switch(tg) {
case TAG_NUM : case TAG_NUM :
case TAG_NUM1: case TAG_NUM1:
@ -282,7 +278,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
return doublehash(*(int64_t*)&d); return doublehash(*(int64_t*)&d);
case TAG_FUNCTION: case TAG_FUNCTION:
if (uintval(a) > N_BUILTINS) if (uintval(a) > N_BUILTINS)
return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut); return bounded_hash(((function_t*)ptr(a))->bcode, bound, oob);
return inthash(a); return inthash(a);
case TAG_SYM: case TAG_SYM:
return ((symbol_t*)ptr(a))->hash; return ((symbol_t*)ptr(a))->hash;
@ -296,39 +292,39 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
cv = (cvalue_t*)ptr(a); cv = (cvalue_t*)ptr(a);
data = cv_data(cv); data = cv_data(cv);
return memhash(data, cv_len(cv)); return memhash(data, cv_len(cv));
case TAG_VECTOR: case TAG_VECTOR:
if (bound <= 0) { if (bound <= 0) {
h = ++(*ut) + (uptrint_t)HT_NOTFOUND; *oob = 1;
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h); return 1;
return h;
} }
len = vector_size(a); len = vector_size(a);
for(i=0; i < len; i++) { for(i=0; i < len; i++) {
h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1); h = MIX(h, bounded_hash(vector_elt(a,i), bound/2, &oob2)+1);
if (oob2)
bound/=2;
*oob = *oob || oob2;
} }
return h; return h;
case TAG_CONS: case TAG_CONS:
if (bound <= 0) if (bound <= 0) {
*oob = 1;
return 1; return 1;
return MIX(bounded_hash(car_(a), bound/2, ut), }
bounded_hash(cdr_(a), bound/2, ut)+2); h = bounded_hash(car_(a), bound/2, oob);
// this should be able to hash long lists with greater fidelity, // bounds balancing: try to share the bounds efficiently
// but it does not work yet. // between the car and cdr so we can hash better when a list is
/* // car-shallow and cdr-deep (a common case) or vice-versa.
first = a; if (*oob)
bb = BOUNDED_HASH_BOUND; bound/=2;
do { else
h = MIX(h, bounded_hash(car_(a), bound-1, ut)); bound--;
a = cdr_(a); h = MIX(h, bounded_hash(cdr_(a), bound, &oob2)+2);
bb--; // recursive OOB propagation. otherwise this case is slow:
if (bb <= 0) { // (hash '#2=('#0=(#1=(#1#) . #0#) . #2#))
*ut = 1; *oob = *oob || oob2;
ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h); return h;
return h;
}
} while (iscons(a));
return MIX(h, bounded_hash(a, bound-1, ut));
*/
} }
return 0; return 0;
} }
@ -342,10 +338,8 @@ int equal_lispvalue(value_t a, value_t b)
uptrint_t hash_lispvalue(value_t a) uptrint_t hash_lispvalue(value_t a)
{ {
int ut=0; int oob=0;
uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut); uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &oob);
if (ut)
htable_reset(&equal_eq_hashtable, 512);
return n; return n;
} }

View File

@ -154,5 +154,17 @@
(hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3]) (hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
(hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3])))) (hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
(assert (equal? (hash '#0=(1 . #0#))
(hash '#1=(1 1 . #1#))))
(assert (not (equal? (hash '#0=(1 1 . #0#))
(hash '#1=(1 #0# . #1#)))))
(assert (not (equal? (hash (iota 10))
(hash (iota 20)))))
(assert (not (equal? (hash (iota 41))
(hash (iota 42)))))
(princ "all tests pass\n") (princ "all tests pass\n")
#t #t