Fix the hash function to do a better job on long lists.
This commit is contained in:
parent
bfbbf051c9
commit
ff650e3049
|
@ -259,9 +259,10 @@ value_t equal(value_t a, value_t b)
|
|||
#define doublehash(a) int64to32hash(a)
|
||||
#endif
|
||||
|
||||
// *ut means we had to start using the table
|
||||
static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
||||
// *oob: output argument; set to 1 if we hit the limit specified by 'bound', 0 otherwise
|
||||
static uptrint_t bounded_hash(value_t a, int bound, int *oob)
|
||||
{
|
||||
*oob = 0;
|
||||
double d;
|
||||
numerictype_t nt;
|
||||
size_t i, len;
|
||||
|
@ -269,12 +270,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
|||
cprim_t *cp;
|
||||
void *data;
|
||||
uptrint_t h = 0;
|
||||
if (*ut) {
|
||||
h = (uptrint_t)ptrhash_get(&equal_eq_hashtable, (void*)a);
|
||||
if (h != (uptrint_t)HT_NOTFOUND)
|
||||
return h;
|
||||
}
|
||||
int tg = tag(a);
|
||||
int oob2, tg = tag(a);
|
||||
switch(tg) {
|
||||
case TAG_NUM :
|
||||
case TAG_NUM1:
|
||||
|
@ -282,7 +278,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
|||
return doublehash(*(int64_t*)&d);
|
||||
case TAG_FUNCTION:
|
||||
if (uintval(a) > N_BUILTINS)
|
||||
return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut);
|
||||
return bounded_hash(((function_t*)ptr(a))->bcode, bound, oob);
|
||||
return inthash(a);
|
||||
case TAG_SYM:
|
||||
return ((symbol_t*)ptr(a))->hash;
|
||||
|
@ -296,39 +292,39 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
|||
cv = (cvalue_t*)ptr(a);
|
||||
data = cv_data(cv);
|
||||
return memhash(data, cv_len(cv));
|
||||
|
||||
case TAG_VECTOR:
|
||||
if (bound <= 0) {
|
||||
h = ++(*ut) + (uptrint_t)HT_NOTFOUND;
|
||||
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
|
||||
return h;
|
||||
*oob = 1;
|
||||
return 1;
|
||||
}
|
||||
len = vector_size(a);
|
||||
for(i=0; i < len; i++) {
|
||||
h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1);
|
||||
h = MIX(h, bounded_hash(vector_elt(a,i), bound/2, &oob2)+1);
|
||||
if (oob2)
|
||||
bound/=2;
|
||||
*oob = *oob || oob2;
|
||||
}
|
||||
return h;
|
||||
|
||||
case TAG_CONS:
|
||||
if (bound <= 0)
|
||||
if (bound <= 0) {
|
||||
*oob = 1;
|
||||
return 1;
|
||||
return MIX(bounded_hash(car_(a), bound/2, ut),
|
||||
bounded_hash(cdr_(a), bound/2, ut)+2);
|
||||
// this should be able to hash long lists with greater fidelity,
|
||||
// but it does not work yet.
|
||||
/*
|
||||
first = a;
|
||||
bb = BOUNDED_HASH_BOUND;
|
||||
do {
|
||||
h = MIX(h, bounded_hash(car_(a), bound-1, ut));
|
||||
a = cdr_(a);
|
||||
bb--;
|
||||
if (bb <= 0) {
|
||||
*ut = 1;
|
||||
ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h);
|
||||
return h;
|
||||
}
|
||||
} while (iscons(a));
|
||||
return MIX(h, bounded_hash(a, bound-1, ut));
|
||||
*/
|
||||
}
|
||||
h = bounded_hash(car_(a), bound/2, oob);
|
||||
// bounds balancing: try to share the bounds efficiently
|
||||
// between the car and cdr so we can hash better when a list is
|
||||
// car-shallow and cdr-deep (a common case) or vice-versa.
|
||||
if (*oob)
|
||||
bound/=2;
|
||||
else
|
||||
bound--;
|
||||
h = MIX(h, bounded_hash(cdr_(a), bound, &oob2)+2);
|
||||
// recursive OOB propagation. otherwise this case is slow:
|
||||
// (hash '#2=('#0=(#1=(#1#) . #0#) . #2#))
|
||||
*oob = *oob || oob2;
|
||||
return h;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -342,10 +338,8 @@ int equal_lispvalue(value_t a, value_t b)
|
|||
|
||||
uptrint_t hash_lispvalue(value_t a)
|
||||
{
|
||||
int ut=0;
|
||||
uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut);
|
||||
if (ut)
|
||||
htable_reset(&equal_eq_hashtable, 512);
|
||||
int oob=0;
|
||||
uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &oob);
|
||||
return n;
|
||||
}
|
||||
|
||||
|
|
|
@ -154,5 +154,17 @@
|
|||
(hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
|
||||
(hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
|
||||
|
||||
(assert (equal? (hash '#0=(1 . #0#))
|
||||
(hash '#1=(1 1 . #1#))))
|
||||
|
||||
(assert (not (equal? (hash '#0=(1 1 . #0#))
|
||||
(hash '#1=(1 #0# . #1#)))))
|
||||
|
||||
(assert (not (equal? (hash (iota 10))
|
||||
(hash (iota 20)))))
|
||||
|
||||
(assert (not (equal? (hash (iota 41))
|
||||
(hash (iota 42)))))
|
||||
|
||||
(princ "all tests pass\n")
|
||||
#t
|
||||
|
|
Loading…
Reference in New Issue