Fix the hash function so that it does a better job on long lists.
This commit is contained in:
parent
bfbbf051c9
commit
ff650e3049
|
@ -259,9 +259,10 @@ value_t equal(value_t a, value_t b)
|
||||||
#define doublehash(a) int64to32hash(a)
|
#define doublehash(a) int64to32hash(a)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// *ut means we had to start using the table
|
// *oob: output argument, means we hit the limit specified by 'bound'
|
||||||
static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
static uptrint_t bounded_hash(value_t a, int bound, int *oob)
|
||||||
{
|
{
|
||||||
|
*oob = 0;
|
||||||
double d;
|
double d;
|
||||||
numerictype_t nt;
|
numerictype_t nt;
|
||||||
size_t i, len;
|
size_t i, len;
|
||||||
|
@ -269,12 +270,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
||||||
cprim_t *cp;
|
cprim_t *cp;
|
||||||
void *data;
|
void *data;
|
||||||
uptrint_t h = 0;
|
uptrint_t h = 0;
|
||||||
if (*ut) {
|
int oob2, tg = tag(a);
|
||||||
h = (uptrint_t)ptrhash_get(&equal_eq_hashtable, (void*)a);
|
|
||||||
if (h != (uptrint_t)HT_NOTFOUND)
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
int tg = tag(a);
|
|
||||||
switch(tg) {
|
switch(tg) {
|
||||||
case TAG_NUM :
|
case TAG_NUM :
|
||||||
case TAG_NUM1:
|
case TAG_NUM1:
|
||||||
|
@ -282,7 +278,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
||||||
return doublehash(*(int64_t*)&d);
|
return doublehash(*(int64_t*)&d);
|
||||||
case TAG_FUNCTION:
|
case TAG_FUNCTION:
|
||||||
if (uintval(a) > N_BUILTINS)
|
if (uintval(a) > N_BUILTINS)
|
||||||
return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut);
|
return bounded_hash(((function_t*)ptr(a))->bcode, bound, oob);
|
||||||
return inthash(a);
|
return inthash(a);
|
||||||
case TAG_SYM:
|
case TAG_SYM:
|
||||||
return ((symbol_t*)ptr(a))->hash;
|
return ((symbol_t*)ptr(a))->hash;
|
||||||
|
@ -296,39 +292,39 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
|
||||||
cv = (cvalue_t*)ptr(a);
|
cv = (cvalue_t*)ptr(a);
|
||||||
data = cv_data(cv);
|
data = cv_data(cv);
|
||||||
return memhash(data, cv_len(cv));
|
return memhash(data, cv_len(cv));
|
||||||
|
|
||||||
case TAG_VECTOR:
|
case TAG_VECTOR:
|
||||||
if (bound <= 0) {
|
if (bound <= 0) {
|
||||||
h = ++(*ut) + (uptrint_t)HT_NOTFOUND;
|
*oob = 1;
|
||||||
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
|
return 1;
|
||||||
return h;
|
|
||||||
}
|
}
|
||||||
len = vector_size(a);
|
len = vector_size(a);
|
||||||
for(i=0; i < len; i++) {
|
for(i=0; i < len; i++) {
|
||||||
h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1);
|
h = MIX(h, bounded_hash(vector_elt(a,i), bound/2, &oob2)+1);
|
||||||
|
if (oob2)
|
||||||
|
bound/=2;
|
||||||
|
*oob = *oob || oob2;
|
||||||
}
|
}
|
||||||
return h;
|
return h;
|
||||||
|
|
||||||
case TAG_CONS:
|
case TAG_CONS:
|
||||||
if (bound <= 0)
|
if (bound <= 0) {
|
||||||
|
*oob = 1;
|
||||||
return 1;
|
return 1;
|
||||||
return MIX(bounded_hash(car_(a), bound/2, ut),
|
|
||||||
bounded_hash(cdr_(a), bound/2, ut)+2);
|
|
||||||
// this should be able to hash long lists with greater fidelity,
|
|
||||||
// but it does not work yet.
|
|
||||||
/*
|
|
||||||
first = a;
|
|
||||||
bb = BOUNDED_HASH_BOUND;
|
|
||||||
do {
|
|
||||||
h = MIX(h, bounded_hash(car_(a), bound-1, ut));
|
|
||||||
a = cdr_(a);
|
|
||||||
bb--;
|
|
||||||
if (bb <= 0) {
|
|
||||||
*ut = 1;
|
|
||||||
ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h);
|
|
||||||
return h;
|
|
||||||
}
|
}
|
||||||
} while (iscons(a));
|
h = bounded_hash(car_(a), bound/2, oob);
|
||||||
return MIX(h, bounded_hash(a, bound-1, ut));
|
// bounds balancing: try to share the bounds efficiently
|
||||||
*/
|
// between the car and cdr so we can hash better when a list is
|
||||||
|
// car-shallow and cdr-deep (a common case) or vice-versa.
|
||||||
|
if (*oob)
|
||||||
|
bound/=2;
|
||||||
|
else
|
||||||
|
bound--;
|
||||||
|
h = MIX(h, bounded_hash(cdr_(a), bound, &oob2)+2);
|
||||||
|
// recursive OOB propagation. otherwise this case is slow:
|
||||||
|
// (hash '#2=('#0=(#1=(#1#) . #0#) . #2#))
|
||||||
|
*oob = *oob || oob2;
|
||||||
|
return h;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -342,10 +338,8 @@ int equal_lispvalue(value_t a, value_t b)
|
||||||
|
|
||||||
uptrint_t hash_lispvalue(value_t a)
|
uptrint_t hash_lispvalue(value_t a)
|
||||||
{
|
{
|
||||||
int ut=0;
|
int oob=0;
|
||||||
uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut);
|
uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &oob);
|
||||||
if (ut)
|
|
||||||
htable_reset(&equal_eq_hashtable, 512);
|
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -154,5 +154,17 @@
|
||||||
(hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
|
(hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
|
||||||
(hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
|
(hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
|
||||||
|
|
||||||
|
(assert (equal? (hash '#0=(1 . #0#))
|
||||||
|
(hash '#1=(1 1 . #1#))))
|
||||||
|
|
||||||
|
(assert (not (equal? (hash '#0=(1 1 . #0#))
|
||||||
|
(hash '#1=(1 #0# . #1#)))))
|
||||||
|
|
||||||
|
(assert (not (equal? (hash (iota 10))
|
||||||
|
(hash (iota 20)))))
|
||||||
|
|
||||||
|
(assert (not (equal? (hash (iota 41))
|
||||||
|
(hash (iota 42)))))
|
||||||
|
|
||||||
(princ "all tests pass\n")
|
(princ "all tests pass\n")
|
||||||
#t
|
#t
|
||||||
|
|
Loading…
Reference in New Issue