Fix the hash function to do a better job on long lists.
This commit is contained in:
		
							parent
							
								
									bfbbf051c9
								
							
						
					
					
						commit
						ff650e3049
					
				| 
						 | 
					@ -259,9 +259,10 @@ value_t equal(value_t a, value_t b)
 | 
				
			||||||
#define doublehash(a) int64to32hash(a)
 | 
					#define doublehash(a) int64to32hash(a)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// *ut means we had to start using the table
 | 
					// *oob: output argument, means we hit the limit specified by 'bound'
 | 
				
			||||||
static uptrint_t bounded_hash(value_t a, int bound, int *ut)
 | 
					static uptrint_t bounded_hash(value_t a, int bound, int *oob)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					    *oob = 0;
 | 
				
			||||||
    double d;
 | 
					    double d;
 | 
				
			||||||
    numerictype_t nt;
 | 
					    numerictype_t nt;
 | 
				
			||||||
    size_t i, len;
 | 
					    size_t i, len;
 | 
				
			||||||
| 
						 | 
					@ -269,12 +270,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
 | 
				
			||||||
    cprim_t *cp;
 | 
					    cprim_t *cp;
 | 
				
			||||||
    void *data;
 | 
					    void *data;
 | 
				
			||||||
    uptrint_t h = 0;
 | 
					    uptrint_t h = 0;
 | 
				
			||||||
    if (*ut) {
 | 
					    int oob2, tg = tag(a);
 | 
				
			||||||
        h = (uptrint_t)ptrhash_get(&equal_eq_hashtable, (void*)a);
 | 
					 | 
				
			||||||
        if (h != (uptrint_t)HT_NOTFOUND)
 | 
					 | 
				
			||||||
            return h;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    int tg = tag(a);
 | 
					 | 
				
			||||||
    switch(tg) {
 | 
					    switch(tg) {
 | 
				
			||||||
    case TAG_NUM :
 | 
					    case TAG_NUM :
 | 
				
			||||||
    case TAG_NUM1:
 | 
					    case TAG_NUM1:
 | 
				
			||||||
| 
						 | 
					@ -282,7 +278,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
 | 
				
			||||||
        return doublehash(*(int64_t*)&d);
 | 
					        return doublehash(*(int64_t*)&d);
 | 
				
			||||||
    case TAG_FUNCTION:
 | 
					    case TAG_FUNCTION:
 | 
				
			||||||
        if (uintval(a) > N_BUILTINS)
 | 
					        if (uintval(a) > N_BUILTINS)
 | 
				
			||||||
            return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut);
 | 
					            return bounded_hash(((function_t*)ptr(a))->bcode, bound, oob);
 | 
				
			||||||
        return inthash(a);
 | 
					        return inthash(a);
 | 
				
			||||||
    case TAG_SYM:
 | 
					    case TAG_SYM:
 | 
				
			||||||
        return ((symbol_t*)ptr(a))->hash;
 | 
					        return ((symbol_t*)ptr(a))->hash;
 | 
				
			||||||
| 
						 | 
					@ -296,39 +292,39 @@ static uptrint_t bounded_hash(value_t a, int bound, int *ut)
 | 
				
			||||||
        cv = (cvalue_t*)ptr(a);
 | 
					        cv = (cvalue_t*)ptr(a);
 | 
				
			||||||
        data = cv_data(cv);
 | 
					        data = cv_data(cv);
 | 
				
			||||||
        return memhash(data, cv_len(cv));
 | 
					        return memhash(data, cv_len(cv));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    case TAG_VECTOR:
 | 
					    case TAG_VECTOR:
 | 
				
			||||||
        if (bound <= 0) {
 | 
					        if (bound <= 0) {
 | 
				
			||||||
            h = ++(*ut) + (uptrint_t)HT_NOTFOUND;
 | 
					            *oob = 1;
 | 
				
			||||||
            ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
 | 
					            return 1;
 | 
				
			||||||
            return h;
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        len = vector_size(a);
 | 
					        len = vector_size(a);
 | 
				
			||||||
        for(i=0; i < len; i++) {
 | 
					        for(i=0; i < len; i++) {
 | 
				
			||||||
            h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1);
 | 
					            h = MIX(h, bounded_hash(vector_elt(a,i), bound/2, &oob2)+1);
 | 
				
			||||||
 | 
					            if (oob2)
 | 
				
			||||||
 | 
					                bound/=2;
 | 
				
			||||||
 | 
					            *oob = *oob || oob2;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return h;
 | 
					        return h;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    case TAG_CONS:
 | 
					    case TAG_CONS:
 | 
				
			||||||
        if (bound <= 0)
 | 
					        if (bound <= 0) {
 | 
				
			||||||
 | 
					            *oob = 1;
 | 
				
			||||||
            return 1;
 | 
					            return 1;
 | 
				
			||||||
        return MIX(bounded_hash(car_(a), bound/2, ut),
 | 
					 | 
				
			||||||
                   bounded_hash(cdr_(a), bound/2, ut)+2);
 | 
					 | 
				
			||||||
        // this should be able to hash long lists with greater fidelity,
 | 
					 | 
				
			||||||
        // but it does not work yet.
 | 
					 | 
				
			||||||
        /*
 | 
					 | 
				
			||||||
        first = a;
 | 
					 | 
				
			||||||
        bb = BOUNDED_HASH_BOUND;
 | 
					 | 
				
			||||||
        do {
 | 
					 | 
				
			||||||
            h = MIX(h, bounded_hash(car_(a), bound-1, ut));
 | 
					 | 
				
			||||||
            a = cdr_(a);
 | 
					 | 
				
			||||||
            bb--;
 | 
					 | 
				
			||||||
            if (bb <= 0) {
 | 
					 | 
				
			||||||
                *ut = 1;
 | 
					 | 
				
			||||||
                ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h);
 | 
					 | 
				
			||||||
                return h;
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        } while (iscons(a));
 | 
					        h = bounded_hash(car_(a), bound/2, oob);
 | 
				
			||||||
        return MIX(h, bounded_hash(a, bound-1, ut));
 | 
					        // bounds balancing: try to share the bounds efficiently
 | 
				
			||||||
        */
 | 
					        // between the car and cdr so we can hash better when a list is
 | 
				
			||||||
 | 
					        // car-shallow and cdr-deep (a common case) or vice-versa.
 | 
				
			||||||
 | 
					        if (*oob)
 | 
				
			||||||
 | 
					            bound/=2;
 | 
				
			||||||
 | 
					        else
 | 
				
			||||||
 | 
					            bound--;
 | 
				
			||||||
 | 
					        h = MIX(h, bounded_hash(cdr_(a), bound, &oob2)+2);
 | 
				
			||||||
 | 
					        // recursive OOB propagation. otherwise this case is slow:
 | 
				
			||||||
 | 
					        // (hash '#2=('#0=(#1=(#1#) . #0#) . #2#))
 | 
				
			||||||
 | 
					        *oob = *oob || oob2;
 | 
				
			||||||
 | 
					        return h;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    return 0;
 | 
					    return 0;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -342,10 +338,8 @@ int equal_lispvalue(value_t a, value_t b)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
uptrint_t hash_lispvalue(value_t a)
 | 
					uptrint_t hash_lispvalue(value_t a)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int ut=0;
 | 
					    int oob=0;
 | 
				
			||||||
    uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut);
 | 
					    uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &oob);
 | 
				
			||||||
    if (ut)
 | 
					 | 
				
			||||||
        htable_reset(&equal_eq_hashtable, 512);
 | 
					 | 
				
			||||||
    return n;
 | 
					    return n;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -154,5 +154,17 @@
 | 
				
			||||||
	      (hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
 | 
						      (hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
 | 
				
			||||||
	      (hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
 | 
						      (hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(assert (equal? (hash '#0=(1 . #0#))
 | 
				
			||||||
 | 
							(hash '#1=(1 1 . #1#))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(assert (not (equal? (hash '#0=(1 1 . #0#))
 | 
				
			||||||
 | 
							     (hash '#1=(1 #0# . #1#)))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(assert (not (equal? (hash (iota 10))
 | 
				
			||||||
 | 
							     (hash (iota 20)))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(assert (not (equal? (hash (iota 41))
 | 
				
			||||||
 | 
							     (hash (iota 42)))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
(princ "all tests pass\n")
 | 
					(princ "all tests pass\n")
 | 
				
			||||||
#t
 | 
					#t
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue