doing a better job hashing circular structure. the hash function is now

always fast and gives conservative correct answers, at the expense of
fidelity on medium and large size structures. for example (hash (iota 15))
gives the same result as (hash (iota 14)).
This commit is contained in:
JeffBezanson 2009-05-20 03:39:20 +00:00
parent c2026ba77c
commit bfbbf051c9
4 changed files with 77 additions and 36 deletions

View File

@ -165,6 +165,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, htable_t *table,
static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq) static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq)
{ {
cyc_compare_top:
if (a==b) if (a==b)
return fixnum(0); return fixnum(0);
if (iscons(a)) { if (iscons(a)) {
@ -199,7 +200,9 @@ static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq)
eq_union(table, a, b, ca, cb); eq_union(table, a, b, ca, cb);
d = cyc_compare(aa, ab, table, eq); d = cyc_compare(aa, ab, table, eq);
if (numval(d)!=0) return d; if (numval(d)!=0) return d;
return cyc_compare(da, db, table, eq); a = da;
b = db;
goto cyc_compare_top;
} }
else { else {
return fixnum(1); return fixnum(1);
@ -256,11 +259,9 @@ value_t equal(value_t a, value_t b)
#define doublehash(a) int64to32hash(a) #define doublehash(a) int64to32hash(a)
#endif #endif
// *flag means max recursion bound exceeded // *ut means we had to start using the table
// *ut means this happened some time, so we had to start using the table static uptrint_t bounded_hash(value_t a, int bound, int *ut)
static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
{ {
*flag = 0;
double d; double d;
numerictype_t nt; numerictype_t nt;
size_t i, len; size_t i, len;
@ -273,8 +274,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
if (h != (uptrint_t)HT_NOTFOUND) if (h != (uptrint_t)HT_NOTFOUND)
return h; return h;
} }
if (bound <= 0) { *ut = *flag = 1; return 0; } int tg = tag(a);
int bb, tg = tag(a);
switch(tg) { switch(tg) {
case TAG_NUM : case TAG_NUM :
case TAG_NUM1: case TAG_NUM1:
@ -282,7 +282,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
return doublehash(*(int64_t*)&d); return doublehash(*(int64_t*)&d);
case TAG_FUNCTION: case TAG_FUNCTION:
if (uintval(a) > N_BUILTINS) if (uintval(a) > N_BUILTINS)
return bounded_hash(((function_t*)ptr(a))->bcode, bound, flag, ut); return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut);
return inthash(a); return inthash(a);
case TAG_SYM: case TAG_SYM:
return ((symbol_t*)ptr(a))->hash; return ((symbol_t*)ptr(a))->hash;
@ -297,32 +297,38 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
data = cv_data(cv); data = cv_data(cv);
return memhash(data, cv_len(cv)); return memhash(data, cv_len(cv));
case TAG_VECTOR: case TAG_VECTOR:
if (bound <= 0) {
h = ++(*ut) + (uptrint_t)HT_NOTFOUND;
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
return h;
}
len = vector_size(a); len = vector_size(a);
for(i=0; i < len; i++) { for(i=0; i < len; i++) {
h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, flag, ut)); h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1);
if (*flag) {
if (h == (uptrint_t)HT_NOTFOUND) h++;
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
}
} }
return h; return h;
case TAG_CONS: case TAG_CONS:
if (bound <= 0)
return 1;
return MIX(bounded_hash(car_(a), bound/2, ut),
bounded_hash(cdr_(a), bound/2, ut)+2);
// this should be able to hash long lists with greater fidelity,
// but it does not work yet.
/*
first = a;
bb = BOUNDED_HASH_BOUND; bb = BOUNDED_HASH_BOUND;
do { do {
h = MIX(h, bounded_hash(car_(a), bound-1, flag, ut)+1); h = MIX(h, bounded_hash(car_(a), bound-1, ut));
if (*flag) {
if (h == (uptrint_t)HT_NOTFOUND) h++;
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
}
a = cdr_(a); a = cdr_(a);
bb--; bb--;
if (bb <= 0) { *ut = *flag = 1; return h; } if (bb <= 0) {
if (*ut) { *ut = 1;
if (ptrhash_get(&equal_eq_hashtable, (void*)a) != HT_NOTFOUND) ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h);
return h; return h;
} }
} while (iscons(a)); } while (iscons(a));
return MIX(h, bounded_hash(a, bound-1, flag, ut)+1); return MIX(h, bounded_hash(a, bound-1, ut));
*/
} }
return 0; return 0;
} }
@ -336,8 +342,8 @@ int equal_lispvalue(value_t a, value_t b)
uptrint_t hash_lispvalue(value_t a) uptrint_t hash_lispvalue(value_t a)
{ {
int flag, ut=0; int ut=0;
uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &flag, &ut); uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut);
if (ut) if (ut)
htable_reset(&equal_eq_hashtable, 512); htable_reset(&equal_eq_hashtable, 512);
return n; return n;

View File

@ -761,7 +761,6 @@ static value_t do_trycatch()
- put the stack in this state - put the stack in this state
- provide arg count - provide arg count
- respect tail position - respect tail position
- call correct entry point (either eval_sexpr or apply_cl)
- restore SP - restore SP
callee's responsibility: callee's responsibility:

View File

@ -35,14 +35,6 @@
;(set! a (map-int identity 10000)) ;(set! a (map-int identity 10000))
;(dotimes (i 200) (rfoldl cons () a)) ;(dotimes (i 200) (rfoldl cons () a))
; iterative filter
(define (ifilter pred lst)
((label f (lambda (accum lst)
(cond ((null? lst) (nreverse accum))
((not (pred (car lst))) (f accum (cdr lst)))
(#t (f (cons (car lst) accum) (cdr lst))))))
() lst))
(define (sort l) (define (sort l)
(if (or (null? l) (null? (cdr l))) l (if (or (null? l) (null? (cdr l))) l
(let* ((piv (car l)) (let* ((piv (car l))
@ -76,11 +68,10 @@
`((lambda (,name) (set! ,name ,fn)) ())) `((lambda (,name) (set! ,name ,fn)) ()))
(define (square x) (* x x)) (define (square x) (* x x))
(define (evenp x) (= x (* (/ x 2) 2)))
(define (expt b p) (define (expt b p)
(cond ((= p 0) 1) (cond ((= p 0) 1)
((= b 0) 0) ((= b 0) 0)
((evenp p) (square (expt b (/ p 2)))) ((even? p) (square (expt b (div0 p 2))))
(#t (* b (expt b (- p 1)))))) (#t (* b (expt b (- p 1))))))
(define (gcd a b) (define (gcd a b)

View File

@ -109,5 +109,50 @@
(18 . e) (15 . a) (12 . a) (10 . e) (6 . d) (5 . c) (4 . e) (18 . e) (15 . a) (12 . a) (10 . e) (6 . d) (5 . c) (4 . e)
(3 . d) (2 . c) (0 . b) (1 . a)))) (3 . d) (2 . c) (0 . b) (1 . a))))
; hashing strange things
(assert (equal?
(hash '#0=(1 1 #0# . #0#))
(hash '#1=(1 1 #1# 1 1 #1# . #1#))))
(assert (not (equal?
(hash '#0=(1 1 #0# . #0#))
(hash '#1=(1 2 #1# 1 1 #1# . #1#)))))
(assert (equal?
(hash '#0=((1 . #0#) . #0#))
(hash '#1=((1 . #1#) (1 . #1#) . #1#))))
(assert (not (equal?
(hash '#0=((1 . #0#) . #0#))
(hash '#1=((2 . #1#) (1 . #1#) . #1#)))))
(assert (not (equal?
(hash '#0=((1 . #0#) . #0#))
(hash '#1=((1 . #1#) (2 . #1#) . #1#)))))
(assert (equal?
(hash #0=[1 [2 [#0#]] 3])
(hash #1=[1 [2 [[1 [2 [#1#]] 3]]] 3])))
(assert (not (equal?
(hash #0=[1 [2 [#0#]] 3])
(hash #1=[1 [2 [[5 [2 [#1#]] 3]]] 3]))))
(assert (equal?
(hash #0=[1 #0# [2 [#0#]] 3])
(hash #1=[1 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3])))
(assert (not (equal?
(hash #0=[1 #0# [2 [#0#]] 3])
(hash #1=[6 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3]))))
(assert (equal?
(hash [1 [2 [[1 1 [2 [1]] 3]]] 3])
(hash [1 [2 [[1 1 [2 [1]] 3]]] 3])))
(assert (not (equal?
(hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
(hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
(princ "all tests pass\n") (princ "all tests pass\n")
#t #t