From bfbbf051c90844ab0e3e18c10d646013b17e38d9 Mon Sep 17 00:00:00 2001 From: JeffBezanson Date: Wed, 20 May 2009 03:39:20 +0000 Subject: [PATCH] doing a better job hashing circular structure. the hash function is now always fast and gives conservative correct answers, at the expense of fidelity on medium and large size structures. for example (hash (iota 15)) gives the same result as (hash (iota 14)). --- femtolisp/equal.c | 56 +++++++++++++++++++++++------------------- femtolisp/flisp.c | 1 - femtolisp/test.lsp | 11 +-------- femtolisp/unittest.lsp | 45 +++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 36 deletions(-) diff --git a/femtolisp/equal.c b/femtolisp/equal.c index 6e85c07..de85636 100644 --- a/femtolisp/equal.c +++ b/femtolisp/equal.c @@ -165,6 +165,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, htable_t *table, static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq) { + cyc_compare_top: if (a==b) return fixnum(0); if (iscons(a)) { @@ -199,7 +200,9 @@ static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq) eq_union(table, a, b, ca, cb); d = cyc_compare(aa, ab, table, eq); if (numval(d)!=0) return d; - return cyc_compare(da, db, table, eq); + a = da; + b = db; + goto cyc_compare_top; } else { return fixnum(1); @@ -256,11 +259,9 @@ value_t equal(value_t a, value_t b) #define doublehash(a) int64to32hash(a) #endif -// *flag means max recursion bound exceeded -// *ut means this happened some time, so we had to start using the table -static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut) +// *ut means we had to start using the table +static uptrint_t bounded_hash(value_t a, int bound, int *ut) { - *flag = 0; double d; numerictype_t nt; size_t i, len; @@ -273,8 +274,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut) if (h != (uptrint_t)HT_NOTFOUND) return h; } - if (bound <= 0) { *ut = *flag = 1; return 0; } - int bb, tg = tag(a); + int tg = tag(a); switch(tg) { case TAG_NUM : case TAG_NUM1: @@ -282,7 +282,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut) return doublehash(*(int64_t*)&d); case TAG_FUNCTION: if (uintval(a) > N_BUILTINS) - return bounded_hash(((function_t*)ptr(a))->bcode, bound, flag, ut); + return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut); return inthash(a); case TAG_SYM: return ((symbol_t*)ptr(a))->hash; @@ -297,32 +297,38 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut) data = cv_data(cv); return memhash(data, cv_len(cv)); case TAG_VECTOR: + if (bound <= 0) { + h = ++(*ut) + (uptrint_t)HT_NOTFOUND; + ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h); + return h; + } len = vector_size(a); for(i=0; i < len; i++) { - h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, flag, ut)); - if (*flag) { - if (h == (uptrint_t)HT_NOTFOUND) h++; - ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h); - } + h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1); } return h; case TAG_CONS: + if (bound <= 0) + return 1; + return MIX(bounded_hash(car_(a), bound/2, ut), + bounded_hash(cdr_(a), bound/2, ut)+2); + // this should be able to hash long lists with greater fidelity, + // but it does not work yet. + /* + first = a; bb = BOUNDED_HASH_BOUND; do { - h = MIX(h, bounded_hash(car_(a), bound-1, flag, ut)+1); - if (*flag) { - if (h == (uptrint_t)HT_NOTFOUND) h++; - ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h); - } + h = MIX(h, bounded_hash(car_(a), bound-1, ut)); a = cdr_(a); bb--; - if (bb <= 0) { *ut = *flag = 1; return h; } - if (*ut) { - if (ptrhash_get(&equal_eq_hashtable, (void*)a) != HT_NOTFOUND) - return h; + if (bb <= 0) { + *ut = 1; + ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h); + return h; } } while (iscons(a)); - return MIX(h, bounded_hash(a, bound-1, flag, ut)+1); + return MIX(h, bounded_hash(a, bound-1, ut)); + */ } return 0; } @@ -336,8 +342,8 @@ int equal_lispvalue(value_t a, value_t b) uptrint_t hash_lispvalue(value_t a) { - int flag, ut=0; - uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &flag, &ut); + int ut=0; + uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut); if (ut) htable_reset(&equal_eq_hashtable, 512); return n; diff --git a/femtolisp/flisp.c b/femtolisp/flisp.c index 4b65438..96a6ff9 100644 --- a/femtolisp/flisp.c +++ b/femtolisp/flisp.c @@ -761,7 +761,6 @@ static value_t do_trycatch() - put the stack in this state - provide arg count - respect tail position - - call correct entry point (either eval_sexpr or apply_cl) - restore SP callee's responsibility: diff --git a/femtolisp/test.lsp b/femtolisp/test.lsp index 2b65a85..f39364b 100644 --- a/femtolisp/test.lsp +++ b/femtolisp/test.lsp @@ -35,14 +35,6 @@ ;(set! a (map-int identity 10000)) ;(dotimes (i 200) (rfoldl cons () a)) -; iterative filter -(define (ifilter pred lst) - ((label f (lambda (accum lst) - (cond ((null? lst) (nreverse accum)) - ((not (pred (car lst))) (f accum (cdr lst))) - (#t (f (cons (car lst) accum) (cdr lst)))))) - () lst)) - (define (sort l) (if (or (null? l) (null? (cdr l))) l (let* ((piv (car l)) @@ -76,11 +68,10 @@ `((lambda (,name) (set! ,name ,fn)) ())) (define (square x) (* x x)) -(define (evenp x) (= x (* (/ x 2) 2))) (define (expt b p) (cond ((= p 0) 1) ((= b 0) 0) - ((evenp p) (square (expt b (/ p 2)))) + ((even? p) (square (expt b (div0 p 2)))) (#t (* b (expt b (- p 1)))))) (define (gcd a b) diff --git a/femtolisp/unittest.lsp b/femtolisp/unittest.lsp index 7fca220..23b32f2 100644 --- a/femtolisp/unittest.lsp +++ b/femtolisp/unittest.lsp @@ -109,5 +109,50 @@ (18 . e) (15 . a) (12 . a) (10 . e) (6 . d) (5 . c) (4 . e) (3 . d) (2 . c) (0 . b) (1 . a)))) +; hashing strange things +(assert (equal? + (hash '#0=(1 1 #0# . #0#)) + (hash '#1=(1 1 #1# 1 1 #1# . #1#)))) + +(assert (not (equal? + (hash '#0=(1 1 #0# . #0#)) + (hash '#1=(1 2 #1# 1 1 #1# . #1#))))) + +(assert (equal? + (hash '#0=((1 . #0#) . #0#)) + (hash '#1=((1 . #1#) (1 . #1#) . #1#)))) + +(assert (not (equal? + (hash '#0=((1 . #0#) . #0#)) + (hash '#1=((2 . #1#) (1 . #1#) . #1#))))) + +(assert (not (equal? + (hash '#0=((1 . #0#) . #0#)) + (hash '#1=((1 . #1#) (2 . #1#) . #1#))))) + +(assert (equal? + (hash #0=[1 [2 [#0#]] 3]) + (hash #1=[1 [2 [[1 [2 [#1#]] 3]]] 3]))) + +(assert (not (equal? + (hash #0=[1 [2 [#0#]] 3]) + (hash #1=[1 [2 [[5 [2 [#1#]] 3]]] 3])))) + +(assert (equal? + (hash #0=[1 #0# [2 [#0#]] 3]) + (hash #1=[1 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3]))) + +(assert (not (equal? + (hash #0=[1 #0# [2 [#0#]] 3]) + (hash #1=[6 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3])))) + +(assert (equal? + (hash [1 [2 [[1 1 [2 [1]] 3]]] 3]) + (hash [1 [2 [[1 1 [2 [1]] 3]]] 3]))) + +(assert (not (equal? + (hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3]) + (hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3])))) + (princ "all tests pass\n") #t