doing a better job hashing circular structure. the hash function is now

always fast and gives conservative correct answers, at the expense of fidelity on medium and large size structures. for example (hash (iota 15)) gives the same result as (hash (iota 14)).
2009-05-20 03:39:20 +00:00 · 2009-05-20 03:39:20 +00:00 · bfbbf051c9
parent c2026ba77c
commit bfbbf051c9
4 changed files with 77 additions and 36 deletions
--- a/femtolisp/equal.c
+++ b/femtolisp/equal.c
@ -165,6 +165,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, htable_t *table,

 static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq)
 {
+ cyc_compare_top:
    if (a==b)
        return fixnum(0);
    if (iscons(a)) {
@ -199,7 +200,9 @@ static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq)
            eq_union(table, a, b, ca, cb);
            d = cyc_compare(aa, ab, table, eq);
            if (numval(d)!=0) return d;
-            return cyc_compare(da, db, table, eq);
+            a = da;
+            b = db;
+            goto cyc_compare_top;
        }
        else {
            return fixnum(1);
@ -256,11 +259,9 @@ value_t equal(value_t a, value_t b)
 #define doublehash(a) int64to32hash(a)
 #endif

-// *flag means max recursion bound exceeded
-// *ut means this happened some time, so we had to start using the table
-static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
+// *ut means we had to start using the table
+static uptrint_t bounded_hash(value_t a, int bound, int *ut)
 {
-    *flag = 0;
    double d;
    numerictype_t nt;
    size_t i, len;
@ -273,8 +274,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
        if (h != (uptrint_t)HT_NOTFOUND)
            return h;
    }
-    if (bound <= 0) { *ut = *flag = 1; return 0; }
-    int bb, tg = tag(a);
+    int tg = tag(a);
    switch(tg) {
    case TAG_NUM :
    case TAG_NUM1:
@ -282,7 +282,7 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
        return doublehash(*(int64_t*)&d);
    case TAG_FUNCTION:
        if (uintval(a) > N_BUILTINS)
-            return bounded_hash(((function_t*)ptr(a))->bcode, bound, flag, ut);
+            return bounded_hash(((function_t*)ptr(a))->bcode, bound, ut);
        return inthash(a);
    case TAG_SYM:
        return ((symbol_t*)ptr(a))->hash;
@ -297,32 +297,38 @@ static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
        data = cv_data(cv);
        return memhash(data, cv_len(cv));
    case TAG_VECTOR:
+        if (bound <= 0) {
+            h = ++(*ut) + (uptrint_t)HT_NOTFOUND;
+            ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
+            return h;
+        }
        len = vector_size(a);
        for(i=0; i < len; i++) {
-            h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, flag, ut));
-            if (*flag) {
-                if (h == (uptrint_t)HT_NOTFOUND) h++;
-                ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
-            }
+            h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, ut)+1);
        }
        return h;
    case TAG_CONS:
+        if (bound <= 0)
+            return 1;
+        return MIX(bounded_hash(car_(a), bound/2, ut),
+                   bounded_hash(cdr_(a), bound/2, ut)+2);
+        // this should be able to hash long lists with greater fidelity,
+        // but it does not work yet.
+        /*
+        first = a;
        bb = BOUNDED_HASH_BOUND;
        do {
-            h = MIX(h, bounded_hash(car_(a), bound-1, flag, ut)+1);
-            if (*flag) {
-                if (h == (uptrint_t)HT_NOTFOUND) h++;
-                ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
-            }
+            h = MIX(h, bounded_hash(car_(a), bound-1, ut));
            a = cdr_(a);
            bb--;
-            if (bb <= 0) { *ut = *flag = 1; return h; }
-            if (*ut) {
-                if (ptrhash_get(&equal_eq_hashtable, (void*)a) != HT_NOTFOUND)
-                    return h;
+            if (bb <= 0) {
+                *ut = 1;
+                ptrhash_put(&equal_eq_hashtable, (void*)first, (void*)h);
+                return h;
            }
        } while (iscons(a));
-        return MIX(h, bounded_hash(a, bound-1, flag, ut)+1);
+        return MIX(h, bounded_hash(a, bound-1, ut));
+        */
    }
    return 0;
 }
@ -336,8 +342,8 @@ int equal_lispvalue(value_t a, value_t b)

 uptrint_t hash_lispvalue(value_t a)
 {
-    int flag, ut=0;
-    uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &flag, &ut);
+    int ut=0;
+    uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &ut);
    if (ut)
        htable_reset(&equal_eq_hashtable, 512);
    return n;
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@ -761,7 +761,6 @@ static value_t do_trycatch()
  - put the stack in this state
  - provide arg count
  - respect tail position
-  - call correct entry point (either eval_sexpr or apply_cl)
  - restore SP

  callee's responsibility:
--- a/femtolisp/test.lsp
+++ b/femtolisp/test.lsp
@ -35,14 +35,6 @@
 ;(set! a (map-int identity 10000))
 ;(dotimes (i 200) (rfoldl cons () a))

-; iterative filter
-(define (ifilter pred lst)
-  ((label f (lambda (accum lst)
-              (cond ((null? lst) (nreverse accum))
-                    ((not (pred (car lst))) (f accum (cdr lst)))
-                    (#t (f (cons (car lst) accum) (cdr lst))))))
-   () lst))
-
 (define (sort l)
  (if (or (null? l) (null? (cdr l))) l
    (let* ((piv (car l))
@ -76,11 +68,10 @@
  `((lambda (,name) (set! ,name ,fn)) ()))

 (define (square x) (* x x))
-(define (evenp  x) (= x (* (/ x 2) 2)))
 (define (expt b p)
  (cond ((= p 0) 1)
        ((= b 0) 0)
-        ((evenp p) (square (expt b (/ p 2))))
+        ((even? p) (square (expt b (div0 p 2))))
        (#t (* b (expt b (- p 1))))))

 (define (gcd a b)
--- a/femtolisp/unittest.lsp
+++ b/femtolisp/unittest.lsp
@ -109,5 +109,50 @@
                 (18 . e) (15 . a) (12 . a) (10 . e) (6 . d) (5 . c) (4 . e)
                 (3 . d) (2 . c) (0 . b) (1 . a))))

+; hashing strange things
+(assert (equal?
+	 (hash '#0=(1 1 #0# . #0#))
+	 (hash '#1=(1 1 #1# 1 1 #1# . #1#))))
+
+(assert (not (equal?
+	      (hash '#0=(1 1 #0# . #0#))
+	      (hash '#1=(1 2 #1# 1 1 #1# . #1#)))))
+
+(assert (equal?
+	 (hash '#0=((1 . #0#) . #0#))
+	 (hash '#1=((1 . #1#) (1 . #1#) . #1#))))
+
+(assert (not (equal?
+	      (hash '#0=((1 . #0#) . #0#))
+	      (hash '#1=((2 . #1#) (1 . #1#) . #1#)))))
+
+(assert (not (equal?
+	      (hash '#0=((1 . #0#) . #0#))
+	      (hash '#1=((1 . #1#) (2 . #1#) . #1#)))))
+
+(assert (equal?
+	 (hash #0=[1 [2 [#0#]] 3])
+	 (hash #1=[1 [2 [[1 [2 [#1#]] 3]]] 3])))
+
+(assert (not (equal?
+	      (hash #0=[1 [2 [#0#]] 3])
+	      (hash #1=[1 [2 [[5 [2 [#1#]] 3]]] 3]))))
+
+(assert (equal?
+	 (hash #0=[1 #0# [2 [#0#]] 3])
+	 (hash #1=[1 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3])))
+
+(assert (not (equal?
+	      (hash #0=[1 #0# [2 [#0#]] 3])
+	      (hash #1=[6 #1# [2 [[1 #1# [2 [#1#]] 3]]] 3]))))
+
+(assert (equal?
+	 (hash [1 [2 [[1 1 [2 [1]] 3]]] 3])
+	 (hash [1 [2 [[1 1 [2 [1]] 3]]] 3])))
+
+(assert (not (equal?
+	      (hash [6 1 [2 [[3 1 [2 [1]] 3]]] 3])
+	      (hash [6 1 [2 [[1 1 [2 [1]] 3]]] 3]))))
+
 (princ "all tests pass\n")
 #t