fixing hash of circular structure to avoid tree recursion (very important!!)

simplifying vector_grow
2009-05-13 04:03:13 +00:00 · 2009-05-13 04:03:13 +00:00 · 6c549f0399
parent 77e37368c9
commit 6c549f0399
3 changed files with 74 additions and 54 deletions
--- a/femtolisp/equal.c
+++ b/femtolisp/equal.c
@ -256,16 +256,24 @@ value_t equal(value_t a, value_t b)
 #define doublehash(a) int64to32hash(a)
 #endif

-static uptrint_t bounded_hash(value_t a, int bound)
+// *flag means max recursion bound exceeded
+// *ut means this happened some time, so we had to start using the table
+static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
 {
+    *flag = 0;
    double d;
    numerictype_t nt;
    size_t i, len;
    cvalue_t *cv;
    cprim_t *cp;
    void *data;
-    if (bound <= 0) return 0;
    uptrint_t h = 0;
+    if (*ut) {
+        h = (uptrint_t)ptrhash_get(&equal_eq_hashtable, (void*)a);
+        if (h != (uptrint_t)HT_NOTFOUND)
+            return h;
+    }
+    if (bound <= 0) { *ut = *flag = 1; return 0; }
    int bb, tg = tag(a);
    switch(tg) {
    case TAG_NUM :
@ -274,7 +282,7 @@ static uptrint_t bounded_hash(value_t a, int bound)
        return doublehash(*(int64_t*)&d);
    case TAG_FUNCTION:
        if (uintval(a) > N_BUILTINS)
-            return bounded_hash(((function_t*)ptr(a))->bcode, bound);
+            return bounded_hash(((function_t*)ptr(a))->bcode, bound, flag, ut);
        return inthash(a);
    case TAG_SYM:
        return ((symbol_t*)ptr(a))->hash;
@ -292,18 +300,30 @@ static uptrint_t bounded_hash(value_t a, int bound)
    case TAG_VECTOR:
        len = vector_size(a);
        for(i=0; i < len; i++) {
-            h = MIX(h, bounded_hash(vector_elt(a,i), bound-1));
+            h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, flag, ut));
+            if (*flag) {
+                if (h == (uptrint_t)HT_NOTFOUND) h++;
+                ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
+            }
        }
        return h;
    case TAG_CONS:
        bb = BOUNDED_HASH_BOUND;
        do {
-            h = MIX(h, bounded_hash(car_(a), bound-1)+1);
-            bb--;
-            if (bb <= 0) return h;
+            h = MIX(h, bounded_hash(car_(a), bound-1, flag, ut)+1);
+            if (*flag) {
+                if (h == (uptrint_t)HT_NOTFOUND) h++;
+                ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
+            }
            a = cdr_(a);
+            bb--;
+            if (bb <= 0) { *ut = *flag = 1; return h; }
+            if (*ut) {
+                if (ptrhash_get(&equal_eq_hashtable, (void*)a) != HT_NOTFOUND)
+                    return h;
+            }
        } while (iscons(a));
-        return MIX(h, bounded_hash(a, bound-1)+1);
+        return MIX(h, bounded_hash(a, bound-1, flag, ut)+1);
    }
    return 0;
 }
@ -317,7 +337,11 @@ int equal_lispvalue(value_t a, value_t b)

 uptrint_t hash_lispvalue(value_t a)
 {
-    return bounded_hash(a, BOUNDED_HASH_BOUND);
+    int flag, ut=0;
+    uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &flag, &ut);
+    if (ut)
+        htable_reset(&equal_eq_hashtable, 512);
+    return n;
 }

 value_t fl_hash(value_t *args, u_int32_t nargs)
--- a/femtolisp/flisp.c
+++ b/femtolisp/flisp.c
@ -401,22 +401,23 @@ static value_t relocate(value_t v)

    if (t == TAG_VECTOR) {
        // N.B.: 0-length vectors secretly have space for a first element
-        size_t i, newsz, sz = vector_size(v);
-        newsz = sz;
-        if (vector_elt(v,-1) & 0x1)
-            newsz += vector_grow_amt(sz);
-        nc = tagptr(alloc_words(newsz+1), TAG_VECTOR);
-        vector_setsize(nc, newsz);
-        a = vector_elt(v,0);
-        forward(v, nc);
-        i = 0;
-        if (sz > 0) {
-            vector_elt(nc,0) = relocate(a); i++;
-            for(; i < sz; i++)
-                vector_elt(nc,i) = relocate(vector_elt(v,i));
+        size_t i, sz = vector_size(v);
+        if (vector_elt(v,-1) & 0x1) {
+            // grown vector
+            nc = relocate(vector_elt(v,0));
+            forward(v, nc);
+        }
+        else {
+            nc = tagptr(alloc_words(sz+1), TAG_VECTOR);
+            vector_setsize(nc, sz);
+            a = vector_elt(v,0);
+            forward(v, nc);
+            if (sz > 0) {
+                vector_elt(nc,0) = relocate(a);
+                for(i=1; i < sz; i++)
+                    vector_elt(nc,i) = relocate(vector_elt(v,i));
+            }
        }
-        for(; i < newsz; i++)
-            vector_elt(nc,i) = NIL;
        return nc;
    }
    else if (t == TAG_CPRIM) {
@ -647,24 +648,6 @@ value_t fl_cons(value_t a, value_t b)
    return c;
 }

-// NOTE: this is NOT an efficient operation. it is only used by the
-// reader; vectors should not generally be resized.
-// vector_grow requires at least 1 and up to 3 garbage collections!
-static value_t vector_grow(value_t v)
-{
-    size_t s = vector_size(v);
-    size_t d = vector_grow_amt(s);
-    PUSH(v);
-    // first allocate enough space to guarantee the heap will be big enough
-    // for the new vector
-    alloc_words(d);
-    // setting low bit of vector's size acts as a flag to the collector
-    // to grow this vector as it is relocated
-    ((size_t*)ptr(Stack[SP-1]))[0] |= 0x1;
-    gc(0);
-    return POP();
-}
-
 int isnumber(value_t v)
 {
    return (isfixnum(v) || iscprim(v));
@ -676,13 +659,6 @@ int isnumber(value_t v)

 // eval -----------------------------------------------------------------------

-/*
-  there is one interesting difference between this and (lambda x x).
-  (eq a (apply list a)) is always false for nonempty a, while
-  (eq a (apply (lambda x x) a)) is always true. the justification for this
-  is that a vararg lambda often needs to recur by applying itself to the
-  tail of its argument list, so copying the list would be unacceptable.
-*/
 static value_t list(value_t *args, uint32_t nargs)
 {
    cons_t *c;
@ -841,7 +817,7 @@ static value_t apply_cl(uint32_t nargs)
            POPN(1);
            goto next_op;
        case OP_NOP: goto next_op;
-        case OP_DUP: v = Stack[SP-1]; PUSH(v); goto next_op;
+        case OP_DUP: SP++; Stack[SP-1] = Stack[SP-2]; goto next_op;
        case OP_POP: POPN(1); goto next_op;
        case OP_TCALL:
            n = code[ip++];  // nargs
--- a/femtolisp/read.c
+++ b/femtolisp/read.c
@ -344,6 +344,28 @@ static u_int32_t peek()
    return toktype;
 }

+// NOTE: this is NOT an efficient operation. it is only used by the
+// reader, and requires at least 1 and up to 3 garbage collections!
+static value_t vector_grow(value_t v)
+{
+    size_t s = vector_size(v);
+    size_t d = vector_grow_amt(s);
+    PUSH(v);
+    value_t newv = alloc_vector(s+d, 1);
+    v = Stack[SP-1];
+    int i;
+    for(i=0; i < s; i++)
+        vector_elt(newv, i) = vector_elt(v, i);
+    // use gc to rewrite references from the old vector to the new
+    Stack[SP-1] = newv;
+    if (s > 0) {
+        ((size_t*)ptr(v))[0] |= 0x1;
+        vector_elt(v, 0) = newv;
+        gc(0);
+    }
+    return POP();
+}
+
 static value_t read_vector(value_t label, u_int32_t closer)
 {
    value_t v=the_empty_vector, elt;
@ -354,13 +376,11 @@ static value_t read_vector(value_t label, u_int32_t closer)
    while (peek() != closer) {
        if (ios_eof(F))
            lerror(ParseError, "read: unexpected end of input");
-        if (i == 0) {
-            v = Stack[SP-1] = alloc_vector(4, 1);
+        if (i >= vector_size(v)) {
+            v = Stack[SP-1] = vector_grow(v);
            if (label != UNBOUND)
                ptrhash_put(&readstate->backrefs, (void*)label, (void*)v);
        }
-        else if (i >= vector_size(v))
-            Stack[SP-1] = vector_grow(v);
        elt = do_read_sexpr(UNBOUND);
        v = Stack[SP-1];
        vector_elt(v,i) = elt;