fixing hash of circular structure to avoid tree recursion (very important!!)

simplifying vector_grow
This commit is contained in:
JeffBezanson 2009-05-13 04:03:13 +00:00
parent 77e37368c9
commit 6c549f0399
3 changed files with 74 additions and 54 deletions

View File

@ -256,16 +256,24 @@ value_t equal(value_t a, value_t b)
#define doublehash(a) int64to32hash(a) #define doublehash(a) int64to32hash(a)
#endif #endif
static uptrint_t bounded_hash(value_t a, int bound) // *flag means max recursion bound exceeded
// *ut means this happened some time, so we had to start using the table
static uptrint_t bounded_hash(value_t a, int bound, int *flag, int *ut)
{ {
*flag = 0;
double d; double d;
numerictype_t nt; numerictype_t nt;
size_t i, len; size_t i, len;
cvalue_t *cv; cvalue_t *cv;
cprim_t *cp; cprim_t *cp;
void *data; void *data;
if (bound <= 0) return 0;
uptrint_t h = 0; uptrint_t h = 0;
if (*ut) {
h = (uptrint_t)ptrhash_get(&equal_eq_hashtable, (void*)a);
if (h != (uptrint_t)HT_NOTFOUND)
return h;
}
if (bound <= 0) { *ut = *flag = 1; return 0; }
int bb, tg = tag(a); int bb, tg = tag(a);
switch(tg) { switch(tg) {
case TAG_NUM : case TAG_NUM :
@ -274,7 +282,7 @@ static uptrint_t bounded_hash(value_t a, int bound)
return doublehash(*(int64_t*)&d); return doublehash(*(int64_t*)&d);
case TAG_FUNCTION: case TAG_FUNCTION:
if (uintval(a) > N_BUILTINS) if (uintval(a) > N_BUILTINS)
return bounded_hash(((function_t*)ptr(a))->bcode, bound); return bounded_hash(((function_t*)ptr(a))->bcode, bound, flag, ut);
return inthash(a); return inthash(a);
case TAG_SYM: case TAG_SYM:
return ((symbol_t*)ptr(a))->hash; return ((symbol_t*)ptr(a))->hash;
@ -292,18 +300,30 @@ static uptrint_t bounded_hash(value_t a, int bound)
case TAG_VECTOR: case TAG_VECTOR:
len = vector_size(a); len = vector_size(a);
for(i=0; i < len; i++) { for(i=0; i < len; i++) {
h = MIX(h, bounded_hash(vector_elt(a,i), bound-1)); h = MIX(h, bounded_hash(vector_elt(a,i), bound-1, flag, ut));
if (*flag) {
if (h == (uptrint_t)HT_NOTFOUND) h++;
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
}
} }
return h; return h;
case TAG_CONS: case TAG_CONS:
bb = BOUNDED_HASH_BOUND; bb = BOUNDED_HASH_BOUND;
do { do {
h = MIX(h, bounded_hash(car_(a), bound-1)+1); h = MIX(h, bounded_hash(car_(a), bound-1, flag, ut)+1);
bb--; if (*flag) {
if (bb <= 0) return h; if (h == (uptrint_t)HT_NOTFOUND) h++;
ptrhash_put(&equal_eq_hashtable, (void*)a, (void*)h);
}
a = cdr_(a); a = cdr_(a);
bb--;
if (bb <= 0) { *ut = *flag = 1; return h; }
if (*ut) {
if (ptrhash_get(&equal_eq_hashtable, (void*)a) != HT_NOTFOUND)
return h;
}
} while (iscons(a)); } while (iscons(a));
return MIX(h, bounded_hash(a, bound-1)+1); return MIX(h, bounded_hash(a, bound-1, flag, ut)+1);
} }
return 0; return 0;
} }
@ -317,7 +337,11 @@ int equal_lispvalue(value_t a, value_t b)
uptrint_t hash_lispvalue(value_t a) uptrint_t hash_lispvalue(value_t a)
{ {
return bounded_hash(a, BOUNDED_HASH_BOUND); int flag, ut=0;
uptrint_t n = bounded_hash(a, BOUNDED_HASH_BOUND, &flag, &ut);
if (ut)
htable_reset(&equal_eq_hashtable, 512);
return n;
} }
value_t fl_hash(value_t *args, u_int32_t nargs) value_t fl_hash(value_t *args, u_int32_t nargs)

View File

@ -401,22 +401,23 @@ static value_t relocate(value_t v)
if (t == TAG_VECTOR) { if (t == TAG_VECTOR) {
// N.B.: 0-length vectors secretly have space for a first element // N.B.: 0-length vectors secretly have space for a first element
size_t i, newsz, sz = vector_size(v); size_t i, sz = vector_size(v);
newsz = sz; if (vector_elt(v,-1) & 0x1) {
if (vector_elt(v,-1) & 0x1) // grown vector
newsz += vector_grow_amt(sz); nc = relocate(vector_elt(v,0));
nc = tagptr(alloc_words(newsz+1), TAG_VECTOR); forward(v, nc);
vector_setsize(nc, newsz); }
a = vector_elt(v,0); else {
forward(v, nc); nc = tagptr(alloc_words(sz+1), TAG_VECTOR);
i = 0; vector_setsize(nc, sz);
if (sz > 0) { a = vector_elt(v,0);
vector_elt(nc,0) = relocate(a); i++; forward(v, nc);
for(; i < sz; i++) if (sz > 0) {
vector_elt(nc,i) = relocate(vector_elt(v,i)); vector_elt(nc,0) = relocate(a);
for(i=1; i < sz; i++)
vector_elt(nc,i) = relocate(vector_elt(v,i));
}
} }
for(; i < newsz; i++)
vector_elt(nc,i) = NIL;
return nc; return nc;
} }
else if (t == TAG_CPRIM) { else if (t == TAG_CPRIM) {
@ -647,24 +648,6 @@ value_t fl_cons(value_t a, value_t b)
return c; return c;
} }
// NOTE: this is NOT an efficient operation. it is only used by the
// reader; vectors should not generally be resized.
// vector_grow requires at least 1 and up to 3 garbage collections!
static value_t vector_grow(value_t v)
{
size_t s = vector_size(v);
size_t d = vector_grow_amt(s);
PUSH(v);
// first allocate enough space to guarantee the heap will be big enough
// for the new vector
alloc_words(d);
// setting low bit of vector's size acts as a flag to the collector
// to grow this vector as it is relocated
((size_t*)ptr(Stack[SP-1]))[0] |= 0x1;
gc(0);
return POP();
}
int isnumber(value_t v) int isnumber(value_t v)
{ {
return (isfixnum(v) || iscprim(v)); return (isfixnum(v) || iscprim(v));
@ -676,13 +659,6 @@ int isnumber(value_t v)
// eval ----------------------------------------------------------------------- // eval -----------------------------------------------------------------------
/*
there is one interesting difference between this and (lambda x x).
(eq a (apply list a)) is always false for nonempty a, while
(eq a (apply (lambda x x) a)) is always true. the justification for this
is that a vararg lambda often needs to recur by applying itself to the
tail of its argument list, so copying the list would be unacceptable.
*/
static value_t list(value_t *args, uint32_t nargs) static value_t list(value_t *args, uint32_t nargs)
{ {
cons_t *c; cons_t *c;
@ -841,7 +817,7 @@ static value_t apply_cl(uint32_t nargs)
POPN(1); POPN(1);
goto next_op; goto next_op;
case OP_NOP: goto next_op; case OP_NOP: goto next_op;
case OP_DUP: v = Stack[SP-1]; PUSH(v); goto next_op; case OP_DUP: SP++; Stack[SP-1] = Stack[SP-2]; goto next_op;
case OP_POP: POPN(1); goto next_op; case OP_POP: POPN(1); goto next_op;
case OP_TCALL: case OP_TCALL:
n = code[ip++]; // nargs n = code[ip++]; // nargs

View File

@ -344,6 +344,28 @@ static u_int32_t peek()
return toktype; return toktype;
} }
// NOTE: this is NOT an efficient operation. it is only used by the
// reader, and requires at least 1 and up to 3 garbage collections!
static value_t vector_grow(value_t v)
{
size_t s = vector_size(v);
size_t d = vector_grow_amt(s);
PUSH(v);
value_t newv = alloc_vector(s+d, 1);
v = Stack[SP-1];
int i;
for(i=0; i < s; i++)
vector_elt(newv, i) = vector_elt(v, i);
// use gc to rewrite references from the old vector to the new
Stack[SP-1] = newv;
if (s > 0) {
((size_t*)ptr(v))[0] |= 0x1;
vector_elt(v, 0) = newv;
gc(0);
}
return POP();
}
static value_t read_vector(value_t label, u_int32_t closer) static value_t read_vector(value_t label, u_int32_t closer)
{ {
value_t v=the_empty_vector, elt; value_t v=the_empty_vector, elt;
@ -354,13 +376,11 @@ static value_t read_vector(value_t label, u_int32_t closer)
while (peek() != closer) { while (peek() != closer) {
if (ios_eof(F)) if (ios_eof(F))
lerror(ParseError, "read: unexpected end of input"); lerror(ParseError, "read: unexpected end of input");
if (i == 0) { if (i >= vector_size(v)) {
v = Stack[SP-1] = alloc_vector(4, 1); v = Stack[SP-1] = vector_grow(v);
if (label != UNBOUND) if (label != UNBOUND)
ptrhash_put(&readstate->backrefs, (void*)label, (void*)v); ptrhash_put(&readstate->backrefs, (void*)label, (void*)v);
} }
else if (i >= vector_size(v))
Stack[SP-1] = vector_grow(v);
elt = do_read_sexpr(UNBOUND); elt = do_read_sexpr(UNBOUND);
v = Stack[SP-1]; v = Stack[SP-1];
vector_elt(v,i) = elt; vector_elt(v,i) = elt;