/* * * h a s h . c -- Hash Tables * * Copyright © 1993-1999 Erick Gallesio - I3S-CNRS/ESSI * * * Permission to use, copy, modify, distribute,and license this * software and its documentation for any purpose is hereby granted, * provided that existing copyright notices are retained in all * copies and that this notice is included verbatim in any * distributions. No written agreement, license, or royalty fee is * required for any of the authorized uses. * This software is provided ``AS IS'' without express or implied * warranty. * * Author: Erick Gallesio [eg@kaolin.unice.fr] * Creation date: 17-Jan-1994 17:49 * Last file update: 3-Sep-1999 20:20 (eg) */ #include static void free_hash_table(SCM ht); static void mark_hash_table(SCM ht); static PRIMITIVE hash_table_hash(SCM obj); /**** Definitions for new type tc_hash ****/ static int tc_hash; static STk_extended_scheme_type hash_table_type = { "hash-table", /* name */ 0, /* is_procp */ mark_hash_table, /* gc_mark_fct */ free_hash_table, /* gc_sweep_fct */ NULL, /* apply_fct */ NULL, /* display_fct */ NULL, /* compare_fct */ }; typedef enum {hash_eq, hash_string, hash_comp} hash_type; typedef struct { struct Tcl_HashTable *h; hash_type type; SCM comparison; /* unused if not a comparison hash table */ SCM sxhash_fct; } Scheme_hash_table; #define HASH(x) ((Scheme_hash_table *) ((x)->storage_as.extension.data)) #define LHASH(x) ((x)->storage_as.extension.data) #define HASHP(x) (TYPEP(x, tc_hash)) #define HASH_COMP(x) (HASH(x)->comparison) #define HASH_SXHASH(x) (HASH(x)->sxhash_fct) #define HASH_H(x) (HASH(x)->h) #define HASH_TYPE(x) (HASH(x)->type) #define HASH_WORD(h1, h2) ((((h1) << 4) + (h1)) ^ (h2)) /* Good repartition ? */ /* This function is duplicated from tclHash.c * It would be possible to export this function form tchHash.c, but I prefer * to avoid modifications, as far as possible, from files in the Tcl directory * Given the size of this function, a duplication should not be a problem */ static unsigned long HashString(register char *string) { register unsigned long result; register int c; result = 0; for ( ; ; ) { c = *string++; if (c == 0) break; result += (result<<3) + c; } return result; } /* * sxhash permits to calculate a "universal" hash value a` la CL sxhash * function * */ unsigned long sxhash(SCM obj) { register unsigned long h; register SCM tmp; register int i; switch (TYPE(obj)) { case tc_cons: h = sxhash(CAR(obj)); for(tmp=CDR(obj); CONSP(tmp); tmp=CDR(tmp)) h = HASH_WORD(h, sxhash(CAR(tmp))); h = HASH_WORD(h, sxhash(tmp)); return h; case tc_integer: case tc_bignum: return (unsigned long) STk_integer_value_no_overflow(obj); case tc_flonum: return (unsigned long) FLONM(obj); case tc_symbol: /* For some reasons * return (unsigned long) obj; * which is correct, yiels worse results than the * following code. Perhaps, we have a better *repartion by using hashing on the chars. Weird! */ return HASH_WORD(0, (unsigned long) obj); return HashString(PNAME(obj)); case tc_keyword: return HashString(KEYVAL(obj)); case tc_string: return HashString(CHARS(obj)); case tc_vector: h = 0; for (i=VECTSIZE(obj)-1; i >= 0; i--) h = HASH_WORD(h, sxhash(VECT(obj)[i])); return h; default: /* Either a small constant or a complex type (STklos * object, user defined type, hashtable...). In this * case we return the type of the object. This is very * inneficient but it should be rare to use a structured * object as a key. Note that returning the type * works even if we have not COMPACT_SMALL_CST (as far as * I know, nobody undefine it). In this case SMALL_CSTP * always return FALSE. */ return (SMALL_CSTP(obj)) ? (unsigned long) obj: (unsigned long) TYPE(obj); } } /* * find_key: equivalent to the assoc function except that it works * with any comparison. If no association is found, find_key returns * NULL */ static SCM find_key(SCM obj, SCM alist, SCM comparison) { register SCM l, tmp; for(l=alist; !NULLP(l); l=CDR(l)) { tmp = CAR(l); if (Apply2(comparison, obj, CAR(tmp)) != Ntruth) return tmp; } return NULL; } /* * remove_key: remove the given key from the association list. * The key is compared with the ``comparison'' function */ static SCM remove_key(SCM obj, SCM alist, SCM comparison) { register SCM l; for(l=NIL; !NULLP(alist); alist=CDR(alist)) { if (Apply2(comparison, obj, CAR(CAR(alist))) == Ntruth) l = Cons(CAR(alist), l); } return l; } /* * The_func returns a cell which represent the ORIGINAL subr given as * a string. Don't use intern to avoid problems if "eq?" or * "hash-table-hash" have been redefined by the user. This is probably * not useful, but ... */ static SCM the_func(char *s) { SCM z; if (strcmp(s, "eq?") == 0) { NEWCELL(z, tc_subr_2); z->storage_as.subr0.f = (SCM (*)()) STk_eq; } else { /* s is "hash-table-hash" */ NEWCELL(z, tc_subr_1); z->storage_as.subr0.f = (SCM (*)()) sxhash; } z->storage_as.subr0.name = s; return z; } /******************************************************************************/ /* * STk_sxhash: the Scheme version of sxhash */ static PRIMITIVE hash_table_hash(SCM obj) { long int x = sxhash(obj); return STk_makeinteger((x < 0) ? -x : x); } static PRIMITIVE make_hash_table(SCM l, int len) { SCM sxhash, compar, z; hash_type type=hash_comp; switch (len) { case 0: compar = the_func("eq?"); sxhash = the_func("hash-table-hash"); break; case 1: compar = CAR(l); sxhash = the_func("hash-table-hash"); break; case 2: compar = CAR(l); sxhash = CAR(CDR(l)); break; default: STk_err("make-hash-table: bad list of parameters", l); } if (STk_procedurep(compar) == Ntruth) STk_err("make-hash-table: bad comparison function", compar); if (STk_procedurep(sxhash) == Ntruth) STk_err("make-hash-table: bad hash function", sxhash); if (TYPEP(compar, tc_subr_2)) /* * We have a procedure. See if it is 'eq?' or 'string?'. * If so, we implement the hash table in the most efficient * way. Otherwise, we will use another method (i.e. find a key * code for each object with the sxhash function and strore * each element with this key in a A-list, the A-list for a * given key is is found using the Tcl Hash functions */ if ((void *) SUBRF(compar) == (void *) STk_eq) type = hash_eq; else if ((void *) SUBRF(compar) == (void *) STk_streq) type = hash_string; /* Make a new hash table object */ NEWCELL(z, tc_hash); LHASH(z) = (Scheme_hash_table *)must_malloc(sizeof(Scheme_hash_table)); HASH(z)->h = must_malloc(sizeof(Tcl_HashTable)); HASH(z)->type = type; HASH(z)->sxhash_fct = sxhash; HASH(z)->comparison = compar; Tcl_InitHashTable(HASH_H(z), (type == hash_string)? TCL_STRING_KEYS : TCL_ONE_WORD_KEYS); return z; } static PRIMITIVE hash_table_p(SCM obj) { return HASHP(obj) ? Truth: Ntruth; } static PRIMITIVE hash_table_put(SCM ht, SCM key, SCM val) { Tcl_HashEntry *entry; SCM index; int new_entry; if (!HASHP(ht)) Err("hash-table-put!: bad hash table", ht); switch (HASH_TYPE(ht)) { case hash_eq: entry = Tcl_CreateHashEntry(HASH_H(ht), (char *) key, &new_entry); Tcl_SetHashValue(entry, val); break; case hash_string: if (!STRINGP(key)) Err("hash-table-put!: bad string", key); entry = Tcl_CreateHashEntry(HASH_H(ht), CHARS(key), &new_entry); Tcl_SetHashValue(entry, val); break; case hash_comp: index = Apply1(HASH_SXHASH(ht), key); if ((entry=Tcl_FindHashEntry(HASH_H(ht), (char *) index)) != NULL) { SCM old = (SCM) Tcl_GetHashValue(entry); /* waz here */ SCM tmp = find_key(key, old, HASH_COMP(ht)); if (tmp) { CAR(tmp) = key; /* Generally useless. But we don't master the hash fct */ CDR(tmp) = val; /* (i.e. it can have side-effects) */ } else Tcl_SetHashValue(entry, Cons(Cons(key, val), old)); } else { /* new bucket */ SCM tmp = LIST1(Cons(key, val)); /* place it in tmp to avoid GC problems */ entry = Tcl_CreateHashEntry(HASH_H(ht), (char *) index, &new_entry); Tcl_SetHashValue(entry, tmp); } break; } return UNDEFINED; } static PRIMITIVE hash_table_get(SCM ht, SCM key, SCM default_value) { Tcl_HashEntry *entry; SCM index; if (!HASHP(ht)) Err("hash-table-get: bad hash table", ht); switch (HASH_TYPE(ht)) { case hash_eq: if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) key)) /* Key already in hash table */ return (SCM) Tcl_GetHashValue(entry); break; case hash_string: if (!STRINGP(key)) Err("hash-table-get: bad string", key); if (entry=Tcl_FindHashEntry(HASH_H(ht), CHARS(key))) /* Key already in hash table */ return (SCM) Tcl_GetHashValue(entry); break; case hash_comp: index = Apply1(HASH_SXHASH(ht), key); if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) index)) { SCM tmp, val = (SCM) Tcl_GetHashValue(entry); if (tmp = find_key(key, val, HASH_COMP(ht))) return CDR(tmp); } break; } /* If we are here, key was not present in table */ if (default_value == UNBOUND) Err("hash-table-get: entry not defined for this key", key); return default_value; } static PRIMITIVE hash_table_remove(SCM ht, SCM key) { Tcl_HashEntry *entry; SCM index; if (!HASHP(ht)) Err("hash-table-remove!: bad hash table", ht); switch (HASH_TYPE(ht)) { case hash_eq: if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) key)) /* Key alrady in hash table */ Tcl_DeleteHashEntry(entry); break; case hash_string: if (!STRINGP(key)) Err("hash-table-remove: bad string", key); if (entry=Tcl_FindHashEntry(HASH_H(ht), CHARS(key))) /* Key alrady in hash table */ Tcl_DeleteHashEntry(entry); break; case hash_comp: index = Apply1(HASH_SXHASH(ht), key); if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) index)) { SCM tmp, val = (SCM) Tcl_GetHashValue(entry); tmp = remove_key(key, val, HASH_COMP(ht)); if (NULLP(tmp)) /* This was the only entry for this key. We can delete the entry */ Tcl_DeleteHashEntry(entry); else Tcl_SetHashValue(entry, tmp); } break; } return UNDEFINED; } static PRIMITIVE hash_table_for_each(SCM ht, SCM proc) { Tcl_HashEntry *entry; Tcl_HashSearch search; if (!HASHP(ht)) Err("hash-table-for-each: bad hash table", ht); if (STk_procedurep(proc)==Ntruth) Err("hash-table-for-each: bad procedure", proc); for (entry = Tcl_FirstHashEntry(HASH_H(ht), &search); entry; entry = Tcl_NextHashEntry(&search)) { switch (HASH_TYPE(ht)) { case hash_eq: Apply2(proc, (SCM) Tcl_GetHashKey(HASH_H(ht), entry), (SCM) Tcl_GetHashValue(entry)); break; case hash_string: { char *s = Tcl_GetHashKey(HASH_H(ht), entry); Apply2(proc, STk_makestring(s), (SCM) Tcl_GetHashValue(entry)); } break; case hash_comp: { SCM val; for (val=(SCM) Tcl_GetHashValue(entry); !NULLP(val); val = CDR(val)) Apply2(proc, CAR(CAR(val)), CDR(CAR(val))); } } } return UNDEFINED; } static PRIMITIVE hash_table_map(SCM ht, SCM proc) { Tcl_HashEntry *entry; Tcl_HashSearch search; SCM result = NIL; if (!HASHP(ht)) Err("hash-table-map: bad hash table", ht); if (STk_procedurep(proc)==Ntruth) Err("hash-table-map: bad procedure", proc); for (entry = Tcl_FirstHashEntry(HASH_H(ht), &search); entry; entry = Tcl_NextHashEntry(&search)) { switch (HASH_TYPE(ht)) { case hash_eq: result = Cons(Apply2(proc, (SCM)Tcl_GetHashKey(HASH_H(ht), entry), (SCM) Tcl_GetHashValue(entry)), result); break; case hash_string: { char *s = Tcl_GetHashKey(HASH_H(ht), entry); result = Cons(Apply2(proc, STk_makestring(s), (SCM) Tcl_GetHashValue(entry)), result); } break; case hash_comp: { SCM val; for (val=(SCM) Tcl_GetHashValue(entry); !NULLP(val); val = CDR(val)) result = Cons(Apply2(proc, CAR(CAR(val)), CDR(CAR(val))), result); } } } return result; } static PRIMITIVE hash_table_stats(SCM ht) { Tcl_HashSearch search; char *s; if (!HASHP(ht)) Err("hash-table-stats: bad hash table", ht); /* * There is a bug in the Tcl/hash module. Tcl_HashStats makes a division by 0 * if the hash table is empty. */ if (Tcl_FirstHashEntry(HASH_H(ht), &search)) { s = Tcl_HashStats(HASH_H(ht)); Puts(s, STk_curr_eport); Putc('\n', STk_curr_eport); free(s); } else Puts("Empty hash table\n", STk_curr_eport); return UNDEFINED; } static void free_hash_table(SCM ht) { Tcl_DeleteHashTable(HASH_H(ht)); free(HASH_H(ht)); free(HASH(ht)); } static void mark_hash_table(SCM ht) { Tcl_HashEntry *entry; Tcl_HashSearch search; /* Mark information stored in the hash structure */ STk_gc_mark(HASH_COMP(ht)); STk_gc_mark(HASH_SXHASH(ht)); /* Mark the content of the Tcl hash table */ for (entry = Tcl_FirstHashEntry(HASH_H(ht), &search); entry; entry = Tcl_NextHashEntry(&search)) { /* The only cas where the must be marked is if the hash table is * an eq? one. In effect, * hash_eq table: the key is in the Tcl key field and must be marked * hash_string table: Tcl hashtable has made a copy in the entry * hash_comp: the key is in the value field which will be always marked */ if (HASH_TYPE(ht) == hash_eq) STk_gc_mark((SCM) Tcl_GetHashKey(HASH_H(ht), entry)); /* and mark the value in all cases */ STk_gc_mark((SCM) Tcl_GetHashValue(entry)); } } /******************************************************************************/ PRIMITIVE STk_init_hash(void) { tc_hash = STk_add_new_type(&hash_table_type); STk_add_new_primitive("make-hash-table", tc_lsubr, make_hash_table); STk_add_new_primitive("hash-table?", tc_subr_1, hash_table_p); STk_add_new_primitive("hash-table-hash", tc_subr_1, hash_table_hash); STk_add_new_primitive("hash-table-put!", tc_subr_3, hash_table_put); STk_add_new_primitive("hash-table-get", tc_subr_2_or_3, hash_table_get); STk_add_new_primitive("hash-table-remove!", tc_subr_2, hash_table_remove); STk_add_new_primitive("hash-table-for-each", tc_subr_2, hash_table_for_each); STk_add_new_primitive("hash-table-map", tc_subr_2, hash_table_map); STk_add_new_primitive("hash-table-stats", tc_subr_1, hash_table_stats); return UNDEFINED; }