/* * * h a s h . c -- Hash Tables * * Copyright © 1993-1997 Erick Gallesio - I3S-CNRS/ESSI * * * Permission to use, copy, and/or distribute this software and its * documentation for any purpose and without fee is hereby granted, provided * that both the above copyright notice and this permission notice appear in * all copies and derived works. Fees for distribution or use of this * software or derived works may only be charged with express written * permission of the copyright holder. * This software is provided ``as is'' without express or implied warranty. * * This software is a derivative work of other copyrighted softwares; the * copyright notices of these softwares are placed in the file COPYRIGHTS * * * Author: Erick Gallesio [eg@kaolin.unice.fr] * Creation date: 17-Jan-1994 17:49 * Last file update: 31-Dec-1997 15:36 */ #include static void free_hash_table(SCM ht); static void mark_hash_table(SCM ht); static PRIMITIVE hash_table_hash(SCM obj); /**** Definitions for new type tc_hash ****/ static int tc_hash; static STk_extended_scheme_type hash_table_type = { "hash-table", /* name */ 0, /* is_procp */ mark_hash_table, /* gc_mark_fct */ free_hash_table, /* gc_sweep_fct */ NULL, /* apply_fct */ NULL, /* display_fct */ NULL, /* compare_fct */ }; typedef enum {hash_eq, hash_string, hash_comp} hash_type; typedef struct { struct Tcl_HashTable *h; hash_type type; SCM comparison; /* unused if not a comparison hash table */ SCM sxhash_fct; } Scheme_hash_table; #define HASH(x) ((Scheme_hash_table *) ((x)->storage_as.extension.data)) #define LHASH(x) ((x)->storage_as.extension.data) #define HASHP(x) (TYPEP(x, tc_hash)) #define HASH_COMP(x) (HASH(x)->comparison) #define HASH_SXHASH(x) (HASH(x)->sxhash_fct) #define HASH_H(x) (HASH(x)->h) #define HASH_TYPE(x) (HASH(x)->type) #define HASH_WORD(h1, h2) ((((h1) << 4) + (h1)) ^ (h2)) /* Good repartition ? */ /* This function is duplicated from tclHash.c * It would be possible to export this function form tchHash.c, but I prefer * to avoid modifications, as far as possible, from files in the Tcl directory * Given the size of this function, a duplication should not be a problem */ static unsigned long HashString(register char *string) { register unsigned long result; register int c; result = 0; for ( ; ; ) { c = *string++; if (c == 0) break; result += (result<<3) + c; } return result; } /* * sxhash permits to calculate a "universal" hash value a` la CL sxhash * function * */ unsigned long sxhash(SCM obj) { register unsigned long h; register SCM tmp; register int i; switch (TYPE(obj)) { case tc_cons: h = sxhash(CAR(obj)); for(tmp=CDR(obj); CONSP(tmp); tmp=CDR(tmp)) h = HASH_WORD(h, sxhash(CAR(tmp))); h = HASH_WORD(h, sxhash(tmp)); return h; case tc_integer: case tc_bignum: return (unsigned long) STk_integer_value_no_overflow(obj); case tc_flonum: return (unsigned long) FLONM(obj); case tc_symbol: return HashString(PNAME(obj)); case tc_keyword: return HashString(KEYVAL(obj)); case tc_string: return HashString(CHARS(obj)); case tc_vector: h = 0; for (i=VECTSIZE(obj)-1; i >= 0; i--) h = HASH_WORD(h, sxhash(VECT(obj)[i])); return h; default: /* Either a small constant or a complex type (STklos * object, user defined type, hashtable...). In this * case we return the type of the object. This is very * inneficient but it should be rare to use a structured * object as a key. Note that returning the type * works even if we have not COMPACT_SMALL_CST (as far as * I know, nobody undefine it). In this case SMALL_CSTP * always return FALSE. */ return (SMALL_CSTP(obj)) ? (unsigned long) obj: (unsigned long) TYPE(obj); } } /* * find_key: equivalent to the assoc function except that it works * with any comparison. If no association is found, find_key returns * NULL */ static SCM find_key(SCM obj, SCM alist, SCM comparison) { register SCM l, tmp; for(l=alist; !NULLP(l); l=CDR(l)) { tmp = CAR(l); if (STk_apply(comparison, LIST2(obj, CAR(tmp))) != Ntruth) return tmp; } return NULL; } /* * remove_key: remove the given key from the association list. * The key is compared with the ``comparison'' function */ static SCM remove_key(SCM obj, SCM alist, SCM comparison) { register SCM l, tmp; for(l=NIL; !NULLP(alist); alist=CDR(alist)) { if (STk_apply(comparison, LIST2(obj, CAR(CAR(alist)))) == Ntruth) l = Cons(CAR(alist), l); } return l; } /* * The_func returns a cell which represent the ORIGINAL subr given as * a string. Don't use intern to avoid problems if "eq?" or * "hash-table-hash" have been redefined by the user. This is probably * not useful, but ... */ static SCM the_func(char *s) { SCM z; if (strcmp(s, "eq?") == 0) { NEWCELL(z, tc_subr_2); z->storage_as.subr0.f = (SCM (*)()) STk_eq; } else { /* s is "hash-table-hash" */ NEWCELL(z, tc_subr_1); z->storage_as.subr0.f = (SCM (*)()) sxhash; } z->storage_as.subr0.name = s; return z; } /******************************************************************************/ /* * STk_sxhash: the Scheme version of sxhash */ static PRIMITIVE hash_table_hash(SCM obj) { long int x = sxhash(obj); return STk_makeinteger((x < 0) ? -x : x); } static PRIMITIVE make_hash_table(SCM l, int len) { SCM sxhash, compar, z; hash_type type=hash_comp; switch (len) { case 0: compar = the_func("eq?"); sxhash = the_func("hash-table-hash"); break; case 1: compar = CAR(l); sxhash = the_func("hash-table-hash"); break; case 2: compar = CAR(l); sxhash = CAR(CDR(l)); break; default: STk_err("make-hash-table: bad list of parameters", l); } if (STk_procedurep(compar) == Ntruth) STk_err("make-hash-table: bad comparison function", compar); if (STk_procedurep(sxhash) == Ntruth) STk_err("make-hash-table: bad hash function", sxhash); if (TYPEP(compar, tc_subr_2)) /* * We have a procedure. See if it is 'eq?' or 'string?'. * If so, we implement the hash table in the most efficient * way. Otherwise, we will use another method (i.e. find a key * code for each object with the sxhash function and strore * each element with this key in a A-list, the A-list for a * given key is is found using the Tcl Hash functions */ if ((void *) SUBRF(compar) == (void *) STk_eq) type = hash_eq; else if ((void *) SUBRF(compar) == (void *) STk_streq) type = hash_string; /* Make a new hash table object */ NEWCELL(z, tc_hash); LHASH(z) = (Scheme_hash_table *)must_malloc(sizeof(Scheme_hash_table)); HASH(z)->h = must_malloc(sizeof(Tcl_HashTable)); HASH(z)->type = type; HASH(z)->sxhash_fct = sxhash; HASH(z)->comparison = compar; Tcl_InitHashTable(HASH_H(z), (type == hash_string)? TCL_STRING_KEYS : TCL_ONE_WORD_KEYS); return z; } static PRIMITIVE hash_table_p(SCM obj) { return HASHP(obj) ? Truth: Ntruth; } static PRIMITIVE hash_table_put(SCM ht, SCM key, SCM val) { Tcl_HashEntry *entry; SCM index; int new; if (!HASHP(ht)) Err("hash-table-put!: bad hash table", ht); switch (HASH_TYPE(ht)) { case hash_eq: entry = Tcl_CreateHashEntry(HASH_H(ht), (char *) key, &new); Tcl_SetHashValue(entry, val); break; case hash_string: if (!STRINGP(key)) Err("hash-table-put!: bad string", key); entry = Tcl_CreateHashEntry(HASH_H(ht), CHARS(key), &new); Tcl_SetHashValue(entry, val); break; case hash_comp: index = Apply(HASH_SXHASH(ht), LIST1(key)); entry = Tcl_CreateHashEntry(HASH_H(ht), (char *) index, &new); Tcl_SetHashValue(entry, NIL); /* To avoid GC problems in further allocations * Thanks to S. Calvo */ if (new) Tcl_SetHashValue(entry, LIST1(Cons(key, val))); else { SCM old = (SCM) Tcl_GetHashValue(entry); SCM tmp = find_key(key, old, HASH_COMP(ht)); if (tmp) { CAR(tmp) = key; CDR(tmp) = val; } else Tcl_SetHashValue(entry, Cons(Cons(key, val), old)); } break; } return UNDEFINED; } static PRIMITIVE hash_table_get(SCM ht, SCM key, SCM default_value) { Tcl_HashEntry *entry; SCM index; if (!HASHP(ht)) Err("hash-table-get: bad hash table", ht); switch (HASH_TYPE(ht)) { case hash_eq: if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) key)) /* Key already in hash table */ return (SCM) Tcl_GetHashValue(entry); break; case hash_string: if (!STRINGP(key)) Err("hash-table-get: bad string", key); if (entry=Tcl_FindHashEntry(HASH_H(ht), CHARS(key))) /* Key already in hash table */ return (SCM) Tcl_GetHashValue(entry); break; case hash_comp: index = Apply(HASH_SXHASH(ht), LIST1(key)); if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) index)) { SCM tmp, val = (SCM) Tcl_GetHashValue(entry); if (tmp = find_key(key, val, HASH_COMP(ht))) return CDR(tmp); } break; } /* If we are here, key was not present in table */ if (default_value == UNBOUND) Err("hash-table-get: entry not defined for this key", key); return default_value; } static PRIMITIVE hash_table_remove(SCM ht, SCM key) { Tcl_HashEntry *entry; SCM index; if (!HASHP(ht)) Err("hash-table-remove!: bad hash table", ht); switch (HASH_TYPE(ht)) { case hash_eq: if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) key)) /* Key alrady in hash table */ Tcl_DeleteHashEntry(entry); break; case hash_string: if (!STRINGP(key)) Err("hash-table-remove: bad string", key); if (entry=Tcl_FindHashEntry(HASH_H(ht), CHARS(key))) /* Key alrady in hash table */ Tcl_DeleteHashEntry(entry); break; case hash_comp: index = Apply(HASH_SXHASH(ht), LIST1(key)); if (entry=Tcl_FindHashEntry(HASH_H(ht), (char *) index)) { SCM tmp, val = (SCM) Tcl_GetHashValue(entry); tmp = remove_key(key, val, HASH_COMP(ht)); if (NULLP(tmp)) /* This was the only entry for this key. We can delete the entry */ Tcl_DeleteHashEntry(entry); else Tcl_SetHashValue(entry, tmp); } break; } return UNDEFINED; } static PRIMITIVE hash_table_for_each(SCM ht, SCM proc) { Tcl_HashEntry *entry; Tcl_HashSearch search; if (!HASHP(ht)) Err("hash-table-for-each: bad hash table", ht); if (STk_procedurep(proc)==Ntruth) Err("hash-table-for-each: bad procedure", proc); for (entry = Tcl_FirstHashEntry(HASH_H(ht), &search); entry; entry = Tcl_NextHashEntry(&search)) { switch (HASH_TYPE(ht)) { case hash_eq: Apply(proc, LIST2((SCM) Tcl_GetHashKey(HASH_H(ht), entry), (SCM) Tcl_GetHashValue(entry))); break; case hash_string: { char *s = Tcl_GetHashKey(HASH_H(ht), entry); Apply(proc, LIST2(STk_makestring(s), (SCM) Tcl_GetHashValue(entry))); } break; case hash_comp: { SCM val; for (val=(SCM) Tcl_GetHashValue(entry); !NULLP(val); val = CDR(val)) Apply(proc, LIST2(CAR(CAR(val)), CDR(CAR(val)))); } } } return UNDEFINED; } static PRIMITIVE hash_table_map(SCM ht, SCM proc) { Tcl_HashEntry *entry; Tcl_HashSearch search; SCM result = NIL; if (!HASHP(ht)) Err("hash-table-map: bad hash table", ht); if (STk_procedurep(proc)==Ntruth) Err("hash-table-map: bad procedure", proc); for (entry = Tcl_FirstHashEntry(HASH_H(ht), &search); entry; entry = Tcl_NextHashEntry(&search)) { switch (HASH_TYPE(ht)) { case hash_eq: result = Cons(Apply(proc, LIST2((SCM)Tcl_GetHashKey(HASH_H(ht), entry), (SCM) Tcl_GetHashValue(entry))), result); break; case hash_string: { char *s = Tcl_GetHashKey(HASH_H(ht), entry); result = Cons(Apply(proc, LIST2(STk_makestring(s), (SCM) Tcl_GetHashValue(entry))), result); } break; case hash_comp: { SCM val; for (val=(SCM) Tcl_GetHashValue(entry); !NULLP(val); val = CDR(val)) result = Cons(Apply(proc, LIST2(CAR(CAR(val)), CDR(CAR(val)))), result); } } } return result; } static PRIMITIVE hash_table_stats(SCM ht) { Tcl_HashSearch search; char *s; if (!HASHP(ht)) Err("hash-table-stats: bad hash table", ht); /* * There is a bug in the Tcl/hash module. Tcl_HashStats makes a division by 0 * if the hash table is empty. */ if (Tcl_FirstHashEntry(HASH_H(ht), &search)) { s = Tcl_HashStats(HASH_H(ht)); fprintf(STk_stderr, "%s\n", s); free(s); } else fprintf(STk_stderr, "Empty hash table\n"); return UNDEFINED; } static void free_hash_table(SCM ht) { Tcl_DeleteHashTable(HASH_H(ht)); free(HASH_H(ht)); free(HASH(ht)); } static void mark_hash_table(SCM ht) { Tcl_HashEntry *entry; Tcl_HashSearch search; /* Mark information stored in the hash structure */ STk_gc_mark(HASH_COMP(ht)); STk_gc_mark(HASH_SXHASH(ht)); /* Mark the content of the Tcl hash table */ for (entry = Tcl_FirstHashEntry(HASH_H(ht), &search); entry; entry = Tcl_NextHashEntry(&search)) { /* The only cas where the must be marked is if the hash table is * an eq? one. In effect, * hash_eq table: the key is in the Tcl key field and must be marked * hash_string table: Tcl hashtable has made a copy in the entry * hash_comp: the key is in the value field which will be always marked */ if (HASH_TYPE(ht) == hash_eq) STk_gc_mark((SCM) Tcl_GetHashKey(HASH_H(ht), entry)); /* and mark the value in all cases */ STk_gc_mark((SCM) Tcl_GetHashValue(entry)); } } /******************************************************************************/ PRIMITIVE STk_init_hash(void) { tc_hash = STk_add_new_type(&hash_table_type); STk_add_new_primitive("make-hash-table", tc_lsubr, make_hash_table); STk_add_new_primitive("hash-table?", tc_subr_1, hash_table_p); STk_add_new_primitive("hash-table-hash", tc_subr_1, hash_table_hash); STk_add_new_primitive("hash-table-put!", tc_subr_3, hash_table_put); STk_add_new_primitive("hash-table-get", tc_subr_2_or_3, hash_table_get); STk_add_new_primitive("hash-table-remove!", tc_subr_2, hash_table_remove); STk_add_new_primitive("hash-table-for-each", tc_subr_2, hash_table_for_each); STk_add_new_primitive("hash-table-map", tc_subr_2, hash_table_map); STk_add_new_primitive("hash-table-stats", tc_subr_1, hash_table_stats); return UNDEFINED; }