From e7e5677d51c0c3bf605ecf35ca4e0ab8af3c90bf Mon Sep 17 00:00:00 2001 From: JeffBezanson Date: Sun, 23 Nov 2008 07:12:37 +0000 Subject: [PATCH] support reading hex float literals better text representation of infs and nans removing construction of low-level numeric types directly from strings adding hash function corresponding to equal better way to initialize builtins moving advanced bitvector ops to separate compilation unit rearranging hash table code so it can be specialized for different comparison functions one good way to bloat a piece of software is to add several ASCII pictures of the mona lisa --- femtolisp/ascii-mona-lisa | 47 ++++ femtolisp/ascii-mona-lisa-2 | 71 ++++++ femtolisp/builtins.c | 66 ++--- femtolisp/cvalues.c | 40 +-- femtolisp/equal.c | 91 ++++++- femtolisp/flisp.c | 54 ++-- femtolisp/flisp.h | 14 +- femtolisp/print.c | 34 ++- femtolisp/read.c | 54 ++-- femtolisp/string.c | 30 ++- femtolisp/table.c | 8 +- femtolisp/todo | 30 ++- llt/Makefile | 2 +- llt/bitvector-ops.c | 485 ++++++++++++++++++++++++++++++++++++ llt/bitvector.c | 475 ----------------------------------- llt/dtypes.h | 9 + llt/hashing.c | 27 ++ llt/hashing.h | 1 + llt/htable.c | 48 ++++ llt/htable.h | 19 ++ llt/htable.inc | 140 +++++++++++ llt/htableh.inc | 30 +++ llt/ptrhash.c | 181 +------------- llt/ptrhash.h | 40 +-- 24 files changed, 1142 insertions(+), 854 deletions(-) create mode 100644 femtolisp/ascii-mona-lisa create mode 100644 femtolisp/ascii-mona-lisa-2 create mode 100644 llt/bitvector-ops.c create mode 100644 llt/htable.c create mode 100644 llt/htable.h create mode 100644 llt/htable.inc create mode 100644 llt/htableh.inc diff --git a/femtolisp/ascii-mona-lisa b/femtolisp/ascii-mona-lisa new file mode 100644 index 0000000..e3e822e --- /dev/null +++ b/femtolisp/ascii-mona-lisa @@ -0,0 +1,47 @@ +iIYVVVVXVVVVVVVVVYVYVYYVYYYYIIIIYYYIYVVVYYYYYYYYYVVYVVVVXVVVVVYI+. +tYVXXXXXXVXXXXVVVYVVVVVVVVVVVVYVVVVVVVVVVVVVVVVVXXXXXVXXXXXXXVVYi. +iYXRXRRRXXXXXXXXXXXVVXVXVVVVVVVVXXXVXVVXXXXXXXXXXXXXXRRRRRRRRRXVi. +tVRRRRRRRRRRRRRRRXRXXXXXXXXXXXXXXRRXXXXRRRRXXXXXXXRRRRRRRRRRRRXV+. +tVRRBBBRMBRRRRRRRRRXXRRRRRXt=+;;;;;==iVXRRRRXXXXRRRRRRRRMMBRRRRXi, +tVRRBMBBMMBBBBBMBBRBBBRBX++=++;;;;;;:;;;IRRRRXXRRRBBBBBBMMBBBRRXi, +iVRMMMMMMMMMMMMMMBRBBMMV==iIVYIi=;;;;:::;;XRRRRRRBBMMMMMMMMBBRRXi. +iVRMMMMMMMMMMMMMMMMMMMY;IBWWWWMMXYi=;:::::;RBBBMMMMMMMMMMMMMMBBXi, ++VRMMRBMMMMMMMMMMMMMMY+;VMMMMMMMRXIi=;:::::=VVXXXRRRMMMMMMMMBBMXi; +=tYYVVVXRRRXXRBMMMMMV+;=RBBMMMXVXXVYt;::::::ttYYVYVVRMMMMMMBXXVI+= +;=tIYYVYYYYYYVVVMMMBt=;;+i=IBi+t==;;i;::::::+iitIIttYRMMMMMRXVVI=; +;=IIIIYYYIIIIttIYItIt;;=VVYXBIVRXVVXI;::::::;+iitttttVMMBRRRVVVI+, +;+++tttIttttiiii+i++==;;RMMMBXXMMMXI+;::::::;+ittttitYVXVYYIYVIi;; +;===iiittiiIitiii++;;;;:IVRVi=iBXVIi;::::::::;==+++++iiittii+++=;; +;;==+iiiiiiiiii+++=;;;;;;VYVIiiiVVt+;::::::::;++++++++++iti++++=;; +;;=++iiii+i+++++iii==;;;::tXYIIYIi+=;:::::,::;+++++++++++++++++=;; +;;;+==+ii+++++iiiiit=;;:::::=====;;;::::::::::+++i+++++++++i+++;;; +;;;==+=+iiiiitttIIII+;;;:,::,;;;;:;=;;;::,::::=++++++++==++++++;;; +:;====+tittiiittttti+;;::::,:=Ytiiiiti=;:::::,:;;==ii+ittItii+==;; +;;+iiittIti+ii;;===;;:;::::;+IVXVVVVVVt;;;;;::::;;===;+IIiiti=;;;; +;=++++iIti+ii+=;;;=;:::;;+VXBMMBBBBBBXY=;=;;:::::;=iYVIIttii++;;;; +;;++iiiItttIi+++=;;:::;=iBMMMMMMMMMMMXI==;;,::;;:;;=+itIttIIti+;;; +;=+++++i+tYIIiii;:,::;itXMMMMMMMMMMMBXti==;:;++=;:::::;=+iittti+;; +;;+ii+ii+iitiIi;::::;iXBMMMMMWWWWWMMBXti+ii=;::::,,,,:::=;==+tI+;; +;;iiiitItttti;:::;::=+itYXXMWWWWWWMBYt+;;::,,,,,,,,,,,,,:==;==;;;; +:;=iIIIttIt+:;:::;;;==;+=+iiittttti+;;:,:,,,,::,,,,,,,,:::;=;==::; +;::=+ittiii=;:::::;;;:;:;=++==;;==;:,,,,,,:;::::,,,,,,,,::;==;;::; +:::;+iiiii=;::::,:;:::::;;:;;::;:::,,,,,,,:::;=;;;:,,,,,:::;;::::; +:;;iIIIIII=;:::,:::::::,::::,:::,,,,,,,,,,,:;;=;:,,,,,,::::;=;:::; +:;==++ii+;;;:::::::::::,,,,,,::,,,,,,,,,,,::::,,,,,,,,,,:,:::::::; +::;;=+=;;;:::;;::,,,,,,,,,,,,,,,,,,,,,,,,,:,,,,,,,,,,,,,,,,,:::::; +::;=;;;:;:::;;;;::,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,::,,::::; +:;;:;::::::,::,,:,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,:::; +:::::::::::;;;:,,,,,,,,,,,,,...,...,,,.,,,,,,,,,,,,.,,,,,,,,,,,,:; +::::::::;=;;;;;::,,,,,,,,,,,.......,...,,,,,,,,,,,,.,,,,,,,,,,,,,; +:::::,,:;=;;;;;;;iVXXXVt+:,,....,,,,....,.,,,,,,,.,.....,,,,,,,,:; +:,,::,,:::;;;;;;=IVVVXXXXVXVt:,,,,,..,..,,,,.,,,,,..,.,,,,,,,,,,,; +::,::,,,:,:::::,::;=iIYVXVVVVIYIi;,,.,.,,,::,,,,,,,,,,,,,,,,,,,,,. +:,,,,,,,,,,,,,,,,::;+itIIIIIIi:;;i++=;;;;;;;;;::,,,...,,..,,,,,,,. +:,,,,,,,,,,,,,,=iitVYi++iitt==it;;:;;;;::;;::::,,,......,,,,,,,::. +::,,,,,,,,,,,,,++iiIVIi=;;=;+i;:;+:::,,,,,,,,,,,,,.....,,,,,,,,::, +,,,,,,,,,,,,,,,;=+it=:::,,,,,,,,,,.,......,,.,..........,,,,,,,,:: +:,,,,,,,,,,,,,,,,:=:,,,,,,,,,,,,,,......................,.,,.,.,,: +:,,,,,,,,,,,,,,,,,:,,,,,,,,,,..,........................,..,...,,: +,,,,,,,,,,,,,,,,,,,.....................................,.......,, +,,,,,,,,,.,,,,,,,...............................................,, +itittiiiii+=++=;;=iiiiiiittiiiiii+iii===;++iiitiiiiiii+=====+ii=+i diff --git a/femtolisp/ascii-mona-lisa-2 b/femtolisp/ascii-mona-lisa-2 new file mode 100644 index 0000000..78e5519 --- /dev/null +++ b/femtolisp/ascii-mona-lisa-2 @@ -0,0 +1,71 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!>'''''' !!!!! +!!!!! ?$$$$$$$$$$$$??$c`$$$$$$$$$$$?>' `!!!! +!!!!! `?$$$$$$I7?"" ,$$$$$$$$$?>>' !!!! +!!!!!. <>'' `!!! +!!!!!! '' `!!! +!!!!!! $$$hccccccccc= cc$$$$$$$>>' !!! +!!!!! `?$$$$$$F"""" `"$$$$$>>>'' `!! +!!!!! "?$$$$$cccccc$$$$??>>>>' !! +!!!!> "$$$$$$$$$$$$$F>>>>'' `! +!!!!! "$$$$$$$$???>''' ! +!!!!!> `""""" ` +!!!!!!; . ` +!!!!!!! ?h. +!!!!!!!! $$c, +!!!!!!!!> ?$$$h. .,c +!!!!!!!!! $$$$$$$$$hc,.,,cc$$$$$ +!!!!!!!!! .,zcc$$$$$$$$$$$$$$$$$$$$$$ +!!!!!!!!! .z$$$$$$$$$$$$$$$$$$$$$$$$$$$$ +!!!!!!!!! ,d$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ . +!!!!!!!!! ,d$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ !! +!!!!!!!!! ,d$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ ,!' +!!!!!!!!> c$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$. !' +!!!!!!'' ,d$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$> ' +!!!'' z$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$> +!' ,$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$> .. + z$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$' ;!!!!''` + $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$F ,;;!'`' .'' + <$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$> ,;'`' ,; + `$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$F -' ,;!!' + "?$$$$$$$$$$?$$$$$$$$$$$$$$$$$$$$$$$$$$F . ""??$$$?C3$$$$$$$$$$$$$$$$$$$$$$$$"" ;!''' !!! + ;!!!!;, `"''""????$$$$$$$$$$$$$$$$"" ,;-'' ',! + ;!!!!;,;, .. ' . ' ' + !!' ,;!!! ;'`!!!!!!!!;!!!!!; . >' .'' ; + !!' ;!!'!';! !! !!!!!!!!!!!!! ' -' + ;! ;> + !' ; !! ' + ' ;! > ! ' + ' +by Allen Mullen diff --git a/femtolisp/builtins.c b/femtolisp/builtins.c index b76f78d..88a5cdd 100644 --- a/femtolisp/builtins.c +++ b/femtolisp/builtins.c @@ -351,38 +351,42 @@ value_t fl_randn(value_t *args, u_int32_t nargs) extern void stringfuncs_init(); +static builtinspec_t builtin_info[] = { + { "set-syntax", fl_setsyntax }, + { "symbol-syntax", fl_symbolsyntax }, + { "syntax-environment", fl_syntax_env }, + { "environment", fl_global_env }, + { "constantp", fl_constantp }, + + { "print", fl_print }, + { "princ", fl_princ }, + { "read", fl_read }, + { "load", fl_load }, + { "exit", fl_exit }, + { "fixnum", fl_fixnum }, + { "truncate", fl_truncate }, + + { "vector.alloc", fl_vector_alloc }, + + { "time.now", fl_time_now }, + { "time.string", fl_time_string }, + + { "rand", fl_rand }, + { "rand.uint32", fl_rand32 }, + { "rand.uint64", fl_rand64 }, + { "rand.double", fl_randd }, + { "rand.float", fl_randf }, + { "randn", fl_randn }, + + { "path.cwd", fl_path_cwd }, + + { "os.getenv", fl_os_getenv }, + { "os.setenv", fl_os_setenv }, + { NULL, NULL } +}; + void builtins_init() { - set(symbol("set-syntax"), guestfunc(fl_setsyntax)); - set(symbol("symbol-syntax"), guestfunc(fl_symbolsyntax)); - set(symbol("syntax-environment"), guestfunc(fl_syntax_env)); - set(symbol("environment"), guestfunc(fl_global_env)); - set(symbol("constantp"), guestfunc(fl_constantp)); - - set(symbol("print"), guestfunc(fl_print)); - set(symbol("princ"), guestfunc(fl_princ)); - set(symbol("read"), guestfunc(fl_read)); - set(symbol("load"), guestfunc(fl_load)); - set(symbol("exit"), guestfunc(fl_exit)); - set(symbol("fixnum"), guestfunc(fl_fixnum)); - set(symbol("truncate"), guestfunc(fl_truncate)); - - set(symbol("vector.alloc"), guestfunc(fl_vector_alloc)); - - set(symbol("time.now"), guestfunc(fl_time_now)); - set(symbol("time.string"), guestfunc(fl_time_string)); - - set(symbol("rand"), guestfunc(fl_rand)); - set(symbol("rand.uint32"), guestfunc(fl_rand32)); - set(symbol("rand.uint64"), guestfunc(fl_rand64)); - set(symbol("rand.double"), guestfunc(fl_randd)); - set(symbol("rand.float"), guestfunc(fl_randf)); - set(symbol("randn"), guestfunc(fl_randn)); - - set(symbol("path.cwd"), guestfunc(fl_path_cwd)); - - set(symbol("os.getenv"), guestfunc(fl_os_getenv)); - set(symbol("os.setenv"), guestfunc(fl_os_setenv)); - + assign_global_builtins(builtin_info); stringfuncs_init(); } diff --git a/femtolisp/cvalues.c b/femtolisp/cvalues.c index 1d0c49e..cd303df 100644 --- a/femtolisp/cvalues.c +++ b/femtolisp/cvalues.c @@ -219,34 +219,6 @@ static void cv_pin(cvalue_t *cv) } */ -static int64_t strtoi64(char *str, char *fname) -{ - char *pend; - int64_t i; - errno = 0; - i = strtoll(str, &pend, 0); - if (*pend != '\0' || errno) lerror(ArgError, "%s: invalid string", fname); - return i; -} -static uint64_t strtoui64(char *str, char *fname) -{ - char *pend; - uint64_t i; - errno = 0; - i = strtoull(str, &pend, 0); - if (*pend != '\0' || errno) lerror(ArgError, "%s: invalid string", fname); - return i; -} -static double strtodouble(char *str, char *fname) -{ - char *pend; - double d; - errno = 0; - d = strtod(str, &pend); - if (*pend != '\0' || errno) lerror(ArgError, "%s: invalid string", fname); - return d; -} - #define num_ctor(typenam, cnvt, tag, fromstr) \ static void cvalue_##typenam##_init(value_t type, value_t arg, \ void *dest, void *data) \ @@ -259,18 +231,10 @@ static void cvalue_##typenam##_init(value_t type, value_t arg, \ else if (iscvalue(arg)) { \ cvalue_t *cv = (cvalue_t*)ptr(arg); \ void *p = cv_data(cv); \ - if (valid_numtype(cv_numtype(cv))) { \ + if (valid_numtype(cv_numtype(cv))) \ n = (typenam##_t)conv_to_##cnvt(p, cv_numtype(cv)); \ - } \ - else if (cv->flags.cstring) { \ - n = fromstr(p, #typenam); \ - } \ - else if (cv_len(cv) == sizeof(typenam##_t)) { \ - n = *(typenam##_t*)p; \ - } \ - else { \ + else \ goto cnvt_error; \ - } \ } \ else { \ goto cnvt_error; \ diff --git a/femtolisp/equal.c b/femtolisp/equal.c index 2fd889e..959817e 100644 --- a/femtolisp/equal.c +++ b/femtolisp/equal.c @@ -7,20 +7,23 @@ #include "llt.h" #include "flisp.h" +#define BOUNDED_COMPARE_BOUND 2048 +#define BOUNDED_HASH_BOUND 4096 + // comparable tag #define cmptag(v) (isfixnum(v) ? TAG_NUM : tag(v)) -static value_t eq_class(ptrhash_t *table, value_t key) +static value_t eq_class(htable_t *table, value_t key) { value_t c = (value_t)ptrhash_get(table, (void*)key); - if (c == (value_t)PH_NOTFOUND) + if (c == (value_t)HT_NOTFOUND) return NIL; if (c == key) return c; return eq_class(table, c); } -static void eq_union(ptrhash_t *table, value_t a, value_t b, +static void eq_union(htable_t *table, value_t a, value_t b, value_t c, value_t cb) { value_t ca = (c==NIL ? a : c); @@ -51,7 +54,7 @@ static value_t compare_num_cvalue(value_t a, value_t b, int eq) } static value_t bounded_compare(value_t a, value_t b, int bound, int eq); -static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq); +static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq); static value_t bounded_vector_compare(value_t a, value_t b, int bound, int eq) { @@ -138,7 +141,7 @@ static value_t bounded_compare(value_t a, value_t b, int bound, int eq) return (taga < tagb) ? fixnum(-1) : fixnum(1); } -static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table, +static value_t cyc_vector_compare(value_t a, value_t b, htable_t *table, int eq) { size_t la = vector_size(a); @@ -186,7 +189,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table, return fixnum(0); } -static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq) +static value_t cyc_compare(value_t a, value_t b, htable_t *table, int eq) { if (a==b) return fixnum(0); @@ -234,19 +237,19 @@ static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq) return bounded_compare(a, b, 1, eq); } -static ptrhash_t equal_eq_hashtable; +static htable_t equal_eq_hashtable; void comparehash_init() { - ptrhash_new(&equal_eq_hashtable, 512); + htable_new(&equal_eq_hashtable, 512); } // 'eq' means unordered comparison is sufficient static value_t compare_(value_t a, value_t b, int eq) { - value_t guess = bounded_compare(a, b, 2048, eq); + value_t guess = bounded_compare(a, b, BOUNDED_COMPARE_BOUND, eq); if (guess == NIL) { guess = cyc_compare(a, b, &equal_eq_hashtable, eq); - ptrhash_reset(&equal_eq_hashtable, 512); + htable_reset(&equal_eq_hashtable, 512); } return guess; } @@ -270,3 +273,71 @@ value_t equal(value_t a, value_t b) * preallocate hash table and call reset() instead of new/free * less redundant tag checking, 3-bit tags */ + +#ifdef BITS64 +#define MIX(a, b) int64hash((int64_t)(a) ^ (int64_t)(b)); +#define doublehash(a) int64hash(a) +#else +#define MIX(a, b) int64to32hash(((int64_t)(a))<<32 | ((int64_t)(b))) +#define doublehash(a) int64to32hash(a) +#endif + +static uptrint_t bounded_hash(value_t a, int bound) +{ + double d; + numerictype_t nt; + size_t i, len; + cvalue_t *cv; + void *data; + if (bound <= 0) return 0; + uptrint_t h = 0; + int bb, tg = tag(a); + switch(tg) { + case TAG_NUM : + case TAG_NUM1: + d = numval(a); + return doublehash(*(int64_t*)&d); + case TAG_BUILTIN: + return inthash(a); + case TAG_SYM: + return ((symbol_t*)ptr(a))->hash; + case TAG_CVALUE: + cv = (cvalue_t*)ptr(a); + data = cv_data(cv); + if (valid_numtype(nt=cv_numtype(cv))) { + d = conv_to_double(data, nt); + if (d==0) d = 0.0; // normalize -0 + return doublehash(*(int64_t*)&d); + } + else { + return memhash(data, cv_len(cv)); + } + case TAG_VECTOR: + len = vector_size(a); + for(i=0; i < len; i++) { + h = MIX(h, bounded_hash(vector_elt(a,i), bound-1)); + } + return h; + case TAG_CONS: + bb = BOUNDED_HASH_BOUND; + do { + h = MIX(h, bounded_hash(car_(a), bound-1)+1); + bb--; + if (bb <= 0) return h; + a = cdr_(a); + } while (iscons(a)); + return MIX(h, bounded_hash(a, bound-1)+1); + } + return 0; +} + +uptrint_t hash(value_t a) +{ + return bounded_hash(a, BOUNDED_HASH_BOUND); +} + +value_t fl_hash(value_t *args, u_int32_t nargs) +{ + argcount("hash", nargs, 1); + return fixnum(hash(args[0])); +} diff --git a/femtolisp/flisp.c b/femtolisp/flisp.c index 9726a0b..629c826 100644 --- a/femtolisp/flisp.c +++ b/femtolisp/flisp.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "llt.h" #include "flisp.h" @@ -61,11 +62,9 @@ static char *builtin_names[] = "vector", "aref", "aset", "length", "assoc", "compare", "for" }; -static char *stack_bottom; -#define PROCESS_STACK_SIZE (2*1024*1024) #define N_STACK 98304 value_t Stack[N_STACK]; -u_int32_t SP = 0; +uint32_t SP = 0; value_t NIL, T, LAMBDA, QUOTE, IF, TRYCATCH; value_t BACKQUOTE, COMMA, COMMAAT, COMMADOT; @@ -81,31 +80,31 @@ static value_t relocate(value_t v); static void do_print(ios_t *f, value_t v, int princ); typedef struct _readstate_t { - ptrhash_t backrefs; - ptrhash_t gensyms; + htable_t backrefs; + htable_t gensyms; struct _readstate_t *prev; } readstate_t; static readstate_t *readstate = NULL; static void free_readstate(readstate_t *rs) { - ptrhash_free(&rs->backrefs); - ptrhash_free(&rs->gensyms); + htable_free(&rs->backrefs); + htable_free(&rs->gensyms); } static unsigned char *fromspace; static unsigned char *tospace; static unsigned char *curheap; static unsigned char *lim; -static u_int32_t heapsize = 256*1024;//bytes -static u_int32_t *consflags; +static uint32_t heapsize = 256*1024;//bytes +static uint32_t *consflags; // error utilities ------------------------------------------------------------ // saved execution state for an unwind target typedef struct _ectx_t { jmp_buf buf; - u_int32_t sp; + uint32_t sp; readstate_t *rdst; struct _ectx_t *prev; } exception_context_t; @@ -187,9 +186,9 @@ symbol_t *symtab = NULL; static symbol_t *mk_symbol(char *str) { symbol_t *sym; + size_t len = strlen(str); - sym = (symbol_t*)malloc_aligned(sizeof(symbol_t)-sizeof(void*) + - strlen(str)+1, + sym = (symbol_t*)malloc_aligned(sizeof(symbol_t)-sizeof(void*) + len + 1, 8); sym->left = sym->right = NULL; if (str[0] == ':') { @@ -200,6 +199,7 @@ static symbol_t *mk_symbol(char *str) sym->binding = UNBOUND; sym->syntax = 0; } + sym->hash = memhash32(str, len)^0xAAAAAAAA; strcpy(&sym->name[0], str); return sym; } @@ -234,15 +234,15 @@ typedef struct { value_t syntax; // syntax environment entry value_t binding; // global value binding void *dlcache; // dlsym address (not used here) - u_int32_t id; + uint32_t id; } gensym_t; -static u_int32_t _gensym_ctr=0; +static uint32_t _gensym_ctr=0; // two static buffers for gensym printing so there can be two // gensym names available at a time, mostly for compare() static char gsname[2][16]; static int gsnameno=0; -value_t gensym(value_t *args, u_int32_t nargs) +value_t gensym(value_t *args, uint32_t nargs) { (void)args; (void)nargs; @@ -258,7 +258,7 @@ value_t fl_gensym() return gensym(NULL, 0); } -static char *snprintf_gensym_id(char *nbuf, size_t n, u_int32_t g) +static char *snprintf_gensym_id(char *nbuf, size_t n, uint32_t g) { size_t i=n-1; @@ -431,7 +431,7 @@ void gc(int mustgrow) { static int grew = 0; void *temp; - u_int32_t i; + uint32_t i; readstate_t *rs; curheap = tospace; @@ -473,7 +473,7 @@ void gc(int mustgrow) temp = bitvector_resize(consflags, heapsize/sizeof(cons_t), 1); if (temp == NULL) lerror(MemoryError, "out of memory"); - consflags = (u_int32_t*)temp; + consflags = (uint32_t*)temp; } grew = !grew; } @@ -496,7 +496,7 @@ value_t listn(size_t n, ...) { va_list ap; va_start(ap, n); - u_int32_t si = SP; + uint32_t si = SP; size_t i; for(i=0; i < n; i++) { @@ -665,7 +665,7 @@ static value_t eval_sexpr(value_t e, uint32_t penv, int tail) raise(list2(UnboundError, e)); return v; } - if ((unsigned)(char*)&nargs < (unsigned)stack_bottom || SP>=(N_STACK-100)) + if (SP >= (N_STACK-64)) lerror(MemoryError, "eval: stack overflow"); saveSP = SP; v = car_(e); @@ -1309,6 +1309,14 @@ extern void comparehash_init(); static char *EXEDIR; +void assign_global_builtins(builtinspec_t *b) +{ + while (b->name != NULL) { + set(symbol(b->name), guestfunc(b->fptr)); + b++; + } +} + void lisp_init(void) { int i; @@ -1320,7 +1328,7 @@ void lisp_init(void) curheap = fromspace; lim = curheap+heapsize-sizeof(cons_t); consflags = bitvector_new(heapsize/sizeof(cons_t), 1); - ptrhash_new(&printconses, 32); + htable_new(&printconses, 32); comparehash_init(); NIL = symbol("nil"); setc(NIL, NIL); @@ -1377,6 +1385,7 @@ void lisp_init(void) cvalues_init(); set(symbol("gensym"), guestfunc(gensym)); + set(symbol("hash"), guestfunc(fl_hash)); char buf[1024]; char *exename = get_exename(buf, sizeof(buf)); @@ -1394,7 +1403,7 @@ void lisp_init(void) value_t toplevel_eval(value_t expr) { value_t v; - u_int32_t saveSP = SP; + uint32_t saveSP = SP; PUSH(fixnum(2)); PUSH(NIL); PUSH(NIL); @@ -1486,7 +1495,6 @@ int main(int argc, char *argv[]) locale_is_utf8 = u8_is_locale_utf8(setlocale(LC_ALL, "")); - stack_bottom = ((char*)&v) - PROCESS_STACK_SIZE; lisp_init(); set(symbol("argv"), argv_list(argc, argv)); FL_TRY { diff --git a/femtolisp/flisp.h b/femtolisp/flisp.h index 67c1901..3a19f83 100644 --- a/femtolisp/flisp.h +++ b/femtolisp/flisp.h @@ -18,6 +18,7 @@ typedef struct _symbol_t { value_t syntax; // syntax environment entry value_t binding; // global value binding void *dlcache; // dlsym address + uint32_t hash; // below fields are private struct _symbol_t *left; struct _symbol_t *right; @@ -91,7 +92,7 @@ typedef struct _symbol_t { #define isgensym(x) (issymbol(x) && ismanaged(x)) extern value_t Stack[]; -extern u_int32_t SP; +extern uint32_t SP; #define PUSH(v) (Stack[SP++] = (v)) #define POP() (Stack[--SP]) #define POPN(n) (SP-=(n)) @@ -132,6 +133,8 @@ size_t llength(value_t v); value_t list_nth(value_t l, size_t n); value_t compare(value_t a, value_t b); // -1, 0, or 1 value_t equal(value_t a, value_t b); // T or nil +uptrint_t hash(value_t a); +value_t fl_hash(value_t *args, u_int32_t nargs); /* safe casts */ cons_t *tocons(value_t v, char *fname); @@ -235,7 +238,7 @@ typedef unsigned long ulong_t; typedef double double_t; typedef float float_t; -typedef value_t (*guestfunc_t)(value_t*, u_int32_t); +typedef value_t (*guestfunc_t)(value_t*, uint32_t); extern value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym; extern value_t int64sym, uint64sym, shortsym, ushortsym; @@ -272,4 +275,11 @@ value_t return_from_uint64(uint64_t Uaccum); value_t return_from_int64(int64_t Saccum); value_t char_from_code(uint32_t code); +typedef struct { + char *name; + guestfunc_t fptr; +} builtinspec_t; + +void assign_global_builtins(builtinspec_t *b); + #endif diff --git a/femtolisp/print.c b/femtolisp/print.c index 983857c..3f88ddf 100644 --- a/femtolisp/print.c +++ b/femtolisp/print.c @@ -1,4 +1,4 @@ -static ptrhash_t printconses; +static htable_t printconses; static u_int32_t printlabel; static int print_pretty; static int SCR_WIDTH = 80; @@ -36,7 +36,7 @@ static void print_traverse(value_t v) while (iscons(v)) { if (ismarked(v)) { bp = (value_t*)ptrhash_bp(&printconses, (void*)v); - if (*bp == (value_t)PH_NOTFOUND) + if (*bp == (value_t)HT_NOTFOUND) *bp = fixnum(printlabel++); return; } @@ -48,7 +48,7 @@ static void print_traverse(value_t v) return; if (ismarked(v)) { bp = (value_t*)ptrhash_bp(&printconses, (void*)v); - if (*bp == (value_t)PH_NOTFOUND) + if (*bp == (value_t)HT_NOTFOUND) *bp = fixnum(printlabel++); return; } @@ -325,7 +325,7 @@ static void do_print(ios_t *f, value_t v, int princ) case TAG_VECTOR: case TAG_CONS: if ((label=(value_t)ptrhash_get(&printconses, (void*)v)) != - (value_t)PH_NOTFOUND) { + (value_t)HT_NOTFOUND) { if (!ismarked(v)) { HPOS+=ios_printf(f, "#%ld#", numval(label)); return; @@ -477,16 +477,26 @@ static void cvalue_printdata(ios_t *f, void *data, size_t len, value_t type, int ndec; if (type == floatsym) { d = (double)*(float*)data; ndec = 8; } else { d = *(double*)data; ndec = 16; } - snprint_real(buf, sizeof(buf), d, 0, ndec, 3, 10); - if (weak || princ || strpbrk(buf, ".eE")) { - outs(buf, f); - if (type == floatsym) outc('f', f); + if (!DFINITE(d)) { + char *rep; + if (isnan(d)) + rep = sign_bit(d) ? "-NaN" : "+NaN"; + else + rep = sign_bit(d) ? "-Inf" : "+Inf"; + if (type == floatsym) + HPOS+=ios_printf(f, "#%s(%s)", symbol_name(type), rep); + else + HPOS+=ios_printf(f, "%s", rep); } else { - if (!DFINITE(d)) - HPOS+=ios_printf(f, "#%s(\"%s\")", symbol_name(type), buf); - else + snprint_real(buf, sizeof(buf), d, 0, ndec, 3, 10); + if (weak || princ || strpbrk(buf, ".eE")) { + outs(buf, f); + if (type == floatsym) outc('f', f); + } + else { HPOS+=ios_printf(f, "#%s(%s)", symbol_name(type), buf); + } } } else if (issymbol(type)) { @@ -608,5 +618,5 @@ void print(ios_t *f, value_t v, int princ) do_print(f, v, princ); - ptrhash_reset(&printconses, 32); + htable_reset(&printconses, 32); } diff --git a/femtolisp/read.c b/femtolisp/read.c index 8629fba..6b3c801 100644 --- a/femtolisp/read.c +++ b/femtolisp/read.c @@ -23,32 +23,48 @@ static int isnumtok(char *tok, value_t *pval) double d; if (*tok == '\0') return 0; - if (!((tok[0]=='0' && tok[1]=='x') || // these formats are always integer - (tok[0]=='0' && isdigit(tok[1]))) && - strpbrk(tok, ".eE")) { + if (!(tok[0]=='0' && isdigit(tok[1])) && + strpbrk(tok, ".eEpP")) { d = strtod(tok, &end); if (*end == '\0') { if (pval) *pval = mk_double(d); return 1; } - if (end > tok && *end == 'f' && end[1] == '\0') { + if (end > tok && end[0] == 'f' && end[1] == '\0') { if (pval) *pval = mk_float((float)d); return 1; } } - if (isdigit(tok[0]) || tok[0]=='-' || tok[0]=='+') { - if (tok[0]=='-') { - i64 = strtoll(tok, &end, 0); - if (pval) *pval = return_from_int64(i64); - } - else { - ui64 = strtoull(tok, &end, 0); - if (pval) *pval = return_from_uint64(ui64); - } - if (*end == '\0') + + if (tok[0] == '+') { + if (!strcmp(tok,"+NaN")) { + if (pval) *pval = mk_double(D_PNAN); return 1; + } + if (!strcmp(tok,"+Inf")) { + if (pval) *pval = mk_double(D_PINF); + return 1; + } } - return 0; + else if (tok[0] == '-') { + if (!strcmp(tok,"-NaN")) { + if (pval) *pval = mk_double(D_NNAN); + return 1; + } + if (!strcmp(tok,"-Inf")) { + if (pval) *pval = mk_double(D_NINF); + return 1; + } + i64 = strtoll(tok, &end, 0); + if (pval) *pval = return_from_int64(i64); + return (*end == '\0'); + } + else if (!isdigit(tok[0])) { + return 0; + } + ui64 = strtoull(tok, &end, 0); + if (pval) *pval = return_from_uint64(ui64); + return (*end == '\0'); } static u_int32_t toktype = TOK_NONE; @@ -505,12 +521,12 @@ static value_t do_read_sexpr(ios_t *f, value_t label) case TOK_BACKREF: // look up backreference v = (value_t)ptrhash_get(&readstate->backrefs, (void*)tokval); - if (v == (value_t)PH_NOTFOUND) + if (v == (value_t)HT_NOTFOUND) lerror(ParseError, "read: undefined label %ld", numval(tokval)); return v; case TOK_GENSYM: pv = (value_t*)ptrhash_bp(&readstate->gensyms, (void*)tokval); - if (*pv == (value_t)PH_NOTFOUND) + if (*pv == (value_t)HT_NOTFOUND) *pv = gensym(NULL, 0); return *pv; case TOK_DOUBLEQUOTE: @@ -524,8 +540,8 @@ value_t read_sexpr(ios_t *f) value_t v; readstate_t state; state.prev = readstate; - ptrhash_new(&state.backrefs, 16); - ptrhash_new(&state.gensyms, 16); + htable_new(&state.backrefs, 16); + htable_new(&state.gensyms, 16); readstate = &state; v = do_read_sexpr(f, UNBOUND); diff --git a/femtolisp/string.c b/femtolisp/string.c index ea31bbf..d1279bb 100644 --- a/femtolisp/string.c +++ b/femtolisp/string.c @@ -267,19 +267,23 @@ value_t fl_string_dec(value_t *args, u_int32_t nargs) return size_wrap(i); } +static builtinspec_t stringfunc_info[] = { + { "intern", fl_intern }, + { "string", fl_string }, + { "stringp", fl_stringp }, + { "string.length", fl_string_length }, + { "string.split", fl_string_split }, + { "string.sub", fl_string_sub }, + { "string.char", fl_string_char }, + { "string.inc", fl_string_inc }, + { "string.dec", fl_string_dec }, + { "string.reverse", fl_string_reverse }, + { "string.encode", fl_string_encode }, + { "string.decode", fl_string_decode }, + { NULL, NULL } +}; + void stringfuncs_init() { - set(symbol("intern"), guestfunc(fl_intern)); - - set(symbol("string"), guestfunc(fl_string)); - set(symbol("stringp"), guestfunc(fl_stringp)); - set(symbol("string.length"), guestfunc(fl_string_length)); - set(symbol("string.split"), guestfunc(fl_string_split)); - set(symbol("string.sub"), guestfunc(fl_string_sub)); - set(symbol("string.char"), guestfunc(fl_string_char)); - set(symbol("string.inc"), guestfunc(fl_string_inc)); - set(symbol("string.dec"), guestfunc(fl_string_dec)); - set(symbol("string.reverse"), guestfunc(fl_string_reverse)); - set(symbol("string.encode"), guestfunc(fl_string_encode)); - set(symbol("string.decode"), guestfunc(fl_string_decode)); + assign_global_builtins(stringfunc_info); } diff --git a/femtolisp/table.c b/femtolisp/table.c index 5076b5e..234da0f 100644 --- a/femtolisp/table.c +++ b/femtolisp/table.c @@ -33,7 +33,7 @@ typedef struct { typedef struct { table_interface_t *ti; ulong_t nkeys; - ptrhash_t ht; + htable_t ht; } fltable_t; void print_htable(ios_t *f, value_t h, int princ) @@ -43,16 +43,16 @@ void print_htable(ios_t *f, value_t h, int princ) void free_htable(value_t self) { fltable_t *pt = (fltable_t*)cv_data((cvalue_t*)ptr(self)); - ptrhash_free(&pt->ht); + htable_free(&pt->ht); } void relocate_htable(value_t old, value_t new) { fltable_t *pt = (fltable_t*)cv_data((cvalue_t*)ptr(self)); - ptrhash_t *h = &pt->ht; + htable_t *h = &pt->ht; size_t i; for(i=0; i < h->size; i++) { - if (h->table[i] != PH_NOTFOUND) + if (h->table[i] != HT_NOTFOUND) h->table[i] = (void*)relocate((value_t)h->table[i]); } } diff --git a/femtolisp/todo b/femtolisp/todo index 8cda4f3..1e173e6 100644 --- a/femtolisp/todo +++ b/femtolisp/todo @@ -919,15 +919,18 @@ switch to miser mode, otherwise default is ok, for example: ----------------------------------------------------------------------------- consolidated todo list as of 8/30: +- new cvalues, types representation - implement support for defining new opaque values -- finalizers in gc -- expose io stream object - hashtable +- finalizers in gc +- unify vectors and arrays +- expose io stream object + - enable print-shared for cvalues' types - remaining c types - remaining cvalues functions -- special efficient reader for #array - finish ios +- special efficient reader for #array ----------------------------------------------------------------------------- @@ -943,20 +946,27 @@ typedef struct { fltype_t *type; void *data; size_t len; // length of *data in bytes - - value_t parent; // optional - char data[1]; // variable size + union { + value_t parent; // optional + char _space[1]; // variable size + }; } cvalue_t; -typedef struct { - fltype_t *type; - void *data; -} cprim_t; +#define owned(cv) ((cv)->type & 0x1) +#define hasparent(cv) ((cv)->type & 0x2) +#define isinlined(cv) ((cv)->data == &(cv)->_space[0]) +#define cv_class(cv) ((fltype_t*)(((uptrint_t)(cv)->type)&~3)) +#define cv_type(cv) (cv_class(cv)->type) +#define cv_len(cv) ((cv)->len) +#define cv_data(cv) ((cv)->data) +#define cv_numtype(cv) (cv_class(cv)->numtype) typedef struct _fltype_t { value_t type; int numtype; size_t sz; cvtable_t *vtable; + int marked; struct _fltype_t *eltype; // for arrays + struct _fltype_t *artype; // (array this) } fltype_t; diff --git a/llt/Makefile b/llt/Makefile index 4fc67ef..266b3ac 100644 --- a/llt/Makefile +++ b/llt/Makefile @@ -1,7 +1,7 @@ CC = gcc SRCS = bitvector.c hashing.c socket.c timefuncs.c utils.c dblprint.c ptrhash.c \ - utf8.c ios.c operators.c cplxprint.c dirpath.c + utf8.c ios.c operators.c cplxprint.c dirpath.c htable.c bitvector-ops.c OBJS = $(SRCS:%.c=%.o) DOBJS = $(SRCS:%.c=%.do) TARGET = libllt.a diff --git a/llt/bitvector-ops.c b/llt/bitvector-ops.c new file mode 100644 index 0000000..82e6b27 --- /dev/null +++ b/llt/bitvector-ops.c @@ -0,0 +1,485 @@ +#include +#include +#include + +#include "dtypes.h" +#include "bitvector.h" + +#ifdef WIN32 +#include +#define alloca _alloca +#endif + +// greater than this # of words we use malloc instead of alloca +#define MALLOC_CUTOFF 2000 + +u_int32_t bitreverse(u_int32_t x) +{ + u_int32_t m; + +#ifdef __INTEL_COMPILER + x = _bswap(x); +#else + x = (x >> 16) | (x << 16); m = 0xff00ff00; + x = ((x & m) >> 8) | ((x & ~m) << 8); +#endif + m = 0xf0f0f0f0; + x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc; + x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa; + x = ((x & m) >> 1) | ((x & ~m) << 1); + + return x; +} + +// shift all bits in a long bit vector +// n is # of int32s to consider, s is shift distance +// lowest bit-index is bit 0 of word 0 +// TODO: handle boundary case of shift distance >= data size? +void bitvector_shr(u_int32_t *b, size_t n, u_int32_t s) +{ + u_int32_t i; + if (s == 0 || n == 0) return; + i = (s>>5); + if (i) { + n -= i; + memmove(b, &b[i], n*4); + memset(&b[n], 0, i*4); + s &= 31; + } + for(i=0; i < n-1; i++) { + b[i] = (b[i]>>s) | (b[i+1]<<(32-s)); + } + b[i]>>=s; +} + +// out-of-place version, good for re-aligning a strided submatrix to +// linear representation when a copy is needed +// assumes that dest has the same amount of space as source, even if it +// wouldn't have been necessary to hold the shifted bits +void bitvector_shr_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s) +{ + u_int32_t i, j; + if (n == 0) return; + if (s == 0) { + memcpy(dest, b, n*4); + return; + } + j = (s>>5); + if (j) { + n -= j; + memset(&dest[n], 0, j*4); + s &= 31; + b = &b[j]; + } + for(i=0; i < n-1; i++) { + dest[i] = (b[i]>>s) | (b[i+1]<<(32-s)); + } + dest[i] = b[i]>>s; +} + +void bitvector_shl(u_int32_t *b, size_t n, u_int32_t s) +{ + u_int32_t i, scrap=0, temp; + if (s == 0 || n == 0) return; + i = (s>>5); + if (i) { + n -= i; + memmove(&b[i], b, n*4); + memset(b, 0, i*4); + s &= 31; + b = &b[i]; + } + for(i=0; i < n; i++) { + temp = (b[i]<>(32-s); + b[i] = temp; + } +} + +// if dest has more space than source, set scrap to true to keep the +// top bits that would otherwise be shifted out +void bitvector_shl_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s, + bool_t scrap) +{ + u_int32_t i, j, sc=0; + if (n == 0) return; + if (s == 0) { + memcpy(dest, b, n*4); + return; + } + j = (s>>5); + if (j) { + n -= j; + memset(dest, 0, j*4); + s &= 31; + dest = &dest[j]; + } + for(i=0; i < n; i++) { + dest[i] = (b[i]<>(32-s); + } + if (scrap) + dest[i] = sc; +} + +// set nbits to c, starting at given bit offset +// assumes offs < 32 +void bitvector_fill(u_int32_t *b, u_int32_t offs, u_int32_t c, u_int32_t nbits) +{ + index_t i; + u_int32_t nw, tail; + u_int32_t mask; + + if (nbits == 0) return; + nw = (offs+nbits+31)>>5; + + if (nw == 1) { + mask = (lomask(nbits)<>5; + + if (nw == 1) { + mask = (lomask(nbits)<>5; \ + \ + if (soffs == doffs) { \ + if (nw == 1) { \ + mask = (lomask(nbits)<>5; \ + if (soffs < doffs) { \ + s = doffs-soffs; \ + if (nw == 1) { \ + mask = (lomask(nbits)<>(32-s); \ + for(i=1; i < snw-1; i++) { \ + dest[i] = (OP(src[i])<>(32-s); \ + } \ + tail = (doffs+nbits)&31; \ + if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \ + if (snw == nw) { \ + dest[i] = (dest[i] & ~mask) | (((OP(src[i])<>(32-s); \ + i++; \ + dest[i] = (dest[i] & ~mask) | (scrap & mask); \ + } \ + } \ + } \ + else { \ + s = soffs-doffs; \ + if (snw == 1) { \ + mask = (lomask(nbits)<>s) & mask); \ + return; \ + } \ + if (nw == 1) { \ + mask = (lomask(nbits)<>s)|(OP(src[1])<<(32-s))) & mask); \ + return; \ + } \ + mask = ~lomask(doffs); \ + dest[0] = (dest[0] & ~mask) | \ + (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \ + for(i=1; i < nw-1; i++) { \ + dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \ + } \ + tail = (doffs+nbits)&31; \ + if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \ + if (snw == nw) { \ + dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \ + } \ + else /* snw > nw */ { \ + dest[i] = (dest[i] & ~mask) | \ + (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \ + } \ + } \ +} + +#define BV_COPY(a) (a) +#define BV_NOT(a) (~(a)) +BITVECTOR_COPY_OP(copy, BV_COPY) +BITVECTOR_COPY_OP(not_to, BV_NOT) + +// right-shift the bits in one logical "row" of a long 2d bit vector +/* +void bitvector_shr_row(u_int32_t *b, u_int32_t offs, size_t nbits, u_int32_t s) +{ +} +*/ + +// copy from source to dest while reversing bit-order +// assumes dest offset == 0 +// assumes source and dest don't overlap +// assumes offset < 32 +void bitvector_reverse_to(u_int32_t *dest, u_int32_t *src, u_int32_t soffs, + u_int32_t nbits) +{ + index_t i; + u_int32_t nw, tail; + + if (nbits == 0) return; + + nw = (soffs+nbits+31)>>5; + // first, reverse the words while reversing bit order within each word + for(i=0; i < nw/2; i++) { + dest[i] = bitreverse(src[nw-i-1]); + dest[nw-i-1] = bitreverse(src[i]); + } + if (nw&0x1) + dest[i] = bitreverse(src[i]); + + tail = (soffs+nbits)&31; + if (tail) + bitvector_shr(dest, nw, 32-tail); +} + +void bitvector_reverse(u_int32_t *b, u_int32_t offs, u_int32_t nbits) +{ + index_t i; + u_int32_t nw, tail; + u_int32_t *temp; + + if (nbits == 0) return; + + nw = (offs+nbits+31)>>5; + temp = (nw > MALLOC_CUTOFF) ? malloc(nw*4) : alloca(nw*4); + for(i=0; i < nw/2; i++) { + temp[i] = bitreverse(b[nw-i-1]); + temp[nw-i-1] = bitreverse(b[i]); + } + if (nw&0x1) + temp[i] = bitreverse(b[i]); + + tail = (offs+nbits)&31; + bitvector_copy(b, offs, temp, (32-tail)&31, nbits); + if (nw > MALLOC_CUTOFF) free(temp); +} + +u_int64_t bitvector_count(u_int32_t *b, u_int32_t offs, u_int64_t nbits) +{ + size_t i, nw; + u_int32_t ntail; + u_int64_t ans; + + if (nbits == 0) return 0; + nw = ((u_int64_t)offs+nbits+31)>>5; + + if (nw == 1) { + return count_bits(b[0] & (lomask(nbits)<>offs); // first end cap + + for(i=1; i < nw-1; i++) { + /* popcnt can be computed branch-free, so these special cases + probably don't help much */ + /* + v = b[i]; + if (v == 0) + continue; + if (v == ONES32) + ans += 32; + else + */ + ans += count_bits(b[i]); + } + + ntail = (offs+(u_int32_t)nbits)&31; + ans += count_bits(b[i]&(ntail>0?lomask(ntail):ONES32)); // last end cap + + return ans; +} + +u_int32_t bitvector_any0(u_int32_t *b, u_int32_t offs, u_int32_t nbits) +{ + index_t i; + u_int32_t nw, tail; + u_int32_t mask; + + if (nbits == 0) return 0; + nw = (offs+nbits+31)>>5; + + if (nw == 1) { + mask = (lomask(nbits)<>5; + + if (nw == 1) { + mask = (lomask(nbits)< soffs) + bitvector_shl_to(dest, src, nw, newoffs-soffs, true); + else + bitvector_shr_to(dest, src, nw, soffs-newoffs); +} + +#define BITVECTOR_BINARY_OP_TO(opname, OP) \ +void bitvector_##opname##_to(u_int32_t *dest, u_int32_t doffs, \ + u_int32_t *a, u_int32_t aoffs, \ + u_int32_t *b, u_int32_t boffs, u_int32_t nbits) \ +{ \ + u_int32_t nw = (doffs+nbits+31)>>5; \ + u_int32_t *temp = nw>MALLOC_CUTOFF ? malloc((nw+1)*4) : alloca((nw+1)*4);\ + u_int32_t i, anw, bnw; \ + if (aoffs == boffs) { \ + anw = (aoffs+nbits+31)>>5; \ + } \ + else if (aoffs == doffs) { \ + bnw = (boffs+nbits+31)>>5; \ + adjust_offset_to(temp, b, bnw, boffs, aoffs); \ + b = temp; anw = nw; \ + } \ + else { \ + anw = (aoffs+nbits+31)>>5; \ + bnw = (boffs+nbits+31)>>5; \ + adjust_offset_to(temp, a, anw, aoffs, boffs); \ + a = temp; aoffs = boffs; anw = bnw; \ + } \ + for(i=0; i < anw; i++) temp[i] = OP(a[i], b[i]); \ + bitvector_copy(dest, doffs, temp, aoffs, nbits); \ + if (nw>MALLOC_CUTOFF) free(temp); \ +} + +#define BV_AND(a,b) ((a)&(b)) +#define BV_OR(a,b) ((a)|(b)) +#define BV_XOR(a,b) ((a)^(b)) +BITVECTOR_BINARY_OP_TO(and, BV_AND) +BITVECTOR_BINARY_OP_TO(or, BV_OR) +BITVECTOR_BINARY_OP_TO(xor, BV_XOR) diff --git a/llt/bitvector.c b/llt/bitvector.c index 567860a..cea3d92 100644 --- a/llt/bitvector.c +++ b/llt/bitvector.c @@ -38,12 +38,8 @@ #ifdef WIN32 #include -#define alloca _alloca #endif -// greater than this # of words we use malloc instead of alloca -#define MALLOC_CUTOFF 2000 - u_int32_t *bitvector_resize(u_int32_t *b, u_int64_t n, int initzero) { u_int32_t *p; @@ -76,474 +72,3 @@ u_int32_t bitvector_get(u_int32_t *b, u_int64_t n) { return b[n>>5] & (1<<(n&31)); } - -u_int32_t bitreverse(u_int32_t x) -{ - u_int32_t m; - -#ifdef __INTEL_COMPILER - x = _bswap(x); -#else - x = (x >> 16) | (x << 16); m = 0xff00ff00; - x = ((x & m) >> 8) | ((x & ~m) << 8); -#endif - m = 0xf0f0f0f0; - x = ((x & m) >> 4) | ((x & ~m) << 4); m = 0xcccccccc; - x = ((x & m) >> 2) | ((x & ~m) << 2); m = 0xaaaaaaaa; - x = ((x & m) >> 1) | ((x & ~m) << 1); - - return x; -} - -// shift all bits in a long bit vector -// n is # of int32s to consider, s is shift distance -// lowest bit-index is bit 0 of word 0 -// TODO: handle boundary case of shift distance >= data size? -void bitvector_shr(u_int32_t *b, size_t n, u_int32_t s) -{ - u_int32_t i; - if (s == 0 || n == 0) return; - i = (s>>5); - if (i) { - n -= i; - memmove(b, &b[i], n*4); - memset(&b[n], 0, i*4); - s &= 31; - } - for(i=0; i < n-1; i++) { - b[i] = (b[i]>>s) | (b[i+1]<<(32-s)); - } - b[i]>>=s; -} - -// out-of-place version, good for re-aligning a strided submatrix to -// linear representation when a copy is needed -// assumes that dest has the same amount of space as source, even if it -// wouldn't have been necessary to hold the shifted bits -void bitvector_shr_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s) -{ - u_int32_t i, j; - if (n == 0) return; - if (s == 0) { - memcpy(dest, b, n*4); - return; - } - j = (s>>5); - if (j) { - n -= j; - memset(&dest[n], 0, j*4); - s &= 31; - b = &b[j]; - } - for(i=0; i < n-1; i++) { - dest[i] = (b[i]>>s) | (b[i+1]<<(32-s)); - } - dest[i] = b[i]>>s; -} - -void bitvector_shl(u_int32_t *b, size_t n, u_int32_t s) -{ - u_int32_t i, scrap=0, temp; - if (s == 0 || n == 0) return; - i = (s>>5); - if (i) { - n -= i; - memmove(&b[i], b, n*4); - memset(b, 0, i*4); - s &= 31; - b = &b[i]; - } - for(i=0; i < n; i++) { - temp = (b[i]<>(32-s); - b[i] = temp; - } -} - -// if dest has more space than source, set scrap to true to keep the -// top bits that would otherwise be shifted out -void bitvector_shl_to(u_int32_t *dest, u_int32_t *b, size_t n, u_int32_t s, - bool_t scrap) -{ - u_int32_t i, j, sc=0; - if (n == 0) return; - if (s == 0) { - memcpy(dest, b, n*4); - return; - } - j = (s>>5); - if (j) { - n -= j; - memset(dest, 0, j*4); - s &= 31; - dest = &dest[j]; - } - for(i=0; i < n; i++) { - dest[i] = (b[i]<>(32-s); - } - if (scrap) - dest[i] = sc; -} - -// set nbits to c, starting at given bit offset -// assumes offs < 32 -void bitvector_fill(u_int32_t *b, u_int32_t offs, u_int32_t c, u_int32_t nbits) -{ - index_t i; - u_int32_t nw, tail; - u_int32_t mask; - - if (nbits == 0) return; - nw = (offs+nbits+31)>>5; - - if (nw == 1) { - mask = (lomask(nbits)<>5; - - if (nw == 1) { - mask = (lomask(nbits)<>5; \ - \ - if (soffs == doffs) { \ - if (nw == 1) { \ - mask = (lomask(nbits)<>5; \ - if (soffs < doffs) { \ - s = doffs-soffs; \ - if (nw == 1) { \ - mask = (lomask(nbits)<>(32-s); \ - for(i=1; i < snw-1; i++) { \ - dest[i] = (OP(src[i])<>(32-s); \ - } \ - tail = (doffs+nbits)&31; \ - if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \ - if (snw == nw) { \ - dest[i] = (dest[i] & ~mask) | (((OP(src[i])<>(32-s); \ - i++; \ - dest[i] = (dest[i] & ~mask) | (scrap & mask); \ - } \ - } \ - } \ - else { \ - s = soffs-doffs; \ - if (snw == 1) { \ - mask = (lomask(nbits)<>s) & mask); \ - return; \ - } \ - if (nw == 1) { \ - mask = (lomask(nbits)<>s)|(OP(src[1])<<(32-s))) & mask); \ - return; \ - } \ - mask = ~lomask(doffs); \ - dest[0] = (dest[0] & ~mask) | \ - (((OP(src[0])>>s)|(OP(src[1])<<(32-s))) & mask); \ - for(i=1; i < nw-1; i++) { \ - dest[i] = (OP(src[i])>>s) | (OP(src[i+1])<<(32-s)); \ - } \ - tail = (doffs+nbits)&31; \ - if (tail==0) { mask=ONES32; } else { mask = lomask(tail); } \ - if (snw == nw) { \ - dest[i] = (dest[i] & ~mask) | ((OP(src[i])>>s) & mask); \ - } \ - else /* snw > nw */ { \ - dest[i] = (dest[i] & ~mask) | \ - (((OP(src[i])>>s)|(OP(src[i+1])<<(32-s))) & mask); \ - } \ - } \ -} - -#define BV_COPY(a) (a) -#define BV_NOT(a) (~(a)) -BITVECTOR_COPY_OP(copy, BV_COPY) -BITVECTOR_COPY_OP(not_to, BV_NOT) - -// right-shift the bits in one logical "row" of a long 2d bit vector -/* -void bitvector_shr_row(u_int32_t *b, u_int32_t offs, size_t nbits, u_int32_t s) -{ -} -*/ - -// copy from source to dest while reversing bit-order -// assumes dest offset == 0 -// assumes source and dest don't overlap -// assumes offset < 32 -void bitvector_reverse_to(u_int32_t *dest, u_int32_t *src, u_int32_t soffs, - u_int32_t nbits) -{ - index_t i; - u_int32_t nw, tail; - - if (nbits == 0) return; - - nw = (soffs+nbits+31)>>5; - // first, reverse the words while reversing bit order within each word - for(i=0; i < nw/2; i++) { - dest[i] = bitreverse(src[nw-i-1]); - dest[nw-i-1] = bitreverse(src[i]); - } - if (nw&0x1) - dest[i] = bitreverse(src[i]); - - tail = (soffs+nbits)&31; - if (tail) - bitvector_shr(dest, nw, 32-tail); -} - -void bitvector_reverse(u_int32_t *b, u_int32_t offs, u_int32_t nbits) -{ - index_t i; - u_int32_t nw, tail; - u_int32_t *temp; - - if (nbits == 0) return; - - nw = (offs+nbits+31)>>5; - temp = (nw > MALLOC_CUTOFF) ? malloc(nw*4) : alloca(nw*4); - for(i=0; i < nw/2; i++) { - temp[i] = bitreverse(b[nw-i-1]); - temp[nw-i-1] = bitreverse(b[i]); - } - if (nw&0x1) - temp[i] = bitreverse(b[i]); - - tail = (offs+nbits)&31; - bitvector_copy(b, offs, temp, (32-tail)&31, nbits); - if (nw > MALLOC_CUTOFF) free(temp); -} - -u_int64_t bitvector_count(u_int32_t *b, u_int32_t offs, u_int64_t nbits) -{ - size_t i, nw; - u_int32_t ntail; - u_int64_t ans; - - if (nbits == 0) return 0; - nw = ((u_int64_t)offs+nbits+31)>>5; - - if (nw == 1) { - return count_bits(b[0] & (lomask(nbits)<>offs); // first end cap - - for(i=1; i < nw-1; i++) { - /* popcnt can be computed branch-free, so these special cases - probably don't help much */ - /* - v = b[i]; - if (v == 0) - continue; - if (v == ONES32) - ans += 32; - else - */ - ans += count_bits(b[i]); - } - - ntail = (offs+(u_int32_t)nbits)&31; - ans += count_bits(b[i]&(ntail>0?lomask(ntail):ONES32)); // last end cap - - return ans; -} - -u_int32_t bitvector_any0(u_int32_t *b, u_int32_t offs, u_int32_t nbits) -{ - index_t i; - u_int32_t nw, tail; - u_int32_t mask; - - if (nbits == 0) return 0; - nw = (offs+nbits+31)>>5; - - if (nw == 1) { - mask = (lomask(nbits)<>5; - - if (nw == 1) { - mask = (lomask(nbits)< soffs) - bitvector_shl_to(dest, src, nw, newoffs-soffs, true); - else - bitvector_shr_to(dest, src, nw, soffs-newoffs); -} - -#define BITVECTOR_BINARY_OP_TO(opname, OP) \ -void bitvector_##opname##_to(u_int32_t *dest, u_int32_t doffs, \ - u_int32_t *a, u_int32_t aoffs, \ - u_int32_t *b, u_int32_t boffs, u_int32_t nbits) \ -{ \ - u_int32_t nw = (doffs+nbits+31)>>5; \ - u_int32_t *temp = nw>MALLOC_CUTOFF ? malloc((nw+1)*4) : alloca((nw+1)*4);\ - u_int32_t i, anw, bnw; \ - if (aoffs == boffs) { \ - anw = (aoffs+nbits+31)>>5; \ - } \ - else if (aoffs == doffs) { \ - bnw = (boffs+nbits+31)>>5; \ - adjust_offset_to(temp, b, bnw, boffs, aoffs); \ - b = temp; anw = nw; \ - } \ - else { \ - anw = (aoffs+nbits+31)>>5; \ - bnw = (boffs+nbits+31)>>5; \ - adjust_offset_to(temp, a, anw, aoffs, boffs); \ - a = temp; aoffs = boffs; anw = bnw; \ - } \ - for(i=0; i < anw; i++) temp[i] = OP(a[i], b[i]); \ - bitvector_copy(dest, doffs, temp, aoffs, nbits); \ - if (nw>MALLOC_CUTOFF) free(temp); \ -} - -#define BV_AND(a,b) ((a)&(b)) -#define BV_OR(a,b) ((a)|(b)) -#define BV_XOR(a,b) ((a)^(b)) -BITVECTOR_BINARY_OP_TO(and, BV_AND) -BITVECTOR_BINARY_OP_TO(or, BV_OR) -BITVECTOR_BINARY_OP_TO(xor, BV_XOR) diff --git a/llt/dtypes.h b/llt/dtypes.h index ffc840d..731c0fd 100644 --- a/llt/dtypes.h +++ b/llt/dtypes.h @@ -108,6 +108,15 @@ typedef u_ptrint_t uptrint_t; #define NBABS(n,nb) (((n)^((n)>>((nb)-1))) - ((n)>>((nb)-1))) #define DFINITE(d) (((*(int64_t*)&(d))&0x7ff0000000000000LL)!=0x7ff0000000000000LL) +extern double D_PNAN; +extern double D_NNAN; +extern double D_PINF; +extern double D_NINF; +extern float F_PNAN; +extern float F_NNAN; +extern float F_PINF; +extern float F_NINF; + typedef enum { T_INT8, T_UINT8, T_INT16, T_UINT16, T_INT32, T_UINT32, T_INT64, T_UINT64, T_FLOAT, T_DOUBLE } numerictype_t; diff --git a/llt/hashing.c b/llt/hashing.c index 8cd7b98..98cf182 100644 --- a/llt/hashing.c +++ b/llt/hashing.c @@ -9,6 +9,7 @@ #include "utils.h" #include "hashing.h" #include "timefuncs.h" +#include "ios.h" uint_t nextipow2(uint_t i) { @@ -67,6 +68,14 @@ u_int64_t memhash(char* buf, size_t n) return (u_int64_t)c | (((u_int64_t)b)<<32); } +u_int32_t memhash32(char* buf, size_t n) +{ + u_int32_t c=0xcafe8881, b=0x4d6a087c; + + hashlittle2(buf, n, &c, &b); + return c; +} + #include "mt19937ar.c" double rand_double() @@ -118,6 +127,15 @@ void randomize() init_by_array((unsigned long*)&tm, 2); } +double D_PNAN; +double D_NNAN; +double D_PINF; +double D_NINF; +float F_PNAN; +float F_NNAN; +float F_PINF; +float F_NINF; + void llt_init() { /* @@ -131,4 +149,13 @@ void llt_init() randomize(); ios_init_stdstreams(); + + D_PNAN = strtod("+NaN",NULL); + D_NNAN = strtod("-NaN",NULL); + D_PINF = strtod("+Inf",NULL); + D_NINF = strtod("-Inf",NULL); + F_PNAN = strtof("+NaN",NULL); + F_NNAN = strtof("-NaN",NULL); + F_PINF = strtof("+Inf",NULL); + F_NINF = strtof("-Inf",NULL); } diff --git a/llt/hashing.h b/llt/hashing.h index 0815f15..9e3741b 100644 --- a/llt/hashing.h +++ b/llt/hashing.h @@ -11,6 +11,7 @@ u_int32_t int64to32hash(u_int64_t key); #define inthash int32hash #endif u_int64_t memhash(char* buf, size_t n); +u_int32_t memhash32(char* buf, size_t n); #define random() genrand_int32() #define srandom(n) init_genrand(n) double rand_double(); diff --git a/llt/htable.c b/llt/htable.c new file mode 100644 index 0000000..a265310 --- /dev/null +++ b/llt/htable.c @@ -0,0 +1,48 @@ +/* + functions common to all hash table instantiations +*/ + +#include +#include +#include +#include +#include + +#include "dtypes.h" +#include "htable.h" +#include "hashing.h" + +htable_t *htable_new(htable_t *h, size_t size) +{ + size = nextipow2(size); + size *= 2; // 2 pointers per key/value pair + size *= 2; // aim for 50% occupancy + h->size = size; + h->table = (void**)malloc(size*sizeof(void*)); + if (h->table == NULL) return NULL; + size_t i; + for(i=0; i < size; i++) + h->table[i] = HT_NOTFOUND; + return h; +} + +void htable_free(htable_t *h) +{ + free(h->table); +} + +// empty and reduce size +void htable_reset(htable_t *h, size_t sz) +{ + if (h->size > sz*4) { + size_t newsz = sz*4; + void **newtab = (void**)realloc(h->table, newsz*sizeof(void*)); + if (newtab == NULL) + return; + h->size = newsz; + h->table = newtab; + } + size_t i, hsz=h->size; + for(i=0; i < hsz; i++) + h->table[i] = HT_NOTFOUND; +} diff --git a/llt/htable.h b/llt/htable.h new file mode 100644 index 0000000..4ab3036 --- /dev/null +++ b/llt/htable.h @@ -0,0 +1,19 @@ +#ifndef __HTABLE_H_ +#define __HTABLE_H_ + +typedef struct { + size_t size; + void **table; +} htable_t; + +// define this to be an invalid key/value +#define HT_NOTFOUND ((void*)1) + +// initialize and free +htable_t *htable_new(htable_t *h, size_t size); +void htable_free(htable_t *h); + +// clear and (possibly) change size +void htable_reset(htable_t *h, size_t sz); + +#endif diff --git a/llt/htable.inc b/llt/htable.inc new file mode 100644 index 0000000..28a373a --- /dev/null +++ b/llt/htable.inc @@ -0,0 +1,140 @@ +//-*- mode:c -*- + +/* + include this file and call HTIMPL to generate an implementation +*/ + +#define hash_size(h) ((h)->size/2) + +// compute empirical max-probe for a given size +#define max_probe(size) ((size)>>5) + +#define HTIMPL(HTNAME, HFUNC, EQFUNC) \ +static void **HTNAME##_lookup_bp(htable_t *h, void *key) \ +{ \ + uint_t hv; \ + size_t i, orig, index, iter; \ + size_t newsz, sz = hash_size(h); \ + size_t maxprobe = max_probe(sz); \ + void **tab = h->table; \ + void **ol; \ + \ + hv = HFUNC((uptrint_t)key); \ + retry_bp: \ + iter = 0; \ + index = (index_t)(hv & (sz-1)) * 2; \ + sz *= 2; \ + orig = index; \ + \ + do { \ + if (tab[index+1] == HT_NOTFOUND) { \ + tab[index] = key; \ + return &tab[index+1]; \ + } \ + \ + if (EQFUNC(key, tab[index])) \ + return &tab[index+1]; \ + \ + index = (index+2) & (sz-1); \ + iter++; \ + if (iter > maxprobe) \ + break; \ + } while (index != orig); \ + \ + /* table full */ \ + /* quadruple size, rehash, retry the insert */ \ + /* it's important to grow the table really fast; otherwise we waste */ \ + /* lots of time rehashing all the keys over and over. */ \ + sz = h->size; \ + ol = h->table; \ + if (sz >= (1<<19)) \ + newsz = sz<<1; \ + else \ + newsz = sz<<2; \ + /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \ + tab = (void**)malloc(newsz*sizeof(void*)); \ + if (tab == NULL) \ + return NULL; \ + for(i=0; i < newsz; i++) \ + tab[i] = HT_NOTFOUND; \ + h->table = tab; \ + h->size = newsz; \ + for(i=0; i < sz; i+=2) { \ + if (ol[i] != HT_NOTFOUND && ol[i+1] != HT_NOTFOUND) { \ + (*HTNAME##_lookup_bp(h, ol[i])) = ol[i+1]; \ + } \ + } \ + free(ol); \ + \ + sz = hash_size(h); \ + maxprobe = max_probe(sz); \ + \ + goto retry_bp; \ + \ + return NULL; \ +} \ + \ +void HTNAME##_put(htable_t *h, void *key, void *val) \ +{ \ + void **bp = HTNAME##_lookup_bp(h, key); \ + \ + *bp = val; \ +} \ + \ +void **HTNAME##_bp(htable_t *h, void *key) \ +{ \ + return HTNAME##_lookup_bp(h, key); \ +} \ + \ +/* returns bp if key is in hash, otherwise NULL */ \ +static void **HTNAME##_peek_bp(htable_t *h, void *key) \ +{ \ + size_t sz = hash_size(h); \ + size_t maxprobe = max_probe(sz); \ + void **tab = h->table; \ + size_t index = (index_t)(HFUNC((uptrint_t)key) & (sz-1)) * 2; \ + sz *= 2; \ + size_t orig = index; \ + size_t iter = 0; \ + \ + do { \ + if (tab[index] == HT_NOTFOUND) \ + return NULL; \ + if (EQFUNC(key, tab[index]) && tab[index+1] != HT_NOTFOUND) \ + return &tab[index+1]; \ + \ + index = (index+2) & (sz-1); \ + iter++; \ + if (iter > maxprobe) \ + break; \ + } while (index != orig); \ + \ + return NULL; \ +} \ + \ +void *HTNAME##_get(htable_t *h, void *key) \ +{ \ + void **bp = HTNAME##_peek_bp(h, key); \ + if (bp == NULL) \ + return HT_NOTFOUND; \ + return *bp; \ +} \ + \ +int HTNAME##_has(htable_t *h, void *key) \ +{ \ + return (HTNAME##_get(h,key) != HT_NOTFOUND); \ +} \ + \ +void HTNAME##_remove(htable_t *h, void *key) \ +{ \ + void **bp = HTNAME##_peek_bp(h, key); \ + if (bp != NULL) \ + *bp = HT_NOTFOUND; \ +} \ + \ +void HTNAME##_adjoin(htable_t *h, void *key, void *val) \ +{ \ + void **bp = HTNAME##_lookup_bp(h, key); \ + if (*bp == HT_NOTFOUND) \ + *bp = val; \ +} diff --git a/llt/htableh.inc b/llt/htableh.inc new file mode 100644 index 0000000..57ff425 --- /dev/null +++ b/llt/htableh.inc @@ -0,0 +1,30 @@ +//-*- mode:c -*- + +#include "htable.h" + +#define HTPROT(HTNAME) \ +void *HTNAME##_get(htable_t *h, void *key); \ +void HTNAME##_put(htable_t *h, void *key, void *val); \ +void HTNAME##_adjoin(htable_t *h, void *key, void *val); \ +int HTNAME##_has(htable_t *h, void *key); \ +void HTNAME##_remove(htable_t *h, void *key); \ +void **HTNAME##_bp(htable_t *h, void *key); + +// return value, or PH_NOTFOUND if key not found + +// add key/value binding + +// add binding iff key is unbound + +// does key exist? + +// logically remove key + +// get a pointer to the location of the value for the given key. +// creates the location if it doesn't exist. only returns NULL +// if memory allocation fails. +// this should be used for updates, for example: +// void **bp = ptrhash_bp(h, key); +// *bp = f(*bp); +// do not reuse bp if there might be intervening calls to ptrhash_put, +// ptrhash_bp, ptrhash_reset, or ptrhash_free. diff --git a/llt/ptrhash.c b/llt/ptrhash.c index 1a1ebcc..c9b9bb1 100644 --- a/llt/ptrhash.c +++ b/llt/ptrhash.c @@ -13,183 +13,8 @@ #include "ptrhash.h" #include "hashing.h" -#define ptrhash_size(h) ((h)->size/2) +#define OP_EQ(x,y) ((x)==(y)) -ptrhash_t *ptrhash_new(ptrhash_t *h, size_t size) -{ - size = nextipow2(size); - size *= 2; // 2 pointers per key/value pair - size *= 2; // aim for 50% occupancy - h->size = size; - h->table = (void**)malloc(size*sizeof(void*)); - if (h->table == NULL) return NULL; - size_t i; - for(i=0; i < size; i++) - h->table[i] = PH_NOTFOUND; - return h; -} +#include "htable.inc" -void ptrhash_free(ptrhash_t *h) -{ - free(h->table); -} - -// empty and reduce size -void ptrhash_reset(ptrhash_t *h, size_t sz) -{ - if (h->size > sz*4) { - size_t newsz = sz*4; - void **newtab = (void**)realloc(h->table, newsz*sizeof(void*)); - if (newtab == NULL) - return; - h->size = newsz; - h->table = newtab; - } - size_t i, hsz=h->size; - for(i=0; i < hsz; i++) - h->table[i] = PH_NOTFOUND; -} - -// compute empirical max-probe for a given size -#define ph_max_probe(size) ((size)>>5) - -static void **ptrhash_lookup_bp(ptrhash_t *h, void *key) -{ - uint_t hv; - size_t i, orig, index, iter; - size_t newsz, sz = ptrhash_size(h); - size_t maxprobe = ph_max_probe(sz); - void **tab = h->table; - void **ol; - - hv = inthash((uptrint_t)key); - retry_bp: - iter = 0; - index = (index_t)(hv & (sz-1)) * 2; - sz *= 2; - orig = index; - - do { - if (tab[index+1] == PH_NOTFOUND) { - tab[index] = key; - return &tab[index+1]; - } - - if (key == tab[index]) - return &tab[index+1]; - - index = (index+2) & (sz-1); - iter++; - if (iter > maxprobe) - break; - } while (index != orig); - - // table full - // quadruple size, rehash, retry the insert - // it's important to grow the table really fast; otherwise we waste - // lots of time rehashing all the keys over and over. - sz = h->size; - ol = h->table; - if (sz >= (1<<19)) - newsz = sz<<1; - else - newsz = sz<<2; - //printf("trying to allocate %d words.\n", newsz); fflush(stdout); - tab = (void**)malloc(newsz*sizeof(void*)); - if (tab == NULL) - return NULL; - for(i=0; i < newsz; i++) - tab[i] = PH_NOTFOUND; - h->table = tab; - h->size = newsz; - for(i=0; i < sz; i+=2) { - if (ol[i] != PH_NOTFOUND && ol[i+1] != PH_NOTFOUND) { - (*ptrhash_lookup_bp(h, ol[i])) = ol[i+1]; - /* - // this condition is not really possible - if (bp == NULL) { - free(h->table); - h->table = ol; - h->size = sz; - // another thing we could do in this situation - // is newsz<<=1 and go back to the malloc, retrying with - // a bigger buffer on this level of recursion. - return NULL; - } - */ - } - } - free(ol); - - sz = ptrhash_size(h); - maxprobe = ph_max_probe(sz); - - goto retry_bp; - - return NULL; -} - -void ptrhash_put(ptrhash_t *h, void *key, void *val) -{ - void **bp = ptrhash_lookup_bp(h, key); - - *bp = val; -} - -void **ptrhash_bp(ptrhash_t *h, void *key) -{ - return ptrhash_lookup_bp(h, key); -} - -// returns bp if key is in hash, otherwise NULL -static void **ptrhash_peek_bp(ptrhash_t *h, void *key) -{ - size_t sz = ptrhash_size(h); - size_t maxprobe = ph_max_probe(sz); - void **tab = h->table; - size_t index = (index_t)(inthash((uptrint_t)key) & (sz-1)) * 2; - sz *= 2; - size_t orig = index; - size_t iter = 0; - - do { - if (tab[index] == PH_NOTFOUND) - return NULL; - if (key == tab[index] && tab[index+1] != PH_NOTFOUND) - return &tab[index+1]; - - index = (index+2) & (sz-1); - iter++; - if (iter > maxprobe) - break; - } while (index != orig); - - return NULL; -} - -void *ptrhash_get(ptrhash_t *h, void *key) -{ - void **bp = ptrhash_peek_bp(h, key); - if (bp == NULL) - return PH_NOTFOUND; - return *bp; -} - -int ptrhash_has(ptrhash_t *h, void *key) -{ - return (ptrhash_get(h,key) != PH_NOTFOUND); -} - -void ptrhash_remove(ptrhash_t *h, void *key) -{ - void **bp = ptrhash_peek_bp(h, key); - if (bp != NULL) - *bp = PH_NOTFOUND; -} - -void ptrhash_adjoin(ptrhash_t *h, void *key, void *val) -{ - void **bp = ptrhash_lookup_bp(h, key); - if (*bp == PH_NOTFOUND) - *bp = val; -} +HTIMPL(ptrhash, inthash, OP_EQ) diff --git a/llt/ptrhash.h b/llt/ptrhash.h index 4c1a8e3..8245459 100644 --- a/llt/ptrhash.h +++ b/llt/ptrhash.h @@ -1,44 +1,8 @@ #ifndef __PTRHASH_H_ #define __PTRHASH_H_ -typedef struct _ptrhash_t { - size_t size; - void **table; -} ptrhash_t; +#include "htableh.inc" -// define this to be an invalid key/value -#define PH_NOTFOUND ((void*)1) - -// initialize and free -ptrhash_t *ptrhash_new(ptrhash_t *h, size_t size); -void ptrhash_free(ptrhash_t *h); - -// clear and (possibly) change size -void ptrhash_reset(ptrhash_t *h, size_t sz); - -// return value, or PH_NOTFOUND if key not found -void *ptrhash_get(ptrhash_t *h, void *key); - -// add key/value binding -void ptrhash_put(ptrhash_t *h, void *key, void *val); - -// add binding iff key is unbound -void ptrhash_adjoin(ptrhash_t *h, void *key, void *val); - -// does key exist? -int ptrhash_has(ptrhash_t *h, void *key); - -// logically remove key -void ptrhash_remove(ptrhash_t *h, void *key); - -// get a pointer to the location of the value for the given key. -// creates the location if it doesn't exist. only returns NULL -// if memory allocation fails. -// this should be used for updates, for example: -// void **bp = ptrhash_bp(h, key); -// *bp = f(*bp); -// do not reuse bp if there might be intervening calls to ptrhash_put, -// ptrhash_bp, ptrhash_reset, or ptrhash_free. -void **ptrhash_bp(ptrhash_t *h, void *key); +HTPROT(ptrhash) #endif