diff --git a/s48/riatables/AUTHORS b/s48/riatables/AUTHORS new file mode 100644 index 0000000..90c9bf3 --- /dev/null +++ b/s48/riatables/AUTHORS @@ -0,0 +1 @@ +Public Domain 2005 Taylor Campbell diff --git a/s48/riatables/BLURB b/s48/riatables/BLURB new file mode 100644 index 0000000..ff17bec --- /dev/null +++ b/s48/riatables/BLURB @@ -0,0 +1 @@ +riatables: rich hash tables. diff --git a/s48/riatables/NEWS b/s48/riatables/NEWS new file mode 100644 index 0000000..0238ac4 --- /dev/null +++ b/s48/riatables/NEWS @@ -0,0 +1,3 @@ +version 0.1 +* Initial version. UPSTREAM: + http://www.bloodandcoffee.net/campbell/code/s48-riatables.tar.gz diff --git a/s48/riatables/README b/s48/riatables/README new file mode 100644 index 0000000..0169a31 --- /dev/null +++ b/s48/riatables/README @@ -0,0 +1,21 @@ +Improved hash table package for Scheme48 + +This library for Scheme48 implements much more rich hash tables than +what Scheme48's built-in hash tables provide, notably object identity +tables (with arbitrary objects for keys), tables with GC-sensitive hash +functions in general, and head- & tail-weak hash tables. + +Taylor Campbell wrote this code and this associated documentation; he +places it in the public domain. + +The scheme/ directory contains the Scheme source code. Load the file +scheme/packages.scm for the definitions of the relevant interfaces & +structures. The RIATABLES structure exports everything relevant to the +package. + +There is currently no extended documentation. Use the source, Luke. +This may change soon. + +Send bugs, comments, questions, or anything to Taylor Campbell via +email to campbell@bloodandcoffee.net or via IRC on irc.freenode.net, +where he can be found in the channels #scheme & #scsh as Riastradh. diff --git a/s48/riatables/pkg-def.scm b/s48/riatables/pkg-def.scm new file mode 100644 index 0000000..4d76d8c --- /dev/null +++ b/s48/riatables/pkg-def.scm @@ -0,0 +1,13 @@ +(define-package "riatables" + (0 1) + ((install-lib-version (1 0))) + (write-to-load-script + `((config) + (load ,(absolute-file-name "packages.scm" + (get-directory 'scheme #f))))) + (install-file "README" 'doc) + (install-file "NEWS" 'doc) + (install-string (COPYING) "COPYING" 'doc) + (install-file "scheme/packages.scm" 'scheme) + (install-file "scheme/riatable.scm" 'scheme) + (install-file "scheme/support.scm" 'scheme)) diff --git a/s48/riatables/scheme/packages.scm b/s48/riatables/scheme/packages.scm new file mode 100644 index 0000000..db2d0cb --- /dev/null +++ b/s48/riatables/scheme/packages.scm @@ -0,0 +1,74 @@ +;;;;;; Riatables: alternative hash tables -*- Scheme -*- +;;;;;; Interface and package definitions + +;;; Taylor Campbell wrote this code; he places it in the public domain. + +(define-interface riatables-interface + (export table-template? + make-table-template + make-weak-table-template + make-strong-table-template + + weaken-table-template strengthen-table-template + head-weaken-table-template head-strengthen-table-template + tail-weaken-table-template tail-strengthen-table-template + modify-table-template-weakness + + table-template-key-predicate + table-template-key-comparator + table-template-key-hasher + table-template-gc-sensitive? + table-template-head-weak? + table-template-tail-weak? + + table-constructor + + table? + make-table-from-template + + table-size + table-entry + set-table-entry! + modify-table-entry! + pop-table-entry! + walk-table + + table-template + + descriptor-hash modular-descriptor-hash + string-hash modular-string-hash + string-ci-hash modular-string-ci-hash + integer-hash modular-integer-hash + + make-object-table object-table-template + make-weak-object-table weak-object-table-template + make-string-table string-table-template + make-string-ci-table string-ci-table-template + make-integer-table integer-table-template + make-symbol-table symbol-table-template)) + +(define-interface riatables-support-interface + (export gc-stamp + descriptor-hash modular-descriptor-hash + string-hash modular-string-hash + string-ci-hash modular-string-ci-hash + integer-hash modular-integer-hash + exact-integer?)) + +(define-structure riatables riatables-interface + (open scheme + receiving + weak + (subset signals (error call-error)) + define-record-types + riatables-support) + (optimize auto-integrate) + (files riatable)) + +(define-structure riatables-support riatables-support-interface + (open scheme + (subset primitives (memory-status string-hash)) + (subset architecture (memory-status-option)) + enumerated) + (optimize auto-integrate) + (files support)) diff --git a/s48/riatables/scheme/riatable.scm b/s48/riatables/scheme/riatable.scm new file mode 100644 index 0000000..ec017b4 --- /dev/null +++ b/s48/riatables/scheme/riatable.scm @@ -0,0 +1,819 @@ +;;;;;; Riatables: alternative hash tables -*- Scheme -*- +;;;;;; Thread-unsafe implementation + +;;; Taylor Campbell wrote this code; he places it in the public domain. + +(define-record-type table type/table + (really-make-table id + template + size + gc-stamp + bucket-vector) + table? + (id table-id) + (template table-template) + (size table-size set-table-size!) + (gc-stamp table-gc-stamp set-table-gc-stamp!) + (bucket-vector table-bucket-vector set-table-bucket-vector!)) + +(define-record-discloser type/table + (lambda (table) + `(,(or (table-type-name table) 'table) + ,@(cond ((table-id table) => list) (else '())) + ,(table-size table)))) + + + +;;; -------------------- +;;; Parameters + +;;; These are just guesses. I ought to fill in better suggestions at +;;; some point, but these will do for now. + +;;; The number of buckets to put in freshly created tables. + +(define initial-bucket-count 10) + +;;; The next number of buckets to have in a table after PREVIOUS. + +(define (next-bucket-count previous) + (+ (* previous 2) 1)) + +;;; The maximum average number of entries to have in a bucket. + +(define maximum-average-bucket-entries 3) + + + +;;; -------------------- +;;; Table templates + +;;; This code is rather too verbose for my tastes. At least there can +;;; be a DEFINE-TEMPLATE-ACCESSOR macro to reduce code tedium. + +(define-record-type table-template type/table-template + (really-make-table-template type-name + key-predicate + key-comparator + key-hasher + gc-sensitive? + head-weak? + tail-weak? + searcher + expander) + table-template? + (type-name table-template-type-name) + (key-predicate table-template-key-predicate) + (key-comparator table-template-key-comparator) + (key-hasher table-template-key-hasher) + (gc-sensitive? table-template-gc-sensitive?) + (head-weak? table-template-head-weak?) + (tail-weak? table-template-tail-weak?) + (searcher table-template-searcher) + (expander table-template-expander)) + +(define-record-discloser type/table-template + (lambda (t) + `(table-template ,@(cond ((table-template-type-name t) => list) + (else '()))))) + +(define-syntax define-template-accessors + (syntax-rules () + ((define-template-accessors (table template) ...) + (begin (define (table t) + (template (table-template t))) + ...)))) + +(define-template-accessors + (table-type-name table-template-type-name) + (table-key-predicate table-template-key-predicate) + (table-key-comparator table-template-key-comparator) + (table-key-hasher table-template-key-hasher) + (table-gc-sensitive? table-template-gc-sensitive?) + (table-head-weak? table-template-head-weak?) + (table-tail-weak? table-template-tail-weak?) + (table-searcher table-template-searcher) + (table-expander table-template-expander)) + +(define (make-table-template key? key= key-hash + gc? head-weak? tail-weak? + type-name) + (let* ((rehasher (make-table-rehasher key-hash + gc? head-weak? tail-weak?)) + (hasher (make-table-hasher key-hash gc? rehasher))) + (really-make-table-template + type-name + key? key= key-hash + gc? head-weak? tail-weak? + (make-table-searcher key= hasher gc? + head-weak? tail-weak?) + (make-table-expander hasher gc? + head-weak? tail-weak? + rehasher)))) + +(define (make-weak-table-template key? key= key-hash gc? type-name) + (make-table-template key? key= key-hash gc? #t #t type-name)) +(define (make-strong-table-template key? key= key-hash gc? type-name) + (make-table-template key? key= key-hash gc? #f #f type-name)) + +(define (modify-table-template-weakness template head-weak? tail-weak?) + (make-table-template (table-template-key-predicate template) + (table-template-key-comparator template) + (table-template-key-hasher template) + (table-template-gc-sensitive? template) + head-weak? + tail-weak? + (table-template-type-name template))) + +(define-syntax define-weakness-modifiers + (syntax-rules () + ((define-weakness-modifiers + (name template head? tail?) + ...) + (begin (define (name template) + (modify-table-template-weakness template head? tail?)) + ...)))) + +(define-weakness-modifiers + (weaken-table-template t #t #t) + (head-weaken-table-template t #t (table-template-tail-weak? t)) + (tail-weaken-table-template t (table-template-head-weak? t) #t) + (strengthen-table-template t #f #f) + (head-strengthen-table-template t #f (table-template-tail-weak? t)) + (tail-strengthen-table-template t (table-template-head-weak? t) #f)) + + + +;;; -------------------- +;;; Constructors + +(define (%make-table-from-template template suggested-size id) + (let ((gc? (table-template-gc-sensitive? template))) + (really-make-table id + template + 0 + (and gc? (gc-stamp)) + (make-vector + (if (and suggested-size + (> suggested-size + maximum-average-bucket-entries)) + (quotient suggested-size + maximum-average-bucket-entries) + initial-bucket-count) + (empty-bucket))))) + +(define (table-constructor template) + (let ((name `(table-constructor ,template))) + (lambda size+id + (receive (suggested-size id) + (table-cons-options size+id name) + (%make-table-from-template template suggested-size id))))) + +(define (make-table-from-template template . size+id) + (receive (suggested-size id) + (table-cons-options size+id make-table-from-template) + (%make-table-from-template template suggested-size id))) + +(define (table-cons-options options callee) + (if (null? options) + (values #f #f) + (let ((size (car options)) + (more (cdr options))) + (if (or (integer? size) (not size)) + (if (null? more) + (values size #f) + (let ((name (car more)) + (more (cdr more))) + (if (null? more) + (values size name) + (error "extraneous arguments" more callee)))) + (error "invalid table size suggestion argument" + size + callee))))) + + + +;;; -------------------- +;;; Main table operations + +;;; This is a macro so that it gets integrated and debugging info is +;;; retained about the procedure. + +(define-syntax define-keyed-table-operation + (syntax-rules () + ((define-keyed-table-operation (name table-arg key-arg arg ...) + (bucket-index-var entry-var) + body1 body2 ...) + (define (name table-arg key-arg arg ...) + (cond ((not (table? table-arg)) + (call-error "invalid table argument" name + table-arg key-arg arg ...)) + ((not ((table-key-predicate table-arg) key-arg)) + (call-error "invalid key argument" name + table-arg key-arg arg ...)) + (else + (receive (bucket-index-var entry-var) + ((table-searcher table-arg) + table-arg key-arg) + body1 body2 ...))))))) + +(define-keyed-table-operation (table-entry table key) + (bucket-index entry) + (and entry + (let ((tail-weak? (table-tail-weak? table))) + (cond ((not tail-weak?) + (entry-value entry)) + ((weak-entry-value entry)) + (else ; Broken weak entry. + (expunge-entry-from-table! table bucket-index entry) + #f))))) + +(define-keyed-table-operation (set-table-entry! table key value) + (bucket-index entry) + (cond ((not value) + (cond (entry + (expunge-entry-from-table! table bucket-index entry) + (if (table-tail-weak? table) + (set-weak-entry-value! entry #f) + (set-entry-value! entry #f))))) + (entry + (if (table-tail-weak? table) + (set-weak-entry-value! entry #f) + (set-entry-value! entry #f))) + (bucket-index + ((table-expander table) table bucket-index key value)))) + +(define-keyed-table-operation (modify-table-entry! table key modifier) + (bucket-index entry) + (cond (entry + ;; We don't need to set the entry's value to be #F here: both + ;; of the modifiers will set the entry to #F if MODIFIER + ;; returns it. + (if (not (if (table-tail-weak? table) + (modify-weak-entry-value! entry modifier) + (modify-entry-value! entry modifier))) + (expunge-entry-from-table! table bucket-index entry))) + ((modifier #f) + => (lambda (new-value) + ((table-expander table) + table bucket-index key new-value))))) + +(define-keyed-table-operation (pop-table-entry! table key) + (bucket-index entry) + (and entry + (begin (expunge-entry-from-table! table bucket-index entry) + (if (table-tail-weak? entry) + (weak-entry-value entry) + (entry-value entry))))) + +(define (walk-table table proc) + (let* ((buckets (table-bucket-vector table)) + (bcount (vector-length buckets))) + (let outer-loop ((alist '()) + (i 0)) + (if (= i bcount) + (walk-table-alist alist proc + (table-head-weak? table) + (table-tail-weak? table)) + (let inner-loop ((alist alist) + (bucket (vector-ref buckets i))) + (if (bucket-empty? bucket) + (outer-loop (+ i 1)) + (inner-loop (cons (let ((entry (bucket-entry bucket))) + (cons (entry-key entry) + (entry-value entry))) + alist) + (bucket-next bucket)))))))) + +(define (walk-table-alist alist proc head-weak? tail-weak?) + ;++ Probably shouldn't break the weakness abstraction here. (Weak + ;++ pointers are supposed to be dealt with only by WEAK-ENTRY-VALUE + ;++ &c.; they're meant to be invisible to all other code here.) + ;++ Also, WALK-TABLE should expunge broken entries; it shouldn't be + ;++ the job of this to filter them out when determining what to pass + ;++ to PROC. Better yet, there would just be a TABLE->ALIST that + ;++ does everything right. I'm too lazy now. + (for-each (if head-weak? + (if tail-weak? + (lambda (key.value) + (cond ((weak-pointer-ref (car key.value)) + => (lambda (key) + (cond ((weak-pointer-ref + (cdr key.value)) + => (lambda (value) + (proc key value)))))))) + (lambda (key.value) + (cond ((weak-pointer-ref (car key.value)) + => (lambda (key) + (proc key (cdr key.value))))))) + (if tail-weak? + (lambda (key.value) + (cond ((weak-pointer-ref (cdr key.value)) + => (lambda (value) + (proc (car key.value) value))))) + (lambda (key.value) + (proc (car key.value) (cdr key.value))))) + alist)) + + + +;;; -------------------- +;;; Buckets + +;;; Buckets are represented as alists, because ASSQ in the Scheme48 VM +;;; is fast. However, this is no longer useful: using ASSQ was an +;;; optimization hack that would lose with optimistic concurrency, so +;;; it is no longer used. A more space-efficient representation could, +;;; and probably should, be chosen now with no adverse effects on time +;;; efficiency. + +;;; All these trivial procedures are procedures, not direct aliases, so +;;; that the silly Scheme48 automatic procedure integrator, which has +;;; much too low a threshold and no provision for alias integration, +;;; will integrate them. + +(define (empty-bucket) '()) +(define (bucket-empty? bucket) (null? bucket)) +(define (bucket-nonempty? bucket) (pair? bucket)) + +(define (make-bucket entry next) (cons entry next)) + +(define (bucket-entry bucket) (car bucket)) +(define (bucket-next bucket) (cdr bucket)) +(define (set-bucket-next! bucket next) (set-cdr! bucket next)) + +(define (entry-maker head-weak? tail-weak?) + (cond ((and head-weak? tail-weak?) + make-weak-entry) + (head-weak? make-head-weak-entry) + (tail-weak? make-tail-weak-entry) + (else make-strong-entry))) + +(define (make-weak-entry key value) + (cons (make-weak-pointer key) + (make-weak-pointer value))) +(define (make-head-weak-entry key value) + (cons (make-weak-pointer key) value)) +(define (make-tail-weak-entry key value) + (cons key (make-weak-pointer value))) +(define (make-strong-entry key value) + (cons key value)) + +(define (entry-key entry) (car entry)) +(define (weak-entry-key entry) (weak-pointer-ref (car entry))) + +(define (entry-value entry) (cdr entry)) +(define (weak-entry-value entry) (weak-pointer-ref (cdr entry))) + +(define (set-entry-value! entry value) + (set-cdr! entry value)) +(define (set-weak-entry-value! entry value) + (set-entry-value! entry (make-weak-pointer value))) + +;;; If MODIFIER returns #F, the entry is to be deleted. It would seem +;;; superfluous to set it if it is #F, but the entry, if it is to be +;;; deleted, needs to be set to #F. This is because of a delicate case +;;; in thread synchronization: +;;; - Thread A is rehashing a table; it is in the middle of dumping +;;; the bucket that contains an entry E, which it has already +;;; dumped. +;;; - Thread A is suspended; thread B gets to run. +;;; - Thread B tries to expunge E from the bucket, but it's absent, +;;; so it assumes that another thread concurrently deleted it. +;;; Now the entry E is still in the table, even though thread B thought +;;; it must have been deleted. + +(define (modify-entry-value! entry modifier) + (let ((new-value (modifier (entry-value entry)))) + (set-entry-value! entry new-value) + new-value)) +(define (modify-weak-entry-value! entry modifier) + (let ((new-value (modifier (weak-entry-value entry)))) + (set-weak-entry-value! entry new-value) + new-value)) + +;;; It might be better to write a purely functional, allocation-only +;;; version of this, to perform as little proposal logging as possible. +;;; Then again, it might make GCs more frequent. +;++ But couldn't that be handled by having two versions, one for GC- +;++ sensitive tables and one for GC-insensitive tables, with the latter +;++ of which allocation has no problems? + +(define (expunge-entry-from-bucket! bucket entry) + (cond ((bucket-empty? bucket) bucket) + ((eq? entry (bucket-entry bucket)) + (bucket-next bucket)) + (else + (let loop ((b (bucket-next bucket)) + (lag bucket)) + ;; It is OK for BUCKET not to contain ENTRY: another thread + ;; may have concurrently deleted it before the call to + ;; EXPUNGE-ENTRY-FROM-BUCKET! but after the search for the + ;; entry. + (if (bucket-nonempty? b) + (cond ((eq? (bucket-entry b) entry) + (set-bucket-next! lag (bucket-next b)) + bucket) + (else + (loop (bucket-next b) b)))))))) + + + +;;; -------------------- +;;; Bucket operations + +(define (make-bucket-searcher key= gc? head-weak? tail-weak?) + ((cond ((and gc? (or head-weak? tail-weak?)) + make-gc-weak-bucket-searcher) + ((and head-weak? tail-weak?) + make-weak-bucket-searcher) + (head-weak? make-head-weak-bucket-searcher) + (tail-weak? make-tail-weak-bucket-searcher) + (else make-strong-bucket-searcher)) + key=)) + +(define-syntax define-bucket-searcher + (syntax-rules () + ((define-bucket-searcher name + entry-var + (key-var key-expression) + test) + (define (name key=) + (letrec ((expunge-broken-initials + (lambda (bucket delta) + (if (bucket-empty? bucket) + (values bucket delta) + (let* ((entry-var (bucket-entry bucket)) + (next (bucket-next bucket)) + (key-var key-expression)) + (if test + (values bucket delta) + (expunge-broken-initials next + (+ delta 1))))))) + (search + (lambda (bucket lag key delta) + (if (bucket-empty? bucket) + (values #f delta) + (let* ((entry-var (bucket-entry bucket)) + (next (bucket-next bucket)) + (key-var key-expression)) + (cond ((not test) + (set-bucket-next! lag next) + (search next lag key (+ delta 1))) + ((key= key-var key) + (values entry-var delta)) + (else + (search next bucket key delta)))))))) + (lambda (table index bucket key) + (receive (bucket* delta) + (expunge-broken-initials bucket 0) + (if (positive? delta) + (vector-set! (table-bucket-vector table) + index + bucket*)) + (receive (entry delta) + (if (bucket-empty? bucket*) + (values #f delta) + (let ((entry-var (bucket-entry bucket*)) + (next (bucket-next bucket*))) + (if (key= key-expression key) + (values entry-var delta) + (search next + bucket* + key + delta)))) + (set-table-size! table (- (table-size table) delta)) + entry)))))))) + +(define-bucket-searcher make-weak-bucket-searcher + entry + (key (weak-entry-key entry)) + (and key (weak-entry-value entry))) + +(define-bucket-searcher make-head-weak-bucket-searcher + entry + (key (weak-entry-key entry)) + key) + +(define-bucket-searcher make-tail-weak-bucket-searcher + entry + (key (entry-key entry)) + (weak-entry-value entry)) + +;;; This is just like the strong bucket searcher, except that it looks +;;; at the weak pointer keys' contents, not at the keys themselves. +;;; This works because, if a key was broken at the last GC, the rehash +;;; of the table would have cleared it out before calling this. + +(define (make-gc-weak-bucket-searcher key=) + (lambda (table index bucket key) + (let loop ((bucket bucket)) + (if (bucket-empty? bucket) + #f + (let ((entry (bucket-entry bucket))) + (if (key= (weak-entry-key entry) key) + entry + (loop (bucket-next bucket)))))))) + +(define (make-strong-bucket-searcher key=) + ;** EXPUNGE WHEN OPTIMISTIC CONCURRENCY IS IMPLEMENTED + ;** (ASSQ doesn't log) + (if (eq? key= eq?) + (lambda (table index bucket key) ;+++ Performance hack: ASSQ is + (assq key bucket)) ;+++ a VM primitive, so fast. + (letrec ((loop (lambda (table index bucket key) + (if (bucket-empty? bucket) + #f + (let ((entry (bucket-entry bucket))) + (if (key= (entry-key entry) key) + entry + (loop table + index + (bucket-next bucket) + key))))))) + loop))) + + + +(define (make-bucket-dumper key-hash head-weak? tail-weak?) + ((cond ((and head-weak? tail-weak?) + make-weak-bucket-dumper) + (head-weak? make-head-weak-bucket-dumper) + (tail-weak? make-tail-weak-bucket-dumper) + (else make-strong-bucket-dumper)) + key-hash)) + +(define-syntax define-bucket-dumper + (syntax-rules () + ((define-bucket-dumper name + entry + (key key-expression) + test) + (define (name key-hash) + (lambda (bucket-vector old-buckets index) + (let ((mod (vector-length bucket-vector))) + (let loop ((bucket (vector-ref old-buckets index)) + (count 0)) + (if (bucket-empty? bucket) + count + (let* ((entry (bucket-entry bucket)) + (next (bucket-next bucket)) + (key key-expression)) + (loop next + (if test + (let ((hash (key-hash key mod))) + (if (not (eq? hash index)) + (begin (set-bucket-next! + bucket + (vector-ref bucket-vector + hash)) + (vector-set! bucket-vector + hash + bucket))) + (+ count 1)) + count))))))))))) + +(define-bucket-dumper make-weak-bucket-dumper + entry + (key (weak-entry-key entry)) + (and key (weak-entry-value entry))) + +(define-bucket-dumper make-head-weak-bucket-dumper + entry + (key (weak-entry-key entry)) + key) + +(define-bucket-dumper make-tail-weak-bucket-dumper + entry + (key (entry-key entry)) + (weak-entry-value entry)) + +(define (make-strong-bucket-dumper key-hash) + (letrec ((loop + (lambda (buckets index bucket count mod) + (if (bucket-empty? bucket) + count + (let ((next (bucket-next bucket)) + (hash + (key-hash (entry-key (bucket-entry bucket)) + mod))) + ;; Prevent accidentally creating a circular bucket. + (if (not (eq? hash index)) + (begin (set-bucket-next! bucket + (vector-ref buckets + hash)) + (vector-set! buckets hash bucket))) + (loop buckets index next (+ count 1) mod)))))) + (lambda (bucket-vector old-buckets index) + (loop bucket-vector + index + (vector-ref old-buckets index) + 0 + (vector-length bucket-vector))))) + + + +;;; -------------------- +;;; Internal hash table procedures + +(define (make-table-rehasher key-hash gc? head-weak? tail-weak?) + ((if gc? + make-gc-table-rehasher + make-stable-table-rehasher) + (make-bucket-dumper key-hash head-weak? tail-weak?))) + +;;; GC-sensitive table rehashers have to be very carefully constructed. +;;; They may not allocate, and it must be absolutely certain whether or +;;; not they succeeded in hashing immediately following a certain GC +;;; stamp. + +(define (make-gc-table-rehasher dump-bucket!) + (letrec ((attempt-rehash! + (make-gc-attempted-table-rehasher dump-bucket!)) + (loop (lambda (table new-vector) + (if (attempt-rehash! table (gc-stamp) new-vector) + #f + (loop table new-vector))))) + (lambda (table stamp new-vector) + (or (attempt-rehash! table stamp new-vector) + (loop table new-vector))))) + +;;; This returns a procedure that tries to copy all the buckets from +;;; TABLE's existing bucket vector to NEW-VECTOR. + +(define (make-gc-attempted-table-rehasher dump-bucket!) + (define (loop table stamp new-vector old-buckets old-length + i new-size) + (cond ((not (eq? stamp (gc-stamp))) + ;; Immediately abort, even if we finished copying all of the + ;; buckets, because our efforts were for naught. + (vector-fill! new-vector (empty-bucket)) + #f) + ((= i old-length) + ;; We succeeded in rehashing and a GC didn't occur to + ;; circumvent us. Of course, a GC could still occur in the + ;; next couple instructions, but that will be caught in the + ;; next table lookup; what is important is that the table + ;; table was completely rehashed safely. + (set-table-gc-stamp! table stamp) + (set-table-bucket-vector! table new-vector) + (set-table-size! table new-size) + #t) + (else + ;; Copy the bucket at I & proceed happily. DUMP-BUCKET! + ;; could test the stamp itself, but it's probably a lot + ;; cheaper just to test it after every bucket copied. + (loop table stamp new-vector old-buckets old-length + (+ i 1) + (+ (dump-bucket! new-vector old-buckets i) + new-size))))) + (lambda (table stamp new-vector) + (let ((old-buckets (table-bucket-vector table))) + (loop table stamp new-vector + old-buckets + (vector-length old-buckets) + 0 0)))) + +(define (make-stable-table-rehasher dump-bucket!) + (lambda (table stamp new-vector) + (let* ((old-buckets (table-bucket-vector table)) + (old-length (vector-length old-buckets))) + (do ((i 0 (+ i 1)) + (new-size 0 + (+ (dump-bucket! new-vector old-buckets i) + new-size))) + ((= i old-length) + (set-table-bucket-vector! table + new-vector) + (set-table-size! table new-size) + #t))))) + +(define (make-table-hasher key-hash gc? rehasher) + (if gc? + (make-gc-table-hasher key-hash rehasher) + (lambda (table key) + (key-hash key (vector-length (table-bucket-vector table)))))) + +(define (make-gc-table-hasher key-hash rehash-table!) + (letrec ((retry + (lambda (table key mod new-vector) + (let* ((hash (key-hash key mod)) + (stamp (gc-stamp))) + (if (or (eq? (table-gc-stamp table) stamp) + (rehash-table! table stamp new-vector)) + hash + (retry table key mod new-vector)))))) + (lambda (table key) + (let ((mod (vector-length (table-bucket-vector table)))) + (let ((stamp (gc-stamp))) + (if (eq? (table-gc-stamp table) stamp) + (let* ((hash (key-hash key mod))) + (if (eq? (gc-stamp) stamp) + hash + ;;; These next two calls are duplicated, but it is + ;;; needed so we don't allocate unnecessarily. + (retry table key mod + (make-vector mod (empty-bucket))))) + (retry table key mod + (make-vector mod (empty-bucket))))))))) + +(define (make-table-searcher key= table-hash gc? head-weak? tail-weak?) + (let ((search-bucket (make-bucket-searcher key= gc? + head-weak? tail-weak?))) + (lambda (table key) + (let* ((hash (table-hash table key)) + (bucket-vector (table-bucket-vector table)) + (bucket (vector-ref bucket-vector hash))) + (values hash (search-bucket table hash bucket key)))))) + +(define (make-table-expander table-hash gc? + head-weak? tail-weak? + rehash-table!) + (let ((make-entry (entry-maker head-weak? tail-weak?)) + (rehash! (if gc? + (lambda (table new-vector) + (let loop ((stamp (table-gc-stamp table))) + (if (rehash-table! table stamp new-vector) + (values) + (loop (gc-stamp))))) + (lambda (table new-vector) + (rehash-table! table #f new-vector))))) + (lambda (table cached-hash key value) + (let* ((hash (if (maybe-expand-table! table rehash!) + cached-hash + (table-hash table key))) + ;; Note the LET*: the rehash might set the table's bucket + ;; vector. + (bucket-vector (table-bucket-vector table))) + (vector-set! bucket-vector hash + (make-bucket (make-entry key value) + (vector-ref bucket-vector hash))) + (set-table-size! table (+ (table-size table) 1)))))) + +(define (maybe-expand-table! table rehash!) + (let loop ((first? #t)) + (let ((size (table-size table))) + (cond ((<= size (* (vector-length (table-bucket-vector table)) + maximum-average-bucket-entries)) + first?) + (else + (rehash! table + (make-vector (next-bucket-count size) + (empty-bucket))) + (loop #f)))))) + +(define (expunge-entry-from-table! table bucket-index entry) + (let ((bucket-vector (table-bucket-vector table))) + (vector-set! bucket-vector bucket-index + (expunge-entry-from-bucket! + (vector-ref bucket-vector bucket-index) + entry)))) + + + +;;; -------------------- +;;; Various table templates + +(define-syntax define-table-types + (syntax-rules () + ((define-table-types + (template-name constructor-name template-exp) + ...) + (begin (begin (define template-name template-exp) + (define constructor-name + (table-constructor template-name))) + ...)))) + +(define-table-types + (object-table-template make-object-table + (make-strong-table-template (lambda (x) #t) eq? + modular-descriptor-hash + #t ; GC-sensitive hash function + 'object-table)) + (weak-object-table-template make-weak-object-table + (make-weak-table-template (lambda (x) #t) eq? + modular-descriptor-hash + #t ; GC-sensitive hash function + 'weak-object-table)) + (string-table-template make-string-table + (make-strong-table-template string? string=? + modular-string-hash + #f ; GC-insensitive hash function + 'string-table)) + (string-ci-table-template make-string-ci-table + (make-strong-table-template string? string-ci=? + modular-string-ci-hash + #f ; GC-insensitive hash function + 'string-ci-table)) + (integer-table-template make-integer-table + (make-strong-table-template integer? = + modular-integer-hash + #f ; GC-insensitive hash function + 'integer-table)) + (symbol-table-template make-symbol-table + (make-strong-table-template symbol? eq? + (lambda (s mod) + (modular-string-hash (symbol->string s) + mod)) + #f ; GC-insensitive hash function + 'symbol-table))) diff --git a/s48/riatables/scheme/support.scm b/s48/riatables/scheme/support.scm new file mode 100644 index 0000000..d850795 --- /dev/null +++ b/s48/riatables/scheme/support.scm @@ -0,0 +1,39 @@ +;;;;;; Riatables: alternative hash tables -*- Scheme -*- +;;;;;; Miscellaneous support utilities + +;;; Taylor Campbell wrote this code; he places it in the public domain. + +(define (gc-stamp) + (memory-status (enum memory-status-option gc-count) 0)) + +(define (descriptor-hash object) + (memory-status (enum memory-status-option pointer-hash) object)) + +(define (modular-descriptor-hash object mod) + (modulo (descriptor-hash object) mod)) + +;;; Silly string hashers. I use Scheme48's STRING-HASH, not a better +;;; algorithm (Scheme48's is pretty terrible), because it's a primitive +;;; in the VM and therefore faster than anything in plain Scheme. + +(define (modular-string-hash string mod) + (modulo (string-hash string) mod)) + +(define (string-ci-hash string) + (let* ((length (string-length string)) + (new-string (make-string length))) + (do ((i 0 (+ i 1))) + ((= i length)) + (string-set! new-string i (char-downcase (string-ref string i)))) + (string-hash string))) + +(define (modular-string-ci-hash string mod) + (modulo (string-ci-hash string) mod)) + +(define (integer-hash integer) (abs integer)) + +(define (modular-integer-hash integer mod) + (modulo integer mod)) + +(define (exact-integer? x) + (and (integer? x) (exact? x)))