diff --git a/scheme/big/either.scm b/scheme/big/either.scm new file mode 100644 index 0000000..07513d3 --- /dev/null +++ b/scheme/big/either.scm @@ -0,0 +1,74 @@ +; Copyright (c) 1993-2001 by Richard Kelsey and Jonathan Rees. See file COPYING. + +; Nondeterminism, Prolog, or whatever you want to call it. This is +; depth-first search implemented using call/cc. + +; The fluid variable $FAIL is bound to a thunk to be called in case of failure. + +(define $fail + (make-fluid (make-cell + (lambda () + (error "call to FAIL outside WITH-NONDETERMINISM"))))) + +(define (with-nondeterminism thunk) + (let-fluid $fail + (make-cell (lambda () + (error "nondeterminism ran out of choices"))) + thunk)) + +; Call the current failure function. + +(define (fail) + ((fluid-cell-ref $fail))) + +; For the alternation operator, Icon's a | b or McCarthy's (amb a b), +; we write (either a b). + +(define-syntax either + (syntax-rules () + ((either) (fail)) + ((either x) x) + ((either x y ...) + (%either (lambda () x) (lambda () (either y ...)))))) + +; 1. Save the current failure procedure and continuation. +; 2. Install a new failure procedure that restores the old failure procedure +; and continuation and then calls THUNK2. +; 3. Call THUNK1. + +(define (%either thunk1 thunk2) + (let ((save (fluid-cell-ref $fail))) + ((call-with-current-continuation + (lambda (k) + (fluid-cell-set! $fail + (lambda () + (fluid-cell-set! $fail save) + (k thunk2))) + thunk1))))) + +; (one-value x) is Prolog's CUT operator. X is allowed to return only once. + +(define-syntax one-value + (syntax-rules () + ((one-value x) (%one-value (lambda () x))))) + +(define (%one-value thunk) + (let ((save (fluid-cell-ref $fail))) + (call-with-values thunk + (lambda args + (fluid-cell-set! $fail save) + (apply values args))))) + +; (all-values a) returns a list of all the possible values of the +; expression a. Prolog calls this "bagof"; I forget what Icon calls it. + +(define-syntax all-values + (syntax-rules () + ((all-values x) (%all-values (lambda () x))))) + +(define (%all-values thunk) + (let ((results '())) + (either (let ((new-result (thunk))) + (set! results (cons new-result results)) + (fail)) + (reverse results)))) diff --git a/scheme/more-interfaces.scm b/scheme/more-interfaces.scm index 6434f88..620c51a 100644 --- a/scheme/more-interfaces.scm +++ b/scheme/more-interfaces.scm @@ -412,3 +412,121 @@ (cons-stream :syntax) head tail the-empty-stream empty-stream? explode implode get put)) +; Olin's encyclopedic SRFIs. + +(define-interface srfi-1-interface + (export xcons make-list list-tabulate cons* list-copy + proper-list? circular-list? dotted-list? not-pair? null-list? list= + circular-list length+ + iota + first second third fourth fifth sixth seventh eighth ninth tenth + car+cdr + take drop + take-right drop-right + take! drop-right! + split-at split-at! + last last-pair + zip unzip1 unzip2 unzip3 unzip4 unzip5 + count + append! append-reverse append-reverse! concatenate concatenate! + unfold fold pair-fold reduce + unfold-right fold-right pair-fold-right reduce-right + append-map append-map! map! pair-for-each filter-map map-in-order + filter partition remove + filter! partition! remove! + find find-tail any every list-index + take-while drop-while take-while! + span break span! break! + delete delete! + alist-cons alist-copy + delete-duplicates delete-duplicates! + alist-delete alist-delete! + reverse! + lset<= lset= lset-adjoin + lset-union lset-intersection lset-difference lset-xor + lset-diff+intersection + lset-union! lset-intersection! lset-difference! lset-xor! + lset-diff+intersection!)) + +(define-interface srfi-13-interface + (export string-map string-map! + string-fold string-unfold + string-fold-right string-unfold-right + string-tabulate string-for-each string-for-each-index + string-every string-any + string-hash string-hash-ci + string-compare string-compare-ci + string= string< string> string<= string>= string<> + string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> + string-downcase string-upcase string-titlecase + string-downcase! string-upcase! string-titlecase! + string-take string-take-right + string-drop string-drop-right + string-pad string-pad-right + string-trim string-trim-right string-trim-both + string-filter string-delete + string-index string-index-right + string-skip string-skip-right + string-count + string-prefix-length string-prefix-length-ci + string-suffix-length string-suffix-length-ci + string-prefix? string-prefix-ci? + string-suffix? string-suffix-ci? + string-contains string-contains-ci + string-copy! substring/shared + string-reverse string-reverse! reverse-list->string + string-concatenate + string-concatenate/shared + string-concatenate-reverse + string-append/shared + xsubstring string-xcopy! + string-null? + string-join + string-tokenize + string-replace + + string->list string-copy string-fill! + string? make-string string-length string-ref string-set! + string string-append list->string)) + +(define-interface srfi-14-interface + (export char-set? char-set= + char-set<= char-set-hash char-set-cursor char-set-ref + char-set-cursor-next end-of-char-set? char-set-fold char-set-unfold + char-set-unfold! char-set-for-each char-set-map char-set-copy + char-set + + list->char-set string->char-set + list->char-set! string->char-set! + + char-set-filter ucs-range->char-set + + ; the SRFI defines ->CHAR-SET, but that isn't a legal identifier + x->char-set + + char-set-filter! ucs-range->char-set! + + char-set->list char-set->string + + char-set-size char-set-count char-set-contains? + char-set-every char-set-any + + char-set-adjoin char-set-delete + char-set-adjoin! char-set-delete! + + + char-set-complement char-set-union char-set-intersection + char-set-complement! char-set-union! char-set-intersection! + + char-set-difference char-set-xor char-set-diff+intersection + char-set-difference! char-set-xor! char-set-diff+intersection! + + char-set:lower-case char-set:upper-case char-set:title-case + char-set:letter char-set:digit char-set:letter+digit + char-set:graphic char-set:printing char-set:whitespace + char-set:iso-control char-set:punctuation char-set:symbol + char-set:hex-digit char-set:blank char-set:ascii + char-set:empty char-set:full + + )) + diff --git a/scheme/more-packages.scm b/scheme/more-packages.scm index 43973e3..b6f09a8 100644 --- a/scheme/more-packages.scm +++ b/scheme/more-packages.scm @@ -662,6 +662,155 @@ features) ; make-immutable (files (big finite-type))) +; nondeterminism via call/cc + +(define-structure nondeterminism (export with-nondeterminism + ((either one-value all-values) :syntax) + fail) + (open scheme-level-2 + fluids cells + (subset signals (error))) + (files (big either))) + +;---------------- +; SRFI packages + +; SRFI-0 - Doesn't work with the module system. + +; Olin's list library. + +(define-structure srfi-1 srfi-1-interface + (open scheme-level-2 + receiving + (subset signals (error))) + (files (srfi srfi-1))) + +(define-structure srfi-2 (export (and-let* :syntax)) + (open scheme-level-2 + signals) ; error + (files (srfi srfi-2))) + +; SRFI-3 - withdrawn +; SRFI-4 - needs hacks to the reader + +(define-structure srfi-5 (export (let :syntax)) + (open (modify scheme-level-2 (hide let))) + (files (srfi srfi-5))) + +(define-structure srfi-6 (export open-input-string + open-output-string + get-output-string) + (open (modify extended-ports + (rename (make-string-input-port open-input-string) + (make-string-output-port open-output-string) + (string-output-port-output get-output-string))))) + +; Configuration language + +(define-structure srfi-7 (export) ; defines a command + (open scheme + + ; for parsing programs + receiving + nondeterminism + (subset signals (error)) + + (subset package-commands-internal (config-package)) + ensures-loaded + (subset packages (note-structure-name!)) + + ; for defining the command + (subset command-processor (define-user-command-syntax + user-command-environment)) + (subset environments (environment-define!))) + + (begin + (define available-srfis + '(srfi-1 srfi-2 srfi-5 srfi-6 srfi-7 srfi-8 srfi-9 + srfi-11 srfi-13 srfi-14 srfi-16 srfi-17 srfi-23)) + + ; Some SRFI's redefine Scheme variables. + (define shadowed + '((srfi-1 map for-each member assoc) + (srfi-5 let) + (srfi-13 string->list string-copy string-fill!) + (srfi-17 set!))) + ) + + (files (srfi srfi-7))) + +; Taken directly from the SRFI document (or from `receiving', take your pick). + +(define-structure srfi-8 (export (receive :syntax)) + (open scheme-level-2) + (begin + (define-syntax receive + (syntax-rules () + ((receive formals expression body ...) + (call-with-values (lambda () expression) + (lambda formals body ...))))))) + +; SRFI-9 is a slight modification of DEFINE-RECORD-TYPE. + +(define-structure srfi-9 (export (define-record-type :syntax)) + (open scheme-level-2 + (with-prefix define-record-types sys:)) + (begin + (define-syntax define-record-type + (syntax-rules () + ((define-record-type type-name . stuff) + (sys:define-record-type type-name type-name . stuff)))))) + +; SRFI-10 - no stand-alone interface. + +(define-structure srfi-11 (export (let-values :syntax) + (let*-values :syntax)) + (open scheme-level-2) + (files (srfi srfi-11))) + +; SRFI-12 - withdrawn + +; Two more encyclopedias from Olin. + +(define-structure srfi-13 srfi-13-interface + (open scheme-level-2 + bitwise + srfi-8 srfi-14 + (subset signals (error))) + (files (srfi srfi-13))) + +(define-structure srfi-14 srfi-14-interface + (open scheme-level-2 + bitwise + srfi-9 + (modify ascii (rename (char->ascii %char->latin1) + (ascii->char %latin1->char))) + (subset features (make-immutable!)) + (subset signals (error))) + (files (srfi srfi-14))) + +; SRFI-15 - withdrawn + +(define-structure srfi-16 (export (case-lambda :syntax)) + (open scheme-level-2 + (subset signals (error))) + (files (srfi srfi-16))) + +(define-structure srfi-17 (export (set! :syntax) setter) + (open (modify scheme-level-2 (rename (set! scheme-set!))) + (subset signals (error)) + (subset util (unspecific))) + (files (srfi srfi-17))) + +; SRFI-18 - no implementation given +; SRFI-19 - implementation is specific to MzScheme +; SRFI-20 - withdrawn +; SRFI-21 - no implementation given +; SRFI-22 - not final yet + +(define-structure srfi-23 (export error) + (open (subset signals (error)))) + ; ... end of package definitions. ; Temporary compatibility stuff @@ -687,6 +836,7 @@ bignums ratnums recnums floatnums build callback + cells command-levels command-processor debugging @@ -742,6 +892,11 @@ ;; Compatibility record table build-internals ;added by JMG + + ; SRFI packages + srfi-1 srfi-2 srfi-5 srfi-6 srfi-7 srfi-8 srfi-9 + srfi-11 srfi-13 srfi-14 srfi-16 srfi-17 + srfi-23 ) :structure) ((define-signature define-package) :syntax))) diff --git a/scheme/srfi/srfi-1.scm b/scheme/srfi/srfi-1.scm new file mode 100644 index 0000000..e943185 --- /dev/null +++ b/scheme/srfi/srfi-1.scm @@ -0,0 +1,1620 @@ +;;; SRFI-1 list-processing library -*- Scheme -*- +;;; Reference implementation +;;; +;;; Copyright (c) 1998, 1999 by Olin Shivers. You may do as you please with +;;; this code as long as you do not remove this copyright notice or +;;; hold me liable for its use. Please send bug reports to shivers@ai.mit.edu. +;;; -Olin + +; Changes made for Scheme 48 +; +; - CHECK-ARG is defined as a macro that does nothing. +; - replaced the one use of LET-OPTIONAL +; - added definition of :OPTIONAL + +(define-syntax check-arg + (syntax-rules () + ((check-arg stuff ...) #f))) + +(define (:optional maybe-value default) + (cond ((null? maybe-value) + default) + ((null? (cdr maybe-value)) + (car maybe-value)) + (else + (error "too many arguments passed to :optional" maybe-value)))) + +;;; This is a library of list- and pair-processing functions. I wrote it after +;;; carefully considering the functions provided by the libraries found in +;;; R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common +;;; Lisp, Bigloo, guile, T, APL and the SML standard basis. It is a pretty +;;; rich toolkit, providing a superset of the functionality found in any of +;;; the various Schemes I considered. + +;;; This implementation is intended as a portable reference implementation +;;; for SRFI-1. See the porting notes below for more information. + +;;; Exported: +;;; xcons tree-copy make-list list-tabulate cons* list-copy +;;; proper-list? circular-list? dotted-list? not-pair? null-list? list= +;;; circular-list length+ +;;; iota +;;; first second third fourth fifth sixth seventh eighth ninth tenth +;;; car+cdr +;;; take drop +;;; take-right drop-right +;;; take! drop-right! +;;; split-at split-at! +;;; last last-pair +;;; zip unzip1 unzip2 unzip3 unzip4 unzip5 +;;; count +;;; append! append-reverse append-reverse! concatenate concatenate! +;;; unfold fold pair-fold reduce +;;; unfold-right fold-right pair-fold-right reduce-right +;;; append-map append-map! map! pair-for-each filter-map map-in-order +;;; filter partition remove +;;; filter! partition! remove! +;;; find find-tail any every list-index +;;; take-while drop-while take-while! +;;; span break span! break! +;;; delete delete! +;;; alist-cons alist-copy +;;; delete-duplicates delete-duplicates! +;;; alist-delete alist-delete! +;;; reverse! +;;; lset<= lset= lset-adjoin +;;; lset-union lset-intersection lset-difference lset-xor lset-diff+intersection +;;; lset-union! lset-intersection! lset-difference! lset-xor! lset-diff+intersection! +;;; +;;; In principle, the following R4RS list- and pair-processing procedures +;;; are also part of this package's exports, although they are not defined +;;; in this file: +;;; Primitives: cons pair? null? car cdr set-car! set-cdr! +;;; Non-primitives: list length append reverse cadr ... cddddr list-ref +;;; memq memv assq assv +;;; (The non-primitives are defined in this file, but commented out.) +;;; +;;; These R4RS procedures have extended definitions in SRFI-1 and are defined +;;; in this file: +;;; map for-each member assoc +;;; +;;; The remaining two R4RS list-processing procedures are not included: +;;; list-tail (use drop) +;;; list? (use proper-list?) + + +;;; A note on recursion and iteration/reversal: +;;; Many iterative list-processing algorithms naturally compute the elements +;;; of the answer list in the wrong order (left-to-right or head-to-tail) from +;;; the order needed to cons them into the proper answer (right-to-left, or +;;; tail-then-head). One style or idiom of programming these algorithms, then, +;;; loops, consing up the elements in reverse order, then destructively +;;; reverses the list at the end of the loop. I do not do this. The natural +;;; and efficient way to code these algorithms is recursively. This trades off +;;; intermediate temporary list structure for intermediate temporary stack +;;; structure. In a stack-based system, this improves cache locality and +;;; lightens the load on the GC system. Don't stand on your head to iterate! +;;; Recurse, where natural. Multiple-value returns make this even more +;;; convenient, when the recursion/iteration has multiple state values. + +;;; Porting: +;;; This is carefully tuned code; do not modify casually. +;;; - It is careful to share storage when possible; +;;; - Side-effecting code tries not to perform redundant writes. +;;; +;;; That said, a port of this library to a specific Scheme system might wish +;;; to tune this code to exploit particulars of the implementation. +;;; The single most important compiler-specific optimisation you could make +;;; to this library would be to add rewrite rules or transforms to: +;;; - transform applications of n-ary procedures (e.g. LIST=, CONS*, APPEND, +;;; LSET-UNION) into multiple applications of a primitive two-argument +;;; variant. +;;; - transform applications of the mapping functions (MAP, FOR-EACH, FOLD, +;;; ANY, EVERY) into open-coded loops. The killer here is that these +;;; functions are n-ary. Handling the general case is quite inefficient, +;;; requiring many intermediate data structures to be allocated and +;;; discarded. +;;; - transform applications of procedures that take optional arguments +;;; into calls to variants that do not take optional arguments. This +;;; eliminates unnecessary consing and parsing of the rest parameter. +;;; +;;; These transforms would provide BIG speedups. In particular, the n-ary +;;; mapping functions are particularly slow and cons-intensive, and are good +;;; candidates for tuning. I have coded fast paths for the single-list cases, +;;; but what you really want to do is exploit the fact that the compiler +;;; usually knows how many arguments are being passed to a particular +;;; application of these functions -- they are usually explicitly called, not +;;; passed around as higher-order values. If you can arrange to have your +;;; compiler produce custom code or custom linkages based on the number of +;;; arguments in the call, you can speed these functions up a *lot*. But this +;;; kind of compiler technology no longer exists in the Scheme world as far as +;;; I can see. +;;; +;;; Note that this code is, of course, dependent upon standard bindings for +;;; the R5RS procedures -- i.e., it assumes that the variable CAR is bound +;;; to the procedure that takes the car of a list. If your Scheme +;;; implementation allows user code to alter the bindings of these procedures +;;; in a manner that would be visible to these definitions, then there might +;;; be trouble. You could consider horrible kludgery along the lines of +;;; (define fact +;;; (let ((= =) (- -) (* *)) +;;; (letrec ((real-fact (lambda (n) +;;; (if (= n 0) 1 (* n (real-fact (- n 1))))))) +;;; real-fact))) +;;; Or you could consider shifting to a reasonable Scheme system that, say, +;;; has a module system protecting code from this kind of lossage. +;;; +;;; This code does a fair amount of run-time argument checking. If your +;;; Scheme system has a sophisticated compiler that can eliminate redundant +;;; error checks, this is no problem. However, if not, these checks incur +;;; some performance overhead -- and, in a safe Scheme implementation, they +;;; are in some sense redundant: if we don't check to see that the PROC +;;; parameter is a procedure, we'll find out anyway three lines later when +;;; we try to call the value. It's pretty easy to rip all this argument +;;; checking code out if it's inappropriate for your implementation -- just +;;; nuke every call to CHECK-ARG. +;;; +;;; On the other hand, if you *do* have a sophisticated compiler that will +;;; actually perform soft-typing and eliminate redundant checks (Rice's systems +;;; being the only possible candidate of which I'm aware), leaving these checks +;;; in can *help*, since their presence can be elided in redundant cases, +;;; and in cases where they are needed, performing the checks early, at +;;; procedure entry, can "lift" a check out of a loop. +;;; +;;; Finally, I have only checked the properties that can portably be checked +;;; with R5RS Scheme -- and this is not complete. You may wish to alter +;;; the CHECK-ARG parameter checks to perform extra, implementation-specific +;;; checks, such as procedure arity for higher-order values. +;;; +;;; The code has only these non-R4RS dependencies: +;;; A few calls to an ERROR procedure; +;;; Uses of the R5RS multiple-value procedure VALUES and the m-v binding +;;; RECEIVE macro (which isn't R5RS, but is a trivial macro). +;;; Many calls to a parameter-checking procedure check-arg: +;;; (define (check-arg pred val caller) +;;; (let lp ((val val)) +;;; (if (pred val) val (lp (error "Bad argument" val pred caller))))) +;;; A few uses of the LET-OPTIONAL and :OPTIONAL macros for parsing +;;; optional arguments. +;;; +;;; Most of these procedures use the NULL-LIST? test to trigger the +;;; base case in the inner loop or recursion. The NULL-LIST? function +;;; is defined to be a careful one -- it raises an error if passed a +;;; non-nil, non-pair value. The spec allows an implementation to use +;;; a less-careful implementation that simply defines NULL-LIST? to +;;; be NOT-PAIR?. This would speed up the inner loops of these procedures +;;; at the expense of having them silently accept dotted lists. + +;;; A note on dotted lists: +;;; I, personally, take the view that the only consistent view of lists +;;; in Scheme is the view that *everything* is a list -- values such as +;;; 3 or "foo" or 'bar are simply empty dotted lists. This is due to the +;;; fact that Scheme actually has no true list type. It has a pair type, +;;; and there is an *interpretation* of the trees built using this type +;;; as lists. +;;; +;;; I lobbied to have these list-processing procedures hew to this +;;; view, and accept any value as a list argument. I was overwhelmingly +;;; overruled during the SRFI discussion phase. So I am inserting this +;;; text in the reference lib and the SRFI spec as a sort of "minority +;;; opinion" dissent. +;;; +;;; Many of the procedures in this library can be trivially redefined +;;; to handle dotted lists, just by changing the NULL-LIST? base-case +;;; check to NOT-PAIR?, meaning that any non-pair value is taken to be +;;; an empty list. For most of these procedures, that's all that is +;;; required. +;;; +;;; However, we have to do a little more work for some procedures that +;;; *produce* lists from other lists. Were we to extend these procedures to +;;; accept dotted lists, we would have to define how they terminate the lists +;;; produced as results when passed a dotted list. I designed a coherent set +;;; of termination rules for these cases; this was posted to the SRFI-1 +;;; discussion list. I additionally wrote an earlier version of this library +;;; that implemented that spec. It has been discarded during later phases of +;;; the definition and implementation of this library. +;;; +;;; The argument *against* defining these procedures to work on dotted +;;; lists is that dotted lists are the rare, odd case, and that by +;;; arranging for the procedures to handle them, we lose error checking +;;; in the cases where a dotted list is passed by accident -- e.g., when +;;; the programmer swaps a two arguments to a list-processing function, +;;; one being a scalar and one being a list. For example, +;;; (member '(1 3 5 7 9) 7) +;;; This would quietly return #f if we extended MEMBER to accept dotted +;;; lists. +;;; +;;; The SRFI discussion record contains more discussion on this topic. + + +;;; Constructors +;;;;;;;;;;;;;;;; + +;;; Occasionally useful as a value to be passed to a fold or other +;;; higher-order procedure. +(define (xcons d a) (cons a d)) + +;;;; Recursively copy every cons. +;(define (tree-copy x) +; (let recur ((x x)) +; (if (not (pair? x)) x +; (cons (recur (car x)) (recur (cdr x)))))) + +;;; Make a list of length LEN. + +(define (make-list len . maybe-elt) + (check-arg (lambda (n) (and (integer? n) (>= n 0))) len make-list) + (let ((elt (cond ((null? maybe-elt) #f) ; Default value + ((null? (cdr maybe-elt)) (car maybe-elt)) + (else (error "Too many arguments to MAKE-LIST" + (cons len maybe-elt)))))) + (do ((i len (- i 1)) + (ans '() (cons elt ans))) + ((<= i 0) ans)))) + + +;(define (list . ans) ans) ; R4RS + + +;;; Make a list of length LEN. Elt i is (PROC i) for 0 <= i < LEN. + +(define (list-tabulate len proc) + (check-arg (lambda (n) (and (integer? n) (>= n 0))) len list-tabulate) + (check-arg procedure? proc list-tabulate) + (do ((i (- len 1) (- i 1)) + (ans '() (cons (proc i) ans))) + ((< i 0) ans))) + +;;; (cons* a1 a2 ... an) = (cons a1 (cons a2 (cons ... an))) +;;; (cons* a1) = a1 (cons* a1 a2 ...) = (cons a1 (cons* a2 ...)) +;;; +;;; (cons first (unfold not-pair? car cdr rest values)) + +(define (cons* first . rest) + (let recur ((x first) (rest rest)) + (if (pair? rest) + (cons x (recur (car rest) (cdr rest))) + x))) + +;;; (unfold not-pair? car cdr lis values) + +(define (list-copy lis) + (let recur ((lis lis)) + (if (pair? lis) + (cons (car lis) (recur (cdr lis))) + lis))) + +;;; IOTA count [start step] (start start+step ... start+(count-1)*step) + +(define (iota count . maybe-start+step) + (check-arg integer? count iota) + (if (< count 0) (error "Negative step count" iota count)) +; (let-optionals maybe-start+step ((start 0) (step 1)) ...) + (receive (start step) + (case (length maybe-start+step) + ((0) (values 0 1)) + ((2) (values (car maybe-start+step) + (cadr maybe-start+step))) + (else + (error "wrong number of arguments to IOTA" + (cons count maybe-start+step)))) + (check-arg number? start iota) + (check-arg number? step iota) + (let ((last-val (+ start (* (- count 1) step)))) + (do ((count count (- count 1)) + (val last-val (- val step)) + (ans '() (cons val ans))) + ((<= count 0) ans))))) + +;;; I thought these were lovely, but the public at large did not share my +;;; enthusiasm... +;;; :IOTA to (0 ... to-1) +;;; :IOTA from to (from ... to-1) +;;; :IOTA from to step (from from+step ...) + +;;; IOTA: to (1 ... to) +;;; IOTA: from to (from+1 ... to) +;;; IOTA: from to step (from+step from+2step ...) + +;(define (%parse-iota-args arg1 rest-args proc) +; (let ((check (lambda (n) (check-arg integer? n proc)))) +; (check arg1) +; (if (pair? rest-args) +; (let ((arg2 (check (car rest-args))) +; (rest (cdr rest-args))) +; (if (pair? rest) +; (let ((arg3 (check (car rest))) +; (rest (cdr rest))) +; (if (pair? rest) (error "Too many parameters" proc arg1 rest-args) +; (values arg1 arg2 arg3))) +; (values arg1 arg2 1))) +; (values 0 arg1 1)))) +; +;(define (iota: arg1 . rest-args) +; (receive (from to step) (%parse-iota-args arg1 rest-args iota:) +; (let* ((numsteps (floor (/ (- to from) step))) +; (last-val (+ from (* step numsteps)))) +; (if (< numsteps 0) (error "Negative step count" iota: from to step)) +; (do ((steps-left numsteps (- steps-left 1)) +; (val last-val (- val step)) +; (ans '() (cons val ans))) +; ((<= steps-left 0) ans))))) +; +; +;(define (:iota arg1 . rest-args) +; (receive (from to step) (%parse-iota-args arg1 rest-args :iota) +; (let* ((numsteps (ceiling (/ (- to from) step))) +; (last-val (+ from (* step (- numsteps 1))))) +; (if (< numsteps 0) (error "Negative step count" :iota from to step)) +; (do ((steps-left numsteps (- steps-left 1)) +; (val last-val (- val step)) +; (ans '() (cons val ans))) +; ((<= steps-left 0) ans))))) + + + +(define (circular-list val1 . vals) + (let ((ans (cons val1 vals))) + (set-cdr! (last-pair ans) ans) + ans)) + +;;; ::= () ; Empty proper list +;;; | (cons ) ; Proper-list pair +;;; Note that this definition rules out circular lists -- and this +;;; function is required to detect this case and return false. + +(define (proper-list? x) + (let lp ((x x) (lag x)) + (if (pair? x) + (let ((x (cdr x))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (and (not (eq? x lag)) (lp x lag))) + (null? x))) + (null? x)))) + + +;;; A dotted list is a finite list (possibly of length 0) terminated +;;; by a non-nil value. Any non-cons, non-nil value (e.g., "foo" or 5) +;;; is a dotted list of length 0. +;;; +;;; ::= ; Empty dotted list +;;; | (cons ) ; Proper-list pair + +(define (dotted-list? x) + (let lp ((x x) (lag x)) + (if (pair? x) + (let ((x (cdr x))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (and (not (eq? x lag)) (lp x lag))) + (not (null? x)))) + (not (null? x))))) + +(define (circular-list? x) + (let lp ((x x) (lag x)) + (and (pair? x) + (let ((x (cdr x))) + (and (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (or (eq? x lag) (lp x lag)))))))) + +(define (not-pair? x) (not (pair? x))) ; Inline me. + +;;; This is a legal definition which is fast and sloppy: +;;; (define null-list? not-pair?) +;;; but we'll provide a more careful one: +(define (null-list? l) + (cond ((pair? l) #f) + ((null? l) #t) + (else (error "null-list?: argument out of domain" l)))) + + +(define (list= = . lists) + (or (null? lists) ; special case + + (let lp1 ((list-a (car lists)) (others (cdr lists))) + (or (null? others) + (let ((list-b (car others)) + (others (cdr others))) + (if (eq? list-a list-b) ; EQ? => LIST= + (lp1 list-b others) + (let lp2 ((list-a list-a) (list-b list-b)) + (if (null-list? list-a) + (and (null-list? list-b) + (lp1 list-b others)) + (and (not (null-list? list-b)) + (= (car list-a) (car list-b)) + (lp2 (cdr list-a) (cdr list-b))))))))))) + + + +;;; R4RS, so commented out. +;(define (length x) ; LENGTH may diverge or +; (let lp ((x x) (len 0)) ; raise an error if X is +; (if (pair? x) ; a circular list. This version +; (lp (cdr x) (+ len 1)) ; diverges. +; len))) + +(define (length+ x) ; Returns #f if X is circular. + (let lp ((x x) (lag x) (len 0)) + (if (pair? x) + (let ((x (cdr x)) + (len (+ len 1))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag)) + (len (+ len 1))) + (and (not (eq? x lag)) (lp x lag len))) + len)) + len))) + +(define (zip list1 . more-lists) (apply map list list1 more-lists)) + + +;;; Selectors +;;;;;;;;;;;;; + +;;; R4RS non-primitives: +;(define (caar x) (car (car x))) +;(define (cadr x) (car (cdr x))) +;(define (cdar x) (cdr (car x))) +;(define (cddr x) (cdr (cdr x))) +; +;(define (caaar x) (caar (car x))) +;(define (caadr x) (caar (cdr x))) +;(define (cadar x) (cadr (car x))) +;(define (caddr x) (cadr (cdr x))) +;(define (cdaar x) (cdar (car x))) +;(define (cdadr x) (cdar (cdr x))) +;(define (cddar x) (cddr (car x))) +;(define (cdddr x) (cddr (cdr x))) +; +;(define (caaaar x) (caaar (car x))) +;(define (caaadr x) (caaar (cdr x))) +;(define (caadar x) (caadr (car x))) +;(define (caaddr x) (caadr (cdr x))) +;(define (cadaar x) (cadar (car x))) +;(define (cadadr x) (cadar (cdr x))) +;(define (caddar x) (caddr (car x))) +;(define (cadddr x) (caddr (cdr x))) +;(define (cdaaar x) (cdaar (car x))) +;(define (cdaadr x) (cdaar (cdr x))) +;(define (cdadar x) (cdadr (car x))) +;(define (cdaddr x) (cdadr (cdr x))) +;(define (cddaar x) (cddar (car x))) +;(define (cddadr x) (cddar (cdr x))) +;(define (cdddar x) (cdddr (car x))) +;(define (cddddr x) (cdddr (cdr x))) + + +(define first car) +(define second cadr) +(define third caddr) +(define fourth cadddr) +(define (fifth x) (car (cddddr x))) +(define (sixth x) (cadr (cddddr x))) +(define (seventh x) (caddr (cddddr x))) +(define (eighth x) (cadddr (cddddr x))) +(define (ninth x) (car (cddddr (cddddr x)))) +(define (tenth x) (cadr (cddddr (cddddr x)))) + +(define (car+cdr pair) (values (car pair) (cdr pair))) + +;;; take & drop + +(define (take lis k) + (check-arg integer? k take) + (let recur ((lis lis) (k k)) + (if (zero? k) '() + (cons (car lis) + (recur (cdr lis) (- k 1)))))) + +(define (drop lis k) + (check-arg integer? k drop) + (let iter ((lis lis) (k k)) + (if (zero? k) lis (iter (cdr lis) (- k 1))))) + +(define (take! lis k) + (check-arg integer? k take!) + (if (zero? k) '() + (begin (set-cdr! (drop lis (- k 1)) '()) + lis))) + +;;; TAKE-RIGHT and DROP-RIGHT work by getting two pointers into the list, +;;; off by K, then chasing down the list until the lead pointer falls off +;;; the end. + +(define (take-right lis k) + (check-arg integer? k take-right) + (let lp ((lag lis) (lead (drop lis k))) + (if (pair? lead) + (lp (cdr lag) (cdr lead)) + lag))) + +(define (drop-right lis k) + (check-arg integer? k drop-right) + (let recur ((lag lis) (lead (drop lis k))) + (if (pair? lead) + (cons (car lag) (recur (cdr lag) (cdr lead))) + '()))) + +;;; In this function, LEAD is actually K+1 ahead of LAG. This lets +;;; us stop LAG one step early, in time to smash its cdr to (). +(define (drop-right! lis k) + (check-arg integer? k drop-right!) + (let ((lead (drop lis k))) + (if (pair? lead) + + (let lp ((lag lis) (lead (cdr lead))) ; Standard case + (if (pair? lead) + (lp (cdr lag) (cdr lead)) + (begin (set-cdr! lag '()) + lis))) + + '()))) ; Special case dropping everything -- no cons to side-effect. + +;(define (list-ref lis i) (car (drop lis i))) ; R4RS + +;;; These use the APL convention, whereby negative indices mean +;;; "from the right." I liked them, but they didn't win over the +;;; SRFI reviewers. +;;; K >= 0: Take and drop K elts from the front of the list. +;;; K <= 0: Take and drop -K elts from the end of the list. + +;(define (take lis k) +; (check-arg integer? k take) +; (if (negative? k) +; (list-tail lis (+ k (length lis))) +; (let recur ((lis lis) (k k)) +; (if (zero? k) '() +; (cons (car lis) +; (recur (cdr lis) (- k 1))))))) +; +;(define (drop lis k) +; (check-arg integer? k drop) +; (if (negative? k) +; (let recur ((lis lis) (nelts (+ k (length lis)))) +; (if (zero? nelts) '() +; (cons (car lis) +; (recur (cdr lis) (- nelts 1))))) +; (list-tail lis k))) +; +; +;(define (take! lis k) +; (check-arg integer? k take!) +; (cond ((zero? k) '()) +; ((positive? k) +; (set-cdr! (list-tail lis (- k 1)) '()) +; lis) +; (else (list-tail lis (+ k (length lis)))))) +; +;(define (drop! lis k) +; (check-arg integer? k drop!) +; (if (negative? k) +; (let ((nelts (+ k (length lis)))) +; (if (zero? nelts) '() +; (begin (set-cdr! (list-tail lis (- nelts 1)) '()) +; lis))) +; (list-tail lis k))) + +(define (split-at x k) + (check-arg integer? k split-at) + (let recur ((lis x) (k k)) + (if (zero? k) (values '() lis) + (receive (prefix suffix) (recur (cdr lis) (- k 1)) + (values (cons (car lis) prefix) suffix))))) + +(define (split-at! x k) + (check-arg integer? k split-at!) + (if (zero? k) (values '() x) + (let* ((prev (drop x (- k 1))) + (suffix (cdr prev))) + (set-cdr! prev '()) + (values x suffix)))) + + +(define (last lis) (car (last-pair lis))) + +(define (last-pair lis) + (check-arg pair? lis last-pair) + (let lp ((lis lis)) + (let ((tail (cdr lis))) + (if (pair? tail) (lp tail) lis)))) + + +;;; Unzippers -- 1 through 5 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (unzip1 lis) (map car lis)) + +(define (unzip2 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle + (let ((elt (car lis))) ; dotted lists. + (receive (a b) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b))))))) + +(define (unzip3 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis) + (let ((elt (car lis))) + (receive (a b c) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c))))))) + +(define (unzip4 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis lis) + (let ((elt (car lis))) + (receive (a b c d) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c) + (cons (cadddr elt) d))))))) + +(define (unzip5 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis lis lis) + (let ((elt (car lis))) + (receive (a b c d e) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c) + (cons (cadddr elt) d) + (cons (car (cddddr elt)) e))))))) + + +;;; append! append-reverse append-reverse! concatenate concatenate! +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (append! . lists) + ;; First, scan through lists looking for a non-empty one. + (let lp ((lists lists) (prev '())) + (if (not (pair? lists)) prev + (let ((first (car lists)) + (rest (cdr lists))) + (if (not (pair? first)) (lp rest first) + + ;; Now, do the splicing. + (let lp2 ((tail-cons (last-pair first)) + (rest rest)) + (if (pair? rest) + (let ((next (car rest)) + (rest (cdr rest))) + (set-cdr! tail-cons next) + (lp2 (if (pair? next) (last-pair next) tail-cons) + rest)) + first))))))) + +;;; APPEND is R4RS. +;(define (append . lists) +; (if (pair? lists) +; (let recur ((list1 (car lists)) (lists (cdr lists))) +; (if (pair? lists) +; (let ((tail (recur (car lists) (cdr lists)))) +; (fold-right cons tail list1)) ; Append LIST1 & TAIL. +; list1)) +; '())) + +;(define (append-reverse rev-head tail) (fold cons tail rev-head)) + +;(define (append-reverse! rev-head tail) +; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) +; tail +; rev-head)) + +;;; Hand-inline the FOLD and PAIR-FOLD ops for speed. + +(define (append-reverse rev-head tail) + (let lp ((rev-head rev-head) (tail tail)) + (if (null-list? rev-head) tail + (lp (cdr rev-head) (cons (car rev-head) tail))))) + +(define (append-reverse! rev-head tail) + (let lp ((rev-head rev-head) (tail tail)) + (if (null-list? rev-head) tail + (let ((next-rev (cdr rev-head))) + (set-cdr! rev-head tail) + (lp next-rev rev-head))))) + + +(define (concatenate lists) (reduce-right append '() lists)) +(define (concatenate! lists) (reduce-right append! '() lists)) + +;;; Fold/map internal utilities +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These little internal utilities are used by the general +;;; fold & mapper funs for the n-ary cases . It'd be nice if they got inlined. +;;; One the other hand, the n-ary cases are painfully inefficient as it is. +;;; An aggressive implementation should simply re-write these functions +;;; for raw efficiency; I have written them for as much clarity, portability, +;;; and simplicity as can be achieved. +;;; +;;; I use the dreaded call/cc to do local aborts. A good compiler could +;;; handle this with extreme efficiency. An implementation that provides +;;; a one-shot, non-persistent continuation grabber could help the compiler +;;; out by using that in place of the call/cc's in these routines. +;;; +;;; These functions have funky definitions that are precisely tuned to +;;; the needs of the fold/map procs -- for example, to minimize the number +;;; of times the argument lists need to be examined. + +;;; Return (map cdr lists). +;;; However, if any element of LISTS is empty, just abort and return '(). +(define (%cdrs lists) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (let ((lis (car lists))) + (if (null-list? lis) (abort '()) + (cons (cdr lis) (recur (cdr lists))))) + '()))))) + +(define (%cars+ lists last-elt) ; (append! (map car lists) (list last-elt)) + (let recur ((lists lists)) + (if (pair? lists) (cons (caar lists) (recur (cdr lists))) (list last-elt)))) + +;;; LISTS is a (not very long) non-empty list of lists. +;;; Return two lists: the cars & the cdrs of the lists. +;;; However, if any of the lists is empty, just abort and return [() ()]. + +(define (%cars+cdrs lists) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs)))))) + (values '() '())))))) + +;;; Like %CARS+CDRS, but we pass in a final elt tacked onto the end of the +;;; cars list. What a hack. +(define (%cars+cdrs+ lists cars-final) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs)))))) + (values (list cars-final) '())))))) + +;;; Like %CARS+CDRS, but blow up if any list is empty. +(define (%cars+cdrs/no-test lists) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs))))) + (values '() '())))) + + +;;; count +;;;;;;;;; +(define (count pred list1 . lists) + (check-arg procedure? pred count) + (if (pair? lists) + + ;; N-ary case + (let lp ((list1 list1) (lists lists) (i 0)) + (if (null-list? list1) i + (receive (as ds) (%cars+cdrs lists) + (if (null? as) i + (lp (cdr list1) ds + (if (apply pred (car list1) as) (+ i 1) i)))))) + + ;; Fast path + (let lp ((lis list1) (i 0)) + (if (null-list? lis) i + (lp (cdr lis) (if (pred (car lis)) (+ i 1) i)))))) + + +;;; fold/unfold +;;;;;;;;;;;;;;; + +(define (unfold-right p f g seed . maybe-tail) + (check-arg procedure? p unfold-right) + (check-arg procedure? f unfold-right) + (check-arg procedure? g unfold-right) + (let lp ((seed seed) (ans (:optional maybe-tail '()))) + (if (p seed) ans + (lp (g seed) + (cons (f seed) ans))))) + + +(define (unfold p f g seed . maybe-tail-gen) + (check-arg procedure? p unfold) + (check-arg procedure? f unfold) + (check-arg procedure? g unfold) + (if (pair? maybe-tail-gen) + + (let ((tail-gen (car maybe-tail-gen))) + (if (pair? (cdr maybe-tail-gen)) + (apply error "Too many arguments" unfold p f g seed maybe-tail-gen) + + (let recur ((seed seed)) + (if (p seed) (tail-gen seed) + (cons (f seed) (recur (g seed))))))) + + (let recur ((seed seed)) + (if (p seed) '() + (cons (f seed) (recur (g seed))))))) + + +(define (fold kons knil lis1 . lists) + (check-arg procedure? kons fold) + (if (pair? lists) + (let lp ((lists (cons lis1 lists)) (ans knil)) ; N-ary case + (receive (cars+ans cdrs) (%cars+cdrs+ lists ans) + (if (null? cars+ans) ans ; Done. + (lp cdrs (apply kons cars+ans))))) + + (let lp ((lis lis1) (ans knil)) ; Fast path + (if (null-list? lis) ans + (lp (cdr lis) (kons (car lis) ans)))))) + + +(define (fold-right kons knil lis1 . lists) + (check-arg procedure? kons fold-right) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) ; N-ary case + (let ((cdrs (%cdrs lists))) + (if (null? cdrs) knil + (apply kons (%cars+ lists (recur cdrs)))))) + + (let recur ((lis lis1)) ; Fast path + (if (null-list? lis) knil + (let ((head (car lis))) + (kons head (recur (cdr lis)))))))) + + +(define (pair-fold-right f zero lis1 . lists) + (check-arg procedure? f pair-fold-right) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) ; N-ary case + (let ((cdrs (%cdrs lists))) + (if (null? cdrs) zero + (apply f (append! lists (list (recur cdrs))))))) + + (let recur ((lis lis1)) ; Fast path + (if (null-list? lis) zero (f lis (recur (cdr lis))))))) + +(define (pair-fold f zero lis1 . lists) + (check-arg procedure? f pair-fold) + (if (pair? lists) + (let lp ((lists (cons lis1 lists)) (ans zero)) ; N-ary case + (let ((tails (%cdrs lists))) + (if (null? tails) ans + (lp tails (apply f (append! lists (list ans))))))) + + (let lp ((lis lis1) (ans zero)) + (if (null-list? lis) ans + (let ((tail (cdr lis))) ; Grab the cdr now, + (lp tail (f lis ans))))))) ; in case F SET-CDR!s LIS. + + +;;; REDUCE and REDUCE-RIGHT only use RIDENTITY in the empty-list case. +;;; These cannot meaningfully be n-ary. + +(define (reduce f ridentity lis) + (check-arg procedure? f reduce) + (if (null-list? lis) ridentity + (fold f (car lis) (cdr lis)))) + +(define (reduce-right f ridentity lis) + (check-arg procedure? f reduce-right) + (if (null-list? lis) ridentity + (let recur ((head (car lis)) (lis (cdr lis))) + (if (pair? lis) + (f head (recur (car lis) (cdr lis))) + head)))) + + + +;;; Mappers: append-map append-map! pair-for-each map! filter-map map-in-order +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (append-map f lis1 . lists) + (really-append-map append-map append f lis1 lists)) +(define (append-map! f lis1 . lists) + (really-append-map append-map! append! f lis1 lists)) + +(define (really-append-map who appender f lis1 lists) + (check-arg procedure? f who) + (if (pair? lists) + (receive (cars cdrs) (%cars+cdrs (cons lis1 lists)) + (if (null? cars) '() + (let recur ((cars cars) (cdrs cdrs)) + (let ((vals (apply f cars))) + (receive (cars2 cdrs2) (%cars+cdrs cdrs) + (if (null? cars2) vals + (appender vals (recur cars2 cdrs2)))))))) + + ;; Fast path + (if (null-list? lis1) '() + (let recur ((elt (car lis1)) (rest (cdr lis1))) + (let ((vals (f elt))) + (if (null-list? rest) vals + (appender vals (recur (car rest) (cdr rest))))))))) + + +(define (pair-for-each proc lis1 . lists) + (check-arg procedure? proc pair-for-each) + (if (pair? lists) + + (let lp ((lists (cons lis1 lists))) + (let ((tails (%cdrs lists))) + (if (pair? tails) + (begin (apply proc lists) + (lp tails))))) + + ;; Fast path. + (let lp ((lis lis1)) + (if (not (null-list? lis)) + (let ((tail (cdr lis))) ; Grab the cdr now, + (proc lis) ; in case PROC SET-CDR!s LIS. + (lp tail)))))) + +;;; We stop when LIS1 runs out, not when any list runs out. +(define (map! f lis1 . lists) + (check-arg procedure? f map!) + (if (pair? lists) + (let lp ((lis1 lis1) (lists lists)) + (if (not (null-list? lis1)) + (receive (heads tails) (%cars+cdrs/no-test lists) + (set-car! lis1 (apply f (car lis1) heads)) + (lp (cdr lis1) tails)))) + + ;; Fast path. + (pair-for-each (lambda (pair) (set-car! pair (f (car pair)))) lis1)) + lis1) + + +;;; Map F across L, and save up all the non-false results. +(define (filter-map f lis1 . lists) + (check-arg procedure? f filter-map) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) + (receive (cars cdrs) (%cars+cdrs lists) + (if (pair? cars) + (cond ((apply f cars) => (lambda (x) (cons x (recur cdrs)))) + (else (recur cdrs))) ; Tail call in this arm. + '()))) + + ;; Fast path. + (let recur ((lis lis1)) + (if (null-list? lis) lis + (let ((tail (recur (cdr lis)))) + (cond ((f (car lis)) => (lambda (x) (cons x tail))) + (else tail))))))) + + +;;; Map F across lists, guaranteeing to go left-to-right. +;;; NOTE: Some implementations of R5RS MAP are compliant with this spec; +;;; in which case this procedure may simply be defined as a synonym for MAP. + +(define (map-in-order f lis1 . lists) + (check-arg procedure? f map-in-order) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) + (receive (cars cdrs) (%cars+cdrs lists) + (if (pair? cars) + (let ((x (apply f cars))) ; Do head first, + (cons x (recur cdrs))) ; then tail. + '()))) + + ;; Fast path. + (let recur ((lis lis1)) + (if (null-list? lis) lis + (let ((tail (cdr lis)) + (x (f (car lis)))) ; Do head first, + (cons x (recur tail))))))) ; then tail. + + +;;; We extend MAP to handle arguments of unequal length. +(define map map-in-order) + + +;;; filter, remove, partition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; FILTER, REMOVE, PARTITION and their destructive counterparts do not +;;; disorder the elements of their argument. + +;; This FILTER shares the longest tail of L that has no deleted elements. +;; If Scheme had multi-continuation calls, they could be made more efficient. + +(define (filter pred lis) ; Sleazing with EQ? makes this + (check-arg procedure? pred filter) ; one faster. + (let recur ((lis lis)) + (if (null-list? lis) lis ; Use NOT-PAIR? to handle dotted lists. + (let ((head (car lis)) + (tail (cdr lis))) + (if (pred head) + (let ((new-tail (recur tail))) ; Replicate the RECUR call so + (if (eq? tail new-tail) lis + (cons head new-tail))) + (recur tail)))))) ; this one can be a tail call. + + +;;; Another version that shares longest tail. +;(define (filter pred lis) +; (receive (ans no-del?) +; ;; (recur l) returns L with (pred x) values filtered. +; ;; It also returns a flag NO-DEL? if the returned value +; ;; is EQ? to L, i.e. if it didn't have to delete anything. +; (let recur ((l l)) +; (if (null-list? l) (values l #t) +; (let ((x (car l)) +; (tl (cdr l))) +; (if (pred x) +; (receive (ans no-del?) (recur tl) +; (if no-del? +; (values l #t) +; (values (cons x ans) #f))) +; (receive (ans no-del?) (recur tl) ; Delete X. +; (values ans #f)))))) +; ans)) + + + +;(define (filter! pred lis) ; Things are much simpler +; (let recur ((lis lis)) ; if you are willing to +; (if (pair? lis) ; push N stack frames & do N +; (cond ((pred (car lis)) ; SET-CDR! writes, where N is +; (set-cdr! lis (recur (cdr lis))); the length of the answer. +; lis) +; (else (recur (cdr lis)))) +; lis))) + + +;;; This implementation of FILTER! +;;; - doesn't cons, and uses no stack; +;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are +;;; usually expensive on modern machines, and can be extremely expensive on +;;; modern Schemes (e.g., ones that have generational GC's). +;;; It just zips down contiguous runs of in and out elts in LIS doing the +;;; minimal number of SET-CDR!s to splice the tail of one run of ins to the +;;; beginning of the next. + +(define (filter! pred lis) + (check-arg procedure? pred filter!) + (let lp ((ans lis)) + (cond ((null-list? ans) ans) ; Scan looking for + ((not (pred (car ans))) (lp (cdr ans))) ; first cons of result. + + ;; ANS is the eventual answer. + ;; SCAN-IN: (CDR PREV) = LIS and (CAR PREV) satisfies PRED. + ;; Scan over a contiguous segment of the list that + ;; satisfies PRED. + ;; SCAN-OUT: (CAR PREV) satisfies PRED. Scan over a contiguous + ;; segment of the list that *doesn't* satisfy PRED. + ;; When the segment ends, patch in a link from PREV + ;; to the start of the next good segment, and jump to + ;; SCAN-IN. + (else (letrec ((scan-in (lambda (prev lis) + (if (pair? lis) + (if (pred (car lis)) + (scan-in lis (cdr lis)) + (scan-out prev (cdr lis)))))) + (scan-out (lambda (prev lis) + (let lp ((lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (begin (set-cdr! prev lis) + (scan-in lis (cdr lis))) + (lp (cdr lis))) + (set-cdr! prev lis)))))) + (scan-in ans (cdr ans)) + ans))))) + + + +;;; Answers share common tail with LIS where possible; +;;; the technique is slightly subtle. + +(define (partition pred lis) + (check-arg procedure? pred partition) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle dotted lists. + (let ((elt (car lis)) + (tail (cdr lis))) + (receive (in out) (recur tail) + (if (pred elt) + (values (if (pair? out) (cons elt in) lis) out) + (values in (if (pair? in) (cons elt out) lis)))))))) + + + +;(define (partition! pred lis) ; Things are much simpler +; (let recur ((lis lis)) ; if you are willing to +; (if (null-list? lis) (values lis lis) ; push N stack frames & do N +; (let ((elt (car lis))) ; SET-CDR! writes, where N is +; (receive (in out) (recur (cdr lis)) ; the length of LIS. +; (cond ((pred elt) +; (set-cdr! lis in) +; (values lis out)) +; (else (set-cdr! lis out) +; (values in lis)))))))) + + +;;; This implementation of PARTITION! +;;; - doesn't cons, and uses no stack; +;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are +;;; usually expensive on modern machines, and can be extremely expensive on +;;; modern Schemes (e.g., ones that have generational GC's). +;;; It just zips down contiguous runs of in and out elts in LIS doing the +;;; minimal number of SET-CDR!s to splice these runs together into the result +;;; lists. + +(define (partition! pred lis) + (check-arg procedure? pred partition!) + (if (null-list? lis) (values lis lis) + + ;; This pair of loops zips down contiguous in & out runs of the + ;; list, splicing the runs together. The invariants are + ;; SCAN-IN: (cdr in-prev) = LIS. + ;; SCAN-OUT: (cdr out-prev) = LIS. + (letrec ((scan-in (lambda (in-prev out-prev lis) + (let lp ((in-prev in-prev) (lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (lp lis (cdr lis)) + (begin (set-cdr! out-prev lis) + (scan-out in-prev lis (cdr lis)))) + (set-cdr! out-prev lis))))) ; Done. + + (scan-out (lambda (in-prev out-prev lis) + (let lp ((out-prev out-prev) (lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (begin (set-cdr! in-prev lis) + (scan-in lis out-prev (cdr lis))) + (lp lis (cdr lis))) + (set-cdr! in-prev lis)))))) ; Done. + + ;; Crank up the scan&splice loops. + (if (pred (car lis)) + ;; LIS begins in-list. Search for out-list's first pair. + (let lp ((prev-l lis) (l (cdr lis))) + (cond ((not (pair? l)) (values lis l)) + ((pred (car l)) (lp l (cdr l))) + (else (scan-out prev-l l (cdr l)) + (values lis l)))) ; Done. + + ;; LIS begins out-list. Search for in-list's first pair. + (let lp ((prev-l lis) (l (cdr lis))) + (cond ((not (pair? l)) (values l lis)) + ((pred (car l)) + (scan-in l prev-l (cdr l)) + (values l lis)) ; Done. + (else (lp l (cdr l))))))))) + + +;;; Inline us, please. +(define (remove pred l) (filter (lambda (x) (not (pred x))) l)) +(define (remove! pred l) (filter! (lambda (x) (not (pred x))) l)) + + + +;;; Here's the taxonomy for the DELETE/ASSOC/MEMBER functions. +;;; (I don't actually think these are the world's most important +;;; functions -- the procedural FILTER/REMOVE/FIND/FIND-TAIL variants +;;; are far more general.) +;;; +;;; Function Action +;;; --------------------------------------------------------------------------- +;;; remove pred lis Delete by general predicate +;;; delete x lis [=] Delete by element comparison +;;; +;;; find pred lis Search by general predicate +;;; find-tail pred lis Search by general predicate +;;; member x lis [=] Search by element comparison +;;; +;;; assoc key lis [=] Search alist by key comparison +;;; alist-delete key alist [=] Alist-delete by key comparison + +(define (delete x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter (lambda (y) (not (= x y))) lis))) + +(define (delete! x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter! (lambda (y) (not (= x y))) lis))) + +;;; Extended from R4RS to take an optional comparison argument. +(define (member x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (find-tail (lambda (y) (= x y)) lis))) + +;;; R4RS, hence we don't bother to define. +;;; The MEMBER and then FIND-TAIL call should definitely +;;; be inlined for MEMQ & MEMV. +;(define (memq x lis) (member x lis eq?)) +;(define (memv x lis) (member x lis eqv?)) + + +;;; right-duplicate deletion +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; delete-duplicates delete-duplicates! +;;; +;;; Beware -- these are N^2 algorithms. To efficiently remove duplicates +;;; in long lists, sort the list to bring duplicates together, then use a +;;; linear-time algorithm to kill the dups. Or use an algorithm based on +;;; element-marking. The former gives you O(n lg n), the latter is linear. + +(define (delete-duplicates lis . maybe-=) + (let ((elt= (:optional maybe-= equal?))) + (check-arg procedure? elt= delete-duplicates) + (let recur ((lis lis)) + (if (null-list? lis) lis + (let* ((x (car lis)) + (tail (cdr lis)) + (new-tail (recur (delete x tail elt=)))) + (if (eq? tail new-tail) lis (cons x new-tail))))))) + +(define (delete-duplicates! lis . maybe-=) + (let ((elt= (:optional maybe-= equal?))) + (check-arg procedure? elt= delete-duplicates!) + (let recur ((lis lis)) + (if (null-list? lis) lis + (let* ((x (car lis)) + (tail (cdr lis)) + (new-tail (recur (delete! x tail elt=)))) + (if (eq? tail new-tail) lis (cons x new-tail))))))) + + +;;; alist stuff +;;;;;;;;;;;;;;; + +;;; Extended from R4RS to take an optional comparison argument. +(define (assoc x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (find (lambda (entry) (= x (car entry))) lis))) + +(define (alist-cons key datum alist) (cons (cons key datum) alist)) + +(define (alist-copy alist) + (map (lambda (elt) (cons (car elt) (cdr elt))) + alist)) + +(define (alist-delete key alist . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter (lambda (elt) (not (= key (car elt)))) alist))) + +(define (alist-delete! key alist . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter! (lambda (elt) (not (= key (car elt)))) alist))) + + +;;; find find-tail take-while drop-while span break any every list-index +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (find pred list) + (cond ((find-tail pred list) => car) + (else #f))) + +(define (find-tail pred list) + (check-arg procedure? pred find-tail) + (let lp ((list list)) + (and (not (null-list? list)) + (if (pred (car list)) list + (lp (cdr list)))))) + +(define (take-while pred lis) + (check-arg procedure? pred take-while) + (let recur ((lis lis)) + (if (null-list? lis) '() + (let ((x (car lis))) + (if (pred x) + (cons x (recur (cdr lis))) + '()))))) + +(define (drop-while pred lis) + (check-arg procedure? pred drop-while) + (let lp ((lis lis)) + (if (null-list? lis) '() + (if (pred (car lis)) + (lp (cdr lis)) + lis)))) + +(define (take-while! pred lis) + (check-arg procedure? pred take-while!) + (if (or (null-list? lis) (not (pred (car lis)))) '() + (begin (let lp ((prev lis) (rest (cdr lis))) + (if (pair? rest) + (let ((x (car rest))) + (if (pred x) (lp rest (cdr rest)) + (set-cdr! prev '()))))) + lis))) + +(define (span pred lis) + (check-arg procedure? pred span) + (let recur ((lis lis)) + (if (null-list? lis) (values '() '()) + (let ((x (car lis))) + (if (pred x) + (receive (prefix suffix) (recur (cdr lis)) + (values (cons x prefix) suffix)) + (values '() lis)))))) + +(define (span! pred lis) + (check-arg procedure? pred span!) + (if (or (null-list? lis) (not (pred (car lis)))) (values '() lis) + (let ((suffix (let lp ((prev lis) (rest (cdr lis))) + (if (null-list? rest) rest + (let ((x (car rest))) + (if (pred x) (lp rest (cdr rest)) + (begin (set-cdr! prev '()) + rest))))))) + (values lis suffix)))) + + +(define (break pred lis) (span (lambda (x) (not (pred x))) lis)) +(define (break! pred lis) (span! (lambda (x) (not (pred x))) lis)) + +(define (any pred lis1 . lists) + (check-arg procedure? pred any) + (if (pair? lists) + + ;; N-ary case + (receive (heads tails) (%cars+cdrs (cons lis1 lists)) + (and (pair? heads) + (let lp ((heads heads) (tails tails)) + (receive (next-heads next-tails) (%cars+cdrs tails) + (if (pair? next-heads) + (or (apply pred heads) (lp next-heads next-tails)) + (apply pred heads)))))) ; Last PRED app is tail call. + + ;; Fast path + (and (not (null-list? lis1)) + (let lp ((head (car lis1)) (tail (cdr lis1))) + (if (null-list? tail) + (pred head) ; Last PRED app is tail call. + (or (pred head) (lp (car tail) (cdr tail)))))))) + + +;(define (every pred list) ; Simple definition. +; (let lp ((list list)) ; Doesn't return the last PRED value. +; (or (not (pair? list)) +; (and (pred (car list)) +; (lp (cdr list)))))) + +(define (every pred lis1 . lists) + (check-arg procedure? pred every) + (if (pair? lists) + + ;; N-ary case + (receive (heads tails) (%cars+cdrs (cons lis1 lists)) + (or (not (pair? heads)) + (let lp ((heads heads) (tails tails)) + (receive (next-heads next-tails) (%cars+cdrs tails) + (if (pair? next-heads) + (and (apply pred heads) (lp next-heads next-tails)) + (apply pred heads)))))) ; Last PRED app is tail call. + + ;; Fast path + (or (null-list? lis1) + (let lp ((head (car lis1)) (tail (cdr lis1))) + (if (null-list? tail) + (pred head) ; Last PRED app is tail call. + (and (pred head) (lp (car tail) (cdr tail)))))))) + +(define (list-index pred lis1 . lists) + (check-arg procedure? pred list-index) + (if (pair? lists) + + ;; N-ary case + (let lp ((lists (cons lis1 lists)) (n 0)) + (receive (heads tails) (%cars+cdrs lists) + (and (pair? heads) + (if (apply pred heads) n + (lp tails (+ n 1)))))) + + ;; Fast path + (let lp ((lis lis1) (n 0)) + (and (not (null-list? lis)) + (if (pred (car lis)) n (lp (cdr lis) (+ n 1))))))) + +;;; Reverse +;;;;;;;;;;; + +;R4RS, so not defined here. +;(define (reverse lis) (fold cons '() lis)) + +;(define (reverse! lis) +; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis)) + +(define (reverse! lis) + (let lp ((lis lis) (ans '())) + (if (null-list? lis) ans + (let ((tail (cdr lis))) + (set-cdr! lis ans) + (lp tail lis))))) + +;;; Lists-as-sets +;;;;;;;;;;;;;;;;; + +;;; This is carefully tuned code; do not modify casually. +;;; - It is careful to share storage when possible; +;;; - Side-effecting code tries not to perform redundant writes. +;;; - It tries to avoid linear-time scans in special cases where constant-time +;;; computations can be performed. +;;; - It relies on similar properties from the other list-lib procs it calls. +;;; For example, it uses the fact that the implementations of MEMBER and +;;; FILTER in this source code share longest common tails between args +;;; and results to get structure sharing in the lset procedures. + +(define (%lset2<= = lis1 lis2) (every (lambda (x) (member x lis2 =)) lis1)) + +(define (lset<= = . lists) + (check-arg procedure? = lset<=) + (or (not (pair? lists)) ; 0-ary case + (let lp ((s1 (car lists)) (rest (cdr lists))) + (or (not (pair? rest)) + (let ((s2 (car rest)) (rest (cdr rest))) + (and (or (eq? s2 s1) ; Fast path + (%lset2<= = s1 s2)) ; Real test + (lp s2 rest))))))) + +(define (lset= = . lists) + (check-arg procedure? = lset=) + (or (not (pair? lists)) ; 0-ary case + (let lp ((s1 (car lists)) (rest (cdr lists))) + (or (not (pair? rest)) + (let ((s2 (car rest)) + (rest (cdr rest))) + (and (or (eq? s1 s2) ; Fast path + (and (%lset2<= = s1 s2) (%lset2<= = s2 s1))) ; Real test + (lp s2 rest))))))) + + +(define (lset-adjoin = lis . elts) + (check-arg procedure? = lset-adjoin) + (fold (lambda (elt ans) (if (member elt ans =) ans (cons elt ans))) + lis elts)) + + +(define (lset-union = . lists) + (check-arg procedure? = lset-union) + (reduce (lambda (lis ans) ; Compute ANS + LIS. + (cond ((null? lis) ans) ; Don't copy any lists + ((null? ans) lis) ; if we don't have to. + ((eq? lis ans) ans) + (else + (fold (lambda (elt ans) (if (any (lambda (x) (= x elt)) ans) + ans + (cons elt ans))) + ans lis)))) + '() lists)) + +(define (lset-union! = . lists) + (check-arg procedure? = lset-union!) + (reduce (lambda (lis ans) ; Splice new elts of LIS onto the front of ANS. + (cond ((null? lis) ans) ; Don't copy any lists + ((null? ans) lis) ; if we don't have to. + ((eq? lis ans) ans) + (else + (pair-fold (lambda (pair ans) + (let ((elt (car pair))) + (if (any (lambda (x) (= x elt)) ans) + ans + (begin (set-cdr! pair ans) pair)))) + ans lis)))) + '() lists)) + + +(define (lset-intersection = lis1 . lists) + (check-arg procedure? = lset-intersection) + (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. + (cond ((any null-list? lists) '()) ; Short cut + ((null? lists) lis1) ; Short cut + (else (filter (lambda (x) + (every (lambda (lis) (member x lis =)) lists)) + lis1))))) + +(define (lset-intersection! = lis1 . lists) + (check-arg procedure? = lset-intersection!) + (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. + (cond ((any null-list? lists) '()) ; Short cut + ((null? lists) lis1) ; Short cut + (else (filter! (lambda (x) + (every (lambda (lis) (member x lis =)) lists)) + lis1))))) + + +(define (lset-difference = lis1 . lists) + (check-arg procedure? = lset-difference) + (let ((lists (filter pair? lists))) ; Throw out empty lists. + (cond ((null? lists) lis1) ; Short cut + ((memq lis1 lists) '()) ; Short cut + (else (filter (lambda (x) + (every (lambda (lis) (not (member x lis =))) + lists)) + lis1))))) + +(define (lset-difference! = lis1 . lists) + (check-arg procedure? = lset-difference!) + (let ((lists (filter pair? lists))) ; Throw out empty lists. + (cond ((null? lists) lis1) ; Short cut + ((memq lis1 lists) '()) ; Short cut + (else (filter! (lambda (x) + (every (lambda (lis) (not (member x lis =))) + lists)) + lis1))))) + + +(define (lset-xor = . lists) + (check-arg procedure? = lset-xor) + (reduce (lambda (b a) ; Compute A xor B: + ;; Note that this code relies on the constant-time + ;; short-cuts provided by LSET-DIFF+INTERSECTION, + ;; LSET-DIFFERENCE & APPEND to provide constant-time short + ;; cuts for the cases A = (), B = (), and A eq? B. It takes + ;; a careful case analysis to see it, but it's carefully + ;; built in. + + ;; Compute a-b and a^b, then compute b-(a^b) and + ;; cons it onto the front of a-b. + (receive (a-b a-int-b) (lset-diff+intersection = a b) + (cond ((null? a-b) (lset-difference b a =)) + ((null? a-int-b) (append b a)) + (else (fold (lambda (xb ans) + (if (member xb a-int-b =) ans (cons xb ans))) + a-b + b))))) + '() lists)) + + +(define (lset-xor! = . lists) + (check-arg procedure? = lset-xor!) + (reduce (lambda (b a) ; Compute A xor B: + ;; Note that this code relies on the constant-time + ;; short-cuts provided by LSET-DIFF+INTERSECTION, + ;; LSET-DIFFERENCE & APPEND to provide constant-time short + ;; cuts for the cases A = (), B = (), and A eq? B. It takes + ;; a careful case analysis to see it, but it's carefully + ;; built in. + + ;; Compute a-b and a^b, then compute b-(a^b) and + ;; cons it onto the front of a-b. + (receive (a-b a-int-b) (lset-diff+intersection! = a b) + (cond ((null? a-b) (lset-difference! b a =)) + ((null? a-int-b) (append! b a)) + (else (pair-fold (lambda (b-pair ans) + (if (member (car b-pair) a-int-b =) ans + (begin (set-cdr! b-pair ans) b-pair))) + a-b + b))))) + '() lists)) + + +(define (lset-diff+intersection = lis1 . lists) + (check-arg procedure? = lset-diff+intersection) + (cond ((every null-list? lists) (values lis1 '())) ; Short cut + ((memq lis1 lists) (values '() lis1)) ; Short cut + (else (partition (lambda (elt) + (not (any (lambda (lis) (member elt lis =)) + lists))) + lis1)))) + +(define (lset-diff+intersection! = lis1 . lists) + (check-arg procedure? = lset-diff+intersection!) + (cond ((every null-list? lists) (values lis1 '())) ; Short cut + ((memq lis1 lists) (values '() lis1)) ; Short cut + (else (partition! (lambda (elt) + (not (any (lambda (lis) (member elt lis =)) + lists))) + lis1)))) diff --git a/scheme/srfi/srfi-11.scm b/scheme/srfi/srfi-11.scm new file mode 100644 index 0000000..4ae6f72 --- /dev/null +++ b/scheme/srfi/srfi-11.scm @@ -0,0 +1,38 @@ + +; Taken directly from the SRFI document. + +(define-syntax let-values + (syntax-rules () + ((let-values (?binding ...) ?body0 ?body1 ...) + (let-values "bind" (?binding ...) () (begin ?body0 ?body1 ...))) + + ((let-values "bind" () ?tmps ?body) + (let ?tmps ?body)) + + ((let-values "bind" ((?b0 ?e0) ?binding ...) ?tmps ?body) + (let-values "mktmp" ?b0 ?e0 () (?binding ...) ?tmps ?body)) + + ((let-values "mktmp" () ?e0 ?args ?bindings ?tmps ?body) + (call-with-values + (lambda () ?e0) + (lambda ?args + (let-values "bind" ?bindings ?tmps ?body)))) + + ((let-values "mktmp" (?a . ?b) ?e0 (?arg ...) ?bindings (?tmp ...) ?body) + (let-values "mktmp" ?b ?e0 (?arg ... x) ?bindings (?tmp ... (?a x)) ?body)) + + ((let-values "mktmp" ?a ?e0 (?arg ...) ?bindings (?tmp ...) ?body) + (call-with-values + (lambda () ?e0) + (lambda (?arg ... . x) + (let-values "bind" ?bindings (?tmp ... (?a x)) ?body)))))) + +(define-syntax let*-values + (syntax-rules () + ((let*-values () ?body0 ?body1 ...) + (begin ?body0 ?body1 ...)) + + ((let*-values (?binding0 ?binding1 ...) ?body0 ?body1 ...) + (let-values (?binding0) + (let*-values (?binding1 ...) ?body0 ?body1 ...))))) + diff --git a/scheme/srfi/srfi-13.scm b/scheme/srfi/srfi-13.scm new file mode 100644 index 0000000..fc9dfd7 --- /dev/null +++ b/scheme/srfi/srfi-13.scm @@ -0,0 +1,2103 @@ +;;; SRFI 13 string library reference implementation -*- Scheme -*- +;;; Olin Shivers 7/2000 +;;; +;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology. +;;; Copyright (c) 1998, 1999, 2000 Olin Shivers. All rights reserved. +;;; The details of the copyrights appear at the end of the file. Short +;;; summary: BSD-style open source. + +;;; Exports: +;;; string-map string-map! +;;; string-fold string-unfold +;;; string-fold-right string-unfold-right +;;; string-tabulate string-for-each string-for-each-index +;;; string-every string-any +;;; string-hash string-hash-ci +;;; string-compare string-compare-ci +;;; string= string< string> string<= string>= string<> +;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> +;;; string-downcase string-upcase string-titlecase +;;; string-downcase! string-upcase! string-titlecase! +;;; string-take string-take-right +;;; string-drop string-drop-right +;;; string-pad string-pad-right +;;; string-trim string-trim-right string-trim-both +;;; string-filter string-delete +;;; string-index string-index-right +;;; string-skip string-skip-right +;;; string-count +;;; string-prefix-length string-prefix-length-ci +;;; string-suffix-length string-suffix-length-ci +;;; string-prefix? string-prefix-ci? +;;; string-suffix? string-suffix-ci? +;;; string-contains string-contains-ci +;;; string-copy! substring/shared +;;; string-reverse string-reverse! reverse-list->string +;;; string-concatenate string-concatenate/shared string-concatenate-reverse +;;; string-append/shared +;;; xsubstring string-xcopy! +;;; string-null? +;;; string-join +;;; string-tokenize +;;; string-replace +;;; +;;; R5RS extended: +;;; string->list string-copy string-fill! +;;; +;;; R5RS re-exports: +;;; string? make-string string-length string-ref string-set! +;;; +;;; R5RS re-exports (also defined here but commented-out): +;;; string string-append list->string +;;; +;;; Low-level routines: +;;; make-kmp-restart-vector string-kmp-partial-search kmp-step +;;; string-parse-start+end +;;; string-parse-final-start+end +;;; let-string-start+end +;;; check-substring-spec +;;; substring-spec-ok? + +;;; Imports +;;; This is a fairly large library. While it was written for portability, you +;;; must be aware of its dependencies in order to run it in a given scheme +;;; implementation. Here is a complete list of the dependencies it has and the +;;; assumptions it makes beyond stock R5RS Scheme: +;;; +;;; This code has the following non-R5RS dependencies: +;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro; +;;; +;;; - Various imports from the char-set library for the routines that can +;;; take char-set arguments; +;;; +;;; - An n-ary ERROR procedure; +;;; +;;; - BITWISE-AND for the hash functions; +;;; +;;; - A simple CHECK-ARG procedure for checking parameter values; it is +;;; (lambda (pred val proc) +;;; (if (pred val) val (error "Bad arg" val pred proc))) +;;; +;;; - :OPTIONAL and LET-OPTIONALS* macros for parsing, defaulting & +;;; type-checking optional parameters from a rest argument; +;;; +;;; - CHAR-CASED? and CHAR-TITLECASE for the STRING-TITLECASE & +;;; STRING-TITLECASE! procedures. The former returns true iff a character is +;;; one that has case distinctions; in ASCII it returns true on a-z and A-Z. +;;; CHAR-TITLECASE is analagous to CHAR-UPCASE and CHAR-DOWNCASE. In ASCII & +;;; Latin-1, it is the same as CHAR-UPCASE. +;;; +;;; The code depends upon a small set of core string primitives from R5RS: +;;; MAKE-STRING STRING-REF STRING-SET! STRING? STRING-LENGTH SUBSTRING +;;; (Actually, SUBSTRING is not a primitive, but we assume that an +;;; implementation's native version is probably faster than one we could +;;; define, so we import it from R5RS.) +;;; +;;; The code depends upon a small set of R5RS character primitives: +;;; char? char=? char-ci=? charinteger (for the hash functions) +;;; +;;; We assume the following: +;;; - CHAR-DOWNCASE o CHAR-UPCASE = CHAR-DOWNCASE +;;; - CHAR-CI=? is equivalent to +;;; (lambda (c1 c2) (char=? (char-downcase (char-upcase c1)) +;;; (char-downcase (char-upcase c2)))) +;;; - CHAR-UPCASE, CHAR-DOWNCASE and CHAR-TITLECASE are locale-insensitive +;;; and consistent with Unicode's 1-1 char-mapping spec. +;;; These things are typically true, but if not, you would need to modify +;;; the case-mapping and case-insensitive routines. + +;;; Enough introductory blather. On to the source code. (But see the end of +;;; the file for further notes on porting & performance tuning.) + +; Start S48 additions + +(define (check-arg pred val caller) + (if (not (pred val)) + (error val caller)) + val) + +(define-syntax :optional + (syntax-rules () + ((:optional rest default-exp) + (let ((maybe-arg rest)) + (if (pair? maybe-arg) + (if (null? (cdr maybe-arg)) (car maybe-arg) + (error "too many optional arguments" maybe-arg)) + default-exp))) + + ((:optional rest default-exp arg-test) + (let ((maybe-arg rest)) + (if (pair? maybe-arg) + (if (null? (cdr maybe-arg)) + (let ((val (car maybe-arg))) + (if (arg-test val) val + (error "Optional argument failed test" + 'arg-test val))) + (error "too many optional arguments" maybe-arg)) + default-exp))))) + +(define-syntax let-optionals* + (syntax-rules () + ((let-optionals* arg (opt-clause ...) body ...) + (let ((rest arg)) + (%let-optionals* rest (opt-clause ...) body ...))))) + +(define-syntax %let-optionals* + (syntax-rules () + ((%let-optionals* arg (((var ...) xparser) opt-clause ...) body ...) + (call-with-values (lambda () (xparser arg)) + (lambda (rest var ...) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg ((var default) opt-clause ...) body ...) + (call-with-values (lambda () (if (null? arg) (values default '()) + (values (car arg) (cdr arg)))) + (lambda (var rest) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg ((var default test) opt-clause ...) body ...) + (call-with-values (lambda () + (if (null? arg) (values default '()) + (let ((var (car arg))) + (if test (values var (cdr arg)) + (error "arg failed LET-OPT test" var))))) + (lambda (var rest) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg ((var default test supplied?) opt-clause ...) body ...) + (call-with-values (lambda () + (if (null? arg) (values default #f '()) + (let ((var (car arg))) + (if test (values var #t (cdr arg)) + (error "arg failed LET-OPT test" var))))) + (lambda (var supplied? rest) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg (rest) body ...) + (let ((rest arg)) body ...)) + + ((%let-optionals* arg () body ...) + (if (null? arg) (begin body ...) + (error "Too many arguments in let-opt" arg))))) + +(define (char-cased? ch) + (or (and (char<=? #\a ch) + (char<=? ch #\z)) + (and (char<=? #\A ch) + (char<=? ch #\Z)))) + +(define char-titlecase char-upcase) + +; End S48 additions + +;;; Support for START/END substring specs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This macro parses optional start/end arguments from arg lists, defaulting +;;; them to 0/(string-length s), and checks them for correctness. + +(define-syntax let-string-start+end + (syntax-rules () + ((let-string-start+end (start end) proc s-exp args-exp body ...) + (receive (start end) (string-parse-final-start+end proc s-exp args-exp) + body ...)) + ((let-string-start+end (start end rest) proc s-exp args-exp body ...) + (receive (rest start end) (string-parse-start+end proc s-exp args-exp) + body ...)))) + +;;; This one parses out a *pair* of final start/end indices. +;;; Not exported; for internal use. +(define-syntax let-string-start+end2 + (syntax-rules () + ((l-s-s+e2 (start1 end1 start2 end2) proc s1 s2 args body ...) + (let ((procv proc)) ; Make sure PROC is only evaluated once. + (let-string-start+end (start1 end1 rest) procv s1 args + (let-string-start+end (start2 end2) procv s2 rest + body ...)))))) + + +;;; Returns three values: rest start end + +(define (string-parse-start+end proc s args) + (if (not (string? s)) (error "Non-string value" proc s)) + (let ((slen (string-length s))) + (if (pair? args) + + (let ((start (car args)) + (args (cdr args))) + (if (and (integer? start) (exact? start) (>= start 0)) + (receive (end args) + (if (pair? args) + (let ((end (car args)) + (args (cdr args))) + (if (and (integer? end) (exact? end) (<= end slen)) + (values end args) + (error "Illegal substring END spec" proc end s))) + (values slen args)) + (if (<= start end) (values args start end) + (error "Illegal substring START/END spec" + proc start end s))) + (error "Illegal substring START spec" proc start s))) + + (values '() 0 slen)))) + +(define (string-parse-final-start+end proc s args) + (receive (rest start end) (string-parse-start+end proc s args) + (if (pair? rest) (error "Extra arguments to procedure" proc rest) + (values start end)))) + +(define (substring-spec-ok? s start end) + (and (string? s) + (integer? start) + (exact? start) + (integer? end) + (exact? end) + (<= 0 start) + (<= start end) + (<= end (string-length s)))) + +(define (check-substring-spec proc s start end) + (if (not (substring-spec-ok? s start end)) + (error "Illegal substring spec." proc s start end))) + + +;;; Defined by R5RS, so commented out here. +;(define (string . chars) +; (let* ((len (length chars)) +; (ans (make-string len))) +; (do ((i 0 (+ i 1)) +; (chars chars (cdr chars))) +; ((>= i len)) +; (string-set! ans i (car chars))) +; ans)) +; +;(define (string . chars) (string-unfold null? car cdr chars)) + + + +;;; substring/shared S START [END] +;;; string-copy S [START END] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; All this goop is just arg parsing & checking surrounding a call to the +;;; actual primitive, %SUBSTRING/SHARED. + +(define (substring/shared s start . maybe-end) + (check-arg string? s substring/shared) + (let ((slen (string-length s))) + (check-arg (lambda (start) (and (integer? start) (exact? start) (<= 0 start))) + start substring/shared) + (%substring/shared s start + (:optional maybe-end slen + (lambda (end) (and (integer? end) + (exact? end) + (<= start end) + (<= end slen))))))) + +;;; Split out so that other routines in this library can avoid arg-parsing +;;; overhead for END parameter. +(define (%substring/shared s start end) + (if (and (zero? start) (= end (string-length s))) s + (substring s start end))) + +(define (string-copy s . maybe-start+end) + (let-string-start+end (start end) string-copy s maybe-start+end + (substring s start end))) + +;This library uses the R5RS SUBSTRING, but doesn't export it. +;Here is a definition, just for completeness. +;(define (substring s start end) +; (check-substring-spec substring s start end) +; (let* ((slen (- end start)) +; (ans (make-string slen))) +; (do ((i 0 (+ i 1)) +; (j start (+ j 1))) +; ((>= i slen) ans) +; (string-set! ans i (string-ref s j))))) + +;;; Basic iterators and other higher-order abstractions +;;; (string-map proc s [start end]) +;;; (string-map! proc s [start end]) +;;; (string-fold kons knil s [start end]) +;;; (string-fold-right kons knil s [start end]) +;;; (string-unfold p f g seed [base make-final]) +;;; (string-unfold-right p f g seed [base make-final]) +;;; (string-for-each proc s [start end]) +;;; (string-for-each-index proc s [start end]) +;;; (string-every char-set/char/pred s [start end]) +;;; (string-any char-set/char/pred s [start end]) +;;; (string-tabulate proc len) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; You want compiler support for high-level transforms on fold and unfold ops. +;;; You'd at least like a lot of inlining for clients of these procedures. +;;; Don't hold your breath. + +(define (string-map proc s . maybe-start+end) + (check-arg procedure? proc string-map) + (let-string-start+end (start end) string-map s maybe-start+end + (%string-map proc s start end))) + +(define (%string-map proc s start end) ; Internal utility + (let* ((len (- end start)) + (ans (make-string len))) + (do ((i (- end 1) (- i 1)) + (j (- len 1) (- j 1))) + ((< j 0)) + (string-set! ans j (proc (string-ref s i)))) + ans)) + +(define (string-map! proc s . maybe-start+end) + (check-arg procedure? proc string-map!) + (let-string-start+end (start end) string-map! s maybe-start+end + (%string-map! proc s start end))) + +(define (%string-map! proc s start end) + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i (proc (string-ref s i))))) + +(define (string-fold kons knil s . maybe-start+end) + (check-arg procedure? kons string-fold) + (let-string-start+end (start end) string-fold s maybe-start+end + (let lp ((v knil) (i start)) + (if (< i end) (lp (kons (string-ref s i) v) (+ i 1)) + v)))) + +(define (string-fold-right kons knil s . maybe-start+end) + (check-arg procedure? kons string-fold-right) + (let-string-start+end (start end) string-fold-right s maybe-start+end + (let lp ((v knil) (i (- end 1))) + (if (>= i start) (lp (kons (string-ref s i) v) (- i 1)) + v)))) + +;;; (string-unfold p f g seed [base make-final]) +;;; This is the fundamental constructor for strings. +;;; - G is used to generate a series of "seed" values from the initial seed: +;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... +;;; - P tells us when to stop -- when it returns true when applied to one +;;; of these seed values. +;;; - F maps each seed value to the corresponding character +;;; in the result string. These chars are assembled into the +;;; string in a left-to-right order. +;;; - BASE is the optional initial/leftmost portion of the constructed string; +;;; it defaults to the empty string "". +;;; - MAKE-FINAL is applied to the terminal seed value (on which P returns +;;; true) to produce the final/rightmost portion of the constructed string. +;;; It defaults to (LAMBDA (X) ""). +;;; +;;; In other words, the following (simple, inefficient) definition holds: +;;; (define (string-unfold p f g seed base make-final) +;;; (string-append base +;;; (let recur ((seed seed)) +;;; (if (p seed) (make-final seed) +;;; (string-append (string (f seed)) +;;; (recur (g seed))))))) +;;; +;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to +;;; reverse a string, copy a string, convert a list to a string, read +;;; a port into a string, and so forth. Examples: +;;; (port->string port) = +;;; (string-unfold (compose eof-object? peek-char) +;;; read-char values port) +;;; +;;; (list->string lis) = (string-unfold null? car cdr lis) +;;; +;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) + +;;; A problem with the following simple formulation is that it pushes one +;;; stack frame for every char in the result string -- an issue if you are +;;; using it to read a 100kchar string. So we don't use it -- but I include +;;; it to give a clear, straightforward description of what the function +;;; does. + +;(define (string-unfold p f g seed base make-final) +; (let ((ans (let recur ((seed seed) (i (string-length base))) +; (if (p seed) +; (let* ((final (make-final seed)) +; (ans (make-string (+ i (string-length final))))) +; (string-copy! ans i final) +; ans) +; +; (let* ((c (f seed)) +; (s (recur (g seed) (+ i 1)))) +; (string-set! s i c) +; s))))) +; (string-copy! ans 0 base) +; ans)) + +;;; The strategy is to allocate a series of chunks into which we stash the +;;; chars as we generate them. Chunk size goes up in powers of two starting +;;; with 40 and levelling out at 4k, i.e. +;;; 40 40 80 160 320 640 1280 2560 4096 4096 4096 4096 4096... +;;; This should work pretty well for short strings, 1-line (80 char) strings, +;;; and longer ones. When done, we allocate an answer string and copy the +;;; chars over from the chunk buffers. + +(define (string-unfold p f g seed . base+make-final) + (check-arg procedure? p string-unfold) + (check-arg procedure? f string-unfold) + (check-arg procedure? g string-unfold) + (let-optionals* base+make-final + ((base "" (string? base)) + (make-final (lambda (x) "") (procedure? make-final))) + (let lp ((chunks '()) ; Previously filled chunks + (nchars 0) ; Number of chars in CHUNKS + (chunk (make-string 40)) ; Current chunk into which we write + (chunk-len 40) + (i 0) ; Number of chars written into CHUNK + (seed seed)) + (let lp2 ((i i) (seed seed)) + (if (not (p seed)) + (let ((c (f seed)) + (seed (g seed))) + (if (< i chunk-len) + (begin (string-set! chunk i c) + (lp2 (+ i 1) seed)) + + (let* ((nchars2 (+ chunk-len nchars)) + (chunk-len2 (min 4096 nchars2)) + (new-chunk (make-string chunk-len2))) + (string-set! new-chunk 0 c) + (lp (cons chunk chunks) (+ nchars chunk-len) + new-chunk chunk-len2 1 seed)))) + + ;; We're done. Make the answer string & install the bits. + (let* ((final (make-final seed)) + (flen (string-length final)) + (base-len (string-length base)) + (j (+ base-len nchars i)) + (ans (make-string (+ j flen)))) + (%string-copy! ans j final 0 flen) ; Install FINAL. + (let ((j (- j i))) + (%string-copy! ans j chunk 0 i) ; Install CHUNK[0,I). + (let lp ((j j) (chunks chunks)) ; Install CHUNKS. + (if (pair? chunks) + (let* ((chunk (car chunks)) + (chunks (cdr chunks)) + (chunk-len (string-length chunk)) + (j (- j chunk-len))) + (%string-copy! ans j chunk 0 chunk-len) + (lp j chunks))))) + (%string-copy! ans 0 base 0 base-len) ; Install BASE. + ans)))))) + +(define (string-unfold-right p f g seed . base+make-final) + (let-optionals* base+make-final + ((base "" (string? base)) + (make-final (lambda (x) "") (procedure? make-final))) + (let lp ((chunks '()) ; Previously filled chunks + (nchars 0) ; Number of chars in CHUNKS + (chunk (make-string 40)) ; Current chunk into which we write + (chunk-len 40) + (i 40) ; Number of chars available in CHUNK + (seed seed)) + (let lp2 ((i i) (seed seed)) ; Fill up CHUNK from right + (if (not (p seed)) ; to left. + (let ((c (f seed)) + (seed (g seed))) + (if (> i 0) + (let ((i (- i 1))) + (string-set! chunk i c) + (lp2 i seed)) + + (let* ((nchars2 (+ chunk-len nchars)) + (chunk-len2 (min 4096 nchars2)) + (new-chunk (make-string chunk-len2)) + (i (- chunk-len2 1))) + (string-set! new-chunk i c) + (lp (cons chunk chunks) (+ nchars chunk-len) + new-chunk chunk-len2 i seed)))) + + ;; We're done. Make the answer string & install the bits. + (let* ((final (make-final seed)) + (flen (string-length final)) + (base-len (string-length base)) + (chunk-used (- chunk-len i)) + (j (+ base-len nchars chunk-used)) + (ans (make-string (+ j flen)))) + (%string-copy! ans 0 final 0 flen) ; Install FINAL. + (%string-copy! ans flen chunk i chunk-len); Install CHUNK[I,). + (let lp ((j (+ flen chunk-used)) ; Install CHUNKS. + (chunks chunks)) + (if (pair? chunks) + (let* ((chunk (car chunks)) + (chunks (cdr chunks)) + (chunk-len (string-length chunk))) + (%string-copy! ans j chunk 0 chunk-len) + (lp (+ j chunk-len) chunks)) + (%string-copy! ans j base 0 base-len))); Install BASE. + ans)))))) + + +(define (string-for-each proc s . maybe-start+end) + (check-arg procedure? proc string-for-each) + (let-string-start+end (start end) string-for-each s maybe-start+end + (let lp ((i start)) + (if (< i end) + (begin (proc (string-ref s i)) + (lp (+ i 1))))))) + +(define (string-for-each-index proc s . maybe-start+end) + (check-arg procedure? proc string-for-each-index) + (let-string-start+end (start end) string-for-each-index s maybe-start+end + (let lp ((i start)) + (if (< i end) (begin (proc i) (lp (+ i 1))))))) + +(define (string-every criterion s . maybe-start+end) + (let-string-start+end (start end) string-every s maybe-start+end + (cond ((char? criterion) + (let lp ((i start)) + (or (>= i end) + (and (char=? criterion (string-ref s i)) + (lp (+ i 1)))))) + + ((char-set? criterion) + (let lp ((i start)) + (or (>= i end) + (and (char-set-contains? criterion (string-ref s i)) + (lp (+ i 1)))))) + + ((procedure? criterion) ; Slightly funky loop so that + (or (= start end) ; final (PRED S[END-1]) call + (let lp ((i start)) ; is a tail call. + (let ((c (string-ref s i)) + (i1 (+ i 1))) + (if (= i1 end) (criterion c) ; Tail call. + (and (criterion c) (lp i1))))))) + + (else (error "Second param is neither char-set, char, or predicate procedure." + string-every criterion))))) + + +(define (string-any criterion s . maybe-start+end) + (let-string-start+end (start end) string-any s maybe-start+end + (cond ((char? criterion) + (let lp ((i start)) + (and (< i end) + (or (char=? criterion (string-ref s i)) + (lp (+ i 1)))))) + + ((char-set? criterion) + (let lp ((i start)) + (and (< i end) + (or (char-set-contains? criterion (string-ref s i)) + (lp (+ i 1)))))) + + ((procedure? criterion) ; Slightly funky loop so that + (and (< start end) ; final (PRED S[END-1]) call + (let lp ((i start)) ; is a tail call. + (let ((c (string-ref s i)) + (i1 (+ i 1))) + (if (= i1 end) (criterion c) ; Tail call + (or (criterion c) (lp i1))))))) + + (else (error "Second param is neither char-set, char, or predicate procedure." + string-any criterion))))) + + +(define (string-tabulate proc len) + (check-arg procedure? proc string-tabulate) + (check-arg (lambda (val) (and (integer? val) (exact? val) (<= 0 val))) + len string-tabulate) + (let ((s (make-string len))) + (do ((i (- len 1) (- i 1))) + ((< i 0)) + (string-set! s i (proc i))) + s)) + + + +;;; string-prefix-length[-ci] s1 s2 [start1 end1 start2 end2] +;;; string-suffix-length[-ci] s1 s2 [start1 end1 start2 end2] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Find the length of the common prefix/suffix. +;;; It is not required that the two substrings passed be of equal length. +;;; This was microcode in MIT Scheme -- a very tightly bummed primitive. +;;; %STRING-PREFIX-LENGTH is the core routine of all string-comparisons, +;;; so should be as tense as possible. + +(define (%string-prefix-length s1 start1 end1 s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + + (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path + delta + + (let lp ((i start1) (j start2)) ; Regular path + (if (or (>= i end1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1))))))) + +(define (%string-suffix-length s1 start1 end1 s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + + (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path + delta + + (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path + (if (or (< i start1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1))))))) + +(define (%string-prefix-length-ci s1 start1 end1 s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + + (if (and (eq? s1 s2) (= start1 start2)) ; EQ fast path + delta + + (let lp ((i start1) (j start2)) ; Regular path + (if (or (>= i end1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1))))))) + +(define (%string-suffix-length-ci s1 start1 end1 s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + + (if (and (eq? s1 s2) (= end1 end2)) ; EQ fast path + delta + + (let lp ((i (- end1 1)) (j (- end2 1))) ; Regular path + (if (or (< i start1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1))))))) + + +(define (string-prefix-length s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-prefix-length s1 s2 maybe-starts+ends + (%string-prefix-length s1 start1 end1 s2 start2 end2))) + +(define (string-suffix-length s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-suffix-length s1 s2 maybe-starts+ends + (%string-suffix-length s1 start1 end1 s2 start2 end2))) + +(define (string-prefix-length-ci s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-prefix-length-ci s1 s2 maybe-starts+ends + (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) + +(define (string-suffix-length-ci s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-suffix-length-ci s1 s2 maybe-starts+ends + (%string-suffix-length-ci s1 start1 end1 s2 start2 end2))) + + +;;; string-prefix? s1 s2 [start1 end1 start2 end2] +;;; string-suffix? s1 s2 [start1 end1 start2 end2] +;;; string-prefix-ci? s1 s2 [start1 end1 start2 end2] +;;; string-suffix-ci? s1 s2 [start1 end1 start2 end2] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These are all simple derivatives of the previous counting funs. + +(define (string-prefix? s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-prefix? s1 s2 maybe-starts+ends + (%string-prefix? s1 start1 end1 s2 start2 end2))) + +(define (string-suffix? s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-suffix? s1 s2 maybe-starts+ends + (%string-suffix? s1 start1 end1 s2 start2 end2))) + +(define (string-prefix-ci? s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-prefix-ci? s1 s2 maybe-starts+ends + (%string-prefix-ci? s1 start1 end1 s2 start2 end2))) + +(define (string-suffix-ci? s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-suffix-ci? s1 s2 maybe-starts+ends + (%string-suffix-ci? s1 start1 end1 s2 start2 end2))) + + +;;; Here are the internal routines that do the real work. + +(define (%string-prefix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= (%string-prefix-length s1 start1 end1 + s2 start2 end2) + len1)))) + +(define (%string-suffix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (%string-suffix-length s1 start1 end1 + s2 start2 end2))))) + +(define (%string-prefix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (%string-prefix-length-ci s1 start1 end1 + s2 start2 end2))))) + +(define (%string-suffix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (%string-suffix-length-ci s1 start1 end1 + s2 start2 end2))))) + + +;;; string-compare s1 s2 proc< proc= proc> [start1 end1 start2 end2] +;;; string-compare-ci s1 s2 proc< proc= proc> [start1 end1 start2 end2] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Primitive string-comparison functions. +;;; Continuation order is different from MIT Scheme. +;;; Continuations are applied to s1's mismatch index; +;;; in the case of equality, this is END1. + +(define (%string-compare s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (%string-prefix-length s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) + proc> + (if (char)) + (+ match start1)))))) + +(define (%string-compare-ci s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (%string-prefix-length-ci s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) proc> + (if (char-ci)) + (+ start1 match)))))) + +(define (string-compare s1 s2 proc< proc= proc> . maybe-starts+ends) + (check-arg procedure? proc< string-compare) + (check-arg procedure? proc= string-compare) + (check-arg procedure? proc> string-compare) + (let-string-start+end2 (start1 end1 start2 end2) + string-compare s1 s2 maybe-starts+ends + (%string-compare s1 start1 end1 s2 start2 end2 proc< proc= proc>))) + +(define (string-compare-ci s1 s2 proc< proc= proc> . maybe-starts+ends) + (check-arg procedure? proc< string-compare-ci) + (check-arg procedure? proc= string-compare-ci) + (check-arg procedure? proc> string-compare-ci) + (let-string-start+end2 (start1 end1 start2 end2) + string-compare-ci s1 s2 maybe-starts+ends + (%string-compare-ci s1 start1 end1 s2 start2 end2 proc< proc= proc>))) + + + +;;; string= string<> string-ci= string-ci<> +;;; string< string> string-ci< string-ci> +;;; string<= string>= string-ci<= string-ci>= +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Simple definitions in terms of the previous comparison funs. +;;; I sure hope the %STRING-COMPARE calls get integrated. + +(define (string= s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string= s1 s2 maybe-starts+ends + (and (= (- end1 start1) (- end2 start2)) ; Quick filter + (or (and (eq? s1 s2) (= start1 start2)) ; Fast path + (%string-compare s1 start1 end1 s2 start2 end2 ; Real test + (lambda (i) #f) + values + (lambda (i) #f)))))) + +(define (string<> s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string<> s1 s2 maybe-starts+ends + (or (not (= (- end1 start1) (- end2 start2))) ; Fast path + (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter + (%string-compare s1 start1 end1 s2 start2 end2 ; Real test + values + (lambda (i) #f) + values))))) + +(define (string< s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string< s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (< end1 end2) + + (%string-compare s1 start1 end1 s2 start2 end2 ; Real test + values + (lambda (i) #f) + (lambda (i) #f))))) + +(define (string> s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string> s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (> end1 end2) + + (%string-compare s1 start1 end1 s2 start2 end2 ; Real test + (lambda (i) #f) + (lambda (i) #f) + values)))) + +(define (string<= s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string<= s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (<= end1 end2) + + (%string-compare s1 start1 end1 s2 start2 end2 ; Real test + values + values + (lambda (i) #f))))) + +(define (string>= s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string>= s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (>= end1 end2) + + (%string-compare s1 start1 end1 s2 start2 end2 ; Real test + (lambda (i) #f) + values + values)))) + +(define (string-ci= s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-ci= s1 s2 maybe-starts+ends + (and (= (- end1 start1) (- end2 start2)) ; Quick filter + (or (and (eq? s1 s2) (= start1 start2)) ; Fast path + (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test + (lambda (i) #f) + values + (lambda (i) #f)))))) + +(define (string-ci<> s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-ci<> s1 s2 maybe-starts+ends + (or (not (= (- end1 start1) (- end2 start2))) ; Fast path + (and (not (and (eq? s1 s2) (= start1 start2))) ; Quick filter + (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test + values + (lambda (i) #f) + values))))) + +(define (string-ci< s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-ci< s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (< end1 end2) + + (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test + values + (lambda (i) #f) + (lambda (i) #f))))) + +(define (string-ci> s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-ci> s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (> end1 end2) + + (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test + (lambda (i) #f) + (lambda (i) #f) + values)))) + +(define (string-ci<= s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-ci<= s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (<= end1 end2) + + (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test + values + values + (lambda (i) #f))))) + +(define (string-ci>= s1 s2 . maybe-starts+ends) + (let-string-start+end2 (start1 end1 start2 end2) + string-ci>= s1 s2 maybe-starts+ends + (if (and (eq? s1 s2) (= start1 start2)) ; Fast path + (>= end1 end2) + + (%string-compare-ci s1 start1 end1 s2 start2 end2 ; Real test + (lambda (i) #f) + values + values)))) + + +;;; Hash +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in +;;; to keep the intermediate values small. (We do the calculation with just +;;; enough bits to represent BOUND, masking off high bits at each step in +;;; calculation. If this screws up any important properties of the hash +;;; function I'd like to hear about it. -Olin) +;;; +;;; If you keep BOUND small enough, the intermediate calculations will +;;; always be fixnums. How small is dependent on the underlying Scheme system; +;;; we use a default BOUND of 2^22 = 4194304, which should hack it in +;;; Schemes that give you at least 29 signed bits for fixnums. The core +;;; calculation that you don't want to overflow is, worst case, +;;; (+ 65535 (* 37 (- bound 1))) +;;; where 65535 is the max character code. Choose the default BOUND to be the +;;; biggest power of two that won't cause this expression to fixnum overflow, +;;; and everything will be copacetic. + +(define (%string-hash s char->int bound start end) + (let ((iref (lambda (s i) (char->int (string-ref s i)))) + ;; Compute a 111...1 mask that will cover BOUND-1: + (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh? + (if (>= i bound) (- i 1) (lp (+ i i)))))) + (let lp ((i start) (ans 0)) + (if (>= i end) (modulo ans bound) + (lp (+ i 1) (bitwise-and mask (+ (* 37 ans) (iref s i)))))))) + +(define (string-hash s . maybe-bound+start+end) + (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound) + (exact? bound) + (<= 0 bound))) + rest) + (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default. + (let-string-start+end (start end) string-hash s rest + (%string-hash s char->integer bound start end))))) + +(define (string-hash-ci s . maybe-bound+start+end) + (let-optionals* maybe-bound+start+end ((bound 4194304 (and (integer? bound) + (exact? bound) + (<= 0 bound))) + rest) + (let ((bound (if (zero? bound) 4194304 bound))) ; 0 means default. + (let-string-start+end (start end) string-hash-ci s rest + (%string-hash s (lambda (c) (char->integer (char-downcase c))) + bound start end))))) + +;;; Case hacking +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-upcase s [start end] +;;; string-upcase! s [start end] +;;; string-downcase s [start end] +;;; string-downcase! s [start end] +;;; +;;; string-titlecase s [start end] +;;; string-titlecase! s [start end] +;;; Capitalize every contiguous alpha sequence: capitalise +;;; first char, lowercase rest. + +(define (string-upcase s . maybe-start+end) + (let-string-start+end (start end) string-upcase s maybe-start+end + (%string-map char-upcase s start end))) + +(define (string-upcase! s . maybe-start+end) + (let-string-start+end (start end) string-upcase! s maybe-start+end + (%string-map! char-upcase s start end))) + +(define (string-downcase s . maybe-start+end) + (let-string-start+end (start end) string-downcase s maybe-start+end + (%string-map char-downcase s start end))) + +(define (string-downcase! s . maybe-start+end) + (let-string-start+end (start end) string-downcase! s maybe-start+end + (%string-map! char-downcase s start end))) + +(define (%string-titlecase! s start end) + (let lp ((i start)) + (cond ((string-index s char-cased? i end) => + (lambda (i) + (string-set! s i (char-titlecase (string-ref s i))) + (let ((i1 (+ i 1))) + (cond ((string-skip s char-cased? i1 end) => + (lambda (j) + (string-downcase! s i1 j) + (lp (+ j 1)))) + (else (string-downcase! s i1 end))))))))) + +(define (string-titlecase! s . maybe-start+end) + (let-string-start+end (start end) string-titlecase! s maybe-start+end + (%string-titlecase! s start end))) + +(define (string-titlecase s . maybe-start+end) + (let-string-start+end (start end) string-titlecase! s maybe-start+end + (let ((ans (substring s start end))) + (%string-titlecase! ans 0 (- end start)) + ans))) + + +;;; Cutting & pasting strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-take string nchars +;;; string-drop string nchars +;;; +;;; string-take-right string nchars +;;; string-drop-right string nchars +;;; +;;; string-pad string k [char start end] +;;; string-pad-right string k [char start end] +;;; +;;; string-trim string [char/char-set/pred start end] +;;; string-trim-right string [char/char-set/pred start end] +;;; string-trim-both string [char/char-set/pred start end] +;;; +;;; These trimmers invert the char-set meaning from MIT Scheme -- you +;;; say what you want to trim. + +(define (string-take s n) + (check-arg string? s string-take) + (check-arg (lambda (val) (and (integer? n) (exact? n) + (<= 0 n (string-length s)))) + n string-take) + (%substring/shared s 0 n)) + +(define (string-take-right s n) + (check-arg string? s string-take-right) + (let ((len (string-length s))) + (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) + n string-take-right) + (%substring/shared s (- len n) len))) + +(define (string-drop s n) + (check-arg string? s string-drop) + (let ((len (string-length s))) + (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) + n string-drop) + (%substring/shared s n len))) + +(define (string-drop-right s n) + (check-arg string? s string-drop-right) + (let ((len (string-length s))) + (check-arg (lambda (val) (and (integer? n) (exact? n) (<= 0 n len))) + n string-drop-right) + (%substring/shared s 0 (- len n)))) + + +(define (string-trim s . criterion+start+end) + (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) + (let-string-start+end (start end) string-trim s rest + (cond ((string-skip s criterion start end) => + (lambda (i) (%substring/shared s i end))) + (else ""))))) + +(define (string-trim-right s . criterion+start+end) + (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) + (let-string-start+end (start end) string-trim-right s rest + (cond ((string-skip-right s criterion start end) => + (lambda (i) (%substring/shared s 0 (+ 1 i)))) + (else ""))))) + +(define (string-trim-both s . criterion+start+end) + (let-optionals* criterion+start+end ((criterion char-set:whitespace) rest) + (let-string-start+end (start end) string-trim-both s rest + (cond ((string-skip s criterion start end) => + (lambda (i) + (%substring/shared s i (+ 1 (string-skip-right s criterion i end))))) + (else ""))))) + + +(define (string-pad-right s n . char+start+end) + (let-optionals* char+start+end ((char #\space (char? char)) rest) + (let-string-start+end (start end) string-pad-right s rest + (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n))) + n string-pad-right) + (let ((len (- end start))) + (if (<= n len) + (%substring/shared s start (+ start n)) + (let ((ans (make-string n char))) + (%string-copy! ans 0 s start end) + ans)))))) + +(define (string-pad s n . char+start+end) + (let-optionals* char+start+end ((char #\space (char? char)) rest) + (let-string-start+end (start end) string-pad s rest + (check-arg (lambda (n) (and (integer? n) (exact? n) (<= 0 n))) + n string-pad) + (let ((len (- end start))) + (if (<= n len) + (%substring/shared s (- end n) end) + (let ((ans (make-string n char))) + (%string-copy! ans (- n len) s start end) + ans)))))) + + + +;;; Filtering strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-delete char/char-set/pred string [start end] +;;; string-filter char/char-set/pred string [start end] +;;; +;;; If the criterion is a char or char-set, we scan the string twice with +;;; string-fold -- once to determine the length of the result string, +;;; and once to do the filtered copy. +;;; If the criterion is a predicate, we don't do this double-scan strategy, +;;; because the predicate might have side-effects or be very expensive to +;;; compute. So we preallocate a temp buffer pessimistically, and only do +;;; one scan over S. This is likely to be faster and more space-efficient +;;; than consing a list. + +(define (string-delete criterion s . maybe-start+end) + (let-string-start+end (start end) string-delete s maybe-start+end + (if (procedure? criterion) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criterion c) i + (begin (string-set! temp i c) + (+ i 1)))) + 0 s start end))) + (if (= ans-len slen) temp (substring temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criterion) criterion) + ((char? criterion) (char-set criterion)) + (else (error "string-delete criterion not predicate, char or char-set" criterion)))) + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (+ i 1))) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (begin (string-set! ans i c) + (+ i 1)))) + 0 s start end) + ans)))) + +(define (string-filter criterion s . maybe-start+end) + (let-string-start+end (start end) string-filter s maybe-start+end + (if (procedure? criterion) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criterion c) + (begin (string-set! temp i c) + (+ i 1)) + i)) + 0 s start end))) + (if (= ans-len slen) temp (substring temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criterion) criterion) + ((char? criterion) (char-set criterion)) + (else (error "string-delete criterion not predicate, char or char-set" criterion)))) + + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + (+ i 1) + i)) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + (begin (string-set! ans i c) + (+ i 1)) + i)) + 0 s start end) + ans)))) + + +;;; String search +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-index string char/char-set/pred [start end] +;;; string-index-right string char/char-set/pred [start end] +;;; string-skip string char/char-set/pred [start end] +;;; string-skip-right string char/char-set/pred [start end] +;;; string-count string char/char-set/pred [start end] +;;; There's a lot of replicated code here for efficiency. +;;; For example, the char/char-set/pred discrimination has +;;; been lifted above the inner loop of each proc. + +(define (string-index str criterion . maybe-start+end) + (let-string-start+end (start end) string-index str maybe-start+end + (cond ((char? criterion) + (let lp ((i start)) + (and (< i end) + (if (char=? criterion (string-ref str i)) i + (lp (+ i 1)))))) + ((char-set? criterion) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criterion (string-ref str i)) i + (lp (+ i 1)))))) + ((procedure? criterion) + (let lp ((i start)) + (and (< i end) + (if (criterion (string-ref str i)) i + (lp (+ i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index criterion))))) + +(define (string-index-right str criterion . maybe-start+end) + (let-string-start+end (start end) string-index-right str maybe-start+end + (cond ((char? criterion) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criterion (string-ref str i)) i + (lp (- i 1)))))) + ((char-set? criterion) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criterion (string-ref str i)) i + (lp (- i 1)))))) + ((procedure? criterion) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criterion (string-ref str i)) i + (lp (- i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index-right criterion))))) + +(define (string-skip str criterion . maybe-start+end) + (let-string-start+end (start end) string-skip str maybe-start+end + (cond ((char? criterion) + (let lp ((i start)) + (and (< i end) + (if (char=? criterion (string-ref str i)) + (lp (+ i 1)) + i)))) + ((char-set? criterion) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criterion (string-ref str i)) + (lp (+ i 1)) + i)))) + ((procedure? criterion) + (let lp ((i start)) + (and (< i end) + (if (criterion (string-ref str i)) (lp (+ i 1)) + i)))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-skip criterion))))) + +(define (string-skip-right str criterion . maybe-start+end) + (let-string-start+end (start end) string-skip-right str maybe-start+end + (cond ((char? criterion) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criterion (string-ref str i)) + (lp (- i 1)) + i)))) + ((char-set? criterion) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criterion (string-ref str i)) + (lp (- i 1)) + i)))) + ((procedure? criterion) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criterion (string-ref str i)) (lp (- i 1)) + i)))) + (else (error "CRITERION param is neither char-set or char." + string-skip-right criterion))))) + + +(define (string-count s criterion . maybe-start+end) + (let-string-start+end (start end) string-count s maybe-start+end + (cond ((char? criterion) + (do ((i start (+ i 1)) + (count 0 (if (char=? criterion (string-ref s i)) + (+ count 1) + count))) + ((>= i end) count))) + + ((char-set? criterion) + (do ((i start (+ i 1)) + (count 0 (if (char-set-contains? criterion (string-ref s i)) + (+ count 1) + count))) + ((>= i end) count))) + + ((procedure? criterion) + (do ((i start (+ i 1)) + (count 0 (if (criterion (string-ref s i)) (+ count 1) count))) + ((>= i end) count))) + + (else (error "CRITERION param is neither char-set or char." + string-count criterion))))) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-fill! string char [start end] +;;; +;;; string-copy! to tstart from [fstart fend] +;;; Guaranteed to work, even if s1 eq s2. + +(define (string-fill! s char . maybe-start+end) + (check-arg char? char string-fill!) + (let-string-start+end (start end) string-fill! s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i char)))) + +(define (string-copy! to tstart from . maybe-fstart+fend) + (let-string-start+end (fstart fend) string-copy! from maybe-fstart+fend + (check-arg integer? tstart string-copy!) + (check-substring-spec string-copy! to tstart (+ tstart (- fend fstart))) + (%string-copy! to tstart from fstart fend))) + +;;; Library-internal routine +(define (%string-copy! to tstart from fstart fend) + (if (> fstart tstart) + (do ((i fstart (+ i 1)) + (j tstart (+ j 1))) + ((>= i fend)) + (string-set! to j (string-ref from i))) + + (do ((i (- fend 1) (- i 1)) + (j (+ -1 tstart (- fend fstart)) (- j 1))) + ((< i fstart)) + (string-set! to j (string-ref from i))))) + + + +;;; Returns starting-position in STRING or #f if not true. +;;; This implementation is slow & simple. It is useful as a "spec" or for +;;; comparison testing with fancier implementations. +;;; See below for fast KMP version. + +;(define (string-contains string substring . maybe-starts+ends) +; (let-string-start+end2 (start1 end1 start2 end2) +; string-contains string substring maybe-starts+ends +; (let* ((len (- end2 start2)) +; (i-bound (- end1 len))) +; (let lp ((i start1)) +; (and (< i i-bound) +; (if (string= string substring i (+ i len) start2 end2) +; i +; (lp (+ i 1)))))))) + + +;;; Searching for an occurrence of a substring +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (string-contains text pattern . maybe-starts+ends) + (let-string-start+end2 (t-start t-end p-start p-end) + string-contains text pattern maybe-starts+ends + (%kmp-search pattern text char=? p-start p-end t-start t-end))) + +(define (string-contains-ci text pattern . maybe-starts+ends) + (let-string-start+end2 (t-start t-end p-start p-end) + string-contains-ci text pattern maybe-starts+ends + (%kmp-search pattern text char-ci=? p-start p-end t-start t-end))) + + +;;; Knuth-Morris-Pratt string searching +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; See +;;; "Fast pattern matching in strings" +;;; SIAM J. Computing 6(2):323-350 1977 +;;; D. E. Knuth, J. H. Morris and V. R. Pratt +;;; also described in +;;; "Pattern matching in strings" +;;; Alfred V. Aho +;;; Formal Language Theory - Perspectives and Open Problems +;;; Ronald V. Brook (editor) +;;; This algorithm is O(m + n) where m and n are the +;;; lengths of the pattern and string respectively + +;;; KMP search source[start,end) for PATTERN. Return starting index of +;;; leftmost match or #f. + +(define (%kmp-search pattern text c= p-start p-end t-start t-end) + (let ((plen (- p-end p-start)) + (rv (make-kmp-restart-vector pattern c= p-start p-end))) + + ;; The search loop. TJ & PJ are redundant state. + (let lp ((ti t-start) (pi 0) + (tj (- t-end t-start)) ; (- tlen ti) -- how many chars left. + (pj plen)) ; (- plen pi) -- how many chars left. + + (if (= pi plen) (- ti plen) ; Win. + + (and (<= pj tj) ; Lose. + + (if (c= (string-ref text ti) ; Search. + (string-ref pattern (+ p-start pi))) + (lp (+ 1 ti) (+ 1 pi) (- tj 1) (- pj 1)) ; Advance. + + (let ((pi (vector-ref rv pi))) ; Retreat. + (if (= pi -1) + (lp (+ ti 1) 0 (- tj 1) plen) ; Punt. + (lp ti pi tj (- plen pi)))))))))) + +;;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Compute the KMP restart vector RV for string PATTERN. If +;;; we have matched chars 0..i-1 of PATTERN against a search string S, and +;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to +;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to +;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k]. +;;; +;;; In other words, if you have matched the first i chars of PATTERN, but +;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest +;;; prefix of PATTERN is that you have matched. +;;; +;;; - C= (default CHAR=?) is used to compare characters for equality. +;;; Pass in CHAR-CI=? for case-folded string search. +;;; +;;; - START & END restrict the pattern to the indicated substring; the +;;; returned vector will be of length END - START. The numbers stored +;;; in the vector will be values in the range [0,END-START) -- that is, +;;; they are valid indices into the restart vector; you have to add START +;;; to them to use them as indices into PATTERN. +;;; +;;; I've split this out as a separate function in case other constant-string +;;; searchers might want to use it. +;;; +;;; E.g.: +;;; a b d a b x +;;; #(-1 0 0 -1 1 2) + +(define (make-kmp-restart-vector pattern . maybe-c=+start+end) + (let-optionals* maybe-c=+start+end + ((c= char=? (procedure? c=)) + ((start end) (lambda (args) + (string-parse-start+end make-kmp-restart-vector + pattern args)))) + (let* ((rvlen (- end start)) + (rv (make-vector rvlen -1))) + (if (> rvlen 0) + (let ((rvlen-1 (- rvlen 1)) + (c0 (string-ref pattern start))) + + ;; Here's the main loop. We have set rv[0] ... rv[i]. + ;; K = I + START -- it is the corresponding index into PATTERN. + (let lp1 ((i 0) (j -1) (k start)) + (if (< i rvlen-1) + + (let ((ck (string-ref pattern k))) + ;; lp2 invariant: + ;; pat[(k-j) .. k-1] matches pat[start .. start+j-1] + ;; or j = -1. + (let lp2 ((j j)) + + (cond ((= j -1) + (let ((i1 (+ i 1))) + (vector-set! rv i1 (if (c= ck c0) -1 0)) + (lp1 i1 0 (+ k 1)))) + + ;; pat[(k-j) .. k] matches pat[start..start+j]. + ((c= ck (string-ref pattern (+ j start))) + (let* ((i1 (+ 1 i)) + (j1 (+ 1 j))) + (vector-set! rv i1 j1) + (lp1 i1 j1 (+ k 1)))) + + (else (lp2 (vector-ref rv j)))))))))) + rv))) + + +;;; We've matched I chars from PAT. C is the next char from the search string. +;;; Return the new I after handling C. +;;; +;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START +;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched +;;; are +;;; PAT[PAT-START .. PAT-START + I]. +;;; +;;; It's *not* an oversight that there is no friendly error checking or +;;; defaulting of arguments. This is a low-level, inner-loop procedure +;;; that we want integrated/inlined into the point of call. + +(define (kmp-step pat rv c i c= p-start) + (let lp ((i i)) + (if (c= c (string-ref pat (+ i p-start))) ; Match => + (+ i 1) ; Done. + (let ((i (vector-ref rv i))) ; Back up in PAT. + (if (= i -1) 0 ; Can't back up further. + (lp i)))))) ; Keep trying for match. + +;;; Zip through S[start,end), looking for a match of PAT. Assume we've +;;; already matched the first I chars of PAT when we commence at S[start]. +;;; - <0: If we find a match *ending* at index J, return -J. +;;; - >=0: If we get to the end of the S[start,end) span without finding +;;; a complete match, return the number of chars from PAT we'd matched +;;; when we ran off the end. +;;; +;;; This is useful for searching *across* buffers -- that is, when your +;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop +;;; for speed. + +(define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end) + (check-arg vector? rv string-kmp-partial-search) + (let-optionals* c=+p-start+s-start+s-end + ((c= char=? (procedure? c=)) + (p-start 0 (and (integer? p-start) (exact? p-start) (<= 0 p-start))) + ((s-start s-end) (lambda (args) + (string-parse-start+end string-kmp-partial-search + s args)))) + (let ((patlen (vector-length rv))) + (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i) (< i patlen))) + i string-kmp-partial-search) + + ;; Enough prelude. Here's the actual code. + (let lp ((si s-start) ; An index into S. + (vi i)) ; An index into RV. + (cond ((= vi patlen) (- si)) ; Win. + ((= si s-end) vi) ; Ran off the end. + (else ; Match s[si] & loop. + (let ((c (string-ref s si))) + (lp (+ si 1) + (let lp2 ((vi vi)) ; This is just KMP-STEP. + (if (c= c (string-ref pat (+ vi p-start))) + (+ vi 1) + (let ((vi (vector-ref rv vi))) + (if (= vi -1) 0 + (lp2 vi))))))))))))) + + +;;; Misc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (string-null? s) +;;; (string-reverse s [start end]) +;;; (string-reverse! s [start end]) +;;; (reverse-list->string clist) +;;; (string->list s [start end]) + +(define (string-null? s) (zero? (string-length s))) + +(define (string-reverse s . maybe-start+end) + (let-string-start+end (start end) string-reverse s maybe-start+end + (let* ((len (- end start)) + (ans (make-string len))) + (do ((i start (+ i 1)) + (j (- len 1) (- j 1))) + ((< j 0)) + (string-set! ans j (string-ref s i))) + ans))) + +(define (string-reverse! s . maybe-start+end) + (let-string-start+end (start end) string-reverse! s maybe-start+end + (do ((i (- end 1) (- i 1)) + (j start (+ j 1))) + ((<= i j)) + (let ((ci (string-ref s i))) + (string-set! s i (string-ref s j)) + (string-set! s j ci))))) + + +(define (reverse-list->string clist) + (let* ((len (length clist)) + (s (make-string len))) + (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) + ((not (pair? clist))) + (string-set! s i (car clist))) + s)) + + +;(define (string->list s . maybe-start+end) +; (apply string-fold-right cons '() s maybe-start+end)) + +(define (string->list s . maybe-start+end) + (let-string-start+end (start end) string->list s maybe-start+end + (do ((i (- end 1) (- i 1)) + (ans '() (cons (string-ref s i) ans))) + ((< i start) ans)))) + +;;; Defined by R5RS, so commented out here. +;(define (list->string lis) (string-unfold null? car cdr lis)) + + +;;; string-concatenate string-list -> string +;;; string-concatenate/shared string-list -> string +;;; string-append/shared s ... -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; STRING-APPEND/SHARED has license to return a string that shares storage +;;; with any of its arguments. In particular, if there is only one non-empty +;;; string amongst its parameters, it is permitted to return that string as +;;; its result. STRING-APPEND, by contrast, always allocates new storage. +;;; +;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of +;;; strings, which they concatenate into a result string. STRING-CONCATENATE +;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may +;;; not) return a result that shares storage with any of its arguments. In +;;; particular, if it is applied to a singleton list, it is permitted to +;;; return the car of that list as its value. + +(define (string-append/shared . strings) (string-concatenate/shared strings)) + +(define (string-concatenate/shared strings) + (let lp ((strings strings) (nchars 0) (first #f)) + (cond ((pair? strings) ; Scan the args, add up total + (let* ((string (car strings)) ; length, remember 1st + (tail (cdr strings)) ; non-empty string. + (slen (string-length string))) + (if (zero? slen) + (lp tail nchars first) + (lp tail (+ nchars slen) (or first strings))))) + + ((zero? nchars) "") + + ;; Just one non-empty string! Return it. + ((= nchars (string-length (car first))) (car first)) + + (else (let ((ans (make-string nchars))) + (let lp ((strings first) (i 0)) + (if (pair? strings) + (let* ((s (car strings)) + (slen (string-length s))) + (%string-copy! ans i s 0 slen) + (lp (cdr strings) (+ i slen))))) + ans))))) + + +; Alas, Scheme 48's APPLY blows up if you have many, many arguments. +;(define (string-concatenate strings) (apply string-append strings)) + +;;; Here it is written out. I avoid using REDUCE to add up string lengths +;;; to avoid non-R5RS dependencies. +(define (string-concatenate strings) + (let* ((total (do ((strings strings (cdr strings)) + (i 0 (+ i (string-length (car strings))))) + ((not (pair? strings)) i))) + (ans (make-string total))) + (let lp ((i 0) (strings strings)) + (if (pair? strings) + (let* ((s (car strings)) + (slen (string-length s))) + (%string-copy! ans i s 0 slen) + (lp (+ i slen) (cdr strings))))) + ans)) + + +;;; Defined by R5RS, so commented out here. +;(define (string-append . strings) (string-concatenate strings)) + +;;; string-concatenate-reverse string-list [final-string end] -> string +;;; string-concatenate-reverse/shared string-list [final-string end] -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Return +;;; (string-concatenate +;;; (reverse +;;; (cons (substring final-string 0 end) string-list))) + +(define (string-concatenate-reverse string-list . maybe-final+end) + (let-optionals* maybe-final+end ((final "" (string? final)) + (end (string-length final) + (and (integer? end) + (exact? end) + (<= 0 end (string-length final))))) + (let ((len (let lp ((sum 0) (lis string-list)) + (if (pair? lis) + (lp (+ sum (string-length (car lis))) (cdr lis)) + sum)))) + + (%finish-string-concatenate-reverse len string-list final end)))) + +(define (string-concatenate-reverse/shared string-list . maybe-final+end) + (let-optionals* maybe-final+end ((final "" (string? final)) + (end (string-length final) + (and (integer? end) + (exact? end) + (<= 0 end (string-length final))))) + ;; Add up the lengths of all the strings in STRING-LIST; also get a + ;; pointer NZLIST into STRING-LIST showing where the first non-zero-length + ;; string starts. + (let lp ((len 0) (nzlist #f) (lis string-list)) + (if (pair? lis) + (let ((slen (string-length (car string-list)))) + (lp (+ len slen) + (if (or nzlist (zero? slen)) nzlist lis) + (cdr lis))) + + (cond ((zero? len) (substring/shared final 0 end)) + + ;; LEN > 0, so NZLIST is non-empty. + + ((and (zero? end) (= len (string-length (car nzlist)))) + (car nzlist)) + + (else (%finish-string-concatenate-reverse len nzlist final end))))))) + +(define (%finish-string-concatenate-reverse len string-list final end) + (let ((ans (make-string (+ end len)))) + (%string-copy! ans len final 0 end) + (let lp ((i len) (lis string-list)) + (if (pair? lis) + (let* ((s (car lis)) + (lis (cdr lis)) + (slen (string-length s)) + (i (- i slen))) + (%string-copy! ans i s 0 slen) + (lp i lis)))) + ans)) + + + + +;;; string-replace s1 s2 start1 end1 [start2 end2] -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Replace S1[START1,END1) with S2[START2,END2). + +(define (string-replace s1 s2 start1 end1 . maybe-start+end) + (check-substring-spec string-replace s1 start1 end1) + (let-string-start+end (start2 end2) string-replace s2 maybe-start+end + (let* ((slen1 (string-length s1)) + (sublen2 (- end2 start2)) + (alen (+ (- slen1 (- end1 start1)) sublen2)) + (ans (make-string alen))) + (%string-copy! ans 0 s1 0 start1) + (%string-copy! ans start1 s2 start2 end2) + (%string-copy! ans (+ start1 sublen2) s1 end1 slen1) + ans))) + + +;;; string-tokenize s [token-set start end] -> list +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Break S up into a list of token strings, where a token is a maximal +;;; non-empty contiguous sequence of chars belonging to TOKEN-SET. +;;; (string-tokenize "hello, world") => ("hello," "world") + +(define (string-tokenize s . token-chars+start+end) + (let-optionals* token-chars+start+end + ((token-chars char-set:graphic (char-set? token-chars)) rest) + (let-string-start+end (start end) string-tokenize s rest + (let lp ((i end) (ans '())) + (cond ((and (< start i) (string-index-right s token-chars start i)) => + (lambda (tend-1) + (let ((tend (+ 1 tend-1))) + (cond ((string-skip-right s token-chars start tend-1) => + (lambda (tstart-1) + (lp tstart-1 + (cons (substring s (+ 1 tstart-1) tend) + ans)))) + (else (cons (substring s start tend) ans)))))) + (else ans)))))) + + +;;; xsubstring s from [to start end] -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; S is a string; START and END are optional arguments that demarcate +;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole +;;; string). Replicate this substring up and down index space, in both the +;; positive and negative directions. For example, if S = "abcdefg", START=3, +;;; and END=6, then we have the conceptual bidirectionally-infinite string +;;; ... d e f d e f d e f d e f d e f d e f d e f ... +;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... +;;; XSUBSTRING returns the substring of this string beginning at index FROM, +;;; and ending at TO (which defaults to FROM+(END-START)). +;;; +;;; You can use XSUBSTRING in many ways: +;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" +;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" +;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" +;;; +;;; Note that +;;; - The FROM/TO indices give a half-open range -- the characters from +;;; index FROM up to, but not including index TO. +;;; - The FROM/TO indices are not in terms of the index space for string S. +;;; They are in terms of the replicated index space of the substring +;;; defined by S, START, and END. +;;; +;;; It is an error if START=END -- although this is allowed by special +;;; dispensation when FROM=TO. + +(define (xsubstring s from . maybe-to+start+end) + (check-arg (lambda (val) (and (integer? val) (exact? val))) + from xsubstring) + (receive (to start end) + (if (pair? maybe-to+start+end) + (let-string-start+end (start end) xsubstring s (cdr maybe-to+start+end) + (let ((to (car maybe-to+start+end))) + (check-arg (lambda (val) (and (integer? val) + (exact? val) + (<= from val))) + to xsubstring) + (values to start end))) + (let ((slen (string-length (check-arg string? s xsubstring)))) + (values (+ from slen) 0 slen))) + (let ((slen (- end start)) + (anslen (- to from))) + (cond ((zero? anslen) "") + ((zero? slen) (error "Cannot replicate empty (sub)string" + xsubstring s from to start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (make-string anslen (string-ref s start))) + + ;; Selected text falls entirely within one span. + ((= (floor (/ from slen)) (floor (/ to slen))) + (substring s (+ start (modulo from slen)) + (+ start (modulo to slen)))) + + ;; Selected text requires multiple spans. + (else (let ((ans (make-string anslen))) + (%multispan-repcopy! ans 0 s from to start end) + ans)))))) + + +;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Exactly the same as xsubstring, but the extracted text is written +;;; into the string TARGET starting at index TSTART. +;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy +;;; a string on top of itself. + +(define (string-xcopy! target tstart s sfrom . maybe-sto+start+end) + (check-arg (lambda (val) (and (integer? val) (exact? val))) + sfrom string-xcopy!) + (receive (sto start end) + (if (pair? maybe-sto+start+end) + (let-string-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end) + (let ((sto (car maybe-sto+start+end))) + (check-arg (lambda (val) (and (integer? val) (exact? val))) + sto string-xcopy!) + (values sto start end))) + (let ((slen (string-length s))) + (values (+ sfrom slen) 0 slen))) + + (let* ((tocopy (- sto sfrom)) + (tend (+ tstart tocopy)) + (slen (- end start))) + (check-substring-spec string-xcopy! target tstart tend) + (cond ((zero? tocopy)) + ((zero? slen) (error "Cannot replicate empty (sub)string" + string-xcopy! + target tstart s sfrom sto start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (string-fill! target (string-ref s start) tstart tend)) + + ;; Selected text falls entirely within one span. + ((= (floor (/ sfrom slen)) (floor (/ sto slen))) + (%string-copy! target tstart s + (+ start (modulo sfrom slen)) + (+ start (modulo sto slen)))) + + ;; Multi-span copy. + (else (%multispan-repcopy! target tstart s sfrom sto start end)))))) + +;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY! +;;; Internal -- not exported, no careful arg checking. +(define (%multispan-repcopy! target tstart s sfrom sto start end) + (let* ((slen (- end start)) + (i0 (+ start (modulo sfrom slen))) + (total-chars (- sto sfrom))) + + ;; Copy the partial span @ the beginning + (%string-copy! target tstart s i0 end) + + (let* ((ncopied (- end i0)) ; We've copied this many. + (nleft (- total-chars ncopied)) ; # chars left to copy. + (nspans (quotient nleft slen))) ; # whole spans to copy + + ;; Copy the whole spans in the middle. + (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index. + (nspans nspans (- nspans 1))) ; # spans to copy + ((zero? nspans) + ;; Copy the partial-span @ the end & we're done. + (%string-copy! target i s start (+ start (- total-chars (- i tstart))))) + + (%string-copy! target i s start end))))); Copy a whole span. + + + +;;; (string-join string-list [delimiter grammar]) => string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Paste strings together using the delimiter string. +;;; +;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" +;;; +;;; DELIMITER defaults to a single space " " +;;; GRAMMAR is one of the symbols {prefix, infix, strict-infix, suffix} +;;; and defaults to 'infix. +;;; +;;; I could rewrite this more efficiently -- precompute the length of the +;;; answer string, then allocate & fill it in iteratively. Using +;;; STRING-CONCATENATE is less efficient. + +(define (string-join strings . delim+grammar) + (let-optionals* delim+grammar ((delim " " (string? delim)) + (grammar 'infix)) + (let ((buildit (lambda (lis final) + (let recur ((lis lis)) + (if (pair? lis) + (cons delim (cons (car lis) (recur (cdr lis)))) + final))))) + + (cond ((pair? strings) + (string-concatenate + (case grammar + + ((infix strict-infix) + (cons (car strings) (buildit (cdr strings) '()))) + + ((prefix) (buildit strings '())) + + ((suffix) + (cons (car strings) (buildit (cdr strings) (list delim)))) + + (else (error "Illegal join grammar" + grammar string-join))))) + + ((not (null? strings)) + (error "STRINGS parameter not list." strings string-join)) + + ;; STRINGS is () + + ((eq? grammar 'strict-infix) + (error "Empty list cannot be joined with STRICT-INFIX grammar." + string-join)) + + (else ""))))) ; Special-cased for infix grammar. + + +;;; Porting & performance-tuning notes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; See the section at the beginning of this file on external dependencies. +;;; +;;; The biggest issue with respect to porting is the LET-OPTIONALS* macro. +;;; There are many, many optional arguments in this library; the complexity +;;; of parsing, defaulting & type-testing these parameters is handled with the +;;; aid of this macro. There are about 15 uses of LET-OPTIONALS*. You can +;;; rewrite the uses, port the hairy macro definition (which is implemented +;;; using a Clinger-Rees low-level explicit-renaming macro system), or port +;;; the simple, high-level definition, which is less efficient. +;;; +;;; There is a fair amount of argument checking. This is, strictly speaking, +;;; unnecessary -- the actual body of the procedures will blow up if, say, a +;;; START/END index is improper. However, the error message will not be as +;;; good as if the error were caught at the "higher level." Also, a very, very +;;; smart Scheme compiler may be able to exploit having the type checks done +;;; early, so that the actual body of the procedures can assume proper values. +;;; This isn't likely; this kind of compiler technology isn't common any +;;; longer. +;;; +;;; The overhead of optional-argument parsing is irritating. The optional +;;; arguments must be consed into a rest list on entry, and then parsed out. +;;; Function call should be a matter of a few register moves and a jump; it +;;; should not involve heap allocation! Your Scheme system may have a superior +;;; non-R5RS optional-argument system that can eliminate this overhead. If so, +;;; then this is a prime candidate for optimising these procedures, +;;; *especially* the many optional START/END index parameters. +;;; +;;; Note that optional arguments are also a barrier to procedure integration. +;;; If your Scheme system permits you to specify alternate entry points +;;; for a call when the number of optional arguments is known in a manner +;;; that enables inlining/integration, this can provide performance +;;; improvements. +;;; +;;; There is enough *explicit* error checking that *all* string-index +;;; operations should *never* produce a bounds error. Period. Feel like +;;; living dangerously? *Big* performance win to be had by replacing +;;; STRING-REF's and STRING-SET!'s with unsafe equivalents in the loops. +;;; Similarly, fixnum-specific operators can speed up the arithmetic done on +;;; the index values in the inner loops. The only arguments that are not +;;; completely error checked are +;;; - string lists (complete checking requires time proportional to the +;;; length of the list) +;;; - procedure arguments, such as char->char maps & predicates. +;;; There is no way to check the range & domain of procedures in Scheme. +;;; Procedures that take these parameters cannot fully check their +;;; arguments. But all other types to all other procedures are fully +;;; checked. +;;; +;;; This does open up the alternate possibility of simply *removing* these +;;; checks, and letting the safe primitives raise the errors. On a dumb +;;; Scheme system, this would provide speed (by eliminating the redundant +;;; error checks) at the cost of error-message clarity. +;;; +;;; See the comments preceding the hash function code for notes on tuning +;;; the default bound so that the code never overflows your implementation's +;;; fixnum size into bignum calculation. +;;; +;;; In an interpreted Scheme, some of these procedures, or the internal +;;; routines with % prefixes, are excellent candidates for being rewritten +;;; in C. Consider STRING-HASH, %STRING-COMPARE, the +;;; %STRING-{SUF,PRE}FIX-LENGTH routines, STRING-COPY!, STRING-INDEX & +;;; STRING-SKIP (char-set & char cases), SUBSTRING and SUBSTRING/SHARED, +;;; %KMP-SEARCH, and %MULTISPAN-REPCOPY!. +;;; +;;; It would also be nice to have the ability to mark some of these +;;; routines as candidates for inlining/integration. +;;; +;;; All the %-prefixed routines in this source code are written +;;; to be called internally to this library. They do *not* perform +;;; friendly error checks on the inputs; they assume everything is +;;; proper. They also do not take optional arguments. These two properties +;;; save calling overhead and enable procedure integration -- but they +;;; are not appropriate for exported routines. + + +;;; Copyright details +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The prefix/suffix and comparison routines in this code had (extremely +;;; distant) origins in MIT Scheme's string lib, and was substantially +;;; reworked by Olin Shivers (shivers@ai.mit.edu) 9/98. As such, it is +;;; covered by MIT Scheme's open source copyright. See below for details. +;;; +;;; The KMP string-search code was influenced by implementations written +;;; by Stephen Bevan, Brian Dehneyer and Will Fitzgerald. However, this +;;; version was written from scratch by myself. +;;; +;;; The remainder of this code was written from scratch by myself for scsh. +;;; The scsh copyright is a BSD-style open source copyright. See below for +;;; details. +;;; -Olin Shivers + +;;; MIT Scheme copyright terms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This material was developed by the Scheme project at the Massachusetts +;;; Institute of Technology, Department of Electrical Engineering and +;;; Computer Science. Permission to copy and modify this software, to +;;; redistribute either the original software or a modified version, and +;;; to use this software for any purpose is granted, subject to the +;;; following restrictions and understandings. +;;; +;;; 1. Any copy made of this software must include this copyright notice +;;; in full. +;;; +;;; 2. Users of this software agree to make their best efforts (a) to +;;; return to the MIT Scheme project any improvements or extensions that +;;; they make, so that these may be included in future releases; and (b) +;;; to inform MIT of noteworthy uses of this software. +;;; +;;; 3. All materials developed as a consequence of the use of this +;;; software shall duly acknowledge such use, in accordance with the usual +;;; standards of acknowledging credit in academic research. +;;; +;;; 4. MIT has made no warrantee or representation that the operation of +;;; this software will be error-free, and MIT is under no obligation to +;;; provide any services, by way of maintenance, update, or otherwise. +;;; +;;; 5. In conjunction with products arising from the use of this material, +;;; there shall be no use of the name of the Massachusetts Institute of +;;; Technology nor of any adaptation thereof in any advertising, +;;; promotional, or sales literature without prior written consent from +;;; MIT in each case. + +;;; Scsh copyright terms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; All rights reserved. +;;; +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: +;;; 1. Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. +;;; 2. Redistributions in binary form must reproduce the above copyright +;;; notice, this list of conditions and the following disclaimer in the +;;; documentation and/or other materials provided with the distribution. +;;; 3. The name of the authors may not be used to endorse or promote products +;;; derived from this software without specific prior written permission. +;;; +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR +;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/scheme/srfi/srfi-14.scm b/scheme/srfi/srfi-14.scm new file mode 100644 index 0000000..d217da4 --- /dev/null +++ b/scheme/srfi/srfi-14.scm @@ -0,0 +1,901 @@ +;;; SRFI-14 character-sets library -*- Scheme -*- +;;; +;;; - Ported from MIT Scheme runtime by Brian D. Carlstrom. +;;; - Massively rehacked & extended by Olin Shivers 6/98. +;;; - Massively redesigned and rehacked 5/2000 during SRFI process. +;;; At this point, the code bears the following relationship to the +;;; MIT Scheme code: "This is my grandfather's axe. My father replaced +;;; the head, and I have replaced the handle." Nonetheless, we preserve +;;; the MIT Scheme copyright: +;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology +;;; The MIT Scheme license is a "free software" license. See the end of +;;; this file for the tedious details. + +;;; Exports: +;;; char-set? char-set= char-set<= +;;; char-set-hash +;;; char-set-cursor char-set-ref char-set-cursor-next end-of-char-set? +;;; char-set-fold char-set-unfold char-set-unfold! +;;; char-set-for-each char-set-map +;;; char-set-copy char-set +;;; +;;; list->char-set string->char-set +;;; list->char-set! string->char-set! +;;; +;;; filterchar-set ucs-range->char-set ->char-set +;;; filterchar-set! ucs-range->char-set! +;;; +;;; char-set->list char-set->string +;;; +;;; char-set-size char-set-count char-set-contains? +;;; char-set-every char-set-any +;;; +;;; char-set-adjoin char-set-delete +;;; char-set-adjoin! char-set-delete! +;;; + +;;; char-set-complement char-set-union char-set-intersection +;;; char-set-complement! char-set-union! char-set-intersection! +;;; +;;; char-set-difference char-set-xor char-set-diff+intersection +;;; char-set-difference! char-set-xor! char-set-diff+intersection! +;;; +;;; char-set:lower-case char-set:upper-case char-set:title-case +;;; char-set:letter char-set:digit char-set:letter+digit +;;; char-set:graphic char-set:printing char-set:whitespace +;;; char-set:iso-control char-set:punctuation char-set:symbol +;;; char-set:hex-digit char-set:blank char-set:ascii +;;; char-set:empty char-set:full + +;;; Imports +;;; This code has the following non-R5RS dependencies: +;;; - ERROR +;;; - %LATIN1->CHAR %CHAR->LATIN1 +;;; - LET-OPTIONALS* and :OPTIONAL macros for parsing, checking & defaulting +;;; optional arguments from rest lists. +;;; - BITWISE-AND for CHAR-SET-HASH +;;; - The SRFI-19 DEFINE-RECORD-TYPE record macro +;;; - A simple CHECK-ARG procedure: +;;; (lambda (pred val caller) (if (not (pred val)) (error val caller))) + +;;; This is simple code, not great code. Char sets are represented as 256-char +;;; strings. If char I is ASCII/Latin-1 0, then it isn't in the set; if char I +;;; is ASCII/Latin-1 1, then it is in the set. +;;; - Should be rewritten to use bit strings or byte vecs. +;;; - Is Latin-1 specific. Would certainly have to be rewritten for Unicode. + +;;; See the end of the file for porting and performance-tuning notes. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Start S48 additions + +(define (check-arg pred val caller) + (if (not (pred val)) + (error val caller)) + val) + +(define-syntax :optional + (syntax-rules () + ((:optional rest default-exp) + (let ((maybe-arg rest)) + (if (pair? maybe-arg) + (if (null? (cdr maybe-arg)) (car maybe-arg) + (error "too many optional arguments" maybe-arg)) + default-exp))) + + ((:optional rest default-exp arg-test) + (let ((maybe-arg rest)) + (if (pair? maybe-arg) + (if (null? (cdr maybe-arg)) + (let ((val (car maybe-arg))) + (if (arg-test val) val + (error "Optional argument failed test" + 'arg-test val))) + (error "too many optional arguments" maybe-arg)) + default-exp))))) + +(define-syntax let-optionals* + (syntax-rules () + ((let-optionals* arg (opt-clause ...) body ...) + (let ((rest arg)) + (%let-optionals* rest (opt-clause ...) body ...))))) + +(define-syntax %let-optionals* + (syntax-rules () + ((%let-optionals* arg (((var ...) xparser) opt-clause ...) body ...) + (call-with-values (lambda () (xparser arg)) + (lambda (rest var ...) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg ((var default) opt-clause ...) body ...) + (call-with-values (lambda () (if (null? arg) (values default '()) + (values (car arg) (cdr arg)))) + (lambda (var rest) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg ((var default test) opt-clause ...) body ...) + (call-with-values (lambda () + (if (null? arg) (values default '()) + (let ((var (car arg))) + (if test (values var (cdr arg)) + (error "arg failed LET-OPT test" var))))) + (lambda (var rest) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg ((var default test supplied?) opt-clause ...) body ...) + (call-with-values (lambda () + (if (null? arg) (values default #f '()) + (let ((var (car arg))) + (if test (values var #t (cdr arg)) + (error "arg failed LET-OPT test" var))))) + (lambda (var supplied? rest) + (%let-optionals* rest (opt-clause ...) body ...)))) + + ((%let-optionals* arg (rest) body ...) + (let ((rest arg)) body ...)) + + ((%let-optionals* arg () body ...) + (if (null? arg) (begin body ...) + (error "Too many arguments in let-opt" arg))))) + +; End S48 additions + +(define-record-type :char-set + (make-char-set s) + char-set? + (s char-set:s)) + +(define (%string-copy s) (substring s 0 (string-length s))) + +;;; Parse, type-check & default a final optional BASE-CS parameter from +;;; a rest argument. Return a *fresh copy* of the underlying string. +;;; The default is the empty set. The PROC argument is to help us +;;; generate informative error exceptions. + +(define (%default-base maybe-base proc) + (if (pair? maybe-base) + (let ((bcs (car maybe-base)) + (tail (cdr maybe-base))) + (if (null? tail) + (if (char-set? bcs) (%string-copy (char-set:s bcs)) + (error "BASE-CS parameter not a char-set" proc bcs)) + (error "Expected final base char set -- too many parameters" + proc maybe-base))) + (make-string 256 (%latin1->char 0)))) + +;;; If CS is really a char-set, do CHAR-SET:S, otw report an error msg on +;;; behalf of our caller, PROC. This procedure exists basically to provide +;;; explicit error-checking & reporting. + +(define (%char-set:s/check cs proc) + (let lp ((cs cs)) + (if (char-set? cs) (char-set:s cs) + (lp (error "Not a char-set" cs proc))))) + + + +;;; These internal functions hide a lot of the dependency on the +;;; underlying string representation of char sets. They should be +;;; inlined if possible. + +(define (si=0? s i) (zero? (%char->latin1 (string-ref s i)))) +(define (si=1? s i) (not (si=0? s i))) +(define c0 (%latin1->char 0)) +(define c1 (%latin1->char 1)) +(define (si s i) (%char->latin1 (string-ref s i))) +(define (%set0! s i) (string-set! s i c0)) +(define (%set1! s i) (string-set! s i c1)) + +;;; These do various "s[i] := s[i] op val" operations -- see +;;; %CHAR-SET-ALGEBRA. They are used to implement the various +;;; set-algebra procedures. +(define (setv! s i v) (string-set! s i (%latin1->char v))) ; SET to a Value. +(define (%not! s i v) (setv! s i (- 1 v))) +(define (%and! s i v) (if (zero? v) (%set0! s i))) +(define (%or! s i v) (if (not (zero? v)) (%set1! s i))) +(define (%minus! s i v) (if (not (zero? v)) (%set0! s i))) +(define (%xor! s i v) (if (not (zero? v)) (setv! s i (- 1 (si s i))))) + + +(define (char-set-copy cs) + (make-char-set (%string-copy (%char-set:s/check cs char-set-copy)))) + +(define (char-set= . rest) + (or (null? rest) + (let* ((cs1 (car rest)) + (rest (cdr rest)) + (s1 (%char-set:s/check cs1 char-set=))) + (let lp ((rest rest)) + (or (not (pair? rest)) + (and (string=? s1 (%char-set:s/check (car rest) char-set=)) + (lp (cdr rest)))))))) + +(define (char-set<= . rest) + (or (null? rest) + (let ((cs1 (car rest)) + (rest (cdr rest))) + (let lp ((s1 (%char-set:s/check cs1 char-set<=)) (rest rest)) + (or (not (pair? rest)) + (let ((s2 (%char-set:s/check (car rest) char-set<=)) + (rest (cdr rest))) + (if (eq? s1 s2) (lp s2 rest) ; Fast path + (let lp2 ((i 255)) ; Real test + (if (< i 0) (lp s2 rest) + (and (<= (si s1 i) (si s2 i)) + (lp2 (- i 1)))))))))))) + +;;; Hash +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND, with sleaze thrown in +;;; to keep the intermediate values small. (We do the calculation with just +;;; enough bits to represent BOUND, masking off high bits at each step in +;;; calculation. If this screws up any important properties of the hash +;;; function I'd like to hear about it. -Olin) +;;; +;;; If you keep BOUND small enough, the intermediate calculations will +;;; always be fixnums. How small is dependent on the underlying Scheme system; +;;; we use a default BOUND of 2^22 = 4194304, which should hack it in +;;; Schemes that give you at least 29 signed bits for fixnums. The core +;;; calculation that you don't want to overflow is, worst case, +;;; (+ 65535 (* 37 (- bound 1))) +;;; where 65535 is the max character code. Choose the default BOUND to be the +;;; biggest power of two that won't cause this expression to fixnum overflow, +;;; and everything will be copacetic. + +(define (char-set-hash cs . maybe-bound) + (let* ((bound (:optional maybe-bound 4194304 (lambda (n) (and (integer? n) + (exact? n) + (<= 0 n))))) + (bound (if (zero? bound) 4194304 bound)) ; 0 means default. + (s (%char-set:s/check cs char-set-hash)) + ;; Compute a 111...1 mask that will cover BOUND-1: + (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh? + (if (>= i bound) (- i 1) (lp (+ i i)))))) + + (let lp ((i 255) (ans 0)) + (if (< i 0) (modulo ans bound) + (lp (- i 1) + (if (si=0? s i) ans + (bitwise-and mask (+ (* 37 ans) i)))))))) + + +(define (char-set-contains? cs char) + (si=1? (%char-set:s/check cs char-set-contains?) + (%char->latin1 (check-arg char? char char-set-contains?)))) + +(define (char-set-size cs) + (let ((s (%char-set:s/check cs char-set-size))) + (let lp ((i 255) (size 0)) + (if (< i 0) size + (lp (- i 1) (+ size (si s i))))))) + +(define (char-set-count pred cset) + (check-arg procedure? pred char-set-count) + (let ((s (%char-set:s/check cset char-set-count))) + (let lp ((i 255) (count 0)) + (if (< i 0) count + (lp (- i 1) + (if (and (si=1? s i) (pred (%latin1->char i))) + (+ count 1) + count)))))) + + +;;; -- Adjoin & delete + +(define (%set-char-set set proc cs chars) + (let ((s (%string-copy (%char-set:s/check cs proc)))) + (for-each (lambda (c) (set s (%char->latin1 c))) + chars) + (make-char-set s))) + +(define (%set-char-set! set proc cs chars) + (let ((s (%char-set:s/check cs proc))) + (for-each (lambda (c) (set s (%char->latin1 c))) + chars)) + cs) + +(define (char-set-adjoin cs . chars) + (%set-char-set %set1! char-set-adjoin cs chars)) +(define (char-set-adjoin! cs . chars) + (%set-char-set! %set1! char-set-adjoin! cs chars)) +(define (char-set-delete cs . chars) + (%set-char-set %set0! char-set-delete cs chars)) +(define (char-set-delete! cs . chars) + (%set-char-set! %set0! char-set-delete! cs chars)) + + +;;; Cursors +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Simple implementation. A cursors is an integer index into the +;;; mark vector, and -1 for the end-of-char-set cursor. +;;; +;;; If we represented char sets as a bit set, we could do the following +;;; trick to pick the lowest bit out of the set: +;;; (count-bits (xor (- cset 1) cset)) +;;; (But first mask out the bits already scanned by the cursor first.) + +(define (char-set-cursor cset) + (%char-set-cursor-next cset 256 char-set-cursor)) + +(define (end-of-char-set? cursor) (< cursor 0)) + +(define (char-set-ref cset cursor) (%latin1->char cursor)) + +(define (char-set-cursor-next cset cursor) + (check-arg (lambda (i) (and (integer? i) (exact? i) (<= 0 i 255))) cursor + char-set-cursor-next) + (%char-set-cursor-next cset cursor char-set-cursor-next)) + +(define (%char-set-cursor-next cset cursor proc) ; Internal + (let ((s (%char-set:s/check cset proc))) + (let lp ((cur cursor)) + (let ((cur (- cur 1))) + (if (or (< cur 0) (si=1? s cur)) cur + (lp cur)))))) + + +;;; -- for-each map fold unfold every any + +(define (char-set-for-each proc cs) + (check-arg procedure? proc char-set-for-each) + (let ((s (%char-set:s/check cs char-set-for-each))) + (let lp ((i 255)) + (cond ((>= i 0) + (if (si=1? s i) (proc (%latin1->char i))) + (lp (- i 1))))))) + +(define (char-set-map proc cs) + (check-arg procedure? proc char-set-map) + (let ((s (%char-set:s/check cs char-set-map)) + (ans (make-string 256 c0))) + (let lp ((i 255)) + (cond ((>= i 0) + (if (si=1? s i) + (%set1! ans (%char->latin1 (proc (%latin1->char i))))) + (lp (- i 1))))) + (make-char-set ans))) + +(define (char-set-fold kons knil cs) + (check-arg procedure? kons char-set-fold) + (let ((s (%char-set:s/check cs char-set-fold))) + (let lp ((i 255) (ans knil)) + (if (< i 0) ans + (lp (- i 1) + (if (si=0? s i) ans + (kons (%latin1->char i) ans))))))) + +(define (char-set-every pred cs) + (check-arg procedure? pred char-set-every) + (let ((s (%char-set:s/check cs char-set-every))) + (let lp ((i 255)) + (or (< i 0) + (and (or (si=0? s i) (pred (%latin1->char i))) + (lp (- i 1))))))) + +(define (char-set-any pred cs) + (check-arg procedure? pred char-set-any) + (let ((s (%char-set:s/check cs char-set-any))) + (let lp ((i 255)) + (and (>= i 0) + (or (and (si=1? s i) (pred (%latin1->char i))) + (lp (- i 1))))))) + + +(define (%char-set-unfold! proc p f g s seed) + (check-arg procedure? p proc) + (check-arg procedure? f proc) + (check-arg procedure? g proc) + (let lp ((seed seed)) + (cond ((not (p seed)) ; P says we are done. + (%set1! s (%char->latin1 (f seed))) ; Add (F SEED) to set. + (lp (g seed)))))) ; Loop on (G SEED). + +(define (char-set-unfold p f g seed . maybe-base) + (let ((bs (%default-base maybe-base char-set-unfold))) + (%char-set-unfold! char-set-unfold p f g bs seed) + (make-char-set bs))) + +(define (char-set-unfold! p f g seed base-cset) + (%char-set-unfold! char-set-unfold! p f g + (%char-set:s/check base-cset char-set-unfold!) + seed) + base-cset) + + + +;;; list <--> char-set + +(define (%list->char-set! chars s) + (for-each (lambda (char) (%set1! s (%char->latin1 char))) + chars)) + +(define (char-set . chars) + (let ((s (make-string 256 c0))) + (%list->char-set! chars s) + (make-char-set s))) + +(define (list->char-set chars . maybe-base) + (let ((bs (%default-base maybe-base list->char-set))) + (%list->char-set! chars bs) + (make-char-set bs))) + +(define (list->char-set! chars base-cs) + (%list->char-set! chars (%char-set:s/check base-cs list->char-set!)) + base-cs) + + +(define (char-set->list cs) + (let ((s (%char-set:s/check cs char-set->list))) + (let lp ((i 255) (ans '())) + (if (< i 0) ans + (lp (- i 1) + (if (si=0? s i) ans + (cons (%latin1->char i) ans))))))) + + + +;;; string <--> char-set + +(define (%string->char-set! str bs proc) + (check-arg string? str proc) + (do ((i (- (string-length str) 1) (- i 1))) + ((< i 0)) + (%set1! bs (%char->latin1 (string-ref str i))))) + +(define (string->char-set str . maybe-base) + (let ((bs (%default-base maybe-base string->char-set))) + (%string->char-set! str bs string->char-set) + (make-char-set bs))) + +(define (string->char-set! str base-cs) + (%string->char-set! str (%char-set:s/check base-cs string->char-set!) + string->char-set!) + base-cs) + + +(define (char-set->string cs) + (let* ((s (%char-set:s/check cs char-set->string)) + (ans (make-string (char-set-size cs)))) + (let lp ((i 255) (j 0)) + (if (< i 0) ans + (let ((j (if (si=0? s i) j + (begin (string-set! ans j (%latin1->char i)) + (+ j 1))))) + (lp (- i 1) j)))))) + + +;;; -- UCS-range -> char-set + +(define (%ucs-range->char-set! lower upper error? bs proc) + (check-arg (lambda (x) (and (integer? x) (exact? x) (<= 0 x))) lower proc) + (check-arg (lambda (x) (and (integer? x) (exact? x) (<= lower x))) upper proc) + + (if (and (< lower upper) (< 256 upper) error?) + (error "Requested UCS range contains unavailable characters -- this implementation only supports Latin-1" + proc lower upper)) + + (let lp ((i (- (min upper 256) 1))) + (cond ((<= lower i) (%set1! bs i) (lp (- i 1)))))) + +(define (ucs-range->char-set lower upper . rest) + (let-optionals* rest ((error? #f) rest) + (let ((bs (%default-base rest ucs-range->char-set))) + (%ucs-range->char-set! lower upper error? bs ucs-range->char-set) + (make-char-set bs)))) + +(define (ucs-range->char-set! lower upper error? base-cs) + (%ucs-range->char-set! lower upper error? + (%char-set:s/check base-cs ucs-range->char-set!) + ucs-range->char-set) + base-cs) + + +;;; -- predicate -> char-set + +(define (%char-set-filter! pred ds bs proc) + (check-arg procedure? pred proc) + (let lp ((i 255)) + (cond ((>= i 0) + (if (and (si=1? ds i) (pred (%latin1->char i))) + (%set1! bs i)) + (lp (- i 1)))))) + +(define (char-set-filter predicate domain . maybe-base) + (let ((bs (%default-base maybe-base char-set-filter))) + (%char-set-filter! predicate + (%char-set:s/check domain char-set-filter!) + bs + char-set-filter) + (make-char-set bs))) + +(define (char-set-filter! predicate domain base-cs) + (%char-set-filter! predicate + (%char-set:s/check domain char-set-filter!) + (%char-set:s/check base-cs char-set-filter!) + char-set-filter!) + base-cs) + + +;;; {string, char, char-set, char predicate} -> char-set + +(define (x->char-set x) + (cond ((char-set? x) x) + ((string? x) (string->char-set x)) + ((char? x) (char-set x)) + (else (error "->char-set: Not a charset, string or char." x)))) + + + +;;; Set algebra +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The exported ! procs are "linear update" -- allowed, but not required, to +;;; side-effect their first argument when computing their result. In other +;;; words, you must use them as if they were completely functional, just like +;;; their non-! counterparts, and you must additionally ensure that their +;;; first arguments are "dead" at the point of call. In return, we promise a +;;; more efficient result, plus allowing you to always assume char-sets are +;;; unchangeable values. + +;;; Apply P to each index and its char code in S: (P I VAL). +;;; Used by the set-algebra ops. + +(define (%string-iter p s) + (let lp ((i (- (string-length s) 1))) + (cond ((>= i 0) + (p i (%char->latin1 (string-ref s i))) + (lp (- i 1)))))) + +;;; String S represents some initial char-set. (OP s i val) does some +;;; kind of s[i] := s[i] op val update. Do +;;; S := S OP CSETi +;;; for all the char-sets in the list CSETS. The n-ary set-algebra ops +;;; all use this internal proc. + +(define (%char-set-algebra s csets op proc) + (for-each (lambda (cset) + (let ((s2 (%char-set:s/check cset proc))) + (let lp ((i 255)) + (cond ((>= i 0) + (op s i (si s2 i)) + (lp (- i 1))))))) + csets)) + + +;;; -- Complement + +(define (char-set-complement cs) + (let ((s (%char-set:s/check cs char-set-complement)) + (ans (make-string 256))) + (%string-iter (lambda (i v) (%not! ans i v)) s) + (make-char-set ans))) + +(define (char-set-complement! cset) + (let ((s (%char-set:s/check cset char-set-complement!))) + (%string-iter (lambda (i v) (%not! s i v)) s)) + cset) + + +;;; -- Union + +(define (char-set-union! cset1 . csets) + (%char-set-algebra (%char-set:s/check cset1 char-set-union!) + csets %or! char-set-union!) + cset1) + +(define (char-set-union . csets) + (if (pair? csets) + (let ((s (%string-copy (%char-set:s/check (car csets) char-set-union)))) + (%char-set-algebra s (cdr csets) %or! char-set-union) + (make-char-set s)) + (char-set-copy char-set:empty))) + + +;;; -- Intersection + +(define (char-set-intersection! cset1 . csets) + (%char-set-algebra (%char-set:s/check cset1 char-set-intersection!) + csets %and! char-set-intersection!) + cset1) + +(define (char-set-intersection . csets) + (if (pair? csets) + (let ((s (%string-copy (%char-set:s/check (car csets) char-set-intersection)))) + (%char-set-algebra s (cdr csets) %and! char-set-intersection) + (make-char-set s)) + (char-set-copy char-set:full))) + + +;;; -- Difference + +(define (char-set-difference! cset1 . csets) + (%char-set-algebra (%char-set:s/check cset1 char-set-difference!) + csets %minus! char-set-difference!) + cset1) + +(define (char-set-difference cs1 . csets) + (if (pair? csets) + (let ((s (%string-copy (%char-set:s/check cs1 char-set-difference)))) + (%char-set-algebra s csets %minus! char-set-difference) + (make-char-set s)) + (char-set-copy cs1))) + + +;;; -- Xor + +(define (char-set-xor! cset1 . csets) + (%char-set-algebra (%char-set:s/check cset1 char-set-xor!) + csets %xor! char-set-xor!) + cset1) + +(define (char-set-xor . csets) + (if (pair? csets) + (let ((s (%string-copy (%char-set:s/check (car csets) char-set-xor)))) + (%char-set-algebra s (cdr csets) %xor! char-set-xor) + (make-char-set s)) + (char-set-copy char-set:empty))) + + +;;; -- Difference & intersection + +(define (%char-set-diff+intersection! diff int csets proc) + (for-each (lambda (cs) + (%string-iter (lambda (i v) + (if (not (zero? v)) + (cond ((si=1? diff i) + (%set0! diff i) + (%set1! int i))))) + (%char-set:s/check cs proc))) + csets)) + +(define (char-set-diff+intersection! cs1 cs2 . csets) + (let ((s1 (%char-set:s/check cs1 char-set-diff+intersection!)) + (s2 (%char-set:s/check cs2 char-set-diff+intersection!))) + (%string-iter (lambda (i v) (if (zero? v) + (%set0! s2 i) + (if (si=1? s2 i) (%set0! s1 i)))) + s1) + (%char-set-diff+intersection! s1 s2 csets char-set-diff+intersection!)) + (values cs1 cs2)) + +(define (char-set-diff+intersection cs1 . csets) + (let ((diff (string-copy (%char-set:s/check cs1 char-set-diff+intersection))) + (int (make-string 256 c0))) + (%char-set-diff+intersection! diff int csets char-set-diff+intersection) + (values (make-char-set diff) (make-char-set int)))) + + +;;;; System character sets +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These definitions are for Latin-1. +;;; +;;; If your Scheme implementation allows you to mark the underlying strings +;;; as immutable, you should do so -- it would be very, very bad if a client's +;;; buggy code corrupted these constants. + +(define char-set:empty (char-set)) +(define char-set:full (char-set-complement char-set:empty)) + +(define char-set:lower-case + (let* ((a-z (ucs-range->char-set #x61 #x7B)) + (latin1 (ucs-range->char-set! #xdf #xf7 #t a-z)) + (latin2 (ucs-range->char-set! #xf8 #x100 #t latin1))) + (char-set-adjoin! latin2 (%latin1->char #xb5)))) + +(define char-set:upper-case + (let ((A-Z (ucs-range->char-set #x41 #x5B))) + ;; Add in the Latin-1 upper-case chars. + (ucs-range->char-set! #xd8 #xdf #t + (ucs-range->char-set! #xc0 #xd7 #t A-Z)))) + +(define char-set:title-case char-set:empty) + +(define char-set:letter + (let ((u/l (char-set-union char-set:upper-case char-set:lower-case))) + (char-set-adjoin! u/l + (%latin1->char #xaa) ; FEMININE ORDINAL INDICATOR + (%latin1->char #xba)))) ; MASCULINE ORDINAL INDICATOR + +(define char-set:digit (string->char-set "0123456789")) +(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF")) + +(define char-set:letter+digit + (char-set-union char-set:letter char-set:digit)) + +(define char-set:punctuation + (let ((ascii (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")) + (latin-1-chars (map %latin1->char '(#xA1 ; INVERTED EXCLAMATION MARK + #xAB ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + #xAD ; SOFT HYPHEN + #xB7 ; MIDDLE DOT + #xBB ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + #xBF)))) ; INVERTED QUESTION MARK + (list->char-set! latin-1-chars ascii))) + +(define char-set:symbol + (let ((ascii (string->char-set "$+<=>^`|~")) + (latin-1-chars (map %latin1->char '(#x00A2 ; CENT SIGN + #x00A3 ; POUND SIGN + #x00A4 ; CURRENCY SIGN + #x00A5 ; YEN SIGN + #x00A6 ; BROKEN BAR + #x00A7 ; SECTION SIGN + #x00A8 ; DIAERESIS + #x00A9 ; COPYRIGHT SIGN + #x00AC ; NOT SIGN + #x00AE ; REGISTERED SIGN + #x00AF ; MACRON + #x00B0 ; DEGREE SIGN + #x00B1 ; PLUS-MINUS SIGN + #x00B4 ; ACUTE ACCENT + #x00B6 ; PILCROW SIGN + #x00B8 ; CEDILLA + #x00D7 ; MULTIPLICATION SIGN + #x00F7)))) ; DIVISION SIGN + (list->char-set! latin-1-chars ascii))) + + +(define char-set:graphic + (char-set-union char-set:letter+digit char-set:punctuation char-set:symbol)) + +(define char-set:whitespace + (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION + #x0A ; LINE FEED + #x0B ; VERTICAL TABULATION + #x0C ; FORM FEED + #x0D ; CARRIAGE RETURN + #x20 ; SPACE + #xA0)))) + +(define char-set:printing (char-set-union char-set:whitespace char-set:graphic)) ; NO-BREAK SPACE + +(define char-set:blank + (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION + #x20 ; SPACE + #xA0)))) ; NO-BREAK SPACE + + +(define char-set:iso-control + (ucs-range->char-set! #x7F #xA0 #t (ucs-range->char-set 0 32))) + +(define char-set:ascii (ucs-range->char-set 0 128)) + +; Begin S48 additions + +(define (make-char-set-immutable! char-set) + (make-immutable! char-set) + (make-immutable! (char-set:s char-set))) + +(make-char-set-immutable! char-set:empty) +(make-char-set-immutable! char-set:full) +(make-char-set-immutable! char-set:lower-case) +(make-char-set-immutable! char-set:upper-case) +(make-char-set-immutable! char-set:letter) +(make-char-set-immutable! char-set:digit) +(make-char-set-immutable! char-set:hex-digit) +(make-char-set-immutable! char-set:letter+digit) +(make-char-set-immutable! char-set:punctuation) +(make-char-set-immutable! char-set:symbol) +(make-char-set-immutable! char-set:graphic) +(make-char-set-immutable! char-set:whitespace) +(make-char-set-immutable! char-set:printing) +(make-char-set-immutable! char-set:blank) +(make-char-set-immutable! char-set:iso-control) +(make-char-set-immutable! char-set:ascii) + +; End S48 additions + + +;;; Porting & performance-tuning notes +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; See the section at the beginning of this file on external dependencies. +;;; +;;; First and foremost, rewrite this code to use bit vectors of some sort. +;;; This will give big speedup and memory savings. +;;; +;;; - LET-OPTIONALS* macro. +;;; This is only used once. You can rewrite the use, port the hairy macro +;;; definition (which is implemented using a Clinger-Rees low-level +;;; explicit-renaming macro system), or port the simple, high-level +;;; definition, which is less efficient. +;;; +;;; - :OPTIONAL macro +;;; Very simply defined using an R5RS high-level macro. +;;; +;;; Implementations that can arrange for the base char sets to be immutable +;;; should do so. (E.g., Scheme 48 allows one to mark a string as immutable, +;;; which can be used to protect the underlying strings.) It would be very, +;;; very bad if a client's buggy code corrupted these constants. +;;; +;;; There is a fair amount of argument checking. This is, strictly speaking, +;;; unnecessary -- the actual body of the procedures will blow up if an +;;; illegal value is passed in. However, the error message will not be as good +;;; as if the error were caught at the "higher level." Also, a very, very +;;; smart Scheme compiler may be able to exploit having the type checks done +;;; early, so that the actual body of the procedures can assume proper values. +;;; This isn't likely; this kind of compiler technology isn't common any +;;; longer. +;;; +;;; The overhead of optional-argument parsing is irritating. The optional +;;; arguments must be consed into a rest list on entry, and then parsed out. +;;; Function call should be a matter of a few register moves and a jump; it +;;; should not involve heap allocation! Your Scheme system may have a superior +;;; non-R5RS optional-argument system that can eliminate this overhead. If so, +;;; then this is a prime candidate for optimising these procedures, +;;; *especially* the many optional BASE-CS parameters. +;;; +;;; Note that optional arguments are also a barrier to procedure integration. +;;; If your Scheme system permits you to specify alternate entry points +;;; for a call when the number of optional arguments is known in a manner +;;; that enables inlining/integration, this can provide performance +;;; improvements. +;;; +;;; There is enough *explicit* error checking that *all* internal operations +;;; should *never* produce a type or index-range error. Period. Feel like +;;; living dangerously? *Big* performance win to be had by replacing string +;;; and record-field accessors and setters with unsafe equivalents in the +;;; code. Similarly, fixnum-specific operators can speed up the arithmetic +;;; done on the index values in the inner loops. The only arguments that are +;;; not completely error checked are +;;; - string lists (complete checking requires time proportional to the +;;; length of the list) +;;; - procedure arguments, such as char->char maps & predicates. +;;; There is no way to check the range & domain of procedures in Scheme. +;;; Procedures that take these parameters cannot fully check their +;;; arguments. But all other types to all other procedures are fully +;;; checked. +;;; +;;; This does open up the alternate possibility of simply *removing* these +;;; checks, and letting the safe primitives raise the errors. On a dumb +;;; Scheme system, this would provide speed (by eliminating the redundant +;;; error checks) at the cost of error-message clarity. +;;; +;;; In an interpreted Scheme, some of these procedures, or the internal +;;; routines with % prefixes, are excellent candidates for being rewritten +;;; in C. +;;; +;;; It would also be nice to have the ability to mark some of these +;;; routines as candidates for inlining/integration. +;;; +;;; See the comments preceding the hash function code for notes on tuning +;;; the default bound so that the code never overflows your implementation's +;;; fixnum size into bignum calculation. +;;; +;;; All the %-prefixed routines in this source code are written +;;; to be called internally to this library. They do *not* perform +;;; friendly error checks on the inputs; they assume everything is +;;; proper. They also do not take optional arguments. These two properties +;;; save calling overhead and enable procedure integration -- but they +;;; are not appropriate for exported routines. + +;;; Copyright notice +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology +;;; +;;; This material was developed by the Scheme project at the Massachusetts +;;; Institute of Technology, Department of Electrical Engineering and +;;; Computer Science. Permission to copy and modify this software, to +;;; redistribute either the original software or a modified version, and +;;; to use this software for any purpose is granted, subject to the +;;; following restrictions and understandings. +;;; +;;; 1. Any copy made of this software must include this copyright notice +;;; in full. +;;; +;;; 2. Users of this software agree to make their best efforts (a) to +;;; return to the MIT Scheme project any improvements or extensions that +;;; they make, so that these may be included in future releases; and (b) +;;; to inform MIT of noteworthy uses of this software. +;;; +;;; 3. All materials developed as a consequence of the use of this +;;; software shall duly acknowledge such use, in accordance with the usual +;;; standards of acknowledging credit in academic research. +;;; +;;; 4. MIT has made no warrantee or representation that the operation of +;;; this software will be error-free, and MIT is under no obligation to +;;; provide any services, by way of maintenance, update, or otherwise. +;;; +;;; 5. In conjunction with products arising from the use of this material, +;;; there shall be no use of the name of the Massachusetts Institute of +;;; Technology nor of any adaptation thereof in any advertising, +;;; promotional, or sales literature without prior written consent from +;;; MIT in each case. diff --git a/scheme/srfi/srfi-16.scm b/scheme/srfi/srfi-16.scm new file mode 100644 index 0000000..b2b1594 --- /dev/null +++ b/scheme/srfi/srfi-16.scm @@ -0,0 +1,41 @@ +; Copied from Lars T Hansen's SRFI document. + +(define-syntax case-lambda + (syntax-rules () + ((case-lambda + (?a1 ?e1 ...) + ?clause1 ...) + (lambda args + (let ((l (length args))) + (case-lambda "CLAUSE" args l + (?a1 ?e1 ...) + ?clause1 ...)))) + ((case-lambda "CLAUSE" ?args ?l + ((?a1 ...) ?e1 ...) + ?clause1 ...) + (if (= ?l (length '(?a1 ...))) + (apply (lambda (?a1 ...) ?e1 ...) ?args) + (case-lambda "CLAUSE" ?args ?l + ?clause1 ...))) + ((case-lambda "CLAUSE" ?args ?l + ((?a1 . ?ar) ?e1 ...) + ?clause1 ...) + (case-lambda "IMPROPER" ?args ?l 1 (?a1 . ?ar) (?ar ?e1 ...) + ?clause1 ...)) + ((case-lambda "CLAUSE" ?args ?l + (?a1 ?e1 ...) + ?clause1 ...) + (let ((?a1 ?args)) + ?e1 ...)) + ((case-lambda "CLAUSE" ?args ?l) + (error "Wrong number of arguments to CASE-LAMBDA.")) + ((case-lambda "IMPROPER" ?args ?l ?k ?al ((?a1 . ?ar) ?e1 ...) + ?clause1 ...) + (case-lambda "IMPROPER" ?args ?l (+ ?k 1) ?al (?ar ?e1 ...) + ?clause1 ...)) + ((case-lambda "IMPROPER" ?args ?l ?k ?al (?ar ?e1 ...) + ?clause1 ...) + (if (>= ?l ?k) + (apply (lambda ?al ?e1 ...) ?args) + (case-lambda "CLAUSE" ?args ?l + ?clause1 ...))))) diff --git a/scheme/srfi/srfi-17.scm b/scheme/srfi/srfi-17.scm new file mode 100644 index 0000000..9995082 --- /dev/null +++ b/scheme/srfi/srfi-17.scm @@ -0,0 +1,69 @@ + +(define-syntax set! + (syntax-rules () + ((set! (?e0 ?e1 ...) ?v) + ((setter ?e0) ?e1 ... ?v)) + ((set! ?i ?v) + (scheme-set! ?i ?v)))) + +(define (setter proc) + (let ((probe (assv proc setters))) + (if probe + (cdr probe) + (error "No setter found" proc)))) + +(define (set-setter! proc setter) + (let ((probe (assv proc setters))) + (if probe + (set-cdr! probe setter) + (scheme-set! setters + (cons (cons proc setter) + setters))) + (unspecific))) + +(define (car-setter proc) + (lambda (p v) + (set-car! (proc p) v))) + +(define (cdr-setter proc) + (lambda (p v) + (set-cdr! (proc p) v))) + +(define setters + (list (cons setter set-setter!) + (cons vector-ref vector-set!) + (cons string-ref string-set!) + (cons car set-car!) + (cons cdr set-cdr!) + + (cons caar (car-setter car)) + (cons cdar (cdr-setter car)) + (cons cadr (car-setter cdr)) + (cons cddr (cdr-setter cdr)) + + (cons caaar (car-setter caar)) + (cons cdaar (cdr-setter caar)) + (cons cadar (car-setter cdar)) + (cons cddar (cdr-setter cdar)) + (cons caadr (car-setter cadr)) + (cons cdadr (cdr-setter cadr)) + (cons caddr (car-setter cddr)) + (cons cdddr (cdr-setter cddr)) + + (cons caaaar (car-setter caaar)) + (cons cdaaar (cdr-setter caaar)) + (cons cadaar (car-setter cdaar)) + (cons cddaar (cdr-setter cdaar)) + (cons caadar (car-setter cadar)) + (cons cdadar (cdr-setter cadar)) + (cons caddar (car-setter cddar)) + (cons cdddar (cdr-setter cddar)) + (cons caaadr (car-setter caadr)) + (cons cdaadr (cdr-setter caadr)) + (cons cadadr (car-setter cdadr)) + (cons cddadr (cdr-setter cdadr)) + (cons caaddr (car-setter caddr)) + (cons cdaddr (cdr-setter caddr)) + (cons cadddr (car-setter cdddr)) + (cons cddddr (cdr-setter cdddr)))) + diff --git a/scheme/srfi/srfi-19.scm b/scheme/srfi/srfi-19.scm new file mode 100644 index 0000000..4d7d6d3 --- /dev/null +++ b/scheme/srfi/srfi-19.scm @@ -0,0 +1,1410 @@ +;; SRFI-19: Time Data Types and Procedures. +;; +;; Copyright (C) Neodesic Corporation (2000). All Rights Reserved. +;; +;; This document and translations of it may be copied and furnished to others, +;; and derivative works that comment on or otherwise explain it or assist in its +;; implementation may be prepared, copied, published and distributed, in whole or +;; in part, without restriction of any kind, provided that the above copyright +;; notice and this paragraph are included on all such copies and derivative works. +;; However, this document itself may not be modified in any way, such as by +;; removing the copyright notice or references to the Scheme Request For +;; Implementation process or editors, except as needed for the purpose of +;; developing SRFIs in which case the procedures for copyrights defined in the SRFI +;; process must be followed, or as required to translate it into languages other +;; than English. +;; +;; The limited permissions granted above are perpetual and will not be revoked +;; by the authors or their successors or assigns. +;; +;; This document and the information contained herein is provided on an "AS IS" +;; basis and THE AUTHOR AND THE SRFI EDITORS DISCLAIM ALL WARRANTIES, EXPRESS OR +;; IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE +;; INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF +;; MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +; (define (defining x) (display ";; Defining ") (display x) (newline)) + +(define (defining x) (values)) + +;; MzScheme specific: Rice doesn't believe in syntax-rules? +;; This is for RECEIVE and :OPTIONAL only. + +(require-library "synrule.ss") + +(define-syntax receive + (syntax-rules () + ((receive formals expression body ...) + (call-with-values (lambda () expression) + (lambda formals body ...))))) + +(defining "RECEIVE") + +;;; -- we want receive later on for a couple of small things +;; + +;; :OPTIONAL is nice, too + +(define-syntax :optional + (syntax-rules () + ((_ val default-value) + (if (null? val) default-value (car val))))) + +(defining "Constants") + +(define time-tai 'time-tai) +(define time-utc 'time-utc) +(define time-monotonic 'time-monotonic) +(define time-thread 'time-thread) +(define time-process 'time-process) +(define time-duration 'time-duration) + +;; example of extension (MZScheme specific) +(define time-gc 'time-gc) + +;;-- LOCALE dependent constants + +(define tm:locale-number-separator ".") + +(define tm:locale-abbr-weekday-vector (vector "Sun" "Mon" "Tue" "Wed" + "Thu" "Fri" "Sat")) +(define tm:locale-long-weekday-vector (vector "Sunday" "Monday" + "Tuesday" "Wednesday" + "Thursday" "Friday" + "Saturday")) +;; note empty string in 0th place. +(define tm:locale-abbr-month-vector (vector "" "Jan" "Feb" "Mar" + "Apr" "May" "Jun" "Jul" + "Aug" "Sep" "Oct" "Nov" + "Dec")) +(define tm:locale-long-month-vector (vector "" "January" "February" + "March" "April" "May" + "June" "July" "August" + "September" "October" + "November" "December")) + +(define tm:locale-pm "PM") +(define tm:locale-am "AM") + +;; See date->string +(define tm:locale-date-time-format "~a ~b ~d ~H:~M:~S~z ~Y") +(define tm:locale-short-date-format "~m/~d/~y") +(define tm:locale-time-format "~H:~M:~S") +(define tm:iso-8601-date-time-format "~Y-~m-~dT~H:~M:~S~z") +;;-- Miscellaneous Constants. +;;-- only the tm:tai-epoch-in-jd might need changing if +;; a different epoch is used. + +(define tm:nano 10000000) +(define tm:sid 86400) ; seconds in a day +(define tm:sihd 43200) ; seconds in a half day +(define tm:tai-epoch-in-jd 4881175/2) ; julian day number for 'the epoch' + + +(defining "the Time Errors") +;;; A Very simple Error system for the time procedures +;;; +(define tm:time-error-types + '(invalid-clock-type + unsupported-clock-type + incompatible-time-types + not-duration + dates-are-immutable + bad-date-format-string + bad-date-template-string + invalid-month-specification + )) + +(define (tm:time-error caller type value) + (if (member type tm:time-error-types) + (if value + (error caller "TIME-ERROR type ~S: ~S" type value) + (error caller "TIME-ERROR type ~S" type)) + (error caller "TIME-ERROR unsupported error type ~S" type))) + + +(defining "leap seconds") +;; A table of leap seconds +;; See ftp://maia.usno.navy.mil/ser7/tai-utc.dat +;; and update as necessary. +;; this procedures reads the file in the abover +;; format and creates the leap second table +;; it also calls the almost standard, but not R5 procedures read-line +;; & open-input-string +;; ie (set! tm:leap-second-table (tm:read-tai-utc-date "tai-utc.dat")) + +(define (tm:read-tai-utc-data filename) + (define (convert-jd jd) + (* (- (inexact->exact jd) tm:tai-epoch-in-jd) tm:sid)) + (define (convert-sec sec) + (inexact->exact sec)) + (let ( (port (open-input-file filename)) + (table '()) ) + (let loop ((line (read-line port))) + (if (not (eq? line eof)) + (begin + (let* ( (data (read (open-input-string (string-append "(" line ")")))) + (year (car data)) + (jd (cadddr (cdr data))) + (secs (cadddr (cdddr data))) ) + (if (>= year 1972) + (set! table (cons (cons (convert-jd jd) (convert-sec secs)) table))) + (loop (read-line port)))))) + table)) + +;; each entry is ( tai seconds since epoch . # seconds to subtract for utc ) +;; note they go higher to lower, and end in 1972. +(define tm:leap-second-table + '((915148800 . 32) + (867715200 . 31) + (820454400 . 30) + (773020800 . 29) + (741484800 . 28) + (709948800 . 27) + (662688000 . 26) + (631152000 . 25) + (567993600 . 24) + (489024000 . 23) + (425865600 . 22) + (394329600 . 21) + (362793600 . 20) + (315532800 . 19) + (283996800 . 18) + (252460800 . 17) + (220924800 . 16) + (189302400 . 15) + (157766400 . 14) + (126230400 . 13) + (94694400 . 12) + (78796800 . 11) + (63072000 . 10))) + +(define (read-leap-second-table filename) + (set! tm:leap-second-table (tm:read-tai-utc-data filename)) + (values)) + + +(define (tm:leap-second-delta utc-seconds) + (letrec ( (lsd (lambda (table) + (cond ((>= utc-seconds (caar table)) + (cdar table)) + (else (lsd (cdr table)))))) ) + (if (< utc-seconds (* (- 1972 1970) 365 tm:sid)) 0 + (lsd tm:leap-second-table)))) + + +(defining "the Time Structure") +;;; the TIME structure; creates the accessors, too. + +(define-struct time (type second nanosecond)) + +(define (copy-time time) + (let ((ntime (make-time #f #f #f))) + (set-time-type! ntime (time-type time)) + (set-time-second! ntime (time-second time)) + (set-time-nanosecond! ntime (time-nanosecond time)) + ntime)) + +(defining "CURRENT-TIME") +;;; current-time + +;;; specific time getters. +;;; These should be rewritten to be OS specific. +;; +;; -- using GNU gettimeofday() would be useful here -- gets +;; second + millisecond +;; let's pretend we do, using MzScheme's current-seconds & current-milliseconds +;; this is supposed to return UTC. +;; + +(define (tm:get-time-of-day) + (values (current-seconds) + (abs (remainder (current-milliseconds) 1000)))) + +(define (tm:current-time-utc) + (receive (seconds ms) (tm:get-time-of-day) + (make-time time-utc seconds (* ms 10000)))) + +(define (tm:current-time-tai) + (receive (seconds ms) (tm:get-time-of-day) + (make-time time-tai + (+ seconds (tm:leap-second-delta seconds)) + (* ms 10000)))) + + +(define (tm:current-time-ms-time time-type proc) + (let ((current-ms (proc))) + (make-time time-type (quotient current-ms 10000) + (* (remainder current-ms 1000) 10000)))) + +;; -- we define it to be the same as TAI. +;; A different implemation of current-time-montonic +;; will require rewriting all of the time-monotonic converters, +;; of course. + +(define (tm:current-time-monotonic) + (receive (seconds ms) (tm:get-time-of-day) + (make-time time-monotonic + (+ seconds (tm:leap-second-delta seconds)) + (* ms 10000)))) + + +(define (tm:current-time-thread) + (tm:time-error 'current-time 'unsupported-clock-type 'time-thread)) + +(define (tm:current-time-process) + (tm:current-time-ms-time time-process current-process-milliseconds)) + +(define (tm:current-time-gc) + (tm:current-time-ms-time time-gc current-gc-milliseconds)) + +(define (current-time . clock-type) + (let ( (clock-type (:optional clock-type time-utc)) ) + (cond + ((eq? clock-type time-tai) (tm:current-time-tai)) + ((eq? clock-type time-utc) (tm:current-time-utc)) + ((eq? clock-type time-monotonic) (tm:current-time-monotonic)) + ((eq? clock-type time-thread) (tm:current-time-thread)) + ((eq? clock-type time-process) (tm:current-time-process)) + ((eq? clock-type time-gc) (tm:current-time-gc)) + (else (tm:time-error 'current-time 'invalid-clock-type clock-type))))) + +(defining "TIME-RESOLUTION") + +;; -- Time Resolution +;; This is the resolution of the clock in nanoseconds. +;; This will be implementation specific. + +(define (time-resolution . clock-type) + (let ((clock-type (:optional clock-type time-utc))) + (cond + ((eq? clock-type time-tai) 10000) + ((eq? clock-type time-utc) 10000) + ((eq? clock-type time-monotonic) 10000) + ((eq? clock-type time-thread) 10000) + ((eq? clock-type time-gc) 10000) + (else (tm:time-error 'time-resolution 'invalid-clock-type clock-type))))) + +(defining "TIME comparisons") +;; -- Time comparisons + +(define (tm:time-compare time1 time2 proc caller) + (if (or (not (and (time? time1) (time? time2))) + (not (eq? (time-type time1) (time-type time2)))) + (tm:time-error caller 'incompatible-time-types #f) + (and (proc (time-second time1) (time-second time2)) + (proc (time-nanosecond time1) (time-nanosecond time2))))) + +(define (time=? time1 time2) + (tm:time-compare time1 time2 = 'time=?)) + +(define (time>? time1 time2) + (tm:time-compare time1 time2 > 'time>?)) + +(define (time=? time1 time2) + (tm:time-compare time1 time2 >= 'time>=?)) + +(define (time<=? time1 time2) + (tm:time-compare time1 time2 <= 'time<=?)) + +(defining "Time arithmetic") +;; -- Time arithmetic + +(define (tm:time-difference time1 time2 time3) + (if (or (not (and (time? time1) (time? time2))) + (not (eq? (time-type time1) (time-type time2)))) + (tm:time-error 'time-difference 'incompatible-time-types #f) + (let ( (sec-diff (- (time-second time1) (time-second time2))) + (nsec-diff (- (time-nanosecond time1) (time-nanosecond time2))) ) + (set-time-type! time3 time-duration) + (if (negative? nsec-diff) + (begin + (set-time-second! time3 (- sec-diff 1)) + (set-time-nanosecond! time3 (+ tm:nano nsec-diff))) + (begin + (set-time-second! time3 sec-diff) + (set-time-nanosecond! time3 nsec-diff))) + time3))) + +(define (time-difference time1 time2) + (tm:time-difference time1 time2 (make-time #f #f #f))) + +(define (time-difference! time1 time2) + (tm:time-difference time1 time2 time1)) + +(define (tm:add-duration time1 duration time3) + (if (not (and (time? time1) (time? duration))) + (tm:time-error 'add-duration 'incompatible-time-types #f)) + (if (not (eq? (time-type duration) time-duration)) + (tm:time-error 'add-duration 'not-duration duration) + (let ( (sec-plus (+ (time-second time1) (time-second duration))) + (nsec-plus (+ (time-nanosecond time1) (time-nanosecond duration))) ) + (let ((r (remainder nsec-plus tm:nano)) + (q (quotient nsec-plus tm:nano))) + ; (set-time-type! time3 (time-type time1)) + (if (negative? r) + (begin + (set-time-second! time3 (+ sec-plus q -1)) + (set-time-nanosecond! time3 (+ tm:nano r))) + (begin + (set-time-second! time3 (+ sec-plus q)) + (set-time-nanosecond! time3 r))) + time3)))) + +(define (add-duration time1 duration) + (tm:add-duration time1 duration (make-time (time-type time1) #f #f))) + +(define (add-duration! time1 duration) + (tm:add-duration time1 duration time1)) + +(define (tm:subtract-duration time1 duration time3) + (if (not (and (time? time1) (time? duration))) + (tm:time-error 'add-duration 'incompatible-time-types #f)) + (if (not (eq? (time-type duration) time-duration)) + (tm:time-error 'add-duration 'not-duration duration) + (let ( (sec-minus (- (time-second time1) (time-second duration))) + (nsec-minus (- (time-nanosecond time1) (time-nanosecond duration))) ) + (let ((r (remainder nsec-minus tm:nano)) + (q (quotient nsec-minus tm:nano))) + (if (negative? r) + (begin + (set-time-second! time3 (- sec-minus q 1)) + (set-time-nanosecond! time3 (+ tm:nano r))) + (begin + (set-time-second! time3 (- sec-minus q)) + (set-time-nanosecond! time3 r))) + time3)))) + +(define (subtract-duration time1 duration) + (tm:subtract-duration time1 duration (make-time (time-type time1) #f #f))) + +(define (subtract-duration! time1 duration) + (tm:subtract-duration time1 duration time1)) + +(defining "Time converters") + +;; -- Converters between types. + +(define (tm:time-tai->time-utc! time-in time-out caller) + (if (not (eq? (time-type time-in) time-tai)) + (tm:time-error caller 'incompatible-time-types time-in)) + (set-time-type! time-out time-utc) + (set-time-nanosecond! time-out (time-nanosecond time-in)) + (set-time-second! time-out (- (time-second time-in) + (tm:leap-second-delta + (time-second time-in)))) + time-out) + +(define (time-tai->time-utc time-in) + (tm:time-tai->time-utc! time-in (make-time #f #f #f) 'time-tai->time-utc)) + + +(define (time-tai->time-utc! time-in) + (tm:time-tai->time-utc! time-in time-in 'time-tai->time-utc!)) + + +(define (tm:time-utc->time-tai! time-in time-out caller) + (if (not (eq? (time-type time-in) time-utc)) + (tm:time-error caller 'incompatible-time-types time-in)) + (set-time-type! time-out time-tai) + (set-time-nanosecond! time-out (time-nanosecond time-in)) + (set-time-second! time-out (+ (time-second time-in) + (tm:leap-second-delta + (time-second time-in)))) + time-out) + + +(define (time-utc->time-tai time-in) + (tm:time-utc->time-tai! time-in (make-time #f #f #f) 'time-utc->time-tai)) + +(define (time-utc->time-tai! time-in) + (tm:time-utc->time-tai! time-in time-in 'time-utc->time-tai!)) + +;; -- these depend on time-monotonic having the same definition as time-tai! +(define (time-monotonic->time-utc time-in) + (if (not (eq? (time-type time-in) time-monotonic)) + (tm:time-error caller 'incompatible-time-types time-in)) + (let ((ntime (copy-time time-in))) + (set-time-type! ntime time-tai) + (tm:time-tai->time-utc! ntime ntime 'time-monotonic->time-utc))) + +(define (time-monotonic->time-utc! time-in) + (if (not (eq? (time-type time-in) time-monotonic)) + (tm:time-error caller 'incompatible-time-types time-in)) + (set-time-type! time-in time-tai) + (tm:time-tai->time-utc! ntime ntime 'time-monotonic->time-utc)) + +(define (time-monotonic->time-tai time-in) + (if (not (eq? (time-type time-in) time-monotonic)) + (tm:time-error caller 'incompatible-time-types time-in)) + (let ((ntime (copy-time time-in))) + (set-time-type! ntime time-tai) + ntime)) + +(define (time-monotonic->time-tai! time-in) + (if (not (eq? (time-type time-in) time-monotonic)) + (tm:time-error caller 'incompatible-time-types time-in)) + (set-time-type! time-in time-tai) + time-in) + +(define (time-utc->time-monotonic time-in) + (if (not (eq? (time-type time-in) time-utc)) + (tm:time-error caller 'incompatible-time-types time-in)) + (let ((ntime (tm:time-utc->time-tai! time-in (make-time #f #f #f) + 'time-utc->time-monotonic))) + (set-time-type! ntime time-monotonic) + ntime)) + + +(define (time-utc->time-monotonic! time-in) + (if (not (eq? (time-type time-in) time-utc)) + (tm:time-error caller 'incompatible-time-types time-in)) + (let ((ntime (tm:time-utc->time-tai! time-in time-in + 'time-utc->time-monotonic!))) + (set-time-type! ntime time-monotonic) + ntime)) + + +(define (time-tai->time-monotonic time-in) + (if (not (eq? (time-type time-in) time-tai)) + (tm:time-error caller 'incompatible-time-types time-in)) + (let ((ntime (copy-time time-in))) + (set-time-type! ntime time-monotonic) + ntime)) + +(define (time-tai->time-monotonic! time-in) + (if (not (eq? (time-type time-in) time-tai)) + (tm:time-error caller 'incompatible-time-types time-in)) + (set-time-type! time-in time-monotonic) + time-in) + + +(defining "Date structures") +;; -- Date Structures + +(define-struct date (nanosecond second minute hour day month year zone-offset)) + +;; redefine setters + +(define tm:set-date-nanosecond! set-date-nanosecond!) +(define tm:set-date-second! set-date-second!) +(define tm:set-date-minute! set-date-minute!) +(define tm:set-date-hour! set-date-hour!) +(define tm:set-date-day! set-date-day!) +(define tm:set-date-month! set-date-month!) +(define tm:set-date-year! set-date-year!) +(define tm:set-date-zone-offset! set-date-zone-offset!) + +(define (set-date-second! date val) + (tm:time-error 'set-date-second! 'dates-are-immutable date)) + +(define (set-date-minute! date val) + (tm:time-error 'set-date-minute! 'dates-are-immutable date)) + +(define (set-date-day! date val) + (tm:time-error 'set-date-day! 'dates-are-immutable date)) + +(define (set-date-month! date val) + (tm:time-error 'set-date-month! 'dates-are-immutable date)) + +(define (set-date-year! date val) + (tm:time-error 'set-date-year! 'dates-are-immutable date)) + +(define (set-date-zone-offset! date val) + (tm:time-error 'set-date-zone-offset! 'dates-are-immutable date)) + +;; gives the julian day which starts at noon. +(define (tm:encode-julian-day-number day month year) + (let* ((a (quotient (- 14 month) 12)) + (y (- (+ year 4800) a (if (negative? year) -1 0))) + (m (- (+ month (* 12 a)) 3))) + (+ day + (quotient (+ (* 153 m) 2) 5) + (* 365 y) + (quotient y 4) + (- (quotient y 100)) + (quotient y 400) + -32045))) + +(define (tm:split-real r) + (if (integer? r) (values r 0) + (let ((l (truncate r))) + (values l (- r l))))) + +;; gives the seconds/date/month/year +(define (tm:decode-julian-day-number jdn) + (let* ((days (truncate jdn)) + (a (+ days 32044)) + (b (quotient (+ (* 4 a) 3) 146097)) + (c (- a (quotient (* 146097 b) 4))) + (d (quotient (+ (* 4 c) 3) 1461)) + (e (- c (quotient (* 1461 d) 4))) + (m (quotient (+ (* 5 e) 2) 153)) + (y (+ (* 100 b) d -4800 (quotient m 10)))) + (values ; seconds date month year + (* (- jdn days) tm:sid) + (+ e (- (quotient (+ (* 153 m) 2) 5)) 1) + (+ m 3 (* -12 (quotient m 10))) + (if (>= 0 y) (- y 1) y)) + )) + +(defining "TIME->DATE") + +;; relies on the fact that we named our time zone accessor +;; differently from MzScheme's.... +;; This should be written to be OS specific. + +(define (tm:local-tz-offset) + (date-time-zone-offset (seconds->date (current-seconds)))) + +;; special thing -- ignores nanos +(define (tm:time->julian-day-number seconds tz-offset) + (+ (/ (+ seconds + tz-offset + tm:sihd) + tm:sid) + tm:tai-epoch-in-jd)) + +(define (tm:leap-second? second) + (and (assoc second tm:leap-second-table) #t)) + +(define (time-utc->date time . tz-offset) + (if (not (eq? (time-type time) time-utc)) + (tm:time-error 'time->date 'incompatible-time-types time)) + (let* ( (offset (:optional tz-offset (tm:local-tz-offset))) + (is-leap-second (tm:leap-second? (+ offset (time-second time)))) ) + (call-with-values + (lambda () + (if is-leap-second + (tm:decode-julian-day-number (tm:time->julian-day-number (- (time-second time) 1) offset)) + (tm:decode-julian-day-number (tm:time->julian-day-number (time-second time) offset)))) + (lambda (secs date month year) + (let* ( (hours (quotient secs (* 60 60))) + (rem (remainder secs (* 60 60))) + (minutes (quotient rem 60)) + (seconds (remainder rem 60)) ) + (make-date (time-nanosecond time) + (if is-leap-second (+ seconds 1) seconds) + minutes + hours + date + month + year + offset)))))) + +(define (time-tai->date time . tz-offset) + (if (not (eq? (time-type time) time-tai)) + (tm:time-error 'time->date 'incompatible-time-types time)) + (let* ( (offset (:optional tz-offset (tm:local-tz-offset))) + (seconds (- (time-second time) (tm:leap-second-delta (time-second time)))) + (is-leap-second (tm:leap-second? (+ offset seconds))) ) + (call-with-values + (lambda () + (if is-leap-second + (tm:decode-julian-day-number (tm:time->julian-day-number (- seconds 1) offset)) + (tm:decode-julian-day-number (tm:time->julian-day-number seconds offset))) ) + (lambda (secs date month year) + ;; adjust for leap seconds if necessary ... + (let* ( (hours (quotient secs (* 60 60))) + (rem (remainder secs (* 60 60))) + (minutes (quotient rem 60)) + (seconds (remainder rem 60)) ) + (make-date (time-nanosecond time) + (if is-leap-second (+ seconds 1) seconds) + minutes + hours + date + month + year + offset)))))) + +;; this is the same as time-tai->date. +(define (time-monotonic->date time . tz-offset) + (if (not (eq? (time-type time) time-monotonic)) + (tm:time-error 'time->date 'incompatible-time-types time)) + (let* ( (offset (:optional tz-offset (tm:local-tz-offset))) + (seconds (- (time-second time) (tm:leap-second-delta (time-second time)))) + (is-leap-second (tm:leap-second? (+ offset seconds))) ) + (call-with-values + (lambda () + (if is-leap-second + (tm:decode-julian-day-number (tm:time->julian-day-number (- seconds 1) offset)) + (tm:decode-julian-day-number (tm:time->julian-day-number seconds offset))) ) + (lambda (secs date month year) + ;; adjust for leap seconds if necessary ... + (let* ( (hours (quotient secs (* 60 60))) + (rem (remainder secs (* 60 60))) + (minutes (quotient rem 60)) + (seconds (remainder rem 60)) ) + (make-date (time-nanosecond time) + (if is-leap-second (+ seconds 1) seconds) + minutes + hours + date + month + year + offset)))))) + +(define (date->time-utc date) + (let ( (nanosecond (date-nanosecond date)) + (second (date-second date)) + (minute (date-minute date)) + (hour (date-hour date)) + (day (date-day date)) + (month (date-month date)) + (year (date-year date)) ) + (let ( (jdays (- (tm:encode-julian-day-number day month year) + tm:tai-epoch-in-jd)) ) + (make-time + time-utc + (+ (* (- jdays 1/2) 24 60 60) + (* hour 60 60) + (* minute 60) + second) + nanosecond)))) + +(define (date->time-tai date) + (time-utc->time-tai! (date->time-utc date))) + +(define (date->time-monotonic date) + (time-utc->time-monotonic! (date->time-utc date))) + +(defining "Date accessors") + +(define (tm:leap-year? year) + (or (= (modulo year 400) 0) + (and (= (modulo year 4) 0) (not (= (modulo year 100) 0))))) + +(define (leap-year? date) + (tm:leap-year? (date-year date))) + +(define tm:month-assoc '((1 . 31) (2 . 59) (3 . 90) (4 . 120) + (5 . 151) (6 . 181) (7 . 212) (8 . 243) + (9 . 273) (10 . 304) (11 . 334) (12 . 365))) + +(define (tm:year-day day month year) + (let ((days-pr (assoc day tm:month-assoc))) + (if (not days-pr) + (tm:error 'date-year-day 'invalid-month-specification month)) + (if (and (tm:leap-year? year) (> month 2)) + (+ day (cdr days-pr) 1) + (+ day (cdr days-pr))))) + +(define (date-year-day date) + (tm:year-day (date-day date) (date-month date) (date-year date))) + +;; from calendar faq +(define (tm:week-day day month year) + (let* ((a (quotient (- 14 month) 12)) + (y (- year a)) + (m (+ month (* 12 a) -2))) + (modulo (+ day y (quotient y 4) (- (quotient y 100)) + (quotient y 400) (quotient (* 31 m) 12)) + 7))) + +(define (date-week-day date) + (tm:week-day (date-day date) (date-month date) (date-year date))) + +(define (tm:days-before-first-week date day-of-week-starting-week) + (let* ( (first-day (make-date 0 0 0 0 + 1 + 1 + (date-year date) + #f)) + (fdweek-day (date-week-day first-day)) ) + (modulo (- day-of-week-starting-week fdweek-day) + 7))) + +(define (date-week-number date day-of-week-starting-week) + (quotient (- (date-year-day date) + (tm:days-before-first-week date day-of-week-starting-week)) + 7)) + +(defining "Current Date") + +(define (current-date . tz-offset) + (time-utc->date (current-time time-utc) + (:optional tz-offset (tm:local-tz-offset)))) + +;; given a 'two digit' number, find the year within 50 years +/- +(define (tm:natural-year n) + (let* ( (current-year (date-year (current-date))) + (current-century (* (quotient current-year 100) 100)) ) + (cond + ((>= n 100) n) + ((< n 0) n) + ((<= (- (+ current-century n) current-year) 50) + (+ current-century n)) + (else + (+ (- current-century 100) n))))) + +(defining "Julian Day") + +(define (date->julian-day date) + (let ( (nanosecond (date-nanosecond date)) + (second (date-second date)) + (minute (date-minute date)) + (hour (date-hour date)) + (day (date-day date)) + (month (date-month date)) + (year (date-year date)) ) + (+ (tm:encode-julian-day-number day month year) + (- 1/2) + (+ (/ (+ (* hour 60 60) + (* minute 60) + second + (/ nanosecond tm:nano)) + tm:sid))))) + +(define (date->modified-julian-day date) + (- (date->julian-day date) + 4800001/2)) + + +(define (time-utc->julian-day time) + (if (not (eq? (time-type time) time-utc)) + (tm:time-error 'time->date 'incompatible-time-types time)) + (+ (/ (+ (time-second time) (/ (time-nanosecond time) tm:nano)) + tm:sid) + tm:tai-epoch-in-jd)) + +(define (time-utc->modified-julian-day time) + (- (time-utc->julian-day time) + 4800001/2)) + +(define (time-tai->julian-day time) + (if (not (eq? (time-type time) time-tai)) + (tm:time-error 'time->date 'incompatible-time-types time)) + (+ (/ (+ (- (time-second time) + (tm:leap-second-delta (time-second time))) + (/ (time-nanosecond time) tm:nano)) + tm:sid) + tm:tai-epoch-in-jd)) + +(define (time-tai->modified-julian-day time) + (- (time-tai->julian-day time) + 4800001/2)) + +;; this is the same as time-tai->julian-day +(define (time-monotonic->julian-day time) + (if (not (eq? (time-type time) time-monotonic)) + (tm:time-error 'time->date 'incompatible-time-types time)) + (+ (/ (+ (- (time-second time) + (tm:leap-second-delta (time-second time))) + (/ (time-nanosecond time) tm:nano)) + tm:sid) + tm:tai-epoch-in-jd)) + + +(define (time-monotonic->modified-julian-day time) + (- (time-monotonic->julian-day time) + 4800001/2)) + + +(define (julian-day->time-utc jdn) + (let ( (secs (* tm:sid (- jdn tm:tai-epoch-in-jd))) ) + (receive (seconds parts) + (tm:split-real secs) + (make-time time-utc + (inexact->exact seconds) + (inexact->exact (truncate (* parts tm:nano))))))) + +(define (julian-day->time-tai jdn) + (time-utc->time-tai! (julian-day->time-utc jdn))) + +(define (julian-day->time-monotonic jdn) + (time-utc->time-monotonic! (julian-day->time-utc jdn))) + +(define (julian-day->date jdn . tz-offset) + (let ((offset (:optional tz-offset (tm:local-tz-offset)))) + (time-utc->date (julian-day->time-utc jdn) offset))) + +(define (modified-julian-day->date jdn . tz-offset) + (let ((offset (:optional tz-offset (tm:local-tz-offset)))) + (julian-day->date (+ jdn 4800001/2) offset))) + +(define (modified-julian-day->time-utc jdn) + (julian-day->time-utc (+ jdn 4800001/2))) + +(define (modified-julian-day->time-tai jdn) + (julian-day->time-tai (+ jdn 4800001/2))) + +(define (modified-julian-day->time-monotonic jdn) + (julian-day->time-monotonic (+ jdn 4800001/2))) + +(define (current-julian-day) + (time-utc->julian-day (current-time time-utc))) + +(define (current-modified-julian-day) + (time-utc->modified-julian-day (current-time time-utc))) + +(defining " Date formatting procedures.") + +;; returns a string rep. of number N, of minimum LENGTH, +;; padded with character PAD-WITH. If PAD-WITH if #f, +;; no padding is done, and it's as if number->string was used. +;; if string is longer than LENGTH, it's as if number->string was used. + +(define (tm:padding n pad-with length) + (let* ( (str (number->string n)) + (str-len (string-length str)) ) + (if (or (> str-len length) + (not pad-with)) + str + (let* ( (new-str (make-string length pad-with)) + (new-str-offset (- (string-length new-str) + str-len)) ) + (do ((i 0 (+ i 1))) + ((>= i (string-length str))) + (string-set! new-str (+ new-str-offset i) + (string-ref str i))) + new-str)))) + +(define (tm:last-n-digits i n) + (abs (remainder i (expt 10 n)))) + +(define (tm:locale-abbr-weekday n) + (vector-ref tm:locale-abbr-weekday-vector n)) + +(define (tm:locale-long-weekday n) + (vector-ref tm:locale-long-weekday-vector n)) + +(define (tm:locale-abbr-month n) + (vector-ref tm:locale-abbr-month-vector n)) + +(define (tm:locale-long-month n) + (vector-ref tm:locale-long-month-vector n)) + +(define (tm:vector-find needle haystack comparator) + (let ((len (vector-length haystack))) + (define (tm:vector-find-int index) + (cond + ((>= index len) #f) + ((comparator needle (vector-ref haystack index)) index) + (else (tm:vector-find-int (+ index 1))))) + (tm:vector-find-int 0))) + +(define (tm:locale-abbr-weekday->index string) + (tm:vector-find string tm:locale-abbr-weekday-vector string=?)) + +(define (tm:locale-long-weekday->index string) + (tm:vector-find string tm:locale-long-weekday-vector string=?)) + +(define (tm:locale-abbr-month->index string) + (tm:vector-find string tm:locale-abbr-month-vector string=?)) + +(define (tm:locale-long-month->index string) + (tm:vector-find string tm:locale-long-month-vector string=?)) + + + +;; do nothing. +;; Your implementation might want to do something... +;; +(define (tm:locale-print-time-zone date port) + (values)) + +;; Again, locale specific. +(define (tm:locale-am/pm hr) + (if (> hr 11) tm:locale-pm tm:locale-am)) + +(define (tm:tz-printer offset port) + (cond + ((= offset 0) (display "Z" port)) + ((negative? offset) (display "-" port)) + (else (display "+" port))) + (if (not (= offset 0)) + (let ( (hours (abs (quotient offset (* 60 60)))) + (minutes (abs (quotient (remainder offset (* 60 60)) 60))) ) + (display (tm:padding hours #\0 2) port) + (display (tm:padding minutes #\0 2) port)))) + +;; A table of output formatting directives. +;; the first time is the format char. +;; the second is a procedure that takes the date, a padding character +;; (which might be #f), and the output port. +;; +(define tm:directives + (list + (cons #\~ (lambda (date pad-with port) (display #\~ port))) + + (cons #\a (lambda (date pad-with port) + (display (tm:locale-abbr-weekday (date-week-day date)) + port))) + (cons #\A (lambda (date pad-with port) + (display (tm:locale-long-weekday (date-week-day date)) + port))) + (cons #\b (lambda (date pad-with port) + (display (tm:locale-abbr-month (date-month date)) + port))) + (cons #\B (lambda (date pad-with port) + (display (tm:locale-long-month (date-month date)) + port))) + (cons #\c (lambda (date pad-with port) + (display (date->string date tm:locale-date-time-format) port))) + (cons #\d (lambda (date pad-with port) + (display (tm:padding (date-day date) + #\0 2) + port))) + (cons #\D (lambda (date pad-with port) + (display (date->string date "~m/~d/~y") port))) + (cons #\e (lambda (date pad-with port) + (display (tm:padding (date-day date) + #\Space 2) + port))) + (cons #\f (lambda (date pad-with port) + (if (> (date-nanosecond date) + tm:nano) + (display (tm:padding (+ (date-second date) 1) + pad-with 2) + port) + (display (tm:padding (date-second date) + pad-with 2) + port)) + (receive (i f) + (tm:split-real (/ + (date-nanosecond date) + tm:nano 1.0)) + (let* ((ns (number->string f)) + (le (string-length ns))) + (if (> le 2) + (begin + (display tm:locale-number-separator port) + (display (substring ns 2 le) port))))))) + (cons #\h (lambda (date pad-with port) + (display (date->string date "~b") port))) + (cons #\H (lambda (date pad-with port) + (display (tm:padding (date-hour date) + pad-with 2) + port))) + (cons #\I (lambda (date pad-with port) + (let ((hr (date-hour date))) + (if (> hr 12) + (display (tm:padding (- hr 12) + pad-with 2) + port) + (display (tm:padding hr + pad-with 2) + port))))) + (cons #\j (lambda (date pad-with port) + (display (tm:padding (date-year-day date) + pad-with 3) + port))) + (cons #\k (lambda (date pad-with port) + (display (tm:padding (date-hour date) + #\Space 2) + port))) + (cons #\l (lambda (date pad-with port) + (let ((hr (if (> (date-hour date) 12) + (- (date-hour date) 12) (date-hour date)))) + (display (tm:padding hr #\Space 2) + port)))) + (cons #\m (lambda (date pad-with port) + (display (tm:padding (date-month date) + pad-with 2) + port))) + (cons #\M (lambda (date pad-with port) + (display (tm:padding (date-minute date) + pad-with 2) + port))) + (cons #\n (lambda (date pad-with port) + (newline port))) + (cons #\N (lambda (date pad-with port) + (display (tm:padding (date-nanosecond date) + pad-with 7) + port))) + (cons #\p (lambda (date pad-with port) + (display (tm:locale-am/pm (date-hour date)) port))) + (cons #\r (lambda (date pad-with port) + (display (date->string date "~I:~M:~S ~p") port))) + (cons #\s (lambda (date pad-with port) + (display (time-second (date->time-utc date)) port))) + (cons #\S (lambda (date pad-with port) + (if (> (date-nanosecond date) + tm:nano) + (display (tm:padding (+ (date-second date) 1) + pad-with 2) + port) + (display (tm:padding (date-second date) + pad-with 2) + port)))) + (cons #\t (lambda (date pad-with port) + (display #\Tab port))) + (cons #\T (lambda (date pad-with port) + (display (date->string date "~H:~M:~S") port))) + (cons #\U (lambda (date pad-with port) + (if (> (tm:days-before-first-week date 0) 0) + (display (tm:padding (+ (date-week-number date 0) 1) + #\0 2) port) + (display (tm:padding (date-week-number date 0) + #\0 2) port)))) + (cons #\V (lambda (date pad-with port) + (display (tm:padding (date-week-number date 1) + #\0 2) port))) + (cons #\w (lambda (date pad-with port) + (display (date-week-day date) port))) + (cons #\x (lambda (date pad-with port) + (display (date->string date tm:locale-short-date-format) port))) + (cons #\X (lambda (date pad-with port) + (display (date->string date tm:locale-time-format) port))) + (cons #\W (lambda (date pad-with port) + (if (> (tm:days-before-first-week date 1) 0) + (display (tm:padding (+ (date-week-number date 1) 1) + #\0 2) port) + (display (tm:padding (date-week-number date 1) + #\0 2) port)))) + (cons #\y (lambda (date pad-with port) + (display (tm:padding (tm:last-n-digits + (date-year date) 2) + pad-with + 2) + port))) + (cons #\Y (lambda (date pad-with port) + (display (date-year date) port))) + (cons #\z (lambda (date pad-with port) + (tm:tz-printer (date-zone-offset date) port))) + (cons #\Z (lambda (date pad-with port) + (tm:locale-print-time-zone date port))) + (cons #\1 (lambda (date pad-with port) + (display (date->string date "~Y-~m-~d") port))) + (cons #\2 (lambda (date pad-with port) + (display (date->string date "~k:~M:~S~z") port))) + (cons #\3 (lambda (date pad-with port) + (display (date->string date "~k:~M:~S") port))) + (cons #\4 (lambda (date pad-with port) + (display (date->string date "~Y-~m-~dT~k:~M:~S~z") port))) + (cons #\5 (lambda (date pad-with port) + (display (date->string date "~Y-~m-~dT~k:~M:~S") port))) + )) + + +(define (tm:get-formatter char) + (let ( (associated (assoc char tm:directives)) ) + (if associated (cdr associated) #f))) + +(define (tm:date-printer date index format-string str-len port) + (if (>= index str-len) + (values) + (let ( (current-char (string-ref format-string index)) ) + (if (not (char=? current-char #\~)) + (begin + (display current-char port) + (tm:date-printer date (+ index 1) format-string str-len port)) + (if (= (+ index 1) str-len) ; bad format string. + (tm:time-error 'tm:date-printer 'bad-date-format-string + format-string) + (let ( (pad-char? (string-ref format-string (+ index 1))) ) + (cond + ((char=? pad-char? #\-) + (if (= (+ index 2) str-len) ; bad format string. + (tm:time-error 'tm:date-printer 'bad-date-format-string + format-string) + (let ( (formatter (tm:get-formatter + (string-ref format-string + (+ index 2)))) ) + (if (not formatter) + (tm:time-error 'tm:date-printer 'bad-date-format-string + format-string) + (begin + (formatter date #f port) + (tm:date-printer date (+ index 3) + format-string str-len port)))))) + + ((char=? pad-char? #\_) + (if (= (+ index 2) str-len) ; bad format string. + (tm:time-error 'tm:date-printer 'bad-date-format-string + format-string) + (let ( (formatter (tm:get-formatter + (string-ref format-string + (+ index 2)))) ) + (if (not formatter) + (tm:time-error 'tm:date-printer 'bad-date-format-string + format-string) + (begin + (formatter date #\Space port) + (tm:date-printer date (+ index 3) + format-string str-len port)))))) + (else + (let ( (formatter (tm:get-formatter + (string-ref format-string + (+ index 1)))) ) + (if (not formatter) + (tm:time-error 'tm:date-printer 'bad-date-format-string + format-string) + (begin + (formatter date #\0 port) + (tm:date-printer date (+ index 2) + format-string str-len port)))))))))))) + + +(define (date->string date . format-string) + (let ( (str-port (open-output-string)) + (fmt-str (:optional format-string "~c")) ) + (tm:date-printer date 0 fmt-str (string-length fmt-str) str-port) + (get-output-string str-port))) + +(defining "STRING->DATE") + +(define (tm:char->int ch) + (cond + ((char=? ch #\0) 0) + ((char=? ch #\1) 1) + ((char=? ch #\2) 2) + ((char=? ch #\3) 3) + ((char=? ch #\4) 4) + ((char=? ch #\5) 5) + ((char=? ch #\6) 6) + ((char=? ch #\7) 7) + ((char=? ch #\8) 8) + ((char=? ch #\9) 9) + (else (tm:time-error 'bad-date-template-string + (list "Non-integer character" ch i))))) + +;; read an integer upto n characters long on port; upto -> #f if any length +(define (tm:integer-reader upto port) + (define (accum-int port accum nchars) + (let ((ch (peek-char port))) + (if (or (eof-object? ch) + (not (char-numeric? ch)) + (and upto (>= nchars upto ))) + accum + (accum-int port (+ (* accum 10) (tm:char->int (read-char + port))) (+ + nchars 1))))) + (accum-int port 0 0)) + +(define (tm:make-integer-reader upto) + (lambda (port) + (tm:integer-reader upto port))) + +;; read *exactly* n characters and convert to integer; could be padded +(define (tm:integer-reader-exact n port) + (let ( (padding-ok #t) ) + (define (accum-int port accum nchars) + (let ((ch (peek-char port))) + (cond + ((>= nchars n) accum) + ((eof-object? ch) + (tm:time-error 'string->date 'bad-date-template-string + "Premature ending to integer read.")) + ((char-numeric? ch) + (set! padding-ok #f) + (accum-int port (+ (* accum 10) (tm:char->int (read-char + port))) + (+ nchars 1))) + (padding-ok + (read-ch port) ; consume padding + (accum-int prot accum (+ nchars 1))) + (else ; padding where it shouldn't be + (tm:time-error 'string->date 'bad-date-template-string + "Non-numeric characters in integer read."))))) + (accum-int port 0 0))) + + +(define (tm:make-integer-exact-reader n) + (lambda (port) + (tm:integer-reader-exact n port))) + +(define (tm:zone-reader port) + (let ( (offset 0) + (positive? #f) ) + (let ( (ch (read-char port)) ) + (if (eof-object? ch) + (tm:time-error 'string->date 'bad-date-template-string + (list "Invalid time zone +/-" ch))) + (if (or (char=? ch #\Z) (char=? ch #\z)) + 0 + (begin + (cond + ((char=? ch #\+) (set! positive? #t)) + ((char=? ch #\-) (set! positive? #f)) + (else + (tm:time-error 'string->date 'bad-date-template-string + (list "Invalid time zone +/-" ch)))) + (let ((ch (read-char port))) + (if (eof-object? ch) + (tm:time-error 'string->date 'bad-date-template-string + (list "Invalid time zone number" ch))) + (set! offset (* (tm:char->int ch) + 10 60 60))) + (let ((ch (read-char port))) + (if (eof-object? ch) + (tm:time-error 'string->date 'bad-date-template-string + (list "Invalid time zone number" ch))) + (set! offset (+ offset (* (tm:char->int ch) + 60 60)))) + (let ((ch (read-char port))) + (if (eof-object? ch) + (tm:time-error 'string->date 'bad-date-template-string + (list "Invalid time zone number" ch))) + (set! offset (+ offset (* (tm:char->int ch) + 10 60)))) + (let ((ch (read-char port))) + (if (eof-object? ch) + (tm:time-error 'string->date 'bad-date-template-string + (list "Invalid time zone number" ch))) + (set! offset (+ offset (* (tm:char->int ch) + 60)))) + (if positive? offset (- offset))))))) + +;; looking at a char, read the char string, run thru indexer, return index +(define (tm:locale-reader port indexer) + (let ( (string-port (open-output-string)) ) + (define (read-char-string) + (let ((ch (peek-char port))) + (if (char-alphabetic? ch) + (begin (write-char (read-char port) string-port) + (read-char-string)) + (get-output-string string-port)))) + (let* ( (str (read-char-string)) + (index (indexer str)) ) + (if index index (tm:time-error 'string->date + 'bad-date-template-string + (list "Invalid string for " indexer)))))) + +(define (tm:make-locale-reader indexer) + (lambda (port) + (tm:locale-reader port indexer))) + +(define (tm:make-char-id-reader char) + (lambda (port) + (if (char=? char (read-char port)) + char + (tm:time-error 'string->date + 'bad-date-template-string + "Invalid character match.")))) + +;; A List of formatted read directives. +;; Each entry is a list. +;; 1. the character directive; +;; a procedure, which takes a character as input & returns +;; 2. #t as soon as a character on the input port is acceptable +;; for input, +;; 3. a port reader procedure that knows how to read the current port +;; for a value. Its one parameter is the port. +;; 4. a action procedure, that takes the value (from 3.) and some +;; object (here, always the date) and (probably) side-effects it. +;; In some cases (e.g., ~A) the action is to do nothing + +(define tm:read-directives + (let ( (ireader4 (tm:make-integer-reader 4)) + (ireader2 (tm:make-integer-reader 2)) + (ireaderf (tm:make-integer-reader #f)) + (eireader2 (tm:make-integer-exact-reader 2)) + (eireader4 (tm:make-integer-exact-reader 4)) + (locale-reader-abbr-weekday (tm:make-locale-reader + tm:locale-abbr-weekday->index)) + (locale-reader-long-weekday (tm:make-locale-reader + tm:locale-long-weekday->index)) + (locale-reader-abbr-month (tm:make-locale-reader + tm:locale-abbr-month->index)) + (locale-reader-long-month (tm:make-locale-reader + tm:locale-long-month->index)) + (char-fail (lambda (ch) #t)) + (do-nothing (lambda (val object) (values))) + ) + + (list + (list #\~ char-fail (tm:make-char-id-reader #\~) do-nothing) + (list #\a char-alphabetic? locale-reader-abbr-weekday do-nothing) + (list #\A char-alphabetic? locale-reader-long-weekday do-nothing) + (list #\b char-alphabetic? locale-reader-abbr-month + (lambda (val object) + (tm:set-date-month! object val))) + (list #\B char-alphabetic? locale-reader-long-month + (lambda (val object) + (tm:set-date-month! object val))) + (list #\d char-numeric? ireader2 (lambda (val object) + (tm:set-date-day! + object val))) + (list #\e char-fail eireader2 (lambda (val object) + (tm:set-date-day! object val))) + (list #\h char-alphabetic? locale-reader-abbr-month + (lambda (val object) + (tm:set-date-month! object val))) + (list #\H char-numeric? ireader2 (lambda (val object) + (tm:set-date-hour! object val))) + (list #\k char-fail eireader2 (lambda (val object) + (tm:set-date-hour! object val))) + (list #\m char-numeric? ireader2 (lambda (val object) + (tm:set-date-month! object val))) + (list #\M char-numeric? ireader2 (lambda (val object) + (tm:set-date-minute! + object val))) + (list #\S char-numeric? ireader2 (lambda (val object) + (tm:set-date-second! object val))) + (list #\y char-fail eireader2 + (lambda (val object) + (tm:set-date-year! object (tm:natural-year val)))) + (list #\Y char-numeric? ireader4 (lambda (val object) + (tm:set-date-year! object val))) + (list #\z (lambda (c) + (or (char=? c #\Z) + (char=? c #\z) + (char=? c #\+) + (char=? c #\-))) + tm:zone-reader (lambda (val object) + (tm:set-date-zone-offset! object val))) + ))) + +(define (tm:string->date date index format-string str-len port template-string) + (define (skip-until port skipper) + (let ((ch (peek-char port))) + (if (eof-object? port) + (tm:time-error 'string->date 'bad-date-format-string template-string) + (if (not (skipper ch)) + (begin (read-char port) (skip-until port skipper)))))) + (if (>= index str-len) + (begin + (values)) + (let ( (current-char (string-ref format-string index)) ) + (if (not (char=? current-char #\~)) + (let ((port-char (read-char port))) + (if (or (eof-object? port-char) + (not (char=? current-char port-char))) + (tm:time-error 'string->date 'bad-date-format-string template-string)) + (tm:string->date date (+ index 1) format-string str-len port template-string)) + ;; otherwise, it's an escape, we hope + (if (> (+ index 1) str-len) + (tm:time-error 'string->date 'bad-date-format-string template-string) + (let* ( (format-char (string-ref format-string (+ index 1))) + (format-info (assoc format-char tm:read-directives)) ) + (if (not format-info) + (tm:time-error 'string->date 'bad-date-format-string template-string) + (begin + (let ((skipper (cadr format-info)) + (reader (caddr format-info)) + (actor (cadddr format-info))) + (skip-until port skipper) + (let ((val (reader port))) + (if (eof-object? val) + (tm:time-error 'string->date 'bad-date-format-string template-string) + (actor val date))) + (tm:string->date date (+ index 2) format-string str-len port template-string)))))))))) + +(define (string->date input-string template-string) + (define (tm:date-ok? date) + (and (date-nanosecond date) + (date-second date) + (date-minute date) + (date-hour date) + (date-day date) + (date-month date) + (date-year date) + (date-zone-offset date))) + (let ( (newdate (make-date 0 0 0 0 #f #f #f (tm:local-tz-offset))) ) + (tm:string->date newdate + 0 + template-string + (string-length template-string) + (open-input-string input-string) + template-string) + (if (tm:date-ok? newdate) + newdate + (tm:time-error 'string->date 'bad-date-format-string (list "Incomplete date read. " newdate template-string))))) + +(defining "DATE is all done.") + \ No newline at end of file diff --git a/scheme/srfi/srfi-2.scm b/scheme/srfi/srfi-2.scm new file mode 100644 index 0000000..b01cc88 --- /dev/null +++ b/scheme/srfi/srfi-2.scm @@ -0,0 +1,102 @@ +; Copyright (c) 1993-2001 by Richard Kelsey and Jonathan Rees. See file COPYING. + +; The reference implementation is written in some weird Scheme variant. +; This is an attempt to produce the same result using SYNTAX-RULES. + +; I found the both the specification and the implementation unhelpful. +; For example, one would think that (AND-LET* ()) -> #T by analogy with +; (AND) -> #T. The specification doesn't say. +; +; The following behaves correctly on the test cases at the end of the +; reference implementation, except that it doesn't catch the three syntax +; errors. There is no way for SYNTAX-RULES to distinguish between a +; constant and a variable, and no easy way to check if a variable is +; being used twice in the same AND-LET* (and why is that an error? LET* +; allows it). + +(define-syntax and-let* + (syntax-rules () + + ; No body - behave like AND. + ((and-let* ()) + #t) + ((and-let* ((var exp))) + exp) + ((and-let* ((exp))) + exp) + ((and-let* (var)) + var) + + ; Have body - behave like LET* but check for #F values. + + ; No clauses so just use the body. + ((and-let* () . body) + (begin . body)) + + ; (VAR VAL) clause - bind the variable and check for #F. + ((and-let* ((var val) more ...) . body) + (let ((var val)) + (if var + (and-let* (more ...) . body) + #f))) + + ; Error check to catch illegal (A B ...) clauses. + ((and-let* ((exp junk . more-junk) more ...) . body) + (error "syntax error" + '(and-let* ((exp junk . more-junk) more ...) . body))) + + ; (EXP) and VAR - just check the value for #F. + ; There is no way for us to check that VAR is an identifier and not a + ; constant + ((and-let* ((exp) more ...) . body) + (if exp + (and-let* (more ...) . body) + #f)) + ((and-let* (var more ...) . body) + (if var + (and-let* (more ...) . body) + #f)))) + +;(define-syntax expect +; (syntax-rules () +; ((expect a b) +; (if (not (equal? a b)) +; (error "test failed" 'a b))))) +; +;(expect (and-let* () 1) 1) +;(expect (and-let* () 1 2) 2) +;(expect (and-let* () ) #t) +; +;(expect (let ((x #f)) (and-let* (x))) #f) +;(expect (let ((x 1)) (and-let* (x))) 1) +;(expect (and-let* ((x #f)) ) #f) +;(expect (and-let* ((x 1)) ) 1) +;;(must-be-a-syntax-error (and-let* ( #f (x 1))) ) +;(expect (and-let* ( (#f) (x 1)) ) #f) +;;(must-be-a-syntax-error (and-let* (2 (x 1))) ) +;(expect (and-let* ( (2) (x 1)) ) 1) +;(expect (and-let* ( (x 1) (2)) ) 2) +;(expect (let ((x #f)) (and-let* (x) x)) #f) +;(expect (let ((x "")) (and-let* (x) x)) "") +;(expect (let ((x "")) (and-let* (x) )) "") +;(expect (let ((x 1)) (and-let* (x) (+ x 1))) 2) +;(expect (let ((x #f)) (and-let* (x) (+ x 1))) #f) +;(expect (let ((x 1)) (and-let* (((positive? x))) (+ x 1))) 2) +;(expect (let ((x 1)) (and-let* (((positive? x))) )) #t) +;(expect (let ((x 0)) (and-let* (((positive? x))) (+ x 1))) #f) +;(expect (let ((x 1)) (and-let* (((positive? x)) (x (+ x 1))) (+ x 1))) 3) +;;(must-be-a-syntax-error +;; (let ((x 1)) (and-let* (((positive? x)) (x (+ x 1)) (x (+ x 1))) (+ x 1))) +;;) +; +;(expect (let ((x 1)) (and-let* (x ((positive? x))) (+ x 1))) 2) +;(expect (let ((x 1)) (and-let* ( ((begin x)) ((positive? x))) (+ x 1))) 2) +;(expect (let ((x 0)) (and-let* (x ((positive? x))) (+ x 1))) #f) +;(expect (let ((x #f)) (and-let* (x ((positive? x))) (+ x 1))) #f) +;(expect (let ((x #f)) (and-let* ( ((begin x)) ((positive? x))) (+ x 1))) #f) +; +;(expect (let ((x 1)) (and-let* (x (y (- x 1)) ((positive? y))) (/ x y))) #f) +;(expect (let ((x 0)) (and-let* (x (y (- x 1)) ((positive? y))) (/ x y))) #f) +;(expect (let ((x #f)) (and-let* (x (y (- x 1)) ((positive? y))) (/ x y))) #f) +;(expect (let ((x 3)) (and-let* (x (y (- x 1)) ((positive? y))) (/ x y))) 3/2) + diff --git a/scheme/srfi/srfi-5.scm b/scheme/srfi/srfi-5.scm new file mode 100644 index 0000000..65fa81e --- /dev/null +++ b/scheme/srfi/srfi-5.scm @@ -0,0 +1,68 @@ +; Copyright (c) 1993-2001 by Richard Kelsey and Jonathan Rees. See file COPYING. + +; Rewritten, simplified, and corrected from the SRFI document. +; +; The SRFI implementation gets the scoping wrong for the name. It is visible +; to the arguments and should not be. + +(define-syntax let + (syntax-rules () + + ; If no name we go straight to the standard LET. + ((let () body ...) + (standard-let () body ...)) + ((let ((variable value) bindings ...) body ...) + (standard-let ((variable value) bindings ...) body ...)) + + ;; Signature-style and standard named LET. + ((let (name bindings ...) body ...) + (let-loop name (bindings ...) () () (body ...))) + ((let name bindings body ...) + (let-loop name bindings () () (body ...))))) + +; A loop to walk down the list of bindings. + +(define-syntax let-loop + (syntax-rules () + + ; No more bindings - make a LETREC. + ((let-loop name () (vars ...) (vals ...) body) + ((letrec ((name (lambda (vars ...) . body))) + name) + vals ...)) + + ; Process a (var val) pair. + ((let-loop name ((var val) more ...) (vars ...) (vals ...) body) + (let-loop name (more ...) (vars ... var) (vals ... val) body)) + + ; End with a rest variable - make a LETREC. + ((let-loop name (rest-var rest-vals ...) (vars ...) (vals ...) body) + ((letrec ((name (lambda (vars ... . rest-var) . body))) + name) + vals ... rest-vals ...)))) + +; Four loops - normal and `signature-style', each with and without a rest +; binding. +; +;(let fibonacci ((n 10) (i 0) (f0 0) (f1 1)) +; (if (= i n) +; f0 +; (fibonacci n (+ i 1) f1 (+ f0 f1)))) +; +;(let (fibonacci (n 10) (i 0) (f0 0) (f1 1)) +; (if (= i n) +; f0 +; (fibonacci n (+ i 1) f1 (+ f0 f1)))) +; +;(let fibonacci ((n 10) (i 0) . (f 0 1)) +; (if (= i n) +; (car f) +; (fibonacci n (+ i 1) (cadr f) (+ (car f) (cadr f))))) +; +;(let (fibonacci (n 10) (i 0) . (f 0 1)) +; (if (= i n) +; (car f) +; (fibonacci n (+ i 1) (cadr f) (+ (car f) (cadr f))))) + + + diff --git a/scheme/srfi/srfi-7.scm b/scheme/srfi/srfi-7.scm new file mode 100644 index 0000000..f01935c --- /dev/null +++ b/scheme/srfi/srfi-7.scm @@ -0,0 +1,232 @@ +; Copyright (c) 1993-2001 by Richard Kelsey and Jonathan Rees. See file COPYING. + + +; A command for loading SRFI-7 programs. This is a user command because +; it gets loaded after the command processor is built. + +(define-user-command-syntax 'load-srfi-7-program " " + "load an SRFI-7 program" + '(name filename)) + +; We create a structure, EVAL it in the configuration package, give +; it a name, and then load it. + +(define (load-srfi-7-program name filename) + (eval `(define ,name ,(read-srfi-7-program filename)) + (config-package)) + (let ((structure (eval name (config-package)))) + (note-structure-name! structure name) + (ensure-loaded structure))) + +; Add the LOAD-SRFI-7-PROGRAM to the user's command environment. + +(environment-define! (user-command-environment) + 'load-srfi-7-program + load-srfi-7-program) + +; Read a program from FILENAME and return the code for a structure that +; contains it. + +(define (read-srfi-7-program filename) + (call-with-input-file filename + (lambda (in) + (let ((program (read in))) + (if (and (pair? program) + (eq? (car program) 'program)) + (receive (needed source) + (parse-program program available-srfis) + (if needed + (program->structure-exp needed source) + (error "cannot satisfy program's requirements"))) + (error "program not found in file" filename)))))) + +; Returns a STRUCTURE expression for a program that uses the SRFIs listed +; in NEEDED and whose source is SOURCE. + +(define (program->structure-exp needed source) + (let ((shadowed (find-shadowed needed))) + `(structure (export) + (open ,(if (null? shadowed) + 'scheme + `(modify scheme (hide . ,shadowed))) + . ,needed) + . ,(map (lambda (source) + (if (eq? (car source) + 'code) + (cons 'begin (cdr source)) + source)) + source)))) + +; Returns a list of the names that SRFIS redefine from Scheme. + +(define (find-shadowed srfis) + (apply append (map (lambda (srfi) + (cond ((assq srfi shadowed) + => cdr) + (else + '()))) + srfis))) + +;---------------- +; Parsing a program to find the source that we will use for it. +; +; The arguments are a PROGRAM form and a list of the names of available SRFIs. +; Two values are returned: a list of needed SRFIs and the program source as +; a list of (files ...) and (code ...) clauses. +; +; This searches through the possible sets of SRFIs to find one that works for +; the program. The EITHER macro is used to try choices. There are only two +; places where choices occur: FEATURE-COND clauses and OR predicates (which +; includes (NOT (AND ...)) predicates). + +(define (parse-program program available) + (receive (needed available source) + (with-nondeterminism + (lambda () + (either (process-clauses (cdr program) '() available) + (values #f #f #f)))) + (if needed + (values needed (reverse source)) + (values #f #f)))) + +; NEEDED is a list of SRFIs that we already know we need. AVAILABLE is a list +; of other SRFIs that can be used. This returns new needed and available lists +; as well as a list of (files ...) and (code ...) clauses. +; +; This is a simple dispatch on the types of clauses. For REQUIRES and +; FEATURE-COND we call another procedure to check the requirements list +; or cond clauses. + +(define (process-clauses clauses needed available) + (let loop ((clauses clauses) + (needed needed) + (available available) + (source '())) + (if (null? clauses) + (values needed available source) + (let ((clause (car clauses))) + (case (car clause) + ((requires) + (receive (needed available) + (check-predicate `(and . ,(cdr clause)) #f needed available) + (loop (cdr clauses) + needed + available + source))) + ((feature-cond) + (receive (needed available more-source) + (process-cond-clauses (cdr clause) needed available) + (loop (cdr clauses) + needed + available + (append more-source source)))) + ((code files) + (loop (cdr clauses) + needed + available + (cons clause source))) + (else + (error "bad program clause" clause))))))) + + +; Loop down CLAUSES looking for one whose predicate can be satisfied. +; If we find one we process its clauses. EITHER is used to allow us +; to backtrack in case of failure. + +(define (process-cond-clauses clauses needed available) + (let loop ((clauses clauses)) + (if (null? clauses) + (fail) + (either (receive (needed available) + (check-predicate (caar clauses) #f needed available) + (process-clauses (cdar clauses) needed available)) + (loop (cdr clauses)))))) + +; REQUIREMENT is one of: +; (and ...) +; (or ...) +; (not ) +; +; NEGATE? is true if we really want the negation of REQUIREMENT. +; +; AND and OR have their own procedures, which get flipped by negation. +; NOT just flips NEGATE?. There are two separate procedures for positive +; and negative occurances of . + +(define (check-predicate requirement negate? needed available) + (cond ((pair? requirement) + (case (car requirement) + ((and) + ((if negate? any-satisfied all-satisfied) + (cdr requirement) negate? needed available)) + ((or) + ((if negate? all-satisfied any-satisfied) + (cdr requirement) negate? needed available)) + ((not) + (check-predicate (cadr requirement) + (not negate?) + needed + available)))) + (negate? + (do-not-want requirement needed available)) + (else + (want requirement needed available)))) + +; We want SRFI. If it is NEEDED we are fine. If it is in AVAILABLE we +; move it to needed. Otherwise we lose. + +(define (want srfi needed available) + (cond ((memq srfi needed) + (values needed available)) + ((memq srfi available) + (values (cons srfi needed) + (delq srfi available))) ; not really necessary + (else + (fail)))) + +; We do not want SRFI. If it is NEEDED we lose. If it is in AVAILABLE we +; get rid of it. Otherwise we win as-is. + +(define (do-not-want srfi needed available) + (cond ((memq srfi needed) + (fail)) + ((memq srfi available) + (values needed + (delq srfi available))) + (else + (values needed available)))) + +; Two loops for `and' and `or'. The `and' loop needs to update NEEDED +; and AVAILABLE as it goes, the `or' keeps reusing the originals. + +(define (all-satisfied list negate? needed available) + (let loop ((list list) (needed needed) (available available)) + (if (null? list) + (values needed available) + (receive (needed available) + (check-predicate (car list) negate? needed available) + (if needed + (loop (cdr list) needed available) + (fail)))))) + +; Again, we use EITHER to allow for backtracking. + +(define (any-satisfied list negate? needed available) + (let loop ((list list)) + (if (null? list) + (fail) + (either (check-predicate (car list) negate? needed available) + (loop (cdr list)))))) + +;---------------- +; Our own copy to avoid having to load BIG-UTIL to get the original. + +(define (delq thing list) + (cond ((null? list) + '()) + ((eq? thing (car list)) + (cdr list)) + (else + (cons (car list) + (delq thing (cdr list)))))) +