From 58f90e23590bdd9d5afadcbf4a2a1d337a2036b3 Mon Sep 17 00:00:00 2001 From: marting Date: Thu, 23 Sep 1999 15:24:25 +0000 Subject: [PATCH] added from 0.5.2 --- scsh/lib/ccp-pack.scm | 106 ++ scsh/lib/ccp.scm | 576 +++++++ scsh/lib/list-lib.scm | 1508 +++++++++++++++++++ scsh/lib/list-pack.scm | 235 +++ scsh/lib/srfi-1.html | 3056 ++++++++++++++++++++++++++++++++++++++ scsh/lib/srfi-1.txt | 1912 ++++++++++++++++++++++++ scsh/lib/string-lib.scm | 1384 +++++++++++++++++ scsh/lib/string-pack.scm | 315 ++++ scsh/lib/strings.txt | 578 +++++++ 9 files changed, 9670 insertions(+) create mode 100644 scsh/lib/ccp-pack.scm create mode 100644 scsh/lib/ccp.scm create mode 100644 scsh/lib/list-lib.scm create mode 100644 scsh/lib/list-pack.scm create mode 100644 scsh/lib/srfi-1.html create mode 100644 scsh/lib/srfi-1.txt create mode 100644 scsh/lib/string-lib.scm create mode 100644 scsh/lib/string-pack.scm create mode 100644 scsh/lib/strings.txt diff --git a/scsh/lib/ccp-pack.scm b/scsh/lib/ccp-pack.scm new file mode 100644 index 0000000..b5095a1 --- /dev/null +++ b/scsh/lib/ccp-pack.scm @@ -0,0 +1,106 @@ +;;; CPP Lib +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Character->Character Partial functions + +;;; Many of these types are pretty weak, but there is no way to +;;; specify that a parameter must be a particular record type. +;;; Every little bit helps, though. + +(define-interface ccp-lib-interface + (export + ;; ccp? x -> boolean + (ccp? (proc (:value) :boolean)) + + ;; ccp-domain ccp -> char-set + (ccp-domain (proc (:value) :value)) ; Not very informative. + + ;; ccp-copy ccp -> ccp + (ccp-copy (proc (:value) :value)) + + ;; ccp= ccp1 ccp2 ... + ;; ccp<= ccp1 ccp2 ... + ((ccp= ccp<=) (proc (&rest :value) :boolean)) ; Not very informative. + + ;; ccp-fold kons knil ccp -> value + (ccp-fold (proc ((proc (:char :char :value) :value) :value :value) :value)) + + ;; ccp-for-each proc ccp + (ccp-for-each (proc ((proc (:char :char) :values)) :unspecific)) + + ;; ccp->alist ccp -> alist + (ccp->alist (proc (:value) :value)) + + ;; ccp-restrict ccp cset -> ccp + ;; ccp-restrict! ccp cset -> ccp + ((ccp-restrict ccp-restrict!) (proc (:value :value) :value)) + + ;; ccp-adjoin ccp from-char1 to-char1 ... -> ccp + ;; ccp-adjoin! ccp from-char1 to-char1 ... -> ccp + ;; ccp-delete ccp from-char1 ... -> ccp + ;; ccp-delete! ccp from-char1 ... -> ccp + ((ccp-adjoin ccp-adjoin!) (proc (:value &rest :char) :value)) + ((ccp-delete ccp-delete!) (proc (:value &rest :char) :value)) + + ;; ccp-extend ccp1 ... -> ccp + ;; ccp-extend! ccp1 ... -> ccp + ((ccp-extend ccp-extend!) (proc (&rest :value) :value)) + + ;; ccp-compose ccp1 ... -> ccp + (ccp-compose (proc (&rest :value) :value)) + + ;; alist->ccp char/char-alist [ccp] -> ccp + ;; alist->ccp! char/char-alist [ccp] -> ccp + ((alist->ccp alist->ccp!) (proc (:value &opt :value) :value)) + + ;; proc->ccp proc [domain ccp] -> ccp + ;; proc->ccp! proc [domain ccp] -> ccp + ((proc->ccp proc->ccp!) (proc ((proc (:char) :char) &opt :value :value) + :value)) + + ;; constant-ccp char [domain ccp] -> ccp + ;; constant-ccp! char domain ccp -> ccp + ((constant-ccp constant-ccp!) (proc (:char &opt :value :value) :value)) + + ;; ccp/mappings from1 to1 ... -> ccp + ;; extend-ccp/mappings ccp from1 to1 ... -> ccp + ;; extend-ccp/mappings! ccp from1 to1 ... -> ccp + (ccp/mappings (proc (&rest :value) :value)) + ((extend-ccp/mappings extend-ccp/mappings!) + (proc (:value &rest :value) :value)) + + ;; construct-ccp ccp elt ... -> ccp + ;; construct-ccp! ccp elt ... -> ccp + ((construct-ccp construct-ccp!) (proc (:value &rest :value) :value)) + + ;; ccp-unfold p f g seed -> ccp + (ccp-unfold (proc ((proc (:value) :boolean) + (procedure :value (some-values :char :char)) + (proc (:value) :value) + :value) + :value)) + + ;; tr ccp string [start end] -> string + ;; ccp-map ccp string [start end] -> string + ;; ccp-map! ccp string [start end] + ;; ccp-app ccp char -> char or false + ((tr ccp-map) + (proc (:value :string &opt :exact-integer :exact-integer) :string)) + (ccp-map! (proc (:value :string &opt :exact-integer :exact-integer) :unspecific)) + (ccp-app (proc (:value :char) :value)) + + ;; Primitive CCP's. + ccp:0 ccp:1 ccp:upcase ccp:downcase + )) + +(define-structure ccp-lib ccp-lib-interface + (open char-set-package + ascii + defrec-package + string-lib + let-opt + receiving + list-lib ; EVERY + error-package + scheme) + (files ccp) + (optimize auto-integrate)) diff --git a/scsh/lib/ccp.scm b/scsh/lib/ccp.scm new file mode 100644 index 0000000..16dcece --- /dev/null +++ b/scsh/lib/ccp.scm @@ -0,0 +1,576 @@ +;;; Char->char partial maps -*- Scheme -*- +;;; Copyright (C) 1998 by Olin Shivers. + +;;; CCPs are an efficient data structure for doing simple string transforms, +;;; similar to the kinds of things you would do with the tr(1) program. +;;; +;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit +;;; character types would need a more sophisticated data structure, tuned +;;; for sparseness. I would suggest something like this: +;;; (define-record ccp +;;; domain ; The domain char-set +;;; map ; Sorted vector of (char . string) pairs +;;; ; specifying the map. +;;; id?) ; If true, mappings not specified by MAP are +;;; ; identity mapping. If false, MAP must +;;; ; specify a mapping for every char in DOMAIN. +;;; +;;; A (char . string) elements in MAP specifies a mapping for the contiguous +;;; sequence of L chars beginning with CHAR (in the sequence of the underlying +;;; char type representation), where L is the length of STRING. These MAP elements +;;; are sorted by CHAR, so that binary search can be used to get from an input +;;; character C to the right MAP element quickly. +;;; +;;; This representation should be reasonably compact for standard mappings on, +;;; say, a Unicode CCP. An implementation might wish to have a cache field +;;; in the record for storing the full 8kb bitset when performing ccp-map +;;; operations. Or, an implementation might want to store the Latin-1 subset +;;; of the map in a dense format, and keep the remainder in a sparse format. + +(define num-chars (char-set-size char-set:full)) ; AKA 256. + +(define-record ccp + domain ; The domain char-set + dshared? ; Is the domain value shared or linear? + map ; 256-elt string + mshared?) ; Is the map string shared or linear? + + +;;; Accessors and setters that manage the linear bookkeeping +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (ccp-domain ccp) + (set-ccp:dshared? ccp #t) + (ccp:domain ccp)) + +;;; CCP is a linear ccp. PROC is a domain->domain function; it must be +;;; linear in its parameter and result. +;;; +;;; Updates the domain of the CCP with PROC, returns the resulting +;;; CCP; reuses the old one to construct the new one. + +(define (restrict-linear-ccp-domain ccp proc) + (let ((new-d (proc (if (ccp:dshared? ccp) + (begin (set-ccp:dshared? ccp #f) + (char-set-copy (ccp:domain ccp))) + (ccp:domain ccp))))) + (set-ccp:domain ccp new-d) + ccp)) + +;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function. +;;; It is passed a linear domain and cmap string. It may side-effect +;;; the cmap string, and returns the resulting updated domain. +;;; We return the resulting CCP, reusing the parameter to construct it. + +(define (linear-update-ccp ccp proc) + (let* ((cmap (if (ccp:mshared? ccp) + (begin (set-ccp:mshared? ccp #f) + (string-copy (ccp:map ccp))) + (ccp:map ccp))) + + (new-d (proc (if (ccp:dshared? ccp) + (begin (set-ccp:dshared? ccp #f) + (char-set-copy (ccp:domain ccp))) + (ccp:domain ccp)) + cmap))) + (set-ccp:domain ccp new-d) + ccp)) + + + +;;; Return CCP's map field, and mark it as shared. CCP functions that +;;; restrict a ccp's domain share map strings, so they use this guy. +(define (ccp:map/shared ccp) + (set-ccp:mshared? ccp #t) + (ccp:map ccp)) + +(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f + (string-copy (ccp:map ccp)) #f)) + +;;; N-ary equality relation for partial maps + +(define (ccp= ccp1 . rest) + (let ((domain (ccp:domain ccp1)) + (cmap (ccp:map ccp1))) + (every (lambda (ccp2) + (and (char-set= domain (ccp:domain ccp2)) + (let ((cmap2 (ccp:map ccp2))) + (char-set-every? (lambda (c) + (let ((i (char->ascii c))) + (char=? (string-ref cmap i) + (string-ref cmap2 i)))) + domain)))) + rest))) + + +;;; N-ary subset relation for partial maps + +(define (ccp<= ccp1 . rest) + (let lp ((domain1 (ccp:domain ccp1)) + (cmap1 (ccp:map ccp1)) + (rest rest)) + (or (not (pair? rest)) + (let* ((ccp2 (car rest)) + (domain2 (ccp:domain ccp2)) + (cmap2 (ccp:map ccp2)) + (rest (cdr rest))) + (and (char-set<= domain1 domain2) + (let ((cmap2 (ccp:map ccp2))) + (char-set-every? (lambda (c) + (let ((i (char->ascii c))) + (char=? (string-ref cmap1 i) + (string-ref cmap2 i)))) + domain1)) + (lp domain2 cmap2 rest)))))) + + +;;; CCP iterators +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (ccp-fold kons knil ccp) + (let ((cmap (ccp:map ccp))) + (char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v)) + knil + (ccp:domain ccp)))) + +(define (ccp-for-each proc ccp) + (let ((cmap (ccp:map ccp))) + (char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c)))) + (ccp:domain ccp)))) + +(define (ccp->alist ccp) + (ccp-fold (lambda (from to alist) (cons (cons from to) alist)) + '() + ccp)) + + +;;; CCP-RESTRICT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Restrict a ccp's domain. + +(define (ccp-restrict ccp cset) + (make-ccp (char-set-intersection cset (ccp:domain ccp)) + #f + (ccp:map/shared ccp) + #t)) + +(define (ccp-restrict! ccp cset) + (restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset)))) + + +;;; CCP-ADJOIN ccp from-char1 to-char1 ... +;;; CCP-DELETE ccp char1 ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Add & delete mappings to/from a ccp. + +(define (ccp-delete ccp . chars) + (make-ccp (apply char-set-delete (ccp:domain ccp) chars) + #f + (ccp:map/shared ccp) + #t)) + +(define (ccp-delete! ccp . chars) + (restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars)))) + + +(define (ccp-adjoin ccp . chars) + (let ((cmap (string-copy (ccp:map ccp)))) + (make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars) + #f + cmap + #f))) + +(define (ccp-adjoin! ccp . chars) + (linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars)))) + +(define (install-ccp-adjoin! cmap domain chars) + (let lp ((chars chars) (d domain)) + (if (pair? chars) + (let ((from (car chars)) + (to (cadr chars)) + (chars (cddr chars))) + (string-set! cmap (char->ascii from) to) + (lp chars (char-set-adjoin! d from))) + d))) + + +;;; CCP-EXTEND ccp1 ... +;;; CCP-EXTEND! ccp1 ccp2 ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Extend ccp1 with ccp2, etc. + +(define (ccp-extend . ccps) + (if (pair? ccps) + (let ((ccp0 (car ccps)) + (ccps (cdr ccps))) + (if (pair? ccps) + (let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap. + ;; The FOLD installs each ccp in CCPS into CMAP and produces + ;; the new domain. + (make-ccp (fold (lambda (ccp d) + (install-ccp-extension! cmap d ccp)) + (char-set-copy (ccp:domain ccp0)) + ccps) + #f cmap #f)) + + ccp0)) ; Only 1 parameter + + ccp:0)) ; 0 parameters + +(define (ccp-extend! ccp0 . ccps) + (linear-update-ccp ccp0 + (lambda (domain cmap) + (fold (lambda (ccp d) (install-ccp-extension! cmap d ccp)) + domain + ccps)))) + + +;;; Side-effect CMAP, linear-update and return DOMAIN. +(define (install-ccp-extension! cmap domain ccp) + (let ((cmap1 (ccp:map ccp)) + (domain1 (ccp:domain ccp))) + (char-set-for-each (lambda (c) + (let ((i (char->ascii c))) + (string-set! cmap i (string-ref cmap1 i)))) + domain1) + (char-set-union! domain domain1))) + + +;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; For each character C-IN in the original domain, we push it +;;; through the pipeline of CCPs. If we ever land outside the +;;; domain of a ccp, we punt C-IN. If we push it all the way +;;; through, we add C-IN to our result domain, and add the mapping +;;; into the cmap we are assembling. +;;; +;;; Looping this way avoids building up intermediate temporary +;;; CCPs. If CCP's were small bitsets, we might be better off +;;; slicing the double-nested loops the other way around. + +(define (ccp-compose . ccps) + (cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1 + ((not (pair? (cdr ccps))) (car ccps)) ; 1 arg + (else + (let* ((v (list->vector ccps)) + (vlen-2 (- (vector-length v) 2)) + (cmap (make-string num-chars)) + (d1 (ccp:domain (vector-ref v (+ vlen-2 1)))) + (d (char-set-fold (lambda (c-in d) + (let lp ((c c-in) (i vlen-2)) + (if (>= i 0) + (let ((ccp (vector-ref v i))) + (if (char-set-contains? (ccp:domain ccp) c) + (lp (string-ref (ccp:map ccp) + (char->ascii c)) + (- i 1)) + + ;; Lose: remove c-in from d. + (char-set-delete! d c-in))) + + ;; Win: C-IN -> C + (begin (string-set! cmap + (char->ascii c-in) + c) + d)))) + (char-set-copy d1) + d1))) + (make-ccp d #f cmap #f))))) + + + +;;; ALIST->CPP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (alist->ccp cc-alist . maybe-base-ccp) + (let ((base (:optional maybe-base-ccp ccp:0))) + (if (pair? cc-alist) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-alist! cmap + (char-set-copy (ccp:domain base)) + cc-alist) + #f cmap #f)) + base))) + +(define (alist->ccp! alist base) + (linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist)))) + +;;; Side-effect CMAP, linear-update and return DOMAIN. +(define (install-ccp-alist! cmap domain alist) + (fold (lambda (from/to d) (let ((from (car from/to)) + (to (cdr from/to))) + (string-set! cmap (char->ascii from) to) + (char-set-adjoin! domain from))) + domain + alist)) + + +;;; PROC->CCP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (proc->ccp proc [domain base-ccp]) + +(define (proc->ccp proc . args) + (let-optionals args ((proc-domain char-set:full) + (base ccp:0)) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base)) + proc proc-domain) + #f cmap #f)))) + +(define (proc->ccp! proc proc-domain base) + (linear-update-ccp base + (lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain)))) + +(define (install-ccp-proc! cmap domain proc proc-domain) + (char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c))) + proc-domain) + (char-set-union! domain proc-domain)) + + +;;; CONSTANT-CCP +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (constant-ccp char [domain base-ccp]) +;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR. +;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0. + +(define (constant-ccp char . args) + (let-optionals args ((char-domain char-set:full) (base ccp:0)) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base)) + char char-domain) + #f cmap #f)))) + +(define (constant-ccp! char char-domain base) + (linear-update-ccp base + (lambda (d cmap) (install-constant-ccp! cmap d char char-domain)))) + +;;; Install the constant mapping into CMAP0 by side-effect, +;;; linear-update & return DOMAIN0 with the constant-mapping's domain. +(define (install-constant-ccp! cmap0 domain0 char char-domain) + (char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char)) + char-domain) + (char-set-union! domain0 char-domain)) + + +;;; CCP/MAPPINGS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp +;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp +;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp +;;; Each FROM element is either a string or a (lo-char . hi-char) range. +;;; Each TO element is either a string or a lo-char. Strings are replicated +;;; to match the length of the corresponding FROM element. +;;; CCP/MAPPINGS's base CCP is CCP:0 +;;; +;;; Tedious code. + +;;; Internal utility. +;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect. +;;; Return the new domain. + +(define (install-ccp-mapping-pair! cmap domain from to) + ;; Tedium -- four possibilities here: + ;; str->str, str->lo-char, + ;; range->str, range->lo-char. + (if (string? from) + (if (string? to) + ;; "abc" -> "ABC" + (let ((len1 (string-length from)) + (len2 (string-length to))) + (let lp2 ((i (- len1 1)) + (j (modulo (- len2 1) len1)) + (d domain)) + (if (>= i 0) + (let ((c (string-ref from i))) + (string-set! cmap + (char->ascii c) + (string-ref to i)) + (lp2 (- i 1) + (- (if (> j 0) j len2) 1) + (char-set-adjoin! d c))) + d))) + + ;; "abc" -> #\A + (let lp2 ((i (- (string-length from) 1)) + (j (char->ascii to)) + (d domain)) + (if (>= i 0) + (let ((c (string-ref from i))) + (string-set! cmap + (char->ascii c) + (ascii->char j)) + (lp2 (- i 1) + (- j 1) + (char-set-adjoin! d c))) + d))) + + (let ((from-start (char->ascii (car from))) + (from-end (char->ascii (cdr from)))) + (if (string? to) + (let ((len2-1 (- (string-length to) 1))) + ;; (#\a . #\c) -> "ABC" + (let lp2 ((i from-start) (j 0) (d domain)) + (if (<= i from-end) + (let ((c (string-ref to j))) + (string-set! cmap i c) + (lp2 (+ i 1) + (if (= j len2-1) 0 (+ j 1)) + (char-set-adjoin! d c))) + d))) + + ;; (#\a . #\c) -> #\A + (do ((i from-start (+ i 1)) + (j (char->ascii to) (+ j 1)) + (d domain (begin (string-set! cmap i (ascii->char j)) + (char-set-adjoin d (ascii->char i))))) + ((> i from-end) d)))))) + +;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN. +(define (install-mapping-pairs cmap domain args) + (let lp ((domain domain) (args args)) + (if (pair? args) + (lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args)) + (cddr args)) + domain))) + +(define (ccp/mappings . args) + (let ((cmap (make-string num-chars))) + (make-ccp (install-mapping-pairs (make-string num-chars) + (char-set-copy char-set:empty) + args) + #f cmap #f))) + +(define (extend-ccp/mappings base . args) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args) + #f cmap #f))) + +(define (extend-ccp/mappings! base . args) + (linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args)))) + + +;;; CONSTRUCT-CCP! ccp elt ... +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; The kitchen-sink constructor; static typing be damned. +;;; ELTS are interpreted as follows: +;;; (lo-char . hi-char) to-string|lo-char ; ccp/range +;;; from-string to-string|lo-char ; ccp/range +;;; ccp ; ccp-extend +;;; alist ; alist->ccp +;;; domain char ; ccp-constant +;;; domain proc ; proc->ccp + +(define (construct-ccp! ccp . elts) + (linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts)))) + +(define (construct-ccp base . elts) + (let ((cmap (string-copy (ccp:map base)))) + (make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts) + #f cmap #f))) + +;;; Install the mappings into CMAP by side-effect, +;;; linear-update & return DOMAIN with the final domain. + +(define (install-ccp-construct! cmap domain elts) + (let lp ((d domain) (elts elts)) + ;(format #t "d=~s elts=~s\n" d elts) + (if (not (pair? elts)) d + (let ((elt (car elts)) + (elts (cdr elts))) + (cond ((pair? elt) + (cond ((pair? (car elt)) ; ELT is an alist. + (lp (install-ccp-alist! cmap d elt) elts)) + ((char? (car elt)) ; ELT is (lo-char . hi-char) range. + (lp (install-ccp-mapping-pair! cmap d elt (car elts)) + (cdr elts))) + (else (error "Illegal elt to construct-ccp" elt)))) + + ((string? elt) + (lp (install-ccp-mapping-pair! cmap d elt (car elts)) + (cdr elts))) + + ((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts)) + + ((char-set? elt) + (let ((elt2 (car elts)) + (elts (cdr elts))) + (lp (cond ((char? elt2) + (install-constant-ccp! cmap d elt2 elt)) + ((procedure? elt2) + (install-ccp-proc! cmap d elt2 elt)) + (else (error "Illegal elt-pair to construct-ccp" + elt elt2))) + elts))) + + (else (error "Illegal elt to construct-ccp" elt))))))) + + +;;; CCP unfold + +(define (ccp-unfold p f g seed) + (let lp ((seed seed) (ccp (ccp-copy ccp:0))) + (if (p seed) ccp + (lp (g seed) + (receive (from to) (f seed) + (lp (g seed) (ccp-adjoin! ccp from to))))))) + + + +;;; Using CCPs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; TR ccp string [start end] -> string +;;; CCP-MAP ccp string [start end] -> string +;;; CCP-MAP! ccp string [start end] -> undefined +;;; CCP-APP ccp char -> char or false + +;;; If a char in S is not in CCP's domain, it is dropped from the result. +;;; You can use this to map and delete chars from a string. + +(define (tr ccp s . maybe-start+end) + (let-optionals maybe-start+end ((start 0) (end (string-length s))) + ;; Count up the chars in S that are in the domain, + ;; and allocate the answer string ANS: + (let* ((len (- end start)) + (domain (ccp:domain ccp)) + (ans-len (string-fold (lambda (c numchars) + (if (char-set-contains? domain c) + (+ numchars 1) + numchars)) + 0 s start end)) + (ans (make-string ans-len))) + + ;; Apply the map, installing the resulting chars into ANS: + (string-fold (lambda (c i) (cond ((ccp-app ccp c) => + (lambda (c) + (string-set! ans i c) + (+ i 1))) + (else i))) ; Not in domain -- drop it. + 0 s start end) + ans))) + +(define (ccp-map ccp s . maybe-start+end) + (apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end)) + +(define (ccp-map! ccp s . maybe-start+end) + (apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end)) + +(define (ccp-app ccp char) + (and (char-set-contains? (ccp:domain ccp) char) + (string-ref (ccp:map ccp) (char->ascii char)))) + + +;;; Primitive CCPs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define id-cmap + (let ((m (make-string num-chars))) + (do ((i (- num-chars 1) (- i 1))) + ((< i 0)) + (string-set! m i (ascii->char i))) + m)) + +(define ccp:0 (make-ccp char-set:empty #t id-cmap #t)) +(define ccp:1 (make-ccp char-set:full #t id-cmap #t)) + +(define ccp:upcase (proc->ccp char-upcase char-set:full)) +(define ccp:downcase (proc->ccp char-downcase char-set:full)) diff --git a/scsh/lib/list-lib.scm b/scsh/lib/list-lib.scm new file mode 100644 index 0000000..c952b33 --- /dev/null +++ b/scsh/lib/list-lib.scm @@ -0,0 +1,1508 @@ +;;; SRFI-1 list-processing library -*- Scheme -*- +;;; Reference implementation +;;; +;;; Copyright (c) 1998, 1999 by Olin Shivers. You may do as you please with +;;; this code as long as you do not remove this copyright notice or +;;; hold me liable for its use. Please send bug reports to shivers@ai.mit.edu. +;;; -Olin + +;;; This is a library of list- and pair-processing functions. I wrote it after +;;; carefully considering the functions provided by the libraries found in +;;; R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common +;;; Lisp, Bigloo, guile, T, APL and the SML standard basis. It is a pretty +;;; rich toolkit, providing a superset of the functionality found in any of +;;; the various Schemes I considered. + +;;; This implementation is intended as a portable reference implementation +;;; for SRFI-1. See the porting notes below for more information. + +;;; Exported: +;;; xcons tree-copy make-list list-tabulate cons* list-copy +;;; proper-list? circular-list? dotted-list? not-pair? null-list? list= +;;; circular-list length+ +;;; iota +;;; first second third fourth fifth sixth seventh eighth ninth tenth +;;; car+cdr +;;; take drop +;;; take-right drop-right +;;; take! drop-right! +;;; last last-pair +;;; zip unzip1 unzip2 unzip3 unzip4 unzip5 +;;; count +;;; append! append-reverse append-reverse! +;;; unfold fold fold-right pair-fold pair-fold-right reduce reduce-right +;;; append-map append-map! map! pair-for-each filter-map map-in-order +;;; filter partition remove +;;; filter! partition! remove! +;;; find find-tail any every list-index +;;; delete delete! +;;; alist-cons alist-copy +;;; delete-duplicates delete-duplicates! +;;; alist-delete alist-delete! +;;; reverse! +;;; lset<= lset= lset-adjoin +;;; lset-union lset-intersection lset-difference lset-xor lset-diff+intersection +;;; lset-union! lset-intersection! lset-difference! lset-xor! lset-diff+intersection! +;;; +;;; In principle, the following R4RS list- and pair-processing procedures +;;; are also part of this package's exports, although they are not defined +;;; in this file: +;;; Primitives: cons pair? null? car cdr set-car! set-cdr! +;;; Non-primitives: list length append reverse cadr ... cddddr list-ref +;;; memq memv assq assv +;;; (The non-primitives are defined in this file, but commented out.) +;;; +;;; These R4RS procedures have extended definitions in SRFI-1 and are defined +;;; in this file: +;;; map for-each member assoc +;;; +;;; The remaining two R4RS list-processing procedures are not included: +;;; list-tail (use drop) +;;; list? (use proper-list?) + + +;;; A note on recursion and iteration/reversal: +;;; Many iterative list-processing algorithms naturally compute the elements +;;; of the answer list in the wrong order (left-to-right or head-to-tail) from +;;; the order needed to cons them into the proper answer (right-to-left, or +;;; tail-then-head). One style or idiom of programming these algorithms, then, +;;; loops, consing up the elements in reverse order, then destructively +;;; reverses the list at the end of the loop. I do not do this. The natural +;;; and efficient way to code these algorithms is recursively. This trades off +;;; intermediate temporary list structure for intermediate temporary stack +;;; structure. In a stack-based system, this improves cache locality and +;;; lightens the load on the GC system. Don't stand on your head to iterate! +;;; Recurse, where natural. Multiple-value returns make this even more +;;; convenient, when the recursion/iteration has multiple state values. + +;;; Porting: +;;; This is carefully tuned code; do not modify casually. +;;; - It is careful to share storage when possible; +;;; - Side-effecting code tries not to perform redundant writes. +;;; That said, a port of this library to a specific Scheme system might wish +;;; to tune this code to exploit particulars of the implementation. In +;;; particular, the n-ary mapping functions are particularly slow and +;;; cons-intensive, and are good candidates for tuning. I have coded fast +;;; paths for the single-list cases, but what you really want to do is exploit +;;; the fact that the compiler usually knows how many arguments are being +;;; passed to a particular application of these functions -- they are usually +;;; explicitly called, not passed around as higher-order values. If you can +;;; arrange to have your compiler produce custom code or custom linkages based +;;; on the number of arguments in the call, you can speed these functions up +;;; a *lot*. But this kind of compiler technology no longer exists in the +;;; Scheme world as far as I can see. +;;; +;;; Note that this code is, of course, dependent upon standard bindings for +;;; the R5RS procedures -- i.e., it assumes that the variable CAR is bound +;;; to the procedure that takes the car of a list. If your Scheme +;;; implementation allows user code to alter the bindings of these procedures +;;; in a manner that would be visible to these definitions, then there might +;;; be trouble. You could consider horrible kludgery along the lines of +;;; (define fact +;;; (let ((= =) (- -) (* *)) +;;; (letrec ((real-fact (lambda (n) +;;; (if (= n 0) 1 (* n (real-fact (- n 1))))))) +;;; real-fact))) +;;; Or you could consider shifting to a reasonable Scheme system that, say, +;;; has a module system protecting code from this kind of lossage. +;;; +;;; This code does a fair amount of run-time argument checking. If your +;;; Scheme system has a sophisticated compiler that can eliminate redundant +;;; error checks, this is no problem. However, if not, these checks incur +;;; some performance overhead -- and, in a safe Scheme implementation, they +;;; are in some sense redundant: if we don't check to see that the PROC +;;; parameter is a procedure, we'll find out anyway three lines later when +;;; we try to call the value. It's pretty easy to rip all this argument +;;; checking code out if it's inappropriate for your implementation -- just +;;; nuke every call to CHECK-ARG. +;;; +;;; On the other hand, if you *do* have a sophisticated compiler that will +;;; actually perform soft-typing and eliminate redundant checks (Rice's systems +;;; being the only possible candidate of which I'm aware), leaving these checks +;;; in can *help*, since their presence can be elided in redundant cases, +;;; and in cases where they are needed, performing the checks early, at +;;; procedure entry, can "lift" a check out of a loop. +;;; +;;; Finally, I have only checked the properties that can portably be checked +;;; with R5RS Scheme -- and this is not complete. You may wish to alter +;;; the CHECK-ARG parameter checks to perform extra, implementation-specific +;;; checks, such as procedure arity for higher-order values. +;;; +;;; The code has only these non-R4RS dependencies: +;;; A few calls to an ERROR procedure; +;;; Uses of the R5RS multiple-value procedure VALUES and the m-v binding +;;; RECEIVE macro (which isn't R5RS, but is a trivial macro). +;;; Many calls to a parameter-checking procedure check-arg: +;;; (define (check-arg pred val caller) +;;; (let lp ((val val)) +;;; (if (pred val) val (lp (error "Bad argument" val pred caller))))) +;;; A few uses of the LET-OPTIONAL and :OPTIONAL macros for parsing +;;; optional arguments. +;;; +;;; Most of these procedures use the NULL-LIST? test to trigger the +;;; base case in the inner loop or recursion. The NULL-LIST? function +;;; is defined to be a careful one -- it raises an error if passed a +;;; non-nil, non-pair value. The spec allows an implementation to use +;;; a less-careful implementation that simply defines NULL-LIST? to +;;; be NOT-PAIR?. This would speed up the inner loops of these procedures +;;; at the expense of having them silently accept dotted lists. + +;;; A note on dotted lists: +;;; I, personally, take the view that the only consistent view of lists +;;; in Scheme is the view that *everything* is a list -- values such as +;;; 3 or "foo" or 'bar are simply empty dotted lists. This is due to the +;;; fact that Scheme actually has no true list type. It has a pair type, +;;; and there is an *interpretation* of the trees built using this type +;;; as lists. +;;; +;;; I lobbied to have these list-processing procedures hew to this +;;; view, and accept any value as a list argument. I was overwhelmingly +;;; overruled during the SRFI discussion phase. So I am inserting this +;;; text in the reference lib and the SRFI spec as a sort of "minority +;;; opinion" dissent. +;;; +;;; Many of the procedures in this library can be trivially redefined +;;; to handle dotted lists, just by changing the NULL-LIST? base-case +;;; check to NOT-PAIR?, meaning that any non-pair value is taken to be +;;; an empty list. For most of these procedures, that's all that is +;;; required. +;;; +;;; However, we have to do a little more work for some procedures that +;;; *produce* lists from other lists. Were we to extend these procedures to +;;; accept dotted lists, we would have to define how they terminate the lists +;;; produced as results when passed a dotted list. I designed a coherent set +;;; of termination rules for these cases; this was posted to the SRFI-1 +;;; discussion list. I additionally wrote an earlier version of this library +;;; that implemented that spec. It has been discarded during later phases of +;;; the definition and implementation of this library. +;;; +;;; The argument *against* defining these procedures to work on dotted +;;; lists is that dotted lists are the rare, odd case, and that by +;;; arranging for the procedures to handle them, we lose error checking +;;; in the cases where a dotted list is passed by accident -- e.g., when +;;; the programmer swaps a two arguments to a list-processing function, +;;; one being a scalar and one being a list. For example, +;;; (member '(1 3 5 7 9) 7) +;;; This would quietly return #f if we extended MEMBER to accept dotted +;;; lists. +;;; +;;; The SRFI discussion record contains more discussion on this topic. + + +;;; Constructors +;;;;;;;;;;;;;;;; + +;;; Occasionally useful as a value to be passed to a fold or other +;;; higher-order procedure. +(define (xcons d a) (cons a d)) + +;;;; Recursively copy every cons. +;(define (tree-copy x) +; (let recur ((x x)) +; (if (not (pair? x)) x +; (cons (recur (car x)) (recur (cdr x)))))) + +;;; Make a list of length LEN. + +(define (make-list len . maybe-elt) + (check-arg (lambda (n) (and (integer? n) (>= n 0))) len make-list) + (let ((elt (cond ((null? maybe-elt) #f) ; Default value + ((null? (cdr maybe-elt)) (car maybe-elt)) + (else (error "Too many arguments to MAKE-LIST" + (cons len maybe-elt)))))) + (do ((i len (- i 1)) + (ans '() (cons elt ans))) + ((<= i 0) ans)))) + + +;(define (list . ans) ans) ; R4RS + + +;;; Make a list of length LEN. Elt i is (PROC i) for 0 <= i < LEN. + +(define (list-tabulate len proc) + (check-arg (lambda (n) (and (integer? n) (>= n 0))) len list-tabulate) + (check-arg procedure? proc list-tabulate) + (do ((i (- len 1) (- i 1)) + (ans '() (cons (proc i) ans))) + ((< i 0) ans))) + +;;; (cons* a1 a2 ... an) = (cons a1 (cons a2 (cons ... an))) +;;; (cons* a1) = a1 (cons* a1 a2 ...) = (cons a1 (cons* a2 ...)) +;;; +;;; (cons first (unfold-right not-pair? car cdr rest values)) + +(define (cons* first . rest) + (let recur ((x first) (rest rest)) + (if (pair? rest) + (cons x (recur (car rest) (cdr rest))) + x))) + +;;; (unfold-right not-pair? car cdr lis values) + +(define (list-copy lis) + (let recur ((lis lis)) + (if (pair? lis) + (cons (car lis) (recur (cdr lis))) + lis))) + +;;; IOTA count [start step] (start start+step ... start+(count-1)*step) + +(define (iota count . maybe-start+step) + (check-arg integer? count iota) + (if (< count 0) (error "Negative step count" iota count)) + (let-optionals maybe-start+step ((start 0) (step 1)) + (check-arg number? start iota) + (check-arg number? step iota) + (let ((last-val (+ start (* (- count 1) step)))) + (do ((count count (- count 1)) + (val last-val (- val step)) + (ans '() (cons val ans))) + ((<= count 0) ans))))) + +;;; I thought these were lovely, but the public at large did not share my +;;; enthusiasm... +;;; :IOTA to (0 ... to-1) +;;; :IOTA from to (from ... to-1) +;;; :IOTA from to step (from from+step ...) + +;;; IOTA: to (1 ... to) +;;; IOTA: from to (from+1 ... to) +;;; IOTA: from to step (from+step from+2step ...) + +;(define (%parse-iota-args arg1 rest-args proc) +; (let ((check (lambda (n) (check-arg integer? n proc)))) +; (check arg1) +; (if (pair? rest-args) +; (let ((arg2 (check (car rest-args))) +; (rest (cdr rest-args))) +; (if (pair? rest) +; (let ((arg3 (check (car rest))) +; (rest (cdr rest))) +; (if (pair? rest) (error "Too many parameters" proc arg1 rest-args) +; (values arg1 arg2 arg3))) +; (values arg1 arg2 1))) +; (values 0 arg1 1)))) +; +;(define (iota: arg1 . rest-args) +; (receive (from to step) (%parse-iota-args arg1 rest-args iota:) +; (let* ((numsteps (floor (/ (- to from) step))) +; (last-val (+ from (* step numsteps)))) +; (if (< numsteps 0) (error "Negative step count" iota: from to step)) +; (do ((steps-left numsteps (- steps-left 1)) +; (val last-val (- val step)) +; (ans '() (cons val ans))) +; ((<= steps-left 0) ans))))) +; +; +;(define (:iota arg1 . rest-args) +; (receive (from to step) (%parse-iota-args arg1 rest-args :iota) +; (let* ((numsteps (ceiling (/ (- to from) step))) +; (last-val (+ from (* step (- numsteps 1))))) +; (if (< numsteps 0) (error "Negative step count" :iota from to step)) +; (do ((steps-left numsteps (- steps-left 1)) +; (val last-val (- val step)) +; (ans '() (cons val ans))) +; ((<= steps-left 0) ans))))) + + + +(define (circular-list val1 . vals) + (let ((ans (cons val1 vals))) + (set-cdr! (last-pair ans) ans) + ans)) + +;;; ::= () ; Empty proper list +;;; | (cons ) ; Proper-list pair +;;; Note that this definition rules out circular lists -- and this +;;; function is required to detect this case and return false. + +(define (proper-list? x) + (let lp ((x x) (lag x)) + (if (pair? x) + (let ((x (cdr x))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (and (not (eq? x lag)) (lp x lag))) + (null? x))) + (null? x)))) + + +;;; A dotted list is a finite list (possibly of length 0) terminated +;;; by a non-nil value. Any non-cons, non-nil value (e.g., "foo" or 5) +;;; is a dotted list of length 0. +;;; +;;; ::= ; Empty dotted list +;;; | (cons ) ; Proper-list pair + +(define (dotted-list? x) + (let lp ((x x) (lag x)) + (if (pair? x) + (let ((x (cdr x))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (and (not (eq? x lag)) (lp x lag))) + (not (null? x)))) + (not (null? x))))) + +(define (circular-list? x) + (let lp ((x x) (lag x)) + (and (pair? x) + (let ((x (cdr x))) + (and (pair? x) + (let ((x (cdr x)) + (lag (cdr lag))) + (or (eq? x lag) (lp x lag)))))))) + +(define (not-pair? x) (not (pair? x))) ; Inline me. + +;;; This is a legal definition which is fast and sloppy: +;;; (define null-list? not-pair?) +;;; but we'll provide a more careful one: +(define (null-list? l) + (cond ((pair? l) #f) + ((null? l) #t) + (else (error "null-pair?: argument out of domain" l)))) + + +(define (list= = . lists) + (or (null? lists) ; special case + + (let lp1 ((list-a (car lists)) (others (cdr lists))) + (or (null? others) + (let ((list-b (car others)) + (others (cdr others))) + (if (eq? list-a list-b) ; EQ? => LIST= + (lp1 list-b others) + (let lp2 ((list-a list-a) (list-b list-b)) + (if (null-list? list-a) + (and (null-list? list-b) + (lp1 list-b others)) + (and (not (null-list? list-b)) + (= (car list-a) (car list-b)) + (lp2 (cdr list-a) (cdr list-b))))))))))) + + + +;;; R4RS, so commented out. +;(define (length x) ; LENGTH may diverge or +; (let lp ((x x) (len 0)) ; raise an error if X is +; (if (pair? x) ; a circular list. This version +; (lp (cdr x) (+ len 1)) ; diverges. +; len))) + +(define (length+ x) ; Returns #f if X is circular. + (let lp ((x x) (lag x) (len 0)) + (if (pair? x) + (let ((x (cdr x)) + (len (+ len 1))) + (if (pair? x) + (let ((x (cdr x)) + (lag (cdr lag)) + (len (+ len 1))) + (and (not (eq? x lag)) (lp x lag len))) + len)) + len))) + +(define (zip list1 . more-lists) (apply map list list1 more-lists)) + + +;;; Selectors +;;;;;;;;;;;;; + +;;; R4RS non-primitives: +;(define (caar x) (car (car x))) +;(define (cadr x) (car (cdr x))) +;(define (cdar x) (cdr (car x))) +;(define (cddr x) (cdr (cdr x))) +; +;(define (caaar x) (caar (car x))) +;(define (caadr x) (caar (cdr x))) +;(define (cadar x) (cadr (car x))) +;(define (caddr x) (cadr (cdr x))) +;(define (cdaar x) (cdar (car x))) +;(define (cdadr x) (cdar (cdr x))) +;(define (cddar x) (cddr (car x))) +;(define (cdddr x) (cddr (cdr x))) +; +;(define (caaaar x) (caaar (car x))) +;(define (caaadr x) (caaar (cdr x))) +;(define (caadar x) (caadr (car x))) +;(define (caaddr x) (caadr (cdr x))) +;(define (cadaar x) (cadar (car x))) +;(define (cadadr x) (cadar (cdr x))) +;(define (caddar x) (caddr (car x))) +;(define (cadddr x) (caddr (cdr x))) +;(define (cdaaar x) (cdaar (car x))) +;(define (cdaadr x) (cdaar (cdr x))) +;(define (cdadar x) (cdadr (car x))) +;(define (cdaddr x) (cdadr (cdr x))) +;(define (cddaar x) (cddar (car x))) +;(define (cddadr x) (cddar (cdr x))) +;(define (cdddar x) (cdddr (car x))) +;(define (cddddr x) (cdddr (cdr x))) + + +(define first car) +(define second cadr) +(define third caddr) +(define fourth cadddr) +(define (fifth x) (car (cddddr x))) +(define (sixth x) (cadr (cddddr x))) +(define (seventh x) (caddr (cddddr x))) +(define (eighth x) (cadddr (cddddr x))) +(define (ninth x) (car (cddddr (cddddr x)))) +(define (tenth x) (cadr (cddddr (cddddr x)))) + +(define (car+cdr pair) (values (car pair) (cdr pair))) + +;;; take & drop + +(define (take lis k) + (check-arg integer? k take) + (let recur ((lis lis) (k k)) + (if (zero? k) '() + (cons (car lis) + (recur (cdr lis) (- k 1)))))) + +(define (drop lis k) + (check-arg integer? k drop) + (let iter ((lis lis) (k k)) + (if (zero? k) lis (iter (cdr lis) (- k 1))))) + +(define (take! lis k) + (check-arg integer? k take!) + (if (zero? k) '() + (begin (set-cdr! (drop lis (- k 1)) '()) + lis))) + +;;; TAKE-RIGHT and DROP-RIGHT work by getting two pointers into the list, +;;; off by K, then chasing down the list until the lead pointer falls off +;;; the end. + +(define (take-right lis k) + (check-arg integer? k take-right) + (let lp ((lag lis) (lead (drop lis k))) + (if (pair? lead) + (lp (cdr lag) (cdr lead)) + lag))) + +(define (drop-right lis k) + (check-arg integer? k drop-right) + (let recur ((lag lis) (lead (drop lis k))) + (if (pair? lead) + (cons (car lag) (recur (cdr lag) (cdr lead))) + '()))) + +;;; In this function, LEAD is actually K+1 ahead of LAG. This lets +;;; us stop LAG one step early, in time to smash its cdr to (). +(define (drop-right! lis k) + (check-arg integer? k drop-right!) + (let ((lead (drop lis k))) + (if (pair? lead) + + (let lp ((lag lis) (lead (cdr lead))) ; Standard case + (if (pair? lead) + (lp (cdr lag) (cdr lead)) + (begin (set-cdr! lag '()) + lis))) + + '()))) ; Special case dropping everything -- no cons to side-effect. + +;(define (list-ref lis i) (car (drop lis i))) ; R4RS + +;;; These use the APL convention, whereby negative indices mean +;;; "from the right." I liked them, but they didn't win over the +;;; SRFI reviewers. +;;; K >= 0: Take and drop K elts from the front of the list. +;;; K <= 0: Take and drop -K elts from the end of the list. + +;(define (take lis k) +; (check-arg integer? k take) +; (if (negative? k) +; (list-tail lis (+ k (length lis))) +; (let recur ((lis lis) (k k)) +; (if (zero? k) '() +; (cons (car lis) +; (recur (cdr lis) (- k 1))))))) +; +;(define (drop lis k) +; (check-arg integer? k drop) +; (if (negative? k) +; (let recur ((lis lis) (nelts (+ k (length lis)))) +; (if (zero? nelts) '() +; (cons (car lis) +; (recur (cdr lis) (- nelts 1))))) +; (list-tail lis k))) +; +; +;(define (take! lis k) +; (check-arg integer? k take!) +; (cond ((zero? k) '()) +; ((positive? k) +; (set-cdr! (list-tail lis (- k 1)) '()) +; lis) +; (else (list-tail lis (+ k (length lis)))))) +; +;(define (drop! lis k) +; (check-arg integer? k drop!) +; (if (negative? k) +; (let ((nelts (+ k (length lis)))) +; (if (zero? nelts) '() +; (begin (set-cdr! (list-tail lis (- nelts 1)) '()) +; lis))) +; (list-tail lis k))) + + +(define (last lis) (car (last-pair lis))) + +(define (last-pair lis) + (check-arg pair? lis last-pair) + (let lp ((lis lis)) + (let ((tail (cdr lis))) + (if (pair? tail) (lp tail) lis)))) + + +;;; Unzippers -- 1 through 5 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (unzip1 lis) (map car lis)) + +(define (unzip2 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle + (let ((elt (car lis))) ; dotted lists. + (receive (a b) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b))))))) + +(define (unzip3 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis) + (let ((elt (car lis))) + (receive (a b c) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c))))))) + +(define (unzip4 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis lis) + (let ((elt (car lis))) + (receive (a b c d) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c) + (cons (cadddr elt) d))))))) + +(define (unzip5 lis) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis lis lis lis) + (let ((elt (car lis))) + (receive (a b c d e) (recur (cdr lis)) + (values (cons (car elt) a) + (cons (cadr elt) b) + (cons (caddr elt) c) + (cons (cadddr elt) d) + (cons (car (cddddr elt)) e))))))) + + +;;; append! append-reverse append-reverse! +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (append! . lists) + ;; First, scan through lists looking for a non-empty one. + (let lp ((lists lists) (prev '())) + (if (not (pair? lists)) prev + (let ((first (car lists)) + (rest (cdr lists))) + (if (not (pair? first)) (lp rest first) + + ;; Now, do the splicing. + (let lp2 ((tail-cons (last-pair first)) + (rest rest)) + (if (pair? rest) + (let ((next (car rest)) + (rest (cdr rest))) + (set-cdr! tail-cons next) + (lp2 (if (pair? next) (last-pair next) tail-cons) + rest)) + first))))))) + +;;; APPEND is R4RS. +;(define (append . lists) +; (if (pair? lists) +; (let recur ((list1 (car lists)) (lists (cdr lists))) +; (if (pair? lists) +; (let ((tail (recur (car lists) (cdr lists)))) +; (fold-right cons tail list1)) ; Append LIST1 & TAIL. +; list1)) +; '())) + +;(define (append-reverse rev-head tail) (fold cons tail rev-head)) + +;(define (append-reverse! rev-head tail) +; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) +; tail +; rev-head)) + +;;; Hand-inline the FOLD and PAIR-FOLD ops for speed. + +(define (append-reverse rev-head tail) + (let lp ((rev-head rev-head) (tail tail)) + (if (null-list? rev-head) tail + (lp (cdr rev-head) (cons (car rev-head) tail))))) + +(define (append-reverse! rev-head tail) + (let lp ((rev-head rev-head) (tail tail)) + (if (null-list? rev-head) tail + (let ((next-rev (cdr rev-head))) + (set-cdr! rev-head tail) + (lp next-rev rev-head))))) + + + +;;; Fold/map internal utilities +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These little internal utilities are used by the general +;;; fold & mapper funs for the n-ary cases . It'd be nice if they got inlined. +;;; One the other hand, the n-ary cases are painfully inefficient as it is. +;;; An aggressive implementation should simply re-write these functions +;;; for raw efficiency; I have written them for as much clarity, portability, +;;; and simplicity as can be achieved. +;;; +;;; I use the dreaded call/cc to do local aborts. A good compiler could +;;; handle this with extreme efficiency. An implementation that provides +;;; a one-shot, non-persistent continuation grabber could help the compiler +;;; out by using that in place of the call/cc's in these routines. +;;; +;;; These functions have funky definitions that are precisely tuned to +;;; the needs of the fold/map procs -- for example, to minimize the number +;;; of times the argument lists need to be examined. + +;;; Return (map cdr lists). +;;; However, if any element of LISTS is empty, just abort and return '(). +(define (%cdrs lists) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (let ((lis (car lists))) + (if (null-list? lis) (abort '()) + (cons (cdr lis) (recur (cdr lists))))) + '()))))) + +(define (%cars+ lists last-elt) ; (append! (map car lists) (list last-elt)) + (let recur ((lists lists)) + (if (pair? lists) (cons (caar lists) (recur (cdr lists))) (list last-elt)))) + +;;; LISTS is a (not very long) non-empty list of lists. +;;; Return two lists: the cars & the cdrs of the lists. +;;; However, if any of the lists is empty, just abort and return [() ()]. + +(define (%cars+cdrs lists) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs)))))) + (values '() '())))))) + +;;; Like %CARS+CDRS, but we pass in a final elt tacked onto the end of the +;;; cars list. What a hack. +(define (%cars+cdrs+ lists cars-final) + (call-with-current-continuation + (lambda (abort) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (if (null-list? list) (abort '() '()) ; LIST is empty -- bail out + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs)))))) + (values (list cars-final) '())))))) + +;;; Like %CARS+CDRS, but blow up if any list is empty. +(define (%cars+cdrs/no-test lists) + (let recur ((lists lists)) + (if (pair? lists) + (receive (list other-lists) (car+cdr lists) + (receive (a d) (car+cdr list) + (receive (cars cdrs) (recur other-lists) + (values (cons a cars) (cons d cdrs))))) + (values '() '())))) + + +;;; count +;;;;;;;;; +(define (count pred list1 . lists) + (check-arg procedure? pred count) + (if (pair? lists) + + ;; N-ary case + (let lp ((list1 list1) (lists lists) (i 0)) + (if (null-list? list1) i + (receive (as ds) (%cars+cdrs lists) + (if (null? as) i + (lp (cdr list1) ds + (if (apply pred (car list1) as) (+ i 1) i)))))) + + ;; Fast path + (let lp ((lis list1) (i 0)) + (if (null-list? lis) i + (lp (cdr lis) (if (pred (car lis)) (+ i 1) i)))))) + + +;;; fold/unfold +;;;;;;;;;;;;;;; + +(define (unfold p f g seed . maybe-tail) + (check-arg procedure? p unfold) + (check-arg procedure? f unfold) + (check-arg procedure? g unfold) + (let lp ((seed seed) (ans (:optional maybe-tail '()))) + (if (p seed) ans + (lp (g seed) + (cons (f seed) ans))))) + + +(define (unfold-right p f g seed . maybe-tail-gen) + (check-arg procedure? p unfold-right) + (check-arg procedure? f unfold-right) + (check-arg procedure? g unfold-right) + (if (pair? maybe-tail-gen) + + (let ((tail-gen (car maybe-tail-gen))) + (if (pair? (cdr maybe-tail-gen)) + (apply error "Too many arguments" unfold-right p f g seed maybe-tail-gen) + + (let recur ((seed seed)) + (if (p seed) (tail-gen seed) + (cons (f seed) (recur (g seed))))))) + + (let recur ((seed seed)) + (if (p seed) '() + (cons (f seed) (recur (g seed))))))) + + +(define (fold kons knil lis1 . lists) + (check-arg procedure? kons fold) + (if (pair? lists) + (let lp ((lists (cons lis1 lists)) (ans knil)) ; N-ary case + (receive (cars+ans cdrs) (%cars+cdrs+ lists ans) + (if (null? cars+ans) ans ; Done. + (lp cdrs (apply kons cars+ans))))) + + (let lp ((lis lis1) (ans knil)) ; Fast path + (if (null-list? lis) ans + (lp (cdr lis) (kons (car lis) ans)))))) + + +(define (fold-right kons knil lis1 . lists) + (check-arg procedure? kons fold-right) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) ; N-ary case + (let ((cdrs (%cdrs lists))) + (if (null? cdrs) knil + (apply kons (%cars+ lists (recur cdrs)))))) + + (let recur ((lis lis1)) ; Fast path + (if (null-list? lis) knil + (let ((head (car lis))) + (kons head (recur (cdr lis)))))))) + + +(define (pair-fold-right f zero lis1 . lists) + (check-arg procedure? f pair-fold-right) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) ; N-ary case + (let ((cdrs (%cdrs lists))) + (if (null? cdrs) zero + (apply f (append! lists (list (recur cdrs))))))) + + (let recur ((lis lis1)) ; Fast path + (if (null-list? lis) zero (f lis (recur (cdr lis))))))) + +(define (pair-fold f zero lis1 . lists) + (check-arg procedure? f pair-fold) + (if (pair? lists) + (let lp ((lists (cons lis1 lists)) (ans zero)) ; N-ary case + (let ((tails (%cdrs lists))) + (if (null? tails) ans + (lp tails (apply f (append! lists (list ans))))))) + + (let lp ((lis lis1) (ans zero)) + (if (null-list? lis) ans + (let ((tail (cdr lis))) ; Grab the cdr now, + (lp tail (f lis ans))))))) ; in case F SET-CDR!s LIS. + + +;;; REDUCE and REDUCE-RIGHT only use RIDENTITY in the empty-list case. +;;; These cannot meaningfully be n-ary. + +(define (reduce f ridentity lis) + (check-arg procedure? f reduce) + (if (null-list? lis) ridentity + (fold f (car lis) (cdr lis)))) + +(define (reduce-right f ridentity lis) + (check-arg procedure? f reduce-right) + (if (null-list? lis) ridentity + (let recur ((head (car lis)) (lis (cdr lis))) + (if (pair? lis) + (f head (recur (car lis) (cdr lis))) + head)))) + + + +;;; Mappers: append-map append-map! pair-for-each map! filter-map map-in-order +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (append-map f lis1 . lists) + (really-append-map append-map append f lis1 lists)) +(define (append-map! f lis1 . lists) + (really-append-map append-map! append! f lis1 lists)) + +(define (really-append-map who appender f lis1 lists) + (check-arg procedure? f who) + (if (pair? lists) + (receive (cars cdrs) (%cars+cdrs (cons lis1 lists)) + (if (null? cars) '() + (let recur ((cars cars) (cdrs cdrs)) + (let ((vals (apply f cars))) + (receive (cars2 cdrs2) (%cars+cdrs cdrs) + (if (null? cars2) vals + (appender vals (recur cars2 cdrs2)))))))) + + ;; Fast path + (if (null-list? lis1) '() + (let recur ((elt (car lis1)) (rest (cdr lis1))) + (let ((vals (f elt))) + (if (null-list? rest) vals + (appender vals (recur (car rest) (cdr rest))))))))) + + +(define (pair-for-each proc lis1 . lists) + (check-arg procedure? proc pair-for-each) + (if (pair? lists) + + (let lp ((lists (cons lis1 lists))) + (let ((tails (%cdrs lists))) + (if (pair? tails) + (begin (apply proc lists) + (lp tails))))) + + ;; Fast path. + (let lp ((lis lis1)) + (if (not (null-list? lis)) + (let ((tail (cdr lis))) ; Grab the cdr now, + (proc lis) ; in case PROC SET-CDR!s LIS. + (lp tail)))))) + +;;; We stop when LIS1 runs out, not when any list runs out. +(define (map! f lis1 . lists) + (check-arg procedure? f map!) + (if (pair? lists) + (let lp ((lis1 lis1) (lists lists)) + (if (not (null-list? lis1)) + (receive (heads tails) (%cars+cdrs/no-test lists) + (set-car! lis1 (apply f (car lis1) heads)) + (lp (cdr lis1) tails)))) + + ;; Fast path. + (pair-for-each (lambda (pair) (set-car! pair (f (car pair)))) lis1)) + lis1) + + +;;; Map F across L, and save up all the non-false results. +(define (filter-map f lis1 . lists) + (check-arg procedure? f filter-map) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) + (receive (cars cdrs) (%cars+cdrs lists) + (if (pair? cars) + (cond ((apply f cars) => (lambda (x) (cons x (recur cdrs)))) + (else (recur cdrs))) ; Tail call in this arm. + '()))) + + ;; Fast path. + (let recur ((lis lis1)) + (if (null-list? lis) lis + (let ((tail (recur (cdr lis)))) + (cond ((f (car lis)) => (lambda (x) (cons x tail))) + (else tail))))))) + + +;;; Map F across lists, guaranteeing to go left-to-right. +;;; NOTE: Some implementations of R5RS MAP are compliant with this spec; +;;; in which case this procedure may simply be defined as a synonym for MAP. + +(define (map-in-order f lis1 . lists) + (check-arg procedure? f map-in-order) + (if (pair? lists) + (let recur ((lists (cons lis1 lists))) + (receive (cars cdrs) (%cars+cdrs lists) + (if (pair? cars) + (let ((x (apply f cars))) ; Do head first, + (cons x (recur cdrs))) ; then tail. + '()))) + + ;; Fast path. + (let recur ((lis lis1)) + (if (null-list? lis) lis + (let ((tail (cdr lis)) + (x (f (car lis)))) ; Do head first, + (cons x (recur tail))))))) ; then tail. + + +;;; We extend MAP to handle arguments of unequal length. +(define map map-in-order) + + +;;; filter, remove, partition +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; FILTER, REMOVE, PARTITION and their destructive counterparts do not +;;; disorder the elements of their argument. + +;; This FILTER shares the longest tail of L that has no deleted elements. +;; If Scheme had multi-continuation calls, they could be made more efficient. + +(define (filter pred lis) ; Sleazing with EQ? makes this + (check-arg procedure? pred filter) ; one faster. + (let recur ((lis lis)) + (if (null-list? lis) lis ; Use NOT-PAIR? to handle dotted lists. + (let ((head (car lis)) + (tail (cdr lis))) + (if (pred head) + (let ((new-tail (recur tail))) ; Replicate the RECUR call so + (if (eq? tail new-tail) lis + (cons head new-tail))) + (recur tail)))))) ; this one can be a tail call. + + +;;; Another version that shares longest tail. +;(define (filter pred lis) +; (receive (ans no-del?) +; ;; (recur l) returns L with (pred x) values filtered. +; ;; It also returns a flag NO-DEL? if the returned value +; ;; is EQ? to L, i.e. if it didn't have to delete anything. +; (let recur ((l l)) +; (if (null-list? l) (values l #t) +; (let ((x (car l)) +; (tl (cdr l))) +; (if (pred x) +; (receive (ans no-del?) (recur tl) +; (if no-del? +; (values l #t) +; (values (cons x ans) #f))) +; (receive (ans no-del?) (recur tl) ; Delete X. +; (values ans #f)))))) +; ans)) + + + +;(define (filter! pred lis) ; Things are much simpler +; (let recur ((lis lis)) ; if you are willing to +; (if (pair? lis) ; push N stack frames & do N +; (cond ((pred (car lis)) ; SET-CDR! writes, where N is +; (set-cdr! lis (recur (cdr lis))); the length of the answer. +; lis) +; (else (recur (cdr lis)))) +; lis))) + + +;;; This implementation of FILTER! +;;; - doesn't cons, and uses no stack; +;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are +;;; usually expensive on modern machines, and can be extremely expensive on +;;; modern Schemes (e.g., ones that have generational GC's). +;;; It just zips down contiguous runs of in and out elts in LIS doing the +;;; minimal number of SET-CDR!s to splice the tail of one run of ins to the +;;; beginning of the next. + +(define (filter! pred lis) + (check-arg procedure? pred filter!) + (let lp ((ans lis)) + (cond ((null-list? ans) ans) ; Scan looking for + ((not (pred (car ans))) (lp (cdr ans))) ; first cons of result. + + ;; ANS is the eventual answer. + ;; SCAN-IN: (CDR PREV) = LIS and (CAR PREV) satisfies PRED. + ;; Scan over a contiguous segment of the list that + ;; satisfies PRED. + ;; SCAN-OUT: (CAR PREV) satisfies PRED. Scan over a contiguous + ;; segment of the list that *doesn't* satisfy PRED. + ;; When the segment ends, patch in a link from PREV + ;; to the start of the next good segment, and jump to + ;; SCAN-IN. + (else (letrec ((scan-in (lambda (prev lis) + (if (pair? lis) + (if (pred (car lis)) + (scan-in lis (cdr lis)) + (scan-out prev (cdr lis)))))) + (scan-out (lambda (prev lis) + (let lp ((lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (begin (set-cdr! prev lis) + (scan-in lis (cdr lis))) + (lp (cdr lis))) + (set-cdr! prev lis)))))) + (scan-in ans (cdr ans)) + ans))))) + + + +;;; Answers share common tail with LIS where possible; +;;; the technique is slightly subtle. + +(define (partition pred lis) + (check-arg procedure? pred partition) + (let recur ((lis lis)) + (if (null-list? lis) (values lis lis) ; Use NOT-PAIR? to handle dotted lists. + (let ((elt (car lis)) + (tail (cdr lis))) + (receive (in out) (recur tail) + (if (pred elt) + (values (if (pair? out) (cons elt in) lis) out) + (values in (if (pair? in) (cons elt out) lis)))))))) + + + +;(define (partition! pred lis) ; Things are much simpler +; (let recur ((lis lis)) ; if you are willing to +; (if (null-list? lis) (values lis lis) ; push N stack frames & do N +; (let ((elt (car lis))) ; SET-CDR! writes, where N is +; (receive (in out) (recur (cdr lis)) ; the length of LIS. +; (cond ((pred elt) +; (set-cdr! lis in) +; (values lis out)) +; (else (set-cdr! lis out) +; (values in lis)))))))) + + +;;; This implementation of PARTITION! +;;; - doesn't cons, and uses no stack; +;;; - is careful not to do redundant SET-CDR! writes, as writes to memory are +;;; usually expensive on modern machines, and can be extremely expensive on +;;; modern Schemes (e.g., ones that have generational GC's). +;;; It just zips down contiguous runs of in and out elts in LIS doing the +;;; minimal number of SET-CDR!s to splice these runs together into the result +;;; lists. + +(define (partition! pred lis) + (check-arg procedure? pred partition!) + (if (null-list? lis) (values lis lis) + + ;; This pair of loops zips down contiguous in & out runs of the + ;; list, splicing the runs together. The invariants are + ;; SCAN-IN: (cdr in-prev) = LIS. + ;; SCAN-OUT: (cdr out-prev) = LIS. + (letrec ((scan-in (lambda (in-prev out-prev lis) + (let lp ((in-prev in-prev) (lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (lp lis (cdr lis)) + (begin (set-cdr! out-prev lis) + (scan-out in-prev lis (cdr lis)))) + (set-cdr! out-prev lis))))) ; Done. + + (scan-out (lambda (in-prev out-prev lis) + (let lp ((out-prev out-prev) (lis lis)) + (if (pair? lis) + (if (pred (car lis)) + (begin (set-cdr! in-prev lis) + (scan-in lis out-prev (cdr lis))) + (lp lis (cdr lis))) + (set-cdr! in-prev lis)))))) ; Done. + + ;; Crank up the scan&splice loops. + (if (pred (car lis)) + ;; LIS begins in-list. Search for out-list's first pair. + (let lp ((prev-l lis) (l (cdr lis))) + (cond ((not (pair? l)) (values lis l)) + ((pred (car l)) (lp l (cdr l))) + (else (scan-out prev-l l (cdr l)) + (values lis l)))) ; Done. + + ;; LIS begins out-list. Search for in-list's first pair. + (let lp ((prev-l lis) (l (cdr lis))) + (cond ((not (pair? l)) (values l lis)) + ((pred (car l)) + (scan-in l prev-l (cdr l)) + (values l lis)) ; Done. + (else (lp l (cdr l))))))))) + + +;;; Inline us, please. +(define (remove pred l) (filter (lambda (x) (not (pred x))) l)) +(define (remove! pred l) (filter! (lambda (x) (not (pred x))) l)) + + + +;;; Here's the taxonomy for the DELETE/ASSOC/MEMBER functions. +;;; (I don't actually think these are the world's most important +;;; functions -- the procedural FILTER/REMOVE/FIND/FIND-TAIL variants +;;; are far more general.) +;;; +;;; Function Action +;;; --------------------------------------------------------------------------- +;;; remove pred lis Delete by general predicate +;;; delete x lis [=] Delete by element comparison +;;; +;;; find pred lis Search by general predicate +;;; find-tail pred lis Search by general predicate +;;; member x lis [=] Search by element comparison +;;; +;;; assoc key lis [=] Search alist by key comparison +;;; alist-delete key alist [=] Alist-delete by key comparison + +(define (delete x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter (lambda (y) (not (= x y))) lis))) + +(define (delete! x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter! (lambda (y) (not (= x y))) lis))) + +;;; Extended from R4RS to take an optional comparison argument. +(define (member x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (find-tail (lambda (y) (= x y)) lis))) + +;;; R4RS, hence we don't bother to define. +;;; The MEMBER and then FIND-TAIL call should definitely +;;; be inlined for MEMQ & MEMV. +;(define (memq x lis) (member x lis eq?)) +;(define (memv x lis) (member x lis eqv?)) + + +;;; right-duplicate deletion +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; delete-duplicates delete-duplicates! +;;; +;;; Beware -- these are N^2 algorithms. To efficiently remove duplicates +;;; in long lists, sort the list to bring duplicates together, then use a +;;; linear-time algorithm to kill the dups. Or use an algorithm based on +;;; element-marking. The former gives you O(n lg n), the latter is linear. + +(define (delete-duplicates elt= lis) + (check-arg procedure? elt= delete-duplicates) + (let recur ((lis lis)) + (if (null-list? lis) lis + (let* ((x (car lis)) + (tail (cdr lis)) + (new-tail (recur (delete x tail elt=)))) + (if (eq? tail new-tail) lis (cons x new-tail)))))) + +(define (delete-duplicates! elt= lis) + (check-arg procedure? elt= delete-duplicates!) + (let recur ((lis lis)) + (if (null-list? lis) lis + (let* ((x (car lis)) + (tail (cdr lis)) + (new-tail (recur (delete! x tail elt=)))) + (if (eq? tail new-tail) lis (cons x new-tail)))))) + + +;;; alist stuff +;;;;;;;;;;;;;;; + +;;; Extended from R4RS to take an optional comparison argument. +(define (assoc x lis . maybe-=) + (let ((= (:optional maybe-= equal?))) + (find (lambda (entry) (= x (car entry))) lis))) + +(define (alist-cons key datum alist) (cons (cons key datum) alist)) + +(define (alist-copy alist) + (map (lambda (elt) (cons (car elt) (cdr elt))) + alist)) + +(define (alist-delete key alist . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter (lambda (elt) (not (= key (car elt)))) alist))) + +(define (alist-delete! key alist . maybe-=) + (let ((= (:optional maybe-= equal?))) + (filter! (lambda (elt) (not (= key (car elt)))) alist))) + + +;;; find find-tail any every list-index +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; ANY returns the first true value produced by PRED. +;;; FIND returns the first list elt passed by PRED. + +(define (find pred list) + (cond ((find-tail pred list) => car) + (else #f))) + +(define (find-tail pred list) + (check-arg procedure? pred find-tail) + (let lp ((list list)) + (and (not (null-list? list)) + (if (pred (car list)) list + (lp (cdr list)))))) + +(define (any pred lis1 . lists) + (check-arg procedure? pred any) + (if (pair? lists) + + ;; N-ary case + (receive (heads tails) (%cars+cdrs (cons lis1 lists)) + (and (pair? heads) + (let lp ((heads heads) (tails tails)) + (receive (next-heads next-tails) (%cars+cdrs tails) + (if (pair? next-heads) + (or (apply pred heads) (lp next-heads next-tails)) + (apply pred heads)))))) ; Last PRED app is tail call. + + ;; Fast path + (and (not (null-list? lis1)) + (let lp ((head (car lis1)) (tail (cdr lis1))) + (if (null-list? tail) + (pred head) ; Last PRED app is tail call. + (or (pred head) (lp (car tail) (cdr tail)))))))) + + +;(define (every pred list) ; Simple definition. +; (let lp ((list list)) ; Doesn't return the last PRED value. +; (or (not (pair? list)) +; (and (pred (car list)) +; (lp (cdr list)))))) + +(define (every pred lis1 . lists) + (check-arg procedure? pred every) + (if (pair? lists) + + ;; N-ary case + (receive (heads tails) (%cars+cdrs (cons lis1 lists)) + (or (not (pair? heads)) + (let lp ((heads heads) (tails tails)) + (receive (next-heads next-tails) (%cars+cdrs tails) + (if (pair? next-heads) + (and (apply pred heads) (lp next-heads next-tails)) + (apply pred heads)))))) ; Last PRED app is tail call. + + ;; Fast path + (or (null-list? lis1) + (let lp ((head (car lis1)) (tail (cdr lis1))) + (if (null-list? tail) + (pred head) ; Last PRED app is tail call. + (and (pred head) (lp (car tail) (cdr tail)))))))) + + + +(define (list-index pred lis1 . lists) + (check-arg procedure? pred list-index) + (if (pair? lists) + + ;; N-ary case + (let lp ((lists (cons lis1 lists)) (n 0)) + (receive (heads tails) (%cars+cdrs lists) + (and (pair? heads) + (if (apply pred heads) n + (lp tails (+ n 1)))))) + + ;; Fast path + (let lp ((lis lis1) (n 0)) + (and (not (null-list? lis)) + (if (pred (car lis)) n (lp (cdr lis) (+ n 1))))))) + +;;; Reverse +;;;;;;;;;;; + +;R4RS, so not defined here. +;(define (reverse lis) (fold cons '() lis)) + +;(define (reverse! lis) +; (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis)) + +(define (reverse! lis) + (let lp ((lis lis) (ans '())) + (if (null-list? lis) ans + (let ((tail (cdr lis))) + (set-cdr! lis ans) + (lp tail lis))))) + +;;; Lists-as-sets +;;;;;;;;;;;;;;;;; + +;;; This is carefully tuned code; do not modify casually. +;;; - It is careful to share storage when possible; +;;; - Side-effecting code tries not to perform redundant writes. +;;; - It tries to avoid linear-time scans in special cases where constant-time +;;; computations can be performed. +;;; - It relies on similar properties from the other list-lib procs it calls. +;;; For example, it uses the fact that the implementations of MEMBER and +;;; FILTER in this source code share longest common tails between args +;;; and results to get structure sharing in the lset procedures. + +(define (%lset2<= = lis1 lis2) (every (lambda (x) (member x lis2 =)) lis1)) + +(define (lset<= = . lists) + (check-arg procedure? = lset<=) + (or (not (pair? lists)) ; 0-ary case + (let lp ((s1 (car lists)) (rest (cdr lists))) + (or (not (pair? rest)) + (let ((s2 (car rest)) (rest (cdr rest))) + (and (or (eq? s2 s1) ; Fast path + (%lset2<= = s1 s2)) ; Real test + (lp s2 rest))))))) + +(define (lset= = . lists) + (check-arg procedure? = lset=) + (or (not (pair? lists)) ; 0-ary case + (let lp ((s1 (car lists)) (rest (cdr lists))) + (or (not (pair? rest)) + (let ((s2 (car rest)) + (rest (cdr rest))) + (and (or (eq? s1 s2) ; Fast path + (and (%lset2<= = s1 s2) (%lset2<= = s2 s1))) ; Real test + (lp s2 rest))))))) + + +(define (lset-adjoin = lis . elts) + (check-arg procedure? = lset-adjoin) + (fold (lambda (elt ans) (if (member elt ans =) ans (cons elt ans))) + lis elts)) + + +(define (lset-union = . lists) + (check-arg procedure? = lset-union) + (reduce (lambda (lis ans) ; Compute ANS + LIS. + (cond ((null? lis) ans) ; Don't copy any lists + ((null? ans) lis) ; if we don't have to. + ((eq? lis ans) ans) + (else + (fold (lambda (elt ans) (if (any (lambda (x) (= x elt))) + ans + (cons elt ans))) + ans lis)))) + '() lists)) + +(define (lset-union! = . lists) + (check-arg procedure? = lset-union!) + (reduce (lambda (lis ans) ; Splice new elts of LIS onto the front of ANS. + (cond ((null? lis) ans) ; Don't copy any lists + ((null? ans) lis) ; if we don't have to. + ((eq? lis ans) ans) + (else + (pair-fold (lambda (pair ans) + (let ((elt (car pair))) + (if (any (lambda (x) (= x elt)) ans) + ans + (begin (set-cdr! pair ans) pair)))) + ans lis)))) + '() lists)) + + +(define (lset-intersection = lis1 . lists) + (check-arg procedure? = lset-intersection) + (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. + (cond ((any null-list? lists) '()) ; Short cut + ((null? lists) lis1) ; Short cut + (else (filter (lambda (x) + (every (lambda (lis) (member x lis =)) lists)) + lis1))))) + +(define (lset-intersection! = lis1 . lists) + (check-arg procedure? = lset-intersection!) + (let ((lists (delete lis1 lists eq?))) ; Throw out any LIS1 vals. + (cond ((any null-list? lists) '()) ; Short cut + ((null? lists) lis1) ; Short cut + (else (filter! (lambda (x) + (every (lambda (lis) (member x lis =)) lists)) + lis1))))) + + +(define (lset-difference = lis1 . lists) + (check-arg procedure? = lset-difference) + (let ((lists (filter pair? lists))) ; Throw out empty lists. + (cond ((null? lists) lis1) ; Short cut + ((memq lis1 lists) '()) ; Short cut + (else (filter (lambda (x) + (every (lambda (lis) (not (member x lis =))) + lists)) + lis1))))) + +(define (lset-difference! = lis1 . lists) + (check-arg procedure? = lset-difference!) + (let ((lists (filter pair? lists))) ; Throw out empty lists. + (cond ((null? lists) lis1) ; Short cut + ((memq lis1 lists) '()) ; Short cut + (else (filter! (lambda (x) + (every (lambda (lis) (not (member x lis =))) + lists)) + lis1))))) + + +(define (lset-xor = . lists) + (check-arg procedure? = lset-xor) + (reduce (lambda (b a) ; Compute A xor B: + ;; Note that this code relies on the constant-time + ;; short-cuts provided by LSET-DIFF+INTERSECTION, + ;; LSET-DIFFERENCE & APPEND to provide constant-time short + ;; cuts for the cases A = (), B = (), and A eq? B. It takes + ;; a careful case analysis to see it, but it's carefully + ;; built in. + + ;; Compute a-b and a^b, then compute b-(a^b) and + ;; cons it onto the front of a-b. + (receive (a-b a-int-b) (lset-diff+intersection = a b) + (cond ((null? a-b) (lset-difference b a =)) + ((null? a-int-b) (append b a)) + (else (fold (lambda (xb ans) + (if (member xb a-int-b =) ans (cons xb ans))) + a-b + b))))) + '() lists)) + + +(define (lset-xor! = . lists) + (check-arg procedure? = lset-xor!) + (reduce (lambda (b a) ; Compute A xor B: + ;; Note that this code relies on the constant-time + ;; short-cuts provided by LSET-DIFF+INTERSECTION, + ;; LSET-DIFFERENCE & APPEND to provide constant-time short + ;; cuts for the cases A = (), B = (), and A eq? B. It takes + ;; a careful case analysis to see it, but it's carefully + ;; built in. + + ;; Compute a-b and a^b, then compute b-(a^b) and + ;; cons it onto the front of a-b. + (receive (a-b a-int-b) (lset-diff+intersection! = a b) + (cond ((null? a-b) (lset-difference! b a =)) + ((null? a-int-b) (append! b a)) + (else (pair-fold (lambda (b-pair ans) + (if (member (car b-pair) a-int-b =) ans + (begin (set-cdr! b-pair ans) b-pair))) + a-b + b))))) + '() lists)) + + +(define (lset-diff+intersection = lis1 . lists) + (check-arg procedure? = lset-diff+intersection) + (cond ((every null-list? lists) (values lis1 '())) ; Short cut + ((memq lis1 lists) (values '() lis1)) ; Short cut + (else (partition (lambda (elt) + (not (any (lambda (lis) (member elt lis =)) + lists))) + lis1)))) + +(define (lset-diff+intersection! = lis1 . lists) + (check-arg procedure? = lset-diff+intersection!) + (cond ((every null-list? lists) (values lis1 '())) ; Short cut + ((memq lis1 lists) (values '() lis1)) ; Short cut + (else (partition! (lambda (elt) + (not (any (lambda (lis) (member elt lis =)) + lists))) + lis1)))) diff --git a/scsh/lib/list-pack.scm b/scsh/lib/list-pack.scm new file mode 100644 index 0000000..0c171ce --- /dev/null +++ b/scsh/lib/list-pack.scm @@ -0,0 +1,235 @@ +;;; This is a Scheme48 interface spec for the SRFI-1 list-lib package. +;;; It defines the LIST-LIB-INTERFACE interface and LIST-LIB structure. +;;; Bindings are typed as tightly as one can in Scheme48's type language. +;;; -Olin Shivers +;;; shivers@ai.mit.edu + +;;; list-lib +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; xcons cons* make-list list-tabulate list-copy circular-list iota +;;; proper-list? circular-list? dotted-list? not-pair? null-list? list= +;;; first second third fourth fifth sixth seventh eighth ninth tenth +;;; car+cdr +;;; take drop +;;; take-right drop-right +;;; take! drop-right! +;;; last last-pair +;;; length+ +;;; append! reverse! append-reverse append-reverse! +;;; zip unzip1 unzip2 unzip3 unzip4 unzip5 +;;; count +;;; unfold unfold-right +;;; fold unfold pair-fold reduce +;;; fold-right unfold-right pair-fold-right reduce-right +;;; append-map append-map! map! pair-for-each filter-map map-in-order +;;; filter partition remove +;;; filter! partition! remove! +;;; find find-tail any every list-index +;;; delete delete! delete-duplicates delete-duplicates! +;;; alist-cons alist-copy +;;; alist-delete alist-delete! +;;; +;;; lset<= lset= lset-adjoin +;;; lset-union lset-union! +;;; lset-intersection lset-intersection! +;;; lset-difference lset-difference! +;;; lset-xor lset-xor! +;;; lset-diff+intersection lset-diff+intersection! +;;; +;;; map for-each member assoc (Extended R4RS procedures) +;;; +;;; cons pair? null? list length append reverse (These are the +;;; car cdr ... cdddar cddddr set-car! set-cdr! list-ref R4RS procedures +;;; memq memv assoc assq assv re-exported by +;;; list-lib unchanged.) + +(define-interface list-lib-interface + (export + ;; xcons + (xcons (proc (:value :value) :value)) + + ;; cons* item ... + (cons* (proc (:value &rest :value) :value)) + + ;; make-list len [fill] + (make-list (proc (:exact-integer &opt :value) :value)) + + ;; list-tabulate elt-proc len + (list-tabulate (proc (:exact-integer (proc (:exact-integer) :value)) :value)) + + ;; list-copy lis + (list-copy (proc (:value) :value)) + + (circular-list (proc (:value &opt :value) :pair)) + +; ((:iota iota:) +; (proc (:number &opt :number :number) :value)) + + (iota (proc (:exact-integer &opt :number :number) :value)) + + (proper-list? (proc (:value) :boolean)) + (dotted-list? (proc (:value) :boolean)) + (circular-list? (proc (:value) :boolean)) + + (not-pair? (proc (:value) :boolean)) + (null-list? (proc (:value) :boolean)) + + (list= (proc ((proc (:value :value) :boolean) &rest :value) :boolean)) + + ((first second third fourth fifth sixth seventh eighth ninth tenth) + (proc (:pair) :value)) + + (car+cdr (proc (:pair) (some-values :value :value))) + + ;; take lis i take-right lis i + ;; drop lis i drop-right lis i + ;; take! lis i drop-right! lis i + ((take drop take-right drop-right take! drop-right!) + (proc (:value :exact-integer) :value)) + + (last (proc (:pair) :value)) + (last-pair (proc (:pair) :pair)) + + (length+ (proc (:value) :value)) + (append! (proc (:value &rest :value) :value)) + (reverse! (proc (:value) :value)) + ((append-reverse append-reverse!) (proc (:value :value) :value)) + + (zip (proc (:value &rest :value) :value)) + (unzip1 (proc (:value) :value)) + (unzip2 (proc (:value) (some-values :value :value))) + (unzip3 (proc (:value) (some-values :value :value :value))) + (unzip4 (proc (:value) (some-values :value :value :value :value))) + (unzip5 (proc (:value) (some-values :value :value :value :value :value))) + + (count (proc ((proc (:value) :boolean) :value) :exact-integer)) + + ((fold fold-right) + (proc ((proc (:value :value &rest :value) :value) + :value :value &rest :value) + :value)) + + ((unfold unfold-right) (proc ((proc (:value) :boolean) + (proc (:value) :value) + (proc (:value) :value) + :value + &opt (proc (:value) :value)) + :value)) + + ((pair-fold pair-fold-right) + (proc ((proc (:pair :value &rest :value) :value) + :value :value &rest :value) + :value)) + + ((reduce reduce-right) + (proc ((proc (:value :value) :value) :value :value) :value)) + + ((append-map append-map! map! filter-map map-in-order) + (proc ((proc (:value &rest :value) :value) :value &rest :value) :value)) + + (pair-for-each (proc ((proc (:pair &rest :pair) :values) :value &rest :value) + :unspecific)) + + ((filter filter! remove remove!) + (proc ((proc (:value) :boolean) :value) :value)) + + ((partition partition!) (proc ((proc (:value) :boolean) :value) + (some-values :value :value))) + + ((find find-tail) (proc ((proc (:value) :boolean) :value) :value)) + + ((any every) + (proc ((proc (:value &rest :value) :value) :value &rest :value) :value)) + + (list-index (proc ((proc (:value &rest :value) :value) :value &rest :value) + :value)) + + ((delete delete!) + (proc (:value :value &opt (proc (:value :value) :boolean)) :value)) + + ;; Extended from their R5RS definitions to take an optional comparison + ;; function: (MEMBER x lis [=]). + (member (proc (:value :value &opt (proc (:value :value) :boolean)) :value)) + (assoc (proc (:value :value &opt (proc (:value :value) :boolean)) :value)) + + ((delete-duplicates delete-duplicates!) + (proc (:value &opt (proc (:value :value) :boolean)) :value)) + + (alist-cons (proc (:value :value :value) :value)) + (alist-copy (proc (:value) :value)) + ((alist-delete alist-delete!) + (proc (:value :value &opt (proc (:value :value) :value)) :value)) + + ;; Extended from their R4RS definitions. + (map (proc ((proc (:value) :value) &rest :value) :value)) + (for-each (proc ((proc (:value) :values) &rest :value) :unspecific)) + + ;; R4RS exports + (cons (proc (:value :value) :pair)) + ((pair? null?) (proc (:value) :boolean)) + (list-ref (proc (:value :exact-integer) :value)) + (list (proc (&rest :value) :value)) + (length (proc (:value) :exact-integer)) + (append (proc (&rest :value) :value)) + (reverse (proc (:value) :value)) + ((car cdr + caaaar caaadr caadar caaddr caaar caadr caar + cadaar cadadr caddar cadddr cadar caddr cadr + cdaaar cdaadr cdadar cdaddr cdaar cdadr cdar + cddaar cddadr cdddar cddddr cddar cdddr cddr) (proc (:value) :value)) + ((set-car! set-cdr!) (proc (:pair :value) :unspecific)) + ((memq memv) (proc (:value :value) :value)) + ((assq assv) (proc (:value :value) :value)) + + + ;; lset-lib + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; lset<= = list1 list2 ... + ;; lset= = list1 list2 ... + ;; lset-adjoin = list elt1 ... + ;; lset-union = list1 ... + ;; lset-intersection = list1 list2 ... + ;; lset-difference = list1 list2 ... + ;; lset-xor = list1 ... + ;; lset-diff+intersection = list1 list2 ... + ;; ... and their side effecting counterparts: + ;; lset-union! lset-intersection! lset-difference! lset-xor! + ;; lset-diff+intersection! + + ;; lset= = list1 ... -> boolean + ;; lset<= = list1 ... -> boolean + ((lset= lset<=) + (proc ((proc (:value :value) :boolean) &rest :value) :boolean)) + + ;; lset-adjoin = list elt1 ... + (lset-adjoin (proc ((proc (:value :value) :boolean) :value &rest :value) :value)) + + ;; lset-union = list1 ... lset-xor = list1 ... + ;; lset-union! = list1 ... lset-xor! = list1 ... + ((lset-union lset-xor) + (proc ((proc (:value :value) :boolean) &rest :value) :value)) + + ;; lset-intersection = list1 list2 ... + ;; lset-intersection! = list1 list2 ... + ;; lset-difference = list1 list2 ... + ;; lset-difference! = list1 list2 ... + ((lset-intersection lset-difference + lset-intersection! lset-difference!) + (proc ((proc (:value :value) :boolean) :value &rest :value) :value)) + + ;; lset-diff+intersection = list1 list2 ... + ;; lset-diff+intersection! = list1 list2 ... + ((lset-diff+intersection lset-diff+intersection!) + (proc ((proc (:value :value) :boolean) :value &rest :value) + (some-values :value :value))) + )) + +(define-structure list-lib list-lib-interface + (open error-package ; ERROR procedure + receiving ; RECEIVE m-v macro + let-opt ; LET-OPTIONALS and :OPTIONAL. + scheme) + (begin (define (check-arg pred val caller) + (let lp ((val val)) + (if (pred val) val (lp (error "Bad argument" val pred caller)))))) + (files list-lib)) diff --git a/scsh/lib/srfi-1.html b/scsh/lib/srfi-1.html new file mode 100644 index 0000000..73f9c50 --- /dev/null +++ b/scsh/lib/srfi-1.html @@ -0,0 +1,3056 @@ + + + + + + + + + + SRFI 1: List Library + + + + + + + + + + +

Title

+
+List Library +
+ + +

Author

+
+ Olin Shivers / + shivers@ai.mit.edu +
+ + +

Table of contents

+ + + + + + +

Abstract

+

+R5RS Scheme has an impoverished set of list-processing utilities, which is a +problem for authors of portable code. This SRFI proposes a coherent and +comprehensive set of list-processing procedures; it is accompanied by a +reference implementation of the spec. The reference implementation is +

    +
  • portable +
  • efficient +
  • completely open, public-domain source +
+ +Note: This is a working draft, and tends to lag the plain-text version in terms of actual content. +See + ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt +for the latest copy of the plain-text draft. + + + +

Introduction

+

+The set of basic list and pair operations provided by R4RS/R5RS Scheme is far +from satisfactory. Because this set is so small and basic, most +implementations provide additional utilities, such as a list-filtering +function, or a "left fold" operator, and so forth. But, of course, this +introduces incompatibilities -- different Scheme implementations provide +different sets of procedures. + +

+I have designed a full-featured library of procedures for list processing. +While putting this library together, I checked as many Schemes as I could get +my hands on. (I have a fair amount of experience with several of these +already.) I missed Chez -- no on-line manual that I can find -- but I hit most +of the other big, full-featured Schemes. The complete list of list-processing +systems I checked is: +

+ R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, + Common Lisp, Bigloo, guile, T, APL and the SML standard basis +
+

+As a result, the library I am proposing is fairly rich. +

+Following this initial design phase, this library went through several +months of discussion on the SRFI mailing lists, and was altered in light +of the ideas and suggestions put forth during this discussion. +

+In parallel with designing this API, I have also written a reference +implementation. I have placed this source on the Net with an unencumbered, +"open" copyright. A few notes about the reference implementation: + +

    +
  • Although I got procedure names and specs from many Schemes, I wrote this + code myself. Thus, there are no entanglements. + Any Scheme implementor + can pick this library up with no worries about copyright problems -- both + commercial and non-commercial systems. + +
  • The code is written for portability and should be trivial to port to + any Scheme. It has only four deviations from R4RS, clearly discussed + in the comments:
      +
    • Use of an error procedure; +
    • Use of the R5RS values and a simple receive macro for producing + and consuming multiple return values; +
    • Use of simple :optional and let-optionals macros for optional + argument parsing and defaulting; +
    • Use of a simple check-arg procedure for argument checking. +
    + +
  • It is written for clarity and well-commented. The current source is + 1436 lines of text, of which 690 are source code; the rest being comments + and blank lines. + +
  • It is written for efficiency. Fast paths are provided for common + cases. Side-effecting procedures such as filter! avoid unnecessary, + redundant set-cdr!s which would thrash a generational GC's write barrier + and the store buffers of fast processors. Functions reuse longest common + tails from input parameters to construct their results where + possible. Constant-space iterations are used in preference to recursions; + local recursions are used in preference to consing temporary intermediate + data structures. +

    + This is not to say that the implementation can't be tuned up for + a specific Scheme implementation. There are notes in comments addressing + ways implementors can tune the reference implementation for performance. +

+

+In short, I've written the reference implementation to make it as painless +as possible for an implementor -- or a regular programmer -- to adopt this +library and get good results with it. + + + +

Procedure Index

+

+Here is a short list of the procedures provided by the list-lib package. +R5RS procedures are shown in +italic; +extended R5RS + procedures, in bold italic. +

+ +

+Four R4RS/R5RS list-processing procedures are extended by this library in +backwards-compatible ways: +

+ +
map for-each + (Extended to take lists of unequal length) +
member assoc + (Extended to take an optional comparison procedure.) +
+
+ +

+The following R4RS/R5RS list- and pair-processing procedures are also part of +list-lib's exports, as defined by the R5RS: +

+
+cons pair? null?
+car cdr ... cdddar cddddr 
+set-car! set-cdr! 
+list append reverse
+length list-ref
+memq memv assq assv
+
+
+ +

+The remaining two R4RS/R5RS list-processing +procedures are not part of +this library: +

+ +
list-tail + (renamed drop) +
list? + (see proper-list?, + circular-list? and + dotted-list?) +
+
+ + +

General discussion

+

+ +A set of general criteria guided the design of this library. + +

+ +I don't require "destructive" (what I call "linear update") procedures to +alter and recycle cons cells from the argument lists. They are allowed to, but +not required to. (And the reference implementations I have written do +recycle the argument lists.) + +

+List-filtering procedures such as filter or delete do not disorder +lists. Elements appear in the answer list in the same order as they appear in +the argument list. This constrains implementation, but seems like a desirable +feature, since in many uses of lists, order matters. (In particular, +disordering an alist is definitely a bad idea.) +

+Contrariwise, although the reference implementations of the list-filtering +procedures share longest common tails between argument and answer lists, +it not is part of the spec. +

+Because lists are an inherently sequential data structure (unlike, say, +vectors), list-inspection functions such as find, find-tail, for-each, any +and every commit to a left-to-right traversal order of their argument list. +

+However, constructor functions, such as list-tabulate and the mapping +procedures (append-map, append-map!, map!, pair-for-each, filter-map, +map-in-order), do not specify the dynamic order in which their procedural +argument is applied to its various values. +

+Predicates return useful true values wherever possible. Thus any must return +the true value produced by its predicate, and every returns the final true +value produced by applying its predicate argument to the last element of its +argument list. +

+Functionality is provided both in pure and linear-update (potentially +destructive) forms wherever this makes sense. +

+No special status accorded Scheme's built-in equality functions. +Any functionality provided in terms of eq?, eqv?, equal? is also +available using a client-provided equality function. +

+Proper design counts for more than backwards compatibility, but I have tried, +ceteris paribus, +to be as backwards-compatible as possible with existing +list-processing libraries, in order to facilitate porting old code to run as a +client of the procedures in this library. Name choices and semantics are, for +the most part, in agreement with existing practice in many current Scheme +systems. I have indicated some incompatibilities in the following text. +

+These procedures are not "sequence generic" -- i.e., procedures that +operate on either vectors and lists. They are list-specific. I prefer to +keep the library simple and focussed. +

+I have named these procedures without a qualifying initial "list-" lexeme, +which is in keeping with the existing set of list-processing utilities in +Scheme. +I follow the general Scheme convention (vector-length, string-ref) of +placing the type-name before the action when naming procedures -- so +we have list-copy and pair-for-each rather than the perhaps +more fluid, but less consistent, copy-list or for-each-pair. +

+I have generally followed a regular and consistent naming scheme, composing +procedure names from a set of basic lexemes. + + +

"Linear update" procedures

+

+ +Many procedures in this library have "pure" and "linear update" variants. A +"pure" procedure has no side-effects, and in particular does not alter its +arguments in any way. A "linear update" procedure is allowed -- but not +required -- to side-effect its arguments in order to construct its +result. "Linear update" procedures are typically given names ending with an +exclamation point. So, for example, (append! list1 list2) is allowed to +construct its result by simply using set-cdr! to set the cdr of the last pair +of list1 to point to list2, and then returning list1 (unless list1 is the +empty list, in which case it would simply return list2). However, append! may +also elect to perform a pure append operation -- this is a legal definition +of append!: +

+(define append! append)
+
+

+This is why we do not call these procedures "destructive" -- because they +aren't required to be destructive. They are potentially destructive. +

+What this means is that you may only apply linear-update procedures to +values that you know are "dead" -- values that will never be used again +in your program. This must be so, since you can't rely on the value passed +to a linear-update procedure after that procedure has been called. It +might be unchanged; it might be altered. +

+The "linear" in "linear update" doesn't mean "linear time" or "linear space" +or any sort of multiple-of-n kind of meaning. It's a fancy term that +type theorists and pure functional programmers use to describe +systems where you are only allowed to have exactly one reference to each +variable. This provides a guarantee that the value bound to a variable is +bound to no other variable. So when you use a variable in a variable +reference, you "use it up." Knowing that no one else has a pointer to that +value means the a system primitive is free to side-effect its arguments to +produce what is, observationally, a pure-functional result. +

+In the context of this library, "linear update" means you, the programmer, +know there are no other live references to the value passed to the +procedure -- after passing the value to one of these procedures, the +value of the old pointer is indeterminate. Basically, you are licensing +the Scheme implementation to alter the data structure if it feels like +it -- you have declared you don't care either way. +

+You get no help from Scheme in checking that the values you claim are "linear" +really are. So you better get it right. Or play it safe and use the non-! +procedures -- it doesn't do any good to compute quickly if you get the wrong +answer. +

+Why go to all this trouble to define the notion of "linear update" and use it +in a procedure spec, instead of the more common notion of a "destructive" +operation? First, note that destructive list-processing procedures are almost +always used in a linear-update fashion. This is in part required by the +special case of operating upon the empty list, which can't be side-effected. +This means that destructive operators are not pure side-effects -- they have +to return a result. Second, note that code written using linear-update +operators can be trivially ported to a pure, functional subset of Scheme by +simply providing pure implementations of the linear-update operators. Finally, +requiring destructive side-effects ruins opportunities to parallelise these +operations -- and the places where one has taken the trouble to spell out +destructive operations are usually exactly the code one would want a +parallelising compiler to parallelise: the efficiency-critical kernels of the +algorithm. Linear-update operations are easily parallelised. Going with a +linear-update spec doesn't close off these valuable alternative implementation +techniques. This list library is intended as a set of low-level, basic +operators, so we don't want to exclude these possible implementations. +

+The linear-update procedures in this library are +

+take! drop-right! +append! reverse! append-reverse! +append-map! map! +filter! partition! remove! +delete! alist-delete! delete-duplicates! +lset-adjoin! lset-union! lset-intersection! +lset-difference! lset-xor! lset-diff+intersection! +
+ + + +

Improper Lists

+

+ +Scheme does not properly have a list type, just as C does not have a string +type. Rather, Scheme has a binary-tuple type, from which one can build binary +trees. There is an interpretation of Scheme values that allows one to +treat these trees as lists. Further complications ensue from the fact that +Scheme allows side-effects to these tuples, raising the possibility of lists +of unbounded length, and trees of unbounded depth (that is, circular data +structures). + +

+However, there is a simple view of the world of Scheme values that considers +every value to be a list of some sort. that is, every value is either +

    +
  • a "proper list" -- a finite, nil-terminated list, such as:
    + (a b c)
    + ()
    + (32)
    +
  • a "dotted list" -- a finite, non-nil terminated list, such as:
    + (a b c . d)
    + (x . y)
    + 42
    + george
    +
  • or a "circular list" -- an infinite, unterminated list. +
+

+Note that the zero-length dotted lists are simply all the non-null, non-pair +values. + +

+This view is captured by the predicates proper-list?, dotted-list?, and +circular-list?. List-lib users should note that dotted lists are not commonly +used, and are considered by many Scheme programmers to be an ugly artifact of +Scheme's lack of a true list type. However, dotted lists do play a noticeable +role in the syntax of Scheme, in the "rest" parameters used by n-ary +lambdas: (lambda (x y . rest) ...). + +

+Dotted lists are not fully supported by list-lib. Most procedures are +defined only on proper lists -- that is, finite, nil-terminated lists. The +procedures that will also handle circular or dotted lists are specifically +marked. While this design decision restricts the domain of possible arguments +one can pass to these procedures, it has the benefit of allowing the +procedures to catch the error cases where programmers inadvertently pass +scalar values to a list procedure by accident, +e.g., by switching the arguments to a procedure call. + + +

Errors

+

+ +Note that statements of the form "it is an error" merely mean "don't +do that." They are not a guarantee that a conforming implementation will +"catch" such improper use by, for example, raising some kind of exception. +Regrettably, R5RS Scheme requires no firmer guarantee even for basic operators such +as car and cdr, so there's little point in requiring these procedures to do +more. Here is the relevant section of the R5RS: +

+

+ When speaking of an error situation, this report uses the phrase "an + error is signalled" to indicate that implementations must detect and + report the error. If such wording does not appear in the discussion + of an error, then implementations are not required to detect or + report the error, though they are encouraged to do so. An error + situation that implementations are not required to detect is usually + referred to simply as "an error." +

+ For example, it is an error for a procedure to be passed an argument + that the procedure is not explicitly specified to handle, even though + such domain errors are seldom mentioned in this report. + Implementations may extend a procedure's domain of definition to + include such arguments. +

+ + + +

Not included in this library

+

+The following items are not in this library: +

    +
  • Sort routines +
  • Destructuring/pattern-matching macro +
  • Tree-processing routines +
+

+They shound have their own SRFI specs. +

+ + + +

The procedures

+

+ +In a Scheme system that has a module or package system, these procedures +should be contained in a module named "list-lib". + +The templates given below obey the following conventions for procedure formals: + + +
list + A proper (finite, nil-terminated) list +
clist + A proper or circular list +
flist + A finite (proper or dotted) list +
pair + A pair +
x, y, d, a + Any value +
object, value + Any value +
n, i + A natural number (an integer >= 0) +
proc + A procedure +
= + A boolean procedure taking two arguments +
pred + A boolean procedure taking one argument +
+ +

+It is an error to pass a circular or dotted list to a procedure not +defined to accept such an argument. Such a procedure may either signal +an error or diverge when passed a circular list. + + +

Constructors

+

+ +

+ + +
+ +cons a d -> pair +
+ [R5RS] + The primitive constructor. Returns a newly allocated pair whose car is + a and whose cdr is d. + The pair is guaranteed to be different (in the sense of eqv?) + from every existing object. +
+(cons 'a '())        => (a)
+(cons '(a) '(b c d)) => ((a) b c d)
+(cons "a" '(b c))    => ("a" b c)
+(cons 'a 3)          => (a . 3)
+(cons '(a b) 'c)     => ((a b) . c)
+
+ + +
+ +list object ... -> list +
+ [R5RS] + Returns a newly allocated list of its arguments. +
+(list 'a (+ 3 4) 'c) =>  (a 7 c)
+(list)               =>  ()
+
+ + +
+ +xcons d a -> pair +
+
+(lambda (d a) (cons a d))
+
+ Of utility only as a value to be conveniently passed to higher-order + procedures. + +
+(xcons '(b c) 'a) => (a b c)
+
+ + The name stands for "eXchanged CONS." + + + +
cons* elt1 elt2 ... -> object +
+ + Like list, + but the last argument provides the tail of the constructed list, + returning +
+(cons elt1 (cons elt2 (cons ... eltn))) +
+ This function is called list* in Common Lisp and about + half of the Schemes that provide it, + and cons* in the other half. +
+(cons* 1 2 3 4) => (1 2 3 . 4)
+(cons* 1) => 1
+
+ + + +
make-list n [fill] -> list +
+ Returns an n-element list, + whose elements are all the value fill. + If the fill argument is not given, the elements of the list may + be arbitrary values. +
+(make-list 4 'c) => (c c c c)
+
+ + + +
list-tabulate n init-proc -> list +
+ Returns an n-element list. Element i of the list, where 0 <= i < n, + is produced by (init-proc i). No guarantee is made about the dynamic + order in which init-proc is applied to these indices. + +
+(list-tabulate 4 values) => (0 1 2 3)
+
+ + + +
list-copy flist -> flist +
+ Copies the spine of the argument. + + + +
circular-list elt1 elt2 ... -> list +
+ Constructs a circular list of the elements. +
+(circular-list 'z 'q) => (z q z q z q ...)
+
+ + + +
iota count [start step] -> list +
+ Returns a list containing the elements +
+(start start+step ... start+(count-1)*step)
+
+ The start and step parameters default to 0 and 1, respectively. + This procedure takes its name from the APL primitive. + +
+(iota 5) => (0 1 2 3 4)
+(iota 5 0 -0.1) => (0 -0.1 -0.2 -0.3 -0.4)
+
+
+ + +

Predicates

+

+Note: the predicates proper-list?, circular-list?, and dotted-list? +partition the entire universe of Scheme values. + +

+ +
+proper-list? x -> boolean + +
+ Returns true iff x is a proper list -- a finite, nil-terminated list. +

+ More carefully: The empty list is a proper list. A pair whose cdr is a + proper list is also a proper list: +

+<proper-list> ::= ()                            (Empty proper list)
+              |   (cons <x> <proper-list>)      (Proper-list pair)
+
+ Note that this definition rules out circular lists. This + function is required to detect this case and return false. +

+ Nil-terminated lists are called "proper" lists by R5RS and Common Lisp. + The opposite of proper is improper. +

+ R5RS binds this function to the variable list?. +

+

+(not (proper-list? x)) = (or (dotted-list? x) (circular-list? x))
+
+ + + +
circular-list? x -> boolean +
+ True if x is a circular list. A circular list is a value such that + for every n >= 0, cdrn(x) is a pair. +

+ Terminology: The opposite of circular is finite. +

+(not (circular-list? x)) = (or (proper-list? x) (dotted-list? x))
+
+ + + +
dotted-list? x -> boolean +
+ True if x is a finite, non-nil-terminated list. That is, there exists + an n >= 0 such that cdrn(x) is neither a pair nor (). + This includes + non-pair, non-() values (e.g. symbols, numbers), + which are considered to be dotted lists of length 0. +
+(not (dotted-list? x)) = (or (proper-list? x) (circular-list? x))
+
+ + + +
pair? object -> boolean +
+ [R5RS] + Returns #t if object is a pair; otherwise, #f. +
+(pair? '(a . b)) =>  #t
+(pair? '(a b c)) =>  #t
+(pair? '())      =>  #f
+(pair? '#(a b))  =>  #f
+(pair? 7)        =>  #f
+(pair? 'a)       =>  #f
+
+ + + +
null? object -> boolean +
+ [R5RS] + Returns #t if object is the empty list; otherwise, #f. + + + +
null-list? list -> boolean +
+ List is a proper or circular list. This procedure returns true if + the argument is the empty list (), and false otherwise. It is an + error to pass this procedure a value which is not a proper or + circular list. + + This procedure is recommended as the termination condition for + list-processing procedures that are not defined on dotted lists. + + +
+ +not-pair? x -> boolean +
+
(lambda (x) (not (pair? x)))
+ Provided as a procedure as it can be useful as the termination condition + for list-processing procedures that wish to handle all finite lists, + both proper and dotted. + + +
+ +list= elt= list1 ... -> boolean +
+ Determines list equality, given an element-equality procedure. + Proper list A equals proper list B + if they are of the same length, + and their corresponding elements are equal, + as determined by elt=. + If the element-comparison procedure's first argument is + from listi, + then its second argument is from listi+1, + i.e. it is always called as + (elt= a b) + for a an element of list A, + and b an element of list B. +

+ In the n-ary case, + every listi is compared to + listi+1 + (as opposed, for example, to comparing + list1 to every listi, + for i>1). + If there are no list arguments at all, + list= simply returns true. +

+ It is an error to apply list= to anything except proper lists. + While + implementations may choose to extend it to circular lists, note that it + cannot reasonably be extended to dotted lists, as it provides no way to + specify an equality procedure for comparing the list terminators. +

+ Note that the dynamic order in which the elt= procedure is + applied to pairs of elements is not specified. + For example, if list= is applied + to three lists, A, B, and C, + it may first completely compare A to B, + then compare B to C, + or it may compare the first elements of A and B, + then the first elements of B and C, + then the second elements of A and B, and so forth. +

+ The equality procedure must be consistent with eq?. + That is, it must be the case that +

+ (eq? x y) => (elt= x y). +
+ Note that this implies that two lists which are eq? + are always list=, as well. +
+(list= eq?) => #t       ; Trivial cases
+(list= eq? '(a)) => #t
+
+ +
+ + + +

Selectors

+
+ + + + +
car pair -> value +
cdr pair -> value +
+ [R5RS] + These functions return the contents of the car and cdr field of their + argument, respectively. + Note that it is an error to apply them to the empty list. +
+(car '(a b c))     =>  a             (cdr '(a b c))     =>  (b c)  
+(car '((a) b c d)) =>  (a)	     (cdr '((a) b c d)) =>  (b c d)
+(car '(1 . 2))     =>  1	     (cdr '(1 . 2))     =>  2      
+(car '())          =>  *error*	     (cdr '())          =>  *error*
+
+ + + + + + + + +
caar pair -> value +
cadr pair -> value +
: +
cdddar pair -> value +
cddddr pair -> value +
+ [R5RS] + These procedures are compositions of car and cdr, + where for example caddr could be defined by +
    
+(define caddr (lambda (x) (car (cdr (cdr x))))).
+
+ Arbitrary compositions, up to four deep, are provided. There are + twenty-eight of these procedures in all. + + + +
list-ref clist i -> value +
+ [R5RS] + Returns the ith element of clist. + (This is the same as the car of + (drop clist i).) + It is an error if i >= n, + where n is the length of clist. +
    
+(list-ref '(a b c d) 2) => c
+
+ + +
+ +first   pair -> object +
+ +second  pair -> object +
+ +third   pair -> object +
+ +fourth  pair -> object +
+ +fifth   pair -> object +
+ +sixth   pair -> object +
+ +seventh pair -> object +
+ +eighth  pair -> object +
+ +ninth   pair -> object +
+ +tenth   pair -> object +
+ Synonyms for car, cadr, caddr, ... + +
+(third '(a b c d e)) => c
+
+ + +
+ +car+cdr pair -> [x y] +
+ The fundamental pair deconstructor: +
+(lambda (p) (values (car p) (cdr p)))
+
+ This can, of course, be implemented more efficiently by a compiler. + + +
+ +take x i -> list +
+ +drop x i -> object +
+ take returns the first i elements of list x.
+ drop returns all but the first i elements of list x. +
+(take '(a b c d e)  2) => (a b)
+(drop '(a b c d e)  2) => (c d e)
+
+ x may be any value -- a proper, circular, or dotted list: +
+(take '(1 2 3 . d) 2) => (1 2)
+(drop '(1 2 3 . d) 2) => (3 . d)
+(take '(1 2 3 . d) 3) => (1 2 3)
+(drop '(1 2 3 . d) 3) => d
+
+ For a legal i, take and drop partition the list in a manner which + can be inverted with append: +
+(append (take x i) (drop x i)) = x
+
+ drop is exactly equivalent to performing i cdr operations on x; + the returned value shares a common tail with x. + + If the argument is a list of non-zero length, take is guaranteed to + return a freshly-allocated list, even in the case where the entire + list is taken, e.g. (take lis (length lis)). + + +
+ +take-right flist i -> object +
+ +drop-right flist i -> list +
+ take-right returns the last i elements of flist.
+ drop-right returns all but the last i elements of flist. +
+(take-right '(a b c d e) 2) => (d e)
+(drop-right '(a b c d e) 2) => (a b c)
+
+ The returned list may share a common tail with the argument list. +

+ flist may be any finite list, either proper or dotted: +

+(take-right '(1 2 3 . d) 2) => (2 3 . d)
+(drop-right '(1 2 3 . d) 2) => (1)
+(take-right '(1 2 3 . d) 0) => d
+(drop-right '(1 2 3 . d) 0) => (1 2 3)
+
+ For a legal i, take-right and drop-right partition the list in a manner + which can be inverted with append: +
+(append (take flist i) (drop flist i)) = flist
+
+ take-right's return value is guaranteed to share a common tail with flist. + + If the argument is a list of non-zero length, drop-right is guaranteed to + return a freshly-allocated list, even in the case where nothing is + dropped, e.g. (drop-right lis 0). + + +
+ +take! x i -> list +
+ +drop-right! flist i -> list +
+ take! and drop-right! are "linear-update" variants of take and + drop-right: the procedure is allowed, but not required, to alter the + argument list to produce the result. +

+ If x is circular, take! may return a shorter-than-expected list: +

+(take! (circular-list 1 3 5) 8) => (1 3)
+(take! (circular-list 1 3 5) 8) => (1 3 5 1 3 5 1 3)
+
+ + + +
+ +last pair -> object +
+ +last-pair pair -> pair +
+ last returns the last element of the non-empty, + finite list pair. + last-pair returns the last pair in the non-empty, + finite list pair. + +
+(last '(a b c)) => c
+(last-pair '(a b c)) => (c)
+
+ +
+ + +

Miscellaneous: length, append, reverse, zip & count

+ +
+ +
+ +length  list -> integer +
+ +length+ clist -> integer or #f +
+ Both length and length+ return the length of the argument. + It is an error to pass a value to length which is not a proper + list (finite and nil-terminated). In particular, this means an + implementation may diverge or signal an error when length is + applied to a circular list. +

+ length+, on the other hand, returns #F when applied to a circular + list. +

+ The length of a proper list is a non-negative integer n such that cdr + applied n times to the list produces the empty list. + + + +

+ +append  list1 ... -> list +
+ +append! list1 ... -> list +
+ [R5RS] + append returns a list consisting of the elements + of list1 + followed by the elements of the other list parameters. +
+(append '(x) '(y))        =>  (x y)
+(append '(a) '(b c d))    =>  (a b c d)
+(append '(a (b)) '((c)))  =>  (a (b) (c))
+
+ The resulting list is always newly allocated, except that it + shares structure with the final listi argument. + This last argument may be any value at all; + an improper list results if it is not + a proper list. All other arguments must be proper lists. +
+(append '(a b) '(c . d))  =>  (a b c . d)
+(append '() 'a)           =>  a
+
+ + append! is the "linear-update" variant of append + -- it is allowed, but not required, to alter cons cells in the argument + lists to construct the result list. + The last argument is never altered; the result + list shares structure with this parameter. + + +
+ +reverse  list -> list +
+ +reverse! list -> list +
+ [R5RS] + + reverse returns a newly allocated list consisting of + the elements of list in reverse order. +
+(reverse '(a b c)) =>  (c b a)
+(reverse '(a (b c) d (e (f))))
+    =>  ((e (f)) d (b c) a)
+
+ reverse! is the linear-update variant of reverse. + It is permitted, but not required, to alter the argument's cons cells + to produce the reversed list. + + + +
+ +append-reverse  rev-head tail -> list +
+ +append-reverse! rev-head tail -> list +
+ append-reverse returns + (append (reverse rev-head) tail). + It is provided because it is a common operation -- a common + list-processing style calls for this exact operation to transfer values + accumulated in reverse order onto the front of another list, and because + the implementation is significantly more efficient than the simple + composition it replaces. (But note that this pattern of iterative + computation followed by a reverse can frequently be rewritten as a + recursion, dispensing with the reverse and append-reverse steps, and + shifting temporary, intermediate storage from the heap to the stack, + which is typically a win for reasons of cache locality and eager storage + reclamation.) +

+ append-reverse! is just the linear-update variant -- it is allowed, but + not required, to alter rev-head's cons cells to construct the result. + + + +

zip clist1 clist2 ... -> list +
+
(lambda lists (apply map list lists))
+
+ If zip is passed n lists, it returns a list as long as the shortest + of these lists, each element of which is an n-element list comprised + of the corresponding elements from the parameter lists. + +
+(zip '(one two three) 
+     '(1 2 3)
+     '(odd even odd even odd even odd even))
+    => ((one 1 odd) (two 2 even) (three 3 odd))
+
+(zip '(1 2 3)) => ((1) (2) (3))
+
+ At least one of the argument lists must be finite: +
+(zip '(3 1 4 1) (circular-list #f #t)) 
+    => ((3 #f) (1 #t) (4 #f) (1 #t))
+
+ + + +
unzip1 list -> list + +
unzip2 list -> [list list] + +
unzip3 list -> [list list list] + +
unzip4 list -> [list list list list] + +
unzip5 list -> [list list list list list] +
+ unzip1 takes a list of lists, + where every list must contain at least one element, + and returns a list containing the initial element of each such list. + That is, it returns (map car lists). + unzip2 takes a list of lists, where every list must contain at least + two elements, and returns two values: a list of the first elements, + and a list of the second elements. unzip3 does the same for the first + three elements of the lists, and so forth. + +
+(unzip2 '((1 one) (2 two) (3 three))) =>
+    (1 2 3) 
+    (one two three)
+
+ + +
+ +count pred clist1 clist2 -> integer +
+ pred is a procedure taking as many arguments as there + are lists and returning a single value. It is applied + element-wise to the elements of the lists, and a count is + tallied of the number of elements that produce a true value. This count + is returned. count is "iterative" in that it is guaranteed + to apply pred to the list elements in a + left-to-right order. + The counting stops when the shortest list expires. +
+(count even? '(3 1 4 1 5 9 2 5 6)) => 3
+(count < '(1 2 4 8) '(2 4 6 8 10 12 14 16)) => 3
+
+ At least one of the argument lists must be finite: +
+(count < '(3 1 4 1) (circular-list 1 10)) => 2
+
+ +
+ + +

Fold, unfold & map

+
+ +
+ +fold kons knil clist1 clist2 ... -> value +
+ The fundamental list iterator. +

+ First, consider the single list-parameter case. If clist1 = (e1 e2 ... en), + then this procedure returns +

+(kons en ... (kons e2 (kons e1 knil)) ... ) +
+ That is, it obeys the (tail) recursion +
+(fold kons knil lis) = (fold kons (kons (car lis) knil) (cdr lis))
+(fold kons knil '()) = knil
+
+ + Examples: +
+(fold + 0 lis)			; Add up the elements of LIS.
+
+(fold cons '() lis)		; Reverse LIS.
+
+(fold cons tail rev-head)	; See APPEND-REVERSE.
+
+;; How many symbols in LIS?
+(fold (lambda (x count) (if (symbol? x) (+ count 1) count))
+      0
+      lis)
+
+;; Length of the longest string in LIS:
+(fold (lambda (s max-len) (max max-len (string-length s)))
+      0
+      lis)
+
+ + If n list arguments are provided, then the kons function must take + n+1 parameters: one element from each list, and the "seed" or fold + state, which is initially knil. The fold operation terminates when + the shortest list runs out of values: +
+(fold cons* '() '(a b c) '(1 2 3 4 5)) => (c 3 b 2 a 1)
+
+ At least one of the list arguments must be finite. + + +
+ +fold-right kons knil clist1 clist2 ... -> value +
+ The fundamental list recursion operator. +

+ First, consider the single list-parameter case. If clist1 = (e1 e2 ... en), + then this procedure returns +

+(kons e1 (kons e2 ... (kons en knil))) +
+ That is, it obeys the recursion +
+(fold-right kons knil lis) = (kons (car lis) (fold-right kons knil (cdr lis)))
+(fold-right kons knil '()) = knil
+
+ + Examples: +
+(fold-right cons '() lis)		; Copy LIS.
+
+;; Filter the even numbers out of LIS.
+(fold-right (lambda (x l) (if (even? x) (cons x l) l)) '() lis))
+
+ + If n list arguments are provided, then the kons function must take + n+1 parameters: one element from each list, and the "seed" or fold + state, which is initially knil. The fold operation terminates when + the shortest list runs out of values: +
+(fold-right cons* '() '(a b c) '(1 2 3 4 5)) => (a 1 b 2 c 3)
+
+ At least one of the list arguments must be finite. + + +
+ +pair-fold kons knil clist1 clist2 ... -> value +
+ Analogous to fold, but kons is applied to successive sublists of the + lists, rather than successive elements -- that is, kons is applied to the + pairs making up the lists, giving this (tail) recursion: +
+(pair-fold kons knil lis) = (let ((tail (cdr lis)))
+                              (pair-fold kons (kons lis knil) tail))
+(pair-fold kons knil '()) = knil
+
+ The kons function may reliably apply set-cdr! to the pairs it is given + without altering the sequence of execution. +

+ Example: +

+;;; Destructively reverse a list.
+(pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis))
+
+ + At least one of the list arguments must be finite. + + + +
+ +pair-fold-right kons knil clist1 clist2 ... -> value +
+ Holds the same relationship with fold-right that pair-fold holds with fold. + Obeys the recursion +
+(pair-fold-right kons knil lis) = 
+    (kons lis (pair-fold-right kons knil (cdr lis)))
+(pair-fold-right kons knil '()) = knil
+
+ + Example: +
+(pair-fold-right cons '() '(a b c)) => ((a b c) (b c) (c))
+
+ + At least one of the list arguments must be finite. + + +
+ +reduce f ridentity list -> value +
+ reduce is a variant of fold. +

+ ridentity should be a "right identity" of the procedure f -- that is, + for any value x acceptable to f, +

+(f x ridentity) = x
+
+ + reduce has the following definition: +
+If list = (), return ridentity;
+Otherwise, return (fold f (car list) (cdr list)). +
+ ...in other words, we compute + (fold f ridentity list). +

+ Note that ridentity is used only in the empty-list case. + You typically use reduce when applying f is expensive and you'd + like to avoid the extra application incurred when fold applies + f to the head of list and the identity value, + redundantly producing the same value passed in to f. + For example, if f involves searching a file directory or + performing a database query, this can be significant. + In general, however, fold is useful in many contexts where reduce is not + (consider the examples given in the fold definition -- only one of the + five folds uses a function with a right identity. + The other four may not be performed with reduce). + +

+ Note: MIT Scheme and Haskell flip F's arg order for their reduce and + fold functions. + + +

+ +reduce-right f ridentity list -> value +
+ reduce-right is the fold-right variant of reduce. + It obeys the following definition: +
+(reduce-right f ridentity '()) = ridentity
+(reduce-right f ridentity '(e1)) = (f e1 ridentity) = e1
+(reduce-right f ridentity '(e1 e2 ...)) =
+    (f e1 (reduce f ridentity (e2 ...)))
+
+ ...in other words, we compute + (fold-right f ridentity list). + + +
+ +unfold p f g seed [tail] -> list +
+ unfold constructs a list with the following loop: +
+(let lp ((seed seed) (lis tail))
+  (if (p seed) lis
+      (lp (g seed)
+          (cons (f seed) lis))))
+
+
+
p
Determines when to stop unfolding. +
f
Maps each seed value to the corresponding list element. +
g
Maps each seed value to next seed value. +
seed
The "state" value for the unfold. +
tail
list terminator; defaults to '(). +
+

+ unfold is the fundamental iterative list constructor, + just as fold is the + fundamental iterative list consumer. + While unfold may seem a bit abstract + to novice functional programmers, it can be used in a number of ways: +

+;; List of squares: 1^2 ... 10^2
+(unfold zero? 
+	(lambda (x) (* x x))
+	(lambda (x) (- x 1))
+	10)
+	
+;; Reverse a proper list.
+(unfold null-list? car cdr lis)
+
+;; Read current input port into a list of values.
+(unfold eof-object? values (lambda (x) (read)) (read))
+
+;; (append-reverse rev-head tail)
+(unfold null-list? car cdr rev-head tail)
+
+ + Interested functional programmers may enjoy noting that + fold and unfold + are in some sense inverses. + That is, given operations knull?, kar, + kdr, kons, and knil satisfying +
+(kons (kar x) (kdr x)) = x + and +(knull? knil) = #t +
+ then +
+(fold kons knil (unfold knull? kar kdr x)) = x +
+ and +
+(unfold knull? kar kdr (fold kons knil x)) = x. +
+ + This combinator presumably has some pretentious mathematical name; + interested readers are invited to communicate it to the author. + + + +
+ +unfold-right p f g seed [tail-gen] -> list +
+unfold is best described by its basic recursion: +
+(unfold-right p f g seed) = 
+    (if (p seed) (tail-gen seed)
+        (cons (f seed)
+              (unfold-right p f g (g seed))))
+
+
+
p
Determines when to stop unfolding. +
f
Maps each seed value to the corresponding list element. +
g
Maps each seed value to next seed value. +
seed
The "state" value for the unfold. +
tail-gen
Creates the tail of the list; + defaults to (lambda (x) '()) +
+

+ unfold-right is the fundamental recursive list constructor, + just as fold-right is + the fundamental recursive list consumer. + While unfold-right may seem a bit abstract + to novice functional programmers, it can be used in a number of ways: + +

+;; List of squares: 1^2 ... 10^2
+(unfold (lambda (x) (> x 10))
+        (lambda (x) (* x x))
+	(lambda (x) (+ x 1))
+	1)
+		
+(unfold-right null-list? car cdr lis) ; Copy a proper list.
+
+;; Read current input port into a list of values.
+(unfold-right eof-object? values (lambda (x) (read)) (read))
+
+;; Copy a possibly non-proper list:
+(unfold-right not-pair? car cdr lis 
+              values)
+
+;; Append HEAD onto TAIL:
+(unfold-right null-list? car cdr head 
+              (lambda (x) tail))
+
+ + Interested functional programmers may enjoy noting that + fold-right and unfold-right + are in some sense inverses. + That is, given operations knull?, kar, + kdr, kons, and knil satisfying +
+(kons (kar x) (kdr x)) = x + and +(knull? knil) = #t +
+ then +
+(fold-right kons knil (unfold-right knull? kar kdr x)) = x +
+ and +
+(unfold-right knull? kar kdr (fold-right kons knil x)) = x. +
+ + This combinator sometimes is called an "anamorphism;" when an + explicit tail-gen procedure is supplied, it is called an + "apomorphism." + + + +
+ +map proc clist1 clist2 ... -> list +
+ [R5RS+] + + proc is a procedure taking as many arguments + as there are list arguments and returning a single value. + map applies proc element-wise to the elements + of the lists and returns a list of the results, + in order. + The dynamic order in which proc + is applied to the elements of the lists is unspecified. + +
+(map cadr '((a b) (d e) (g h))) =>  (b e h)
+
+(map (lambda (n) (expt n n))
+     '(1 2 3 4 5))
+    =>  (1 4 27 256 3125)
+
+(map + '(1 2 3) '(4 5 6)) =>  (5 7 9)
+
+(let ((count 0))
+  (map (lambda (ignored)
+         (set! count (+ count 1))
+         count)
+       '(a b))) =>  (1 2) or (2 1)
+
+ + This procedure is extended from its + R5RS + specification to allow the arguments to be of unequal length; + it terminates when the shortest list runs out. +

+ At least one of the argument lists must be finite: +

+(map + '(3 1 4 1) (circular-list 1 0)) => (4 1 5 1)
+
+ + +
+ +for-each proc clist1 clist2 ... -> unspecified +
+ [R5RS+] + + The arguments to for-each are like the arguments to + map, but + for-each calls proc for its side effects rather + than for its values. + Unlike map, for-each is guaranteed to call + proc on the elements of the lists in order from the first + element(s) to the last, + and the value returned by for-each is unspecified. +
+(let ((v (make-vector 5)))
+  (for-each (lambda (i)
+              (vector-set! v i (* i i)))
+            '(0 1 2 3 4))
+  v)  =>  #(0 1 4 9 16)
+
+ + This procedure is extended from its + R5RS + specification to allow the arguments to be of unequal length; + it terminates when the shortest list runs out. +

+ At least one of the argument lists must be finite: +

+(map + '(3 1 4 1) (circular-list 1 0)) => (4 1 5 1)
+
+ + +
+ +append-map  f clist1 clist2 ... -> value +
+ +append-map! f clist1 clist2 ... -> value +
+ Equivalent to +
+(apply append (map f clist1 clist2 ...)) +
+ and +
+(apply append! (map f clist1 clist2 ...)) +
+ + Map f over the elements of the lists, just as in the map function. + However, the results of the applications are appended together to + make the final result. append-map uses append to append the results + together; append-map! uses append!. +

+ The dynamic order in which the various applications of f are made is + not specified. +

+ Example: +

+(append-map! (lambda (x) (list x (- x))) '(1 3 8))
+    => (1 -1 3 -3 8 -8)
+
+ + At least one of the list arguments must be finite. + + +
+ +map! f list1 clist2 ... -> list +
+ Linear-update variant of map -- map! is allowed, but not required, to + alter the cons cells of list1 to construct the result list. +

+ The dynamic order in which the various applications of f are made is + not specified. + + In the n-ary case, clist2, clist3, ... must have at least as many + elements as list1. + + +

+ +map-in-order f clist1 clist2 ... -> list +
+ A variant of the map procedure that guarantees to apply f across + the elements of the listi arguments in a left-to-right order. This + is useful for mapping procedures that both have side effects and + return useful values. +

+ At least one of the list arguments must be finite. + + +

+ +pair-for-each f clist1 clist2 ... -> unspecific +
+ Like for-each, but f is applied to successive sublists of the argument + lists. That is, f is applied to the cons cells of the lists, rather + than the lists' elements. These applications occur in left-to-right + order. +

+ The f procedure may reliably apply set-cdr! to the pairs it is given + without altering the sequence of execution. + +

+(pair-for-each (lambda (pair) (display pair) (newline)) '(a b c)) ==>
+    (a b c)
+    (b c)
+    (c)
+
+ + At least one of the list arguments must be finite. + + +
+ +filter-map f clist1 clist2 ... -> list +
+ Like map, but only true values are saved. +
+(filter-map (lambda (x) (and (number? x) (* x x))) '(a 1 b 3 c 7))
+    => (1 9 49)
+
+ The dynamic order in which the various applications of f are made is + not specified. +

+ At least one of the list arguments must be finite. +

+ + +

Filtering & partitioning

+
+ + +
+ +filter pred list -> list +
+ Return all the elements of list that satisfy predicate pred. + The list is not disordered -- elements that appear in the result list + occur in the same order as they occur in the argument list. + The returned list may share a common tail with the argument list. + The dynamic order in which the various applications of pred are made is + not specified. + +
+(filter even? '(0 7 8 8 43 -4)) => (0 8 8 -4)
+
+ + +
+ +partition pred list -> [list list] +
+ Partitions the elements of list with predicate pred, and returns two + values: the list of in-elements and the list of out-elements. + The list is not disordered -- elements occur in the result lists + in the same order as they occur in the argument list. + The dynamic order in which the various applications of pred are made is + not specified. One of the returned lists may share a common tail with the + argument list. + +
+(partition symbol? '(one 2 3 four five 6)) => 
+    (one four five)
+    (2 3 6)
+
+ + +
+ +remove pred list -> list +
+ Returns list without the elements that satisfy predicate pred: +
+(lambda (pred list) (filter (lambda (x) (not (pred x))) list))
+
+ The list is not disordered -- elements that appear in the result list + occur in the same order as they occur in the argument list. + The returned list may share a common tail with the argument list. + The dynamic order in which the various applications of pred are made is + not specified. + +
+(remove even? '(0 7 8 8 43 -4)) => (7 43)
+
+ + +
+ +filter!    pred list -> list +
+ +partition! pred list -> [list list] +
+ +remove!    pred list -> list +
+ Linear-update variants of filter, partition and remove. + These procedures are allowed, but not required, to alter the cons cells + in the argument list to construct the result lists. + +
+ + +

Searching

+

+ +The following procedures all search lists for a leftmost element satisfying +some criteria. This means they do not always examine the entire list; thus, +there is no efficient way for them to reliably detect and signal an error when +passed a dotted or circular list. Here are the general rules describing how +these procedures work when applied to different kinds of lists: + +

+
Proper lists: +
The standard, canonical behavior happens in this case. + +
Dotted lists: +
It is an error to pass these procedures a dotted list + that does not contain an element satisfying the search + criteria. That is, it is an error if the procedure has + to search all the way to the end of the dotted list. + However, this SRFI does not specify anything at all + about the behavior of these procedures when passed a + dotted list containing an element satisfying the search + criteria. It may finish successfully, signal an error, + or perform some third action. Different implementations + may provide different functionality in this case; code + which is compliant with this SRFI may not rely on any + particular behavior. Future SRFI's may refine SRFI-1 + to define specific behavior in this case. +

+ In brief, SRFI-1 compliant code may not pass a dotted + list argument to these procedures. + +

Circular lists: +
It is an error to pass these procedures a circular list + that does not contain an element satisfying the search + criteria. Note that the procedure is not required to + detect this case; it may simply diverge. It is, however, + acceptable to search a circular list if the search is + successful -- that is, if the list contains an element + satisfying the search criteria. +
+

+Here are some examples, using the find and any procedures as a canonical +representatives: +

+;; Proper list -- success
+(find even? '(1 2 3))	=> 2
+(any  even? '(1 2 3))	=> #t
+
+;; proper list -- failure
+(find even? '(1 7 3))	=> #f
+(any  even? '(1 7 3))	=> #f
+
+;; Failure is error on a dotted list.
+(find even? '(1 3 . x))	=> error
+(any  even? '(1 3 . x))	=> error
+
+;; The dotted list contains an element satisfying the search.
+;; This case is not specified -- it could be success, an error, 
+;; or some third possibility.
+(find even? '(1 2 . x))	=> error/undefined
+(any  even? '(1 2 . x))	=> error/undefined ; success, error or other.
+
+;; circular list -- success
+(find even? (circular-list 1 6 3)) => 6
+(any  even? (circular-list 1 6 3)) => #t
+
+;; circular list -- failure is error. Procedure may diverge.
+(find even? (circular-list 1 3)) => error
+(any  even? (circular-list 1 3)) => error
+
+ +
+ +
+ +find pred clist -> value +
+ Return the first element of clist that satisfies predicate pred; + false if no element does. + +
+(find even? '(3 1 4 1 5 9)) => 4
+
+ + Note that find has an ambiguity in its lookup semantics -- if find + returns #f, you cannot tell (in general) if it found a #f element + that satisfied pred, or if it did not find any element at all. In + many situations, this ambiguity cannot arise -- either the list being + searched is known not to contain any #f elements, or the list is + guaranteed to have an element satisfying pred. However, in cases + where this ambiguity can arise, you should use find-tail instead of + find -- find-tail has no such ambiguity: +
+(cond ((find-tail pred lis) => (lambda (pair) ...)) ; Handle (CAR PAIR)
+      (else ...)) ; Search failed.
+
+ + +
+ +find-tail pred clist -> pair or false +
+ Return the first pair of clist whose car satisfies pred. If no pair does, + return false. +

+ find-tail can be viewed as a general-predicate variant of the member + function. +

+ Examples: +

+(find-tail even? '(3 1 37 -8 -5 0 0)) => (-8 -5 0 0)
+(find-tail even? '(3 1 37 -5)) => #f
+
+;; MEMBER X LIS:
+(find-tail (lambda (elt) (equal? x elt)) lis)
+
+ + In the circular-list case, this procedure "rotates" the list. + + +
+ +any pred clist1 clist2 ... -> value +
+ Applies the predicate across the lists, returning true if the predicate + returns true on any application. +

+ If there are n list arguments clist1 ... clistn, then pred must be a + procedure taking n arguments and returning a boolean result. +

+ any applies pred to the first elements of the clisti parameters. + If this application returns a true value, any immediately returns + that value. Otherwise, it iterates, applying pred to the second + elements of the clisti parameters, then the third, and so forth. + The iteration stops when a true value is produced or one of the lists runs + out of values; in + the latter case, any returns #f. + The application of pred to the last element of the + lists is a tail call. +

+ Note the difference between find and any -- find returns the element + that satisfied the predicate; any returns the true value that the + predicate produced. +

+ Like every, any's name does not end with a question mark -- this is to + indicate that it does not return a simple boolean (#t or #f), but a + general value. + +

+(any integer? '(a 3 b 2.7))   => #t
+(any integer? '(a 3.1 b 2.7)) => #f
+(any < '(3 1 4 1 5)
+       '(2 7 1 8 2)) => #t
+
+ + +
+ +every pred clist1 clist2 ... -> value +
+ Applies the predicate across the lists, returning true if the predicate + returns true on every application. +

+ If there are n list arguments clist1 ... clistn, then pred must be a + procedure taking n arguments and returning a boolean result. +

+ every applies pred to the first elements of the clisti parameters. + If this application returns false, every immediately returns false. + Otherwise, it iterates, applying pred to the second elements of the + clisti parameters, then the third, and so forth. The iteration stops + when a false value is produced or one of the lists runs out of values. + In the latter case, every returns + the true value produced by its final application of pred. + The application of pred to the last element of the lists + is a tail call. +

+ If one of the clisti has no elements, every simply returns #t. +

+ Like any, every's name does not end with a question mark -- this is to + indicate that it does not return a simple boolean (#t or #f), but a + general value. + + +

+ +list-index pred clist1 clist2 ... -> integer or false +
+ Return the index of the leftmost element that satisfies pred. +

+ If there are n list arguments clist1 ... clistn, then pred must be a + function taking n arguments and returning a boolean result. +

+ list-index applies pred to the first elements of the clisti parameters. + If this application returns true, list-index immediately returns zero. + Otherwise, it iterates, applying pred to the second elements of the + clisti parameters, then the third, and so forth. When it finds a tuple of + list elements that cause pred to return true, it stops and returns the + zero-based index of that position in the lists. +

+ The iteration stops when one of the lists runs out of values; in this + case, list-index returns #f. + +

+(list-index even? '(3 1 4 1 5 9)) => 2
+(list-index < '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => 1
+(list-index = '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => #f
+
+ + +
+ +member x list [=] -> list +
+ +memq x list -> list +
+ +memv x list -> list +
+ [R5RS+] + + These procedures return the first sublist of list whose car is + x, where the sublists of list are the + non-empty lists returned by + (drop list i) + for i less than the length of list. + If x does + not occur in list, then #f is returned. + memq uses eq? to compare x + with the elements of list, + while memv uses eqv?, and + member uses equal?. + +
+    (memq 'a '(a b c))          =>  (a b c)
+    (memq 'b '(a b c))          =>  (b c)
+    (memq 'a '(b c d))          =>  #f
+    (memq (list 'a) '(b (a) c)) =>  #f
+    (member (list 'a)
+            '(b (a) c))         =>  ((a) c)
+    (memq 101 '(100 101 102))   =>  *unspecified*
+    (memv 101 '(100 101 102))   =>  (101 102)
+
+ + member is extended from its + R5RS + definition to allow the client to pass in + an optional equality procedure = used to compare keys. + +

+ The comparison procedure is used to compare the elements ei of list + to the key x in this way: +

+(= x ei) ; list is (E1 ... En) +
+ That is, the first argument is always x, and the second argument is + one of the list elements. Thus one can reliably find the first element + of list that is greater than five with + (member 5 list <) + +

+ Note that fully general list searching may be performed with + the find-tail and find procedures, e.g. +

+(find-tail even? list) ; Find the first elt with an even key.
+
+ +
+ + +

Deletion

+

+ +

+ +
+ +delete  x list [=] -> list +
+ +delete! x list [=] -> list +
+ delete uses the comparison procedure =, which defaults to equal?, to find + all elements of list that are equal to x, and deletes them from list. The + dynamic order in which the various applications of = are made is not + specified. + +

+ The list is not disordered -- elements that appear in the result list + occur in the same order as they occur in the argument list. + The result may share a common tail with the argument list. + +

+ Note that fully general element deletion can be performed with the remove + and remove! procedures, e.g.: +

+;; Delete all the even elements from LIS:
+(remove even? lis)
+
+ + The comparison procedure is used in this way: + (= x ei). + That is, x is always the first argument, + and a list element is always the + second argument. The comparison procedure will be used to compare each + element of list exactly once; the order in which it is applied to the + various ei is not specified. Thus, one can reliably remove all the + numbers greater than five from a list with + (delete 5 list <) + +

+ delete! is the linear-update variant of delete. + It is allowed, but not required, to alter the cons cells in + its argument list to construct the result. + + +

+ +delete-duplicates  list [=] -> list +
+ +delete-duplicates! list [=] -> list +
+ delete-duplicates removes duplicate elements from the + list argument. + If there are multiple equal elements in the argument list, the result list + only contains the first or leftmost of these elements in the result. + The order of these surviving elements is the same as in the original + list -- delete-duplicates does not disorder the list (hence it is useful + for "cleaning up" association lists). +

+ The = parameter is used to compare the elements of the list; it defaults + to equal?. If x comes before y in list, then the comparison is performed + (= x y). + The comparison procedure will be used to compare each pair of elements in + list no more than once; + the order in which it is applied to the various pairs is not specified. +

+ Implementations of delete-duplicates + are allowed to share common tails + between argument and result lists -- for example, if the list argument + contains only unique elements, it may simply return exactly + this list. +

+ Be aware that, in general, delete-duplicates + runs in time O(n2) for n-element lists. + Uniquifying long lists can be accomplished in O(n lg n) time by sorting + the list to bring equal elements together, then using a linear-time + algorithm to remove equal elements. Alternatively, one can use algorithms + based on element-marking, with linear-time results. + +

+ delete-duplicates! is the linear-update variant of delete-duplicates; it + is allowed, but not required, to alter the cons cells in its argument + list to construct the result. +

+(delete-duplicates '(a b a c a b c z)) => (a b c z)
+
+;; Clean up an alist:
+(delete-duplicates '((a . 3) (b . 7) (a . 9) (c . 1))
+                   (lambda (x y) (eq? (car x) (car y))))
+    => ((a . 3) (b . 7) (c . 1))
+
+
+ + +

Association lists

+

+An "association list" (or "alist") is a list of pairs. The car of each pair +contains a key value, and the cdr contains the associated data value. They can +be used to construct simple look-up tables in Scheme. Note that association +lists are probably inappropriate for performance-critical use on large data; +in these cases, hash tables or some other alternative should be employed. + +

+ +
+ +assoc key alist [=] -> pair or #f +
+ +assq key alist -> pair or #f +
+ +assv key alist -> pair or #f +
+ + [R5RS+] + alist must be an association list -- a list of pairs. + These procedures + find the first pair in alist whose car field is key, + and returns that pair. + If no pair in alist has key as its car, + then #f is returned. + assq uses eq? to compare key + with the car fields of the pairs in alist, + while assv uses eqv? + and assoc uses equal?. +
+(define e '((a 1) (b 2) (c 3)))
+(assq 'a e)                            =>  (a 1)
+(assq 'b e)                            =>  (b 2)
+(assq 'd e)                            =>  #f
+(assq (list 'a) '(((a)) ((b)) ((c))))  =>  #f
+(assoc (list 'a) '(((a)) ((b)) ((c)))) =>  ((a))
+(assq 5 '((2 3) (5 7) (11 13)))	   =>  *unspecified*
+(assv 5 '((2 3) (5 7) (11 13)))	   =>  (5 7)
+
+ + assoc is extended from its + R5RS + definition to allow the client to pass in + an optional equality procedure = used to compare keys. + +

+ The comparison procedure is used to compare the elements ei of list + to the key parameter in this way: +

+(= key (car ei)) ; list is (E1 ... En) +
+ That is, the first argument is always key, + and the second argument is one of the list elements. + Thus one can reliably find the first entry + of alist whose key is greater than five with + (assoc 5 alist <) + +

+ Note that fully general alist searching may be performed with + the find-tail and find procedures, e.g. +

+;; Look up the first association in alist with an even key:
+(find (lambda (a) (even? (car a))) alist)
+
+ + + +
+ +alist-cons key datum alist -> alist +
+
+(lambda (key datum alist) (cons (cons key datum) alist))
+
+ Cons a new alist entry mapping key -> datum onto alist. + + +
+ +alist-copy alist -> alist +
+ Make a fresh copy of alist. This means copying each pair that + forms an association as well as the spine of the list, i.e. +
+(lambda (a) (map (lambda (elt) (cons (car elt) (cdr elt))) a))
+
+ + +
+ +alist-delete  key alist [=] -> alist +
+ +alist-delete! key alist [=] -> alist +
+ alist-delete deletes all associations from alist with the given key, + using key-comparison procedure =, which defaults to equal?. + The dynamic order in which the various applications of = are made is not + specified. +

+ Return values may share common tails with the alist argument. + The alist is not disordered -- elements that appear in the result alist + occur in the same order as they occur in the argument alist. +

+ The comparison procedure is used to compare the element keys ki of alist's + entries to the key parameter in this way: + (= key ki). + Thus, one can reliably remove all entries of alist whose key is greater + than five with + (alist-delete 5 alist <) +

+ alist-delete! is the linear-update variant of alist-delete. + It is allowed, but not required, + to alter cons cells from the alist parameter to construct the result. + +

+ + + +

Set operations on lists

+

+These procedures implement operations on sets represented as lists of elements. +They all take an = argument used to compare elements of lists. +This equality procedure is required to be consistent with eq?. +That is, it must be the case that +

+ (eq? x y) => (= x y). +
+Note that this implies, in turn, that two lists that are eq? are +also set-equal by any legal comparison procedure. This allows for +constant-time determination of set operations on eq? lists. + +

+Be aware that these procedures typically run in time +O(n * m) +for n- and m-element list arguments. +Performance-critical applications +operating upon large sets will probably wish to use other data +structures and algorithms. + +

+ +
+ +lset<= = list1 ... -> boolean +
+ Returns true iff every listi is a subset of listi+1, using = for + the element-equality procedure. + List A is a subset of list B if every + element in A is equal to some element of B. + When performing an element comparison, + the = procedure's first argument is an element + of A; its second, an element of B. +
+(lset<= eq? '(a) '(a b a) '(a b c c)) => #t
+
+(lset<= eq?) => #t             ; Trivial cases
+(lset<= eq? '(a)) => #t
+
+ + +
+ +lset= = list1 list2 ... -> boolean +
+ Returns true iff every listi is set-equal to listi+1, using = for + the element-equality procedure. "Set-equal" simply means that + listi is a subset of listi+1, and listi+1 is a subset of listi. + The = procedure's first argument is an element of listi; its second is an element of + listi+1. +
+(lset= eq? '(b e a) '(a e b) '(e e b a)) => #t
+
+(lset= eq?) => #t               ; Trivial cases
+(lset= eq? '(a)) => #t
+
+ + +
+ +lset-adjoin = list elt1 ... -> list +
+ Adds the elti elements not already in the list parameter to the + result list. The result shares a common tail with the list parameter. + The new elements are added to the front of the list, but no guarantees + are made about their order. The = parameter is an equality procedure + used to determine if an elti is already a member of list. Its first + argument is an element of list; its second is one of the elti. +

+ The list parameter is always a suffix of the result -- even if the list + parameter contains repeated elements, these are not reduced. +

+(lset-adjoin eq? '(a b c d c e) 'a 'e 'i 'o 'u) => (u o i a b c d c e)
+
+ + +
+ +lset-union = list1 ... -> list +
+ Returns the union of the lists, using = for the element-equality + procedure. +

+ The union of lists A and B is constructed as follows: +

    +
  • If A is the empty list, + the answer is B (or a copy of B). +
  • Otherwise, the result is initialised to be list A + (or a copy of A). +
  • Proceed through the elements of list B + in a left-to-right order. + If b is such an element of B, + compare every element r of the current result list + to b: + (= r b). + If all comparisons fail, + b is consed onto the front of the result. +
+ However, there is no guarantee that = will be applied to every pair + of arguments from A and B. + In particular, if A is eq? to B, + the operation may immediately terminate. + +

+ In the n-ary case, the two-argument list-union operation is simply + folded across the argument lists. + +

+(lset-union eq? '(a b c d e) '(a e i o u)) => 
+    (u o i a b c d e)
+
+;; Repeated elements in LIST1 are preserved.
+(lset-union eq? '(a a c) '(x a x)) => (x a a c)
+
+;; Trivial cases
+(lset-union eq?) => ()
+(lset-union eq? '(a b c)) => (a b c)
+
+ + +
+ +lset-intersection = list1 list2 ... -> list +
+ Returns the intersection of the lists, + using = for the element-equality procedure. +

+ The intersection of lists A and B + is comprised of every element of A that is = + to some element of B: + (= a b), + for a in A, and b in B. + Note this implies that an element which appears in B + and multiple times in list A + will also appear multiple times in the result. +

+ The order in which elements appear in the result is the same as + they appear in list1 -- + that is, lset-intersection essentially filters + list1, + without disarranging element order. + The result may + share a common tail with list1. +

+ In the n-ary case, the two-argument list-intersection operation is simply + folded across the argument lists. However, the dynamic order in which the + applications of = are made is not specified. + The procedure may check an + element of list1 for membership + in every other list before proceeding to + consider the next element of list1, + or it may completely intersect list1 + and list2 + before proceeding to list3, + or it may go about its work in some third order. + +

+(lset-intersection eq? '(a b c d e) '(a e i o u)) => (a e)
+
+;; Repeated elements in LIST1 are preserved.
+(lset-intersection eq? '(a x y a) '(x a x z)) => '(a x a)
+
+(lset-intersection eq? '(a b c)) => (a b c)     ; Trivial case
+
+ + +
+ +lset-difference = list1 list2 ... -> list +
+ Returns the difference of the lists, using = for the element-equality + procedure -- all the elements of list1 that are not + = to any element from one of the + other listi parameters. +

+ The = procedure's first argument is + always an element of list1; + its second is an element of one of the other listi. + Elements that are repeated multiple times in the + list1 parameter + will occur multiple times in the result. + + The order in which elements appear in the result is the same as + they appear in list1 -- + that is, lset-difference essentially + filters list1, without disarranging element order. + The result may share a common tail with list1. + + The dynamic order in which the applications of = are made is not + specified. + The procedure may check an element of list1 + for membership in every other list before proceeding to consider the + next element of list1, + or it may completely compute the difference of + list1 and list2 before + proceeding to list3, + or it may go about its work in some third order. + +

+(lset-difference eq? '(a b c d e) '(a e i o u)) => (b c d)
+
+(lset-difference eq? '(a b c)) => (a b c) ; Trivial case
+
+ + +
+ +lset-xor = list1 ... -> list +
+ Returns the exclusive-or of the sets, + using = for the element-equality procedure. + If there are exactly two lists, this is all the elements + that appear in exactly one of the two lists. The operation is associative, + and thus extends to the n-ary case -- the elements that appear in an + odd number of the lists. The result may share a common tail with any of + the listi parameters. +

+ More precisely, for two lists A and B, + A xor B is a list of +

    +
  • every element a of A + such that there is no element b of B + such that (= a b), and +
  • every element b of B + such that there is no element a of A + such that (= b a). +
+ However, an implementation is allowed to assume that = is + symmetric -- that is, that +
+ (= a b) => + (= b a). +
+ This means, for example, that if a comparison + (= a b) produces + true for some a in A + and b in B, + both a and b may be removed from + inclusion in the result. +

+ In the n-ary case, the binary-xor operation is simply folded across + the lists. + +

+(lset-xor eq? '(a b c d e) '(a e i o u)) => (d c b i o u)
+
+;; Trivial cases.
+(lset-xor eq?) => ()
+(lset-xor eq? '(a b c d e)) => (a b c d e)
+
+ + + +
+ +lset-diff+intersection = list1 list2 ... -> [list list] +
+ Returns two values -- the difference and the intersection of the lists. + Is equivalent to +
+(values (lset-difference = list1 list2 ...)
+        (lset-intersection = list1
+                             (lset-union = list2 ...)))
+
+ but can be implemented more efficiently. +

+ The = procedure's first argument is an element of list1; its second + is an element of one of the other listi. +

+ Either of the answer lists may share a + common tail with list1. + This operation essentially partitions list1. + + +

+ +lset-union!             = list1 ... -> list +
+ +lset-intersection!      = list1 list2 ... -> list +
+ +lset-difference!        = list1 list2 ... -> list +
+ +lset-xor!               = list1 ... -> list +
+ +lset-diff+intersection! = list1 list2 ... -> [list list] +
+ These are linear-update variants. They are allowed, but not required, + to use the cons cells in their first list parameter to construct their + answer. lset-union! is permitted to recycle cons cells from any + of its list arguments. +
+ + +

Primitive side-effects

+

+These two procedures are the primitive, +R5RS +side-effect operations on pairs. + +

+ +
+ +set-car! pair object -> unspecified +
+ +set-cdr! pair object -> unspecified +
+ [R5RS] + These procedures store object in the car and cdr field + of pair, respectively. + The value returned is unspecified. +
+(define (f) (list 'not-a-constant-list))
+(define (g) '(constant-list))
+(set-car! (f) 3) =>  *unspecified*
+(set-car! (g) 3) =>  *error*
+
+
+ + +

Acknowledgements

+

+The design of this library benefited greatly from the feedback provided during +the SRFI discussion phase. Among those contributing thoughtful commentary and +suggestions, both on the mailing list and by private discussion, were Mike +Ashley, Darius Bacon, Alan Bawden, Phil Bewig, Jim Blandy, Dan Bornstein, Per +Bothner, Anthony Carrico, Doug Currie, Kent Dybvig, Sergei Egorov, Doug Evans, +Marc Feeley, Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Dan Friedman, +Lars Thomas Hansen, Brian Harvey, Erik Hilsdale, Wolfgang Hukriede, Richard +Kelsey, Donovan Kolbly, Shriram Krishnamurthi, Dave Mason, Jussi Piitulainen, +David Pokorny, Duncan Smith, Mike Sperber, Maciej Stachowiak, Harvey J. Stein, +John David Stone, and Joerg F. Wittenberger. I am grateful to them for their +assistance. +

+I am also grateful the authors, implementors and documentors of all the systems +mentioned in the introduction. Aubrey Jaffer and Kent Pitman should be noted +for their work in producing Web-accessible versions of the R5RS and Common +Lisp spec, which was a tremendous aid. +

+This is not to imply that these individuals necessarily endorse the final +results, of course. + + + +

References & links

+

+ +

+
This document, in HTML: +
+ http://srfi.schemers.org/srfi-1/srfi-1.html + +
+ ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.html (draft) + +
This document, in simple text format: +
+ http://srfi.schemers.org/srfi-1/srfi-1.txt + +
+ ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt (draft) + +
Source code for the reference implementation: +
+ http://srfi.schemers.org/srfi-1/srfi-1-reference.scm + +
+ ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1-reference.scm (draft) + +
Archive of SRFI-1 discussion-list email: +
+ http://srfi.schemers.org/srfi-1/mail-archive/maillist.html + +
SRFI web site: +
+ http://srfi.schemers.org/ +
+ +

+ +

+
[CLtL2]
+
Common Lisp: the Language
+Guy L. Steele Jr. (editor).
+Digital Press, Maynard, Mass., second edition 1990.
+Available at +http://www.harlequin.com/education/books/HyperSpec/. + +
[R5RS]
+
Revised5 report on the algorithmic language Scheme.
+ R. Kelsey, W. Clinger, J. Rees (editors).
+ Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998.
+ and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998.
+ Available at + http://www.schemers.org/Documents/Standards/. + +
+ + + + +

Copyright

+

+ +Certain portions of this document -- the specific, marked segments of text +describing the R5RS procedures -- were adapted with permission from the R5RS +report. +

+ +All other text is copyright (C) Olin Shivers (1998, 1999). +All Rights Reserved. +

+This document and translations of it may be copied and furnished to +others, and derivative works that comment on or otherwise explain it +or assist in its implementation may be prepared, copied, published and +distributed, in whole or in part, without restriction of any kind, +provided that the above copyright notice and this paragraph are +included on all such copies and derivative works. However, this +document itself may not be modified in any way, such as by removing +the copyright notice or references to the Scheme Request For +Implementation process or editors, except as needed for the purpose of +developing SRFIs in which case the procedures for copyrights defined +in the SRFI process must be followed, or as required to translate it +into languages other than English. +

+The limited permissions granted above are perpetual and will not be +revoked by the authors or their successors or assigns. +

+This document and the information contained herein is provided on an +"AS IS" basis and THE AUTHOR AND THE SRFI EDITORS DISCLAIM ALL +WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY +WARRANTY THAT THE USE OF THE INFORMATION HEREIN WILL NOT INFRINGE ANY +RIGHTS OR ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +PARTICULAR PURPOSE. + + + diff --git a/scsh/lib/srfi-1.txt b/scsh/lib/srfi-1.txt new file mode 100644 index 0000000..271147d --- /dev/null +++ b/scsh/lib/srfi-1.txt @@ -0,0 +1,1912 @@ +The SRFI-1 list library -*- outline -*- +Olin Shivers +98/10/16 +Last Update: 99/9/11 + +Todo: carefully proofread. + Netscape prints with insufficient space between proc specs -- + see list=, for example. Mess about with css some more. + +Emacs should display this document is in outline mode. Say c-h m for +instructions on how to move through it by sections (e.g., c-c c-n, c-c c-p). +During the SRFI discussion period, the current draft may be found at + ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt + + +* Table of contents +------------------- + +Abstract +Introduction +Procedure index +General discussion + "Linear update" procedures + Improper lists + Errors + Not included in this library +The procedures + Constructors + Predicates + Selectors + Miscellaneous: length, append, reverse, zip & count + Fold, unfold & map + Filtering & partitioning + Searching + Deletion + Association lists + Set operations on lists + Primitive side-effects +Acknowledgements +References & links +Copyright + + +* Abstract +---------- + +R5RS Scheme has an impoverished set of list-processing utilities, which is a +problem for authors of portable code. This SRFI proposes a coherent and +comprehensive set of list-processing procedures; it is accompanied by a +reference implementation of the spec. The reference implementation is + - portable + - efficient + - completely open, public-domain source + + +* Introduction +-------------- + +The set of basic list and pair operations provided by R4RS/R5RS Scheme is far +from satisfactory. Because this set is so small and basic, most +implementations provide additional utilities, such as a list-filtering +function, or a "left fold" operator, and so forth. But, of course, this +introduces incompatibilities -- different Scheme implementations provide +different sets of procedures. + +I have designed a full-featured library of procedures for list processing. +While putting this library together, I checked as many Schemes as I could get +my hands on. (I have a fair amount of experience with several of these +already.) I missed Chez -- no on-line manual that I can find -- but I hit most +of the other big, full-featured Schemes. The complete list of list-processing +systems I checked is: + R4RS/R5RS Scheme, MIT Scheme, Gambit, RScheme, MzScheme, slib, Common + Lisp, Bigloo, guile, T, APL and the SML standard basis +As a result, the library I am proposing is fairly rich. + +Following this initial design phase, this library went through several +months of discussion on the SRFI mailing lists, and was altered in light +of the ideas and suggestions put forth during this discussion. + +In parallel with designing this API, I have also written a reference +implementation. I have placed this source on the Net with an unencumbered, +"open" copyright. A few notes about the reference implementation: + + - Although I got procedure names and specs from many Schemes, I wrote this + code myself. Thus, there are *no* entanglements. Any Scheme implementor + can pick this library up with no worries about copyright problems -- both + commercial and non-commercial systems. + + - The code is written for portability and should be trivial to port to + any Scheme. It has only four deviations from R4RS, clearly discussed + in the comments: + - Use of an ERROR procedure; + - Use of the R5RS VALUES and a simple RECEIVE macro for producing + and consuming multiple return values; + - Use of simple :OPTIONAL and LET-OPTIONALS macros for optional + argument parsing and defaulting; + - Use of a simple CHECK-ARG procedure for argument checking. + + - It is written for clarity and well-commented. The current source is + 1436 lines of text, of which 690 are source code; the rest being comments + and blank lines. + + - It is written for efficiency. Fast paths are provided for common + cases. Side-effecting procedures such as FILTER! avoid unnecessary, + redundant SET-CDR!s which would thrash a generational GC's write barrier + and the store buffers of fast processors. Functions reuse longest common + tails from input parameters to construct their results where + possible. Constant-space iterations are used in preference to recursions; + local recursions are used in preference to consing temporary intermediate + data structures. + + This is not to say that the implementation can't be tuned up for + a specific Scheme implementation. There are notes in comments addressing + ways implementors can tune the reference implementation for performance. + +In short, I've written the reference implementation to make it as painless +as possible for an implementor -- or a regular programmer -- to adopt this +library and get good results with it. + + + +* Procedure index +----------------- +Here is a short list of the procedures provided by the list-lib package. +"#" marks R5RS procedures; "+" marks extended R5RS procedures + +Constructors +# cons list + xcons cons* make-list list-tabulate + list-copy circular-list iota + +Predicates +# pair? null? + proper-list? circular-list? dotted-list? + not-pair? null-list? + list= + +Selectors +# car cdr ... cdddar cddddr list-ref + first second third fourth fifth sixth seventh eighth ninth tenth + car+cdr + take drop + take-right drop-right + take! drop-right! + last last-pair + +Miscellaneous: length, append, reverse, zip & count +# length + length+ +# append reverse + append! reverse! + append-reverse append-reverse! + zip unzip1 unzip2 unzip3 unzip4 unzip5 + count + +Fold, unfold & map ++ map for-each + fold unfold pair-fold reduce + fold-right unfold-right pair-fold-right reduce-right + append-map append-map! + map! pair-for-each filter-map map-in-order + +Filtering & partitioning + filter partition remove + filter! partition! remove! + +Searching ++ member +# memq memv + find find-tail + any every + list-index + +Deleting + delete delete-duplicates + delete! delete-duplicates! + +Association lists ++ assoc +# assq assv + alist-cons alist-copy + alist-delete alist-delete! + +Set operations on lists + lset<= lset= lset-adjoin + lset-union lset-union! + lset-intersection lset-intersection! + lset-difference lset-difference! + lset-xor lset-xor! + lset-diff+intersection lset-diff+intersection! + +Primitive side effects +# set-car! set-cdr! + +------ +Four R4RS/R5RS list-processing procedures are extended by this library in +backwards-compatible ways: + map for-each (Extended to take lists of unequal length) + member assoc (Extended to take an optional comparison procedure) + +The following R4RS/R5RS list- and pair-processing procedures are also part of +list-lib's exports, as defined by the R5RS report: + cons pair? null? list length append reverse + car cdr ... cdddar cddddr set-car! set-cdr! list-ref + memq memv assq assv + +The remaining two R4RS/R5RS list-processing procedures are *not* part of +this library: + list-tail (renamed DROP) + list? (see PROPER-LIST?, CIRCULAR-LIST? and DOTTED-LIST?) + + + +* General discussion +-------------------- + +A set of general criteria guided the design of this library. + +I don't require "destructive" (what I call "linear update") procedures to +alter and recycle cons cells from the argument lists. They are allowed to, but +not required to. (The reference implementations I have written *do* recycle +the argument lists.) See below for further discussion. + +List-filtering procedures such as FILTER or DELETE do not disorder +lists. Elements appear in the answer list in the same order as they appear in +the argument list. This constrains implementation, but seems like a desirable +feature, since in many uses of lists, order matters. (In particular, +disordering an alist is definitely a bad idea.) + +Contrariwise, although the reference implementations of the list-filtering +procedures share longest common tails between argument and answer lists, +it not is part of the spec. + +Because lists are an inherently sequential data structure (unlike, say, +vectors), list-inspection functions such as FIND, FIND-TAIL, FOR-EACH, ANY +and EVERY commit to a left-to-right traversal order of their argument list. + +However, constructor functions, such as LIST-TABULATE and the mapping +procedures (APPEND-MAP, APPEND-MAP!, MAP!, PAIR-FOR-EACH, FILTER-MAP, +MAP-IN-ORDER) do *not* specify the dynamic order in which their +procedural argument is applied to its various values. + +Predicates return useful true values wherever possible. Thus ANY must return +the true value produced by its predicate, and EVERY returns the final true +value produced by applying its predicate argument to the last element of its +argument list. + +Functionality is provided both in pure and linear-update (potentially +destructive) forms wherever this makes sense. + +No special status accorded Scheme's built-in equality functions. +Any functionality provided in terms of EQ?, EQV?, EQUAL? is also +available using a client-provided equality function. + +Proper design counts for more than backwards compatibility, but I have tried, +ceteris paribus, to be as backwards-compatible as possible with existing +list-processing libraries, in order to facilitate porting old code to run as a +client of the procedures in this library. Name choices and semantics are, for +the most part, in agreement with existing practice in many current Scheme +systems. I have indicated some incompatibilities in the following text. + +These procedures are *not* "sequence generic" -- i.e., procedures that +operate on either vectors and lists. They are list-specific. I prefer to +keep the library simple and focussed. + +I have named these procedures without a qualifying initial "list-" +lexeme, which is in keeping with the existing set of list-processing +utilities in Scheme. I follow the general Scheme convention +(VECTOR-LENGTH, STRING-REF) of placing the type-name before the action +when naming procedures -- so we have LIST-COPY and PAIR-FOR-EACH rather +than the perhaps more fluid, but less consistent, COPY-LIST, or +FOR-EACH-PAIR. + +I have generally followed a regular and consistent naming scheme, composing +procedure names from a set of basic lexemes. + + +** "Linear update" procedures +============================= + +Many procedures in this library have "pure" and "linear update" variants. A +"pure" procedure has no side-effects, and in particular does not alter its +arguments in any way. A "linear update" procedure is allowed -- but *not* +required -- to side-effect its arguments in order to construct its +result. "Linear update" procedures are typically given names ending with an +exclamation point. So, for example, (APPEND! list1 list2) is allowed to +construct its result by simply using SET-CDR! to set the cdr of the last pair +of list1 to point to list2, and then returning list1 (unless list1 is the +empty list, in which case it would simply return list2). However, APPEND! may +also elect to perform a pure append operation -- this is a legal definition +of APPEND!: + (define append! append) +This is why we do not call these procedures "destructive" -- because they +aren't *required* to be destructive. They are *potentially* destructive. + +What this means is that you may only apply linear-update procedures to +values that you know are "dead" -- values that will never be used again +in your program. This must be so, since you can't rely on the value passed +to a linear-update procedure after that procedure has been called. It +might be unchanged; it might be altered. + +The "linear" in "linear update" doesn't mean "linear time" or "linear space" +or any sort of multiple-of-n kind of meaning. It's a fancy term that type +theorists and pure functional programmers use to describe systems where you +are only allowed to have exactly one reference to each variable. This provides +a guarantee that the value bound to a variable is bound to no other +variable. So when you *use* a variable in a variable reference, you "use it +up." Knowing that no one else has a pointer to that value means the system +primitive is free to side-effect its arguments to produce what is, +observationally, a pure-functional result. + +In the context of this library, "linear update" means you, the programmer, +know there are *no other* live references to the value passed to the +procedure -- after passing the value to one of these procedures, the +value of the old pointer is indeterminate. Basically, you are licensing +the Scheme implementation to alter the data structure if it feels like +it -- you have declared you don't care either way. + +You get no help from Scheme in checking that the values you claim are "linear" +really are. So you better get it right. Or play it safe and use the non-! +procedures -- it doesn't do any good to compute quickly if you get the wrong +answer. + +Why go to all this trouble to define the notion of "linear update" and use it +in a procedure spec, instead of the more common notion of a "destructive" +operation? First, note that destructive list-processing procedures are almost +always used in a linear-update fashion. This is in part required by the +special case of operating upon the empty list, which can't be side-effected. +This means that destructive operators are not pure side-effects -- they have +to return a result. Second, note that code written using linear-update +operators can be trivially ported to a pure, functional subset of Scheme by +simply providing pure implementations of the linear-update operators. Finally, +requiring destructive side-effects ruins opportunities to parallelise these +operations -- and the places where one has taken the trouble to spell out +destructive operations are usually exactly the code one would want a +parallelising compiler to parallelise: the efficiency-critical kernels of the +algorithm. Linear-update operations are easily parallelised. Going with a +linear-update spec doesn't close off these valuable alternative implementation +techniques. This list library is intended as a set of low-level, basic +operators, so we don't want to exclude these possible implementations. + +The linear-update procedures in this library are + take! drop-right! + append! reverse! append-reverse! + append-map! map! + filter! partition! remove! + delete! alist-delete! delete-duplicates! + lset-adjoin! lset-union! lset-intersection! lset-difference! lset-xor! + lset-diff+intersection! + + +** Improper lists +================= + +Scheme does not properly have a list type, just as C does not have a string +type. Rather, Scheme has a binary-tuple type, from which one can build binary +trees. There is an *interpretation* of Scheme values that allows one to treat +these trees as lists. Further complications ensue from the fact that Scheme +allows side-effects to these tuples, raising the possibility of lists of +unbounded length, and trees of unbounded depth (that is, circular data +structures). + +However, there is a simple view of the world of Scheme values that considers +every value to be a list of some sort. That is, every value is either + - a "proper list" -- a finite, nil-terminated list, such as: + (a b c) + () + (32) + - a "dotted list" -- a finite, non-nil terminated list, such as + (a b c . d) + (x . y) + 42 + george + - or a "circular list" -- an infinite, unterminated list. +Note that the zero-length dotted lists are simply all the non-null, non-pair +values. + +This view is captured by the predicates PROPER-LIST?, DOTTED-LIST?, and +CIRCULAR-LIST?. List-lib users should note that dotted lists are not commonly +used, and are considered by many Scheme programmers to be an ugly artifact of +Scheme's lack of a true list type. However, dotted lists do play a noticeable +role in the *syntax* of Scheme, in the "rest" parameters used by n-ary +lambdas: (lambda (x y . rest) ...). + +Dotted lists are *not* fully supported by list-lib. Most procedures are +defined only on proper lists -- that is, finite, nil-terminated lists. The +procedures that will also handle circular or dotted lists are specifically +marked. While this design decision restricts the domain of possible arguments +one can pass to these procedures, it has the benefit of allowing the +procedures to catch the error cases where programmers inadvertently pass +scalar values to a list procedure by accident, e.g. by switching the arguments +to a procedure call. + + +** Errors +========= + +Note that statements of the form "it is an error" merely mean "don't +do that." They are not a guarantee that a conforming implementation will +"catch" such improper use by, for example, raising some kind of exception. +Regrettably, R5RS Scheme requires no firmer guarantee even for basic operators +such as CAR and CDR, so there's little point in requiring these procedures to +do more. Here is the relevant section of the R5RS report: + + When speaking of an error situation, this report uses the phrase "an + error is signalled" to indicate that implementations must detect and + report the error. If such wording does not appear in the discussion + of an error, then implementations are not required to detect or + report the error, though they are encouraged to do so. An error + situation that implementations are not required to detect is usually + referred to simply as "an error." + + For example, it is an error for a procedure to be passed an argument + that the procedure is not explicitly specified to handle, even though + such domain errors are seldom mentioned in this report. + Implementations may extend a procedure's domain of definition to + include such arguments. + + +** Not included in this library +=============================== + +The following items are not in this library: +- Sort routines +- Destructuring/pattern-matching macro +- Tree-processing routines +They shound have their own SRFI specs. + + + +* The procedures +---------------- +In a Scheme system that has a module or package system, these procedures +should be contained in a module named "list-lib". + +The templates given below obey the following conventions for procedure formals: + list A proper (finite, nil-terminated) list + clist A proper or circular list + flist A finite (proper or dotted) list + pair A pair + x, y, d, a Any value + object, value Any value + n, i A natural number (an integer >= 0) + proc A procedure + = A boolean procedure taking two arguments + pred A boolean procedure taking one argument + +It is an error to pass a circular or dotted list to a procedure not +defined to accept such an argument. Such a procedure may either signal +an error or diverge when passed a circular list. + +** Constructors +=============== + +cons a d -> pair R5RS + The primitive constructor. Returns a newly allocated pair whose car is A + and whose cdr is D. The pair is guaranteed to be different (in the sense + of EQV?) from every existing object. + + (cons 'a '()) ==> (a) + (cons '(a) '(b c d)) ==> ((a) b c d) + (cons "a" '(b c)) ==> ("a" b c) + (cons 'a 3) ==> (a . 3) + (cons '(a b) 'c) ==> ((a b) . c) + +list object ... -> list R5RS + Returns a newly allocated list of its arguments. + + (list 'a (+ 3 4) 'c) ==> (a 7 c) + (list) ==> () + +xcons d a -> pair + (lambda (d a) (cons a d)) + Of utility only as a value to be conveniently passed to higher-order + procedures. + + (xcons '(b c) 'a) => (a b c) + + The name stands for "eXchanged CONS." + +cons* elt1 elt2 ... -> object + Like LIST, but the last argument provides the tail of the constructed + list, returning (cons elt1 (cons elt2 (cons ... eltn))). + This function is called LIST* in Common Lisp and about half of the + Schemes that provide it; and CONS* in the other half. + + (cons* 1 2 3 4) => (1 2 3 . 4) + (cons* 1) => 1 + +make-list n [fill] -> list + Returns an N-element list, whose elements are all the value FILL. + If the FILL argument is not given, the elements of the list may + be arbitrary values. + + (make-list 4 'c) => (c c c c) + (make-list 10) => (2 3 5 7 11 13 17 19 23 29) + +list-tabulate n init-proc -> list + Returns an N-element list. Element i of the list, where 0 <= i < N, + is produced by (INIT-PROC i). No guarantee is made about the dynamic + order in which INIT-PROC is applied to these indices. + + (list-tabulate 4 values) => (0 1 2 3) + +list-copy flist -> flist + Copies the "spine" of the argument. + +circular-list elt1 elt2 ... -> clist + Constructs a circular list of the elements. + (circular-list 'z 'q) => (z q z q z q ...) + +iota count [start step] -> list + Returns a list containing the elements + (start start+step ... start+(count-1)*step) + The START and STEP parameters default to 0 and 1, respectively. + This procedure takes its name from the APL primitive. + + (iota 5) => (0 1 2 3 4) + (iota 5 0 -0.1) => (0 -0.1 -0.2 -0.3 -0.4) + + +** Predicates +============= + +Note: the predicates PROPER-LIST?, CIRCULAR-LIST?, and DOTTED-LIST? +partition the entire universe of Scheme values. + +proper-list? x -> boolean + Returns true iff X is a proper list -- a finite, nil-terminated list. + + More carefully: The empty list is a proper list. A pair whose cdr is a + proper list is also a proper list: + ::= () (Empty proper list) + | (cons ) (Proper-list pair) + Note that this definition rules out circular lists. This + function is required to detect this case and return false. + + Nil-terminated lists are called "proper" lists by R5RS and Common Lisp. + The opposite of proper is improper. + + R5RS binds this function to the variable LIST?. + + (not (proper-list? x)) = (or (dotted-list? x) (circular-list? x)) + +circular-list? x -> boolean + True if X is a circular list. A circular list is a value such that + for every n >= 0, cdr^n(x) is a pair. + + Terminology: The opposite of circular is finite. + (not (circular-list? x)) = (or (proper-list? x) (dotted-list? x)) + +dotted-list? x -> boolean + True if X is a finite, non-nil-terminated list. That is, there exists + an n >= 0 such that cdr^n(x) is neither a pair nor (). This includes + non-pair, non-() values (e.g. symbols, numbers), which are considered to + be dotted lists of length 0. + + (not (dotted-list? x)) = (or (proper-list? x) (circular-list? x)) + +pair? object -> boolean R5RS + Returns #t if OBJECT is a pair; otherwise, #f. + + (pair? '(a . b)) ==> #t + (pair? '(a b c)) ==> #t + (pair? '()) ==> #f + (pair? '#(a b)) ==> #f + (pair? 7) ==> #f + (pair? 'a) ==> #f + +null? object -> boolean R5RS + Returns #t if OBJECT is the empty list; otherwise, #f. + +null-list? list -> boolean + LIST is a proper or circular list. This procedure returns true if + the argument is the empty list (), and false otherwise. It is an + error to pass this procedure a value which is not a proper or + circular list. + + This procedure is recommended as the termination condition for + list-processing procedures that are not defined on dotted lists. + +not-pair? x -> boolean + (lambda (x) (not (pair? x))) + Provided as a procedure as it can be useful as the termination condition + for list-processing procedures that wish to handle all finite lists, + both proper and dotted. + +list= elt= list1 ... -> boolean + Determines list equality, given an element-equality procedure. + Proper list A equals proper list B if they are of the same length, + and their corresponding elements are equal, as determined by ELT=. + If the element-comparison procedure's first argument is from LISTi, + then its second argument is from LISTi+1, i.e. it is always called as + (elt= a b) + for a an element of list A, and b an element of list B. + + In the n-ary case, every LISTi is compared to LISTi+1 (as opposed, + for example, to comparing LIST1 to every LISTi, for i>1). If there + are no list arguments at all, LIST= simply returns true. + + It is an error to apply LIST= to anything except proper lists. While + implementations may choose to extend it to circular lists, note that it + cannot reasonably be extended to dotted lists, as it provides no way to + specify an equality procedure for comparing the list terminators. + + Note that the dynamic order in which the ELT= procedure is applied to + pairs of elements is not specified. For example, if LIST= is applied + to three lists, A, B, and C, it may first completely compare A to B, + then compare B to C, or it may compare the first elements of A and B, + then the first elements of B and C, then the second elements of A and + B, and so forth. + + The equality procedure must be consistent with EQ?. That is, + it must be the case that + (eq? x y) => (elt= x y). + Note that this implies that two lists which are EQ? are always LIST=, + as well. + + (list= eq?) => #t ; Trivial cases + (list= eq? '(a)) => #t + + +** Selectors +============ + +car pair -> value R5RS +cdr pair -> value R5RS + These procedures return the contents of the car and cdr field of + their argument, respectively. Note that it is an error to apply + them to the empty list. + + (car '(a b c)) ==> a + (car '((a) b c d)) ==> (a) + (car '(1 . 2)) ==> 1 + (car '()) ==> *error* + + (cdr '(a b c)) ==> (b c) + (cdr '((a) b c d)) ==> (b c d) + (cdr '(1 . 2)) ==> 2 + (cdr '()) ==> *error* + +caar pair -> value R5RS +cadr pair -> value + : +cdddar pair -> value +cddddr pair -> value + These procedures are compositions of CAR and CDR, where for + example CADDR could be defined by + + (define caddr (lambda (x) (car (cdr (cdr x))))). + + Arbitrary compositions, up to four deep, are provided. There are + twenty-eight of these procedures in all. + +list-ref clist i -> value R5RS + Returns the Ith element of CLIST. (This is the same as the car + of (DROP CLIST I).) It is an error if I >= N, where N is the length + of CLIST. + + (list-ref '(a b c d) 2) ==> c + +first second third fourth fifth +sixth seventh eighth ninth tenth: pair -> value + Synonyms for car, cadr, caddr, ... + + (third '(a b c d e)) => c + +car+cdr pair -> [x y] + The fundamental pair deconstructor: + (lambda (p) (values (car p) (cdr p))) + This can, of course, be implemented more efficiently by a compiler. + +take x i -> list +drop x i -> object + TAKE returns the first I elements of list X. + DROP returns all but the first I elements of list X. + + (take '(a b c d e) 2) => (a b) + (drop '(a b c d e) 2) => (c d e) + + X may be any value -- a proper, circular, or dotted list: + (take '(1 2 3 . d) 2) => (1 2) + (drop '(1 2 3 . d) 2) => (3 . d) + (take '(1 2 3 . d) 3) => (1 2 3) + (drop '(1 2 3 . d) 3) => d + For a legal I, TAKE and DROP partition the list in a manner which + can be inverted with APPEND: + (append (take x i) (drop x i)) = x + + DROP is exactly equivalent to performing I cdr operations on X; + the returned value shares a common tail with X. + + If the argument is a list of non-zero length, TAKE is guaranteed to + return a freshly-allocated list, even in the case where the entire + list is taken, e.g. (TAKE LIS (LENGTH LIS)). + +take-right flist i -> object +drop-right flist i -> list + TAKE-RIGHT returns the last I elements of FLIST. + DROP-RIGHT returns all but the last I elements of FLIST. + + The returned list may share a common tail with the argument list. + + (take-right '(a b c d e) 2) => (d e) + (drop-right '(a b c d e) 2) => (a b c) + + FLIST may be any finite list, either proper or dotted: + (take-right '(1 2 3 . d) 2) => (2 3 . d) + (drop-right '(1 2 3 . d) 2) => (1) + (take-right '(1 2 3 . d) 0) => d + (drop-right '(1 2 3 . d) 0) => (1 2 3) + For a legal I, TAKE-RIGHT and DROP-RIGHT partition the list in a manner + which can be inverted with APPEND: + (append (take flist i) (drop flist i)) = flist + + TAKE-RIGHT's return value is guaranteed to share a common tail with FLIST. + + If the argument is a list of non-zero length, DROP-RIGHT is guaranteed to + return a freshly-allocated list, even in the case where nothing is + dropped, e.g. (DROP-RIGHT LIS 0). + +take! x i -> list +drop-right! flist i -> list + TAKE! and DROP-RIGHT! are "linear-update" variants of TAKE and + DROP-RIGHT: the procedure is allowed, but not required, to alter the + argument list to produce the result. + + If X is circular, TAKE! may return a shorter-than-expected list: + (take! (circular-list 1 3 5) 8) => (1 3) + (take! (circular-list 1 3 5) 8) => (1 3 5 1 3 5 1 3) + +last pair -> object +last-pair pair -> pair + LAST returns the last element of the non-empty, finite list PAIR. + LAST-PAIR returns the last pair in the non-empty, finite list PAIR. + + (last '(a b c)) => c + (last-pair '(a b c)) => (c) + (last-pair '(a b c . d)) => (c . d) + + +** Miscellaneous: length, append, reverse, zip & count +====================================================== + +length list -> integer R5RS +length+ clist -> integer or #f + Both LENGTH and LENGTH+ return the length of the argument. + It is an error to pass a value to LENGTH which is not a proper + list (finite and nil-terminated). In particular, this means an + implementation may diverge or signal an error when LENGTH is + applied to a circular list. + + LENGTH+, on the other hand, returns #F when applied to a circular + list. + + The length of a proper list is a non-negative integer N such that CDR + applied N times to the list produces the empty list. + + (length '(a b c)) ==> 3 + (length '(a (b) (c d e))) ==> 3 + (length '()) ==> 0 + +append list1 ... -> value R5RS +append! list1 ... -> value + APPEND returns a list consisting of the elements of LIST1 + followed by the elements of the other list parameters. + + (append '(x) '(y)) ==> (x y) + (append '(a) '(b c d)) ==> (a b c d) + (append '(a (b)) '((c))) ==> (a (b) (c)) + + The resulting list is always newly allocated, except that it + shares structure with the final LISTi argument. This last argument + may be any value at all; an improper list results if it is not + a proper list. All other arguments must be proper lists. + + (append '(a b) '(c . d)) ==> (a b c . d) + (append '() 'a) ==> a + + APPEND! is the "linear-update" variant of APPEND -- it is allowed, but + not required, to alter cons cells in the argument lists to construct + the result list. The last argument is never altered; the result + list shares structure with this parameter. + +reverse list -> list R5RS +reverse! list -> list + REVERSE returns a newly allocated list consisting of the elements of + LIST in reverse order. + + (reverse '(a b c)) ==> (c b a) + (reverse '(a (b c) d (e (f)))) + ==> ((e (f)) d (b c) a) + + REVERSE! is the linear-update variant of REVERSE. It is permitted, + but not required, to alter the argument's cons cells to produce the + reversed list. + +append-reverse rev-head tail -> value +append-reverse! rev-head tail -> value + APPEND-REVERSE returns + (append (reverse rev-head) tail) + It it provided because it is a common operation -- a common + list-processing style calls for this exact operation to transfer values + accumulated in reverse order onto the front of another list, and because + the implementation is significantly more efficient than the simple + composition it replaces. (But note that this pattern of iterative + computation followed by a reverse can frequently be rewritten as a + recursion, dispensing with the REVERSE and APPEND-REVERSE steps, and + shifting temporary, intermediate storage from the heap to the stack, + which is typically a win for reasons of cache locality and eager storage + reclamation.) + + APPEND-REVERSE! is just the linear-update variant -- it is allowed, but + not required, to alter REV-HEAD's cons cells to construct the result. + +zip clist1 clist2 ... -> list + (lambda lists (apply map list lists)) + If ZIP is passed N lists, it returns a list as long as the shortest + of these lists, each element of which is an N-element list comprised + of the corresponding elements from the parameter lists. + + (zip '(one two three) '(1 2 3) '(odd even odd even odd even odd even)) => + ((one 1 odd) (two 2 even) (three 3 odd)) + + (zip '(1 2 3)) => ((1) (2) (3)) + + At least one of the argument lists must be finite: + (zip '(3 1 4 1) (circular-list #f #t)) => + ((3 #f) (1 #t) (4 #f) (1 #t)) + +unzip1 list -> list +unzip2 list -> [list list] +unzip3 list -> [list list list] +unzip4 list -> [list list list list] +unzip5 list -> [list list list list list] + UNZIP1 takes a list of lists, where every list must contain at least one + element, and returns a list containing the initial element of each such + list. That is, it returns (MAP CAR LISTS). UNZIP2 takes a list of lists, + where every list must contain at least two elements, and returns two + values: a list of the first elements, and a list of the second + elements. UNZIP3 does the same for the first three elements of the lists, + and so forth. + + (unzip2 '((1 one) (2 two) (3 three))) => + (1 2 3) + (one two three) + +count pred clist1 clist2 ... -> integer + PRED is a procedure taking as many arguments as there are lists and + returning a single value. It is applied element-wise to the elements of + the LISTs, and a count is tallied of the number of elements that produce a + true value. This count is returned. COUNT is "iterative" in that it is + guaranteed to apply PRED to the LIST elements in a left-to-right order. + The counting stops when the shortest list expires. + + (count even? '(3 1 4 1 5 9 2 5 6)) => 3 + (count < '(1 2 4 8) '(2 4 6 8 10 12 14 16)) => 3 + + At least one of the argument lists must be finite: + (count < '(3 1 4 1) (circular-list 1 10)) => 2 + + +** Fold, unfold & map +===================== + +fold kons knil clist1 clist2 ... -> value + The fundamental list iterator. + + First, consider the single list-parameter case. If CLIST1 = (e1 e2 ... en), + then this procedure returns + (kons en ... (kons e2 (kons e1 knil)) ... ) + That is, it obeys the (tail) recursion + (fold kons knil lis) = (fold kons (kons (car lis) knil) (cdr lis)) + (fold kons knil '()) = knil + + Examples: + (fold + 0 lis) ; Add up the elements of LIS. + + (fold cons '() lis) ; Reverse LIS. + + (fold cons tail rev-head) ; See APPEND-REVERSE. + + ;; How many symbols in LIS? + (fold (lambda (x count) (if (symbol? x) (+ count 1) count)) + 0 + lis) + + ;; Length of the longest string in LIS: + (fold (lambda (s max-len) (max max-len (string-length s))) + 0 + lis) + + If N list arguments are provided, then the KONS function must take + N+1 parameters: one element from each list, and the "seed" or fold + state, which is initially KNIL. The fold operation terminates when + the shortest list runs out of values: + (fold cons* '() '(a b c) '(1 2 3 4 5)) => (c 3 b 2 a 1) + + At least one of the list arguments must be finite. + +fold-right kons knil clist1 clist2 ... -> value + The fundamental list recursion operator. + + First, consider the single list-parameter case. If CLIST1 = (e1 e2 ... en), + then this procedure returns + (kons e1 (kons e2 ... (kons en knil))) + That is, it obeys the recursion + (fold-right kons knil lis) = (kons (car lis) (fold-right kons knil (cdr lis))) + (fold-right kons knil '()) = knil + + Examples: + (fold-right cons '() lis) ; Copy LIS. + + ;; Filter the even numbers out of LIS. + (fold-right (lambda (x l) (if (even? x) (cons x l) l)) '() lis)) + + If N list arguments are provided, then the KONS function must take + N+1 parameters: one element from each list, and the "seed" or fold + state, which is initially KNIL. The fold operation terminates when + the shortest list runs out of values: + (fold-right cons* '() '(a b c) '(1 2 3 4 5)) => (a 1 b 2 c 3) + + At least one of the list arguments must be finite. + +pair-fold kons knil clist1 clist2 ... -> value + Analogous to FOLD, but KONS is applied to successive sublists of the + lists, rather than successive elements -- that is, KONS is applied to the + pairs making up the lists, giving this (tail) recursion: + + (pair-fold kons knil lis) = (let ((tail (cdr lis))) + (pair-fold kons (kons lis knil) tail)) + + (pair-fold kons knil '()) = knil + + The KONS function may reliably apply SET-CDR! to the pairs it is given + without altering the sequence of execution. + + Example: + ;;; Destructively reverse a list. + (pair-fold (lambda (pair tail) (set-cdr! pair tail) pair) '() lis)) + + At least one of the list arguments must be finite. + +pair-fold-right kons knil clist1 clist2 ... -> value + Holds the same relationship with FOLD-RIGHT that PAIR-FOLD holds with FOLD. + Obeys the recursion + + (pair-fold-right kons knil lis) = + (kons lis (pair-fold-right kons knil (cdr lis))) + + (pair-fold-right kons knil '()) = knil + + Example: + (pair-fold-right cons '() '(a b c)) => ((a b c) (b c) (c)) + + At least one of the list arguments must be finite. + +reduce f ridentity list -> value + REDUCE is a variant of FOLD. + + RIDENTITY should be a "right identity" of the procedure F -- that is, + for any value X acceptable to F, + (f x ridentity) = x + + REDUCE has the following definition: + If LIST = (), return RIDENTITY. + Otherwise, return (fold F (car LIST) (cdr LIST)). + + ...in other words, we compute (fold F RIDENTITY LIST). + + Note that RIDENTITY is used *only* in the empty-list case. You + typically use REDUCE when applying F is expensive and you'd like + to avoid the extra application incurred when FOLD applies F to the + head of LIST and the identity value, redundantly producing the + same value passed in to F. For example, if F involves searching a + file directory or performing a database query, this can be + significant. In general, however, FOLD is useful in many contexts + where REDUCE is not (consider the examples given in the FOLD + definition -- only one of the five folds uses function with a + right identity. The other four may not be performed with REDUCE). + + Note: MIT Scheme and Haskell flip F's arg order for their REDUCE and + FOLD functions. + +reduce-right f ridentity list -> value + REDUCE-RIGHT is the fold-right variant of REDUCE. + It obeys the following definition: + (reduce-right f ridentity '()) = ridentity + (reduce-right f ridentity '(e1)) = (f e1 ridentity) = e1 + (reduce-right f ridentity '(e1 e2 ...)) = + (f e1 (reduce f ridentity (e2 ...))) + + ...in other words, we compute (fold-right F RIDENTITY LIST). + + +unfold p f g seed [tail] -> value + UNFOLD constructs a list with the following loop: + (let lp ((seed seed) (lis tail)) + (if (p seed) lis + (lp (g seed) + (cons (f seed) lis)))) + + P: Determines when to stop unfolding. + F: Maps each seed value to the corresponding list element. + G: Maps each seed value to next seed value. + SEED: The "state" value for the unfold. + TAIL: list terminator; defaults to '(). + + UNFOLD is the fundamental iterative list constructor, just as FOLD is the + fundamental iterative list consumer. While UNFOLD may seem a bit abstract + to novice functional programmers, it can be used in a number of ways: + + (unfold zero? ; List of squares: 1^2 ... 10^2 + (lambda (x) (* x x)) + (lambda (x) (- x 1)) + 10) + + (unfold null-list? car cdr lis) ; Reverse a proper list. + + ;; Read current input port into a list of values. + (unfold eof-object? values (lambda (x) (read)) (read)) + + ;; (APPEND-REVERSE rev-head tail) + (unfold null-list? car cdr rev-head tail) + + Interested functional programmers may enjoy noting that FOLD and UNFOLD + are in some sense inverses. That is, given operations KNULL?, KAR, KDR, + KONS, and KNIL satisfying + (kons (kar x) (kdr x)) = x and (knull? knil) = #t + then + (FOLD kons knil (UNFOLD knull? kar kdr x)) = x + and + (UNFOLD knull? kar kdr (FOLD kons knil x)) = x. + + This combinator presumably has some pretentious mathematical name; + interested readers are invited to communicate it to the author. + +unfold-right p f g seed [tail-gen]-> list + UNFOLD-RIGHT is best described by its basic recursion: + (unfold-right p f g seed) = (if (p seed) (tail-gen seed) + (cons (f seed) + (unfold-right p f g (g seed)))) + P: Determines when to stop unfolding. + F: Maps each seed value to the corresponding list element. + G: Maps each seed value to next seed value. + SEED: The "state" value for the unfold. + TAIL-GEN: creates the tail of the list; defaults to (lambda (x) '()) + + UNFOLD-RIGHT is the fundamental recursive list constructor, just as + FOLD-RIGHT is the fundamental recursive list consumer. While UNFOLD-RIGHT + may seem a bit abstract to novice functional programmers, it can be used + in a number of ways: + + (unfold-right (lambda (x) (> x 10)) ; List of squares: 1^2 ... 10^2. + (lambda (x) (* x x)) + (lambda (x) (+ x 1)) + 1) + + (unfold-right null-list? car cdr lis) ; Copy a proper list. + + ;; Read current input port into a list of values. + (unfold-right eof-object? values (lambda (x) (read)) (read)) + + ;; Copy a possibly non-proper list: + (unfold-right not-pair? car cdr lis + values) + + ;; Append HEAD onto TAIL: + (unfold-right null-list? car cdr head + (lambda (x) tail)) + + + Interested functional programmers may enjoy noting that FOLD-RIGHT and + UNFOLD-RIGHT are in some sense inverses. That is, given operations KNULL?, + KAR, KDR, KONS, and KNIL satisfying + (kons (kar x) (kdr x)) = x and (knull? knil) = #t + then + (FOLD-RIGHT kons knil (UNFOLD-RIGHT knull? kar kdr x)) = x + and + (UNFOLD-RIGHT knull? kar kdr (FOLD-RIGHT kons knil x)) = x. + + This combinator sometimes is called an "anamorphism;" when an + explicit TAIL-GEN procedure is supplied, it is called an + "apomorphism." + +map proc clist1 clist2 ... -> list R5RS+ + + PROC is a procedure taking as many arguments as there are list arguments + and returning a single value. MAP applies PROC element-wise to the + elements of the lists and returns a list of the results, in order. The + dynamic order in which PROC is applied to the elements of the lists is + unspecified. + + (map cadr '((a b) (d e) (g h))) + ==> (b e h) + + (map (lambda (n) (expt n n)) + '(1 2 3 4 5)) + ==> (1 4 27 256 3125) + + (map + '(1 2 3) '(4 5 6)) ==> (5 7 9) + + (let ((count 0)) + (map (lambda (ignored) + (set! count (+ count 1)) + count) + '(a b))) ==> (1 2) OR (2 1) + + This procedure is extended from its R5RS specification + to allow the arguments to be of unequal length; it terminates + when the shortest list runs out. + + At least one of the argument lists must be finite: + (map + '(3 1 4 1) (circular-list 1 0)) => (4 1 5 1) + +for-each proc clist1 clist2 ... -> unspecified R5RS+ + The arguments to FOR-EACH are like the arguments to MAP, but + FOR-EACH calls PROC for its side effects rather than for its + values. Unlike MAP, FOR-EACH is guaranteed to call PROC on + the elements of the CLISTs in order from the first element(s) to + the last, and the value returned by FOR-EACH is unspecified. + + (let ((v (make-vector 5))) + (for-each (lambda (i) + (vector-set! v i (* i i))) + '(0 1 2 3 4)) + v) ==> #(0 1 4 9 16) + + This procedure is extended from its R5RS specification + to allow the arguments to be of unequal length; it terminates + when the shortest list runs out. + + At least one of the argument lists must be finite: + (map + '(3 1 4 1) (circular-list 1 0)) => (4 1 5 1) + +append-map f clist1 clist2 ... -> value +append-map! f clist1 clist2 ... -> value + Equivalent to + (apply append (map f clist1 clist2 ...)) + and + (apply append! (map f clist1 clist2 ...)) + + Map F over the elements of the lists, just as in the MAP function. + However, the results of the applications are appended together to + make the final result. APPEND-MAP uses APPEND to append the results + together; APPEND-MAP! uses APPEND!. + + The dynamic order in which the various applications of F are made is + not specified. + + Example: + (append-map! (lambda (x) (list x (- x))) '(1 3 8)) + => (1 -1 3 -3 8 -8) + + At least one of the list arguments must be finite. + +map! f list1 clist2 ... -> list + Linear-update variant of MAP -- MAP! is allowed, but not required, to + alter the cons cells of LIST1 to construct the result list. + + The dynamic order in which the various applications of F are made is + not specified. + + In the n-ary case, CLIST2, CLIST3, ... must have at least as many + elements as LIST1. + +map-in-order f clist1 clist2 ... -> list + A variant of the MAP procedure that guarantees to apply F across + the elements of the LISTi arguments in a left-to-right order. This + is useful for mapping procedures that both have side effects and + return useful values. + + At least one of the list arguments must be finite. + +pair-for-each f clist1 clist2 ... -> unspecific + Like FOR-EACH, but F is applied to successive sublists of the argument + lists. That is, F is applied to the cons cells of the lists, rather + than the lists' elements. These applications occur in left-to-right + order. + + The F procedure may reliably apply SET-CDR! to the pairs it is given + without altering the sequence of execution. + + (pair-for-each (lambda (pair) (display pair) (newline)) '(a b c)) ==> + (a b c) + (b c) + (c) + + At least one of the list arguments must be finite. + +filter-map f clist1 clist2 ... -> list + Like MAP, but only true values are saved. + (filter-map (lambda (x) (and (number? x) (* x x))) '(a 1 b 3 c 7)) + => (1 9 49) + The dynamic order in which the various applications of F are made is + not specified. + + At least one of the list arguments must be finite. + + +** Filtering & partitioning +=========================== + +filter pred list -> list + Return all the elements of LIST that satisfy predicate PRED. + The list is not disordered -- elements that appear in the result list + occur in the same order as they occur in the argument list. + The returned list may share a common tail with the argument list. + The dynamic order in which the various applications of PRED are made is + not specified. + + (filter even? '(0 7 8 8 43 -4)) => (0 8 8 -4) + +partition pred list -> [list list] + Partitions the elements of LIST with predicate PRED, and returns two + values: the list of in-elements and the list of out-elements. + The list is not disordered -- elements occur in the result lists + in the same order as they occur in the argument list. + The dynamic order in which the various applications of PRED are made is + not specified. One of the returned lists may share a common tail with the + argument list. + + (partition symbol? '(one 2 3 four five 6)) + => (one four five) + (2 3 6) + +remove pred list -> list + Returns LIST without the elements that satisfy predicate PRED: + (lambda (pred list) (filter (lambda (x) (not (pred x))) list)) + The list is not disordered -- elements that appear in the result list + occur in the same order as they occur in the argument list. + The returned list may share a common tail with the argument list. + The dynamic order in which the various applications of PRED are made is + not specified. + + (remove even? '(0 7 8 8 43 -4)) => (7 43) + +filter! pred list -> list +partition! pred list -> [list list] +remove! pred list -> list + Linear-update variants of FIND, PARTITION and REMOVE. + These procedures are allowed, but not required, to alter the cons cells + in the argument list to construct the result lists. + + +** Searching +============ + +The following procedures all search lists for a leftmost element satisfying +some criteria. This means they do not always examine the entire list; thus, +there is no efficient way for them to reliably detect and signal an error when +passed a dotted or circular list. Here are the general rules describing how +these procedures work when applied to different kinds of lists: + + Proper lists: The standard, canonical behavior happens in this case. + + Dotted lists: It is an error to pass these procedures a dotted list + that does not contain an element satisfying the search + criteria. That is, it is an error if the procedure has + to search all the way to the end of the dotted list. + However, this SRFI does *not* specify anything at all + about the behavior of these procedures when passed a + dotted list containing an element satisfying the search + criteria. It may finish successfully, signal an error, + or perform some third action. Different implementations + may provide different functionality in this case; code + which is compliant with this SRFI may not rely on any + particular behavior. Future SRFI's may refine SRFI-1 + to define specific behavior in this case. + + In brief, SRFI-1 compliant code may not pass a dotted + list argument to these procedures. + + Circular lists: It is an error to pass these procedures a circular list + that does not contain an element satisfying the search + criteria. Note that the procedure is not required to + detect this case; it may simply diverge. It is, however, + acceptable to search a circular list *if the search is + successful* -- that is, if the list contains an element + satisfying the search criteria. + +Here are some examples, using the FIND and ANY procedures as a canonical +representatives: + ;; Proper list -- success + (find even? '(1 2 3)) => 2 + (any even? '(1 2 3)) => #t + + ;; proper list -- failure + (find even? '(1 7 3)) => #f + (any even? '(1 7 3)) => #f + + ;; Failure is error on a dotted list. + (find even? '(1 3 . x)) => error + (any even? '(1 3 . x)) => error + + ;; The dotted list contains an element satisfying the search. + ;; This case is not specified -- it could be success, an error, + ;; or some third possibility. + (find even? '(1 2 . x)) => error/undefined + (any even? '(1 2 . x)) => error/undefined ; success, error or other. + + ;; circular list -- success + (find even? (circular-list 1 6 3)) => 6 + (any even? (circular-list 1 6 3)) => #t + + ;; circular list -- failure is error. Procedure may diverge. + (find even? (circular-list 1 3)) => error + (any even? (circular-list 1 3)) => error + + +find pred clist -> value + Return the first element of CLIST that satisfies predicate PRED; + false if no element does. + + (find even? '(3 1 4 1 5 9)) => 4 + + Note that FIND has an ambiguity in its lookup semantics -- if FIND + returns #F, you cannot tell (in general) if it found a #F element + that satisfied PRED, or if it did not find any element at all. In + many situations, this ambiguity cannot arise -- either the list being + searched is known not to contain any #F elements, or the list is + guaranteed to have an element satisfying PRED. However, in cases + where this ambiguity can arise, you should use FIND-TAIL instead of + FIND -- FIND-TAIL has no such ambiguity: + (cond ((find-tail pred lis) => (lambda (pair) ...)) ; Handle (CAR PAIR) + (else ...)) ; Search failed. + +find-tail pred clist -> pair or false + Return the first pair of CLIST whose car satisfies PRED. If no pair does, + return false. + + FIND-TAIL can be viewed as a general-predicate variant of the MEMBER + function. + + Examples: + (find-tail even? '(3 1 37 -8 -5 0 0)) => (-8 -5 0 0) + + (find-tail even? '(3 1 37 -5)) => #f + + ;; MEMBER X LIS: + (find-tail (lambda (elt) (equal? x elt)) lis) + + In the circular-list case, this procedure "rotates" the list. + +any pred clist1 clist2 ... -> value + Applies the predicate across the lists, returning true if the predicate + returns true on any application. + + If there are N list arguments CLIST1 ... CLISTn, then PRED must be a + procedure taking N arguments and returning a boolean result. + + ANY applies PRED to the first elements of the CLISTi parameters. If this + application returns a true value, ANY immediately returns that value. + Otherwise, it iterates, applying PRED to the second elements of the CLISTi + parameters, then the third, and so forth. The iteration stops when a true + value is produced or one of the lists runs out of values; in the latter + case, ANY returns #F. The application of PRED to the last element of the + lists is a tail call. + + Note the difference between FIND and ANY -- FIND returns the element + that satisfied the predicate; ANY returns the true value that the + predicate produced. + + Like EVERY, ANY's name does not end with a question mark -- this is to + indicate that it does not return a simple boolean (#T or #F), but a + general value. + + (any integer? '(a 3 b 2.7)) => #T + (any integer? '(a 3.1 b 2.7)) => #F + (any < '(3 1 4 1 5) + '(2 7 1 8 2)) => #T + +every pred clist1 clist2 ... -> value + Applies the predicate across the lists, returning true if the predicate + returns true on every application. + + If there are N list arguments CLIST1 ... CLISTn, then PRED must be a + procedure taking N arguments and returning a boolean result. + + EVERY applies PRED to the first elements of the CLISTi parameters. If + this application returns false, EVERY immediately returns false. + Otherwise, it iterates, applying PRED to the second elements of the CLISTi + parameters, then the third, and so forth. The iteration stops when a false + value is produced or one of the lists run out of values. In the latter + case, EVERY returns the true value produced by its final application of + PRED. The application of PRED to the last element of the lists is a tail + call. + + If one of the CLISTi has no elements, EVERY simply returns #T. + + Like ANY, EVERY's name does not end with a question mark -- this is to + indicate that it does not return a simple boolean (#T or #F), but a + general value. + +list-index pred clist1 clist2 ... -> integer or false + Return the index of the leftmost element that satisfies PRED. + + If there are N list arguments CLIST1 ... CLISTn, then PRED must be a + function taking N arguments and returning a boolean result. + + LIST-INDEX applies PRED to the first elements of the CLISTi parameters. + If this application returns true, LIST-INDEX immediately returns zero. + Otherwise, it iterates, applying PRED to the second elements of the + CLISTi parameters, then the third, and so forth. When it finds a tuple of + list elements that cause PRED to return true, it stops and returns the + zero-based index of that position in the lists. + + The iteration stops when one of the lists runs out of values; in this + case, LIST-INDEX returns #F. + + (list-index even? '(3 1 4 1 5 9)) => 2 + (list-index < '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => 1 + (list-index = '(3 1 4 1 5 9 2 5 6) '(2 7 1 8 2)) => #f + +member x list [=] -> list or #f R5RS+ +memq x list -> list or #f R5RS +memv x list -> list or #f R5RS + These procedures return the first sublist of LIST whose car is + X, where the sublists of LIST are the non-empty lists returned + by (DROP LIST I) for I less than the length of LIST. If X does + not occur in LIST, then #f is returned. MEMQ uses EQ? to compare X + with the elements of LIST, while MEMV uses EQV? and MEMBER uses EQUAL?. + + (memq 'a '(a b c)) ==> (a b c) + (memq 'b '(a b c)) ==> (b c) + (memq 'a '(b c d)) ==> #f + (memq (list 'a) '(b (a) c)) ==> #f + (member (list 'a) + '(b (a) c)) ==> ((a) c) + (memq 101 '(100 101 102)) ==> *unspecified* + (memv 101 '(100 101 102)) ==> (101 102) + + MEMBER is extended from its R5RS definition to allow the client to pass + in an optional equality procedure = used to compare keys. + + The comparison procedure is used to compare the elements Ei of LIST + to the key X in this way: + (= X Ei) ; list is (E1 ... En) + That is, the first argument is always X, and the second argument is + one of the list elements. Thus one can reliably find the first element + of LIST that is greater than five with + (member 5 LIST <) + + Note that fully general list searching may be performed with + the FIND-TAIL and FIND procedures, e.g. + (find-tail even? list) ; Find the first elt with an even key. + + +** Deletion +=========== + +delete x list [=] -> list +delete! x list [=] -> list + DELETE uses the comparison procedure =, which defaults to EQUAL?, to find + all elements of LIST that are equal to X, and deletes them from LIST. The + dynamic order in which the various applications of = are made is not + specified. + + The list is not disordered -- elements that appear in the result list + occur in the same order as they occur in the argument list. + The result may share a common tail with the argument list. + + Note that fully general element deletion can be performed with the REMOVE + and REMOVE! procedures, e.g.: + ;; Delete all the even elements from LIS: + (remove even? lis) + + The comparison procedure is used in this way: + (= X Ei) + That is, X is always the first argument, and a list element is always the + second argument. The comparison procedure will be used to compare each + element of LIST exactly once; the order in which it is applied to the + various Ei is not specified. Thus, one can reliably remove all the + numbers greater than five from a list with + (delete 5 list <) + + DELETE! is the linear-update variant of DELETE. It is allowed, but not + required, to alter the cons cells in its argument list to construct the + result. + +delete-duplicates list [=] -> list +delete-duplicates! list [=] -> list + DELETE-DUPLICATES removes duplicate elements from the list argument. + If there are multiple equal elements in the argument list, the result list + only contains the first or leftmost of these elements in the result. + The order of these surviving elements is the same as in the original + list -- DELETE-DUPLICATES does not disorder the list (hence it is useful + for "cleaning up" association lists). + + The = parameter is used to compare the elements of the list; it defaults + to EQUAL?. If X comes before Y in LIST, then the comparison is performed + (= X Y) + The comparison procedure will be used to compare each pair of + elements in LIST no more than once; the order in which it is + applied to the various pairs is not specified. + + Implementations of DELETE-DUPLICATE are allowed to share common tails + between argument and result lists -- for example, if the list argument + contains only unique elements, it may simply return exactly this list. + + Be aware that, in general, DELETE-DUPLICATES runs in time O(n^2) + for N-element lists. Uniquifying long lists can be accomplished + in O(n lg n) time by sorting the list to bring equal elements + together, then using a linear-time algorithm to remove equal + elements. Alternatively, one can use algorithms based on + element-marking, with linear-time results. + + DELETE-DUPLICATES! is the linear-update variant of DELETE-DUPLICATES; it + is allowed, but not required, to alter the cons cells in its argument + list to construct the result. + + (delete-duplicates '(a b a c a b c z)) => (a b c z) + + ;; Clean up an alist: + (delete-duplicates '((a . 3) (b . 7) (a . 9) (c . 1)) + (lambda (x y) (eq? (car x) (car y)))) + => ((a . 3) (b . 7) (c . 1)) + + +** Association lists +==================== + +An "association list" (or "alist") is a list of pairs. The car of each pair +contains a key value, and the cdr contains the associated data value. They can +be used to construct simple look-up tables in Scheme. Note that association +lists are probably inappropriate for performance-critical use on large data; +in these cases, hash tables or some other alternative should be employed. + +assoc key alist [=] -> pair or #f R5RS+ +assq key alist -> pair or #f R5RS +assv key alist -> pair or #f R5RS + + ALIST must be an association list -- a list of pairs. These procedures + find the first pair in ALIST whose car field is KEY, and returns that + pair. If no pair in ALIST has KEY as its car, then #f is returned. ASSQ + uses EQ? to compare KEY with the car fields of the pairs in ALIST, while + ASSV uses EQV? and ASSOC uses EQUAL?. + + (define e '((a 1) (b 2) (c 3))) + (assq 'a e) ==> (a 1) + (assq 'b e) ==> (b 2) + (assq 'd e) ==> #f + (assq (list 'a) '(((a)) ((b)) ((c)))) ==> #f + (assoc (list 'a) '(((a)) ((b)) ((c)))) ==> ((a)) + (assq 5 '((2 3) (5 7) (11 13))) ==> *unspecified* + (assv 5 '((2 3) (5 7) (11 13))) ==> (5 7) + + ASSOC is extended from its R5RS definition to allow the client to pass in + an optional equality procedure = used to compare keys. + + The comparison procedure is used to compare the elements Ei of LIST + to the KEY parameter in this way: + (= KEY (CAR Ei)) ; list is (E1 ... En) + That is, the first argument is always KEY, and the second argument is + one of the list elements. Thus one can reliably find the first entry + of ALIST whose key is greater than five with + (assoc 5 ALIST <) + + Note that fully general alist searching may be performed with + the FIND-TAIL and FIND procedures, e.g. + ;; Look up the first association in ALIST with an even key: + (find (lambda (a) (even? (car a))) alist) + +alist-cons key datum alist -> alist + (lambda (key datum alist) (cons (cons key datum) alist)) + Cons a new alist entry mapping KEY -> DATUM onto ALIST. + +alist-copy alist -> alist + Make a fresh copy of ALIST. This means copying each pair that + forms an association as well as the spine of the list, i.e. + (lambda (a) (map (lambda (elt) (cons (car elt) (cdr elt))) a)) + +alist-delete key alist [=] -> alist +alist-delete! key alist [=] -> alist + ALIST-DELETE deletes all associations from ALIST with the given + KEY, using key-comparison procedure =, which defaults to EQUAL?. + The dynamic order in which the various applications of = are made + is not specified. + + Return values may share common tails with the ALIST argument. + The alist is not disordered -- elements that appear in the result alist + occur in the same order as they occur in the argument alist. + + The comparison procedure is used to compare the element keys Ki of ALIST's + entries to the KEY parameter in this way: + (= KEY Ki) + Thus, one can reliably remove all entries of ALIST whose key is greater + than five with + (alist-delete 5 alist <) + + ALIST-DELETE! is the linear-update variant of ALIST-DELETE. It + is allowed, but not required, to alter the cons cells from the ALIST + parameter to construct the result. + + +** Set operations on lists +========================== +These procedures implement operations on sets represented as lists of +elements. They all take an = argument used to compare elements of +lists. This equality procedure is required to be consistent with +EQ?. That is, it must be the case that + (eq? x y) => (= x y). +Note that this implies, in turn, that two lists that are EQ? are +also set-equal by any legal comparison procedure. This allows for +constant-time determination of set operations on EQ? lists. + +Be aware that these procedures typically run in time O(n * m) for N- +and M-element list arguments. Performance-critical applications +operating upon large sets will probably wish to use other data +structures and algorithms. + +lset<= = list1 ... -> boolean + Returns true iff every LISTi is a subset of LISTi+1, using = for the + element-equality procedure. List A is a subset of list B if every + element in A is equal to some element of B. When performing an + element comparison, the = procedure's first argument is an element + of A; its second, an element of B. + + (lset<= eq? '(a) '(a b a) '(a b c c)) => #t + + (lset<= eq?) => #t ; Trivial cases + (lset<= eq? '(a)) => #t + + +lset= = list1 ... -> boolean + Returns true iff every LISTi is set-equal to LISTi+1, using = for + the element-equality procedure. "Set-equal" simply means that + LISTi is a subset of LISTi+1, and LISTi+1 is a subset of LISTi. + + (lset= eq? '(b e a) '(a e b) '(e e b a)) => #t + + (lset= eq?) => #t ; Trivial cases + (lset= eq? '(a)) => #t + +lset-adjoin = list elt1 ... -> list + Adds the ELTi elements not already in the list parameter to the + result list. The result shares a common tail with the list parameter. + The new elements are added to the front of the list, but no guarantees + are made about their order. The = parameter is an equality procedure + used to determine if an ELTi is already a member of LIST. Its first + argument is an element of LIST; its second is one of the ELTi. + + The list parameter is always a suffix of the result -- even if the list + parameter contains repeated elements, these are not reduced. + + (lset-adjoin eq? '(a b c d c e) 'a 'e 'i 'o 'u) => (u o i a b c d c e) + +lset-union = list1 ... -> list + Returns the union of the lists, using = for the element-equality + procedure. + + The union of lists A and B is constructed as follows: + - If A is the empty list, the answer is B (or a copy of B). + - Otherwise, the result is initialised to be list A (or a copy of A). + - Proceed through the elements of list B in a left-to-right order. + If b is such an element of B, compare every element r of the current + result list to b: (= r b). If all comparisons fail, b is consed + onto the front of the result. + However, there is no guarantee that = will be applied to every pair + of arguments from A and B. In particular, if A is EQ? to B, the operation + may immediately terminate. + + In the n-ary case, the two-argument list-union operation is simply + folded across the argument lists. + + (lset-union eq? '(a b c d e) '(a e i o u)) => (u o i a b c d e) + + ;; Repeated elements in LIST1 are preserved. + (lset-union eq? '(a a c) '(x a x)) => (x a a c) + + (lset-union eq?) => () ; Trivial cases + (lset-union eq? '(a b c)) => (a b c) + +lset-intersection = list1 list2 ... -> list + Returns the intersection of the lists, using = for the element-equality + procedure. + + The intersection of lists A and B is comprised of every element of A + that is = to some element of B: (= a b), for a in A, and b in B. + Note this implies that an element which appears in B and multiple times + in list A will also appear multiple times in the result. + + The order in which elements appear in the result is the same as + they appear in LIST1 -- that is, LSET-INTERSECTION essentially + filters LIST1, without disarranging element order. The result may + share a common tail with LIST1. + + In the n-ary case, the two-argument list-intersection operation is simply + folded across the argument lists. However, the dynamic order in which the + applications of = are made is not specified. The procedure may check an + element of LIST1 for membership in every other list before proceeding to + consider the next element of LIST1, or it may completely intersect LIST1 + and LIST2 before proceeding to LIST3, or it may go about its work in some + third order. + + (lset-intersection eq? '(a b c d e) '(a e i o u)) => (a e) + + ;; Repeated elements in LIST1 are preserved. + (lset-intersection eq? '(a x y a) '(x a x z)) => '(a x a) + + (lset-intersection eq? '(a b c)) => (a b c) ; Trivial case + +lset-difference = list1 list2 ... -> list + Returns the difference of the lists, using = for the element-equality + procedure -- all the elements of LIST1 that are not = to any element from + one of the other LISTi parameters. + + The = procedure's first argument is always an element of LIST1; its second + is an element of one of the other LISTi. Elements that are repeated + multiple times in the LIST1 parameter will occur multiple times in the + result. + + The order in which elements appear in the result is the same as + they appear in LIST1 -- that is, LSET-DIFFERENCE essentially + filters LIST1, without disarranging element order. The result may + share a common tail with LIST1. + + The dynamic order in which the applications of = are made is not + specified. The procedure may check an element of LIST1 for membership in + every other list before proceeding to consider the next element of LIST1, + or it may completely compute the difference of LIST1 and LIST2 before + proceeding to LIST3, or it may go about its work in some third order. + + (lset-difference eq? '(a b c d e) '(a e i o u)) => (b c d) + + (lset-difference eq? '(a b c)) => (a b c) ; Trivial case + +lset-xor = list1 ... -> list + Returns the exclusive-or of the sets, using = for the element-equality + procedure. If there are exactly two lists, this is all the elements + that appear in exactly one of the two lists. The operation is associative, + and thus extends to the n-ary case -- the elements that appear in an + odd number of the lists. The result may share a common tail with any of + the LISTi parameters. + + More precisely, for two lists A and B, A xor B is a list of + - every element a of A such that there is no element b of B + such that (= a b) + - every element b of B such that there is no element a of A + such that (= b a) + However, an implementation is allowed to assume that = is + symmetric -- that is, that + (= a b) => (= b a). + This means, for example, that if a comparison (= a b) produces + true for some a in A and b in B, both a and b may be removed from + inclusion in the result. + + In the n-ary case, the binary-xor operation is simply folded across + the lists. + + (lset-xor eq? '(a b c d e) '(a e i o u)) => (d c b i o u) + + ;; Trivial cases. + (lset-xor eq?) => () + (lset-xor eq? '(a b c d e)) => (a b c d e) + +lset-diff+intersection = list1 list2 ... -> [list list] + Returns two values -- the difference and the intersection of the lists. + Is equivalent to + (values (lset-difference = list1 list2 ...) + (lset-intersection = list1 + (lset-union = list2 ...))) + but can be implemented more efficiently. + + The = procedure's first argument is an element of LIST1; its second is + an element of one of the other LISTi. + + Either of the answer lists may share a common tail with LIST1. + This operation essentially partitions LIST1. + +lset-union! = list1 ... -> list +lset-intersection! = list1 list2 ... -> list +lset-difference! = list1 list2 ... -> list +lset-xor! = list1 ... -> list +lset-diff+intersection! = list1 list2 ... -> [list list] + These are linear-update variants. They are allowed, but not required, + to use the cons cells in their first list parameter to construct their + answer. LSET-UNION! is permitted to recycle cons cells from *any* of its + list arguments. + + +** Primitive side-effects +========================= + +These two procedures are the primitive, R5RS side-effect operations on pairs. + +set-car! pair object -> unspecified R5RS +set-cdr! pair object -> unspecified R5RS + These procedures store OBJECT in the car and cdr field of PAIR, + respectively. The value returned is unspecified. + + (define (f) (list 'not-a-constant-list)) + (define (g) '(constant-list)) + (set-car! (f) 3) ==> *unspecified* + (set-car! (g) 3) ==> *error* + + + +* Acknowledgements +------------------ + +The design of this library benefited greatly from the feedback provided during +the SRFI discussion phase. Among those contributing thoughtful commentary and +suggestions, both on the mailing list and by private discussion, were Mike +Ashley, Darius Bacon, Alan Bawden, Phil Bewig, Jim Blandy, Dan Bornstein, Per +Bothner, Anthony Carrico, Doug Currie, Kent Dybvig, Sergei Egorov, Doug Evans, +Marc Feeley, Matthias Felleisen, Will Fitzgerald, Matthew Flatt, Dan Friedman, +Lars Thomas Hansen, Brian Harvey, Erik Hilsdale, Wolfgang Hukriede, Richard +Kelsey, Donovan Kolbly, Shriram Krishnamurthi, Dave Mason, Jussi Piitulainen, +David Pokorny, Duncan Smith, Mike Sperber, Maciej Stachowiak, Harvey J. Stein, +John David Stone, and Joerg F. Wittenberger. I am grateful to them for their +assistance. + +I am also grateful the authors, implementors and documentors of all the systems +mentioned in the introduction. Aubrey Jaffer and Kent Pitman should be noted +for their work in producing Web-accessible versions of the R5RS and Common +Lisp spec, which was a tremendous aid. + +This is not to imply that these individuals necessarily endorse the final +results, of course. + + + +* References & Links +-------------------- + +This document, in HTML: + http://srfi.schemers.org/srfi-1/srfi-1.html + ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.html (draft) + +This document, in simple text format: + http://srfi.schemers.org/srfi-1/srfi-1.txt + ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1.txt (draft) + +Source code for the reference implementation: + http://srfi.schemers.org/srfi-1/srfi-1-reference.scm + ftp://ftp.ai.mit.edu/people/shivers/srfi/srfi-1/srfi-1-reference.scm (draft) + +Archive of SRFI-1 discussion-list email: + http://srfi.schemers.org/srfi-1/mail-archive/maillist.html + +SRFI web site: + http://srfi.schemers.org/ + + +[CLtL2] + Common Lisp: the Language + Guy L. Steele Jr. (editor). + Digital Press, Maynard, Mass., second edition 1990. + Available at http://www.harlequin.com/education/books/HyperSpec/ + +[R5RS] + Revised^5 Report on the Algorithmic Language Scheme, + R. Kelsey, W. Clinger, J. Rees (editors). + Higher-Order and Symbolic Computation, Vol. 11, No. 1, September, 1998. + and ACM SIGPLAN Notices, Vol. 33, No. 9, October, 1998. + + Available at http://www.schemers.org/Documents/Standards/ + + +* Copyright +----------- + +Certain portions of this document -- the specific, marked segments of text +describing the R5RS procedures -- were adapted with permission from the R5RS +report. + +All other text is copyright (C) Olin Shivers (1998, 1999). +All Rights Reserved. + +This document and translations of it may be copied and furnished to others, +and derivative works that comment on or otherwise explain it or assist in its +implementation may be prepared, copied, published and distributed, in whole or +in part, without restriction of any kind, provided that the above copyright +notice and this paragraph are included on all such copies and derivative +works. However, this document itself may not be modified in any way, such as +by removing the copyright notice or references to the Scheme Request For +Implementation process or editors, except as needed for the purpose of +developing SRFIs in which case the procedures for copyrights defined in the +SRFI process must be followed, or as required to translate it into languages +other than English. + +The limited permissions granted above are perpetual and will not be revoked by +the authors or their successors or assigns. + +This document and the information contained herein is provided on an "AS IS" +basis and THE AUTHORS AND THE SRFI EDITORS DISCLAIM ALL WARRANTIES, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE +INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF +MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + +* Ispell "buffer local" dictionary +---------------------------------- + +Ispell dumps "buffer local" words here. Please ignore. + + LocalWords: RS SRFI Chez RScheme MzScheme slib Bigloo APL SML API CDR GC's Ei + LocalWords: EQ consing lib xcons unzip del delq delv mem lset lset xor diff lp + LocalWords: alist assq assv assoc cdr cdddar cddddr ref memq memv george iff + LocalWords: proc lis accessor ary TAIL's NCONS EQV rcons Contrariwise clist + LocalWords: paribus lexeme parallelise Destructuring init FP flist eof CLISTn + LocalWords: generalisation elt cadr caddr rev kons knil len rzero LZERO Ki Ith + LocalWords: arg LISTi pred cond LISTn ANY's EVERY's Uniquifying lg ridentity + LocalWords: eq netnews generalise Maciej Stachowiak al Bewig LocalWords ELTi + LocalWords: anamorphism apomorphism CLISTi ALIST's url ceteris eltn caar KNULL + LocalWords: deconstructor RIGHT's KAR KDR kar kdr knull HTML CLtL Clinger + LocalWords: Rees Bawden Blandy Bornstein Bothner Carrico Currie Dybvig + LocalWords: Egorov Feeley Matthias Felleisen Flatt Hilsdale Hukriede + LocalWords: Kolbly Shriram Krishnamurthi Jussi Piitulainen Pokorny Joerg + LocalWords: Sperber Wittenberger documentors Jaffer diff --git a/scsh/lib/string-lib.scm b/scsh/lib/string-lib.scm new file mode 100644 index 0000000..a53e362 --- /dev/null +++ b/scsh/lib/string-lib.scm @@ -0,0 +1,1384 @@ +;;; Scheme Underground string-processing library -*- Scheme -*- +;;; Olin Shivers 11/98 + +;;; SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT +;;; This is *draft* code for a SRFI proposal. If you see this notice in +;;; production code, you've got obsolete, bad source -- go find the final +;;; non-draft code on the Net. +;;; SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT -- SRFI DRAFT + +;;; Some of this code had (extremely distant) origins in MIT Scheme's string +;;; lib, and was substantially reworked by Olin Shivers (shivers@ai.mit.edu) +;;; 9/98. As such, it is +;;; Copyright (c) 1988-1994 Massachusetts Institute of Technology. +;;; The copyright terms are essentially open-software terms; +;;; the precise terms are at the end of this file. +;;; +;;; The KMP string-search code was massively rehacked from Stephen Bevan's +;;; code, written for scmlib, and is thus covered by the GPL. If that's a +;;; problem, write one from scratch (there are citations to standard textbooks +;;; in the comments), or rip it out and use the ten-line doubly-nested loop +;;; that's commented out just above this code. +;;; +;;; I wish I could mark definitions in this code to be inlined. +;;; Certain functions could benefit from compiler support. +;;; +;;; My policy on checking start/end substring specs is not uniform. +;;; I avoided doing arg checks when the function directly calls another +;;; lower-level function that will check the start/end specs as well. +;;; This has the advantage of not doing redundant checks, but the disadvantage +;;; is that errors are not reported early, at the highest possible call. +;;; There's not much high-level error checking of the other args, anyway. +;;; -Olin + +;;; Exports: +;;; string-map string-map! +;;; string-fold string-unfold +;;; string-fold-right string-unfold-right +;;; string-tabulate +;;; string-for-each string-iter +;;; string-every string-any +;;; string-compare string-compare-ci +;;; substring-compare substring-compare-ci +;;; string= string< string> string<= string>= string<> +;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> +;;; substring= substring<> substring-ci= substring-ci<> +;;; substring< substring> substring-ci< substring-ci> +;;; substring<= substring>= substring-ci<= substring-ci>= +;;; string-upper-case? string-lower-case? +;;; capitalize-string capitalize-words string-downcase string-upcase +;;; capitalize-string! capitalize-words! string-downcase! string-upcase! +;;; string-take string-take-right +;;; string-drop string-drop-right +;;; string-pad string-pad-right +;;; string-trim string-trim-right string-trim-both +;;; string-filter string-delete +;;; string-index string-index-right string-skip string-skip-right +;;; string-prefix-count string-prefix-count-ci +;;; string-suffix-count string-suffix-count-ci +;;; substring-prefix-count substring-prefix-count-ci +;;; substring-suffix-count substring-suffix-count-ci +;;; string-prefix? string-prefix-ci? +;;; string-suffix? string-suffix-ci? +;;; substring-prefix? substring-prefix-ci? +;;; substring-suffix? substring-suffix-ci? +;;; substring? substring-ci? +;;; string-fill! string-copy! string-copy substring +;;; string-reverse string-reverse! reverse-list->string +;;; string->list +;;; string-concat string-concat/shared string-append/shared +;;; xsubstring string-xcopy! +;;; string-null? +;;; join-strings +;;; +;;; string? make-string string string-length string-ref string-set! +;;; string-append list->string +;;; +;;; make-kmp-restart-vector +;;; parse-final-start+end +;;; parse-start+end +;;; check-substring-spec + +;;; Imports +;;; This code has the following non-R5RS dependencies: +;;; - (RECEIVE (var ...) mv-exp body ...) multiple-value binding macro +;;; - Various imports from the char-set library +;;; - ERROR +;;; - LET-OPTIONALS and :OPTIONAL macros for handling optional arguments +;;; - The R5RS SUBSTRING function is accessed using the Scheme 48 +;;; STRUCTURE-REF magic accessor. + + +;;; Support for START/END substring specs +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This macro parses optional start/end arguments from arg lists, defaulting +;;; them to 0/(string-length s), and checks them for correctness. + +(define-syntax let-start+end + (syntax-rules () + ((let-start+end (start end) proc s-exp args-exp body ...) + (receive (start end) (parse-final-start+end proc s-exp args-exp) + body ...)))) + + +;;; Returns three values: start end rest + +(define (parse-start+end proc s args) + (let ((slen (string-length s))) + (if (pair? args) + + (let ((start (car args)) + (args (cdr args))) + (if (or (not (integer? start)) (< start 0)) + (error "Illegal substring START spec" proc start s) + (receive (end args) + (if (pair? args) + (let ((end (car args)) + (args (cdr args))) + (if (or (not (integer? end)) (< slen end)) + (error "Illegal substring END spec" proc end s) + (values end args))) + (values slen args)) + (if (<= start end) (values start end args) + (error "Illegal substring START/END spec" + proc start end s))))) + + (values 0 (string-length s) '())))) + +(define (parse-final-start+end proc s args) + (receive (start end rest) (parse-start+end proc s args) + (if (pair? rest) (error "Extra arguments to procedure" proc rest) + (values start end)))) + +(define (check-substring-spec proc s start end) + (if (or (< start 0) + (< (string-length s) end) + (< end start)) + (error "Illegal substring START/END spec." proc s start end))) + + + +;;; substring S START [END] +;;; string-copy S [START END] +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Redefine SUBSTRING so that the END parameter is optional. +;;; SUBSTRINGX is the underlying R5RS SUBSTRING function. All +;;; the code in this file uses the simple SUBSTRINGX, so you can +;;; easily port this code. + +(define substringx (structure-ref scheme substring)) ; Simple R5RS SUBSTRING + +(define (substring s start . maybe-end) ; Our SUBSTRING + (substringx s start (:optional maybe-end (string-length s)))) + +(define (string-copy s . maybe-start+end) + (let-start+end (start end) string-copy s maybe-start+end + (substringx s start end))) + + + +;;; Basic iterators and other higher-order abstractions +;;; (string-map proc s [start end]) +;;; (string-map! proc s [start end]) +;;; (string-fold kons knil s [start end]) +;;; (string-fold-right kons knil s [start end]) +;;; (string-unfold p f g seed) +;;; (string-for-each proc s [start end]) +;;; (string-iter proc s [start end]) +;;; (string-every pred s [start end]) +;;; (string-any pred s [start end]) +;;; (string-tabulate proc len) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; No guarantees about order in MAP, FOR-EACH, EVERY, ANY procs. +;;; +;;; You want compiler support for high-level transforms on fold and unfold ops. +;;; You'd at least like a lot of inlining for clients of these procedures. +;;; Hold your breath. + +(define (string-map proc s . maybe-start+end) + (let-start+end (start end) string-map s maybe-start+end + (let* ((len (- end start)) + (ans (make-string len))) + (do ((i (- end 1) (- i 1)) + (j (- len 1) (- j 1))) + ((< j 0)) + (string-set! ans j (proc (string-ref s i)))) + ans))) + +(define (string-map! proc s . maybe-start+end) + (let-start+end (start end) string-map! s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i (proc (string-ref s i)))))) + +(define (string-fold kons knil s . maybe-start+end) + (let-start+end (start end) string-fold s maybe-start+end + (let lp ((v knil) (i start)) + (if (< i end) (lp (kons (string-ref s i) v) (+ i 1)) + v)))) + +(define (string-fold-right kons knil s . maybe-start+end) + (let-start+end (start end) string-fold-right s maybe-start+end + (let lp ((v knil) (i (- end 1))) + (if (>= i start) (lp (kons (string-ref s i) v) (- i 1)) + v)))) + +;;; (string-unfold p f g seed) +;;; This is the fundamental constructor for strings. +;;; - G is used to generate a series of "seed" values from the initial seed: +;;; SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... +;;; - P tells us when to stop -- when it returns true when applied to one +;;; of these seed values. +;;; - F maps each seed value to the corresponding character +;;; in the result string. +;;; +;;; In other words, the following (simple, inefficient) definition holds: +;;; (string-unfold p f g seed) = +;;; (if (p seed) "" +;;; (string-append (string (f seed)) +;;; (string-unfold p f g (g seed)))) +;;; +;;; STRING-UNFOLD is a fairly powerful constructor -- you can use it to +;;; reverse a string, copy a string, convert a list to a string, read +;;; a port into a string, and so forth. Examples: +;;; (port->string port) = +;;; (string-unfold (compose eof-object? peek-char) +;;; read-char identity port) +;;; +;;; (list->string lis) = (string-unfold null? car cdr lis) +;;; +;;; (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) + +;;; A problem with the following simple formulation is that it pushes one +;;; stack frame for every char in the result string -- an issue if you are +;;; using it to read a 100kchar string. So we don't use it -- but I include +;;; it to give a clear, straightforward description of what the function +;;; does. + +;(define (string-unfold p f g seed) +; (let recur ((seed seed) (i 0)) +; (if (p seed) (make-string i) +; (let* ((c (f seed)) +; (s (recur (g seed) (+ i 1)))) +; (string-set! s i c) +; s)))) + +;;; This formulation chunks up the constructed string into 1024-char chunks, +;;; popping the stack frames. So it'll reduce stack growth by a factor of +;;; 1024. Marc Feeley alerted me to this issue and its general solution. + +(define (string-unfold p f g seed) + (string-concat/shared + (let recur ((seed seed)) + (receive (s seed done?) + (let recur2 ((seed seed) (i 0)) + (cond ((p seed) (values (make-string i) seed #t)) + ((>= i 1024) (values (make-string i) seed #f)) + (else (let ((c (f seed))) + (receive (s seed done?) + (recur2 (g seed) (+ i 1)) + (string-set! s i c) + (values s seed done?)))))) + + (if done? (list s) + (cons s (recur seed))))))) + + +;;; This is the same as STRING-UNFOLD, but defined for multiple +;;; seed parameters. If you pass N seeds, then +;;; - P maps N parameters to a boolean. +;;; - F maps N parameters to a character. +;;; - G maps N parameters to N return values. +;;; This definition does a lot of consing; it would need a fair amount +;;; of compiler support to be efficient. + +; Not released +;(define (string-unfoldn p f g . seeds) +; (apply string-append +; (let recur ((seeds seeds)) +; (receive (s seeds done?) +; (let recur2 ((seeds seeds) (i 0)) +; (cond ((apply p seeds) (values (make-string i) seeds #t)) +; ((>= i 1024) (values (make-string i) seeds #f)) +; (else (let ((c (apply f seeds))) +; (receive seeds (apply g seeds) +; (receive (s seeds done?) +; (recur2 seeds (+ i 1)) +; (string-set! s i c) +; (values s seeds done?))))))) +; +; (if done? (list s) +; (cons s (recur seeds))))))) + +(define (string-for-each proc s . maybe-start+end) + (let-start+end (start end) string-for-each s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (proc (string-ref s i))))) + +(define (string-iter proc s . maybe-start+end) + (let-start+end (start end) string-iter s maybe-start+end + (do ((i start (+ i 1))) + ((>= i end)) + (proc (string-ref s i))))) + +(define (string-every pred s . maybe-start+end) + (let-start+end (start end) string-every s maybe-start+end + (let lp ((i (- end 1))) + (or (< i start) + (and (pred (string-ref s i)) + (lp (- i 1))))))) + +(define (string-any pred s . maybe-start+end) + (let-start+end (start end) string-any s maybe-start+end + (let lp ((i (- end 1))) + (and (>= i start) + (or (pred (string-ref s i)) + (lp (- i 1))))))) + + +(define (string-tabulate proc len) + (let ((s (make-string len))) + (do ((i (- len 1) (- i 1))) + ((< i 0)) + (string-set! s i (proc i))) + s)) + + + +;;; string-prefix-count[-ci] s1 s2 +;;; string-suffix-count[-ci] s1 s2 +;;; substring-prefix-count[-ci] s1 start1 end1 s2 start2 end2 +;;; substring-suffix-count[-ci] s1 start1 end1 s2 start2 end2 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Find the length of the common prefix/suffix. +;;; It is not required that the two substrings passed be of equal length. +;;; This was microcode in MIT Scheme -- a very tightly bummed primitive. + +(define (substring-prefix-count s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-prefix-count s1 start1 end1) + (check-substring-spec substring-prefix-count s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + (let lp ((i start1) (j start2)) + (if (or (>= i end1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1)))))) + +(define (substring-suffix-count s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-suffix-count s1 start1 end1) + (check-substring-spec substring-suffix-count s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + (let lp ((i (- end1 1)) (j (- end2 1))) + (if (or (< i start1) + (not (char=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1)))))) + +(define (substring-prefix-count-ci s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-prefix-count-ci s1 start1 end1) + (check-substring-spec substring-prefix-count-ci s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (end1 (+ start1 delta))) + (let lp ((i start1) (j start2)) + (if (or (>= i end1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- i start1) + (lp (+ i 1) (+ j 1)))))) + +(define (substring-suffix-count-ci s1 start1 end1 s2 start2 end2) + (check-substring-spec substring-suffix-count-ci s1 start1 end1) + (check-substring-spec substring-suffix-count-ci s2 start2 end2) + (let* ((delta (min (- end1 start1) (- end2 start2))) + (start1 (- end1 delta))) + (let lp ((i (- end1 1)) (j (- end2 1))) + (if (or (< i start1) + (not (char-ci=? (string-ref s1 i) + (string-ref s2 j)))) + (- (- end1 i) 1) + (lp (- i 1) (- j 1)))))) + + +(define (string-prefix-count s1 s2) + (substring-prefix-count s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-count s1 s2) + (substring-suffix-count s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-prefix-count-ci s1 s2) + (substring-prefix-count-ci s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-count-ci s1 s2) + (substring-suffix-count-ci s1 0 (string-length s1) s2 0 (string-length s2))) + + + +;;; string-prefix? s1 s2 +;;; string-suffix? s1 s2 +;;; string-prefix-ci? s1 s2 +;;; string-suffix-ci? s1 s2 +;;; +;;; substring-prefix? s1 start1 end1 s2 start2 end2 +;;; substring-suffix? s1 start1 end1 s2 start2 end2 +;;; substring-prefix-ci? s1 start1 end1 s2 start2 end2 +;;; substring-suffix-ci? s1 start1 end1 s2 start2 end2 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; These are all simple derivatives of the previous counting funs. + +(define (string-prefix? s1 s2) + (substring-prefix? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix? s1 s2) + (substring-suffix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-prefix-ci? s1 s2) + (substring-prefix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (string-suffix-ci? s1 s2) + (substring-suffix-ci? s1 0 (string-length s1) s2 0 (string-length s2))) + +(define (substring-prefix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= (substring-prefix-count s1 start1 end1 + s2 start2 end2) + len1)))) + +(define (substring-suffix? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-suffix-count s1 start1 end1 + s2 start2 end2))))) + +(define (substring-prefix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-prefix-count-ci s1 start1 end1 + s2 start2 end2))))) + +(define (substring-suffix-ci? s1 start1 end1 s2 start2 end2) + (let ((len1 (- end1 start1))) + (and (<= len1 (- end2 start2)) ; Quick check + (= len1 (substring-suffix-count-ci s1 start1 end1 + s2 start2 end2))))) + + +;;; string-compare s1 s2 lt-proc eq-proc gt-proc +;;; string-compare-ci s1 s2 eq-proc lt-proc gt-proc +;;; substring-compare s1 start1 end1 s2 start2 end2 +;;; lt-proc eq-proc gt-proc +;;; substring-compare-ci s1 start1 end1 s2 start2 end2 +;;; lt-proc eq-proc gt-proc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Primitive string-comparison functions. +;;; Continuation order is different from MIT Scheme. +;;; Continuations are applied to s1's mismatch index; +;;; in the case of equality, this is END1. + +(define (substring-compare s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (substring-prefix-count s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) + proc> + (if (char)) + (+ match start1)))))) + +(define (substring-compare-ci s1 start1 end1 s2 start2 end2 + proc< proc= proc>) + (let ((size1 (- end1 start1)) + (size2 (- end2 start2))) + (let ((match (substring-prefix-count-ci s1 start1 end1 s2 start2 end2))) + (if (= match size1) + ((if (= match size2) proc= proc<) end1) + ((if (= match size2) proc> + (if (char-ci)) + (+ start1 match)))))) + +(define (string-compare s1 s2 proc< proc= proc>) + (substring-compare s1 0 (string-length s1) + s2 0 (string-length s2) + proc< proc= proc>)) + +(define (string-compare-ci s1 s2 proc< proc= proc>) + (substring-compare-ci s1 0 (string-length s1) + s2 0 (string-length s2) + proc< proc= proc>)) + + +;;; string= string<> string-ci= string-ci<> +;;; string< string> string-ci< string-ci> +;;; string<= string>= string-ci<= string-ci>= +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Simple definitions in terms of the previous comparison funs. +;;; Inequality predicates return #f or mismatch index. +;;; I sure hope these defns get integrated. + +(define (string= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) #f)))) + +(define (string< s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) #f)))) + +(define (string> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare s1 s2 (lambda (i) #f) (lambda (i) #f) (lambda (i) i)))) + +(define (string<= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare s1 s2 (lambda (i) i) (lambda (i) i) (lambda (i) #f)))) + +(define (string>= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) i)))) + +(define (string<> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) i)))) + + +(define (string-ci= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) #f)))) + +(define (string-ci< s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) #f)))) + +(define (string-ci> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) #f) (lambda (i) i)))) + +(define (string-ci<= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) i) (lambda (i) #f)))) + +(define (string-ci>= s1 s2) + (if (eq? s1 s2) (string-length s1) ; Fast path + (string-compare-ci s1 s2 (lambda (i) #f) (lambda (i) i) (lambda (i) i)))) + +(define (string-ci<> s1 s2) + (and (not (eq? s1 s2)) ; Fast path + (string-compare-ci s1 s2 (lambda (i) i) (lambda (i) #f) (lambda (i) i)))) + + +(define (substring= s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) #f) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring<> s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) i))) + +(define (substring< s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) #f))) + +(define (substring> s1 start1 end1 s2 start2 end2) + (substring< s2 start2 end2 s1 start1 end1)) + +(define (substring<= s1 start1 end1 s2 start2 end2) + (substring-compare s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring>= s1 start1 end1 s2 start2 end2) + (substring<= s2 start2 end2 s1 start1 end1)) + +(define (substring-ci= s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) #f) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring-ci<> s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) i))) + +(define (substring-ci< s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) #f) + (lambda (i) #f))) + +(define (substring-ci> s1 start1 end1 s2 start2 end2) + (substring-ci< s2 start2 end2 s1 start1 end1)) + +(define (substring-ci<= s1 start1 end1 s2 start2 end2) + (substring-compare-ci s1 start1 end1 + s2 start2 end2 + (lambda (i) i) + (lambda (i) i) + (lambda (i) #f))) + +(define (substring-ci>= s1 start1 end1 s2 start2 end2) + (substring-ci<= s2 start2 end2 s1 start1 end1)) + + + +;;; Case hacking +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-upper-case? +;;; string-lower-case? +;;; +;;; string-upcase s [start end] +;;; string-upcase! s [start end] +;;; string-downcase s [start end] +;;; string-downcase! s [start end] +;;; +;;; capitalize-string s [start end] +;;; capitalize-string! s [start end] +;;; Uppercase first alphanum char, lowercase rest. +;;; +;;; capitalize-words s [start end] +;;; capitalize-words! s [start end] +;;; Capitalize every contiguous alphanum sequence: uppercase +;;; first char, lowercase rest. + +;;; These two use a different definition of an "upper-/lower-case string" +;;; than MIT Scheme uses: + +(define (string-upper-case? s . maybe-start+end) + (not (apply string-any char-lower-case? s maybe-start+end))) + +(define (string-lower-case? s . maybe-start+end) + (not (apply string-any char-upper-case? s maybe-start+end))) + + +(define (string-upcase s . maybe-start+end) + (apply string-map char-upcase s maybe-start+end)) + +(define (string-upcase! s . maybe-start+end) + (apply string-map! char-upcase s maybe-start+end)) + +(define (string-downcase s . maybe-start+end) + (apply string-map char-downcase s maybe-start+end)) + +(define (string-downcase! s . maybe-start+end) + (apply string-map! char-downcase s maybe-start+end)) + + +;;; capitalize-string s [start end] +;;; capitalize-string! s [start end] +;;; Uppercase first alphanum char, lowercase rest. + +(define (really-capitalize-string! s start end) + (cond ((string-index s char-set:alphanumeric start end) => + (lambda (i) + (string-set! s i (char-upcase (string-ref s i))) + (string-downcase! s i))))) + +(define (capitalize-string! s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (really-capitalize-string! s start end))) + +(define (capitalize-string s . maybe-start+end) + (let-start+end (start end) capitalize-string s maybe-start+end + (let ((ans (substringx s start end))) + (really-capitalize-string! ans 0 (- end start)) + ans))) + +;;; capitalize-words s [start end] +;;; capitalize-words! s [start end] +;;; Capitalize every contiguous alphanum sequence: uppercase +;;; first char, lowercase rest. + +(define (really-capitalize-words! s start end) + (let lp ((i start)) + (cond ((string-index s char-set:alphanumeric i end) => + (lambda (i) + (string-set! s i (char-upcase (string-ref s i))) + (let ((i1 (+ i 1))) + (cond ((string-skip s char-set:alphanumeric i1 end) => + (lambda (j) + (string-downcase! s i1 j) + (lp (+ j 1)))) + (else (string-downcase! s i1 end))))))))) + +(define (capitalize-words! s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (really-capitalize-words! s start end))) + +(define (capitalize-words s . maybe-start+end) + (let-start+end (start end) capitalize-string! s maybe-start+end + (let ((ans (substringx s start end))) + (really-capitalize-words! ans 0 (- end start)) + ans))) + + + +;;; Cutting & pasting strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-take string nchars +;;; string-drop string nchars +;;; +;;; string-pad string k [char start end] +;;; string-pad-right string k [char start end] +;;; +;;; string-trim string [char/char-set/pred start end] +;;; string-trim-right string [char/char-set/pred start end] +;;; string-trim-both string [char/char-set/pred start end] +;;; +;;; These trimmers invert the char-set meaning from MIT Scheme -- you +;;; say what you want to trim. + +(define (string-take s n) + (if (> n 0) + (substringx s 0 n) + (let ((len (string-length s))) + (substringx s (+ len n) len)))) + +(define (string-drop s n) + (let ((len (string-length s))) + (if (> n 0) + (substringx s n len) + (substringx s 0 (+ len n))))) + +(define (string-trim s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip s criteria start end) => + (lambda (i) (substringx s i end))) + (else "")))) + +(define (string-trim-right s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip-right s criteria end start) => + (lambda (i) (substringx s 0 (+ 1 i)))) + (else "")))) + +(define (string-trim-both s . args) + (let-optionals args ((criteria char-set:whitespace) + (start 0) + (end (string-length s))) + (cond ((string-skip s criteria start end) => + (lambda (i) (substringx s i (+ 1 (string-skip-right s criteria end))))) + (else "")))) + + +(define (string-pad-right s n . args) + (let-optionals args ((char #\space) (start 0) (end (string-length s))) + (check-substring-spec string-pad-right s start end) + (let ((len (- end start))) + (cond ((= n len) ; No pad. + (if (zero? start) s (substringx s start end))) + + ((< n len) (substringx s start (+ start n))) ; Trim. + + (else (let ((ans (make-string n char))) + (string-copy! ans 0 s start end) + ans)))))) + +(define (string-pad s n . args) + (let-optionals args ((char #\space) (start 0) (end (string-length s))) + (check-substring-spec string-pad s start end) + (let ((len (- end start))) + (cond ((= n len) ; No pad. + (if (zero? start) s (substringx s start end))) + + ((< n len) (substringx s (- end n) end)) ; Trim. + + (else (let ((ans (make-string n char))) + (string-copy! ans (- n len) s start end) + ans)))))) + + + +;;; Filtering strings +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-delete char/char-set/pred string [start end] +;;; string-filter char/char-set/pred string [start end] +;;; +;;; If the filter criteria is a char or char-set, we scan the string twice +;;; with string-fold -- once to determine the length of the result string, +;;; and once to do the filtered copy. +;;; If the filter criteria is a predicate, we don't do this double-scan +;;; strategy, because the predicate might have side-effects or be very +;;; expensive to compute. So we preallocate a temp buffer pessimistically, +;;; and only do one scan over S. This is likely to be faster and more +;;; space-efficient than consing a list. + +(define (string-delete criteria s . maybe-start+end) + (let-start+end (start end) string-delete s maybe-start+end + (if (procedure? criteria) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criteria c) i + (begin (string-set! temp i c) + (+ i 1)))) + 0 s start end))) + (if (= ans-len slen) temp (substringx temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criteria) criteria) + ((char? criteria) (char-set criteria)) + (else (error "string-delete criteria not predicate, char or char-set" criteria)))) + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (+ i 1))) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + i + (begin (string-set! ans i c) + (+ i 1)))) + 0 s start end) + ans)))) + +(define (string-filter criteria s . maybe-start+end) + (let-start+end (start end) string-filter s maybe-start+end + (if (procedure? criteria) + (let* ((slen (- end start)) + (temp (make-string slen)) + (ans-len (string-fold (lambda (c i) + (if (criteria c) + (begin (string-set! temp i c) + (+ i 1)) + i)) + 0 s start end))) + (if (= ans-len slen) temp (substringx temp 0 ans-len))) + + (let* ((cset (cond ((char-set? criteria) criteria) + ((char? criteria) (char-set criteria)) + (else (error "string-delete criteria not predicate, char or char-set" criteria)))) + + (len (string-fold (lambda (c i) (if (char-set-contains? cset c) + (+ i 1) + i)) + 0 s start end)) + (ans (make-string len))) + (string-fold (lambda (c i) (if (char-set-contains? cset c) + (begin (string-set! ans i c) + (+ i 1)) + i)) + 0 s start end) + ans)))) + + + +;;; String search +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-index string char/char-set/pred [start end] +;;; string-index-right string char/char-set/pred [end start] +;;; string-skip string char/char-set/pred [start end] +;;; string-skip-right string char/char-set/pred [end start] +;;; Note the odd start/end ordering of index-right and skip-right params. +;;; There's a lot of replicated code here for efficiency. +;;; For example, the char/char-set/pred discrimination has +;;; been lifted above the inner loop of each proc. + +(define (string-index str criteria . maybe-start+end) + (let-start+end (start end) string-index str maybe-start+end + (cond ((char? criteria) + (let lp ((i start)) + (and (< i end) + (if (char=? criteria (string-ref str i)) i + (lp (+ i 1)))))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criteria (string-ref str i)) i + (lp (+ i 1)))))) + ((procedure? criteria) + (let lp ((i start)) + (and (< i end) + (if (criteria (string-ref str i)) i + (lp (+ i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index criteria))))) + +(define (string-index-right str criteria . maybe-end+start) + (let-optionals maybe-end+start ((start 0) (end (string-length str))) + (check-substring-spec string-index-right str start end) + (cond ((char? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criteria (string-ref str i)) i + (lp (- i 1)))))) + ((char-set? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criteria (string-ref str i)) i + (lp (- i 1)))))) + ((procedure? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criteria (string-ref str i)) i + (lp (- i 1)))))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-index-right criteria))))) + +(define (string-skip str criteria . maybe-start+end) + (let-start+end (start end) string-skip str maybe-start+end + (cond ((char? criteria) + (let lp ((i start)) + (and (< i end) + (if (char=? criteria (string-ref str i)) + (lp (+ i 1)) + i)))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (char-set-contains? criteria (string-ref str i)) + (lp (+ i 1)) + i)))) + ((char-set? criteria) + (let lp ((i start)) + (and (< i end) + (if (criteria (string-ref str i)) (lp (+ i 1)) + i)))) + (else (error "Second param is neither char-set, char, or predicate procedure." + string-skip criteria))))) + +(define (string-skip-right str criteria . maybe-end+start) + (let-optionals maybe-end+start ((start 0) (end (string-length str))) + (check-substring-spec string-index-right str start end) + (cond ((char? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char=? criteria (string-ref str i)) + (lp (- i 1)) + i)))) + ((char-set? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (char-set-contains? criteria (string-ref str i)) + (lp (- i 1)) + i)))) + ((procedure? criteria) + (let lp ((i (- end 1))) + (and (>= i 0) + (if (criteria (string-ref str i)) (lp (- i 1)) + i)))) + (else (error "CRITERIA param is neither char-set or char." + string-skip-right criteria))))) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-fill! string char [start end] +;;; +;;; string-copy! to tstart from [fstart fend] +;;; Guaranteed to work, even if s1 eq s2. + +(define (string-fill! s char . maybe-start+end) + (let-start+end (start end) string-fill! s maybe-start+end + (do ((i (- end 1) (- i 1))) + ((< i start)) + (string-set! s i char)))) + +(define (string-copy! to tstart from . maybe-fstart+fend) + (let-start+end (fstart fend) string-copy! from maybe-fstart+fend + (let ((tend (+ tstart (- fend fstart)))) + (check-substring-spec string-copy! to tstart tend) + (if (> fstart tstart) + (do ((i fstart (+ i 1)) + (j tstart (+ j 1))) + ((>= i fend)) + (string-set! to j (string-ref from i))) + + (do ((i (- fend 1) (- i 1)) + (j (- tend 1) (- j 1))) + ((< i fstart)) + (string-set! to j (string-ref from i))))))) + + + +;;; Returns starting-position or #f if not true. +;;; This implementation is slow & simple. See below for KMP. +;;; Boyer-Moore would be nice. +;(define (substring? substring string . maybe-start+end) +; (let-start+end (start end) string substring? maybe-start+end +; (if (string-null? substring) start +; (let* ((len (string-length substring)) +; (i-bound (- end len)) +; (char1 (string-ref substring start))) +; (let lp ((i 0)) +; (cond ((string-index string char1 i i-bound) => +; (lambda (i) +; (if (substring= substring 0 len string i (+ i len)) +; i +; (lp (+ i 1))))) +; (else #f))))))) + + +;;; Searching for an occurence of a substring +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This uses the KMP algorithm +;;; "Fast Pattern Matching in Strings" +;;; SIAM J. Computing 6(2):323-350 1977 +;;; D. E. Knuth, J. H. Morris and V. R. Pratt +;;; also described in +;;; "Pattern Matching in Strings" +;;; Alfred V. Aho +;;; Formal Language Theory - Perspectives and Open Problems +;;; Ronald V. Brook (editor) +;;; This algorithm is O(m + n) where m and n are the +;;; lengths of the pattern and string respectively +;;; Original version of this code by bevan; I have substantially rehacked it. + +(define (substring? pattern source . maybe-start+end) + (let-start+end (start end) substring? source maybe-start+end + (really-substring? char=? pattern source start end))) + +(define (substring-ci? pattern source . maybe-start+end) + (let-start+end (start end) substring-ci? source maybe-start+end + (really-substring? char-ci=? pattern source start end))) + +;;; Compute the Knuth-Morris-Pratt restart vector RV for string PATTERN. If +;;; we have matched chars 0..i-1 of PATTERN against a search string S, and +;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to +;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to +;;; S[k+1] and PATTERN[0]. +;;; +;;; In other words, if you have matched the first i chars of PATTERN, but +;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest +;;; prefix of PATTERN is that you have matched. +;;; +;;; C= is the character comparator -- usefully CHAR= or CHAR-CI=. +;;; +;;; I've split this out as a separate function in case other constant-string +;;; searchers might want to use it. + +(define (make-kmp-restart-vector pattern c=) + (let* ((plen (string-length pattern)) + (rv (make-vector plen))) + (if (> plen 0) + (let ((plen-1 (- plen 1))) + (vector-set! rv 0 -1) + (let lp ((i 0) (j -1)) + (if (< i plen-1) + (if (or (= j -1) + (c= (string-ref pattern i) + (string-ref pattern j))) + (let ((i (+ 1 i)) + (j (+ 1 j))) + (vector-set! rv i j) + (lp i j)) + (lp i (vector-ref rv j))))))) + rv)) + +(define (really-substring? c= pattern source start end) + (let ((plen (string-length pattern)) + (rv (make-kmp-restart-vector pattern c=))) + + ;; The search loop. SJ & PJ are redundant state. + (let lp ((si start) (pi 0) + (sj (- end start)) ; (- end si) -- how many chars left. + (pj plen)) ; (- plen pi) -- how many chars left. + + (if (= pi plen) (- si plen) ; Win. + + (and (<= pj sj) ; Lose. + + (if (c= (string-ref source si) ; Search. + (string-ref pattern pi)) + (lp (+ 1 si) (+ 1 pi) (- sj 1) (- pj 1)) ; Advance. + + (let ((pi (vector-ref rv pi))) ; Retreat. + (if (= pi -1) + (lp (+ si 1) 0 (- sj 1) plen) ; Punt. + (lp si pi sj (- plen pi)))))))))) + + + +;;; Misc +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; (string-reverse s [start end]) +;;; (string-reverse! s [start end]) +;;; (string-null? s) + +(define (string-null? s) (zero? (string-length s))) + +(define (string-reverse s . maybe-start+end) + (let-start+end (start end) string-reverse s maybe-start+end + (let ((ans (make-string (- end start)))) + (do ((i (- end 1) (- i 1)) + (j start (+ j 1))) + ((< i j)) + (string-set! ans i (string-ref s j)) + (string-set! ans j (string-ref s i))) + ans))) + +(define (string-reverse! s . maybe-start+end) + (let-start+end (start end) string-reverse! s maybe-start+end + (do ((i (- end 1) (- i 1)) + (j start (+ j 1))) + ((<= i j)) + (let ((ci (string-ref s i))) + (string-set! s i (string-ref s j)) + (string-set! s j ci))))) + + +(define (reverse-list->string clist) + (let* ((len (length clist)) + (s (make-string len))) + (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) + ((not (pair? clist))) + (string-set! s i (car clist))) + s)) + + +;(define (string->list s . maybe-start+end) +; (let-start+end (start end) string->list s maybe-start+end +; (do ((i (- end 1) (- i 1)) +; (ans '() (cons (string-ref s i) ans))) +; ((< i start) ans)))) + +(define (string->list s . maybe-start+end) + (apply string-fold-right s cons '() maybe-start+end)) + + + +;;; string-concat string-list -> string +;;; string-concat/shared string-list -> string +;;; string-append/shared s ... -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; STRING-APPEND/SHARED has license to return a string that shares storage +;;; with any of its arguments. In particular, if there is only one non-empty +;;; string amongst its parameters, it is permitted to return that string as +;;; its result. STRING-APPEND, by contrast, always allocates new storage. +;;; +;;; STRING-CONCAT & STRING-CONCAT/SHARED are passed a list of strings, +;;; which they concatenate into a result string. STRING-CONCAT always +;;; allocates a fresh string; STRING-CONCAT/SHARED may (or may not) return +;;; a result that shares storage with any of its arguments. In particular, +;;; if it is applied to a singleton list, it is permitted to return the +;;; car of that list as its value. +;;; +;;; This is portable code, but could be much more efficient w/compiler +;;; support. Especially the n-ary guys. + +;;; We delete the empty strings from the parameter list before handing +;;; off to string-concat/shared. I wrote the recursion out by hand instead +;;; of using list-lib's FILTER or FILTER! to minimize non-R5RS dependencies. + +(define (string-append/shared . strings) (string-concat/shared strings)) + +(define (string-concat/shared strings) + (let ((strings (let recur ((strings strings)) ; Delete empty strings. + (if (pair? strings) + (let ((s (car strings)) + (tail (recur (cdr strings)))) + (if (string-null? s) tail (cons s tail))) + '())))) + + (cond ((not (pair? strings)) "") ; () => "". + ((not (pair? (cdr strings))) (car strings)) ; (s) => s. + (else (string-concat strings))))) ; Allocate & concat. + +; Alas, Scheme 48's APPLY blows up if you have many, many arguments. +;(define (string-concat strings) (apply string-append strings)) + +;;; Here it is written out. I avoid using REDUCE to add up string lengths +;;; to avoid non-R5RS dependencies. +(define (string-concat strings) + (let* ((total (do ((strings strings (cdr strings)) + (i 0 (+ i (string-length (car strings))))) + ((not (pair? strings)) i))) + (ans (make-string total))) + (let lp ((i 0) (strings strings)) + (if (pair? strings) + (let ((s (car strings))) + (string-copy! ans i s) + (lp (+ i (string-length s)) (cdr strings))))) + ans)) + + + + +;;; xsubstring s from [to start end] -> string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; S is a string; START and END are optional arguments that demarcate +;;; a substring of S, defaulting to 0 and the length of S (e.g., the whole +;;; string). Replicate this substring up and down index space, in both the +;; positive and negative directions. For example, if S = "abcdefg", START=3, +;;; and END=6, then we have the conceptual bidirectionally-infinite string +;;; ... d e f d e f d e f d e f d e f d e f d e f ... +;;; ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... +;;; XSUBSTRING returns the substring of this string beginning at index FROM, +;;; and ending at TO (which defaults to FROM+(END-START)). +;;; +;;; You can use XSUBSTRING in many ways: +;;; - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" +;;; - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" +;;; - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" +;;; +;;; Note that +;;; - The FROM/TO indices give a half-open range -- the characters from +;;; index FROM up to, but not including index TO. +;;; - The FROM/TO indices are not in terms of the index space for string S. +;;; They are in terms of the replicated index space of the substring +;;; defined by S, START, and END. +;;; +;;; It is an error if START=END -- although this is allowed by special +;;; dispensation when FROM=TO. + +(define (xsubstring s from . maybe-to+start+end) + (receive (to start end) + (if (pair? maybe-to+start+end) + (let-start+end (start end) xsubstring s (cdr maybe-to+start+end) + (values (car maybe-to+start+end) start end)) + (let ((slen (string-length s))) + (values (+ from slen) 0 slen))) + (let ((slen (- end start)) + (anslen (- to from))) + (cond ((< anslen 0) + (error "Illegal FROM/TO spec passed to xsubstring -- FROM > TO." + s from to start end)) + + ((zero? anslen) "") + ((zero? slen) (error "Empty (sub)string passed to xsubstring" + s from to start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (make-string anslen (string-ref s start))) + + ;; Selected text falls entirely within one span. + ((= (floor (/ from slen)) (floor (/ to slen))) + (substringx s (+ start (modulo from slen)) + (+ start (modulo to slen)))) + + ;; Selected text requires multiple spans. + (else (let ((ans (make-string anslen))) + (multispan-repcopy! ans 0 s from to start end) + ans)))))) + + +;;; string-xcopy! target tstart s sfrom [sto start end] -> unspecific +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Exactly the same as xsubstring, but the extracted text is written +;;; into the string TARGET starting at index TSTART. +;;; This operation is not defined if (EQ? TARGET S) -- you cannot copy +;;; a string on top of itself. + +(define (string-xcopy! target tstart s sfrom . maybe-sto+start+end) + (receive (sto start end) + (if (pair? maybe-sto+start+end) + (let-start+end (start end) string-xcopy! s (cdr maybe-sto+start+end) + (values (car maybe-sto+start+end) start end)) + (let ((slen (string-length s))) + (values (+ sfrom slen) 0 slen))) + + (let* ((tocopy (- sto sfrom)) + (tend (+ tstart tocopy)) + (slen (- end start))) + (check-substring-spec string-xcopy! target tstart tend) + (cond ((< tocopy 0) + (error "Illegal FROM/TO spec passed to string-xcopy! -- FROM > TO." + target tstart s sfrom sto start end)) + ((zero? tocopy)) + ((zero? slen) (error "Empty (sub)string passed to string-xcopy!" + target tstart s sfrom sto start end)) + + ((= 1 slen) ; Fast path for 1-char replication. + (string-fill! target (string-ref s start) tstart tend)) + + ;; Selected text falls entirely within one span. + ((= (floor (/ sfrom slen)) (floor (/ sto slen))) + (string-copy! target tstart s + (+ start (modulo sfrom slen)) + (+ start (modulo sto slen)))) + + ;; Multi-span copy. + (else (multispan-repcopy! target tstart s sfrom sto start end)))))) + +;;; This is the core copying loop for XSUBSTRING and STRING-XCOPY! +;;; Internal -- not exported, no careful arg checking. +(define (multispan-repcopy! target tstart s sfrom sto start end) + (let* ((slen (- end start)) + (i0 (+ start (modulo sfrom slen))) + (total-chars (- sto sfrom))) + + ;; Copy the partial span @ the beginning + (string-copy! target tstart s i0 end) + + (let* ((ncopied (- end i0)) ; We've copied this many. + (nleft (- total-chars ncopied)) ; # chars left to copy. + (nspans (quotient nleft slen))) ; # whole spans to copy + + ;; Copy the whole spans in the middle. + (do ((i (+ tstart ncopied) (+ i slen)) ; Current target index. + (nspans nspans (- nspans 1))) ; # spans to copy + ((zero? nspans) + ;; Copy the partial-span @ the end & we're done. + (string-copy! target i s start (+ start (- total-chars (- i tstart))))) + + (string-copy! target i s start end))))) ; Copy a whole span. + + + +;;; (join-strings string-list [delimiter grammar]) => string +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Paste strings together using the delimiter string. +;;; +;;; (join-strings '("foo" "bar" "baz") ":") => "foo:bar:baz" +;;; +;;; DELIMITER defaults to a single space " " +;;; GRAMMAR is one of the symbols {infix, suffix} and defaults to 'infix. + +;;; (join-strings strings [delim grammar]) + +(define (join-strings strings . args) + (if (pair? strings) + (let-optionals args ((delim " ") (grammar 'infix)) + (let ((strings (reverse strings))) + (let lp ((strings (cdr strings)) + (ans (case grammar + ((infix) (list (car strings))) + ((suffix) (list (car strings) delim)) + (else (error "Illegal join-strings grammar" grammar))))) + (if (pair? strings) + (lp (cdr strings) + (cons (car strings) (cons delim ans))) + + ; All done + (string-concat ans))))) + + "")) ; Special-cased for infix grammar. + + + +;;; MIT Scheme copyright terms +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; This material was developed by the Scheme project at the Massachusetts +;;; Institute of Technology, Department of Electrical Engineering and +;;; Computer Science. Permission to copy and modify this software, to +;;; redistribute either the original software or a modified version, and +;;; to use this software for any purpose is granted, subject to the +;;; following restrictions and understandings. +;;; +;;; 1. Any copy made of this software must include this copyright notice +;;; in full. +;;; +;;; 2. Users of this software agree to make their best efforts (a) to +;;; return to the MIT Scheme project any improvements or extensions that +;;; they make, so that these may be included in future releases; and (b) +;;; to inform MIT of noteworthy uses of this software. +;;; +;;; 3. All materials developed as a consequence of the use of this +;;; software shall duly acknowledge such use, in accordance with the usual +;;; standards of acknowledging credit in academic research. +;;; +;;; 4. MIT has made no warrantee or representation that the operation of +;;; this software will be error-free, and MIT is under no obligation to +;;; provide any services, by way of maintenance, update, or otherwise. +;;; +;;; 5. In conjunction with products arising from the use of this material, +;;; there shall be no use of the name of the Massachusetts Institute of +;;; Technology nor of any adaptation thereof in any advertising, +;;; promotional, or sales literature without prior written consent from +;;; MIT in each case. diff --git a/scsh/lib/string-pack.scm b/scsh/lib/string-pack.scm new file mode 100644 index 0000000..b59beeb --- /dev/null +++ b/scsh/lib/string-pack.scm @@ -0,0 +1,315 @@ +;;; string-lib +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; string-map string-map! +;;; string-fold string-fold-right +;;; string-unfold string-tabulate +;;; string-for-each string-iter +;;; string-every string-any +;;; string-compare string-compare-ci +;;; substring-compare substring-compare-ci +;;; string= string< string> string<= string>= string<> +;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<> +;;; substring= substring<> substring-ci= substring-ci<> +;;; substring< substring> substring-ci< substring-ci> +;;; substring<= substring>= substring-ci<= substring-ci>= +;;; string-upper-case? string-lower-case? +;;; capitalize-string capitalize-words string-downcase string-upcase +;;; capitalize-string! capitalize-words! string-downcase! string-upcase! +;;; string-take string-drop +;;; string-pad string-pad-right +;;; string-trim string-trim-right string-trim-both +;;; string-filter string-delete +;;; string-index string-index-right string-skip string-skip-right +;;; string-prefix-count string-prefix-count-ci +;;; string-suffix-count string-suffix-count-ci +;;; substring-prefix-count substring-prefix-count-ci +;;; substring-suffix-count substring-suffix-count-ci +;;; string-prefix? string-prefix-ci? +;;; string-suffix? string-suffix-ci? +;;; substring-prefix? substring-prefix-ci? +;;; substring-suffix? substring-suffix-ci? +;;; substring? substring-ci? +;;; string-fill! string-copy! string-copy substring +;;; string-reverse string-reverse! reverse-list->string +;;; string->list +;;; string-concat string-concat/shared string-append/shared +;;; xsubstring string-xcopy! +;;; string-null? +;;; join-strings +;;; +;;; string? make-string string string-length string-ref string-set! +;;; string-append list->string + +(define-interface string-lib-interface + (export + ;; string-map proc s [start end] -> s + (string-map (proc ((proc (:char) :char) + :string + &opt :exact-integer :exact-integer) + :string)) + + ;; string-map! proc s [start end] -> unspecific + (string-map! (proc ((proc (:char) :values) + :string + &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-fold kons knil s [start end] -> value + ;; string-fold-right kons knil s [start end] -> value + ((string-fold string-fold-right) + (proc ((proc (:char :value) :value) + :value :string + &opt :exact-integer :exact-integer) + :value)) + + ;; string-unfold p f g seed -> string + (string-unfold (proc ((proc (:value) :boolean) + (proc (:value) :char) + (proc (:value) :value) + :value) + :string)) + +; Enough is enough. +; ;; string-unfoldn p f g seed ... -> string +; (string-unfoldn (proc ((procedure :values :boolean) +; (procedure :values :char) +; (procedure :values :values) +; &rest :value) +; :string)) + + ;; string-tabulate proc len -> string + (string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer) + :string)) + + ;; string-for-each proc s [start end] -> unspecific + ;; string-iter proc s [start end] -> unspecific + ((string-for-each string-iter) + (proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-every pred s [start end] + ;; string-any pred s [start end] + (string-every + (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) + :boolean)) + (string-any + (proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer) + :value)) + + ;; string-compare string1 string2 lt-proc eq-proc gt-proc + ;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc + ((string-compare string-compare-ci) + (proc (:string :string (proc (:exact-integer) :values) + (proc (:exact-integer) :values) + (proc (:exact-integer) :values)) + :values)) + + ;; substring-compare string1 start1 end1 string2 start2 end2 lt eq gt + ;; substring-compare-ci string1 start1 end1 string2 start2 end2 lt eq gt + ((substring-compare substring-compare-ci) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer + (proc (:exact-integer) :values) + (proc (:exact-integer) :values) + (proc (:exact-integer) :values)) + :values)) + + ;; string< string1 string2 + ((string= string< string> string<= string>= string<> + string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>) + (proc (&rest :string) :value)) + + ;; substring< string1 start1 end1 string2 start2 end2 + ((substring= substring<> substring-ci= substring-ci<> + substring< substring> substring-ci< substring-ci> + substring<= substring>= substring-ci<= substring-ci>=) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer) + :value)) + + ;; string-upper-case? string [start end] + ;; string-lower-case? string [start end] + ((string-upper-case? string-lower-case?) + (proc (:string &opt :exact-integer :exact-integer) :boolean)) + + ;; capitalize-string string [start end] + ;; capitalize-words string [start end] + ;; string-downcase string [start end] + ;; string-upcase string [start end] + ;; capitalize-string! string [start end] + ;; capitalize-words! string [start end] + ;; string-downcase! string [start end] + ;; string-upcase! string [start end] + ((capitalize-string capitalize-words string-downcase string-upcase) + (proc (:string &opt :exact-integer :exact-integer) :string)) + ((capitalize-string! capitalize-words! string-downcase! string-upcase!) + (proc (:string &opt :exact-integer :exact-integer) :unspecific)) + + ;; string-take string nchars + ;; string-drop string nchars + ((string-take string-drop) (proc (:string :exact-integer) :string)) + + ;; string-pad string k [char start end] + ;; string-pad-right string k [char start end] + ((string-pad string-pad-right) + (proc (:string :exact-integer &opt :char :exact-integer :exact-integer) + :string)) + + ;; string-trim string [char/char-set/pred start end] + ;; string-trim-right string [char/char-set/pred start end] + ;; string-trim-both string [char/char-set/pred start end] + ((string-trim string-trim-right string-trim-both) + (proc (:string &opt :value :exact-integer :exact-integer) + :string)) + + ;; string-filter char/char-set/pred string [start end] + ;; string-delete char/char-set/pred string [start end] + ((string-filter string-delete) + (proc (:value :string &opt :exact-integer :exact-integer) :string)) + + ;; string-index string char/char-set/pred [start end] + ;; string-index-right string char/char-set/pred [end start] + ;; string-skip string char/char-set/pred [start end] + ;; string-skip-right string char/char-set/pred [end start] + ((string-index string-index-right string-skip string-skip-right) + (proc (:string :value &opt :exact-integer :exact-integer) + :value)) + + ;; string-prefix-count string1 string2 + ;; string-suffix-count string1 string2 + ;; string-prefix-count-ci string1 string2 + ;; string-suffix-count-ci string1 string2 + ((string-prefix-count string-prefix-count-ci + string-suffix-count string-suffix-count-ci) + (proc (:string :string) :exact-integer)) + + ;; substring-prefix-count string1 start1 end1 string2 start2 end2 + ;; substring-suffix-count string1 start1 end1 string2 start2 end2 + ;; substring-prefix-count-ci string1 start1 end1 string2 start2 end2 + ;; substring-suffix-count-ci string1 start1 end1 string2 start2 end2 + ((substring-prefix-count substring-prefix-count-ci + substring-suffix-count substring-suffix-count-ci) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer) + :exact-integer)) + + + ;; string-prefix? string1 string2 + ;; string-suffix? string1 string2 + ;; string-prefix-ci? string1 string2 + ;; string-suffix-ci? string1 string2 + ((string-prefix? string-prefix-ci? + string-suffix? string-suffix-ci?) + (proc (:string :string) :boolean)) + + ;; substring-prefix? string1 start1 end1 string2 start2 end2 + ;; substring-suffix? string1 start1 end1 string2 start2 end2 + ;; substring-prefix-ci? string1 start1 end1 string2 start2 end2 + ;; substring-suffix-ci? string1 start1 end1 string2 start2 end2 + ((substring-prefix? substring-prefix-ci? + substring-suffix? substring-suffix-ci?) + (proc (:string :exact-integer :exact-integer + :string :exact-integer :exact-integer) + :boolean)) + + ;; substring? pattern string [start end] + ;; substring-ci? pattern string [start end] + ((substring? substring-ci?) + (proc (:string :string &opt :exact-integer :exact-integer) + :value)) + + ;; string-fill! string char [start end] + (string-fill! (proc (:string :char &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-copy! to tstart from [fstart fend] + (string-copy! (proc (:string :exact-integer :string + &opt :exact-integer :exact-integer) + :unspecific)) + + ;; string-copy s [start end] -> string + ;; substring s start [end] -> string + (string-copy (proc (:string &opt :exact-integer :exact-integer) :string)) + (substring (proc (:string :exact-integer &opt :exact-integer) :string)) + + ;; string-reverse s [start end] + ;; string-reverse! s [start end] + (string-reverse (proc (:string &opt :exact-integer :exact-integer) :string)) + (string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific)) + + ;; reverse-list->string char-list + ;; string->list s [start end] + ;; string-concat string-list + ;; string-concat/shared string-list + ;; string-append/shared s ... + (reverse-list->string (proc (:value) :string)) + (string->list (proc (:string &opt :exact-integer :exact-integer) :value)) + ((string-concat string-concat/shared) (proc (:value) :string)) + (string-append/shared (proc (&rest :string) :string)) + + ;; xsubstring s from [to start end] + ;; string-xcopy! target tstart s from [to start end] + (xsubstring (proc (:string :exact-integer &opt + :exact-integer :exact-integer :exact-integer) + :string)) + (string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt + :exact-integer :exact-integer :exact-integer) + :unspecific)) + + ;; string-null? s + (string-null? (proc (:string) :boolean)) + + (join-strings (proc (:value &opt :string :symbol) :string)) + + ;; Here are the R4RS procs + (string? (proc (:value) :boolean)) + (make-string (proc (:exact-integer &opt :char) :string)) + (string (proc (&rest :char) :string)) + (string-length (proc (:string) :exact-integer)) + (string-ref (proc (:string :exact-integer) :char)) + (string-set! (proc (:string :exact-integer :char) :unspecific)) + + ; Not provided by string-lib. + ;((string=? string-ci=? string? string-ci>? string<=? string-ci<=? + ; string>=? string-ci>=?) (proc (:string :string) :boolean)) + + ;; These are the R4RS types for SUBSTRING, STRING-COPY, STRING-FILL!, + ;; and STRING->LIST. The string-lib types are different -- extended. + ;(substring (proc (:string :exact-integer :exact-integer) :string)) + ;(string-copy (proc (:string) :string)) + ;(string-fill! (proc (:string :char) :unspecific)) + ;(string->list (proc (:string) :value)) + + (string-append (proc (&rest :string) :string)) + (list->string (proc (:value) :string)) + )) + + +;;; make-kmp-restart-vector +;;; parse-final-start+end +;;; parse-start+end +;;; check-substring-spec + +(define-interface string-lib-internals-interface + (export + (parse-final-start+end (proc ((procedure :values :values) :string :value) + (some-values :exact-integer :exact-integer))) + (parse-start+end (proc ((procedure :values :values) :string :value) + (some-values :exact-integer :exact-integer :value))) + (check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer) + :unspecific)) + (make-kmp-restart-vector (proc (:string (proc (:char :char) :boolean)) + :vector)))) + + +(define-structures ((string-lib string-lib-interface) + (string-lib-internals string-lib-internals-interface)) + (access scheme) ; Get at R5RS SUBSTRING + (open receiving ; RECEIVE + char-set-package; Various + error-package ; ERROR + let-opt ; LET-OPTIONALS :OPTIONAL + structure-refs ; STRUCTURE-REF + scheme) + (files string-lib)) diff --git a/scsh/lib/strings.txt b/scsh/lib/strings.txt new file mode 100644 index 0000000..fa2d32d --- /dev/null +++ b/scsh/lib/strings.txt @@ -0,0 +1,578 @@ +Todo: + parse-start+end parse-final-start+end need "string" in the name + Also, export macro binder. + What's up w/quotient? (quotient -1 3) = 0. + regexp-foldl + type regexp interface + land* + Let-optional: + A let-optional that parses a prefix of the args. + Arg checking forms that get used if it parses, but are not + applied to the default. + +The Scheme Underground string library includes a rich set of operations +for manipulating strings. These are frequently useful for scripting and +other text-manipulation applications. + +The library's design was influenced by the string libraries found in MIT +Scheme, Gambit, RScheme, MzScheme, slib, Common Lisp, Bigloo, guile, APL and +the SML standard basis. Some of the code bears a distant family relation to +the MIT Scheme implementation, and being derived from that code, is covered by +the MIT Scheme copyright (which is a fairly generic "free" copyright -- see +the source file for details). The fast KMP string-search code used in +SUBSTRING? was loosely adapted from old slib code by Stephen Bevan. + +The library has the following design principles: +- *All* procedures involving character comparison are available in + both case-sensitive and case-insensitive forms. + +- *All* functionality is available in substring and full-string forms. + +- The procedures are spec'd so as to permit efficient implementation in a + Scheme that provided shared-text substrings (e.g., guile). This means that + you should not rely on many of the substring-selecting procedures to return + freshly-allocated strings. Careful attention is paid to the issue of which + procedures allocate fresh storage, and which are permitted to return results + that share storage with the arguments. + +- Common Lisp theft: + + inequality functions return mismatch index. + I generalised this so that this "protocol" is extended even to + the equality functions. This means that clients can be handed any generic + string-comparison function and rely on the meaning of the true value. + + + Common Lisp capitalisation definition + +The library addresses some problems with the R5RS string procedures: + - Question marks after string-comparison functions (string=?, etc.) + This is inconsistent with numeric comparison functions, and ugly, too. + - String-comparison functions do not provide useful true value. + - STRING-COPY should have optional start/end args; + SUBSTRING shouldn't specify if it copies or returns shared bits. + - STRING-FILL! and STRING->LIST should take optional start/end args. + - No <> function provided. + +In the following procedure specifications: + - Any S parameter is a string; + + - START and END parameters are half-open string indices specifying + a substring within a string parameter; when optional, they default + to 0 and the length of the string, respectively. When specified, it + must be the case that 0 <= START <= END <= (string-length S), for + the corresponding parameter S. They typically restrict a procedure's + action to the indicated substring. + + - A CHAR/CHAR-SET/PRED parameter is a value used to select/search + for a character in a string. If it is a character, it is used in + an equality test; if it is a character set, it is used as a + membership test; if it is a procedure, it is applied to the + characters as a test predicate. + +This library contains a large number of procedures, but they follow +a consistent naming scheme. The names are composed of smaller lexemes +in a regular way that exposes the structure and relationships between the +procedures. This should help the programmer to recall or reconstitute the name +of the particular procedure that he needs when writing his own code. In +particular + - Procedures whose names end in "-ci" are case-insensitive variants. + - Procedures whose names end in "!" are side-effecting variants. + These procedures generally return an unspecified value. + - The order of common parameters is fairly consistent across the + different procedures. + +For more text-manipulation functionality, see also the regular expression, +file-name, character set, and character->character partial map packages. + +------------------------------------------------------------------------------- +* R4RS/R5RS procedures + +The R4RS and R5RS reports define 22 string procedures. The string-lib +package includes 8 of these exactly as defined, 4 in an extended, +backwards-compatible way, and drops the remaining 10 (whose functionality +is available via other bindings). + +The 8 procedures provided exactly as documented in the reports are + string? + make-string + string + string-length + string-ref + string-set! + string-append + list->string + +The ten functions not included are the R4RS string-comparison functions: + string=? string-ci=? + string? string-ci>? + string<=? string-ci<=? + string>=? string-ci>=? +The string-lib package provides alternate bindings. + +Additionally, the four extended procedures are + + string-fill! s char [start end] -> unspecific + string->list s [start end] -> char-list + substring s start [end] -> string + string-copy s [start end] -> string + +These procedures are documented in the following section. In brief, they are +extended to take optional start/end parameters specifying substring ranges; +Additionally, SUBSTRING is allowed to return a value that shares storage with +its argument. + + +* Procedures + +These procedures are contained in the Scheme 48 package "string-lib", +which is open in the default user package. They are not found in the +"scsh" package; script writers and other programmers that use the Scheme +48 module system must open string-lib explicitly. + +string-map proc s [start end] -> string +string-map! proc s [start end] -> unspecified + PROC is a char->char procedure; it is mapped over S. + Note: no sequence order is specified. + +string-fold kons knil s [start end] -> value +string-fold-right kons knil s [start end] -> value + These are the fundamental iterators for strings. + The left-fold operator maps the KONS procedure across the + string from left to right + (... (kons s[2] (kons s[1] (kons s[0] knil)))) + In other words, string-fold obeys the recursion + (string-fold kons knil s start end) = + (string-fold kons (kons s[start] knil) start+1 end) + + The right-fold operator maps the KONS procedure across the + string from right to left + (kons s[0] (... (kons s[end-3] (kons s[end-2] (kons s[end-1] knil))))) + obeying the recursion + (string-fold-right kons knil s start end) = + (string-fold-right kons (kons s[end-1] knil) start end-1) + + Examples: + To convert a string to a list of chars: + (string-fold-right cons '() s) + + To count the number of lower-case characters in a string: + (string-fold (lambda (c count) + (if (char-set-contains? char-set:lower c) + (+ count 1) + count)) + 0 + s) + +string-unfold p f g seed -> string + This is the fundamental constructor for strings. + - G is used to generate a series of "seed" values from the initial seed: + SEED, (G SEED), (G^2 SEED), (G^3 SEED), ... + - P tells us when to stop -- when it returns true when applied to one + of these seed values. + - F maps each seed value to the corresponding character + in the result string. + + More precisely, the following (simple, inefficient) definition holds: + (define (string-unfold p f g seed) + (if (p seed) "" + (string-append (string (f seed)) + (string-unfold p f g (g seed))))) + + STRING-UNFOLD is a fairly powerful constructor -- you can use it to + reverse a string, copy a string, convert a list to a string, read + a port into a string, and so forth. Examples: + (port->string p) = (string-unfold eof-object? values + (lambda (x) (read-char p)) + (read-char p)) + + (list->string lis) = (string-unfold null? car cdr lis) + + (tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0) + + To map F over a list LIS, producing a string: + (string-unfold null? (compose f car) cdr lis) + +string-tabulate proc len -> string + PROC is an integer->char procedure. Construct a string of size LEN + by applying PROC to each index to produce the corresponding string + element. The order in which PROC is applied to the indices is not + specified. + +string-for-each proc s [start end] -> unspecified +string-iter proc s [start end] -> unspecified + Apply PROC to each character in S. + STRING-FOR-EACH has no specified iteration order. + STRING-ITER is required to iterate from START to END + in increasing order. + +string-every? pred s [start end] -> boolean +string-any? pred s [start end] -> value + Note: no sequence order specified. + Checks to see if predicate PRED is true of every / any character in S. + STRING-ANY? is witness-generating -- it applies PRED to the elements + of S, returning the first true value it finds, otherwise false. + +string-compare s1 s2 lt-proc eq-proc gt-proc -> values +string-compare-ci s1 s2 lt-proc eq-proc gt-proc -> values + Apply LT-PROC, EQ-PROC, GT-PROC to the mismatch index, depending + upon whether S1 is less than, equal to, or greater than S2. + The "mismatch index" is the largest index i such that for + every 0 <= j < i, s1[j] = s2[j] -- that is, I is the first + position that doesn't match. If S1 = S2, the mismatch index + is simply the length of the strings; we observe the protocol + in this redundant case for uniformity. + +substring-compare s1 start1 end1 s2 start2 end2 lt-proc eq-proc gt-proc -> values +substring-compare-ci s1 start1 end1 s2 start2 end2 lt-proc eq-proc gt-proc -> values + The continuation procedures are applied to S1's mismatch index (as defined + above). In the case of EQ-PROC, this is always END1. + +string= s1 s2 -> #f or integer +string<> s1 s2 -> #f or integer +string< s1 s2 -> #f or integer +string> s1 s2 -> #f or integer +string<= s1 s2 -> #f or integer +string>= s1 s2 -> #f or integer + If the comparison operation is true, the function returns the + mismatch index (as defined for the previous comparator functions). + +string-ci= s1 s2 -> #f or integer +string-ci<> s1 s2 -> #f or integer +string-ci< s1 s2 -> #f or integer +string-ci> s1 s2 -> #f or integer +string-ci<= s1 s2 -> #f or integer +string-ci>= s1 s2 -> #f or integer + Case-insensitive variants. + +substring= s1 start1 end1 s2 start2 end2 -> #f or integer +substring<> s1 start1 end1 s2 start2 end2 -> #f or integer +substring< s1 start1 end1 s2 start2 end2 -> #f or integer +substring> s1 start1 end1 s2 start2 end2 -> #f or integer +substring<= s1 start1 end1 s2 start2 end2 -> #f or integer +substring>= s1 start1 end1 s2 start2 end2 -> #f or integer + +substring-ci= s1 start1 end1 s2 start2 end2 -> #f or integer +substring-ci<> s1 start1 end1 s2 start2 end2 -> #f or integer +substring-ci< s1 start1 end1 s2 start2 end2 -> #f or integer +substring-ci> s1 start1 end1 s2 start2 end2 -> #f or integer +substring-ci<= s1 start1 end1 s2 start2 end2 -> #f or integer +substring-ci>= s1 start1 end1 s2 start2 end2 -> #f or integer + These variants restrict the comparison to the indicated + substrings of S1 and S2. + +string-upper-case? s [start end] -> boolean +string-lower-case? s [start end] -> boolean + STRING-UPPER-CASE? returns true iff the string contains + no lower-case characters. STRING-LOWER-CASE returns true + iff the string contains no upper-case characters. + (string-upper-case? "") => #t + (string-lower-case? "") => #t + (string-upper-case? "FOOb") => #f + (string-upper-case? "U.S.A.") => #t + +capitalize-string s [start end] -> string +capitalize-string! s [start end] -> unspecified + Capitalize the string: upcase the first alphanumeric character, + and downcase the rest of the string. CAPITALIZE-STRING returns + a freshly allocated string. + + (capitalize-string "--capitalize tHIS sentence.") => + "--Capitalize this sentence." + + (capitalize-string "see Spot run. see Nix run.") => + "See spot run. see nix run." + + (capitalize-string "3com makes routers.") => + "3com makes routers." + +capitalize-words s [start end] -> string +capitalize-words! s [start end] -> unspecified + A "word" is a maximal contiguous sequence of alphanumeric characters. + Upcase the first character of every word; downcase the rest of the word. + CAPITALIZE-WORDS returns a freshly allocated string. + + (capitalize-words "HELLO, 3THErE, my nAME IS olin") => + "Hello, 3there, My Name Is Olin" + + More sophisticated capitalisation procedures can be synthesized + using CAPITALIZE-STRING and pattern matchers. In this context, + the REGEXP-SUBSTITUTE/GLOBAL procedure may be useful for picking + out the units to be capitalised and applying CAPITALIZE-STRING to + their components. + +string-upcase s [start end] -> string +string-upcase! s [start end] -> unspecified +string-downcase s [start end] -> string +string-downcase! s [start end] -> unspecified + Raise or lower the case of the alphabetic characters in the string. + STRING-UPCASE and STRING-DOWNCASE return freshly allocated strings. + +string-take s nchars -> string +string-drop s nchars -> string +string-take-right s nchars -> string +string-drop-right s nchars -> string + STRING-TAKE returns the first NCHARS of STRING; + STRING-DROP returns all but the first NCHARS of STRING. + STRING-TAKE-RIGHT returns the last NCHARS of STRING; + STRING-DROP-RIGHT returns all but the last NCHARS of STRING. + These generalise MIT Scheme's HEAD & TAIL functions. + If these procedures produce the entire string, they may return either + S or a copy of S; in some implementations, proper substrings may share + memory with S. + +string-pad s k [char start end] -> string +string-pad-right s k [char start end] -> string + Build a string of length K comprised of S padded on the left (right) + by as many occurences of the character CHAR as needed. If S has more + than K chars, it is truncated on the left (right) to length k. CHAR + defaults to #\space. + + If K is exactly the length of S, these functions may return + either S or a copy of S. + +string-trim s [char/char-set/pred start end] -> string +string-trim-right s [char/char-set/pred start end] -> string +string-trim-both s [char/char-set/pred start end] -> string + Trim S by skipping over all characters on the left / on the right / + on both sides that satisfy the second parameter CHAR/CHAR-SET/PRED: + - If it is a character CHAR, characters equal to CHAR are trimmed. + - If it is a char set CHAR-SET, characters contained in CHAR-SET + are trimmed. + - If it is a predicate PRED, it is a test predicate that is applied + to the characters in S; a character causing it to return true + is skipped. + CHAR/CHAR/SET-PRED defaults to CHAR-SET:WHITESPACE. + + If no trimming occurs, these functions may return either S or a copy of S; + in some implementations, proper substrings may share memory with S. + + (string-trim-both " The outlook wasn't brilliant, \n\r") + => "The outlook wasn't brilliant," + +string-filter s char/char-set/pred [start end] -> string +string-delete s char/char-set/pred [start end] -> string + Filter the string S, retaining only those characters that + satisfy / do not satisfy the CHAR/CHAR-SET/PRED argument. If + this argument is a procedure, it is applied to the character + as a predicate; if it is a char-set, the character is tested + for membership; if it is a character, it is used in an equality test. + + If the string is unaltered by the filtering operation, these + functions may return either S or a copy of S. + +string-index s char/char-set/pred [start end] -> integer or #f +string-index-right s char/char-set/pred [end start] -> integer or #f +string-skip s char/char-set/pred [start end] -> integer or #f +string-skip-right s char/char-set/pred [end start] -> integer or #f + Note the inverted start/end ordering of index-right and skip-right's + parameters. + + Index (index-right) searches through the string from the left (right), + returning the index of the first occurence of a character which + - equals CHAR/CHAR-SET/PRED (if it is a character); + - is in CHAR/CHAR-SET/PRED (if it is a char-set); + - satisfies the predicate CHAR/CHAR-SET/PRED (if it is a procedure). + If no match is found, the functions return false. + + The skip functions are similar, but use the complement of the criteria: + they search for the first char that *doesn't* satisfy the test. E.g., + to skip over initial whitespace, say + (cond ((string-skip s char-set:whitespace) => + (lambda (i) + ;; (string-ref s i) is not whitespace. + ...))) + +string-prefix-count s1 s2 -> integer +string-suffix-count s1 s2 -> integer +string-prefix-count-ci s1 s2 -> integer +string-suffix-count-ci s1 s2 -> integer + Return the length of the longest common prefix/suffix of the two strings. + This is equivalent to the "mismatch index" for the strings. + +substring-prefix-count s1 start1 end1 s2 start2 end2 -> integer +substring-suffix-count s1 start1 end1 s2 start2 end2 -> integer +substring-prefix-count-ci s1 start1 end1 s2 start2 end2 -> integer +substring-suffix-count-ci s1 start1 end1 s2 start2 end2 -> integer + Substring variants. + +string-prefix? s1 s2 -> boolean +string-suffix? s1 s2 -> boolean +string-prefix-ci? s1 s2 -> boolean +string-suffix-ci? s1 s2 -> boolean + Is S1 a prefix/suffix of S2? + +substring-prefix? s1 start1 end1 s2 start2 end2 -> boolean +substring-suffix? s1 start1 end1 s2 start2 end2 -> boolean +substring-prefix-ci? s1 start1 end1 s2 start2 end2 -> boolean +substring-suffix-ci? s1 start1 end1 s2 start2 end2 -> boolean + Substring variants. + +substring? s1 s2 [start end] -> integer or false +substring-ci? s1 s2 [start end] -> integer or false + Return the index in S2 where S1 occurs as a substring, or false. + The returned index is in the range [start,end). + The current implementation uses the Knuth-Morris-Pratt algorithm. + +string-fill! s char [start end] -> unspecified + Store CHAR into the elements of S. + This is the R4RS procedure extended to have optional START/END parameters. + +string-copy! target tstart s [start end] -> unspecified + Copy the sequence of characters from index range [START,END) in + string S to string TARGET, beginning at index TSTART. The characters + are copied left-to-right or right-to-left as needed -- the copy is + guaranteed to work, even if TARGET and S are the same string. + +substring s start [end] -> string +string-copy s [start end] -> string + These R4RS procedures are extended to have optional START/END parameters. + Use STRING-COPY when you want to indicate explicitly in your code that you + wish to allocate new storage; use SUBSTRING when you don't care if you + get a fresh copy or share storage with the original string. + E.g.: + (string-copy "Beta substitution") => "Beta substitution" + (string-copy "Beta substitution" 1 10) + => "eta subst" + (string-copy "Beta substitution" 5) => "substitution" + + SUBSTRING may return a value with shares memory with S. + +string-reverse s [start end] -> string +string-reverse! s [start end] -> unspecific + Reverse the string. + +reverse-list->string char-list -> string + An efficient implementation of (compose string->list reverse): + (reverse-list->string '(#\a #\B #\c)) -> "cBa" + This is a common idiom in the epilog of string-processing loops + that accumulate an answer in a reverse-order list. + +string-concat string-list -> string + Append the elements of STRING-LIST together into a single list. + Guaranteed to return a freshly allocated list. Appears sufficiently + often as to warrant being named. + +string-concat/shared string-list -> string +string-append/shared s ... -> string + These two procedures are variants of STRING-CONCAT and STRING-APPEND + that are permitted to return results that share storage with their + parameters. In particular, if STRING-APPEND/SHARED is applied to just + one argument, it may return exactly that argument, whereas STRING-APPEND + is required to allocate a fresh string. + +string->list s [start end] -> char-list + The R5RS STRING->LIST procedure is extended to take optional START/END + arguments. + +string-null? s -> bool + Is S the empty string? + +xsubstring s from [to start end] -> string + This is the "extended substring" procedure that implements replicated + copying of a substring of some string. + + S is a string; START and END are optional arguments that demarcate + a substring of S, defaulting to 0 and the length of S (e.g., the whole + string). Replicate this substring up and down index space, in both the + positive and negative directions. For example, if S = "abcdefg", START=3, + and END=6, then we have the conceptual bidirectionally-infinite string + ... d e f d e f d e f d e f d e f d e f d e f ... + ... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ... + XSUBSTRING returns the substring of this string beginning at index FROM, + and ending at TO (which defaults to FROM+(END-START)). + + You can use XSUBSTRING to perform a variety of tasks: + - To rotate a string left: (xsubstring "abcdef" 2) => "cdefab" + - To rotate a string right: (xsubstring "abcdef" -2) => "efabcd" + - To replicate a string: (xsubstring "abc" 0 7) => "abcabca" + + Note that + - The FROM/TO indices give a half-open range -- the characters from + index FROM up to, but not including, index TO. + - The FROM/TO indices are not in terms of the index space for string S. + They are in terms of the replicated index space of the substring + defined by S, START, and END. + + It is an error if START=END -- although this is allowed by special + dispensation when FROM=TO. + +string-xcopy! target tstart s sfrom [sto start end] -> unspecific + Exactly the same as XSUBSTRING, but the extracted text is written + into the string TARGET starting at index TSTART. + This operation is not defined if (EQ? TARGET S) -- you cannot copy + a string on top of itself. + + +* Lower-level procedures + +The following procedures are useful for writing other string-processing +functions, and are contained in the string-lib-internals package. + +parse-start+end proc s args -> [start end rest] +parse-final-start+end proc s args -> [start end] + PARSE-START+END may be used to parse a pair of optional START/END arguments + from an argument list, defaulting them to 0 and the length of some string + S, respectively. Let the length of string S be SLEN. + - If ARGS = (), the function returns (values 0 slen '()) + - If ARGS = (i), I is checked to ensure it is an integer, and + that 0 <= i <= slen. Returns (values i slen (cdr rest)). + - If ARGS = (i j ...), I and J are checked to ensure they are + integers, and that 0 <= i <= j <= slen. Returns (values i j (cddr rest)). + If any of the checks fail, an error condition is raised, and PROC is used + as part of the error condition -- it should be the name of the client + procedure whose argument list PARSE-START+END is parsing. + + parse-final-start+end is exactly the same, except that the args list + passed to it is required to be of length two or less; if it is longer, + an error condition is raised. It may be used when the optional START/END + parameters are final arguments to the procedure. + +check-substring-spec proc s start end -> unspecific + Check values START and END to ensure they specify a valid substring + in S. This means that START and END are exact integers, and + 0 <= START <= END <= (STRING-LENGTH S) + If this is not the case, an error condition is raised. PROC is used + as part of error condition, and should be the procedure whose START/END + parameters we are checking. + +make-kmp-restart-vector s c= -> vector + Build the Knuth-Morris-Pratt "restart vector," which is useful + for quickly searching character sequences for the occurrence of + string S. C= is a character-equality function used to construct + the restart vector; it is usefully CHAR=? or CHAR-CI=?. + + The definition of the restart vector RV for string S is: + If we have matched chars 0..i-1 of S against some search string SS, and + S[i] doesn't match SS[k], then reset i := RV[i], and try again to + match SS[k]. If RV[i] = -1, then punt SS[k] completely, and move on to + SS[k+1] and S[0]. + + In other words, if you have matched the first i chars of S, but + the i+1'th char doesn't match, RV[i] tells you what the next-longest + prefix of PATTERN is that you have matched. + + The following string-search function shows how a restart vector + is used to search. It can be easily adapted to search other character + sequences (such as ports). + + (define (find-substring pattern source start end) + (let ((plen (string-length pattern)) + (rv (make-kmp-restart-vector pattern char=?))) + + ;; The search loop. SJ & PJ are redundant state. + (let lp ((si start) (pi 0) + (sj (- end start)) ; (- end si) -- how many chars left. + (pj plen)) ; (- plen pi) -- how many chars left. + + (if (= pi plen) (- si plen) ; Win. + + (and (<= pj sj) ; Lose. + + (if (char=? (string-ref source si) ; Search. + (string-ref pattern pi)) + (lp (+ 1 si) (+ 1 pi) (- sj 1) (- pj 1)) ; Advance. + + (let ((pi (vector-ref rv pi))) ; Retreat. + (if (= pi -1) + (lp (+ si 1) 0 (- sj 1) plen) ; Punt. + (lp si pi sj (- plen pi))))))))))