added from 0.5.2

This commit is contained in:
marting 1999-09-23 15:24:25 +00:00
parent 4e935c24a5
commit 58f90e2359
9 changed files with 9670 additions and 0 deletions

106
scsh/lib/ccp-pack.scm Normal file
View File

@ -0,0 +1,106 @@
;;; CPP Lib
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Character->Character Partial functions
;;; Many of these types are pretty weak, but there is no way to
;;; specify that a parameter must be a particular record type.
;;; Every little bit helps, though.
(define-interface ccp-lib-interface
(export
;; ccp? x -> boolean
(ccp? (proc (:value) :boolean))
;; ccp-domain ccp -> char-set
(ccp-domain (proc (:value) :value)) ; Not very informative.
;; ccp-copy ccp -> ccp
(ccp-copy (proc (:value) :value))
;; ccp= ccp1 ccp2 ...
;; ccp<= ccp1 ccp2 ...
((ccp= ccp<=) (proc (&rest :value) :boolean)) ; Not very informative.
;; ccp-fold kons knil ccp -> value
(ccp-fold (proc ((proc (:char :char :value) :value) :value :value) :value))
;; ccp-for-each proc ccp
(ccp-for-each (proc ((proc (:char :char) :values)) :unspecific))
;; ccp->alist ccp -> alist
(ccp->alist (proc (:value) :value))
;; ccp-restrict ccp cset -> ccp
;; ccp-restrict! ccp cset -> ccp
((ccp-restrict ccp-restrict!) (proc (:value :value) :value))
;; ccp-adjoin ccp from-char1 to-char1 ... -> ccp
;; ccp-adjoin! ccp from-char1 to-char1 ... -> ccp
;; ccp-delete ccp from-char1 ... -> ccp
;; ccp-delete! ccp from-char1 ... -> ccp
((ccp-adjoin ccp-adjoin!) (proc (:value &rest :char) :value))
((ccp-delete ccp-delete!) (proc (:value &rest :char) :value))
;; ccp-extend ccp1 ... -> ccp
;; ccp-extend! ccp1 ... -> ccp
((ccp-extend ccp-extend!) (proc (&rest :value) :value))
;; ccp-compose ccp1 ... -> ccp
(ccp-compose (proc (&rest :value) :value))
;; alist->ccp char/char-alist [ccp] -> ccp
;; alist->ccp! char/char-alist [ccp] -> ccp
((alist->ccp alist->ccp!) (proc (:value &opt :value) :value))
;; proc->ccp proc [domain ccp] -> ccp
;; proc->ccp! proc [domain ccp] -> ccp
((proc->ccp proc->ccp!) (proc ((proc (:char) :char) &opt :value :value)
:value))
;; constant-ccp char [domain ccp] -> ccp
;; constant-ccp! char domain ccp -> ccp
((constant-ccp constant-ccp!) (proc (:char &opt :value :value) :value))
;; ccp/mappings from1 to1 ... -> ccp
;; extend-ccp/mappings ccp from1 to1 ... -> ccp
;; extend-ccp/mappings! ccp from1 to1 ... -> ccp
(ccp/mappings (proc (&rest :value) :value))
((extend-ccp/mappings extend-ccp/mappings!)
(proc (:value &rest :value) :value))
;; construct-ccp ccp elt ... -> ccp
;; construct-ccp! ccp elt ... -> ccp
((construct-ccp construct-ccp!) (proc (:value &rest :value) :value))
;; ccp-unfold p f g seed -> ccp
(ccp-unfold (proc ((proc (:value) :boolean)
(procedure :value (some-values :char :char))
(proc (:value) :value)
:value)
:value))
;; tr ccp string [start end] -> string
;; ccp-map ccp string [start end] -> string
;; ccp-map! ccp string [start end]
;; ccp-app ccp char -> char or false
((tr ccp-map)
(proc (:value :string &opt :exact-integer :exact-integer) :string))
(ccp-map! (proc (:value :string &opt :exact-integer :exact-integer) :unspecific))
(ccp-app (proc (:value :char) :value))
;; Primitive CCP's.
ccp:0 ccp:1 ccp:upcase ccp:downcase
))
(define-structure ccp-lib ccp-lib-interface
(open char-set-package
ascii
defrec-package
string-lib
let-opt
receiving
list-lib ; EVERY
error-package
scheme)
(files ccp)
(optimize auto-integrate))

576
scsh/lib/ccp.scm Normal file
View File

@ -0,0 +1,576 @@
;;; Char->char partial maps -*- Scheme -*-
;;; Copyright (C) 1998 by Olin Shivers.
;;; CCPs are an efficient data structure for doing simple string transforms,
;;; similar to the kinds of things you would do with the tr(1) program.
;;;
;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit
;;; character types would need a more sophisticated data structure, tuned
;;; for sparseness. I would suggest something like this:
;;; (define-record ccp
;;; domain ; The domain char-set
;;; map ; Sorted vector of (char . string) pairs
;;; ; specifying the map.
;;; id?) ; If true, mappings not specified by MAP are
;;; ; identity mapping. If false, MAP must
;;; ; specify a mapping for every char in DOMAIN.
;;;
;;; A (char . string) elements in MAP specifies a mapping for the contiguous
;;; sequence of L chars beginning with CHAR (in the sequence of the underlying
;;; char type representation), where L is the length of STRING. These MAP elements
;;; are sorted by CHAR, so that binary search can be used to get from an input
;;; character C to the right MAP element quickly.
;;;
;;; This representation should be reasonably compact for standard mappings on,
;;; say, a Unicode CCP. An implementation might wish to have a cache field
;;; in the record for storing the full 8kb bitset when performing ccp-map
;;; operations. Or, an implementation might want to store the Latin-1 subset
;;; of the map in a dense format, and keep the remainder in a sparse format.
(define num-chars (char-set-size char-set:full)) ; AKA 256.
(define-record ccp
domain ; The domain char-set
dshared? ; Is the domain value shared or linear?
map ; 256-elt string
mshared?) ; Is the map string shared or linear?
;;; Accessors and setters that manage the linear bookkeeping
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (ccp-domain ccp)
(set-ccp:dshared? ccp #t)
(ccp:domain ccp))
;;; CCP is a linear ccp. PROC is a domain->domain function; it must be
;;; linear in its parameter and result.
;;;
;;; Updates the domain of the CCP with PROC, returns the resulting
;;; CCP; reuses the old one to construct the new one.
(define (restrict-linear-ccp-domain ccp proc)
(let ((new-d (proc (if (ccp:dshared? ccp)
(begin (set-ccp:dshared? ccp #f)
(char-set-copy (ccp:domain ccp)))
(ccp:domain ccp)))))
(set-ccp:domain ccp new-d)
ccp))
;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function.
;;; It is passed a linear domain and cmap string. It may side-effect
;;; the cmap string, and returns the resulting updated domain.
;;; We return the resulting CCP, reusing the parameter to construct it.
(define (linear-update-ccp ccp proc)
(let* ((cmap (if (ccp:mshared? ccp)
(begin (set-ccp:mshared? ccp #f)
(string-copy (ccp:map ccp)))
(ccp:map ccp)))
(new-d (proc (if (ccp:dshared? ccp)
(begin (set-ccp:dshared? ccp #f)
(char-set-copy (ccp:domain ccp)))
(ccp:domain ccp))
cmap)))
(set-ccp:domain ccp new-d)
ccp))
;;; Return CCP's map field, and mark it as shared. CCP functions that
;;; restrict a ccp's domain share map strings, so they use this guy.
(define (ccp:map/shared ccp)
(set-ccp:mshared? ccp #t)
(ccp:map ccp))
(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f
(string-copy (ccp:map ccp)) #f))
;;; N-ary equality relation for partial maps
(define (ccp= ccp1 . rest)
(let ((domain (ccp:domain ccp1))
(cmap (ccp:map ccp1)))
(every (lambda (ccp2)
(and (char-set= domain (ccp:domain ccp2))
(let ((cmap2 (ccp:map ccp2)))
(char-set-every? (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap i)
(string-ref cmap2 i))))
domain))))
rest)))
;;; N-ary subset relation for partial maps
(define (ccp<= ccp1 . rest)
(let lp ((domain1 (ccp:domain ccp1))
(cmap1 (ccp:map ccp1))
(rest rest))
(or (not (pair? rest))
(let* ((ccp2 (car rest))
(domain2 (ccp:domain ccp2))
(cmap2 (ccp:map ccp2))
(rest (cdr rest)))
(and (char-set<= domain1 domain2)
(let ((cmap2 (ccp:map ccp2)))
(char-set-every? (lambda (c)
(let ((i (char->ascii c)))
(char=? (string-ref cmap1 i)
(string-ref cmap2 i))))
domain1))
(lp domain2 cmap2 rest))))))
;;; CCP iterators
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (ccp-fold kons knil ccp)
(let ((cmap (ccp:map ccp)))
(char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v))
knil
(ccp:domain ccp))))
(define (ccp-for-each proc ccp)
(let ((cmap (ccp:map ccp)))
(char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c))))
(ccp:domain ccp))))
(define (ccp->alist ccp)
(ccp-fold (lambda (from to alist) (cons (cons from to) alist))
'()
ccp))
;;; CCP-RESTRICT
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Restrict a ccp's domain.
(define (ccp-restrict ccp cset)
(make-ccp (char-set-intersection cset (ccp:domain ccp))
#f
(ccp:map/shared ccp)
#t))
(define (ccp-restrict! ccp cset)
(restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset))))
;;; CCP-ADJOIN ccp from-char1 to-char1 ...
;;; CCP-DELETE ccp char1 ...
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Add & delete mappings to/from a ccp.
(define (ccp-delete ccp . chars)
(make-ccp (apply char-set-delete (ccp:domain ccp) chars)
#f
(ccp:map/shared ccp)
#t))
(define (ccp-delete! ccp . chars)
(restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars))))
(define (ccp-adjoin ccp . chars)
(let ((cmap (string-copy (ccp:map ccp))))
(make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars)
#f
cmap
#f)))
(define (ccp-adjoin! ccp . chars)
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars))))
(define (install-ccp-adjoin! cmap domain chars)
(let lp ((chars chars) (d domain))
(if (pair? chars)
(let ((from (car chars))
(to (cadr chars))
(chars (cddr chars)))
(string-set! cmap (char->ascii from) to)
(lp chars (char-set-adjoin! d from)))
d)))
;;; CCP-EXTEND ccp1 ...
;;; CCP-EXTEND! ccp1 ccp2 ...
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Extend ccp1 with ccp2, etc.
(define (ccp-extend . ccps)
(if (pair? ccps)
(let ((ccp0 (car ccps))
(ccps (cdr ccps)))
(if (pair? ccps)
(let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap.
;; The FOLD installs each ccp in CCPS into CMAP and produces
;; the new domain.
(make-ccp (fold (lambda (ccp d)
(install-ccp-extension! cmap d ccp))
(char-set-copy (ccp:domain ccp0))
ccps)
#f cmap #f))
ccp0)) ; Only 1 parameter
ccp:0)) ; 0 parameters
(define (ccp-extend! ccp0 . ccps)
(linear-update-ccp ccp0
(lambda (domain cmap)
(fold (lambda (ccp d) (install-ccp-extension! cmap d ccp))
domain
ccps))))
;;; Side-effect CMAP, linear-update and return DOMAIN.
(define (install-ccp-extension! cmap domain ccp)
(let ((cmap1 (ccp:map ccp))
(domain1 (ccp:domain ccp)))
(char-set-for-each (lambda (c)
(let ((i (char->ascii c)))
(string-set! cmap i (string-ref cmap1 i))))
domain1)
(char-set-union! domain domain1)))
;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; For each character C-IN in the original domain, we push it
;;; through the pipeline of CCPs. If we ever land outside the
;;; domain of a ccp, we punt C-IN. If we push it all the way
;;; through, we add C-IN to our result domain, and add the mapping
;;; into the cmap we are assembling.
;;;
;;; Looping this way avoids building up intermediate temporary
;;; CCPs. If CCP's were small bitsets, we might be better off
;;; slicing the double-nested loops the other way around.
(define (ccp-compose . ccps)
(cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1
((not (pair? (cdr ccps))) (car ccps)) ; 1 arg
(else
(let* ((v (list->vector ccps))
(vlen-2 (- (vector-length v) 2))
(cmap (make-string num-chars))
(d1 (ccp:domain (vector-ref v (+ vlen-2 1))))
(d (char-set-fold (lambda (c-in d)
(let lp ((c c-in) (i vlen-2))
(if (>= i 0)
(let ((ccp (vector-ref v i)))
(if (char-set-contains? (ccp:domain ccp) c)
(lp (string-ref (ccp:map ccp)
(char->ascii c))
(- i 1))
;; Lose: remove c-in from d.
(char-set-delete! d c-in)))
;; Win: C-IN -> C
(begin (string-set! cmap
(char->ascii c-in)
c)
d))))
(char-set-copy d1)
d1)))
(make-ccp d #f cmap #f)))))
;;; ALIST->CPP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (alist->ccp cc-alist . maybe-base-ccp)
(let ((base (:optional maybe-base-ccp ccp:0)))
(if (pair? cc-alist)
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-ccp-alist! cmap
(char-set-copy (ccp:domain base))
cc-alist)
#f cmap #f))
base)))
(define (alist->ccp! alist base)
(linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist))))
;;; Side-effect CMAP, linear-update and return DOMAIN.
(define (install-ccp-alist! cmap domain alist)
(fold (lambda (from/to d) (let ((from (car from/to))
(to (cdr from/to)))
(string-set! cmap (char->ascii from) to)
(char-set-adjoin! domain from)))
domain
alist))
;;; PROC->CCP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (proc->ccp proc [domain base-ccp])
(define (proc->ccp proc . args)
(let-optionals args ((proc-domain char-set:full)
(base ccp:0))
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base))
proc proc-domain)
#f cmap #f))))
(define (proc->ccp! proc proc-domain base)
(linear-update-ccp base
(lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain))))
(define (install-ccp-proc! cmap domain proc proc-domain)
(char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c)))
proc-domain)
(char-set-union! domain proc-domain))
;;; CONSTANT-CCP
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (constant-ccp char [domain base-ccp])
;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR.
;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0.
(define (constant-ccp char . args)
(let-optionals args ((char-domain char-set:full) (base ccp:0))
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base))
char char-domain)
#f cmap #f))))
(define (constant-ccp! char char-domain base)
(linear-update-ccp base
(lambda (d cmap) (install-constant-ccp! cmap d char char-domain))))
;;; Install the constant mapping into CMAP0 by side-effect,
;;; linear-update & return DOMAIN0 with the constant-mapping's domain.
(define (install-constant-ccp! cmap0 domain0 char char-domain)
(char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char))
char-domain)
(char-set-union! domain0 char-domain))
;;; CCP/MAPPINGS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp
;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp
;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp
;;; Each FROM element is either a string or a (lo-char . hi-char) range.
;;; Each TO element is either a string or a lo-char. Strings are replicated
;;; to match the length of the corresponding FROM element.
;;; CCP/MAPPINGS's base CCP is CCP:0
;;;
;;; Tedious code.
;;; Internal utility.
;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect.
;;; Return the new domain.
(define (install-ccp-mapping-pair! cmap domain from to)
;; Tedium -- four possibilities here:
;; str->str, str->lo-char,
;; range->str, range->lo-char.
(if (string? from)
(if (string? to)
;; "abc" -> "ABC"
(let ((len1 (string-length from))
(len2 (string-length to)))
(let lp2 ((i (- len1 1))
(j (modulo (- len2 1) len1))
(d domain))
(if (>= i 0)
(let ((c (string-ref from i)))
(string-set! cmap
(char->ascii c)
(string-ref to i))
(lp2 (- i 1)
(- (if (> j 0) j len2) 1)
(char-set-adjoin! d c)))
d)))
;; "abc" -> #\A
(let lp2 ((i (- (string-length from) 1))
(j (char->ascii to))
(d domain))
(if (>= i 0)
(let ((c (string-ref from i)))
(string-set! cmap
(char->ascii c)
(ascii->char j))
(lp2 (- i 1)
(- j 1)
(char-set-adjoin! d c)))
d)))
(let ((from-start (char->ascii (car from)))
(from-end (char->ascii (cdr from))))
(if (string? to)
(let ((len2-1 (- (string-length to) 1)))
;; (#\a . #\c) -> "ABC"
(let lp2 ((i from-start) (j 0) (d domain))
(if (<= i from-end)
(let ((c (string-ref to j)))
(string-set! cmap i c)
(lp2 (+ i 1)
(if (= j len2-1) 0 (+ j 1))
(char-set-adjoin! d c)))
d)))
;; (#\a . #\c) -> #\A
(do ((i from-start (+ i 1))
(j (char->ascii to) (+ j 1))
(d domain (begin (string-set! cmap i (ascii->char j))
(char-set-adjoin d (ascii->char i)))))
((> i from-end) d))))))
;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN.
(define (install-mapping-pairs cmap domain args)
(let lp ((domain domain) (args args))
(if (pair? args)
(lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args))
(cddr args))
domain)))
(define (ccp/mappings . args)
(let ((cmap (make-string num-chars)))
(make-ccp (install-mapping-pairs (make-string num-chars)
(char-set-copy char-set:empty)
args)
#f cmap #f)))
(define (extend-ccp/mappings base . args)
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args)
#f cmap #f)))
(define (extend-ccp/mappings! base . args)
(linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args))))
;;; CONSTRUCT-CCP! ccp elt ...
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; The kitchen-sink constructor; static typing be damned.
;;; ELTS are interpreted as follows:
;;; (lo-char . hi-char) to-string|lo-char ; ccp/range
;;; from-string to-string|lo-char ; ccp/range
;;; ccp ; ccp-extend
;;; alist ; alist->ccp
;;; domain char ; ccp-constant
;;; domain proc ; proc->ccp
(define (construct-ccp! ccp . elts)
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts))))
(define (construct-ccp base . elts)
(let ((cmap (string-copy (ccp:map base))))
(make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts)
#f cmap #f)))
;;; Install the mappings into CMAP by side-effect,
;;; linear-update & return DOMAIN with the final domain.
(define (install-ccp-construct! cmap domain elts)
(let lp ((d domain) (elts elts))
;(format #t "d=~s elts=~s\n" d elts)
(if (not (pair? elts)) d
(let ((elt (car elts))
(elts (cdr elts)))
(cond ((pair? elt)
(cond ((pair? (car elt)) ; ELT is an alist.
(lp (install-ccp-alist! cmap d elt) elts))
((char? (car elt)) ; ELT is (lo-char . hi-char) range.
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
(cdr elts)))
(else (error "Illegal elt to construct-ccp" elt))))
((string? elt)
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
(cdr elts)))
((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts))
((char-set? elt)
(let ((elt2 (car elts))
(elts (cdr elts)))
(lp (cond ((char? elt2)
(install-constant-ccp! cmap d elt2 elt))
((procedure? elt2)
(install-ccp-proc! cmap d elt2 elt))
(else (error "Illegal elt-pair to construct-ccp"
elt elt2)))
elts)))
(else (error "Illegal elt to construct-ccp" elt)))))))
;;; CCP unfold
(define (ccp-unfold p f g seed)
(let lp ((seed seed) (ccp (ccp-copy ccp:0)))
(if (p seed) ccp
(lp (g seed)
(receive (from to) (f seed)
(lp (g seed) (ccp-adjoin! ccp from to)))))))
;;; Using CCPs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; TR ccp string [start end] -> string
;;; CCP-MAP ccp string [start end] -> string
;;; CCP-MAP! ccp string [start end] -> undefined
;;; CCP-APP ccp char -> char or false
;;; If a char in S is not in CCP's domain, it is dropped from the result.
;;; You can use this to map and delete chars from a string.
(define (tr ccp s . maybe-start+end)
(let-optionals maybe-start+end ((start 0) (end (string-length s)))
;; Count up the chars in S that are in the domain,
;; and allocate the answer string ANS:
(let* ((len (- end start))
(domain (ccp:domain ccp))
(ans-len (string-fold (lambda (c numchars)
(if (char-set-contains? domain c)
(+ numchars 1)
numchars))
0 s start end))
(ans (make-string ans-len)))
;; Apply the map, installing the resulting chars into ANS:
(string-fold (lambda (c i) (cond ((ccp-app ccp c) =>
(lambda (c)
(string-set! ans i c)
(+ i 1)))
(else i))) ; Not in domain -- drop it.
0 s start end)
ans)))
(define (ccp-map ccp s . maybe-start+end)
(apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end))
(define (ccp-map! ccp s . maybe-start+end)
(apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end))
(define (ccp-app ccp char)
(and (char-set-contains? (ccp:domain ccp) char)
(string-ref (ccp:map ccp) (char->ascii char))))
;;; Primitive CCPs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define id-cmap
(let ((m (make-string num-chars)))
(do ((i (- num-chars 1) (- i 1)))
((< i 0))
(string-set! m i (ascii->char i)))
m))
(define ccp:0 (make-ccp char-set:empty #t id-cmap #t))
(define ccp:1 (make-ccp char-set:full #t id-cmap #t))
(define ccp:upcase (proc->ccp char-upcase char-set:full))
(define ccp:downcase (proc->ccp char-downcase char-set:full))

1508
scsh/lib/list-lib.scm Normal file

File diff suppressed because it is too large Load Diff

235
scsh/lib/list-pack.scm Normal file
View File

@ -0,0 +1,235 @@
;;; This is a Scheme48 interface spec for the SRFI-1 list-lib package.
;;; It defines the LIST-LIB-INTERFACE interface and LIST-LIB structure.
;;; Bindings are typed as tightly as one can in Scheme48's type language.
;;; -Olin Shivers
;;; shivers@ai.mit.edu
;;; list-lib
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; xcons cons* make-list list-tabulate list-copy circular-list iota
;;; proper-list? circular-list? dotted-list? not-pair? null-list? list=
;;; first second third fourth fifth sixth seventh eighth ninth tenth
;;; car+cdr
;;; take drop
;;; take-right drop-right
;;; take! drop-right!
;;; last last-pair
;;; length+
;;; append! reverse! append-reverse append-reverse!
;;; zip unzip1 unzip2 unzip3 unzip4 unzip5
;;; count
;;; unfold unfold-right
;;; fold unfold pair-fold reduce
;;; fold-right unfold-right pair-fold-right reduce-right
;;; append-map append-map! map! pair-for-each filter-map map-in-order
;;; filter partition remove
;;; filter! partition! remove!
;;; find find-tail any every list-index
;;; delete delete! delete-duplicates delete-duplicates!
;;; alist-cons alist-copy
;;; alist-delete alist-delete!
;;;
;;; lset<= lset= lset-adjoin
;;; lset-union lset-union!
;;; lset-intersection lset-intersection!
;;; lset-difference lset-difference!
;;; lset-xor lset-xor!
;;; lset-diff+intersection lset-diff+intersection!
;;;
;;; map for-each member assoc (Extended R4RS procedures)
;;;
;;; cons pair? null? list length append reverse (These are the
;;; car cdr ... cdddar cddddr set-car! set-cdr! list-ref R4RS procedures
;;; memq memv assoc assq assv re-exported by
;;; list-lib unchanged.)
(define-interface list-lib-interface
(export
;; xcons <cdr> <car>
(xcons (proc (:value :value) :value))
;; cons* item ...
(cons* (proc (:value &rest :value) :value))
;; make-list len [fill]
(make-list (proc (:exact-integer &opt :value) :value))
;; list-tabulate elt-proc len
(list-tabulate (proc (:exact-integer (proc (:exact-integer) :value)) :value))
;; list-copy lis
(list-copy (proc (:value) :value))
(circular-list (proc (:value &opt :value) :pair))
; ((:iota iota:)
; (proc (:number &opt :number :number) :value))
(iota (proc (:exact-integer &opt :number :number) :value))
(proper-list? (proc (:value) :boolean))
(dotted-list? (proc (:value) :boolean))
(circular-list? (proc (:value) :boolean))
(not-pair? (proc (:value) :boolean))
(null-list? (proc (:value) :boolean))
(list= (proc ((proc (:value :value) :boolean) &rest :value) :boolean))
((first second third fourth fifth sixth seventh eighth ninth tenth)
(proc (:pair) :value))
(car+cdr (proc (:pair) (some-values :value :value)))
;; take lis i take-right lis i
;; drop lis i drop-right lis i
;; take! lis i drop-right! lis i
((take drop take-right drop-right take! drop-right!)
(proc (:value :exact-integer) :value))
(last (proc (:pair) :value))
(last-pair (proc (:pair) :pair))
(length+ (proc (:value) :value))
(append! (proc (:value &rest :value) :value))
(reverse! (proc (:value) :value))
((append-reverse append-reverse!) (proc (:value :value) :value))
(zip (proc (:value &rest :value) :value))
(unzip1 (proc (:value) :value))
(unzip2 (proc (:value) (some-values :value :value)))
(unzip3 (proc (:value) (some-values :value :value :value)))
(unzip4 (proc (:value) (some-values :value :value :value :value)))
(unzip5 (proc (:value) (some-values :value :value :value :value :value)))
(count (proc ((proc (:value) :boolean) :value) :exact-integer))
((fold fold-right)
(proc ((proc (:value :value &rest :value) :value)
:value :value &rest :value)
:value))
((unfold unfold-right) (proc ((proc (:value) :boolean)
(proc (:value) :value)
(proc (:value) :value)
:value
&opt (proc (:value) :value))
:value))
((pair-fold pair-fold-right)
(proc ((proc (:pair :value &rest :value) :value)
:value :value &rest :value)
:value))
((reduce reduce-right)
(proc ((proc (:value :value) :value) :value :value) :value))
((append-map append-map! map! filter-map map-in-order)
(proc ((proc (:value &rest :value) :value) :value &rest :value) :value))
(pair-for-each (proc ((proc (:pair &rest :pair) :values) :value &rest :value)
:unspecific))
((filter filter! remove remove!)
(proc ((proc (:value) :boolean) :value) :value))
((partition partition!) (proc ((proc (:value) :boolean) :value)
(some-values :value :value)))
((find find-tail) (proc ((proc (:value) :boolean) :value) :value))
((any every)
(proc ((proc (:value &rest :value) :value) :value &rest :value) :value))
(list-index (proc ((proc (:value &rest :value) :value) :value &rest :value)
:value))
((delete delete!)
(proc (:value :value &opt (proc (:value :value) :boolean)) :value))
;; Extended from their R5RS definitions to take an optional comparison
;; function: (MEMBER x lis [=]).
(member (proc (:value :value &opt (proc (:value :value) :boolean)) :value))
(assoc (proc (:value :value &opt (proc (:value :value) :boolean)) :value))
((delete-duplicates delete-duplicates!)
(proc (:value &opt (proc (:value :value) :boolean)) :value))
(alist-cons (proc (:value :value :value) :value))
(alist-copy (proc (:value) :value))
((alist-delete alist-delete!)
(proc (:value :value &opt (proc (:value :value) :value)) :value))
;; Extended from their R4RS definitions.
(map (proc ((proc (:value) :value) &rest :value) :value))
(for-each (proc ((proc (:value) :values) &rest :value) :unspecific))
;; R4RS exports
(cons (proc (:value :value) :pair))
((pair? null?) (proc (:value) :boolean))
(list-ref (proc (:value :exact-integer) :value))
(list (proc (&rest :value) :value))
(length (proc (:value) :exact-integer))
(append (proc (&rest :value) :value))
(reverse (proc (:value) :value))
((car cdr
caaaar caaadr caadar caaddr caaar caadr caar
cadaar cadadr caddar cadddr cadar caddr cadr
cdaaar cdaadr cdadar cdaddr cdaar cdadr cdar
cddaar cddadr cdddar cddddr cddar cdddr cddr) (proc (:value) :value))
((set-car! set-cdr!) (proc (:pair :value) :unspecific))
((memq memv) (proc (:value :value) :value))
((assq assv) (proc (:value :value) :value))
;; lset-lib
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; lset<= = list1 list2 ...
;; lset= = list1 list2 ...
;; lset-adjoin = list elt1 ...
;; lset-union = list1 ...
;; lset-intersection = list1 list2 ...
;; lset-difference = list1 list2 ...
;; lset-xor = list1 ...
;; lset-diff+intersection = list1 list2 ...
;; ... and their side effecting counterparts:
;; lset-union! lset-intersection! lset-difference! lset-xor!
;; lset-diff+intersection!
;; lset= = list1 ... -> boolean
;; lset<= = list1 ... -> boolean
((lset= lset<=)
(proc ((proc (:value :value) :boolean) &rest :value) :boolean))
;; lset-adjoin = list elt1 ...
(lset-adjoin (proc ((proc (:value :value) :boolean) :value &rest :value) :value))
;; lset-union = list1 ... lset-xor = list1 ...
;; lset-union! = list1 ... lset-xor! = list1 ...
((lset-union lset-xor)
(proc ((proc (:value :value) :boolean) &rest :value) :value))
;; lset-intersection = list1 list2 ...
;; lset-intersection! = list1 list2 ...
;; lset-difference = list1 list2 ...
;; lset-difference! = list1 list2 ...
((lset-intersection lset-difference
lset-intersection! lset-difference!)
(proc ((proc (:value :value) :boolean) :value &rest :value) :value))
;; lset-diff+intersection = list1 list2 ...
;; lset-diff+intersection! = list1 list2 ...
((lset-diff+intersection lset-diff+intersection!)
(proc ((proc (:value :value) :boolean) :value &rest :value)
(some-values :value :value)))
))
(define-structure list-lib list-lib-interface
(open error-package ; ERROR procedure
receiving ; RECEIVE m-v macro
let-opt ; LET-OPTIONALS and :OPTIONAL.
scheme)
(begin (define (check-arg pred val caller)
(let lp ((val val))
(if (pred val) val (lp (error "Bad argument" val pred caller))))))
(files list-lib))

3056
scsh/lib/srfi-1.html Normal file

File diff suppressed because it is too large Load Diff

1912
scsh/lib/srfi-1.txt Normal file

File diff suppressed because it is too large Load Diff

1384
scsh/lib/string-lib.scm Normal file

File diff suppressed because it is too large Load Diff

315
scsh/lib/string-pack.scm Normal file
View File

@ -0,0 +1,315 @@
;;; string-lib
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; string-map string-map!
;;; string-fold string-fold-right
;;; string-unfold string-tabulate
;;; string-for-each string-iter
;;; string-every string-any
;;; string-compare string-compare-ci
;;; substring-compare substring-compare-ci
;;; string= string< string> string<= string>= string<>
;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
;;; substring= substring<> substring-ci= substring-ci<>
;;; substring< substring> substring-ci< substring-ci>
;;; substring<= substring>= substring-ci<= substring-ci>=
;;; string-upper-case? string-lower-case?
;;; capitalize-string capitalize-words string-downcase string-upcase
;;; capitalize-string! capitalize-words! string-downcase! string-upcase!
;;; string-take string-drop
;;; string-pad string-pad-right
;;; string-trim string-trim-right string-trim-both
;;; string-filter string-delete
;;; string-index string-index-right string-skip string-skip-right
;;; string-prefix-count string-prefix-count-ci
;;; string-suffix-count string-suffix-count-ci
;;; substring-prefix-count substring-prefix-count-ci
;;; substring-suffix-count substring-suffix-count-ci
;;; string-prefix? string-prefix-ci?
;;; string-suffix? string-suffix-ci?
;;; substring-prefix? substring-prefix-ci?
;;; substring-suffix? substring-suffix-ci?
;;; substring? substring-ci?
;;; string-fill! string-copy! string-copy substring
;;; string-reverse string-reverse! reverse-list->string
;;; string->list
;;; string-concat string-concat/shared string-append/shared
;;; xsubstring string-xcopy!
;;; string-null?
;;; join-strings
;;;
;;; string? make-string string string-length string-ref string-set!
;;; string-append list->string
(define-interface string-lib-interface
(export
;; string-map proc s [start end] -> s
(string-map (proc ((proc (:char) :char)
:string
&opt :exact-integer :exact-integer)
:string))
;; string-map! proc s [start end] -> unspecific
(string-map! (proc ((proc (:char) :values)
:string
&opt :exact-integer :exact-integer)
:unspecific))
;; string-fold kons knil s [start end] -> value
;; string-fold-right kons knil s [start end] -> value
((string-fold string-fold-right)
(proc ((proc (:char :value) :value)
:value :string
&opt :exact-integer :exact-integer)
:value))
;; string-unfold p f g seed -> string
(string-unfold (proc ((proc (:value) :boolean)
(proc (:value) :char)
(proc (:value) :value)
:value)
:string))
; Enough is enough.
; ;; string-unfoldn p f g seed ... -> string
; (string-unfoldn (proc ((procedure :values :boolean)
; (procedure :values :char)
; (procedure :values :values)
; &rest :value)
; :string))
;; string-tabulate proc len -> string
(string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer)
:string))
;; string-for-each proc s [start end] -> unspecific
;; string-iter proc s [start end] -> unspecific
((string-for-each string-iter)
(proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer)
:unspecific))
;; string-every pred s [start end]
;; string-any pred s [start end]
(string-every
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
:boolean))
(string-any
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
:value))
;; string-compare string1 string2 lt-proc eq-proc gt-proc
;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc
((string-compare string-compare-ci)
(proc (:string :string (proc (:exact-integer) :values)
(proc (:exact-integer) :values)
(proc (:exact-integer) :values))
:values))
;; substring-compare string1 start1 end1 string2 start2 end2 lt eq gt
;; substring-compare-ci string1 start1 end1 string2 start2 end2 lt eq gt
((substring-compare substring-compare-ci)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer
(proc (:exact-integer) :values)
(proc (:exact-integer) :values)
(proc (:exact-integer) :values))
:values))
;; string< string1 string2
((string= string< string> string<= string>= string<>
string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>)
(proc (&rest :string) :value))
;; substring< string1 start1 end1 string2 start2 end2
((substring= substring<> substring-ci= substring-ci<>
substring< substring> substring-ci< substring-ci>
substring<= substring>= substring-ci<= substring-ci>=)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer)
:value))
;; string-upper-case? string [start end]
;; string-lower-case? string [start end]
((string-upper-case? string-lower-case?)
(proc (:string &opt :exact-integer :exact-integer) :boolean))
;; capitalize-string string [start end]
;; capitalize-words string [start end]
;; string-downcase string [start end]
;; string-upcase string [start end]
;; capitalize-string! string [start end]
;; capitalize-words! string [start end]
;; string-downcase! string [start end]
;; string-upcase! string [start end]
((capitalize-string capitalize-words string-downcase string-upcase)
(proc (:string &opt :exact-integer :exact-integer) :string))
((capitalize-string! capitalize-words! string-downcase! string-upcase!)
(proc (:string &opt :exact-integer :exact-integer) :unspecific))
;; string-take string nchars
;; string-drop string nchars
((string-take string-drop) (proc (:string :exact-integer) :string))
;; string-pad string k [char start end]
;; string-pad-right string k [char start end]
((string-pad string-pad-right)
(proc (:string :exact-integer &opt :char :exact-integer :exact-integer)
:string))
;; string-trim string [char/char-set/pred start end]
;; string-trim-right string [char/char-set/pred start end]
;; string-trim-both string [char/char-set/pred start end]
((string-trim string-trim-right string-trim-both)
(proc (:string &opt :value :exact-integer :exact-integer)
:string))
;; string-filter char/char-set/pred string [start end]
;; string-delete char/char-set/pred string [start end]
((string-filter string-delete)
(proc (:value :string &opt :exact-integer :exact-integer) :string))
;; string-index string char/char-set/pred [start end]
;; string-index-right string char/char-set/pred [end start]
;; string-skip string char/char-set/pred [start end]
;; string-skip-right string char/char-set/pred [end start]
((string-index string-index-right string-skip string-skip-right)
(proc (:string :value &opt :exact-integer :exact-integer)
:value))
;; string-prefix-count string1 string2
;; string-suffix-count string1 string2
;; string-prefix-count-ci string1 string2
;; string-suffix-count-ci string1 string2
((string-prefix-count string-prefix-count-ci
string-suffix-count string-suffix-count-ci)
(proc (:string :string) :exact-integer))
;; substring-prefix-count string1 start1 end1 string2 start2 end2
;; substring-suffix-count string1 start1 end1 string2 start2 end2
;; substring-prefix-count-ci string1 start1 end1 string2 start2 end2
;; substring-suffix-count-ci string1 start1 end1 string2 start2 end2
((substring-prefix-count substring-prefix-count-ci
substring-suffix-count substring-suffix-count-ci)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer)
:exact-integer))
;; string-prefix? string1 string2
;; string-suffix? string1 string2
;; string-prefix-ci? string1 string2
;; string-suffix-ci? string1 string2
((string-prefix? string-prefix-ci?
string-suffix? string-suffix-ci?)
(proc (:string :string) :boolean))
;; substring-prefix? string1 start1 end1 string2 start2 end2
;; substring-suffix? string1 start1 end1 string2 start2 end2
;; substring-prefix-ci? string1 start1 end1 string2 start2 end2
;; substring-suffix-ci? string1 start1 end1 string2 start2 end2
((substring-prefix? substring-prefix-ci?
substring-suffix? substring-suffix-ci?)
(proc (:string :exact-integer :exact-integer
:string :exact-integer :exact-integer)
:boolean))
;; substring? pattern string [start end]
;; substring-ci? pattern string [start end]
((substring? substring-ci?)
(proc (:string :string &opt :exact-integer :exact-integer)
:value))
;; string-fill! string char [start end]
(string-fill! (proc (:string :char &opt :exact-integer :exact-integer)
:unspecific))
;; string-copy! to tstart from [fstart fend]
(string-copy! (proc (:string :exact-integer :string
&opt :exact-integer :exact-integer)
:unspecific))
;; string-copy s [start end] -> string
;; substring s start [end] -> string
(string-copy (proc (:string &opt :exact-integer :exact-integer) :string))
(substring (proc (:string :exact-integer &opt :exact-integer) :string))
;; string-reverse s [start end]
;; string-reverse! s [start end]
(string-reverse (proc (:string &opt :exact-integer :exact-integer) :string))
(string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific))
;; reverse-list->string char-list
;; string->list s [start end]
;; string-concat string-list
;; string-concat/shared string-list
;; string-append/shared s ...
(reverse-list->string (proc (:value) :string))
(string->list (proc (:string &opt :exact-integer :exact-integer) :value))
((string-concat string-concat/shared) (proc (:value) :string))
(string-append/shared (proc (&rest :string) :string))
;; xsubstring s from [to start end]
;; string-xcopy! target tstart s from [to start end]
(xsubstring (proc (:string :exact-integer &opt
:exact-integer :exact-integer :exact-integer)
:string))
(string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt
:exact-integer :exact-integer :exact-integer)
:unspecific))
;; string-null? s
(string-null? (proc (:string) :boolean))
(join-strings (proc (:value &opt :string :symbol) :string))
;; Here are the R4RS procs
(string? (proc (:value) :boolean))
(make-string (proc (:exact-integer &opt :char) :string))
(string (proc (&rest :char) :string))
(string-length (proc (:string) :exact-integer))
(string-ref (proc (:string :exact-integer) :char))
(string-set! (proc (:string :exact-integer :char) :unspecific))
; Not provided by string-lib.
;((string=? string-ci=? string<? string-ci<?
; string>? string-ci>? string<=? string-ci<=?
; string>=? string-ci>=?) (proc (:string :string) :boolean))
;; These are the R4RS types for SUBSTRING, STRING-COPY, STRING-FILL!,
;; and STRING->LIST. The string-lib types are different -- extended.
;(substring (proc (:string :exact-integer :exact-integer) :string))
;(string-copy (proc (:string) :string))
;(string-fill! (proc (:string :char) :unspecific))
;(string->list (proc (:string) :value))
(string-append (proc (&rest :string) :string))
(list->string (proc (:value) :string))
))
;;; make-kmp-restart-vector
;;; parse-final-start+end
;;; parse-start+end
;;; check-substring-spec
(define-interface string-lib-internals-interface
(export
(parse-final-start+end (proc ((procedure :values :values) :string :value)
(some-values :exact-integer :exact-integer)))
(parse-start+end (proc ((procedure :values :values) :string :value)
(some-values :exact-integer :exact-integer :value)))
(check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer)
:unspecific))
(make-kmp-restart-vector (proc (:string (proc (:char :char) :boolean))
:vector))))
(define-structures ((string-lib string-lib-interface)
(string-lib-internals string-lib-internals-interface))
(access scheme) ; Get at R5RS SUBSTRING
(open receiving ; RECEIVE
char-set-package; Various
error-package ; ERROR
let-opt ; LET-OPTIONALS :OPTIONAL
structure-refs ; STRUCTURE-REF
scheme)
(files string-lib))

578
scsh/lib/strings.txt Normal file
View File

@ -0,0 +1,578 @@
Todo:
parse-start+end parse-final-start+end need "string" in the name
Also, export macro binder.
What's up w/quotient? (quotient -1 3) = 0.
regexp-foldl
type regexp interface
land*
Let-optional:
A let-optional that parses a prefix of the args.
Arg checking forms that get used if it parses, but are not
applied to the default.
The Scheme Underground string library includes a rich set of operations
for manipulating strings. These are frequently useful for scripting and
other text-manipulation applications.
The library's design was influenced by the string libraries found in MIT
Scheme, Gambit, RScheme, MzScheme, slib, Common Lisp, Bigloo, guile, APL and
the SML standard basis. Some of the code bears a distant family relation to
the MIT Scheme implementation, and being derived from that code, is covered by
the MIT Scheme copyright (which is a fairly generic "free" copyright -- see
the source file for details). The fast KMP string-search code used in
SUBSTRING? was loosely adapted from old slib code by Stephen Bevan.
The library has the following design principles:
- *All* procedures involving character comparison are available in
both case-sensitive and case-insensitive forms.
- *All* functionality is available in substring and full-string forms.
- The procedures are spec'd so as to permit efficient implementation in a
Scheme that provided shared-text substrings (e.g., guile). This means that
you should not rely on many of the substring-selecting procedures to return
freshly-allocated strings. Careful attention is paid to the issue of which
procedures allocate fresh storage, and which are permitted to return results
that share storage with the arguments.
- Common Lisp theft:
+ inequality functions return mismatch index.
I generalised this so that this "protocol" is extended even to
the equality functions. This means that clients can be handed any generic
string-comparison function and rely on the meaning of the true value.
+ Common Lisp capitalisation definition
The library addresses some problems with the R5RS string procedures:
- Question marks after string-comparison functions (string=?, etc.)
This is inconsistent with numeric comparison functions, and ugly, too.
- String-comparison functions do not provide useful true value.
- STRING-COPY should have optional start/end args;
SUBSTRING shouldn't specify if it copies or returns shared bits.
- STRING-FILL! and STRING->LIST should take optional start/end args.
- No <> function provided.
In the following procedure specifications:
- Any S parameter is a string;
- START and END parameters are half-open string indices specifying
a substring within a string parameter; when optional, they default
to 0 and the length of the string, respectively. When specified, it
must be the case that 0 <= START <= END <= (string-length S), for
the corresponding parameter S. They typically restrict a procedure's
action to the indicated substring.
- A CHAR/CHAR-SET/PRED parameter is a value used to select/search
for a character in a string. If it is a character, it is used in
an equality test; if it is a character set, it is used as a
membership test; if it is a procedure, it is applied to the
characters as a test predicate.
This library contains a large number of procedures, but they follow
a consistent naming scheme. The names are composed of smaller lexemes
in a regular way that exposes the structure and relationships between the
procedures. This should help the programmer to recall or reconstitute the name
of the particular procedure that he needs when writing his own code. In
particular
- Procedures whose names end in "-ci" are case-insensitive variants.
- Procedures whose names end in "!" are side-effecting variants.
These procedures generally return an unspecified value.
- The order of common parameters is fairly consistent across the
different procedures.
For more text-manipulation functionality, see also the regular expression,
file-name, character set, and character->character partial map packages.
-------------------------------------------------------------------------------
* R4RS/R5RS procedures
The R4RS and R5RS reports define 22 string procedures. The string-lib
package includes 8 of these exactly as defined, 4 in an extended,
backwards-compatible way, and drops the remaining 10 (whose functionality
is available via other bindings).
The 8 procedures provided exactly as documented in the reports are
string?
make-string
string
string-length
string-ref
string-set!
string-append
list->string
The ten functions not included are the R4RS string-comparison functions:
string=? string-ci=?
string<? string-ci<?
string>? string-ci>?
string<=? string-ci<=?
string>=? string-ci>=?
The string-lib package provides alternate bindings.
Additionally, the four extended procedures are
string-fill! s char [start end] -> unspecific
string->list s [start end] -> char-list
substring s start [end] -> string
string-copy s [start end] -> string
These procedures are documented in the following section. In brief, they are
extended to take optional start/end parameters specifying substring ranges;
Additionally, SUBSTRING is allowed to return a value that shares storage with
its argument.
* Procedures
These procedures are contained in the Scheme 48 package "string-lib",
which is open in the default user package. They are not found in the
"scsh" package; script writers and other programmers that use the Scheme
48 module system must open string-lib explicitly.
string-map proc s [start end] -> string
string-map! proc s [start end] -> unspecified
PROC is a char->char procedure; it is mapped over S.
Note: no sequence order is specified.
string-fold kons knil s [start end] -> value
string-fold-right kons knil s [start end] -> value
These are the fundamental iterators for strings.
The left-fold operator maps the KONS procedure across the
string from left to right
(... (kons s[2] (kons s[1] (kons s[0] knil))))
In other words, string-fold obeys the recursion
(string-fold kons knil s start end) =
(string-fold kons (kons s[start] knil) start+1 end)
The right-fold operator maps the KONS procedure across the
string from right to left
(kons s[0] (... (kons s[end-3] (kons s[end-2] (kons s[end-1] knil)))))
obeying the recursion
(string-fold-right kons knil s start end) =
(string-fold-right kons (kons s[end-1] knil) start end-1)
Examples:
To convert a string to a list of chars:
(string-fold-right cons '() s)
To count the number of lower-case characters in a string:
(string-fold (lambda (c count)
(if (char-set-contains? char-set:lower c)
(+ count 1)
count))
0
s)
string-unfold p f g seed -> string
This is the fundamental constructor for strings.
- G is used to generate a series of "seed" values from the initial seed:
SEED, (G SEED), (G^2 SEED), (G^3 SEED), ...
- P tells us when to stop -- when it returns true when applied to one
of these seed values.
- F maps each seed value to the corresponding character
in the result string.
More precisely, the following (simple, inefficient) definition holds:
(define (string-unfold p f g seed)
(if (p seed) ""
(string-append (string (f seed))
(string-unfold p f g (g seed)))))
STRING-UNFOLD is a fairly powerful constructor -- you can use it to
reverse a string, copy a string, convert a list to a string, read
a port into a string, and so forth. Examples:
(port->string p) = (string-unfold eof-object? values
(lambda (x) (read-char p))
(read-char p))
(list->string lis) = (string-unfold null? car cdr lis)
(tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0)
To map F over a list LIS, producing a string:
(string-unfold null? (compose f car) cdr lis)
string-tabulate proc len -> string
PROC is an integer->char procedure. Construct a string of size LEN
by applying PROC to each index to produce the corresponding string
element. The order in which PROC is applied to the indices is not
specified.
string-for-each proc s [start end] -> unspecified
string-iter proc s [start end] -> unspecified
Apply PROC to each character in S.
STRING-FOR-EACH has no specified iteration order.
STRING-ITER is required to iterate from START to END
in increasing order.
string-every? pred s [start end] -> boolean
string-any? pred s [start end] -> value
Note: no sequence order specified.
Checks to see if predicate PRED is true of every / any character in S.
STRING-ANY? is witness-generating -- it applies PRED to the elements
of S, returning the first true value it finds, otherwise false.
string-compare s1 s2 lt-proc eq-proc gt-proc -> values
string-compare-ci s1 s2 lt-proc eq-proc gt-proc -> values
Apply LT-PROC, EQ-PROC, GT-PROC to the mismatch index, depending
upon whether S1 is less than, equal to, or greater than S2.
The "mismatch index" is the largest index i such that for
every 0 <= j < i, s1[j] = s2[j] -- that is, I is the first
position that doesn't match. If S1 = S2, the mismatch index
is simply the length of the strings; we observe the protocol
in this redundant case for uniformity.
substring-compare s1 start1 end1 s2 start2 end2 lt-proc eq-proc gt-proc -> values
substring-compare-ci s1 start1 end1 s2 start2 end2 lt-proc eq-proc gt-proc -> values
The continuation procedures are applied to S1's mismatch index (as defined
above). In the case of EQ-PROC, this is always END1.
string= s1 s2 -> #f or integer
string<> s1 s2 -> #f or integer
string< s1 s2 -> #f or integer
string> s1 s2 -> #f or integer
string<= s1 s2 -> #f or integer
string>= s1 s2 -> #f or integer
If the comparison operation is true, the function returns the
mismatch index (as defined for the previous comparator functions).
string-ci= s1 s2 -> #f or integer
string-ci<> s1 s2 -> #f or integer
string-ci< s1 s2 -> #f or integer
string-ci> s1 s2 -> #f or integer
string-ci<= s1 s2 -> #f or integer
string-ci>= s1 s2 -> #f or integer
Case-insensitive variants.
substring= s1 start1 end1 s2 start2 end2 -> #f or integer
substring<> s1 start1 end1 s2 start2 end2 -> #f or integer
substring< s1 start1 end1 s2 start2 end2 -> #f or integer
substring> s1 start1 end1 s2 start2 end2 -> #f or integer
substring<= s1 start1 end1 s2 start2 end2 -> #f or integer
substring>= s1 start1 end1 s2 start2 end2 -> #f or integer
substring-ci= s1 start1 end1 s2 start2 end2 -> #f or integer
substring-ci<> s1 start1 end1 s2 start2 end2 -> #f or integer
substring-ci< s1 start1 end1 s2 start2 end2 -> #f or integer
substring-ci> s1 start1 end1 s2 start2 end2 -> #f or integer
substring-ci<= s1 start1 end1 s2 start2 end2 -> #f or integer
substring-ci>= s1 start1 end1 s2 start2 end2 -> #f or integer
These variants restrict the comparison to the indicated
substrings of S1 and S2.
string-upper-case? s [start end] -> boolean
string-lower-case? s [start end] -> boolean
STRING-UPPER-CASE? returns true iff the string contains
no lower-case characters. STRING-LOWER-CASE returns true
iff the string contains no upper-case characters.
(string-upper-case? "") => #t
(string-lower-case? "") => #t
(string-upper-case? "FOOb") => #f
(string-upper-case? "U.S.A.") => #t
capitalize-string s [start end] -> string
capitalize-string! s [start end] -> unspecified
Capitalize the string: upcase the first alphanumeric character,
and downcase the rest of the string. CAPITALIZE-STRING returns
a freshly allocated string.
(capitalize-string "--capitalize tHIS sentence.") =>
"--Capitalize this sentence."
(capitalize-string "see Spot run. see Nix run.") =>
"See spot run. see nix run."
(capitalize-string "3com makes routers.") =>
"3com makes routers."
capitalize-words s [start end] -> string
capitalize-words! s [start end] -> unspecified
A "word" is a maximal contiguous sequence of alphanumeric characters.
Upcase the first character of every word; downcase the rest of the word.
CAPITALIZE-WORDS returns a freshly allocated string.
(capitalize-words "HELLO, 3THErE, my nAME IS olin") =>
"Hello, 3there, My Name Is Olin"
More sophisticated capitalisation procedures can be synthesized
using CAPITALIZE-STRING and pattern matchers. In this context,
the REGEXP-SUBSTITUTE/GLOBAL procedure may be useful for picking
out the units to be capitalised and applying CAPITALIZE-STRING to
their components.
string-upcase s [start end] -> string
string-upcase! s [start end] -> unspecified
string-downcase s [start end] -> string
string-downcase! s [start end] -> unspecified
Raise or lower the case of the alphabetic characters in the string.
STRING-UPCASE and STRING-DOWNCASE return freshly allocated strings.
string-take s nchars -> string
string-drop s nchars -> string
string-take-right s nchars -> string
string-drop-right s nchars -> string
STRING-TAKE returns the first NCHARS of STRING;
STRING-DROP returns all but the first NCHARS of STRING.
STRING-TAKE-RIGHT returns the last NCHARS of STRING;
STRING-DROP-RIGHT returns all but the last NCHARS of STRING.
These generalise MIT Scheme's HEAD & TAIL functions.
If these procedures produce the entire string, they may return either
S or a copy of S; in some implementations, proper substrings may share
memory with S.
string-pad s k [char start end] -> string
string-pad-right s k [char start end] -> string
Build a string of length K comprised of S padded on the left (right)
by as many occurences of the character CHAR as needed. If S has more
than K chars, it is truncated on the left (right) to length k. CHAR
defaults to #\space.
If K is exactly the length of S, these functions may return
either S or a copy of S.
string-trim s [char/char-set/pred start end] -> string
string-trim-right s [char/char-set/pred start end] -> string
string-trim-both s [char/char-set/pred start end] -> string
Trim S by skipping over all characters on the left / on the right /
on both sides that satisfy the second parameter CHAR/CHAR-SET/PRED:
- If it is a character CHAR, characters equal to CHAR are trimmed.
- If it is a char set CHAR-SET, characters contained in CHAR-SET
are trimmed.
- If it is a predicate PRED, it is a test predicate that is applied
to the characters in S; a character causing it to return true
is skipped.
CHAR/CHAR/SET-PRED defaults to CHAR-SET:WHITESPACE.
If no trimming occurs, these functions may return either S or a copy of S;
in some implementations, proper substrings may share memory with S.
(string-trim-both " The outlook wasn't brilliant, \n\r")
=> "The outlook wasn't brilliant,"
string-filter s char/char-set/pred [start end] -> string
string-delete s char/char-set/pred [start end] -> string
Filter the string S, retaining only those characters that
satisfy / do not satisfy the CHAR/CHAR-SET/PRED argument. If
this argument is a procedure, it is applied to the character
as a predicate; if it is a char-set, the character is tested
for membership; if it is a character, it is used in an equality test.
If the string is unaltered by the filtering operation, these
functions may return either S or a copy of S.
string-index s char/char-set/pred [start end] -> integer or #f
string-index-right s char/char-set/pred [end start] -> integer or #f
string-skip s char/char-set/pred [start end] -> integer or #f
string-skip-right s char/char-set/pred [end start] -> integer or #f
Note the inverted start/end ordering of index-right and skip-right's
parameters.
Index (index-right) searches through the string from the left (right),
returning the index of the first occurence of a character which
- equals CHAR/CHAR-SET/PRED (if it is a character);
- is in CHAR/CHAR-SET/PRED (if it is a char-set);
- satisfies the predicate CHAR/CHAR-SET/PRED (if it is a procedure).
If no match is found, the functions return false.
The skip functions are similar, but use the complement of the criteria:
they search for the first char that *doesn't* satisfy the test. E.g.,
to skip over initial whitespace, say
(cond ((string-skip s char-set:whitespace) =>
(lambda (i)
;; (string-ref s i) is not whitespace.
...)))
string-prefix-count s1 s2 -> integer
string-suffix-count s1 s2 -> integer
string-prefix-count-ci s1 s2 -> integer
string-suffix-count-ci s1 s2 -> integer
Return the length of the longest common prefix/suffix of the two strings.
This is equivalent to the "mismatch index" for the strings.
substring-prefix-count s1 start1 end1 s2 start2 end2 -> integer
substring-suffix-count s1 start1 end1 s2 start2 end2 -> integer
substring-prefix-count-ci s1 start1 end1 s2 start2 end2 -> integer
substring-suffix-count-ci s1 start1 end1 s2 start2 end2 -> integer
Substring variants.
string-prefix? s1 s2 -> boolean
string-suffix? s1 s2 -> boolean
string-prefix-ci? s1 s2 -> boolean
string-suffix-ci? s1 s2 -> boolean
Is S1 a prefix/suffix of S2?
substring-prefix? s1 start1 end1 s2 start2 end2 -> boolean
substring-suffix? s1 start1 end1 s2 start2 end2 -> boolean
substring-prefix-ci? s1 start1 end1 s2 start2 end2 -> boolean
substring-suffix-ci? s1 start1 end1 s2 start2 end2 -> boolean
Substring variants.
substring? s1 s2 [start end] -> integer or false
substring-ci? s1 s2 [start end] -> integer or false
Return the index in S2 where S1 occurs as a substring, or false.
The returned index is in the range [start,end).
The current implementation uses the Knuth-Morris-Pratt algorithm.
string-fill! s char [start end] -> unspecified
Store CHAR into the elements of S.
This is the R4RS procedure extended to have optional START/END parameters.
string-copy! target tstart s [start end] -> unspecified
Copy the sequence of characters from index range [START,END) in
string S to string TARGET, beginning at index TSTART. The characters
are copied left-to-right or right-to-left as needed -- the copy is
guaranteed to work, even if TARGET and S are the same string.
substring s start [end] -> string
string-copy s [start end] -> string
These R4RS procedures are extended to have optional START/END parameters.
Use STRING-COPY when you want to indicate explicitly in your code that you
wish to allocate new storage; use SUBSTRING when you don't care if you
get a fresh copy or share storage with the original string.
E.g.:
(string-copy "Beta substitution") => "Beta substitution"
(string-copy "Beta substitution" 1 10)
=> "eta subst"
(string-copy "Beta substitution" 5) => "substitution"
SUBSTRING may return a value with shares memory with S.
string-reverse s [start end] -> string
string-reverse! s [start end] -> unspecific
Reverse the string.
reverse-list->string char-list -> string
An efficient implementation of (compose string->list reverse):
(reverse-list->string '(#\a #\B #\c)) -> "cBa"
This is a common idiom in the epilog of string-processing loops
that accumulate an answer in a reverse-order list.
string-concat string-list -> string
Append the elements of STRING-LIST together into a single list.
Guaranteed to return a freshly allocated list. Appears sufficiently
often as to warrant being named.
string-concat/shared string-list -> string
string-append/shared s ... -> string
These two procedures are variants of STRING-CONCAT and STRING-APPEND
that are permitted to return results that share storage with their
parameters. In particular, if STRING-APPEND/SHARED is applied to just
one argument, it may return exactly that argument, whereas STRING-APPEND
is required to allocate a fresh string.
string->list s [start end] -> char-list
The R5RS STRING->LIST procedure is extended to take optional START/END
arguments.
string-null? s -> bool
Is S the empty string?
xsubstring s from [to start end] -> string
This is the "extended substring" procedure that implements replicated
copying of a substring of some string.
S is a string; START and END are optional arguments that demarcate
a substring of S, defaulting to 0 and the length of S (e.g., the whole
string). Replicate this substring up and down index space, in both the
positive and negative directions. For example, if S = "abcdefg", START=3,
and END=6, then we have the conceptual bidirectionally-infinite string
... d e f d e f d e f d e f d e f d e f d e f ...
... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ...
XSUBSTRING returns the substring of this string beginning at index FROM,
and ending at TO (which defaults to FROM+(END-START)).
You can use XSUBSTRING to perform a variety of tasks:
- To rotate a string left: (xsubstring "abcdef" 2) => "cdefab"
- To rotate a string right: (xsubstring "abcdef" -2) => "efabcd"
- To replicate a string: (xsubstring "abc" 0 7) => "abcabca"
Note that
- The FROM/TO indices give a half-open range -- the characters from
index FROM up to, but not including, index TO.
- The FROM/TO indices are not in terms of the index space for string S.
They are in terms of the replicated index space of the substring
defined by S, START, and END.
It is an error if START=END -- although this is allowed by special
dispensation when FROM=TO.
string-xcopy! target tstart s sfrom [sto start end] -> unspecific
Exactly the same as XSUBSTRING, but the extracted text is written
into the string TARGET starting at index TSTART.
This operation is not defined if (EQ? TARGET S) -- you cannot copy
a string on top of itself.
* Lower-level procedures
The following procedures are useful for writing other string-processing
functions, and are contained in the string-lib-internals package.
parse-start+end proc s args -> [start end rest]
parse-final-start+end proc s args -> [start end]
PARSE-START+END may be used to parse a pair of optional START/END arguments
from an argument list, defaulting them to 0 and the length of some string
S, respectively. Let the length of string S be SLEN.
- If ARGS = (), the function returns (values 0 slen '())
- If ARGS = (i), I is checked to ensure it is an integer, and
that 0 <= i <= slen. Returns (values i slen (cdr rest)).
- If ARGS = (i j ...), I and J are checked to ensure they are
integers, and that 0 <= i <= j <= slen. Returns (values i j (cddr rest)).
If any of the checks fail, an error condition is raised, and PROC is used
as part of the error condition -- it should be the name of the client
procedure whose argument list PARSE-START+END is parsing.
parse-final-start+end is exactly the same, except that the args list
passed to it is required to be of length two or less; if it is longer,
an error condition is raised. It may be used when the optional START/END
parameters are final arguments to the procedure.
check-substring-spec proc s start end -> unspecific
Check values START and END to ensure they specify a valid substring
in S. This means that START and END are exact integers, and
0 <= START <= END <= (STRING-LENGTH S)
If this is not the case, an error condition is raised. PROC is used
as part of error condition, and should be the procedure whose START/END
parameters we are checking.
make-kmp-restart-vector s c= -> vector
Build the Knuth-Morris-Pratt "restart vector," which is useful
for quickly searching character sequences for the occurrence of
string S. C= is a character-equality function used to construct
the restart vector; it is usefully CHAR=? or CHAR-CI=?.
The definition of the restart vector RV for string S is:
If we have matched chars 0..i-1 of S against some search string SS, and
S[i] doesn't match SS[k], then reset i := RV[i], and try again to
match SS[k]. If RV[i] = -1, then punt SS[k] completely, and move on to
SS[k+1] and S[0].
In other words, if you have matched the first i chars of S, but
the i+1'th char doesn't match, RV[i] tells you what the next-longest
prefix of PATTERN is that you have matched.
The following string-search function shows how a restart vector
is used to search. It can be easily adapted to search other character
sequences (such as ports).
(define (find-substring pattern source start end)
(let ((plen (string-length pattern))
(rv (make-kmp-restart-vector pattern char=?)))
;; The search loop. SJ & PJ are redundant state.
(let lp ((si start) (pi 0)
(sj (- end start)) ; (- end si) -- how many chars left.
(pj plen)) ; (- plen pi) -- how many chars left.
(if (= pi plen) (- si plen) ; Win.
(and (<= pj sj) ; Lose.
(if (char=? (string-ref source si) ; Search.
(string-ref pattern pi))
(lp (+ 1 si) (+ 1 pi) (- sj 1) (- pj 1)) ; Advance.
(let ((pi (vector-ref rv pi))) ; Retreat.
(if (= pi -1)
(lp (+ si 1) 0 (- sj 1) plen) ; Punt.
(lp si pi sj (- plen pi))))))))))