added from 0.5.2
This commit is contained in:
parent
4e935c24a5
commit
58f90e2359
|
@ -0,0 +1,106 @@
|
||||||
|
;;; CPP Lib
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Character->Character Partial functions
|
||||||
|
|
||||||
|
;;; Many of these types are pretty weak, but there is no way to
|
||||||
|
;;; specify that a parameter must be a particular record type.
|
||||||
|
;;; Every little bit helps, though.
|
||||||
|
|
||||||
|
(define-interface ccp-lib-interface
|
||||||
|
(export
|
||||||
|
;; ccp? x -> boolean
|
||||||
|
(ccp? (proc (:value) :boolean))
|
||||||
|
|
||||||
|
;; ccp-domain ccp -> char-set
|
||||||
|
(ccp-domain (proc (:value) :value)) ; Not very informative.
|
||||||
|
|
||||||
|
;; ccp-copy ccp -> ccp
|
||||||
|
(ccp-copy (proc (:value) :value))
|
||||||
|
|
||||||
|
;; ccp= ccp1 ccp2 ...
|
||||||
|
;; ccp<= ccp1 ccp2 ...
|
||||||
|
((ccp= ccp<=) (proc (&rest :value) :boolean)) ; Not very informative.
|
||||||
|
|
||||||
|
;; ccp-fold kons knil ccp -> value
|
||||||
|
(ccp-fold (proc ((proc (:char :char :value) :value) :value :value) :value))
|
||||||
|
|
||||||
|
;; ccp-for-each proc ccp
|
||||||
|
(ccp-for-each (proc ((proc (:char :char) :values)) :unspecific))
|
||||||
|
|
||||||
|
;; ccp->alist ccp -> alist
|
||||||
|
(ccp->alist (proc (:value) :value))
|
||||||
|
|
||||||
|
;; ccp-restrict ccp cset -> ccp
|
||||||
|
;; ccp-restrict! ccp cset -> ccp
|
||||||
|
((ccp-restrict ccp-restrict!) (proc (:value :value) :value))
|
||||||
|
|
||||||
|
;; ccp-adjoin ccp from-char1 to-char1 ... -> ccp
|
||||||
|
;; ccp-adjoin! ccp from-char1 to-char1 ... -> ccp
|
||||||
|
;; ccp-delete ccp from-char1 ... -> ccp
|
||||||
|
;; ccp-delete! ccp from-char1 ... -> ccp
|
||||||
|
((ccp-adjoin ccp-adjoin!) (proc (:value &rest :char) :value))
|
||||||
|
((ccp-delete ccp-delete!) (proc (:value &rest :char) :value))
|
||||||
|
|
||||||
|
;; ccp-extend ccp1 ... -> ccp
|
||||||
|
;; ccp-extend! ccp1 ... -> ccp
|
||||||
|
((ccp-extend ccp-extend!) (proc (&rest :value) :value))
|
||||||
|
|
||||||
|
;; ccp-compose ccp1 ... -> ccp
|
||||||
|
(ccp-compose (proc (&rest :value) :value))
|
||||||
|
|
||||||
|
;; alist->ccp char/char-alist [ccp] -> ccp
|
||||||
|
;; alist->ccp! char/char-alist [ccp] -> ccp
|
||||||
|
((alist->ccp alist->ccp!) (proc (:value &opt :value) :value))
|
||||||
|
|
||||||
|
;; proc->ccp proc [domain ccp] -> ccp
|
||||||
|
;; proc->ccp! proc [domain ccp] -> ccp
|
||||||
|
((proc->ccp proc->ccp!) (proc ((proc (:char) :char) &opt :value :value)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
;; constant-ccp char [domain ccp] -> ccp
|
||||||
|
;; constant-ccp! char domain ccp -> ccp
|
||||||
|
((constant-ccp constant-ccp!) (proc (:char &opt :value :value) :value))
|
||||||
|
|
||||||
|
;; ccp/mappings from1 to1 ... -> ccp
|
||||||
|
;; extend-ccp/mappings ccp from1 to1 ... -> ccp
|
||||||
|
;; extend-ccp/mappings! ccp from1 to1 ... -> ccp
|
||||||
|
(ccp/mappings (proc (&rest :value) :value))
|
||||||
|
((extend-ccp/mappings extend-ccp/mappings!)
|
||||||
|
(proc (:value &rest :value) :value))
|
||||||
|
|
||||||
|
;; construct-ccp ccp elt ... -> ccp
|
||||||
|
;; construct-ccp! ccp elt ... -> ccp
|
||||||
|
((construct-ccp construct-ccp!) (proc (:value &rest :value) :value))
|
||||||
|
|
||||||
|
;; ccp-unfold p f g seed -> ccp
|
||||||
|
(ccp-unfold (proc ((proc (:value) :boolean)
|
||||||
|
(procedure :value (some-values :char :char))
|
||||||
|
(proc (:value) :value)
|
||||||
|
:value)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
;; tr ccp string [start end] -> string
|
||||||
|
;; ccp-map ccp string [start end] -> string
|
||||||
|
;; ccp-map! ccp string [start end]
|
||||||
|
;; ccp-app ccp char -> char or false
|
||||||
|
((tr ccp-map)
|
||||||
|
(proc (:value :string &opt :exact-integer :exact-integer) :string))
|
||||||
|
(ccp-map! (proc (:value :string &opt :exact-integer :exact-integer) :unspecific))
|
||||||
|
(ccp-app (proc (:value :char) :value))
|
||||||
|
|
||||||
|
;; Primitive CCP's.
|
||||||
|
ccp:0 ccp:1 ccp:upcase ccp:downcase
|
||||||
|
))
|
||||||
|
|
||||||
|
(define-structure ccp-lib ccp-lib-interface
|
||||||
|
(open char-set-package
|
||||||
|
ascii
|
||||||
|
defrec-package
|
||||||
|
string-lib
|
||||||
|
let-opt
|
||||||
|
receiving
|
||||||
|
list-lib ; EVERY
|
||||||
|
error-package
|
||||||
|
scheme)
|
||||||
|
(files ccp)
|
||||||
|
(optimize auto-integrate))
|
|
@ -0,0 +1,576 @@
|
||||||
|
;;; Char->char partial maps -*- Scheme -*-
|
||||||
|
;;; Copyright (C) 1998 by Olin Shivers.
|
||||||
|
|
||||||
|
;;; CCPs are an efficient data structure for doing simple string transforms,
|
||||||
|
;;; similar to the kinds of things you would do with the tr(1) program.
|
||||||
|
;;;
|
||||||
|
;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit
|
||||||
|
;;; character types would need a more sophisticated data structure, tuned
|
||||||
|
;;; for sparseness. I would suggest something like this:
|
||||||
|
;;; (define-record ccp
|
||||||
|
;;; domain ; The domain char-set
|
||||||
|
;;; map ; Sorted vector of (char . string) pairs
|
||||||
|
;;; ; specifying the map.
|
||||||
|
;;; id?) ; If true, mappings not specified by MAP are
|
||||||
|
;;; ; identity mapping. If false, MAP must
|
||||||
|
;;; ; specify a mapping for every char in DOMAIN.
|
||||||
|
;;;
|
||||||
|
;;; A (char . string) elements in MAP specifies a mapping for the contiguous
|
||||||
|
;;; sequence of L chars beginning with CHAR (in the sequence of the underlying
|
||||||
|
;;; char type representation), where L is the length of STRING. These MAP elements
|
||||||
|
;;; are sorted by CHAR, so that binary search can be used to get from an input
|
||||||
|
;;; character C to the right MAP element quickly.
|
||||||
|
;;;
|
||||||
|
;;; This representation should be reasonably compact for standard mappings on,
|
||||||
|
;;; say, a Unicode CCP. An implementation might wish to have a cache field
|
||||||
|
;;; in the record for storing the full 8kb bitset when performing ccp-map
|
||||||
|
;;; operations. Or, an implementation might want to store the Latin-1 subset
|
||||||
|
;;; of the map in a dense format, and keep the remainder in a sparse format.
|
||||||
|
|
||||||
|
(define num-chars (char-set-size char-set:full)) ; AKA 256.
|
||||||
|
|
||||||
|
(define-record ccp
|
||||||
|
domain ; The domain char-set
|
||||||
|
dshared? ; Is the domain value shared or linear?
|
||||||
|
map ; 256-elt string
|
||||||
|
mshared?) ; Is the map string shared or linear?
|
||||||
|
|
||||||
|
|
||||||
|
;;; Accessors and setters that manage the linear bookkeeping
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(define (ccp-domain ccp)
|
||||||
|
(set-ccp:dshared? ccp #t)
|
||||||
|
(ccp:domain ccp))
|
||||||
|
|
||||||
|
;;; CCP is a linear ccp. PROC is a domain->domain function; it must be
|
||||||
|
;;; linear in its parameter and result.
|
||||||
|
;;;
|
||||||
|
;;; Updates the domain of the CCP with PROC, returns the resulting
|
||||||
|
;;; CCP; reuses the old one to construct the new one.
|
||||||
|
|
||||||
|
(define (restrict-linear-ccp-domain ccp proc)
|
||||||
|
(let ((new-d (proc (if (ccp:dshared? ccp)
|
||||||
|
(begin (set-ccp:dshared? ccp #f)
|
||||||
|
(char-set-copy (ccp:domain ccp)))
|
||||||
|
(ccp:domain ccp)))))
|
||||||
|
(set-ccp:domain ccp new-d)
|
||||||
|
ccp))
|
||||||
|
|
||||||
|
;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function.
|
||||||
|
;;; It is passed a linear domain and cmap string. It may side-effect
|
||||||
|
;;; the cmap string, and returns the resulting updated domain.
|
||||||
|
;;; We return the resulting CCP, reusing the parameter to construct it.
|
||||||
|
|
||||||
|
(define (linear-update-ccp ccp proc)
|
||||||
|
(let* ((cmap (if (ccp:mshared? ccp)
|
||||||
|
(begin (set-ccp:mshared? ccp #f)
|
||||||
|
(string-copy (ccp:map ccp)))
|
||||||
|
(ccp:map ccp)))
|
||||||
|
|
||||||
|
(new-d (proc (if (ccp:dshared? ccp)
|
||||||
|
(begin (set-ccp:dshared? ccp #f)
|
||||||
|
(char-set-copy (ccp:domain ccp)))
|
||||||
|
(ccp:domain ccp))
|
||||||
|
cmap)))
|
||||||
|
(set-ccp:domain ccp new-d)
|
||||||
|
ccp))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;;; Return CCP's map field, and mark it as shared. CCP functions that
|
||||||
|
;;; restrict a ccp's domain share map strings, so they use this guy.
|
||||||
|
(define (ccp:map/shared ccp)
|
||||||
|
(set-ccp:mshared? ccp #t)
|
||||||
|
(ccp:map ccp))
|
||||||
|
|
||||||
|
(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f
|
||||||
|
(string-copy (ccp:map ccp)) #f))
|
||||||
|
|
||||||
|
;;; N-ary equality relation for partial maps
|
||||||
|
|
||||||
|
(define (ccp= ccp1 . rest)
|
||||||
|
(let ((domain (ccp:domain ccp1))
|
||||||
|
(cmap (ccp:map ccp1)))
|
||||||
|
(every (lambda (ccp2)
|
||||||
|
(and (char-set= domain (ccp:domain ccp2))
|
||||||
|
(let ((cmap2 (ccp:map ccp2)))
|
||||||
|
(char-set-every? (lambda (c)
|
||||||
|
(let ((i (char->ascii c)))
|
||||||
|
(char=? (string-ref cmap i)
|
||||||
|
(string-ref cmap2 i))))
|
||||||
|
domain))))
|
||||||
|
rest)))
|
||||||
|
|
||||||
|
|
||||||
|
;;; N-ary subset relation for partial maps
|
||||||
|
|
||||||
|
(define (ccp<= ccp1 . rest)
|
||||||
|
(let lp ((domain1 (ccp:domain ccp1))
|
||||||
|
(cmap1 (ccp:map ccp1))
|
||||||
|
(rest rest))
|
||||||
|
(or (not (pair? rest))
|
||||||
|
(let* ((ccp2 (car rest))
|
||||||
|
(domain2 (ccp:domain ccp2))
|
||||||
|
(cmap2 (ccp:map ccp2))
|
||||||
|
(rest (cdr rest)))
|
||||||
|
(and (char-set<= domain1 domain2)
|
||||||
|
(let ((cmap2 (ccp:map ccp2)))
|
||||||
|
(char-set-every? (lambda (c)
|
||||||
|
(let ((i (char->ascii c)))
|
||||||
|
(char=? (string-ref cmap1 i)
|
||||||
|
(string-ref cmap2 i))))
|
||||||
|
domain1))
|
||||||
|
(lp domain2 cmap2 rest))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CCP iterators
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(define (ccp-fold kons knil ccp)
|
||||||
|
(let ((cmap (ccp:map ccp)))
|
||||||
|
(char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v))
|
||||||
|
knil
|
||||||
|
(ccp:domain ccp))))
|
||||||
|
|
||||||
|
(define (ccp-for-each proc ccp)
|
||||||
|
(let ((cmap (ccp:map ccp)))
|
||||||
|
(char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c))))
|
||||||
|
(ccp:domain ccp))))
|
||||||
|
|
||||||
|
(define (ccp->alist ccp)
|
||||||
|
(ccp-fold (lambda (from to alist) (cons (cons from to) alist))
|
||||||
|
'()
|
||||||
|
ccp))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CCP-RESTRICT
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Restrict a ccp's domain.
|
||||||
|
|
||||||
|
(define (ccp-restrict ccp cset)
|
||||||
|
(make-ccp (char-set-intersection cset (ccp:domain ccp))
|
||||||
|
#f
|
||||||
|
(ccp:map/shared ccp)
|
||||||
|
#t))
|
||||||
|
|
||||||
|
(define (ccp-restrict! ccp cset)
|
||||||
|
(restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CCP-ADJOIN ccp from-char1 to-char1 ...
|
||||||
|
;;; CCP-DELETE ccp char1 ...
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Add & delete mappings to/from a ccp.
|
||||||
|
|
||||||
|
(define (ccp-delete ccp . chars)
|
||||||
|
(make-ccp (apply char-set-delete (ccp:domain ccp) chars)
|
||||||
|
#f
|
||||||
|
(ccp:map/shared ccp)
|
||||||
|
#t))
|
||||||
|
|
||||||
|
(define (ccp-delete! ccp . chars)
|
||||||
|
(restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars))))
|
||||||
|
|
||||||
|
|
||||||
|
(define (ccp-adjoin ccp . chars)
|
||||||
|
(let ((cmap (string-copy (ccp:map ccp))))
|
||||||
|
(make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars)
|
||||||
|
#f
|
||||||
|
cmap
|
||||||
|
#f)))
|
||||||
|
|
||||||
|
(define (ccp-adjoin! ccp . chars)
|
||||||
|
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars))))
|
||||||
|
|
||||||
|
(define (install-ccp-adjoin! cmap domain chars)
|
||||||
|
(let lp ((chars chars) (d domain))
|
||||||
|
(if (pair? chars)
|
||||||
|
(let ((from (car chars))
|
||||||
|
(to (cadr chars))
|
||||||
|
(chars (cddr chars)))
|
||||||
|
(string-set! cmap (char->ascii from) to)
|
||||||
|
(lp chars (char-set-adjoin! d from)))
|
||||||
|
d)))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CCP-EXTEND ccp1 ...
|
||||||
|
;;; CCP-EXTEND! ccp1 ccp2 ...
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; Extend ccp1 with ccp2, etc.
|
||||||
|
|
||||||
|
(define (ccp-extend . ccps)
|
||||||
|
(if (pair? ccps)
|
||||||
|
(let ((ccp0 (car ccps))
|
||||||
|
(ccps (cdr ccps)))
|
||||||
|
(if (pair? ccps)
|
||||||
|
(let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap.
|
||||||
|
;; The FOLD installs each ccp in CCPS into CMAP and produces
|
||||||
|
;; the new domain.
|
||||||
|
(make-ccp (fold (lambda (ccp d)
|
||||||
|
(install-ccp-extension! cmap d ccp))
|
||||||
|
(char-set-copy (ccp:domain ccp0))
|
||||||
|
ccps)
|
||||||
|
#f cmap #f))
|
||||||
|
|
||||||
|
ccp0)) ; Only 1 parameter
|
||||||
|
|
||||||
|
ccp:0)) ; 0 parameters
|
||||||
|
|
||||||
|
(define (ccp-extend! ccp0 . ccps)
|
||||||
|
(linear-update-ccp ccp0
|
||||||
|
(lambda (domain cmap)
|
||||||
|
(fold (lambda (ccp d) (install-ccp-extension! cmap d ccp))
|
||||||
|
domain
|
||||||
|
ccps))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Side-effect CMAP, linear-update and return DOMAIN.
|
||||||
|
(define (install-ccp-extension! cmap domain ccp)
|
||||||
|
(let ((cmap1 (ccp:map ccp))
|
||||||
|
(domain1 (ccp:domain ccp)))
|
||||||
|
(char-set-for-each (lambda (c)
|
||||||
|
(let ((i (char->ascii c)))
|
||||||
|
(string-set! cmap i (string-ref cmap1 i))))
|
||||||
|
domain1)
|
||||||
|
(char-set-union! domain domain1)))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; For each character C-IN in the original domain, we push it
|
||||||
|
;;; through the pipeline of CCPs. If we ever land outside the
|
||||||
|
;;; domain of a ccp, we punt C-IN. If we push it all the way
|
||||||
|
;;; through, we add C-IN to our result domain, and add the mapping
|
||||||
|
;;; into the cmap we are assembling.
|
||||||
|
;;;
|
||||||
|
;;; Looping this way avoids building up intermediate temporary
|
||||||
|
;;; CCPs. If CCP's were small bitsets, we might be better off
|
||||||
|
;;; slicing the double-nested loops the other way around.
|
||||||
|
|
||||||
|
(define (ccp-compose . ccps)
|
||||||
|
(cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1
|
||||||
|
((not (pair? (cdr ccps))) (car ccps)) ; 1 arg
|
||||||
|
(else
|
||||||
|
(let* ((v (list->vector ccps))
|
||||||
|
(vlen-2 (- (vector-length v) 2))
|
||||||
|
(cmap (make-string num-chars))
|
||||||
|
(d1 (ccp:domain (vector-ref v (+ vlen-2 1))))
|
||||||
|
(d (char-set-fold (lambda (c-in d)
|
||||||
|
(let lp ((c c-in) (i vlen-2))
|
||||||
|
(if (>= i 0)
|
||||||
|
(let ((ccp (vector-ref v i)))
|
||||||
|
(if (char-set-contains? (ccp:domain ccp) c)
|
||||||
|
(lp (string-ref (ccp:map ccp)
|
||||||
|
(char->ascii c))
|
||||||
|
(- i 1))
|
||||||
|
|
||||||
|
;; Lose: remove c-in from d.
|
||||||
|
(char-set-delete! d c-in)))
|
||||||
|
|
||||||
|
;; Win: C-IN -> C
|
||||||
|
(begin (string-set! cmap
|
||||||
|
(char->ascii c-in)
|
||||||
|
c)
|
||||||
|
d))))
|
||||||
|
(char-set-copy d1)
|
||||||
|
d1)))
|
||||||
|
(make-ccp d #f cmap #f)))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;;; ALIST->CPP
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(define (alist->ccp cc-alist . maybe-base-ccp)
|
||||||
|
(let ((base (:optional maybe-base-ccp ccp:0)))
|
||||||
|
(if (pair? cc-alist)
|
||||||
|
(let ((cmap (string-copy (ccp:map base))))
|
||||||
|
(make-ccp (install-ccp-alist! cmap
|
||||||
|
(char-set-copy (ccp:domain base))
|
||||||
|
cc-alist)
|
||||||
|
#f cmap #f))
|
||||||
|
base)))
|
||||||
|
|
||||||
|
(define (alist->ccp! alist base)
|
||||||
|
(linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist))))
|
||||||
|
|
||||||
|
;;; Side-effect CMAP, linear-update and return DOMAIN.
|
||||||
|
(define (install-ccp-alist! cmap domain alist)
|
||||||
|
(fold (lambda (from/to d) (let ((from (car from/to))
|
||||||
|
(to (cdr from/to)))
|
||||||
|
(string-set! cmap (char->ascii from) to)
|
||||||
|
(char-set-adjoin! domain from)))
|
||||||
|
domain
|
||||||
|
alist))
|
||||||
|
|
||||||
|
|
||||||
|
;;; PROC->CCP
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; (proc->ccp proc [domain base-ccp])
|
||||||
|
|
||||||
|
(define (proc->ccp proc . args)
|
||||||
|
(let-optionals args ((proc-domain char-set:full)
|
||||||
|
(base ccp:0))
|
||||||
|
(let ((cmap (string-copy (ccp:map base))))
|
||||||
|
(make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base))
|
||||||
|
proc proc-domain)
|
||||||
|
#f cmap #f))))
|
||||||
|
|
||||||
|
(define (proc->ccp! proc proc-domain base)
|
||||||
|
(linear-update-ccp base
|
||||||
|
(lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain))))
|
||||||
|
|
||||||
|
(define (install-ccp-proc! cmap domain proc proc-domain)
|
||||||
|
(char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c)))
|
||||||
|
proc-domain)
|
||||||
|
(char-set-union! domain proc-domain))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CONSTANT-CCP
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; (constant-ccp char [domain base-ccp])
|
||||||
|
;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR.
|
||||||
|
;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0.
|
||||||
|
|
||||||
|
(define (constant-ccp char . args)
|
||||||
|
(let-optionals args ((char-domain char-set:full) (base ccp:0))
|
||||||
|
(let ((cmap (string-copy (ccp:map base))))
|
||||||
|
(make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base))
|
||||||
|
char char-domain)
|
||||||
|
#f cmap #f))))
|
||||||
|
|
||||||
|
(define (constant-ccp! char char-domain base)
|
||||||
|
(linear-update-ccp base
|
||||||
|
(lambda (d cmap) (install-constant-ccp! cmap d char char-domain))))
|
||||||
|
|
||||||
|
;;; Install the constant mapping into CMAP0 by side-effect,
|
||||||
|
;;; linear-update & return DOMAIN0 with the constant-mapping's domain.
|
||||||
|
(define (install-constant-ccp! cmap0 domain0 char char-domain)
|
||||||
|
(char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char))
|
||||||
|
char-domain)
|
||||||
|
(char-set-union! domain0 char-domain))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CCP/MAPPINGS
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp
|
||||||
|
;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp
|
||||||
|
;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp
|
||||||
|
;;; Each FROM element is either a string or a (lo-char . hi-char) range.
|
||||||
|
;;; Each TO element is either a string or a lo-char. Strings are replicated
|
||||||
|
;;; to match the length of the corresponding FROM element.
|
||||||
|
;;; CCP/MAPPINGS's base CCP is CCP:0
|
||||||
|
;;;
|
||||||
|
;;; Tedious code.
|
||||||
|
|
||||||
|
;;; Internal utility.
|
||||||
|
;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect.
|
||||||
|
;;; Return the new domain.
|
||||||
|
|
||||||
|
(define (install-ccp-mapping-pair! cmap domain from to)
|
||||||
|
;; Tedium -- four possibilities here:
|
||||||
|
;; str->str, str->lo-char,
|
||||||
|
;; range->str, range->lo-char.
|
||||||
|
(if (string? from)
|
||||||
|
(if (string? to)
|
||||||
|
;; "abc" -> "ABC"
|
||||||
|
(let ((len1 (string-length from))
|
||||||
|
(len2 (string-length to)))
|
||||||
|
(let lp2 ((i (- len1 1))
|
||||||
|
(j (modulo (- len2 1) len1))
|
||||||
|
(d domain))
|
||||||
|
(if (>= i 0)
|
||||||
|
(let ((c (string-ref from i)))
|
||||||
|
(string-set! cmap
|
||||||
|
(char->ascii c)
|
||||||
|
(string-ref to i))
|
||||||
|
(lp2 (- i 1)
|
||||||
|
(- (if (> j 0) j len2) 1)
|
||||||
|
(char-set-adjoin! d c)))
|
||||||
|
d)))
|
||||||
|
|
||||||
|
;; "abc" -> #\A
|
||||||
|
(let lp2 ((i (- (string-length from) 1))
|
||||||
|
(j (char->ascii to))
|
||||||
|
(d domain))
|
||||||
|
(if (>= i 0)
|
||||||
|
(let ((c (string-ref from i)))
|
||||||
|
(string-set! cmap
|
||||||
|
(char->ascii c)
|
||||||
|
(ascii->char j))
|
||||||
|
(lp2 (- i 1)
|
||||||
|
(- j 1)
|
||||||
|
(char-set-adjoin! d c)))
|
||||||
|
d)))
|
||||||
|
|
||||||
|
(let ((from-start (char->ascii (car from)))
|
||||||
|
(from-end (char->ascii (cdr from))))
|
||||||
|
(if (string? to)
|
||||||
|
(let ((len2-1 (- (string-length to) 1)))
|
||||||
|
;; (#\a . #\c) -> "ABC"
|
||||||
|
(let lp2 ((i from-start) (j 0) (d domain))
|
||||||
|
(if (<= i from-end)
|
||||||
|
(let ((c (string-ref to j)))
|
||||||
|
(string-set! cmap i c)
|
||||||
|
(lp2 (+ i 1)
|
||||||
|
(if (= j len2-1) 0 (+ j 1))
|
||||||
|
(char-set-adjoin! d c)))
|
||||||
|
d)))
|
||||||
|
|
||||||
|
;; (#\a . #\c) -> #\A
|
||||||
|
(do ((i from-start (+ i 1))
|
||||||
|
(j (char->ascii to) (+ j 1))
|
||||||
|
(d domain (begin (string-set! cmap i (ascii->char j))
|
||||||
|
(char-set-adjoin d (ascii->char i)))))
|
||||||
|
((> i from-end) d))))))
|
||||||
|
|
||||||
|
;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN.
|
||||||
|
(define (install-mapping-pairs cmap domain args)
|
||||||
|
(let lp ((domain domain) (args args))
|
||||||
|
(if (pair? args)
|
||||||
|
(lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args))
|
||||||
|
(cddr args))
|
||||||
|
domain)))
|
||||||
|
|
||||||
|
(define (ccp/mappings . args)
|
||||||
|
(let ((cmap (make-string num-chars)))
|
||||||
|
(make-ccp (install-mapping-pairs (make-string num-chars)
|
||||||
|
(char-set-copy char-set:empty)
|
||||||
|
args)
|
||||||
|
#f cmap #f)))
|
||||||
|
|
||||||
|
(define (extend-ccp/mappings base . args)
|
||||||
|
(let ((cmap (string-copy (ccp:map base))))
|
||||||
|
(make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args)
|
||||||
|
#f cmap #f)))
|
||||||
|
|
||||||
|
(define (extend-ccp/mappings! base . args)
|
||||||
|
(linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CONSTRUCT-CCP! ccp elt ...
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; The kitchen-sink constructor; static typing be damned.
|
||||||
|
;;; ELTS are interpreted as follows:
|
||||||
|
;;; (lo-char . hi-char) to-string|lo-char ; ccp/range
|
||||||
|
;;; from-string to-string|lo-char ; ccp/range
|
||||||
|
;;; ccp ; ccp-extend
|
||||||
|
;;; alist ; alist->ccp
|
||||||
|
;;; domain char ; ccp-constant
|
||||||
|
;;; domain proc ; proc->ccp
|
||||||
|
|
||||||
|
(define (construct-ccp! ccp . elts)
|
||||||
|
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts))))
|
||||||
|
|
||||||
|
(define (construct-ccp base . elts)
|
||||||
|
(let ((cmap (string-copy (ccp:map base))))
|
||||||
|
(make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts)
|
||||||
|
#f cmap #f)))
|
||||||
|
|
||||||
|
;;; Install the mappings into CMAP by side-effect,
|
||||||
|
;;; linear-update & return DOMAIN with the final domain.
|
||||||
|
|
||||||
|
(define (install-ccp-construct! cmap domain elts)
|
||||||
|
(let lp ((d domain) (elts elts))
|
||||||
|
;(format #t "d=~s elts=~s\n" d elts)
|
||||||
|
(if (not (pair? elts)) d
|
||||||
|
(let ((elt (car elts))
|
||||||
|
(elts (cdr elts)))
|
||||||
|
(cond ((pair? elt)
|
||||||
|
(cond ((pair? (car elt)) ; ELT is an alist.
|
||||||
|
(lp (install-ccp-alist! cmap d elt) elts))
|
||||||
|
((char? (car elt)) ; ELT is (lo-char . hi-char) range.
|
||||||
|
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
|
||||||
|
(cdr elts)))
|
||||||
|
(else (error "Illegal elt to construct-ccp" elt))))
|
||||||
|
|
||||||
|
((string? elt)
|
||||||
|
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
|
||||||
|
(cdr elts)))
|
||||||
|
|
||||||
|
((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts))
|
||||||
|
|
||||||
|
((char-set? elt)
|
||||||
|
(let ((elt2 (car elts))
|
||||||
|
(elts (cdr elts)))
|
||||||
|
(lp (cond ((char? elt2)
|
||||||
|
(install-constant-ccp! cmap d elt2 elt))
|
||||||
|
((procedure? elt2)
|
||||||
|
(install-ccp-proc! cmap d elt2 elt))
|
||||||
|
(else (error "Illegal elt-pair to construct-ccp"
|
||||||
|
elt elt2)))
|
||||||
|
elts)))
|
||||||
|
|
||||||
|
(else (error "Illegal elt to construct-ccp" elt)))))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; CCP unfold
|
||||||
|
|
||||||
|
(define (ccp-unfold p f g seed)
|
||||||
|
(let lp ((seed seed) (ccp (ccp-copy ccp:0)))
|
||||||
|
(if (p seed) ccp
|
||||||
|
(lp (g seed)
|
||||||
|
(receive (from to) (f seed)
|
||||||
|
(lp (g seed) (ccp-adjoin! ccp from to)))))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;;; Using CCPs
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; TR ccp string [start end] -> string
|
||||||
|
;;; CCP-MAP ccp string [start end] -> string
|
||||||
|
;;; CCP-MAP! ccp string [start end] -> undefined
|
||||||
|
;;; CCP-APP ccp char -> char or false
|
||||||
|
|
||||||
|
;;; If a char in S is not in CCP's domain, it is dropped from the result.
|
||||||
|
;;; You can use this to map and delete chars from a string.
|
||||||
|
|
||||||
|
(define (tr ccp s . maybe-start+end)
|
||||||
|
(let-optionals maybe-start+end ((start 0) (end (string-length s)))
|
||||||
|
;; Count up the chars in S that are in the domain,
|
||||||
|
;; and allocate the answer string ANS:
|
||||||
|
(let* ((len (- end start))
|
||||||
|
(domain (ccp:domain ccp))
|
||||||
|
(ans-len (string-fold (lambda (c numchars)
|
||||||
|
(if (char-set-contains? domain c)
|
||||||
|
(+ numchars 1)
|
||||||
|
numchars))
|
||||||
|
0 s start end))
|
||||||
|
(ans (make-string ans-len)))
|
||||||
|
|
||||||
|
;; Apply the map, installing the resulting chars into ANS:
|
||||||
|
(string-fold (lambda (c i) (cond ((ccp-app ccp c) =>
|
||||||
|
(lambda (c)
|
||||||
|
(string-set! ans i c)
|
||||||
|
(+ i 1)))
|
||||||
|
(else i))) ; Not in domain -- drop it.
|
||||||
|
0 s start end)
|
||||||
|
ans)))
|
||||||
|
|
||||||
|
(define (ccp-map ccp s . maybe-start+end)
|
||||||
|
(apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end))
|
||||||
|
|
||||||
|
(define (ccp-map! ccp s . maybe-start+end)
|
||||||
|
(apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end))
|
||||||
|
|
||||||
|
(define (ccp-app ccp char)
|
||||||
|
(and (char-set-contains? (ccp:domain ccp) char)
|
||||||
|
(string-ref (ccp:map ccp) (char->ascii char))))
|
||||||
|
|
||||||
|
|
||||||
|
;;; Primitive CCPs
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(define id-cmap
|
||||||
|
(let ((m (make-string num-chars)))
|
||||||
|
(do ((i (- num-chars 1) (- i 1)))
|
||||||
|
((< i 0))
|
||||||
|
(string-set! m i (ascii->char i)))
|
||||||
|
m))
|
||||||
|
|
||||||
|
(define ccp:0 (make-ccp char-set:empty #t id-cmap #t))
|
||||||
|
(define ccp:1 (make-ccp char-set:full #t id-cmap #t))
|
||||||
|
|
||||||
|
(define ccp:upcase (proc->ccp char-upcase char-set:full))
|
||||||
|
(define ccp:downcase (proc->ccp char-downcase char-set:full))
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,235 @@
|
||||||
|
;;; This is a Scheme48 interface spec for the SRFI-1 list-lib package.
|
||||||
|
;;; It defines the LIST-LIB-INTERFACE interface and LIST-LIB structure.
|
||||||
|
;;; Bindings are typed as tightly as one can in Scheme48's type language.
|
||||||
|
;;; -Olin Shivers
|
||||||
|
;;; shivers@ai.mit.edu
|
||||||
|
|
||||||
|
;;; list-lib
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; xcons cons* make-list list-tabulate list-copy circular-list iota
|
||||||
|
;;; proper-list? circular-list? dotted-list? not-pair? null-list? list=
|
||||||
|
;;; first second third fourth fifth sixth seventh eighth ninth tenth
|
||||||
|
;;; car+cdr
|
||||||
|
;;; take drop
|
||||||
|
;;; take-right drop-right
|
||||||
|
;;; take! drop-right!
|
||||||
|
;;; last last-pair
|
||||||
|
;;; length+
|
||||||
|
;;; append! reverse! append-reverse append-reverse!
|
||||||
|
;;; zip unzip1 unzip2 unzip3 unzip4 unzip5
|
||||||
|
;;; count
|
||||||
|
;;; unfold unfold-right
|
||||||
|
;;; fold unfold pair-fold reduce
|
||||||
|
;;; fold-right unfold-right pair-fold-right reduce-right
|
||||||
|
;;; append-map append-map! map! pair-for-each filter-map map-in-order
|
||||||
|
;;; filter partition remove
|
||||||
|
;;; filter! partition! remove!
|
||||||
|
;;; find find-tail any every list-index
|
||||||
|
;;; delete delete! delete-duplicates delete-duplicates!
|
||||||
|
;;; alist-cons alist-copy
|
||||||
|
;;; alist-delete alist-delete!
|
||||||
|
;;;
|
||||||
|
;;; lset<= lset= lset-adjoin
|
||||||
|
;;; lset-union lset-union!
|
||||||
|
;;; lset-intersection lset-intersection!
|
||||||
|
;;; lset-difference lset-difference!
|
||||||
|
;;; lset-xor lset-xor!
|
||||||
|
;;; lset-diff+intersection lset-diff+intersection!
|
||||||
|
;;;
|
||||||
|
;;; map for-each member assoc (Extended R4RS procedures)
|
||||||
|
;;;
|
||||||
|
;;; cons pair? null? list length append reverse (These are the
|
||||||
|
;;; car cdr ... cdddar cddddr set-car! set-cdr! list-ref R4RS procedures
|
||||||
|
;;; memq memv assoc assq assv re-exported by
|
||||||
|
;;; list-lib unchanged.)
|
||||||
|
|
||||||
|
(define-interface list-lib-interface
|
||||||
|
(export
|
||||||
|
;; xcons <cdr> <car>
|
||||||
|
(xcons (proc (:value :value) :value))
|
||||||
|
|
||||||
|
;; cons* item ...
|
||||||
|
(cons* (proc (:value &rest :value) :value))
|
||||||
|
|
||||||
|
;; make-list len [fill]
|
||||||
|
(make-list (proc (:exact-integer &opt :value) :value))
|
||||||
|
|
||||||
|
;; list-tabulate elt-proc len
|
||||||
|
(list-tabulate (proc (:exact-integer (proc (:exact-integer) :value)) :value))
|
||||||
|
|
||||||
|
;; list-copy lis
|
||||||
|
(list-copy (proc (:value) :value))
|
||||||
|
|
||||||
|
(circular-list (proc (:value &opt :value) :pair))
|
||||||
|
|
||||||
|
; ((:iota iota:)
|
||||||
|
; (proc (:number &opt :number :number) :value))
|
||||||
|
|
||||||
|
(iota (proc (:exact-integer &opt :number :number) :value))
|
||||||
|
|
||||||
|
(proper-list? (proc (:value) :boolean))
|
||||||
|
(dotted-list? (proc (:value) :boolean))
|
||||||
|
(circular-list? (proc (:value) :boolean))
|
||||||
|
|
||||||
|
(not-pair? (proc (:value) :boolean))
|
||||||
|
(null-list? (proc (:value) :boolean))
|
||||||
|
|
||||||
|
(list= (proc ((proc (:value :value) :boolean) &rest :value) :boolean))
|
||||||
|
|
||||||
|
((first second third fourth fifth sixth seventh eighth ninth tenth)
|
||||||
|
(proc (:pair) :value))
|
||||||
|
|
||||||
|
(car+cdr (proc (:pair) (some-values :value :value)))
|
||||||
|
|
||||||
|
;; take lis i take-right lis i
|
||||||
|
;; drop lis i drop-right lis i
|
||||||
|
;; take! lis i drop-right! lis i
|
||||||
|
((take drop take-right drop-right take! drop-right!)
|
||||||
|
(proc (:value :exact-integer) :value))
|
||||||
|
|
||||||
|
(last (proc (:pair) :value))
|
||||||
|
(last-pair (proc (:pair) :pair))
|
||||||
|
|
||||||
|
(length+ (proc (:value) :value))
|
||||||
|
(append! (proc (:value &rest :value) :value))
|
||||||
|
(reverse! (proc (:value) :value))
|
||||||
|
((append-reverse append-reverse!) (proc (:value :value) :value))
|
||||||
|
|
||||||
|
(zip (proc (:value &rest :value) :value))
|
||||||
|
(unzip1 (proc (:value) :value))
|
||||||
|
(unzip2 (proc (:value) (some-values :value :value)))
|
||||||
|
(unzip3 (proc (:value) (some-values :value :value :value)))
|
||||||
|
(unzip4 (proc (:value) (some-values :value :value :value :value)))
|
||||||
|
(unzip5 (proc (:value) (some-values :value :value :value :value :value)))
|
||||||
|
|
||||||
|
(count (proc ((proc (:value) :boolean) :value) :exact-integer))
|
||||||
|
|
||||||
|
((fold fold-right)
|
||||||
|
(proc ((proc (:value :value &rest :value) :value)
|
||||||
|
:value :value &rest :value)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
((unfold unfold-right) (proc ((proc (:value) :boolean)
|
||||||
|
(proc (:value) :value)
|
||||||
|
(proc (:value) :value)
|
||||||
|
:value
|
||||||
|
&opt (proc (:value) :value))
|
||||||
|
:value))
|
||||||
|
|
||||||
|
((pair-fold pair-fold-right)
|
||||||
|
(proc ((proc (:pair :value &rest :value) :value)
|
||||||
|
:value :value &rest :value)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
((reduce reduce-right)
|
||||||
|
(proc ((proc (:value :value) :value) :value :value) :value))
|
||||||
|
|
||||||
|
((append-map append-map! map! filter-map map-in-order)
|
||||||
|
(proc ((proc (:value &rest :value) :value) :value &rest :value) :value))
|
||||||
|
|
||||||
|
(pair-for-each (proc ((proc (:pair &rest :pair) :values) :value &rest :value)
|
||||||
|
:unspecific))
|
||||||
|
|
||||||
|
((filter filter! remove remove!)
|
||||||
|
(proc ((proc (:value) :boolean) :value) :value))
|
||||||
|
|
||||||
|
((partition partition!) (proc ((proc (:value) :boolean) :value)
|
||||||
|
(some-values :value :value)))
|
||||||
|
|
||||||
|
((find find-tail) (proc ((proc (:value) :boolean) :value) :value))
|
||||||
|
|
||||||
|
((any every)
|
||||||
|
(proc ((proc (:value &rest :value) :value) :value &rest :value) :value))
|
||||||
|
|
||||||
|
(list-index (proc ((proc (:value &rest :value) :value) :value &rest :value)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
((delete delete!)
|
||||||
|
(proc (:value :value &opt (proc (:value :value) :boolean)) :value))
|
||||||
|
|
||||||
|
;; Extended from their R5RS definitions to take an optional comparison
|
||||||
|
;; function: (MEMBER x lis [=]).
|
||||||
|
(member (proc (:value :value &opt (proc (:value :value) :boolean)) :value))
|
||||||
|
(assoc (proc (:value :value &opt (proc (:value :value) :boolean)) :value))
|
||||||
|
|
||||||
|
((delete-duplicates delete-duplicates!)
|
||||||
|
(proc (:value &opt (proc (:value :value) :boolean)) :value))
|
||||||
|
|
||||||
|
(alist-cons (proc (:value :value :value) :value))
|
||||||
|
(alist-copy (proc (:value) :value))
|
||||||
|
((alist-delete alist-delete!)
|
||||||
|
(proc (:value :value &opt (proc (:value :value) :value)) :value))
|
||||||
|
|
||||||
|
;; Extended from their R4RS definitions.
|
||||||
|
(map (proc ((proc (:value) :value) &rest :value) :value))
|
||||||
|
(for-each (proc ((proc (:value) :values) &rest :value) :unspecific))
|
||||||
|
|
||||||
|
;; R4RS exports
|
||||||
|
(cons (proc (:value :value) :pair))
|
||||||
|
((pair? null?) (proc (:value) :boolean))
|
||||||
|
(list-ref (proc (:value :exact-integer) :value))
|
||||||
|
(list (proc (&rest :value) :value))
|
||||||
|
(length (proc (:value) :exact-integer))
|
||||||
|
(append (proc (&rest :value) :value))
|
||||||
|
(reverse (proc (:value) :value))
|
||||||
|
((car cdr
|
||||||
|
caaaar caaadr caadar caaddr caaar caadr caar
|
||||||
|
cadaar cadadr caddar cadddr cadar caddr cadr
|
||||||
|
cdaaar cdaadr cdadar cdaddr cdaar cdadr cdar
|
||||||
|
cddaar cddadr cdddar cddddr cddar cdddr cddr) (proc (:value) :value))
|
||||||
|
((set-car! set-cdr!) (proc (:pair :value) :unspecific))
|
||||||
|
((memq memv) (proc (:value :value) :value))
|
||||||
|
((assq assv) (proc (:value :value) :value))
|
||||||
|
|
||||||
|
|
||||||
|
;; lset-lib
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; lset<= = list1 list2 ...
|
||||||
|
;; lset= = list1 list2 ...
|
||||||
|
;; lset-adjoin = list elt1 ...
|
||||||
|
;; lset-union = list1 ...
|
||||||
|
;; lset-intersection = list1 list2 ...
|
||||||
|
;; lset-difference = list1 list2 ...
|
||||||
|
;; lset-xor = list1 ...
|
||||||
|
;; lset-diff+intersection = list1 list2 ...
|
||||||
|
;; ... and their side effecting counterparts:
|
||||||
|
;; lset-union! lset-intersection! lset-difference! lset-xor!
|
||||||
|
;; lset-diff+intersection!
|
||||||
|
|
||||||
|
;; lset= = list1 ... -> boolean
|
||||||
|
;; lset<= = list1 ... -> boolean
|
||||||
|
((lset= lset<=)
|
||||||
|
(proc ((proc (:value :value) :boolean) &rest :value) :boolean))
|
||||||
|
|
||||||
|
;; lset-adjoin = list elt1 ...
|
||||||
|
(lset-adjoin (proc ((proc (:value :value) :boolean) :value &rest :value) :value))
|
||||||
|
|
||||||
|
;; lset-union = list1 ... lset-xor = list1 ...
|
||||||
|
;; lset-union! = list1 ... lset-xor! = list1 ...
|
||||||
|
((lset-union lset-xor)
|
||||||
|
(proc ((proc (:value :value) :boolean) &rest :value) :value))
|
||||||
|
|
||||||
|
;; lset-intersection = list1 list2 ...
|
||||||
|
;; lset-intersection! = list1 list2 ...
|
||||||
|
;; lset-difference = list1 list2 ...
|
||||||
|
;; lset-difference! = list1 list2 ...
|
||||||
|
((lset-intersection lset-difference
|
||||||
|
lset-intersection! lset-difference!)
|
||||||
|
(proc ((proc (:value :value) :boolean) :value &rest :value) :value))
|
||||||
|
|
||||||
|
;; lset-diff+intersection = list1 list2 ...
|
||||||
|
;; lset-diff+intersection! = list1 list2 ...
|
||||||
|
((lset-diff+intersection lset-diff+intersection!)
|
||||||
|
(proc ((proc (:value :value) :boolean) :value &rest :value)
|
||||||
|
(some-values :value :value)))
|
||||||
|
))
|
||||||
|
|
||||||
|
(define-structure list-lib list-lib-interface
|
||||||
|
(open error-package ; ERROR procedure
|
||||||
|
receiving ; RECEIVE m-v macro
|
||||||
|
let-opt ; LET-OPTIONALS and :OPTIONAL.
|
||||||
|
scheme)
|
||||||
|
(begin (define (check-arg pred val caller)
|
||||||
|
(let lp ((val val))
|
||||||
|
(if (pred val) val (lp (error "Bad argument" val pred caller))))))
|
||||||
|
(files list-lib))
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,315 @@
|
||||||
|
;;; string-lib
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; string-map string-map!
|
||||||
|
;;; string-fold string-fold-right
|
||||||
|
;;; string-unfold string-tabulate
|
||||||
|
;;; string-for-each string-iter
|
||||||
|
;;; string-every string-any
|
||||||
|
;;; string-compare string-compare-ci
|
||||||
|
;;; substring-compare substring-compare-ci
|
||||||
|
;;; string= string< string> string<= string>= string<>
|
||||||
|
;;; string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>
|
||||||
|
;;; substring= substring<> substring-ci= substring-ci<>
|
||||||
|
;;; substring< substring> substring-ci< substring-ci>
|
||||||
|
;;; substring<= substring>= substring-ci<= substring-ci>=
|
||||||
|
;;; string-upper-case? string-lower-case?
|
||||||
|
;;; capitalize-string capitalize-words string-downcase string-upcase
|
||||||
|
;;; capitalize-string! capitalize-words! string-downcase! string-upcase!
|
||||||
|
;;; string-take string-drop
|
||||||
|
;;; string-pad string-pad-right
|
||||||
|
;;; string-trim string-trim-right string-trim-both
|
||||||
|
;;; string-filter string-delete
|
||||||
|
;;; string-index string-index-right string-skip string-skip-right
|
||||||
|
;;; string-prefix-count string-prefix-count-ci
|
||||||
|
;;; string-suffix-count string-suffix-count-ci
|
||||||
|
;;; substring-prefix-count substring-prefix-count-ci
|
||||||
|
;;; substring-suffix-count substring-suffix-count-ci
|
||||||
|
;;; string-prefix? string-prefix-ci?
|
||||||
|
;;; string-suffix? string-suffix-ci?
|
||||||
|
;;; substring-prefix? substring-prefix-ci?
|
||||||
|
;;; substring-suffix? substring-suffix-ci?
|
||||||
|
;;; substring? substring-ci?
|
||||||
|
;;; string-fill! string-copy! string-copy substring
|
||||||
|
;;; string-reverse string-reverse! reverse-list->string
|
||||||
|
;;; string->list
|
||||||
|
;;; string-concat string-concat/shared string-append/shared
|
||||||
|
;;; xsubstring string-xcopy!
|
||||||
|
;;; string-null?
|
||||||
|
;;; join-strings
|
||||||
|
;;;
|
||||||
|
;;; string? make-string string string-length string-ref string-set!
|
||||||
|
;;; string-append list->string
|
||||||
|
|
||||||
|
(define-interface string-lib-interface
|
||||||
|
(export
|
||||||
|
;; string-map proc s [start end] -> s
|
||||||
|
(string-map (proc ((proc (:char) :char)
|
||||||
|
:string
|
||||||
|
&opt :exact-integer :exact-integer)
|
||||||
|
:string))
|
||||||
|
|
||||||
|
;; string-map! proc s [start end] -> unspecific
|
||||||
|
(string-map! (proc ((proc (:char) :values)
|
||||||
|
:string
|
||||||
|
&opt :exact-integer :exact-integer)
|
||||||
|
:unspecific))
|
||||||
|
|
||||||
|
;; string-fold kons knil s [start end] -> value
|
||||||
|
;; string-fold-right kons knil s [start end] -> value
|
||||||
|
((string-fold string-fold-right)
|
||||||
|
(proc ((proc (:char :value) :value)
|
||||||
|
:value :string
|
||||||
|
&opt :exact-integer :exact-integer)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
;; string-unfold p f g seed -> string
|
||||||
|
(string-unfold (proc ((proc (:value) :boolean)
|
||||||
|
(proc (:value) :char)
|
||||||
|
(proc (:value) :value)
|
||||||
|
:value)
|
||||||
|
:string))
|
||||||
|
|
||||||
|
; Enough is enough.
|
||||||
|
; ;; string-unfoldn p f g seed ... -> string
|
||||||
|
; (string-unfoldn (proc ((procedure :values :boolean)
|
||||||
|
; (procedure :values :char)
|
||||||
|
; (procedure :values :values)
|
||||||
|
; &rest :value)
|
||||||
|
; :string))
|
||||||
|
|
||||||
|
;; string-tabulate proc len -> string
|
||||||
|
(string-tabulate (proc ((proc (:exact-integer) :char) :exact-integer)
|
||||||
|
:string))
|
||||||
|
|
||||||
|
;; string-for-each proc s [start end] -> unspecific
|
||||||
|
;; string-iter proc s [start end] -> unspecific
|
||||||
|
((string-for-each string-iter)
|
||||||
|
(proc ((proc (:char) :values) :string &opt :exact-integer :exact-integer)
|
||||||
|
:unspecific))
|
||||||
|
|
||||||
|
;; string-every pred s [start end]
|
||||||
|
;; string-any pred s [start end]
|
||||||
|
(string-every
|
||||||
|
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
|
||||||
|
:boolean))
|
||||||
|
(string-any
|
||||||
|
(proc ((proc (:char) :boolean) :string &opt :exact-integer :exact-integer)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
;; string-compare string1 string2 lt-proc eq-proc gt-proc
|
||||||
|
;; string-compare-ci string1 string2 lt-proc eq-proc gt-proc
|
||||||
|
((string-compare string-compare-ci)
|
||||||
|
(proc (:string :string (proc (:exact-integer) :values)
|
||||||
|
(proc (:exact-integer) :values)
|
||||||
|
(proc (:exact-integer) :values))
|
||||||
|
:values))
|
||||||
|
|
||||||
|
;; substring-compare string1 start1 end1 string2 start2 end2 lt eq gt
|
||||||
|
;; substring-compare-ci string1 start1 end1 string2 start2 end2 lt eq gt
|
||||||
|
((substring-compare substring-compare-ci)
|
||||||
|
(proc (:string :exact-integer :exact-integer
|
||||||
|
:string :exact-integer :exact-integer
|
||||||
|
(proc (:exact-integer) :values)
|
||||||
|
(proc (:exact-integer) :values)
|
||||||
|
(proc (:exact-integer) :values))
|
||||||
|
:values))
|
||||||
|
|
||||||
|
;; string< string1 string2
|
||||||
|
((string= string< string> string<= string>= string<>
|
||||||
|
string-ci= string-ci< string-ci> string-ci<= string-ci>= string-ci<>)
|
||||||
|
(proc (&rest :string) :value))
|
||||||
|
|
||||||
|
;; substring< string1 start1 end1 string2 start2 end2
|
||||||
|
((substring= substring<> substring-ci= substring-ci<>
|
||||||
|
substring< substring> substring-ci< substring-ci>
|
||||||
|
substring<= substring>= substring-ci<= substring-ci>=)
|
||||||
|
(proc (:string :exact-integer :exact-integer
|
||||||
|
:string :exact-integer :exact-integer)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
;; string-upper-case? string [start end]
|
||||||
|
;; string-lower-case? string [start end]
|
||||||
|
((string-upper-case? string-lower-case?)
|
||||||
|
(proc (:string &opt :exact-integer :exact-integer) :boolean))
|
||||||
|
|
||||||
|
;; capitalize-string string [start end]
|
||||||
|
;; capitalize-words string [start end]
|
||||||
|
;; string-downcase string [start end]
|
||||||
|
;; string-upcase string [start end]
|
||||||
|
;; capitalize-string! string [start end]
|
||||||
|
;; capitalize-words! string [start end]
|
||||||
|
;; string-downcase! string [start end]
|
||||||
|
;; string-upcase! string [start end]
|
||||||
|
((capitalize-string capitalize-words string-downcase string-upcase)
|
||||||
|
(proc (:string &opt :exact-integer :exact-integer) :string))
|
||||||
|
((capitalize-string! capitalize-words! string-downcase! string-upcase!)
|
||||||
|
(proc (:string &opt :exact-integer :exact-integer) :unspecific))
|
||||||
|
|
||||||
|
;; string-take string nchars
|
||||||
|
;; string-drop string nchars
|
||||||
|
((string-take string-drop) (proc (:string :exact-integer) :string))
|
||||||
|
|
||||||
|
;; string-pad string k [char start end]
|
||||||
|
;; string-pad-right string k [char start end]
|
||||||
|
((string-pad string-pad-right)
|
||||||
|
(proc (:string :exact-integer &opt :char :exact-integer :exact-integer)
|
||||||
|
:string))
|
||||||
|
|
||||||
|
;; string-trim string [char/char-set/pred start end]
|
||||||
|
;; string-trim-right string [char/char-set/pred start end]
|
||||||
|
;; string-trim-both string [char/char-set/pred start end]
|
||||||
|
((string-trim string-trim-right string-trim-both)
|
||||||
|
(proc (:string &opt :value :exact-integer :exact-integer)
|
||||||
|
:string))
|
||||||
|
|
||||||
|
;; string-filter char/char-set/pred string [start end]
|
||||||
|
;; string-delete char/char-set/pred string [start end]
|
||||||
|
((string-filter string-delete)
|
||||||
|
(proc (:value :string &opt :exact-integer :exact-integer) :string))
|
||||||
|
|
||||||
|
;; string-index string char/char-set/pred [start end]
|
||||||
|
;; string-index-right string char/char-set/pred [end start]
|
||||||
|
;; string-skip string char/char-set/pred [start end]
|
||||||
|
;; string-skip-right string char/char-set/pred [end start]
|
||||||
|
((string-index string-index-right string-skip string-skip-right)
|
||||||
|
(proc (:string :value &opt :exact-integer :exact-integer)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
;; string-prefix-count string1 string2
|
||||||
|
;; string-suffix-count string1 string2
|
||||||
|
;; string-prefix-count-ci string1 string2
|
||||||
|
;; string-suffix-count-ci string1 string2
|
||||||
|
((string-prefix-count string-prefix-count-ci
|
||||||
|
string-suffix-count string-suffix-count-ci)
|
||||||
|
(proc (:string :string) :exact-integer))
|
||||||
|
|
||||||
|
;; substring-prefix-count string1 start1 end1 string2 start2 end2
|
||||||
|
;; substring-suffix-count string1 start1 end1 string2 start2 end2
|
||||||
|
;; substring-prefix-count-ci string1 start1 end1 string2 start2 end2
|
||||||
|
;; substring-suffix-count-ci string1 start1 end1 string2 start2 end2
|
||||||
|
((substring-prefix-count substring-prefix-count-ci
|
||||||
|
substring-suffix-count substring-suffix-count-ci)
|
||||||
|
(proc (:string :exact-integer :exact-integer
|
||||||
|
:string :exact-integer :exact-integer)
|
||||||
|
:exact-integer))
|
||||||
|
|
||||||
|
|
||||||
|
;; string-prefix? string1 string2
|
||||||
|
;; string-suffix? string1 string2
|
||||||
|
;; string-prefix-ci? string1 string2
|
||||||
|
;; string-suffix-ci? string1 string2
|
||||||
|
((string-prefix? string-prefix-ci?
|
||||||
|
string-suffix? string-suffix-ci?)
|
||||||
|
(proc (:string :string) :boolean))
|
||||||
|
|
||||||
|
;; substring-prefix? string1 start1 end1 string2 start2 end2
|
||||||
|
;; substring-suffix? string1 start1 end1 string2 start2 end2
|
||||||
|
;; substring-prefix-ci? string1 start1 end1 string2 start2 end2
|
||||||
|
;; substring-suffix-ci? string1 start1 end1 string2 start2 end2
|
||||||
|
((substring-prefix? substring-prefix-ci?
|
||||||
|
substring-suffix? substring-suffix-ci?)
|
||||||
|
(proc (:string :exact-integer :exact-integer
|
||||||
|
:string :exact-integer :exact-integer)
|
||||||
|
:boolean))
|
||||||
|
|
||||||
|
;; substring? pattern string [start end]
|
||||||
|
;; substring-ci? pattern string [start end]
|
||||||
|
((substring? substring-ci?)
|
||||||
|
(proc (:string :string &opt :exact-integer :exact-integer)
|
||||||
|
:value))
|
||||||
|
|
||||||
|
;; string-fill! string char [start end]
|
||||||
|
(string-fill! (proc (:string :char &opt :exact-integer :exact-integer)
|
||||||
|
:unspecific))
|
||||||
|
|
||||||
|
;; string-copy! to tstart from [fstart fend]
|
||||||
|
(string-copy! (proc (:string :exact-integer :string
|
||||||
|
&opt :exact-integer :exact-integer)
|
||||||
|
:unspecific))
|
||||||
|
|
||||||
|
;; string-copy s [start end] -> string
|
||||||
|
;; substring s start [end] -> string
|
||||||
|
(string-copy (proc (:string &opt :exact-integer :exact-integer) :string))
|
||||||
|
(substring (proc (:string :exact-integer &opt :exact-integer) :string))
|
||||||
|
|
||||||
|
;; string-reverse s [start end]
|
||||||
|
;; string-reverse! s [start end]
|
||||||
|
(string-reverse (proc (:string &opt :exact-integer :exact-integer) :string))
|
||||||
|
(string-reverse! (proc (:string &opt :exact-integer :exact-integer) :unspecific))
|
||||||
|
|
||||||
|
;; reverse-list->string char-list
|
||||||
|
;; string->list s [start end]
|
||||||
|
;; string-concat string-list
|
||||||
|
;; string-concat/shared string-list
|
||||||
|
;; string-append/shared s ...
|
||||||
|
(reverse-list->string (proc (:value) :string))
|
||||||
|
(string->list (proc (:string &opt :exact-integer :exact-integer) :value))
|
||||||
|
((string-concat string-concat/shared) (proc (:value) :string))
|
||||||
|
(string-append/shared (proc (&rest :string) :string))
|
||||||
|
|
||||||
|
;; xsubstring s from [to start end]
|
||||||
|
;; string-xcopy! target tstart s from [to start end]
|
||||||
|
(xsubstring (proc (:string :exact-integer &opt
|
||||||
|
:exact-integer :exact-integer :exact-integer)
|
||||||
|
:string))
|
||||||
|
(string-xcopy! (proc (:string :exact-integer :string :exact-integer &opt
|
||||||
|
:exact-integer :exact-integer :exact-integer)
|
||||||
|
:unspecific))
|
||||||
|
|
||||||
|
;; string-null? s
|
||||||
|
(string-null? (proc (:string) :boolean))
|
||||||
|
|
||||||
|
(join-strings (proc (:value &opt :string :symbol) :string))
|
||||||
|
|
||||||
|
;; Here are the R4RS procs
|
||||||
|
(string? (proc (:value) :boolean))
|
||||||
|
(make-string (proc (:exact-integer &opt :char) :string))
|
||||||
|
(string (proc (&rest :char) :string))
|
||||||
|
(string-length (proc (:string) :exact-integer))
|
||||||
|
(string-ref (proc (:string :exact-integer) :char))
|
||||||
|
(string-set! (proc (:string :exact-integer :char) :unspecific))
|
||||||
|
|
||||||
|
; Not provided by string-lib.
|
||||||
|
;((string=? string-ci=? string<? string-ci<?
|
||||||
|
; string>? string-ci>? string<=? string-ci<=?
|
||||||
|
; string>=? string-ci>=?) (proc (:string :string) :boolean))
|
||||||
|
|
||||||
|
;; These are the R4RS types for SUBSTRING, STRING-COPY, STRING-FILL!,
|
||||||
|
;; and STRING->LIST. The string-lib types are different -- extended.
|
||||||
|
;(substring (proc (:string :exact-integer :exact-integer) :string))
|
||||||
|
;(string-copy (proc (:string) :string))
|
||||||
|
;(string-fill! (proc (:string :char) :unspecific))
|
||||||
|
;(string->list (proc (:string) :value))
|
||||||
|
|
||||||
|
(string-append (proc (&rest :string) :string))
|
||||||
|
(list->string (proc (:value) :string))
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
|
;;; make-kmp-restart-vector
|
||||||
|
;;; parse-final-start+end
|
||||||
|
;;; parse-start+end
|
||||||
|
;;; check-substring-spec
|
||||||
|
|
||||||
|
(define-interface string-lib-internals-interface
|
||||||
|
(export
|
||||||
|
(parse-final-start+end (proc ((procedure :values :values) :string :value)
|
||||||
|
(some-values :exact-integer :exact-integer)))
|
||||||
|
(parse-start+end (proc ((procedure :values :values) :string :value)
|
||||||
|
(some-values :exact-integer :exact-integer :value)))
|
||||||
|
(check-substring-spec (proc ((procedure :values :values) :string :exact-integer :exact-integer)
|
||||||
|
:unspecific))
|
||||||
|
(make-kmp-restart-vector (proc (:string (proc (:char :char) :boolean))
|
||||||
|
:vector))))
|
||||||
|
|
||||||
|
|
||||||
|
(define-structures ((string-lib string-lib-interface)
|
||||||
|
(string-lib-internals string-lib-internals-interface))
|
||||||
|
(access scheme) ; Get at R5RS SUBSTRING
|
||||||
|
(open receiving ; RECEIVE
|
||||||
|
char-set-package; Various
|
||||||
|
error-package ; ERROR
|
||||||
|
let-opt ; LET-OPTIONALS :OPTIONAL
|
||||||
|
structure-refs ; STRUCTURE-REF
|
||||||
|
scheme)
|
||||||
|
(files string-lib))
|
|
@ -0,0 +1,578 @@
|
||||||
|
Todo:
|
||||||
|
parse-start+end parse-final-start+end need "string" in the name
|
||||||
|
Also, export macro binder.
|
||||||
|
What's up w/quotient? (quotient -1 3) = 0.
|
||||||
|
regexp-foldl
|
||||||
|
type regexp interface
|
||||||
|
land*
|
||||||
|
Let-optional:
|
||||||
|
A let-optional that parses a prefix of the args.
|
||||||
|
Arg checking forms that get used if it parses, but are not
|
||||||
|
applied to the default.
|
||||||
|
|
||||||
|
The Scheme Underground string library includes a rich set of operations
|
||||||
|
for manipulating strings. These are frequently useful for scripting and
|
||||||
|
other text-manipulation applications.
|
||||||
|
|
||||||
|
The library's design was influenced by the string libraries found in MIT
|
||||||
|
Scheme, Gambit, RScheme, MzScheme, slib, Common Lisp, Bigloo, guile, APL and
|
||||||
|
the SML standard basis. Some of the code bears a distant family relation to
|
||||||
|
the MIT Scheme implementation, and being derived from that code, is covered by
|
||||||
|
the MIT Scheme copyright (which is a fairly generic "free" copyright -- see
|
||||||
|
the source file for details). The fast KMP string-search code used in
|
||||||
|
SUBSTRING? was loosely adapted from old slib code by Stephen Bevan.
|
||||||
|
|
||||||
|
The library has the following design principles:
|
||||||
|
- *All* procedures involving character comparison are available in
|
||||||
|
both case-sensitive and case-insensitive forms.
|
||||||
|
|
||||||
|
- *All* functionality is available in substring and full-string forms.
|
||||||
|
|
||||||
|
- The procedures are spec'd so as to permit efficient implementation in a
|
||||||
|
Scheme that provided shared-text substrings (e.g., guile). This means that
|
||||||
|
you should not rely on many of the substring-selecting procedures to return
|
||||||
|
freshly-allocated strings. Careful attention is paid to the issue of which
|
||||||
|
procedures allocate fresh storage, and which are permitted to return results
|
||||||
|
that share storage with the arguments.
|
||||||
|
|
||||||
|
- Common Lisp theft:
|
||||||
|
+ inequality functions return mismatch index.
|
||||||
|
I generalised this so that this "protocol" is extended even to
|
||||||
|
the equality functions. This means that clients can be handed any generic
|
||||||
|
string-comparison function and rely on the meaning of the true value.
|
||||||
|
|
||||||
|
+ Common Lisp capitalisation definition
|
||||||
|
|
||||||
|
The library addresses some problems with the R5RS string procedures:
|
||||||
|
- Question marks after string-comparison functions (string=?, etc.)
|
||||||
|
This is inconsistent with numeric comparison functions, and ugly, too.
|
||||||
|
- String-comparison functions do not provide useful true value.
|
||||||
|
- STRING-COPY should have optional start/end args;
|
||||||
|
SUBSTRING shouldn't specify if it copies or returns shared bits.
|
||||||
|
- STRING-FILL! and STRING->LIST should take optional start/end args.
|
||||||
|
- No <> function provided.
|
||||||
|
|
||||||
|
In the following procedure specifications:
|
||||||
|
- Any S parameter is a string;
|
||||||
|
|
||||||
|
- START and END parameters are half-open string indices specifying
|
||||||
|
a substring within a string parameter; when optional, they default
|
||||||
|
to 0 and the length of the string, respectively. When specified, it
|
||||||
|
must be the case that 0 <= START <= END <= (string-length S), for
|
||||||
|
the corresponding parameter S. They typically restrict a procedure's
|
||||||
|
action to the indicated substring.
|
||||||
|
|
||||||
|
- A CHAR/CHAR-SET/PRED parameter is a value used to select/search
|
||||||
|
for a character in a string. If it is a character, it is used in
|
||||||
|
an equality test; if it is a character set, it is used as a
|
||||||
|
membership test; if it is a procedure, it is applied to the
|
||||||
|
characters as a test predicate.
|
||||||
|
|
||||||
|
This library contains a large number of procedures, but they follow
|
||||||
|
a consistent naming scheme. The names are composed of smaller lexemes
|
||||||
|
in a regular way that exposes the structure and relationships between the
|
||||||
|
procedures. This should help the programmer to recall or reconstitute the name
|
||||||
|
of the particular procedure that he needs when writing his own code. In
|
||||||
|
particular
|
||||||
|
- Procedures whose names end in "-ci" are case-insensitive variants.
|
||||||
|
- Procedures whose names end in "!" are side-effecting variants.
|
||||||
|
These procedures generally return an unspecified value.
|
||||||
|
- The order of common parameters is fairly consistent across the
|
||||||
|
different procedures.
|
||||||
|
|
||||||
|
For more text-manipulation functionality, see also the regular expression,
|
||||||
|
file-name, character set, and character->character partial map packages.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
* R4RS/R5RS procedures
|
||||||
|
|
||||||
|
The R4RS and R5RS reports define 22 string procedures. The string-lib
|
||||||
|
package includes 8 of these exactly as defined, 4 in an extended,
|
||||||
|
backwards-compatible way, and drops the remaining 10 (whose functionality
|
||||||
|
is available via other bindings).
|
||||||
|
|
||||||
|
The 8 procedures provided exactly as documented in the reports are
|
||||||
|
string?
|
||||||
|
make-string
|
||||||
|
string
|
||||||
|
string-length
|
||||||
|
string-ref
|
||||||
|
string-set!
|
||||||
|
string-append
|
||||||
|
list->string
|
||||||
|
|
||||||
|
The ten functions not included are the R4RS string-comparison functions:
|
||||||
|
string=? string-ci=?
|
||||||
|
string<? string-ci<?
|
||||||
|
string>? string-ci>?
|
||||||
|
string<=? string-ci<=?
|
||||||
|
string>=? string-ci>=?
|
||||||
|
The string-lib package provides alternate bindings.
|
||||||
|
|
||||||
|
Additionally, the four extended procedures are
|
||||||
|
|
||||||
|
string-fill! s char [start end] -> unspecific
|
||||||
|
string->list s [start end] -> char-list
|
||||||
|
substring s start [end] -> string
|
||||||
|
string-copy s [start end] -> string
|
||||||
|
|
||||||
|
These procedures are documented in the following section. In brief, they are
|
||||||
|
extended to take optional start/end parameters specifying substring ranges;
|
||||||
|
Additionally, SUBSTRING is allowed to return a value that shares storage with
|
||||||
|
its argument.
|
||||||
|
|
||||||
|
|
||||||
|
* Procedures
|
||||||
|
|
||||||
|
These procedures are contained in the Scheme 48 package "string-lib",
|
||||||
|
which is open in the default user package. They are not found in the
|
||||||
|
"scsh" package; script writers and other programmers that use the Scheme
|
||||||
|
48 module system must open string-lib explicitly.
|
||||||
|
|
||||||
|
string-map proc s [start end] -> string
|
||||||
|
string-map! proc s [start end] -> unspecified
|
||||||
|
PROC is a char->char procedure; it is mapped over S.
|
||||||
|
Note: no sequence order is specified.
|
||||||
|
|
||||||
|
string-fold kons knil s [start end] -> value
|
||||||
|
string-fold-right kons knil s [start end] -> value
|
||||||
|
These are the fundamental iterators for strings.
|
||||||
|
The left-fold operator maps the KONS procedure across the
|
||||||
|
string from left to right
|
||||||
|
(... (kons s[2] (kons s[1] (kons s[0] knil))))
|
||||||
|
In other words, string-fold obeys the recursion
|
||||||
|
(string-fold kons knil s start end) =
|
||||||
|
(string-fold kons (kons s[start] knil) start+1 end)
|
||||||
|
|
||||||
|
The right-fold operator maps the KONS procedure across the
|
||||||
|
string from right to left
|
||||||
|
(kons s[0] (... (kons s[end-3] (kons s[end-2] (kons s[end-1] knil)))))
|
||||||
|
obeying the recursion
|
||||||
|
(string-fold-right kons knil s start end) =
|
||||||
|
(string-fold-right kons (kons s[end-1] knil) start end-1)
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
To convert a string to a list of chars:
|
||||||
|
(string-fold-right cons '() s)
|
||||||
|
|
||||||
|
To count the number of lower-case characters in a string:
|
||||||
|
(string-fold (lambda (c count)
|
||||||
|
(if (char-set-contains? char-set:lower c)
|
||||||
|
(+ count 1)
|
||||||
|
count))
|
||||||
|
0
|
||||||
|
s)
|
||||||
|
|
||||||
|
string-unfold p f g seed -> string
|
||||||
|
This is the fundamental constructor for strings.
|
||||||
|
- G is used to generate a series of "seed" values from the initial seed:
|
||||||
|
SEED, (G SEED), (G^2 SEED), (G^3 SEED), ...
|
||||||
|
- P tells us when to stop -- when it returns true when applied to one
|
||||||
|
of these seed values.
|
||||||
|
- F maps each seed value to the corresponding character
|
||||||
|
in the result string.
|
||||||
|
|
||||||
|
More precisely, the following (simple, inefficient) definition holds:
|
||||||
|
(define (string-unfold p f g seed)
|
||||||
|
(if (p seed) ""
|
||||||
|
(string-append (string (f seed))
|
||||||
|
(string-unfold p f g (g seed)))))
|
||||||
|
|
||||||
|
STRING-UNFOLD is a fairly powerful constructor -- you can use it to
|
||||||
|
reverse a string, copy a string, convert a list to a string, read
|
||||||
|
a port into a string, and so forth. Examples:
|
||||||
|
(port->string p) = (string-unfold eof-object? values
|
||||||
|
(lambda (x) (read-char p))
|
||||||
|
(read-char p))
|
||||||
|
|
||||||
|
(list->string lis) = (string-unfold null? car cdr lis)
|
||||||
|
|
||||||
|
(tabulate-string f size) = (string-unfold (lambda (i) (= i size)) f add1 0)
|
||||||
|
|
||||||
|
To map F over a list LIS, producing a string:
|
||||||
|
(string-unfold null? (compose f car) cdr lis)
|
||||||
|
|
||||||
|
string-tabulate proc len -> string
|
||||||
|
PROC is an integer->char procedure. Construct a string of size LEN
|
||||||
|
by applying PROC to each index to produce the corresponding string
|
||||||
|
element. The order in which PROC is applied to the indices is not
|
||||||
|
specified.
|
||||||
|
|
||||||
|
string-for-each proc s [start end] -> unspecified
|
||||||
|
string-iter proc s [start end] -> unspecified
|
||||||
|
Apply PROC to each character in S.
|
||||||
|
STRING-FOR-EACH has no specified iteration order.
|
||||||
|
STRING-ITER is required to iterate from START to END
|
||||||
|
in increasing order.
|
||||||
|
|
||||||
|
string-every? pred s [start end] -> boolean
|
||||||
|
string-any? pred s [start end] -> value
|
||||||
|
Note: no sequence order specified.
|
||||||
|
Checks to see if predicate PRED is true of every / any character in S.
|
||||||
|
STRING-ANY? is witness-generating -- it applies PRED to the elements
|
||||||
|
of S, returning the first true value it finds, otherwise false.
|
||||||
|
|
||||||
|
string-compare s1 s2 lt-proc eq-proc gt-proc -> values
|
||||||
|
string-compare-ci s1 s2 lt-proc eq-proc gt-proc -> values
|
||||||
|
Apply LT-PROC, EQ-PROC, GT-PROC to the mismatch index, depending
|
||||||
|
upon whether S1 is less than, equal to, or greater than S2.
|
||||||
|
The "mismatch index" is the largest index i such that for
|
||||||
|
every 0 <= j < i, s1[j] = s2[j] -- that is, I is the first
|
||||||
|
position that doesn't match. If S1 = S2, the mismatch index
|
||||||
|
is simply the length of the strings; we observe the protocol
|
||||||
|
in this redundant case for uniformity.
|
||||||
|
|
||||||
|
substring-compare s1 start1 end1 s2 start2 end2 lt-proc eq-proc gt-proc -> values
|
||||||
|
substring-compare-ci s1 start1 end1 s2 start2 end2 lt-proc eq-proc gt-proc -> values
|
||||||
|
The continuation procedures are applied to S1's mismatch index (as defined
|
||||||
|
above). In the case of EQ-PROC, this is always END1.
|
||||||
|
|
||||||
|
string= s1 s2 -> #f or integer
|
||||||
|
string<> s1 s2 -> #f or integer
|
||||||
|
string< s1 s2 -> #f or integer
|
||||||
|
string> s1 s2 -> #f or integer
|
||||||
|
string<= s1 s2 -> #f or integer
|
||||||
|
string>= s1 s2 -> #f or integer
|
||||||
|
If the comparison operation is true, the function returns the
|
||||||
|
mismatch index (as defined for the previous comparator functions).
|
||||||
|
|
||||||
|
string-ci= s1 s2 -> #f or integer
|
||||||
|
string-ci<> s1 s2 -> #f or integer
|
||||||
|
string-ci< s1 s2 -> #f or integer
|
||||||
|
string-ci> s1 s2 -> #f or integer
|
||||||
|
string-ci<= s1 s2 -> #f or integer
|
||||||
|
string-ci>= s1 s2 -> #f or integer
|
||||||
|
Case-insensitive variants.
|
||||||
|
|
||||||
|
substring= s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring<> s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring< s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring> s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring<= s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring>= s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
|
||||||
|
substring-ci= s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring-ci<> s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring-ci< s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring-ci> s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring-ci<= s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
substring-ci>= s1 start1 end1 s2 start2 end2 -> #f or integer
|
||||||
|
These variants restrict the comparison to the indicated
|
||||||
|
substrings of S1 and S2.
|
||||||
|
|
||||||
|
string-upper-case? s [start end] -> boolean
|
||||||
|
string-lower-case? s [start end] -> boolean
|
||||||
|
STRING-UPPER-CASE? returns true iff the string contains
|
||||||
|
no lower-case characters. STRING-LOWER-CASE returns true
|
||||||
|
iff the string contains no upper-case characters.
|
||||||
|
(string-upper-case? "") => #t
|
||||||
|
(string-lower-case? "") => #t
|
||||||
|
(string-upper-case? "FOOb") => #f
|
||||||
|
(string-upper-case? "U.S.A.") => #t
|
||||||
|
|
||||||
|
capitalize-string s [start end] -> string
|
||||||
|
capitalize-string! s [start end] -> unspecified
|
||||||
|
Capitalize the string: upcase the first alphanumeric character,
|
||||||
|
and downcase the rest of the string. CAPITALIZE-STRING returns
|
||||||
|
a freshly allocated string.
|
||||||
|
|
||||||
|
(capitalize-string "--capitalize tHIS sentence.") =>
|
||||||
|
"--Capitalize this sentence."
|
||||||
|
|
||||||
|
(capitalize-string "see Spot run. see Nix run.") =>
|
||||||
|
"See spot run. see nix run."
|
||||||
|
|
||||||
|
(capitalize-string "3com makes routers.") =>
|
||||||
|
"3com makes routers."
|
||||||
|
|
||||||
|
capitalize-words s [start end] -> string
|
||||||
|
capitalize-words! s [start end] -> unspecified
|
||||||
|
A "word" is a maximal contiguous sequence of alphanumeric characters.
|
||||||
|
Upcase the first character of every word; downcase the rest of the word.
|
||||||
|
CAPITALIZE-WORDS returns a freshly allocated string.
|
||||||
|
|
||||||
|
(capitalize-words "HELLO, 3THErE, my nAME IS olin") =>
|
||||||
|
"Hello, 3there, My Name Is Olin"
|
||||||
|
|
||||||
|
More sophisticated capitalisation procedures can be synthesized
|
||||||
|
using CAPITALIZE-STRING and pattern matchers. In this context,
|
||||||
|
the REGEXP-SUBSTITUTE/GLOBAL procedure may be useful for picking
|
||||||
|
out the units to be capitalised and applying CAPITALIZE-STRING to
|
||||||
|
their components.
|
||||||
|
|
||||||
|
string-upcase s [start end] -> string
|
||||||
|
string-upcase! s [start end] -> unspecified
|
||||||
|
string-downcase s [start end] -> string
|
||||||
|
string-downcase! s [start end] -> unspecified
|
||||||
|
Raise or lower the case of the alphabetic characters in the string.
|
||||||
|
STRING-UPCASE and STRING-DOWNCASE return freshly allocated strings.
|
||||||
|
|
||||||
|
string-take s nchars -> string
|
||||||
|
string-drop s nchars -> string
|
||||||
|
string-take-right s nchars -> string
|
||||||
|
string-drop-right s nchars -> string
|
||||||
|
STRING-TAKE returns the first NCHARS of STRING;
|
||||||
|
STRING-DROP returns all but the first NCHARS of STRING.
|
||||||
|
STRING-TAKE-RIGHT returns the last NCHARS of STRING;
|
||||||
|
STRING-DROP-RIGHT returns all but the last NCHARS of STRING.
|
||||||
|
These generalise MIT Scheme's HEAD & TAIL functions.
|
||||||
|
If these procedures produce the entire string, they may return either
|
||||||
|
S or a copy of S; in some implementations, proper substrings may share
|
||||||
|
memory with S.
|
||||||
|
|
||||||
|
string-pad s k [char start end] -> string
|
||||||
|
string-pad-right s k [char start end] -> string
|
||||||
|
Build a string of length K comprised of S padded on the left (right)
|
||||||
|
by as many occurences of the character CHAR as needed. If S has more
|
||||||
|
than K chars, it is truncated on the left (right) to length k. CHAR
|
||||||
|
defaults to #\space.
|
||||||
|
|
||||||
|
If K is exactly the length of S, these functions may return
|
||||||
|
either S or a copy of S.
|
||||||
|
|
||||||
|
string-trim s [char/char-set/pred start end] -> string
|
||||||
|
string-trim-right s [char/char-set/pred start end] -> string
|
||||||
|
string-trim-both s [char/char-set/pred start end] -> string
|
||||||
|
Trim S by skipping over all characters on the left / on the right /
|
||||||
|
on both sides that satisfy the second parameter CHAR/CHAR-SET/PRED:
|
||||||
|
- If it is a character CHAR, characters equal to CHAR are trimmed.
|
||||||
|
- If it is a char set CHAR-SET, characters contained in CHAR-SET
|
||||||
|
are trimmed.
|
||||||
|
- If it is a predicate PRED, it is a test predicate that is applied
|
||||||
|
to the characters in S; a character causing it to return true
|
||||||
|
is skipped.
|
||||||
|
CHAR/CHAR/SET-PRED defaults to CHAR-SET:WHITESPACE.
|
||||||
|
|
||||||
|
If no trimming occurs, these functions may return either S or a copy of S;
|
||||||
|
in some implementations, proper substrings may share memory with S.
|
||||||
|
|
||||||
|
(string-trim-both " The outlook wasn't brilliant, \n\r")
|
||||||
|
=> "The outlook wasn't brilliant,"
|
||||||
|
|
||||||
|
string-filter s char/char-set/pred [start end] -> string
|
||||||
|
string-delete s char/char-set/pred [start end] -> string
|
||||||
|
Filter the string S, retaining only those characters that
|
||||||
|
satisfy / do not satisfy the CHAR/CHAR-SET/PRED argument. If
|
||||||
|
this argument is a procedure, it is applied to the character
|
||||||
|
as a predicate; if it is a char-set, the character is tested
|
||||||
|
for membership; if it is a character, it is used in an equality test.
|
||||||
|
|
||||||
|
If the string is unaltered by the filtering operation, these
|
||||||
|
functions may return either S or a copy of S.
|
||||||
|
|
||||||
|
string-index s char/char-set/pred [start end] -> integer or #f
|
||||||
|
string-index-right s char/char-set/pred [end start] -> integer or #f
|
||||||
|
string-skip s char/char-set/pred [start end] -> integer or #f
|
||||||
|
string-skip-right s char/char-set/pred [end start] -> integer or #f
|
||||||
|
Note the inverted start/end ordering of index-right and skip-right's
|
||||||
|
parameters.
|
||||||
|
|
||||||
|
Index (index-right) searches through the string from the left (right),
|
||||||
|
returning the index of the first occurence of a character which
|
||||||
|
- equals CHAR/CHAR-SET/PRED (if it is a character);
|
||||||
|
- is in CHAR/CHAR-SET/PRED (if it is a char-set);
|
||||||
|
- satisfies the predicate CHAR/CHAR-SET/PRED (if it is a procedure).
|
||||||
|
If no match is found, the functions return false.
|
||||||
|
|
||||||
|
The skip functions are similar, but use the complement of the criteria:
|
||||||
|
they search for the first char that *doesn't* satisfy the test. E.g.,
|
||||||
|
to skip over initial whitespace, say
|
||||||
|
(cond ((string-skip s char-set:whitespace) =>
|
||||||
|
(lambda (i)
|
||||||
|
;; (string-ref s i) is not whitespace.
|
||||||
|
...)))
|
||||||
|
|
||||||
|
string-prefix-count s1 s2 -> integer
|
||||||
|
string-suffix-count s1 s2 -> integer
|
||||||
|
string-prefix-count-ci s1 s2 -> integer
|
||||||
|
string-suffix-count-ci s1 s2 -> integer
|
||||||
|
Return the length of the longest common prefix/suffix of the two strings.
|
||||||
|
This is equivalent to the "mismatch index" for the strings.
|
||||||
|
|
||||||
|
substring-prefix-count s1 start1 end1 s2 start2 end2 -> integer
|
||||||
|
substring-suffix-count s1 start1 end1 s2 start2 end2 -> integer
|
||||||
|
substring-prefix-count-ci s1 start1 end1 s2 start2 end2 -> integer
|
||||||
|
substring-suffix-count-ci s1 start1 end1 s2 start2 end2 -> integer
|
||||||
|
Substring variants.
|
||||||
|
|
||||||
|
string-prefix? s1 s2 -> boolean
|
||||||
|
string-suffix? s1 s2 -> boolean
|
||||||
|
string-prefix-ci? s1 s2 -> boolean
|
||||||
|
string-suffix-ci? s1 s2 -> boolean
|
||||||
|
Is S1 a prefix/suffix of S2?
|
||||||
|
|
||||||
|
substring-prefix? s1 start1 end1 s2 start2 end2 -> boolean
|
||||||
|
substring-suffix? s1 start1 end1 s2 start2 end2 -> boolean
|
||||||
|
substring-prefix-ci? s1 start1 end1 s2 start2 end2 -> boolean
|
||||||
|
substring-suffix-ci? s1 start1 end1 s2 start2 end2 -> boolean
|
||||||
|
Substring variants.
|
||||||
|
|
||||||
|
substring? s1 s2 [start end] -> integer or false
|
||||||
|
substring-ci? s1 s2 [start end] -> integer or false
|
||||||
|
Return the index in S2 where S1 occurs as a substring, or false.
|
||||||
|
The returned index is in the range [start,end).
|
||||||
|
The current implementation uses the Knuth-Morris-Pratt algorithm.
|
||||||
|
|
||||||
|
string-fill! s char [start end] -> unspecified
|
||||||
|
Store CHAR into the elements of S.
|
||||||
|
This is the R4RS procedure extended to have optional START/END parameters.
|
||||||
|
|
||||||
|
string-copy! target tstart s [start end] -> unspecified
|
||||||
|
Copy the sequence of characters from index range [START,END) in
|
||||||
|
string S to string TARGET, beginning at index TSTART. The characters
|
||||||
|
are copied left-to-right or right-to-left as needed -- the copy is
|
||||||
|
guaranteed to work, even if TARGET and S are the same string.
|
||||||
|
|
||||||
|
substring s start [end] -> string
|
||||||
|
string-copy s [start end] -> string
|
||||||
|
These R4RS procedures are extended to have optional START/END parameters.
|
||||||
|
Use STRING-COPY when you want to indicate explicitly in your code that you
|
||||||
|
wish to allocate new storage; use SUBSTRING when you don't care if you
|
||||||
|
get a fresh copy or share storage with the original string.
|
||||||
|
E.g.:
|
||||||
|
(string-copy "Beta substitution") => "Beta substitution"
|
||||||
|
(string-copy "Beta substitution" 1 10)
|
||||||
|
=> "eta subst"
|
||||||
|
(string-copy "Beta substitution" 5) => "substitution"
|
||||||
|
|
||||||
|
SUBSTRING may return a value with shares memory with S.
|
||||||
|
|
||||||
|
string-reverse s [start end] -> string
|
||||||
|
string-reverse! s [start end] -> unspecific
|
||||||
|
Reverse the string.
|
||||||
|
|
||||||
|
reverse-list->string char-list -> string
|
||||||
|
An efficient implementation of (compose string->list reverse):
|
||||||
|
(reverse-list->string '(#\a #\B #\c)) -> "cBa"
|
||||||
|
This is a common idiom in the epilog of string-processing loops
|
||||||
|
that accumulate an answer in a reverse-order list.
|
||||||
|
|
||||||
|
string-concat string-list -> string
|
||||||
|
Append the elements of STRING-LIST together into a single list.
|
||||||
|
Guaranteed to return a freshly allocated list. Appears sufficiently
|
||||||
|
often as to warrant being named.
|
||||||
|
|
||||||
|
string-concat/shared string-list -> string
|
||||||
|
string-append/shared s ... -> string
|
||||||
|
These two procedures are variants of STRING-CONCAT and STRING-APPEND
|
||||||
|
that are permitted to return results that share storage with their
|
||||||
|
parameters. In particular, if STRING-APPEND/SHARED is applied to just
|
||||||
|
one argument, it may return exactly that argument, whereas STRING-APPEND
|
||||||
|
is required to allocate a fresh string.
|
||||||
|
|
||||||
|
string->list s [start end] -> char-list
|
||||||
|
The R5RS STRING->LIST procedure is extended to take optional START/END
|
||||||
|
arguments.
|
||||||
|
|
||||||
|
string-null? s -> bool
|
||||||
|
Is S the empty string?
|
||||||
|
|
||||||
|
xsubstring s from [to start end] -> string
|
||||||
|
This is the "extended substring" procedure that implements replicated
|
||||||
|
copying of a substring of some string.
|
||||||
|
|
||||||
|
S is a string; START and END are optional arguments that demarcate
|
||||||
|
a substring of S, defaulting to 0 and the length of S (e.g., the whole
|
||||||
|
string). Replicate this substring up and down index space, in both the
|
||||||
|
positive and negative directions. For example, if S = "abcdefg", START=3,
|
||||||
|
and END=6, then we have the conceptual bidirectionally-infinite string
|
||||||
|
... d e f d e f d e f d e f d e f d e f d e f ...
|
||||||
|
... -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 ...
|
||||||
|
XSUBSTRING returns the substring of this string beginning at index FROM,
|
||||||
|
and ending at TO (which defaults to FROM+(END-START)).
|
||||||
|
|
||||||
|
You can use XSUBSTRING to perform a variety of tasks:
|
||||||
|
- To rotate a string left: (xsubstring "abcdef" 2) => "cdefab"
|
||||||
|
- To rotate a string right: (xsubstring "abcdef" -2) => "efabcd"
|
||||||
|
- To replicate a string: (xsubstring "abc" 0 7) => "abcabca"
|
||||||
|
|
||||||
|
Note that
|
||||||
|
- The FROM/TO indices give a half-open range -- the characters from
|
||||||
|
index FROM up to, but not including, index TO.
|
||||||
|
- The FROM/TO indices are not in terms of the index space for string S.
|
||||||
|
They are in terms of the replicated index space of the substring
|
||||||
|
defined by S, START, and END.
|
||||||
|
|
||||||
|
It is an error if START=END -- although this is allowed by special
|
||||||
|
dispensation when FROM=TO.
|
||||||
|
|
||||||
|
string-xcopy! target tstart s sfrom [sto start end] -> unspecific
|
||||||
|
Exactly the same as XSUBSTRING, but the extracted text is written
|
||||||
|
into the string TARGET starting at index TSTART.
|
||||||
|
This operation is not defined if (EQ? TARGET S) -- you cannot copy
|
||||||
|
a string on top of itself.
|
||||||
|
|
||||||
|
|
||||||
|
* Lower-level procedures
|
||||||
|
|
||||||
|
The following procedures are useful for writing other string-processing
|
||||||
|
functions, and are contained in the string-lib-internals package.
|
||||||
|
|
||||||
|
parse-start+end proc s args -> [start end rest]
|
||||||
|
parse-final-start+end proc s args -> [start end]
|
||||||
|
PARSE-START+END may be used to parse a pair of optional START/END arguments
|
||||||
|
from an argument list, defaulting them to 0 and the length of some string
|
||||||
|
S, respectively. Let the length of string S be SLEN.
|
||||||
|
- If ARGS = (), the function returns (values 0 slen '())
|
||||||
|
- If ARGS = (i), I is checked to ensure it is an integer, and
|
||||||
|
that 0 <= i <= slen. Returns (values i slen (cdr rest)).
|
||||||
|
- If ARGS = (i j ...), I and J are checked to ensure they are
|
||||||
|
integers, and that 0 <= i <= j <= slen. Returns (values i j (cddr rest)).
|
||||||
|
If any of the checks fail, an error condition is raised, and PROC is used
|
||||||
|
as part of the error condition -- it should be the name of the client
|
||||||
|
procedure whose argument list PARSE-START+END is parsing.
|
||||||
|
|
||||||
|
parse-final-start+end is exactly the same, except that the args list
|
||||||
|
passed to it is required to be of length two or less; if it is longer,
|
||||||
|
an error condition is raised. It may be used when the optional START/END
|
||||||
|
parameters are final arguments to the procedure.
|
||||||
|
|
||||||
|
check-substring-spec proc s start end -> unspecific
|
||||||
|
Check values START and END to ensure they specify a valid substring
|
||||||
|
in S. This means that START and END are exact integers, and
|
||||||
|
0 <= START <= END <= (STRING-LENGTH S)
|
||||||
|
If this is not the case, an error condition is raised. PROC is used
|
||||||
|
as part of error condition, and should be the procedure whose START/END
|
||||||
|
parameters we are checking.
|
||||||
|
|
||||||
|
make-kmp-restart-vector s c= -> vector
|
||||||
|
Build the Knuth-Morris-Pratt "restart vector," which is useful
|
||||||
|
for quickly searching character sequences for the occurrence of
|
||||||
|
string S. C= is a character-equality function used to construct
|
||||||
|
the restart vector; it is usefully CHAR=? or CHAR-CI=?.
|
||||||
|
|
||||||
|
The definition of the restart vector RV for string S is:
|
||||||
|
If we have matched chars 0..i-1 of S against some search string SS, and
|
||||||
|
S[i] doesn't match SS[k], then reset i := RV[i], and try again to
|
||||||
|
match SS[k]. If RV[i] = -1, then punt SS[k] completely, and move on to
|
||||||
|
SS[k+1] and S[0].
|
||||||
|
|
||||||
|
In other words, if you have matched the first i chars of S, but
|
||||||
|
the i+1'th char doesn't match, RV[i] tells you what the next-longest
|
||||||
|
prefix of PATTERN is that you have matched.
|
||||||
|
|
||||||
|
The following string-search function shows how a restart vector
|
||||||
|
is used to search. It can be easily adapted to search other character
|
||||||
|
sequences (such as ports).
|
||||||
|
|
||||||
|
(define (find-substring pattern source start end)
|
||||||
|
(let ((plen (string-length pattern))
|
||||||
|
(rv (make-kmp-restart-vector pattern char=?)))
|
||||||
|
|
||||||
|
;; The search loop. SJ & PJ are redundant state.
|
||||||
|
(let lp ((si start) (pi 0)
|
||||||
|
(sj (- end start)) ; (- end si) -- how many chars left.
|
||||||
|
(pj plen)) ; (- plen pi) -- how many chars left.
|
||||||
|
|
||||||
|
(if (= pi plen) (- si plen) ; Win.
|
||||||
|
|
||||||
|
(and (<= pj sj) ; Lose.
|
||||||
|
|
||||||
|
(if (char=? (string-ref source si) ; Search.
|
||||||
|
(string-ref pattern pi))
|
||||||
|
(lp (+ 1 si) (+ 1 pi) (- sj 1) (- pj 1)) ; Advance.
|
||||||
|
|
||||||
|
(let ((pi (vector-ref rv pi))) ; Retreat.
|
||||||
|
(if (= pi -1)
|
||||||
|
(lp (+ si 1) 0 (- sj 1) plen) ; Punt.
|
||||||
|
(lp si pi sj (- plen pi))))))))))
|
Loading…
Reference in New Issue