1999-09-15 10:27:23 -04:00
|
|
|
;;; Char->char partial maps -*- Scheme -*-
|
|
|
|
;;; Copyright (C) 1998 by Olin Shivers.
|
|
|
|
|
|
|
|
;;; CCPs are an efficient data structure for doing simple string transforms,
|
|
|
|
;;; similar to the kinds of things you would do with the tr(1) program.
|
|
|
|
;;;
|
|
|
|
;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit
|
|
|
|
;;; character types would need a more sophisticated data structure, tuned
|
|
|
|
;;; for sparseness. I would suggest something like this:
|
|
|
|
;;; (define-record ccp
|
|
|
|
;;; domain ; The domain char-set
|
|
|
|
;;; map ; Sorted vector of (char . string) pairs
|
|
|
|
;;; ; specifying the map.
|
|
|
|
;;; id?) ; If true, mappings not specified by MAP are
|
|
|
|
;;; ; identity mapping. If false, MAP must
|
|
|
|
;;; ; specify a mapping for every char in DOMAIN.
|
|
|
|
;;;
|
|
|
|
;;; A (char . string) elements in MAP specifies a mapping for the contiguous
|
|
|
|
;;; sequence of L chars beginning with CHAR (in the sequence of the underlying
|
|
|
|
;;; char type representation), where L is the length of STRING. These MAP elements
|
|
|
|
;;; are sorted by CHAR, so that binary search can be used to get from an input
|
|
|
|
;;; character C to the right MAP element quickly.
|
|
|
|
;;;
|
|
|
|
;;; This representation should be reasonably compact for standard mappings on,
|
|
|
|
;;; say, a Unicode CCP. An implementation might wish to have a cache field
|
|
|
|
;;; in the record for storing the full 8kb bitset when performing ccp-map
|
|
|
|
;;; operations. Or, an implementation might want to store the Latin-1 subset
|
|
|
|
;;; of the map in a dense format, and keep the remainder in a sparse format.
|
|
|
|
|
|
|
|
(define num-chars (char-set-size char-set:full)) ; AKA 256.
|
|
|
|
|
|
|
|
(define-record ccp
|
|
|
|
domain ; The domain char-set
|
|
|
|
dshared? ; Is the domain value shared or linear?
|
|
|
|
map ; 256-elt string
|
|
|
|
mshared?) ; Is the map string shared or linear?
|
|
|
|
|
|
|
|
|
|
|
|
;;; Accessors and setters that manage the linear bookkeeping
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
(define (ccp-domain ccp)
|
|
|
|
(set-ccp:dshared? ccp #t)
|
|
|
|
(ccp:domain ccp))
|
|
|
|
|
|
|
|
;;; CCP is a linear ccp. PROC is a domain->domain function; it must be
|
|
|
|
;;; linear in its parameter and result.
|
|
|
|
;;;
|
|
|
|
;;; Updates the domain of the CCP with PROC, returns the resulting
|
|
|
|
;;; CCP; reuses the old one to construct the new one.
|
|
|
|
|
|
|
|
(define (restrict-linear-ccp-domain ccp proc)
|
|
|
|
(let ((new-d (proc (if (ccp:dshared? ccp)
|
|
|
|
(begin (set-ccp:dshared? ccp #f)
|
|
|
|
(char-set-copy (ccp:domain ccp)))
|
|
|
|
(ccp:domain ccp)))))
|
|
|
|
(set-ccp:domain ccp new-d)
|
|
|
|
ccp))
|
|
|
|
|
|
|
|
;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function.
|
|
|
|
;;; It is passed a linear domain and cmap string. It may side-effect
|
|
|
|
;;; the cmap string, and returns the resulting updated domain.
|
|
|
|
;;; We return the resulting CCP, reusing the parameter to construct it.
|
|
|
|
|
|
|
|
(define (linear-update-ccp ccp proc)
|
|
|
|
(let* ((cmap (if (ccp:mshared? ccp)
|
|
|
|
(begin (set-ccp:mshared? ccp #f)
|
|
|
|
(string-copy (ccp:map ccp)))
|
|
|
|
(ccp:map ccp)))
|
|
|
|
|
|
|
|
(new-d (proc (if (ccp:dshared? ccp)
|
|
|
|
(begin (set-ccp:dshared? ccp #f)
|
|
|
|
(char-set-copy (ccp:domain ccp)))
|
|
|
|
(ccp:domain ccp))
|
|
|
|
cmap)))
|
|
|
|
(set-ccp:domain ccp new-d)
|
|
|
|
ccp))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Return CCP's map field, and mark it as shared. CCP functions that
|
|
|
|
;;; restrict a ccp's domain share map strings, so they use this guy.
|
|
|
|
(define (ccp:map/shared ccp)
|
|
|
|
(set-ccp:mshared? ccp #t)
|
|
|
|
(ccp:map ccp))
|
|
|
|
|
|
|
|
(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f
|
|
|
|
(string-copy (ccp:map ccp)) #f))
|
|
|
|
|
|
|
|
;;; N-ary equality relation for partial maps
|
|
|
|
|
|
|
|
(define (ccp= ccp1 . rest)
|
|
|
|
(let ((domain (ccp:domain ccp1))
|
|
|
|
(cmap (ccp:map ccp1)))
|
|
|
|
(every (lambda (ccp2)
|
|
|
|
(and (char-set= domain (ccp:domain ccp2))
|
|
|
|
(let ((cmap2 (ccp:map ccp2)))
|
2001-03-11 13:52:59 -05:00
|
|
|
(char-set-every (lambda (c)
|
|
|
|
(let ((i (char->ascii c)))
|
|
|
|
(char=? (string-ref cmap i)
|
|
|
|
(string-ref cmap2 i))))
|
|
|
|
domain))))
|
1999-09-15 10:27:23 -04:00
|
|
|
rest)))
|
|
|
|
|
|
|
|
|
|
|
|
;;; N-ary subset relation for partial maps
|
|
|
|
|
|
|
|
(define (ccp<= ccp1 . rest)
|
|
|
|
(let lp ((domain1 (ccp:domain ccp1))
|
|
|
|
(cmap1 (ccp:map ccp1))
|
|
|
|
(rest rest))
|
|
|
|
(or (not (pair? rest))
|
|
|
|
(let* ((ccp2 (car rest))
|
|
|
|
(domain2 (ccp:domain ccp2))
|
|
|
|
(cmap2 (ccp:map ccp2))
|
|
|
|
(rest (cdr rest)))
|
|
|
|
(and (char-set<= domain1 domain2)
|
|
|
|
(let ((cmap2 (ccp:map ccp2)))
|
2001-03-11 13:52:59 -05:00
|
|
|
(char-set-every (lambda (c)
|
|
|
|
(let ((i (char->ascii c)))
|
|
|
|
(char=? (string-ref cmap1 i)
|
|
|
|
(string-ref cmap2 i))))
|
|
|
|
domain1))
|
1999-09-15 10:27:23 -04:00
|
|
|
(lp domain2 cmap2 rest))))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CCP iterators
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
(define (ccp-fold kons knil ccp)
|
|
|
|
(let ((cmap (ccp:map ccp)))
|
|
|
|
(char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v))
|
|
|
|
knil
|
|
|
|
(ccp:domain ccp))))
|
|
|
|
|
|
|
|
(define (ccp-for-each proc ccp)
|
|
|
|
(let ((cmap (ccp:map ccp)))
|
|
|
|
(char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c))))
|
|
|
|
(ccp:domain ccp))))
|
|
|
|
|
|
|
|
(define (ccp->alist ccp)
|
|
|
|
(ccp-fold (lambda (from to alist) (cons (cons from to) alist))
|
|
|
|
'()
|
|
|
|
ccp))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CCP-RESTRICT
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; Restrict a ccp's domain.
|
|
|
|
|
|
|
|
(define (ccp-restrict ccp cset)
|
|
|
|
(make-ccp (char-set-intersection cset (ccp:domain ccp))
|
|
|
|
#f
|
|
|
|
(ccp:map/shared ccp)
|
|
|
|
#t))
|
|
|
|
|
|
|
|
(define (ccp-restrict! ccp cset)
|
|
|
|
(restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CCP-ADJOIN ccp from-char1 to-char1 ...
|
|
|
|
;;; CCP-DELETE ccp char1 ...
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; Add & delete mappings to/from a ccp.
|
|
|
|
|
|
|
|
(define (ccp-delete ccp . chars)
|
|
|
|
(make-ccp (apply char-set-delete (ccp:domain ccp) chars)
|
|
|
|
#f
|
|
|
|
(ccp:map/shared ccp)
|
|
|
|
#t))
|
|
|
|
|
|
|
|
(define (ccp-delete! ccp . chars)
|
|
|
|
(restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars))))
|
|
|
|
|
|
|
|
|
|
|
|
(define (ccp-adjoin ccp . chars)
|
|
|
|
(let ((cmap (string-copy (ccp:map ccp))))
|
|
|
|
(make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars)
|
|
|
|
#f
|
|
|
|
cmap
|
|
|
|
#f)))
|
|
|
|
|
|
|
|
(define (ccp-adjoin! ccp . chars)
|
|
|
|
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars))))
|
|
|
|
|
|
|
|
(define (install-ccp-adjoin! cmap domain chars)
|
|
|
|
(let lp ((chars chars) (d domain))
|
|
|
|
(if (pair? chars)
|
|
|
|
(let ((from (car chars))
|
|
|
|
(to (cadr chars))
|
|
|
|
(chars (cddr chars)))
|
|
|
|
(string-set! cmap (char->ascii from) to)
|
|
|
|
(lp chars (char-set-adjoin! d from)))
|
|
|
|
d)))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CCP-EXTEND ccp1 ...
|
|
|
|
;;; CCP-EXTEND! ccp1 ccp2 ...
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; Extend ccp1 with ccp2, etc.
|
|
|
|
|
|
|
|
(define (ccp-extend . ccps)
|
|
|
|
(if (pair? ccps)
|
|
|
|
(let ((ccp0 (car ccps))
|
|
|
|
(ccps (cdr ccps)))
|
|
|
|
(if (pair? ccps)
|
|
|
|
(let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap.
|
|
|
|
;; The FOLD installs each ccp in CCPS into CMAP and produces
|
|
|
|
;; the new domain.
|
|
|
|
(make-ccp (fold (lambda (ccp d)
|
|
|
|
(install-ccp-extension! cmap d ccp))
|
|
|
|
(char-set-copy (ccp:domain ccp0))
|
|
|
|
ccps)
|
|
|
|
#f cmap #f))
|
|
|
|
|
|
|
|
ccp0)) ; Only 1 parameter
|
|
|
|
|
|
|
|
ccp:0)) ; 0 parameters
|
|
|
|
|
|
|
|
(define (ccp-extend! ccp0 . ccps)
|
|
|
|
(linear-update-ccp ccp0
|
|
|
|
(lambda (domain cmap)
|
|
|
|
(fold (lambda (ccp d) (install-ccp-extension! cmap d ccp))
|
|
|
|
domain
|
|
|
|
ccps))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; Side-effect CMAP, linear-update and return DOMAIN.
|
|
|
|
(define (install-ccp-extension! cmap domain ccp)
|
|
|
|
(let ((cmap1 (ccp:map ccp))
|
|
|
|
(domain1 (ccp:domain ccp)))
|
|
|
|
(char-set-for-each (lambda (c)
|
|
|
|
(let ((i (char->ascii c)))
|
|
|
|
(string-set! cmap i (string-ref cmap1 i))))
|
|
|
|
domain1)
|
|
|
|
(char-set-union! domain domain1)))
|
|
|
|
|
|
|
|
|
|
|
|
;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1.
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; For each character C-IN in the original domain, we push it
|
|
|
|
;;; through the pipeline of CCPs. If we ever land outside the
|
|
|
|
;;; domain of a ccp, we punt C-IN. If we push it all the way
|
|
|
|
;;; through, we add C-IN to our result domain, and add the mapping
|
|
|
|
;;; into the cmap we are assembling.
|
|
|
|
;;;
|
|
|
|
;;; Looping this way avoids building up intermediate temporary
|
|
|
|
;;; CCPs. If CCP's were small bitsets, we might be better off
|
|
|
|
;;; slicing the double-nested loops the other way around.
|
|
|
|
|
|
|
|
(define (ccp-compose . ccps)
|
|
|
|
(cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1
|
|
|
|
((not (pair? (cdr ccps))) (car ccps)) ; 1 arg
|
|
|
|
(else
|
|
|
|
(let* ((v (list->vector ccps))
|
|
|
|
(vlen-2 (- (vector-length v) 2))
|
|
|
|
(cmap (make-string num-chars))
|
|
|
|
(d1 (ccp:domain (vector-ref v (+ vlen-2 1))))
|
|
|
|
(d (char-set-fold (lambda (c-in d)
|
|
|
|
(let lp ((c c-in) (i vlen-2))
|
|
|
|
(if (>= i 0)
|
|
|
|
(let ((ccp (vector-ref v i)))
|
|
|
|
(if (char-set-contains? (ccp:domain ccp) c)
|
|
|
|
(lp (string-ref (ccp:map ccp)
|
|
|
|
(char->ascii c))
|
|
|
|
(- i 1))
|
|
|
|
|
|
|
|
;; Lose: remove c-in from d.
|
|
|
|
(char-set-delete! d c-in)))
|
|
|
|
|
|
|
|
;; Win: C-IN -> C
|
|
|
|
(begin (string-set! cmap
|
|
|
|
(char->ascii c-in)
|
|
|
|
c)
|
|
|
|
d))))
|
|
|
|
(char-set-copy d1)
|
|
|
|
d1)))
|
|
|
|
(make-ccp d #f cmap #f)))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; ALIST->CPP
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
(define (alist->ccp cc-alist . maybe-base-ccp)
|
|
|
|
(let ((base (:optional maybe-base-ccp ccp:0)))
|
|
|
|
(if (pair? cc-alist)
|
|
|
|
(let ((cmap (string-copy (ccp:map base))))
|
|
|
|
(make-ccp (install-ccp-alist! cmap
|
|
|
|
(char-set-copy (ccp:domain base))
|
|
|
|
cc-alist)
|
|
|
|
#f cmap #f))
|
|
|
|
base)))
|
|
|
|
|
|
|
|
(define (alist->ccp! alist base)
|
|
|
|
(linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist))))
|
|
|
|
|
|
|
|
;;; Side-effect CMAP, linear-update and return DOMAIN.
|
|
|
|
(define (install-ccp-alist! cmap domain alist)
|
|
|
|
(fold (lambda (from/to d) (let ((from (car from/to))
|
|
|
|
(to (cdr from/to)))
|
|
|
|
(string-set! cmap (char->ascii from) to)
|
|
|
|
(char-set-adjoin! domain from)))
|
|
|
|
domain
|
|
|
|
alist))
|
|
|
|
|
|
|
|
|
|
|
|
;;; PROC->CCP
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; (proc->ccp proc [domain base-ccp])
|
|
|
|
|
|
|
|
(define (proc->ccp proc . args)
|
|
|
|
(let-optionals args ((proc-domain char-set:full)
|
|
|
|
(base ccp:0))
|
|
|
|
(let ((cmap (string-copy (ccp:map base))))
|
|
|
|
(make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base))
|
|
|
|
proc proc-domain)
|
|
|
|
#f cmap #f))))
|
|
|
|
|
|
|
|
(define (proc->ccp! proc proc-domain base)
|
|
|
|
(linear-update-ccp base
|
|
|
|
(lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain))))
|
|
|
|
|
|
|
|
(define (install-ccp-proc! cmap domain proc proc-domain)
|
|
|
|
(char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c)))
|
|
|
|
proc-domain)
|
|
|
|
(char-set-union! domain proc-domain))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CONSTANT-CCP
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; (constant-ccp char [domain base-ccp])
|
|
|
|
;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR.
|
|
|
|
;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0.
|
|
|
|
|
|
|
|
(define (constant-ccp char . args)
|
|
|
|
(let-optionals args ((char-domain char-set:full) (base ccp:0))
|
|
|
|
(let ((cmap (string-copy (ccp:map base))))
|
|
|
|
(make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base))
|
|
|
|
char char-domain)
|
|
|
|
#f cmap #f))))
|
|
|
|
|
|
|
|
(define (constant-ccp! char char-domain base)
|
|
|
|
(linear-update-ccp base
|
|
|
|
(lambda (d cmap) (install-constant-ccp! cmap d char char-domain))))
|
|
|
|
|
|
|
|
;;; Install the constant mapping into CMAP0 by side-effect,
|
|
|
|
;;; linear-update & return DOMAIN0 with the constant-mapping's domain.
|
|
|
|
(define (install-constant-ccp! cmap0 domain0 char char-domain)
|
|
|
|
(char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char))
|
|
|
|
char-domain)
|
|
|
|
(char-set-union! domain0 char-domain))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CCP/MAPPINGS
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp
|
|
|
|
;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp
|
|
|
|
;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp
|
|
|
|
;;; Each FROM element is either a string or a (lo-char . hi-char) range.
|
|
|
|
;;; Each TO element is either a string or a lo-char. Strings are replicated
|
|
|
|
;;; to match the length of the corresponding FROM element.
|
|
|
|
;;; CCP/MAPPINGS's base CCP is CCP:0
|
|
|
|
;;;
|
|
|
|
;;; Tedious code.
|
|
|
|
|
|
|
|
;;; Internal utility.
|
|
|
|
;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect.
|
|
|
|
;;; Return the new domain.
|
|
|
|
|
|
|
|
(define (install-ccp-mapping-pair! cmap domain from to)
|
|
|
|
;; Tedium -- four possibilities here:
|
|
|
|
;; str->str, str->lo-char,
|
|
|
|
;; range->str, range->lo-char.
|
|
|
|
(if (string? from)
|
|
|
|
(if (string? to)
|
|
|
|
;; "abc" -> "ABC"
|
|
|
|
(let ((len1 (string-length from))
|
|
|
|
(len2 (string-length to)))
|
|
|
|
(let lp2 ((i (- len1 1))
|
|
|
|
(j (modulo (- len2 1) len1))
|
|
|
|
(d domain))
|
|
|
|
(if (>= i 0)
|
|
|
|
(let ((c (string-ref from i)))
|
|
|
|
(string-set! cmap
|
|
|
|
(char->ascii c)
|
|
|
|
(string-ref to i))
|
|
|
|
(lp2 (- i 1)
|
|
|
|
(- (if (> j 0) j len2) 1)
|
|
|
|
(char-set-adjoin! d c)))
|
|
|
|
d)))
|
|
|
|
|
|
|
|
;; "abc" -> #\A
|
|
|
|
(let lp2 ((i (- (string-length from) 1))
|
|
|
|
(j (char->ascii to))
|
|
|
|
(d domain))
|
|
|
|
(if (>= i 0)
|
|
|
|
(let ((c (string-ref from i)))
|
|
|
|
(string-set! cmap
|
|
|
|
(char->ascii c)
|
|
|
|
(ascii->char j))
|
|
|
|
(lp2 (- i 1)
|
|
|
|
(- j 1)
|
|
|
|
(char-set-adjoin! d c)))
|
|
|
|
d)))
|
|
|
|
|
|
|
|
(let ((from-start (char->ascii (car from)))
|
|
|
|
(from-end (char->ascii (cdr from))))
|
|
|
|
(if (string? to)
|
|
|
|
(let ((len2-1 (- (string-length to) 1)))
|
|
|
|
;; (#\a . #\c) -> "ABC"
|
|
|
|
(let lp2 ((i from-start) (j 0) (d domain))
|
|
|
|
(if (<= i from-end)
|
|
|
|
(let ((c (string-ref to j)))
|
|
|
|
(string-set! cmap i c)
|
|
|
|
(lp2 (+ i 1)
|
|
|
|
(if (= j len2-1) 0 (+ j 1))
|
|
|
|
(char-set-adjoin! d c)))
|
|
|
|
d)))
|
|
|
|
|
|
|
|
;; (#\a . #\c) -> #\A
|
|
|
|
(do ((i from-start (+ i 1))
|
|
|
|
(j (char->ascii to) (+ j 1))
|
|
|
|
(d domain (begin (string-set! cmap i (ascii->char j))
|
|
|
|
(char-set-adjoin d (ascii->char i)))))
|
|
|
|
((> i from-end) d))))))
|
|
|
|
|
|
|
|
;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN.
|
|
|
|
(define (install-mapping-pairs cmap domain args)
|
|
|
|
(let lp ((domain domain) (args args))
|
|
|
|
(if (pair? args)
|
|
|
|
(lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args))
|
|
|
|
(cddr args))
|
|
|
|
domain)))
|
|
|
|
|
|
|
|
(define (ccp/mappings . args)
|
|
|
|
(let ((cmap (make-string num-chars)))
|
|
|
|
(make-ccp (install-mapping-pairs (make-string num-chars)
|
|
|
|
(char-set-copy char-set:empty)
|
|
|
|
args)
|
|
|
|
#f cmap #f)))
|
|
|
|
|
|
|
|
(define (extend-ccp/mappings base . args)
|
|
|
|
(let ((cmap (string-copy (ccp:map base))))
|
|
|
|
(make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args)
|
|
|
|
#f cmap #f)))
|
|
|
|
|
|
|
|
(define (extend-ccp/mappings! base . args)
|
|
|
|
(linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CONSTRUCT-CCP! ccp elt ...
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; The kitchen-sink constructor; static typing be damned.
|
|
|
|
;;; ELTS are interpreted as follows:
|
|
|
|
;;; (lo-char . hi-char) to-string|lo-char ; ccp/range
|
|
|
|
;;; from-string to-string|lo-char ; ccp/range
|
|
|
|
;;; ccp ; ccp-extend
|
|
|
|
;;; alist ; alist->ccp
|
|
|
|
;;; domain char ; ccp-constant
|
|
|
|
;;; domain proc ; proc->ccp
|
|
|
|
|
|
|
|
(define (construct-ccp! ccp . elts)
|
|
|
|
(linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts))))
|
|
|
|
|
|
|
|
(define (construct-ccp base . elts)
|
|
|
|
(let ((cmap (string-copy (ccp:map base))))
|
|
|
|
(make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts)
|
|
|
|
#f cmap #f)))
|
|
|
|
|
|
|
|
;;; Install the mappings into CMAP by side-effect,
|
|
|
|
;;; linear-update & return DOMAIN with the final domain.
|
|
|
|
|
|
|
|
(define (install-ccp-construct! cmap domain elts)
|
|
|
|
(let lp ((d domain) (elts elts))
|
|
|
|
;(format #t "d=~s elts=~s\n" d elts)
|
|
|
|
(if (not (pair? elts)) d
|
|
|
|
(let ((elt (car elts))
|
|
|
|
(elts (cdr elts)))
|
|
|
|
(cond ((pair? elt)
|
|
|
|
(cond ((pair? (car elt)) ; ELT is an alist.
|
|
|
|
(lp (install-ccp-alist! cmap d elt) elts))
|
|
|
|
((char? (car elt)) ; ELT is (lo-char . hi-char) range.
|
|
|
|
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
|
|
|
|
(cdr elts)))
|
|
|
|
(else (error "Illegal elt to construct-ccp" elt))))
|
|
|
|
|
|
|
|
((string? elt)
|
|
|
|
(lp (install-ccp-mapping-pair! cmap d elt (car elts))
|
|
|
|
(cdr elts)))
|
|
|
|
|
|
|
|
((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts))
|
|
|
|
|
|
|
|
((char-set? elt)
|
|
|
|
(let ((elt2 (car elts))
|
|
|
|
(elts (cdr elts)))
|
|
|
|
(lp (cond ((char? elt2)
|
|
|
|
(install-constant-ccp! cmap d elt2 elt))
|
|
|
|
((procedure? elt2)
|
|
|
|
(install-ccp-proc! cmap d elt2 elt))
|
|
|
|
(else (error "Illegal elt-pair to construct-ccp"
|
|
|
|
elt elt2)))
|
|
|
|
elts)))
|
|
|
|
|
|
|
|
(else (error "Illegal elt to construct-ccp" elt)))))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; CCP unfold
|
|
|
|
|
|
|
|
(define (ccp-unfold p f g seed)
|
|
|
|
(let lp ((seed seed) (ccp (ccp-copy ccp:0)))
|
|
|
|
(if (p seed) ccp
|
|
|
|
(lp (g seed)
|
|
|
|
(receive (from to) (f seed)
|
|
|
|
(lp (g seed) (ccp-adjoin! ccp from to)))))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; Using CCPs
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; TR ccp string [start end] -> string
|
|
|
|
;;; CCP-MAP ccp string [start end] -> string
|
|
|
|
;;; CCP-MAP! ccp string [start end] -> undefined
|
|
|
|
;;; CCP-APP ccp char -> char or false
|
|
|
|
|
|
|
|
;;; If a char in S is not in CCP's domain, it is dropped from the result.
|
|
|
|
;;; You can use this to map and delete chars from a string.
|
|
|
|
|
|
|
|
(define (tr ccp s . maybe-start+end)
|
|
|
|
(let-optionals maybe-start+end ((start 0) (end (string-length s)))
|
|
|
|
;; Count up the chars in S that are in the domain,
|
|
|
|
;; and allocate the answer string ANS:
|
|
|
|
(let* ((len (- end start))
|
|
|
|
(domain (ccp:domain ccp))
|
|
|
|
(ans-len (string-fold (lambda (c numchars)
|
|
|
|
(if (char-set-contains? domain c)
|
|
|
|
(+ numchars 1)
|
|
|
|
numchars))
|
|
|
|
0 s start end))
|
|
|
|
(ans (make-string ans-len)))
|
|
|
|
|
|
|
|
;; Apply the map, installing the resulting chars into ANS:
|
|
|
|
(string-fold (lambda (c i) (cond ((ccp-app ccp c) =>
|
|
|
|
(lambda (c)
|
|
|
|
(string-set! ans i c)
|
|
|
|
(+ i 1)))
|
|
|
|
(else i))) ; Not in domain -- drop it.
|
|
|
|
0 s start end)
|
|
|
|
ans)))
|
|
|
|
|
|
|
|
(define (ccp-map ccp s . maybe-start+end)
|
|
|
|
(apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end))
|
|
|
|
|
|
|
|
(define (ccp-map! ccp s . maybe-start+end)
|
|
|
|
(apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end))
|
|
|
|
|
|
|
|
(define (ccp-app ccp char)
|
|
|
|
(and (char-set-contains? (ccp:domain ccp) char)
|
|
|
|
(string-ref (ccp:map ccp) (char->ascii char))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; Primitive CCPs
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
(define id-cmap
|
|
|
|
(let ((m (make-string num-chars)))
|
|
|
|
(do ((i (- num-chars 1) (- i 1)))
|
|
|
|
((< i 0))
|
|
|
|
(string-set! m i (ascii->char i)))
|
|
|
|
m))
|
|
|
|
|
|
|
|
(define ccp:0 (make-ccp char-set:empty #t id-cmap #t))
|
|
|
|
(define ccp:1 (make-ccp char-set:full #t id-cmap #t))
|
|
|
|
|
|
|
|
(define ccp:upcase (proc->ccp char-upcase char-set:full))
|
|
|
|
(define ccp:downcase (proc->ccp char-downcase char-set:full))
|