From b202a059d2ceb4d64ba0b1798bdfe70ca2da9ad3 Mon Sep 17 00:00:00 2001 From: shivers Date: Mon, 13 Sep 1999 17:45:20 +0000 Subject: [PATCH] Moved to lib directory. --- scsh/ccp.scm | 576 --------------------------------------------------- 1 file changed, 576 deletions(-) delete mode 100644 scsh/ccp.scm diff --git a/scsh/ccp.scm b/scsh/ccp.scm deleted file mode 100644 index 16dcece..0000000 --- a/scsh/ccp.scm +++ /dev/null @@ -1,576 +0,0 @@ -;;; Char->char partial maps -*- Scheme -*- -;;; Copyright (C) 1998 by Olin Shivers. - -;;; CCPs are an efficient data structure for doing simple string transforms, -;;; similar to the kinds of things you would do with the tr(1) program. -;;; -;;; This code is tuned for a 7- or 8-bit character type. Large, 16-bit -;;; character types would need a more sophisticated data structure, tuned -;;; for sparseness. I would suggest something like this: -;;; (define-record ccp -;;; domain ; The domain char-set -;;; map ; Sorted vector of (char . string) pairs -;;; ; specifying the map. -;;; id?) ; If true, mappings not specified by MAP are -;;; ; identity mapping. If false, MAP must -;;; ; specify a mapping for every char in DOMAIN. -;;; -;;; A (char . string) elements in MAP specifies a mapping for the contiguous -;;; sequence of L chars beginning with CHAR (in the sequence of the underlying -;;; char type representation), where L is the length of STRING. These MAP elements -;;; are sorted by CHAR, so that binary search can be used to get from an input -;;; character C to the right MAP element quickly. -;;; -;;; This representation should be reasonably compact for standard mappings on, -;;; say, a Unicode CCP. An implementation might wish to have a cache field -;;; in the record for storing the full 8kb bitset when performing ccp-map -;;; operations. Or, an implementation might want to store the Latin-1 subset -;;; of the map in a dense format, and keep the remainder in a sparse format. - -(define num-chars (char-set-size char-set:full)) ; AKA 256. - -(define-record ccp - domain ; The domain char-set - dshared? ; Is the domain value shared or linear? - map ; 256-elt string - mshared?) ; Is the map string shared or linear? - - -;;; Accessors and setters that manage the linear bookkeeping -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (ccp-domain ccp) - (set-ccp:dshared? ccp #t) - (ccp:domain ccp)) - -;;; CCP is a linear ccp. PROC is a domain->domain function; it must be -;;; linear in its parameter and result. -;;; -;;; Updates the domain of the CCP with PROC, returns the resulting -;;; CCP; reuses the old one to construct the new one. - -(define (restrict-linear-ccp-domain ccp proc) - (let ((new-d (proc (if (ccp:dshared? ccp) - (begin (set-ccp:dshared? ccp #f) - (char-set-copy (ccp:domain ccp))) - (ccp:domain ccp))))) - (set-ccp:domain ccp new-d) - ccp)) - -;;; CCP is a linear CCP. PROC is a domain x cmap -> domain function. -;;; It is passed a linear domain and cmap string. It may side-effect -;;; the cmap string, and returns the resulting updated domain. -;;; We return the resulting CCP, reusing the parameter to construct it. - -(define (linear-update-ccp ccp proc) - (let* ((cmap (if (ccp:mshared? ccp) - (begin (set-ccp:mshared? ccp #f) - (string-copy (ccp:map ccp))) - (ccp:map ccp))) - - (new-d (proc (if (ccp:dshared? ccp) - (begin (set-ccp:dshared? ccp #f) - (char-set-copy (ccp:domain ccp))) - (ccp:domain ccp)) - cmap))) - (set-ccp:domain ccp new-d) - ccp)) - - - -;;; Return CCP's map field, and mark it as shared. CCP functions that -;;; restrict a ccp's domain share map strings, so they use this guy. -(define (ccp:map/shared ccp) - (set-ccp:mshared? ccp #t) - (ccp:map ccp)) - -(define (ccp-copy ccp) (make-ccp (char-set-copy (ccp:domain ccp)) #f - (string-copy (ccp:map ccp)) #f)) - -;;; N-ary equality relation for partial maps - -(define (ccp= ccp1 . rest) - (let ((domain (ccp:domain ccp1)) - (cmap (ccp:map ccp1))) - (every (lambda (ccp2) - (and (char-set= domain (ccp:domain ccp2)) - (let ((cmap2 (ccp:map ccp2))) - (char-set-every? (lambda (c) - (let ((i (char->ascii c))) - (char=? (string-ref cmap i) - (string-ref cmap2 i)))) - domain)))) - rest))) - - -;;; N-ary subset relation for partial maps - -(define (ccp<= ccp1 . rest) - (let lp ((domain1 (ccp:domain ccp1)) - (cmap1 (ccp:map ccp1)) - (rest rest)) - (or (not (pair? rest)) - (let* ((ccp2 (car rest)) - (domain2 (ccp:domain ccp2)) - (cmap2 (ccp:map ccp2)) - (rest (cdr rest))) - (and (char-set<= domain1 domain2) - (let ((cmap2 (ccp:map ccp2))) - (char-set-every? (lambda (c) - (let ((i (char->ascii c))) - (char=? (string-ref cmap1 i) - (string-ref cmap2 i)))) - domain1)) - (lp domain2 cmap2 rest)))))) - - -;;; CCP iterators -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (ccp-fold kons knil ccp) - (let ((cmap (ccp:map ccp))) - (char-set-fold (lambda (c v) (kons c (string-ref cmap (char->ascii c)) v)) - knil - (ccp:domain ccp)))) - -(define (ccp-for-each proc ccp) - (let ((cmap (ccp:map ccp))) - (char-set-for-each (lambda (c) (proc c (string-ref cmap (char->ascii c)))) - (ccp:domain ccp)))) - -(define (ccp->alist ccp) - (ccp-fold (lambda (from to alist) (cons (cons from to) alist)) - '() - ccp)) - - -;;; CCP-RESTRICT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Restrict a ccp's domain. - -(define (ccp-restrict ccp cset) - (make-ccp (char-set-intersection cset (ccp:domain ccp)) - #f - (ccp:map/shared ccp) - #t)) - -(define (ccp-restrict! ccp cset) - (restrict-linear-ccp-domain ccp (lambda (d) (char-set-intersection! d cset)))) - - -;;; CCP-ADJOIN ccp from-char1 to-char1 ... -;;; CCP-DELETE ccp char1 ... -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Add & delete mappings to/from a ccp. - -(define (ccp-delete ccp . chars) - (make-ccp (apply char-set-delete (ccp:domain ccp) chars) - #f - (ccp:map/shared ccp) - #t)) - -(define (ccp-delete! ccp . chars) - (restrict-linear-ccp-domain ccp (lambda (d) (apply char-set-delete! d chars)))) - - -(define (ccp-adjoin ccp . chars) - (let ((cmap (string-copy (ccp:map ccp)))) - (make-ccp (install-ccp-adjoin! cmap (char-set-copy (ccp:domain ccp)) chars) - #f - cmap - #f))) - -(define (ccp-adjoin! ccp . chars) - (linear-update-ccp ccp (lambda (d cmap) (install-ccp-adjoin! cmap d chars)))) - -(define (install-ccp-adjoin! cmap domain chars) - (let lp ((chars chars) (d domain)) - (if (pair? chars) - (let ((from (car chars)) - (to (cadr chars)) - (chars (cddr chars))) - (string-set! cmap (char->ascii from) to) - (lp chars (char-set-adjoin! d from))) - d))) - - -;;; CCP-EXTEND ccp1 ... -;;; CCP-EXTEND! ccp1 ccp2 ... -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Extend ccp1 with ccp2, etc. - -(define (ccp-extend . ccps) - (if (pair? ccps) - (let ((ccp0 (car ccps)) - (ccps (cdr ccps))) - (if (pair? ccps) - (let ((cmap (string-copy (ccp:map ccp0)))) ; Copy cmap. - ;; The FOLD installs each ccp in CCPS into CMAP and produces - ;; the new domain. - (make-ccp (fold (lambda (ccp d) - (install-ccp-extension! cmap d ccp)) - (char-set-copy (ccp:domain ccp0)) - ccps) - #f cmap #f)) - - ccp0)) ; Only 1 parameter - - ccp:0)) ; 0 parameters - -(define (ccp-extend! ccp0 . ccps) - (linear-update-ccp ccp0 - (lambda (domain cmap) - (fold (lambda (ccp d) (install-ccp-extension! cmap d ccp)) - domain - ccps)))) - - -;;; Side-effect CMAP, linear-update and return DOMAIN. -(define (install-ccp-extension! cmap domain ccp) - (let ((cmap1 (ccp:map ccp)) - (domain1 (ccp:domain ccp))) - (char-set-for-each (lambda (c) - (let ((i (char->ascii c))) - (string-set! cmap i (string-ref cmap1 i)))) - domain1) - (char-set-union! domain domain1))) - - -;;; Compose the CCPs. 0-ary case: (ccp-compose) = ccp:1. -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; For each character C-IN in the original domain, we push it -;;; through the pipeline of CCPs. If we ever land outside the -;;; domain of a ccp, we punt C-IN. If we push it all the way -;;; through, we add C-IN to our result domain, and add the mapping -;;; into the cmap we are assembling. -;;; -;;; Looping this way avoids building up intermediate temporary -;;; CCPs. If CCP's were small bitsets, we might be better off -;;; slicing the double-nested loops the other way around. - -(define (ccp-compose . ccps) - (cond ((not (pair? ccps)) ccp:1) ; 0 args => ccp:1 - ((not (pair? (cdr ccps))) (car ccps)) ; 1 arg - (else - (let* ((v (list->vector ccps)) - (vlen-2 (- (vector-length v) 2)) - (cmap (make-string num-chars)) - (d1 (ccp:domain (vector-ref v (+ vlen-2 1)))) - (d (char-set-fold (lambda (c-in d) - (let lp ((c c-in) (i vlen-2)) - (if (>= i 0) - (let ((ccp (vector-ref v i))) - (if (char-set-contains? (ccp:domain ccp) c) - (lp (string-ref (ccp:map ccp) - (char->ascii c)) - (- i 1)) - - ;; Lose: remove c-in from d. - (char-set-delete! d c-in))) - - ;; Win: C-IN -> C - (begin (string-set! cmap - (char->ascii c-in) - c) - d)))) - (char-set-copy d1) - d1))) - (make-ccp d #f cmap #f))))) - - - -;;; ALIST->CPP -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (alist->ccp cc-alist . maybe-base-ccp) - (let ((base (:optional maybe-base-ccp ccp:0))) - (if (pair? cc-alist) - (let ((cmap (string-copy (ccp:map base)))) - (make-ccp (install-ccp-alist! cmap - (char-set-copy (ccp:domain base)) - cc-alist) - #f cmap #f)) - base))) - -(define (alist->ccp! alist base) - (linear-update-ccp base (lambda (d cmap) (install-ccp-alist! cmap d alist)))) - -;;; Side-effect CMAP, linear-update and return DOMAIN. -(define (install-ccp-alist! cmap domain alist) - (fold (lambda (from/to d) (let ((from (car from/to)) - (to (cdr from/to))) - (string-set! cmap (char->ascii from) to) - (char-set-adjoin! domain from))) - domain - alist)) - - -;;; PROC->CCP -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; (proc->ccp proc [domain base-ccp]) - -(define (proc->ccp proc . args) - (let-optionals args ((proc-domain char-set:full) - (base ccp:0)) - (let ((cmap (string-copy (ccp:map base)))) - (make-ccp (install-ccp-proc! cmap (char-set-copy (ccp:domain base)) - proc proc-domain) - #f cmap #f)))) - -(define (proc->ccp! proc proc-domain base) - (linear-update-ccp base - (lambda (d cmap) (install-ccp-proc! cmap d proc proc-domain)))) - -(define (install-ccp-proc! cmap domain proc proc-domain) - (char-set-for-each (lambda (c) (string-set! cmap (char->ascii c) (proc c))) - proc-domain) - (char-set-union! domain proc-domain)) - - -;;; CONSTANT-CCP -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; (constant-ccp char [domain base-ccp]) -;;; Extend BASE-CCP with the a map taking every char in DOMAIN to CHAR. -;;; DOMAIN defaults to char-set:full. BASE-CCP defaults to CCP:0. - -(define (constant-ccp char . args) - (let-optionals args ((char-domain char-set:full) (base ccp:0)) - (let ((cmap (string-copy (ccp:map base)))) - (make-ccp (install-constant-ccp! cmap (char-set-copy (ccp:domain base)) - char char-domain) - #f cmap #f)))) - -(define (constant-ccp! char char-domain base) - (linear-update-ccp base - (lambda (d cmap) (install-constant-ccp! cmap d char char-domain)))) - -;;; Install the constant mapping into CMAP0 by side-effect, -;;; linear-update & return DOMAIN0 with the constant-mapping's domain. -(define (install-constant-ccp! cmap0 domain0 char char-domain) - (char-set-for-each (lambda (c) (string-set! cmap0 (char->ascii c) char)) - char-domain) - (char-set-union! domain0 char-domain)) - - -;;; CCP/MAPPINGS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; (ccp/mappings from1 to1 from2 to2 ...) -> ccp -;;; (extend-ccp/mappings base-ccp from1 to1 from2 to2 ...) -> ccp -;;; (extend-ccp/mappings! base-ccp from1 to1 from2 to2 ...) -> ccp -;;; Each FROM element is either a string or a (lo-char . hi-char) range. -;;; Each TO element is either a string or a lo-char. Strings are replicated -;;; to match the length of the corresponding FROM element. -;;; CCP/MAPPINGS's base CCP is CCP:0 -;;; -;;; Tedious code. - -;;; Internal utility. -;;; Install the FROM->TO mapping pair into DOMAIN & CMAP by side-effect. -;;; Return the new domain. - -(define (install-ccp-mapping-pair! cmap domain from to) - ;; Tedium -- four possibilities here: - ;; str->str, str->lo-char, - ;; range->str, range->lo-char. - (if (string? from) - (if (string? to) - ;; "abc" -> "ABC" - (let ((len1 (string-length from)) - (len2 (string-length to))) - (let lp2 ((i (- len1 1)) - (j (modulo (- len2 1) len1)) - (d domain)) - (if (>= i 0) - (let ((c (string-ref from i))) - (string-set! cmap - (char->ascii c) - (string-ref to i)) - (lp2 (- i 1) - (- (if (> j 0) j len2) 1) - (char-set-adjoin! d c))) - d))) - - ;; "abc" -> #\A - (let lp2 ((i (- (string-length from) 1)) - (j (char->ascii to)) - (d domain)) - (if (>= i 0) - (let ((c (string-ref from i))) - (string-set! cmap - (char->ascii c) - (ascii->char j)) - (lp2 (- i 1) - (- j 1) - (char-set-adjoin! d c))) - d))) - - (let ((from-start (char->ascii (car from))) - (from-end (char->ascii (cdr from)))) - (if (string? to) - (let ((len2-1 (- (string-length to) 1))) - ;; (#\a . #\c) -> "ABC" - (let lp2 ((i from-start) (j 0) (d domain)) - (if (<= i from-end) - (let ((c (string-ref to j))) - (string-set! cmap i c) - (lp2 (+ i 1) - (if (= j len2-1) 0 (+ j 1)) - (char-set-adjoin! d c))) - d))) - - ;; (#\a . #\c) -> #\A - (do ((i from-start (+ i 1)) - (j (char->ascii to) (+ j 1)) - (d domain (begin (string-set! cmap i (ascii->char j)) - (char-set-adjoin d (ascii->char i))))) - ((> i from-end) d)))))) - -;;; Internal utility -- side-effects CMAP; linear-updates & returns DOMAIN. -(define (install-mapping-pairs cmap domain args) - (let lp ((domain domain) (args args)) - (if (pair? args) - (lp (install-ccp-mapping-pair! cmap domain (car args) (cadr args)) - (cddr args)) - domain))) - -(define (ccp/mappings . args) - (let ((cmap (make-string num-chars))) - (make-ccp (install-mapping-pairs (make-string num-chars) - (char-set-copy char-set:empty) - args) - #f cmap #f))) - -(define (extend-ccp/mappings base . args) - (let ((cmap (string-copy (ccp:map base)))) - (make-ccp (install-mapping-pairs cmap (char-set-copy (ccp:domain base)) args) - #f cmap #f))) - -(define (extend-ccp/mappings! base . args) - (linear-update-ccp base (lambda (d cmap) (install-mapping-pairs cmap d args)))) - - -;;; CONSTRUCT-CCP! ccp elt ... -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; The kitchen-sink constructor; static typing be damned. -;;; ELTS are interpreted as follows: -;;; (lo-char . hi-char) to-string|lo-char ; ccp/range -;;; from-string to-string|lo-char ; ccp/range -;;; ccp ; ccp-extend -;;; alist ; alist->ccp -;;; domain char ; ccp-constant -;;; domain proc ; proc->ccp - -(define (construct-ccp! ccp . elts) - (linear-update-ccp ccp (lambda (d cmap) (install-ccp-construct! cmap d elts)))) - -(define (construct-ccp base . elts) - (let ((cmap (string-copy (ccp:map base)))) - (make-ccp (install-ccp-construct! cmap (char-set-copy (ccp:domain base)) elts) - #f cmap #f))) - -;;; Install the mappings into CMAP by side-effect, -;;; linear-update & return DOMAIN with the final domain. - -(define (install-ccp-construct! cmap domain elts) - (let lp ((d domain) (elts elts)) - ;(format #t "d=~s elts=~s\n" d elts) - (if (not (pair? elts)) d - (let ((elt (car elts)) - (elts (cdr elts))) - (cond ((pair? elt) - (cond ((pair? (car elt)) ; ELT is an alist. - (lp (install-ccp-alist! cmap d elt) elts)) - ((char? (car elt)) ; ELT is (lo-char . hi-char) range. - (lp (install-ccp-mapping-pair! cmap d elt (car elts)) - (cdr elts))) - (else (error "Illegal elt to construct-ccp" elt)))) - - ((string? elt) - (lp (install-ccp-mapping-pair! cmap d elt (car elts)) - (cdr elts))) - - ((ccp? elt) (lp (install-ccp-extension! cmap d elt) elts)) - - ((char-set? elt) - (let ((elt2 (car elts)) - (elts (cdr elts))) - (lp (cond ((char? elt2) - (install-constant-ccp! cmap d elt2 elt)) - ((procedure? elt2) - (install-ccp-proc! cmap d elt2 elt)) - (else (error "Illegal elt-pair to construct-ccp" - elt elt2))) - elts))) - - (else (error "Illegal elt to construct-ccp" elt))))))) - - -;;; CCP unfold - -(define (ccp-unfold p f g seed) - (let lp ((seed seed) (ccp (ccp-copy ccp:0))) - (if (p seed) ccp - (lp (g seed) - (receive (from to) (f seed) - (lp (g seed) (ccp-adjoin! ccp from to))))))) - - - -;;; Using CCPs -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; TR ccp string [start end] -> string -;;; CCP-MAP ccp string [start end] -> string -;;; CCP-MAP! ccp string [start end] -> undefined -;;; CCP-APP ccp char -> char or false - -;;; If a char in S is not in CCP's domain, it is dropped from the result. -;;; You can use this to map and delete chars from a string. - -(define (tr ccp s . maybe-start+end) - (let-optionals maybe-start+end ((start 0) (end (string-length s))) - ;; Count up the chars in S that are in the domain, - ;; and allocate the answer string ANS: - (let* ((len (- end start)) - (domain (ccp:domain ccp)) - (ans-len (string-fold (lambda (c numchars) - (if (char-set-contains? domain c) - (+ numchars 1) - numchars)) - 0 s start end)) - (ans (make-string ans-len))) - - ;; Apply the map, installing the resulting chars into ANS: - (string-fold (lambda (c i) (cond ((ccp-app ccp c) => - (lambda (c) - (string-set! ans i c) - (+ i 1))) - (else i))) ; Not in domain -- drop it. - 0 s start end) - ans))) - -(define (ccp-map ccp s . maybe-start+end) - (apply string-map (lambda (c) (ccp-app ccp c)) s maybe-start+end)) - -(define (ccp-map! ccp s . maybe-start+end) - (apply string-map! (lambda (c) (ccp-app ccp c)) s maybe-start+end)) - -(define (ccp-app ccp char) - (and (char-set-contains? (ccp:domain ccp) char) - (string-ref (ccp:map ccp) (char->ascii char)))) - - -;;; Primitive CCPs -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define id-cmap - (let ((m (make-string num-chars))) - (do ((i (- num-chars 1) (- i 1))) - ((< i 0)) - (string-set! m i (ascii->char i))) - m)) - -(define ccp:0 (make-ccp char-set:empty #t id-cmap #t)) -(define ccp:1 (make-ccp char-set:full #t id-cmap #t)) - -(define ccp:upcase (proc->ccp char-upcase char-set:full)) -(define ccp:downcase (proc->ccp char-downcase char-set:full))