diff --git a/scsh/char-set.scm b/scsh/char-set.scm deleted file mode 100644 index 72aedbe..0000000 --- a/scsh/char-set.scm +++ /dev/null @@ -1,348 +0,0 @@ -;;; -*-Scheme-*- -;;; -;;; Character Sets package -;;; - ported from MIT Scheme runtime -;;; by Brian D. Carlstrom -;;; - Rehacked & extended by Olin Shivers 6/98. - -;;; This is not great code. Char sets are represented as 256-char -;;; strings. If char i is ASCII 0, then it isn't in the set; if char i -;;; is ASCII 1, then it is in the set. -;;; - Should be rewritten to use bit strings, or at least byte vecs. -;;; - Is ASCII/Latin-1 specific. Would certainly have to be rewritten -;;; for Unicode. -;;; - The standard character sets are not Latin-1 compliant, just ASCII. - -;;; This code uses jar's DEFINE-RECORD-TYPE macro to define the char-set -;;; record type, because the scsh-standard DEFINE-RECORD form automatically -;;; defines a COPY-FOO function, which is not the one we want, being a shallow -;;; copy of the record fields. - -;;; New dfns: -;;; (char-set= cs1 cs2 ...) -;;; (char-set<= cs1 cs2 ...) -;;; (char-set-fold kons knil cs) -;;; (char-set-for-each f cs) -;;; (char-set-copy cs) -;;; (char-set-size cs) -;;; char-set:printing (char-printing? c) -;;; char-set:blank (char-blank? c) -;;; char-set:control (char-control? c) -;;; char-set:hex-digit (char-hex-digit? c) -;;; char-set:ascii (char-ascii? c) -;;; char-set:empty -;;; char-set:full -;;; char-set-every? pred cs -;;; char-set-any pred cs -;;; char-set-adjoin cset char -> cset -;;; char-set-adjoin! cset char -> cset -;;; char-set-delete cset char -> cset -;;; char-set-delete! cset char -> cset - -(define char:newline (ascii->char 13)) -(define char:tab (ascii->char 9)) -(define char:vtab (ascii->char 11)) -(define char:page (ascii->char 12)) -(define char:return (ascii->char 10)) -(define char:space (ascii->char 32)) - -(define (string-copy s) (substring s 0 (string-length s))) - -(define (string-fill-range! str lower upper ch) - (do ((index lower (+ index 1))) - ((>= index upper) str) - (string-set! str index ch))) - -(define (char-ascii? char) - (let ((maybe-ascii (char->ascii char))) - (and (<= 0 maybe-ascii 127) maybe-ascii))) - -;;;; Character Sets - -;(define-record char-set -; s) ; 256-char string; each char is either ASCII 0 or ASCII 1. - -;;; Use jar's record macro. -(define-record-type char-set :char-set - (make-char-set s) - char-set? - (s char-set:s)) - -(define (char-set-copy cs) (make-char-set (string-copy (char-set:s cs)))) - -;;; The = and <= code is ugly because it's n-ary. - -(define (char-set= cs1 . rest) - (let ((s1 (char-set:s cs1))) - (every (lambda (cs) (string=? s1 (char-set:s cs))) - rest))) - -(define (char-set<= cs1 . rest) - (let lp ((s1 (char-set:s cs1)) - (rest rest)) - (or (not (pair? rest)) - (let ((s2 (char-set:s (car rest))) - (rest (cdr rest))) - (let lp2 ((i 255)) - (if (< i 0) (lp s2 rest) - (and (<= (char->ascii (string-ref s1 i)) - (char->ascii (string-ref s2 i))) - (lp2 (- i 1))))))))) - - -(define (char-set-size cs) - (let ((s (char-set:s cs))) - (let lp ((i 255) (size 0)) - (if (< i 0) size - (lp (- i 1) - (if (= 0 (char->ascii (string-ref s i))) size (+ size 1))))))) - -(define (set-char-set cs in? . chars) - (let ((s (string-copy (char-set:s cs))) - (val (if in? (ascii->char 1) (ascii->char 0)))) - (for-each (lambda (c) (string-set! s (char->ascii c) val)) - chars) - (make-char-set s))) - -(define (set-char-set! cs in? . chars) - (let ((s (char-set:s cs)) - (val (if in? (ascii->char 1) (ascii->char 0)))) - (for-each (lambda (c) (string-set! s (char->ascii c) val)) - chars)) - cs) - -(define (char-set-adjoin cs . chars) (apply set-char-set cs #t chars)) -(define (char-set-adjoin! cs . chars) (apply set-char-set! cs #t chars)) -(define (char-set-delete cs . chars) (apply set-char-set cs #f chars)) -(define (char-set-delete! cs . chars) (apply set-char-set! cs #f chars)) - -(define (char-set-for-each proc cs) - (let ((s (char-set:s cs))) - (let lp ((i 255)) - (cond ((>= i 0) - (if (not (= 0 (char->ascii (string-ref s i)))) - (proc (ascii->char i))) - (lp (- i 1))))))) - -(define (char-set-fold kons knil cs) - (let ((s (char-set:s cs))) - (let lp ((i 255) (ans knil)) - (if (< i 0) ans - (lp (- i 1) - (if (= 0 (char->ascii (string-ref s i))) - ans - (kons (ascii->char i) ans))))))) - -(define reduce-char-set (deprecated-proc char-set-fold 'char-set-fold - "Use char-set-fold instead.")) - -(define (char-set-every? pred cs) - (let ((s (char-set:s cs))) - (let lp ((i 255)) - (or (< i 0) - (if (= 0 (char->ascii (string-ref s i))) - (lp (- i 1)) - (and (pred (ascii->char i)) - (lp (- i 1)))))))) - -(define (char-set-any pred cs) - (let ((s (char-set:s cs))) - (let lp ((i 255)) - (and (>= i 0) - (if (= 0 (char->ascii (string-ref s i))) - (lp (- i 1)) - (or (pred (ascii->char i)) - (lp (- i 1)))))))) - - -(define (char-set . chars) - (chars->char-set chars)) - -(define (chars->char-set chars) - (let ((s (make-string 256 (ascii->char 0)))) - (for-each (lambda (char) - (string-set! s (char->ascii char) (ascii->char 1))) - chars) - (make-char-set s))) - -(define (string->char-set str) - (let ((s (make-string 256 (ascii->char 0)))) - (do ((i (- (string-length str) 1) (- i 1))) - ((< i 0) (make-char-set s)) - (string-set! s (char->ascii (string-ref str i)) - (ascii->char 1))))) - -(define (ascii-range->char-set lower upper) - (let ((s (make-string 256 (ascii->char 0)))) - (string-fill-range! s lower upper (ascii->char 1)) - (make-char-set s))) - -(define (predicate->char-set predicate) - (let ((s (make-string 256))) - (let lp ((i 255)) - (if (>= i 0) - (begin (string-set! s i (if (predicate (ascii->char i)) - (ascii->char 1) - (ascii->char 0))) - (lp (- i 1))))) - (make-char-set s))) - - -;;; {string, char, char-set, char predicate} -> char-set - -(define (->char-set x) - (cond ((char-set? x) x) - ((string? x) (string->char-set x)) - ((char? x) (char-set x)) - ((procedure? x) (predicate->char-set x)) - (else (error "->char-set: Not a charset, string, char, or predicate." - x)))) - - -;;;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -(define (char-set-members cs) - (let ((s (char-set:s cs))) - (let lp ((i 255) (ans '())) - (if (< i 0) ans - (lp (- i 1) - (if (zero? (char->ascii (string-ref s i))) ans - (cons (ascii->char i) ans))))))) - -;;; De-releasing CHAR-SET-MEMBER? -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; No other way to do it. MIT Scheme defines it (c-s-m? cset char); scsh 0.3 -;;; defined it (c-s-m? char cset). MIT Scheme's arg order is not consistent -;;; with the MEMBER? procedure or common math notation, but they were here -;;; first, so I didn't want to just silently invert their arg order -- could -;;; break code. I ended up just choosing a new proc name that consistent with -;;; its arg order -- (CHAR-SET-CONTAINS? cset char). - -(define (char-set-contains? cs char) - (not (zero? (char->ascii (string-ref (char-set:s cs) - (char->ascii char)))))) - -;;; This actually isn't exported. Just CYA. -(define (char-set-member? . args) - (error "CHAR-SET-MEMBER? is no longer provided. Use CHAR-SET-CONTAINS? instead.")) - - -;;; Set algebra -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define (char-set-invert cs) - (predicate->char-set (lambda (char) - (not (char-set-contains? cs char))))) - -(define (char-set-union . csets) - (if (pair? csets) - (apply char-set-union! (char-set-copy (car csets)) (cdr csets)) - char-set:empty)) - -(define (char-set-intersection . csets) - (if (pair? csets) - (apply char-set-intersection! (char-set-copy (car csets)) (cdr csets)) - char-set:full)) - -(define (char-set-difference cs1 . csets) - (if (pair? csets) - (apply char-set-difference! (char-set-copy cs1) csets) - cs1)) - - -;;; Linear set-algebraic ops -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; These guys are allowed, but not required, to side-effect their first -;;; argument when computing their result. In other words, you must use them -;;; as if they were completely functional, just like their non-! counterparts, -;;; and you must additionally ensure that their first arguments are "dead" -;;; at the point of call. In return, we promise a more efficient result, plus -;;; allowing you to always assume char-sets are unchangeable values. - -;;; Apply P to each index and it's char in S: (P I C). -;;; Used by the intersection & difference. -(define (string-iter p s) - (let lp ((i (- (string-length s) 1))) - (cond ((>= i 0) - (p i (string-ref s i)) - (lp (- i 1)))))) - -(define (char-set-invert! cset) - (let ((s (char-set:s cset))) - (string-iter (lambda (i c) - (string-set! s i (ascii->char (- 1 (char->ascii c))))) - s)) - cset) - -(define (char-set-union! cset1 . csets) - (let ((s (char-set:s cset1))) - (for-each (lambda (cset) - (char-set-for-each (lambda (c) - (string-set! s (char->ascii c) - (ascii->char 1))) - cset)) - csets)) - cset1) - -(define (char-set-intersection! cset1 . csets) - (let ((s (char-set:s cset1))) - (for-each (lambda (cset) - (string-iter (lambda (i c) - (if (zero? (char->ascii c)) - (string-set! s i (ascii->char 0)))) - (char-set:s cset))) - csets)) - cset1) - -(define (char-set-difference! cset1 . csets) - (let ((s (char-set:s cset1))) - (for-each (lambda (cset) - (char-set-for-each (lambda (c) - (string-set! s (char->ascii c) - (ascii->char 0))) - cset)) - csets)) - cset1) - - - -;;;; System Character Sets -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define char-set:lower-case (ascii-range->char-set #x61 #x7B)) -(define char-set:upper-case (ascii-range->char-set #x41 #x5B)) -(define char-set:alphabetic - (char-set-union char-set:upper-case char-set:lower-case)) -(define char-set:numeric (ascii-range->char-set #x30 #x3A)) -(define char-set:alphanumeric - (char-set-union char-set:alphabetic char-set:numeric)) -(define char-set:graphic (ascii-range->char-set #x21 #x7F)) -(define char-set:printing (ascii-range->char-set #x20 #x7F)) -(define char-set:whitespace (char-set char:tab char:newline char:vtab - char:page char:return char:space)) -(define char-set:blank (char-set char:space char:tab)) -(define char-set:control (char-set-union (ascii-range->char-set 0 32) - (char-set (ascii->char 127)))) -(define char-set:punctuation - (string->char-set "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~")) -(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF")) -(define char-set:ascii (ascii-range->char-set 0 128)) -(define char-set:empty (char-set)) -(define char-set:full (char-set-invert char-set:empty)) - - -(define (char-set->pred cs) (lambda (c) (char-set-contains? cs c))) - -(define char-lower-case? (char-set->pred char-set:lower-case)) -(define char-upper-case? (char-set->pred char-set:upper-case)) -(define char-alphabetic? (char-set->pred char-set:alphabetic)) -(define char-numeric? (char-set->pred char-set:numeric)) -(define char-alphanumeric? (char-set->pred char-set:alphanumeric)) -(define char-graphic? (char-set->pred char-set:graphic)) -(define char-printing? (char-set->pred char-set:printing)) -(define char-whitespace? (char-set->pred char-set:whitespace)) -(define char-blank? (char-set->pred char-set:blank)) -(define char-control? (char-set->pred char-set:control)) -(define char-punctuation? (char-set->pred char-set:punctuation)) -(define char-hex-digit? (char-set->pred char-set:hex-digit)) -(define char-ascii? (char-set->pred char-set:ascii))