diff --git a/scsh/rx/parse.scm b/scsh/rx/parse.scm index bc8eda3..fbec798 100644 --- a/scsh/rx/parse.scm +++ b/scsh/rx/parse.scm @@ -220,7 +220,7 @@ (if cset? (if (re-char-set? re-or-cset) ; A char set or code - (uncase-char-set re-or-cset) ; producing a char set. + (re-char-set:cset (uncase re-or-cset)) `(,(r 'uncase) ,re-or-cset)) (if (static-regexp? re-or-cset) ; A regexp or code @@ -291,11 +291,16 @@ (else (if (every string? sre) ; A set spec -- ("wxyz"). - (let* ((cs (apply char-set-union - (map string->char-set sre))) - (cs (if case-sensitive? cs (uncase-char-set cs)))) - (if cset? cs (make-re-char-set cs))) - + (let ((cs (apply char-set-union + (map string->char-set sre)))) + (if case-sensitive? + (if cset? + cs + (make-re-char-set cs)) + (let ((uncased-re (uncase-char-set cs))) + (if cset? + (re-char-set:cset uncased-re) + uncased-re)))) (error "Illegal SRE" sre)))))) ;; It must be a char-class name (ANY, ALPHABETIC, etc.) @@ -375,7 +380,7 @@ (error "Unmatched range specifier" range-specs) (let lp ((i (- len 1)) (cset cset)) (if (< i 0) - (if cs? cset (uncase-char-set cset)) ; Case fold if necessary. + (if cs? cset (re-char-set:cset (uncase-char-set cset))) ; Case fold if necessary. (lp (- i 2) (ucs-range->char-set! (char->ascii (string-ref specs (- i 1))) (+ 1 (char->ascii (string-ref specs i))) diff --git a/scsh/rx/re.scm b/scsh/rx/re.scm index eef6633..72a521b 100644 --- a/scsh/rx/re.scm +++ b/scsh/rx/re.scm @@ -504,10 +504,10 @@ ((re-char-set? re) (let* ((cs (re-char-set:cset re)) - (new-cs (uncase-char-set cs))) ; Better not be code. - (if (char-set= cs new-cs) + (new-cs-re (uncase-char-set cs))) ; Better not be code. + (if (char-set= cs (re-char-set:cset new-cs-re)) (values re #f) - (values (make-re-char-set new-cs) #t)))) + (values new-cs-re #t)))) ((re-repeat? re) (receive (new-body body-changed?) (recur (re-repeat:body re)) @@ -539,16 +539,17 @@ ;;; (uncase-char-set cs) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Return a char-set cs' such that cs' contains every char c in cs in both -;;; its upcase and downcase form. +;;; Return a regexp for char-set cs' such that cs' contains every char +;;; c in cs in both its upcase and downcase form. (define (uncase-char-set cs) - (char-set-fold (lambda (c new-cset) - (char-set-adjoin! new-cset - (char-downcase c) - (char-upcase c))) - (char-set-copy char-set:empty) - cs)) + (make-re-char-set + (char-set-fold (lambda (c new-cset) + (char-set-adjoin! new-cset + (char-downcase c) + (char-upcase c))) + (char-set-copy char-set:empty) + cs))) ;;; I actually make an effort to keep this a re-string