From 14ad4d6e15647fb36b1905b32282ee81d37d0d15 Mon Sep 17 00:00:00 2001 From: mainzelm Date: Mon, 1 Oct 2001 14:50:33 +0000 Subject: [PATCH] Hygienic comparison for symbols. --- scsh/rx/parse.scm | 243 ++++++++++++++++++++++++---------------------- 1 file changed, 127 insertions(+), 116 deletions(-) diff --git a/scsh/rx/parse.scm b/scsh/rx/parse.scm index e75ccc2..71255de 100644 --- a/scsh/rx/parse.scm +++ b/scsh/rx/parse.scm @@ -162,138 +162,149 @@ ((c sre %word) (non-cset) re-word) ((pair? sre) - (case (car sre) - ((*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre)))) - ((+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre)))) - ((?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre)))) - ((=) (non-cset) (let ((n (cadr sre))) - (re-repeat n n (parse-seq (cddr sre))))) - ((>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre)))) - ((**) (non-cset) (re-repeat (cadr sre) (caddr sre) - (parse-seq (cdddr sre)))) - - ;; Choice is special wrt cset? because it's "polymorphic". - ;; Note that RE-CHOICE guarantees to construct a char-set - ;; or single-char string regexp if all of its args are char - ;; classes. - ((| or) (let ((elts (map (lambda (sre) - (recur sre case-sensitive? cset?)) - (cdr sre)))) - (if cset? - (assoc-cset-op char-set-union 'char-set-union elts r) - (re-choice elts)))) - - ((: seq) (non-cset) (parse-seq (cdr sre))) - - ((word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow))) - ((word+) - (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_") - (,(r '|) . ,(cdr sre))))) - case-sensitive? - cset?)) - - ((submatch) (non-cset) (re-submatch (parse-seq (cdr sre)))) - ((dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) - (cadr sre) - (caddr sre))) - - ;; We could be more aggressive and push the uncase op down into - ;; partially-static regexps, but enough is enough. - ((uncase) - (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. - (if cset? - - (if (re-char-set? re-or-cset) ; A char set or code - (uncase-char-set re-or-cset) ; producing a char set. - `(,(r 'uncase) ,re-or-cset)) - - (if (static-regexp? re-or-cset) ; A regexp or code - (uncase re-or-cset) ; producing a regexp. - `(,(r 'uncase) - ,(regexp->scheme (simplify-regexp re-or-cset) r)))))) - - ;; These just change the lexical case-sensitivity context. - ((w/nocase) (parse-seq/context (cdr sre) #f)) - ((w/case) (parse-seq/context (cdr sre) #t)) - - ;; , and ,@ - ((unquote) + (let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym))))) + (cond + ((hygn-eq? '*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre)))) + ((hygn-eq? '+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre)))) + ((hygn-eq? '?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre)))) + ((hygn-eq? '=) (non-cset) (let ((n (cadr sre))) + (re-repeat n n (parse-seq (cddr sre))))) + ((hygn-eq? '>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre)))) + ((hygn-eq? '**) (non-cset) (re-repeat (cadr sre) (caddr sre) + (parse-seq (cdddr sre)))) + + ;; Choice is special wrt cset? because it's "polymorphic". + ;; Note that RE-CHOICE guarantees to construct a char-set + ;; or single-char string regexp if all of its args are char + ;; classes. + ((or (hygn-eq? '|) + (hygn-eq? 'or)) + (let ((elts (map (lambda (sre) + (recur sre case-sensitive? cset?)) + (cdr sre)))) + (if cset? + (assoc-cset-op char-set-union 'char-set-union elts r) + (re-choice elts)))) + + ((or (hygn-eq? ':) + (hygn-eq? 'seq)) + (non-cset) (parse-seq (cdr sre))) + + ((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow))) + ((hygn-eq? 'word+) + (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_") + (,(r '|) . ,(cdr sre))))) + case-sensitive? + cset?)) + + ((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre)))) + ((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) + (cadr sre) + (caddr sre))) + + ;; We could be more aggressive and push the uncase op down into + ;; partially-static regexps, but enough is enough. + ((hygn-eq? 'uncase) + (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. + (if cset? + + (if (re-char-set? re-or-cset) ; A char set or code + (uncase-char-set re-or-cset) ; producing a char set. + `(,(r 'uncase) ,re-or-cset)) + + (if (static-regexp? re-or-cset) ; A regexp or code + (uncase re-or-cset) ; producing a regexp. + `(,(r 'uncase) + ,(regexp->scheme (simplify-regexp re-or-cset) r)))))) + + ;; These just change the lexical case-sensitivity context. + ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f)) + ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t)) + + ;; , and ,@ + ((hygn-eq? 'unquote) (let ((exp (cadr sre))) (if cset? `(,%coerce-dynamic-charset ,exp) `(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) - ((unquote-splicing) - (let ((exp (cadr sre))) - (if cset? - `(,%coerce-dynamic-charset ,exp) - `(,%coerce-dynamic-regexp ,exp)))) - - ((~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union - (map parse-char-class (cdr sre)) - r)) - (cs (if (char-set? cs) + ((hygn-eq? 'unquote-splicing) + (let ((exp (cadr sre))) + (if cset? + `(,%coerce-dynamic-charset ,exp) + `(,%coerce-dynamic-regexp ,exp)))) + + ((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union + (map parse-char-class (cdr sre)) + r)) + (cs (if (char-set? cs) (char-set-complement cs) `(,(r 'char-set-complement) ,cs)))) - (if cset? cs (make-re-char-set cs)))) - - ((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection - (map parse-char-class (cdr sre)) - r))) - (if cset? cs (make-re-char-set cs)))) - - ((-) (if (pair? (cdr sre)) - (let* ((cs1 (parse-char-class (cadr sre))) - (cs2 (assoc-cset-op char-set-union 'char-set-union + (if cset? cs (make-re-char-set cs)))) + + ((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection + (map parse-char-class (cdr sre)) + r))) + (if cset? cs (make-re-char-set cs)))) + + ((hygn-eq? '-) (if (pair? (cdr sre)) + (let* ((cs1 (parse-char-class (cadr sre))) + (cs2 (assoc-cset-op char-set-union 'char-set-union (map parse-char-class (cddr sre)) r)) - (cs (if (and (char-set? cs1) (char-set? cs2)) - (char-set-difference cs1 cs2) - `(,(r 'char-set-difference) - ,(if (char-set? cs1) - (char-set->scheme cs1 r) + (cs (if (and (char-set? cs1) (char-set? cs2)) + (char-set-difference cs1 cs2) + `(,(r 'char-set-difference) + ,(if (char-set? cs1) + (char-set->scheme cs1 r) cs1) - . ,(if (char-set? cs2) - (list (char-set->scheme cs2 r)) - (cdr cs2)))))) - (if cset? cs (make-re-char-set cs))) - (error "SRE set-difference operator (- ...) requires at least one argument"))) - - ((/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) + . ,(if (char-set? cs2) + (list (char-set->scheme cs2 r)) + (cdr cs2)))))) + (if cset? cs (make-re-char-set cs))) + (error "SRE set-difference operator (- ...) requires at least one argument"))) + + ((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) (if cset? cset (make-re-char-set cset)))) - - ((posix-string) + + ((hygn-eq? 'posix-string) (if (and (= 1 (length (cdr sre))) (string? (cadr sre))) (posix-string->regexp (cadr sre)) (error "Illegal (posix-string ...) SRE body." sre))) - - (else (if (every string? sre) ; A set spec -- ("wxyz"). - (let* ((cs (apply char-set-union - (map string->char-set sre))) - (cs (if case-sensitive? cs (uncase-char-set cs)))) - (if cset? cs (make-re-char-set cs))) - - (error "Illegal SRE" sre))))) + + (else (if (every string? sre) ; A set spec -- ("wxyz"). + (let* ((cs (apply char-set-union + (map string->char-set sre))) + (cs (if case-sensitive? cs (uncase-char-set cs)))) + (if cset? cs (make-re-char-set cs))) + + (error "Illegal SRE" sre)))))) ;; It must be a char-class name (ANY, ALPHABETIC, etc.) - (else (let ((cs (case sre - ((any) char-set:full) - ((nonl) nonl-chars) - ((lower-case lower) char-set:lower-case) - ((upper-case upper) char-set:upper-case) - ((alphabetic alpha) char-set:letter) - ((numeric digit num) char-set:digit) - ((alphanumeric alnum alphanum) char-set:letter+digit) - ((punctuation punct) char-set:punctuation) - ((graphic graph) char-set:graphic) - ((blank) char-set:blank) - ((whitespace space white) char-set:whitespace) - ((printing print) char-set:printing) - ((control cntrl) char-set:iso-control) - ((hex-digit xdigit hex) char-set:hex-digit) - ((ascii) char-set:ascii) - (else (error "Illegal regular expression" sre))))) - (if cset? cs (make-re-char-set cs)))))))) + (else + (letrec ((hygn-memq? (lambda (sym-list) + (if (null? sym-list) + #f + (or (c sre (r (car sym-list))) + (hygn-memq? (cdr sym-list))))))) + (let ((cs (cond + ((hygn-memq? '(any)) char-set:full) + ((hygn-memq? '(nonl)) nonl-chars) + ((hygn-memq? '(lower-case lower)) char-set:lower-case) + ((hygn-memq? '(upper-case upper)) char-set:upper-case) + ((hygn-memq? '(alphabetic alpha)) char-set:letter) + ((hygn-memq? '(numeric digit num)) char-set:digit) + ((hygn-memq? '(alphanumeric alnum alphanum)) char-set:letter+digit) + ((hygn-memq? '(punctuation punct)) char-set:punctuation) + ((hygn-memq? '(graphic graph)) char-set:graphic) + ((hygn-memq? '(blank)) char-set:blank) + ((hygn-memq? '(whitespace space white)) char-set:whitespace) + ((hygn-memq? '(printing print)) char-set:printing) + ((hygn-memq? '(control cntrl)) char-set:iso-control) + ((hygn-memq? '(hex-digit xdigit hex)) char-set:hex-digit) + ((hygn-memq? '(ascii)) char-set:ascii) + (else (error "Illegal regular expression" sre))))) + (if cset? cs (make-re-char-set cs))))))))) ;;; In a CSET? true context, S must be a 1-char string; convert to a char set