Hygienic comparison for symbols.

This commit is contained in:
mainzelm 2001-10-01 14:50:33 +00:00
parent 34898c9b91
commit 14ad4d6e15
1 changed files with 127 additions and 116 deletions

View File

@ -162,44 +162,49 @@
((c sre %word) (non-cset) re-word) ((c sre %word) (non-cset) re-word)
((pair? sre) ((pair? sre)
(case (car sre) (let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym)))))
((*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre)))) (cond
((+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre)))) ((hygn-eq? '*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
((?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre)))) ((hygn-eq? '+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
((=) (non-cset) (let ((n (cadr sre))) ((hygn-eq? '?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
((hygn-eq? '=) (non-cset) (let ((n (cadr sre)))
(re-repeat n n (parse-seq (cddr sre))))) (re-repeat n n (parse-seq (cddr sre)))))
((>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre)))) ((hygn-eq? '>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
((**) (non-cset) (re-repeat (cadr sre) (caddr sre) ((hygn-eq? '**) (non-cset) (re-repeat (cadr sre) (caddr sre)
(parse-seq (cdddr sre)))) (parse-seq (cdddr sre))))
;; Choice is special wrt cset? because it's "polymorphic". ;; Choice is special wrt cset? because it's "polymorphic".
;; Note that RE-CHOICE guarantees to construct a char-set ;; Note that RE-CHOICE guarantees to construct a char-set
;; or single-char string regexp if all of its args are char ;; or single-char string regexp if all of its args are char
;; classes. ;; classes.
((| or) (let ((elts (map (lambda (sre) ((or (hygn-eq? '|)
(hygn-eq? 'or))
(let ((elts (map (lambda (sre)
(recur sre case-sensitive? cset?)) (recur sre case-sensitive? cset?))
(cdr sre)))) (cdr sre))))
(if cset? (if cset?
(assoc-cset-op char-set-union 'char-set-union elts r) (assoc-cset-op char-set-union 'char-set-union elts r)
(re-choice elts)))) (re-choice elts))))
((: seq) (non-cset) (parse-seq (cdr sre))) ((or (hygn-eq? ':)
(hygn-eq? 'seq))
(non-cset) (parse-seq (cdr sre)))
((word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow))) ((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
((word+) ((hygn-eq? 'word+)
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_") (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
(,(r '|) . ,(cdr sre))))) (,(r '|) . ,(cdr sre)))))
case-sensitive? case-sensitive?
cset?)) cset?))
((submatch) (non-cset) (re-submatch (parse-seq (cdr sre)))) ((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
((dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) ((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
(cadr sre) (cadr sre)
(caddr sre))) (caddr sre)))
;; We could be more aggressive and push the uncase op down into ;; We could be more aggressive and push the uncase op down into
;; partially-static regexps, but enough is enough. ;; partially-static regexps, but enough is enough.
((uncase) ((hygn-eq? 'uncase)
(let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
(if cset? (if cset?
@ -213,22 +218,22 @@
,(regexp->scheme (simplify-regexp re-or-cset) r)))))) ,(regexp->scheme (simplify-regexp re-or-cset) r))))))
;; These just change the lexical case-sensitivity context. ;; These just change the lexical case-sensitivity context.
((w/nocase) (parse-seq/context (cdr sre) #f)) ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f))
((w/case) (parse-seq/context (cdr sre) #t)) ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t))
;; ,<exp> and ,@<exp> ;; ,<exp> and ,@<exp>
((unquote) ((hygn-eq? 'unquote)
(let ((exp (cadr sre))) (let ((exp (cadr sre)))
(if cset? (if cset?
`(,%coerce-dynamic-charset ,exp) `(,%coerce-dynamic-charset ,exp)
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) `(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
((unquote-splicing) ((hygn-eq? 'unquote-splicing)
(let ((exp (cadr sre))) (let ((exp (cadr sre)))
(if cset? (if cset?
`(,%coerce-dynamic-charset ,exp) `(,%coerce-dynamic-charset ,exp)
`(,%coerce-dynamic-regexp ,exp)))) `(,%coerce-dynamic-regexp ,exp))))
((~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union ((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
(map parse-char-class (cdr sre)) (map parse-char-class (cdr sre))
r)) r))
(cs (if (char-set? cs) (cs (if (char-set? cs)
@ -236,12 +241,12 @@
`(,(r 'char-set-complement) ,cs)))) `(,(r 'char-set-complement) ,cs))))
(if cset? cs (make-re-char-set cs)))) (if cset? cs (make-re-char-set cs))))
((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection ((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
(map parse-char-class (cdr sre)) (map parse-char-class (cdr sre))
r))) r)))
(if cset? cs (make-re-char-set cs)))) (if cset? cs (make-re-char-set cs))))
((-) (if (pair? (cdr sre)) ((hygn-eq? '-) (if (pair? (cdr sre))
(let* ((cs1 (parse-char-class (cadr sre))) (let* ((cs1 (parse-char-class (cadr sre)))
(cs2 (assoc-cset-op char-set-union 'char-set-union (cs2 (assoc-cset-op char-set-union 'char-set-union
(map parse-char-class (cddr sre)) (map parse-char-class (cddr sre))
@ -258,10 +263,10 @@
(if cset? cs (make-re-char-set cs))) (if cset? cs (make-re-char-set cs)))
(error "SRE set-difference operator (- ...) requires at least one argument"))) (error "SRE set-difference operator (- ...) requires at least one argument")))
((/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) ((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
(if cset? cset (make-re-char-set cset)))) (if cset? cset (make-re-char-set cset))))
((posix-string) ((hygn-eq? 'posix-string)
(if (and (= 1 (length (cdr sre))) (if (and (= 1 (length (cdr sre)))
(string? (cadr sre))) (string? (cadr sre)))
(posix-string->regexp (cadr sre)) (posix-string->regexp (cadr sre))
@ -273,27 +278,33 @@
(cs (if case-sensitive? cs (uncase-char-set cs)))) (cs (if case-sensitive? cs (uncase-char-set cs))))
(if cset? cs (make-re-char-set cs))) (if cset? cs (make-re-char-set cs)))
(error "Illegal SRE" sre))))) (error "Illegal SRE" sre))))))
;; It must be a char-class name (ANY, ALPHABETIC, etc.) ;; It must be a char-class name (ANY, ALPHABETIC, etc.)
(else (let ((cs (case sre (else
((any) char-set:full) (letrec ((hygn-memq? (lambda (sym-list)
((nonl) nonl-chars) (if (null? sym-list)
((lower-case lower) char-set:lower-case) #f
((upper-case upper) char-set:upper-case) (or (c sre (r (car sym-list)))
((alphabetic alpha) char-set:letter) (hygn-memq? (cdr sym-list)))))))
((numeric digit num) char-set:digit) (let ((cs (cond
((alphanumeric alnum alphanum) char-set:letter+digit) ((hygn-memq? '(any)) char-set:full)
((punctuation punct) char-set:punctuation) ((hygn-memq? '(nonl)) nonl-chars)
((graphic graph) char-set:graphic) ((hygn-memq? '(lower-case lower)) char-set:lower-case)
((blank) char-set:blank) ((hygn-memq? '(upper-case upper)) char-set:upper-case)
((whitespace space white) char-set:whitespace) ((hygn-memq? '(alphabetic alpha)) char-set:letter)
((printing print) char-set:printing) ((hygn-memq? '(numeric digit num)) char-set:digit)
((control cntrl) char-set:iso-control) ((hygn-memq? '(alphanumeric alnum alphanum)) char-set:letter+digit)
((hex-digit xdigit hex) char-set:hex-digit) ((hygn-memq? '(punctuation punct)) char-set:punctuation)
((ascii) char-set:ascii) ((hygn-memq? '(graphic graph)) char-set:graphic)
((hygn-memq? '(blank)) char-set:blank)
((hygn-memq? '(whitespace space white)) char-set:whitespace)
((hygn-memq? '(printing print)) char-set:printing)
((hygn-memq? '(control cntrl)) char-set:iso-control)
((hygn-memq? '(hex-digit xdigit hex)) char-set:hex-digit)
((hygn-memq? '(ascii)) char-set:ascii)
(else (error "Illegal regular expression" sre))))) (else (error "Illegal regular expression" sre)))))
(if cset? cs (make-re-char-set cs)))))))) (if cset? cs (make-re-char-set cs)))))))))
;;; In a CSET? true context, S must be a 1-char string; convert to a char set ;;; In a CSET? true context, S must be a 1-char string; convert to a char set