Hygienic comparison for symbols.

This commit is contained in:
mainzelm 2001-10-01 14:50:33 +00:00
parent 34898c9b91
commit 14ad4d6e15
1 changed files with 127 additions and 116 deletions

View File

@ -162,44 +162,49 @@
((c sre %word) (non-cset) re-word)
((pair? sre)
(case (car sre)
((*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
((+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
((?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
((=) (non-cset) (let ((n (cadr sre)))
(let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym)))))
(cond
((hygn-eq? '*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
((hygn-eq? '+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
((hygn-eq? '?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
((hygn-eq? '=) (non-cset) (let ((n (cadr sre)))
(re-repeat n n (parse-seq (cddr sre)))))
((>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
((**) (non-cset) (re-repeat (cadr sre) (caddr sre)
((hygn-eq? '>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
((hygn-eq? '**) (non-cset) (re-repeat (cadr sre) (caddr sre)
(parse-seq (cdddr sre))))
;; Choice is special wrt cset? because it's "polymorphic".
;; Note that RE-CHOICE guarantees to construct a char-set
;; or single-char string regexp if all of its args are char
;; classes.
((| or) (let ((elts (map (lambda (sre)
((or (hygn-eq? '|)
(hygn-eq? 'or))
(let ((elts (map (lambda (sre)
(recur sre case-sensitive? cset?))
(cdr sre))))
(if cset?
(assoc-cset-op char-set-union 'char-set-union elts r)
(re-choice elts))))
((: seq) (non-cset) (parse-seq (cdr sre)))
((or (hygn-eq? ':)
(hygn-eq? 'seq))
(non-cset) (parse-seq (cdr sre)))
((word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
((word+)
((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
((hygn-eq? 'word+)
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
(,(r '|) . ,(cdr sre)))))
case-sensitive?
cset?))
((submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
((dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
(cadr sre)
(caddr sre)))
;; We could be more aggressive and push the uncase op down into
;; partially-static regexps, but enough is enough.
((uncase)
((hygn-eq? 'uncase)
(let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
(if cset?
@ -213,22 +218,22 @@
,(regexp->scheme (simplify-regexp re-or-cset) r))))))
;; These just change the lexical case-sensitivity context.
((w/nocase) (parse-seq/context (cdr sre) #f))
((w/case) (parse-seq/context (cdr sre) #t))
((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f))
((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t))
;; ,<exp> and ,@<exp>
((unquote)
((hygn-eq? 'unquote)
(let ((exp (cadr sre)))
(if cset?
`(,%coerce-dynamic-charset ,exp)
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
((unquote-splicing)
((hygn-eq? 'unquote-splicing)
(let ((exp (cadr sre)))
(if cset?
`(,%coerce-dynamic-charset ,exp)
`(,%coerce-dynamic-regexp ,exp))))
((~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
(map parse-char-class (cdr sre))
r))
(cs (if (char-set? cs)
@ -236,12 +241,12 @@
`(,(r 'char-set-complement) ,cs))))
(if cset? cs (make-re-char-set cs))))
((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
(map parse-char-class (cdr sre))
r)))
(if cset? cs (make-re-char-set cs))))
((-) (if (pair? (cdr sre))
((hygn-eq? '-) (if (pair? (cdr sre))
(let* ((cs1 (parse-char-class (cadr sre)))
(cs2 (assoc-cset-op char-set-union 'char-set-union
(map parse-char-class (cddr sre))
@ -258,10 +263,10 @@
(if cset? cs (make-re-char-set cs)))
(error "SRE set-difference operator (- ...) requires at least one argument")))
((/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
(if cset? cset (make-re-char-set cset))))
((posix-string)
((hygn-eq? 'posix-string)
(if (and (= 1 (length (cdr sre)))
(string? (cadr sre)))
(posix-string->regexp (cadr sre))
@ -273,27 +278,33 @@
(cs (if case-sensitive? cs (uncase-char-set cs))))
(if cset? cs (make-re-char-set cs)))
(error "Illegal SRE" sre)))))
(error "Illegal SRE" sre))))))
;; It must be a char-class name (ANY, ALPHABETIC, etc.)
(else (let ((cs (case sre
((any) char-set:full)
((nonl) nonl-chars)
((lower-case lower) char-set:lower-case)
((upper-case upper) char-set:upper-case)
((alphabetic alpha) char-set:letter)
((numeric digit num) char-set:digit)
((alphanumeric alnum alphanum) char-set:letter+digit)
((punctuation punct) char-set:punctuation)
((graphic graph) char-set:graphic)
((blank) char-set:blank)
((whitespace space white) char-set:whitespace)
((printing print) char-set:printing)
((control cntrl) char-set:iso-control)
((hex-digit xdigit hex) char-set:hex-digit)
((ascii) char-set:ascii)
(else
(letrec ((hygn-memq? (lambda (sym-list)
(if (null? sym-list)
#f
(or (c sre (r (car sym-list)))
(hygn-memq? (cdr sym-list)))))))
(let ((cs (cond
((hygn-memq? '(any)) char-set:full)
((hygn-memq? '(nonl)) nonl-chars)
((hygn-memq? '(lower-case lower)) char-set:lower-case)
((hygn-memq? '(upper-case upper)) char-set:upper-case)
((hygn-memq? '(alphabetic alpha)) char-set:letter)
((hygn-memq? '(numeric digit num)) char-set:digit)
((hygn-memq? '(alphanumeric alnum alphanum)) char-set:letter+digit)
((hygn-memq? '(punctuation punct)) char-set:punctuation)
((hygn-memq? '(graphic graph)) char-set:graphic)
((hygn-memq? '(blank)) char-set:blank)
((hygn-memq? '(whitespace space white)) char-set:whitespace)
((hygn-memq? '(printing print)) char-set:printing)
((hygn-memq? '(control cntrl)) char-set:iso-control)
((hygn-memq? '(hex-digit xdigit hex)) char-set:hex-digit)
((hygn-memq? '(ascii)) char-set:ascii)
(else (error "Illegal regular expression" sre)))))
(if cset? cs (make-re-char-set cs))))))))
(if cset? cs (make-re-char-set cs)))))))))
;;; In a CSET? true context, S must be a 1-char string; convert to a char set