Hygienic comparison for symbols.
This commit is contained in:
parent
34898c9b91
commit
14ad4d6e15
|
@ -162,138 +162,149 @@
|
|||
((c sre %word) (non-cset) re-word)
|
||||
|
||||
((pair? sre)
|
||||
(case (car sre)
|
||||
((*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
|
||||
((+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
|
||||
((?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
|
||||
((=) (non-cset) (let ((n (cadr sre)))
|
||||
(re-repeat n n (parse-seq (cddr sre)))))
|
||||
((>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
|
||||
((**) (non-cset) (re-repeat (cadr sre) (caddr sre)
|
||||
(parse-seq (cdddr sre))))
|
||||
(let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym)))))
|
||||
(cond
|
||||
((hygn-eq? '*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
|
||||
((hygn-eq? '+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
|
||||
((hygn-eq? '?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
|
||||
((hygn-eq? '=) (non-cset) (let ((n (cadr sre)))
|
||||
(re-repeat n n (parse-seq (cddr sre)))))
|
||||
((hygn-eq? '>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
|
||||
((hygn-eq? '**) (non-cset) (re-repeat (cadr sre) (caddr sre)
|
||||
(parse-seq (cdddr sre))))
|
||||
|
||||
;; Choice is special wrt cset? because it's "polymorphic".
|
||||
;; Note that RE-CHOICE guarantees to construct a char-set
|
||||
;; or single-char string regexp if all of its args are char
|
||||
;; classes.
|
||||
((| or) (let ((elts (map (lambda (sre)
|
||||
(recur sre case-sensitive? cset?))
|
||||
(cdr sre))))
|
||||
(if cset?
|
||||
(assoc-cset-op char-set-union 'char-set-union elts r)
|
||||
(re-choice elts))))
|
||||
;; Choice is special wrt cset? because it's "polymorphic".
|
||||
;; Note that RE-CHOICE guarantees to construct a char-set
|
||||
;; or single-char string regexp if all of its args are char
|
||||
;; classes.
|
||||
((or (hygn-eq? '|)
|
||||
(hygn-eq? 'or))
|
||||
(let ((elts (map (lambda (sre)
|
||||
(recur sre case-sensitive? cset?))
|
||||
(cdr sre))))
|
||||
(if cset?
|
||||
(assoc-cset-op char-set-union 'char-set-union elts r)
|
||||
(re-choice elts))))
|
||||
|
||||
((: seq) (non-cset) (parse-seq (cdr sre)))
|
||||
((or (hygn-eq? ':)
|
||||
(hygn-eq? 'seq))
|
||||
(non-cset) (parse-seq (cdr sre)))
|
||||
|
||||
((word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
|
||||
((word+)
|
||||
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
|
||||
(,(r '|) . ,(cdr sre)))))
|
||||
case-sensitive?
|
||||
cset?))
|
||||
((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
|
||||
((hygn-eq? 'word+)
|
||||
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
|
||||
(,(r '|) . ,(cdr sre)))))
|
||||
case-sensitive?
|
||||
cset?))
|
||||
|
||||
((submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
|
||||
((dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
|
||||
(cadr sre)
|
||||
(caddr sre)))
|
||||
((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
|
||||
((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
|
||||
(cadr sre)
|
||||
(caddr sre)))
|
||||
|
||||
;; We could be more aggressive and push the uncase op down into
|
||||
;; partially-static regexps, but enough is enough.
|
||||
((uncase)
|
||||
(let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
|
||||
(if cset?
|
||||
;; We could be more aggressive and push the uncase op down into
|
||||
;; partially-static regexps, but enough is enough.
|
||||
((hygn-eq? 'uncase)
|
||||
(let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
|
||||
(if cset?
|
||||
|
||||
(if (re-char-set? re-or-cset) ; A char set or code
|
||||
(uncase-char-set re-or-cset) ; producing a char set.
|
||||
`(,(r 'uncase) ,re-or-cset))
|
||||
(if (re-char-set? re-or-cset) ; A char set or code
|
||||
(uncase-char-set re-or-cset) ; producing a char set.
|
||||
`(,(r 'uncase) ,re-or-cset))
|
||||
|
||||
(if (static-regexp? re-or-cset) ; A regexp or code
|
||||
(uncase re-or-cset) ; producing a regexp.
|
||||
`(,(r 'uncase)
|
||||
,(regexp->scheme (simplify-regexp re-or-cset) r))))))
|
||||
(if (static-regexp? re-or-cset) ; A regexp or code
|
||||
(uncase re-or-cset) ; producing a regexp.
|
||||
`(,(r 'uncase)
|
||||
,(regexp->scheme (simplify-regexp re-or-cset) r))))))
|
||||
|
||||
;; These just change the lexical case-sensitivity context.
|
||||
((w/nocase) (parse-seq/context (cdr sre) #f))
|
||||
((w/case) (parse-seq/context (cdr sre) #t))
|
||||
;; These just change the lexical case-sensitivity context.
|
||||
((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f))
|
||||
((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t))
|
||||
|
||||
;; ,<exp> and ,@<exp>
|
||||
((unquote)
|
||||
;; ,<exp> and ,@<exp>
|
||||
((hygn-eq? 'unquote)
|
||||
(let ((exp (cadr sre)))
|
||||
(if cset?
|
||||
`(,%coerce-dynamic-charset ,exp)
|
||||
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
|
||||
((unquote-splicing)
|
||||
(let ((exp (cadr sre)))
|
||||
(if cset?
|
||||
`(,%coerce-dynamic-charset ,exp)
|
||||
`(,%coerce-dynamic-regexp ,exp))))
|
||||
((hygn-eq? 'unquote-splicing)
|
||||
(let ((exp (cadr sre)))
|
||||
(if cset?
|
||||
`(,%coerce-dynamic-charset ,exp)
|
||||
`(,%coerce-dynamic-regexp ,exp))))
|
||||
|
||||
((~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
|
||||
(map parse-char-class (cdr sre))
|
||||
r))
|
||||
(cs (if (char-set? cs)
|
||||
((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
|
||||
(map parse-char-class (cdr sre))
|
||||
r))
|
||||
(cs (if (char-set? cs)
|
||||
(char-set-complement cs)
|
||||
`(,(r 'char-set-complement) ,cs))))
|
||||
(if cset? cs (make-re-char-set cs))))
|
||||
(if cset? cs (make-re-char-set cs))))
|
||||
|
||||
((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
|
||||
(map parse-char-class (cdr sre))
|
||||
r)))
|
||||
(if cset? cs (make-re-char-set cs))))
|
||||
((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
|
||||
(map parse-char-class (cdr sre))
|
||||
r)))
|
||||
(if cset? cs (make-re-char-set cs))))
|
||||
|
||||
((-) (if (pair? (cdr sre))
|
||||
(let* ((cs1 (parse-char-class (cadr sre)))
|
||||
(cs2 (assoc-cset-op char-set-union 'char-set-union
|
||||
((hygn-eq? '-) (if (pair? (cdr sre))
|
||||
(let* ((cs1 (parse-char-class (cadr sre)))
|
||||
(cs2 (assoc-cset-op char-set-union 'char-set-union
|
||||
(map parse-char-class (cddr sre))
|
||||
r))
|
||||
(cs (if (and (char-set? cs1) (char-set? cs2))
|
||||
(char-set-difference cs1 cs2)
|
||||
`(,(r 'char-set-difference)
|
||||
,(if (char-set? cs1)
|
||||
(char-set->scheme cs1 r)
|
||||
(cs (if (and (char-set? cs1) (char-set? cs2))
|
||||
(char-set-difference cs1 cs2)
|
||||
`(,(r 'char-set-difference)
|
||||
,(if (char-set? cs1)
|
||||
(char-set->scheme cs1 r)
|
||||
cs1)
|
||||
. ,(if (char-set? cs2)
|
||||
(list (char-set->scheme cs2 r))
|
||||
(cdr cs2))))))
|
||||
(if cset? cs (make-re-char-set cs)))
|
||||
(error "SRE set-difference operator (- ...) requires at least one argument")))
|
||||
. ,(if (char-set? cs2)
|
||||
(list (char-set->scheme cs2 r))
|
||||
(cdr cs2))))))
|
||||
(if cset? cs (make-re-char-set cs)))
|
||||
(error "SRE set-difference operator (- ...) requires at least one argument")))
|
||||
|
||||
((/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
|
||||
((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
|
||||
(if cset? cset (make-re-char-set cset))))
|
||||
|
||||
((posix-string)
|
||||
((hygn-eq? 'posix-string)
|
||||
(if (and (= 1 (length (cdr sre)))
|
||||
(string? (cadr sre)))
|
||||
(posix-string->regexp (cadr sre))
|
||||
(error "Illegal (posix-string ...) SRE body." sre)))
|
||||
|
||||
(else (if (every string? sre) ; A set spec -- ("wxyz").
|
||||
(let* ((cs (apply char-set-union
|
||||
(map string->char-set sre)))
|
||||
(cs (if case-sensitive? cs (uncase-char-set cs))))
|
||||
(if cset? cs (make-re-char-set cs)))
|
||||
(else (if (every string? sre) ; A set spec -- ("wxyz").
|
||||
(let* ((cs (apply char-set-union
|
||||
(map string->char-set sre)))
|
||||
(cs (if case-sensitive? cs (uncase-char-set cs))))
|
||||
(if cset? cs (make-re-char-set cs)))
|
||||
|
||||
(error "Illegal SRE" sre)))))
|
||||
(error "Illegal SRE" sre))))))
|
||||
|
||||
;; It must be a char-class name (ANY, ALPHABETIC, etc.)
|
||||
(else (let ((cs (case sre
|
||||
((any) char-set:full)
|
||||
((nonl) nonl-chars)
|
||||
((lower-case lower) char-set:lower-case)
|
||||
((upper-case upper) char-set:upper-case)
|
||||
((alphabetic alpha) char-set:letter)
|
||||
((numeric digit num) char-set:digit)
|
||||
((alphanumeric alnum alphanum) char-set:letter+digit)
|
||||
((punctuation punct) char-set:punctuation)
|
||||
((graphic graph) char-set:graphic)
|
||||
((blank) char-set:blank)
|
||||
((whitespace space white) char-set:whitespace)
|
||||
((printing print) char-set:printing)
|
||||
((control cntrl) char-set:iso-control)
|
||||
((hex-digit xdigit hex) char-set:hex-digit)
|
||||
((ascii) char-set:ascii)
|
||||
(else (error "Illegal regular expression" sre)))))
|
||||
(if cset? cs (make-re-char-set cs))))))))
|
||||
(else
|
||||
(letrec ((hygn-memq? (lambda (sym-list)
|
||||
(if (null? sym-list)
|
||||
#f
|
||||
(or (c sre (r (car sym-list)))
|
||||
(hygn-memq? (cdr sym-list)))))))
|
||||
(let ((cs (cond
|
||||
((hygn-memq? '(any)) char-set:full)
|
||||
((hygn-memq? '(nonl)) nonl-chars)
|
||||
((hygn-memq? '(lower-case lower)) char-set:lower-case)
|
||||
((hygn-memq? '(upper-case upper)) char-set:upper-case)
|
||||
((hygn-memq? '(alphabetic alpha)) char-set:letter)
|
||||
((hygn-memq? '(numeric digit num)) char-set:digit)
|
||||
((hygn-memq? '(alphanumeric alnum alphanum)) char-set:letter+digit)
|
||||
((hygn-memq? '(punctuation punct)) char-set:punctuation)
|
||||
((hygn-memq? '(graphic graph)) char-set:graphic)
|
||||
((hygn-memq? '(blank)) char-set:blank)
|
||||
((hygn-memq? '(whitespace space white)) char-set:whitespace)
|
||||
((hygn-memq? '(printing print)) char-set:printing)
|
||||
((hygn-memq? '(control cntrl)) char-set:iso-control)
|
||||
((hygn-memq? '(hex-digit xdigit hex)) char-set:hex-digit)
|
||||
((hygn-memq? '(ascii)) char-set:ascii)
|
||||
(else (error "Illegal regular expression" sre)))))
|
||||
(if cset? cs (make-re-char-set cs)))))))))
|
||||
|
||||
|
||||
;;; In a CSET? true context, S must be a 1-char string; convert to a char set
|
||||
|
|
Loading…
Reference in New Issue