Hygienic comparison for symbols.
This commit is contained in:
parent
34898c9b91
commit
14ad4d6e15
|
@ -162,138 +162,149 @@
|
||||||
((c sre %word) (non-cset) re-word)
|
((c sre %word) (non-cset) re-word)
|
||||||
|
|
||||||
((pair? sre)
|
((pair? sre)
|
||||||
(case (car sre)
|
(let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym)))))
|
||||||
((*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
|
(cond
|
||||||
((+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
|
((hygn-eq? '*) (non-cset) (re-repeat 0 #f (parse-seq (cdr sre))))
|
||||||
((?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
|
((hygn-eq? '+) (non-cset) (re-repeat 1 #f (parse-seq (cdr sre))))
|
||||||
((=) (non-cset) (let ((n (cadr sre)))
|
((hygn-eq? '?) (non-cset) (re-repeat 0 1 (parse-seq (cdr sre))))
|
||||||
(re-repeat n n (parse-seq (cddr sre)))))
|
((hygn-eq? '=) (non-cset) (let ((n (cadr sre)))
|
||||||
((>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
|
(re-repeat n n (parse-seq (cddr sre)))))
|
||||||
((**) (non-cset) (re-repeat (cadr sre) (caddr sre)
|
((hygn-eq? '>=) (non-cset) (re-repeat (cadr sre) #f (parse-seq (cddr sre))))
|
||||||
(parse-seq (cdddr sre))))
|
((hygn-eq? '**) (non-cset) (re-repeat (cadr sre) (caddr sre)
|
||||||
|
(parse-seq (cdddr sre))))
|
||||||
;; Choice is special wrt cset? because it's "polymorphic".
|
|
||||||
;; Note that RE-CHOICE guarantees to construct a char-set
|
;; Choice is special wrt cset? because it's "polymorphic".
|
||||||
;; or single-char string regexp if all of its args are char
|
;; Note that RE-CHOICE guarantees to construct a char-set
|
||||||
;; classes.
|
;; or single-char string regexp if all of its args are char
|
||||||
((| or) (let ((elts (map (lambda (sre)
|
;; classes.
|
||||||
(recur sre case-sensitive? cset?))
|
((or (hygn-eq? '|)
|
||||||
(cdr sre))))
|
(hygn-eq? 'or))
|
||||||
(if cset?
|
(let ((elts (map (lambda (sre)
|
||||||
(assoc-cset-op char-set-union 'char-set-union elts r)
|
(recur sre case-sensitive? cset?))
|
||||||
(re-choice elts))))
|
(cdr sre))))
|
||||||
|
(if cset?
|
||||||
((: seq) (non-cset) (parse-seq (cdr sre)))
|
(assoc-cset-op char-set-union 'char-set-union elts r)
|
||||||
|
(re-choice elts))))
|
||||||
((word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
|
|
||||||
((word+)
|
((or (hygn-eq? ':)
|
||||||
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
|
(hygn-eq? 'seq))
|
||||||
(,(r '|) . ,(cdr sre)))))
|
(non-cset) (parse-seq (cdr sre)))
|
||||||
case-sensitive?
|
|
||||||
cset?))
|
((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
|
||||||
|
((hygn-eq? 'word+)
|
||||||
((submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
|
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
|
||||||
((dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
|
(,(r '|) . ,(cdr sre)))))
|
||||||
(cadr sre)
|
case-sensitive?
|
||||||
(caddr sre)))
|
cset?))
|
||||||
|
|
||||||
;; We could be more aggressive and push the uncase op down into
|
((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
|
||||||
;; partially-static regexps, but enough is enough.
|
((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
|
||||||
((uncase)
|
(cadr sre)
|
||||||
(let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
|
(caddr sre)))
|
||||||
(if cset?
|
|
||||||
|
;; We could be more aggressive and push the uncase op down into
|
||||||
(if (re-char-set? re-or-cset) ; A char set or code
|
;; partially-static regexps, but enough is enough.
|
||||||
(uncase-char-set re-or-cset) ; producing a char set.
|
((hygn-eq? 'uncase)
|
||||||
`(,(r 'uncase) ,re-or-cset))
|
(let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
|
||||||
|
(if cset?
|
||||||
(if (static-regexp? re-or-cset) ; A regexp or code
|
|
||||||
(uncase re-or-cset) ; producing a regexp.
|
(if (re-char-set? re-or-cset) ; A char set or code
|
||||||
`(,(r 'uncase)
|
(uncase-char-set re-or-cset) ; producing a char set.
|
||||||
,(regexp->scheme (simplify-regexp re-or-cset) r))))))
|
`(,(r 'uncase) ,re-or-cset))
|
||||||
|
|
||||||
;; These just change the lexical case-sensitivity context.
|
(if (static-regexp? re-or-cset) ; A regexp or code
|
||||||
((w/nocase) (parse-seq/context (cdr sre) #f))
|
(uncase re-or-cset) ; producing a regexp.
|
||||||
((w/case) (parse-seq/context (cdr sre) #t))
|
`(,(r 'uncase)
|
||||||
|
,(regexp->scheme (simplify-regexp re-or-cset) r))))))
|
||||||
;; ,<exp> and ,@<exp>
|
|
||||||
((unquote)
|
;; These just change the lexical case-sensitivity context.
|
||||||
|
((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f))
|
||||||
|
((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t))
|
||||||
|
|
||||||
|
;; ,<exp> and ,@<exp>
|
||||||
|
((hygn-eq? 'unquote)
|
||||||
(let ((exp (cadr sre)))
|
(let ((exp (cadr sre)))
|
||||||
(if cset?
|
(if cset?
|
||||||
`(,%coerce-dynamic-charset ,exp)
|
`(,%coerce-dynamic-charset ,exp)
|
||||||
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
|
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
|
||||||
((unquote-splicing)
|
((hygn-eq? 'unquote-splicing)
|
||||||
(let ((exp (cadr sre)))
|
(let ((exp (cadr sre)))
|
||||||
(if cset?
|
(if cset?
|
||||||
`(,%coerce-dynamic-charset ,exp)
|
`(,%coerce-dynamic-charset ,exp)
|
||||||
`(,%coerce-dynamic-regexp ,exp))))
|
`(,%coerce-dynamic-regexp ,exp))))
|
||||||
|
|
||||||
((~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
|
((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
|
||||||
(map parse-char-class (cdr sre))
|
(map parse-char-class (cdr sre))
|
||||||
r))
|
r))
|
||||||
(cs (if (char-set? cs)
|
(cs (if (char-set? cs)
|
||||||
(char-set-complement cs)
|
(char-set-complement cs)
|
||||||
`(,(r 'char-set-complement) ,cs))))
|
`(,(r 'char-set-complement) ,cs))))
|
||||||
(if cset? cs (make-re-char-set cs))))
|
(if cset? cs (make-re-char-set cs))))
|
||||||
|
|
||||||
((&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
|
((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
|
||||||
(map parse-char-class (cdr sre))
|
(map parse-char-class (cdr sre))
|
||||||
r)))
|
r)))
|
||||||
(if cset? cs (make-re-char-set cs))))
|
(if cset? cs (make-re-char-set cs))))
|
||||||
|
|
||||||
((-) (if (pair? (cdr sre))
|
((hygn-eq? '-) (if (pair? (cdr sre))
|
||||||
(let* ((cs1 (parse-char-class (cadr sre)))
|
(let* ((cs1 (parse-char-class (cadr sre)))
|
||||||
(cs2 (assoc-cset-op char-set-union 'char-set-union
|
(cs2 (assoc-cset-op char-set-union 'char-set-union
|
||||||
(map parse-char-class (cddr sre))
|
(map parse-char-class (cddr sre))
|
||||||
r))
|
r))
|
||||||
(cs (if (and (char-set? cs1) (char-set? cs2))
|
(cs (if (and (char-set? cs1) (char-set? cs2))
|
||||||
(char-set-difference cs1 cs2)
|
(char-set-difference cs1 cs2)
|
||||||
`(,(r 'char-set-difference)
|
`(,(r 'char-set-difference)
|
||||||
,(if (char-set? cs1)
|
,(if (char-set? cs1)
|
||||||
(char-set->scheme cs1 r)
|
(char-set->scheme cs1 r)
|
||||||
cs1)
|
cs1)
|
||||||
. ,(if (char-set? cs2)
|
. ,(if (char-set? cs2)
|
||||||
(list (char-set->scheme cs2 r))
|
(list (char-set->scheme cs2 r))
|
||||||
(cdr cs2))))))
|
(cdr cs2))))))
|
||||||
(if cset? cs (make-re-char-set cs)))
|
(if cset? cs (make-re-char-set cs)))
|
||||||
(error "SRE set-difference operator (- ...) requires at least one argument")))
|
(error "SRE set-difference operator (- ...) requires at least one argument")))
|
||||||
|
|
||||||
((/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
|
((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
|
||||||
(if cset? cset (make-re-char-set cset))))
|
(if cset? cset (make-re-char-set cset))))
|
||||||
|
|
||||||
((posix-string)
|
((hygn-eq? 'posix-string)
|
||||||
(if (and (= 1 (length (cdr sre)))
|
(if (and (= 1 (length (cdr sre)))
|
||||||
(string? (cadr sre)))
|
(string? (cadr sre)))
|
||||||
(posix-string->regexp (cadr sre))
|
(posix-string->regexp (cadr sre))
|
||||||
(error "Illegal (posix-string ...) SRE body." sre)))
|
(error "Illegal (posix-string ...) SRE body." sre)))
|
||||||
|
|
||||||
(else (if (every string? sre) ; A set spec -- ("wxyz").
|
(else (if (every string? sre) ; A set spec -- ("wxyz").
|
||||||
(let* ((cs (apply char-set-union
|
(let* ((cs (apply char-set-union
|
||||||
(map string->char-set sre)))
|
(map string->char-set sre)))
|
||||||
(cs (if case-sensitive? cs (uncase-char-set cs))))
|
(cs (if case-sensitive? cs (uncase-char-set cs))))
|
||||||
(if cset? cs (make-re-char-set cs)))
|
(if cset? cs (make-re-char-set cs)))
|
||||||
|
|
||||||
(error "Illegal SRE" sre)))))
|
(error "Illegal SRE" sre))))))
|
||||||
|
|
||||||
;; It must be a char-class name (ANY, ALPHABETIC, etc.)
|
;; It must be a char-class name (ANY, ALPHABETIC, etc.)
|
||||||
(else (let ((cs (case sre
|
(else
|
||||||
((any) char-set:full)
|
(letrec ((hygn-memq? (lambda (sym-list)
|
||||||
((nonl) nonl-chars)
|
(if (null? sym-list)
|
||||||
((lower-case lower) char-set:lower-case)
|
#f
|
||||||
((upper-case upper) char-set:upper-case)
|
(or (c sre (r (car sym-list)))
|
||||||
((alphabetic alpha) char-set:letter)
|
(hygn-memq? (cdr sym-list)))))))
|
||||||
((numeric digit num) char-set:digit)
|
(let ((cs (cond
|
||||||
((alphanumeric alnum alphanum) char-set:letter+digit)
|
((hygn-memq? '(any)) char-set:full)
|
||||||
((punctuation punct) char-set:punctuation)
|
((hygn-memq? '(nonl)) nonl-chars)
|
||||||
((graphic graph) char-set:graphic)
|
((hygn-memq? '(lower-case lower)) char-set:lower-case)
|
||||||
((blank) char-set:blank)
|
((hygn-memq? '(upper-case upper)) char-set:upper-case)
|
||||||
((whitespace space white) char-set:whitespace)
|
((hygn-memq? '(alphabetic alpha)) char-set:letter)
|
||||||
((printing print) char-set:printing)
|
((hygn-memq? '(numeric digit num)) char-set:digit)
|
||||||
((control cntrl) char-set:iso-control)
|
((hygn-memq? '(alphanumeric alnum alphanum)) char-set:letter+digit)
|
||||||
((hex-digit xdigit hex) char-set:hex-digit)
|
((hygn-memq? '(punctuation punct)) char-set:punctuation)
|
||||||
((ascii) char-set:ascii)
|
((hygn-memq? '(graphic graph)) char-set:graphic)
|
||||||
(else (error "Illegal regular expression" sre)))))
|
((hygn-memq? '(blank)) char-set:blank)
|
||||||
(if cset? cs (make-re-char-set cs))))))))
|
((hygn-memq? '(whitespace space white)) char-set:whitespace)
|
||||||
|
((hygn-memq? '(printing print)) char-set:printing)
|
||||||
|
((hygn-memq? '(control cntrl)) char-set:iso-control)
|
||||||
|
((hygn-memq? '(hex-digit xdigit hex)) char-set:hex-digit)
|
||||||
|
((hygn-memq? '(ascii)) char-set:ascii)
|
||||||
|
(else (error "Illegal regular expression" sre)))))
|
||||||
|
(if cset? cs (make-re-char-set cs)))))))))
|
||||||
|
|
||||||
|
|
||||||
;;; In a CSET? true context, S must be a 1-char string; convert to a char set
|
;;; In a CSET? true context, S must be a 1-char string; convert to a char set
|
||||||
|
|
Loading…
Reference in New Issue