Remove BOW/EOW and WORD/WORD+ as they are not in POSIX, not supported
by a number of platforms, and their meaning is locale-dependent.
This commit is contained in:
parent
2cb2b88419
commit
a03bc65f4e
|
@ -92,12 +92,10 @@
|
|||
re-empty re-empty?
|
||||
re-bos re-bos? re-eos re-eos?
|
||||
re-bol re-bol? re-eol re-eol?
|
||||
re-bow re-bow? re-eow re-eow?
|
||||
|
||||
re-any re-any?
|
||||
|
||||
re-nonl
|
||||
re-word
|
||||
|
||||
re?
|
||||
re-tsm
|
||||
|
|
|
@ -48,12 +48,10 @@
|
|||
re-empty re-empty?
|
||||
re-bos re-bos? re-eos re-eos?
|
||||
re-bol re-bol? re-eol re-eol?
|
||||
re-bow re-bow? re-eow re-eow?
|
||||
|
||||
re-any re-any?
|
||||
|
||||
re-nonl
|
||||
re-word
|
||||
|
||||
regexp?
|
||||
re-tsm
|
||||
|
|
|
@ -122,14 +122,12 @@
|
|||
(re-empty? (proc (:value) :boolean))
|
||||
re-bos re-eos
|
||||
re-bol re-eol
|
||||
re-bow re-eow
|
||||
|
||||
((re-bos? re-eos? re-bol? re-eol? re-bow? re-eow? re-any?)
|
||||
((re-bos? re-eos? re-bol? re-eol? re-any?)
|
||||
(proc (:value) :boolean))
|
||||
|
||||
re-any
|
||||
re-nonl
|
||||
re-word
|
||||
|
||||
(regexp? (proc (:value) :boolean))
|
||||
(re-tsm (proc (:value) :exact-integer))
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
;;; Olin Shivers, January 1997, May 1998.
|
||||
|
||||
;;; Todo:
|
||||
;;; - Better unparsers for (word ...) and (word+ ...).
|
||||
;;; - Unparse char-sets into set-diff SREs -- find a char set that's a
|
||||
;;; tight bound, then get the difference. This would really pretty up
|
||||
;;; things like (- alpha "aeiou")
|
||||
|
@ -67,9 +66,8 @@
|
|||
((re-dsm? re) (static-regexp? (re-dsm:body re)))
|
||||
((re-submatch? re) (static-regexp? (re-submatch:body re)))
|
||||
|
||||
(else (or (re-bos? re) (re-eos? re) ; Otw, if it's not
|
||||
(re-bol? re) (re-eol? re) ; one of these,
|
||||
(re-bow? re) (re-eow? re) ; then it's Scheme code.
|
||||
(else (or (re-bos? re) (re-eos? re) ; Otw, if it's not
|
||||
(re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code.
|
||||
(re-string? re)))))
|
||||
|
||||
|
||||
|
@ -119,9 +117,6 @@
|
|||
(define (parse-sre/context sre case-sensitive? cset? r c)
|
||||
(let ((%bos (r 'bos)) (%eos (r 'eos))
|
||||
(%bol (r 'bol)) (%eol (r 'eol))
|
||||
(%bow (r 'bow)) (%eow (r 'eow))
|
||||
|
||||
(%word (r 'word))
|
||||
|
||||
(%flush-submatches (r 'flush-submatches))
|
||||
(%coerce-dynamic-charset (r 'coerce-dynamic-charset))
|
||||
|
@ -157,10 +152,6 @@
|
|||
((c sre %bol) (non-cset) re-bol)
|
||||
((c sre %eol) (non-cset) re-eol)
|
||||
|
||||
((c sre %bow) (non-cset) re-bow)
|
||||
((c sre %eow) (non-cset) re-eow)
|
||||
((c sre %word) (non-cset) re-word)
|
||||
|
||||
((pair? sre)
|
||||
(let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym)))))
|
||||
(cond
|
||||
|
@ -190,13 +181,6 @@
|
|||
(hygn-eq? 'seq))
|
||||
(non-cset) (parse-seq (cdr sre)))
|
||||
|
||||
((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
|
||||
((hygn-eq? 'word+)
|
||||
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
|
||||
(,(r '|) . ,(cdr sre)))))
|
||||
case-sensitive?
|
||||
cset?))
|
||||
|
||||
((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
|
||||
((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
|
||||
(cadr sre)
|
||||
|
@ -374,7 +358,6 @@
|
|||
(define (regexp->scheme re r)
|
||||
(let ((%re-bos (r 're-bos)) (%re-eos (r 're-eos))
|
||||
(%re-bol (r 're-bol)) (%re-eol (r 're-eol))
|
||||
(%re-bow (r 're-bow)) (%re-eow (r 're-eow))
|
||||
(%list (r 'list)))
|
||||
|
||||
(let recur ((re re))
|
||||
|
@ -430,8 +413,6 @@
|
|||
((re-eos? re) %re-eos)
|
||||
((re-bol? re) %re-bol)
|
||||
((re-eol? re) %re-eol)
|
||||
((re-bow? re) %re-bow)
|
||||
((re-eow? re) %re-eow)
|
||||
|
||||
(else re)))))
|
||||
|
||||
|
@ -601,8 +582,6 @@
|
|||
((re-eos? re) (r 'eos))
|
||||
((re-bol? re) (r 'bol))
|
||||
((re-eol? re) (r 'eol))
|
||||
((re-bow? re) (r 'bow))
|
||||
((re-eow? re) (r 'eow))
|
||||
|
||||
(else re)))) ; Presumably it's code.
|
||||
|
||||
|
|
|
@ -113,9 +113,6 @@
|
|||
((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation."))
|
||||
((re-eol? re) (error "End-of-line regexp not supported in this implementation."))
|
||||
|
||||
((re-bow? re) (values "[[:<:]]" 1 0 '#())) ; These two are
|
||||
((re-eow? re) (values "[[:>:]]" 1 0 '#())) ; Spencer-specific.
|
||||
|
||||
((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re))
|
||||
(body (re-dsm:body re)))
|
||||
(translate-dsm body pre-dsm
|
||||
|
|
|
@ -38,16 +38,10 @@
|
|||
((re-bol? re) (error "BOL regexp not supported in this implementation."))
|
||||
((re-eol? re) (error "EOL regexp not supported in this implementation."))
|
||||
|
||||
((re-bow? re) (or bow-cre (set! bow-cre (compile))))
|
||||
((re-eow? re) (or eow-cre (set! eow-cre (compile))))
|
||||
|
||||
(else (error "compile-regexp -- not a regexp" re)))))
|
||||
|
||||
(define bos-cre #f)
|
||||
(define eos-cre #f)
|
||||
(define bow-cre #f)
|
||||
(define eow-cre #f)
|
||||
|
||||
|
||||
|
||||
(define (regexp-search re str . maybe-start)
|
||||
|
|
|
@ -38,17 +38,12 @@
|
|||
(kw? head 'unquote) ; ,exp
|
||||
(kw? head 'unquote-splicing) ; ,@exp
|
||||
|
||||
(kw? head 'posix-string) ; (posix-string string)
|
||||
|
||||
(kw? head 'word+) ; (word+ re ...)
|
||||
(kw? head 'word)))) ; (word re ...)
|
||||
(kw? head 'posix-string)))) ; (posix-string string)
|
||||
|
||||
(kw? exp 'any) ; any
|
||||
(kw? exp 'nonl) ; nonl
|
||||
(kw? exp 'word) ; word
|
||||
(kw? exp 'bos) (kw? exp 'eos) ; bos / eos
|
||||
(kw? exp 'bol) (kw? exp 'eol) ; bol / eol
|
||||
(kw? exp 'bow) (kw? exp 'eow) ; bow / eow
|
||||
|
||||
(kw? exp 'lower-case) (kw? exp 'lower); The char class names
|
||||
(kw? exp 'upper-case) (kw? exp 'upper)
|
||||
|
|
|
@ -374,10 +374,6 @@
|
|||
(define-record re-bol) (define re-bol (make-re-bol))
|
||||
(define-record re-eol) (define re-eol (make-re-eol))
|
||||
|
||||
(define-record re-bow) (define re-bow (make-re-bow))
|
||||
(define-record re-eow) (define re-eow (make-re-eow))
|
||||
|
||||
|
||||
(define re-any (make-re-char-set/posix char-set:full "." '#()))
|
||||
|
||||
(define (re-any? re)
|
||||
|
@ -398,7 +394,6 @@
|
|||
(re-char-set? x) (re-string? x)
|
||||
(re-bos? x) (re-eos? x)
|
||||
(re-bol? x) (re-eol? x)
|
||||
(re-bow? x) (re-eow? x)
|
||||
(re-submatch? x) (re-dsm? x)))
|
||||
|
||||
|
||||
|
@ -413,14 +408,6 @@
|
|||
(else 0)))
|
||||
|
||||
|
||||
(define re-word
|
||||
(let ((wcs (char-set-union char-set:letter+digit ; Word chars
|
||||
(char-set #\_))))
|
||||
(make-re-seq (list re-bow
|
||||
(make-re-repeat 1 #f (make-re-char-set wcs))
|
||||
re-eow))))
|
||||
|
||||
|
||||
;;; (flush-submatches re)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Return regular expression RE with all submatch-binding elements
|
||||
|
|
|
@ -157,13 +157,11 @@
|
|||
tail)
|
||||
(no-simp)))
|
||||
|
||||
;; Coalesce adjacent bol/eol/bos/eos/bow/eow's.
|
||||
;; Coalesce adjacent bol/eol/bos/eos's.
|
||||
((re-bol? elt) (coalesce-anchor re-bol?))
|
||||
((re-eol? elt) (coalesce-anchor re-eol?))
|
||||
((re-bos? elt) (coalesce-anchor re-bos?))
|
||||
((re-eos? elt) (coalesce-anchor re-eos?))
|
||||
((re-bow? elt) (coalesce-anchor re-bow?))
|
||||
((re-eow? elt) (coalesce-anchor re-eow?))
|
||||
(else (no-simp)))))
|
||||
|
||||
(else (values pre-dsm elt '()))))))
|
||||
|
@ -205,12 +203,12 @@
|
|||
(define (simp-choice re)
|
||||
(let ((tsm (re-choice:tsm re)))
|
||||
|
||||
(receive (pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
|
||||
(receive (pre-dsm cset bos? eos? bol? eol? tail)
|
||||
(simp-choice1 (map simplify-regexp (re-choice:elts re)))
|
||||
|
||||
(let ((tail (assemble-boundary-tail char-set:empty cset
|
||||
bos? eos? bol? eol? bow? eow?
|
||||
#f #f #f #f #f #f
|
||||
bos? eos? bol? eol?
|
||||
#f #f #f #f
|
||||
tail)))
|
||||
(values (if (pair? tail)
|
||||
(if (pair? (cdr tail))
|
||||
|
@ -228,17 +226,14 @@
|
|||
;;; some earlier bit of the final result.
|
||||
|
||||
(define (assemble-boundary-tail prev-cset cset
|
||||
bos? eos? bol? eol? bow? eow?
|
||||
bos? eos? bol? eol?
|
||||
prev-bos? prev-eos?
|
||||
prev-bol? prev-eol?
|
||||
prev-bow? prev-eow?
|
||||
tail)
|
||||
(let* ((cset (char-set-difference cset prev-cset))
|
||||
(numchars (char-set-size cset))
|
||||
(tail (if (and eos? (not prev-eos?)) (cons re-eos tail) tail))
|
||||
(tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail))
|
||||
(tail (if (and eow? (not prev-eow?)) (cons re-eow tail) tail))
|
||||
(tail (if (and bow? (not prev-bow?)) (cons re-bow tail) tail))
|
||||
(tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail))
|
||||
(tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail))
|
||||
(tail (? ((zero? numchars) tail) ; Drop empty char set.
|
||||
|
@ -251,16 +246,15 @@
|
|||
|
||||
;;; Simplify the non-empty list of choices ELTS.
|
||||
;;; Return the result split out into the values
|
||||
;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, bow?, eow?, tail]
|
||||
;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, tail]
|
||||
|
||||
(define (simp-choice1 elts)
|
||||
(let recur ((elts elts)
|
||||
|
||||
(prev-cset char-set:empty) ; Chars we've already seen.
|
||||
(prev-cset char-set:empty) ; Chars we've already seen.
|
||||
|
||||
(prev-bos? #f) (prev-eos? #f) ; These flags say if we've
|
||||
(prev-bol? #f) (prev-eol? #f) ; already seen one of these
|
||||
(prev-bow? #f) (prev-eow? #f)) ; anchors.
|
||||
(prev-bos? #f) (prev-eos? #f) ; These flags say if we've
|
||||
(prev-bol? #f) (prev-eol? #f)) ; already seen one of these anchors.
|
||||
|
||||
|
||||
(if (pair? elts)
|
||||
|
@ -271,17 +265,16 @@
|
|||
|
||||
;; Flatten nested choices.
|
||||
(let ((sub-elts (re-seq:elts elt)))
|
||||
(receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
|
||||
(receive (tail-pre-dsm cset bos? eos? bol? eol? tail)
|
||||
(recur (append sub-elts elts)
|
||||
prev-cset
|
||||
prev-bos? prev-eos?
|
||||
prev-bol? prev-eol?
|
||||
prev-bow? prev-eow?)
|
||||
prev-bol? prev-eol?)
|
||||
(values (+ pre-dsm tail-pre-dsm)
|
||||
cset bos? eos? bol? eol? bow? eow? tail)))
|
||||
cset bos? eos? bol? eol? tail)))
|
||||
|
||||
;; Simplify the tail, then think about the head.
|
||||
(receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
|
||||
(receive (tail-pre-dsm cset bos? eos? bol? eol? tail)
|
||||
(recur elts
|
||||
(? ((and (re-string? elt)
|
||||
(= 1 (string-length (re-string:chars elt))))
|
||||
|
@ -298,9 +291,7 @@
|
|||
(or prev-bos? (re-bos? elt))
|
||||
(or prev-eos? (re-eos? elt))
|
||||
(or prev-bol? (re-bol? elt))
|
||||
(or prev-eol? (re-eol? elt))
|
||||
(or prev-bow? (re-bow? elt))
|
||||
(or prev-eow? (re-eow? elt)))
|
||||
(or prev-eol? (re-eol? elt)))
|
||||
|
||||
;; This guy is called when we couldn't find any other
|
||||
;; simplification. If ELT contains live submatches, then we
|
||||
|
@ -316,12 +307,10 @@
|
|||
(let ((tail (assemble-boundary-tail prev-cset cset
|
||||
bos? eos?
|
||||
bol? eol?
|
||||
bow? eow?
|
||||
prev-bos? prev-eos?
|
||||
prev-bol? prev-eol?
|
||||
prev-bow? prev-eow?
|
||||
tail)))
|
||||
(values pre-dsm char-set:empty #f #f #f #f #f #f
|
||||
(values pre-dsm char-set:empty #f #f #f #f
|
||||
(if (pair? tail)
|
||||
;; Tack tail-pre-dsm onto
|
||||
;; TAIL's first elt.
|
||||
|
@ -337,38 +326,34 @@
|
|||
;; ELT has no live submatches, so we can commute all
|
||||
;; the recursion state forwards past it.
|
||||
(values (+ pre-dsm tail-pre-dsm)
|
||||
cset bos? eos? bol? eol? bow? eow?
|
||||
cset bos? eos? bol? eol?
|
||||
(cons elt tail))))
|
||||
|
||||
(? ((and (re-char-set? elt)
|
||||
(char-set? (re-char-set:cset elt))) ; Might be Scheme code
|
||||
(values (+ pre-dsm tail-pre-dsm)
|
||||
(char-set-union cset (re-char-set:cset elt))
|
||||
bos? eos? bol? eol? bow? eow? tail))
|
||||
bos? eos? bol? eol? tail))
|
||||
|
||||
;; Treat a singleton string "c" as a singleton set {c}.
|
||||
((and (re-string? elt) (= 1 (string-length (re-string:chars elt))))
|
||||
(values (+ pre-dsm tail-pre-dsm)
|
||||
(char-set-union cset (string->char-set (re-string:chars elt)))
|
||||
bos? eos? bol? eol? bow? eow? tail))
|
||||
bos? eos? bol? eol? tail))
|
||||
|
||||
;; Coalesce bol/eol/bos/eos/bow/eow's.
|
||||
;; Coalesce bol/eol/bos/eos's.
|
||||
((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset
|
||||
#t eos? bol? eol? bow? eow? tail))
|
||||
#t eos? bol? eol? tail))
|
||||
((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset
|
||||
bos? #t bol? eol? bow? eow? tail))
|
||||
bos? #t bol? eol? tail))
|
||||
((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset
|
||||
bos? eos? #t eol? bow? eow? tail))
|
||||
bos? eos? #t eol? tail))
|
||||
((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset
|
||||
bos? eos? bol? #t bow? eow? tail))
|
||||
((re-bow? elt) (values (+ pre-dsm tail-pre-dsm) cset
|
||||
bos? eos? bol? eol? #t eow? tail))
|
||||
((re-eow? elt) (values (+ pre-dsm tail-pre-dsm) cset
|
||||
bos? eos? bol? eol? bow? #t tail))
|
||||
bos? eos? bol? #t tail))
|
||||
|
||||
(else (no-simp)))))))
|
||||
|
||||
(values 0 char-set:empty #f #f #f #f #f #f '()))))
|
||||
(values 0 char-set:empty #f #f #f #f '()))))
|
||||
|
||||
|
||||
|
||||
|
@ -399,5 +384,4 @@
|
|||
;; a live submatch:
|
||||
(else (not (or (re-char-set? re) (re-string? re)
|
||||
(re-bos? re) (re-eos? re)
|
||||
(re-bol? re) (re-eol? re)
|
||||
(re-bow? re) (re-eow? re)))))))
|
||||
(re-bol? re) (re-eol? re)))))))
|
||||
|
|
Loading…
Reference in New Issue