Remove BOW/EOW and WORD/WORD+ as they are not in POSIX, not supported

by a number of platforms, and their meaning is locale-dependent.
This commit is contained in:
sperber 2002-02-16 16:57:25 +00:00
parent 2cb2b88419
commit a03bc65f4e
9 changed files with 29 additions and 99 deletions

View File

@ -92,12 +92,10 @@
re-empty re-empty?
re-bos re-bos? re-eos re-eos?
re-bol re-bol? re-eol re-eol?
re-bow re-bow? re-eow re-eow?
re-any re-any?
re-nonl
re-word
re?
re-tsm

View File

@ -48,12 +48,10 @@
re-empty re-empty?
re-bos re-bos? re-eos re-eos?
re-bol re-bol? re-eol re-eol?
re-bow re-bow? re-eow re-eow?
re-any re-any?
re-nonl
re-word
regexp?
re-tsm

View File

@ -122,14 +122,12 @@
(re-empty? (proc (:value) :boolean))
re-bos re-eos
re-bol re-eol
re-bow re-eow
((re-bos? re-eos? re-bol? re-eol? re-bow? re-eow? re-any?)
((re-bos? re-eos? re-bol? re-eol? re-any?)
(proc (:value) :boolean))
re-any
re-nonl
re-word
(regexp? (proc (:value) :boolean))
(re-tsm (proc (:value) :exact-integer))

View File

@ -2,7 +2,6 @@
;;; Olin Shivers, January 1997, May 1998.
;;; Todo:
;;; - Better unparsers for (word ...) and (word+ ...).
;;; - Unparse char-sets into set-diff SREs -- find a char set that's a
;;; tight bound, then get the difference. This would really pretty up
;;; things like (- alpha "aeiou")
@ -68,8 +67,7 @@
((re-submatch? re) (static-regexp? (re-submatch:body re)))
(else (or (re-bos? re) (re-eos? re) ; Otw, if it's not
(re-bol? re) (re-eol? re) ; one of these,
(re-bow? re) (re-eow? re) ; then it's Scheme code.
(re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code.
(re-string? re)))))
@ -119,9 +117,6 @@
(define (parse-sre/context sre case-sensitive? cset? r c)
(let ((%bos (r 'bos)) (%eos (r 'eos))
(%bol (r 'bol)) (%eol (r 'eol))
(%bow (r 'bow)) (%eow (r 'eow))
(%word (r 'word))
(%flush-submatches (r 'flush-submatches))
(%coerce-dynamic-charset (r 'coerce-dynamic-charset))
@ -157,10 +152,6 @@
((c sre %bol) (non-cset) re-bol)
((c sre %eol) (non-cset) re-eol)
((c sre %bow) (non-cset) re-bow)
((c sre %eow) (non-cset) re-eow)
((c sre %word) (non-cset) re-word)
((pair? sre)
(let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym)))))
(cond
@ -190,13 +181,6 @@
(hygn-eq? 'seq))
(non-cset) (parse-seq (cdr sre)))
((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow)))
((hygn-eq? 'word+)
(recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_")
(,(r '|) . ,(cdr sre)))))
case-sensitive?
cset?))
((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre))))
((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre))
(cadr sre)
@ -374,7 +358,6 @@
(define (regexp->scheme re r)
(let ((%re-bos (r 're-bos)) (%re-eos (r 're-eos))
(%re-bol (r 're-bol)) (%re-eol (r 're-eol))
(%re-bow (r 're-bow)) (%re-eow (r 're-eow))
(%list (r 'list)))
(let recur ((re re))
@ -430,8 +413,6 @@
((re-eos? re) %re-eos)
((re-bol? re) %re-bol)
((re-eol? re) %re-eol)
((re-bow? re) %re-bow)
((re-eow? re) %re-eow)
(else re)))))
@ -601,8 +582,6 @@
((re-eos? re) (r 'eos))
((re-bol? re) (r 'bol))
((re-eol? re) (r 'eol))
((re-bow? re) (r 'bow))
((re-eow? re) (r 'eow))
(else re)))) ; Presumably it's code.

View File

@ -113,9 +113,6 @@
((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation."))
((re-eol? re) (error "End-of-line regexp not supported in this implementation."))
((re-bow? re) (values "[[:<:]]" 1 0 '#())) ; These two are
((re-eow? re) (values "[[:>:]]" 1 0 '#())) ; Spencer-specific.
((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re))
(body (re-dsm:body re)))
(translate-dsm body pre-dsm

View File

@ -38,16 +38,10 @@
((re-bol? re) (error "BOL regexp not supported in this implementation."))
((re-eol? re) (error "EOL regexp not supported in this implementation."))
((re-bow? re) (or bow-cre (set! bow-cre (compile))))
((re-eow? re) (or eow-cre (set! eow-cre (compile))))
(else (error "compile-regexp -- not a regexp" re)))))
(define bos-cre #f)
(define eos-cre #f)
(define bow-cre #f)
(define eow-cre #f)
(define (regexp-search re str . maybe-start)

View File

@ -38,17 +38,12 @@
(kw? head 'unquote) ; ,exp
(kw? head 'unquote-splicing) ; ,@exp
(kw? head 'posix-string) ; (posix-string string)
(kw? head 'word+) ; (word+ re ...)
(kw? head 'word)))) ; (word re ...)
(kw? head 'posix-string)))) ; (posix-string string)
(kw? exp 'any) ; any
(kw? exp 'nonl) ; nonl
(kw? exp 'word) ; word
(kw? exp 'bos) (kw? exp 'eos) ; bos / eos
(kw? exp 'bol) (kw? exp 'eol) ; bol / eol
(kw? exp 'bow) (kw? exp 'eow) ; bow / eow
(kw? exp 'lower-case) (kw? exp 'lower); The char class names
(kw? exp 'upper-case) (kw? exp 'upper)

View File

@ -374,10 +374,6 @@
(define-record re-bol) (define re-bol (make-re-bol))
(define-record re-eol) (define re-eol (make-re-eol))
(define-record re-bow) (define re-bow (make-re-bow))
(define-record re-eow) (define re-eow (make-re-eow))
(define re-any (make-re-char-set/posix char-set:full "." '#()))
(define (re-any? re)
@ -398,7 +394,6 @@
(re-char-set? x) (re-string? x)
(re-bos? x) (re-eos? x)
(re-bol? x) (re-eol? x)
(re-bow? x) (re-eow? x)
(re-submatch? x) (re-dsm? x)))
@ -413,14 +408,6 @@
(else 0)))
(define re-word
(let ((wcs (char-set-union char-set:letter+digit ; Word chars
(char-set #\_))))
(make-re-seq (list re-bow
(make-re-repeat 1 #f (make-re-char-set wcs))
re-eow))))
;;; (flush-submatches re)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Return regular expression RE with all submatch-binding elements

View File

@ -157,13 +157,11 @@
tail)
(no-simp)))
;; Coalesce adjacent bol/eol/bos/eos/bow/eow's.
;; Coalesce adjacent bol/eol/bos/eos's.
((re-bol? elt) (coalesce-anchor re-bol?))
((re-eol? elt) (coalesce-anchor re-eol?))
((re-bos? elt) (coalesce-anchor re-bos?))
((re-eos? elt) (coalesce-anchor re-eos?))
((re-bow? elt) (coalesce-anchor re-bow?))
((re-eow? elt) (coalesce-anchor re-eow?))
(else (no-simp)))))
(else (values pre-dsm elt '()))))))
@ -205,12 +203,12 @@
(define (simp-choice re)
(let ((tsm (re-choice:tsm re)))
(receive (pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
(receive (pre-dsm cset bos? eos? bol? eol? tail)
(simp-choice1 (map simplify-regexp (re-choice:elts re)))
(let ((tail (assemble-boundary-tail char-set:empty cset
bos? eos? bol? eol? bow? eow?
#f #f #f #f #f #f
bos? eos? bol? eol?
#f #f #f #f
tail)))
(values (if (pair? tail)
(if (pair? (cdr tail))
@ -228,17 +226,14 @@
;;; some earlier bit of the final result.
(define (assemble-boundary-tail prev-cset cset
bos? eos? bol? eol? bow? eow?
bos? eos? bol? eol?
prev-bos? prev-eos?
prev-bol? prev-eol?
prev-bow? prev-eow?
tail)
(let* ((cset (char-set-difference cset prev-cset))
(numchars (char-set-size cset))
(tail (if (and eos? (not prev-eos?)) (cons re-eos tail) tail))
(tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail))
(tail (if (and eow? (not prev-eow?)) (cons re-eow tail) tail))
(tail (if (and bow? (not prev-bow?)) (cons re-bow tail) tail))
(tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail))
(tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail))
(tail (? ((zero? numchars) tail) ; Drop empty char set.
@ -251,7 +246,7 @@
;;; Simplify the non-empty list of choices ELTS.
;;; Return the result split out into the values
;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, bow?, eow?, tail]
;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, tail]
(define (simp-choice1 elts)
(let recur ((elts elts)
@ -259,8 +254,7 @@
(prev-cset char-set:empty) ; Chars we've already seen.
(prev-bos? #f) (prev-eos? #f) ; These flags say if we've
(prev-bol? #f) (prev-eol? #f) ; already seen one of these
(prev-bow? #f) (prev-eow? #f)) ; anchors.
(prev-bol? #f) (prev-eol? #f)) ; already seen one of these anchors.
(if (pair? elts)
@ -271,17 +265,16 @@
;; Flatten nested choices.
(let ((sub-elts (re-seq:elts elt)))
(receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
(receive (tail-pre-dsm cset bos? eos? bol? eol? tail)
(recur (append sub-elts elts)
prev-cset
prev-bos? prev-eos?
prev-bol? prev-eol?
prev-bow? prev-eow?)
prev-bol? prev-eol?)
(values (+ pre-dsm tail-pre-dsm)
cset bos? eos? bol? eol? bow? eow? tail)))
cset bos? eos? bol? eol? tail)))
;; Simplify the tail, then think about the head.
(receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
(receive (tail-pre-dsm cset bos? eos? bol? eol? tail)
(recur elts
(? ((and (re-string? elt)
(= 1 (string-length (re-string:chars elt))))
@ -298,9 +291,7 @@
(or prev-bos? (re-bos? elt))
(or prev-eos? (re-eos? elt))
(or prev-bol? (re-bol? elt))
(or prev-eol? (re-eol? elt))
(or prev-bow? (re-bow? elt))
(or prev-eow? (re-eow? elt)))
(or prev-eol? (re-eol? elt)))
;; This guy is called when we couldn't find any other
;; simplification. If ELT contains live submatches, then we
@ -316,12 +307,10 @@
(let ((tail (assemble-boundary-tail prev-cset cset
bos? eos?
bol? eol?
bow? eow?
prev-bos? prev-eos?
prev-bol? prev-eol?
prev-bow? prev-eow?
tail)))
(values pre-dsm char-set:empty #f #f #f #f #f #f
(values pre-dsm char-set:empty #f #f #f #f
(if (pair? tail)
;; Tack tail-pre-dsm onto
;; TAIL's first elt.
@ -337,38 +326,34 @@
;; ELT has no live submatches, so we can commute all
;; the recursion state forwards past it.
(values (+ pre-dsm tail-pre-dsm)
cset bos? eos? bol? eol? bow? eow?
cset bos? eos? bol? eol?
(cons elt tail))))
(? ((and (re-char-set? elt)
(char-set? (re-char-set:cset elt))) ; Might be Scheme code
(values (+ pre-dsm tail-pre-dsm)
(char-set-union cset (re-char-set:cset elt))
bos? eos? bol? eol? bow? eow? tail))
bos? eos? bol? eol? tail))
;; Treat a singleton string "c" as a singleton set {c}.
((and (re-string? elt) (= 1 (string-length (re-string:chars elt))))
(values (+ pre-dsm tail-pre-dsm)
(char-set-union cset (string->char-set (re-string:chars elt)))
bos? eos? bol? eol? bow? eow? tail))
bos? eos? bol? eol? tail))
;; Coalesce bol/eol/bos/eos/bow/eow's.
;; Coalesce bol/eol/bos/eos's.
((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset
#t eos? bol? eol? bow? eow? tail))
#t eos? bol? eol? tail))
((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? #t bol? eol? bow? eow? tail))
bos? #t bol? eol? tail))
((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? eos? #t eol? bow? eow? tail))
bos? eos? #t eol? tail))
((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? eos? bol? #t bow? eow? tail))
((re-bow? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? eos? bol? eol? #t eow? tail))
((re-eow? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? eos? bol? eol? bow? #t tail))
bos? eos? bol? #t tail))
(else (no-simp)))))))
(values 0 char-set:empty #f #f #f #f #f #f '()))))
(values 0 char-set:empty #f #f #f #f '()))))
@ -399,5 +384,4 @@
;; a live submatch:
(else (not (or (re-char-set? re) (re-string? re)
(re-bos? re) (re-eos? re)
(re-bol? re) (re-eol? re)
(re-bow? re) (re-eow? re)))))))
(re-bol? re) (re-eol? re)))))))