From a03bc65f4e90a935049994626b77acb59d5a2e27 Mon Sep 17 00:00:00 2001 From: sperber Date: Sat, 16 Feb 2002 16:57:25 +0000 Subject: [PATCH] Remove BOW/EOW and WORD/WORD+ as they are not in POSIX, not supported by a number of platforms, and their meaning is locale-dependent. --- scsh/rx/packages-old.scm | 2 -- scsh/rx/packages-old2.scm | 2 -- scsh/rx/packages.scm | 4 +-- scsh/rx/parse.scm | 25 ++------------- scsh/rx/posixstr.scm | 3 -- scsh/rx/re-high.scm | 6 ---- scsh/rx/re-syntax.scm | 7 +---- scsh/rx/re.scm | 13 -------- scsh/rx/simp.scm | 66 +++++++++++++++------------------------ 9 files changed, 29 insertions(+), 99 deletions(-) diff --git a/scsh/rx/packages-old.scm b/scsh/rx/packages-old.scm index 46e6d86..615f12c 100644 --- a/scsh/rx/packages-old.scm +++ b/scsh/rx/packages-old.scm @@ -92,12 +92,10 @@ re-empty re-empty? re-bos re-bos? re-eos re-eos? re-bol re-bol? re-eol re-eol? - re-bow re-bow? re-eow re-eow? re-any re-any? re-nonl - re-word re? re-tsm diff --git a/scsh/rx/packages-old2.scm b/scsh/rx/packages-old2.scm index 22a2906..ccf7972 100644 --- a/scsh/rx/packages-old2.scm +++ b/scsh/rx/packages-old2.scm @@ -48,12 +48,10 @@ re-empty re-empty? re-bos re-bos? re-eos re-eos? re-bol re-bol? re-eol re-eol? - re-bow re-bow? re-eow re-eow? re-any re-any? re-nonl - re-word regexp? re-tsm diff --git a/scsh/rx/packages.scm b/scsh/rx/packages.scm index cf9aa00..50884e5 100644 --- a/scsh/rx/packages.scm +++ b/scsh/rx/packages.scm @@ -122,14 +122,12 @@ (re-empty? (proc (:value) :boolean)) re-bos re-eos re-bol re-eol - re-bow re-eow - ((re-bos? re-eos? re-bol? re-eol? re-bow? re-eow? re-any?) + ((re-bos? re-eos? re-bol? re-eol? re-any?) (proc (:value) :boolean)) re-any re-nonl - re-word (regexp? (proc (:value) :boolean)) (re-tsm (proc (:value) :exact-integer)) diff --git a/scsh/rx/parse.scm b/scsh/rx/parse.scm index 695d0f0..b7881c4 100644 --- a/scsh/rx/parse.scm +++ b/scsh/rx/parse.scm @@ -2,7 +2,6 @@ ;;; Olin Shivers, January 1997, May 1998. ;;; Todo: -;;; - Better unparsers for (word ...) and (word+ ...). ;;; - Unparse char-sets into set-diff SREs -- find a char set that's a ;;; tight bound, then get the difference. This would really pretty up ;;; things like (- alpha "aeiou") @@ -67,9 +66,8 @@ ((re-dsm? re) (static-regexp? (re-dsm:body re))) ((re-submatch? re) (static-regexp? (re-submatch:body re))) - (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not - (re-bol? re) (re-eol? re) ; one of these, - (re-bow? re) (re-eow? re) ; then it's Scheme code. + (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not + (re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code. (re-string? re))))) @@ -119,9 +117,6 @@ (define (parse-sre/context sre case-sensitive? cset? r c) (let ((%bos (r 'bos)) (%eos (r 'eos)) (%bol (r 'bol)) (%eol (r 'eol)) - (%bow (r 'bow)) (%eow (r 'eow)) - - (%word (r 'word)) (%flush-submatches (r 'flush-submatches)) (%coerce-dynamic-charset (r 'coerce-dynamic-charset)) @@ -157,10 +152,6 @@ ((c sre %bol) (non-cset) re-bol) ((c sre %eol) (non-cset) re-eol) - ((c sre %bow) (non-cset) re-bow) - ((c sre %eow) (non-cset) re-eow) - ((c sre %word) (non-cset) re-word) - ((pair? sre) (let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym))))) (cond @@ -190,13 +181,6 @@ (hygn-eq? 'seq)) (non-cset) (parse-seq (cdr sre))) - ((hygn-eq? 'word) (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow))) - ((hygn-eq? 'word+) - (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_") - (,(r '|) . ,(cdr sre))))) - case-sensitive? - cset?)) - ((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre)))) ((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) (cadr sre) @@ -374,7 +358,6 @@ (define (regexp->scheme re r) (let ((%re-bos (r 're-bos)) (%re-eos (r 're-eos)) (%re-bol (r 're-bol)) (%re-eol (r 're-eol)) - (%re-bow (r 're-bow)) (%re-eow (r 're-eow)) (%list (r 'list))) (let recur ((re re)) @@ -430,8 +413,6 @@ ((re-eos? re) %re-eos) ((re-bol? re) %re-bol) ((re-eol? re) %re-eol) - ((re-bow? re) %re-bow) - ((re-eow? re) %re-eow) (else re))))) @@ -601,8 +582,6 @@ ((re-eos? re) (r 'eos)) ((re-bol? re) (r 'bol)) ((re-eol? re) (r 'eol)) - ((re-bow? re) (r 'bow)) - ((re-eow? re) (r 'eow)) (else re)))) ; Presumably it's code. diff --git a/scsh/rx/posixstr.scm b/scsh/rx/posixstr.scm index 9c9628b..9d9a59a 100644 --- a/scsh/rx/posixstr.scm +++ b/scsh/rx/posixstr.scm @@ -113,9 +113,6 @@ ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) - ((re-bow? re) (values "[[:<:]]" 1 0 '#())) ; These two are - ((re-eow? re) (values "[[:>:]]" 1 0 '#())) ; Spencer-specific. - ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) (body (re-dsm:body re))) (translate-dsm body pre-dsm diff --git a/scsh/rx/re-high.scm b/scsh/rx/re-high.scm index f9b1388..28c08de 100644 --- a/scsh/rx/re-high.scm +++ b/scsh/rx/re-high.scm @@ -38,16 +38,10 @@ ((re-bol? re) (error "BOL regexp not supported in this implementation.")) ((re-eol? re) (error "EOL regexp not supported in this implementation.")) - ((re-bow? re) (or bow-cre (set! bow-cre (compile)))) - ((re-eow? re) (or eow-cre (set! eow-cre (compile)))) - (else (error "compile-regexp -- not a regexp" re))))) (define bos-cre #f) (define eos-cre #f) -(define bow-cre #f) -(define eow-cre #f) - (define (regexp-search re str . maybe-start) diff --git a/scsh/rx/re-syntax.scm b/scsh/rx/re-syntax.scm index 41f2763..ed5c9c9 100644 --- a/scsh/rx/re-syntax.scm +++ b/scsh/rx/re-syntax.scm @@ -38,17 +38,12 @@ (kw? head 'unquote) ; ,exp (kw? head 'unquote-splicing) ; ,@exp - (kw? head 'posix-string) ; (posix-string string) - - (kw? head 'word+) ; (word+ re ...) - (kw? head 'word)))) ; (word re ...) + (kw? head 'posix-string)))) ; (posix-string string) (kw? exp 'any) ; any (kw? exp 'nonl) ; nonl - (kw? exp 'word) ; word (kw? exp 'bos) (kw? exp 'eos) ; bos / eos (kw? exp 'bol) (kw? exp 'eol) ; bol / eol - (kw? exp 'bow) (kw? exp 'eow) ; bow / eow (kw? exp 'lower-case) (kw? exp 'lower); The char class names (kw? exp 'upper-case) (kw? exp 'upper) diff --git a/scsh/rx/re.scm b/scsh/rx/re.scm index 132e514..a33d2c2 100644 --- a/scsh/rx/re.scm +++ b/scsh/rx/re.scm @@ -374,10 +374,6 @@ (define-record re-bol) (define re-bol (make-re-bol)) (define-record re-eol) (define re-eol (make-re-eol)) -(define-record re-bow) (define re-bow (make-re-bow)) -(define-record re-eow) (define re-eow (make-re-eow)) - - (define re-any (make-re-char-set/posix char-set:full "." '#())) (define (re-any? re) @@ -398,7 +394,6 @@ (re-char-set? x) (re-string? x) (re-bos? x) (re-eos? x) (re-bol? x) (re-eol? x) - (re-bow? x) (re-eow? x) (re-submatch? x) (re-dsm? x))) @@ -413,14 +408,6 @@ (else 0))) -(define re-word - (let ((wcs (char-set-union char-set:letter+digit ; Word chars - (char-set #\_)))) - (make-re-seq (list re-bow - (make-re-repeat 1 #f (make-re-char-set wcs)) - re-eow)))) - - ;;; (flush-submatches re) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Return regular expression RE with all submatch-binding elements diff --git a/scsh/rx/simp.scm b/scsh/rx/simp.scm index d8656fe..ac157f3 100644 --- a/scsh/rx/simp.scm +++ b/scsh/rx/simp.scm @@ -157,13 +157,11 @@ tail) (no-simp))) - ;; Coalesce adjacent bol/eol/bos/eos/bow/eow's. + ;; Coalesce adjacent bol/eol/bos/eos's. ((re-bol? elt) (coalesce-anchor re-bol?)) ((re-eol? elt) (coalesce-anchor re-eol?)) ((re-bos? elt) (coalesce-anchor re-bos?)) ((re-eos? elt) (coalesce-anchor re-eos?)) - ((re-bow? elt) (coalesce-anchor re-bow?)) - ((re-eow? elt) (coalesce-anchor re-eow?)) (else (no-simp))))) (else (values pre-dsm elt '())))))) @@ -205,12 +203,12 @@ (define (simp-choice re) (let ((tsm (re-choice:tsm re))) - (receive (pre-dsm cset bos? eos? bol? eol? bow? eow? tail) + (receive (pre-dsm cset bos? eos? bol? eol? tail) (simp-choice1 (map simplify-regexp (re-choice:elts re))) (let ((tail (assemble-boundary-tail char-set:empty cset - bos? eos? bol? eol? bow? eow? - #f #f #f #f #f #f + bos? eos? bol? eol? + #f #f #f #f tail))) (values (if (pair? tail) (if (pair? (cdr tail)) @@ -228,17 +226,14 @@ ;;; some earlier bit of the final result. (define (assemble-boundary-tail prev-cset cset - bos? eos? bol? eol? bow? eow? + bos? eos? bol? eol? prev-bos? prev-eos? prev-bol? prev-eol? - prev-bow? prev-eow? tail) (let* ((cset (char-set-difference cset prev-cset)) (numchars (char-set-size cset)) (tail (if (and eos? (not prev-eos?)) (cons re-eos tail) tail)) (tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail)) - (tail (if (and eow? (not prev-eow?)) (cons re-eow tail) tail)) - (tail (if (and bow? (not prev-bow?)) (cons re-bow tail) tail)) (tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail)) (tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail)) (tail (? ((zero? numchars) tail) ; Drop empty char set. @@ -251,16 +246,15 @@ ;;; Simplify the non-empty list of choices ELTS. ;;; Return the result split out into the values -;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, bow?, eow?, tail] +;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, tail] (define (simp-choice1 elts) (let recur ((elts elts) - (prev-cset char-set:empty) ; Chars we've already seen. + (prev-cset char-set:empty) ; Chars we've already seen. - (prev-bos? #f) (prev-eos? #f) ; These flags say if we've - (prev-bol? #f) (prev-eol? #f) ; already seen one of these - (prev-bow? #f) (prev-eow? #f)) ; anchors. + (prev-bos? #f) (prev-eos? #f) ; These flags say if we've + (prev-bol? #f) (prev-eol? #f)) ; already seen one of these anchors. (if (pair? elts) @@ -271,17 +265,16 @@ ;; Flatten nested choices. (let ((sub-elts (re-seq:elts elt))) - (receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail) + (receive (tail-pre-dsm cset bos? eos? bol? eol? tail) (recur (append sub-elts elts) prev-cset prev-bos? prev-eos? - prev-bol? prev-eol? - prev-bow? prev-eow?) + prev-bol? prev-eol?) (values (+ pre-dsm tail-pre-dsm) - cset bos? eos? bol? eol? bow? eow? tail))) + cset bos? eos? bol? eol? tail))) ;; Simplify the tail, then think about the head. - (receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail) + (receive (tail-pre-dsm cset bos? eos? bol? eol? tail) (recur elts (? ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) @@ -298,9 +291,7 @@ (or prev-bos? (re-bos? elt)) (or prev-eos? (re-eos? elt)) (or prev-bol? (re-bol? elt)) - (or prev-eol? (re-eol? elt)) - (or prev-bow? (re-bow? elt)) - (or prev-eow? (re-eow? elt))) + (or prev-eol? (re-eol? elt))) ;; This guy is called when we couldn't find any other ;; simplification. If ELT contains live submatches, then we @@ -316,12 +307,10 @@ (let ((tail (assemble-boundary-tail prev-cset cset bos? eos? bol? eol? - bow? eow? prev-bos? prev-eos? prev-bol? prev-eol? - prev-bow? prev-eow? tail))) - (values pre-dsm char-set:empty #f #f #f #f #f #f + (values pre-dsm char-set:empty #f #f #f #f (if (pair? tail) ;; Tack tail-pre-dsm onto ;; TAIL's first elt. @@ -337,38 +326,34 @@ ;; ELT has no live submatches, so we can commute all ;; the recursion state forwards past it. (values (+ pre-dsm tail-pre-dsm) - cset bos? eos? bol? eol? bow? eow? + cset bos? eos? bol? eol? (cons elt tail)))) (? ((and (re-char-set? elt) (char-set? (re-char-set:cset elt))) ; Might be Scheme code (values (+ pre-dsm tail-pre-dsm) (char-set-union cset (re-char-set:cset elt)) - bos? eos? bol? eol? bow? eow? tail)) + bos? eos? bol? eol? tail)) ;; Treat a singleton string "c" as a singleton set {c}. ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) (values (+ pre-dsm tail-pre-dsm) (char-set-union cset (string->char-set (re-string:chars elt))) - bos? eos? bol? eol? bow? eow? tail)) + bos? eos? bol? eol? tail)) - ;; Coalesce bol/eol/bos/eos/bow/eow's. + ;; Coalesce bol/eol/bos/eos's. ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset - #t eos? bol? eol? bow? eow? tail)) + #t eos? bol? eol? tail)) ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? #t bol? eol? bow? eow? tail)) + bos? #t bol? eol? tail)) ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? #t eol? bow? eow? tail)) + bos? eos? #t eol? tail)) ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? bol? #t bow? eow? tail)) - ((re-bow? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? bol? eol? #t eow? tail)) - ((re-eow? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? bol? eol? bow? #t tail)) + bos? eos? bol? #t tail)) (else (no-simp))))))) - (values 0 char-set:empty #f #f #f #f #f #f '())))) + (values 0 char-set:empty #f #f #f #f '())))) @@ -399,5 +384,4 @@ ;; a live submatch: (else (not (or (re-char-set? re) (re-string? re) (re-bos? re) (re-eos? re) - (re-bol? re) (re-eol? re) - (re-bow? re) (re-eow? re))))))) + (re-bol? re) (re-eol? re)))))))