Remove BOW/EOW and WORD/WORD+ as they are not in POSIX, not supported
by a number of platforms, and their meaning is locale-dependent.
This commit is contained in:
		
							parent
							
								
									2cb2b88419
								
							
						
					
					
						commit
						a03bc65f4e
					
				|  | @ -92,12 +92,10 @@ | ||||||
| 	  re-empty re-empty? | 	  re-empty re-empty? | ||||||
| 	  re-bos re-bos?	  re-eos re-eos? | 	  re-bos re-bos?	  re-eos re-eos? | ||||||
| 	  re-bol re-bol?	  re-eol re-eol? | 	  re-bol re-bol?	  re-eol re-eol? | ||||||
| 	  re-bow re-bow?	  re-eow re-eow? |  | ||||||
| 
 | 
 | ||||||
| 	  re-any re-any? | 	  re-any re-any? | ||||||
| 
 | 
 | ||||||
| 	  re-nonl | 	  re-nonl | ||||||
| 	  re-word |  | ||||||
| 
 | 
 | ||||||
| 	  re? | 	  re? | ||||||
| 	  re-tsm | 	  re-tsm | ||||||
|  |  | ||||||
|  | @ -48,12 +48,10 @@ | ||||||
| 	  re-empty re-empty? | 	  re-empty re-empty? | ||||||
| 	  re-bos re-bos?	  re-eos re-eos? | 	  re-bos re-bos?	  re-eos re-eos? | ||||||
| 	  re-bol re-bol?	  re-eol re-eol? | 	  re-bol re-bol?	  re-eol re-eol? | ||||||
| 	  re-bow re-bow?	  re-eow re-eow? |  | ||||||
| 
 | 
 | ||||||
| 	  re-any re-any? | 	  re-any re-any? | ||||||
| 
 | 
 | ||||||
| 	  re-nonl | 	  re-nonl | ||||||
| 	  re-word |  | ||||||
| 
 | 
 | ||||||
| 	  regexp? | 	  regexp? | ||||||
| 	  re-tsm | 	  re-tsm | ||||||
|  |  | ||||||
|  | @ -122,14 +122,12 @@ | ||||||
| 	  (re-empty? (proc (:value) :boolean)) | 	  (re-empty? (proc (:value) :boolean)) | ||||||
| 	  re-bos	  re-eos | 	  re-bos	  re-eos | ||||||
| 	  re-bol 	  re-eol | 	  re-bol 	  re-eol | ||||||
| 	  re-bow 	  re-eow |  | ||||||
| 
 | 
 | ||||||
| 	  ((re-bos? re-eos? re-bol? re-eol? re-bow? re-eow? re-any?) | 	  ((re-bos? re-eos? re-bol? re-eol? re-any?) | ||||||
| 	   (proc (:value) :boolean)) | 	   (proc (:value) :boolean)) | ||||||
| 
 | 
 | ||||||
| 	  re-any | 	  re-any | ||||||
| 	  re-nonl | 	  re-nonl | ||||||
| 	  re-word |  | ||||||
| 
 | 
 | ||||||
| 	  (regexp? (proc (:value) :boolean)) | 	  (regexp? (proc (:value) :boolean)) | ||||||
| 	  (re-tsm (proc (:value) :exact-integer)) | 	  (re-tsm (proc (:value) :exact-integer)) | ||||||
|  |  | ||||||
|  | @ -2,7 +2,6 @@ | ||||||
| ;;;     Olin Shivers, January 1997, May 1998. | ;;;     Olin Shivers, January 1997, May 1998. | ||||||
| 
 | 
 | ||||||
| ;;; Todo: | ;;; Todo: | ||||||
| ;;; - Better unparsers for (word ...) and (word+ ...). |  | ||||||
| ;;; - Unparse char-sets into set-diff SREs -- find a char set that's a | ;;; - Unparse char-sets into set-diff SREs -- find a char set that's a | ||||||
| ;;;   tight bound, then get the difference.  This would really pretty up | ;;;   tight bound, then get the difference.  This would really pretty up | ||||||
| ;;;   things like (- alpha "aeiou") | ;;;   things like (- alpha "aeiou") | ||||||
|  | @ -67,9 +66,8 @@ | ||||||
|      ((re-dsm? re)      (static-regexp? (re-dsm:body re))) |      ((re-dsm? re)      (static-regexp? (re-dsm:body re))) | ||||||
|      ((re-submatch? re) (static-regexp? (re-submatch:body re))) |      ((re-submatch? re) (static-regexp? (re-submatch:body re))) | ||||||
| 
 | 
 | ||||||
|      (else (or (re-bos? re) (re-eos? re)	; Otw, if it's not  |      (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not  | ||||||
| 	       (re-bol? re) (re-eol? re)	; one of these, | 	       (re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code. | ||||||
| 	       (re-bow? re) (re-eow? re)	; then it's Scheme code. |  | ||||||
| 	       (re-string? re)))))  | 	       (re-string? re)))))  | ||||||
|                 |                 | ||||||
| 
 | 
 | ||||||
|  | @ -119,9 +117,6 @@ | ||||||
| (define (parse-sre/context sre case-sensitive? cset? r c) | (define (parse-sre/context sre case-sensitive? cset? r c) | ||||||
|   (let ((%bos (r 'bos))		(%eos (r 'eos)) |   (let ((%bos (r 'bos))		(%eos (r 'eos)) | ||||||
| 	(%bol (r 'bol))		(%eol (r 'eol)) | 	(%bol (r 'bol))		(%eol (r 'eol)) | ||||||
| 	(%bow (r 'bow))		(%eow (r 'eow)) |  | ||||||
| 
 |  | ||||||
| 	(%word (r 'word)) |  | ||||||
| 
 | 
 | ||||||
| 	(%flush-submatches       (r 'flush-submatches)) | 	(%flush-submatches       (r 'flush-submatches)) | ||||||
| 	(%coerce-dynamic-charset (r 'coerce-dynamic-charset)) | 	(%coerce-dynamic-charset (r 'coerce-dynamic-charset)) | ||||||
|  | @ -157,10 +152,6 @@ | ||||||
| 	 ((c sre %bol) (non-cset) re-bol) | 	 ((c sre %bol) (non-cset) re-bol) | ||||||
| 	 ((c sre %eol) (non-cset) re-eol) | 	 ((c sre %eol) (non-cset) re-eol) | ||||||
| 
 | 
 | ||||||
| 	 ((c sre %bow)  (non-cset) re-bow) |  | ||||||
| 	 ((c sre %eow)  (non-cset) re-eow) |  | ||||||
| 	 ((c sre %word) (non-cset) re-word) |  | ||||||
| 
 |  | ||||||
| 	 ((pair? sre) | 	 ((pair? sre) | ||||||
| 	  (let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym))))) | 	  (let ((hygn-eq? (lambda (the-sym) (c (car sre) (r the-sym))))) | ||||||
| 	    (cond  | 	    (cond  | ||||||
|  | @ -190,13 +181,6 @@ | ||||||
| 		  (hygn-eq? 'seq)) | 		  (hygn-eq? 'seq)) | ||||||
| 	      (non-cset) (parse-seq (cdr sre))) | 	      (non-cset) (parse-seq (cdr sre))) | ||||||
| 	      | 	      | ||||||
| 	     ((hygn-eq? 'word)  (non-cset) (parse-seq `(,%bow ,@(cdr sre) ,%eow))) |  | ||||||
| 	     ((hygn-eq? 'word+) |  | ||||||
| 	      (recur `(,(r 'word) (,(r '+) (,(r '&) (,(r '|) ,(r 'alphanum) "_") |  | ||||||
| 					    (,(r '|) . ,(cdr sre))))) |  | ||||||
| 		     case-sensitive? |  | ||||||
| 		     cset?)) |  | ||||||
| 	      |  | ||||||
| 	     ((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre)))) | 	     ((hygn-eq? 'submatch) (non-cset) (re-submatch (parse-seq (cdr sre)))) | ||||||
| 	     ((hygn-eq? 'dsm)      (non-cset) (re-dsm (parse-seq (cdddr sre)) | 	     ((hygn-eq? 'dsm)      (non-cset) (re-dsm (parse-seq (cdddr sre)) | ||||||
| 						      (cadr sre) | 						      (cadr sre) | ||||||
|  | @ -374,7 +358,6 @@ | ||||||
| (define (regexp->scheme re r) | (define (regexp->scheme re r) | ||||||
|   (let ((%re-bos (r 're-bos))	(%re-eos (r 're-eos)) |   (let ((%re-bos (r 're-bos))	(%re-eos (r 're-eos)) | ||||||
| 	(%re-bol (r 're-bol))	(%re-eol (r 're-eol)) | 	(%re-bol (r 're-bol))	(%re-eol (r 're-eol)) | ||||||
| 	(%re-bow (r 're-bow))	(%re-eow (r 're-eow)) |  | ||||||
| 	(%list   (r 'list))) | 	(%list   (r 'list))) | ||||||
| 
 | 
 | ||||||
|   (let recur ((re re)) |   (let recur ((re re)) | ||||||
|  | @ -430,8 +413,6 @@ | ||||||
|        ((re-eos? re) %re-eos) |        ((re-eos? re) %re-eos) | ||||||
|        ((re-bol? re) %re-bol) |        ((re-bol? re) %re-bol) | ||||||
|        ((re-eol? re) %re-eol) |        ((re-eol? re) %re-eol) | ||||||
|        ((re-bow? re) %re-bow) |  | ||||||
|        ((re-eow? re) %re-eow) |  | ||||||
| 
 | 
 | ||||||
|        (else re))))) |        (else re))))) | ||||||
| 
 | 
 | ||||||
|  | @ -601,8 +582,6 @@ | ||||||
|        ((re-eos? re) (r 'eos)) |        ((re-eos? re) (r 'eos)) | ||||||
|        ((re-bol? re) (r 'bol)) |        ((re-bol? re) (r 'bol)) | ||||||
|        ((re-eol? re) (r 'eol)) |        ((re-eol? re) (r 'eol)) | ||||||
|        ((re-bow? re) (r 'bow)) |  | ||||||
|        ((re-eow? re) (r 'eow)) |  | ||||||
| 
 | 
 | ||||||
|        (else re))))			; Presumably it's code. |        (else re))))			; Presumably it's code. | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -113,9 +113,6 @@ | ||||||
|      ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) |      ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) | ||||||
|      ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) |      ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) | ||||||
| 
 | 
 | ||||||
|      ((re-bow? re) (values "[[:<:]]" 1 0 '#())) ; These two are  |  | ||||||
|      ((re-eow? re) (values "[[:>:]]" 1 0 '#())) ; Spencer-specific. |  | ||||||
| 
 |  | ||||||
|      ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) |      ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) | ||||||
| 			 (body    (re-dsm:body re))) | 			 (body    (re-dsm:body re))) | ||||||
| 		     (translate-dsm body pre-dsm | 		     (translate-dsm body pre-dsm | ||||||
|  |  | ||||||
|  | @ -38,16 +38,10 @@ | ||||||
|        ((re-bol? re) (error "BOL regexp not supported in this implementation.")) |        ((re-bol? re) (error "BOL regexp not supported in this implementation.")) | ||||||
|        ((re-eol? re) (error "EOL regexp not supported in this implementation.")) |        ((re-eol? re) (error "EOL regexp not supported in this implementation.")) | ||||||
| 
 | 
 | ||||||
|        ((re-bow? re) (or bow-cre (set! bow-cre (compile)))) |  | ||||||
|        ((re-eow? re) (or eow-cre (set! eow-cre (compile)))) |  | ||||||
| 
 |  | ||||||
|        (else (error "compile-regexp -- not a regexp" re))))) |        (else (error "compile-regexp -- not a regexp" re))))) | ||||||
| 
 | 
 | ||||||
| (define bos-cre #f) | (define bos-cre #f) | ||||||
| (define eos-cre #f) | (define eos-cre #f) | ||||||
| (define bow-cre #f) |  | ||||||
| (define eow-cre #f) |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| (define (regexp-search re str . maybe-start) | (define (regexp-search re str . maybe-start) | ||||||
|  |  | ||||||
|  | @ -38,17 +38,12 @@ | ||||||
| 		   (kw? head 'unquote)		; ,exp | 		   (kw? head 'unquote)		; ,exp | ||||||
| 		   (kw? head 'unquote-splicing)	; ,@exp | 		   (kw? head 'unquote-splicing)	; ,@exp | ||||||
| 
 | 
 | ||||||
| 		   (kw? head 'posix-string)	; (posix-string string) | 		   (kw? head 'posix-string))))	; (posix-string string) | ||||||
| 
 |  | ||||||
| 		   (kw? head 'word+)		; (word+ re ...) |  | ||||||
| 		   (kw? head 'word))))		; (word re ...) |  | ||||||
| 
 | 
 | ||||||
| 	(kw? exp 'any)				; any | 	(kw? exp 'any)				; any | ||||||
| 	(kw? exp 'nonl)				; nonl | 	(kw? exp 'nonl)				; nonl | ||||||
| 	(kw? exp 'word)				; word |  | ||||||
| 	(kw? exp 'bos) (kw? exp 'eos)		; bos / eos | 	(kw? exp 'bos) (kw? exp 'eos)		; bos / eos | ||||||
| 	(kw? exp 'bol) (kw? exp 'eol)		; bol / eol | 	(kw? exp 'bol) (kw? exp 'eol)		; bol / eol | ||||||
| 	(kw? exp 'bow) (kw? exp 'eow)		; bow / eow |  | ||||||
| 
 | 
 | ||||||
| 	(kw? exp 'lower-case)	(kw? exp 'lower); The char class names | 	(kw? exp 'lower-case)	(kw? exp 'lower); The char class names | ||||||
| 	(kw? exp 'upper-case)	(kw? exp 'upper) | 	(kw? exp 'upper-case)	(kw? exp 'upper) | ||||||
|  |  | ||||||
|  | @ -374,10 +374,6 @@ | ||||||
| (define-record re-bol)  (define re-bol (make-re-bol)) | (define-record re-bol)  (define re-bol (make-re-bol)) | ||||||
| (define-record re-eol)  (define re-eol (make-re-eol)) | (define-record re-eol)  (define re-eol (make-re-eol)) | ||||||
| 
 | 
 | ||||||
| (define-record re-bow)  (define re-bow (make-re-bow)) |  | ||||||
| (define-record re-eow)  (define re-eow (make-re-eow)) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| (define re-any (make-re-char-set/posix char-set:full "." '#())) | (define re-any (make-re-char-set/posix char-set:full "." '#())) | ||||||
| 
 | 
 | ||||||
| (define (re-any? re) | (define (re-any? re) | ||||||
|  | @ -398,7 +394,6 @@ | ||||||
|       (re-char-set? x) (re-string? x) |       (re-char-set? x) (re-string? x) | ||||||
|       (re-bos? x)      (re-eos? x) |       (re-bos? x)      (re-eos? x) | ||||||
|       (re-bol? x)      (re-eol? x) |       (re-bol? x)      (re-eol? x) | ||||||
|       (re-bow? x)      (re-eow? x) |  | ||||||
|       (re-submatch? x) (re-dsm? x))) |       (re-submatch? x) (re-dsm? x))) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -413,14 +408,6 @@ | ||||||
|      (else 0))) |      (else 0))) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| (define re-word |  | ||||||
|   (let ((wcs (char-set-union char-set:letter+digit	; Word chars |  | ||||||
| 			     (char-set #\_)))) |  | ||||||
|     (make-re-seq (list re-bow |  | ||||||
| 		       (make-re-repeat 1 #f (make-re-char-set wcs)) |  | ||||||
| 		       re-eow)))) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| ;;; (flush-submatches re) | ;;; (flush-submatches re) | ||||||
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | ||||||
| ;;; Return regular expression RE with all submatch-binding elements | ;;; Return regular expression RE with all submatch-binding elements | ||||||
|  |  | ||||||
|  | @ -157,13 +157,11 @@ | ||||||
| 			    tail) | 			    tail) | ||||||
| 		    (no-simp))) | 		    (no-simp))) | ||||||
| 
 | 
 | ||||||
| 	       ;; Coalesce adjacent bol/eol/bos/eos/bow/eow's. | 	       ;; Coalesce adjacent bol/eol/bos/eos's. | ||||||
| 	       ((re-bol? elt) (coalesce-anchor re-bol?)) | 	       ((re-bol? elt) (coalesce-anchor re-bol?)) | ||||||
| 	       ((re-eol? elt) (coalesce-anchor re-eol?)) | 	       ((re-eol? elt) (coalesce-anchor re-eol?)) | ||||||
| 	       ((re-bos? elt) (coalesce-anchor re-bos?)) | 	       ((re-bos? elt) (coalesce-anchor re-bos?)) | ||||||
| 	       ((re-eos? elt) (coalesce-anchor re-eos?)) | 	       ((re-eos? elt) (coalesce-anchor re-eos?)) | ||||||
| 	       ((re-bow? elt) (coalesce-anchor re-bow?)) |  | ||||||
| 	       ((re-eow? elt) (coalesce-anchor re-eow?)) |  | ||||||
| 	       (else (no-simp))))) | 	       (else (no-simp))))) | ||||||
| 
 | 
 | ||||||
| 	 (else (values pre-dsm elt '())))))) | 	 (else (values pre-dsm elt '())))))) | ||||||
|  | @ -205,12 +203,12 @@ | ||||||
| (define (simp-choice re) | (define (simp-choice re) | ||||||
|   (let ((tsm (re-choice:tsm re))) |   (let ((tsm (re-choice:tsm re))) | ||||||
| 
 | 
 | ||||||
|     (receive (pre-dsm cset bos? eos? bol? eol? bow? eow? tail) |     (receive (pre-dsm cset bos? eos? bol? eol? tail) | ||||||
| 	     (simp-choice1 (map simplify-regexp (re-choice:elts re))) | 	     (simp-choice1 (map simplify-regexp (re-choice:elts re))) | ||||||
| 
 | 
 | ||||||
|       (let ((tail (assemble-boundary-tail char-set:empty cset |       (let ((tail (assemble-boundary-tail char-set:empty cset | ||||||
| 					  bos? eos? bol? eol? bow? eow? | 					  bos? eos? bol? eol? | ||||||
| 					  #f #f #f #f #f #f | 					  #f #f #f #f | ||||||
| 					  tail))) | 					  tail))) | ||||||
| 	(values (if (pair? tail) | 	(values (if (pair? tail) | ||||||
| 		    (if (pair? (cdr tail)) | 		    (if (pair? (cdr tail)) | ||||||
|  | @ -228,17 +226,14 @@ | ||||||
| ;;; some earlier bit of the final result. | ;;; some earlier bit of the final result. | ||||||
| 
 | 
 | ||||||
| (define (assemble-boundary-tail prev-cset cset | (define (assemble-boundary-tail prev-cset cset | ||||||
| 				bos? eos? bol? eol? bow? eow?  | 				bos? eos? bol? eol? | ||||||
| 				prev-bos? prev-eos? | 				prev-bos? prev-eos? | ||||||
| 				prev-bol? prev-eol? | 				prev-bol? prev-eol? | ||||||
| 				prev-bow? prev-eow?  |  | ||||||
| 				tail) | 				tail) | ||||||
|   (let* ((cset (char-set-difference cset prev-cset)) |   (let* ((cset (char-set-difference cset prev-cset)) | ||||||
| 	 (numchars (char-set-size cset)) | 	 (numchars (char-set-size cset)) | ||||||
| 	 (tail (if (and eos? (not prev-eos?)) (cons re-eos tail) tail)) | 	 (tail (if (and eos? (not prev-eos?)) (cons re-eos tail) tail)) | ||||||
| 	 (tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail)) | 	 (tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail)) | ||||||
| 	 (tail (if (and eow? (not prev-eow?)) (cons re-eow tail) tail)) |  | ||||||
| 	 (tail (if (and bow? (not prev-bow?)) (cons re-bow tail) tail)) |  | ||||||
| 	 (tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail)) | 	 (tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail)) | ||||||
| 	 (tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail)) | 	 (tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail)) | ||||||
| 	 (tail (? ((zero? numchars) tail)	; Drop empty char set. | 	 (tail (? ((zero? numchars) tail)	; Drop empty char set. | ||||||
|  | @ -251,16 +246,15 @@ | ||||||
| 
 | 
 | ||||||
| ;;; Simplify the non-empty list of choices ELTS. | ;;; Simplify the non-empty list of choices ELTS. | ||||||
| ;;; Return the result split out into the values | ;;; Return the result split out into the values | ||||||
| ;;;     [pre-dsm, cset, bos?, eos?, bol?, eol?, bow?, eow?, tail] | ;;;     [pre-dsm, cset, bos?, eos?, bol?, eol?, tail] | ||||||
| 
 | 
 | ||||||
| (define (simp-choice1 elts) | (define (simp-choice1 elts) | ||||||
|   (let recur ((elts elts) |   (let recur ((elts elts) | ||||||
| 
 | 
 | ||||||
| 	      (prev-cset char-set:empty)	; Chars we've already seen. | 	      (prev-cset char-set:empty) ; Chars we've already seen. | ||||||
| 
 | 
 | ||||||
| 	      (prev-bos? #f) (prev-eos? #f)	; These flags say if we've | 	      (prev-bos? #f) (prev-eos? #f) ; These flags say if we've | ||||||
| 	      (prev-bol? #f) (prev-eol? #f)	; already seen one of these | 	      (prev-bol? #f) (prev-eol? #f)) ; already seen one of these anchors. | ||||||
| 	      (prev-bow? #f) (prev-eow? #f))	; anchors. |  | ||||||
| 			        | 			        | ||||||
|      |      | ||||||
|     (if (pair? elts) |     (if (pair? elts) | ||||||
|  | @ -271,17 +265,16 @@ | ||||||
| 
 | 
 | ||||||
| 		;; Flatten nested choices. | 		;; Flatten nested choices. | ||||||
| 		(let ((sub-elts (re-seq:elts elt))) | 		(let ((sub-elts (re-seq:elts elt))) | ||||||
| 		  (receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail) | 		  (receive (tail-pre-dsm cset bos? eos? bol? eol? tail) | ||||||
| 		           (recur (append sub-elts elts) | 		           (recur (append sub-elts elts) | ||||||
| 				  prev-cset | 				  prev-cset | ||||||
| 				  prev-bos? prev-eos? | 				  prev-bos? prev-eos? | ||||||
| 				  prev-bol? prev-eol? | 				  prev-bol? prev-eol?) | ||||||
| 				  prev-bow? prev-eow?) |  | ||||||
| 		    (values (+ pre-dsm tail-pre-dsm) | 		    (values (+ pre-dsm tail-pre-dsm) | ||||||
| 			    cset bos? eos? bol? eol? bow? eow? tail))) | 			    cset bos? eos? bol? eol? tail))) | ||||||
| 		   | 		   | ||||||
| 		;; Simplify the tail, then think about the head. | 		;; Simplify the tail, then think about the head. | ||||||
| 		(receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail) | 		(receive (tail-pre-dsm cset bos? eos? bol? eol? tail) | ||||||
| 		         (recur elts | 		         (recur elts | ||||||
| 				(? ((and (re-string? elt) | 				(? ((and (re-string? elt) | ||||||
| 					 (= 1 (string-length (re-string:chars elt)))) | 					 (= 1 (string-length (re-string:chars elt)))) | ||||||
|  | @ -298,9 +291,7 @@ | ||||||
| 				(or prev-bos? (re-bos? elt)) | 				(or prev-bos? (re-bos? elt)) | ||||||
| 				(or prev-eos? (re-eos? elt)) | 				(or prev-eos? (re-eos? elt)) | ||||||
| 				(or prev-bol? (re-bol? elt)) | 				(or prev-bol? (re-bol? elt)) | ||||||
| 				(or prev-eol? (re-eol? elt)) | 				(or prev-eol? (re-eol? elt))) | ||||||
| 				(or prev-bow? (re-bow? elt)) |  | ||||||
| 				(or prev-eow? (re-eow? elt))) |  | ||||||
| 
 | 
 | ||||||
| 		  ;; This guy is called when we couldn't find any other | 		  ;; This guy is called when we couldn't find any other | ||||||
| 		  ;; simplification. If ELT contains live submatches, then we | 		  ;; simplification. If ELT contains live submatches, then we | ||||||
|  | @ -316,12 +307,10 @@ | ||||||
| 			(let ((tail (assemble-boundary-tail prev-cset cset | 			(let ((tail (assemble-boundary-tail prev-cset cset | ||||||
| 							    bos? eos? | 							    bos? eos? | ||||||
| 							    bol? eol? | 							    bol? eol? | ||||||
| 							    bow? eow?  |  | ||||||
| 							    prev-bos? prev-eos? | 							    prev-bos? prev-eos? | ||||||
| 							    prev-bol? prev-eol? | 							    prev-bol? prev-eol? | ||||||
| 							    prev-bow? prev-eow? |  | ||||||
| 							    tail))) | 							    tail))) | ||||||
| 			  (values pre-dsm char-set:empty #f #f #f #f #f #f | 			  (values pre-dsm char-set:empty #f #f #f #f | ||||||
| 				  (if (pair? tail) | 				  (if (pair? tail) | ||||||
| 				      ;; Tack tail-pre-dsm onto | 				      ;; Tack tail-pre-dsm onto | ||||||
| 				      ;; TAIL's first elt. | 				      ;; TAIL's first elt. | ||||||
|  | @ -337,38 +326,34 @@ | ||||||
| 			;; ELT has no live submatches, so we can commute all | 			;; ELT has no live submatches, so we can commute all | ||||||
| 			;; the recursion state forwards past it. | 			;; the recursion state forwards past it. | ||||||
| 			(values (+ pre-dsm tail-pre-dsm) | 			(values (+ pre-dsm tail-pre-dsm) | ||||||
| 				cset bos? eos? bol? eol? bow? eow? | 				cset bos? eos? bol? eol? | ||||||
| 				(cons elt tail)))) | 				(cons elt tail)))) | ||||||
| 
 | 
 | ||||||
| 	    (? ((and (re-char-set? elt) | 	    (? ((and (re-char-set? elt) | ||||||
| 		     (char-set? (re-char-set:cset elt))) ; Might be Scheme code | 		     (char-set? (re-char-set:cset elt))) ; Might be Scheme code | ||||||
| 		(values (+ pre-dsm tail-pre-dsm) | 		(values (+ pre-dsm tail-pre-dsm) | ||||||
| 			(char-set-union cset (re-char-set:cset elt)) | 			(char-set-union cset (re-char-set:cset elt)) | ||||||
| 			bos? eos? bol? eol? bow? eow? tail)) | 			bos? eos? bol? eol? tail)) | ||||||
| 
 | 
 | ||||||
| 	       ;; Treat a singleton string "c" as a singleton set {c}. | 	       ;; Treat a singleton string "c" as a singleton set {c}. | ||||||
| 	       ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) | 	       ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) | ||||||
| 		(values (+ pre-dsm tail-pre-dsm) | 		(values (+ pre-dsm tail-pre-dsm) | ||||||
| 			(char-set-union cset (string->char-set (re-string:chars elt))) | 			(char-set-union cset (string->char-set (re-string:chars elt))) | ||||||
| 			bos? eos? bol? eol? bow? eow? tail)) | 			bos? eos? bol? eol? tail)) | ||||||
| 
 | 
 | ||||||
| 	       ;; Coalesce bol/eol/bos/eos/bow/eow's. | 	       ;; Coalesce bol/eol/bos/eos's. | ||||||
| 	       ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset | 	       ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset | ||||||
| 				      #t   eos? bol? eol? bow? eow? tail)) | 				      #t   eos? bol? eol? tail)) | ||||||
| 	       ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset | 	       ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset | ||||||
| 				      bos? #t   bol? eol? bow? eow? tail)) | 				      bos? #t   bol? eol? tail)) | ||||||
| 	       ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset | 	       ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset | ||||||
| 				      bos? eos? #t   eol? bow? eow? tail)) | 				      bos? eos? #t   eol? tail)) | ||||||
| 	       ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset | 	       ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset | ||||||
| 				      bos? eos? bol? #t   bow? eow? tail)) | 				      bos? eos? bol? #t   tail)) | ||||||
| 	       ((re-bow? elt) (values (+ pre-dsm tail-pre-dsm) cset |  | ||||||
| 				      bos? eos? bol? eol? #t   eow? tail)) |  | ||||||
| 	       ((re-eow? elt) (values (+ pre-dsm tail-pre-dsm) cset |  | ||||||
| 				      bos? eos? bol? eol? bow? #t   tail)) |  | ||||||
| 
 | 
 | ||||||
| 	       (else (no-simp))))))) | 	       (else (no-simp))))))) | ||||||
| 
 | 
 | ||||||
| 	(values 0 char-set:empty #f #f #f #f #f #f '())))) | 	(values 0 char-set:empty #f #f #f #f '())))) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -399,5 +384,4 @@ | ||||||
| 	 ;; a live submatch: | 	 ;; a live submatch: | ||||||
| 	 (else (not (or (re-char-set? re) (re-string? re) | 	 (else (not (or (re-char-set? re) (re-string? re) | ||||||
| 			(re-bos? re) (re-eos? re)  | 			(re-bos? re) (re-eos? re)  | ||||||
| 			(re-bol? re) (re-eol? re)  | 			(re-bol? re) (re-eol? re))))))) | ||||||
| 			(re-bow? re) (re-eow? re))))))) |  | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 sperber
						sperber