scsh-0.5/scsh/rx/simp.scm

;;; Olin Shivers, June 1998
;;; Copyright (c) 1998 by the Scheme Underground.

;;; One export: (simplify-regexp re) -> re

;;; Regexp simplifier
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (| (in c1 ...) (in c2 ...) re ...) => (| (in c1 ... c2 ...) re ...)
;;; (| (not-in c1 ...) (not-in c2 ...)) => (| (not-in [intersect (c1 ...)
;;;                                                              (c2 ...)]))
;;; A run of BOS's or a run of EOS's in a sequence may be elided.
;;; Nested exponents can be collapsed (*, +, ?) -- multiply the "from's"
;;;   together; multiply the "to's" together.
;;; Exponent range [1,1] simplifies, as does [0,0].
;;; Uniquify branches
;;; Adjacent literals in a sequence can be collapsed
;;; A singleton-char char class can be collapsed into a constant
;;; Nested choices can be collapsed
;;; Nested sequences can be collapsed
;;; An empty sequence (:) can be turned into an empty-string match "".
;;; Singleton choices and sequences can be reduced to their body.
;;;
;;; The simplifier is carefully written so that it won't blow up
;;; when applied to a dynamic regexp -- that is, 
;;; - a chunk of Scheme code that produces a regexp instead of
;;;   an actual regexp value;
;;; - a repeat regexp whose FROM or TO fields are chunks of Scheme code
;;;   rather than integers; 
;;; - a char-set regexp whose CSET field is a chunk of Scheme code rather
;;;   than an actual char-set value.
;;; This is useful because the RX macro can build such a regexp as part
;;; of its expansion process.

(define (simplify-regexp re)
  ;; SIMP-RE returns the simplified regexp together with a leading
  ;; dead-submatch count. The two are repackaged with RE-DSM; its last
  ;; argument is what is left of RE's submatch total (RE-TSM) after
  ;; subtracting the simplified regexp's total and the leading count.
  (receive (simplified leading-dsm) (simp-re re)
    (let ((remaining-dsm (- (re-tsm re)
                            (+ (re-tsm simplified) leading-dsm))))
      (re-dsm simplified leading-dsm remaining-dsm))))

(define (simp-re re)
  ;; Dispatch on the kind of regexp record. Every branch produces two
  ;; values: the simplified regexp and a leading dead-submatch count.
  (cond ((re-string? re)   (values re 0))
        ((re-seq? re)      (simp-seq re))
        ((re-choice? re)   (simp-choice re))

        ((re-char-set? re)
         ;; A one-character set is rewritten as a one-character string.
         ;; The CSET field is only examined when it really is a char-set;
         ;; in a dynamic regexp it may hold Scheme code instead, in which
         ;; case the record is returned unchanged.
         (let* ((cs (re-char-set:cset re))
                (singleton? (and (char-set? cs)
                                 (= 1 (char-set-size cs)))))
           (values (if singleton?
                       (make-re-string (string (car (char-set->list cs))))
                       re)
                   0)))

        ((re-repeat? re)   (simp-repeat re))
        ((re-submatch? re) (simp-submatch re))
        ((re-dsm? re)      (simp-dsm re))

        ;; Anything else is passed through untouched.
        (else (values re 0))))


;;; If the body of a submatch is the empty re, reduce it to the empty re.

(define (simp-submatch re)
  ;; Simplify the body first, then decide what to do with the wrapper.
  (receive (new-body body-dsm) (simp-re (re-submatch:body re))
    (let ((tsm (re-submatch:tsm re)))
      (cond ((re-empty? new-body)
             ;; The body can never match, so the submatch collapses to
             ;; RE-EMPTY; the submatch's TSM field is returned as the
             ;; leading dead-submatch count.
             (values re-empty tsm))
            (else
             ;; Rebuild the submatch around the simplified body. Dead
             ;; submatches popped off the front of the body are added to
             ;; the submatch's own pre-dsm field; nothing is passed up.
             (values (%make-re-submatch new-body
                                        (+ (re-submatch:pre-dsm re) body-dsm)
                                        tsm)
                     0))))))

;;; - Flatten nested DSM's.
;;; - Return the simplified body and the accumulated pre-dsm count, in that
;;;   order, as the two return values.

(define (simp-dsm re)
  ;; The DSM wrapper itself is not rebuilt here: its pre-dsm field is added
  ;; to whatever leading count simplifying the body produced, and that sum
  ;; is returned next to the simplified body.
  (let ((outer-dsm (re-dsm:pre-dsm re)))
    (receive (inner inner-dsm) (simp-re (re-dsm:body re))
      (values inner (+ outer-dsm inner-dsm)))))


;;; Simplifying sequences
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; - Collapse nested sequences and DSM's.
;;; - Merge adjacent strings, identical adjacent anchors (bos, eos, etc.).
;;; - Bubble DSM's forwards past elts that don't contain live submatches.
;;;   (Going past live submatches would switch the submatch indexes around,
;;;   which would be an error). This helps to coalesce DSMs and if we bring
;;;   them all the way to the front, we can pop them off and make them a 
;;;   pre-dsm for the entire seq record. 
;;; - If an elt is the re-empty, reduce the whole re to the empty re.
;;; - Reduce singleton and empty seq.

(define (simp-seq re)
  (let ((tsm     (re-seq:tsm re))
        (members (map simplify-regexp (re-seq:elts re))))
    (if (not (pair? members))

        ;; Empty seq.
        (values re-trivial 0)

        ;; SIMP-SEQ1 is handed this continuation as its ABORT argument.
        ;; When it calls (abort elt tsm), those two values become the
        ;; values returned from here, skipping the assembly step below.
        (call-with-current-continuation
         (lambda (bail-out)
           (receive (pre-dsm head-elt rest-elts)
                    (simp-seq1 members bail-out tsm)
             (let ((result (if (not (pair? rest-elts))
                               head-elt        ; Singleton seq
                               (%make-re-seq (cons head-elt rest-elts)
                                             (- tsm pre-dsm)))))
               (values result pre-dsm))))))))


;;; Simplify the non-empty sequence ELTS.
;;; - Return the result split out into three values: 
;;;   [head-elt-pre-dsm, head-elt, tail].
;;; - If any elt is the empty (impossible) re, abort by calling
;;;   (abort elt tsm). TSM is otherwise unused.

(define (simp-seq1 elts abort tsm)
  ;; Return the first anchor predicate -- tried in the order bol, eol, bos,
  ;; eos, bow, eow -- that accepts RE, or #f when none of them does.
  (define (anchor-test-for re)
    (let scan ((tests (list re-bol? re-eol? re-bos? re-eos? re-bow? re-eow?)))
      (cond ((null? tests) #f)
            (((car tests) re) (car tests))
            (else (scan (cdr tests))))))

  (let walk ((raw (car elts)) (rest (cdr elts)))
    (receive (head head-dsm) (open-dsm raw)
      (cond
       ;; Flatten a nested seq: its first element (re-wrapped with the
       ;; pre-dsm count just opened) becomes the current element and its
       ;; remaining elements are spliced in front of REST.
       ((re-seq? head)
        (let ((inner (re-seq:elts head)))
          (walk (re-dsm (car inner) head-dsm 0)
                (append (cdr inner) rest))))

       ;; The empty (impossible) re: bomb out through ABORT.
       ((re-empty? head) (abort head tsm))

       ((pair? rest)
        ;; Simplify everything to the right first, then see whether HEAD
        ;; can be merged with the element that now follows it.
        (receive (next-dsm next tail) (walk (car rest) (cdr rest))
          (let ((keep-both
                 ;; No merge applies. If HEAD contains live submatches,
                 ;; NEXT's pre-dsm count must stay attached to NEXT;
                 ;; otherwise it is bubbled forwards past HEAD.
                 (lambda ()
                   (if (has-live-submatches? head)
                       (values head-dsm
                               head
                               (cons (re-dsm next next-dsm 0) tail))
                       (values (+ head-dsm next-dsm)
                               head
                               (cons next tail))))))
            (cond
             ;; Drop trivial re's.
             ((re-trivial? head)
              (values (+ head-dsm next-dsm) next tail))

             ;; Two adjacent strings become one concatenated string.
             ((re-string? head)
              (if (re-string? next)
                  (values (+ head-dsm next-dsm)
                          (make-re-string
                           (string-append (re-string:chars head)
                                          (re-string:chars next)))
                          tail)
                  (keep-both)))

             ;; HEAD is an anchor: if NEXT is the same kind of anchor,
             ;; keep just HEAD.
             ((anchor-test-for head)
              => (lambda (same-kind?)
                   (if (same-kind? next)
                       (values (+ head-dsm next-dsm) head tail)
                       (keep-both))))

             (else (keep-both))))))

       ;; Last element of the sequence: nothing to merge with.
       (else (values head-dsm head '()))))))


;;; Simplifying choices
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; - Collapse nested choices and DSM's.
;;; - Delete re-empty's.
;;; - Merge sets; merge identical anchors (bos, eos, etc.).
;;;   But you can't merge across an element that contains a live submatch,
;;;   see below.
;;; - A singleton string "c" is included into the char-set merge as a 
;;;   singleton set.
;;; - Bubble DSM's forwards past elts that don't contain live submatches.
;;;   (Going past live submatches would switch the submatch indexes around,
;;;   which would be an error). This helps to coalesce DSMs and if we bring
;;;   them all the way to the front, we can pop them off and make them a 
;;;   pre-dsm for the entire choice record. 
;;; - Reduce singleton and empty choice.
;;;
;;; You have to be careful simplifying choices -- you can't merge two sets
;;; that appear on different sides of an element containing a live submatch.
;;; The problem is that the assignment of submatches breaks ties left-to-right.
;;; So these aren't the same:
;;;     (| (submatch "x") any)    (| any (submatch "x"))
;;; The first assigns the submatch, the second doesn't -- the ANY gets credit.
;;; We want to collapse multiple char-sets, bos's, and eos's, but we have
;;; to deal with this issue. So
;;; - When we coalesce anchors, we retain the *leftmost* one.
;;; - We coalesce sets that appear between live-submatch boundaries.
;;;   When we do this, we subtract from the set any char that was in
;;;   an earlier coalesced char-set. If this gets us down to the empty set,
;;;   we drop it. If it gets us down to a singleton set, we convert it into
;;;   a singleton string.
;;; Whew. I had to think about this one.

(define (simp-choice re)
  ;; Simplify each branch on its own, let SIMP-CHOICE1 merge them, then
  ;; turn the leftover char-set and anchor flags back into real branches
  ;; at the front of the list. Nothing has been seen "earlier" at top
  ;; level, hence the empty previous char-set and the six #f flags.
  (receive (pre-dsm cset bos? eos? bol? eol? bow? eow? rest)
           (simp-choice1 (map simplify-regexp (re-choice:elts re)))
    (let ((branches (assemble-boundary-tail char-set:empty cset
                                            bos? eos? bol? eol? bow? eow?
                                            #f #f #f #f #f #f
                                            rest)))
      (values (cond ((not (pair? branches))
                     re-empty)                  ; Empty choice
                    ((not (pair? (cdr branches)))
                     (car branches))            ; Singleton choice
                    (else
                     (%make-re-choice branches
                                      (- (re-choice:tsm re) pre-dsm))))
              pre-dsm))))


;;; Given the return values from simp-choice1, this tacks all
;;; the various pieces (CSET, BOS?, EOS?, etc.) onto the front of
;;; TAIL. However, elements are not added onto TAIL that are already
;;; described by PREV-CSET, PREV-BOS?, etc. -- they will be added onto
;;; some earlier bit of the final result.

(define (assemble-boundary-tail prev-cset cset
                                bos? eos? bol? eol? bow? eow?
                                prev-bos? prev-eos?
                                prev-bol? prev-eol?
                                prev-bow? prev-eow?
                                tail)
  ;; Cons ITEM onto LST when its flag is set now but was not set earlier;
  ;; otherwise hand LST back unchanged.
  (define (add-if-new now? before? item lst)
    (if (and now? (not before?))
        (cons item lst)
        lst))

  (let* ((fresh (char-set-difference cset prev-cset)) ; Chars not seen before.
         (n     (char-set-size fresh))
         ;; The innermost call runs first, so the anchors end up in the
         ;; order bos, bol, bow, eow, eol, eos in front of TAIL.
         (with-anchors
          (add-if-new bos? prev-bos? re-bos
           (add-if-new bol? prev-bol? re-bol
            (add-if-new bow? prev-bow? re-bow
             (add-if-new eow? prev-eow? re-eow
              (add-if-new eol? prev-eol? re-eol
               (add-if-new eos? prev-eos? re-eos tail))))))))
    (cond ((zero? n) with-anchors)      ; Drop empty char set.
          ((= 1 n)                      ; {c} => "c"
           (cons (make-re-string (string (car (char-set->list fresh))))
                 with-anchors))
          (else (cons (make-re-char-set fresh) with-anchors)))))


;;; Simplify the non-empty list of choices ELTS.
;;; Return the result split out into the values
;;;     [pre-dsm, cset, bos?, eos?, bol?, eol?, bow?, eow?, tail]

(define (simp-choice1 elts)
  ;; Recur down ELTS. Information about what lies to the *left* of the
  ;; current element (PREV-CSET and the PREV-xxx? anchor flags) is passed
  ;; down as loop arguments; what was collected to the *right* of it
  ;; (pre-dsm count, CSET, anchor flags, TAIL) comes back as return values.
  (let recur ((elts elts)

              (prev-cset char-set:empty)        ; Chars we've already seen.

              (prev-bos? #f) (prev-eos? #f)     ; These flags say if we've
              (prev-bol? #f) (prev-eol? #f)     ; already seen one of these
              (prev-bow? #f) (prev-eow? #f))    ; anchors.

    (if (pair? elts)
        (let ((elt  (car elts))
              (elts (cdr elts)))
          (receive (elt pre-dsm) (open-dsm elt)
            (if (re-choice? elt)

                ;; Flatten nested choices: splice the nested choice's
                ;; branches in front of the remaining elements and carry
                ;; on. ELT is a *choice* record here, so its branches are
                ;; read with RE-CHOICE:ELTS (the seq accessor RE-SEQ:ELTS
                ;; does not apply to it).
                (let ((sub-elts (re-choice:elts elt)))
                  (receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
                           (recur (append sub-elts elts)
                                  prev-cset
                                  prev-bos? prev-eos?
                                  prev-bol? prev-eol?
                                  prev-bow? prev-eow?)
                    (values (+ pre-dsm tail-pre-dsm)
                            cset bos? eos? bol? eol? bow? eow? tail)))

                ;; Simplify the tail, then think about the head. On the
                ;; way down, ELT's own contribution (its chars if it is a
                ;; one-char string or a real char-set; its anchor kind if
                ;; it is an anchor) is folded into the PREV-xxx state seen
                ;; by everything to its right.
                (receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)
                         (recur elts
                                (cond ((and (re-string? elt)
                                            (= 1 (string-length
                                                  (re-string:chars elt))))
                                       (char-set-union
                                        prev-cset
                                        (string->char-set (re-string:chars elt))))

                                      ;; The cset might be a Scheme exp.
                                      ((and (re-char-set? elt)
                                            (char-set? (re-char-set:cset elt)))
                                       (char-set-union prev-cset
                                                       (re-char-set:cset elt)))

                                      (else prev-cset))
                                (or prev-bos? (re-bos? elt))
                                (or prev-eos? (re-eos? elt))
                                (or prev-bol? (re-bol? elt))
                                (or prev-eol? (re-eol? elt))
                                (or prev-bow? (re-bow? elt))
                                (or prev-eow? (re-eow? elt)))

                  ;; This guy is called when we couldn't find any other
                  ;; simplification. If ELT contains live submatches, then
                  ;; we are at a merge boundary, and have to take all the
                  ;; TAIL-PRE-DSM, CSET, BOS?, EOS?, ... stuff we've
                  ;; collected and tack them onto TAIL as elements, then
                  ;; put ELT on front. Otherwise, we can commute
                  ;; TAIL-PRE-DSM, CSET, BOS?, etc. with ELT, since it
                  ;; contains no live submatches, so just tack ELT onto
                  ;; TAIL.
                  (define (no-simp)
                    (if (has-live-submatches? elt)
                        (let ((tail (assemble-boundary-tail prev-cset cset
                                                            bos? eos?
                                                            bol? eol?
                                                            bow? eow?
                                                            prev-bos? prev-eos?
                                                            prev-bol? prev-eol?
                                                            prev-bow? prev-eow?
                                                            tail)))
                          ;; Everything collected so far has now been
                          ;; materialised into TAIL, so the collected
                          ;; state handed upwards starts out empty again.
                          (values pre-dsm char-set:empty #f #f #f #f #f #f
                                  (if (pair? tail)
                                      ;; Tack tail-pre-dsm onto
                                      ;; TAIL's first elt.
                                      (cons elt
                                            (cons (re-dsm (car tail)
                                                          tail-pre-dsm 0)
                                                  (cdr tail)))

                                      ;; Squirrel case: TAIL is empty, so
                                      ;; use TAIL-PRE-DSM as ELT's post-dsm.
                                      (list (re-dsm elt 0 tail-pre-dsm)))))

                        ;; ELT has no live submatches, so we can commute
                        ;; all the recursion state forwards past it.
                        (values (+ pre-dsm tail-pre-dsm)
                                cset bos? eos? bol? eol? bow? eow?
                                (cons elt tail))))

                  (cond
                   ;; A real char-set is absorbed into the collected CSET.
                   ((and (re-char-set? elt)
                         (char-set? (re-char-set:cset elt))) ; Might be Scheme code
                    (values (+ pre-dsm tail-pre-dsm)
                            (char-set-union cset (re-char-set:cset elt))
                            bos? eos? bol? eol? bow? eow? tail))

                   ;; Treat a singleton string "c" as a singleton set {c}.
                   ((and (re-string? elt)
                         (= 1 (string-length (re-string:chars elt))))
                    (values (+ pre-dsm tail-pre-dsm)
                            (char-set-union cset
                                            (string->char-set
                                             (re-string:chars elt)))
                            bos? eos? bol? eol? bow? eow? tail))

                   ;; Coalesce bol/eol/bos/eos/bow/eow's: the anchor itself
                   ;; is dropped and only its flag is raised.
                   ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset
                                          #t   eos? bol? eol? bow? eow? tail))
                   ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset
                                          bos? #t   bol? eol? bow? eow? tail))
                   ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset
                                          bos? eos? #t   eol? bow? eow? tail))
                   ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset
                                          bos? eos? bol? #t   bow? eow? tail))
                   ((re-bow? elt) (values (+ pre-dsm tail-pre-dsm) cset
                                          bos? eos? bol? eol? #t   eow? tail))
                   ((re-eow? elt) (values (+ pre-dsm tail-pre-dsm) cset
                                          bos? eos? bol? eol? bow? #t   tail))

                   (else (no-simp)))))))

        ;; No elements left: nothing collected.
        (values 0 char-set:empty #f #f #f #f #f #f '()))))


(define (simp-repeat re)
  ;; Only the body is simplified here. The FROM and TO fields are passed
  ;; along untouched, together with the simplified body and its leading
  ;; dead-submatch count, to REDUCE-REPEAT, which handles the reductions.
  (receive (new-body body-dsm) (simp-re (re-repeat:body re))
    (reduce-repeat (re-repeat:from re)
                   (re-repeat:to   re)
                   new-body
                   body-dsm)))


;;; Does RE contain a live submatch?
;;; If RE is dynamic, we can't tell, so we err conservatively, 
;;; which means we say "yes."

(define (has-live-submatches? re)
  ;; True when at least one regexp in the list RES contains a live
  ;; submatch. A seq or choice *contains* a live submatch as soon as any
  ;; one of its elements does, so this is an "any"-style scan; it is
  ;; written out locally so the block needs no binding it did not already
  ;; use. An empty list yields #f.
  (define (any-live? res)
    (and (pair? res)
         (or (has-live-submatches? (car res))
             (any-live? (cdr res)))))

  (or (re-submatch? re)
      (cond ((re-seq?    re) (any-live? (re-seq:elts    re)))
            ((re-choice? re) (any-live? (re-choice:elts re)))
            ((re-repeat? re) (has-live-submatches? (re-repeat:body re)))
            ((re-dsm?    re) (has-live-submatches? (re-dsm:body    re)))

            ;; If it's not one of these things, then this isn't a regexp --
            ;; it's a chunk of Scheme code producing a regexp, and we
            ;; conservatively return #T -- the expression *might* produce a
            ;; regexp containing a live submatch:
            (else (not (or (re-char-set? re) (re-string? re)
                           (re-bos? re) (re-eos? re)
                           (re-bol? re) (re-eol? re)
                           (re-bow? re) (re-eow? re)))))))
Integrated srfi-13 (string lib) & srfi-14 (char-set lib): ported sources to use these libs instead of the older string & cset libs; wrote backwards-compatibility libs for now-deprecated procs in the scsh API; hacked the makefile/package/interfaces glue. Whew. 2001-03-10 22:47:00 -05:00			`;;; Olin Shivers, June 1998`
			`;;; Copyright (c) 1998 by the Scheme Underground.`

			`;;; One export: (simplify-regexp re) -> re`

			`;;; Regexp simplifier`
			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
			`;;; (\| (in c1 ...) (in c2 ...) re ...) => (\| (in c1 ... c2 ...) re ...)`
			`;;; (\| (not-in c1 ...) (not-in c2 ...)) => (\| (not-in [intersect (c1 ...)`
			`;;; (c2 ...)])`
			`;;; A run of BOS's or a run of EOS's in a sequence may be elided.`
			`;;; Nested exponents can be collapsed (*, +, ?) -- multiply the "from's"`
			`;;; together; multiply the "to's" together.`
			`;;; Exponent range [1,1] simplifies, as does [0,0].`
			`;;; Uniquify branches`
			`;;; Adjacent literals in a sequence can be collapsed`
			`;;; A singleton-char char class can be collapsed into a constant`
			`;;; Nested choices can be collapsed`
			`;;; Nested sequences can be collapsed`
			`;;; An empty sequence (:) can be turned into an empty-string match "".`
			`;;; Singleton choices and sequences can be reduced to their body.`
			`;;;`
			`;;; The simplifier is carefully written so that it won't blow up`
			`;;; when applied to a dynamic regexp -- that is,`
			`;;; - a chunk of Scheme code that produces a regexp instead of`
			`;;; an actual regexp value;`
			`;;; - a repeat regexp whose FROM or TO fields are chunks of Scheme code`
			`;;; rather than integers;`
			`;;; - a char-set regexp whose CSET field is a chunk of Scheme code rather`
			`;;; than an actual char-set value.`
			`;;; This is useful because the RX macro can build such a regexp as part`
			`;;; of its expansion process.`

			`(define (simplify-regexp re)`
			`(receive (simp-re pre-dsm) (simp-re re)`
			`(re-dsm simp-re pre-dsm (- (re-tsm re) (+ (re-tsm simp-re) pre-dsm)))))`

			`(define (simp-re re)`
			`(? ((re-string? re) (values re 0))`
			`((re-seq? re) (simp-seq re))`
			`((re-choice? re) (simp-choice re))`

			`;; Singleton char-sets reduce to the character.`
			`;; Bear in mind the cset field might be Scheme code instead`
			`;; of an actual char set if the regexp is dynamic.`
			`((re-char-set? re)`
			`(values (let ((cs (re-char-set:cset re)))`
			`(if (and (char-set? cs)`
			`(= 1 (char-set-size cs)))`
			`(make-re-string (string (car (char-set->list cs))))`
			`re))`
			`0))`

			`((re-repeat? re) (simp-repeat re))`

			`((re-submatch? re) (simp-submatch re))`
			`((re-dsm? re) (simp-dsm re))`

			`(else (values re 0))))`



			`;;; If the body of a submatch is the empty re, reduce it to the empty re.`

			`(define (simp-submatch re)`
			`(let ((tsm (re-submatch:tsm re))`
			`(pre-dsm (re-submatch:pre-dsm re)))`
			`(receive (body1 pre-dsm1) (simp-re (re-submatch:body re))`
			`(if (re-empty? body1)`
			`(values re-empty tsm)`
			`(values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)`
			`0)))))`

			`;;; - Flatten nested DSM's.`
			`;;; - Return pre-dsm field and body field as the two return values.`

			`(define (simp-dsm re)`
			`(receive (body pre-dsm1) (simp-re (re-dsm:body re))`
			`(values body (+ (re-dsm:pre-dsm re) pre-dsm1))))`



			`;;; Simplifying sequences`
			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
			`;;; - Collapse nested sequences and DSM's.`
			`;;; - Merge adjacent strings, identical adjacent anchors (bos, eos, etc.).`
			`;;; - Bubble DSM's forwards past elts that don't contain live submatches.`
			`;;; (Going past live submatches would switch the submatch indexes around,`
			`;;; which would be an error). This helps to coalesce DSMs and if we bring`
			`;;; them all the way to the front, we can pop them off and make them a`
			`;;; pre-dsm for the entire seq record.`
			`;;; - If an elt is the re-empty, reduce the whole re to the empty re.`
			`;;; - Reduce singleton and empty seq.`

			`(define (simp-seq re)`
			`(let ((tsm (re-seq:tsm re))`
			`(elts (map simplify-regexp (re-seq:elts re))))`
			`(if (pair? elts)`

			`(call-with-current-continuation`
			`(lambda (abort)`
			`(receive (pre-dsm head tail) (simp-seq1 elts abort tsm)`
			`(values (if (pair? tail)`
			`(%make-re-seq (cons head tail) (- tsm pre-dsm))`
			`head) ; Singleton seq`
			`pre-dsm))))`

			`(values re-trivial 0)))) ; Empty seq`


			`;;; Simplify the non-empty sequence ELTS.`
			`;;; - Return the result split out into three values:`
			`;;; [head-elt-pre-dsm, head-elt, tail].`
			`;;; - If any elt is the empty (impossible) re, abort by calling`
			`;;; (abort elt tsm). TSM is otherwise unused.`

			`(define (simp-seq1 elts abort tsm)`
			`(let recur ((elt (car elts)) (elts (cdr elts)))`
			`(receive (elt pre-dsm) (open-dsm elt)`
			`(? ((re-seq? elt) ; Flatten nested seqs.`
			`(let ((sub-elts (re-seq:elts elt)))`
			`(recur (re-dsm (car sub-elts) pre-dsm 0)`
			`(append (cdr sub-elts) elts))))`

			`((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty`
			`; (impossible) re.`
			`((pair? elts)`
			`(receive (next-pre-dsm next tail) ; Simplify the tail,`
			`(recur (car elts) (cdr elts)) ; then think about`
			`; the head:`
			`;; This guy is called when we couldn't find any other`
			`;; simplification. If ELT contains live submatches, then`
			`;; there really is nothing to be done at this step -- just`
			`;; assemble the pieces together and return them. If ELT`
			`;; doesn't contain any live submatches, do the same, but`
			`;; bubble its following next-pre-dsm submatches forwards.`
			`(define (no-simp)`
			`(if (has-live-submatches? elt)`
			`(values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail))`
			`(values (+ pre-dsm next-pre-dsm) elt (cons next tail))))`

			`;; Coalesces two adjacent bol's, two adjacent eol's, etc.`
			`(define (coalesce-anchor anchor?)`
			`(if (and (anchor? elt) (anchor? next))`
			`(values (+ pre-dsm next-pre-dsm) elt tail)`
			`(no-simp)))`

			`(? ((re-trivial? elt) ; Drop trivial re's.`
			`(values (+ pre-dsm next-pre-dsm) next tail))`

			`;; Coalesce adjacent strings`
			`((re-string? elt)`
			`(if (re-string? next)`
			`(values (+ pre-dsm next-pre-dsm)`
			`(make-re-string (string-append (re-string:chars elt)`
			`(re-string:chars next)))`
			`tail)`
			`(no-simp)))`

			`;; Coalesce adjacent bol/eol/bos/eos/bow/eow's.`
			`((re-bol? elt) (coalesce-anchor re-bol?))`
			`((re-eol? elt) (coalesce-anchor re-eol?))`
			`((re-bos? elt) (coalesce-anchor re-bos?))`
			`((re-eos? elt) (coalesce-anchor re-eos?))`
			`((re-bow? elt) (coalesce-anchor re-bow?))`
			`((re-eow? elt) (coalesce-anchor re-eow?))`
			`(else (no-simp)))))`

			`(else (values pre-dsm elt '()))))))`



			`;;; Simplifying choices`
			`;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;`
			`;;; - Collapse nested choices and DSM's.`
			`;;; - Delete re-empty's.`
			`;;; - Merge sets; merge identical anchors (bos, eos, etc.).`
			`;;; But you can't merge across an element that contains a live submatch,`
			`;;; see below.`
			`;;; - A singleton string "c" is included into the char-set merge as a`
			`;;; singleton set.`
			`;;; - Bubble DSM's forwards past elts that don't contain live submatches.`
			`;;; (Going past live submatches would switch the submatch indexes around,`
			`;;; which would be an error). This helps to coalesce DSMs and if we bring`
			`;;; them all the way to the front, we can pop them off and make them a`
			`;;; pre-dsm for the entire seq record.`
			`;;; - Reduce singleton and empty choice.`
			`;;;`
			`;;; You have to be careful simplifying choices -- you can't merge two sets`
			`;;; that appear on different sides of an element containing a live submatch.`
			`;;; The problem is that the assignment of submatches breaks ties left-to-right.`
			`;;; So these aren't the same:`
			`;;; (\| (submatch "x") any) (\| any (submatch "x"))`
			`;;; The first assigns the submatch, the second doesn't -- the ANY gets credit.`
			`;;; We want to collapse multiple char-sets, bos's, and eos's, but we have`
			`;;; to deal with this issue. So`
			`;;; - When we coalesce anchors, we retain the leftmost one.`
			`;;; - We coalesce sets that appear between live-submatch boundaries.`
			`;;; When we do this, we subtract from the set any char that was in`
			`;;; an earlier coalesced char-set. If this gets us down to the empty set,`
			`;;; we drop it. If it gets us down to a singleton set, we convert it into`
			`;;; a singleton string.`
			`;;; Whew. I had to think about this one.`

			`(define (simp-choice re)`
			`(let ((tsm (re-choice:tsm re)))`

			`(receive (pre-dsm cset bos? eos? bol? eol? bow? eow? tail)`
			`(simp-choice1 (map simplify-regexp (re-choice:elts re)))`

			`(let ((tail (assemble-boundary-tail char-set:empty cset`
			`bos? eos? bol? eol? bow? eow?`
			`#f #f #f #f #f #f`
			`tail)))`
			`(values (if (pair? tail)`
			`(if (pair? (cdr tail))`
			`(%make-re-choice tail (- tsm pre-dsm))`
			`(car tail)) ; Singleton choice`
			`re-empty) ; Empty choice`
			`pre-dsm)))))`



			`;;; Given the return values from simp-choice1, this tacks all`
			`;;; the various pieces (CSET, BOS?, EOS?, etc.) onto the front of`
			`;;; TAIL. However, elements are not added onto TAIL that are already`
			`;;; described by PREV-CSET, PREV-BOS?, etc. -- they will be added onto`
			`;;; some earlier bit of the final result.`

			`(define (assemble-boundary-tail prev-cset cset`
			`bos? eos? bol? eol? bow? eow?`
			`prev-bos? prev-eos?`
			`prev-bol? prev-eol?`
			`prev-bow? prev-eow?`
			`tail)`
			`(let* ((cset (char-set-difference cset prev-cset))`
			`(numchars (char-set-size cset))`
			`(tail (if (and eos? (not prev-eos?)) (cons re-eos tail) tail))`
			`(tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail))`
			`(tail (if (and eow? (not prev-eow?)) (cons re-eow tail) tail))`
			`(tail (if (and bow? (not prev-bow?)) (cons re-bow tail) tail))`
			`(tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail))`
			`(tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail))`
			`(tail (? ((zero? numchars) tail) ; Drop empty char set.`
			`((= 1 numchars) ; {c} => "c"`
			`(cons (make-re-string (string (car (char-set->list cset))))`
			`tail))`
			`(else (cons (make-re-char-set cset) tail)))))`
			`tail))`


			`;;; Simplify the non-empty list of choices ELTS.`
			`;;; Return the result split out into the values`
			`;;; [pre-dsm, cset, bos?, eos?, bol?, eol?, bow?, eow?, tail]`

			`(define (simp-choice1 elts)`
			`(let recur ((elts elts)`

			`(prev-cset char-set:empty) ; Chars we've already seen.`

			`(prev-bos? #f) (prev-eos? #f) ; These flags say if we've`
			`(prev-bol? #f) (prev-eol? #f) ; already seen one of these`
			`(prev-bow? #f) (prev-eow? #f)) ; anchors.`


			`(if (pair? elts)`
			`(let ((elt (car elts))`
			`(elts (cdr elts)))`
			`(receive (elt pre-dsm) (open-dsm elt)`
			`(if (re-choice? elt)`

			`;; Flatten nested choices.`
			`(let ((sub-elts (re-seq:elts elt)))`
			`(receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)`
			`(recur (append sub-elts elts)`
			`prev-cset`
			`prev-bos? prev-eos?`
			`prev-bol? prev-eol?`
			`prev-bow? prev-eow?)`
			`(values (+ pre-dsm tail-pre-dsm)`
			`cset bos? eos? bol? eol? bow? eow? tail)))`

			`;; Simplify the tail, then think about the head.`
			`(receive (tail-pre-dsm cset bos? eos? bol? eol? bow? eow? tail)`
			`(recur elts`
			`(? ((and (re-string? elt)`
			`(= 1 (string-length (re-string:chars elt))))`
			`(char-set-union prev-cset`
			`(string->char-set (re-string:chars elt))))`

			`;; The cset might be a Scheme exp.`
			`((and (re-char-set? elt)`
			`(char-set? (re-char-set:cset elt)))`
			`(char-set-union prev-cset`
			`(re-char-set:cset elt)))`

			`(else prev-cset))`
			`(or prev-bos? (re-bos? elt))`
			`(or prev-eos? (re-eos? elt))`
			`(or prev-bol? (re-bol? elt))`
			`(or prev-eol? (re-eol? elt))`
			`(or prev-bow? (re-bow? elt))`
			`(or prev-eow? (re-eow? elt)))`

			`;; This guy is called when we couldn't find any other`
			`;; simplification. If ELT contains live submatches, then we`
			`;; are at a merge boundary, and have to take all the`
			`;; TAIL-PRE-DSM, CSET, BOS?, EOS?, ... stuff we've collected`
			`;; and tack them onto TAIL as elements, then put ELT on`
			`;; front. Otherwise, we can commute TAIL-PRE-DSM, CSET,`
			`;; BOS?, etc. with ELT, since it contains no live`
			`;; submatches, so just tack ELT onto TAIL.`

			`(define (no-simp)`
			`(if (has-live-submatches? elt)`
			`(let ((tail (assemble-boundary-tail prev-cset cset`
			`bos? eos?`
			`bol? eol?`
			`bow? eow?`
			`prev-bos? prev-eos?`
			`prev-bol? prev-eol?`
			`prev-bow? prev-eow?`
			`tail)))`
			`(values pre-dsm char-set:empty #f #f #f #f #f #f`
			`(if (pair? tail)`
			`;; Tack tail-pre-dsm onto`
			`;; TAIL's first elt.`
			`(cons elt`
			`(cons (re-dsm (car tail)`
			`tail-pre-dsm 0)`
			`(cdr tail)))`

			`;; Squirrel case: TAIL is empty, so use`
			`;; TAIL-PRE-DSM as ELT's post-dsm.`
			`(list (re-dsm elt 0 tail-pre-dsm)))))`

			`;; ELT has no live submatches, so we can commute all`
			`;; the recursion state forwards past it.`
			`(values (+ pre-dsm tail-pre-dsm)`
			`cset bos? eos? bol? eol? bow? eow?`
			`(cons elt tail))))`

			`(? ((and (re-char-set? elt)`
			`(char-set? (re-char-set:cset elt))) ; Might be Scheme code`
			`(values (+ pre-dsm tail-pre-dsm)`
			`(char-set-union cset (re-char-set:cset elt))`
			`bos? eos? bol? eol? bow? eow? tail))`

			`;; Treat a singleton string "c" as a singleton set {c}.`
			`((and (re-string? elt) (= 1 (string-length (re-string:chars elt))))`
			`(values (+ pre-dsm tail-pre-dsm)`
			`(char-set-union cset (string->char-set (re-string:chars elt)))`
			`bos? eos? bol? eol? bow? eow? tail))`

			`;; Coalesce bol/eol/bos/eos/bow/eow's.`
			`((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset`
			`#t eos? bol? eol? bow? eow? tail))`
			`((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset`
			`bos? #t bol? eol? bow? eow? tail))`
			`((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset`
			`bos? eos? #t eol? bow? eow? tail))`
			`((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset`
			`bos? eos? bol? #t bow? eow? tail))`
			`((re-bow? elt) (values (+ pre-dsm tail-pre-dsm) cset`
			`bos? eos? bol? eol? #t eow? tail))`
			`((re-eow? elt) (values (+ pre-dsm tail-pre-dsm) cset`
			`bos? eos? bol? eol? bow? #t tail))`

			`(else (no-simp)))))))`

			`(values 0 char-set:empty #f #f #f #f #f #f '()))))`



			`(define (simp-repeat re)`
			`(let ((from (re-repeat:from re))`
			`(to (re-repeat:to re))`
			`(body (re-repeat:body re)))`
			`(receive (simp-body pre-dsm) (simp-re body) ; Simplify body.`
			`;; The fancy reductions are all handled by REDUCE-REPEAT.`
			`(reduce-repeat from to simp-body pre-dsm))))`



			`;;; Does RE contain a live submatch?`
			`;;; If RE is dynamic, we can't tell, so we err conservatively,`
			`;;; which means we say "yes."`

			`(define (has-live-submatches? re)`
			`(or (re-submatch? re)`
			`(? ((re-seq? re) (every has-live-submatches? (re-seq:elts re)))`
			`((re-choice? re) (every has-live-submatches? (re-choice:elts re)))`
			`((re-repeat? re) (has-live-submatches? (re-repeat:body re)))`
			`((re-dsm? re) (has-live-submatches? (re-dsm:body re)))`

			`;; If it's not one of these things, then this isn't a regexp -- it's`
			`;; a chunk of Scheme code producing a regexp, and we conservatively`
			`;; return #T -- the expression might produce a regexp containing`
			`;; a live submatch:`
			`(else (not (or (re-char-set? re) (re-string? re)`
			`(re-bos? re) (re-eos? re)`
			`(re-bol? re) (re-eol? re)`
			`(re-bow? re) (re-eow? re)))))))`