Folding SRE system into scsh.

This commit is contained in:
shivers 1999-09-08 15:21:40 +00:00
parent 90d43117bf
commit 7e82845fb8
8 changed files with 50 additions and 51 deletions

View File

@ -74,7 +74,7 @@
re-string:chars set-re-string:chars re-string:chars set-re-string:chars
re-string:posix set-re-string:posix re-string:posix set-re-string:posix
trivial-re trivial-re? re-trivial re-trivial?
re-char-set? make-re-char-set re-char-set re-char-set? make-re-char-set re-char-set
re-char-set:cset set-re-char-set:cset re-char-set:cset set-re-char-set:cset
@ -89,7 +89,7 @@
%make-re-dsm/posix %make-re-dsm/posix
%make-re-submatch/posix %make-re-submatch/posix
empty-re empty-re? re-empty re-empty?
re-bos re-bos? re-eos re-eos? re-bos re-bos? re-eos re-eos?
re-bol re-bol? re-eol re-eol? re-bol re-bol? re-eol re-eol?
re-bow re-bow? re-eow re-eow? re-bow re-bow? re-eow re-eow?

View File

@ -39,13 +39,13 @@
re-string:chars set-re-string:chars re-string:chars set-re-string:chars
re-string:posix set-re-string:posix re-string:posix set-re-string:posix
trivial-re trivial-re? re-trivial re-trivial?
re-char-set? make-re-char-set re-char-set re-char-set? make-re-char-set re-char-set
re-char-set:cset set-re-char-set:cset re-char-set:cset set-re-char-set:cset
re-char-set:posix set-re-char-set:posix re-char-set:posix set-re-char-set:posix
empty-re empty-re? re-empty re-empty?
re-bos re-bos? re-eos re-eos? re-bos re-bos? re-eos re-eos?
re-bol re-bol? re-eol re-eol? re-bol re-bol? re-eol re-eol?
re-bow re-bow? re-eow re-eow? re-bow re-bow? re-eow re-eow?

View File

@ -67,8 +67,8 @@
(re-string:posix (proc (:value) :value)) (re-string:posix (proc (:value) :value))
(set-re-string:posix (proc (:value :value) :unspecific)) (set-re-string:posix (proc (:value :value) :unspecific))
trivial-re re-trivial
(trivial-re? (proc (:value) :boolean)) (re-trivial? (proc (:value) :boolean))
(re-char-set? (proc (:value) :boolean)) (re-char-set? (proc (:value) :boolean))
((make-re-char-set re-char-set) (proc (:value) :value)) ((make-re-char-set re-char-set) (proc (:value) :value))
@ -77,8 +77,8 @@
(re-char-set:posix (proc (:value) :value)) (re-char-set:posix (proc (:value) :value))
(set-re-char-set:posix (proc (:value :value) :unspecific)) (set-re-char-set:posix (proc (:value :value) :unspecific))
empty-re re-empty
(empty-re? (proc (:value) :boolean)) (re-empty? (proc (:value) :boolean))
re-bos re-eos re-bos re-eos
re-bol re-eol re-bol re-eol
re-bow re-eow re-bow re-eow

View File

@ -373,7 +373,7 @@
(else `(,(r op) . ,args)))) (else `(,(r op) . ,args))))
(? ((re-string? re) (if (trivial-re? re) (r 'trivial-re) ; Special hack (? ((re-string? re) (if (re-trivial? re) (r 're-trivial) ; Special hack
(doit 'make-re-string 'make-re-string/posix (doit 'make-re-string 'make-re-string/posix
`(,(re-string:chars re)) `(,(re-string:chars re))
re-string:posix))) re-string:posix)))

View File

@ -220,6 +220,8 @@
;; and allocated PREV-SMCOUNT submatches. ;; and allocated PREV-SMCOUNT submatches.
(let ((elt (car elts)) (tail (cdr elts))) (let ((elt (car elts)) (tail (cdr elts)))
(receive (s1 level1 pcount1 submatches1) (translate-regexp elt) (receive (s1 level1 pcount1 submatches1) (translate-regexp elt)
(let ((submatches1 (mapv (lambda (sm) (and sm (+ sm prev-pcount)))
submatches1)))
(if (pair? tail) (if (pair? tail)
(receive (s level pcount submatches) (receive (s level pcount submatches)
(recur tail (recur tail
@ -227,12 +229,9 @@
(+ prev-smcount (re-tsm elt))) (+ prev-smcount (re-tsm elt)))
(values (string-append s1 "|" s) 3 (values (string-append s1 "|" s) 3
(+ pcount1 pcount) (+ pcount1 pcount)
(vector-append (mapv (lambda (sm) (vector-append submatches1 submatches)))
(and sm (+ sm prev-smcount)))
submatches1)
submatches)))
(values s1 level1 pcount1 submatches1))))) (values s1 level1 pcount1 submatches1))))))
(values "[^\000-\377]" 1 0 (n-falses tsm))))) ; Empty choice. (values "[^\000-\377]" 1 0 (n-falses tsm))))) ; Empty choice.

View File

@ -106,7 +106,7 @@
(tail (recur (cdr res)))) (tail (recur (cdr res))))
(? ((re-seq? re) ; Flatten nested seqs (? ((re-seq? re) ; Flatten nested seqs
(append (recur (re-seq:elts re)) tail)) (append (recur (re-seq:elts re)) tail))
((trivial-re? re) tail) ; Drop trivial elts ((re-trivial? re) tail) ; Drop trivial elts
(else (cons re tail)))) (else (cons re tail))))
'())))) '()))))
@ -114,7 +114,7 @@
(if (pair? (cdr res)) (if (pair? (cdr res))
(make-re-seq res) ; General case (make-re-seq res) ; General case
(car res)) ; Singleton sequence (car res)) ; Singleton sequence
trivial-re))) ; Empty seq -- "" re-trivial))) ; Empty seq -- ""
;;; Choice: (| re ...) ;;; Choice: (| re ...)
@ -156,7 +156,7 @@
(tail (recur (cdr res)))) (tail (recur (cdr res))))
(? ((re-choice? re) ; Flatten nested choices (? ((re-choice? re) ; Flatten nested choices
(append (recur (re-choice:elts re)) tail)) (append (recur (re-choice:elts re)) tail))
((empty-re? re) tail) ; Drop empty re's. ((re-empty? re) tail) ; Drop empty re's.
(else (cons re tail)))) (else (cons re tail))))
'())))) '()))))
;; If all elts are char-class re's, fold them together. ;; If all elts are char-class re's, fold them together.
@ -175,7 +175,7 @@
(if (pair? (cdr res)) (if (pair? (cdr res))
(make-re-choice res) ; General case (make-re-choice res) ; General case
(car res)) ; Singleton sequence (car res)) ; Singleton sequence
empty-re)))) ; Empty choice = ("") re-empty)))) ; Empty choice = ("")
;;; Repetition (*,?,+,=,>=,**) ;;; Repetition (*,?,+,=,>=,**)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -254,15 +254,15 @@
(values body1 pre-dsm)) (values body1 pre-dsm))
((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => ""
(values trivial-re (+ (re-tsm body1) pre-dsm))) (values re-trivial (+ (re-tsm body1) pre-dsm)))
;; re{m,n} => empty-re when m>n: ;; re{m,n} => re-empty when m>n:
((and (integer? from) (integer? to) (> from to)) ((and (integer? from) (integer? to) (> from to))
(values empty-re (+ (re-tsm body1) pre-dsm))) (values re-empty (+ (re-tsm body1) pre-dsm)))
;; Reduce the body = empty-re case. ;; Reduce the body = re-empty case.
((and (empty-re? body1) (integer? from)) ; (+ (in)) => (in) ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in)
(values (if (> from 0) empty-re trivial-re) ; (* (in)) => "" (values (if (> from 0) re-empty re-trivial) ; (* (in)) => ""
pre-dsm)) pre-dsm))
;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1.
@ -317,21 +317,21 @@
;;; Slightly smart submatch constructor ;;; Slightly smart submatch constructor
;;; - DSM's unpacked ;;; - DSM's unpacked
;;; - If BODY is the empty-re, we'll never match, so just produce a DSM. ;;; - If BODY is the re-empty, we'll never match, so just produce a DSM.
(define (re-submatch body . maybe-pre+post-dsm) (define (re-submatch body . maybe-pre+post-dsm)
(let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0)) (let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0))
(let ((tsm (+ 1 pre-dsm (re-tsm body) post-dsm))) (let ((tsm (+ 1 pre-dsm (re-tsm body) post-dsm)))
(receive (body1 pre-dsm1) (open-dsm body) (receive (body1 pre-dsm1) (open-dsm body)
(if (empty-re? body1) (if (re-empty? body1)
(re-dsm empty-re tsm 0) (re-dsm re-empty tsm 0)
(%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)))))) (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm))))))
;;; Other regexps : string, char-set, bos & eos ;;; Other regexps : string, char-set, bos & eos
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Also, empty-re and trivial-re. ;;; Also, re-empty and re-trivial.
(define-record re-string (define-record re-string
chars ; String chars ; String
@ -347,9 +347,9 @@
re)) re))
;;; Matches the empty string anywhere. ;;; Matches the empty string anywhere.
(define trivial-re (make-re-string/posix "" "" '#())) (define re-trivial (make-re-string/posix "" "" '#()))
(define (trivial-re? re) (define (re-trivial? re)
(and (re-string? re) (zero? (string-length (re-string:chars re))))) (and (re-string? re) (zero? (string-length (re-string:chars re)))))
(define-record re-char-set (define-record re-char-set
@ -366,9 +366,9 @@
;;; Never matches ;;; Never matches
;;; NEED TO OPTIMIZE - PRE-SET POSIX FIELD. ;;; NEED TO OPTIMIZE - PRE-SET POSIX FIELD.
(define empty-re (make-re-char-set char-set:empty)) (define re-empty (make-re-char-set char-set:empty))
(define (empty-re? re) (define (re-empty? re)
(and (re-char-set? re) (and (re-char-set? re)
(let ((cs (re-char-set:cset re))) (let ((cs (re-char-set:cset re)))
(and (char-set? cs) ; Might be code... (and (char-set? cs) ; Might be code...

View File

@ -66,8 +66,8 @@
(let ((tsm (re-submatch:tsm re)) (let ((tsm (re-submatch:tsm re))
(pre-dsm (re-submatch:pre-dsm re))) (pre-dsm (re-submatch:pre-dsm re)))
(receive (body1 pre-dsm1) (simp-re (re-submatch:body re)) (receive (body1 pre-dsm1) (simp-re (re-submatch:body re))
(if (empty-re? body1) (if (re-empty? body1)
(values empty-re tsm) (values re-empty tsm)
(values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm) (values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)
0))))) 0)))))
@ -89,7 +89,7 @@
;;; which would be an error). This helps to coalesce DSMs and if we bring ;;; which would be an error). This helps to coalesce DSMs and if we bring
;;; them all the way to the front, we can pop them off and make them a ;;; them all the way to the front, we can pop them off and make them a
;;; pre-dsm for the entire seq record. ;;; pre-dsm for the entire seq record.
;;; - If an elt is the empty-re, reduce the whole re to the empty re. ;;; - If an elt is the re-empty, reduce the whole re to the empty re.
;;; - Reduce singleton and empty seq. ;;; - Reduce singleton and empty seq.
(define (simp-seq re) (define (simp-seq re)
@ -105,7 +105,7 @@
head) ; Singleton seq head) ; Singleton seq
pre-dsm)))) pre-dsm))))
(values trivial-re 0)))) ; Empty seq (values re-trivial 0)))) ; Empty seq
;;; Simplify the non-empty sequence ELTS. ;;; Simplify the non-empty sequence ELTS.
@ -122,7 +122,7 @@
(recur (re-dsm (car sub-elts) pre-dsm 0) (recur (re-dsm (car sub-elts) pre-dsm 0)
(append (cdr sub-elts) elts)))) (append (cdr sub-elts) elts))))
((empty-re? elt) (abort elt tsm)) ; Bomb out on the empty ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty
; (impossible) re. ; (impossible) re.
((pair? elts) ((pair? elts)
(receive (next-pre-dsm next tail) ; Simplify the tail, (receive (next-pre-dsm next tail) ; Simplify the tail,
@ -145,7 +145,7 @@
(values (+ pre-dsm next-pre-dsm) elt tail) (values (+ pre-dsm next-pre-dsm) elt tail)
(no-simp))) (no-simp)))
(? ((trivial-re? elt) ; Drop trivial re's. (? ((re-trivial? elt) ; Drop trivial re's.
(values (+ pre-dsm next-pre-dsm) next tail)) (values (+ pre-dsm next-pre-dsm) next tail))
;; Coalesce adjacent strings ;; Coalesce adjacent strings
@ -173,7 +173,7 @@
;;; Simplifying choices ;;; Simplifying choices
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; - Collapse nested choices and DSM's. ;;; - Collapse nested choices and DSM's.
;;; - Delete empty-re's. ;;; - Delete re-empty's.
;;; - Merge sets; merge identical anchors (bos, eos, etc.). ;;; - Merge sets; merge identical anchors (bos, eos, etc.).
;;; But you can't merge across an element that contains a live submatch, ;;; But you can't merge across an element that contains a live submatch,
;;; see below. ;;; see below.
@ -216,7 +216,7 @@
(if (pair? (cdr tail)) (if (pair? (cdr tail))
(%make-re-choice tail (- tsm pre-dsm)) (%make-re-choice tail (- tsm pre-dsm))
(car tail)) ; Singleton choice (car tail)) ; Singleton choice
empty-re) ; Empty choice re-empty) ; Empty choice
pre-dsm))))) pre-dsm)))))

View File

@ -24,7 +24,7 @@
(char=? #\| (string-ref s i))) (char=? #\| (string-ref s i)))
(lp (+ i 1) branches) (lp (+ i 1) branches)
(values (re-choice (reverse branches)) i))))) (values (re-choice (reverse branches)) i)))))
(values trivial-re i)))) (values re-trivial i))))
;;; A branch is a sequence of pieces -- stuff that goes in-between |'s. ;;; A branch is a sequence of pieces -- stuff that goes in-between |'s.
@ -92,11 +92,11 @@
(+ i 1)) (+ i 1))
(error "Regexps may not terminate with a backslash" s)))) (error "Regexps may not terminate with a backslash" s))))
((#\) #\| #\* #\+ #\? #\{) (values trivial-re i)) ((#\) #\| #\* #\+ #\? #\{) (values re-trivial i))
(else (values (make-re-string (string c)) (+ i 1))))) (else (values (make-re-string (string c)) (+ i 1)))))
(values trivial-re i)))) (values re-trivial i))))
;;; Parse a [...] or [^...] bracket expression into a regexp. ;;; Parse a [...] or [^...] bracket expression into a regexp.