Folding SRE system into scsh.

This commit is contained in:
shivers 1999-09-08 15:21:40 +00:00
parent 90d43117bf
commit 7e82845fb8
8 changed files with 50 additions and 51 deletions

View File

@ -74,7 +74,7 @@
re-string:chars set-re-string:chars
re-string:posix set-re-string:posix
trivial-re trivial-re?
re-trivial re-trivial?
re-char-set? make-re-char-set re-char-set
re-char-set:cset set-re-char-set:cset
@ -89,7 +89,7 @@
%make-re-dsm/posix
%make-re-submatch/posix
empty-re empty-re?
re-empty re-empty?
re-bos re-bos? re-eos re-eos?
re-bol re-bol? re-eol re-eol?
re-bow re-bow? re-eow re-eow?

View File

@ -39,13 +39,13 @@
re-string:chars set-re-string:chars
re-string:posix set-re-string:posix
trivial-re trivial-re?
re-trivial re-trivial?
re-char-set? make-re-char-set re-char-set
re-char-set:cset set-re-char-set:cset
re-char-set:posix set-re-char-set:posix
empty-re empty-re?
re-empty re-empty?
re-bos re-bos? re-eos re-eos?
re-bol re-bol? re-eol re-eol?
re-bow re-bow? re-eow re-eow?

View File

@ -67,8 +67,8 @@
(re-string:posix (proc (:value) :value))
(set-re-string:posix (proc (:value :value) :unspecific))
trivial-re
(trivial-re? (proc (:value) :boolean))
re-trivial
(re-trivial? (proc (:value) :boolean))
(re-char-set? (proc (:value) :boolean))
((make-re-char-set re-char-set) (proc (:value) :value))
@ -77,8 +77,8 @@
(re-char-set:posix (proc (:value) :value))
(set-re-char-set:posix (proc (:value :value) :unspecific))
empty-re
(empty-re? (proc (:value) :boolean))
re-empty
(re-empty? (proc (:value) :boolean))
re-bos re-eos
re-bol re-eol
re-bow re-eow

View File

@ -373,7 +373,7 @@
(else `(,(r op) . ,args))))
(? ((re-string? re) (if (trivial-re? re) (r 'trivial-re) ; Special hack
(? ((re-string? re) (if (re-trivial? re) (r 're-trivial) ; Special hack
(doit 'make-re-string 'make-re-string/posix
`(,(re-string:chars re))
re-string:posix)))

View File

@ -220,19 +220,18 @@
;; and allocated PREV-SMCOUNT submatches.
(let ((elt (car elts)) (tail (cdr elts)))
(receive (s1 level1 pcount1 submatches1) (translate-regexp elt)
(if (pair? tail)
(receive (s level pcount submatches)
(recur tail
(+ pcount1 prev-pcount)
(+ prev-smcount (re-tsm elt)))
(values (string-append s1 "|" s) 3
(+ pcount1 pcount)
(vector-append (mapv (lambda (sm)
(and sm (+ sm prev-smcount)))
submatches1)
submatches)))
(let ((submatches1 (mapv (lambda (sm) (and sm (+ sm prev-pcount)))
submatches1)))
(if (pair? tail)
(receive (s level pcount submatches)
(recur tail
(+ pcount1 prev-pcount)
(+ prev-smcount (re-tsm elt)))
(values (string-append s1 "|" s) 3
(+ pcount1 pcount)
(vector-append submatches1 submatches)))
(values s1 level1 pcount1 submatches1)))))
(values s1 level1 pcount1 submatches1))))))
(values "[^\000-\377]" 1 0 (n-falses tsm))))) ; Empty choice.

View File

@ -106,7 +106,7 @@
(tail (recur (cdr res))))
(? ((re-seq? re) ; Flatten nested seqs
(append (recur (re-seq:elts re)) tail))
((trivial-re? re) tail) ; Drop trivial elts
((re-trivial? re) tail) ; Drop trivial elts
(else (cons re tail))))
'()))))
@ -114,7 +114,7 @@
(if (pair? (cdr res))
(make-re-seq res) ; General case
(car res)) ; Singleton sequence
trivial-re))) ; Empty seq -- ""
re-trivial))) ; Empty seq -- ""
;;; Choice: (| re ...)
@ -156,7 +156,7 @@
(tail (recur (cdr res))))
(? ((re-choice? re) ; Flatten nested choices
(append (recur (re-choice:elts re)) tail))
((empty-re? re) tail) ; Drop empty re's.
((re-empty? re) tail) ; Drop empty re's.
(else (cons re tail))))
'()))))
;; If all elts are char-class re's, fold them together.
@ -175,7 +175,7 @@
(if (pair? (cdr res))
(make-re-choice res) ; General case
(car res)) ; Singleton sequence
empty-re)))) ; Empty choice = ("")
re-empty)))) ; Empty choice = ("")
;;; Repetition (*,?,+,=,>=,**)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -254,15 +254,15 @@
(values body1 pre-dsm))
((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => ""
(values trivial-re (+ (re-tsm body1) pre-dsm)))
(values re-trivial (+ (re-tsm body1) pre-dsm)))
;; re{m,n} => empty-re when m>n:
;; re{m,n} => re-empty when m>n:
((and (integer? from) (integer? to) (> from to))
(values empty-re (+ (re-tsm body1) pre-dsm)))
(values re-empty (+ (re-tsm body1) pre-dsm)))
;; Reduce the body = empty-re case.
((and (empty-re? body1) (integer? from)) ; (+ (in)) => (in)
(values (if (> from 0) empty-re trivial-re) ; (* (in)) => ""
;; Reduce the body = re-empty case.
((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in)
(values (if (> from 0) re-empty re-trivial) ; (* (in)) => ""
pre-dsm))
;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1.
@ -317,21 +317,21 @@
;;; Slightly smart submatch constructor
;;; - DSM's unpacked
;;; - If BODY is the empty-re, we'll never match, so just produce a DSM.
;;; - If BODY is the re-empty, we'll never match, so just produce a DSM.
(define (re-submatch body . maybe-pre+post-dsm)
(let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0))
(let ((tsm (+ 1 pre-dsm (re-tsm body) post-dsm)))
(receive (body1 pre-dsm1) (open-dsm body)
(if (empty-re? body1)
(re-dsm empty-re tsm 0)
(if (re-empty? body1)
(re-dsm re-empty tsm 0)
(%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm))))))
;;; Other regexps : string, char-set, bos & eos
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Also, empty-re and trivial-re.
;;; Also, re-empty and re-trivial.
(define-record re-string
chars ; String
@ -347,9 +347,9 @@
re))
;;; Matches the empty string anywhere.
(define trivial-re (make-re-string/posix "" "" '#()))
(define re-trivial (make-re-string/posix "" "" '#()))
(define (trivial-re? re)
(define (re-trivial? re)
(and (re-string? re) (zero? (string-length (re-string:chars re)))))
(define-record re-char-set
@ -366,9 +366,9 @@
;;; Never matches
;;; NEED TO OPTIMIZE - PRE-SET POSIX FIELD.
(define empty-re (make-re-char-set char-set:empty))
(define re-empty (make-re-char-set char-set:empty))
(define (empty-re? re)
(define (re-empty? re)
(and (re-char-set? re)
(let ((cs (re-char-set:cset re)))
(and (char-set? cs) ; Might be code...

View File

@ -66,8 +66,8 @@
(let ((tsm (re-submatch:tsm re))
(pre-dsm (re-submatch:pre-dsm re)))
(receive (body1 pre-dsm1) (simp-re (re-submatch:body re))
(if (empty-re? body1)
(values empty-re tsm)
(if (re-empty? body1)
(values re-empty tsm)
(values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)
0)))))
@ -89,7 +89,7 @@
;;; which would be an error). This helps to coalesce DSMs and if we bring
;;; them all the way to the front, we can pop them off and make them a
;;; pre-dsm for the entire seq record.
;;; - If an elt is the empty-re, reduce the whole re to the empty re.
;;; - If an elt is the re-empty, reduce the whole re to the empty re.
;;; - Reduce singleton and empty seq.
(define (simp-seq re)
@ -105,7 +105,7 @@
head) ; Singleton seq
pre-dsm))))
(values trivial-re 0)))) ; Empty seq
(values re-trivial 0)))) ; Empty seq
;;; Simplify the non-empty sequence ELTS.
@ -122,7 +122,7 @@
(recur (re-dsm (car sub-elts) pre-dsm 0)
(append (cdr sub-elts) elts))))
((empty-re? elt) (abort elt tsm)) ; Bomb out on the empty
((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty
; (impossible) re.
((pair? elts)
(receive (next-pre-dsm next tail) ; Simplify the tail,
@ -145,7 +145,7 @@
(values (+ pre-dsm next-pre-dsm) elt tail)
(no-simp)))
(? ((trivial-re? elt) ; Drop trivial re's.
(? ((re-trivial? elt) ; Drop trivial re's.
(values (+ pre-dsm next-pre-dsm) next tail))
;; Coalesce adjacent strings
@ -173,7 +173,7 @@
;;; Simplifying choices
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; - Collapse nested choices and DSM's.
;;; - Delete empty-re's.
;;; - Delete re-empty's.
;;; - Merge sets; merge identical anchors (bos, eos, etc.).
;;; But you can't merge across an element that contains a live submatch,
;;; see below.
@ -216,7 +216,7 @@
(if (pair? (cdr tail))
(%make-re-choice tail (- tsm pre-dsm))
(car tail)) ; Singleton choice
empty-re) ; Empty choice
re-empty) ; Empty choice
pre-dsm)))))

View File

@ -24,7 +24,7 @@
(char=? #\| (string-ref s i)))
(lp (+ i 1) branches)
(values (re-choice (reverse branches)) i)))))
(values trivial-re i))))
(values re-trivial i))))
;;; A branch is a sequence of pieces -- stuff that goes in-between |'s.
@ -92,11 +92,11 @@
(+ i 1))
(error "Regexps may not terminate with a backslash" s))))
((#\) #\| #\* #\+ #\? #\{) (values trivial-re i))
((#\) #\| #\* #\+ #\? #\{) (values re-trivial i))
(else (values (make-re-string (string c)) (+ i 1)))))
(values trivial-re i))))
(values re-trivial i))))
;;; Parse a [...] or [^...] bracket expression into a regexp.