From 7e82845fb80f9e062e403c8f765826fa35fbf120 Mon Sep 17 00:00:00 2001 From: shivers Date: Wed, 8 Sep 1999 15:21:40 +0000 Subject: [PATCH] Folding SRE system into scsh. --- scsh/rx/packages-old.scm | 4 ++-- scsh/rx/packages-old2.scm | 4 ++-- scsh/rx/packages.scm | 8 ++++---- scsh/rx/parse.scm | 2 +- scsh/rx/posixstr.scm | 25 ++++++++++++------------- scsh/rx/re.scm | 36 ++++++++++++++++++------------------ scsh/rx/simp.scm | 16 ++++++++-------- scsh/rx/spencer.scm | 6 +++--- 8 files changed, 50 insertions(+), 51 deletions(-) diff --git a/scsh/rx/packages-old.scm b/scsh/rx/packages-old.scm index ec10277..46e6d86 100644 --- a/scsh/rx/packages-old.scm +++ b/scsh/rx/packages-old.scm @@ -74,7 +74,7 @@ re-string:chars set-re-string:chars re-string:posix set-re-string:posix - trivial-re trivial-re? + re-trivial re-trivial? re-char-set? make-re-char-set re-char-set re-char-set:cset set-re-char-set:cset @@ -89,7 +89,7 @@ %make-re-dsm/posix %make-re-submatch/posix - empty-re empty-re? + re-empty re-empty? re-bos re-bos? re-eos re-eos? re-bol re-bol? re-eol re-eol? re-bow re-bow? re-eow re-eow? diff --git a/scsh/rx/packages-old2.scm b/scsh/rx/packages-old2.scm index 28133b2..22a2906 100644 --- a/scsh/rx/packages-old2.scm +++ b/scsh/rx/packages-old2.scm @@ -39,13 +39,13 @@ re-string:chars set-re-string:chars re-string:posix set-re-string:posix - trivial-re trivial-re? + re-trivial re-trivial? re-char-set? make-re-char-set re-char-set re-char-set:cset set-re-char-set:cset re-char-set:posix set-re-char-set:posix - empty-re empty-re? + re-empty re-empty? re-bos re-bos? re-eos re-eos? re-bol re-bol? re-eol re-eol? re-bow re-bow? re-eow re-eow? diff --git a/scsh/rx/packages.scm b/scsh/rx/packages.scm index 2d06a7e..06a10c3 100644 --- a/scsh/rx/packages.scm +++ b/scsh/rx/packages.scm @@ -67,8 +67,8 @@ (re-string:posix (proc (:value) :value)) (set-re-string:posix (proc (:value :value) :unspecific)) - trivial-re - (trivial-re? (proc (:value) :boolean)) + re-trivial + (re-trivial? (proc (:value) :boolean)) (re-char-set? (proc (:value) :boolean)) ((make-re-char-set re-char-set) (proc (:value) :value)) @@ -77,8 +77,8 @@ (re-char-set:posix (proc (:value) :value)) (set-re-char-set:posix (proc (:value :value) :unspecific)) - empty-re - (empty-re? (proc (:value) :boolean)) + re-empty + (re-empty? (proc (:value) :boolean)) re-bos re-eos re-bol re-eol re-bow re-eow diff --git a/scsh/rx/parse.scm b/scsh/rx/parse.scm index a4cea78..fed54e0 100644 --- a/scsh/rx/parse.scm +++ b/scsh/rx/parse.scm @@ -373,7 +373,7 @@ (else `(,(r op) . ,args)))) - (? ((re-string? re) (if (trivial-re? re) (r 'trivial-re) ; Special hack + (? ((re-string? re) (if (re-trivial? re) (r 're-trivial) ; Special hack (doit 'make-re-string 'make-re-string/posix `(,(re-string:chars re)) re-string:posix))) diff --git a/scsh/rx/posixstr.scm b/scsh/rx/posixstr.scm index 45f4dd5..a195ed7 100644 --- a/scsh/rx/posixstr.scm +++ b/scsh/rx/posixstr.scm @@ -220,19 +220,18 @@ ;; and allocated PREV-SMCOUNT submatches. (let ((elt (car elts)) (tail (cdr elts))) (receive (s1 level1 pcount1 submatches1) (translate-regexp elt) - (if (pair? tail) - (receive (s level pcount submatches) - (recur tail - (+ pcount1 prev-pcount) - (+ prev-smcount (re-tsm elt))) - (values (string-append s1 "|" s) 3 - (+ pcount1 pcount) - (vector-append (mapv (lambda (sm) - (and sm (+ sm prev-smcount))) - submatches1) - submatches))) - - (values s1 level1 pcount1 submatches1))))) + (let ((submatches1 (mapv (lambda (sm) (and sm (+ sm prev-pcount))) + submatches1))) + (if (pair? tail) + (receive (s level pcount submatches) + (recur tail + (+ pcount1 prev-pcount) + (+ prev-smcount (re-tsm elt))) + (values (string-append s1 "|" s) 3 + (+ pcount1 pcount) + (vector-append submatches1 submatches))) + + (values s1 level1 pcount1 submatches1)))))) (values "[^\000-\377]" 1 0 (n-falses tsm))))) ; Empty choice. diff --git a/scsh/rx/re.scm b/scsh/rx/re.scm index d62efb6..bf03461 100644 --- a/scsh/rx/re.scm +++ b/scsh/rx/re.scm @@ -106,7 +106,7 @@ (tail (recur (cdr res)))) (? ((re-seq? re) ; Flatten nested seqs (append (recur (re-seq:elts re)) tail)) - ((trivial-re? re) tail) ; Drop trivial elts + ((re-trivial? re) tail) ; Drop trivial elts (else (cons re tail)))) '())))) @@ -114,7 +114,7 @@ (if (pair? (cdr res)) (make-re-seq res) ; General case (car res)) ; Singleton sequence - trivial-re))) ; Empty seq -- "" + re-trivial))) ; Empty seq -- "" ;;; Choice: (| re ...) @@ -156,7 +156,7 @@ (tail (recur (cdr res)))) (? ((re-choice? re) ; Flatten nested choices (append (recur (re-choice:elts re)) tail)) - ((empty-re? re) tail) ; Drop empty re's. + ((re-empty? re) tail) ; Drop empty re's. (else (cons re tail)))) '())))) ;; If all elts are char-class re's, fold them together. @@ -175,7 +175,7 @@ (if (pair? (cdr res)) (make-re-choice res) ; General case (car res)) ; Singleton sequence - empty-re)))) ; Empty choice = ("") + re-empty)))) ; Empty choice = ("") ;;; Repetition (*,?,+,=,>=,**) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -254,15 +254,15 @@ (values body1 pre-dsm)) ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" - (values trivial-re (+ (re-tsm body1) pre-dsm))) + (values re-trivial (+ (re-tsm body1) pre-dsm))) - ;; re{m,n} => empty-re when m>n: + ;; re{m,n} => re-empty when m>n: ((and (integer? from) (integer? to) (> from to)) - (values empty-re (+ (re-tsm body1) pre-dsm))) + (values re-empty (+ (re-tsm body1) pre-dsm))) - ;; Reduce the body = empty-re case. - ((and (empty-re? body1) (integer? from)) ; (+ (in)) => (in) - (values (if (> from 0) empty-re trivial-re) ; (* (in)) => "" + ;; Reduce the body = re-empty case. + ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in) + (values (if (> from 0) re-empty re-trivial) ; (* (in)) => "" pre-dsm)) ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. @@ -317,21 +317,21 @@ ;;; Slightly smart submatch constructor ;;; - DSM's unpacked -;;; - If BODY is the empty-re, we'll never match, so just produce a DSM. +;;; - If BODY is the re-empty, we'll never match, so just produce a DSM. (define (re-submatch body . maybe-pre+post-dsm) (let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0)) (let ((tsm (+ 1 pre-dsm (re-tsm body) post-dsm))) (receive (body1 pre-dsm1) (open-dsm body) - (if (empty-re? body1) - (re-dsm empty-re tsm 0) + (if (re-empty? body1) + (re-dsm re-empty tsm 0) (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)))))) ;;; Other regexps : string, char-set, bos & eos ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Also, empty-re and trivial-re. +;;; Also, re-empty and re-trivial. (define-record re-string chars ; String @@ -347,9 +347,9 @@ re)) ;;; Matches the empty string anywhere. -(define trivial-re (make-re-string/posix "" "" '#())) +(define re-trivial (make-re-string/posix "" "" '#())) -(define (trivial-re? re) +(define (re-trivial? re) (and (re-string? re) (zero? (string-length (re-string:chars re))))) (define-record re-char-set @@ -366,9 +366,9 @@ ;;; Never matches ;;; NEED TO OPTIMIZE - PRE-SET POSIX FIELD. -(define empty-re (make-re-char-set char-set:empty)) +(define re-empty (make-re-char-set char-set:empty)) -(define (empty-re? re) +(define (re-empty? re) (and (re-char-set? re) (let ((cs (re-char-set:cset re))) (and (char-set? cs) ; Might be code... diff --git a/scsh/rx/simp.scm b/scsh/rx/simp.scm index 07dde93..8d15347 100644 --- a/scsh/rx/simp.scm +++ b/scsh/rx/simp.scm @@ -66,8 +66,8 @@ (let ((tsm (re-submatch:tsm re)) (pre-dsm (re-submatch:pre-dsm re))) (receive (body1 pre-dsm1) (simp-re (re-submatch:body re)) - (if (empty-re? body1) - (values empty-re tsm) + (if (re-empty? body1) + (values re-empty tsm) (values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm) 0))))) @@ -89,7 +89,7 @@ ;;; which would be an error). This helps to coalesce DSMs and if we bring ;;; them all the way to the front, we can pop them off and make them a ;;; pre-dsm for the entire seq record. -;;; - If an elt is the empty-re, reduce the whole re to the empty re. +;;; - If an elt is the re-empty, reduce the whole re to the empty re. ;;; - Reduce singleton and empty seq. (define (simp-seq re) @@ -105,7 +105,7 @@ head) ; Singleton seq pre-dsm)))) - (values trivial-re 0)))) ; Empty seq + (values re-trivial 0)))) ; Empty seq ;;; Simplify the non-empty sequence ELTS. @@ -122,7 +122,7 @@ (recur (re-dsm (car sub-elts) pre-dsm 0) (append (cdr sub-elts) elts)))) - ((empty-re? elt) (abort elt tsm)) ; Bomb out on the empty + ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty ; (impossible) re. ((pair? elts) (receive (next-pre-dsm next tail) ; Simplify the tail, @@ -145,7 +145,7 @@ (values (+ pre-dsm next-pre-dsm) elt tail) (no-simp))) - (? ((trivial-re? elt) ; Drop trivial re's. + (? ((re-trivial? elt) ; Drop trivial re's. (values (+ pre-dsm next-pre-dsm) next tail)) ;; Coalesce adjacent strings @@ -173,7 +173,7 @@ ;;; Simplifying choices ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; - Collapse nested choices and DSM's. -;;; - Delete empty-re's. +;;; - Delete re-empty's. ;;; - Merge sets; merge identical anchors (bos, eos, etc.). ;;; But you can't merge across an element that contains a live submatch, ;;; see below. @@ -216,7 +216,7 @@ (if (pair? (cdr tail)) (%make-re-choice tail (- tsm pre-dsm)) (car tail)) ; Singleton choice - empty-re) ; Empty choice + re-empty) ; Empty choice pre-dsm))))) diff --git a/scsh/rx/spencer.scm b/scsh/rx/spencer.scm index 4d3eeb5..2656d48 100644 --- a/scsh/rx/spencer.scm +++ b/scsh/rx/spencer.scm @@ -24,7 +24,7 @@ (char=? #\| (string-ref s i))) (lp (+ i 1) branches) (values (re-choice (reverse branches)) i))))) - (values trivial-re i)))) + (values re-trivial i)))) ;;; A branch is a sequence of pieces -- stuff that goes in-between |'s. @@ -92,11 +92,11 @@ (+ i 1)) (error "Regexps may not terminate with a backslash" s)))) - ((#\) #\| #\* #\+ #\? #\{) (values trivial-re i)) + ((#\) #\| #\* #\+ #\? #\{) (values re-trivial i)) (else (values (make-re-string (string c)) (+ i 1))))) - (values trivial-re i)))) + (values re-trivial i)))) ;;; Parse a [...] or [^...] bracket expression into a regexp.