Folding SRE system into scsh.
This commit is contained in:
parent
90d43117bf
commit
7e82845fb8
|
@ -74,7 +74,7 @@
|
|||
re-string:chars set-re-string:chars
|
||||
re-string:posix set-re-string:posix
|
||||
|
||||
trivial-re trivial-re?
|
||||
re-trivial re-trivial?
|
||||
|
||||
re-char-set? make-re-char-set re-char-set
|
||||
re-char-set:cset set-re-char-set:cset
|
||||
|
@ -89,7 +89,7 @@
|
|||
%make-re-dsm/posix
|
||||
%make-re-submatch/posix
|
||||
|
||||
empty-re empty-re?
|
||||
re-empty re-empty?
|
||||
re-bos re-bos? re-eos re-eos?
|
||||
re-bol re-bol? re-eol re-eol?
|
||||
re-bow re-bow? re-eow re-eow?
|
||||
|
|
|
@ -39,13 +39,13 @@
|
|||
re-string:chars set-re-string:chars
|
||||
re-string:posix set-re-string:posix
|
||||
|
||||
trivial-re trivial-re?
|
||||
re-trivial re-trivial?
|
||||
|
||||
re-char-set? make-re-char-set re-char-set
|
||||
re-char-set:cset set-re-char-set:cset
|
||||
re-char-set:posix set-re-char-set:posix
|
||||
|
||||
empty-re empty-re?
|
||||
re-empty re-empty?
|
||||
re-bos re-bos? re-eos re-eos?
|
||||
re-bol re-bol? re-eol re-eol?
|
||||
re-bow re-bow? re-eow re-eow?
|
||||
|
|
|
@ -67,8 +67,8 @@
|
|||
(re-string:posix (proc (:value) :value))
|
||||
(set-re-string:posix (proc (:value :value) :unspecific))
|
||||
|
||||
trivial-re
|
||||
(trivial-re? (proc (:value) :boolean))
|
||||
re-trivial
|
||||
(re-trivial? (proc (:value) :boolean))
|
||||
|
||||
(re-char-set? (proc (:value) :boolean))
|
||||
((make-re-char-set re-char-set) (proc (:value) :value))
|
||||
|
@ -77,8 +77,8 @@
|
|||
(re-char-set:posix (proc (:value) :value))
|
||||
(set-re-char-set:posix (proc (:value :value) :unspecific))
|
||||
|
||||
empty-re
|
||||
(empty-re? (proc (:value) :boolean))
|
||||
re-empty
|
||||
(re-empty? (proc (:value) :boolean))
|
||||
re-bos re-eos
|
||||
re-bol re-eol
|
||||
re-bow re-eow
|
||||
|
|
|
@ -373,7 +373,7 @@
|
|||
|
||||
(else `(,(r op) . ,args))))
|
||||
|
||||
(? ((re-string? re) (if (trivial-re? re) (r 'trivial-re) ; Special hack
|
||||
(? ((re-string? re) (if (re-trivial? re) (r 're-trivial) ; Special hack
|
||||
(doit 'make-re-string 'make-re-string/posix
|
||||
`(,(re-string:chars re))
|
||||
re-string:posix)))
|
||||
|
|
|
@ -220,6 +220,8 @@
|
|||
;; and allocated PREV-SMCOUNT submatches.
|
||||
(let ((elt (car elts)) (tail (cdr elts)))
|
||||
(receive (s1 level1 pcount1 submatches1) (translate-regexp elt)
|
||||
(let ((submatches1 (mapv (lambda (sm) (and sm (+ sm prev-pcount)))
|
||||
submatches1)))
|
||||
(if (pair? tail)
|
||||
(receive (s level pcount submatches)
|
||||
(recur tail
|
||||
|
@ -227,12 +229,9 @@
|
|||
(+ prev-smcount (re-tsm elt)))
|
||||
(values (string-append s1 "|" s) 3
|
||||
(+ pcount1 pcount)
|
||||
(vector-append (mapv (lambda (sm)
|
||||
(and sm (+ sm prev-smcount)))
|
||||
submatches1)
|
||||
submatches)))
|
||||
(vector-append submatches1 submatches)))
|
||||
|
||||
(values s1 level1 pcount1 submatches1)))))
|
||||
(values s1 level1 pcount1 submatches1))))))
|
||||
|
||||
(values "[^\000-\377]" 1 0 (n-falses tsm))))) ; Empty choice.
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@
|
|||
(tail (recur (cdr res))))
|
||||
(? ((re-seq? re) ; Flatten nested seqs
|
||||
(append (recur (re-seq:elts re)) tail))
|
||||
((trivial-re? re) tail) ; Drop trivial elts
|
||||
((re-trivial? re) tail) ; Drop trivial elts
|
||||
(else (cons re tail))))
|
||||
'()))))
|
||||
|
||||
|
@ -114,7 +114,7 @@
|
|||
(if (pair? (cdr res))
|
||||
(make-re-seq res) ; General case
|
||||
(car res)) ; Singleton sequence
|
||||
trivial-re))) ; Empty seq -- ""
|
||||
re-trivial))) ; Empty seq -- ""
|
||||
|
||||
|
||||
;;; Choice: (| re ...)
|
||||
|
@ -156,7 +156,7 @@
|
|||
(tail (recur (cdr res))))
|
||||
(? ((re-choice? re) ; Flatten nested choices
|
||||
(append (recur (re-choice:elts re)) tail))
|
||||
((empty-re? re) tail) ; Drop empty re's.
|
||||
((re-empty? re) tail) ; Drop empty re's.
|
||||
(else (cons re tail))))
|
||||
'()))))
|
||||
;; If all elts are char-class re's, fold them together.
|
||||
|
@ -175,7 +175,7 @@
|
|||
(if (pair? (cdr res))
|
||||
(make-re-choice res) ; General case
|
||||
(car res)) ; Singleton sequence
|
||||
empty-re)))) ; Empty choice = ("")
|
||||
re-empty)))) ; Empty choice = ("")
|
||||
|
||||
;;; Repetition (*,?,+,=,>=,**)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
@ -254,15 +254,15 @@
|
|||
(values body1 pre-dsm))
|
||||
|
||||
((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => ""
|
||||
(values trivial-re (+ (re-tsm body1) pre-dsm)))
|
||||
(values re-trivial (+ (re-tsm body1) pre-dsm)))
|
||||
|
||||
;; re{m,n} => empty-re when m>n:
|
||||
;; re{m,n} => re-empty when m>n:
|
||||
((and (integer? from) (integer? to) (> from to))
|
||||
(values empty-re (+ (re-tsm body1) pre-dsm)))
|
||||
(values re-empty (+ (re-tsm body1) pre-dsm)))
|
||||
|
||||
;; Reduce the body = empty-re case.
|
||||
((and (empty-re? body1) (integer? from)) ; (+ (in)) => (in)
|
||||
(values (if (> from 0) empty-re trivial-re) ; (* (in)) => ""
|
||||
;; Reduce the body = re-empty case.
|
||||
((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in)
|
||||
(values (if (> from 0) re-empty re-trivial) ; (* (in)) => ""
|
||||
pre-dsm))
|
||||
|
||||
;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1.
|
||||
|
@ -317,21 +317,21 @@
|
|||
|
||||
;;; Slightly smart submatch constructor
|
||||
;;; - DSM's unpacked
|
||||
;;; - If BODY is the empty-re, we'll never match, so just produce a DSM.
|
||||
;;; - If BODY is the re-empty, we'll never match, so just produce a DSM.
|
||||
|
||||
(define (re-submatch body . maybe-pre+post-dsm)
|
||||
(let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0))
|
||||
(let ((tsm (+ 1 pre-dsm (re-tsm body) post-dsm)))
|
||||
(receive (body1 pre-dsm1) (open-dsm body)
|
||||
(if (empty-re? body1)
|
||||
(re-dsm empty-re tsm 0)
|
||||
(if (re-empty? body1)
|
||||
(re-dsm re-empty tsm 0)
|
||||
(%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm))))))
|
||||
|
||||
|
||||
|
||||
;;; Other regexps : string, char-set, bos & eos
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; Also, empty-re and trivial-re.
|
||||
;;; Also, re-empty and re-trivial.
|
||||
|
||||
(define-record re-string
|
||||
chars ; String
|
||||
|
@ -347,9 +347,9 @@
|
|||
re))
|
||||
|
||||
;;; Matches the empty string anywhere.
|
||||
(define trivial-re (make-re-string/posix "" "" '#()))
|
||||
(define re-trivial (make-re-string/posix "" "" '#()))
|
||||
|
||||
(define (trivial-re? re)
|
||||
(define (re-trivial? re)
|
||||
(and (re-string? re) (zero? (string-length (re-string:chars re)))))
|
||||
|
||||
(define-record re-char-set
|
||||
|
@ -366,9 +366,9 @@
|
|||
|
||||
;;; Never matches
|
||||
;;; NEED TO OPTIMIZE - PRE-SET POSIX FIELD.
|
||||
(define empty-re (make-re-char-set char-set:empty))
|
||||
(define re-empty (make-re-char-set char-set:empty))
|
||||
|
||||
(define (empty-re? re)
|
||||
(define (re-empty? re)
|
||||
(and (re-char-set? re)
|
||||
(let ((cs (re-char-set:cset re)))
|
||||
(and (char-set? cs) ; Might be code...
|
||||
|
|
|
@ -66,8 +66,8 @@
|
|||
(let ((tsm (re-submatch:tsm re))
|
||||
(pre-dsm (re-submatch:pre-dsm re)))
|
||||
(receive (body1 pre-dsm1) (simp-re (re-submatch:body re))
|
||||
(if (empty-re? body1)
|
||||
(values empty-re tsm)
|
||||
(if (re-empty? body1)
|
||||
(values re-empty tsm)
|
||||
(values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)
|
||||
0)))))
|
||||
|
||||
|
@ -89,7 +89,7 @@
|
|||
;;; which would be an error). This helps to coalesce DSMs and if we bring
|
||||
;;; them all the way to the front, we can pop them off and make them a
|
||||
;;; pre-dsm for the entire seq record.
|
||||
;;; - If an elt is the empty-re, reduce the whole re to the empty re.
|
||||
;;; - If an elt is the re-empty, reduce the whole re to the empty re.
|
||||
;;; - Reduce singleton and empty seq.
|
||||
|
||||
(define (simp-seq re)
|
||||
|
@ -105,7 +105,7 @@
|
|||
head) ; Singleton seq
|
||||
pre-dsm))))
|
||||
|
||||
(values trivial-re 0)))) ; Empty seq
|
||||
(values re-trivial 0)))) ; Empty seq
|
||||
|
||||
|
||||
;;; Simplify the non-empty sequence ELTS.
|
||||
|
@ -122,7 +122,7 @@
|
|||
(recur (re-dsm (car sub-elts) pre-dsm 0)
|
||||
(append (cdr sub-elts) elts))))
|
||||
|
||||
((empty-re? elt) (abort elt tsm)) ; Bomb out on the empty
|
||||
((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty
|
||||
; (impossible) re.
|
||||
((pair? elts)
|
||||
(receive (next-pre-dsm next tail) ; Simplify the tail,
|
||||
|
@ -145,7 +145,7 @@
|
|||
(values (+ pre-dsm next-pre-dsm) elt tail)
|
||||
(no-simp)))
|
||||
|
||||
(? ((trivial-re? elt) ; Drop trivial re's.
|
||||
(? ((re-trivial? elt) ; Drop trivial re's.
|
||||
(values (+ pre-dsm next-pre-dsm) next tail))
|
||||
|
||||
;; Coalesce adjacent strings
|
||||
|
@ -173,7 +173,7 @@
|
|||
;;; Simplifying choices
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; - Collapse nested choices and DSM's.
|
||||
;;; - Delete empty-re's.
|
||||
;;; - Delete re-empty's.
|
||||
;;; - Merge sets; merge identical anchors (bos, eos, etc.).
|
||||
;;; But you can't merge across an element that contains a live submatch,
|
||||
;;; see below.
|
||||
|
@ -216,7 +216,7 @@
|
|||
(if (pair? (cdr tail))
|
||||
(%make-re-choice tail (- tsm pre-dsm))
|
||||
(car tail)) ; Singleton choice
|
||||
empty-re) ; Empty choice
|
||||
re-empty) ; Empty choice
|
||||
pre-dsm)))))
|
||||
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
(char=? #\| (string-ref s i)))
|
||||
(lp (+ i 1) branches)
|
||||
(values (re-choice (reverse branches)) i)))))
|
||||
(values trivial-re i))))
|
||||
(values re-trivial i))))
|
||||
|
||||
|
||||
;;; A branch is a sequence of pieces -- stuff that goes in-between |'s.
|
||||
|
@ -92,11 +92,11 @@
|
|||
(+ i 1))
|
||||
(error "Regexps may not terminate with a backslash" s))))
|
||||
|
||||
((#\) #\| #\* #\+ #\? #\{) (values trivial-re i))
|
||||
((#\) #\| #\* #\+ #\? #\{) (values re-trivial i))
|
||||
|
||||
(else (values (make-re-string (string c)) (+ i 1)))))
|
||||
|
||||
(values trivial-re i))))
|
||||
(values re-trivial i))))
|
||||
|
||||
|
||||
;;; Parse a [...] or [^...] bracket expression into a regexp.
|
||||
|
|
Loading…
Reference in New Issue