Fix the most obvious insanities that's to do with dynamic parts of the

RX syntax.  Namely, a lot of code (used to) assume(s) that the
total-number-of-submatches (TSM) quantity is static even for dynamic
regexps.

Specifically, RE-TSM now returns an unspecific value instead of 0 for
non-regexps which should break most of the code that used to just
silently do the wrong thing.  It's likely that more examples involving
dynamic sub-regexps will fail.

In the process, I also removed some of Olin's naming craziness (using
"%" signs to convey meaning) which I needed to do to halfway
understand what's going on.
This commit is contained in:
sperber 2002-09-23 12:59:28 +00:00
parent 7062ec4f17
commit 938182d4b1
4 changed files with 203 additions and 160 deletions

View File

@ -56,8 +56,8 @@
(open-dsm (proc (:value) (some-values :value :exact-integer)))
(re-seq? (proc (:value) :boolean))
(%%make-re-seq (proc (:value :exact-integer :value) :value))
(%make-re-seq (proc (:value :exact-integer) :value))
(really-make-re-seq (proc (:value :exact-integer :value) :value))
(make-re-seq/tsm (proc (:value :exact-integer) :value))
((re-seq make-re-seq) (proc (:value) :value))
(re-seq:elts (proc (:value) :value))
(re-seq:tsm (proc (:value) :exact-integer))
@ -65,8 +65,8 @@
(set-re-seq:posix (proc (:value :value) :unspecific))
(re-choice? (proc (:value) :boolean))
(%%make-re-choice (proc (:value :exact-integer :value) :value))
(%make-re-choice (proc (:value :exact-integer) :value))
(really-make-re-choice (proc (:value :exact-integer :value) :value))
(make-re-choice/tsm (proc (:value :exact-integer) :value))
((make-re-choice re-choice) (proc (:value) :value))
(re-choice:elts (proc (:value) :value))
(re-choice:tsm (proc (:value) :exact-integer))
@ -74,11 +74,12 @@
(set-re-choice:posix (proc (:value :value) :unspecific))
(re-repeat? (proc (:value) :boolean))
(%%make-re-repeat (proc (:exact-integer :value :value
:exact-integer :value)
(really-make-re-repeat (proc (:exact-integer
:value :value
:exact-integer :value)
:value))
(%make-re-repeat (proc (:exact-integer :value :value :exact-integer )
:value))
(make-re-repeat/tsm (proc (:exact-integer :value :value :exact-integer )
:value))
((re-repeat make-re-repeat)
(proc (:exact-integer :value :value) :value))
((re-repeat:from re-repeat:tsm)
@ -89,9 +90,9 @@
(set-re-repeat:posix (proc (:value :value) :unspecific))
(re-submatch? (proc (:value) :boolean))
(%%make-re-submatch (proc (:value :exact-integer :exact-integer :value)
:value))
(%make-re-submatch (proc (:value :exact-integer :exact-integer) :value))
(really-make-re-submatch (proc (:value :exact-integer :exact-integer :value)
:value))
(make-re-submatch/tsm (proc (:value :exact-integer :exact-integer) :value))
((make-re-submatch re-submatch)
(proc (:value &opt :exact-integer :exact-integer) :value))
@ -155,14 +156,14 @@
;; These are constructors for the Scheme unparser
(export
(make-re-string/posix (proc (:string :string :vector) :value))
((%make-re-seq/posix %make-re-choice/posix)
((make-re-seq/posix make-re-choice/posix)
(proc (:value :exact-integer :string :vector) :value))
(make-re-char-set/posix (proc (:value :string :vector) :value))
(%make-re-repeat/posix (proc (:exact-integer :value :value :exact-integer :string :vector)
(make-re-repeat/posix (proc (:exact-integer :value :value :exact-integer :string :vector)
:value))
(%make-re-dsm/posix (proc (:value :exact-integer :exact-integer :string :vector)
(make-re-dsm/posix (proc (:value :exact-integer :exact-integer :string :vector)
:value))
(%make-re-submatch/posix (proc (:value :exact-integer :exact-integer :string :vector) :value))))
(make-re-submatch/posix (proc (:value :exact-integer :exact-integer :string :vector) :value))))
(define re-match-internals-interface

View File

@ -358,54 +358,79 @@
(%re-bol (r 're-bol)) (%re-eol (r 're-eol))
(%list (r 'list)))
(let recur ((re re))
;; If (fetch-posix re) = #f, produce (OP . ARGS);
;; Otherwise, produce (OP/POSIX ,@ARGS '<posix-translation>).
(define (doit op op/posix args fetch-posix)
(? ((fetch-posix re) =>
(lambda (psx) `(,(r op/posix) ,@args
',(cre:string psx) ',(cre:tvec psx))))
(let recur ((re re))
;; If (fetch-posix re) = #f, produce (OP . ARGS);
;; Otherwise, produce (OP/POSIX ,@ARGS '<posix-translation>).
(define (doit op op/tsm maybe-tsm op/posix args fetch-posix)
(cond
((fetch-posix re) =>
(lambda (psx)
`(,(r op/posix) ,@args ,maybe-tsm
',(cre:string psx) ',(cre:tvec psx))))
((number? maybe-tsm)
`(,(r op/tsm) ,@args ,maybe-tsm))
(else
`(,(r op) ,@args))))
(define (doit/leaf op op/posix args fetch-posix)
(cond
((fetch-posix re) =>
(lambda (psx)
`(,(r op/posix) ,@args
',(cre:string psx) ',(cre:tvec psx))))
(else `(,(r op) . ,args))))
(? ((re-string? re) (if (re-trivial? re) (r 're-trivial) ; Special hack
(doit 'make-re-string 'make-re-string/posix
`(,(re-string:chars re))
re-string:posix)))
(cond
((re-string? re)
(if (re-trivial? re)
(r 're-trivial) ; Special hack
(doit/leaf 'make-re-string 'make-re-string/posix
`(,(re-string:chars re))
re-string:posix)))
((re-seq? re) (doit '%make-re-seq '%make-re-seq/posix
`((,%list . ,(map recur (re-seq:elts re)))
,(re-seq:tsm re))
re-seq:posix))
((re-seq? re)
(doit 'make-re-seq 'make-re-seq/tsm (re-seq:tsm re)
'make-re-seq/posix
`((,%list ,@(map recur (re-seq:elts re))))
re-seq:posix))
((re-choice? re) (doit '%make-re-choice '%make-re-choice/posix
`((,%list . ,(map recur (re-choice:elts re)))
,(re-choice:tsm re))
re-choice:posix))
((re-choice? re)
(doit 'make-re-choice 'make-re-choice/tsm (re-choice:tsm re)
'make-re-choice/posix
`((,%list ,@(map recur (re-choice:elts re))))
re-choice:posix))
((re-char-set? re) (if (re-any? re) (r 're-any) ; Special hack for ANY.
(doit 'make-re-char-set 'make-re-char-set/posix
`(,(char-set->scheme (re-char-set:cset re) r))
re-char-set:posix)))
((re-char-set? re)
(if (re-any? re)
(r 're-any) ; Special hack for ANY.
(doit/leaf 'make-re-char-set 'make-re-char-set/posix
`(,(char-set->scheme (re-char-set:cset re) r))
re-char-set:posix)))
((re-repeat? re) (doit '%make-re-repeat '%make-re-repeat/posix
`(,(re-repeat:from re)
,(re-repeat:to re)
,(recur (re-repeat:body re))
,(re-repeat:tsm re))
re-repeat:posix))
((re-repeat? re)
(doit 'make-re-repeat 'make-re-repeat/tsm (re-repeat:tsm re)
'make-re-repeat/posix
`(,(re-repeat:from re)
,(re-repeat:to re)
,(recur (re-repeat:body re)))
re-repeat:posix))
((re-dsm? re) (doit '%make-re-dsm '%make-re-dsm/posix
`(,(recur (re-dsm:body re))
,(re-dsm:pre-dsm re)
,(re-dsm:tsm re))
re-dsm:posix))
((re-dsm? re)
(doit 'make-re-dsm 'make-re-dsm/tsm (re-dsm:tsm re)
((re-submatch? re) (doit '%make-re-submatch '%make-re-submatch/posix
`(,(recur (re-submatch:body re))
,(re-submatch:pre-dsm re)
,(re-submatch:tsm re))
re-submatch:posix))
'make-re-dsm/posix
`(,(recur (re-dsm:body re))
,(re-dsm:pre-dsm re))
re-dsm:posix))
((re-submatch? re)
(doit 'make-re-submatch 'make-re-submatch/tsm (re-submatch:tsm re)
'make-re-submatch/posix
`(,(recur (re-submatch:body re))
,(re-submatch:pre-dsm re))
re-submatch:posix))
((re-bos? re) %re-bos)
((re-eos? re) %re-eos)

View File

@ -15,22 +15,22 @@
;;; submatches, then POST-DSM as dead submatches.
(define-record-type re-dsm :re-dsm
(%%make-re-dsm body pre-dsm tsm posix)
(really-make-re-dsm body pre-dsm tsm posix)
re-dsm?
(body re-dsm:body) ; A Regexp
(pre-dsm re-dsm:pre-dsm) ; Integer -- initial dead submatches
(tsm re-dsm:tsm) ; Total submatch count
(posix re-dsm:posix set-re-dsm:posix)) ; Posix bits
(define (%make-re-dsm body pre-dsm tsm) (%%make-re-dsm body pre-dsm tsm #f))
(define (make-re-dsm/tsm body pre-dsm tsm) (really-make-re-dsm body pre-dsm tsm #f))
;;; This is only used in code that the (RX ...) macro produces
;;; for static regexps.
(define (%make-re-dsm/posix body pre-dsm tsm posix-str tvec)
(%%make-re-dsm body pre-dsm tsm (new-cre posix-str tvec)))
(define (make-re-dsm/posix body pre-dsm tsm posix-str tvec)
(really-make-re-dsm body pre-dsm tsm (new-cre posix-str tvec)))
(define (make-re-dsm body pre-dsm post-dsm)
(%make-re-dsm body pre-dsm (+ post-dsm pre-dsm (re-tsm body))))
(make-re-dsm/tsm body pre-dsm (+ post-dsm pre-dsm (re-tsm body))))
;;; "Virtual field" for the RE-DSM record -- how many dead submatches
;;; come after the body:
@ -50,7 +50,7 @@
(let ((pre-dsm (+ pre-dsm pre-dsm1)))
(if (= tsm (re-tsm body1)) body1 ; Trivial DSM
(%make-re-dsm body1 pre-dsm tsm)))))) ; Non-trivial DSM
(make-re-dsm/tsm body1 pre-dsm tsm)))))) ; Non-trivial DSM
;;; Take a regexp RE and return an equivalent (re', pre-dsm) pair of values.
;;; Recurses into DSM records. It is the case that
@ -65,26 +65,31 @@
;;; Sequence: (& re ...)
;;; Sequence: (: re ...)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record-type re-seq :re-seq
(%%make-re-seq elts tsm posix)
(really-make-re-seq elts tsm posix)
re-seq?
(elts re-seq:elts) ; Regexp list
(tsm re-seq:tsm) ; Total submatch count
(posix re-seq:posix set-re-seq:posix)) ; Posix record
(define (%make-re-seq elts tsm) (%%make-re-seq elts tsm #f))
(define (make-re-seq/tsm elts tsm) (really-make-re-seq elts tsm #f))
;;; This is only used in code that (RE ...) macro produces for static regexps.
(define (%make-re-seq/posix elts tsm posix-str tvec)
(%%make-re-seq elts tsm (new-cre posix-str tvec)))
(define (make-re-seq/posix elts tsm posix-str tvec)
(really-make-re-seq elts tsm (new-cre posix-str tvec)))
(define (make-re-seq res)
(%make-re-seq res
(fold (lambda (re sm-count) (+ (re-tsm re) sm-count))
0 res)))
(make-re-seq/tsm res
(fold (lambda (re sm-count)
(let ((maybe-tsm (re-tsm re)))
(if (and (number? maybe-tsm)
(number? sm-count))
(+ maybe-tsm sm-count)
(unspecific))))
0 res)))
;;; Slightly smart sequence constructor:
;;; - Flattens nested sequences
@ -115,22 +120,27 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record-type re-choice :re-choice
(%%make-re-choice elts tsm posix)
(really-make-re-choice elts tsm posix)
re-choice?
(elts re-choice:elts) ; List of rel-items
(tsm re-choice:tsm) ; Total submatch count
(posix re-choice:posix set-re-choice:posix)) ; Posix string
(define (%make-re-choice elts tsm) (%%make-re-choice elts tsm #f))
(define (make-re-choice/tsm elts tsm) (really-make-re-choice elts tsm #f))
;;; This is only used in code that (RE ...) macro produces for static regexps.
(define (%make-re-choice/posix elts tsm posix-str tvec)
(%%make-re-choice elts tsm (new-cre posix-str tvec)))
(define (make-re-choice/posix elts tsm posix-str tvec)
(really-make-re-choice elts tsm (new-cre posix-str tvec)))
(define (make-re-choice res)
(%make-re-choice res
(fold (lambda (re sm-count) (+ (re-tsm re) sm-count))
0 res)))
(make-re-choice/tsm res
(fold (lambda (re sm-count)
(let ((maybe-tsm (re-tsm re)))
(if (and (number? maybe-tsm)
(number? sm-count))
(+ maybe-tsm sm-count)
(unspecific))))
0 res)))
;;; Slightly smart choice constructor:
;;; - Flattens nested choices
@ -181,7 +191,7 @@
;;; (= (re-repeat:tsm re) (re-tsm (re-repeat:body re)))
(define-record-type re-repeat :re-repeat
(%%make-re-repeat from to body tsm posix)
(really-make-re-repeat from to body tsm posix)
re-repeat?
(from re-repeat:from) ; Integer (Macro expander abuses.)
(to re-repeat:to) ; Integer or #f for infinite (Macro expander abuses.)
@ -189,26 +199,26 @@
(tsm re-repeat:tsm) ; Total submatch count
(posix re-repeat:posix set-re-repeat:posix)) ; Posix record
(define (%make-re-repeat from to body tsm)
(%%make-re-repeat from to body tsm #f))
(define (make-re-repeat/tsm from to body tsm)
(really-make-re-repeat from to body tsm #f))
;;; This is only used in code that (RE ...) macro produces for static regexps.
(define (%make-re-repeat/posix from to body tsm posix-str tvec)
(%%make-re-repeat from to body tsm (new-cre posix-str tvec)))
(define (make-re-repeat/posix from to body tsm posix-str tvec)
(really-make-re-repeat from to body tsm (new-cre posix-str tvec)))
(define (make-re-repeat from to body)
(%make-re-repeat (check-arg (lambda (from)
(or (not (integer? from)) ; Dynamic
(>= from 0)))
from
make-re-repeat)
(check-arg (lambda (to)
(or (not (integer? to)) ; #f or dynamic
(and (integer? to) (>= to 0))))
to
make-re-repeat)
body
(re-tsm body)))
(make-re-repeat/tsm (check-arg (lambda (from)
(or (not (integer? from)) ; Dynamic
(>= from 0)))
from
make-re-repeat)
(check-arg (lambda (to)
(or (not (integer? to)) ; #f or dynamic
(and (integer? to) (>= to 0))))
to
make-re-repeat)
body
(re-tsm body)))
;;; Slightly smart repeat constructor
;;; - Flattens nested repeats.
@ -281,19 +291,19 @@
;;; submatches.
(define-record-type re-submatch :re-submatch
(%%make-re-submatch body pre-dsm tsm posix)
(really-make-re-submatch body pre-dsm tsm posix)
re-submatch?
(body re-submatch:body) ; Regexp
(pre-dsm re-submatch:pre-dsm) ; Deleted submatches preceding the body
(tsm re-submatch:tsm) ; Total submatch count for the record
(posix re-submatch:posix set-re-submatch:posix)) ; Posix string
(define (%make-re-submatch body pre-dsm tsm)
(%%make-re-submatch body pre-dsm tsm #f))
(define (make-re-submatch/tsm body pre-dsm tsm)
(really-make-re-submatch body pre-dsm tsm #f))
;;; This is only used in code that (RE ...) macro produces for static regexps.
(define (%make-re-submatch/posix body pre-dsm tsm posix-str tvec)
(%%make-re-submatch body pre-dsm tsm (new-cre posix-str tvec)))
(define (make-re-submatch/posix body pre-dsm tsm posix-str tvec)
(really-make-re-submatch body pre-dsm tsm (new-cre posix-str tvec)))
;;; "Virtual field" for the RE-SUBMATCH record -- how many dead submatches
@ -307,7 +317,7 @@
(define (make-re-submatch body . maybe-pre+post-dsm)
(let-optionals maybe-pre+post-dsm ((pre-dsm 0) (post-dsm 0))
(%make-re-submatch body pre-dsm (+ pre-dsm 1 (re-tsm body) post-dsm))))
(make-re-submatch/tsm body pre-dsm (+ pre-dsm 1 (re-tsm body) post-dsm))))
;;; Slightly smart submatch constructor
;;; - DSM's unpacked
@ -319,7 +329,7 @@
(receive (body1 pre-dsm1) (open-dsm body)
(if (re-empty? body1)
(re-dsm re-empty tsm 0)
(%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm))))))
(make-re-submatch/tsm body1 (+ pre-dsm pre-dsm1) tsm))))))
@ -405,8 +415,10 @@
((re-repeat? re) (re-repeat:tsm re))
((re-dsm? re) (re-dsm:tsm re))
((re-submatch? re) (re-submatch:tsm re))
(else 0)))
((or (re-char-set? re) (re-string? re)
(re-bos? re) (re-eos? re)
(re-bol? re) (re-eol? re))
0)))
;;; (flush-submatches re)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -451,55 +463,55 @@
(define (uncase re)
(receive (new-re changed?)
(let recur ((re re))
(? ((re-seq? re)
(let ((elts (re-seq:elts re)))
(receive (new-elts elts-changed?)
(map/changed recur elts)
(if elts-changed?
(values (%make-re-seq new-elts (re-seq:tsm re)) #t)
(values re #f)))))
(let recur ((re re))
(? ((re-seq? re)
(let ((elts (re-seq:elts re)))
(receive (new-elts elts-changed?)
(map/changed recur elts)
(if elts-changed?
(values (make-re-seq/tsm new-elts (re-seq:tsm re)) #t)
(values re #f)))))
((re-choice? re)
(let ((elts (re-choice:elts re)))
(receive (new-elts elts-changed?)
(map/changed recur elts)
(if elts-changed?
(values (re-choice new-elts) #t)
(values re #f)))))
((re-choice? re)
(let ((elts (re-choice:elts re)))
(receive (new-elts elts-changed?)
(map/changed recur elts)
(if elts-changed?
(values (re-choice new-elts) #t)
(values re #f)))))
((re-char-set? re)
(let* ((cs (re-char-set:cset re))
(new-cs (uncase-char-set cs))) ; Better not be code.
(if (char-set= cs new-cs)
(values re #f)
(values (make-re-char-set new-cs) #t))))
((re-char-set? re)
(let* ((cs (re-char-set:cset re))
(new-cs (uncase-char-set cs))) ; Better not be code.
(if (char-set= cs new-cs)
(values re #f)
(values (make-re-char-set new-cs) #t))))
((re-repeat? re)
(receive (new-body body-changed?) (recur (re-repeat:body re))
(if body-changed?
(values (re-repeat (re-repeat:from re)
(re-repeat:to re)
new-body)
#t)
(values re #f))))
((re-repeat? re)
(receive (new-body body-changed?) (recur (re-repeat:body re))
(if body-changed?
(values (re-repeat (re-repeat:from re)
(re-repeat:to re)
new-body)
#t)
(values re #f))))
((re-submatch? re)
(receive (new-body body-changed?) (recur (re-submatch:body re))
(if body-changed?
(values (%make-re-submatch new-body
(re-submatch:pre-dsm re)
(re-submatch:tsm re))
#t)
(values re #f))))
((re-submatch? re)
(receive (new-body body-changed?) (recur (re-submatch:body re))
(if body-changed?
(values (make-re-submatch/tsm new-body
(re-submatch:pre-dsm re)
(re-submatch:tsm re))
#t)
(values re #f))))
((re-string? re)
(let ((cf-re (uncase-string (re-string:chars re))))
(if (re-string? cf-re)
(values re #f)
(values cf-re #t))))
((re-string? re)
(let ((cf-re (uncase-string (re-string:chars re))))
(if (re-string? cf-re)
(values re #f)
(values cf-re #t))))
(else (values re #f))))
(else (values re #f))))
new-re))

View File

@ -32,8 +32,11 @@
;;; of its expansion process.
(define (simplify-regexp re)
(receive (simp-re pre-dsm) (simp-re re)
(re-dsm simp-re pre-dsm (- (re-tsm re) (+ (re-tsm simp-re) pre-dsm)))))
(if (and (regexp? re)
(number? (re-tsm re)))
(receive (simp-re pre-dsm) (simp-re re)
(re-dsm simp-re pre-dsm (- (re-tsm re) (+ (re-tsm simp-re) pre-dsm))))
re))
(define (simp-re re)
(? ((re-string? re) (values re 0))
@ -68,7 +71,7 @@
(receive (body1 pre-dsm1) (simp-re (re-submatch:body re))
(if (re-empty? body1)
(values re-empty tsm)
(values (%make-re-submatch body1 (+ pre-dsm pre-dsm1) tsm)
(values (make-re-submatch/tsm body1 (+ pre-dsm pre-dsm1) tsm)
0)))))
;;; - Flatten nested DSM's.
@ -95,17 +98,18 @@
(define (simp-seq re)
(let ((tsm (re-seq:tsm re))
(elts (map simplify-regexp (re-seq:elts re))))
(if (pair? elts)
(call-with-current-continuation
(lambda (abort)
(receive (pre-dsm head tail) (simp-seq1 elts abort tsm)
(values (if (pair? tail)
(%make-re-seq (cons head tail) (- tsm pre-dsm))
head) ; Singleton seq
pre-dsm))))
(values re-trivial 0)))) ; Empty seq
(cond
((null? elts)
(values re-trivial 0)) ; Empty seq
((number? tsm)
(call-with-current-continuation
(lambda (abort)
(receive (pre-dsm head tail) (simp-seq1 elts abort tsm)
(values (if (pair? tail)
(make-re-seq/tsm (cons head tail) (- tsm pre-dsm))
head) ; Singleton seq
pre-dsm)))))
(else (values re tsm))))) ; dynamic components
;;; Simplify the non-empty sequence ELTS.
@ -113,6 +117,7 @@
;;; [head-elt-pre-dsm, head-elt, tail].
;;; - If any elt is the empty (impossible) re, abort by calling
;;; (abort elt tsm). TSM is otherwise unused.
;;; - If any elt is dynamic, abort as well.
(define (simp-seq1 elts abort tsm)
(let recur ((elt (car elts)) (elts (cdr elts)))
@ -212,7 +217,7 @@
tail)))
(values (if (pair? tail)
(if (pair? (cdr tail))
(%make-re-choice tail (- tsm pre-dsm))
(make-re-choice/tsm tail (- tsm pre-dsm))
(car tail)) ; Singleton choice
re-empty) ; Empty choice
pre-dsm)))))