diff --git a/Makefile.in b/Makefile.in index 02b489c..8c6bccd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -797,8 +797,6 @@ SCHEME =scsh/awk.scm \ scsh/pty.scm \ scsh/rdelim.scm \ scsh/rw.scm \ - scsh/rx/packages.scm \ - scsh/rx/cond-package.scm \ scsh/scsh-condition.scm \ scsh/scsh-interfaces.scm \ scsh/scsh-package.scm \ @@ -816,7 +814,6 @@ SCHEME =scsh/awk.scm \ scsh/tty.scm \ scsh/utilities.scm \ scsh/weaktables.scm \ - scsh/rx/cond-package.scm \ scsh/rx/packages.scm \ scsh/rx/re-match-syntax.scm \ scsh/rx/rx-lib.scm \ @@ -856,7 +853,6 @@ bs: build/build-scsh-image loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \ $(srcdir)/scsh/machine/packages.scm \ $(srcdir)/scsh/rx/packages.scm \ - $(srcdir)/scsh/rx/cond-package.scm \ $(srcdir)/scsh/scsh-package.scm \ $(srcdir)/scsh/lib/ccp-pack.scm \ $(srcdir)/scsh/lib/char-package.scm diff --git a/scsh/rx/cond-package.scm b/scsh/rx/cond-package.scm deleted file mode 100644 index 1907ace..0000000 --- a/scsh/rx/cond-package.scm +++ /dev/null @@ -1,160 +0,0 @@ -(define-structure conditionals - (export (define-simple-syntax :syntax) - (when :syntax) - (unless :syntax) - (? :syntax) - (switchq :syntax) - (switch :syntax) - (prog0 :syntax) - (land* :syntax)) - (open scheme) - (begin - -;;; (define-simple-syntax (name subforms ...) expansion) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-syntax define-simple-syntax - (syntax-rules () - ((define-simple-syntax (name subforms ...) expansion) - (define-syntax name (syntax-rules () ((name subforms ...) expansion)))))) - - -;;; ? = COND -;;; (WHEN test body ...) (SWITCHQ = key clause ...) -;;; (UNLESS test body ...) (SWITCH = key clause ...) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Handy conditional forms. ? is so short that it renders WHEN pretty -;;; much useless. - -(define-simple-syntax (when test body ...) - (if test (begin body ...))) - -(define-simple-syntax (unless test body ...) - (if (not test) (begin body ...))) - -;;; ? is synonym for COND. -(define-simple-syntax (? clause ...) (cond clause ...)) - - -;;; (PROG0 val-exp exp ...) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-simple-syntax (prog0 val-exp exp ...) - (let ((v val-exp)) exp ... v)) - - -;;; (land* (clause ...) body ...) -*- Scheme -*- -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Evaluate each clause. If any clause returns false, land* stops and -;;; returns false. If all the clauses evaluate to a true value, return -;;; the value of the body. -;;; -;;; The difference between LAND* and AND is that LAND* binds names to -;;; the values of its clauses, which may be used by subsequent clauses. -;;; Clauses are of the form -;;; (var exp) ; binds VAR to the value of EXP. -;;; (exp) ; No binding. -;;; var ; Reference -- no binding. -;;; -;;; Example: -;;; (land* ((probe (assq key alist))) -;;; (cdr probe)) -;;; -;;; LAND* is due to Oleg Kiselyov (http://pobox.com/~oleg); I wrote this -;;; simple implementation as a high-level R5RS DEFINE-SYNTAX macro. -;;; Olin 98/9/29 - -(define-syntax land* - (syntax-rules () - ((land* () body ...) (begin body ...)) - - ((land* ((var exp) clause ...) body ...) - (let ((var exp)) (and var (land* (clause ...) body ...)))) - - ((land* ((#f exp) clause ...) body ...) - (and exp (land* (clause ...) body ...))) - - ((land* ((exp) clause ...) body ...) - (and exp (land* (clause ...) body ...))) - - ((land* (var clause ...) body ...) - (and var (land* (clause ...) body ...))))) - - - -;;; Like CASE, but you specify the key-comparison procedure. -;;; SWITCH evaluates its keys each time through the conditional. -;;; SWITCHQ keys are not evaluated -- are simply constants. -;;; (switchq string=? (vector-ref vec i) -;;; (("plus" "minus") ...) -;;; (("times" "div") ...) -;;; (else ...)) - -(define-simple-syntax (switchq compare key clause ...) - (let ((k key) ; Eval KEY and COMPARE - (c compare)) ; just once, then call %switch. - (%switchq c k clause ...))) ; C, K are vars, hence replicable. - -(define-syntax %switchq - (syntax-rules (else) - ((%switchq compare key ((key1 ...) body1 body2 ...) rest ...) - (if (or (compare key 'key1) ...) - (begin body1 body2 ...) - (%switchq compare key rest ...))) - - ((%switchq compare key ((key1 ...)) rest ...) ; Null body. - (if (not (or (compare key 'key1) ...)) - (%switchq compare key rest ...))) - - ((%switchq compare key (else body ...)) - (begin body ...)) - - ((%switchq compare key) '#f))) - - -(define-simple-syntax (switch compare key clause ...) - (let ((k key) ; Eval KEY and COMPARE - (c compare)) ; just once, then call %switch. - (%switch c k clause ...))) ; C, K are vars, hence replicable. - -(define-syntax %switch - (syntax-rules (else) - ((%switch compare key ((key1 ...) body1 body2 ...) rest ...) - (if (or (compare key key1) ...) - (begin body1 body2 ...) - (%switch compare key rest ...))) - - ((%switch compare key ((key1 ...)) rest ...) ; Null body. - (if (not (or (compare key key1) ...)) - (%switch compare key rest ...))) - - ((%switch compare key (else body ...)) - (begin body ...)) - - ((%switch compare key) '#f))) - -;;; I can't get this to work -- S48 complains "too many ...'s". -;(define-syntax switchq -; (syntax-rules (else) -; ((switchq compare key clause ...) -; (letrec-syntax ((%switchq (syntax-rules (else) -; ((%switchq compare key -; ((key1 ...) body1 body2 ...) rest ...) -; (if (or (compare key 'key1) ...) -; (begin body1 body2 ...) -; (%switchq compare key rest ...))) -; -; ; Null body. -; ((%switchq compare key ((key1 ...)) rest ...) -; (if (not (or (compare key 'key1) ...)) -; (%switchq compare key rest ...))) -; -; ((%switchq compare key (else body ...)) -; (begin body ...)) -; -; ((%switchq compare key) '#f)))) -; -; (let ((k key) ; Eval KEY and COMPARE -; (c compare)) ; just once, then call %switch. -; (%switchq c k clause ...)))))); C, K are vars, hence replicable. -)) diff --git a/scsh/rx/packages.scm b/scsh/rx/packages.scm index a2a2126..634a530 100644 --- a/scsh/rx/packages.scm +++ b/scsh/rx/packages.scm @@ -231,7 +231,6 @@ ;; re-posix-parsers ; regexp->posix-string let-opt sort ; Posix renderer - conditionals define-record-types defrec-package receiving @@ -275,7 +274,6 @@ (define-structure rx-lib rx-lib-interface (open re-internals - conditionals re-level-0 (subset srfi-1 (fold)) srfi-14 @@ -342,7 +340,7 @@ (define-structure re-folders re-folders-interface - (open re-level-0 let-opt conditionals error-package scheme) + (open re-level-0 let-opt error-package scheme) (files re-fold) ; (optimize auto-integrate) ) diff --git a/scsh/rx/parse.scm b/scsh/rx/parse.scm index d0e8faf..8c91ca6 100644 --- a/scsh/rx/parse.scm +++ b/scsh/rx/parse.scm @@ -22,8 +22,6 @@ ;;; is the char-set parsing and unparsing, which deal with ranges of ;;; characters. We assume an 8-bit ASCII superset. -;;; Imports: -;;; ? for COND, and SWITCHQ conditional form. ;;; every ;;; This code is much hairier than it would otherwise be because of the @@ -52,23 +50,24 @@ ;;; in the form of embedded code in some of the regexp's fields? (define (static-regexp? re) - (? ((re-seq? re) (every static-regexp? (re-seq:elts re))) - ((re-choice? re) (every static-regexp? (re-choice:elts re))) + (cond + ((re-seq? re) (every static-regexp? (re-seq:elts re))) + ((re-choice? re) (every static-regexp? (re-choice:elts re))) + + ((re-char-set? re) (char-set? (re-char-set:cset re))) ; Might be code. - ((re-char-set? re) (char-set? (re-char-set:cset re))) ; Might be code. + ((re-repeat? re) ; FROM & TO fields might be code. + (let ((to (re-repeat:to re))) + (and (integer? (re-repeat:from re)) + (or (not to) (integer? to)) + (static-regexp? (re-repeat:body re))))) - ((re-repeat? re) ; FROM & TO fields might be code. - (let ((to (re-repeat:to re))) - (and (integer? (re-repeat:from re)) - (or (not to) (integer? to)) - (static-regexp? (re-repeat:body re))))) + ((re-dsm? re) (static-regexp? (re-dsm:body re))) + ((re-submatch? re) (static-regexp? (re-submatch:body re))) - ((re-dsm? re) (static-regexp? (re-dsm:body re))) - ((re-submatch? re) (static-regexp? (re-submatch:body re))) - - (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not - (re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code. - (re-string? re))))) + (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not + (re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code. + (re-string? re))))) ;;; Two useful standard char sets @@ -149,153 +148,164 @@ (re-repeat from to seq) `(,(r 're-repeat) ',from ',to ,(regexp->scheme seq r))))) - (? ((char? sre) (parse-char-re sre case-sensitive? cset?)) - ((string? sre) (parse-string-re sre case-sensitive? cset?)) + (cond + ((char? sre) (parse-char-re sre case-sensitive? cset?)) + ((string? sre) (parse-string-re sre case-sensitive? cset?)) - ((c sre %bos) (non-cset) re-bos) - ((c sre %eos) (non-cset) re-eos) + ((c sre %bos) (non-cset) re-bos) + ((c sre %eos) (non-cset) re-eos) - ((c sre %bol) (non-cset) re-bol) - ((c sre %eol) (non-cset) re-eol) + ((c sre %bol) (non-cset) re-bol) + ((c sre %eol) (non-cset) re-eol) - ((pair? sre) - (let ((hygn-eq? (lambda (the-sym) (or (c (car sre) (r the-sym)) - (c (car sre) the-sym))))) - (cond - ((hygn-eq? '*) - (non-cset) - (build-re-repeat 0 #f (cdr sre))) - ((hygn-eq? '+) - (non-cset) - (build-re-repeat 1 #f (cdr sre))) - ((hygn-eq? '?) - (non-cset) - (build-re-repeat 0 1 (cdr sre))) - ((hygn-eq? '=) ; #### - (non-cset) - (let ((n (cadr sre))) - (build-re-repeat n n (cddr sre)))) - ((hygn-eq? '>=) - (non-cset) - (build-re-repeat (cadr sre) #f (cddr sre))) - ((hygn-eq? '**) - (non-cset) - (build-re-repeat (cadr sre) (caddr sre) - (cdddr sre))) + ((pair? sre) + (let ((hygn-eq? (lambda (the-sym) (or (c (car sre) (r the-sym)) + (c (car sre) the-sym))))) + (cond + ((hygn-eq? '*) + (non-cset) + (build-re-repeat 0 #f (cdr sre))) + ((hygn-eq? '+) + (non-cset) + (build-re-repeat 1 #f (cdr sre))) + ((hygn-eq? '?) + (non-cset) + (build-re-repeat 0 1 (cdr sre))) + ((hygn-eq? '=) + (non-cset) + (let ((n (cadr sre))) + (build-re-repeat n n (cddr sre)))) + ((hygn-eq? '>=) + (non-cset) + (build-re-repeat (cadr sre) #f (cddr sre))) + ((hygn-eq? '**) + (non-cset) + (build-re-repeat (cadr sre) (caddr sre) + (cdddr sre))) - ;; Choice is special wrt cset? because it's "polymorphic". - ;; Note that RE-CHOICE guarantees to construct a char-set - ;; or single-char string regexp if all of its args are char - ;; classes. - ((or (hygn-eq? '|) - (hygn-eq? 'or)) - (let ((elts (map (lambda (sre) - (recur sre case-sensitive? cset?)) - (cdr sre)))) - (if cset? - (assoc-cset-op char-set-union 'char-set-union elts r) - (re-choice elts)))) + ;; Choice is special wrt cset? because it's "polymorphic". + ;; Note that RE-CHOICE guarantees to construct a char-set + ;; or single-char string regexp if all of its args are char + ;; classes. + ((or (hygn-eq? '|) + (hygn-eq? 'or)) + (let ((elts (map (lambda (sre) + (recur sre case-sensitive? cset?)) + (cdr sre)))) + (if cset? + (assoc-cset-op char-set-union 'char-set-union elts r) + (re-choice elts)))) - ((or (hygn-eq? ':) - (hygn-eq? 'seq)) - (non-cset) (parse-seq (cdr sre))) + ((or (hygn-eq? ':) + (hygn-eq? 'seq)) + (non-cset) + (parse-seq (cdr sre))) - ((hygn-eq? 'submatch) - (non-cset) - (let ((seq (parse-seq (cdr sre)))) - (if (static-regexp? seq) - (re-submatch seq) - `(,(r 're-submatch) ,(regexp->scheme seq r))))) - ((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) - (cadr sre) - (caddr sre))) + ((hygn-eq? 'submatch) + (non-cset) + (let ((seq (parse-seq (cdr sre)))) + (if (static-regexp? seq) + (re-submatch seq) + `(,(r 're-submatch) ,(regexp->scheme seq r))))) + ((hygn-eq? 'dsm) + (non-cset) + (re-dsm (parse-seq (cdddr sre)) + (cadr sre) + (caddr sre))) - ;; We could be more aggressive and push the uncase op down into - ;; partially-static regexps, but enough is enough. - ((hygn-eq? 'uncase) - (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. - (if cset? + ;; We could be more aggressive and push the uncase op down into + ;; partially-static regexps, but enough is enough. + ((hygn-eq? 'uncase) + (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. + (if cset? - (if (re-char-set? re-or-cset) ; A char set or code - (uncase-char-set re-or-cset) ; producing a char set. - `(,(r 'uncase) ,re-or-cset)) + (if (re-char-set? re-or-cset) ; A char set or code + (uncase-char-set re-or-cset) ; producing a char set. + `(,(r 'uncase) ,re-or-cset)) - (if (static-regexp? re-or-cset) ; A regexp or code - (uncase re-or-cset) ; producing a regexp. - `(,(r 'uncase) - ,(regexp->scheme (simplify-regexp re-or-cset) r)))))) + (if (static-regexp? re-or-cset) ; A regexp or code + (uncase re-or-cset) ; producing a regexp. + `(,(r 'uncase) + ,(regexp->scheme (simplify-regexp re-or-cset) r)))))) - ;; These just change the lexical case-sensitivity context. - ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f)) - ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t)) + ;; These just change the lexical case-sensitivity context. + ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f)) + ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t)) - ;; , and ,@ - ((hygn-eq? 'unquote) - (let ((exp (cadr sre))) - (if cset? - `(,%coerce-dynamic-charset ,exp) - `(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) - ((hygn-eq? 'unquote-splicing) - (let ((exp (cadr sre))) - (if cset? - `(,%coerce-dynamic-charset ,exp) - `(,%coerce-dynamic-regexp ,exp)))) + ;; , and ,@ + ((hygn-eq? 'unquote) + (let ((exp (cadr sre))) + (if cset? + `(,%coerce-dynamic-charset ,exp) + `(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) + ((hygn-eq? 'unquote-splicing) + (let ((exp (cadr sre))) + (if cset? + `(,%coerce-dynamic-charset ,exp) + `(,%coerce-dynamic-regexp ,exp)))) - ((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union - (map parse-char-class (cdr sre)) - r)) - (cs (if (char-set? cs) - (char-set-complement cs) - `(,(r 'char-set-complement) ,cs)))) - (if cset? cs (make-re-char-set cs)))) + ((hygn-eq? '~) + (let* ((cs (assoc-cset-op char-set-union 'char-set-union + (map parse-char-class (cdr sre)) + r)) + (cs (if (char-set? cs) + (char-set-complement cs) + `(,(r 'char-set-complement) ,cs)))) + (if cset? cs (make-re-char-set cs)))) - ((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection - (map parse-char-class (cdr sre)) - r))) - (if cset? cs (make-re-char-set cs)))) + ((hygn-eq? '&) + (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection + (map parse-char-class (cdr sre)) + r))) + (if cset? cs (make-re-char-set cs)))) - ((hygn-eq? '-) (if (pair? (cdr sre)) - (let* ((cs1 (parse-char-class (cadr sre))) - (cs2 (assoc-cset-op char-set-union 'char-set-union - (map parse-char-class (cddr sre)) - r)) - (cs (if (and (char-set? cs1) (char-set? cs2)) - (char-set-difference cs1 cs2) - `(,(r 'char-set-difference) - ,(if (char-set? cs1) - (char-set->scheme cs1 r) - cs1) - . ,(if (char-set? cs2) - (list (char-set->scheme cs2 r)) - (cdr cs2)))))) - (if cset? cs (make-re-char-set cs))) - (error "SRE set-difference operator (- ...) requires at least one argument"))) + ((hygn-eq? '-) + (if (pair? (cdr sre)) + (let* ((cs1 (parse-char-class (cadr sre))) + (cs2 (assoc-cset-op char-set-union 'char-set-union + (map parse-char-class (cddr sre)) + r)) + (cs (if (and (char-set? cs1) (char-set? cs2)) + (char-set-difference cs1 cs2) + `(,(r 'char-set-difference) + ,(if (char-set? cs1) + (char-set->scheme cs1 r) + cs1) + . ,(if (char-set? cs2) + (list (char-set->scheme cs2 r)) + (cdr cs2)))))) + (if cset? cs (make-re-char-set cs))) + (error "SRE set-difference operator (- ...) requires at least one argument"))) - ((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) - (if cset? cset (make-re-char-set cset)))) + ((hygn-eq? '/) + (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) + (if cset? + cset + (make-re-char-set cset)))) - ((hygn-eq? 'posix-string) - (if (and (= 1 (length (cdr sre))) - (string? (cadr sre))) - (posix-string->regexp (cadr sre)) - (error "Illegal (posix-string ...) SRE body." sre))) + ((hygn-eq? 'posix-string) + (if (and (= 1 (length (cdr sre))) + (string? (cadr sre))) + (posix-string->regexp (cadr sre)) + (error "Illegal (posix-string ...) SRE body." sre))) - (else (if (every string? sre) ; A set spec -- ("wxyz"). - (let* ((cs (apply char-set-union - (map string->char-set sre))) - (cs (if case-sensitive? cs (uncase-char-set cs)))) - (if cset? cs (make-re-char-set cs))) + (else + (if (every string? sre) ; A set spec -- ("wxyz"). + (let* ((cs (apply char-set-union + (map string->char-set sre))) + (cs (if case-sensitive? cs (uncase-char-set cs)))) + (if cset? cs (make-re-char-set cs))) - (error "Illegal SRE" sre)))))) + (error "Illegal SRE" sre)))))) - ;; It must be a char-class name (ANY, ALPHABETIC, etc.) - (else - (letrec ((hygn-memq? (lambda (sym-list) - (if (null? sym-list) - #f - (or (c sre (r (car sym-list))) - (c sre (car sym-list)) - (hygn-memq? (cdr sym-list))))))) + ;; It must be a char-class name (ANY, ALPHABETIC, etc.) + (else + (letrec ((hygn-memq? (lambda (sym-list) + (if (null? sym-list) + #f + (or (c sre (r (car sym-list))) + (c sre (car sym-list)) + (hygn-memq? (cdr sym-list))))))) (let ((cs (cond ((hygn-memq? '(nonl)) nonl-chars) ((hygn-memq? '(lower-case lower)) char-set:lower-case) @@ -343,11 +353,12 @@ (define (assoc-cset-op op op-name elts r) (receive (csets code-chunks) (partition char-set? elts) (if (pair? code-chunks) - (? ((pair? csets) - `(,(r op-name) ,(char-set->scheme (apply op csets) r) - . ,code-chunks)) - ((pair? (cdr code-chunks)) `(,(r op-name) . ,code-chunks)) - (else (car code-chunks))) ; Just one. + (cond + ((pair? csets) + `(,(r op-name) ,(char-set->scheme (apply op csets) r) + . ,code-chunks)) + ((pair? (cdr code-chunks)) `(,(r op-name) . ,code-chunks)) + (else (car code-chunks))) ; Just one. (apply op csets)))) ;;; Parse a (/ ...) char-class into a character set in @@ -484,17 +495,17 @@ (if a (if biga (if space - (and one (switch char-set= cs - ((char-set:full) full) - ((nonl-chars) nonl) - ((char-set:printing) print) - ((char-set:ascii) ascii) + (and one (cond + ((char-set= cs char-set:full) full) + ((char-set= cs nonl-chars) nonl) + ((char-set= cs char-set:printing) print) + ((char-set= cs char-set:ascii) ascii) (else #f))) (if one - (switch char-set= cs - ((char-set:letter+digit) alphanum) - ((char-set:graphic) graph) - ((char-set:hex-digit) hex) + (cond + ((char-set= cs char-set:letter+digit) alphanum) + ((char-set= cs char-set:graphic) graph) + ((char-set= cs char-set:hex-digit) hex) (else #f)) (and (char-set= cs char-set:letter) alpha))) (and (char-set= cs char-set:lower-case) lower)) ; a, not A @@ -504,13 +515,13 @@ (if one (and (not space) (char-set= cs char-set:digit) num) (if space - (switch char-set= cs - ((char-set:whitespace) white) - ((char-set:blank) blank) + (cond + (( char-set= cs char-set:whitespace) white) + (( char-set= cs char-set:blank) blank) (else #f)) - (switch char-set= cs - ((char-set:punctuation) punct) - ((char-set:iso-control) ctl) + (cond + ((char-set= cs char-set:punctuation) punct) + ((char-set= cs char-set:iso-control) ctl) (else #f)))))))) @@ -525,20 +536,21 @@ 'char-set:printing 'char-set:iso-control 'char-set:hex-digit 'char-set:blank 'char-set:ascii)))) - (? ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code. - ((char-set-empty? cs) (r 'char-set:empty)) - ((try cs) => r) - ((try (char-set-complement cs)) => - (lambda (name) `(,(r 'char-set-complement) ,name))) + (cond + ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code. + ((char-set-empty? cs) (r 'char-set:empty)) + ((try cs) => r) + ((try (char-set-complement cs)) => + (lambda (name) `(,(r 'char-set-complement) ,name))) - (else - (receive (loose+ ranges+) (char-set->in-pair cs) - (receive (loose- ranges-) (char-set->in-pair (char-set-complement cs)) - (let ((makeit (r 'spec->char-set))) - (if (< (+ (length loose-) (* 12 (length ranges-))) - (+ (length loose+) (* 12 (length ranges+)))) - `(,makeit #f ,(list->string loose-) ',ranges-) - `(,makeit #t ,(list->string loose+) ',ranges+))))))))) + (else + (receive (loose+ ranges+) (char-set->in-pair cs) + (receive (loose- ranges-) (char-set->in-pair (char-set-complement cs)) + (let ((makeit (r 'spec->char-set))) + (if (< (+ (length loose-) (* 12 (length ranges-))) + (+ (length loose+) (* 12 (length ranges+)))) + `(,makeit #f ,(list->string loose-) ',ranges-) + `(,makeit #t ,(list->string loose+) ',ranges+))))))))) @@ -557,20 +569,21 @@ 'hex-digit 'blank 'ascii))) (nchars (char-set-size cs))) - (? ((zero? nchars) `(,(r '|))) - ((= 1 nchars) (apply string (char-set->list cs))) - ((try cs) => r) - ((try (char-set-complement cs)) => - (lambda (name) `(,(r '~) ,name))) - (else (receive (cs rp comp?) (char-set->in-sexp-spec cs) - (let ((args (append (? ((string=? cs "") '()) - ((= 1 (string-length cs)) `(,cs)) - (else `((,cs)))) - (if (string=? rp "") '() - (list `(,(r '/) ,rp)))))) - (if (and (= 1 (length args)) (not comp?)) - (car args) - `(,(r (if comp? '~ '|)) . ,args))))))) + (cond + ((zero? nchars) `(,(r '|))) + ((= 1 nchars) (apply string (char-set->list cs))) + ((try cs) => r) + ((try (char-set-complement cs)) => + (lambda (name) `(,(r '~) ,name))) + (else (receive (cs rp comp?) (char-set->in-sexp-spec cs) + (let ((args (append (cond ((string=? cs "") '()) + ((= 1 (string-length cs)) `(,cs)) + (else `((,cs)))) + (if (string=? rp "") '() + (list `(,(r '/) ,rp)))))) + (if (and (= 1 (length args)) (not comp?)) + (car args) + `(,(r (if comp? '~ '|)) . ,args))))))) `(,(r 'unquote) ,cs))) ; dynamic -- , @@ -592,44 +605,45 @@ (define (regexp->sre/renamer re r) (let recur ((re re)) - (? ((re-string? re) (re-string:chars re)) + (cond + ((re-string? re) (re-string:chars re)) - ((re-seq? re) `(,(r ':) . ,(regexp->sres/renamer re r))) + ((re-seq? re) `(,(r ':) . ,(regexp->sres/renamer re r))) - ((re-choice? re) - (let ((elts (re-choice:elts re)) - (%| (r '|))) - (if (pair? elts) - `(,%| . ,(map recur elts)) - (let ((tsm (re-choice:tsm re))) - (if (zero? tsm) `(,%|) `(,(r 'dsm) ,tsm 0 (,%|))))))) + ((re-choice? re) + (let ((elts (re-choice:elts re)) + (%| (r '|))) + (if (pair? elts) + `(,%| . ,(map recur elts)) + (let ((tsm (re-choice:tsm re))) + (if (zero? tsm) `(,%|) `(,(r 'dsm) ,tsm 0 (,%|))))))) - ((re-char-set? re) (char-set->sre (re-char-set:cset re) r)) + ((re-char-set? re) (char-set->sre (re-char-set:cset re) r)) - ((re-repeat? re) - (let ((from (re-repeat:from re)) - (to (re-repeat:to re)) - (bodies (regexp->sres/renamer (re-repeat:body re) r))) - (? ((and (eqv? from 0) (not to)) `(,(r '*) . ,bodies)) - ((and (eqv? from 0) (eqv? to 1)) `(,(r '?) . ,bodies)) - ((and (eqv? from 1) (not to)) `(,(r '+) . ,bodies)) - ((eqv? from to) `(,(r '=) ,to . bodies)) - (to `(,(r '**) ,from ,to . ,bodies)) - (else `(,(r '>=) ,from . ,bodies))))) + ((re-repeat? re) + (let ((from (re-repeat:from re)) + (to (re-repeat:to re)) + (bodies (regexp->sres/renamer (re-repeat:body re) r))) + (cond ((and (eqv? from 0) (not to)) `(,(r '*) . ,bodies)) + ((and (eqv? from 0) (eqv? to 1)) `(,(r '?) . ,bodies)) + ((and (eqv? from 1) (not to)) `(,(r '+) . ,bodies)) + ((eqv? from to) `(,(r '=) ,to . bodies)) + (to `(,(r '**) ,from ,to . ,bodies)) + (else `(,(r '>=) ,from . ,bodies))))) - ((re-dsm? re) - `(,(r 'dsm) ,(re-dsm:pre-dsm re) ,(re-dsm:post-dsm re) - . ,(regexp->sres/renamer (re-dsm:body re) r))) + ((re-dsm? re) + `(,(r 'dsm) ,(re-dsm:pre-dsm re) ,(re-dsm:post-dsm re) + . ,(regexp->sres/renamer (re-dsm:body re) r))) - ((re-submatch? re) - `(,(r 'submatch) . ,(regexp->sres/renamer (re-submatch:body re) r))) + ((re-submatch? re) + `(,(r 'submatch) . ,(regexp->sres/renamer (re-submatch:body re) r))) - ((re-bos? re) (r 'bos)) - ((re-eos? re) (r 'eos)) - ((re-bol? re) (r 'bol)) - ((re-eol? re) (r 'eol)) + ((re-bos? re) (r 'bos)) + ((re-eos? re) (r 'eos)) + ((re-bol? re) (r 'bol)) + ((re-eol? re) (r 'eol)) - (else re)))) ; Presumably it's code. + (else re)))) ; Presumably it's code. (define (regexp->sre re) (regexp->sre/renamer re (lambda (x) x))) diff --git a/scsh/rx/posixstr.scm b/scsh/rx/posixstr.scm index bccee2e..d3c4607 100644 --- a/scsh/rx/posixstr.scm +++ b/scsh/rx/posixstr.scm @@ -98,28 +98,29 @@ (define (translate-regexp re) - (? ((re-string? re) (translate-string (re-string:chars re))) + (cond + ((re-string? re) (translate-string (re-string:chars re))) - ((re-repeat? re) (translate-repeat re)) - ((re-choice? re) (translate-choice re)) - ((re-seq? re) (translate-seq re)) - ((re-char-set? re) (translate-char-set (re-char-set:cset re))) + ((re-repeat? re) (translate-repeat re)) + ((re-choice? re) (translate-choice re)) + ((re-seq? re) (translate-seq re)) + ((re-char-set? re) (translate-char-set (re-char-set:cset re))) - ((re-submatch? re) (translate-submatch re)) + ((re-submatch? re) (translate-submatch re)) - ((re-bos? re) (values "^" 1 0 '#())) - ((re-eos? re) (values "$" 1 0 '#())) + ((re-bos? re) (values "^" 1 0 '#())) + ((re-eos? re) (values "$" 1 0 '#())) - ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) - ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) + ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) + ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) - ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) - (body (re-dsm:body re))) - (translate-dsm body pre-dsm - (- (re-dsm:tsm re) - (+ pre-dsm (re-tsm body)))))) + ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) + (body (re-dsm:body re))) + (translate-dsm body pre-dsm + (- (re-dsm:tsm re) + (+ pre-dsm (re-tsm body)))))) - (else (error "Illegal regular expression" re)))) + (else (error "Illegal regular expression" re)))) ;;; Translate reloc-elt ELT = (N . RE) from a sequence or choice @@ -248,35 +249,36 @@ (body (re-repeat:body re)) (tsm (re-repeat:tsm re))) - (? ((and to (> from to)) ; Unsatisfiable - (values "[^\000-\377]" 1 0 (n-falses tsm))) + (cond + ((and to (> from to)) ; Unsatisfiable + (values "[^\000-\377]" 1 0 (n-falses tsm))) - ((and to (= from to 1)) (translate-seq body)) ; RE{1,1} => RE + ((and to (= from to 1)) (translate-seq body)) ; RE{1,1} => RE - ((and to (= to 0)) ; RE{0,0} => "" - (values "" 2 0 (n-falses tsm))) + ((and to (= to 0)) ; RE{0,0} => "" + (values "" 2 0 (n-falses tsm))) - (else ; General case - (receive (s level pcount submatches) (translate-regexp body) - (receive (s level pcount submatches) ; Coerce S to level <2. - (if (> level 1) - (values (string-append "(" s ")") - 0 - (+ pcount 1) - (mapv (lambda (i) (and i (+ i 1))) submatches)) - (values s level pcount submatches)) + (else ; General case + (receive (s level pcount submatches) (translate-regexp body) + (receive (s level pcount submatches) ; Coerce S to level <2. + (if (> level 1) + (values (string-append "(" s ")") + 0 + (+ pcount 1) + (mapv (lambda (i) (and i (+ i 1))) submatches)) + (values s level pcount submatches)) - (values (if to - (? ((and (= from 0) (= to 1)) (string-append s "?")) - ((= from to) - (string-append s "{" (number->string to) "}")) - (else - (string-append s "{" (number->string from) - "," (number->string to) "}"))) - (? ((= from 0) (string-append s "*")) - ((= from 1) (string-append s "+")) - (else (string-append s "{" (number->string from) ",}")))) - 1 pcount submatches))))))) + (values (if to + (cond ((and (= from 0) (= to 1)) (string-append s "?")) + ((= from to) + (string-append s "{" (number->string to) "}")) + (else + (string-append s "{" (number->string from) + "," (number->string to) "}"))) + (cond ((= from 0) (string-append s "*")) + ((= from 1) (string-append s "+")) + (else (string-append s "{" (number->string from) ",}")))) + 1 pcount submatches))))))) @@ -372,19 +374,20 @@ (receive (loose ranges) (char-set->in-pair cset) (hack-bracket-spec loose ranges in?))))) - (? ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set + (cond + ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set - ((= 1 nchars) ; Singleton set - (translate-string (string (car (char-set->list cset))))) + ((= 1 nchars) ; Singleton set + (translate-string (string (car (char-set->list cset))))) - ;; General case. Try both [...] and [^...]. - (else (let ((s- (->bracket-string cset #t)) - (s+ (->bracket-string - (char-set-delete (char-set-complement cset) *nul*) - #f))) - (values (if (< (string-length s-) (string-length s+)) - s- s+) - 1 0 '#()))))))) + ;; General case. Try both [...] and [^...]. + (else (let ((s- (->bracket-string cset #t)) + (s+ (->bracket-string + (char-set-delete (char-set-complement cset) *nul*) + #f))) + (values (if (< (string-length s-) (string-length s+)) + s- s+) + 1 0 '#()))))))) ;;; Commentary @@ -539,18 +542,19 @@ (shrink-range-finish-up end start (- end 1)))) (define (shrink-range-finish-up c start end) - (? ((> start end) (values (list (ascii->char c)) '())) ; Empty range + (cond + ((> start end) (values (list (ascii->char c)) '())) ; Empty range - ((= start end) ; Collapse singleton range. - (values (list (ascii->char c) (ascii->char start)) - '())) + ((= start end) ; Collapse singleton range. + (values (list (ascii->char c) (ascii->char start)) + '())) - ((= (+ start 1) end) ; Collapse doubleton range. - (values (list (ascii->char c) (ascii->char start) (ascii->char end)) - '())) + ((= (+ start 1) end) ; Collapse doubleton range. + (values (list (ascii->char c) (ascii->char start) (ascii->char end)) + '())) - (else (values (list (ascii->char c)) - (list (cons (ascii->char start) (ascii->char end))))))) + (else (values (list (ascii->char c)) + (list (cons (ascii->char start) (ascii->char end))))))) ;;; We assume the bracket-spec is not a singleton, not empty, and not complete. @@ -573,16 +577,16 @@ (ranges (cdr ranges))) (receive (new-loose new-ranges) (recur ranges) (receive (new-loose0 new-ranges0) - (? ((char=? #\] start) - (shrink-range-start range)) + (cond ((char=? #\] start) + (shrink-range-start range)) - ((char=? #\] end) - (shrink-range-end range)) + ((char=? #\] end) + (shrink-range-end range)) - ((char=? #\- start) - (shrink-range-start range)) + ((char=? #\- start) + (shrink-range-start range)) - (else (values '() (list range)))) + (else (values '() (list range)))) (values (append new-loose0 new-loose) (append new-ranges0 new-ranges))))) (values loose '()))) @@ -590,38 +594,39 @@ (let ((loose (sort-list loose loose<=)) ; Sort loose chars and ranges. (ranges (sort-list ranges range<))) - (? ((or (not (equal? loose0 loose)) ; Loop if anything changed. - (not (equal? ranges0 ranges))) - (lp loose ranges end-hyphen?)) + (cond + ((or (not (equal? loose0 loose)) ; Loop if anything changed. + (not (equal? ranges0 ranges))) + (lp loose ranges end-hyphen?)) - ;; If the first range opens with .=:, and the last loose char is [, - ;; shrink it out & loop. - ((and (pair? ranges) - (memv (caar ranges) '(#\. #\= #\:)) - (pair? loose) - (char=? #\[ (car (reverse loose)))) - (receive (new-loose new-ranges) - (shrink-range-start (car ranges)) - (lp (append new-loose loose) (append new-ranges (cdr ranges)) end-hyphen?))) + ;; If the first range opens with .=:, and the last loose char is [, + ;; shrink it out & loop. + ((and (pair? ranges) + (memv (caar ranges) '(#\. #\= #\:)) + (pair? loose) + (char=? #\[ (car (reverse loose)))) + (receive (new-loose new-ranges) + (shrink-range-start (car ranges)) + (lp (append new-loose loose) (append new-ranges (cdr ranges)) end-hyphen?))) - ;; If there are no loose chars, the first range begins with ^, and - ;; we're doing an IN range, shrink out the ^. - ((and in? (null? loose) (pair? ranges) (char=? #\^ (caar ranges))) - (receive (new-loose new-ranges) (shrink-range-start (car ranges)) - (lp (append new-loose loose) (append new-ranges ranges) end-hyphen?))) + ;; If there are no loose chars, the first range begins with ^, and + ;; we're doing an IN range, shrink out the ^. + ((and in? (null? loose) (pair? ranges) (char=? #\^ (caar ranges))) + (receive (new-loose new-ranges) (shrink-range-start (car ranges)) + (lp (append new-loose loose) (append new-ranges ranges) end-hyphen?))) - ;; If both [ and - are in the loose char set, - ;; pull - out as special end-hypen. - ((and (pair? loose) - (pair? (cdr loose)) - (char=? (car loose) #\[) - (char=? (car loose) #\-)) - (lp (cons (car loose) (cddr loose)) ranges #t)) + ;; If both [ and - are in the loose char set, + ;; pull - out as special end-hypen. + ((and (pair? loose) + (pair? (cdr loose)) + (char=? (car loose) #\[) + (char=? (car loose) #\-)) + (lp (cons (car loose) (cddr loose)) ranges #t)) - ;; No change! Build the answer... - (else (string-append (if in? "[" "[^") - (list->string loose) - (apply string-append - (map (lambda (r) (string (car r) #\- (cdr r))) - ranges)) - "]")))))))) + ;; No change! Build the answer... + (else (string-append (if in? "[" "[^") + (list->string loose) + (apply string-append + (map (lambda (r) (string (car r) #\- (cdr r))) + ranges)) + "]")))))))) diff --git a/scsh/rx/re-fold.scm b/scsh/rx/re-fold.scm index 2a85879..db24f80 100644 --- a/scsh/rx/re-fold.scm +++ b/scsh/rx/re-fold.scm @@ -45,14 +45,15 @@ (error "Illegal START parameter" regexp-fold re kons knil s finish start)) (let lp ((i start) (val knil)) - (? ((regexp-search re s i) => - (lambda (m) - (let ((next-i (match:end m 0))) - (if (= next-i (match:start m 0)) - (error "An empty-string regexp match has put regexp-fold into an infinite loop." - re s start next-i) - (lp next-i (kons i m val)))))) - (else (finish i val)))))) + (cond + ((regexp-search re s i) => + (lambda (m) + (let ((next-i (match:end m 0))) + (if (= next-i (match:start m 0)) + (error "An empty-string regexp match has put regexp-fold into an infinite loop." + re s start next-i) + (lp next-i (kons i m val)))))) + (else (finish i val)))))) ;;; regexp-fold-right re kons knil s [finish start] -> value ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -79,19 +80,21 @@ (error "Illegal START parameter" regexp-fold-right re kons knil s finish start)) - (? ((regexp-search re s start) => - (lambda (m) - (finish (match:start m 0) - (let recur ((last-m m)) - (? ((regexp-search re s (match:end last-m 0)) => - (lambda (m) - (let ((i (match:start m 0))) - (if (= i (match:end m 0)) - (error "An empty-string regexp match has put regexp-fold-right into an infinite loop." - re s start i) - (kons last-m i (recur m)))))) - (else (kons last-m (string-length s) knil))))))) - (else (finish (string-length s) knil))))) + (cond + ((regexp-search re s start) => + (lambda (m) + (finish (match:start m 0) + (let recur ((last-m m)) + (cond + ((regexp-search re s (match:end last-m 0)) => + (lambda (m) + (let ((i (match:start m 0))) + (if (= i (match:end m 0)) + (error "An empty-string regexp match has put regexp-fold-right into an infinite loop." + re s start i) + (kons last-m i (recur m)))))) + (else (kons last-m (string-length s) knil))))))) + (else (finish (string-length s) knil))))) ;;; regexp-for-each re proc s [start] -> unspecific ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -104,11 +107,12 @@ (if (> start (string-length s)) (apply error "Illegal START parameter" regexp-for-each re proc s start) (let lp ((i start)) - (? ((regexp-search re s i) => - (lambda (m) - (let ((next-i (match:end m 0))) - (if (= (match:start m 0) next-i) - (error "An empty-string regexp match has put regexp-for-each into an infinite loop." - re proc s start next-i)) - (proc m) - (lp next-i))))))))) + (cond + ((regexp-search re s i) => + (lambda (m) + (let ((next-i (match:end m 0))) + (if (= (match:start m 0) next-i) + (error "An empty-string regexp match has put regexp-for-each into an infinite loop." + re proc s start next-i)) + (proc m) + (lp next-i))))))))) diff --git a/scsh/rx/re-high.scm b/scsh/rx/re-high.scm index 3aa9af3..3b4bbf4 100644 --- a/scsh/rx/re-high.scm +++ b/scsh/rx/re-high.scm @@ -17,28 +17,29 @@ (set re cre) ; cache it, cre))))) ; and return it. - (? ((re-seq? re) - (check-cache re-seq:posix set-re-seq:posix)) - ((re-choice? re) - (check-cache re-choice:posix set-re-choice:posix)) - ((re-repeat? re) - (check-cache re-repeat:posix set-re-repeat:posix)) - ((re-char-set? re) - (check-cache re-char-set:posix set-re-char-set:posix)) - ((re-string? re) - (check-cache re-string:posix set-re-string:posix)) - ((re-submatch? re) - (check-cache re-submatch:posix set-re-submatch:posix)) - ((re-dsm? re) - (check-cache re-dsm:posix set-re-dsm:posix)) + (cond + ((re-seq? re) + (check-cache re-seq:posix set-re-seq:posix)) + ((re-choice? re) + (check-cache re-choice:posix set-re-choice:posix)) + ((re-repeat? re) + (check-cache re-repeat:posix set-re-repeat:posix)) + ((re-char-set? re) + (check-cache re-char-set:posix set-re-char-set:posix)) + ((re-string? re) + (check-cache re-string:posix set-re-string:posix)) + ((re-submatch? re) + (check-cache re-submatch:posix set-re-submatch:posix)) + ((re-dsm? re) + (check-cache re-dsm:posix set-re-dsm:posix)) - ((re-bos? re) (or bos-cre (set! bos-cre (compile)))) - ((re-eos? re) (or eos-cre (set! eos-cre (compile)))) + ((re-bos? re) (or bos-cre (set! bos-cre (compile)))) + ((re-eos? re) (or eos-cre (set! eos-cre (compile)))) - ((re-bol? re) (error "BOL regexp not supported in this implementation.")) - ((re-eol? re) (error "EOL regexp not supported in this implementation.")) + ((re-bol? re) (error "BOL regexp not supported in this implementation.")) + ((re-eol? re) (error "EOL regexp not supported in this implementation.")) - (else (error "compile-regexp -- not a regexp" re))))) + (else (error "compile-regexp -- not a regexp" re))))) (define bos-cre #f) (define eos-cre #f) diff --git a/scsh/rx/re.scm b/scsh/rx/re.scm index 4f6058d..df386e6 100644 --- a/scsh/rx/re.scm +++ b/scsh/rx/re.scm @@ -103,10 +103,10 @@ (if (pair? res) (let* ((re (car res)) (tail (recur (cdr res)))) - (? ((re-seq? re) ; Flatten nested seqs - (append (recur (re-seq:elts re)) tail)) - ((re-trivial? re) tail) ; Drop trivial elts - (else (cons re tail)))) + (cond ((re-seq? re) ; Flatten nested seqs + (append (recur (re-seq:elts re)) tail)) + ((re-trivial? re) tail) ; Drop trivial elts + (else (cons re tail)))) '())))) (if (pair? res) @@ -158,10 +158,10 @@ (if (pair? res) ; & drop empty re's. (let* ((re (car res)) (tail (recur (cdr res)))) - (? ((re-choice? re) ; Flatten nested choices - (append (recur (re-choice:elts re)) tail)) - ((re-empty? re) tail) ; Drop empty re's. - (else (cons re tail)))) + (cond ((re-choice? re) ; Flatten nested choices + (append (recur (re-choice:elts re)) tail)) + ((re-empty? re) tail) ; Drop empty re's. + (else (cons re tail)))) '())))) ;; If all elts are char-class re's, fold them together. (if (every static-char-class? res) @@ -254,32 +254,33 @@ dsm0))) (values from to body dsm0))))) - (? ((and (eqv? from 1) (eqv? to 1)) ; re{1,1} => re - (values body1 pre-dsm)) + (cond + ((and (eqv? from 1) (eqv? to 1)) ; re{1,1} => re + (values body1 pre-dsm)) - ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" - (values re-trivial (+ (re-tsm body1) pre-dsm))) + ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" + (values re-trivial (+ (re-tsm body1) pre-dsm))) - ;; re{m,n} => re-empty when m>n: - ((and (integer? from) (integer? to) (> from to)) - (values re-empty (+ (re-tsm body1) pre-dsm))) + ;; re{m,n} => re-empty when m>n: + ((and (integer? from) (integer? to) (> from to)) + (values re-empty (+ (re-tsm body1) pre-dsm))) - ;; Reduce the body = re-empty case. - ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in) - (values (if (> from 0) re-empty re-trivial) ; (* (in)) => "" - pre-dsm)) + ;; Reduce the body = re-empty case. + ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in) + (values (if (> from 0) re-empty re-trivial) ; (* (in)) => "" + pre-dsm)) - ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. - ((and (integer? from) - (or (and (integer? to) (<= from to)) (not to)) - (or (re-eos? body1) - (re-bos? body1) - (and (re-string? body1) - (string=? "" (re-string:chars body1))))) - (values body1 pre-dsm)) + ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. + ((and (integer? from) + (or (and (integer? to) (<= from to)) (not to)) + (or (re-eos? body1) + (re-bos? body1) + (and (re-string? body1) + (string=? "" (re-string:chars body1))))) + (values body1 pre-dsm)) - (else (values (make-re-repeat from to body1) ; general case - pre-dsm))))) + (else (values (make-re-repeat from to body1) ; general case + pre-dsm))))) @@ -410,15 +411,16 @@ ;;; Return the total number of submatches bound in RE. (define (re-tsm re) - (? ((re-seq? re) (re-seq:tsm re)) - ((re-choice? re) (re-choice:tsm re)) - ((re-repeat? re) (re-repeat:tsm re)) - ((re-dsm? re) (re-dsm:tsm re)) - ((re-submatch? re) (re-submatch:tsm re)) - ((or (re-char-set? re) (re-string? re) - (re-bos? re) (re-eos? re) - (re-bol? re) (re-eol? re)) - 0))) + (cond + ((re-seq? re) (re-seq:tsm re)) + ((re-choice? re) (re-choice:tsm re)) + ((re-repeat? re) (re-repeat:tsm re)) + ((re-dsm? re) (re-dsm:tsm re)) + ((re-submatch? re) (re-submatch:tsm re)) + ((or (re-char-set? re) (re-string? re) + (re-bos? re) (re-eos? re) + (re-bol? re) (re-eol? re)) + 0))) ;;; (flush-submatches re) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -426,19 +428,20 @@ ;;; stripped out -- (= 0 (re-tsm (flush-submatches re))). (define (flush-submatches re) - (? ((zero? (re-tsm re)) re) ; RE has no submatches. + (cond + ((zero? (re-tsm re)) re) ; RE has no submatches. - ((re-seq? re) (re-seq (map flush-submatches (re-seq:elts re)))) - ((re-choice? re) (re-choice (map flush-submatches (re-choice:elts re)))) + ((re-seq? re) (re-seq (map flush-submatches (re-seq:elts re)))) + ((re-choice? re) (re-choice (map flush-submatches (re-choice:elts re)))) - ((re-repeat? re) (re-repeat (re-repeat:from re) - (re-repeat:to re) - (flush-submatches (re-repeat:body re)))) + ((re-repeat? re) (re-repeat (re-repeat:from re) + (re-repeat:to re) + (flush-submatches (re-repeat:body re)))) - ((re-submatch? re) (flush-submatches (re-submatch:body re))) - ((re-dsm? re) (flush-submatches (re-dsm:body re))) + ((re-submatch? re) (flush-submatches (re-submatch:body re))) + ((re-dsm? re) (flush-submatches (re-dsm:body re))) - (else re))) + (else re))) ;;; Map F over ELTS. (F x) returns two values -- the "real" return value, @@ -464,54 +467,55 @@ (define (uncase re) (receive (new-re changed?) (let recur ((re re)) - (? ((re-seq? re) - (let ((elts (re-seq:elts re))) - (receive (new-elts elts-changed?) - (map/changed recur elts) - (if elts-changed? - (values (make-re-seq/tsm new-elts (re-seq:tsm re)) #t) - (values re #f))))) + (cond + ((re-seq? re) + (let ((elts (re-seq:elts re))) + (receive (new-elts elts-changed?) + (map/changed recur elts) + (if elts-changed? + (values (make-re-seq/tsm new-elts (re-seq:tsm re)) #t) + (values re #f))))) - ((re-choice? re) - (let ((elts (re-choice:elts re))) - (receive (new-elts elts-changed?) - (map/changed recur elts) - (if elts-changed? - (values (re-choice new-elts) #t) - (values re #f))))) + ((re-choice? re) + (let ((elts (re-choice:elts re))) + (receive (new-elts elts-changed?) + (map/changed recur elts) + (if elts-changed? + (values (re-choice new-elts) #t) + (values re #f))))) - ((re-char-set? re) - (let* ((cs (re-char-set:cset re)) - (new-cs (uncase-char-set cs))) ; Better not be code. - (if (char-set= cs new-cs) - (values re #f) - (values (make-re-char-set new-cs) #t)))) + ((re-char-set? re) + (let* ((cs (re-char-set:cset re)) + (new-cs (uncase-char-set cs))) ; Better not be code. + (if (char-set= cs new-cs) + (values re #f) + (values (make-re-char-set new-cs) #t)))) - ((re-repeat? re) - (receive (new-body body-changed?) (recur (re-repeat:body re)) - (if body-changed? - (values (re-repeat (re-repeat:from re) - (re-repeat:to re) - new-body) - #t) - (values re #f)))) + ((re-repeat? re) + (receive (new-body body-changed?) (recur (re-repeat:body re)) + (if body-changed? + (values (re-repeat (re-repeat:from re) + (re-repeat:to re) + new-body) + #t) + (values re #f)))) - ((re-submatch? re) - (receive (new-body body-changed?) (recur (re-submatch:body re)) - (if body-changed? - (values (make-re-submatch/tsm new-body - (re-submatch:pre-dsm re) - (re-submatch:tsm re)) - #t) - (values re #f)))) + ((re-submatch? re) + (receive (new-body body-changed?) (recur (re-submatch:body re)) + (if body-changed? + (values (make-re-submatch/tsm new-body + (re-submatch:pre-dsm re) + (re-submatch:tsm re)) + #t) + (values re #f)))) - ((re-string? re) - (let ((cf-re (uncase-string (re-string:chars re)))) - (if (re-string? cf-re) - (values re #f) - (values cf-re #t)))) + ((re-string? re) + (let ((cf-re (uncase-string (re-string:chars re)))) + (if (re-string? cf-re) + (values re #f) + (values cf-re #t)))) - (else (values re #f)))) + (else (values re #f)))) new-re)) @@ -537,11 +541,14 @@ (define (uncase-string s) ;; SEQ is a list of chars and doubleton char-sets. (let* ((seq (string-fold-right (lambda (c lis) - (cons (? ((char-lower-case? c) (char-set c (char-upcase c))) - ((char-upper-case? c) (char-set c (char-downcase c))) - (else c)) + (cons (cond + ((char-lower-case? c) + (char-set c (char-upcase c))) + ((char-upper-case? c) + (char-set c (char-downcase c))) + (else c)) lis)) - '() s)) + '() s)) ;; Coalesce adjacent chars together into a string. (fixup (lambda (chars seq) diff --git a/scsh/rx/rx-lib.scm b/scsh/rx/rx-lib.scm index 07c3fe6..500106b 100644 --- a/scsh/rx/rx-lib.scm +++ b/scsh/rx/rx-lib.scm @@ -4,24 +4,26 @@ ;;; char-set, or regexp value. Coerce one of these to a regexp value. (define (coerce-dynamic-regexp x) - (? ((string? x) (make-re-string x)) - ((char? x) (make-re-string (string x))) - ((char-set? x) (make-re-char-set x)) - ((regexp? x) x) - (else (error "Cannot coerce value to regular expression." x)))) + (cond + ((string? x) (make-re-string x)) + ((char? x) (make-re-string (string x))) + ((char-set? x) (make-re-char-set x)) + ((regexp? x) x) + (else (error "Cannot coerce value to regular expression." x)))) ;;; In a char-set context (e.g., as an operand of the SRE - operator), ;;; a , or form must be coercable to a char-set. (define (coerce-dynamic-charset x) - (? ((string? x) - (if (= 1 (string-length x)) (string->char-set x) - (error "Multi-char string not allowed as , or ,@ SRE in char-class context." - x))) - ((char? x) (char-set x)) - ((char-set? x) x) - ((re-char-set? x) (re-char-set:cset x)) - (else (error "Cannot coerce value to character set" x)))) + (cond + ((string? x) + (if (= 1 (string-length x)) (string->char-set x) + (error "Multi-char string not allowed as , or ,@ SRE in char-class context." + x))) + ((char? x) (char-set x)) + ((char-set? x) x) + ((re-char-set? x) (re-char-set:cset x)) + (else (error "Cannot coerce value to character set" x)))) (define (spec->char-set in? loose ranges) diff --git a/scsh/rx/simp.scm b/scsh/rx/simp.scm index af38923..2b2f78b 100644 --- a/scsh/rx/simp.scm +++ b/scsh/rx/simp.scm @@ -39,27 +39,28 @@ re)) (define (simp-re re) - (? ((re-string? re) (values re 0)) - ((re-seq? re) (simp-seq re)) - ((re-choice? re) (simp-choice re)) + (cond + ((re-string? re) (values re 0)) + ((re-seq? re) (simp-seq re)) + ((re-choice? re) (simp-choice re)) - ;; Singleton char-sets reduce to the character. - ;; Bear in mind the cset field might be Scheme code instead - ;; of an actual char set if the regexp is dynamic. - ((re-char-set? re) - (values (let ((cs (re-char-set:cset re))) - (if (and (char-set? cs) - (= 1 (char-set-size cs))) - (make-re-string (string (car (char-set->list cs)))) - re)) - 0)) + ;; Singleton char-sets reduce to the character. + ;; Bear in mind the cset field might be Scheme code instead + ;; of an actual char set if the regexp is dynamic. + ((re-char-set? re) + (values (let ((cs (re-char-set:cset re))) + (if (and (char-set? cs) + (= 1 (char-set-size cs))) + (make-re-string (string (car (char-set->list cs)))) + re)) + 0)) - ((re-repeat? re) (simp-repeat re)) + ((re-repeat? re) (simp-repeat re)) - ((re-submatch? re) (simp-submatch re)) - ((re-dsm? re) (simp-dsm re)) + ((re-submatch? re) (simp-submatch re)) + ((re-dsm? re) (simp-dsm re)) - (else (values re 0)))) + (else (values re 0)))) @@ -122,54 +123,56 @@ (define (simp-seq1 elts abort tsm) (let recur ((elt (car elts)) (elts (cdr elts))) (receive (elt pre-dsm) (open-dsm elt) - (? ((re-seq? elt) ; Flatten nested seqs. - (let ((sub-elts (re-seq:elts elt))) - (recur (re-dsm (car sub-elts) pre-dsm 0) - (append (cdr sub-elts) elts)))) + (cond + ((re-seq? elt) ; Flatten nested seqs. + (let ((sub-elts (re-seq:elts elt))) + (recur (re-dsm (car sub-elts) pre-dsm 0) + (append (cdr sub-elts) elts)))) - ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty - ; (impossible) re. - ((pair? elts) - (receive (next-pre-dsm next tail) ; Simplify the tail, - (recur (car elts) (cdr elts)) ; then think about - ; the head: - ;; This guy is called when we couldn't find any other - ;; simplification. If ELT contains live submatches, then - ;; there really is nothing to be done at this step -- just - ;; assemble the pieces together and return them. If ELT - ;; *doesn't* contain any live submatches, do the same, but - ;; bubble its following next-pre-dsm submatches forwards. - (define (no-simp) - (if (has-live-submatches? elt) - (values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail)) - (values (+ pre-dsm next-pre-dsm) elt (cons next tail)))) + ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty + ; (impossible) re. + ((pair? elts) + (receive (next-pre-dsm next tail) ; Simplify the tail, + (recur (car elts) (cdr elts)) ; then think about + ; the head: + ;; This guy is called when we couldn't find any other + ;; simplification. If ELT contains live submatches, then + ;; there really is nothing to be done at this step -- just + ;; assemble the pieces together and return them. If ELT + ;; *doesn't* contain any live submatches, do the same, but + ;; bubble its following next-pre-dsm submatches forwards. + (define (no-simp) + (if (has-live-submatches? elt) + (values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail)) + (values (+ pre-dsm next-pre-dsm) elt (cons next tail)))) - ;; Coalesces two adjacent bol's, two adjacent eol's, etc. - (define (coalesce-anchor anchor?) - (if (and (anchor? elt) (anchor? next)) - (values (+ pre-dsm next-pre-dsm) elt tail) - (no-simp))) + ;; Coalesces two adjacent bol's, two adjacent eol's, etc. + (define (coalesce-anchor anchor?) + (if (and (anchor? elt) (anchor? next)) + (values (+ pre-dsm next-pre-dsm) elt tail) + (no-simp))) - (? ((re-trivial? elt) ; Drop trivial re's. - (values (+ pre-dsm next-pre-dsm) next tail)) + (cond + ((re-trivial? elt) ; Drop trivial re's. + (values (+ pre-dsm next-pre-dsm) next tail)) - ;; Coalesce adjacent strings - ((re-string? elt) - (if (re-string? next) - (values (+ pre-dsm next-pre-dsm) - (make-re-string (string-append (re-string:chars elt) - (re-string:chars next))) - tail) - (no-simp))) + ;; Coalesce adjacent strings + ((re-string? elt) + (if (re-string? next) + (values (+ pre-dsm next-pre-dsm) + (make-re-string (string-append (re-string:chars elt) + (re-string:chars next))) + tail) + (no-simp))) - ;; Coalesce adjacent bol/eol/bos/eos's. - ((re-bol? elt) (coalesce-anchor re-bol?)) - ((re-eol? elt) (coalesce-anchor re-eol?)) - ((re-bos? elt) (coalesce-anchor re-bos?)) - ((re-eos? elt) (coalesce-anchor re-eos?)) - (else (no-simp))))) + ;; Coalesce adjacent bol/eol/bos/eos's. + ((re-bol? elt) (coalesce-anchor re-bol?)) + ((re-eol? elt) (coalesce-anchor re-eol?)) + ((re-bos? elt) (coalesce-anchor re-bos?)) + ((re-eos? elt) (coalesce-anchor re-eos?)) + (else (no-simp))))) - (else (values pre-dsm elt '())))))) + (else (values pre-dsm elt '())))))) @@ -241,11 +244,12 @@ (tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail)) (tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail)) (tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail)) - (tail (? ((zero? numchars) tail) ; Drop empty char set. - ((= 1 numchars) ; {c} => "c" - (cons (make-re-string (string (car (char-set->list cset)))) - tail)) - (else (cons (make-re-char-set cset) tail))))) + (tail (cond + ((zero? numchars) tail) ; Drop empty char set. + ((= 1 numchars) ; {c} => "c" + (cons (make-re-string (string (car (char-set->list cset)))) + tail)) + (else (cons (make-re-char-set cset) tail))))) tail)) @@ -271,32 +275,33 @@ ;; Flatten nested choices. (let ((sub-elts (re-seq:elts elt))) (receive (tail-pre-dsm cset bos? eos? bol? eol? tail) - (recur (append sub-elts elts) - prev-cset - prev-bos? prev-eos? - prev-bol? prev-eol?) + (recur (append sub-elts elts) + prev-cset + prev-bos? prev-eos? + prev-bol? prev-eol?) (values (+ pre-dsm tail-pre-dsm) cset bos? eos? bol? eol? tail))) ;; Simplify the tail, then think about the head. (receive (tail-pre-dsm cset bos? eos? bol? eol? tail) - (recur elts - (? ((and (re-string? elt) - (= 1 (string-length (re-string:chars elt)))) - (char-set-union prev-cset - (string->char-set (re-string:chars elt)))) + (recur elts + (cond + ((and (re-string? elt) + (= 1 (string-length (re-string:chars elt)))) + (char-set-union prev-cset + (string->char-set (re-string:chars elt)))) - ;; The cset might be a Scheme exp. - ((and (re-char-set? elt) - (char-set? (re-char-set:cset elt))) - (char-set-union prev-cset - (re-char-set:cset elt))) + ;; The cset might be a Scheme exp. + ((and (re-char-set? elt) + (char-set? (re-char-set:cset elt))) + (char-set-union prev-cset + (re-char-set:cset elt))) - (else prev-cset)) - (or prev-bos? (re-bos? elt)) - (or prev-eos? (re-eos? elt)) - (or prev-bol? (re-bol? elt)) - (or prev-eol? (re-eol? elt))) + (else prev-cset)) + (or prev-bos? (re-bos? elt)) + (or prev-eos? (re-eos? elt)) + (or prev-bol? (re-bol? elt)) + (or prev-eol? (re-eol? elt))) ;; This guy is called when we couldn't find any other ;; simplification. If ELT contains live submatches, then we @@ -334,29 +339,30 @@ cset bos? eos? bol? eol? (cons elt tail)))) - (? ((and (re-char-set? elt) - (char-set? (re-char-set:cset elt))) ; Might be Scheme code - (values (+ pre-dsm tail-pre-dsm) - (char-set-union cset (re-char-set:cset elt)) - bos? eos? bol? eol? tail)) + (cond + ((and (re-char-set? elt) + (char-set? (re-char-set:cset elt))) ; Might be Scheme code + (values (+ pre-dsm tail-pre-dsm) + (char-set-union cset (re-char-set:cset elt)) + bos? eos? bol? eol? tail)) - ;; Treat a singleton string "c" as a singleton set {c}. - ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) - (values (+ pre-dsm tail-pre-dsm) - (char-set-union cset (string->char-set (re-string:chars elt))) - bos? eos? bol? eol? tail)) + ;; Treat a singleton string "c" as a singleton set {c}. + ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) + (values (+ pre-dsm tail-pre-dsm) + (char-set-union cset (string->char-set (re-string:chars elt))) + bos? eos? bol? eol? tail)) - ;; Coalesce bol/eol/bos/eos's. - ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset - #t eos? bol? eol? tail)) - ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? #t bol? eol? tail)) - ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? #t eol? tail)) - ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? bol? #t tail)) + ;; Coalesce bol/eol/bos/eos's. + ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset + #t eos? bol? eol? tail)) + ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? #t bol? eol? tail)) + ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? #t eol? tail)) + ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? bol? #t tail)) - (else (no-simp))))))) + (else (no-simp))))))) (values 0 char-set:empty #f #f #f #f '())))) @@ -378,15 +384,15 @@ (define (has-live-submatches? re) (or (re-submatch? re) - (? ((re-seq? re) (every has-live-submatches? (re-seq:elts re))) - ((re-choice? re) (every has-live-submatches? (re-choice:elts re))) - ((re-repeat? re) (has-live-submatches? (re-repeat:body re))) - ((re-dsm? re) (has-live-submatches? (re-dsm:body re))) + (cond ((re-seq? re) (every has-live-submatches? (re-seq:elts re))) + ((re-choice? re) (every has-live-submatches? (re-choice:elts re))) + ((re-repeat? re) (has-live-submatches? (re-repeat:body re))) + ((re-dsm? re) (has-live-submatches? (re-dsm:body re))) - ;; If it's not one of these things, then this isn't a regexp -- it's - ;; a chunk of Scheme code producing a regexp, and we conservatively - ;; return #T -- the expression *might* produce a regexp containing - ;; a live submatch: - (else (not (or (re-char-set? re) (re-string? re) - (re-bos? re) (re-eos? re) - (re-bol? re) (re-eol? re))))))) + ;; If it's not one of these things, then this isn't a regexp -- it's + ;; a chunk of Scheme code producing a regexp, and we conservatively + ;; return #T -- the expression *might* produce a regexp containing + ;; a live submatch: + (else (not (or (re-char-set? re) (re-string? re) + (re-bos? re) (re-eos? re) + (re-bol? re) (re-eol? re)))))))