From 4290ccae21ca9a565b1c1bbed2544c302e5c6b6a Mon Sep 17 00:00:00 2001 From: sperber Date: Wed, 30 Oct 2002 12:33:51 +0000 Subject: [PATCH] Remove the Olinist CONDITITIONALS package, also fixing some atrocious uses of whitespace, newline, and indentation. This saves another 60k or so in scsh.image, actually getting us below the level of 0.6.2. --- Makefile.in | 4 - scsh/rx/cond-package.scm | 160 -------------- scsh/rx/packages.scm | 4 +- scsh/rx/parse.scm | 456 ++++++++++++++++++++------------------- scsh/rx/posixstr.scm | 203 ++++++++--------- scsh/rx/re-fold.scm | 62 +++--- scsh/rx/re-high.scm | 39 ++-- scsh/rx/re.scm | 193 +++++++++-------- scsh/rx/rx-lib.scm | 28 +-- scsh/rx/simp.scm | 234 ++++++++++---------- 10 files changed, 628 insertions(+), 755 deletions(-) delete mode 100644 scsh/rx/cond-package.scm diff --git a/Makefile.in b/Makefile.in index 02b489c..8c6bccd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -797,8 +797,6 @@ SCHEME =scsh/awk.scm \ scsh/pty.scm \ scsh/rdelim.scm \ scsh/rw.scm \ - scsh/rx/packages.scm \ - scsh/rx/cond-package.scm \ scsh/scsh-condition.scm \ scsh/scsh-interfaces.scm \ scsh/scsh-package.scm \ @@ -816,7 +814,6 @@ SCHEME =scsh/awk.scm \ scsh/tty.scm \ scsh/utilities.scm \ scsh/weaktables.scm \ - scsh/rx/cond-package.scm \ scsh/rx/packages.scm \ scsh/rx/re-match-syntax.scm \ scsh/rx/rx-lib.scm \ @@ -856,7 +853,6 @@ bs: build/build-scsh-image loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \ $(srcdir)/scsh/machine/packages.scm \ $(srcdir)/scsh/rx/packages.scm \ - $(srcdir)/scsh/rx/cond-package.scm \ $(srcdir)/scsh/scsh-package.scm \ $(srcdir)/scsh/lib/ccp-pack.scm \ $(srcdir)/scsh/lib/char-package.scm diff --git a/scsh/rx/cond-package.scm b/scsh/rx/cond-package.scm deleted file mode 100644 index 1907ace..0000000 --- a/scsh/rx/cond-package.scm +++ /dev/null @@ -1,160 +0,0 @@ -(define-structure conditionals - (export (define-simple-syntax :syntax) - (when :syntax) - (unless :syntax) - (? :syntax) - (switchq :syntax) - (switch :syntax) - (prog0 :syntax) - (land* :syntax)) - (open scheme) - (begin - -;;; (define-simple-syntax (name subforms ...) expansion) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-syntax define-simple-syntax - (syntax-rules () - ((define-simple-syntax (name subforms ...) expansion) - (define-syntax name (syntax-rules () ((name subforms ...) expansion)))))) - - -;;; ? = COND -;;; (WHEN test body ...) (SWITCHQ = key clause ...) -;;; (UNLESS test body ...) (SWITCH = key clause ...) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Handy conditional forms. ? is so short that it renders WHEN pretty -;;; much useless. - -(define-simple-syntax (when test body ...) - (if test (begin body ...))) - -(define-simple-syntax (unless test body ...) - (if (not test) (begin body ...))) - -;;; ? is synonym for COND. -(define-simple-syntax (? clause ...) (cond clause ...)) - - -;;; (PROG0 val-exp exp ...) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define-simple-syntax (prog0 val-exp exp ...) - (let ((v val-exp)) exp ... v)) - - -;;; (land* (clause ...) body ...) -*- Scheme -*- -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Evaluate each clause. If any clause returns false, land* stops and -;;; returns false. If all the clauses evaluate to a true value, return -;;; the value of the body. -;;; -;;; The difference between LAND* and AND is that LAND* binds names to -;;; the values of its clauses, which may be used by subsequent clauses. -;;; Clauses are of the form -;;; (var exp) ; binds VAR to the value of EXP. -;;; (exp) ; No binding. -;;; var ; Reference -- no binding. -;;; -;;; Example: -;;; (land* ((probe (assq key alist))) -;;; (cdr probe)) -;;; -;;; LAND* is due to Oleg Kiselyov (http://pobox.com/~oleg); I wrote this -;;; simple implementation as a high-level R5RS DEFINE-SYNTAX macro. -;;; Olin 98/9/29 - -(define-syntax land* - (syntax-rules () - ((land* () body ...) (begin body ...)) - - ((land* ((var exp) clause ...) body ...) - (let ((var exp)) (and var (land* (clause ...) body ...)))) - - ((land* ((#f exp) clause ...) body ...) - (and exp (land* (clause ...) body ...))) - - ((land* ((exp) clause ...) body ...) - (and exp (land* (clause ...) body ...))) - - ((land* (var clause ...) body ...) - (and var (land* (clause ...) body ...))))) - - - -;;; Like CASE, but you specify the key-comparison procedure. -;;; SWITCH evaluates its keys each time through the conditional. -;;; SWITCHQ keys are not evaluated -- are simply constants. -;;; (switchq string=? (vector-ref vec i) -;;; (("plus" "minus") ...) -;;; (("times" "div") ...) -;;; (else ...)) - -(define-simple-syntax (switchq compare key clause ...) - (let ((k key) ; Eval KEY and COMPARE - (c compare)) ; just once, then call %switch. - (%switchq c k clause ...))) ; C, K are vars, hence replicable. - -(define-syntax %switchq - (syntax-rules (else) - ((%switchq compare key ((key1 ...) body1 body2 ...) rest ...) - (if (or (compare key 'key1) ...) - (begin body1 body2 ...) - (%switchq compare key rest ...))) - - ((%switchq compare key ((key1 ...)) rest ...) ; Null body. - (if (not (or (compare key 'key1) ...)) - (%switchq compare key rest ...))) - - ((%switchq compare key (else body ...)) - (begin body ...)) - - ((%switchq compare key) '#f))) - - -(define-simple-syntax (switch compare key clause ...) - (let ((k key) ; Eval KEY and COMPARE - (c compare)) ; just once, then call %switch. - (%switch c k clause ...))) ; C, K are vars, hence replicable. - -(define-syntax %switch - (syntax-rules (else) - ((%switch compare key ((key1 ...) body1 body2 ...) rest ...) - (if (or (compare key key1) ...) - (begin body1 body2 ...) - (%switch compare key rest ...))) - - ((%switch compare key ((key1 ...)) rest ...) ; Null body. - (if (not (or (compare key key1) ...)) - (%switch compare key rest ...))) - - ((%switch compare key (else body ...)) - (begin body ...)) - - ((%switch compare key) '#f))) - -;;; I can't get this to work -- S48 complains "too many ...'s". -;(define-syntax switchq -; (syntax-rules (else) -; ((switchq compare key clause ...) -; (letrec-syntax ((%switchq (syntax-rules (else) -; ((%switchq compare key -; ((key1 ...) body1 body2 ...) rest ...) -; (if (or (compare key 'key1) ...) -; (begin body1 body2 ...) -; (%switchq compare key rest ...))) -; -; ; Null body. -; ((%switchq compare key ((key1 ...)) rest ...) -; (if (not (or (compare key 'key1) ...)) -; (%switchq compare key rest ...))) -; -; ((%switchq compare key (else body ...)) -; (begin body ...)) -; -; ((%switchq compare key) '#f)))) -; -; (let ((k key) ; Eval KEY and COMPARE -; (c compare)) ; just once, then call %switch. -; (%switchq c k clause ...)))))); C, K are vars, hence replicable. -)) diff --git a/scsh/rx/packages.scm b/scsh/rx/packages.scm index a2a2126..634a530 100644 --- a/scsh/rx/packages.scm +++ b/scsh/rx/packages.scm @@ -231,7 +231,6 @@ ;; re-posix-parsers ; regexp->posix-string let-opt sort ; Posix renderer - conditionals define-record-types defrec-package receiving @@ -275,7 +274,6 @@ (define-structure rx-lib rx-lib-interface (open re-internals - conditionals re-level-0 (subset srfi-1 (fold)) srfi-14 @@ -342,7 +340,7 @@ (define-structure re-folders re-folders-interface - (open re-level-0 let-opt conditionals error-package scheme) + (open re-level-0 let-opt error-package scheme) (files re-fold) ; (optimize auto-integrate) ) diff --git a/scsh/rx/parse.scm b/scsh/rx/parse.scm index d0e8faf..8c91ca6 100644 --- a/scsh/rx/parse.scm +++ b/scsh/rx/parse.scm @@ -22,8 +22,6 @@ ;;; is the char-set parsing and unparsing, which deal with ranges of ;;; characters. We assume an 8-bit ASCII superset. -;;; Imports: -;;; ? for COND, and SWITCHQ conditional form. ;;; every ;;; This code is much hairier than it would otherwise be because of the @@ -52,23 +50,24 @@ ;;; in the form of embedded code in some of the regexp's fields? (define (static-regexp? re) - (? ((re-seq? re) (every static-regexp? (re-seq:elts re))) - ((re-choice? re) (every static-regexp? (re-choice:elts re))) + (cond + ((re-seq? re) (every static-regexp? (re-seq:elts re))) + ((re-choice? re) (every static-regexp? (re-choice:elts re))) + + ((re-char-set? re) (char-set? (re-char-set:cset re))) ; Might be code. - ((re-char-set? re) (char-set? (re-char-set:cset re))) ; Might be code. + ((re-repeat? re) ; FROM & TO fields might be code. + (let ((to (re-repeat:to re))) + (and (integer? (re-repeat:from re)) + (or (not to) (integer? to)) + (static-regexp? (re-repeat:body re))))) - ((re-repeat? re) ; FROM & TO fields might be code. - (let ((to (re-repeat:to re))) - (and (integer? (re-repeat:from re)) - (or (not to) (integer? to)) - (static-regexp? (re-repeat:body re))))) + ((re-dsm? re) (static-regexp? (re-dsm:body re))) + ((re-submatch? re) (static-regexp? (re-submatch:body re))) - ((re-dsm? re) (static-regexp? (re-dsm:body re))) - ((re-submatch? re) (static-regexp? (re-submatch:body re))) - - (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not - (re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code. - (re-string? re))))) + (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not + (re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code. + (re-string? re))))) ;;; Two useful standard char sets @@ -149,153 +148,164 @@ (re-repeat from to seq) `(,(r 're-repeat) ',from ',to ,(regexp->scheme seq r))))) - (? ((char? sre) (parse-char-re sre case-sensitive? cset?)) - ((string? sre) (parse-string-re sre case-sensitive? cset?)) + (cond + ((char? sre) (parse-char-re sre case-sensitive? cset?)) + ((string? sre) (parse-string-re sre case-sensitive? cset?)) - ((c sre %bos) (non-cset) re-bos) - ((c sre %eos) (non-cset) re-eos) + ((c sre %bos) (non-cset) re-bos) + ((c sre %eos) (non-cset) re-eos) - ((c sre %bol) (non-cset) re-bol) - ((c sre %eol) (non-cset) re-eol) + ((c sre %bol) (non-cset) re-bol) + ((c sre %eol) (non-cset) re-eol) - ((pair? sre) - (let ((hygn-eq? (lambda (the-sym) (or (c (car sre) (r the-sym)) - (c (car sre) the-sym))))) - (cond - ((hygn-eq? '*) - (non-cset) - (build-re-repeat 0 #f (cdr sre))) - ((hygn-eq? '+) - (non-cset) - (build-re-repeat 1 #f (cdr sre))) - ((hygn-eq? '?) - (non-cset) - (build-re-repeat 0 1 (cdr sre))) - ((hygn-eq? '=) ; #### - (non-cset) - (let ((n (cadr sre))) - (build-re-repeat n n (cddr sre)))) - ((hygn-eq? '>=) - (non-cset) - (build-re-repeat (cadr sre) #f (cddr sre))) - ((hygn-eq? '**) - (non-cset) - (build-re-repeat (cadr sre) (caddr sre) - (cdddr sre))) + ((pair? sre) + (let ((hygn-eq? (lambda (the-sym) (or (c (car sre) (r the-sym)) + (c (car sre) the-sym))))) + (cond + ((hygn-eq? '*) + (non-cset) + (build-re-repeat 0 #f (cdr sre))) + ((hygn-eq? '+) + (non-cset) + (build-re-repeat 1 #f (cdr sre))) + ((hygn-eq? '?) + (non-cset) + (build-re-repeat 0 1 (cdr sre))) + ((hygn-eq? '=) + (non-cset) + (let ((n (cadr sre))) + (build-re-repeat n n (cddr sre)))) + ((hygn-eq? '>=) + (non-cset) + (build-re-repeat (cadr sre) #f (cddr sre))) + ((hygn-eq? '**) + (non-cset) + (build-re-repeat (cadr sre) (caddr sre) + (cdddr sre))) - ;; Choice is special wrt cset? because it's "polymorphic". - ;; Note that RE-CHOICE guarantees to construct a char-set - ;; or single-char string regexp if all of its args are char - ;; classes. - ((or (hygn-eq? '|) - (hygn-eq? 'or)) - (let ((elts (map (lambda (sre) - (recur sre case-sensitive? cset?)) - (cdr sre)))) - (if cset? - (assoc-cset-op char-set-union 'char-set-union elts r) - (re-choice elts)))) + ;; Choice is special wrt cset? because it's "polymorphic". + ;; Note that RE-CHOICE guarantees to construct a char-set + ;; or single-char string regexp if all of its args are char + ;; classes. + ((or (hygn-eq? '|) + (hygn-eq? 'or)) + (let ((elts (map (lambda (sre) + (recur sre case-sensitive? cset?)) + (cdr sre)))) + (if cset? + (assoc-cset-op char-set-union 'char-set-union elts r) + (re-choice elts)))) - ((or (hygn-eq? ':) - (hygn-eq? 'seq)) - (non-cset) (parse-seq (cdr sre))) + ((or (hygn-eq? ':) + (hygn-eq? 'seq)) + (non-cset) + (parse-seq (cdr sre))) - ((hygn-eq? 'submatch) - (non-cset) - (let ((seq (parse-seq (cdr sre)))) - (if (static-regexp? seq) - (re-submatch seq) - `(,(r 're-submatch) ,(regexp->scheme seq r))))) - ((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) - (cadr sre) - (caddr sre))) + ((hygn-eq? 'submatch) + (non-cset) + (let ((seq (parse-seq (cdr sre)))) + (if (static-regexp? seq) + (re-submatch seq) + `(,(r 're-submatch) ,(regexp->scheme seq r))))) + ((hygn-eq? 'dsm) + (non-cset) + (re-dsm (parse-seq (cdddr sre)) + (cadr sre) + (caddr sre))) - ;; We could be more aggressive and push the uncase op down into - ;; partially-static regexps, but enough is enough. - ((hygn-eq? 'uncase) - (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. - (if cset? + ;; We could be more aggressive and push the uncase op down into + ;; partially-static regexps, but enough is enough. + ((hygn-eq? 'uncase) + (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. + (if cset? - (if (re-char-set? re-or-cset) ; A char set or code - (uncase-char-set re-or-cset) ; producing a char set. - `(,(r 'uncase) ,re-or-cset)) + (if (re-char-set? re-or-cset) ; A char set or code + (uncase-char-set re-or-cset) ; producing a char set. + `(,(r 'uncase) ,re-or-cset)) - (if (static-regexp? re-or-cset) ; A regexp or code - (uncase re-or-cset) ; producing a regexp. - `(,(r 'uncase) - ,(regexp->scheme (simplify-regexp re-or-cset) r)))))) + (if (static-regexp? re-or-cset) ; A regexp or code + (uncase re-or-cset) ; producing a regexp. + `(,(r 'uncase) + ,(regexp->scheme (simplify-regexp re-or-cset) r)))))) - ;; These just change the lexical case-sensitivity context. - ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f)) - ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t)) + ;; These just change the lexical case-sensitivity context. + ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f)) + ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t)) - ;; , and ,@ - ((hygn-eq? 'unquote) - (let ((exp (cadr sre))) - (if cset? - `(,%coerce-dynamic-charset ,exp) - `(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) - ((hygn-eq? 'unquote-splicing) - (let ((exp (cadr sre))) - (if cset? - `(,%coerce-dynamic-charset ,exp) - `(,%coerce-dynamic-regexp ,exp)))) + ;; , and ,@ + ((hygn-eq? 'unquote) + (let ((exp (cadr sre))) + (if cset? + `(,%coerce-dynamic-charset ,exp) + `(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) + ((hygn-eq? 'unquote-splicing) + (let ((exp (cadr sre))) + (if cset? + `(,%coerce-dynamic-charset ,exp) + `(,%coerce-dynamic-regexp ,exp)))) - ((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union - (map parse-char-class (cdr sre)) - r)) - (cs (if (char-set? cs) - (char-set-complement cs) - `(,(r 'char-set-complement) ,cs)))) - (if cset? cs (make-re-char-set cs)))) + ((hygn-eq? '~) + (let* ((cs (assoc-cset-op char-set-union 'char-set-union + (map parse-char-class (cdr sre)) + r)) + (cs (if (char-set? cs) + (char-set-complement cs) + `(,(r 'char-set-complement) ,cs)))) + (if cset? cs (make-re-char-set cs)))) - ((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection - (map parse-char-class (cdr sre)) - r))) - (if cset? cs (make-re-char-set cs)))) + ((hygn-eq? '&) + (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection + (map parse-char-class (cdr sre)) + r))) + (if cset? cs (make-re-char-set cs)))) - ((hygn-eq? '-) (if (pair? (cdr sre)) - (let* ((cs1 (parse-char-class (cadr sre))) - (cs2 (assoc-cset-op char-set-union 'char-set-union - (map parse-char-class (cddr sre)) - r)) - (cs (if (and (char-set? cs1) (char-set? cs2)) - (char-set-difference cs1 cs2) - `(,(r 'char-set-difference) - ,(if (char-set? cs1) - (char-set->scheme cs1 r) - cs1) - . ,(if (char-set? cs2) - (list (char-set->scheme cs2 r)) - (cdr cs2)))))) - (if cset? cs (make-re-char-set cs))) - (error "SRE set-difference operator (- ...) requires at least one argument"))) + ((hygn-eq? '-) + (if (pair? (cdr sre)) + (let* ((cs1 (parse-char-class (cadr sre))) + (cs2 (assoc-cset-op char-set-union 'char-set-union + (map parse-char-class (cddr sre)) + r)) + (cs (if (and (char-set? cs1) (char-set? cs2)) + (char-set-difference cs1 cs2) + `(,(r 'char-set-difference) + ,(if (char-set? cs1) + (char-set->scheme cs1 r) + cs1) + . ,(if (char-set? cs2) + (list (char-set->scheme cs2 r)) + (cdr cs2)))))) + (if cset? cs (make-re-char-set cs))) + (error "SRE set-difference operator (- ...) requires at least one argument"))) - ((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) - (if cset? cset (make-re-char-set cset)))) + ((hygn-eq? '/) + (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) + (if cset? + cset + (make-re-char-set cset)))) - ((hygn-eq? 'posix-string) - (if (and (= 1 (length (cdr sre))) - (string? (cadr sre))) - (posix-string->regexp (cadr sre)) - (error "Illegal (posix-string ...) SRE body." sre))) + ((hygn-eq? 'posix-string) + (if (and (= 1 (length (cdr sre))) + (string? (cadr sre))) + (posix-string->regexp (cadr sre)) + (error "Illegal (posix-string ...) SRE body." sre))) - (else (if (every string? sre) ; A set spec -- ("wxyz"). - (let* ((cs (apply char-set-union - (map string->char-set sre))) - (cs (if case-sensitive? cs (uncase-char-set cs)))) - (if cset? cs (make-re-char-set cs))) + (else + (if (every string? sre) ; A set spec -- ("wxyz"). + (let* ((cs (apply char-set-union + (map string->char-set sre))) + (cs (if case-sensitive? cs (uncase-char-set cs)))) + (if cset? cs (make-re-char-set cs))) - (error "Illegal SRE" sre)))))) + (error "Illegal SRE" sre)))))) - ;; It must be a char-class name (ANY, ALPHABETIC, etc.) - (else - (letrec ((hygn-memq? (lambda (sym-list) - (if (null? sym-list) - #f - (or (c sre (r (car sym-list))) - (c sre (car sym-list)) - (hygn-memq? (cdr sym-list))))))) + ;; It must be a char-class name (ANY, ALPHABETIC, etc.) + (else + (letrec ((hygn-memq? (lambda (sym-list) + (if (null? sym-list) + #f + (or (c sre (r (car sym-list))) + (c sre (car sym-list)) + (hygn-memq? (cdr sym-list))))))) (let ((cs (cond ((hygn-memq? '(nonl)) nonl-chars) ((hygn-memq? '(lower-case lower)) char-set:lower-case) @@ -343,11 +353,12 @@ (define (assoc-cset-op op op-name elts r) (receive (csets code-chunks) (partition char-set? elts) (if (pair? code-chunks) - (? ((pair? csets) - `(,(r op-name) ,(char-set->scheme (apply op csets) r) - . ,code-chunks)) - ((pair? (cdr code-chunks)) `(,(r op-name) . ,code-chunks)) - (else (car code-chunks))) ; Just one. + (cond + ((pair? csets) + `(,(r op-name) ,(char-set->scheme (apply op csets) r) + . ,code-chunks)) + ((pair? (cdr code-chunks)) `(,(r op-name) . ,code-chunks)) + (else (car code-chunks))) ; Just one. (apply op csets)))) ;;; Parse a (/ ...) char-class into a character set in @@ -484,17 +495,17 @@ (if a (if biga (if space - (and one (switch char-set= cs - ((char-set:full) full) - ((nonl-chars) nonl) - ((char-set:printing) print) - ((char-set:ascii) ascii) + (and one (cond + ((char-set= cs char-set:full) full) + ((char-set= cs nonl-chars) nonl) + ((char-set= cs char-set:printing) print) + ((char-set= cs char-set:ascii) ascii) (else #f))) (if one - (switch char-set= cs - ((char-set:letter+digit) alphanum) - ((char-set:graphic) graph) - ((char-set:hex-digit) hex) + (cond + ((char-set= cs char-set:letter+digit) alphanum) + ((char-set= cs char-set:graphic) graph) + ((char-set= cs char-set:hex-digit) hex) (else #f)) (and (char-set= cs char-set:letter) alpha))) (and (char-set= cs char-set:lower-case) lower)) ; a, not A @@ -504,13 +515,13 @@ (if one (and (not space) (char-set= cs char-set:digit) num) (if space - (switch char-set= cs - ((char-set:whitespace) white) - ((char-set:blank) blank) + (cond + (( char-set= cs char-set:whitespace) white) + (( char-set= cs char-set:blank) blank) (else #f)) - (switch char-set= cs - ((char-set:punctuation) punct) - ((char-set:iso-control) ctl) + (cond + ((char-set= cs char-set:punctuation) punct) + ((char-set= cs char-set:iso-control) ctl) (else #f)))))))) @@ -525,20 +536,21 @@ 'char-set:printing 'char-set:iso-control 'char-set:hex-digit 'char-set:blank 'char-set:ascii)))) - (? ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code. - ((char-set-empty? cs) (r 'char-set:empty)) - ((try cs) => r) - ((try (char-set-complement cs)) => - (lambda (name) `(,(r 'char-set-complement) ,name))) + (cond + ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code. + ((char-set-empty? cs) (r 'char-set:empty)) + ((try cs) => r) + ((try (char-set-complement cs)) => + (lambda (name) `(,(r 'char-set-complement) ,name))) - (else - (receive (loose+ ranges+) (char-set->in-pair cs) - (receive (loose- ranges-) (char-set->in-pair (char-set-complement cs)) - (let ((makeit (r 'spec->char-set))) - (if (< (+ (length loose-) (* 12 (length ranges-))) - (+ (length loose+) (* 12 (length ranges+)))) - `(,makeit #f ,(list->string loose-) ',ranges-) - `(,makeit #t ,(list->string loose+) ',ranges+))))))))) + (else + (receive (loose+ ranges+) (char-set->in-pair cs) + (receive (loose- ranges-) (char-set->in-pair (char-set-complement cs)) + (let ((makeit (r 'spec->char-set))) + (if (< (+ (length loose-) (* 12 (length ranges-))) + (+ (length loose+) (* 12 (length ranges+)))) + `(,makeit #f ,(list->string loose-) ',ranges-) + `(,makeit #t ,(list->string loose+) ',ranges+))))))))) @@ -557,20 +569,21 @@ 'hex-digit 'blank 'ascii))) (nchars (char-set-size cs))) - (? ((zero? nchars) `(,(r '|))) - ((= 1 nchars) (apply string (char-set->list cs))) - ((try cs) => r) - ((try (char-set-complement cs)) => - (lambda (name) `(,(r '~) ,name))) - (else (receive (cs rp comp?) (char-set->in-sexp-spec cs) - (let ((args (append (? ((string=? cs "") '()) - ((= 1 (string-length cs)) `(,cs)) - (else `((,cs)))) - (if (string=? rp "") '() - (list `(,(r '/) ,rp)))))) - (if (and (= 1 (length args)) (not comp?)) - (car args) - `(,(r (if comp? '~ '|)) . ,args))))))) + (cond + ((zero? nchars) `(,(r '|))) + ((= 1 nchars) (apply string (char-set->list cs))) + ((try cs) => r) + ((try (char-set-complement cs)) => + (lambda (name) `(,(r '~) ,name))) + (else (receive (cs rp comp?) (char-set->in-sexp-spec cs) + (let ((args (append (cond ((string=? cs "") '()) + ((= 1 (string-length cs)) `(,cs)) + (else `((,cs)))) + (if (string=? rp "") '() + (list `(,(r '/) ,rp)))))) + (if (and (= 1 (length args)) (not comp?)) + (car args) + `(,(r (if comp? '~ '|)) . ,args))))))) `(,(r 'unquote) ,cs))) ; dynamic -- , @@ -592,44 +605,45 @@ (define (regexp->sre/renamer re r) (let recur ((re re)) - (? ((re-string? re) (re-string:chars re)) + (cond + ((re-string? re) (re-string:chars re)) - ((re-seq? re) `(,(r ':) . ,(regexp->sres/renamer re r))) + ((re-seq? re) `(,(r ':) . ,(regexp->sres/renamer re r))) - ((re-choice? re) - (let ((elts (re-choice:elts re)) - (%| (r '|))) - (if (pair? elts) - `(,%| . ,(map recur elts)) - (let ((tsm (re-choice:tsm re))) - (if (zero? tsm) `(,%|) `(,(r 'dsm) ,tsm 0 (,%|))))))) + ((re-choice? re) + (let ((elts (re-choice:elts re)) + (%| (r '|))) + (if (pair? elts) + `(,%| . ,(map recur elts)) + (let ((tsm (re-choice:tsm re))) + (if (zero? tsm) `(,%|) `(,(r 'dsm) ,tsm 0 (,%|))))))) - ((re-char-set? re) (char-set->sre (re-char-set:cset re) r)) + ((re-char-set? re) (char-set->sre (re-char-set:cset re) r)) - ((re-repeat? re) - (let ((from (re-repeat:from re)) - (to (re-repeat:to re)) - (bodies (regexp->sres/renamer (re-repeat:body re) r))) - (? ((and (eqv? from 0) (not to)) `(,(r '*) . ,bodies)) - ((and (eqv? from 0) (eqv? to 1)) `(,(r '?) . ,bodies)) - ((and (eqv? from 1) (not to)) `(,(r '+) . ,bodies)) - ((eqv? from to) `(,(r '=) ,to . bodies)) - (to `(,(r '**) ,from ,to . ,bodies)) - (else `(,(r '>=) ,from . ,bodies))))) + ((re-repeat? re) + (let ((from (re-repeat:from re)) + (to (re-repeat:to re)) + (bodies (regexp->sres/renamer (re-repeat:body re) r))) + (cond ((and (eqv? from 0) (not to)) `(,(r '*) . ,bodies)) + ((and (eqv? from 0) (eqv? to 1)) `(,(r '?) . ,bodies)) + ((and (eqv? from 1) (not to)) `(,(r '+) . ,bodies)) + ((eqv? from to) `(,(r '=) ,to . bodies)) + (to `(,(r '**) ,from ,to . ,bodies)) + (else `(,(r '>=) ,from . ,bodies))))) - ((re-dsm? re) - `(,(r 'dsm) ,(re-dsm:pre-dsm re) ,(re-dsm:post-dsm re) - . ,(regexp->sres/renamer (re-dsm:body re) r))) + ((re-dsm? re) + `(,(r 'dsm) ,(re-dsm:pre-dsm re) ,(re-dsm:post-dsm re) + . ,(regexp->sres/renamer (re-dsm:body re) r))) - ((re-submatch? re) - `(,(r 'submatch) . ,(regexp->sres/renamer (re-submatch:body re) r))) + ((re-submatch? re) + `(,(r 'submatch) . ,(regexp->sres/renamer (re-submatch:body re) r))) - ((re-bos? re) (r 'bos)) - ((re-eos? re) (r 'eos)) - ((re-bol? re) (r 'bol)) - ((re-eol? re) (r 'eol)) + ((re-bos? re) (r 'bos)) + ((re-eos? re) (r 'eos)) + ((re-bol? re) (r 'bol)) + ((re-eol? re) (r 'eol)) - (else re)))) ; Presumably it's code. + (else re)))) ; Presumably it's code. (define (regexp->sre re) (regexp->sre/renamer re (lambda (x) x))) diff --git a/scsh/rx/posixstr.scm b/scsh/rx/posixstr.scm index bccee2e..d3c4607 100644 --- a/scsh/rx/posixstr.scm +++ b/scsh/rx/posixstr.scm @@ -98,28 +98,29 @@ (define (translate-regexp re) - (? ((re-string? re) (translate-string (re-string:chars re))) + (cond + ((re-string? re) (translate-string (re-string:chars re))) - ((re-repeat? re) (translate-repeat re)) - ((re-choice? re) (translate-choice re)) - ((re-seq? re) (translate-seq re)) - ((re-char-set? re) (translate-char-set (re-char-set:cset re))) + ((re-repeat? re) (translate-repeat re)) + ((re-choice? re) (translate-choice re)) + ((re-seq? re) (translate-seq re)) + ((re-char-set? re) (translate-char-set (re-char-set:cset re))) - ((re-submatch? re) (translate-submatch re)) + ((re-submatch? re) (translate-submatch re)) - ((re-bos? re) (values "^" 1 0 '#())) - ((re-eos? re) (values "$" 1 0 '#())) + ((re-bos? re) (values "^" 1 0 '#())) + ((re-eos? re) (values "$" 1 0 '#())) - ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) - ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) + ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) + ((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) - ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) - (body (re-dsm:body re))) - (translate-dsm body pre-dsm - (- (re-dsm:tsm re) - (+ pre-dsm (re-tsm body)))))) + ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) + (body (re-dsm:body re))) + (translate-dsm body pre-dsm + (- (re-dsm:tsm re) + (+ pre-dsm (re-tsm body)))))) - (else (error "Illegal regular expression" re)))) + (else (error "Illegal regular expression" re)))) ;;; Translate reloc-elt ELT = (N . RE) from a sequence or choice @@ -248,35 +249,36 @@ (body (re-repeat:body re)) (tsm (re-repeat:tsm re))) - (? ((and to (> from to)) ; Unsatisfiable - (values "[^\000-\377]" 1 0 (n-falses tsm))) + (cond + ((and to (> from to)) ; Unsatisfiable + (values "[^\000-\377]" 1 0 (n-falses tsm))) - ((and to (= from to 1)) (translate-seq body)) ; RE{1,1} => RE + ((and to (= from to 1)) (translate-seq body)) ; RE{1,1} => RE - ((and to (= to 0)) ; RE{0,0} => "" - (values "" 2 0 (n-falses tsm))) + ((and to (= to 0)) ; RE{0,0} => "" + (values "" 2 0 (n-falses tsm))) - (else ; General case - (receive (s level pcount submatches) (translate-regexp body) - (receive (s level pcount submatches) ; Coerce S to level <2. - (if (> level 1) - (values (string-append "(" s ")") - 0 - (+ pcount 1) - (mapv (lambda (i) (and i (+ i 1))) submatches)) - (values s level pcount submatches)) + (else ; General case + (receive (s level pcount submatches) (translate-regexp body) + (receive (s level pcount submatches) ; Coerce S to level <2. + (if (> level 1) + (values (string-append "(" s ")") + 0 + (+ pcount 1) + (mapv (lambda (i) (and i (+ i 1))) submatches)) + (values s level pcount submatches)) - (values (if to - (? ((and (= from 0) (= to 1)) (string-append s "?")) - ((= from to) - (string-append s "{" (number->string to) "}")) - (else - (string-append s "{" (number->string from) - "," (number->string to) "}"))) - (? ((= from 0) (string-append s "*")) - ((= from 1) (string-append s "+")) - (else (string-append s "{" (number->string from) ",}")))) - 1 pcount submatches))))))) + (values (if to + (cond ((and (= from 0) (= to 1)) (string-append s "?")) + ((= from to) + (string-append s "{" (number->string to) "}")) + (else + (string-append s "{" (number->string from) + "," (number->string to) "}"))) + (cond ((= from 0) (string-append s "*")) + ((= from 1) (string-append s "+")) + (else (string-append s "{" (number->string from) ",}")))) + 1 pcount submatches))))))) @@ -372,19 +374,20 @@ (receive (loose ranges) (char-set->in-pair cset) (hack-bracket-spec loose ranges in?))))) - (? ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set + (cond + ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set - ((= 1 nchars) ; Singleton set - (translate-string (string (car (char-set->list cset))))) + ((= 1 nchars) ; Singleton set + (translate-string (string (car (char-set->list cset))))) - ;; General case. Try both [...] and [^...]. - (else (let ((s- (->bracket-string cset #t)) - (s+ (->bracket-string - (char-set-delete (char-set-complement cset) *nul*) - #f))) - (values (if (< (string-length s-) (string-length s+)) - s- s+) - 1 0 '#()))))))) + ;; General case. Try both [...] and [^...]. + (else (let ((s- (->bracket-string cset #t)) + (s+ (->bracket-string + (char-set-delete (char-set-complement cset) *nul*) + #f))) + (values (if (< (string-length s-) (string-length s+)) + s- s+) + 1 0 '#()))))))) ;;; Commentary @@ -539,18 +542,19 @@ (shrink-range-finish-up end start (- end 1)))) (define (shrink-range-finish-up c start end) - (? ((> start end) (values (list (ascii->char c)) '())) ; Empty range + (cond + ((> start end) (values (list (ascii->char c)) '())) ; Empty range - ((= start end) ; Collapse singleton range. - (values (list (ascii->char c) (ascii->char start)) - '())) + ((= start end) ; Collapse singleton range. + (values (list (ascii->char c) (ascii->char start)) + '())) - ((= (+ start 1) end) ; Collapse doubleton range. - (values (list (ascii->char c) (ascii->char start) (ascii->char end)) - '())) + ((= (+ start 1) end) ; Collapse doubleton range. + (values (list (ascii->char c) (ascii->char start) (ascii->char end)) + '())) - (else (values (list (ascii->char c)) - (list (cons (ascii->char start) (ascii->char end))))))) + (else (values (list (ascii->char c)) + (list (cons (ascii->char start) (ascii->char end))))))) ;;; We assume the bracket-spec is not a singleton, not empty, and not complete. @@ -573,16 +577,16 @@ (ranges (cdr ranges))) (receive (new-loose new-ranges) (recur ranges) (receive (new-loose0 new-ranges0) - (? ((char=? #\] start) - (shrink-range-start range)) + (cond ((char=? #\] start) + (shrink-range-start range)) - ((char=? #\] end) - (shrink-range-end range)) + ((char=? #\] end) + (shrink-range-end range)) - ((char=? #\- start) - (shrink-range-start range)) + ((char=? #\- start) + (shrink-range-start range)) - (else (values '() (list range)))) + (else (values '() (list range)))) (values (append new-loose0 new-loose) (append new-ranges0 new-ranges))))) (values loose '()))) @@ -590,38 +594,39 @@ (let ((loose (sort-list loose loose<=)) ; Sort loose chars and ranges. (ranges (sort-list ranges range<))) - (? ((or (not (equal? loose0 loose)) ; Loop if anything changed. - (not (equal? ranges0 ranges))) - (lp loose ranges end-hyphen?)) + (cond + ((or (not (equal? loose0 loose)) ; Loop if anything changed. + (not (equal? ranges0 ranges))) + (lp loose ranges end-hyphen?)) - ;; If the first range opens with .=:, and the last loose char is [, - ;; shrink it out & loop. - ((and (pair? ranges) - (memv (caar ranges) '(#\. #\= #\:)) - (pair? loose) - (char=? #\[ (car (reverse loose)))) - (receive (new-loose new-ranges) - (shrink-range-start (car ranges)) - (lp (append new-loose loose) (append new-ranges (cdr ranges)) end-hyphen?))) + ;; If the first range opens with .=:, and the last loose char is [, + ;; shrink it out & loop. + ((and (pair? ranges) + (memv (caar ranges) '(#\. #\= #\:)) + (pair? loose) + (char=? #\[ (car (reverse loose)))) + (receive (new-loose new-ranges) + (shrink-range-start (car ranges)) + (lp (append new-loose loose) (append new-ranges (cdr ranges)) end-hyphen?))) - ;; If there are no loose chars, the first range begins with ^, and - ;; we're doing an IN range, shrink out the ^. - ((and in? (null? loose) (pair? ranges) (char=? #\^ (caar ranges))) - (receive (new-loose new-ranges) (shrink-range-start (car ranges)) - (lp (append new-loose loose) (append new-ranges ranges) end-hyphen?))) + ;; If there are no loose chars, the first range begins with ^, and + ;; we're doing an IN range, shrink out the ^. + ((and in? (null? loose) (pair? ranges) (char=? #\^ (caar ranges))) + (receive (new-loose new-ranges) (shrink-range-start (car ranges)) + (lp (append new-loose loose) (append new-ranges ranges) end-hyphen?))) - ;; If both [ and - are in the loose char set, - ;; pull - out as special end-hypen. - ((and (pair? loose) - (pair? (cdr loose)) - (char=? (car loose) #\[) - (char=? (car loose) #\-)) - (lp (cons (car loose) (cddr loose)) ranges #t)) + ;; If both [ and - are in the loose char set, + ;; pull - out as special end-hypen. + ((and (pair? loose) + (pair? (cdr loose)) + (char=? (car loose) #\[) + (char=? (car loose) #\-)) + (lp (cons (car loose) (cddr loose)) ranges #t)) - ;; No change! Build the answer... - (else (string-append (if in? "[" "[^") - (list->string loose) - (apply string-append - (map (lambda (r) (string (car r) #\- (cdr r))) - ranges)) - "]")))))))) + ;; No change! Build the answer... + (else (string-append (if in? "[" "[^") + (list->string loose) + (apply string-append + (map (lambda (r) (string (car r) #\- (cdr r))) + ranges)) + "]")))))))) diff --git a/scsh/rx/re-fold.scm b/scsh/rx/re-fold.scm index 2a85879..db24f80 100644 --- a/scsh/rx/re-fold.scm +++ b/scsh/rx/re-fold.scm @@ -45,14 +45,15 @@ (error "Illegal START parameter" regexp-fold re kons knil s finish start)) (let lp ((i start) (val knil)) - (? ((regexp-search re s i) => - (lambda (m) - (let ((next-i (match:end m 0))) - (if (= next-i (match:start m 0)) - (error "An empty-string regexp match has put regexp-fold into an infinite loop." - re s start next-i) - (lp next-i (kons i m val)))))) - (else (finish i val)))))) + (cond + ((regexp-search re s i) => + (lambda (m) + (let ((next-i (match:end m 0))) + (if (= next-i (match:start m 0)) + (error "An empty-string regexp match has put regexp-fold into an infinite loop." + re s start next-i) + (lp next-i (kons i m val)))))) + (else (finish i val)))))) ;;; regexp-fold-right re kons knil s [finish start] -> value ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -79,19 +80,21 @@ (error "Illegal START parameter" regexp-fold-right re kons knil s finish start)) - (? ((regexp-search re s start) => - (lambda (m) - (finish (match:start m 0) - (let recur ((last-m m)) - (? ((regexp-search re s (match:end last-m 0)) => - (lambda (m) - (let ((i (match:start m 0))) - (if (= i (match:end m 0)) - (error "An empty-string regexp match has put regexp-fold-right into an infinite loop." - re s start i) - (kons last-m i (recur m)))))) - (else (kons last-m (string-length s) knil))))))) - (else (finish (string-length s) knil))))) + (cond + ((regexp-search re s start) => + (lambda (m) + (finish (match:start m 0) + (let recur ((last-m m)) + (cond + ((regexp-search re s (match:end last-m 0)) => + (lambda (m) + (let ((i (match:start m 0))) + (if (= i (match:end m 0)) + (error "An empty-string regexp match has put regexp-fold-right into an infinite loop." + re s start i) + (kons last-m i (recur m)))))) + (else (kons last-m (string-length s) knil))))))) + (else (finish (string-length s) knil))))) ;;; regexp-for-each re proc s [start] -> unspecific ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -104,11 +107,12 @@ (if (> start (string-length s)) (apply error "Illegal START parameter" regexp-for-each re proc s start) (let lp ((i start)) - (? ((regexp-search re s i) => - (lambda (m) - (let ((next-i (match:end m 0))) - (if (= (match:start m 0) next-i) - (error "An empty-string regexp match has put regexp-for-each into an infinite loop." - re proc s start next-i)) - (proc m) - (lp next-i))))))))) + (cond + ((regexp-search re s i) => + (lambda (m) + (let ((next-i (match:end m 0))) + (if (= (match:start m 0) next-i) + (error "An empty-string regexp match has put regexp-for-each into an infinite loop." + re proc s start next-i)) + (proc m) + (lp next-i))))))))) diff --git a/scsh/rx/re-high.scm b/scsh/rx/re-high.scm index 3aa9af3..3b4bbf4 100644 --- a/scsh/rx/re-high.scm +++ b/scsh/rx/re-high.scm @@ -17,28 +17,29 @@ (set re cre) ; cache it, cre))))) ; and return it. - (? ((re-seq? re) - (check-cache re-seq:posix set-re-seq:posix)) - ((re-choice? re) - (check-cache re-choice:posix set-re-choice:posix)) - ((re-repeat? re) - (check-cache re-repeat:posix set-re-repeat:posix)) - ((re-char-set? re) - (check-cache re-char-set:posix set-re-char-set:posix)) - ((re-string? re) - (check-cache re-string:posix set-re-string:posix)) - ((re-submatch? re) - (check-cache re-submatch:posix set-re-submatch:posix)) - ((re-dsm? re) - (check-cache re-dsm:posix set-re-dsm:posix)) + (cond + ((re-seq? re) + (check-cache re-seq:posix set-re-seq:posix)) + ((re-choice? re) + (check-cache re-choice:posix set-re-choice:posix)) + ((re-repeat? re) + (check-cache re-repeat:posix set-re-repeat:posix)) + ((re-char-set? re) + (check-cache re-char-set:posix set-re-char-set:posix)) + ((re-string? re) + (check-cache re-string:posix set-re-string:posix)) + ((re-submatch? re) + (check-cache re-submatch:posix set-re-submatch:posix)) + ((re-dsm? re) + (check-cache re-dsm:posix set-re-dsm:posix)) - ((re-bos? re) (or bos-cre (set! bos-cre (compile)))) - ((re-eos? re) (or eos-cre (set! eos-cre (compile)))) + ((re-bos? re) (or bos-cre (set! bos-cre (compile)))) + ((re-eos? re) (or eos-cre (set! eos-cre (compile)))) - ((re-bol? re) (error "BOL regexp not supported in this implementation.")) - ((re-eol? re) (error "EOL regexp not supported in this implementation.")) + ((re-bol? re) (error "BOL regexp not supported in this implementation.")) + ((re-eol? re) (error "EOL regexp not supported in this implementation.")) - (else (error "compile-regexp -- not a regexp" re))))) + (else (error "compile-regexp -- not a regexp" re))))) (define bos-cre #f) (define eos-cre #f) diff --git a/scsh/rx/re.scm b/scsh/rx/re.scm index 4f6058d..df386e6 100644 --- a/scsh/rx/re.scm +++ b/scsh/rx/re.scm @@ -103,10 +103,10 @@ (if (pair? res) (let* ((re (car res)) (tail (recur (cdr res)))) - (? ((re-seq? re) ; Flatten nested seqs - (append (recur (re-seq:elts re)) tail)) - ((re-trivial? re) tail) ; Drop trivial elts - (else (cons re tail)))) + (cond ((re-seq? re) ; Flatten nested seqs + (append (recur (re-seq:elts re)) tail)) + ((re-trivial? re) tail) ; Drop trivial elts + (else (cons re tail)))) '())))) (if (pair? res) @@ -158,10 +158,10 @@ (if (pair? res) ; & drop empty re's. (let* ((re (car res)) (tail (recur (cdr res)))) - (? ((re-choice? re) ; Flatten nested choices - (append (recur (re-choice:elts re)) tail)) - ((re-empty? re) tail) ; Drop empty re's. - (else (cons re tail)))) + (cond ((re-choice? re) ; Flatten nested choices + (append (recur (re-choice:elts re)) tail)) + ((re-empty? re) tail) ; Drop empty re's. + (else (cons re tail)))) '())))) ;; If all elts are char-class re's, fold them together. (if (every static-char-class? res) @@ -254,32 +254,33 @@ dsm0))) (values from to body dsm0))))) - (? ((and (eqv? from 1) (eqv? to 1)) ; re{1,1} => re - (values body1 pre-dsm)) + (cond + ((and (eqv? from 1) (eqv? to 1)) ; re{1,1} => re + (values body1 pre-dsm)) - ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" - (values re-trivial (+ (re-tsm body1) pre-dsm))) + ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" + (values re-trivial (+ (re-tsm body1) pre-dsm))) - ;; re{m,n} => re-empty when m>n: - ((and (integer? from) (integer? to) (> from to)) - (values re-empty (+ (re-tsm body1) pre-dsm))) + ;; re{m,n} => re-empty when m>n: + ((and (integer? from) (integer? to) (> from to)) + (values re-empty (+ (re-tsm body1) pre-dsm))) - ;; Reduce the body = re-empty case. - ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in) - (values (if (> from 0) re-empty re-trivial) ; (* (in)) => "" - pre-dsm)) + ;; Reduce the body = re-empty case. + ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in) + (values (if (> from 0) re-empty re-trivial) ; (* (in)) => "" + pre-dsm)) - ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. - ((and (integer? from) - (or (and (integer? to) (<= from to)) (not to)) - (or (re-eos? body1) - (re-bos? body1) - (and (re-string? body1) - (string=? "" (re-string:chars body1))))) - (values body1 pre-dsm)) + ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. + ((and (integer? from) + (or (and (integer? to) (<= from to)) (not to)) + (or (re-eos? body1) + (re-bos? body1) + (and (re-string? body1) + (string=? "" (re-string:chars body1))))) + (values body1 pre-dsm)) - (else (values (make-re-repeat from to body1) ; general case - pre-dsm))))) + (else (values (make-re-repeat from to body1) ; general case + pre-dsm))))) @@ -410,15 +411,16 @@ ;;; Return the total number of submatches bound in RE. (define (re-tsm re) - (? ((re-seq? re) (re-seq:tsm re)) - ((re-choice? re) (re-choice:tsm re)) - ((re-repeat? re) (re-repeat:tsm re)) - ((re-dsm? re) (re-dsm:tsm re)) - ((re-submatch? re) (re-submatch:tsm re)) - ((or (re-char-set? re) (re-string? re) - (re-bos? re) (re-eos? re) - (re-bol? re) (re-eol? re)) - 0))) + (cond + ((re-seq? re) (re-seq:tsm re)) + ((re-choice? re) (re-choice:tsm re)) + ((re-repeat? re) (re-repeat:tsm re)) + ((re-dsm? re) (re-dsm:tsm re)) + ((re-submatch? re) (re-submatch:tsm re)) + ((or (re-char-set? re) (re-string? re) + (re-bos? re) (re-eos? re) + (re-bol? re) (re-eol? re)) + 0))) ;;; (flush-submatches re) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -426,19 +428,20 @@ ;;; stripped out -- (= 0 (re-tsm (flush-submatches re))). (define (flush-submatches re) - (? ((zero? (re-tsm re)) re) ; RE has no submatches. + (cond + ((zero? (re-tsm re)) re) ; RE has no submatches. - ((re-seq? re) (re-seq (map flush-submatches (re-seq:elts re)))) - ((re-choice? re) (re-choice (map flush-submatches (re-choice:elts re)))) + ((re-seq? re) (re-seq (map flush-submatches (re-seq:elts re)))) + ((re-choice? re) (re-choice (map flush-submatches (re-choice:elts re)))) - ((re-repeat? re) (re-repeat (re-repeat:from re) - (re-repeat:to re) - (flush-submatches (re-repeat:body re)))) + ((re-repeat? re) (re-repeat (re-repeat:from re) + (re-repeat:to re) + (flush-submatches (re-repeat:body re)))) - ((re-submatch? re) (flush-submatches (re-submatch:body re))) - ((re-dsm? re) (flush-submatches (re-dsm:body re))) + ((re-submatch? re) (flush-submatches (re-submatch:body re))) + ((re-dsm? re) (flush-submatches (re-dsm:body re))) - (else re))) + (else re))) ;;; Map F over ELTS. (F x) returns two values -- the "real" return value, @@ -464,54 +467,55 @@ (define (uncase re) (receive (new-re changed?) (let recur ((re re)) - (? ((re-seq? re) - (let ((elts (re-seq:elts re))) - (receive (new-elts elts-changed?) - (map/changed recur elts) - (if elts-changed? - (values (make-re-seq/tsm new-elts (re-seq:tsm re)) #t) - (values re #f))))) + (cond + ((re-seq? re) + (let ((elts (re-seq:elts re))) + (receive (new-elts elts-changed?) + (map/changed recur elts) + (if elts-changed? + (values (make-re-seq/tsm new-elts (re-seq:tsm re)) #t) + (values re #f))))) - ((re-choice? re) - (let ((elts (re-choice:elts re))) - (receive (new-elts elts-changed?) - (map/changed recur elts) - (if elts-changed? - (values (re-choice new-elts) #t) - (values re #f))))) + ((re-choice? re) + (let ((elts (re-choice:elts re))) + (receive (new-elts elts-changed?) + (map/changed recur elts) + (if elts-changed? + (values (re-choice new-elts) #t) + (values re #f))))) - ((re-char-set? re) - (let* ((cs (re-char-set:cset re)) - (new-cs (uncase-char-set cs))) ; Better not be code. - (if (char-set= cs new-cs) - (values re #f) - (values (make-re-char-set new-cs) #t)))) + ((re-char-set? re) + (let* ((cs (re-char-set:cset re)) + (new-cs (uncase-char-set cs))) ; Better not be code. + (if (char-set= cs new-cs) + (values re #f) + (values (make-re-char-set new-cs) #t)))) - ((re-repeat? re) - (receive (new-body body-changed?) (recur (re-repeat:body re)) - (if body-changed? - (values (re-repeat (re-repeat:from re) - (re-repeat:to re) - new-body) - #t) - (values re #f)))) + ((re-repeat? re) + (receive (new-body body-changed?) (recur (re-repeat:body re)) + (if body-changed? + (values (re-repeat (re-repeat:from re) + (re-repeat:to re) + new-body) + #t) + (values re #f)))) - ((re-submatch? re) - (receive (new-body body-changed?) (recur (re-submatch:body re)) - (if body-changed? - (values (make-re-submatch/tsm new-body - (re-submatch:pre-dsm re) - (re-submatch:tsm re)) - #t) - (values re #f)))) + ((re-submatch? re) + (receive (new-body body-changed?) (recur (re-submatch:body re)) + (if body-changed? + (values (make-re-submatch/tsm new-body + (re-submatch:pre-dsm re) + (re-submatch:tsm re)) + #t) + (values re #f)))) - ((re-string? re) - (let ((cf-re (uncase-string (re-string:chars re)))) - (if (re-string? cf-re) - (values re #f) - (values cf-re #t)))) + ((re-string? re) + (let ((cf-re (uncase-string (re-string:chars re)))) + (if (re-string? cf-re) + (values re #f) + (values cf-re #t)))) - (else (values re #f)))) + (else (values re #f)))) new-re)) @@ -537,11 +541,14 @@ (define (uncase-string s) ;; SEQ is a list of chars and doubleton char-sets. (let* ((seq (string-fold-right (lambda (c lis) - (cons (? ((char-lower-case? c) (char-set c (char-upcase c))) - ((char-upper-case? c) (char-set c (char-downcase c))) - (else c)) + (cons (cond + ((char-lower-case? c) + (char-set c (char-upcase c))) + ((char-upper-case? c) + (char-set c (char-downcase c))) + (else c)) lis)) - '() s)) + '() s)) ;; Coalesce adjacent chars together into a string. (fixup (lambda (chars seq) diff --git a/scsh/rx/rx-lib.scm b/scsh/rx/rx-lib.scm index 07c3fe6..500106b 100644 --- a/scsh/rx/rx-lib.scm +++ b/scsh/rx/rx-lib.scm @@ -4,24 +4,26 @@ ;;; char-set, or regexp value. Coerce one of these to a regexp value. (define (coerce-dynamic-regexp x) - (? ((string? x) (make-re-string x)) - ((char? x) (make-re-string (string x))) - ((char-set? x) (make-re-char-set x)) - ((regexp? x) x) - (else (error "Cannot coerce value to regular expression." x)))) + (cond + ((string? x) (make-re-string x)) + ((char? x) (make-re-string (string x))) + ((char-set? x) (make-re-char-set x)) + ((regexp? x) x) + (else (error "Cannot coerce value to regular expression." x)))) ;;; In a char-set context (e.g., as an operand of the SRE - operator), ;;; a , or form must be coercable to a char-set. (define (coerce-dynamic-charset x) - (? ((string? x) - (if (= 1 (string-length x)) (string->char-set x) - (error "Multi-char string not allowed as , or ,@ SRE in char-class context." - x))) - ((char? x) (char-set x)) - ((char-set? x) x) - ((re-char-set? x) (re-char-set:cset x)) - (else (error "Cannot coerce value to character set" x)))) + (cond + ((string? x) + (if (= 1 (string-length x)) (string->char-set x) + (error "Multi-char string not allowed as , or ,@ SRE in char-class context." + x))) + ((char? x) (char-set x)) + ((char-set? x) x) + ((re-char-set? x) (re-char-set:cset x)) + (else (error "Cannot coerce value to character set" x)))) (define (spec->char-set in? loose ranges) diff --git a/scsh/rx/simp.scm b/scsh/rx/simp.scm index af38923..2b2f78b 100644 --- a/scsh/rx/simp.scm +++ b/scsh/rx/simp.scm @@ -39,27 +39,28 @@ re)) (define (simp-re re) - (? ((re-string? re) (values re 0)) - ((re-seq? re) (simp-seq re)) - ((re-choice? re) (simp-choice re)) + (cond + ((re-string? re) (values re 0)) + ((re-seq? re) (simp-seq re)) + ((re-choice? re) (simp-choice re)) - ;; Singleton char-sets reduce to the character. - ;; Bear in mind the cset field might be Scheme code instead - ;; of an actual char set if the regexp is dynamic. - ((re-char-set? re) - (values (let ((cs (re-char-set:cset re))) - (if (and (char-set? cs) - (= 1 (char-set-size cs))) - (make-re-string (string (car (char-set->list cs)))) - re)) - 0)) + ;; Singleton char-sets reduce to the character. + ;; Bear in mind the cset field might be Scheme code instead + ;; of an actual char set if the regexp is dynamic. + ((re-char-set? re) + (values (let ((cs (re-char-set:cset re))) + (if (and (char-set? cs) + (= 1 (char-set-size cs))) + (make-re-string (string (car (char-set->list cs)))) + re)) + 0)) - ((re-repeat? re) (simp-repeat re)) + ((re-repeat? re) (simp-repeat re)) - ((re-submatch? re) (simp-submatch re)) - ((re-dsm? re) (simp-dsm re)) + ((re-submatch? re) (simp-submatch re)) + ((re-dsm? re) (simp-dsm re)) - (else (values re 0)))) + (else (values re 0)))) @@ -122,54 +123,56 @@ (define (simp-seq1 elts abort tsm) (let recur ((elt (car elts)) (elts (cdr elts))) (receive (elt pre-dsm) (open-dsm elt) - (? ((re-seq? elt) ; Flatten nested seqs. - (let ((sub-elts (re-seq:elts elt))) - (recur (re-dsm (car sub-elts) pre-dsm 0) - (append (cdr sub-elts) elts)))) + (cond + ((re-seq? elt) ; Flatten nested seqs. + (let ((sub-elts (re-seq:elts elt))) + (recur (re-dsm (car sub-elts) pre-dsm 0) + (append (cdr sub-elts) elts)))) - ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty - ; (impossible) re. - ((pair? elts) - (receive (next-pre-dsm next tail) ; Simplify the tail, - (recur (car elts) (cdr elts)) ; then think about - ; the head: - ;; This guy is called when we couldn't find any other - ;; simplification. If ELT contains live submatches, then - ;; there really is nothing to be done at this step -- just - ;; assemble the pieces together and return them. If ELT - ;; *doesn't* contain any live submatches, do the same, but - ;; bubble its following next-pre-dsm submatches forwards. - (define (no-simp) - (if (has-live-submatches? elt) - (values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail)) - (values (+ pre-dsm next-pre-dsm) elt (cons next tail)))) + ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty + ; (impossible) re. + ((pair? elts) + (receive (next-pre-dsm next tail) ; Simplify the tail, + (recur (car elts) (cdr elts)) ; then think about + ; the head: + ;; This guy is called when we couldn't find any other + ;; simplification. If ELT contains live submatches, then + ;; there really is nothing to be done at this step -- just + ;; assemble the pieces together and return them. If ELT + ;; *doesn't* contain any live submatches, do the same, but + ;; bubble its following next-pre-dsm submatches forwards. + (define (no-simp) + (if (has-live-submatches? elt) + (values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail)) + (values (+ pre-dsm next-pre-dsm) elt (cons next tail)))) - ;; Coalesces two adjacent bol's, two adjacent eol's, etc. - (define (coalesce-anchor anchor?) - (if (and (anchor? elt) (anchor? next)) - (values (+ pre-dsm next-pre-dsm) elt tail) - (no-simp))) + ;; Coalesces two adjacent bol's, two adjacent eol's, etc. + (define (coalesce-anchor anchor?) + (if (and (anchor? elt) (anchor? next)) + (values (+ pre-dsm next-pre-dsm) elt tail) + (no-simp))) - (? ((re-trivial? elt) ; Drop trivial re's. - (values (+ pre-dsm next-pre-dsm) next tail)) + (cond + ((re-trivial? elt) ; Drop trivial re's. + (values (+ pre-dsm next-pre-dsm) next tail)) - ;; Coalesce adjacent strings - ((re-string? elt) - (if (re-string? next) - (values (+ pre-dsm next-pre-dsm) - (make-re-string (string-append (re-string:chars elt) - (re-string:chars next))) - tail) - (no-simp))) + ;; Coalesce adjacent strings + ((re-string? elt) + (if (re-string? next) + (values (+ pre-dsm next-pre-dsm) + (make-re-string (string-append (re-string:chars elt) + (re-string:chars next))) + tail) + (no-simp))) - ;; Coalesce adjacent bol/eol/bos/eos's. - ((re-bol? elt) (coalesce-anchor re-bol?)) - ((re-eol? elt) (coalesce-anchor re-eol?)) - ((re-bos? elt) (coalesce-anchor re-bos?)) - ((re-eos? elt) (coalesce-anchor re-eos?)) - (else (no-simp))))) + ;; Coalesce adjacent bol/eol/bos/eos's. + ((re-bol? elt) (coalesce-anchor re-bol?)) + ((re-eol? elt) (coalesce-anchor re-eol?)) + ((re-bos? elt) (coalesce-anchor re-bos?)) + ((re-eos? elt) (coalesce-anchor re-eos?)) + (else (no-simp))))) - (else (values pre-dsm elt '())))))) + (else (values pre-dsm elt '())))))) @@ -241,11 +244,12 @@ (tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail)) (tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail)) (tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail)) - (tail (? ((zero? numchars) tail) ; Drop empty char set. - ((= 1 numchars) ; {c} => "c" - (cons (make-re-string (string (car (char-set->list cset)))) - tail)) - (else (cons (make-re-char-set cset) tail))))) + (tail (cond + ((zero? numchars) tail) ; Drop empty char set. + ((= 1 numchars) ; {c} => "c" + (cons (make-re-string (string (car (char-set->list cset)))) + tail)) + (else (cons (make-re-char-set cset) tail))))) tail)) @@ -271,32 +275,33 @@ ;; Flatten nested choices. (let ((sub-elts (re-seq:elts elt))) (receive (tail-pre-dsm cset bos? eos? bol? eol? tail) - (recur (append sub-elts elts) - prev-cset - prev-bos? prev-eos? - prev-bol? prev-eol?) + (recur (append sub-elts elts) + prev-cset + prev-bos? prev-eos? + prev-bol? prev-eol?) (values (+ pre-dsm tail-pre-dsm) cset bos? eos? bol? eol? tail))) ;; Simplify the tail, then think about the head. (receive (tail-pre-dsm cset bos? eos? bol? eol? tail) - (recur elts - (? ((and (re-string? elt) - (= 1 (string-length (re-string:chars elt)))) - (char-set-union prev-cset - (string->char-set (re-string:chars elt)))) + (recur elts + (cond + ((and (re-string? elt) + (= 1 (string-length (re-string:chars elt)))) + (char-set-union prev-cset + (string->char-set (re-string:chars elt)))) - ;; The cset might be a Scheme exp. - ((and (re-char-set? elt) - (char-set? (re-char-set:cset elt))) - (char-set-union prev-cset - (re-char-set:cset elt))) + ;; The cset might be a Scheme exp. + ((and (re-char-set? elt) + (char-set? (re-char-set:cset elt))) + (char-set-union prev-cset + (re-char-set:cset elt))) - (else prev-cset)) - (or prev-bos? (re-bos? elt)) - (or prev-eos? (re-eos? elt)) - (or prev-bol? (re-bol? elt)) - (or prev-eol? (re-eol? elt))) + (else prev-cset)) + (or prev-bos? (re-bos? elt)) + (or prev-eos? (re-eos? elt)) + (or prev-bol? (re-bol? elt)) + (or prev-eol? (re-eol? elt))) ;; This guy is called when we couldn't find any other ;; simplification. If ELT contains live submatches, then we @@ -334,29 +339,30 @@ cset bos? eos? bol? eol? (cons elt tail)))) - (? ((and (re-char-set? elt) - (char-set? (re-char-set:cset elt))) ; Might be Scheme code - (values (+ pre-dsm tail-pre-dsm) - (char-set-union cset (re-char-set:cset elt)) - bos? eos? bol? eol? tail)) + (cond + ((and (re-char-set? elt) + (char-set? (re-char-set:cset elt))) ; Might be Scheme code + (values (+ pre-dsm tail-pre-dsm) + (char-set-union cset (re-char-set:cset elt)) + bos? eos? bol? eol? tail)) - ;; Treat a singleton string "c" as a singleton set {c}. - ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) - (values (+ pre-dsm tail-pre-dsm) - (char-set-union cset (string->char-set (re-string:chars elt))) - bos? eos? bol? eol? tail)) + ;; Treat a singleton string "c" as a singleton set {c}. + ((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) + (values (+ pre-dsm tail-pre-dsm) + (char-set-union cset (string->char-set (re-string:chars elt))) + bos? eos? bol? eol? tail)) - ;; Coalesce bol/eol/bos/eos's. - ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset - #t eos? bol? eol? tail)) - ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? #t bol? eol? tail)) - ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? #t eol? tail)) - ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset - bos? eos? bol? #t tail)) + ;; Coalesce bol/eol/bos/eos's. + ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset + #t eos? bol? eol? tail)) + ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? #t bol? eol? tail)) + ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? #t eol? tail)) + ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset + bos? eos? bol? #t tail)) - (else (no-simp))))))) + (else (no-simp))))))) (values 0 char-set:empty #f #f #f #f '())))) @@ -378,15 +384,15 @@ (define (has-live-submatches? re) (or (re-submatch? re) - (? ((re-seq? re) (every has-live-submatches? (re-seq:elts re))) - ((re-choice? re) (every has-live-submatches? (re-choice:elts re))) - ((re-repeat? re) (has-live-submatches? (re-repeat:body re))) - ((re-dsm? re) (has-live-submatches? (re-dsm:body re))) + (cond ((re-seq? re) (every has-live-submatches? (re-seq:elts re))) + ((re-choice? re) (every has-live-submatches? (re-choice:elts re))) + ((re-repeat? re) (has-live-submatches? (re-repeat:body re))) + ((re-dsm? re) (has-live-submatches? (re-dsm:body re))) - ;; If it's not one of these things, then this isn't a regexp -- it's - ;; a chunk of Scheme code producing a regexp, and we conservatively - ;; return #T -- the expression *might* produce a regexp containing - ;; a live submatch: - (else (not (or (re-char-set? re) (re-string? re) - (re-bos? re) (re-eos? re) - (re-bol? re) (re-eol? re))))))) + ;; If it's not one of these things, then this isn't a regexp -- it's + ;; a chunk of Scheme code producing a regexp, and we conservatively + ;; return #T -- the expression *might* produce a regexp containing + ;; a live submatch: + (else (not (or (re-char-set? re) (re-string? re) + (re-bos? re) (re-eos? re) + (re-bol? re) (re-eol? re)))))))