Remove the Olinist CONDITITIONALS package, also fixing some atrocious

uses of whitespace, newline, and indentation.
This saves another 60k or so in scsh.image, actually getting us below
the level of 0.6.2.
This commit is contained in:
sperber 2002-10-30 12:33:51 +00:00
parent 20b65e6bfa
commit 4290ccae21
10 changed files with 628 additions and 755 deletions

View File

@ -797,8 +797,6 @@ SCHEME =scsh/awk.scm \
scsh/pty.scm \ scsh/pty.scm \
scsh/rdelim.scm \ scsh/rdelim.scm \
scsh/rw.scm \ scsh/rw.scm \
scsh/rx/packages.scm \
scsh/rx/cond-package.scm \
scsh/scsh-condition.scm \ scsh/scsh-condition.scm \
scsh/scsh-interfaces.scm \ scsh/scsh-interfaces.scm \
scsh/scsh-package.scm \ scsh/scsh-package.scm \
@ -816,7 +814,6 @@ SCHEME =scsh/awk.scm \
scsh/tty.scm \ scsh/tty.scm \
scsh/utilities.scm \ scsh/utilities.scm \
scsh/weaktables.scm \ scsh/weaktables.scm \
scsh/rx/cond-package.scm \
scsh/rx/packages.scm \ scsh/rx/packages.scm \
scsh/rx/re-match-syntax.scm \ scsh/rx/re-match-syntax.scm \
scsh/rx/rx-lib.scm \ scsh/rx/rx-lib.scm \
@ -856,7 +853,6 @@ bs: build/build-scsh-image
loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \ loads = $(srcdir)/scsh/let-opt.scm $(srcdir)/scsh/scsh-interfaces.scm \
$(srcdir)/scsh/machine/packages.scm \ $(srcdir)/scsh/machine/packages.scm \
$(srcdir)/scsh/rx/packages.scm \ $(srcdir)/scsh/rx/packages.scm \
$(srcdir)/scsh/rx/cond-package.scm \
$(srcdir)/scsh/scsh-package.scm \ $(srcdir)/scsh/scsh-package.scm \
$(srcdir)/scsh/lib/ccp-pack.scm \ $(srcdir)/scsh/lib/ccp-pack.scm \
$(srcdir)/scsh/lib/char-package.scm $(srcdir)/scsh/lib/char-package.scm

View File

@ -1,160 +0,0 @@
(define-structure conditionals
(export (define-simple-syntax :syntax)
(when :syntax)
(unless :syntax)
(? :syntax)
(switchq :syntax)
(switch :syntax)
(prog0 :syntax)
(land* :syntax))
(open scheme)
(begin
;;; (define-simple-syntax (name subforms ...) expansion)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-syntax define-simple-syntax
(syntax-rules ()
((define-simple-syntax (name subforms ...) expansion)
(define-syntax name (syntax-rules () ((name subforms ...) expansion))))))
;;; ? = COND
;;; (WHEN test body ...) (SWITCHQ = key clause ...)
;;; (UNLESS test body ...) (SWITCH = key clause ...)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Handy conditional forms. ? is so short that it renders WHEN pretty
;;; much useless.
(define-simple-syntax (when test body ...)
(if test (begin body ...)))
(define-simple-syntax (unless test body ...)
(if (not test) (begin body ...)))
;;; ? is synonym for COND.
(define-simple-syntax (? clause ...) (cond clause ...))
;;; (PROG0 val-exp exp ...)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-simple-syntax (prog0 val-exp exp ...)
(let ((v val-exp)) exp ... v))
;;; (land* (clause ...) body ...) -*- Scheme -*-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Evaluate each clause. If any clause returns false, land* stops and
;;; returns false. If all the clauses evaluate to a true value, return
;;; the value of the body.
;;;
;;; The difference between LAND* and AND is that LAND* binds names to
;;; the values of its clauses, which may be used by subsequent clauses.
;;; Clauses are of the form
;;; (var exp) ; binds VAR to the value of EXP.
;;; (exp) ; No binding.
;;; var ; Reference -- no binding.
;;;
;;; Example:
;;; (land* ((probe (assq key alist)))
;;; (cdr probe))
;;;
;;; LAND* is due to Oleg Kiselyov (http://pobox.com/~oleg); I wrote this
;;; simple implementation as a high-level R5RS DEFINE-SYNTAX macro.
;;; Olin 98/9/29
(define-syntax land*
(syntax-rules ()
((land* () body ...) (begin body ...))
((land* ((var exp) clause ...) body ...)
(let ((var exp)) (and var (land* (clause ...) body ...))))
((land* ((#f exp) clause ...) body ...)
(and exp (land* (clause ...) body ...)))
((land* ((exp) clause ...) body ...)
(and exp (land* (clause ...) body ...)))
((land* (var clause ...) body ...)
(and var (land* (clause ...) body ...)))))
;;; Like CASE, but you specify the key-comparison procedure.
;;; SWITCH evaluates its keys each time through the conditional.
;;; SWITCHQ keys are not evaluated -- are simply constants.
;;; (switchq string=? (vector-ref vec i)
;;; (("plus" "minus") ...)
;;; (("times" "div") ...)
;;; (else ...))
(define-simple-syntax (switchq compare key clause ...)
(let ((k key) ; Eval KEY and COMPARE
(c compare)) ; just once, then call %switch.
(%switchq c k clause ...))) ; C, K are vars, hence replicable.
(define-syntax %switchq
(syntax-rules (else)
((%switchq compare key ((key1 ...) body1 body2 ...) rest ...)
(if (or (compare key 'key1) ...)
(begin body1 body2 ...)
(%switchq compare key rest ...)))
((%switchq compare key ((key1 ...)) rest ...) ; Null body.
(if (not (or (compare key 'key1) ...))
(%switchq compare key rest ...)))
((%switchq compare key (else body ...))
(begin body ...))
((%switchq compare key) '#f)))
(define-simple-syntax (switch compare key clause ...)
(let ((k key) ; Eval KEY and COMPARE
(c compare)) ; just once, then call %switch.
(%switch c k clause ...))) ; C, K are vars, hence replicable.
(define-syntax %switch
(syntax-rules (else)
((%switch compare key ((key1 ...) body1 body2 ...) rest ...)
(if (or (compare key key1) ...)
(begin body1 body2 ...)
(%switch compare key rest ...)))
((%switch compare key ((key1 ...)) rest ...) ; Null body.
(if (not (or (compare key key1) ...))
(%switch compare key rest ...)))
((%switch compare key (else body ...))
(begin body ...))
((%switch compare key) '#f)))
;;; I can't get this to work -- S48 complains "too many ...'s".
;(define-syntax switchq
; (syntax-rules (else)
; ((switchq compare key clause ...)
; (letrec-syntax ((%switchq (syntax-rules (else)
; ((%switchq compare key
; ((key1 ...) body1 body2 ...) rest ...)
; (if (or (compare key 'key1) ...)
; (begin body1 body2 ...)
; (%switchq compare key rest ...)))
;
; ; Null body.
; ((%switchq compare key ((key1 ...)) rest ...)
; (if (not (or (compare key 'key1) ...))
; (%switchq compare key rest ...)))
;
; ((%switchq compare key (else body ...))
; (begin body ...))
;
; ((%switchq compare key) '#f))))
;
; (let ((k key) ; Eval KEY and COMPARE
; (c compare)) ; just once, then call %switch.
; (%switchq c k clause ...)))))); C, K are vars, hence replicable.
))

View File

@ -231,7 +231,6 @@
;; re-posix-parsers ; regexp->posix-string ;; re-posix-parsers ; regexp->posix-string
let-opt let-opt
sort ; Posix renderer sort ; Posix renderer
conditionals
define-record-types define-record-types
defrec-package defrec-package
receiving receiving
@ -275,7 +274,6 @@
(define-structure rx-lib rx-lib-interface (define-structure rx-lib rx-lib-interface
(open re-internals (open re-internals
conditionals
re-level-0 re-level-0
(subset srfi-1 (fold)) (subset srfi-1 (fold))
srfi-14 srfi-14
@ -342,7 +340,7 @@
(define-structure re-folders re-folders-interface (define-structure re-folders re-folders-interface
(open re-level-0 let-opt conditionals error-package scheme) (open re-level-0 let-opt error-package scheme)
(files re-fold) (files re-fold)
; (optimize auto-integrate) ; (optimize auto-integrate)
) )

View File

@ -22,8 +22,6 @@
;;; is the char-set parsing and unparsing, which deal with ranges of ;;; is the char-set parsing and unparsing, which deal with ranges of
;;; characters. We assume an 8-bit ASCII superset. ;;; characters. We assume an 8-bit ASCII superset.
;;; Imports:
;;; ? for COND, and SWITCHQ conditional form.
;;; every ;;; every
;;; This code is much hairier than it would otherwise be because of the ;;; This code is much hairier than it would otherwise be because of the
@ -52,23 +50,24 @@
;;; in the form of embedded code in some of the regexp's fields? ;;; in the form of embedded code in some of the regexp's fields?
(define (static-regexp? re) (define (static-regexp? re)
(? ((re-seq? re) (every static-regexp? (re-seq:elts re))) (cond
((re-choice? re) (every static-regexp? (re-choice:elts re))) ((re-seq? re) (every static-regexp? (re-seq:elts re)))
((re-choice? re) (every static-regexp? (re-choice:elts re)))
((re-char-set? re) (char-set? (re-char-set:cset re))) ; Might be code.
((re-char-set? re) (char-set? (re-char-set:cset re))) ; Might be code. ((re-repeat? re) ; FROM & TO fields might be code.
(let ((to (re-repeat:to re)))
(and (integer? (re-repeat:from re))
(or (not to) (integer? to))
(static-regexp? (re-repeat:body re)))))
((re-repeat? re) ; FROM & TO fields might be code. ((re-dsm? re) (static-regexp? (re-dsm:body re)))
(let ((to (re-repeat:to re))) ((re-submatch? re) (static-regexp? (re-submatch:body re)))
(and (integer? (re-repeat:from re))
(or (not to) (integer? to))
(static-regexp? (re-repeat:body re)))))
((re-dsm? re) (static-regexp? (re-dsm:body re))) (else (or (re-bos? re) (re-eos? re) ; Otw, if it's not
((re-submatch? re) (static-regexp? (re-submatch:body re))) (re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code.
(re-string? re)))))
(else (or (re-bos? re) (re-eos? re) ; Otw, if it's not
(re-bol? re) (re-eol? re) ; one of these, ; then it's Scheme code.
(re-string? re)))))
;;; Two useful standard char sets ;;; Two useful standard char sets
@ -149,153 +148,164 @@
(re-repeat from to seq) (re-repeat from to seq)
`(,(r 're-repeat) ',from ',to ,(regexp->scheme seq r))))) `(,(r 're-repeat) ',from ',to ,(regexp->scheme seq r)))))
(? ((char? sre) (parse-char-re sre case-sensitive? cset?)) (cond
((string? sre) (parse-string-re sre case-sensitive? cset?)) ((char? sre) (parse-char-re sre case-sensitive? cset?))
((string? sre) (parse-string-re sre case-sensitive? cset?))
((c sre %bos) (non-cset) re-bos) ((c sre %bos) (non-cset) re-bos)
((c sre %eos) (non-cset) re-eos) ((c sre %eos) (non-cset) re-eos)
((c sre %bol) (non-cset) re-bol) ((c sre %bol) (non-cset) re-bol)
((c sre %eol) (non-cset) re-eol) ((c sre %eol) (non-cset) re-eol)
((pair? sre) ((pair? sre)
(let ((hygn-eq? (lambda (the-sym) (or (c (car sre) (r the-sym)) (let ((hygn-eq? (lambda (the-sym) (or (c (car sre) (r the-sym))
(c (car sre) the-sym))))) (c (car sre) the-sym)))))
(cond (cond
((hygn-eq? '*) ((hygn-eq? '*)
(non-cset) (non-cset)
(build-re-repeat 0 #f (cdr sre))) (build-re-repeat 0 #f (cdr sre)))
((hygn-eq? '+) ((hygn-eq? '+)
(non-cset) (non-cset)
(build-re-repeat 1 #f (cdr sre))) (build-re-repeat 1 #f (cdr sre)))
((hygn-eq? '?) ((hygn-eq? '?)
(non-cset) (non-cset)
(build-re-repeat 0 1 (cdr sre))) (build-re-repeat 0 1 (cdr sre)))
((hygn-eq? '=) ; #### ((hygn-eq? '=)
(non-cset) (non-cset)
(let ((n (cadr sre))) (let ((n (cadr sre)))
(build-re-repeat n n (cddr sre)))) (build-re-repeat n n (cddr sre))))
((hygn-eq? '>=) ((hygn-eq? '>=)
(non-cset) (non-cset)
(build-re-repeat (cadr sre) #f (cddr sre))) (build-re-repeat (cadr sre) #f (cddr sre)))
((hygn-eq? '**) ((hygn-eq? '**)
(non-cset) (non-cset)
(build-re-repeat (cadr sre) (caddr sre) (build-re-repeat (cadr sre) (caddr sre)
(cdddr sre))) (cdddr sre)))
;; Choice is special wrt cset? because it's "polymorphic". ;; Choice is special wrt cset? because it's "polymorphic".
;; Note that RE-CHOICE guarantees to construct a char-set ;; Note that RE-CHOICE guarantees to construct a char-set
;; or single-char string regexp if all of its args are char ;; or single-char string regexp if all of its args are char
;; classes. ;; classes.
((or (hygn-eq? '|) ((or (hygn-eq? '|)
(hygn-eq? 'or)) (hygn-eq? 'or))
(let ((elts (map (lambda (sre) (let ((elts (map (lambda (sre)
(recur sre case-sensitive? cset?)) (recur sre case-sensitive? cset?))
(cdr sre)))) (cdr sre))))
(if cset? (if cset?
(assoc-cset-op char-set-union 'char-set-union elts r) (assoc-cset-op char-set-union 'char-set-union elts r)
(re-choice elts)))) (re-choice elts))))
((or (hygn-eq? ':) ((or (hygn-eq? ':)
(hygn-eq? 'seq)) (hygn-eq? 'seq))
(non-cset) (parse-seq (cdr sre))) (non-cset)
(parse-seq (cdr sre)))
((hygn-eq? 'submatch) ((hygn-eq? 'submatch)
(non-cset) (non-cset)
(let ((seq (parse-seq (cdr sre)))) (let ((seq (parse-seq (cdr sre))))
(if (static-regexp? seq) (if (static-regexp? seq)
(re-submatch seq) (re-submatch seq)
`(,(r 're-submatch) ,(regexp->scheme seq r))))) `(,(r 're-submatch) ,(regexp->scheme seq r)))))
((hygn-eq? 'dsm) (non-cset) (re-dsm (parse-seq (cdddr sre)) ((hygn-eq? 'dsm)
(cadr sre) (non-cset)
(caddr sre))) (re-dsm (parse-seq (cdddr sre))
(cadr sre)
(caddr sre)))
;; We could be more aggressive and push the uncase op down into ;; We could be more aggressive and push the uncase op down into
;; partially-static regexps, but enough is enough. ;; partially-static regexps, but enough is enough.
((hygn-eq? 'uncase) ((hygn-eq? 'uncase)
(let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?. (let ((re-or-cset (parse-seq (cdr sre)))) ; Depending on CSET?.
(if cset? (if cset?
(if (re-char-set? re-or-cset) ; A char set or code (if (re-char-set? re-or-cset) ; A char set or code
(uncase-char-set re-or-cset) ; producing a char set. (uncase-char-set re-or-cset) ; producing a char set.
`(,(r 'uncase) ,re-or-cset)) `(,(r 'uncase) ,re-or-cset))
(if (static-regexp? re-or-cset) ; A regexp or code (if (static-regexp? re-or-cset) ; A regexp or code
(uncase re-or-cset) ; producing a regexp. (uncase re-or-cset) ; producing a regexp.
`(,(r 'uncase) `(,(r 'uncase)
,(regexp->scheme (simplify-regexp re-or-cset) r)))))) ,(regexp->scheme (simplify-regexp re-or-cset) r))))))
;; These just change the lexical case-sensitivity context. ;; These just change the lexical case-sensitivity context.
((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f)) ((hygn-eq? 'w/nocase) (parse-seq/context (cdr sre) #f))
((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t)) ((hygn-eq? 'w/case) (parse-seq/context (cdr sre) #t))
;; ,<exp> and ,@<exp> ;; ,<exp> and ,@<exp>
((hygn-eq? 'unquote) ((hygn-eq? 'unquote)
(let ((exp (cadr sre))) (let ((exp (cadr sre)))
(if cset? (if cset?
`(,%coerce-dynamic-charset ,exp) `(,%coerce-dynamic-charset ,exp)
`(,%flush-submatches (,%coerce-dynamic-regexp ,exp))))) `(,%flush-submatches (,%coerce-dynamic-regexp ,exp)))))
((hygn-eq? 'unquote-splicing) ((hygn-eq? 'unquote-splicing)
(let ((exp (cadr sre))) (let ((exp (cadr sre)))
(if cset? (if cset?
`(,%coerce-dynamic-charset ,exp) `(,%coerce-dynamic-charset ,exp)
`(,%coerce-dynamic-regexp ,exp)))) `(,%coerce-dynamic-regexp ,exp))))
((hygn-eq? '~) (let* ((cs (assoc-cset-op char-set-union 'char-set-union ((hygn-eq? '~)
(map parse-char-class (cdr sre)) (let* ((cs (assoc-cset-op char-set-union 'char-set-union
r)) (map parse-char-class (cdr sre))
(cs (if (char-set? cs) r))
(char-set-complement cs) (cs (if (char-set? cs)
`(,(r 'char-set-complement) ,cs)))) (char-set-complement cs)
(if cset? cs (make-re-char-set cs)))) `(,(r 'char-set-complement) ,cs))))
(if cset? cs (make-re-char-set cs))))
((hygn-eq? '&) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection ((hygn-eq? '&)
(map parse-char-class (cdr sre)) (let ((cs (assoc-cset-op char-set-intersection 'char-set-intersection
r))) (map parse-char-class (cdr sre))
(if cset? cs (make-re-char-set cs)))) r)))
(if cset? cs (make-re-char-set cs))))
((hygn-eq? '-) (if (pair? (cdr sre)) ((hygn-eq? '-)
(let* ((cs1 (parse-char-class (cadr sre))) (if (pair? (cdr sre))
(cs2 (assoc-cset-op char-set-union 'char-set-union (let* ((cs1 (parse-char-class (cadr sre)))
(map parse-char-class (cddr sre)) (cs2 (assoc-cset-op char-set-union 'char-set-union
r)) (map parse-char-class (cddr sre))
(cs (if (and (char-set? cs1) (char-set? cs2)) r))
(char-set-difference cs1 cs2) (cs (if (and (char-set? cs1) (char-set? cs2))
`(,(r 'char-set-difference) (char-set-difference cs1 cs2)
,(if (char-set? cs1) `(,(r 'char-set-difference)
(char-set->scheme cs1 r) ,(if (char-set? cs1)
cs1) (char-set->scheme cs1 r)
. ,(if (char-set? cs2) cs1)
(list (char-set->scheme cs2 r)) . ,(if (char-set? cs2)
(cdr cs2)))))) (list (char-set->scheme cs2 r))
(if cset? cs (make-re-char-set cs))) (cdr cs2))))))
(error "SRE set-difference operator (- ...) requires at least one argument"))) (if cset? cs (make-re-char-set cs)))
(error "SRE set-difference operator (- ...) requires at least one argument")))
((hygn-eq? '/) (let ((cset (range-class->char-set (cdr sre) case-sensitive?))) ((hygn-eq? '/)
(if cset? cset (make-re-char-set cset)))) (let ((cset (range-class->char-set (cdr sre) case-sensitive?)))
(if cset?
cset
(make-re-char-set cset))))
((hygn-eq? 'posix-string) ((hygn-eq? 'posix-string)
(if (and (= 1 (length (cdr sre))) (if (and (= 1 (length (cdr sre)))
(string? (cadr sre))) (string? (cadr sre)))
(posix-string->regexp (cadr sre)) (posix-string->regexp (cadr sre))
(error "Illegal (posix-string ...) SRE body." sre))) (error "Illegal (posix-string ...) SRE body." sre)))
(else (if (every string? sre) ; A set spec -- ("wxyz"). (else
(let* ((cs (apply char-set-union (if (every string? sre) ; A set spec -- ("wxyz").
(map string->char-set sre))) (let* ((cs (apply char-set-union
(cs (if case-sensitive? cs (uncase-char-set cs)))) (map string->char-set sre)))
(if cset? cs (make-re-char-set cs))) (cs (if case-sensitive? cs (uncase-char-set cs))))
(if cset? cs (make-re-char-set cs)))
(error "Illegal SRE" sre)))))) (error "Illegal SRE" sre))))))
;; It must be a char-class name (ANY, ALPHABETIC, etc.) ;; It must be a char-class name (ANY, ALPHABETIC, etc.)
(else (else
(letrec ((hygn-memq? (lambda (sym-list) (letrec ((hygn-memq? (lambda (sym-list)
(if (null? sym-list) (if (null? sym-list)
#f #f
(or (c sre (r (car sym-list))) (or (c sre (r (car sym-list)))
(c sre (car sym-list)) (c sre (car sym-list))
(hygn-memq? (cdr sym-list))))))) (hygn-memq? (cdr sym-list)))))))
(let ((cs (cond (let ((cs (cond
((hygn-memq? '(nonl)) nonl-chars) ((hygn-memq? '(nonl)) nonl-chars)
((hygn-memq? '(lower-case lower)) char-set:lower-case) ((hygn-memq? '(lower-case lower)) char-set:lower-case)
@ -343,11 +353,12 @@
(define (assoc-cset-op op op-name elts r) (define (assoc-cset-op op op-name elts r)
(receive (csets code-chunks) (partition char-set? elts) (receive (csets code-chunks) (partition char-set? elts)
(if (pair? code-chunks) (if (pair? code-chunks)
(? ((pair? csets) (cond
`(,(r op-name) ,(char-set->scheme (apply op csets) r) ((pair? csets)
. ,code-chunks)) `(,(r op-name) ,(char-set->scheme (apply op csets) r)
((pair? (cdr code-chunks)) `(,(r op-name) . ,code-chunks)) . ,code-chunks))
(else (car code-chunks))) ; Just one. ((pair? (cdr code-chunks)) `(,(r op-name) . ,code-chunks))
(else (car code-chunks))) ; Just one.
(apply op csets)))) (apply op csets))))
;;; Parse a (/ <range-spec> ...) char-class into a character set in ;;; Parse a (/ <range-spec> ...) char-class into a character set in
@ -484,17 +495,17 @@
(if a (if a
(if biga (if biga
(if space (if space
(and one (switch char-set= cs (and one (cond
((char-set:full) full) ((char-set= cs char-set:full) full)
((nonl-chars) nonl) ((char-set= cs nonl-chars) nonl)
((char-set:printing) print) ((char-set= cs char-set:printing) print)
((char-set:ascii) ascii) ((char-set= cs char-set:ascii) ascii)
(else #f))) (else #f)))
(if one (if one
(switch char-set= cs (cond
((char-set:letter+digit) alphanum) ((char-set= cs char-set:letter+digit) alphanum)
((char-set:graphic) graph) ((char-set= cs char-set:graphic) graph)
((char-set:hex-digit) hex) ((char-set= cs char-set:hex-digit) hex)
(else #f)) (else #f))
(and (char-set= cs char-set:letter) alpha))) (and (char-set= cs char-set:letter) alpha)))
(and (char-set= cs char-set:lower-case) lower)) ; a, not A (and (char-set= cs char-set:lower-case) lower)) ; a, not A
@ -504,13 +515,13 @@
(if one (if one
(and (not space) (char-set= cs char-set:digit) num) (and (not space) (char-set= cs char-set:digit) num)
(if space (if space
(switch char-set= cs (cond
((char-set:whitespace) white) (( char-set= cs char-set:whitespace) white)
((char-set:blank) blank) (( char-set= cs char-set:blank) blank)
(else #f)) (else #f))
(switch char-set= cs (cond
((char-set:punctuation) punct) ((char-set= cs char-set:punctuation) punct)
((char-set:iso-control) ctl) ((char-set= cs char-set:iso-control) ctl)
(else #f)))))))) (else #f))))))))
@ -525,20 +536,21 @@
'char-set:printing 'char-set:iso-control 'char-set:printing 'char-set:iso-control
'char-set:hex-digit 'char-set:blank 'char-set:hex-digit 'char-set:blank
'char-set:ascii)))) 'char-set:ascii))))
(? ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code. (cond
((char-set-empty? cs) (r 'char-set:empty)) ((not (char-set? cs)) cs) ; Dynamic -- *already* Scheme code.
((try cs) => r) ((char-set-empty? cs) (r 'char-set:empty))
((try (char-set-complement cs)) => ((try cs) => r)
(lambda (name) `(,(r 'char-set-complement) ,name))) ((try (char-set-complement cs)) =>
(lambda (name) `(,(r 'char-set-complement) ,name)))
(else (else
(receive (loose+ ranges+) (char-set->in-pair cs) (receive (loose+ ranges+) (char-set->in-pair cs)
(receive (loose- ranges-) (char-set->in-pair (char-set-complement cs)) (receive (loose- ranges-) (char-set->in-pair (char-set-complement cs))
(let ((makeit (r 'spec->char-set))) (let ((makeit (r 'spec->char-set)))
(if (< (+ (length loose-) (* 12 (length ranges-))) (if (< (+ (length loose-) (* 12 (length ranges-)))
(+ (length loose+) (* 12 (length ranges+)))) (+ (length loose+) (* 12 (length ranges+))))
`(,makeit #f ,(list->string loose-) ',ranges-) `(,makeit #f ,(list->string loose-) ',ranges-)
`(,makeit #t ,(list->string loose+) ',ranges+))))))))) `(,makeit #t ,(list->string loose+) ',ranges+)))))))))
@ -557,20 +569,21 @@
'hex-digit 'blank 'hex-digit 'blank
'ascii))) 'ascii)))
(nchars (char-set-size cs))) (nchars (char-set-size cs)))
(? ((zero? nchars) `(,(r '|))) (cond
((= 1 nchars) (apply string (char-set->list cs))) ((zero? nchars) `(,(r '|)))
((try cs) => r) ((= 1 nchars) (apply string (char-set->list cs)))
((try (char-set-complement cs)) => ((try cs) => r)
(lambda (name) `(,(r '~) ,name))) ((try (char-set-complement cs)) =>
(else (receive (cs rp comp?) (char-set->in-sexp-spec cs) (lambda (name) `(,(r '~) ,name)))
(let ((args (append (? ((string=? cs "") '()) (else (receive (cs rp comp?) (char-set->in-sexp-spec cs)
((= 1 (string-length cs)) `(,cs)) (let ((args (append (cond ((string=? cs "") '())
(else `((,cs)))) ((= 1 (string-length cs)) `(,cs))
(if (string=? rp "") '() (else `((,cs))))
(list `(,(r '/) ,rp)))))) (if (string=? rp "") '()
(if (and (= 1 (length args)) (not comp?)) (list `(,(r '/) ,rp))))))
(car args) (if (and (= 1 (length args)) (not comp?))
`(,(r (if comp? '~ '|)) . ,args))))))) (car args)
`(,(r (if comp? '~ '|)) . ,args)))))))
`(,(r 'unquote) ,cs))) ; dynamic -- ,<cset-exp> `(,(r 'unquote) ,cs))) ; dynamic -- ,<cset-exp>
@ -592,44 +605,45 @@
(define (regexp->sre/renamer re r) (define (regexp->sre/renamer re r)
(let recur ((re re)) (let recur ((re re))
(? ((re-string? re) (re-string:chars re)) (cond
((re-string? re) (re-string:chars re))
((re-seq? re) `(,(r ':) . ,(regexp->sres/renamer re r))) ((re-seq? re) `(,(r ':) . ,(regexp->sres/renamer re r)))
((re-choice? re) ((re-choice? re)
(let ((elts (re-choice:elts re)) (let ((elts (re-choice:elts re))
(%| (r '|))) (%| (r '|)))
(if (pair? elts) (if (pair? elts)
`(,%| . ,(map recur elts)) `(,%| . ,(map recur elts))
(let ((tsm (re-choice:tsm re))) (let ((tsm (re-choice:tsm re)))
(if (zero? tsm) `(,%|) `(,(r 'dsm) ,tsm 0 (,%|))))))) (if (zero? tsm) `(,%|) `(,(r 'dsm) ,tsm 0 (,%|)))))))
((re-char-set? re) (char-set->sre (re-char-set:cset re) r)) ((re-char-set? re) (char-set->sre (re-char-set:cset re) r))
((re-repeat? re) ((re-repeat? re)
(let ((from (re-repeat:from re)) (let ((from (re-repeat:from re))
(to (re-repeat:to re)) (to (re-repeat:to re))
(bodies (regexp->sres/renamer (re-repeat:body re) r))) (bodies (regexp->sres/renamer (re-repeat:body re) r)))
(? ((and (eqv? from 0) (not to)) `(,(r '*) . ,bodies)) (cond ((and (eqv? from 0) (not to)) `(,(r '*) . ,bodies))
((and (eqv? from 0) (eqv? to 1)) `(,(r '?) . ,bodies)) ((and (eqv? from 0) (eqv? to 1)) `(,(r '?) . ,bodies))
((and (eqv? from 1) (not to)) `(,(r '+) . ,bodies)) ((and (eqv? from 1) (not to)) `(,(r '+) . ,bodies))
((eqv? from to) `(,(r '=) ,to . bodies)) ((eqv? from to) `(,(r '=) ,to . bodies))
(to `(,(r '**) ,from ,to . ,bodies)) (to `(,(r '**) ,from ,to . ,bodies))
(else `(,(r '>=) ,from . ,bodies))))) (else `(,(r '>=) ,from . ,bodies)))))
((re-dsm? re) ((re-dsm? re)
`(,(r 'dsm) ,(re-dsm:pre-dsm re) ,(re-dsm:post-dsm re) `(,(r 'dsm) ,(re-dsm:pre-dsm re) ,(re-dsm:post-dsm re)
. ,(regexp->sres/renamer (re-dsm:body re) r))) . ,(regexp->sres/renamer (re-dsm:body re) r)))
((re-submatch? re) ((re-submatch? re)
`(,(r 'submatch) . ,(regexp->sres/renamer (re-submatch:body re) r))) `(,(r 'submatch) . ,(regexp->sres/renamer (re-submatch:body re) r)))
((re-bos? re) (r 'bos)) ((re-bos? re) (r 'bos))
((re-eos? re) (r 'eos)) ((re-eos? re) (r 'eos))
((re-bol? re) (r 'bol)) ((re-bol? re) (r 'bol))
((re-eol? re) (r 'eol)) ((re-eol? re) (r 'eol))
(else re)))) ; Presumably it's code. (else re)))) ; Presumably it's code.
(define (regexp->sre re) (regexp->sre/renamer re (lambda (x) x))) (define (regexp->sre re) (regexp->sre/renamer re (lambda (x) x)))

View File

@ -98,28 +98,29 @@
(define (translate-regexp re) (define (translate-regexp re)
(? ((re-string? re) (translate-string (re-string:chars re))) (cond
((re-string? re) (translate-string (re-string:chars re)))
((re-repeat? re) (translate-repeat re)) ((re-repeat? re) (translate-repeat re))
((re-choice? re) (translate-choice re)) ((re-choice? re) (translate-choice re))
((re-seq? re) (translate-seq re)) ((re-seq? re) (translate-seq re))
((re-char-set? re) (translate-char-set (re-char-set:cset re))) ((re-char-set? re) (translate-char-set (re-char-set:cset re)))
((re-submatch? re) (translate-submatch re)) ((re-submatch? re) (translate-submatch re))
((re-bos? re) (values "^" 1 0 '#())) ((re-bos? re) (values "^" 1 0 '#()))
((re-eos? re) (values "$" 1 0 '#())) ((re-eos? re) (values "$" 1 0 '#()))
((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation.")) ((re-bol? re) (error "Beginning-of-line regexp not supported in this implementation."))
((re-eol? re) (error "End-of-line regexp not supported in this implementation.")) ((re-eol? re) (error "End-of-line regexp not supported in this implementation."))
((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re)) ((re-dsm? re) (let ((pre-dsm (re-dsm:pre-dsm re))
(body (re-dsm:body re))) (body (re-dsm:body re)))
(translate-dsm body pre-dsm (translate-dsm body pre-dsm
(- (re-dsm:tsm re) (- (re-dsm:tsm re)
(+ pre-dsm (re-tsm body)))))) (+ pre-dsm (re-tsm body))))))
(else (error "Illegal regular expression" re)))) (else (error "Illegal regular expression" re))))
;;; Translate reloc-elt ELT = (N . RE) from a sequence or choice ;;; Translate reloc-elt ELT = (N . RE) from a sequence or choice
@ -248,35 +249,36 @@
(body (re-repeat:body re)) (body (re-repeat:body re))
(tsm (re-repeat:tsm re))) (tsm (re-repeat:tsm re)))
(? ((and to (> from to)) ; Unsatisfiable (cond
(values "[^\000-\377]" 1 0 (n-falses tsm))) ((and to (> from to)) ; Unsatisfiable
(values "[^\000-\377]" 1 0 (n-falses tsm)))
((and to (= from to 1)) (translate-seq body)) ; RE{1,1} => RE ((and to (= from to 1)) (translate-seq body)) ; RE{1,1} => RE
((and to (= to 0)) ; RE{0,0} => "" ((and to (= to 0)) ; RE{0,0} => ""
(values "" 2 0 (n-falses tsm))) (values "" 2 0 (n-falses tsm)))
(else ; General case (else ; General case
(receive (s level pcount submatches) (translate-regexp body) (receive (s level pcount submatches) (translate-regexp body)
(receive (s level pcount submatches) ; Coerce S to level <2. (receive (s level pcount submatches) ; Coerce S to level <2.
(if (> level 1) (if (> level 1)
(values (string-append "(" s ")") (values (string-append "(" s ")")
0 0
(+ pcount 1) (+ pcount 1)
(mapv (lambda (i) (and i (+ i 1))) submatches)) (mapv (lambda (i) (and i (+ i 1))) submatches))
(values s level pcount submatches)) (values s level pcount submatches))
(values (if to (values (if to
(? ((and (= from 0) (= to 1)) (string-append s "?")) (cond ((and (= from 0) (= to 1)) (string-append s "?"))
((= from to) ((= from to)
(string-append s "{" (number->string to) "}")) (string-append s "{" (number->string to) "}"))
(else (else
(string-append s "{" (number->string from) (string-append s "{" (number->string from)
"," (number->string to) "}"))) "," (number->string to) "}")))
(? ((= from 0) (string-append s "*")) (cond ((= from 0) (string-append s "*"))
((= from 1) (string-append s "+")) ((= from 1) (string-append s "+"))
(else (string-append s "{" (number->string from) ",}")))) (else (string-append s "{" (number->string from) ",}"))))
1 pcount submatches))))))) 1 pcount submatches)))))))
@ -372,19 +374,20 @@
(receive (loose ranges) (char-set->in-pair cset) (receive (loose ranges) (char-set->in-pair cset)
(hack-bracket-spec loose ranges in?))))) (hack-bracket-spec loose ranges in?)))))
(? ((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set (cond
((= 0 nchars) (values "[^\000-\177]" 1 0 '#())) ; Empty set
((= 1 nchars) ; Singleton set ((= 1 nchars) ; Singleton set
(translate-string (string (car (char-set->list cset))))) (translate-string (string (car (char-set->list cset)))))
;; General case. Try both [...] and [^...]. ;; General case. Try both [...] and [^...].
(else (let ((s- (->bracket-string cset #t)) (else (let ((s- (->bracket-string cset #t))
(s+ (->bracket-string (s+ (->bracket-string
(char-set-delete (char-set-complement cset) *nul*) (char-set-delete (char-set-complement cset) *nul*)
#f))) #f)))
(values (if (< (string-length s-) (string-length s+)) (values (if (< (string-length s-) (string-length s+))
s- s+) s- s+)
1 0 '#()))))))) 1 0 '#())))))))
;;; Commentary ;;; Commentary
@ -539,18 +542,19 @@
(shrink-range-finish-up end start (- end 1)))) (shrink-range-finish-up end start (- end 1))))
(define (shrink-range-finish-up c start end) (define (shrink-range-finish-up c start end)
(? ((> start end) (values (list (ascii->char c)) '())) ; Empty range (cond
((> start end) (values (list (ascii->char c)) '())) ; Empty range
((= start end) ; Collapse singleton range. ((= start end) ; Collapse singleton range.
(values (list (ascii->char c) (ascii->char start)) (values (list (ascii->char c) (ascii->char start))
'())) '()))
((= (+ start 1) end) ; Collapse doubleton range. ((= (+ start 1) end) ; Collapse doubleton range.
(values (list (ascii->char c) (ascii->char start) (ascii->char end)) (values (list (ascii->char c) (ascii->char start) (ascii->char end))
'())) '()))
(else (values (list (ascii->char c)) (else (values (list (ascii->char c))
(list (cons (ascii->char start) (ascii->char end))))))) (list (cons (ascii->char start) (ascii->char end)))))))
;;; We assume the bracket-spec is not a singleton, not empty, and not complete. ;;; We assume the bracket-spec is not a singleton, not empty, and not complete.
@ -573,16 +577,16 @@
(ranges (cdr ranges))) (ranges (cdr ranges)))
(receive (new-loose new-ranges) (recur ranges) (receive (new-loose new-ranges) (recur ranges)
(receive (new-loose0 new-ranges0) (receive (new-loose0 new-ranges0)
(? ((char=? #\] start) (cond ((char=? #\] start)
(shrink-range-start range)) (shrink-range-start range))
((char=? #\] end) ((char=? #\] end)
(shrink-range-end range)) (shrink-range-end range))
((char=? #\- start) ((char=? #\- start)
(shrink-range-start range)) (shrink-range-start range))
(else (values '() (list range)))) (else (values '() (list range))))
(values (append new-loose0 new-loose) (values (append new-loose0 new-loose)
(append new-ranges0 new-ranges))))) (append new-ranges0 new-ranges)))))
(values loose '()))) (values loose '())))
@ -590,38 +594,39 @@
(let ((loose (sort-list loose loose<=)) ; Sort loose chars and ranges. (let ((loose (sort-list loose loose<=)) ; Sort loose chars and ranges.
(ranges (sort-list ranges range<))) (ranges (sort-list ranges range<)))
(? ((or (not (equal? loose0 loose)) ; Loop if anything changed. (cond
(not (equal? ranges0 ranges))) ((or (not (equal? loose0 loose)) ; Loop if anything changed.
(lp loose ranges end-hyphen?)) (not (equal? ranges0 ranges)))
(lp loose ranges end-hyphen?))
;; If the first range opens with .=:, and the last loose char is [, ;; If the first range opens with .=:, and the last loose char is [,
;; shrink it out & loop. ;; shrink it out & loop.
((and (pair? ranges) ((and (pair? ranges)
(memv (caar ranges) '(#\. #\= #\:)) (memv (caar ranges) '(#\. #\= #\:))
(pair? loose) (pair? loose)
(char=? #\[ (car (reverse loose)))) (char=? #\[ (car (reverse loose))))
(receive (new-loose new-ranges) (receive (new-loose new-ranges)
(shrink-range-start (car ranges)) (shrink-range-start (car ranges))
(lp (append new-loose loose) (append new-ranges (cdr ranges)) end-hyphen?))) (lp (append new-loose loose) (append new-ranges (cdr ranges)) end-hyphen?)))
;; If there are no loose chars, the first range begins with ^, and ;; If there are no loose chars, the first range begins with ^, and
;; we're doing an IN range, shrink out the ^. ;; we're doing an IN range, shrink out the ^.
((and in? (null? loose) (pair? ranges) (char=? #\^ (caar ranges))) ((and in? (null? loose) (pair? ranges) (char=? #\^ (caar ranges)))
(receive (new-loose new-ranges) (shrink-range-start (car ranges)) (receive (new-loose new-ranges) (shrink-range-start (car ranges))
(lp (append new-loose loose) (append new-ranges ranges) end-hyphen?))) (lp (append new-loose loose) (append new-ranges ranges) end-hyphen?)))
;; If both [ and - are in the loose char set, ;; If both [ and - are in the loose char set,
;; pull - out as special end-hypen. ;; pull - out as special end-hypen.
((and (pair? loose) ((and (pair? loose)
(pair? (cdr loose)) (pair? (cdr loose))
(char=? (car loose) #\[) (char=? (car loose) #\[)
(char=? (car loose) #\-)) (char=? (car loose) #\-))
(lp (cons (car loose) (cddr loose)) ranges #t)) (lp (cons (car loose) (cddr loose)) ranges #t))
;; No change! Build the answer... ;; No change! Build the answer...
(else (string-append (if in? "[" "[^") (else (string-append (if in? "[" "[^")
(list->string loose) (list->string loose)
(apply string-append (apply string-append
(map (lambda (r) (string (car r) #\- (cdr r))) (map (lambda (r) (string (car r) #\- (cdr r)))
ranges)) ranges))
"]")))))))) "]"))))))))

View File

@ -45,14 +45,15 @@
(error "Illegal START parameter" (error "Illegal START parameter"
regexp-fold re kons knil s finish start)) regexp-fold re kons knil s finish start))
(let lp ((i start) (val knil)) (let lp ((i start) (val knil))
(? ((regexp-search re s i) => (cond
(lambda (m) ((regexp-search re s i) =>
(let ((next-i (match:end m 0))) (lambda (m)
(if (= next-i (match:start m 0)) (let ((next-i (match:end m 0)))
(error "An empty-string regexp match has put regexp-fold into an infinite loop." (if (= next-i (match:start m 0))
re s start next-i) (error "An empty-string regexp match has put regexp-fold into an infinite loop."
(lp next-i (kons i m val)))))) re s start next-i)
(else (finish i val)))))) (lp next-i (kons i m val))))))
(else (finish i val))))))
;;; regexp-fold-right re kons knil s [finish start] -> value ;;; regexp-fold-right re kons knil s [finish start] -> value
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -79,19 +80,21 @@
(error "Illegal START parameter" regexp-fold-right re kons knil s (error "Illegal START parameter" regexp-fold-right re kons knil s
finish start)) finish start))
(? ((regexp-search re s start) => (cond
(lambda (m) ((regexp-search re s start) =>
(finish (match:start m 0) (lambda (m)
(let recur ((last-m m)) (finish (match:start m 0)
(? ((regexp-search re s (match:end last-m 0)) => (let recur ((last-m m))
(lambda (m) (cond
(let ((i (match:start m 0))) ((regexp-search re s (match:end last-m 0)) =>
(if (= i (match:end m 0)) (lambda (m)
(error "An empty-string regexp match has put regexp-fold-right into an infinite loop." (let ((i (match:start m 0)))
re s start i) (if (= i (match:end m 0))
(kons last-m i (recur m)))))) (error "An empty-string regexp match has put regexp-fold-right into an infinite loop."
(else (kons last-m (string-length s) knil))))))) re s start i)
(else (finish (string-length s) knil))))) (kons last-m i (recur m))))))
(else (kons last-m (string-length s) knil)))))))
(else (finish (string-length s) knil)))))
;;; regexp-for-each re proc s [start] -> unspecific ;;; regexp-for-each re proc s [start] -> unspecific
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -104,11 +107,12 @@
(if (> start (string-length s)) (if (> start (string-length s))
(apply error "Illegal START parameter" regexp-for-each re proc s start) (apply error "Illegal START parameter" regexp-for-each re proc s start)
(let lp ((i start)) (let lp ((i start))
(? ((regexp-search re s i) => (cond
(lambda (m) ((regexp-search re s i) =>
(let ((next-i (match:end m 0))) (lambda (m)
(if (= (match:start m 0) next-i) (let ((next-i (match:end m 0)))
(error "An empty-string regexp match has put regexp-for-each into an infinite loop." (if (= (match:start m 0) next-i)
re proc s start next-i)) (error "An empty-string regexp match has put regexp-for-each into an infinite loop."
(proc m) re proc s start next-i))
(lp next-i))))))))) (proc m)
(lp next-i)))))))))

View File

@ -17,28 +17,29 @@
(set re cre) ; cache it, (set re cre) ; cache it,
cre))))) ; and return it. cre))))) ; and return it.
(? ((re-seq? re) (cond
(check-cache re-seq:posix set-re-seq:posix)) ((re-seq? re)
((re-choice? re) (check-cache re-seq:posix set-re-seq:posix))
(check-cache re-choice:posix set-re-choice:posix)) ((re-choice? re)
((re-repeat? re) (check-cache re-choice:posix set-re-choice:posix))
(check-cache re-repeat:posix set-re-repeat:posix)) ((re-repeat? re)
((re-char-set? re) (check-cache re-repeat:posix set-re-repeat:posix))
(check-cache re-char-set:posix set-re-char-set:posix)) ((re-char-set? re)
((re-string? re) (check-cache re-char-set:posix set-re-char-set:posix))
(check-cache re-string:posix set-re-string:posix)) ((re-string? re)
((re-submatch? re) (check-cache re-string:posix set-re-string:posix))
(check-cache re-submatch:posix set-re-submatch:posix)) ((re-submatch? re)
((re-dsm? re) (check-cache re-submatch:posix set-re-submatch:posix))
(check-cache re-dsm:posix set-re-dsm:posix)) ((re-dsm? re)
(check-cache re-dsm:posix set-re-dsm:posix))
((re-bos? re) (or bos-cre (set! bos-cre (compile)))) ((re-bos? re) (or bos-cre (set! bos-cre (compile))))
((re-eos? re) (or eos-cre (set! eos-cre (compile)))) ((re-eos? re) (or eos-cre (set! eos-cre (compile))))
((re-bol? re) (error "BOL regexp not supported in this implementation.")) ((re-bol? re) (error "BOL regexp not supported in this implementation."))
((re-eol? re) (error "EOL regexp not supported in this implementation.")) ((re-eol? re) (error "EOL regexp not supported in this implementation."))
(else (error "compile-regexp -- not a regexp" re))))) (else (error "compile-regexp -- not a regexp" re)))))
(define bos-cre #f) (define bos-cre #f)
(define eos-cre #f) (define eos-cre #f)

View File

@ -103,10 +103,10 @@
(if (pair? res) (if (pair? res)
(let* ((re (car res)) (let* ((re (car res))
(tail (recur (cdr res)))) (tail (recur (cdr res))))
(? ((re-seq? re) ; Flatten nested seqs (cond ((re-seq? re) ; Flatten nested seqs
(append (recur (re-seq:elts re)) tail)) (append (recur (re-seq:elts re)) tail))
((re-trivial? re) tail) ; Drop trivial elts ((re-trivial? re) tail) ; Drop trivial elts
(else (cons re tail)))) (else (cons re tail))))
'())))) '()))))
(if (pair? res) (if (pair? res)
@ -158,10 +158,10 @@
(if (pair? res) ; & drop empty re's. (if (pair? res) ; & drop empty re's.
(let* ((re (car res)) (let* ((re (car res))
(tail (recur (cdr res)))) (tail (recur (cdr res))))
(? ((re-choice? re) ; Flatten nested choices (cond ((re-choice? re) ; Flatten nested choices
(append (recur (re-choice:elts re)) tail)) (append (recur (re-choice:elts re)) tail))
((re-empty? re) tail) ; Drop empty re's. ((re-empty? re) tail) ; Drop empty re's.
(else (cons re tail)))) (else (cons re tail))))
'())))) '()))))
;; If all elts are char-class re's, fold them together. ;; If all elts are char-class re's, fold them together.
(if (every static-char-class? res) (if (every static-char-class? res)
@ -254,32 +254,33 @@
dsm0))) dsm0)))
(values from to body dsm0))))) (values from to body dsm0)))))
(? ((and (eqv? from 1) (eqv? to 1)) ; re{1,1} => re (cond
(values body1 pre-dsm)) ((and (eqv? from 1) (eqv? to 1)) ; re{1,1} => re
(values body1 pre-dsm))
((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => "" ((and (eqv? from 0) (eqv? to 0)) ; re{0,0} => ""
(values re-trivial (+ (re-tsm body1) pre-dsm))) (values re-trivial (+ (re-tsm body1) pre-dsm)))
;; re{m,n} => re-empty when m>n: ;; re{m,n} => re-empty when m>n:
((and (integer? from) (integer? to) (> from to)) ((and (integer? from) (integer? to) (> from to))
(values re-empty (+ (re-tsm body1) pre-dsm))) (values re-empty (+ (re-tsm body1) pre-dsm)))
;; Reduce the body = re-empty case. ;; Reduce the body = re-empty case.
((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in) ((and (re-empty? body1) (integer? from)) ; (+ (in)) => (in)
(values (if (> from 0) re-empty re-trivial) ; (* (in)) => "" (values (if (> from 0) re-empty re-trivial) ; (* (in)) => ""
pre-dsm)) pre-dsm))
;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1. ;; If BODY1 is eos, bos, or "", and m<=n, reduce to simply BODY1.
((and (integer? from) ((and (integer? from)
(or (and (integer? to) (<= from to)) (not to)) (or (and (integer? to) (<= from to)) (not to))
(or (re-eos? body1) (or (re-eos? body1)
(re-bos? body1) (re-bos? body1)
(and (re-string? body1) (and (re-string? body1)
(string=? "" (re-string:chars body1))))) (string=? "" (re-string:chars body1)))))
(values body1 pre-dsm)) (values body1 pre-dsm))
(else (values (make-re-repeat from to body1) ; general case (else (values (make-re-repeat from to body1) ; general case
pre-dsm))))) pre-dsm)))))
@ -410,15 +411,16 @@
;;; Return the total number of submatches bound in RE. ;;; Return the total number of submatches bound in RE.
(define (re-tsm re) (define (re-tsm re)
(? ((re-seq? re) (re-seq:tsm re)) (cond
((re-choice? re) (re-choice:tsm re)) ((re-seq? re) (re-seq:tsm re))
((re-repeat? re) (re-repeat:tsm re)) ((re-choice? re) (re-choice:tsm re))
((re-dsm? re) (re-dsm:tsm re)) ((re-repeat? re) (re-repeat:tsm re))
((re-submatch? re) (re-submatch:tsm re)) ((re-dsm? re) (re-dsm:tsm re))
((or (re-char-set? re) (re-string? re) ((re-submatch? re) (re-submatch:tsm re))
(re-bos? re) (re-eos? re) ((or (re-char-set? re) (re-string? re)
(re-bol? re) (re-eol? re)) (re-bos? re) (re-eos? re)
0))) (re-bol? re) (re-eol? re))
0)))
;;; (flush-submatches re) ;;; (flush-submatches re)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -426,19 +428,20 @@
;;; stripped out -- (= 0 (re-tsm (flush-submatches re))). ;;; stripped out -- (= 0 (re-tsm (flush-submatches re))).
(define (flush-submatches re) (define (flush-submatches re)
(? ((zero? (re-tsm re)) re) ; RE has no submatches. (cond
((zero? (re-tsm re)) re) ; RE has no submatches.
((re-seq? re) (re-seq (map flush-submatches (re-seq:elts re)))) ((re-seq? re) (re-seq (map flush-submatches (re-seq:elts re))))
((re-choice? re) (re-choice (map flush-submatches (re-choice:elts re)))) ((re-choice? re) (re-choice (map flush-submatches (re-choice:elts re))))
((re-repeat? re) (re-repeat (re-repeat:from re) ((re-repeat? re) (re-repeat (re-repeat:from re)
(re-repeat:to re) (re-repeat:to re)
(flush-submatches (re-repeat:body re)))) (flush-submatches (re-repeat:body re))))
((re-submatch? re) (flush-submatches (re-submatch:body re))) ((re-submatch? re) (flush-submatches (re-submatch:body re)))
((re-dsm? re) (flush-submatches (re-dsm:body re))) ((re-dsm? re) (flush-submatches (re-dsm:body re)))
(else re))) (else re)))
;;; Map F over ELTS. (F x) returns two values -- the "real" return value, ;;; Map F over ELTS. (F x) returns two values -- the "real" return value,
@ -464,54 +467,55 @@
(define (uncase re) (define (uncase re)
(receive (new-re changed?) (receive (new-re changed?)
(let recur ((re re)) (let recur ((re re))
(? ((re-seq? re) (cond
(let ((elts (re-seq:elts re))) ((re-seq? re)
(receive (new-elts elts-changed?) (let ((elts (re-seq:elts re)))
(map/changed recur elts) (receive (new-elts elts-changed?)
(if elts-changed? (map/changed recur elts)
(values (make-re-seq/tsm new-elts (re-seq:tsm re)) #t) (if elts-changed?
(values re #f))))) (values (make-re-seq/tsm new-elts (re-seq:tsm re)) #t)
(values re #f)))))
((re-choice? re) ((re-choice? re)
(let ((elts (re-choice:elts re))) (let ((elts (re-choice:elts re)))
(receive (new-elts elts-changed?) (receive (new-elts elts-changed?)
(map/changed recur elts) (map/changed recur elts)
(if elts-changed? (if elts-changed?
(values (re-choice new-elts) #t) (values (re-choice new-elts) #t)
(values re #f))))) (values re #f)))))
((re-char-set? re) ((re-char-set? re)
(let* ((cs (re-char-set:cset re)) (let* ((cs (re-char-set:cset re))
(new-cs (uncase-char-set cs))) ; Better not be code. (new-cs (uncase-char-set cs))) ; Better not be code.
(if (char-set= cs new-cs) (if (char-set= cs new-cs)
(values re #f) (values re #f)
(values (make-re-char-set new-cs) #t)))) (values (make-re-char-set new-cs) #t))))
((re-repeat? re) ((re-repeat? re)
(receive (new-body body-changed?) (recur (re-repeat:body re)) (receive (new-body body-changed?) (recur (re-repeat:body re))
(if body-changed? (if body-changed?
(values (re-repeat (re-repeat:from re) (values (re-repeat (re-repeat:from re)
(re-repeat:to re) (re-repeat:to re)
new-body) new-body)
#t) #t)
(values re #f)))) (values re #f))))
((re-submatch? re) ((re-submatch? re)
(receive (new-body body-changed?) (recur (re-submatch:body re)) (receive (new-body body-changed?) (recur (re-submatch:body re))
(if body-changed? (if body-changed?
(values (make-re-submatch/tsm new-body (values (make-re-submatch/tsm new-body
(re-submatch:pre-dsm re) (re-submatch:pre-dsm re)
(re-submatch:tsm re)) (re-submatch:tsm re))
#t) #t)
(values re #f)))) (values re #f))))
((re-string? re) ((re-string? re)
(let ((cf-re (uncase-string (re-string:chars re)))) (let ((cf-re (uncase-string (re-string:chars re))))
(if (re-string? cf-re) (if (re-string? cf-re)
(values re #f) (values re #f)
(values cf-re #t)))) (values cf-re #t))))
(else (values re #f)))) (else (values re #f))))
new-re)) new-re))
@ -537,11 +541,14 @@
(define (uncase-string s) (define (uncase-string s)
;; SEQ is a list of chars and doubleton char-sets. ;; SEQ is a list of chars and doubleton char-sets.
(let* ((seq (string-fold-right (lambda (c lis) (let* ((seq (string-fold-right (lambda (c lis)
(cons (? ((char-lower-case? c) (char-set c (char-upcase c))) (cons (cond
((char-upper-case? c) (char-set c (char-downcase c))) ((char-lower-case? c)
(else c)) (char-set c (char-upcase c)))
((char-upper-case? c)
(char-set c (char-downcase c)))
(else c))
lis)) lis))
'() s)) '() s))
;; Coalesce adjacent chars together into a string. ;; Coalesce adjacent chars together into a string.
(fixup (lambda (chars seq) (fixup (lambda (chars seq)

View File

@ -4,24 +4,26 @@
;;; char-set, or regexp value. Coerce one of these to a regexp value. ;;; char-set, or regexp value. Coerce one of these to a regexp value.
(define (coerce-dynamic-regexp x) (define (coerce-dynamic-regexp x)
(? ((string? x) (make-re-string x)) (cond
((char? x) (make-re-string (string x))) ((string? x) (make-re-string x))
((char-set? x) (make-re-char-set x)) ((char? x) (make-re-string (string x)))
((regexp? x) x) ((char-set? x) (make-re-char-set x))
(else (error "Cannot coerce value to regular expression." x)))) ((regexp? x) x)
(else (error "Cannot coerce value to regular expression." x))))
;;; In a char-set context (e.g., as an operand of the SRE - operator), ;;; In a char-set context (e.g., as an operand of the SRE - operator),
;;; a ,<exp> or form must be coercable to a char-set. ;;; a ,<exp> or form must be coercable to a char-set.
(define (coerce-dynamic-charset x) (define (coerce-dynamic-charset x)
(? ((string? x) (cond
(if (= 1 (string-length x)) (string->char-set x) ((string? x)
(error "Multi-char string not allowed as ,<exp> or ,@<exp> SRE in char-class context." (if (= 1 (string-length x)) (string->char-set x)
x))) (error "Multi-char string not allowed as ,<exp> or ,@<exp> SRE in char-class context."
((char? x) (char-set x)) x)))
((char-set? x) x) ((char? x) (char-set x))
((re-char-set? x) (re-char-set:cset x)) ((char-set? x) x)
(else (error "Cannot coerce value to character set" x)))) ((re-char-set? x) (re-char-set:cset x))
(else (error "Cannot coerce value to character set" x))))
(define (spec->char-set in? loose ranges) (define (spec->char-set in? loose ranges)

View File

@ -39,27 +39,28 @@
re)) re))
(define (simp-re re) (define (simp-re re)
(? ((re-string? re) (values re 0)) (cond
((re-seq? re) (simp-seq re)) ((re-string? re) (values re 0))
((re-choice? re) (simp-choice re)) ((re-seq? re) (simp-seq re))
((re-choice? re) (simp-choice re))
;; Singleton char-sets reduce to the character. ;; Singleton char-sets reduce to the character.
;; Bear in mind the cset field might be Scheme code instead ;; Bear in mind the cset field might be Scheme code instead
;; of an actual char set if the regexp is dynamic. ;; of an actual char set if the regexp is dynamic.
((re-char-set? re) ((re-char-set? re)
(values (let ((cs (re-char-set:cset re))) (values (let ((cs (re-char-set:cset re)))
(if (and (char-set? cs) (if (and (char-set? cs)
(= 1 (char-set-size cs))) (= 1 (char-set-size cs)))
(make-re-string (string (car (char-set->list cs)))) (make-re-string (string (car (char-set->list cs))))
re)) re))
0)) 0))
((re-repeat? re) (simp-repeat re)) ((re-repeat? re) (simp-repeat re))
((re-submatch? re) (simp-submatch re)) ((re-submatch? re) (simp-submatch re))
((re-dsm? re) (simp-dsm re)) ((re-dsm? re) (simp-dsm re))
(else (values re 0)))) (else (values re 0))))
@ -122,54 +123,56 @@
(define (simp-seq1 elts abort tsm) (define (simp-seq1 elts abort tsm)
(let recur ((elt (car elts)) (elts (cdr elts))) (let recur ((elt (car elts)) (elts (cdr elts)))
(receive (elt pre-dsm) (open-dsm elt) (receive (elt pre-dsm) (open-dsm elt)
(? ((re-seq? elt) ; Flatten nested seqs. (cond
(let ((sub-elts (re-seq:elts elt))) ((re-seq? elt) ; Flatten nested seqs.
(recur (re-dsm (car sub-elts) pre-dsm 0) (let ((sub-elts (re-seq:elts elt)))
(append (cdr sub-elts) elts)))) (recur (re-dsm (car sub-elts) pre-dsm 0)
(append (cdr sub-elts) elts))))
((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty ((re-empty? elt) (abort elt tsm)) ; Bomb out on the empty
; (impossible) re. ; (impossible) re.
((pair? elts) ((pair? elts)
(receive (next-pre-dsm next tail) ; Simplify the tail, (receive (next-pre-dsm next tail) ; Simplify the tail,
(recur (car elts) (cdr elts)) ; then think about (recur (car elts) (cdr elts)) ; then think about
; the head: ; the head:
;; This guy is called when we couldn't find any other ;; This guy is called when we couldn't find any other
;; simplification. If ELT contains live submatches, then ;; simplification. If ELT contains live submatches, then
;; there really is nothing to be done at this step -- just ;; there really is nothing to be done at this step -- just
;; assemble the pieces together and return them. If ELT ;; assemble the pieces together and return them. If ELT
;; *doesn't* contain any live submatches, do the same, but ;; *doesn't* contain any live submatches, do the same, but
;; bubble its following next-pre-dsm submatches forwards. ;; bubble its following next-pre-dsm submatches forwards.
(define (no-simp) (define (no-simp)
(if (has-live-submatches? elt) (if (has-live-submatches? elt)
(values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail)) (values pre-dsm elt (cons (re-dsm next next-pre-dsm 0) tail))
(values (+ pre-dsm next-pre-dsm) elt (cons next tail)))) (values (+ pre-dsm next-pre-dsm) elt (cons next tail))))
;; Coalesces two adjacent bol's, two adjacent eol's, etc. ;; Coalesces two adjacent bol's, two adjacent eol's, etc.
(define (coalesce-anchor anchor?) (define (coalesce-anchor anchor?)
(if (and (anchor? elt) (anchor? next)) (if (and (anchor? elt) (anchor? next))
(values (+ pre-dsm next-pre-dsm) elt tail) (values (+ pre-dsm next-pre-dsm) elt tail)
(no-simp))) (no-simp)))
(? ((re-trivial? elt) ; Drop trivial re's. (cond
(values (+ pre-dsm next-pre-dsm) next tail)) ((re-trivial? elt) ; Drop trivial re's.
(values (+ pre-dsm next-pre-dsm) next tail))
;; Coalesce adjacent strings ;; Coalesce adjacent strings
((re-string? elt) ((re-string? elt)
(if (re-string? next) (if (re-string? next)
(values (+ pre-dsm next-pre-dsm) (values (+ pre-dsm next-pre-dsm)
(make-re-string (string-append (re-string:chars elt) (make-re-string (string-append (re-string:chars elt)
(re-string:chars next))) (re-string:chars next)))
tail) tail)
(no-simp))) (no-simp)))
;; Coalesce adjacent bol/eol/bos/eos's. ;; Coalesce adjacent bol/eol/bos/eos's.
((re-bol? elt) (coalesce-anchor re-bol?)) ((re-bol? elt) (coalesce-anchor re-bol?))
((re-eol? elt) (coalesce-anchor re-eol?)) ((re-eol? elt) (coalesce-anchor re-eol?))
((re-bos? elt) (coalesce-anchor re-bos?)) ((re-bos? elt) (coalesce-anchor re-bos?))
((re-eos? elt) (coalesce-anchor re-eos?)) ((re-eos? elt) (coalesce-anchor re-eos?))
(else (no-simp))))) (else (no-simp)))))
(else (values pre-dsm elt '())))))) (else (values pre-dsm elt '()))))))
@ -241,11 +244,12 @@
(tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail)) (tail (if (and eol? (not prev-eol?)) (cons re-eol tail) tail))
(tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail)) (tail (if (and bol? (not prev-bol?)) (cons re-bol tail) tail))
(tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail)) (tail (if (and bos? (not prev-bos?)) (cons re-bos tail) tail))
(tail (? ((zero? numchars) tail) ; Drop empty char set. (tail (cond
((= 1 numchars) ; {c} => "c" ((zero? numchars) tail) ; Drop empty char set.
(cons (make-re-string (string (car (char-set->list cset)))) ((= 1 numchars) ; {c} => "c"
tail)) (cons (make-re-string (string (car (char-set->list cset))))
(else (cons (make-re-char-set cset) tail))))) tail))
(else (cons (make-re-char-set cset) tail)))))
tail)) tail))
@ -271,32 +275,33 @@
;; Flatten nested choices. ;; Flatten nested choices.
(let ((sub-elts (re-seq:elts elt))) (let ((sub-elts (re-seq:elts elt)))
(receive (tail-pre-dsm cset bos? eos? bol? eol? tail) (receive (tail-pre-dsm cset bos? eos? bol? eol? tail)
(recur (append sub-elts elts) (recur (append sub-elts elts)
prev-cset prev-cset
prev-bos? prev-eos? prev-bos? prev-eos?
prev-bol? prev-eol?) prev-bol? prev-eol?)
(values (+ pre-dsm tail-pre-dsm) (values (+ pre-dsm tail-pre-dsm)
cset bos? eos? bol? eol? tail))) cset bos? eos? bol? eol? tail)))
;; Simplify the tail, then think about the head. ;; Simplify the tail, then think about the head.
(receive (tail-pre-dsm cset bos? eos? bol? eol? tail) (receive (tail-pre-dsm cset bos? eos? bol? eol? tail)
(recur elts (recur elts
(? ((and (re-string? elt) (cond
(= 1 (string-length (re-string:chars elt)))) ((and (re-string? elt)
(char-set-union prev-cset (= 1 (string-length (re-string:chars elt))))
(string->char-set (re-string:chars elt)))) (char-set-union prev-cset
(string->char-set (re-string:chars elt))))
;; The cset might be a Scheme exp. ;; The cset might be a Scheme exp.
((and (re-char-set? elt) ((and (re-char-set? elt)
(char-set? (re-char-set:cset elt))) (char-set? (re-char-set:cset elt)))
(char-set-union prev-cset (char-set-union prev-cset
(re-char-set:cset elt))) (re-char-set:cset elt)))
(else prev-cset)) (else prev-cset))
(or prev-bos? (re-bos? elt)) (or prev-bos? (re-bos? elt))
(or prev-eos? (re-eos? elt)) (or prev-eos? (re-eos? elt))
(or prev-bol? (re-bol? elt)) (or prev-bol? (re-bol? elt))
(or prev-eol? (re-eol? elt))) (or prev-eol? (re-eol? elt)))
;; This guy is called when we couldn't find any other ;; This guy is called when we couldn't find any other
;; simplification. If ELT contains live submatches, then we ;; simplification. If ELT contains live submatches, then we
@ -334,29 +339,30 @@
cset bos? eos? bol? eol? cset bos? eos? bol? eol?
(cons elt tail)))) (cons elt tail))))
(? ((and (re-char-set? elt) (cond
(char-set? (re-char-set:cset elt))) ; Might be Scheme code ((and (re-char-set? elt)
(values (+ pre-dsm tail-pre-dsm) (char-set? (re-char-set:cset elt))) ; Might be Scheme code
(char-set-union cset (re-char-set:cset elt)) (values (+ pre-dsm tail-pre-dsm)
bos? eos? bol? eol? tail)) (char-set-union cset (re-char-set:cset elt))
bos? eos? bol? eol? tail))
;; Treat a singleton string "c" as a singleton set {c}. ;; Treat a singleton string "c" as a singleton set {c}.
((and (re-string? elt) (= 1 (string-length (re-string:chars elt)))) ((and (re-string? elt) (= 1 (string-length (re-string:chars elt))))
(values (+ pre-dsm tail-pre-dsm) (values (+ pre-dsm tail-pre-dsm)
(char-set-union cset (string->char-set (re-string:chars elt))) (char-set-union cset (string->char-set (re-string:chars elt)))
bos? eos? bol? eol? tail)) bos? eos? bol? eol? tail))
;; Coalesce bol/eol/bos/eos's. ;; Coalesce bol/eol/bos/eos's.
((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset ((re-bos? elt) (values (+ pre-dsm tail-pre-dsm) cset
#t eos? bol? eol? tail)) #t eos? bol? eol? tail))
((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset ((re-eos? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? #t bol? eol? tail)) bos? #t bol? eol? tail))
((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset ((re-bol? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? eos? #t eol? tail)) bos? eos? #t eol? tail))
((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset ((re-eol? elt) (values (+ pre-dsm tail-pre-dsm) cset
bos? eos? bol? #t tail)) bos? eos? bol? #t tail))
(else (no-simp))))))) (else (no-simp)))))))
(values 0 char-set:empty #f #f #f #f '())))) (values 0 char-set:empty #f #f #f #f '()))))
@ -378,15 +384,15 @@
(define (has-live-submatches? re) (define (has-live-submatches? re)
(or (re-submatch? re) (or (re-submatch? re)
(? ((re-seq? re) (every has-live-submatches? (re-seq:elts re))) (cond ((re-seq? re) (every has-live-submatches? (re-seq:elts re)))
((re-choice? re) (every has-live-submatches? (re-choice:elts re))) ((re-choice? re) (every has-live-submatches? (re-choice:elts re)))
((re-repeat? re) (has-live-submatches? (re-repeat:body re))) ((re-repeat? re) (has-live-submatches? (re-repeat:body re)))
((re-dsm? re) (has-live-submatches? (re-dsm:body re))) ((re-dsm? re) (has-live-submatches? (re-dsm:body re)))
;; If it's not one of these things, then this isn't a regexp -- it's ;; If it's not one of these things, then this isn't a regexp -- it's
;; a chunk of Scheme code producing a regexp, and we conservatively ;; a chunk of Scheme code producing a regexp, and we conservatively
;; return #T -- the expression *might* produce a regexp containing ;; return #T -- the expression *might* produce a regexp containing
;; a live submatch: ;; a live submatch:
(else (not (or (re-char-set? re) (re-string? re) (else (not (or (re-char-set? re) (re-string? re)
(re-bos? re) (re-eos? re) (re-bos? re) (re-eos? re)
(re-bol? re) (re-eol? re))))))) (re-bol? re) (re-eol? re)))))))