diff --git a/Makefile.in b/Makefile.in index 4f515ae..3dc8d8a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -124,7 +124,7 @@ SCSHOBJS = \ scsh/machine/libansi.o \ scsh/network1.o \ scsh/putenv.o \ - scsh/rx/re1.o \ + scsh/rx/regexp1.o \ scsh/select.o scsh/select1.o \ scsh/sleep1.o \ scsh/syscalls.o scsh/syscalls1.o \ @@ -132,11 +132,11 @@ SCSHOBJS = \ scsh/time.o scsh/time1.o \ scsh/tty.o scsh/tty1.o \ scsh/userinfo1.o \ - scsh/sighandlers1.o \ - scsh/regexp/libregex.a + scsh/sighandlers1.o -SCSH_INITIALIZERS = s48_init_syslog s48_init_userinfo s48_init_sighandlers \ - s48_init_re_low s48_init_syscalls2 s48_init_network s48_init_flock +SCSH_INITIALIZERS = s48_init_syslog s48_init_posix_regexp \ + s48_init_userinfo s48_init_sighandlers \ + s48_init_syscalls2 s48_init_network s48_init_flock UNIX_OBJS = c/unix/misc.o c/unix/io.o c/unix/fd-io.o c/unix/event.o @@ -233,10 +233,7 @@ scsh/syscalls1.o scsh/syscalls.o: scsh/syscalls1.h scsh/time1.o scsh/time.o: scsh/time1.h scsh/tty1.o scsh/tty.o: scsh/tty1.h -# Not really, but making regexp/libregex.a makes the regexp/regex.h file that -# re1.c actually does need. -# TODO: This is broken at the moment: regex.h is not made after checkout -scsh/rx/re1.o: scsh/rx/re1.h scsh/regexp/libregex.a +scsh/rx/regexp1.o: c/scheme48.h scsh/syscalls.o: scsh/syscalls1.h scsh/dirstuff1.h scsh/fdports1.h \ scsh/select1.h scsh/userinfo1.h @@ -424,7 +421,7 @@ clean-scm2c: distclean: clean rm -f Makefile config.log config.status c/sysdep.h config.cache \ - scsh/machine scsh/regexp/Makefile \ + scsh/machine \ scsh/endian.scm scsh/static.scm \ exportlist.aix $(RM) a.exe $(VM).base $(VM).def $(VM).exp @@ -759,6 +756,8 @@ SCHEME =scsh/awk.scm \ scsh/pty.scm \ scsh/rdelim.scm \ scsh/rw.scm \ + scsh/rx/packages.scm \ + scsh/rx/cond-package.scm \ scsh/scsh-condition.scm \ scsh/scsh-interfaces.scm \ scsh/scsh-package.scm \ @@ -837,9 +836,6 @@ scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image echo ",batch on") \ | ./$(VM) -o ./$(VM) -i $(CIG).image -h 10000000 -scsh/regexp/libregex.a: - cd ./scsh/regexp; $(MAKE) lib - install-scsh: scsh install-scsh-image $(RM) $(bindir)/$(RUNNABLE) $(INSTALL_PROGRAM) $(srcdir)/scsh/scsh $(bindir)/$(RUNNABLE) @@ -858,9 +854,8 @@ install-scsh-image: $(INSTALL_DATA) /tmp/scsh.image $(LIB)/scsh.image clean-scsh: - $(RM) scsh/*.o scsh/regexp/*.o scsh/rx/*.o scsh/machine/*.o + $(RM) scsh/*.o scsh/rx/*.o scsh/machine/*.o $(RM) scsh/*.image $(RM) $(LIBSCSHVM) $(LIBSCSH) scsh/scsh$(EXEEXT) - -cd scsh/regexp; $(MAKE) clean diff --git a/configure.in b/configure.in index 159346f..b824b72 100644 --- a/configure.in +++ b/configure.in @@ -396,6 +396,6 @@ fail AC_SUBST(TMPDIR) -AC_OUTPUT(Makefile scsh/regexp/Makefile scsh/endian.scm scsh/static.scm) +AC_OUTPUT(Makefile scsh/endian.scm scsh/static.scm) chmod +x scsh/static.scm diff --git a/scsh/rx/packages.scm b/scsh/rx/packages.scm index b993f71..8984c36 100644 --- a/scsh/rx/packages.scm +++ b/scsh/rx/packages.scm @@ -3,6 +3,47 @@ ;;; scsh-level-0, and export from there. ;;; -Olin 8/98 +;; From Scheme 48, only here temporarily + +(define-structure external-util (export immutable-copy-string) + (open scheme + primitives ;copy-bytes! + features) ;immutable? make-immutable! + (begin + (define (immutable-copy-string string) + (if (immutable? string) + string + (let ((copy (copy-string string))) + (make-immutable! copy) + copy))) + + ; Why isn't this available elsewhere? + + (define (copy-string string) + (let* ((length (string-length string)) + (new (make-string length #\?))) + (copy-bytes! string 0 new 0 length) + new)))) + +(define-interface posix-regexps-interface + (export make-regexp + (regexp-option :syntax) + regexp? + regexp-match + + match? + match-start + match-end + match-submatches + )) + +(define-structures ((posix-regexps posix-regexps-interface) + (posix-regexps-internal (export make-match))) + (open scheme define-record-types finite-types external-calls + signals + external-util) + (files regexp)) + (define-interface basic-re-interface (export (re-dsm? (proc (:value) :boolean)) (make-re-dsm (proc (:value :exact-integer :exact-integer) :value)) @@ -128,8 +169,7 @@ (define re-match-internals-interface (export (regexp-match:string (proc (:value) :string)) - (regexp-match:start (proc (:value) :vector)) - (regexp-match:end (proc (:value) :vector)))) + (regexp-match:submatches (proc (:value) :vector)))) (define-interface posix-re-interface @@ -167,7 +207,6 @@ (match:start (proc (:value &opt :exact-integer) :value)) (match:end (proc (:value &opt :exact-integer) :value)) (match:substring (proc (:value &opt :exact-integer) :value)) - (clean-up-cres (proc () :unspecific)) (regexp-search (proc (:value :string &opt :exact-integer) :value)) (regexp-search? (proc (:value :string &opt :exact-integer) @@ -205,6 +244,7 @@ define-record-types ; JMG debugging external-calls string-lib ; string-fold + posix-regexps scheme) (files re-low re simp re-high @@ -290,6 +330,7 @@ (define-structure re-subst re-subst-interface (open re-level-0 re-match-internals + posix-regexps scsh-utilities ; fold & some string utilities that need to be moved. scsh-level-0 ; write-string string-lib ; string-copy! @@ -332,8 +373,8 @@ ;;; re-high compile-regexp regexp-search regexp-search? ;;; re-subst regexp-substitute regexp-substitute/global ;;; re-low match:start match:end match:substring -;;; CRE record, new-cre, compile-posix-re->c-struct -;;; cre-search cre-search? clean-up-cres +;;; CRE record, new-cre +;;; cre-search cre-search? ;;; re-syntax sre-form? if-sre-form expand-rx ;;; re.scm The ADT. flush-submatches uncase uncase-char-set ;;; char-set-full? char-set-empty? diff --git a/scsh/rx/re-low.scm b/scsh/rx/re-low.scm index 9b7d0f4..e935746 100644 --- a/scsh/rx/re-low.scm +++ b/scsh/rx/re-low.scm @@ -1,38 +1,29 @@ ;;; Regular expression matching for scsh ;;; Copyright (c) 1994 by Olin Shivers. -(foreign-init-name "re_low") - -(foreign-source - "/* Make sure foreign-function stubs interface to the C funs correctly: */" - "#include " - "#include \"../regexp/regex.h\"" - "#include \"re1.h\"" - "" "" - ) - ;;; Match data for regexp matches. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define-record regexp-match - string ; The string against which we matched - start ; vector of starting indices - end) ; vector of ending indices + string + submatches) (define (match:start match . maybe-index) - (vector-ref (regexp-match:start match) - (:optional maybe-index 0))) + (match-start + (vector-ref (regexp-match:submatches match) + (:optional maybe-index 0)))) (define (match:end match . maybe-index) - (vector-ref (regexp-match:end match) - (:optional maybe-index 0))) + (match-start + (vector-ref (regexp-match:submatches match) + (:optional maybe-index 0)))) (define (match:substring match . maybe-index) (let* ((i (:optional maybe-index 0)) - (start (vector-ref (regexp-match:start match) i))) - (and start (substring (regexp-match:string match) - start - (vector-ref (regexp-match:end match) i))))) + (submatch (vector-ref (regexp-match:submatches match) i))) + (and submatch (substring (regexp-match:string match) + (match-start submatch) + (match-end submatch))))) ;;; Compiling regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -47,121 +38,59 @@ ;(define-record cre ; A compiled regular expression ; string ; The Posix string form of the regexp or #F. ; max-paren ; Max paren in STRING needed for submatches. -; (bytes #f) ; Pointer to the compiled form, in the C heap, or #F. -; (bytes/nm #f) ; Same as BYTES, but compiled with no-submatch. +; (regexp #f) ; Compiled form or #F. +; (regexp/nm #f) ; Same as REGEXP, but compiled with no-submatch. ; tvec ; Translation vector for the submatches ; ((disclose self) (list "cre" (cre:string self)))) (define-record-type cre :cre - (really-make-cre string max-paren bytes bytes/nm tvec debug) + (really-make-cre string max-paren regexp regexp/nm tvec debug) cre? (string cre:string set-cre:string) (max-paren cre:max-paren set-cre:max-paren) - (bytes cre:bytes set-cre:bytes) - (bytes/nm cre:bytes/nm set-cre:bytes/nm) + (regexp cre:regexp set-cre:regexp) + (regexp/nm cre:regexp/nm set-cre:regexp/nm) (tvec cre:tvec set-cre:tvec) (debug cre:debug set-cre:debug)) (define-record-discloser :cre (lambda (self) (list "cre" (cre:string self)))) -(define-record-resumer :cre (lambda (cre) - (set-cre:bytes cre #f) - (set-cre:bytes/nm cre #f))) - (define (make-cre str max-paren tvec) (really-make-cre str max-paren #f #f tvec #f)) - -(define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec)) +(define (new-cre str tvec) + (make-cre str (max-live-posix-submatch tvec) tvec)) (define (max-live-posix-submatch tvec) (vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec)) -(define (compile-posix-re->c-struct re-string sm?) - (let ((maybe-struct (%compile-re re-string sm?))) - (if (pair? maybe-struct) - (error (car maybe-struct) - (%regerror-msg (car maybe-struct) (cdr maybe-struct)) - compile-posix-re->c-struct re-string sm?) - maybe-struct))) - -;;; returns pointer as number or a pair of error number and 0 -(define-stubless-foreign %compile-re (pattern submatches?) "compile_re") - ;;; Searching with compiled regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; cre-search returns match info; cre-search? is just a predicate. +;; ### we do not look at START yet (define (cre-search cre start-vec end-vec str start) (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. - (and re-str - (let* ((C-bytes (or (cre:bytes cre) - (let ((C-bytes (compile-posix-re->c-struct re-str #t))) - (set-cre:bytes cre C-bytes) - (register-re-c-struct:bytes cre) - C-bytes))) - (retcode (%cre-search C-bytes str start - (cre:tvec cre) - (cre:max-paren cre) - start-vec end-vec))) - (if (integer? retcode) - (error retcode (%regerror-msg retcode C-bytes) - cre-search cre start-vec end-vec str start) - (and retcode (make-regexp-match str start-vec end-vec))))))) + (if (not re-str) + #f + (begin + (if (not (cre:regexp cre)) + (set-cre:regexp cre (make-regexp re-str + (regexp-option extended) + (regexp-option submatches)))) + (let ((ret (regexp-match (cre:regexp cre) str #t #f #f start))) + (if (not ret) + #f + (make-regexp-match str + (list->vector ret)))))))) -(define (cre-search? cre str start) +(define (cre-search? cre start-vec end-vec str start) (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. - (and re-str - (let* ((C-bytes (or (cre:bytes/nm cre) - (let ((C-bytes (compile-posix-re->c-struct re-str #f))) - (set-cre:bytes/nm cre C-bytes) - (register-re-c-struct:bytes/nm cre) - C-bytes))) - (retcode (%cre-search C-bytes str start '#() -1 '#() '#()))) - (if (integer? retcode) - (error retcode (%regerror-msg retcode C-bytes) - cre-search? cre str start) - retcode))))) - -; 0 success, #f no-match, or non-zero int error code: -(define-stubless-foreign %cre-search - (compiled-regexp str start tvec max-psm svec evec) "re_search") - - - -;;; Generate an error msg from an error code. - -(define-stubless-foreign %regerror-msg (errcode re) "re_errint2str") - - -;;; Reclaiming compiled regexp storage -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define-stubless-foreign %free-re (re) "free_re") - - -;;; Whenever we make a new CRE, add the appropriate finalizer, -;;; so the C regex_t structure can be freeed - -(define (free-bytes the-cre) - (if (cre:bytes the-cre) - (%free-re (cre:bytes the-cre)) - (warn "free-bytes called on #f"))) - -(define (free-bytes/nm the-cre) - (if (cre:bytes/nm the-cre) - (%free-re (cre:bytes/nm the-cre)) - (warn "free-bytes/nm called on #f"))) - -(define (register-re-c-struct:bytes cre) - (add-finalizer! cre free-bytes)) - -(define (register-re-c-struct:bytes/nm cre) - (add-finalizer! cre free-bytes/nm)) - - -(define (register-re-c-struct cre c-bytes) - (error "function register-re-c-struct no longer supported")) - -(define (clean-up-cres) - (warn "function clean-up-cres no longer supported")) + (if (not re-str) + #f + (begin + (if (not (cre:regexp/nm cre)) + (set-cre:regexp/nm cre (make-regexp re-str + (regexp-option extended)))) + (regexp-match (cre:regexp/nm cre) str #f #f #f))))) diff --git a/scsh/rx/re-subst.scm b/scsh/rx/re-subst.scm index a55b82b..68279c0 100644 --- a/scsh/rx/re-subst.scm +++ b/scsh/rx/re-subst.scm @@ -11,14 +11,17 @@ (define (regexp-substitute port match . items) (let* ((str (regexp-match:string match)) - (sv (regexp-match:start match)) - (ev (regexp-match:end match)) + (submatches (regexp-match:submatches match)) (range (lambda (item) ; Return start & end of (cond ((integer? item) ; ITEM's range in STR. - (values (vector-ref sv item) - (vector-ref ev item))) - ((eq? 'pre item) (values 0 (vector-ref sv 0))) - ((eq? 'post item) (values (vector-ref ev 0) + (let ((submatch (vector-ref submatches item))) + (values (match-start submatch) + (match-end submatch)))) + ((eq? 'pre item) (values 0 + (match-start + (vector-ref submatches 0)))) + ((eq? 'post item) (values (match-end + (vector-ref submatches 0)) (string-length str))) (else (error "Illegal substitution item." item @@ -54,11 +57,15 @@ (define (regexp-substitute/global port re str . items) (let ((str-len (string-length str)) - (range (lambda (start sv ev item) ; Return start & end of + (range (lambda (start submatches item) ; Return start & end of (cond ((integer? item) ; ITEM's range in STR. - (values (vector-ref sv item) - (vector-ref ev item))) - ((eq? 'pre item) (values start (vector-ref sv 0))) + (let ((submatch (vector-ref submatches item))) + (values (match-start submatch) + (match-end submatch)))) + ((eq? 'pre item) + (values start + (match-start + (vector-ref submatches 0)))) (else (error "Illegal substitution item." item regexp-substitute/global))))) @@ -73,10 +80,9 @@ (if (<= start str-len) (let ((match (regexp-search re str start))) (if match - (let* ((sv (regexp-match:start match)) - (ev (regexp-match:end match)) - (s (vector-ref sv 0)) - (e (vector-ref ev 0)) + (let* ((submatches (regexp-match:submatches match)) + (s (match-start (vector-ref submatches 0))) + (e (match-end (vector-ref submatches 0))) (empty? (= s e))) (for-each (lambda (item) (cond ((string? item) (write-string item port)) @@ -91,7 +97,7 @@ (recur (if empty? (+ 1 e) e))) (else (receive (si ei) - (range start sv ev item) + (range start submatches item) (write-string str port si ei))))) items)) @@ -99,14 +105,15 @@ ;; Either we're making a string, or >1 POST. (let* ((pieces (let recur ((start 0)) - (if (> start str-len) '() + (if (> start str-len) + '() (let ((match (regexp-search re str start)) (cached-post #f)) (if match - (let* ((sv (regexp-match:start match)) - (ev (regexp-match:end match)) - (s (vector-ref sv 0)) - (e (vector-ref ev 0)) + (let* ((submatches + (regexp-match:submatches match)) + (s (match-start (vector-ref submatches 0))) + (e (match-end (vector-ref submatches 0))) (empty? (= s e))) (fold (lambda (item pieces) (cond ((string? item) @@ -128,7 +135,7 @@ (append cached-post pieces)) (else (receive (si ei) - (range start sv ev item) + (range start submatches item) (cons (substring str si ei) pieces))))) '() items))