Use Scheme 48 regexp code instead of ours.
This commit is contained in:
parent
34c5cd67bd
commit
6213213e14
25
Makefile.in
25
Makefile.in
|
@ -124,7 +124,7 @@ SCSHOBJS = \
|
|||
scsh/machine/libansi.o \
|
||||
scsh/network1.o \
|
||||
scsh/putenv.o \
|
||||
scsh/rx/re1.o \
|
||||
scsh/rx/regexp1.o \
|
||||
scsh/select.o scsh/select1.o \
|
||||
scsh/sleep1.o \
|
||||
scsh/syscalls.o scsh/syscalls1.o \
|
||||
|
@ -132,11 +132,11 @@ SCSHOBJS = \
|
|||
scsh/time.o scsh/time1.o \
|
||||
scsh/tty.o scsh/tty1.o \
|
||||
scsh/userinfo1.o \
|
||||
scsh/sighandlers1.o \
|
||||
scsh/regexp/libregex.a
|
||||
scsh/sighandlers1.o
|
||||
|
||||
SCSH_INITIALIZERS = s48_init_syslog s48_init_userinfo s48_init_sighandlers \
|
||||
s48_init_re_low s48_init_syscalls2 s48_init_network s48_init_flock
|
||||
SCSH_INITIALIZERS = s48_init_syslog s48_init_posix_regexp \
|
||||
s48_init_userinfo s48_init_sighandlers \
|
||||
s48_init_syscalls2 s48_init_network s48_init_flock
|
||||
|
||||
UNIX_OBJS = c/unix/misc.o c/unix/io.o c/unix/fd-io.o c/unix/event.o
|
||||
|
||||
|
@ -233,10 +233,7 @@ scsh/syscalls1.o scsh/syscalls.o: scsh/syscalls1.h
|
|||
scsh/time1.o scsh/time.o: scsh/time1.h
|
||||
scsh/tty1.o scsh/tty.o: scsh/tty1.h
|
||||
|
||||
# Not really, but making regexp/libregex.a makes the regexp/regex.h file that
|
||||
# re1.c actually does need.
|
||||
# TODO: This is broken at the moment: regex.h is not made after checkout
|
||||
scsh/rx/re1.o: scsh/rx/re1.h scsh/regexp/libregex.a
|
||||
scsh/rx/regexp1.o: c/scheme48.h
|
||||
|
||||
scsh/syscalls.o: scsh/syscalls1.h scsh/dirstuff1.h scsh/fdports1.h \
|
||||
scsh/select1.h scsh/userinfo1.h
|
||||
|
@ -424,7 +421,7 @@ clean-scm2c:
|
|||
|
||||
distclean: clean
|
||||
rm -f Makefile config.log config.status c/sysdep.h config.cache \
|
||||
scsh/machine scsh/regexp/Makefile \
|
||||
scsh/machine \
|
||||
scsh/endian.scm scsh/static.scm \
|
||||
exportlist.aix
|
||||
$(RM) a.exe $(VM).base $(VM).def $(VM).exp
|
||||
|
@ -759,6 +756,8 @@ SCHEME =scsh/awk.scm \
|
|||
scsh/pty.scm \
|
||||
scsh/rdelim.scm \
|
||||
scsh/rw.scm \
|
||||
scsh/rx/packages.scm \
|
||||
scsh/rx/cond-package.scm \
|
||||
scsh/scsh-condition.scm \
|
||||
scsh/scsh-interfaces.scm \
|
||||
scsh/scsh-package.scm \
|
||||
|
@ -837,9 +836,6 @@ scsh/scsh.image: $(VM) $(SCHEME) $(CIG).image
|
|||
echo ",batch on") \
|
||||
| ./$(VM) -o ./$(VM) -i $(CIG).image -h 10000000
|
||||
|
||||
scsh/regexp/libregex.a:
|
||||
cd ./scsh/regexp; $(MAKE) lib
|
||||
|
||||
install-scsh: scsh install-scsh-image
|
||||
$(RM) $(bindir)/$(RUNNABLE)
|
||||
$(INSTALL_PROGRAM) $(srcdir)/scsh/scsh $(bindir)/$(RUNNABLE)
|
||||
|
@ -858,9 +854,8 @@ install-scsh-image:
|
|||
$(INSTALL_DATA) /tmp/scsh.image $(LIB)/scsh.image
|
||||
|
||||
clean-scsh:
|
||||
$(RM) scsh/*.o scsh/regexp/*.o scsh/rx/*.o scsh/machine/*.o
|
||||
$(RM) scsh/*.o scsh/rx/*.o scsh/machine/*.o
|
||||
$(RM) scsh/*.image
|
||||
$(RM) $(LIBSCSHVM) $(LIBSCSH) scsh/scsh$(EXEEXT)
|
||||
-cd scsh/regexp; $(MAKE) clean
|
||||
|
||||
|
||||
|
|
|
@ -396,6 +396,6 @@ fail
|
|||
AC_SUBST(TMPDIR)
|
||||
|
||||
|
||||
AC_OUTPUT(Makefile scsh/regexp/Makefile scsh/endian.scm scsh/static.scm)
|
||||
AC_OUTPUT(Makefile scsh/endian.scm scsh/static.scm)
|
||||
chmod +x scsh/static.scm
|
||||
|
||||
|
|
|
@ -3,6 +3,47 @@
|
|||
;;; scsh-level-0, and export from there.
|
||||
;;; -Olin <shivers@ai.mit.edu> 8/98
|
||||
|
||||
;; From Scheme 48, only here temporarily
|
||||
|
||||
(define-structure external-util (export immutable-copy-string)
|
||||
(open scheme
|
||||
primitives ;copy-bytes!
|
||||
features) ;immutable? make-immutable!
|
||||
(begin
|
||||
(define (immutable-copy-string string)
|
||||
(if (immutable? string)
|
||||
string
|
||||
(let ((copy (copy-string string)))
|
||||
(make-immutable! copy)
|
||||
copy)))
|
||||
|
||||
; Why isn't this available elsewhere?
|
||||
|
||||
(define (copy-string string)
|
||||
(let* ((length (string-length string))
|
||||
(new (make-string length #\?)))
|
||||
(copy-bytes! string 0 new 0 length)
|
||||
new))))
|
||||
|
||||
(define-interface posix-regexps-interface
|
||||
(export make-regexp
|
||||
(regexp-option :syntax)
|
||||
regexp?
|
||||
regexp-match
|
||||
|
||||
match?
|
||||
match-start
|
||||
match-end
|
||||
match-submatches
|
||||
))
|
||||
|
||||
(define-structures ((posix-regexps posix-regexps-interface)
|
||||
(posix-regexps-internal (export make-match)))
|
||||
(open scheme define-record-types finite-types external-calls
|
||||
signals
|
||||
external-util)
|
||||
(files regexp))
|
||||
|
||||
(define-interface basic-re-interface
|
||||
(export (re-dsm? (proc (:value) :boolean))
|
||||
(make-re-dsm (proc (:value :exact-integer :exact-integer) :value))
|
||||
|
@ -128,8 +169,7 @@
|
|||
|
||||
(define re-match-internals-interface
|
||||
(export (regexp-match:string (proc (:value) :string))
|
||||
(regexp-match:start (proc (:value) :vector))
|
||||
(regexp-match:end (proc (:value) :vector))))
|
||||
(regexp-match:submatches (proc (:value) :vector))))
|
||||
|
||||
|
||||
(define-interface posix-re-interface
|
||||
|
@ -167,7 +207,6 @@
|
|||
(match:start (proc (:value &opt :exact-integer) :value))
|
||||
(match:end (proc (:value &opt :exact-integer) :value))
|
||||
(match:substring (proc (:value &opt :exact-integer) :value))
|
||||
(clean-up-cres (proc () :unspecific))
|
||||
(regexp-search (proc (:value :string &opt :exact-integer)
|
||||
:value))
|
||||
(regexp-search? (proc (:value :string &opt :exact-integer)
|
||||
|
@ -205,6 +244,7 @@
|
|||
define-record-types ; JMG debugging
|
||||
external-calls
|
||||
string-lib ; string-fold
|
||||
posix-regexps
|
||||
scheme)
|
||||
|
||||
(files re-low re simp re-high
|
||||
|
@ -290,6 +330,7 @@
|
|||
(define-structure re-subst re-subst-interface
|
||||
(open re-level-0
|
||||
re-match-internals
|
||||
posix-regexps
|
||||
scsh-utilities ; fold & some string utilities that need to be moved.
|
||||
scsh-level-0 ; write-string
|
||||
string-lib ; string-copy!
|
||||
|
@ -332,8 +373,8 @@
|
|||
;;; re-high compile-regexp regexp-search regexp-search?
|
||||
;;; re-subst regexp-substitute regexp-substitute/global
|
||||
;;; re-low match:start match:end match:substring
|
||||
;;; CRE record, new-cre, compile-posix-re->c-struct
|
||||
;;; cre-search cre-search? clean-up-cres
|
||||
;;; CRE record, new-cre
|
||||
;;; cre-search cre-search?
|
||||
;;; re-syntax sre-form? if-sre-form expand-rx
|
||||
;;; re.scm The ADT. flush-submatches uncase uncase-char-set
|
||||
;;; char-set-full? char-set-empty?
|
||||
|
|
|
@ -1,38 +1,29 @@
|
|||
;;; Regular expression matching for scsh
|
||||
;;; Copyright (c) 1994 by Olin Shivers.
|
||||
|
||||
(foreign-init-name "re_low")
|
||||
|
||||
(foreign-source
|
||||
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
|
||||
"#include <sys/types.h>"
|
||||
"#include \"../regexp/regex.h\""
|
||||
"#include \"re1.h\""
|
||||
"" ""
|
||||
)
|
||||
|
||||
;;; Match data for regexp matches.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define-record regexp-match
|
||||
string ; The string against which we matched
|
||||
start ; vector of starting indices
|
||||
end) ; vector of ending indices
|
||||
string
|
||||
submatches)
|
||||
|
||||
(define (match:start match . maybe-index)
|
||||
(vector-ref (regexp-match:start match)
|
||||
(:optional maybe-index 0)))
|
||||
(match-start
|
||||
(vector-ref (regexp-match:submatches match)
|
||||
(:optional maybe-index 0))))
|
||||
|
||||
(define (match:end match . maybe-index)
|
||||
(vector-ref (regexp-match:end match)
|
||||
(:optional maybe-index 0)))
|
||||
(match-start
|
||||
(vector-ref (regexp-match:submatches match)
|
||||
(:optional maybe-index 0))))
|
||||
|
||||
(define (match:substring match . maybe-index)
|
||||
(let* ((i (:optional maybe-index 0))
|
||||
(start (vector-ref (regexp-match:start match) i)))
|
||||
(and start (substring (regexp-match:string match)
|
||||
start
|
||||
(vector-ref (regexp-match:end match) i)))))
|
||||
(submatch (vector-ref (regexp-match:submatches match) i)))
|
||||
(and submatch (substring (regexp-match:string match)
|
||||
(match-start submatch)
|
||||
(match-end submatch)))))
|
||||
|
||||
;;; Compiling regexps
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
@ -47,121 +38,59 @@
|
|||
;(define-record cre ; A compiled regular expression
|
||||
; string ; The Posix string form of the regexp or #F.
|
||||
; max-paren ; Max paren in STRING needed for submatches.
|
||||
; (bytes #f) ; Pointer to the compiled form, in the C heap, or #F.
|
||||
; (bytes/nm #f) ; Same as BYTES, but compiled with no-submatch.
|
||||
; (regexp #f) ; Compiled form or #F.
|
||||
; (regexp/nm #f) ; Same as REGEXP, but compiled with no-submatch.
|
||||
; tvec ; Translation vector for the submatches
|
||||
; ((disclose self) (list "cre" (cre:string self))))
|
||||
|
||||
(define-record-type cre :cre
|
||||
(really-make-cre string max-paren bytes bytes/nm tvec debug)
|
||||
(really-make-cre string max-paren regexp regexp/nm tvec debug)
|
||||
cre?
|
||||
(string cre:string set-cre:string)
|
||||
(max-paren cre:max-paren set-cre:max-paren)
|
||||
(bytes cre:bytes set-cre:bytes)
|
||||
(bytes/nm cre:bytes/nm set-cre:bytes/nm)
|
||||
(regexp cre:regexp set-cre:regexp)
|
||||
(regexp/nm cre:regexp/nm set-cre:regexp/nm)
|
||||
(tvec cre:tvec set-cre:tvec)
|
||||
(debug cre:debug set-cre:debug))
|
||||
|
||||
(define-record-discloser :cre
|
||||
(lambda (self) (list "cre" (cre:string self))))
|
||||
|
||||
(define-record-resumer :cre (lambda (cre)
|
||||
(set-cre:bytes cre #f)
|
||||
(set-cre:bytes/nm cre #f)))
|
||||
|
||||
(define (make-cre str max-paren tvec)
|
||||
(really-make-cre str max-paren #f #f tvec #f))
|
||||
|
||||
|
||||
(define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec))
|
||||
(define (new-cre str tvec)
|
||||
(make-cre str (max-live-posix-submatch tvec) tvec))
|
||||
|
||||
(define (max-live-posix-submatch tvec)
|
||||
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
|
||||
|
||||
(define (compile-posix-re->c-struct re-string sm?)
|
||||
(let ((maybe-struct (%compile-re re-string sm?)))
|
||||
(if (pair? maybe-struct)
|
||||
(error (car maybe-struct)
|
||||
(%regerror-msg (car maybe-struct) (cdr maybe-struct))
|
||||
compile-posix-re->c-struct re-string sm?)
|
||||
maybe-struct)))
|
||||
|
||||
;;; returns pointer as number or a pair of error number and 0
|
||||
(define-stubless-foreign %compile-re (pattern submatches?) "compile_re")
|
||||
|
||||
;;; Searching with compiled regexps
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; cre-search returns match info; cre-search? is just a predicate.
|
||||
|
||||
;; ### we do not look at START yet
|
||||
(define (cre-search cre start-vec end-vec str start)
|
||||
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
|
||||
(and re-str
|
||||
(let* ((C-bytes (or (cre:bytes cre)
|
||||
(let ((C-bytes (compile-posix-re->c-struct re-str #t)))
|
||||
(set-cre:bytes cre C-bytes)
|
||||
(register-re-c-struct:bytes cre)
|
||||
C-bytes)))
|
||||
(retcode (%cre-search C-bytes str start
|
||||
(cre:tvec cre)
|
||||
(cre:max-paren cre)
|
||||
start-vec end-vec)))
|
||||
(if (integer? retcode)
|
||||
(error retcode (%regerror-msg retcode C-bytes)
|
||||
cre-search cre start-vec end-vec str start)
|
||||
(and retcode (make-regexp-match str start-vec end-vec)))))))
|
||||
(if (not re-str)
|
||||
#f
|
||||
(begin
|
||||
(if (not (cre:regexp cre))
|
||||
(set-cre:regexp cre (make-regexp re-str
|
||||
(regexp-option extended)
|
||||
(regexp-option submatches))))
|
||||
(let ((ret (regexp-match (cre:regexp cre) str #t #f #f start)))
|
||||
(if (not ret)
|
||||
#f
|
||||
(make-regexp-match str
|
||||
(list->vector ret))))))))
|
||||
|
||||
(define (cre-search? cre str start)
|
||||
(define (cre-search? cre start-vec end-vec str start)
|
||||
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
|
||||
(and re-str
|
||||
(let* ((C-bytes (or (cre:bytes/nm cre)
|
||||
(let ((C-bytes (compile-posix-re->c-struct re-str #f)))
|
||||
(set-cre:bytes/nm cre C-bytes)
|
||||
(register-re-c-struct:bytes/nm cre)
|
||||
C-bytes)))
|
||||
(retcode (%cre-search C-bytes str start '#() -1 '#() '#())))
|
||||
(if (integer? retcode)
|
||||
(error retcode (%regerror-msg retcode C-bytes)
|
||||
cre-search? cre str start)
|
||||
retcode)))))
|
||||
|
||||
; 0 success, #f no-match, or non-zero int error code:
|
||||
(define-stubless-foreign %cre-search
|
||||
(compiled-regexp str start tvec max-psm svec evec) "re_search")
|
||||
|
||||
|
||||
|
||||
;;; Generate an error msg from an error code.
|
||||
|
||||
(define-stubless-foreign %regerror-msg (errcode re) "re_errint2str")
|
||||
|
||||
|
||||
;;; Reclaiming compiled regexp storage
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(define-stubless-foreign %free-re (re) "free_re")
|
||||
|
||||
|
||||
;;; Whenever we make a new CRE, add the appropriate finalizer,
|
||||
;;; so the C regex_t structure can be freeed
|
||||
|
||||
(define (free-bytes the-cre)
|
||||
(if (cre:bytes the-cre)
|
||||
(%free-re (cre:bytes the-cre))
|
||||
(warn "free-bytes called on #f")))
|
||||
|
||||
(define (free-bytes/nm the-cre)
|
||||
(if (cre:bytes/nm the-cre)
|
||||
(%free-re (cre:bytes/nm the-cre))
|
||||
(warn "free-bytes/nm called on #f")))
|
||||
|
||||
(define (register-re-c-struct:bytes cre)
|
||||
(add-finalizer! cre free-bytes))
|
||||
|
||||
(define (register-re-c-struct:bytes/nm cre)
|
||||
(add-finalizer! cre free-bytes/nm))
|
||||
|
||||
|
||||
(define (register-re-c-struct cre c-bytes)
|
||||
(error "function register-re-c-struct no longer supported"))
|
||||
|
||||
(define (clean-up-cres)
|
||||
(warn "function clean-up-cres no longer supported"))
|
||||
(if (not re-str)
|
||||
#f
|
||||
(begin
|
||||
(if (not (cre:regexp/nm cre))
|
||||
(set-cre:regexp/nm cre (make-regexp re-str
|
||||
(regexp-option extended))))
|
||||
(regexp-match (cre:regexp/nm cre) str #f #f #f)))))
|
||||
|
|
|
@ -11,14 +11,17 @@
|
|||
|
||||
(define (regexp-substitute port match . items)
|
||||
(let* ((str (regexp-match:string match))
|
||||
(sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match))
|
||||
(submatches (regexp-match:submatches match))
|
||||
(range (lambda (item) ; Return start & end of
|
||||
(cond ((integer? item) ; ITEM's range in STR.
|
||||
(values (vector-ref sv item)
|
||||
(vector-ref ev item)))
|
||||
((eq? 'pre item) (values 0 (vector-ref sv 0)))
|
||||
((eq? 'post item) (values (vector-ref ev 0)
|
||||
(let ((submatch (vector-ref submatches item)))
|
||||
(values (match-start submatch)
|
||||
(match-end submatch))))
|
||||
((eq? 'pre item) (values 0
|
||||
(match-start
|
||||
(vector-ref submatches 0))))
|
||||
((eq? 'post item) (values (match-end
|
||||
(vector-ref submatches 0))
|
||||
(string-length str)))
|
||||
(else (error "Illegal substitution item."
|
||||
item
|
||||
|
@ -54,11 +57,15 @@
|
|||
|
||||
(define (regexp-substitute/global port re str . items)
|
||||
(let ((str-len (string-length str))
|
||||
(range (lambda (start sv ev item) ; Return start & end of
|
||||
(range (lambda (start submatches item) ; Return start & end of
|
||||
(cond ((integer? item) ; ITEM's range in STR.
|
||||
(values (vector-ref sv item)
|
||||
(vector-ref ev item)))
|
||||
((eq? 'pre item) (values start (vector-ref sv 0)))
|
||||
(let ((submatch (vector-ref submatches item)))
|
||||
(values (match-start submatch)
|
||||
(match-end submatch))))
|
||||
((eq? 'pre item)
|
||||
(values start
|
||||
(match-start
|
||||
(vector-ref submatches 0))))
|
||||
(else (error "Illegal substitution item."
|
||||
item
|
||||
regexp-substitute/global)))))
|
||||
|
@ -73,10 +80,9 @@
|
|||
(if (<= start str-len)
|
||||
(let ((match (regexp-search re str start)))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match))
|
||||
(s (vector-ref sv 0))
|
||||
(e (vector-ref ev 0))
|
||||
(let* ((submatches (regexp-match:submatches match))
|
||||
(s (match-start (vector-ref submatches 0)))
|
||||
(e (match-end (vector-ref submatches 0)))
|
||||
(empty? (= s e)))
|
||||
(for-each (lambda (item)
|
||||
(cond ((string? item) (write-string item port))
|
||||
|
@ -91,7 +97,7 @@
|
|||
(recur (if empty? (+ 1 e) e)))
|
||||
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(range start submatches item)
|
||||
(write-string str port si ei)))))
|
||||
items))
|
||||
|
||||
|
@ -99,14 +105,15 @@
|
|||
|
||||
;; Either we're making a string, or >1 POST.
|
||||
(let* ((pieces (let recur ((start 0))
|
||||
(if (> start str-len) '()
|
||||
(if (> start str-len)
|
||||
'()
|
||||
(let ((match (regexp-search re str start))
|
||||
(cached-post #f))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match))
|
||||
(s (vector-ref sv 0))
|
||||
(e (vector-ref ev 0))
|
||||
(let* ((submatches
|
||||
(regexp-match:submatches match))
|
||||
(s (match-start (vector-ref submatches 0)))
|
||||
(e (match-end (vector-ref submatches 0)))
|
||||
(empty? (= s e)))
|
||||
(fold (lambda (item pieces)
|
||||
(cond ((string? item)
|
||||
|
@ -128,7 +135,7 @@
|
|||
(append cached-post pieces))
|
||||
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(range start submatches item)
|
||||
(cons (substring str si ei)
|
||||
pieces)))))
|
||||
'() items))
|
||||
|
|
Loading…
Reference in New Issue