1999-09-23 10:27:41 -04:00
|
|
|
;;; Regular expression matching for scsh
|
|
|
|
;;; Copyright (c) 1994 by Olin Shivers.
|
|
|
|
|
1999-09-23 13:46:46 -04:00
|
|
|
(foreign-init-name "re_low")
|
|
|
|
|
1999-09-23 10:27:41 -04:00
|
|
|
(foreign-source
|
|
|
|
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
|
|
|
|
"#include <sys/types.h>"
|
|
|
|
"#include \"../regexp/regex.h\""
|
|
|
|
"#include \"re1.h\""
|
|
|
|
"" ""
|
|
|
|
)
|
|
|
|
|
|
|
|
;;; Match data for regexp matches.
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
(define-record regexp-match
|
|
|
|
string ; The string against which we matched
|
|
|
|
start ; vector of starting indices
|
|
|
|
end) ; vector of ending indices
|
|
|
|
|
|
|
|
(define (match:start match . maybe-index)
|
|
|
|
(vector-ref (regexp-match:start match)
|
|
|
|
(:optional maybe-index 0)))
|
|
|
|
|
|
|
|
(define (match:end match . maybe-index)
|
|
|
|
(vector-ref (regexp-match:end match)
|
|
|
|
(:optional maybe-index 0)))
|
|
|
|
|
|
|
|
(define (match:substring match . maybe-index)
|
|
|
|
(let* ((i (:optional maybe-index 0))
|
|
|
|
(start (vector-ref (regexp-match:start match) i)))
|
|
|
|
(and start (substring (regexp-match:string match)
|
|
|
|
start
|
|
|
|
(vector-ref (regexp-match:end match) i)))))
|
|
|
|
|
|
|
|
;;; Compiling regexps
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; There's no legal Posix string expressing the empty match (e.g., (|))
|
|
|
|
;;; that will never match anything. So when we have one of these, we set
|
|
|
|
;;; the STRING field to #f. The matchers will spot this case and handle it
|
|
|
|
;;; specially.
|
|
|
|
|
|
|
|
;;; We compile the string two ways, on demand -- one for cre-search, and
|
|
|
|
;;; one for cre-search?.
|
|
|
|
|
1999-10-08 09:16:35 -04:00
|
|
|
;(define-record cre ; A compiled regular expression
|
|
|
|
; string ; The Posix string form of the regexp or #F.
|
|
|
|
; max-paren ; Max paren in STRING needed for submatches.
|
|
|
|
; (bytes #f) ; Pointer to the compiled form, in the C heap, or #F.
|
|
|
|
; (bytes/nm #f) ; Same as BYTES, but compiled with no-submatch.
|
|
|
|
; tvec ; Translation vector for the submatches
|
|
|
|
; ((disclose self) (list "cre" (cre:string self))))
|
|
|
|
|
|
|
|
(define-record-type cre :cre
|
|
|
|
(really-make-cre string max-paren bytes bytes/nm tvec debug)
|
|
|
|
cre?
|
|
|
|
(string cre:string set-cre:string)
|
|
|
|
(max-paren cre:max-paren set-cre:max-paren)
|
|
|
|
(bytes cre:bytes set-cre:bytes)
|
|
|
|
(bytes/nm cre:bytes/nm set-cre:bytes/nm)
|
|
|
|
(tvec cre:tvec set-cre:tvec)
|
|
|
|
(debug cre:debug set-cre:debug))
|
|
|
|
|
|
|
|
(define-record-discloser :cre
|
|
|
|
(lambda (self) (list "cre" (cre:string self))))
|
|
|
|
|
|
|
|
|
|
|
|
(define (make-cre str max-paren tvec)
|
|
|
|
(really-make-cre str max-paren #f #f tvec #f))
|
|
|
|
|
1999-09-23 10:27:41 -04:00
|
|
|
|
|
|
|
(define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec))
|
|
|
|
|
|
|
|
(define (max-live-posix-submatch tvec)
|
|
|
|
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
|
|
|
|
|
|
|
|
(define (compile-posix-re->c-struct re-string sm?)
|
|
|
|
(receive (errcode c-struct) (%compile-re re-string sm?)
|
|
|
|
(if (zero? errcode) c-struct
|
|
|
|
(error errcode (%regerror-msg errcode c-struct)
|
|
|
|
compile-posix-re->c-struct re-string sm?))))
|
|
|
|
|
|
|
|
(define-foreign %compile-re (compile_re (string-desc pattern) (bool submatches?))
|
|
|
|
integer ; 0 or error code
|
|
|
|
(C regex_t*))
|
|
|
|
|
|
|
|
|
|
|
|
;;; Searching with compiled regexps
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; cre-search returns match info; cre-search? is just a predicate.
|
|
|
|
|
|
|
|
(define (cre-search cre start-vec end-vec str start)
|
|
|
|
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
|
|
|
|
(and re-str
|
|
|
|
(let* ((C-bytes (or (cre:bytes cre)
|
|
|
|
(let ((C-bytes (compile-posix-re->c-struct re-str #t)))
|
|
|
|
(set-cre:bytes cre C-bytes)
|
1999-10-08 09:16:35 -04:00
|
|
|
(register-re-c-struct:bytes cre)
|
1999-09-23 10:27:41 -04:00
|
|
|
C-bytes)))
|
|
|
|
(retcode (%cre-search C-bytes str start
|
|
|
|
(cre:tvec cre)
|
|
|
|
(cre:max-paren cre)
|
|
|
|
start-vec end-vec)))
|
|
|
|
(if (integer? retcode)
|
|
|
|
(error retcode (%regerror-msg retcode C-bytes)
|
|
|
|
cre-search cre start-vec end-vec str start)
|
|
|
|
(and retcode (make-regexp-match str start-vec end-vec)))))))
|
|
|
|
|
|
|
|
(define (cre-search? cre str start)
|
|
|
|
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
|
|
|
|
(and re-str
|
|
|
|
(let* ((C-bytes (or (cre:bytes/nm cre)
|
|
|
|
(let ((C-bytes (compile-posix-re->c-struct re-str #f)))
|
|
|
|
(set-cre:bytes/nm cre C-bytes)
|
1999-10-08 09:16:35 -04:00
|
|
|
(register-re-c-struct:bytes/nm cre)
|
1999-09-23 10:27:41 -04:00
|
|
|
C-bytes)))
|
|
|
|
(retcode (%cre-search C-bytes str start '#() -1 '#() '#())))
|
|
|
|
(if (integer? retcode)
|
|
|
|
(error retcode (%regerror-msg retcode C-bytes)
|
|
|
|
cre-search? cre str start)
|
|
|
|
retcode)))))
|
|
|
|
|
|
|
|
(define-foreign %cre-search
|
|
|
|
(re_search ((C "const regex_t *~a") compiled-regexp)
|
|
|
|
(string-desc str)
|
|
|
|
(integer start)
|
|
|
|
(vector-desc tvec) (integer max-psm)
|
|
|
|
(vector-desc svec) (vector-desc evec))
|
|
|
|
desc) ; 0 success, #f no-match, or non-zero int error code.
|
|
|
|
|
|
|
|
|
|
|
|
;;; Generate an error msg from an error code.
|
|
|
|
|
|
|
|
(define-foreign %regerror-msg (re_errint2str (integer errcode)
|
|
|
|
((C "const regex_t *~a") re))
|
|
|
|
string)
|
|
|
|
|
|
|
|
|
|
|
|
;;; Reclaiming compiled regexp storage
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define-foreign %free-re (free_re ((C regex_t*) re)) ignore)
|
|
|
|
|
|
|
|
|
1999-10-08 09:16:35 -04:00
|
|
|
;;; Whenever we make a new CRE, add the appropriate finalizer,
|
|
|
|
;;; so the C regex_t structure can be freeed
|
|
|
|
|
|
|
|
(define (free-bytes the-cre)
|
|
|
|
(if (cre:bytes the-cre)
|
|
|
|
(%free-re (cre:bytes the-cre))
|
2000-10-19 04:22:37 -04:00
|
|
|
(warn "free-bytes called on #f")))
|
1999-10-08 09:16:35 -04:00
|
|
|
|
|
|
|
(define (free-bytes/nm the-cre)
|
|
|
|
(if (cre:bytes the-cre)
|
|
|
|
(%free-re (cre:bytes/nm the-cre))
|
2000-10-19 04:22:37 -04:00
|
|
|
(warn "free-bytes/nm called on #f")))
|
1999-10-08 09:16:35 -04:00
|
|
|
|
|
|
|
(define (register-re-c-struct:bytes cre)
|
|
|
|
(add-finalizer! cre free-bytes))
|
|
|
|
|
|
|
|
(define (register-re-c-struct:bytes/nm cre)
|
|
|
|
(add-finalizer! cre free-bytes/nm))
|
|
|
|
|
|
|
|
|
1999-09-23 10:27:41 -04:00
|
|
|
(define (register-re-c-struct cre c-bytes)
|
1999-10-08 09:16:35 -04:00
|
|
|
(error "function register-re-c-struct no longer supported"))
|
1999-09-23 10:27:41 -04:00
|
|
|
|
|
|
|
(define (clean-up-cres)
|
1999-10-08 09:16:35 -04:00
|
|
|
(warn "function clean-up-cres no longer supported"))
|