;;; Regular expression matching for scsh ;;; Copyright (c) 1994 by Olin Shivers. (foreign-init-name "re_low") (foreign-source "/* Make sure foreign-function stubs interface to the C funs correctly: */" "#include " "#include \"../regexp/regex.h\"" "#include \"re1.h\"" "" "" ) ;;; Match data for regexp matches. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define-record regexp-match string ; The string against which we matched start ; vector of starting indices end) ; vector of ending indices (define (match:start match . maybe-index) (vector-ref (regexp-match:start match) (:optional maybe-index 0))) (define (match:end match . maybe-index) (vector-ref (regexp-match:end match) (:optional maybe-index 0))) (define (match:substring match . maybe-index) (let* ((i (:optional maybe-index 0)) (start (vector-ref (regexp-match:start match) i))) (and start (substring (regexp-match:string match) start (vector-ref (regexp-match:end match) i))))) ;;; Compiling regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; There's no legal Posix string expressing the empty match (e.g., (|)) ;;; that will never match anything. So when we have one of these, we set ;;; the STRING field to #f. The matchers will spot this case and handle it ;;; specially. ;;; We compile the string two ways, on demand -- one for cre-search, and ;;; one for cre-search?. ;(define-record cre ; A compiled regular expression ; string ; The Posix string form of the regexp or #F. ; max-paren ; Max paren in STRING needed for submatches. ; (bytes #f) ; Pointer to the compiled form, in the C heap, or #F. ; (bytes/nm #f) ; Same as BYTES, but compiled with no-submatch. ; tvec ; Translation vector for the submatches ; ((disclose self) (list "cre" (cre:string self)))) (define-record-type cre :cre (really-make-cre string max-paren bytes bytes/nm tvec debug) cre? (string cre:string set-cre:string) (max-paren cre:max-paren set-cre:max-paren) (bytes cre:bytes set-cre:bytes) (bytes/nm cre:bytes/nm set-cre:bytes/nm) (tvec cre:tvec set-cre:tvec) (debug cre:debug set-cre:debug)) (define-record-discloser :cre (lambda (self) (list "cre" (cre:string self)))) (define-record-resumer :cre (lambda (cre) (set-cre:bytes cre #f) (set-cre:bytes/nm cre #f))) (define (make-cre str max-paren tvec) (really-make-cre str max-paren #f #f tvec #f)) (define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec)) (define (max-live-posix-submatch tvec) (vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec)) (define (compile-posix-re->c-struct re-string sm?) (let ((maybe-struct (%compile-re re-string sm?))) (if (pair? maybe-struct) (error (car maybe-struct) (%regerror-msg (car maybe-struct) (cdr maybe-struct)) compile-posix-re->c-struct re-string sm?) maybe-struct))) ;;; returns pointer as number or a pair of error number and 0 (define-stubless-foreign %compile-re (pattern submatches?) "compile_re") ;;; Searching with compiled regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; cre-search returns match info; cre-search? is just a predicate. (define (cre-search cre start-vec end-vec str start) (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. (and re-str (let* ((C-bytes (or (cre:bytes cre) (let ((C-bytes (compile-posix-re->c-struct re-str #t))) (set-cre:bytes cre C-bytes) (register-re-c-struct:bytes cre) C-bytes))) (retcode (%cre-search C-bytes str start (cre:tvec cre) (cre:max-paren cre) start-vec end-vec))) (if (integer? retcode) (error retcode (%regerror-msg retcode C-bytes) cre-search cre start-vec end-vec str start) (and retcode (make-regexp-match str start-vec end-vec))))))) (define (cre-search? cre str start) (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. (and re-str (let* ((C-bytes (or (cre:bytes/nm cre) (let ((C-bytes (compile-posix-re->c-struct re-str #f))) (set-cre:bytes/nm cre C-bytes) (register-re-c-struct:bytes/nm cre) C-bytes))) (retcode (%cre-search C-bytes str start '#() -1 '#() '#()))) (if (integer? retcode) (error retcode (%regerror-msg retcode C-bytes) cre-search? cre str start) retcode))))) ; 0 success, #f no-match, or non-zero int error code: (define-stubless-foreign %cre-search (compiled-regexp str start tvec max-psm svec evec) "re_search") ;;; Generate an error msg from an error code. (define-stubless-foreign %regerror-msg (errcode re) "re_errint2str") ;;; Reclaiming compiled regexp storage ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define-stubless-foreign %free-re (re) "free_re") ;;; Whenever we make a new CRE, add the appropriate finalizer, ;;; so the C regex_t structure can be freeed (define (free-bytes the-cre) (if (cre:bytes the-cre) (%free-re (cre:bytes the-cre)) (warn "free-bytes called on #f"))) (define (free-bytes/nm the-cre) (if (cre:bytes/nm the-cre) (%free-re (cre:bytes/nm the-cre)) (warn "free-bytes/nm called on #f"))) (define (register-re-c-struct:bytes cre) (add-finalizer! cre free-bytes)) (define (register-re-c-struct:bytes/nm cre) (add-finalizer! cre free-bytes/nm)) (define (register-re-c-struct cre c-bytes) (error "function register-re-c-struct no longer supported")) (define (clean-up-cres) (warn "function clean-up-cres no longer supported"))