scsh-0.6/scsh/rx/re-low.scm

170 lines
5.6 KiB
Scheme

;;; Regular expression matching for scsh
;;; Copyright (c) 1994 by Olin Shivers.
(foreign-init-name "re_low")
(foreign-source
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
"#include <sys/types.h>"
"#include \"../regexp/regex.h\""
"#include \"re1.h\""
"" ""
)
;;; Match data for regexp matches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record regexp-match
string ; The string against which we matched
start ; vector of starting indices
end) ; vector of ending indices
(define (match:start match . maybe-index)
(vector-ref (regexp-match:start match)
(:optional maybe-index 0)))
(define (match:end match . maybe-index)
(vector-ref (regexp-match:end match)
(:optional maybe-index 0)))
(define (match:substring match . maybe-index)
(let* ((i (:optional maybe-index 0))
(start (vector-ref (regexp-match:start match) i)))
(and start (substring (regexp-match:string match)
start
(vector-ref (regexp-match:end match) i)))))
;;; Compiling regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; There's no legal Posix string expressing the empty match (e.g., (|))
;;; that will never match anything. So when we have one of these, we set
;;; the STRING field to #f. The matchers will spot this case and handle it
;;; specially.
;;; We compile the string two ways, on demand -- one for cre-search, and
;;; one for cre-search?.
;(define-record cre ; A compiled regular expression
; string ; The Posix string form of the regexp or #F.
; max-paren ; Max paren in STRING needed for submatches.
; (bytes #f) ; Pointer to the compiled form, in the C heap, or #F.
; (bytes/nm #f) ; Same as BYTES, but compiled with no-submatch.
; tvec ; Translation vector for the submatches
; ((disclose self) (list "cre" (cre:string self))))
(define-record-type cre :cre
(really-make-cre string max-paren bytes bytes/nm tvec debug)
cre?
(string cre:string set-cre:string)
(max-paren cre:max-paren set-cre:max-paren)
(bytes cre:bytes set-cre:bytes)
(bytes/nm cre:bytes/nm set-cre:bytes/nm)
(tvec cre:tvec set-cre:tvec)
(debug cre:debug set-cre:debug))
(define-record-discloser :cre
(lambda (self) (list "cre" (cre:string self))))
(define (make-cre str max-paren tvec)
(really-make-cre str max-paren #f #f tvec #f))
(define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec))
(define (max-live-posix-submatch tvec)
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
(define (compile-posix-re->c-struct re-string sm?)
(receive (errcode c-struct) (%compile-re re-string sm?)
(if (zero? errcode) c-struct
(error errcode (%regerror-msg errcode c-struct)
compile-posix-re->c-struct re-string sm?))))
(define-foreign %compile-re (compile_re (string-desc pattern) (bool submatches?))
integer ; 0 or error code
(C regex_t*))
;;; Searching with compiled regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; cre-search returns match info; cre-search? is just a predicate.
(define (cre-search cre start-vec end-vec str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
(and re-str
(let* ((C-bytes (or (cre:bytes cre)
(let ((C-bytes (compile-posix-re->c-struct re-str #t)))
(set-cre:bytes cre C-bytes)
(register-re-c-struct:bytes cre)
C-bytes)))
(retcode (%cre-search C-bytes str start
(cre:tvec cre)
(cre:max-paren cre)
start-vec end-vec)))
(if (integer? retcode)
(error retcode (%regerror-msg retcode C-bytes)
cre-search cre start-vec end-vec str start)
(and retcode (make-regexp-match str start-vec end-vec)))))))
(define (cre-search? cre str start)
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
(and re-str
(let* ((C-bytes (or (cre:bytes/nm cre)
(let ((C-bytes (compile-posix-re->c-struct re-str #f)))
(set-cre:bytes/nm cre C-bytes)
(register-re-c-struct:bytes/nm cre)
C-bytes)))
(retcode (%cre-search C-bytes str start '#() -1 '#() '#())))
(if (integer? retcode)
(error retcode (%regerror-msg retcode C-bytes)
cre-search? cre str start)
retcode)))))
(define-foreign %cre-search
(re_search ((C "const regex_t *~a") compiled-regexp)
(string-desc str)
(integer start)
(vector-desc tvec) (integer max-psm)
(vector-desc svec) (vector-desc evec))
desc) ; 0 success, #f no-match, or non-zero int error code.
;;; Generate an error msg from an error code.
(define-foreign %regerror-msg (re_errint2str (integer errcode)
((C "const regex_t *~a") re))
string)
;;; Reclaiming compiled regexp storage
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-foreign %free-re (free_re ((C regex_t*) re)) ignore)
;;; Whenever we make a new CRE, add the appropriate finalizer,
;;; so the C regex_t structure can be freeed
(define (free-bytes the-cre)
(if (cre:bytes the-cre)
(%free-re (cre:bytes the-cre))
(warn "free-bytes called on #f")))
(define (free-bytes/nm the-cre)
(if (cre:bytes the-cre)
(%free-re (cre:bytes/nm the-cre))
(warn "free-bytes/nm called on #f")))
(define (register-re-c-struct:bytes cre)
(add-finalizer! cre free-bytes))
(define (register-re-c-struct:bytes/nm cre)
(add-finalizer! cre free-bytes/nm))
(define (register-re-c-struct cre c-bytes)
(error "function register-re-c-struct no longer supported"))
(define (clean-up-cres)
(warn "function clean-up-cres no longer supported"))