175 lines
5.5 KiB
Scheme
175 lines
5.5 KiB
Scheme
;;; Regular expression matching for scsh
|
|
;;; Copyright (c) 1994 by Olin Shivers.
|
|
|
|
(foreign-source
|
|
"/* Make sure foreign-function stubs interface to the C funs correctly: */"
|
|
"#include \"re1.h\""
|
|
"" ""
|
|
)
|
|
|
|
;;; Match data for regexp matches.
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define-record regexp-match
|
|
string ; The string against which we matched.
|
|
start ; 10 elt vec
|
|
end) ; 10 elt vec
|
|
|
|
(define (match:start match . maybe-index)
|
|
(let ((i (:optional maybe-index 0)))
|
|
(or (vector-ref (regexp-match:start match) i)
|
|
(error match:start "No sub-match found." match i))))
|
|
|
|
(define (match:end match . maybe-index)
|
|
(let ((i (:optional maybe-index 0)))
|
|
(or (vector-ref (regexp-match:end match) i)
|
|
(error match:start "No sub-match found." match i))))
|
|
|
|
(define (match:substring match . maybe-index)
|
|
(let* ((i (:optional maybe-index 0))
|
|
(start (vector-ref (regexp-match:start match) i)))
|
|
(if start
|
|
(substring (regexp-match:string match)
|
|
start
|
|
(vector-ref (regexp-match:end match) i))
|
|
(error match:substring "No sub-match found." match i))))
|
|
|
|
|
|
;;; Compiling regexps
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define-record %regexp
|
|
string ; The string form of the regexp.
|
|
bytes ; The compiled representation, stuffed into a Scheme string.
|
|
((disclose self) (list "Regexp" (%regexp:string self))))
|
|
|
|
(define regexp? %regexp?)
|
|
|
|
|
|
(define (make-regexp pattern)
|
|
(receive (err len) (%regexp-compiled-length pattern)
|
|
(if err (error err make-regexp pattern)
|
|
(let ((buf (make-string len)))
|
|
(%regexp-compile pattern buf)
|
|
(make-%regexp pattern buf)))))
|
|
|
|
(define-foreign %regexp-compiled-length (re_byte_len (string pattern))
|
|
static-string ; Error msg or #f
|
|
integer) ; number of bytes needed to compile REGEXP.
|
|
|
|
(define-foreign %regexp-compile (re_compile (string pattern)
|
|
(string-desc bytes))
|
|
static-string) ; Error msg or #f
|
|
|
|
|
|
;;; Executing compiled regexps
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define (regexp-exec regexp str . maybe-start)
|
|
(let ((start (:optional maybe-start 0))
|
|
(start-vec (make-vector 10))
|
|
(end-vec (make-vector 10)))
|
|
(receive (err match?)
|
|
(%regexp-exec (%regexp:bytes regexp) str start start-vec end-vec)
|
|
(if err (error err regexp-exec regexp str start)
|
|
(and match?
|
|
(make-regexp-match str start-vec end-vec))))))
|
|
|
|
(define-foreign %regexp-exec (re_exec (string-desc compiled-regexp)
|
|
(string s)
|
|
(integer start)
|
|
(vector-desc start-vec)
|
|
(vector-desc end-vec))
|
|
static-string ; Error msg or #f
|
|
bool) ; Matched?
|
|
|
|
|
|
;;; Compile&match regexps in one go
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; I could do this with the separate compile and execute procedures,
|
|
;;; but I go straight to C just for fun.
|
|
|
|
(define (string-match pattern string . maybe-start)
|
|
(let ((start (:optional maybe-start 0))
|
|
(start-vec (make-vector 10))
|
|
(end-vec (make-vector 10)))
|
|
(receive (err match?) (%string-match pattern string start
|
|
start-vec end-vec)
|
|
(if err (error err string-match pattern string start)
|
|
(and match? (make-regexp-match string start-vec end-vec))))))
|
|
|
|
(define-foreign %string-match (re_match (string pattern)
|
|
(string s)
|
|
(integer start)
|
|
(vector-desc start-vec)
|
|
(vector-desc end-vec))
|
|
static-string ; Error string or #f if all is ok.
|
|
bool) ; match?
|
|
|
|
|
|
|
|
;;; Substitutions
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define-foreign %regexp-subst (re_subst (string-desc compiled-regexp)
|
|
(string match)
|
|
(string str)
|
|
(integer start)
|
|
(vector-desc start-vec)
|
|
(vector-desc end-vec)
|
|
(string-desc outbuf))
|
|
static-string ; Error msg or #f
|
|
integer)
|
|
|
|
(define-foreign %regexp-subst-len (re_subst_len (string-desc compiled-regexp)
|
|
(string match)
|
|
(string str)
|
|
(integer start)
|
|
(vector-desc start-vec)
|
|
(vector-desc end-vec))
|
|
static-string ; Error msg or #f
|
|
integer)
|
|
|
|
;;; What does this do?
|
|
|
|
(define (regexp-subst re match replacement)
|
|
(let ((cr (%regexp:bytes re))
|
|
(str (regexp-match:string match))
|
|
(start-vec (regexp-match:start match))
|
|
(end-vec (regexp-match:end match)))
|
|
(receive (err out-len) (%regexp-subst-len cr str replacement 0
|
|
start-vec end-vec)
|
|
(if err (error err regexp-subst str replacement) ; More data here
|
|
(let ((out-buf (make-string out-len)))
|
|
(receive (err out-len) (%regexp-subst cr str replacement 0
|
|
start-vec end-vec out-buf)
|
|
(if err (error err regexp-subst str replacement)
|
|
(substring out-buf 0 out-len))))))))
|
|
|
|
;;; Miscellaneous
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; I do this one in C, I'm not sure why:
|
|
;;; It is used by MATCH-FILES.
|
|
|
|
(define-foreign %filter-C-strings!
|
|
(filter_stringvec (string pattern) ((C "char const ** ~a") cvec))
|
|
static-string ; error message -- #f if no error.
|
|
integer) ; number of files that pass the filter.
|
|
|
|
|
|
;;; Convert a string into a regex pattern that matches that string exactly --
|
|
;;; in other words, quote the special chars with backslashes.
|
|
|
|
(define (regexp-quote string)
|
|
(let lp ((i (- (string-length string) 1))
|
|
(result '()))
|
|
(if (< i 0) (list->string result)
|
|
(lp (- i 1)
|
|
(let* ((c (string-ref string i))
|
|
(result (cons c result)))
|
|
(if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+))
|
|
(cons #\\ result)
|
|
result))))))
|