;;; Regular expression matching for scsh ;;; Copyright (c) 1994 by Olin Shivers. (foreign-source "/* Make sure foreign-function stubs interface to the C funs correctly: */" "#include \"re1.h\"" "" "" ) ;;; Match data for regexp matches. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define-record regexp-match string ; The string against which we matched. start ; 10 elt vec end) ; 10 elt vec (define (match:start match . maybe-index) (vector-ref (regexp-match:start match) (:optional maybe-index 0))) (define (match:end match . maybe-index) (vector-ref (regexp-match:end match) (:optional maybe-index 0))) (define (match:substring match . maybe-index) (let* ((i (:optional maybe-index 0)) (start (vector-ref (regexp-match:start match) i))) (if start (substring (regexp-match:string match) start (vector-ref (regexp-match:end match) i)) (error match:substring "No sub-match found." match i)))) ;;; Compiling regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define-record %regexp string ; The string form of the regexp. bytes ; The compiled representation, stuffed into a Scheme string. ((disclose self) (list "Regexp" (%regexp:string self)))) (define regexp? %regexp?) (define (make-regexp pattern) (receive (err len) (%regexp-compiled-length pattern) (if err (error err make-regexp pattern) (let ((buf (make-string len))) (%regexp-compile pattern buf) (make-%regexp pattern buf))))) (define-foreign %regexp-compiled-length (re_byte_len (string pattern)) static-string ; Error msg or #f integer) ; number of bytes needed to compile REGEXP. (define-foreign %regexp-compile (re_compile (string pattern) (string-desc bytes)) static-string) ; Error msg or #f ;;; Executing compiled regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (regexp-exec regexp str . maybe-start) (let ((start (:optional maybe-start 0)) (start-vec (make-vector 10)) (end-vec (make-vector 10))) (receive (err match?) (%regexp-exec (%regexp:bytes regexp) str start start-vec end-vec) (if err (error err regexp-exec regexp str start) (and match? (make-regexp-match str start-vec end-vec)))))) (define-foreign %regexp-exec (re_exec (string-desc compiled-regexp) (string s) (integer start) (vector-desc start-vec) (vector-desc end-vec)) static-string ; Error msg or #f bool) ; Matched? ;;; Compile&match regexps in one go ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; I could do this with the separate compile and execute procedures, ;;; but I go straight to C just for fun. (define (string-match pattern string . maybe-start) (let ((start (:optional maybe-start 0)) (start-vec (make-vector 10)) (end-vec (make-vector 10))) (receive (err match?) (%string-match pattern string start start-vec end-vec) (if err (error err string-match pattern string start) (and match? (make-regexp-match string start-vec end-vec)))))) (define-foreign %string-match (re_match (string pattern) (string s) (integer start) (vector-desc start-vec) (vector-desc end-vec)) static-string ; Error string or #f if all is ok. bool) ; match? ;;; Substitutions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (regexp-substitute port match . items) (let* ((str (regexp-match:string match)) (sv (regexp-match:start match)) (ev (regexp-match:end match)) (range (lambda (item) ; Return start & end of (cond ((integer? item) ; ITEM's range in STR. (values (vector-ref sv item) (vector-ref ev item))) ((eq? 'pre item) (values 0 (vector-ref sv 0))) ((eq? 'post item) (values (vector-ref ev 0) (string-length str))) (else (error "Illegal substitution item." item regexp-substitute)))))) (if port ;; Output port case. (for-each (lambda (item) (if (string? item) (write-string item port) (receive (si ei) (range item) (write-string str port si ei)))) items) ;; Here's the string case. Make two passes -- one to ;; compute the length of the target string, one to fill it in. (let* ((len (reduce (lambda (i item) (+ i (if (string? item) (string-length item) (receive (si ei) (range item) (- ei si))))) 0 items)) (ans (make-string len))) (reduce (lambda (index item) (cond ((string? item) (string-replace! ans index item) (+ index (string-length item))) (else (receive (si ei) (range item) (substring-replace! ans index str si ei) (+ index (- ei si)))))) 0 items) ans)))) ;;; Miscellaneous ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Convert a string into a regex pattern that matches that string exactly -- ;;; in other words, quote the special chars with backslashes. (define (regexp-quote string) (let lp ((i (- (string-length string) 1)) (result '())) (if (< i 0) (list->string result) (lp (- i 1) (let* ((c (string-ref string i)) (result (cons c result))) (if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+)) (cons #\\ result) result)))))) ;;; Count the number of possible sub-matches in a regexp ;;; (i.e., the number of left parens). (define (regexp-num-submatches s) (let* ((len (string-length s)) (len-1 (- len 1))) (let lp ((i 0) (nsm 0)) (if (= i len) nsm (case (string-ref s i) ((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm)) ((#\() (lp (+ i 1) (+ nsm 1))) (else (lp (+ i 1) nsm)))))))