;;; Regular expression matching for scsh ;;; Copyright (c) 1994 by Olin Shivers. ;;; Match data for regexp matches. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define-record regexp-match string submatches) (define (match:start match . maybe-index) (match-start (vector-ref (regexp-match:submatches match) (:optional maybe-index 0)))) (define (match:end match . maybe-index) (match-end (vector-ref (regexp-match:submatches match) (:optional maybe-index 0)))) (define (match:substring match . maybe-index) (let* ((i (:optional maybe-index 0)) (submatch (vector-ref (regexp-match:submatches match) i))) (and submatch (substring (regexp-match:string match) (match-start submatch) (match-end submatch))))) ;;; Compiling regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; There's no legal Posix string expressing the empty match (e.g., (|)) ;;; that will never match anything. So when we have one of these, we set ;;; the STRING field to #f. The matchers will spot this case and handle it ;;; specially. ;;; We compile the string two ways, on demand -- one for cre-search, and ;;; one for cre-search?. ;(define-record cre ; A compiled regular expression ; string ; The Posix string form of the regexp or #F. ; max-paren ; Max paren in STRING needed for submatches. ; (regexp #f) ; Compiled form or #F. ; (regexp/nm #f) ; Same as REGEXP, but compiled with no-submatch. ; tvec ; Translation vector for the submatches ; ((disclose self) (list "cre" (cre:string self)))) (define-record-type cre :cre (really-make-cre string max-paren regexp regexp/nm tvec debug) cre? (string cre:string set-cre:string) (max-paren cre:max-paren set-cre:max-paren) (regexp cre:regexp set-cre:regexp) (regexp/nm cre:regexp/nm set-cre:regexp/nm) (tvec cre:tvec set-cre:tvec) (debug cre:debug set-cre:debug)) (define-record-discloser :cre (lambda (self) (list "cre" (cre:string self)))) (define (make-cre str max-paren tvec) (really-make-cre str max-paren #f #f tvec #f)) (define (new-cre str tvec) (make-cre str (max-live-posix-submatch tvec) tvec)) (define (max-live-posix-submatch tvec) (vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec)) ;;; Searching with compiled regexps ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; cre-search returns match info; cre-search? is just a predicate. (define (cre-search cre match-vec str start) (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. (if (not re-str) #f (begin (if (not (cre:regexp cre)) (set-cre:regexp cre (make-regexp re-str (regexp-option extended) (regexp-option submatches)))) (let ((ret (regexp-match (cre:regexp cre) str #t #f #f start))) (if (not ret) #f (make-regexp-match str (translate-submatches ret (cre:tvec cre) match-vec)))))))) (define (translate-submatches matches trans-vec match-vec) (let ((n-virtual-submatches (vector-length trans-vec))) (let loop ((virtual-index 0) (match-index 0) (matches matches)) (cond ((> virtual-index n-virtual-submatches) match-vec) ((if (zero? virtual-index) 0 (vector-ref trans-vec (- virtual-index 1))) => (lambda (actual-index) (if (= match-index actual-index) (begin (vector-set! match-vec virtual-index (car matches)) (loop (+ 1 virtual-index) (+ 1 match-index) (cdr matches))) (loop virtual-index (+ 1 match-index) (cdr matches))))) (else (loop (+ 1 virtual-index) match-index matches)))))) (define (cre-search? cre str start) (let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match. (if (not re-str) #f (begin (if (not (cre:regexp/nm cre)) (set-cre:regexp/nm cre (make-regexp re-str (regexp-option extended)))) (regexp-match (cre:regexp/nm cre) str #f #f #f)))))