scsh-0.6/scsh/rx/re-low.scm

118 lines
3.9 KiB
Scheme
Raw Permalink Normal View History

1999-09-23 10:27:41 -04:00
;;; Regular expression matching for scsh
;;; Copyright (c) 1994 by Olin Shivers.
;;; Match data for regexp matches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-record regexp-match
string
submatches)
1999-09-23 10:27:41 -04:00
(define (match:start match . maybe-index)
(match-start
(vector-ref (regexp-match:submatches match)
(:optional maybe-index 0))))
1999-09-23 10:27:41 -04:00
(define (match:end match . maybe-index)
2001-10-01 11:19:08 -04:00
(match-end
(vector-ref (regexp-match:submatches match)
(:optional maybe-index 0))))
1999-09-23 10:27:41 -04:00
(define (match:substring match . maybe-index)
(let* ((i (:optional maybe-index 0))
(submatch (vector-ref (regexp-match:submatches match) i)))
(and submatch (substring (regexp-match:string match)
(match-start submatch)
(match-end submatch)))))
1999-09-23 10:27:41 -04:00
;;; Compiling regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; There's no legal Posix string expressing the empty match (e.g., (|))
;;; that will never match anything. So when we have one of these, we set
;;; the STRING field to #f. The matchers will spot this case and handle it
;;; specially.
;;; We compile the string two ways, on demand -- one for cre-search, and
;;; one for cre-search?.
1999-10-08 09:16:35 -04:00
;(define-record cre ; A compiled regular expression
; string ; The Posix string form of the regexp or #F.
; max-paren ; Max paren in STRING needed for submatches.
; (regexp #f) ; Compiled form or #F.
; (regexp/nm #f) ; Same as REGEXP, but compiled with no-submatch.
1999-10-08 09:16:35 -04:00
; tvec ; Translation vector for the submatches
; ((disclose self) (list "cre" (cre:string self))))
(define-record-type cre :cre
(really-make-cre string max-paren regexp regexp/nm tvec debug)
1999-10-08 09:16:35 -04:00
cre?
(string cre:string set-cre:string)
(max-paren cre:max-paren set-cre:max-paren)
(regexp cre:regexp set-cre:regexp)
(regexp/nm cre:regexp/nm set-cre:regexp/nm)
1999-10-08 09:16:35 -04:00
(tvec cre:tvec set-cre:tvec)
(debug cre:debug set-cre:debug))
(define-record-discloser :cre
(lambda (self) (list "cre" (cre:string self))))
(define (make-cre str max-paren tvec)
(really-make-cre str max-paren #f #f tvec #f))
(define (new-cre str tvec)
(make-cre str (max-live-posix-submatch tvec) tvec))
1999-09-23 10:27:41 -04:00
(define (max-live-posix-submatch tvec)
(vfold (lambda (sm mlpsm) (if sm (max mlpsm sm) mlpsm)) 0 tvec))
;;; Searching with compiled regexps
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; cre-search returns match info; cre-search? is just a predicate.
(define (cre-search cre match-vec str start)
1999-09-23 10:27:41 -04:00
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
(if (not re-str)
#f
(begin
(if (not (cre:regexp cre))
(set-cre:regexp cre (make-regexp re-str
(regexp-option extended)
(regexp-option submatches))))
(let ((ret (regexp-match (cre:regexp cre) str #t #f #f start)))
(if (not ret)
#f
(make-regexp-match str
(translate-submatches ret
(cre:tvec cre)
match-vec))))))))
(define (translate-submatches matches trans-vec match-vec)
(let ((n-virtual-submatches (vector-length trans-vec)))
(let loop ((virtual-index 0)
(match-index 0)
(matches matches))
(cond
((> virtual-index n-virtual-submatches)
match-vec)
((if (zero? virtual-index)
0
(vector-ref trans-vec (- virtual-index 1)))
=> (lambda (actual-index)
(if (= match-index actual-index)
(begin
(vector-set! match-vec virtual-index (car matches))
(loop (+ 1 virtual-index) (+ 1 match-index) (cdr matches)))
(loop virtual-index (+ 1 match-index) (cdr matches)))))
(else
(loop (+ 1 virtual-index) match-index matches))))))
2001-10-05 04:27:26 -04:00
(define (cre-search? cre str start)
1999-09-23 10:27:41 -04:00
(let ((re-str (cre:string cre))) ;; RE-STR = #F => empty match.
(if (not re-str)
#f
(begin
(if (not (cre:regexp/nm cre))
(set-cre:regexp/nm cre (make-regexp re-str
(regexp-option extended))))
(regexp-match (cre:regexp/nm cre) str #f #f #f)))))