Upgraded regexp system.

This commit is contained in:
shivers 1997-02-19 23:23:48 +00:00
parent 25aa2b845a
commit 06eb60980d
3 changed files with 55 additions and 48 deletions

View File

@ -16,14 +16,12 @@
end) ; 10 elt vec end) ; 10 elt vec
(define (match:start match . maybe-index) (define (match:start match . maybe-index)
(let ((i (:optional maybe-index 0))) (vector-ref (regexp-match:start match)
(or (vector-ref (regexp-match:start match) i) (:optional maybe-index 0)))
(error match:start "No sub-match found." match i))))
(define (match:end match . maybe-index) (define (match:end match . maybe-index)
(let ((i (:optional maybe-index 0))) (vector-ref (regexp-match:end match)
(or (vector-ref (regexp-match:end match) i) (:optional maybe-index 0)))
(error match:start "No sub-match found." match i))))
(define (match:substring match . maybe-index) (define (match:substring match . maybe-index)
(let* ((i (:optional maybe-index 0)) (let* ((i (:optional maybe-index 0))
@ -112,40 +110,45 @@
;;; Substitutions ;;; Substitutions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-foreign %regexp-subst (re_subst (string-desc compiled-regexp) (define (regexp-substitute port match . items)
(string match) (let ((str (regexp-match:string match))
(string str) (sv (regexp-match:start match))
(integer start) (ev (regexp-match:end match)))
(vector-desc start-vec) (if port
(vector-desc end-vec)
(string-desc outbuf))
static-string ; Error msg or #f
integer)
(define-foreign %regexp-subst-len (re_subst_len (string-desc compiled-regexp) ;; Output port case.
(string match) (for-each (lambda (item)
(string str) (cond ((string? item) (write-string item port))
(integer start) ((integer? item) (write-string str port
(vector-desc start-vec) (vector-ref sv item)
(vector-desc end-vec)) (vector-ref ev item)))
static-string ; Error msg or #f (else (error "Illegal substitution item."
integer) item
regexp-substitute))))
items)
;;; What does this do? ;; Here's the string case. Make two passes -- one to
;; compute the length of the target string, one to fill it in.
(let* ((len (reduce (lambda (i item)
(+ i (cond ((string? item) (string-length item))
((integer? (- (vector-ref ev item)
(vector-ref sv item))))
(else (error "Illegal substitution item."
item
regexp-substitute)))))
0 items))
(ans (make-string len)))
(define (regexp-subst re match replacement) (reduce (lambda (index item)
(let ((cr (%regexp:bytes re)) (cond ((string? item)
(str (regexp-match:string match)) (copy-string! ans index item)
(start-vec (regexp-match:start match)) (+ index (string-length item)))
(end-vec (regexp-match:end match))) (else (let ((si (vector-ref sv item))
(receive (err out-len) (%regexp-subst-len cr str replacement 0 (ei (vector-ref ev item)))
start-vec end-vec) (copy-substring! ans index str si ei)
(if err (error err regexp-subst str replacement) ; More data here (+ index (- ei si))))))
(let ((out-buf (make-string out-len))) 0 items)
(receive (err out-len) (%regexp-subst cr str replacement 0 ans))))
start-vec end-vec out-buf)
(if err (error err regexp-subst str replacement)
(substring out-buf 0 out-len))))))))
;;; Miscellaneous ;;; Miscellaneous
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -172,3 +175,17 @@
(if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+)) (if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+))
(cons #\\ result) (cons #\\ result)
result)))))) result))))))
;;; Count the number of possible sub-matches in a regexp
;;; (i.e., the number of left parens).
(define (regexp-num-submatches s)
(let* ((len (string-length s))
(len-1 (- len 1)))
(let lp ((i 0) (nsm 0))
(if (= i len) nsm
(case (string-ref s i)
((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm))
((#\() (lp (+ i 1) (+ nsm 1)))
(else (lp (+ i 1) nsm)))))))

View File

@ -40,7 +40,7 @@ char *re_compile(const char *re, scheme_value cr)
regexp *r = (regexp *) &STRING_REF(cr, 0); regexp *r = (regexp *) &STRING_REF(cr, 0);
regexp_error = 0; regexp_error = 0;
r = regcomp_comp(re, r, len); regcomp_comp(re, r, len);
return regexp_error; return regexp_error;
} }

View File

@ -10,15 +10,5 @@ char *re_match(const char *re, const char *string, int start,
scheme_value start_vec, scheme_value end_vec, scheme_value start_vec, scheme_value end_vec,
int *hit); int *hit);
char *re_subst_len(scheme_value cr, const char *match,
const char *src, int start,
scheme_value start_vec, scheme_value end_vec,
int *len);
char *re_subst(scheme_value cr, const char *match,
const char *src, int start,
scheme_value start_vec, scheme_value end_vec,
scheme_value outbuf, int *len);
char *filter_stringvec(const char *re, char const **stringvec, char *filter_stringvec(const char *re, char const **stringvec,
int *nummatch); int *nummatch);