diff --git a/scsh/re.scm b/scsh/re.scm index ba202f1..3e1c4de 100644 --- a/scsh/re.scm +++ b/scsh/re.scm @@ -16,14 +16,12 @@ end) ; 10 elt vec (define (match:start match . maybe-index) - (let ((i (:optional maybe-index 0))) - (or (vector-ref (regexp-match:start match) i) - (error match:start "No sub-match found." match i)))) + (vector-ref (regexp-match:start match) + (:optional maybe-index 0))) (define (match:end match . maybe-index) - (let ((i (:optional maybe-index 0))) - (or (vector-ref (regexp-match:end match) i) - (error match:start "No sub-match found." match i)))) + (vector-ref (regexp-match:end match) + (:optional maybe-index 0))) (define (match:substring match . maybe-index) (let* ((i (:optional maybe-index 0)) @@ -112,40 +110,45 @@ ;;; Substitutions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define-foreign %regexp-subst (re_subst (string-desc compiled-regexp) - (string match) - (string str) - (integer start) - (vector-desc start-vec) - (vector-desc end-vec) - (string-desc outbuf)) - static-string ; Error msg or #f - integer) +(define (regexp-substitute port match . items) + (let ((str (regexp-match:string match)) + (sv (regexp-match:start match)) + (ev (regexp-match:end match))) + (if port -(define-foreign %regexp-subst-len (re_subst_len (string-desc compiled-regexp) - (string match) - (string str) - (integer start) - (vector-desc start-vec) - (vector-desc end-vec)) - static-string ; Error msg or #f - integer) + ;; Output port case. + (for-each (lambda (item) + (cond ((string? item) (write-string item port)) + ((integer? item) (write-string str port + (vector-ref sv item) + (vector-ref ev item))) + (else (error "Illegal substitution item." + item + regexp-substitute)))) + items) -;;; What does this do? + ;; Here's the string case. Make two passes -- one to + ;; compute the length of the target string, one to fill it in. + (let* ((len (reduce (lambda (i item) + (+ i (cond ((string? item) (string-length item)) + ((integer? (- (vector-ref ev item) + (vector-ref sv item)))) + (else (error "Illegal substitution item." + item + regexp-substitute))))) + 0 items)) + (ans (make-string len))) -(define (regexp-subst re match replacement) - (let ((cr (%regexp:bytes re)) - (str (regexp-match:string match)) - (start-vec (regexp-match:start match)) - (end-vec (regexp-match:end match))) - (receive (err out-len) (%regexp-subst-len cr str replacement 0 - start-vec end-vec) - (if err (error err regexp-subst str replacement) ; More data here - (let ((out-buf (make-string out-len))) - (receive (err out-len) (%regexp-subst cr str replacement 0 - start-vec end-vec out-buf) - (if err (error err regexp-subst str replacement) - (substring out-buf 0 out-len)))))))) + (reduce (lambda (index item) + (cond ((string? item) + (copy-string! ans index item) + (+ index (string-length item))) + (else (let ((si (vector-ref sv item)) + (ei (vector-ref ev item))) + (copy-substring! ans index str si ei) + (+ index (- ei si)))))) + 0 items) + ans)))) ;;; Miscellaneous ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -172,3 +175,17 @@ (if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+)) (cons #\\ result) result)))))) + + +;;; Count the number of possible sub-matches in a regexp +;;; (i.e., the number of left parens). + +(define (regexp-num-submatches s) + (let* ((len (string-length s)) + (len-1 (- len 1))) + (let lp ((i 0) (nsm 0)) + (if (= i len) nsm + (case (string-ref s i) + ((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm)) + ((#\() (lp (+ i 1) (+ nsm 1))) + (else (lp (+ i 1) nsm))))))) diff --git a/scsh/re1.c b/scsh/re1.c index 3a4cf49..0ac7ce0 100644 --- a/scsh/re1.c +++ b/scsh/re1.c @@ -40,7 +40,7 @@ char *re_compile(const char *re, scheme_value cr) regexp *r = (regexp *) &STRING_REF(cr, 0); regexp_error = 0; - r = regcomp_comp(re, r, len); + regcomp_comp(re, r, len); return regexp_error; } diff --git a/scsh/re1.h b/scsh/re1.h index 3938f7e..983cc2e 100644 --- a/scsh/re1.h +++ b/scsh/re1.h @@ -10,15 +10,5 @@ char *re_match(const char *re, const char *string, int start, scheme_value start_vec, scheme_value end_vec, int *hit); -char *re_subst_len(scheme_value cr, const char *match, - const char *src, int start, - scheme_value start_vec, scheme_value end_vec, - int *len); - -char *re_subst(scheme_value cr, const char *match, - const char *src, int start, - scheme_value start_vec, scheme_value end_vec, - scheme_value outbuf, int *len); - char *filter_stringvec(const char *re, char const **stringvec, int *nummatch);