Upgraded regexp system.

This commit is contained in:
shivers 1997-02-19 23:23:48 +00:00
parent 25aa2b845a
commit 06eb60980d
3 changed files with 55 additions and 48 deletions

View File

@ -16,14 +16,12 @@
end) ; 10 elt vec
(define (match:start match . maybe-index)
(let ((i (:optional maybe-index 0)))
(or (vector-ref (regexp-match:start match) i)
(error match:start "No sub-match found." match i))))
(vector-ref (regexp-match:start match)
(:optional maybe-index 0)))
(define (match:end match . maybe-index)
(let ((i (:optional maybe-index 0)))
(or (vector-ref (regexp-match:end match) i)
(error match:start "No sub-match found." match i))))
(vector-ref (regexp-match:end match)
(:optional maybe-index 0)))
(define (match:substring match . maybe-index)
(let* ((i (:optional maybe-index 0))
@ -112,40 +110,45 @@
;;; Substitutions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define-foreign %regexp-subst (re_subst (string-desc compiled-regexp)
(string match)
(string str)
(integer start)
(vector-desc start-vec)
(vector-desc end-vec)
(string-desc outbuf))
static-string ; Error msg or #f
integer)
(define (regexp-substitute port match . items)
(let ((str (regexp-match:string match))
(sv (regexp-match:start match))
(ev (regexp-match:end match)))
(if port
(define-foreign %regexp-subst-len (re_subst_len (string-desc compiled-regexp)
(string match)
(string str)
(integer start)
(vector-desc start-vec)
(vector-desc end-vec))
static-string ; Error msg or #f
integer)
;; Output port case.
(for-each (lambda (item)
(cond ((string? item) (write-string item port))
((integer? item) (write-string str port
(vector-ref sv item)
(vector-ref ev item)))
(else (error "Illegal substitution item."
item
regexp-substitute))))
items)
;;; What does this do?
;; Here's the string case. Make two passes -- one to
;; compute the length of the target string, one to fill it in.
(let* ((len (reduce (lambda (i item)
(+ i (cond ((string? item) (string-length item))
((integer? (- (vector-ref ev item)
(vector-ref sv item))))
(else (error "Illegal substitution item."
item
regexp-substitute)))))
0 items))
(ans (make-string len)))
(define (regexp-subst re match replacement)
(let ((cr (%regexp:bytes re))
(str (regexp-match:string match))
(start-vec (regexp-match:start match))
(end-vec (regexp-match:end match)))
(receive (err out-len) (%regexp-subst-len cr str replacement 0
start-vec end-vec)
(if err (error err regexp-subst str replacement) ; More data here
(let ((out-buf (make-string out-len)))
(receive (err out-len) (%regexp-subst cr str replacement 0
start-vec end-vec out-buf)
(if err (error err regexp-subst str replacement)
(substring out-buf 0 out-len))))))))
(reduce (lambda (index item)
(cond ((string? item)
(copy-string! ans index item)
(+ index (string-length item)))
(else (let ((si (vector-ref sv item))
(ei (vector-ref ev item)))
(copy-substring! ans index str si ei)
(+ index (- ei si))))))
0 items)
ans))))
;;; Miscellaneous
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -172,3 +175,17 @@
(if (memv c '(#\[ #\] #\. #\* #\? #\( #\) #\| #\\ #\$ #\^ #\+))
(cons #\\ result)
result))))))
;;; Count the number of possible sub-matches in a regexp
;;; (i.e., the number of left parens).
(define (regexp-num-submatches s)
(let* ((len (string-length s))
(len-1 (- len 1)))
(let lp ((i 0) (nsm 0))
(if (= i len) nsm
(case (string-ref s i)
((#\\) (if (< i len-1) (lp (+ i 2) nsm) nsm))
((#\() (lp (+ i 1) (+ nsm 1)))
(else (lp (+ i 1) nsm)))))))

View File

@ -40,7 +40,7 @@ char *re_compile(const char *re, scheme_value cr)
regexp *r = (regexp *) &STRING_REF(cr, 0);
regexp_error = 0;
r = regcomp_comp(re, r, len);
regcomp_comp(re, r, len);
return regexp_error;
}

View File

@ -10,15 +10,5 @@ char *re_match(const char *re, const char *string, int start,
scheme_value start_vec, scheme_value end_vec,
int *hit);
char *re_subst_len(scheme_value cr, const char *match,
const char *src, int start,
scheme_value start_vec, scheme_value end_vec,
int *len);
char *re_subst(scheme_value cr, const char *match,
const char *src, int start,
scheme_value start_vec, scheme_value end_vec,
scheme_value outbuf, int *len);
char *filter_stringvec(const char *re, char const **stringvec,
int *nummatch);