Fixed some design and implementation bugs in regexp-substitute/global.

This commit is contained in:
shivers 1997-04-19 04:01:19 +00:00
parent aa5c661421
commit 5c5ae4dc99
3 changed files with 3411 additions and 3342 deletions

File diff suppressed because it is too large Load Diff

View File

@ -116,29 +116,58 @@ and returned instead.
\end{desc} \end{desc}
\defun{regexp-substitute/global}{port regexp string . items} \defun{regexp-substitute/global}{port regexp string . items}
{{\str} or \boolean} {{\str} or \undefined}
\begin{desc} \begin{desc}
This procedure is similar to \ex{regexp-substitute}, This procedure is similar to \ex{regexp-substitute},
but can be used to perform multiple match/substitute operations over but can be used to perform repeated match/substitute operations over
a string. a string.
It has the following differences with \ex{regexp-substitute}: It has the following differences with \ex{regexp-substitute}:
\begin{itemize} \begin{itemize}
\item It takes a regular expression and string to be matched as \item It takes a regular expression and string to be matched as
parameters, instead of a completed match structure. parameters, instead of a completed match structure.
\item If the regular expression doesn't match the string, this
procedure is the identity transform---it returns or outputs the
string.
\item If an item is \ex{'post}, the procedure recurses on the suffix string \item If an item is \ex{'post}, the procedure recurses on the suffix string
(the text from \var{string} following the match). (the text from \var{string} following the match).
\item If \var{port} is an output port: Including a \ex{'post} in the list of items is how one gets multiple
\begin{itemize} match/substitution operations.
\item There was at least one match: return true. \item If an item is a procedure, it is applied to the match structure for
\item No match: return false. a given match.
The procedure returns a string to be used in the result.
\end{itemize} \end{itemize}
\item \var{port} is \sharpf: Some examples:
\begin{itemize} {\small
\item There was at least one match: construct and return a string. \begin{widecode}
\item No match: return false. ;;; Replace occurrences of "Cotton" with "Jin".
\end{itemize} (regexp-substitute/global #f "Cotton" s
\end{itemize} 'pre "Jin" 'post)
;;; mm/dd/yy -> dd/mm/yy date conversion.
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
s ; Source string
'pre 2 "/" 1 "/" 3 'post)
;;; "9/29/61" -> "Sep 29, 1961" date conversion.
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
s ; Source string
'pre
;; Sleazy converter -- ignores "year 2000" issue, and blows up if
;; month is out of range.
(lambda (m)
(let ((mon (vector-ref '#("Jan" "Feb" "Mar" "Apr" "May" "Jun"
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
(- (string->number (match:substring m 1)) 1)))
(day (match:substring m 2))
(year (match:substring m 3)))
(string-append mon " " day ", 19" year)))
'post)
;;; Remove potentially offensive substrings from string S.
(regexp-substitute/global #f "Windows|tcl|Intel" s
'pre 'post)\end{widecode}}
\end{desc} \end{desc}

View File

@ -150,15 +150,15 @@
(define (regexp-substitute/global port str re . items) (define (regexp-substitute/global port re str . items)
(let ((range (lambda (start item) ; Return start & end of (let ((range (lambda (start sv ev item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR. (cond ((integer? item) ; ITEM's range in STR.
(values (vector-ref sv item) (values (vector-ref sv item)
(vector-ref ev item))) (vector-ref ev item)))
((eq? 'pre item) (values start (vector-ref sv 0))) ((eq? 'pre item) (values start (vector-ref sv 0)))
(else (error "Illegal substitution item." (else (error "Illegal substitution item."
item item
regexp-substitute))))) regexp-substitute/global)))))
(num-posts (reduce (lambda (count item) (num-posts (reduce (lambda (count item)
(+ count (if (eq? item 'post) 1 0))) (+ count (if (eq? item 'post) 1 0)))
0 items))) 0 items)))
@ -167,42 +167,51 @@
;; Output port case, with zero or one POST items. ;; Output port case, with zero or one POST items.
(let recur ((start 0)) (let recur ((start 0))
(let ((match (string-match re str start))) (let ((match (string-match re str start)))
(and match (if match
(let* ((sv (regexp-match:start match)) (let* ((sv (regexp-match:start match))
(ev (regexp-match:end match))) (ev (regexp-match:end match)))
(for-each (lambda (item) (for-each (lambda (item)
(cond ((string? item) (write-string item port)) (cond ((string? item) (write-string item port))
((eq? 'post item) ((procedure? item) (write-string (item match) port))
(let ((post-start (vector-ref ev 0))) ((eq? 'post item) (recur (vector-ref ev 0)))
(or (recur post-start) (else (receive (si ei)
(write-string item port post-start)))) (range start sv ev item)
(else (receive (si ei) (range start item)
(write-string str port si ei))))) (write-string str port si ei)))))
items) items))
#t))))
(let ((pieces (let recur ((start 0)) (write-string str port start)))) ; No match.
(let* ((pieces (let recur ((start 0))
(let ((match (string-match re str start)) (let ((match (string-match re str start))
(cached-post #f)) (cached-post #f))
(and match (if match
(let* ((sv (regexp-match:start match)) (let* ((sv (regexp-match:start match))
(ev (regexp-match:end match))) (ev (regexp-match:end match)))
(reduce (lambda (pieces item) (reduce (lambda (pieces item)
(cond ((string? item) (cons item pieces)) (cond ((string? item)
(cons item pieces))
((procedure? item)
(cons (item match) pieces))
((eq? 'post item) ((eq? 'post item)
(append (or cached-post (if (not cached-post)
(begin (set! cached-post (set! cached-post
(recur (vector-ref ev 0))) (recur (vector-ref ev 0))))
cached-post)) (append cached-post pieces))
pieces))
(else (receive (si ei) (range start item) (else (receive (si ei)
(cons (substring str si e1) (range start sv ev item)
(cons (substring str si ei)
pieces))))) pieces)))))
0 items))))))) '() items))
(and pieces
(let ((pieces (reverse pieces))) ;; No match. Return str[start,end].
(list (if (zero? start) str
(substring str start (string-length str))))))))
(pieces (reverse pieces)))
(if port (for-each (lambda (p) (write-string p port)) pieces) (if port (for-each (lambda (p) (write-string p port)) pieces)
(apply string-append pieces)))))))) (apply string-append pieces))))))