Fixed some design and implementation bugs in regexp-substitute/global.

This commit is contained in:
shivers 1997-04-19 04:01:19 +00:00
parent aa5c661421
commit 5c5ae4dc99
3 changed files with 3411 additions and 3342 deletions

File diff suppressed because it is too large Load Diff

View File

@ -116,30 +116,59 @@ and returned instead.
\end{desc} \end{desc}
\defun{regexp-substitute/global}{port regexp string . items} \defun{regexp-substitute/global}{port regexp string . items}
{{\str} or \boolean} {{\str} or \undefined}
\begin{desc} \begin{desc}
This procedure is similar to \ex{regexp-substitute}, This procedure is similar to \ex{regexp-substitute},
but can be used to perform multiple match/substitute operations over but can be used to perform repeated match/substitute operations over
a string. a string.
It has the following differences with \ex{regexp-substitute}: It has the following differences with \ex{regexp-substitute}:
\begin{itemize} \begin{itemize}
\item It takes a regular expression and string to be matched as \item It takes a regular expression and string to be matched as
parameters, instead of a completed match structure. parameters, instead of a completed match structure.
\item If the regular expression doesn't match the string, this
procedure is the identity transform---it returns or outputs the
string.
\item If an item is \ex{'post}, the procedure recurses on the suffix string \item If an item is \ex{'post}, the procedure recurses on the suffix string
(the text from \var{string} following the match). (the text from \var{string} following the match).
\item If \var{port} is an output port: Including a \ex{'post} in the list of items is how one gets multiple
\begin{itemize} match/substitution operations.
\item There was at least one match: return true. \item If an item is a procedure, it is applied to the match structure for
\item No match: return false. a given match.
\end{itemize} The procedure returns a string to be used in the result.
\item \var{port} is \sharpf:
\begin{itemize}
\item There was at least one match: construct and return a string.
\item No match: return false.
\end{itemize}
\end{itemize} \end{itemize}
Some examples:
{\small
\begin{widecode}
;;; Replace occurrences of "Cotton" with "Jin".
(regexp-substitute/global #f "Cotton" s
'pre "Jin" 'post)
;;; mm/dd/yy -> dd/mm/yy date conversion.
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
s ; Source string
'pre 2 "/" 1 "/" 3 'post)
;;; "9/29/61" -> "Sep 29, 1961" date conversion.
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
s ; Source string
'pre
;; Sleazy converter -- ignores "year 2000" issue, and blows up if
;; month is out of range.
(lambda (m)
(let ((mon (vector-ref '#("Jan" "Feb" "Mar" "Apr" "May" "Jun"
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
(- (string->number (match:substring m 1)) 1)))
(day (match:substring m 2))
(year (match:substring m 3)))
(string-append mon " " day ", 19" year)))
'post)
;;; Remove potentially offensive substrings from string S.
(regexp-substitute/global #f "Windows|tcl|Intel" s
'pre 'post)\end{widecode}}
\end{desc} \end{desc}
\subsection{Other string manipulation facilities} \subsection{Other string manipulation facilities}

View File

@ -150,59 +150,68 @@
(define (regexp-substitute/global port str re . items) (define (regexp-substitute/global port re str . items)
(let ((range (lambda (start item) ; Return start & end of (let ((range (lambda (start sv ev item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR. (cond ((integer? item) ; ITEM's range in STR.
(values (vector-ref sv item) (values (vector-ref sv item)
(vector-ref ev item))) (vector-ref ev item)))
((eq? 'pre item) (values start (vector-ref sv 0))) ((eq? 'pre item) (values start (vector-ref sv 0)))
(else (error "Illegal substitution item." (else (error "Illegal substitution item."
item item
regexp-substitute))))) regexp-substitute/global)))))
(num-posts (reduce (lambda (count item) (num-posts (reduce (lambda (count item)
(+ count (if (eq? item 'post) 1 0))) (+ count (if (eq? item 'post) 1 0)))
0 items))) 0 items)))
(if (and port (< num-posts 2)) (if (and port (< num-posts 2))
;; Output port case, with zero or one POST items. ;; Output port case, with zero or one POST items.
(let recur ((start 0)) (let recur ((start 0))
(let ((match (string-match re str start))) (let ((match (string-match re str start)))
(and match (if match
(let* ((sv (regexp-match:start match)) (let* ((sv (regexp-match:start match))
(ev (regexp-match:end match))) (ev (regexp-match:end match)))
(for-each (lambda (item) (for-each (lambda (item)
(cond ((string? item) (write-string item port)) (cond ((string? item) (write-string item port))
((eq? 'post item) ((procedure? item) (write-string (item match) port))
(let ((post-start (vector-ref ev 0))) ((eq? 'post item) (recur (vector-ref ev 0)))
(or (recur post-start) (else (receive (si ei)
(write-string item port post-start)))) (range start sv ev item)
(else (receive (si ei) (range start item) (write-string str port si ei)))))
(write-string str port si ei))))) items))
items)
#t))))
(let ((pieces (let recur ((start 0)) (write-string str port start)))) ; No match.
(let ((match (string-match re str start))
(cached-post #f)) (let* ((pieces (let recur ((start 0))
(and match (let ((match (string-match re str start))
(let* ((sv (regexp-match:start match)) (cached-post #f))
(ev (regexp-match:end match))) (if match
(reduce (lambda (pieces item) (let* ((sv (regexp-match:start match))
(cond ((string? item) (cons item pieces)) (ev (regexp-match:end match)))
((eq? 'post item) (reduce (lambda (pieces item)
(append (or cached-post (cond ((string? item)
(begin (set! cached-post (cons item pieces))
(recur (vector-ref ev 0)))
cached-post)) ((procedure? item)
pieces)) (cons (item match) pieces))
(else (receive (si ei) (range start item)
(cons (substring str si e1) ((eq? 'post item)
pieces))))) (if (not cached-post)
0 items))))))) (set! cached-post
(and pieces (recur (vector-ref ev 0))))
(let ((pieces (reverse pieces))) (append cached-post pieces))
(if port (for-each (lambda (p) (write-string p port)) pieces)
(apply string-append pieces)))))))) (else (receive (si ei)
(range start sv ev item)
(cons (substring str si ei)
pieces)))))
'() items))
;; No match. Return str[start,end].
(list (if (zero? start) str
(substring str start (string-length str))))))))
(pieces (reverse pieces)))
(if port (for-each (lambda (p) (write-string p port)) pieces)
(apply string-append pieces))))))