Fixed some design and implementation bugs in regexp-substitute/global.

This commit is contained in:
shivers 1997-04-19 04:01:19 +00:00
parent aa5c661421
commit 5c5ae4dc99
3 changed files with 3411 additions and 3342 deletions

File diff suppressed because it is too large Load Diff

View File

@ -116,30 +116,59 @@ and returned instead.
\end{desc}
\defun{regexp-substitute/global}{port regexp string . items}
{{\str} or \boolean}
{{\str} or \undefined}
\begin{desc}
This procedure is similar to \ex{regexp-substitute},
but can be used to perform multiple match/substitute operations over
but can be used to perform repeated match/substitute operations over
a string.
It has the following differences with \ex{regexp-substitute}:
\begin{itemize}
\item It takes a regular expression and string to be matched as
parameters, instead of a completed match structure.
\item If the regular expression doesn't match the string, this
procedure is the identity transform---it returns or outputs the
string.
\item If an item is \ex{'post}, the procedure recurses on the suffix string
(the text from \var{string} following the match).
\item If \var{port} is an output port:
\begin{itemize}
\item There was at least one match: return true.
\item No match: return false.
\end{itemize}
\item \var{port} is \sharpf:
\begin{itemize}
\item There was at least one match: construct and return a string.
\item No match: return false.
\end{itemize}
(the text from \var{string} following the match).
Including a \ex{'post} in the list of items is how one gets multiple
match/substitution operations.
\item If an item is a procedure, it is applied to the match structure for
a given match.
The procedure returns a string to be used in the result.
\end{itemize}
Some examples:
{\small
\begin{widecode}
;;; Replace occurrences of "Cotton" with "Jin".
(regexp-substitute/global #f "Cotton" s
'pre "Jin" 'post)
;;; mm/dd/yy -> dd/mm/yy date conversion.
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
s ; Source string
'pre 2 "/" 1 "/" 3 'post)
;;; "9/29/61" -> "Sep 29, 1961" date conversion.
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
s ; Source string
'pre
;; Sleazy converter -- ignores "year 2000" issue, and blows up if
;; month is out of range.
(lambda (m)
(let ((mon (vector-ref '#("Jan" "Feb" "Mar" "Apr" "May" "Jun"
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
(- (string->number (match:substring m 1)) 1)))
(day (match:substring m 2))
(year (match:substring m 3)))
(string-append mon " " day ", 19" year)))
'post)
;;; Remove potentially offensive substrings from string S.
(regexp-substitute/global #f "Windows|tcl|Intel" s
'pre 'post)\end{widecode}}
\end{desc}
\subsection{Other string manipulation facilities}

View File

@ -150,59 +150,68 @@
(define (regexp-substitute/global port str re . items)
(let ((range (lambda (start item) ; Return start & end of
(define (regexp-substitute/global port re str . items)
(let ((range (lambda (start sv ev item) ; Return start & end of
(cond ((integer? item) ; ITEM's range in STR.
(values (vector-ref sv item)
(vector-ref ev item)))
((eq? 'pre item) (values start (vector-ref sv 0)))
(else (error "Illegal substitution item."
item
regexp-substitute)))))
regexp-substitute/global)))))
(num-posts (reduce (lambda (count item)
(+ count (if (eq? item 'post) 1 0)))
0 items)))
(if (and port (< num-posts 2))
(if (and port (< num-posts 2))
;; Output port case, with zero or one POST items.
(let recur ((start 0))
(let ((match (string-match re str start)))
(and match
(let* ((sv (regexp-match:start match))
(ev (regexp-match:end match)))
(for-each (lambda (item)
(cond ((string? item) (write-string item port))
((eq? 'post item)
(let ((post-start (vector-ref ev 0)))
(or (recur post-start)
(write-string item port post-start))))
(else (receive (si ei) (range start item)
(write-string str port si ei)))))
items)
#t))))
;; Output port case, with zero or one POST items.
(let recur ((start 0))
(let ((match (string-match re str start)))
(if match
(let* ((sv (regexp-match:start match))
(ev (regexp-match:end match)))
(for-each (lambda (item)
(cond ((string? item) (write-string item port))
((procedure? item) (write-string (item match) port))
((eq? 'post item) (recur (vector-ref ev 0)))
(else (receive (si ei)
(range start sv ev item)
(write-string str port si ei)))))
items))
(let ((pieces (let recur ((start 0))
(let ((match (string-match re str start))
(cached-post #f))
(and match
(let* ((sv (regexp-match:start match))
(ev (regexp-match:end match)))
(reduce (lambda (pieces item)
(cond ((string? item) (cons item pieces))
((eq? 'post item)
(append (or cached-post
(begin (set! cached-post
(recur (vector-ref ev 0)))
cached-post))
pieces))
(else (receive (si ei) (range start item)
(cons (substring str si e1)
pieces)))))
0 items)))))))
(and pieces
(let ((pieces (reverse pieces)))
(if port (for-each (lambda (p) (write-string p port)) pieces)
(apply string-append pieces))))))))
(write-string str port start)))) ; No match.
(let* ((pieces (let recur ((start 0))
(let ((match (string-match re str start))
(cached-post #f))
(if match
(let* ((sv (regexp-match:start match))
(ev (regexp-match:end match)))
(reduce (lambda (pieces item)
(cond ((string? item)
(cons item pieces))
((procedure? item)
(cons (item match) pieces))
((eq? 'post item)
(if (not cached-post)
(set! cached-post
(recur (vector-ref ev 0))))
(append cached-post pieces))
(else (receive (si ei)
(range start sv ev item)
(cons (substring str si ei)
pieces)))))
'() items))
;; No match. Return str[start,end].
(list (if (zero? start) str
(substring str start (string-length str))))))))
(pieces (reverse pieces)))
(if port (for-each (lambda (p) (write-string p port)) pieces)
(apply string-append pieces))))))