Fixed some design and implementation bugs in regexp-substitute/global.
This commit is contained in:
parent
aa5c661421
commit
5c5ae4dc99
6603
doc/scsh-manual.ps
6603
doc/scsh-manual.ps
File diff suppressed because it is too large
Load Diff
|
@ -116,30 +116,59 @@ and returned instead.
|
|||
\end{desc}
|
||||
|
||||
\defun{regexp-substitute/global}{port regexp string . items}
|
||||
{{\str} or \boolean}
|
||||
{{\str} or \undefined}
|
||||
\begin{desc}
|
||||
This procedure is similar to \ex{regexp-substitute},
|
||||
but can be used to perform multiple match/substitute operations over
|
||||
but can be used to perform repeated match/substitute operations over
|
||||
a string.
|
||||
It has the following differences with \ex{regexp-substitute}:
|
||||
\begin{itemize}
|
||||
\item It takes a regular expression and string to be matched as
|
||||
parameters, instead of a completed match structure.
|
||||
\item If the regular expression doesn't match the string, this
|
||||
procedure is the identity transform---it returns or outputs the
|
||||
string.
|
||||
\item If an item is \ex{'post}, the procedure recurses on the suffix string
|
||||
(the text from \var{string} following the match).
|
||||
\item If \var{port} is an output port:
|
||||
\begin{itemize}
|
||||
\item There was at least one match: return true.
|
||||
\item No match: return false.
|
||||
\end{itemize}
|
||||
|
||||
\item \var{port} is \sharpf:
|
||||
\begin{itemize}
|
||||
\item There was at least one match: construct and return a string.
|
||||
\item No match: return false.
|
||||
\end{itemize}
|
||||
(the text from \var{string} following the match).
|
||||
Including a \ex{'post} in the list of items is how one gets multiple
|
||||
match/substitution operations.
|
||||
\item If an item is a procedure, it is applied to the match structure for
|
||||
a given match.
|
||||
The procedure returns a string to be used in the result.
|
||||
\end{itemize}
|
||||
|
||||
Some examples:
|
||||
{\small
|
||||
\begin{widecode}
|
||||
;;; Replace occurrences of "Cotton" with "Jin".
|
||||
(regexp-substitute/global #f "Cotton" s
|
||||
'pre "Jin" 'post)
|
||||
|
||||
;;; mm/dd/yy -> dd/mm/yy date conversion.
|
||||
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
|
||||
s ; Source string
|
||||
'pre 2 "/" 1 "/" 3 'post)
|
||||
|
||||
;;; "9/29/61" -> "Sep 29, 1961" date conversion.
|
||||
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
|
||||
s ; Source string
|
||||
|
||||
'pre
|
||||
;; Sleazy converter -- ignores "year 2000" issue, and blows up if
|
||||
;; month is out of range.
|
||||
(lambda (m)
|
||||
(let ((mon (vector-ref '#("Jan" "Feb" "Mar" "Apr" "May" "Jun"
|
||||
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
|
||||
(- (string->number (match:substring m 1)) 1)))
|
||||
(day (match:substring m 2))
|
||||
(year (match:substring m 3)))
|
||||
(string-append mon " " day ", 19" year)))
|
||||
'post)
|
||||
|
||||
;;; Remove potentially offensive substrings from string S.
|
||||
(regexp-substitute/global #f "Windows|tcl|Intel" s
|
||||
'pre 'post)\end{widecode}}
|
||||
|
||||
\end{desc}
|
||||
|
||||
\subsection{Other string manipulation facilities}
|
||||
|
|
93
scsh/re.scm
93
scsh/re.scm
|
@ -150,59 +150,68 @@
|
|||
|
||||
|
||||
|
||||
(define (regexp-substitute/global port str re . items)
|
||||
(let ((range (lambda (start item) ; Return start & end of
|
||||
(define (regexp-substitute/global port re str . items)
|
||||
(let ((range (lambda (start sv ev item) ; Return start & end of
|
||||
(cond ((integer? item) ; ITEM's range in STR.
|
||||
(values (vector-ref sv item)
|
||||
(vector-ref ev item)))
|
||||
((eq? 'pre item) (values start (vector-ref sv 0)))
|
||||
(else (error "Illegal substitution item."
|
||||
item
|
||||
regexp-substitute)))))
|
||||
regexp-substitute/global)))))
|
||||
(num-posts (reduce (lambda (count item)
|
||||
(+ count (if (eq? item 'post) 1 0)))
|
||||
0 items)))
|
||||
(if (and port (< num-posts 2))
|
||||
(if (and port (< num-posts 2))
|
||||
|
||||
;; Output port case, with zero or one POST items.
|
||||
(let recur ((start 0))
|
||||
(let ((match (string-match re str start)))
|
||||
(and match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match)))
|
||||
(for-each (lambda (item)
|
||||
(cond ((string? item) (write-string item port))
|
||||
((eq? 'post item)
|
||||
(let ((post-start (vector-ref ev 0)))
|
||||
(or (recur post-start)
|
||||
(write-string item port post-start))))
|
||||
(else (receive (si ei) (range start item)
|
||||
(write-string str port si ei)))))
|
||||
items)
|
||||
#t))))
|
||||
;; Output port case, with zero or one POST items.
|
||||
(let recur ((start 0))
|
||||
(let ((match (string-match re str start)))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match)))
|
||||
(for-each (lambda (item)
|
||||
(cond ((string? item) (write-string item port))
|
||||
((procedure? item) (write-string (item match) port))
|
||||
((eq? 'post item) (recur (vector-ref ev 0)))
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(write-string str port si ei)))))
|
||||
items))
|
||||
|
||||
(let ((pieces (let recur ((start 0))
|
||||
(let ((match (string-match re str start))
|
||||
(cached-post #f))
|
||||
(and match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match)))
|
||||
(reduce (lambda (pieces item)
|
||||
(cond ((string? item) (cons item pieces))
|
||||
((eq? 'post item)
|
||||
(append (or cached-post
|
||||
(begin (set! cached-post
|
||||
(recur (vector-ref ev 0)))
|
||||
cached-post))
|
||||
pieces))
|
||||
(else (receive (si ei) (range start item)
|
||||
(cons (substring str si e1)
|
||||
pieces)))))
|
||||
0 items)))))))
|
||||
(and pieces
|
||||
(let ((pieces (reverse pieces)))
|
||||
(if port (for-each (lambda (p) (write-string p port)) pieces)
|
||||
(apply string-append pieces))))))))
|
||||
(write-string str port start)))) ; No match.
|
||||
|
||||
(let* ((pieces (let recur ((start 0))
|
||||
(let ((match (string-match re str start))
|
||||
(cached-post #f))
|
||||
(if match
|
||||
(let* ((sv (regexp-match:start match))
|
||||
(ev (regexp-match:end match)))
|
||||
(reduce (lambda (pieces item)
|
||||
(cond ((string? item)
|
||||
(cons item pieces))
|
||||
|
||||
((procedure? item)
|
||||
(cons (item match) pieces))
|
||||
|
||||
((eq? 'post item)
|
||||
(if (not cached-post)
|
||||
(set! cached-post
|
||||
(recur (vector-ref ev 0))))
|
||||
(append cached-post pieces))
|
||||
|
||||
(else (receive (si ei)
|
||||
(range start sv ev item)
|
||||
(cons (substring str si ei)
|
||||
pieces)))))
|
||||
'() items))
|
||||
|
||||
;; No match. Return str[start,end].
|
||||
(list (if (zero? start) str
|
||||
(substring str start (string-length str))))))))
|
||||
(pieces (reverse pieces)))
|
||||
(if port (for-each (lambda (p) (write-string p port)) pieces)
|
||||
(apply string-append pieces))))))
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue