Fixed some design and implementation bugs in regexp-substitute/global.
This commit is contained in:
parent
aa5c661421
commit
5c5ae4dc99
6603
doc/scsh-manual.ps
6603
doc/scsh-manual.ps
File diff suppressed because it is too large
Load Diff
|
@ -116,30 +116,59 @@ and returned instead.
|
||||||
\end{desc}
|
\end{desc}
|
||||||
|
|
||||||
\defun{regexp-substitute/global}{port regexp string . items}
|
\defun{regexp-substitute/global}{port regexp string . items}
|
||||||
{{\str} or \boolean}
|
{{\str} or \undefined}
|
||||||
\begin{desc}
|
\begin{desc}
|
||||||
This procedure is similar to \ex{regexp-substitute},
|
This procedure is similar to \ex{regexp-substitute},
|
||||||
but can be used to perform multiple match/substitute operations over
|
but can be used to perform repeated match/substitute operations over
|
||||||
a string.
|
a string.
|
||||||
It has the following differences with \ex{regexp-substitute}:
|
It has the following differences with \ex{regexp-substitute}:
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item It takes a regular expression and string to be matched as
|
\item It takes a regular expression and string to be matched as
|
||||||
parameters, instead of a completed match structure.
|
parameters, instead of a completed match structure.
|
||||||
|
\item If the regular expression doesn't match the string, this
|
||||||
|
procedure is the identity transform---it returns or outputs the
|
||||||
|
string.
|
||||||
\item If an item is \ex{'post}, the procedure recurses on the suffix string
|
\item If an item is \ex{'post}, the procedure recurses on the suffix string
|
||||||
(the text from \var{string} following the match).
|
(the text from \var{string} following the match).
|
||||||
\item If \var{port} is an output port:
|
Including a \ex{'post} in the list of items is how one gets multiple
|
||||||
\begin{itemize}
|
match/substitution operations.
|
||||||
\item There was at least one match: return true.
|
\item If an item is a procedure, it is applied to the match structure for
|
||||||
\item No match: return false.
|
a given match.
|
||||||
\end{itemize}
|
The procedure returns a string to be used in the result.
|
||||||
|
|
||||||
\item \var{port} is \sharpf:
|
|
||||||
\begin{itemize}
|
|
||||||
\item There was at least one match: construct and return a string.
|
|
||||||
\item No match: return false.
|
|
||||||
\end{itemize}
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
|
Some examples:
|
||||||
|
{\small
|
||||||
|
\begin{widecode}
|
||||||
|
;;; Replace occurrences of "Cotton" with "Jin".
|
||||||
|
(regexp-substitute/global #f "Cotton" s
|
||||||
|
'pre "Jin" 'post)
|
||||||
|
|
||||||
|
;;; mm/dd/yy -> dd/mm/yy date conversion.
|
||||||
|
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
|
||||||
|
s ; Source string
|
||||||
|
'pre 2 "/" 1 "/" 3 'post)
|
||||||
|
|
||||||
|
;;; "9/29/61" -> "Sep 29, 1961" date conversion.
|
||||||
|
(regexp-substitute/global #f "([0-9]+)/([0-9]+)/([0-9]+)" ; mm/dd/yy
|
||||||
|
s ; Source string
|
||||||
|
|
||||||
|
'pre
|
||||||
|
;; Sleazy converter -- ignores "year 2000" issue, and blows up if
|
||||||
|
;; month is out of range.
|
||||||
|
(lambda (m)
|
||||||
|
(let ((mon (vector-ref '#("Jan" "Feb" "Mar" "Apr" "May" "Jun"
|
||||||
|
"Jul" "Aug" "Sep" "Oct" "Nov" "Dec")
|
||||||
|
(- (string->number (match:substring m 1)) 1)))
|
||||||
|
(day (match:substring m 2))
|
||||||
|
(year (match:substring m 3)))
|
||||||
|
(string-append mon " " day ", 19" year)))
|
||||||
|
'post)
|
||||||
|
|
||||||
|
;;; Remove potentially offensive substrings from string S.
|
||||||
|
(regexp-substitute/global #f "Windows|tcl|Intel" s
|
||||||
|
'pre 'post)\end{widecode}}
|
||||||
|
|
||||||
\end{desc}
|
\end{desc}
|
||||||
|
|
||||||
\subsection{Other string manipulation facilities}
|
\subsection{Other string manipulation facilities}
|
||||||
|
|
93
scsh/re.scm
93
scsh/re.scm
|
@ -150,59 +150,68 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define (regexp-substitute/global port str re . items)
|
(define (regexp-substitute/global port re str . items)
|
||||||
(let ((range (lambda (start item) ; Return start & end of
|
(let ((range (lambda (start sv ev item) ; Return start & end of
|
||||||
(cond ((integer? item) ; ITEM's range in STR.
|
(cond ((integer? item) ; ITEM's range in STR.
|
||||||
(values (vector-ref sv item)
|
(values (vector-ref sv item)
|
||||||
(vector-ref ev item)))
|
(vector-ref ev item)))
|
||||||
((eq? 'pre item) (values start (vector-ref sv 0)))
|
((eq? 'pre item) (values start (vector-ref sv 0)))
|
||||||
(else (error "Illegal substitution item."
|
(else (error "Illegal substitution item."
|
||||||
item
|
item
|
||||||
regexp-substitute)))))
|
regexp-substitute/global)))))
|
||||||
(num-posts (reduce (lambda (count item)
|
(num-posts (reduce (lambda (count item)
|
||||||
(+ count (if (eq? item 'post) 1 0)))
|
(+ count (if (eq? item 'post) 1 0)))
|
||||||
0 items)))
|
0 items)))
|
||||||
(if (and port (< num-posts 2))
|
(if (and port (< num-posts 2))
|
||||||
|
|
||||||
;; Output port case, with zero or one POST items.
|
;; Output port case, with zero or one POST items.
|
||||||
(let recur ((start 0))
|
(let recur ((start 0))
|
||||||
(let ((match (string-match re str start)))
|
(let ((match (string-match re str start)))
|
||||||
(and match
|
(if match
|
||||||
(let* ((sv (regexp-match:start match))
|
(let* ((sv (regexp-match:start match))
|
||||||
(ev (regexp-match:end match)))
|
(ev (regexp-match:end match)))
|
||||||
(for-each (lambda (item)
|
(for-each (lambda (item)
|
||||||
(cond ((string? item) (write-string item port))
|
(cond ((string? item) (write-string item port))
|
||||||
((eq? 'post item)
|
((procedure? item) (write-string (item match) port))
|
||||||
(let ((post-start (vector-ref ev 0)))
|
((eq? 'post item) (recur (vector-ref ev 0)))
|
||||||
(or (recur post-start)
|
(else (receive (si ei)
|
||||||
(write-string item port post-start))))
|
(range start sv ev item)
|
||||||
(else (receive (si ei) (range start item)
|
(write-string str port si ei)))))
|
||||||
(write-string str port si ei)))))
|
items))
|
||||||
items)
|
|
||||||
#t))))
|
|
||||||
|
|
||||||
(let ((pieces (let recur ((start 0))
|
(write-string str port start)))) ; No match.
|
||||||
(let ((match (string-match re str start))
|
|
||||||
(cached-post #f))
|
(let* ((pieces (let recur ((start 0))
|
||||||
(and match
|
(let ((match (string-match re str start))
|
||||||
(let* ((sv (regexp-match:start match))
|
(cached-post #f))
|
||||||
(ev (regexp-match:end match)))
|
(if match
|
||||||
(reduce (lambda (pieces item)
|
(let* ((sv (regexp-match:start match))
|
||||||
(cond ((string? item) (cons item pieces))
|
(ev (regexp-match:end match)))
|
||||||
((eq? 'post item)
|
(reduce (lambda (pieces item)
|
||||||
(append (or cached-post
|
(cond ((string? item)
|
||||||
(begin (set! cached-post
|
(cons item pieces))
|
||||||
(recur (vector-ref ev 0)))
|
|
||||||
cached-post))
|
((procedure? item)
|
||||||
pieces))
|
(cons (item match) pieces))
|
||||||
(else (receive (si ei) (range start item)
|
|
||||||
(cons (substring str si e1)
|
((eq? 'post item)
|
||||||
pieces)))))
|
(if (not cached-post)
|
||||||
0 items)))))))
|
(set! cached-post
|
||||||
(and pieces
|
(recur (vector-ref ev 0))))
|
||||||
(let ((pieces (reverse pieces)))
|
(append cached-post pieces))
|
||||||
(if port (for-each (lambda (p) (write-string p port)) pieces)
|
|
||||||
(apply string-append pieces))))))))
|
(else (receive (si ei)
|
||||||
|
(range start sv ev item)
|
||||||
|
(cons (substring str si ei)
|
||||||
|
pieces)))))
|
||||||
|
'() items))
|
||||||
|
|
||||||
|
;; No match. Return str[start,end].
|
||||||
|
(list (if (zero? start) str
|
||||||
|
(substring str start (string-length str))))))))
|
||||||
|
(pieces (reverse pieces)))
|
||||||
|
(if port (for-each (lambda (p) (write-string p port)) pieces)
|
||||||
|
(apply string-append pieces))))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue