Remove documentation for BOW, EOW, WORD, and WORD+, as they have no
POSIX counterparts, and their meaning is locale-dependent.
This commit is contained in:
parent
00bba17d56
commit
357afa99ae
|
@ -72,8 +72,6 @@ providing:
|
||||||
\item repetition (\ex{*}, \ex{+}, \ex{?}, \ex{\{$m$,$n$\}})
|
\item repetition (\ex{*}, \ex{+}, \ex{?}, \ex{\{$m$,$n$\}})
|
||||||
\item character classes (\eg, \ex{[aeiou]}) and wildcard (\ex{.})
|
\item character classes (\eg, \ex{[aeiou]}) and wildcard (\ex{.})
|
||||||
\item beginning/end of string anchors (\verb|^|, \verb|$|)
|
\item beginning/end of string anchors (\verb|^|, \verb|$|)
|
||||||
\item beginning/end of line anchors
|
|
||||||
\item beginning/end of word anchors
|
|
||||||
\item case-sensitivity control
|
\item case-sensitivity control
|
||||||
\item submatch-marking
|
\item submatch-marking
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
@ -132,19 +130,12 @@ the next section is a friendlier tutorial introduction.
|
||||||
\\
|
\\
|
||||||
\ex{bos eos} & Beginning/end of string \\
|
\ex{bos eos} & Beginning/end of string \\
|
||||||
\ex{bol eol} & Beginning/end of line \\
|
\ex{bol eol} & Beginning/end of line \\
|
||||||
\ex{bow eow} & Beginning/end of word \\
|
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\caption{SRE syntax summary (part 1)}
|
\caption{SRE syntax summary (part 1)}
|
||||||
\end{boxedfigure}
|
\end{boxedfigure}
|
||||||
|
|
||||||
\begin{boxedfigure}{tbhp}
|
\begin{boxedfigure}{tbhp}
|
||||||
\begin{tabular}{lp{3in}}
|
\begin{tabular}{lp{3in}}
|
||||||
\ex{(word \var{sre} {\ldots})} & (: bow \var{sre} {\ldots} eow) \\
|
|
||||||
\ex{(word+ \var{cset-sre} {\ldots})}
|
|
||||||
& \cd{(word (+ (& (| alphanumeric "_")} \\
|
|
||||||
& \cd{ (| \var{cset-sre} {\ldots}))))} \\
|
|
||||||
\ex{word} & \ex{(word+ any)} \\
|
|
||||||
\\
|
|
||||||
\ex{(posix-string \var{string})} & Escape for Posix string notation \\
|
\ex{(posix-string \var{string})} & Escape for Posix string notation \\
|
||||||
\\
|
\\
|
||||||
\ex{\var{char}} & Singleton char set \\
|
\ex{\var{char}} & Singleton char set \\
|
||||||
|
@ -212,7 +203,7 @@ The chars are taken in pairs to form inclusive ranges.
|
||||||
| (w/case <cset-sre>)
|
| (w/case <cset-sre>)
|
||||||
| (w/nocase <cset-sre>)
|
| (w/nocase <cset-sre>)
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
\caption{%The \cd{~}, \cd{-}, \cd{&}, and \cd{word+} operators may only be
|
\caption{%The \cd{~}, \cd{-}, and \cd{&} operators may only be
|
||||||
applied to SRE's that specify character sets.
|
applied to SRE's that specify character sets.
|
||||||
These are the ``type-checking'' rules for character-set SRE's.}
|
These are the ``type-checking'' rules for character-set SRE's.}
|
||||||
\end{boxedfigure}
|
\end{boxedfigure}
|
||||||
|
@ -705,10 +696,10 @@ to produce a certain number of submatches---if that is part of \var{exp}'s
|
||||||
``contract.''
|
``contract.''
|
||||||
|
|
||||||
|
|
||||||
\paragraph{String, line, and word units}
|
\paragraph{String and line units}
|
||||||
|
|
||||||
The regexps \ex{bos} and \ex{eos} match the empty string at the beginning and
|
The regexps \ex{bos} and \ex{eos} match the empty string at the
|
||||||
end of the string, respectively.
|
beginning and end of the string, respectively.
|
||||||
|
|
||||||
The regexps \ex{bol} and \ex{eol} match the empty string at the beginning and
|
The regexps \ex{bol} and \ex{eol} match the empty string at the beginning and
|
||||||
end of a line, respectively. A line begins at the beginning of the string, and
|
end of a line, respectively. A line begins at the beginning of the string, and
|
||||||
|
@ -717,32 +708,6 @@ just before every newline character. The char class \ex{nonl} matches any
|
||||||
character except newline, and is useful in conjunction with line-based pattern
|
character except newline, and is useful in conjunction with line-based pattern
|
||||||
matching.
|
matching.
|
||||||
|
|
||||||
The regexps \ex{bow} and \ex{eow} match the empty string at the beginning and
|
|
||||||
end of a word, respectively. A word is a contiguous sequence of characters
|
|
||||||
that are either alphanumeric or the underscore character.
|
|
||||||
|
|
||||||
The regexp \ex{(word \var{sre} \ldots)} surrounds the sequence
|
|
||||||
\ex{(: \var{sre} \ldots)} with bow/eow delimiters. It is equivalent to
|
|
||||||
\begin{code}
|
|
||||||
(: bow \var{sre} \ldots eow)\end{code}%
|
|
||||||
%
|
|
||||||
|
|
||||||
The regexp \ex{(word+ \var{cset-sre} \ldots)} matches a word whose body is
|
|
||||||
one or more word characters matched by the char-set sre \var{cset-sre}.
|
|
||||||
It is equivalent to
|
|
||||||
\begin{code}
|
|
||||||
(word (+ (& (| alphanumeric "_")
|
|
||||||
(| \var{cset-sre} \ldots))))\end{code}%
|
|
||||||
%
|
|
||||||
For example, a word not containing x, y, or z is
|
|
||||||
\begin{code}
|
|
||||||
(word+ (~ ("xyz")))\end{code}%
|
|
||||||
%
|
|
||||||
The regexp \ex{word} matches one word; it is equivalent to
|
|
||||||
\begin{code}
|
|
||||||
(word+ any)
|
|
||||||
\end{code}%
|
|
||||||
|
|
||||||
\note{\ex{bol} and \ex{eol} are not supported by scsh's current
|
\note{\ex{bol} and \ex{eol} are not supported by scsh's current
|
||||||
regexp search engine, which is Spencer's Posix matcher. This is the only
|
regexp search engine, which is Spencer's Posix matcher. This is the only
|
||||||
element of the notation that is not supported by the current scsh
|
element of the notation that is not supported by the current scsh
|
||||||
|
@ -1262,9 +1227,6 @@ Note:\begin{itemize}
|
||||||
\item The string parser doesn't handle the exotica of character class
|
\item The string parser doesn't handle the exotica of character class
|
||||||
names such as \verb|[[:alnum:]]|; the current implementation was written
|
names such as \verb|[[:alnum:]]|; the current implementation was written
|
||||||
in three hours.
|
in three hours.
|
||||||
|
|
||||||
\item The unparser produces Spencer-specific strings for bow/eow
|
|
||||||
elements; otherwise, it's Posix all the way.
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{desc}
|
\end{desc}
|
||||||
|
|
||||||
|
@ -1327,8 +1289,6 @@ contained in the regular expression.
|
||||||
\defvarx{re-eos}{regexp}
|
\defvarx{re-eos}{regexp}
|
||||||
\defvarx{re-bol}{regexp}
|
\defvarx{re-bol}{regexp}
|
||||||
\defvarx{re-eol}{regexp}
|
\defvarx{re-eol}{regexp}
|
||||||
\defvarx{re-bow}{regexp}
|
|
||||||
\defvarx{re-eow}{regexp}
|
|
||||||
\begin{desc}
|
\begin{desc}
|
||||||
These variables are bound to the primitive anchor regexps.
|
These variables are bound to the primitive anchor regexps.
|
||||||
\end{desc}
|
\end{desc}
|
||||||
|
@ -1337,8 +1297,6 @@ These variables are bound to the primitive anchor regexps.
|
||||||
\defunx{re-eos?}{\object}{\boolean}
|
\defunx{re-eos?}{\object}{\boolean}
|
||||||
\defunx{re-bol?}{\object}{\boolean}
|
\defunx{re-bol?}{\object}{\boolean}
|
||||||
\defunx{re-eol?}{\object}{\boolean}
|
\defunx{re-eol?}{\object}{\boolean}
|
||||||
\defunx{re-bow?}{\object}{\boolean}
|
|
||||||
\defunx{re-eow?}{\object}{\boolean}
|
|
||||||
\begin{desc}
|
\begin{desc}
|
||||||
These predicates recognise the associated primitive anchor regexp.
|
These predicates recognise the associated primitive anchor regexp.
|
||||||
\end{desc}
|
\end{desc}
|
||||||
|
@ -1378,15 +1336,11 @@ regexps built using other constructors may or may not produce a true value.
|
||||||
|
|
||||||
% These are non-primitive predefined regexps of general utility.
|
% These are non-primitive predefined regexps of general utility.
|
||||||
|
|
||||||
\defvar {re-nonl}{regexp}
|
\defvarx {re-nonl}{regexp}
|
||||||
\defvarx{re-word}{regexp}
|
|
||||||
\begin{desc}
|
\begin{desc}
|
||||||
The variable \ex{re-nonl} is bound to a regular expression
|
The variable \ex{re-nonl} is bound to a regular expression
|
||||||
that matches any non-newline character
|
that matches any non-newline character
|
||||||
(corresponding to the SRE \verb|(~ #\newline)|).
|
(corresponding to the SRE \verb|(~ #\newline)|).
|
||||||
|
|
||||||
Similarly, \ex{re-word} is bound to a regular expression
|
|
||||||
that matches any word (corresponding to the SRE \ex{word}).
|
|
||||||
\end{desc}
|
\end{desc}
|
||||||
|
|
||||||
\defun{regexp?}{\object}{\boolean}
|
\defun{regexp?}{\object}{\boolean}
|
||||||
|
|
Loading…
Reference in New Issue