Remove documentation for BOW, EOW, WORD, and WORD+, as they have no

POSIX counterparts, and their meaning is locale-dependent.
This commit is contained in:
sperber 2002-02-16 16:52:27 +00:00
parent 00bba17d56
commit 357afa99ae
1 changed files with 81 additions and 127 deletions

View File

@ -72,8 +72,6 @@ providing:
\item repetition (\ex{*}, \ex{+}, \ex{?}, \ex{\{$m$,$n$\}})
\item character classes (\eg, \ex{[aeiou]}) and wildcard (\ex{.})
\item beginning/end of string anchors (\verb|^|, \verb|$|)
\item beginning/end of line anchors
\item beginning/end of word anchors
\item case-sensitivity control
\item submatch-marking
\end{itemize}
@ -132,19 +130,12 @@ the next section is a friendlier tutorial introduction.
\\
\ex{bos eos} & Beginning/end of string \\
\ex{bol eol} & Beginning/end of line \\
\ex{bow eow} & Beginning/end of word \\
\end{tabular}
\caption{SRE syntax summary (part 1)}
\end{boxedfigure}
\begin{boxedfigure}{tbhp}
\begin{tabular}{lp{3in}}
\ex{(word \var{sre} {\ldots})} & (: bow \var{sre} {\ldots} eow) \\
\ex{(word+ \var{cset-sre} {\ldots})}
& \cd{(word (+ (& (| alphanumeric "_")} \\
& \cd{ (| \var{cset-sre} {\ldots}))))} \\
\ex{word} & \ex{(word+ any)} \\
\\
\ex{(posix-string \var{string})} & Escape for Posix string notation \\
\\
\ex{\var{char}} & Singleton char set \\
@ -212,7 +203,7 @@ The chars are taken in pairs to form inclusive ranges.
| (w/case <cset-sre>)
| (w/nocase <cset-sre>)
\end{verbatim}
\caption{%The \cd{~}, \cd{-}, \cd{&}, and \cd{word+} operators may only be
\caption{%The \cd{~}, \cd{-}, and \cd{&} operators may only be
applied to SRE's that specify character sets.
These are the ``type-checking'' rules for character-set SRE's.}
\end{boxedfigure}
@ -705,10 +696,10 @@ to produce a certain number of submatches---if that is part of \var{exp}'s
``contract.''
\paragraph{String, line, and word units}
\paragraph{String and line units}
The regexps \ex{bos} and \ex{eos} match the empty string at the beginning and
end of the string, respectively.
The regexps \ex{bos} and \ex{eos} match the empty string at the
beginning and end of the string, respectively.
The regexps \ex{bol} and \ex{eol} match the empty string at the beginning and
end of a line, respectively. A line begins at the beginning of the string, and
@ -717,32 +708,6 @@ just before every newline character. The char class \ex{nonl} matches any
character except newline, and is useful in conjunction with line-based pattern
matching.
The regexps \ex{bow} and \ex{eow} match the empty string at the beginning and
end of a word, respectively. A word is a contiguous sequence of characters
that are either alphanumeric or the underscore character.
The regexp \ex{(word \var{sre} \ldots)} surrounds the sequence
\ex{(: \var{sre} \ldots)} with bow/eow delimiters. It is equivalent to
\begin{code}
(: bow \var{sre} \ldots eow)\end{code}%
%
The regexp \ex{(word+ \var{cset-sre} \ldots)} matches a word whose body is
one or more word characters matched by the char-set sre \var{cset-sre}.
It is equivalent to
\begin{code}
(word (+ (& (| alphanumeric "_")
(| \var{cset-sre} \ldots))))\end{code}%
%
For example, a word not containing x, y, or z is
\begin{code}
(word+ (~ ("xyz")))\end{code}%
%
The regexp \ex{word} matches one word; it is equivalent to
\begin{code}
(word+ any)
\end{code}%
\note{\ex{bol} and \ex{eol} are not supported by scsh's current
regexp search engine, which is Spencer's Posix matcher. This is the only
element of the notation that is not supported by the current scsh
@ -1262,9 +1227,6 @@ Note:\begin{itemize}
\item The string parser doesn't handle the exotica of character class
names such as \verb|[[:alnum:]]|; the current implementation was written
in three hours.
\item The unparser produces Spencer-specific strings for bow/eow
elements; otherwise, it's Posix all the way.
\end{itemize}
\end{desc}
@ -1327,8 +1289,6 @@ contained in the regular expression.
\defvarx{re-eos}{regexp}
\defvarx{re-bol}{regexp}
\defvarx{re-eol}{regexp}
\defvarx{re-bow}{regexp}
\defvarx{re-eow}{regexp}
\begin{desc}
These variables are bound to the primitive anchor regexps.
\end{desc}
@ -1337,8 +1297,6 @@ These variables are bound to the primitive anchor regexps.
\defunx{re-eos?}{\object}{\boolean}
\defunx{re-bol?}{\object}{\boolean}
\defunx{re-eol?}{\object}{\boolean}
\defunx{re-bow?}{\object}{\boolean}
\defunx{re-eow?}{\object}{\boolean}
\begin{desc}
These predicates recognise the associated primitive anchor regexp.
\end{desc}
@ -1378,15 +1336,11 @@ regexps built using other constructors may or may not produce a true value.
% These are non-primitive predefined regexps of general utility.
\defvar {re-nonl}{regexp}
\defvarx{re-word}{regexp}
\defvarx {re-nonl}{regexp}
\begin{desc}
The variable \ex{re-nonl} is bound to a regular expression
that matches any non-newline character
(corresponding to the SRE \verb|(~ #\newline)|).
Similarly, \ex{re-word} is bound to a regular expression
that matches any word (corresponding to the SRE \ex{word}).
\end{desc}
\defun{regexp?}{\object}{\boolean}