diff --git a/doc/scsh-manual/array.sty b/doc/scsh-manual/array.sty new file mode 100644 index 0000000..88b9b7d --- /dev/null +++ b/doc/scsh-manual/array.sty @@ -0,0 +1,252 @@ +%% +%% This is file `/usr2/distrib/latex209/nfss/array.sty' generated +%% on <1991/11/22> with the docstrip utility (v1.1k). +%% +%% The original source files were: +%% +%% /usr2/users/latex3/source/array/array.doc + +%% +%% Copyright (C) 1989,1990,1991 by Frank Mittelbach, Rainer Schoepf. +%% All rights reserved. +%% +%% This file is part of the NFSS (New Font Selection Scheme) package. +%% +%% IMPORTANT NOTICE: +%% +%% You are not allowed to change this file. You may however copy this file +%% to a file with a different name and then change the copy if you obey +%% the restrictions on file changes described in readme.mz. +%% +%% You are allowed to distribute this file under the condition that it is +%% distributed together with all files mentioned in readme.mz3. If you +%% receive only some of these files from someone, complain! +%% +%% You are NOT ALLOWED to distribute this file alone. You are NOT ALLOWED +%% to take money for the distribution or use of either this file or a +%% changed version, except for a nominal charge for copying etc. +%% +%% For error reports in case of UNCHANGED versions see readme files. +%% +%% Please do not request updates from us directly. Distribution is done +%% through Mail-Servers and TeX organizations. +%% + +\def\fileversion{v2.0e} +\def\filedate{91/02/07} +\def\docdate {90/08/20} + +%% \CheckSum{681} +%% \CharacterTable +%% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z +%% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z +%% Digits \0\1\2\3\4\5\6\7\8\9 +%% Exclamation \! Double quote \" Hash (number) \# +%% Dollar \$ Percent \% Ampersand \& +%% Acute accent \' Left paren \( Right paren \) +%% Asterisk \* Plus \+ Comma \, +%% Minus \- Point \. 
Solidus \/ +%% Colon \: Semicolon \; Less than \< +%% Equals \= Greater than \> Question mark \? +%% Commercial at \@ Left bracket \[ Backslash \\ +%% Right bracket \] Circumflex \^ Underscore \_ +%% Grave accent \` Left brace \{ Vertical bar \| +%% Right brace \} Tilde \~} +%% +\@ifundefined{d@llar}{}{\endinput} +\typeout{Style-Option: `array' \fileversion + \space\space <\filedate> (F.M.)} +\typeout{English documentation dated \space <\docdate> (F.M.)} +\def\@addtopreamble#1{\xdef\@preamble{\@preamble #1}} +\def\@testpach#1{\@chclass + \ifnum \@lastchclass=6 \@ne \@chnum \@ne \else + \ifnum \@lastchclass=7 5 \else + \ifnum \@lastchclass=8 \tw@ \else + \ifnum \@lastchclass=9 \thr@@ + \else \z@ + \ifnum \@lastchclass = 10 \else + \@chnum + \if #1c\z@ \else + \if #1l\@ne \else + \if #1r\tw@ \else + \z@ \@chclass + \if#1|\@ne \else + \if #1!6 \else + \if #1@7 \else + \if #1<8 \else + \if #1>9 \else + 10 + \@chnum + \if #1m\thr@@\else + \if #1p4 \else + \if #1b5 \else + \z@ \@chclass \z@ \@preamerr \z@ \fi \fi \fi \fi + \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi} +\def\@xexpast#1*#2#3#4\@@{% + \@tempcnta #2 + \toks@={#1}\@temptokena={#3}% + \let\the@toksz\relax \let\the@toks\relax + \def\@tempa{\the@toksz}% + \ifnum\@tempcnta >0 \@whilenum\@tempcnta >0\do + {\edef\@tempa{\@tempa\the@toks}\advance \@tempcnta \m@ne}% + \let \@tempb \@xexpast \else + \let \@tempb \@xexnoop \fi + \def\the@toksz{\the\toks@}\def\the@toks{\the\@temptokena}% + \edef\@tempa{\@tempa}% + \expandafter \@tempb \@tempa #4\@@} +\def\prepnext@tok{\advance \count@ \@ne + \toks\count@={}} +\def\save@decl{\toks\count@ \expandafter{\@nextchar}} +\def\insert@column{% + \the@toks \the \@tempcnta + {\ignorespaces \@sharp \unskip}% + \the@toks \the \count@ \relax} +\newdimen\col@sep +\def\@acol{\@addtopreamble{\hskip\col@sep}} +\def\@mkpream#1{\gdef\@preamble{}\@lastchclass 4 \@firstamptrue + \let\@sharp\relax \let\@startpbox\relax \let\@endpbox\relax + \@xexpast #1*0x\@@ + \count@\m@ne + 
\let\the@toks\relax + \prepnext@tok + \expandafter \@tfor \expandafter \@nextchar + \expandafter :\expandafter =\@tempa \do + {\@testpach\@nextchar + \ifcase \@chclass \@classz \or \@classi \or \@classii + \or \save@decl \or \or \@classv \or \@classvi + \or \@classvii \or \@classviii \or \@classix + \or \@classx \fi + \@lastchclass\@chclass}% + \ifcase\@lastchclass + \@acol \or + \or + \@acol \or + \@preamerr \thr@@ \or + \@preamerr \tw@ \@addtopreamble\@sharp \or + \or + \else \@preamerr \@ne \fi + \def\the@toks{\the\toks}} +\def\@classx{% + \ifcase \@lastchclass + \@acolampacol \or + \@addamp \@acol \or + \@acolampacol \or + \or + \@acol \@firstampfalse \or + \@addamp + \fi} +\def\@classz{\@classx + \@tempcnta \count@ + \prepnext@tok + \@addtopreamble{\ifcase \@chnum + \hfil + \d@llar + \insert@column + \d@llar \hfil \or + \d@llar \insert@column \d@llar \hfil \or + \hfil\kern\z@ \d@llar \insert@column \d@llar \or + $\vcenter + \@startpbox{\@nextchar}\insert@column \@endpbox $\or + \vtop \@startpbox{\@nextchar}\insert@column \@endpbox \or + \vbox \@startpbox{\@nextchar}\insert@column \@endpbox + \fi}\prepnext@tok} +\def\@classix{\ifnum \@lastchclass = \thr@@ + \@preamerr \thr@@ \fi + \@classx} +\def\@classviii{\ifnum \@lastchclass >\z@ + \@preamerr 4\@chclass 6 \@classvi \fi} +\def\@arrayrule{\@addtopreamble \vline} +\def\@classvii{\ifnum \@lastchclass = \thr@@ + \@preamerr \thr@@ \fi} +\def\@classvi{\ifcase \@lastchclass + \@acol \or + \@addtopreamble{\hskip \doublerulesep}\or + \@acol \or + \@classvii + \fi} +\def\@classii{\advance \count@ \m@ne + \save@decl\prepnext@tok} +\def\@classv{\save@decl + \@addtopreamble{\d@llar\the@toks\the\count@\relax\d@llar}% + \prepnext@tok} +\def\@classi{\@classvi + \ifcase \@chnum \@arrayrule \or + \@classv \fi} +\def\@startpbox#1{\bgroup + \hsize #1 \@arrayparboxrestore + \vrule \@height \ht\@arstrutbox \@width \z@} +\def\@endpbox{\vrule \@width \z@ \@depth \dp \@arstrutbox \egroup} +\def\@array[#1]#2{% + \@tempdima \ht 
\strutbox + \advance \@tempdima by\extrarowheight + \setbox \@arstrutbox \hbox{\vrule + \@height \arraystretch \@tempdima + \@depth \arraystretch \dp \strutbox + \@width \z@}% + \begingroup + \@mkpream{#2}% + \xdef\@preamble{\ialign \@halignto + \bgroup \@arstrut \@preamble + \tabskip \z@ \cr}% + \endgroup + \if #1t\vtop \else \if#1b\vbox \else \vcenter \fi \fi + \bgroup + \let \@sharp ##\let \protect \relax + \lineskip \z@ + \baselineskip \z@ + \m@th + \let\\ \@arraycr \let\par\@empty \@preamble} +\newdimen \extrarowheight +\extrarowheight=0pt +\def\@arstrut{\unhcopy\@arstrutbox} +\def\@arraycr{{\ifnum 0=`}\fi + \@ifstar \@xarraycr \@xarraycr} +\def\@xarraycr{\@ifnextchar [% + \@argarraycr {\ifnum 0=`{\fi}\cr}} +\def\@argarraycr[#1]{\ifnum0=`{\fi}\ifdim #1>\z@ + \@xargarraycr{#1}\else \@yargarraycr{#1}\fi} +\def\@xargarraycr#1{\unskip + \@tempdima #1\advance\@tempdima \dp\@arstrutbox + \vrule \@depth\@tempdima \@width\z@ \cr} +\def\@yargarraycr#1{\cr\noalign{\vskip #1}} +\def\multicolumn#1#2#3{% + \multispan{#1}\begingroup + \def\@addamp{\if@firstamp \@firstampfalse \else + \@preamerr 5\fi}% + \@mkpream{#2}\@addtopreamble\@empty + \endgroup + \def\@sharp{#3}% + \@arstrut \@preamble \ignorespaces} +\def\array{\col@sep\arraycolsep + \def\d@llar{$}\gdef\@halignto{}% + \@tabarray} +\def\@tabarray{\@ifnextchar[{\@array}{\@array[c]}} +\def\tabular{\gdef\@halignto{}\@tabular} +\expandafter\def\csname tabular*\endcsname#1{% + \gdef\@halignto{to#1}\@tabular} +\def\@tabular{% + \leavevmode + \hbox \bgroup $\col@sep\tabcolsep \let\d@llar\@empty + \@tabarray} +\def\endarray{\crcr \egroup \egroup \gdef\@preamble{}} +\def\endtabular{\endarray $\egroup} +\expandafter\let\csname endtabular*\endcsname=\endtabular +\let\@ampacol=\relax \let\@expast=\relax +\let\@arrayclassiv=\relax \let\@arrayclassz=\relax +\let\@tabclassiv=\relax \let\@tabclassz=\relax +\let\@arrayacol=\relax \let\@tabacol=\relax +\let\@tabularcr=\relax \let\@@endpbox=\relax +\let\@argtabularcr=\relax 
\let\@xtabularcr=\relax +\def\@preamerr#1{\def\@tempd{{..} at wrong position: }% + \@latexerr{% + \ifcase #1 Illegal pream-token (\@nextchar): `c' used\or %0 + Missing arg: token ignored\or %1 + Empty preamble: `l' used\or %2 + >\@tempd token ignored\or %3 + <\@tempd changed to !{..}\or %4 + Only one colum-spec. allowed.\fi}\@ehc} %5 +\def\@tfor#1:=#2\do#3{\def\@fortmp{#2}\ifx\@fortmp\@empty + \else\@tforloop#2\@nil\@nil\@@#1{#3}\fi} +\endinput +%% +%% End of file `/usr2/distrib/latex209/nfss/array.sty'. diff --git a/doc/scsh-manual/awk.tex b/doc/scsh-manual/awk.tex index 15cbe6f..4331163 100644 --- a/doc/scsh-manual/awk.tex +++ b/doc/scsh-manual/awk.tex @@ -29,48 +29,10 @@ The field parsers can be applied to arbitrary strings (one common use is splitting environment variables such as \ex{\$PATH} at colons into its component elements). -\subsection{Reading delimited strings} -These procedures read in strings from ports delimited by characters -belonging to a specific set. -See section~\ref{sec:char-sets} for information on character set manipulation. - -\defun{read-delimited}{char-set [port]} {{\str} or eof} -\begin{desc} - Read until we encounter one of the chars in \var{char-set} or eof. - The terminating character is not included in the string returned, - nor is it removed from the input stream; the next input operation will - encounter it. If we get a string back, then \ex{(eof-object? (peek-char))} - tells if the string was terminated by a delimiter or eof. - - The \var{char-set} argument may be a charset, a string, a character, or a - character predicate; it is coerced to a charset. - - This operation is likely to be implemented very efficiently. In - the Scheme 48 implementation, the Unix port case is implemented directly - in C, and is much faster than the equivalent operation performed - in Scheme with \ex{peek-char} and \ex{read-char}. 
-\end{desc} - -\defun{read-delimited!} {char-set buf [port start end]} {nchars or eof or \#f} -\begin{desc} - A side-effecting variant of \ex{read-delimited}. - - The data is written into the string \var{buf} at the indices in the - half-open interval $[\var{start},\var{end})$; the default interval is the - whole string: $\var{start}=0$ and $\var{end}=\ex{(string-length - \var{buf})}$. The values of \var{start} and \var{end} must specify a - well-defined interval in \var{str}, \ie, $0 \le \var{start} \le \var{end} - \le \ex{(string-length \var{buf})}$. - - It returns \var{nbytes}, the number of bytes read. If the buffer filled up - without a delimiter character being found, \ex{\#f} is returned. If - the port is at eof when the read starts, the eof object is returned. - - If an integer is returned, then - \ex{(eof-object (peek-char port))} - tells if the string was terminated by a delimiter or eof. -\end{desc} - +The general delimited-input procedures described in +chapter~\ref{chapt:rdelim} are also useful for reading simple records, +such as single lines, paragraphs of text, or strings terminated by specific +characters. \subsection{Reading records} @@ -79,21 +41,22 @@ See section~\ref{sec:char-sets} for information on character set manipulation. Returns a procedure that reads records from a port. The procedure is invoked as follows: % - \codex{(\var{reader} \var{[port]}) $\longrightarrow$ \emph{{\str} or eof}} + \codex{(\var{reader} \var{[port]}) $\longrightarrow$ + \textrm{\textit{{\str} or eof}}} % A record is a sequence of characters terminated by one of the characters in \var{delims} or eof. If \var{elide-delims?} is true, then a contiguous sequence of delimiter chars are taken as a single record delimiter. If \var{elide-delims?} is false, then a delimiter char coming immediately - after a delimiter char produces an empty string record. The reader + after a delimiter char produces an empty-string record. 
The reader consumes the delimiting char(s) before returning from a read. - The \var{delims} set defaults to the set $\{\rm newline\}$. + The \var{delims} set defaults to the set $\{\mbox{newline}\}$. It may be a charset, string, character, or character predicate, and is coerced to a charset. The \var{elide-delims?} flag defaults to \ex{\#f}. - The \var{handle-delim} controls what is done with the record's + The \var{handle-delim} argument controls what is done with the record's terminating delimiter. \begin{inset} \begin{tabular}{lp{0.6\linewidth}} @@ -111,18 +74,6 @@ See section~\ref{sec:char-sets} for information on character set manipulation. a string or eof. \end{desc} -\defun{read-paragraph} {[port delimiter?]} {{\str} or eof} -\begin{desc} - This procedure skips blank lines, - then reads text from a port until a blank line or eof is found. - A ``blank line'' is a (possibly empty) line composed only of white space. - If \var{delimiter?} is true, the terminating blank line is included in the - return string; it defaults to \ex{\#f}. When the delimiter is included, - \verb|(match-string "\n[ \t]*\n$" paragraph)| - can be used to determine if the paragraph was terminated by a blank line - or by eof. -\end{desc} - \subsection{Parsing fields} @@ -183,7 +134,7 @@ These functions return a parser function that can be used as follows: \ex{'trim} & Delimiters are thrown away after parsing. (default) \\ \ex{'concat} & Delimiters are appended to the field preceding them. \\ \ex{'split} & Delimiters are returned as separate elements in - the field vector. + the field list. \end{tabular} \end{tightinset} @@ -193,7 +144,7 @@ These functions return a parser function that can be used as follows: error if there are more or fewer than $n$ fields in the record. 
If \var{num-fields} is a negative integer or zero, then $|n|$ fields are parsed, and the remainder of the string is returned in the last - element of the field vector; it is an error if fewer than $|n|$ fields + element of the field list; it is an error if fewer than $|n|$ fields can be parsed. The field parser produced is a procedure that can be employed as @@ -227,7 +178,7 @@ These functions return a parser function that can be used as follows: It is an error if a non-empty record does not end with a delimiter. To make the last delimiter optional, make sure the delimiter regexp - matches the end-of-string \verb|(regexp "$")|. + matches the end-of-string (regexp \ex{"\$"}). \item [\ex{infix-splitter}] Delimiters are interpreted as element \emph{separators}. If comma is the @@ -246,17 +197,17 @@ These functions return a parser function that can be used as follows: \end{tabular} \end{inset} - Note that separator semantics doesn't really allow for empty records -- - the straightforward grammar (\ie, \synvar{real-infix-record}) parses - an empty string as a singleton list whose one field is the empty string, - \ex{("")}, not as the empty record \ex{()}. This is unfortunate, + Note that separator semantics doesn't really allow for empty + records---the straightforward grammar (\ie, \synvar{real-infix-record}) + parses an empty string as a singleton list whose one field is the empty + string, \ex{("")}, not as the empty record \ex{()}. This is unfortunate, since it means that infix string parsing doesn't make \ex{string-append} - and \ex{vector-append} isomorphic. For example, + and \ex{append} isomorphic. For example, \codex{((infix-splitter ":") (string-append \var{x} ":" \var{y}))} doesn't always equal \begin{code} -(vector-append ((infix-splitter ":") \var{x}) - ((infix-splitter ":") \var{y}))\end{code} +(append ((infix-splitter ":") \var{x}) + ((infix-splitter ":") \var{y}))\end{code} It fails when \var{x} or \var{y} are the empty string. 
Terminator semantics \emph{does} preserve a similar isomorphism. @@ -322,7 +273,7 @@ Record & : suffix & \verb!:|$! suffix & : infix & non-: field \\ \subsection{Field readers} \defun{field-reader} {[field-parser rec-reader]} \proc - +\begin{desc} This utility returns a procedure that reads records with field structure from a port. The reader's interface is designed to make it useful in the \ex{awk} @@ -347,10 +298,11 @@ Record & : suffix & \verb!:|$! suffix & : infix & non-: field \\ For example, if port \ex{p} is open on \ex{/etc/passwd}, then \codex{((field-reader (infix-splitter ":" 7)) p)} returns two values: -\begin{code} +{\small +\begin{widecode} "dalbertz:mx3Uaqq0:107:22:David Albertz:/users/dalbertz:/bin/csh" ("dalbertz" "mx3Uaqq0" "107" "22" "David Albertz" "/users/dalbertz" - "/bin/csh")\end{code} + "/bin/csh")\end{widecode}} The \var{field-parser} defaults to the value of \ex{(field-splitter)}, a parser that picks out sequences of non-white-space strings. @@ -391,8 +343,10 @@ Record & : suffix & \verb!:|$! suffix & : infix & non-: field \\ \label{fig:field-readers} \end{boxedfigure} +\end{desc} -\subsection{Forward-progress guarantees and empty string matches} + +\subsection{Forward-progress guarantees and empty-string matches} A loop that pulls text off a string by repeatedly matching a regexp against that string can conceivably get stuck in an infinite loop if the regexp matches the empty string. For example, the regexps \verb|^|, @@ -438,7 +392,7 @@ Unix doesn't support peeking ahead into input streams. Scsh provides a loop macro and a set of field parsers that can be used to perform text processing very similar to the Awk programming language. -These basic functionality of Awk is factored in scsh into its component +The basic functionality of Awk is factored in scsh into its component parts. 
The control structure is provided by the \ex{awk} loop macro; the text I/O and parsers are provided by the field-reader subroutine library @@ -477,7 +431,7 @@ these values are bound to the variables given in the The first value returned is assumed to be the record; when it is the end-of-file object, the loop terminates. -For example, let's suppose we want to read items from \etc{/etc/password}, +For example, let's suppose we want to read items from \ex{/etc/passwd}, and we use the \ex{field-reader} procedure to define a record parser for \ex{/etc/passwd} entries: \codex{(define read-passwd (field-reader (infix-splitter ":" 7)))} @@ -538,10 +492,10 @@ it checks them all. \itum{\begin{tabular}[t]{l} -\ex{(range \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\ -\ex{(:range \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\ -\ex{(range: \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\ -\ex{(:range: \var{start-test} \var{stop-test} \vari{body}1 \ldots)} +\ex{(range\ \ \ \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\ +\ex{(:range\ \ \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\ +\ex{(range:\ \ \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\ +\ex{(:range:\ \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \end{tabular}} % These clauses become activated when \var{start-test} is true; @@ -554,10 +508,10 @@ it checks them all. The colons control whether or not the start and stop lines are processed by the clause.
For example: \begin{inset}\begin{tabular}{l@{\qquad}l} - \ex{(range 1 5 \ldots)} & Lines \phantom{1} 2 3 4 \\ - \ex{(:range 1 5 \ldots)} & Lines 1 2 3 4 \\ - \ex{(range: 1 5 \ldots)} & Lines \phantom{1} 2 3 4 5 \\ - \ex{(:range: 1 5 \ldots)} & Lines 1 2 3 4 5 + \ex{(range\ \ \ 1 5\ \ \ldots)} & Lines \phantom{1} 2 3 4 \\ + \ex{(:range\ \ 1 5\ \ \ldots)} & Lines 1 2 3 4 \\ + \ex{(range:\ \ 1 5\ \ \ldots)} & Lines \phantom{1} 2 3 4 5 \\ + \ex{(:range: 1 5\ \ \ldots)} & Lines 1 2 3 4 5 \end{tabular} \end{inset} @@ -590,7 +544,7 @@ Here are some examples of \ex{awk} being used to process various types of input stream. \begin{code} -(define $ vector-ref) ; Saves typing. +(define $ nth) ; Saves typing. ;;; Print out the name and home-directory of everyone in /etc/passwd: (let ((read-passwd (field-reader (infix-splitter ":" 7)))) @@ -614,8 +568,8 @@ of input stream. \begin{code} ;;; Read a series of integers from stdin. This expression evaluates -;;; to the number of positive numbers were read. Note our "record-reader" -;;; is the standard Scheme READ procedure. +;;; to the number of positive numbers that were read. Note our +;;; "record-reader" is the standard Scheme READ procedure. (awk (read) (i) ((npos 0)) ((> i 0) (+ npos 1)))\end{code} diff --git a/doc/scsh-manual/boxedminipage.sty b/doc/scsh-manual/boxedminipage.sty new file mode 100644 index 0000000..19e3e9d --- /dev/null +++ b/doc/scsh-manual/boxedminipage.sty @@ -0,0 +1,45 @@ +% boxedminipage.sty +% +% adds the boxedminipage environment---just like minipage, but has a +% box round it! +% +% The thickneess of the rules around the box is controlled by +% \fboxrule, and the distance between the rules and the edges of the +% inner box is governed by \fboxsep. +% +% This code is based on Lamport's minipage code. 
+ +\def\boxedminipage{\@ifnextchar [{\@iboxedminipage}{\@iboxedminipage[c]}} + +\def\@iboxedminipage[#1]#2{\leavevmode \@pboxswfalse + \if #1b\vbox + \else \if #1t\vtop + \else \ifmmode \vcenter + \else \@pboxswtrue $\vcenter + \fi + \fi + \fi\bgroup % start of outermost vbox/vtop/vcenter + \hsize #2 + \hrule\@height\fboxrule + \hbox\bgroup % inner hbox + \vrule\@width\fboxrule \hskip\fboxsep \vbox\bgroup % innermost vbox + \advance\hsize -2\fboxrule \advance\hsize-2\fboxsep + \textwidth\hsize \columnwidth\hsize + \@parboxrestore + \def\@mpfn{mpfootnote}\def\thempfn{\thempfootnote}\c@mpfootnote\z@ + \let\@footnotetext\@mpfootnotetext + \let\@listdepth\@mplistdepth \@mplistdepth\z@ + \@minipagerestore\@minipagetrue + \everypar{\global\@minipagefalse\everypar{}}} + +\def\endboxedminipage{% + \par\vskip-\lastskip + \ifvoid\@mpfootins\else + \vskip\skip\@mpfootins\footnoterule\unvbox\@mpfootins\fi + \egroup % ends the innermost \vbox + \hskip\fboxsep \vrule\@width\fboxrule + \egroup % ends the \hbox + \hrule\@height\fboxrule + \egroup% ends the vbox/vtop/vcenter + \if@pboxsw $\fi} + diff --git a/doc/scsh-manual/changes.tex b/doc/scsh-manual/changes.tex new file mode 100644 index 0000000..beba924 --- /dev/null +++ b/doc/scsh-manual/changes.tex @@ -0,0 +1,258 @@ +%&latex -*- latex -*- + +\chapter{Changes from the previous release} +\label{sec:changes} + +\newcommand{\itam}[1]{\item {#1} \\} + +This section details changes that have been made in scsh since +the previous release. + +Scsh is now much more robust. +All known bugs have been fixed. +There have been many improvements and extensions made. +We have also made some incompatible changes. + +The sections below briefly describe these new features and changes; +the relevant sections of the manual give the full details. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{New features} +This release incorporates several new features into scsh.
+ +\begin{itemize} +\itam{Control of buffered I/O} +Scsh now allows you to control the buffering policy used for doing I/O +on a Scheme port. + +\itam{Here-strings} +Scsh now has a new lexical feature, \verb|#<<|, that provides +the ability to enter long, multi-line string constants in scsh programs. +Such a string is called a ``here string,'' by analogy to the common +shell ``here document'' \ex{<<} redirection. + +\itam{Delimited readers and read-line} +Scsh now has a powerful set of delimited readers. +These can be used to read input delimited by +a newline character (\ex{read-line}), +a blank line (\ex{read-paragraph}), +or the occurrence of any character in an arbitrary set (\ex{read-delimited}). + +While these procedures can be applied to any Scheme input port, +there is native-code support for performing delimited reads on +Unix input sources, so doing block input with these procedures should be +much faster than the equivalent character-at-a-time Scheme code. + +\itam{New system calls} +With the sole exception of signal handlers, scsh now has all of {\Posix}. +This release introduces +\begin{itemize} +\item \ex{select}, +\item full terminal device control, +\item support for pseudo-terminal ``pty'' devices, +\item file locking, +\item process timing, +\item \ex{set-file-times}, +\item \ex{seek} and \ex{tell}. +\end{itemize} + +Note that having \ex{select}, pseudo-terminals, and tty device control means +that it is now possible to implement interesting network protocols, such as +telnet servers and clients, directly in Scheme. + +\itam{New command-line switches} +There is a new set of command-line switches that make it possible +to write shell scripts using the {\scm} module system. +Scripts can use the new command-line switches to open dependent +modules and load dependent source code. 
+Scripts can also be written in the {\scm} module language, +which allows you to use it both as a standalone shell script, +and as a code module that can be loaded and used by other Scheme programs. + +\itam{Static heap linking} +There is a new facility that allows you to compile a heap image +to a \ex{.o} file that can be linked with the scsh virtual machine. +This produces a standalone executable binary, makes startup time +near-instantaneous, and greatly improves memory performance---the +initial heap image is placed in the process' text pages, +where it is shared by different scsh processes, and does not occupy +space in the run-time heap. + +\oops{The static heap linker was not documented and installed in time + for this release.} + + +\end{itemize} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Incompatible improvements} +Some features of scsh have been improved in ways that are +not backwards-compatible with previous releases. +These changes should not affect most code; +however, please note the changes and modify your code accordingly. + +\begin{itemize} +\itam{New process-object data-type returned by \ex{fork}} +Previous releases were prone to fill up the kernel's process table +if a program forked large numbers of processes and subsequently failed +to use \ex{wait} to reclaim the entries in the kernel's process table. +(This is a problem in standard C environments, as well.) + +Scsh 0.4 introduces a new mechanism for automatically managing subprocesses. +Processes are no longer represented by an integer process id, +which is impossible to garbage-collect, but by an +abstract process data type that encapsulates the process id. +All processes are represented using the new data structures; +see the relevant section of the manual for further details. + +\itam{Better stdio/current-port synchronisation} +The \ex{(begin \ldots)} process form now does a \ex{stdio->stdports} +call before executing its body. 
+This means that the Scheme code in the body ``sees'' any external +redirections. +For example, it means that if a \ex{begin} form in the middle of a pipeline +performs I/O on the current input and output ports, it will be communicating +with its upstream and downstream pipes. +\Eg, this code works as intended without the need for explicit synchronisation: +\begin{verbatim} +(run (| (gunzip) + ;; Kill line 1 and insert double-sided + ;; code at head of Postscript. + (begin (read-line) ; Eat first line. + (display "%!PS-Adobe-2.0\n") + (display "statusdict /setduplexmode known ") + (display "{statusdict begin true ") + (display "setduplexmode end} if\n") + (exec-epf (cat))) + (lpr)) + (< paper.ps))\end{verbatim} +Arranging for the \ex{begin} process form to synchronise +the current I/O ports with stdio means that all process forms now +see their epf's redirections. + +\itam{\ex{file-match} more robust} +The \ex{file-match} procedure now catches any error condition +signalled by a match procedure, +and treats it as if the procedure had simply returned {\sharpf}, +\ie, match failure. +This means \ex{file-match} no longer gets blown out of the water by +trying to apply a function like \ex{file-directory?} to a dangling symlink, +and other related OS errors. + +\itam{Standard input now unbuffered} +Scsh's startup code now makes the initial current input port +(corresponding to file descriptor 0) unbuffered. +This keeps the shell from ``stealing'' input meant for subprocesses. +However, it does slow down character-at-a-time input processing. +If you are writing a program that is tolerant of buffered input, +and wish the efficiency gains, you can reset the buffering policy +yourself. + +\itam{``writeable'' now spelled ``writable''} +We inconsistently spelled \ex{file-writable?} and \ex{file-not-writable?} +in the manual and the implementation. +We have now standardised on the common spelling ``writable'' in both.
+The older bindings still exist in release 0.4, but will go away in future +releases. + +\itam{\protect\ex{char-set-member?} replaced} +We have de-released the \ex{char-set-member?} procedure. +The scsh 0.3 version of this procedure took arguments +in the following order: + \codex{(char-set-member? \var{char} \var{char-set})} +This argument order is in accordance with standard mathematical usage +(\ie, $x \in S$), and also consistent with the R4RS +\ex{member}, \ex{memq} and \ex{memv} procedures. +It is, however, exactly opposite from the argument order +used by the \ex{char-set-member?} in MIT Scheme's character-set library. +If we left things as they were, we risked problems with code +ported over from MIT Scheme. +On the other hand, changing to conformance with MIT Scheme meant +inconsistency with common mathematical notation and other long-standing +Scheme procedures. +Either way was bound to introduce confusion. + +We've taken the approach of simply removing the \ex{char-set-member?} +procedure altogether, and replacing it with a new procedure: +\codex{(char-set-contains? \var{cset} \var{char})} +Note that the argument order is consistent with the name. + +\itam{\ex{file-attributes} now \ex{file-info}} +In keeping with the general convention in scsh of naming procedures +that retrieve information about system resources \ex{\ldots-info} +(\eg, \ex{tty-info}, \ex{user-info}, \ex{group-info}), +the \ex{file-attributes} procedure is now named \ex{file-info}. + +We continue to export a \ex{file-attributes} binding for the current +release, but it will go away in future releases. + +\itam{Renaming of I/O synchronisation procedures} +The \ex{(stdio->stdports \var{thunk})} procedure has been +renamed \ex{with-stdio-ports*}; +there is now a corresponding \ex{with-stdio-ports} special form. +The \ex{stdio->stdports} procedure is now a nullary procedure +that side-effects the current set of current I/O port bindings.
+ +\itam{New meta-arg line-two syntax} +Scsh now uses a simplified grammar for describing command-line +arguments read by the ``meta-arg'' switch from line two of a shell script. +If you were using this feature in previous releases, the three incompatible +changes of which to be aware are: +(1) tab is no longer allowed as an argument delimiter, +(2) a run of space characters is not equivalent to a single space, +(3) empty arguments are written a different way. +\end{itemize} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Backwards-compatible improvements} + +Some existing features in scsh have been improved in ways that will +not affect existing code. + +\begin{itemize} +\itam{Improved error reporting} +Exception handlers that print out error messages and warnings now +print their messages on the error output port, +instead of the current output port. +Previous releases used the current output port, +a problem inherited from Scheme 48. + +Previous scsh releases flushed the Scheme 48 debugging tables when +creating the standard scsh heap image. +This trimmed the size of the heap image, but made error messages much +less comprehensible. +We now retain the debugging tables. +This bloats the heap image up by about 600kb. And worth it, too. + +(We also have some new techniques for eliminating the run-time memory +penalty imposed by these large heap images. +Scsh's new static-heap technology allows for this data to be linked +into the text pages of the vm's binary, where it will not be touched +by the GC or otherwise affect the memory system until it is referenced.) + +Finally, scsh now generates more informative error messages for syscall +errors. +For example, a file-open error previously told you what the error was +(\eg, ``Permission denied,'' or ``No such file or directory''), +but not which file you had tried to open. +We've improved this.
+ +\itam{Closing a port twice allowed} +Scsh used to generate an error if you attempted to close a port +that had already been closed. +This is now allowed. +The close procedure returns a boolean to indicate whether the port had +already been closed or not. + +\itam{Better time precision} +The \ex{time+ticks} procedure now returns sub-second precision on OS's +that support it. + +\itam{Nicer print-methods for basic data-types} +Scsh's standard record types now print more informatively. +For example, a process object includes the process id in its +printed representation: the process object for process id 2653 +prints as \verb|#{proc 2653}|. + +\end{itemize} diff --git a/doc/scsh-manual/code.sty b/doc/scsh-manual/code.sty new file mode 100644 index 0000000..2786d61 --- /dev/null +++ b/doc/scsh-manual/code.sty @@ -0,0 +1,296 @@ +% code.sty: -*- latex -*- +% Latex macros for a "weak" verbatim mode. +% -- like verbatim, except \, {, and } have their usual meanings. + +% Environments: code, tightcode, codeaux, codebox, centercode +% Commands: \dcd, \cddollar, \cdmath, \cd, \codeallowbreaks, \codeskip, \^ +% Already defined in LaTeX, but of some relevance: \#, \$, \%, \&, \_, \{, \} + +% Changelog at the end of the file. + +% These commands give you an environment, code, that is like verbatim +% except that you can still insert commands in the middle of the environment: +% \begin{code} +% for(x=1; x] option, then the following newline will +% be read *after* ^M is bound to \cr, so we're cool. If there isn't +% an option given (i.e., default to [c]), then the @\ifnextchar will +% gobble up the newline as it gobbles whitespace. So we insert the +% \cr explicitly. Isn't TeX fun? 
+\def\codebox{\leavevmode\@ifnextchar[{\@codebox}{\@codebox[c]\cr}} %] + +\def\@codebox[#1]% + {\hbox\bgroup$\if #1t\vtop \else \if#1b\vbox \else \vcenter \fi\fi\bgroup% + \tabskip\z@\setupcode\cd@obeycr% just before cd@obey + \halign\bgroup##\hfil\span} + +\def\endcodebox{\crcr\egroup\egroup\m@th$\egroup} + +% Center the box on the page: +\newenvironment{centercode}% + {\begin{center}\begin{codebox}[c]}% + {\end{codebox}\end{center}} + + +%% code, codeaux, tightcode +%%============================================================================= +%% Code environment as described above. Lines are kept on one page. +%% This actually works by setting a huge penalty for breaking +%% between lines of code. Code is indented same as other displayed paras. +%% Note: to increase left margin, use \begin{codeaux}{\leftmargin=1in}. + +% To allow pagebreaks, say \codeallowbreaks immediately inside the env. +% You can allow breaks at specific lines with a \pagebreak form. + +%% N.B.: The \global\@ignoretrue command must be performed just inside +%% the *last* \end{...} before the following text. If not, you will +%% get an extra space on the following line. Blech. + +%% This environment takes two arguments. +%% The second, required argument is the \list parameters to override the +%% \@listi... defaults. +%% - Usefully set by clients: \topsep \leftmargin +%% - Possible, but less useful: \partopsep +%% The first, optional argument is the extra \parskip glue that you get around +%% \list environments. It defaults to the value of \parskip. 
+\def\codeaux{\@ifnextchar[{\@codeaux}{\@codeaux[\parskip]}} %] +\def\@codeaux[#1]#2{% + \bgroup\parskip#1% + \begin{list}{}% + {\parsep\z@\rightskip\z@\listparindent\z@\itemindent\z@#2}% + \item[]\setupcode\cd@obeylines}% +\def\endcodeaux{\end{list}\leavevmode\egroup\ignorespaces\global\@ignoretrue} + +%% Code env is codeaux with the default margin and spacing \list params: +\def\code{\codeaux{}} \let\endcode=\endcodeaux + +%% Like code, but with no extra vertical space above and below. +\def\tightcode{\codeaux[=0pt]{\topsep\z@}}% +\let\endtightcode\endcodeaux +% {\vspace{-1\parskip}\begin{codeaux}{\partopsep\z@\topsep\z@}}% +% {\end{codeaux}\vspace{-1\parskip}} + + + +% Reasonable separation between lines of code +\newcommand{\codeskip}{\penalty0\vspace{2ex}} + + +% \cd is used to build a code environment in the middle of text. +% Note: only difference from display code is that cr's are taken +% as unbreakable spaces instead of linebreaks. + +\def\cd{\leavevmode\begingroup\ifmmode\let\startcode=\startmcode\else% + \let\startcode\starttcode\fi% + \setupcode\cd@obeycrsp\startcode} + +\def\starttcode#1{#1\endgroup} +\def\startmcode#1{\hbox{#1}\endgroup} + + +% Restore $&#^_~% to their normal catcodes +% Define \^ to give the ^ char. +% \dcd points to this guy inside a code env. +\def\cd@dcd{\catcode`\$=3\catcode`\&=4\catcode`\#=6\catcode`\^=7% + \catcode`\_=8\catcode`\~=13\catcode`\%=14\def\^{\char`\^}} + +% Selectively enable $, and $^_ as special. +% \cd@mathspecial also defines \^ give the ^ char. +% \cddollar and \cdmath point to these guys inside a code env. +\def\cd@dollarspecial{\catcode`\$=3} +\def\cd@mathspecial{\catcode`\$=3\catcode`\^=7\catcode`\_=8% + \def\^{\char`\^}} + + +% Change log: +% Started off as some macros found in C. Rich's library. +% Olin 1/90: +% Removed \makeatletter, \makeatother's -- they shouldn't be there, +% because style option files are read with makeatletter. 
The terminal +% makeatother screwed things up for the following style options. +% Olin 3/91: +% Rewritten. +% - Changed things so blank lines don't get compressed out (the \leavevmode +% in \cd@cr and \cd@crwb). +% - Changed names to somewhat less horrible choices. +% - Added lots of doc, so casual hackers can more easily mess with all this. +% - Removed `'"@ from the set of hacked chars, since they are already +% non-special. +% - Removed the bigcode env, whose effect can be had with the \codeallowbreaks +% command. +% - Removed the \@noligs command, since it's already defined in latex.tex. +% - Win big with the new \dcd, \cddollar, and \cdmath commands. +% - Now, *only* the chars \{} are special inside the code env. If you need +% more, use the \dcd command inside a group. +% - \cd now works inside math mode. (But if you use it in a superscript, +% it still comes out full size. You must explicitly put a \scriptsize\tt +% inside the \cd: $x^{\cd{\scriptsize\tt...}}$.) A \leavevmode was added +% so that if you begin a paragraph with a \cd{...}, TeX realises you +% are starting a paragraph. +% - Added the codebox env. Tricky bit involving the first line hacked +% with help from David Long. +% Olin 8/94 +% Changed the font commands for LaTeX2e. diff --git a/doc/scsh-manual/ct.sty b/doc/scsh-manual/ct.sty new file mode 100644 index 0000000..1edfbc0 --- /dev/null +++ b/doc/scsh-manual/ct.sty @@ -0,0 +1,6 @@ +% Loads cmtt fonts in on \tt. -*- latex -*- +% I prefer these to the Courier fonts that latex gives you w/postscript styles. +% Courier is too spidery and too wide -- it's hard to get 80 chars on a line. +% -Olin + +\renewcommand{\ttdefault}{cmtt} diff --git a/doc/scsh-manual/decls.tex b/doc/scsh-manual/decls.tex new file mode 100644 index 0000000..0a3ae03 --- /dev/null +++ b/doc/scsh-manual/decls.tex @@ -0,0 +1,271 @@ +\makeatletter +\def\ie{\mbox{\emph{i.e.}}} % \mbox keeps the last period from +\def\Ie{\mbox{\emph{I.e.}}} % looking like an end-of-sentence.
+\def\eg{\mbox{\emph{e.g.}}} +\def\Eg{\mbox{\emph{E.g.}}} +\def\etc{{\em etc.}} + +\def\Lisp{\textsc{Lisp}} +\def\CommonLisp{\textsc{Common Lisp}} +\def\Ascii{\textsc{Ascii}} +\def\Ansi{\textsc{Ansi}} +\def\Unix{{Unix}} % Not smallcaps, according to Bart. +\def\Scheme{{Scheme}} +\def\scm{{Scheme 48}} +\def\R4RS{R4RS} +\def\Posix{\textsc{Posix}} + +\def\sharpf{\normalfont\texttt{\#f}} +\def\sharpt{\normalfont\texttt{\#t}} +\newcommand{\synteq}{\textnormal{::=}} + +\def\maketildeother{\catcode`\~=12} +\def\maketildeactive{\catcode`\~=13} +\def\~{\char`\~} + +% One-line code examples +%\newcommand{\codex}[1]% One line, centred. Tight spacing. +% {$$\abovedisplayskip=.75ex plus 1ex minus .5ex% +% \belowdisplayskip=\abovedisplayskip% +% \abovedisplayshortskip=0ex plus .5ex% +% \belowdisplayshortskip=\abovedisplayshortskip% +% \hbox{\ttt #1}$$} +%\newcommand{\codex}[1]{\begin{tightinset}\ex{#1}\end{tightinset}\ignorespaces} +\newcommand{\codex}[1]{\begin{leftinset}\ex{#1}\end{leftinset}\ignorespaces} + +\def\widecode{\codeaux{\leftmargin=0pt\topsep=0pt}} +\def\endwidecode{\endcodeaux} + +% For multiletter vars in math mode: +\newcommand{\var}[1]{\mbox{\frenchspacing\it{#1}}} +\newcommand{\vari}[2]{${\mbox{\it{#1}}}_{#2}$} + +%% What you frequently want when you say \tt: +\def\ttchars{\catcode``=13\@noligs\frenchspacing} +\def\ttt{\normalfont\ttfamily\ttchars} + +% Works in math mode; all special chars remain special; cheaper than \cd. +% Will not be correct size in super and subscripts, though. 
+\newcommand{\ex}[1]{{\normalfont\texttt{\ttchars #1}}} + +\newenvironment{inset} + {\bgroup\parskip=1ex plus 1ex\begin{list}{}% + {\topsep=0pt\rightmargin\leftmargin}% + \item[]}% + {\end{list}\leavevmode\egroup\global\@ignoretrue} + +\newenvironment{leftinset} + {\bgroup\parskip=1ex plus 1ex\begin{list}{}% + {\topsep=0pt}% + \item[]}% + {\end{list}\leavevmode\egroup\global\@ignoretrue} + +\newenvironment{tightinset} + {\bgroup\parskip=0pt\begin{list}{}% + {\topsep=0pt\rightmargin\leftmargin}% + \item[]}% + {\end{list}\leavevmode\egroup\global\@ignoretrue} + +\newenvironment{tightleftinset} + {\bgroup\parskip=0pt\begin{list}{}% + {\topsep=0pt}% + \item[]}% + {\end{list}\leavevmode\egroup\global\@ignoretrue} + +\long\def\remark#1{\bgroup\small\begin{quote}\textsl{Remark: } #1\end{quote}\egroup} +\newenvironment{remarkenv}{\bgroup\small\begin{quote}\textsl{Remark: }}% + {\end{quote}\egroup} +\newcommand{\oops}[1]{\bgroup\small\begin{quote}\textsl{Oops: } #1\end{quote}\egroup} + +\newcommand{\note}[1]{\{Note #1\}} + +\newcommand{\itum}[1]{\item{\bf #1}\\*} + +% For use in code. The \llap magicness makes the lambda exactly as wide as +% the other chars in \tt; the \hskip shifts it right a bit so it doesn't +% crowd the left paren -- which is necessary if \tt is cmtt. +% Note that (\l{x y} (+ x y)) uses the same number of columns in TeX form +% as it produces when typeset. This makes it easy to line up the columns +% in your input. \l is bound to some useless command in LaTeX, so we have to +% define it w/renewcommand. +\let\oldl\l %Save the old \l on \oldl +\renewcommand{\l}[1]{\ \llap{$\lambda$\hskip-.05em}\ (#1)} + +% This one is for the rare (lambda x ...) case -- it doesn't have the +% column-invariant property. Oh, well. 
+\newcommand{\lx}[1]{\ \llap{$\lambda$\hskip-.05em}\ {#1}} + +% For subcaptions +\newcommand{\subcaption}[1] +{\unskip\vspace{-2mm}\begin{center}\unskip\em#1\end{center}} + +%%% T release notes stuff +\newlength{\notewidth} +\setlength{\notewidth}{\textwidth} +\addtolength{\notewidth}{-1.25in} + +%\newcommand{\remark} [1] +% {\par\vspace{\parskip} +% \parbox[t]{.75in}{\sc Remark:} +% \parbox[t]{\notewidth}{\em #1} +% \vspace{\parskip} +% } + +\newenvironment{optiontable}% + {\begin{tightinset}\renewcommand{\arraystretch}{1.5}% + \begin{tabular}{@{}>{\ttt}ll@{}}}% + {\end{tabular}\end{tightinset}}% + +\newenvironment{desctable}[1]% + {\begin{inset}\renewcommand{\arraystretch}{1.5}% + \begin{tabular}{lp{#1}}}% + {\end{tabular}\end{inset}} + +\def\*{{\ttt *}} + +% Names of things + +\newcommand{\keyword} [1]{\index{#1}{\normalfont\textsf{#1}}} + +\newcommand{\evalto}{$\Longrightarrow$\ } +\renewcommand{\star}{$^*$\/} +\newcommand{\+}{$^+$} + +% Semantic domains, used to indicate the type of a value + +\newcommand{\sem}{\normalfont\itshape} %semantic font +\newcommand{\semvar}[1]{\textit{#1}} %semantic font +\newcommand{\synvar}[1]{\textrm{\textit{$<$#1$>$}}} %syntactic font +\newcommand{\type}{\sem} +\newcommand{\zeroormore}[1]{{\sem #1$_1$ \ldots #1$_n$}} +\newcommand{\oneormore}[1]{{\sem #1$_1$ #1$_2$ \ldots #1$_n$}} + +\newcommand{\proc} {{\sem procedure}} +\newcommand{\boolean} {{\sem boolean}} +\newcommand{\true} {{\sem true}} +\newcommand{\false} {{\sem false}} + +\newcommand{\num} {{\sem number}} +\newcommand{\fixnum} {{\sem fixnum}} +\newcommand{\integer} {{\sem integer}} +\newcommand{\real} {{\sem real}} + +\newcommand{\character} {{\sem character}} +\newcommand{\str} {{\sem string}} +\newcommand{\sym} {{\sem symbol}} + +\newcommand{\location} {{\sem location}} +\newcommand{\object} {{\sem object}} + +\newcommand{\error} {{\sem error}} +\newcommand{\syntaxerror} {{\sem syntax error}} +\newcommand{\readerror} {{\sem read error}} +\newcommand{\undefined} {{\sem 
undefined}} +\newcommand{\noreturn} {{\sem no return value}} + +\newcommand{\port} {{\sem port}} + +% semantic variables + +\newcommand{\identifier} {{\sem identifier}} +\newcommand{\identifiers} {\zeroormore{identifier}} +\newcommand{\expr} {{\sem expression}} +\newcommand{\body} {{\sem body}} +\newcommand{\valueofbody} {{\sem value~of~body}} +\newcommand{\emptylist} {{\sem empty~list}} +\newcommand{\car} {\keyword{car}} +\newcommand{\cdr} {\keyword{cdr}} + + +% generally useful things + +% For line-breaking \tt stuff. +\renewcommand{\=}{\discretionary{-}{}{-}} +\newcommand{\ob}{\discretionary{}{}{}} % Optional break. + +\newcommand{\indx}[1]{#1 \index{ #1 }} +%\newcommand{\gloss}[1]{#1 \glossary{ #1 }} + +% This lossage produces #2 if #1 is zero length, otw #3. +% We use it to conditionally add a space between the procedure and +% the args in procedure prototypes, but only if there are any args-- +% we want to produce "(read)", not "(read )". +\newlength{\voidlen} +\newcommand{\testvoid}[3]{\settowidth\voidlen{#1}\ifdim\voidlen>0in{#3}\else{#2}\fi} + + +% Typeset a definition prototype line, e.g.: +% (cons x y) -> pair procedure +% +% Five args are: proc-name args ret-value(s) type index-entry +\newcommand{\dfnix}[4]% FIVE args, really. + {\hbox to \linewidth{\ttchars% + {\ttt(#1\testvoid{#2}{}{\ }{\sem{#2}}\testvoid{#2}{}{\/})\hskip 1em minus +0.5em$\longrightarrow$\hskip 1em minus 0.5em{\sem{#3}}\hfill\quad\textnormal{#4}}}\index} + +\newcommand{\dfnx}[4] {\dfnix{#1}{#2}{#3}{#4}{#1@\texttt{#1}}} + +\newcommand{\dfn} {\par\medskip\dfnx} % Takes 4 args, actually. +\newcommand{\dfni} {\par\medskip\dfnix} % Takes 5 args, actually. + +\newcommand{\defvar} {\par\medskip\defvarx} % Takes 2 args, actually. +\newcommand{\defvarx}[2]% + {\index{#1} + \hbox to \linewidth{\ttchars{{\ttt{#1}} \hfill #2}}}% + +% Typeset the protocol line, then do the following descriptive text indented.
+% If you want to group two procs together, do the first one with a \dfn, +% then the second one, and the documentation, with a dfndescx environment. + +% This one doesn't put whitespace above. Use it immediately after a \dfn +% to group two prototype lines together. +\newenvironment{dfndescx}[4]% + {\dfnx{#1}{#2}{#3}{#4}\begin{desc}}{\end{desc}} + +\newenvironment{dfndesc}[4] % This one puts whitespace above. + {\par\medskip\begin{dfndescx}{#1}{#2}{#3}{#4}} + {\end{dfndescx}} + +\newenvironment{desc}% + {\nopagebreak[2]% + \smallskip + \bgroup\begin{list}{}{\topsep=0pt\parskip=0pt}\item[]} + {\end{list}\leavevmode\egroup\global\@ignoretrue} + +\newcommand{\defun} [3] {\dfn{#1}{#2}{#3}{procedure}} % preskip +\newcommand{\defunx}[3]{\dfnx{#1}{#2}{#3}{procedure}} % no skip + +\newenvironment{defundescx}[3]% + {\begin{dfndescx}{#1}{#2}{#3}{procedure}} + {\end{dfndescx}} + +\newenvironment{defundesc}[3]% + {\begin{dfndesc}{#1}{#2}{#3}{procedure}} + {\end{dfndesc}} + + +\newenvironment{column}{\begin{tabular}[t]{@{}l@{}}}{\end{tabular}} + +\newenvironment{exampletable}% + {\begin{leftinset}% + \newcommand{\header}[1]{\multicolumn{2}{@{}l@{}}{##1}\\}% + \newcommand{\splitline}[2]% + {\multicolumn{2}{@{}l@{}}{##1}\\\multicolumn{2}{@{}l@{}}{\qquad\evalto\quad{##2}}} + \begin{tabular}{@{}l@{\quad\evalto\quad}l@{}}}% + {\end{tabular}\end{leftinset}} + +% Put on blank lines in a code env to allow a pagebreak. +\newcommand{\cb}{\pagebreak[0]} + +\newenvironment{boxedcode}% One-column tabular with rules on all four sides, set inside an inset. + {\begin{inset}\begin{tabular}{|l|}\hline}% \begin{tabular}, not bare \tabular, so the \end{tabular} below passes LaTeX's environment-name check. + {\\ \hline \end{tabular}\end{inset}} + +% A ragged-right decl that doesn't redefine \\ -- for use in tables.
+\newcommand{\raggedrightparbox}{\let\temp=\\\raggedright\let\\=\temp} + +\newenvironment{boxedfigure}[1]% + {\begin{figure}[#1]\begin{boxedminipage}{\linewidth}\vskip 1.5ex} + {\end{boxedminipage}\end{figure}} + +\makeatother diff --git a/doc/scsh-manual/draftfooters.sty b/doc/scsh-manual/draftfooters.sty new file mode 100644 index 0000000..862436d --- /dev/null +++ b/doc/scsh-manual/draftfooters.sty @@ -0,0 +1,76 @@ +% Document style option "draftfooter" +% -- usage: \documentstyle[...,draftfooter,...]{...} +% -- puts "DRAFT" with date and time in page footer +% +% Olin Shivers 1/17/94 +% - Hacked from code I used in my dissertation and from code in a +% drafthead.sty package written by Stephen Page sdpage@uk.ac.oxford.prg. +%---------------------------------------------------------------------------- + +% +% compute the time in hours and minutes; make new variables \timehh and \timemm +% +\newcount\timehh\newcount\timemm +\timehh=\time +\divide\timehh by 60 \timemm=\time +\count255=\timehh\multiply\count255 by -60 \advance\timemm by \count255 +% + +\def\draftbox{{\protect\small\bf \fbox{DRAFT}}} +\def\drafttime{% + {\protect\small\sl\today\ -- \ifnum\timehh<10 0\fi% + \number\timehh\,:\,\ifnum\timemm<10 0\fi\number\timemm}} +\def\drafttimer{\protect\makebox[0pt][r]{\drafttime}} +\def\drafttimel{\protect\makebox[0pt][l]{\drafttime}} + +\def\thepagel{\protect\makebox[0pt][l]{\rm\thepage}} +\def\thepager{\protect\makebox[0pt][r]{\rm\thepage}} + +% Header is empty. +% Footer is "date DRAFT pageno" +\def\ps@plain{ + \let\@mkboth\@gobbletwo + \let\@oddhead\@empty \let\@evenhead\@empty + + \def\@oddfoot{\reset@font\rm\drafttimel\hfil\draftbox\hfil\thepager} + \if@twoside + \def\@evenfoot{\reset@font\rm\thepagel\hfil\draftbox\hfil\drafttimer} + \else \let\@evenfoot\@oddfoot + \fi +} + +% Aux macro -- sets footer to be "date DRAFT". 
+\def\@draftfooters{ + \def\@oddfoot{\reset@font\rm\drafttimel\hfil\draftbox} + \if@twoside + \def\@evenfoot{\reset@font\rm\draftbox\hfil\drafttimer} + \else \let\@evenfoot\@oddfoot + \fi + } + +% Header is empty. +% Footer is "date DRAFT". +\def\ps@empty{ + \let\@mkboth\@gobbletwo + \let\@oddhead\@empty \let\@evenhead\@empty + \@draftfooters + } + +% Header is defined by the document style (article, book, etc.). +% Footer is "date DRAFT". +\let\@draftoldhead\ps@headings +\def\ps@headings{ + \@draftoldhead % Do the default \pagestyle{headings} stuff. + \@draftfooters % Then define the draft footers: + } + +% Header is defined by the document style (article, book, etc.), +% and filled in by user's \markboth and \markright commands. +% Footer is "date DRAFT". +\let\@draftoldmyhead\ps@myheadings +\def\ps@myheadings{ + \@draftoldmyhead % Do the default \pagestyle{myheadings} stuff. + \@draftfooters % Then define the draft footers: + } + +\ps@plain diff --git a/doc/scsh-manual/front.tex b/doc/scsh-manual/front.tex index 512df8c..b393af9 100644 --- a/doc/scsh-manual/front.tex +++ b/doc/scsh-manual/front.tex @@ -1,10 +1,10 @@ %&latex -*- latex -*- \title{Scsh Reference Manual} -\subtitle{For Scsh release 0.3 \\ - December 25, 1994} +\subtitle{For scsh release 0.4} +% December 25, 1994 \author{Olin Shivers and Brian D.~Carlstrom} -\date{12/94} +\date{October 31, 1995} \maketitle %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/doc/scsh-manual/headings.sty b/doc/scsh-manual/headings.sty new file mode 100644 index 0000000..c928f58 --- /dev/null +++ b/doc/scsh-manual/headings.sty @@ -0,0 +1,16 @@ +% headings.tex -*- latex -*- +% Quieter headings that the ones used in article.sty. +% This is not a style option. Don't say [headings]. +% Instead, say \input{headings} after the \documentstyle. 
+% -Olin 7/91 + +\makeatletter + +\def\section{\@startsection {section}{1}{\z@}{-3.5ex plus -1ex minus + -.2ex}{2.3ex plus .2ex}{\large\normalfont\bfseries}} +\def\subsection{\@startsection{subsection}{2}{\z@}{-3.25ex plus -1ex minus + -.2ex}{1.5ex plus .2ex}{\normalsize\normalfont\bfseries}} +\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-3.25ex plus +-1ex minus -.2ex}{1.5ex plus .2ex}{\normalsize\normalfont\bfseries}} + +\makeatother diff --git a/doc/scsh-manual/intro.tex b/doc/scsh-manual/intro.tex index b458128..294575e 100644 --- a/doc/scsh-manual/intro.tex +++ b/doc/scsh-manual/intro.tex @@ -5,14 +5,86 @@ This is a draft manual for scsh, a {\Unix} shell that is embedded within {\Scheme}. -Scsh comes built on top of {\scm}, and it has two components: +Scsh is a Scheme system designed for writing useful standalone Unix +programs and shell scripts---it spans a wide range of application, +from ``script'' applications usually handled with perl or sh, +to more standard systems applications usually written in C. + +Scsh comes built on top of {\scm}, and has two components: a process notation for running programs and setting up pipelines and redirections, -and a complete syscall library for low-level access to the OS. +and a complete syscall library for low-level access to the operating system. This manual gives a complete description of scsh. A general discussion of the design principles behind scsh can be found in a companion paper, ``A Scheme Shell.'' +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Obtaining scsh} +Scsh is distributed via net publication. +We place new releases at well-known network sites, +and allow them to propagate from there. 
+We currently release scsh to the following Internet sites: +\begin{inset}\begin{flushleft} +\ex{ftp://ftp-swiss.ai.mit.edu/pub/su/} \\ +\ex{http://www-swiss.ai.mit.edu/scsh/scsh.html} \\ +\ex{http://www.cs.indiana.edu/scheme-repository/} \\ +\end{flushleft} +\end{inset} +These sites are + the MIT Project Mac ftp server, + the Scheme Shell home page, and + the Indiana Scheme Repository home page, +respectively. +Each should have a compressed tar file of the entire scsh release, +which includes all the source code and the manual, +and a separate file containing just this manual in Postscript form, +for those who simply wish to read about the system. + +However, nothing is certain for long on the Net. +Probably the best way to get a copy of scsh is to use a network +resource-discovery tool, such as archie, +to find ftp servers storing scsh tar files. +Take the set of sites storing the most recent release of scsh, +choose one close to your site, and download the tar file. + +\section{Building scsh} +Scsh currently runs on a fairly large set of Unix systems, including +Linux, NetBSD, SunOS, Solaris, AIX, NeXTSTEP, Irix, and HP-UX. +We use the Gnu project's autoconf tool to generate self-configuring +shell scripts that customise the scsh Makefile for different OS variants. +This means that if you use one of the common Unix implementations, +building scsh should require exactly the following steps: +\begin{inset} +\begin{tabular}{l@{\qquad}l} +\ex{gunzip scsh-0.4.0.tar.gz} & \emph{Uncompress the release tar file.} \\ +\ex{tar xfv scsh-0.4.0.tar} & \emph{Unpack the source code.} \\ +\ex{cd scsh-0.4.0} & \emph{Move to the source directory.} \\ +\ex{./configure} & \emph{Examine host; build Makefile.} \\ +\ex{make} & \emph{Build system.} +\end{tabular} +\end{inset} +When you are done, you should have a virtual machine compiled in +file \ex{scshvm}, and a heap image in file \ex{scsh/scsh.image}.
+Typing +\begin{code} +make install +\end{code} +will install these programs in your installation directory +(by default, \ex{/usr/local}), along with a small stub startup +binary, \ex{scsh}. + +If you don't have the patience to do this, you can start up +a Scheme shell immediately after the initial make by simply +saying +\codex{./scshvm -o ./scshvm -i scsh/scsh.image} +See chapter~\ref{chapt:running} for full details on installation +locations and startup options. + +It is not too difficult to port scsh to another Unix platform if your +OS is not supported by the current release. +See the release notes for more details on how to do this. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Caveats} It is important to note what scsh is \emph{not}, as well as what it is. @@ -20,13 +92,13 @@ Scsh, in the current release, is primarily designed for the writing of shell scripts---programming. It is not a very comfortable system for interactive command use: the current release lacks job control, command-line editing, a terse, -convenient command syntax, and it can not be made to read in an initialisation +convenient command syntax, and it does not read in an initialisation file analogous to \ex{.login} or \ex{.profile}. -We hope to address all of these problems in future releases; +We hope to address all of these issues in future releases; we even have designs for several of these features; -but the system as-released does not currently address these issues. +but the system as-released does not currently provide these features. -As a first release, the system has some rough edges. +In the current release, the system has some rough edges. It is quite slow to start up; we hope to fix that by providing a static-heap linker in the next release. For now, the initial image load takes about a cpu second. 
@@ -37,6 +109,7 @@ using markup, so we can generate html, info nodes, and {\TeX} output from the single source without having to deal with Texinfo. But it's all there is, for now. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Naming conventions} Scsh follows a general naming scheme that consistently employs a set of abbreviations. @@ -90,11 +163,11 @@ Some of the common ones are: \item[\ex{create-}] Procedures that create objects in the file system (files, directories, - temp files, fifos, etc), begin with \ex{create-\ldots}. + temp files, fifos, \etc), begin with \ex{create-\ldots}. \item [\ex{delete-}] Procedures that delete objects from the file system (files, - directories, temp files, fifos, etc), begin with \ex{delete-\ldots}. + directories, temp files, fifos, \etc), begin with \ex{delete-\ldots}. \item[ \ex{\var{record}:\var{field}} ] Procedures that access fields of a record are usually written @@ -118,11 +191,13 @@ For example, the various {\Unix} signal integers have the names \ex{signal/cont}, \ex{signal/kill}, \ex{signal/int}, \ex{signal/hup}, and so forth. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Lexical issues} Scsh's lexical syntax is just {\R4RS} {\Scheme}, with the following exceptions. -Scsh differs from {\R4RS} {\Scheme} in the following ways: +\subsection{Extended symbol syntax} +Scsh's symbol syntax differs from {\R4RS} {\Scheme} in the following ways: \begin{itemize} \item In scsh, symbol case is preserved by \ex{read} and is significant on symbol comparison. This means @@ -132,10 +207,7 @@ Scsh differs from {\R4RS} {\Scheme} in the following ways: \item ``\ex{-}'' and ``\ex{+}'' are allowed to begin symbols. So the following are legitimate symbols: \codex{-O2 -geometry +Wn} -\end{itemize} -% -Scsh also extends {\R4RS} lexical syntax in the following ways: -\begin{itemize} + \item ``\ex{|}'' and ``\ex{.}'' are symbol constituents. 
This allows \ex{|} for the pipe symbol, and \ex{..} for the parent-directory symbol. (Of course, ``\ex{.}'' alone is not a symbol, but a @@ -144,16 +216,119 @@ Scsh also extends {\R4RS} lexical syntax in the following ways: \item A symbol may begin with a digit. So the following are legitimate symbols: \codex{9x15 80x36-3+440} - -\item Strings are allowed to contain the {\Ansi} C escape sequences - such as \verb|\n| and \verb|\161|. - -\item \ex{\#!} is a comment read-macro similar to \ex{;}. - This is used to write shell scripts. When the reader - encounters \ex{\#!}, it skips characters until it finds - the sequence new\-line/{\ob}ex\-cla\-ma\-tion-{\ob}point/{\ob}sharp-{\ob}sign/{\ob}new\-line. \end{itemize} +\subsection{Extended string syntax} +Scsh strings are allowed to contain the {\Ansi} C escape sequences + such as \verb|\n| and \verb|\161|. + +\subsection{Block comments and executable interpreter-triggers} +Scsh allows source files to begin with a header of the form +\codex{\#!/usr/local/bin/scsh -s} +The Unix operating system treats source files beginning with the headers +of this form specially; +they can be directly executed by the operating system +(see chapter~\ref{chapt:running} for information on how to use this feature). +The scsh interpreter ignores this special header by treating \ex{\#!} as a +comment marker similar to \ex{;}. +When the scsh reader encounters \ex{\#!}, it skips characters until it finds +the closing sequence +new\-line/{\ob}ex\-cla\-ma\-tion-{\ob}point/{\ob}sharp-{\ob}sign/{\ob}new\-line. + +Although the form of the \ex{\#!} read-macro was chosen to support +interpreter-triggers for executable Unix scripts, +it is a general block-comment sequence and can be used as such +anywhere in a scsh program. + +\subsection{Here-strings} +The read macro \ex{\#<} is used to introduce ``here-strings'' +in programs, similar to the \ex{<<} ``here document'' redirections +provided by sh and csh. 
+There are two kinds of here-string, character-delimited and line-delimited; +they are both introduced by the \ex{\#<} sequence. + +\subsubsection{Character-delimited here-strings} +A \emph{character-delimited} here-string has the form +\codex{\#<\emph{x}...stuff...\emph{x}} +where \emph{x} is any single character +(except \ex{<}, see below), +which is used to delimit the string bounds. +Some examples: +\begin{inset} +\begin{tabular}{ll} + Here-string syntax & Ordinary string syntax \\ \hline + \verb:#<|Hello, world.|: & \verb:"Hello, world.": \\ + \verb:# \l diff --git a/doc/scsh-manual/mantitle.sty b/doc/scsh-manual/mantitle.sty new file mode 100644 index 0000000..362e642 --- /dev/null +++ b/doc/scsh-manual/mantitle.sty @@ -0,0 +1,75 @@ +% This is the title page style stolen from the Texinfo design, +% and expressed as a LaTeX style option. It is useful for manuals. +% +% Note that I play some *really* revolting games here to override +% the vertical and horizontal margins temporarily for the title page. +% The layout assumes you have 8.5" x 11" paper. You'd have to redo this +% for A4 or another size. +% -Olin 7/94 + + +% Fonts for title page: +\DeclareFixedFont{\titlefont}% + {\encodingdefault}{\familydefault}{bx}{\shapedefault}{20.5pt} +\DeclareFixedFont{\authorfnt}% + {\encodingdefault}{\familydefault}{bx}{\shapedefault}{14.4pt} +\DeclareFixedFont{\subtitlefnt}% + {\encodingdefault}{\familydefault}{m}{\shapedefault}{11} + +%\def\authorrm{\normalfont\selectfont\fontseries{bx}\fontsize{14.4}{14.4}} +%\def\subtitlefnt{\normalfont\selectfont\fontsize{11}{11}} + +\newskip\titlepagetopglue \titlepagetopglue = 2.5in + + +\newlength{\widewidth} +\setlength{\widewidth}{6.5in} +\newlength{\negwidemargin} +\setlength{\negwidemargin}{-\oddsidemargin} % Reset the margin +\addtolength{\negwidemargin}{-1in} % to edge of page +\addtolength{\negwidemargin}{1in} % Then move right one inch. 
+ +%\def\wideline#1{\hbox to 0pt{\hspace\negwidemargin\hbox to\widewidth{#1}}} +\def\wideline#1{\hbox{\makebox[0pt][l]{\hspace\negwidemargin\hbox to\widewidth{#1}}}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\def\maketitle{\begin{titlepage} + \thispagestyle{empty} + \let\footnotesize\small \let\footnoterule\relax + \null + \parindent=0pt + \def\subtitlefont{\normalbaselineskip = 13pt \normalbaselines \subtitlefnt}% + \def\authorfont{\normalbaselineskip = 16pt \normalbaselines \authorfnt}% +% + % Leave some space at the very top of the page. + \vspace*{-1in}\vspace*{-\topmargin}\vspace*{-\headheight}\vspace*{-\headsep} + \vglue\titlepagetopglue +% + \wideline{\titlefont \@title \hfill} % title +% \vskip4pt + \vskip -0.3\baselineskip + \wideline{\leaders\hrule height 4pt\hfill} + \wideline{\hfill\subtitlefont\begin{tabular}[t]{@{}r@{}}\@subtitle% + \end{tabular}\hskip 1in} % subtitle +% + % author + \vskip 0pt plus 1filll + \wideline{\authorfont \begin{tabular}[t]{@{}c@{}}\@author + \end{tabular}\hfill} +% +% \vskip4pt + \vskip -0.3\baselineskip + \wideline{\leaders\hrule height 2pt\hfill} + + % This weirdness puts the bottom line 2.75 in from the bottom of + % an 11in page. + \vskip \textheight \vskip \headsep \vskip \headheight + \vskip \topmargin \vskip 1in \vskip -11in \vskip 2.75in + + \gdef\@author{}\gdef\@title{}\gdef\@subtitle{}\let\maketitle\relax + \end{titlepage} + \setcounter{page}{2} + } + +\def\subtitle#1{\gdef\@subtitle{#1}} +\def\@subtitle{} diff --git a/doc/scsh-manual/matter.sty b/doc/scsh-manual/matter.sty new file mode 100644 index 0000000..f0c4fda --- /dev/null +++ b/doc/scsh-manual/matter.sty @@ -0,0 +1,16 @@ +%&latex -*- latex -*- +% Implement the \frontmatter, \mainmatter, and \backmatter macros, +% so I can use them in reports, not just books. 
+ +\newif\if@mainmatter \@mainmattertrue + +\newcommand\frontmatter{% + \cleardoublepage\@mainmatterfalse\pagenumbering{roman}} + +\newcommand\mainmatter{% + \cleardoublepage\@mainmattertrue% + \pagenumbering{arabic}\setcounter{page}{1}} + +\newcommand\backmatter{% + \if@openright\cleardoublepage\else\clearpage\fi% + \@mainmatterfalse} diff --git a/doc/scsh-manual/miscprocs.tex b/doc/scsh-manual/miscprocs.tex index 04cee3e..f6d6987 100644 --- a/doc/scsh-manual/miscprocs.tex +++ b/doc/scsh-manual/miscprocs.tex @@ -3,6 +3,7 @@ \chapter{Miscellaneous routines} \section{Integer bitwise ops} +\label{sec:bitwise} \defun{arithmetic-shift} {i j} \integer \defunx {bitwise-and} {i j} \integer \defunx {bitwise-ior} {i j} \integer @@ -39,4 +40,6 @@ from the lists returned by the field-readers (chapter~\ref{chapt:fr-awk}). and want to be able to recover your shell state, you can fork off a subshell with the following form: \codex{(run (begin (repl)))} + {\ldots}or, rephrased for the proceduralists: + \codex{(wait (fork repl))} \end{desc} diff --git a/doc/scsh-manual/mysize10.sty b/doc/scsh-manual/mysize10.sty new file mode 100644 index 0000000..94c52c0 --- /dev/null +++ b/doc/scsh-manual/mysize10.sty @@ -0,0 +1,22 @@ +%&latex -*- latex -*- +\if@twoside + \oddsidemargin 44pt + \evensidemargin 82pt + \marginparwidth 107pt +\else + \oddsidemargin 63pt + \evensidemargin 63pt + \marginparwidth 90pt +\fi +\marginparsep 11pt + +\topmargin 27pt +\headheight 12pt +\headsep 25pt +\topskip = 10pt +\footskip 30pt + +\textheight = 43\baselineskip +\advance\textheight by \topskip +\textwidth 345pt +\endinput diff --git a/doc/scsh-manual/network.tex b/doc/scsh-manual/network.tex index aa2441d..3651903 100644 --- a/doc/scsh-manual/network.tex +++ b/doc/scsh-manual/network.tex @@ -9,23 +9,21 @@ standard). However, Berkeley sockets are a \emph{de facto} standard, being found on most Unix workstations and PC operating systems. 
-Future releases of scsh will contain more high-level support for -networking applications. -We have Scheme implementations for the ftp, telnet, smtp, finger, and -http protocols, as well as an html parser. -When this code is included in a future release, this chapter -will describe the interfaces. -We are also contemplating a tail-recursive RPC mechanism, -but have done no development work. +It is fairly straightforward to add higher-level network protocols +such as smtp, telnet, or http on top of the basic socket-level +support scsh provides. +The Scheme Underground has also released a network library with +many of these protocols as a companion to the current release of scsh. +See this code for examples showing the use of the sockets interface. \section{High-level interface} -For convenience, and too avoid some of the messy details of the socket +For convenience, and to avoid some of the messy details of the socket interface, we provide a high level socket interface. These routines attempt to make it easy to write simple clients and servers without having to think of many of the details of initiating socket connections. We welcome suggested improvements to this interface, including better -names, which right now are solely descriptions of the procedure's action.. +names, which right now are solely descriptions of the procedure's action. This might be fine for people who already understand sockets, but does not help the new networking programmer. @@ -184,8 +182,7 @@ than an empty string for addresses in the {\Unix} address-family. The procedures in this section are presented in the order in which a typical program will use them. Consult a text on network systems -programming for more information on sockets.
-\footnote{ +programming for more information on sockets.\footnote{ Some recommended ones are: \begin{itemize} @@ -273,12 +270,18 @@ shutdown/sends+receives\end{code} \section{Performing input and output on sockets} \defun {receive-message} {socket length [flags]} {[string-or-\sharpf socket-address]} -\defunx {receive-message!} {socket string [start] [end] [flags]} {[count-or-\sharpf socket-address]} -\defunx {receive-message/partial} {socket length [flags]} {[string-or-\sharpf socket-address]} -\defunx {receive-message!/partial} {socket string [start] [end] [flags]} {[count-or-\sharpf socket-address]} - -\defun {send-message} {socket string [start] [end] [flags] [socket-address]} \undefined -\defunx {send-message/partial} {socket string [start] [end] [flags] [socket-address]} {count} +\dfnix {receive-message!} {socket string [start] [end] [flags]} + {[count-or-\sharpf socket-address]}{procedure} + {receive-message"!@\texttt{receive-message"!}} +\defunx {receive-message/partial} {socket length [flags]} + {[string-or-\sharpf socket-address]} +\dfnix {receive-message!/partial} {socket string [start] [end] [flags]} + {[count-or-\sharpf socket-address]}{procedure} + {receive-message"!/partial@\texttt{receive-message"!/partial}} +\defun {send-message} {socket string [start] [end] [flags] [socket-address]} + \undefined +\defunx {send-message/partial} + {socket string [start] [end] [flags] [socket-address]} {count} \begin{desc} For most uses, standard input and output routines such as diff --git a/doc/scsh-manual/procnotation.tex b/doc/scsh-manual/procnotation.tex index deacedb..d6cd0bd 100644 --- a/doc/scsh-manual/procnotation.tex +++ b/doc/scsh-manual/procnotation.tex @@ -105,7 +105,7 @@ It is equivalent to the three redirections: (= 2 ,(error-output-port))\end{code} % The redirections are done in the indicated order. 
This will cause an error if -the one of current i/o ports isn't a {\Unix} port (\eg, if one is a string +one of the current i/o ports isn't a {\Unix} port (\eg, if one is a string port). This {\Scheme}/{\Unix} i/o synchronisation can also be had in {\Scheme} code (as opposed to a redirection spec) with the \ex{(stdports->stdio)} @@ -169,6 +169,11 @@ The second clause \ex{(3 1)} causes \vari{pf}{\!1}'s file descriptor 3 to be connected to \vari{pf}{\!2}'s file descriptor 1. %---this is unusual, and not expected to occur very often. +The \ex{begin} process form does a \ex{stdio->stdports} synchronisation +in the child process before executing the body of the form. +This guarantees that the \ex{begin} form, like all other process forms, +``sees'' the effects of any associated I/O redirections. + Note that {\R4RS} does not specify whether or not \ex{|} and \ex{|+} are readable symbols. Scsh does. @@ -185,12 +190,13 @@ There are three basic {\Scheme} forms that use extended process forms: \dfnx {\&} {. \var{epf}} {\integer} {syntax} \dfnx {run} {. \var{epf}} {\integer} {syntax} \begin{desc} +\index{exec-epf} \index{\&} \index{run} The \ex{(exec-epf . \var{epf})} form nukes the current process: it establishes the i/o redirections and then overlays the current process with the requested computation. The \ex{(\& . \var{epf})} form is similar, except that the process is forked -off in background. The form returns the subprocess' pid. +off in background. The form returns the subprocess' process object. The \ex{(run . \var{epf})} form runs the process in foreground: after forking off the computation, it waits for the subprocess to exit, @@ -235,7 +241,7 @@ Having a solid procedural foundation also allows for general notational experimentation using {\Scheme}'s macros. For example, the programmer can build his own pipeline notation on top of the \ex{fork} and \ex{fork/pipe} procedures. 
-Chapter~\ref{chapter:syscalls} gives the full story on all the procedures +Chapter~\ref{chapt:syscalls} gives the full story on all the procedures in the syscall library. \subsection{Interfacing process output to {\Scheme}} @@ -349,23 +355,27 @@ for manipulating processes. \subsection{Pids and ports together} -\dfn {run/port+pid} {. \var{epf}} {[port fixnum]} {syntax} -\defunx {run/port+pid*} {thunk} {[port fixnum]} +\dfn {run/port+proc} {. \var{epf}} {[port proc]} {syntax} +\defunx {run/port+proc*} {thunk} {[port proc]} \begin{desc} This special form and its analogous procedure can be used if the programmer also wishes access to the process' pid, exit status, or other information. They both fork off a subprocess, returning two values: -a port open on the process' stdout, and the subprocess's pid. +a port open on the process' stdout (and current output port), +and the subprocess's process object. +A process object encapsulates the subprocess' process id and exit code; +it is the value passed to the \ex{wait} system call. 
+ For example, to uncompress a tech report, reading the uncompressed data into scsh, and also be able to track the exit status of the decompression process, use the following: \begin{code} -(receive (port pid) (run/port+pid (zcat tr91-145.tex.Z)) +(receive (port child) (run/port+proc (zcat tr91-145.tex.Z)) (let* ((paper (port->string port)) - (status (wait pid))) - {\rm\ldots{}use \ex{paper}, \ex{status}, and \ex{pid} here\ldots}))\end{code} + (status (wait child))) + {\rm\ldots{}use \ex{paper}, \ex{status}, and \ex{child} here\ldots}))\end{code} % Note that you must \emph{first} do the \ex{port->string} and \emph{then} do the wait---the other way around may lock up when the diff --git a/doc/scsh-manual/rdelim.tex b/doc/scsh-manual/rdelim.tex new file mode 100644 index 0000000..eae40e9 --- /dev/null +++ b/doc/scsh-manual/rdelim.tex @@ -0,0 +1,139 @@ +%&latex -*- latex -*- + +\chapter{Reading delimited strings} +\label{chapt:rdelim} + +Scsh provides a set of procedures that read delimited strings from +input ports. +There are procedures to read a single line of text +(terminated by a newline character), +a single paragraph (terminated by a blank line), +and general delimited strings +(terminated by a character belonging to an arbitrary character set). + +These procedures can be applied to any Scheme input port. +However, the scsh virtual machine has native-code support for performing +delimited reads on Unix ports, and these input operations should be +particularly fast---much faster than doing the equivalent character-at-a-time +operation from Scheme code. + +All of the delimited input operations described below take a \ex{handle-delim} +parameter, which determines what the procedure does with the terminating +delimiter character. +There are four possible choices for a \ex{handle-delim} parameter: +\begin{inset} +\begin{tabular}{|l|l|} \hline + \ex{handle-delim} & Meaning \\ \hline\hline + \ex{'trim} & Ignore delimiter character. 
\\ + \ex{'peek} & Leave delimiter character in input stream. \\ + \ex{'concat} & Append delimiter character to returned value. \\ + \ex{'split} & Return delimiter as second value. \\ + \hline +\end{tabular} +\end{inset} +The last three cases allow the programmer to distinguish between strings +that are terminated by a delimiter character, and strings that are +terminated by an end-of-file. + + +\begin{defundesc} {read-line} {[port handle-newline]} {{\str} or eof-object} + Reads and returns one line of text; on eof, returns the eof object. + A line is terminated by newline or eof. + + \var{handle-newline} determines what \ex{read-line} does with the + newline or EOF that terminates the line; it takes the general set + of values described for the general \ex{handle-delim} case above, + and defaults to \ex{'trim} (discard the newline). + Using this argument allows one to tell whether or not the last line of + input in a file is newline terminated. +\end{defundesc} + +\defun{read-paragraph} {[port handle-delim]} {{\str} or eof} +\begin{desc} + This procedure skips blank lines, + then reads text from a port until a blank line or eof is found. + A ``blank line'' is a (possibly empty) line composed only of white space. + The \var{handle-delim} parameter determines how the terminating + blank line is handled. + It is described above, and defaults to \ex{'trim}. + The \ex{'peek} option is not available. +\end{desc} + + +The following procedures read in strings from ports delimited by characters +belonging to a specific set. +See section~\ref{sec:char-sets} for information on character set manipulation. + +\defun{read-delimited}{char-set [port handle-delim]} {{\str} or eof} +\begin{desc} + Read until we encounter one of the chars in \var{char-set} or eof. + The \var{handle-delim} parameter determines how the terminating character + is handled. It is described above, and defaults to \ex{'peek}. 
+ + The \var{char-set} argument may be a charset, a string, a character, or a + character predicate; it is coerced to a charset. +\end{desc} + +\dfni{read-delimited!} {char-set buf [port handle-delim start end]} + {nchars or eof or \#f}{procedure} + {read-delimited"!@\texttt{read-delimited"!}} +\begin{desc} + A side-effecting variant of \ex{read-delimited}. + + The data is written into the string \var{buf} at the indices in the + half-open interval $[\var{start},\var{end})$; the default interval is the + whole string: $\var{start}=0$ and $\var{end}=\ex{(string-length + \var{buf})}$. The values of \var{start} and \var{end} must specify a + well-defined interval in \var{buf}, \ie, $0 \le \var{start} \le \var{end} + \le \ex{(string-length \var{buf})}$. + + It returns \var{nchars}, the number of characters read. If the buffer filled up + without a delimiter character being found, \ex{\#f} is returned. If + the port is at eof when the read starts, the eof object is returned. + + If an integer is returned (\ie, the read is successfully terminated by + reading a delimiter character), then the \var{handle-delim} parameter + determines how the terminating character is handled. + It is described above, and defaults to \ex{'peek}. +\end{desc} + + + +\dfni{\%read-delimited!} {char-set buf gobble? [port start end]} + {[char-or-eof-or-\#f \integer]}{procedure} + {"%read-delimited"!@\verb:"%read-delimited"!:} +\begin{desc} +This low-level delimited reader uses an alternate interface. +It returns two values: \var{terminator} and \var{num-read}. +\begin{description} +\item [terminator] + A value describing why the read was terminated: + \begin{flushleft} + \begin{tabular}{l@{\qquad$\Rightarrow$\qquad}l} + Character or eof-object & Read terminated by this value. \\ + \ex{\#f} & Filled buffer without finding a delimiter. + \end{tabular} + \end{flushleft} + +\item [num-read] + Number of characters read into \var{buf}.
+\end{description} + +If the read is successfully terminated by reading a delimiter character, +then the \var{gobble?} parameter determines what to do with the terminating +character. +If true, the character is removed from the input stream; +if false, the character is left in the input stream where a subsequent +read operation will retrieve it. +In either case, the character is also the first value returned by +the procedure call. +\end{desc} + +%Note: +%- Invariant: TERMINATOR = #f => NUM-READ = END - START. +%- Invariant: TERMINATOR = eof-object and NUM-READ = 0 => at EOF. +%- When determining the TERMINATOR return value, ties are broken +% favoring character or the eof-object over #f. That is, if the buffer +% fills up, %READ-DELIMITED! will peek at one more character from the +% input stream to determine if it terminates the input. If so, that +% is returned, not #f. diff --git a/doc/scsh-manual/running.tex b/doc/scsh-manual/running.tex index acab341..9dd336d 100644 --- a/doc/scsh-manual/running.tex +++ b/doc/scsh-manual/running.tex @@ -1,47 +1,621 @@ %&latex -*- latex -*- \chapter{Running scsh} +\label{chapt:running} Scsh is currently implemented on top of {\scm}, a freely-available {\Scheme} implementation written by Jonathan Rees and Richard Kelsey. {\scm} uses a byte-code interpreter for good code density, portability -and medium efficiency. It is {\R4RS}. The version on top of which scsh is -currently built (0.36) lacks floating point. +and medium efficiency. It is {\R4RS}. It also has a module system designed by Jonathan Rees. Scsh's design is not {\scm} specific, although the current implementation -is necessarily so. Scsh is intended to be implementable in other {\Scheme} -implementations---although such a port may require some work. -The {\scm} vm that scsh uses is a specially modified version; +is necessarily so. +Scsh is intended to be implementable in other {\Scheme} implementations. 
+The {\scm} virtual machine that scsh uses is a specially modified version; standard {\scm} virtual machines cannot be used with the scsh heap image. +There are several different ways to invoke scsh. +You can run it as an interactive Scheme system, with a standard +read-eval-print interaction loop. +Scsh can also be invoked as the interpreter for a shell script by putting +a ``\verb|#!/usr/local/bin/scsh -s|'' line at the top of the shell script. + +Descending a level, it is also possible to invoke the underlying virtual +machine byte-code interpreter directly on dumped heap images. +Scsh programs can be pre-compiled to byte-codes and dumped as raw, +binary heap images. +Writing heap images strips out unused portions of the scsh runtime +(such as the compiler, the debugger, and other complex subsystems), +reducing memory demands and saving loading and compilation times. +The heap image format allows for an initial \verb|#!/usr/local/lib/scsh/scshvm| trigger +on the first line of the image, making heap images directly executable as +another kind of shell script. + +Finally, scsh's static linker system allows dumped heap images to be compiled +to a raw Unix a.out(5) format, which can be linked into the text section +of the vm binary. +This produces a true Unix executable binary file. +Since the byte codes comprising the program are in the file's text section, +they are not traced or copied by the garbage collector, do not occupy space +in the vm's heap, and do not need to be loaded and linked at startup time. +This reduces the program's startup time, memory requirements, +and paging overhead. + +This chapter will cover these various ways of invoking scsh programs. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Scsh command-line switches} + +When the scsh top-level starts up, it scans the command line +for switches that control its behaviour. 
+These arguments are removed from the command line; +the remaining arguments can be accessed as the value of +the scsh variable \ex{command-line-arguments}. + +\subsection{Scripts and programs} + +The scsh command-line switches provide sophisticated support for +the authors of shell scripts and programs; +they also allow the programmer to write programs +that use the {\scm} module system. + +There is a difference between a \emph{script}, which performs its action +\emph{as it is loaded}, and a \emph{program}, which is loaded/linked, +and then performs its action by having control transferred to an entry point +(\eg, the \ex{main()} function in C programs) that was defined by the +load/link operation. + +A \emph{script}, by the above definition, cannot be compiled by the simple +mechanism of loading it into a scsh process and dumping out a heap image---it +executes as it loads. It does not have a top-level \ex{main()}-type entry +point. + +It is more flexible and useful to implement a system +as a program than as a script. +Programs can be compiled straightforwardly; +they can also export procedural interfaces for use by other Scheme packages. +However, scsh supports both the script and the program style of programming. + +\subsection{Inserting interpreter triggers into scsh programs} +When Unix tries to execute an executable file whose first 16 bits are +the character pair ``\ex{\#!}'', it treats the file not as machine-code +to be directly executed by the native processor, but as source code to +be executed by some interpreter. +The interpreter to use is specified immediately after the ``\ex{\#!}'' +sequence on the first line of the source file +(along with one optional initial argument). +The kernel reads in the name of the interpreter, and executes that instead. +The interpreter is passed the source filename as its first argument, with +the original arguments following. +Consult the Unix man page for the \ex{exec} system call for more information. 
+ +Scsh allows Scheme programs to have these triggers placed on +their first line. +Scsh treats the character sequence ``\ex{\#!}'' as a block-comment sequence,% +\footnote{Why a block-comment instead of an end-of-line delimited comment? + See the section on meta-args.} +and skips all following characters until it reads the comment-terminating +sequence newline/exclamation-point/sharp-sign/newline (\ie, the +sequence ``\ex{!\#}'' occurring on its own line). + +In this way, the programmer can arrange for an initial +\begin{code} +#!/usr/local/bin/scsh -s +!#\end{code} +header appearing in a Scheme program +to be ignored when the program is loaded into scsh. + +\subsection{Module system} +Scsh uses the {\scm} module system, which defines +\emph{packages}, \emph{structures}, and \emph{interfaces}. +% +\begin{description} + +\item [Package] A package is an environment---that is, a set of +variable/value bindings. +You can evaluate Scheme forms inside a package, or load a file into a package. +Packages export sets of bindings; these sets are called \emph{structures}. + +\item [Structure] A structure is a named view on a package---a set of + bindings. Other packages can \emph{open} the structure, importing its + bindings into their environment. Packages can provide more than one + structure, revealing different portions of the package's environment. + +\item [Interface] An interface is the ``type'' of a structure. An + interface is the set of names exported by a structure. These names + can also be marked with other static information (\eg, advisory type + declarations, or syntax information). +\end{description} +More information on the {\scm} module system can be found in the +file \ex{module.ps} in the \ex{doc} directory of the {\scm} and scsh releases. + +Programming Scheme with a module system is different from programming +in older Scheme implementations, +and the associated development problems are consequently different.
+In Schemes that lack modular abstraction mechanisms, +everything is accessible; the major problem is preventing name-space conflicts. +In Scheme 48, name-space conflicts vanish; the major problem is that not +all bindings are accessible from every place. +It takes a little extra work to specify what packages export which values. + +It may take you a little while to get used to the new style of program +development. +Although scsh can be used without referring to the module system at +all, we recommend taking the time to learn and use it. +The effort will pay off in the construction of modular, factorable programs. + +\subsection{Switches} +\label{sec:scsh-switches} +The scsh top-level takes command-line switches in the following format: +% +\codex{scsh [\var{meta-arg}] [\vari{switch}i {\ldots}] + [\var{end-option} \vari{arg}1 {\ldots} \vari{arg}n]} +where +\begin{inset} +\begin{flushleft} +\begin{tabular}{ll@{\qquad}l} +\var{meta-arg:} & \verb|\| \var{script-file-name} \\ +\\ +\var{switch:} & \ex{-e} \var{entry-point} + & Specify top-level entry-point. \\ + + & \ex{-o} \var{structure} + & Open structure in current package. \\ + + & \ex{-m} \var{structure} + & Switch to package. \\ + + & \ex{-n} \var{new-package} + & Switch to new package. \\ \\ + + + & \ex{-lm} \var{module-file-name} + & Load module into config package. \\ + + & \ex{-l} \var{file-name} + & Load file into current package. \\ + + + & \ex{-dm} & Do script module. \\ + & \ex{-ds} & Do script. \\ +\\ +\var{end-option:} & \ex{-s} \var{script} \\ + & \ex{-c} \var{exp} \\ + & \ex{--} +\end{tabular} +\end{flushleft} +\end{inset} +% +These command-line switches +essentially provide a little linker language for linking a shell script or a +program together with {\scm} modules. +The command-line processor serially opens structures and loads code into a +given package. +Switches that side-effect a package operate on a particular ``current'' +package; there are switches to change this package. 
+(These switches provide functionality equivalent to the interactive + \ex{,open} \ex{,load} \ex{,in} and \ex{,new} commands.) +Except where indicated, switches specify actions that are executed in a +left-to-right order. +The initial current package is the user package, which is completely +empty and opens (imports the bindings of) the R4RS and scsh structures. + +If the Scheme process is started up in an interactive mode, then the current +package in force at the end of switch scanning is the one inside which +the interactive read-eval-print loop is started. + +The command-line switch processor works in two passes: +it first parses the switches, building a list of actions to perform, +then the actions are performed serially. +The switch list is terminated by one of the \var{end-option} switches. +The \vari{arg}{i} arguments occurring after an end-option switch are +passed to the scsh program as the value of \ex{command-line-arguments} +and the tail of the list returned by \ex{(command-line)}. +That is, an \var{end-option} switch separates switches that control +the scsh ``machine'' from the actual arguments being passed to the scsh +program that runs on that machine. + +The following switches and end options are defined: +\begin{itemize} +\def\Item#1{\item{\ex{#1}}\\} + +\Item{-o \var{struct}} + Open the structure in the current package. + +\Item{-n \var{package}} + Make and enter a new package. The package has an associated structure + named \var{package} with an empty export list. + If \var{package} is the string ``\ex{\#f}'', + the new package is anonymous, with no associated named structure. + + The new package initially opens no other structures, + not even the R4RS bindings. You must follow a ``\ex{-n foo}'' + switch with ``\ex{-o scheme}'' to access the standard identifiers such + as \ex{car} and \ex{define}. + +\Item{-m \var{struct}} + Change the current package to the package underlying + structure \var{struct}.
+ (The \ex{-m} stands for ``module.'') + +\Item{-lm \var{module-file-name}} + Load the specified file into scsh's config package --- the file + must contain source written in the Scheme 48 module language + (``load module''). Does not alter the current package. + +\Item{-l \var{file-name}} + Load the specified file into the current package. + +\Item{-c \var{exp}} + Evaluate expression \var{exp} in the current package and exit. + This is called \ex{-c} after a common shell convention (see sh and csh). + The expression is evaluated in the current package (and hence is + affected by \ex{-m}'s and \ex{-n}'s.) + + When the scsh top-level constructs the scsh command-line in this case, + it takes \ex{"scsh"} to be the program name. + This switch terminates argument scanning; following args become + the tail of the command-line list. + +\Item{-e \var{entry-point}} + Specify an entry point for a program. The \var{entry-point} is + a variable that is taken from the current package in force at the end + of switch evaluation. The entry point does not have to be exported + by the package in a structure; it can be internal to the package. + The top level passes control to the entry point by applying it to + the command-line list (so programs executing in private + packages can reference their command-line arguments without opening + the \ex{scsh} package to access the \ex{(command-line)} procedure). + Note that, like the list returned by the \ex{(command-line)} procedure, + the list passed to the entry point includes the name + of the program being executed (as the first element of the list), + not just the arguments to the program. + + A \ex{-e} switch can occur anywhere in the switch list, but it is the + \emph{last} action performed by switch scanning if it occurs. + (We violate ordering here as the shell-script \ex{\#!} mechanism + prevents you from putting the \ex{-e} switch last, where it belongs.) + +\Item{-s \var{script}} + Specify a file to load.
+ A \ex{-ds} (do-script) or \ex{-dm} (do-module) switch occurring earlier in + the switch list gives the place where the script should be loaded. If + there is no \ex{-ds} or \ex{-dm} switch, then the script is loaded at the + end of switch scanning, into the module that is current at the end of + switch scanning. + + We use the \ex{-ds} switch to violate left-to-right switch execution order + as the \ex{-s} switch is \emph{required} to be last + (because of the \ex{\#!} machinery), + independent of when/where in the switch-processing order + it should be loaded. + + When the scsh top-level constructs the scsh command-line in this case, + it takes \var{script} to be the program name. + This switch terminates switch parsing; following args are ignored + by the switch-scanner and are passed through to the program as + the tail of the command-line list. + +\Item{--} + Terminate argument scanning and start up scsh in interactive mode. + If the argument list just runs out, without either a terminating + \ex{-s} or \ex{--} arg, then scsh also starts up in interactive mode, + with an empty \ex{command-line-arguments} list + (for example, simply entering \ex{scsh} at a shell prompt with no + args at all). + + When the scsh top-level constructs the scsh command-line in this case, + it takes \ex{"scsh"} to be the program name. + This switch terminates switch parsing; following args are ignored + by the switch-scanner and are passed through to the program as + the tail of the command-line list. + +\Item{-ds} + Specify when to load the script (``do-script''). If this switch occurs, + the switch list \emph{must} be terminated by a \ex{-s \var{script}} + switch. The script is loaded into the package that is current at the + \ex{-ds} switch. + +\Item{-dm} + As above, but the current module is ignored. The script is loaded into the + \ex{config} package (``do-module''), and hence must be written in the + {\scm} module language. 
+ This switch doesn't affect the current module---after executing this + switch, the current module is the same as it was before. + + This switch is provided to make it easy to write shell scripts in the + {\scm} module language. +\end{itemize} + +\subsection{The meta argument} +\label{sec:meta-arg} +The scsh switch parser takes a special command-line switch, +a single backslash called the ``meta-argument,'' which is useful for +shell scripts. +If the initial command-line argument is a ``\verb|\|'' +argument, followed by a filename argument \var{fname}, scsh will open the file +\var{fname} and read more arguments from the second line of this file. +This list of arguments will then replace the ``\verb|\|'' argument---\ie, +the new arguments are inserted in front of \var{fname}, +and the argument parser resumes argument scanning. +This is used to overcome a limitation of the \ex{\#!} feature: +the \ex{\#!} line can only specify a single argument after the interpreter. +For example, we might hope the following scsh script, \ex{ekko}, +would implement a simple-minded version of the Unix \ex{echo} program: +\begin{code} +#!/usr/local/bin/scsh -e main -s +!# +(define (main args) + (map (\l{arg} (display arg) (display " ")) + (cdr args)) + (newline))\end{code} +% +The idea would be that the command + \codex{ekko Hi there.} +would be expanded by the \ex{exec(2)} kernel call into +% +\begin{code} +/usr/local/bin/scsh -e main -s ekko Hi there.\end{code} +% +In theory, this would cause scsh to start up, load in file \ex{ekko}, +call the entry point on the command-line list +\codex{(main '("ekko" "Hi" "there."))} +and exit. + +Unfortunately, the {\Unix} \ex{exec(2)} syscall's support for scripts is +not very general or well-designed. +It will not handle multiple arguments; +the \ex{\#!} line is usually required to contain no more than 32 characters; +it is not recursive.
+If these restrictions are violated, most Unix systems will not provide accurate +error reporting, but either fail silently, or simply incorrectly implement +the desired functionality. +These are the facts of Unix life. + +In the \ex{ekko} example above, our \ex{\#!} trigger line has three +arguments (``\ex{-e}'', ``\ex{main}'', and ``\ex{-s}''), so it will not +work. +The meta-argument is how we work around this problem. +We must instead invoke the scsh interpreter with the single \cd{\\} argument, +and put the rest of the arguments on line two of the program. +Here's the correct program: +% +\begin{code} +#!/usr/local/bin/scsh \\ +-e main -s +!# +(define (main args) + (map (\l{arg} (display arg) (display " ")) + (cdr args)) + (newline))\end{code} +% +Now, the invocation starts as + \codex{ekko Hi there.} +and is expanded by exec(2) into +\begin{code} +/usr/local/bin/scsh \\ ekko Hi there.\end{code} +When scsh starts up, it expands the ``\cd{\\}'' argument into the arguments +read from line two of \ex{ekko}, producing this argument list: +\begin{code}\cddollar +\underline{-e main -s ekko} Hi there. + $\uparrow$ +{\rm{}Expanded from} \cd{\\} ekko\end{code} +% +With this argument list, processing proceeds as we intended. + +\subsubsection{Secondary argument syntax} +Scsh uses a very simple grammar to encode the extra arguments on +the second line of the scsh script. +The only special characters are space, tab, newline, and backslash. +\begin{itemize} +\item Each space character terminates an argument. + This means that two spaces in a row introduce an empty-string argument. + +\item The tab character is not permitted + (unless you quote it with the backslash character described below). + This is to prevent the insidious bug where you believe you have + six space characters, but you really have a tab character, + and \emph{vice-versa}. + +\item The newline character terminates the sequence of arguments, + and will also terminate a final non-empty argument. 
+ (However, a newline following a space does not introduce a final + empty-string argument; it only terminates the argument list.) + +\item The backslash character is the escape character. + It escapes backslash, space, tab, and newline, turning off their + special functions, and allowing them to be included in arguments. + The {\Ansi} C escape sequences, such as \verb|\n| and \verb|\t| are + also supported; these also produce argument-constituents---\verb|\n| + doesn't act like a terminating newline. + The escape sequence \verb|\|\emph{nnn} for \emph{exactly} three + octal digits reads as the character whose {\Ascii} code is \emph{nnn}. + It is an error if backslash is followed by just one or two octal digits: + \verb|\3Q| is an error. + Octal escapes are always constituent chars. + Backslash followed by other chars is not allowed + (so we can extend the escape-code space later if we like). +\end{itemize} + +You have to construct these line-two argument lines carefully. +In particular, beware of trailing spaces at the end of the line---they'll +give you extra trailing empty-string arguments. +Here's an example: +% +\begin{inset} +\begin{verbatim} +#!/bin/interpreter \ +foo bar quux\ yow\end{verbatim} +\end{inset} +% +would produce the arguments +% +\codex{("foo" "bar" "" "quux yow")} + +\subsection{Examples} + +\begin{itemize} +\def\Item#1{\item{\ex{#1}}\\} +\def\progItem#1{\item{Program \ex{#1}}\\} + +\Item{scsh -dm -m myprog -e top -s myprog.scm} + Load \ex{myprog.scm} into the \ex{config} package, then shift to the + \ex{myprog} package and call \ex{(top '("myprog.scm"))}, then exit. + This sort of invocation is typically used in \ex{\#!} script lines + (see below). + +\Item{scsh -c '(display "Hello, world.")'} + A simple program. + +\Item{scsh -o bigscheme} + Start up interactively in the user package after opening + structure \ex{bigscheme}. 
+ +\Item{scsh -o bigscheme -- Three args passed} + Start up interactively in the user package after opening \ex{bigscheme}. + The \ex{command-line-args} variable in the scsh package is bound to the + list \ex{("Three" "args" "passed")}, and the \ex{(command-line)} + procedure returns the list \ex{("scsh" "Three" "args" "passed")}. + + +\progItem{ekko} +This shell script, called \ex{ekko}, implements a version of +the Unix \ex{echo} program: +\begin{code} +#!/usr/local/bin/scsh -s +!# +(for-each (\l{arg} (display arg) (display " ")) + command-line-args)\end{code} + +Note this short program is an example of a \emph{script}---it +executes as it loads. +The Unix rule for executing \ex{\#!} shell scripts causes +\codex{ekko Hello, world.} +to expand as +\codex{/usr/local/bin/scsh -s ekko Hello, world.} + +\progItem{ekko} +This is the same program, \emph{not} as a script. +Writing it this way makes it possible to compile the program +(and then, for instance, dump it out as a heap image). +% +\begin{code} +#!/usr/local/bin/scsh \\ +-e top -s +!# +(define (top args) + (for-each (\l{arg} (display arg) (display " ")) + (cdr args)))\end{code} +% +The \ex{exec(2)} expansion of the \ex{\#!} line together with +the scsh expansion of the ``\verb|\ ekko|'' meta-argument +(see section~\ref{sec:meta-arg}) gives the following command-line expansion: +\begin{code} +ekko Hello, world. + {\evalto} /usr/local/bin/scsh \\ ekko Hello, world. + {\evalto} /usr/local/bin/scsh -e top -s ekko Hello, world.\end{code} + +\progItem{sort} +This is a program to replace the Unix \ex{sort} utility---sorting lines +read from stdin, and printing the results on stdout. +Note that the source code defines a general sorting package, +which is useful (1) as a Scheme module exporting sort procedures +to other Scheme code, and (2) as a standalone program invoked from +the \ex{top} procedure. +\begin{code} +#!/usr/local/bin/scsh \\ +-dm -m sort-toplevel -e top -s +!# + +;;; This is a sorting module. 
TOP procedure exports +;;; the functionality as a Unix program akin to sort(1). +(define-structures ((sort-struct (export sort-list + sort-vector!)) + (sort-toplevel (export top))) + (open scheme) + + (begin (define (sort-list elts <=) {\ldots}) + (define (sort-vec! vec <=) {\ldots}) + + ;; Parse the command line and + ;; sort stdin to stdout. + (define (top args) + {\ldots})))\end{code} + +The expansion below shows how the command-line scanner +(1) loads the config file \ex{sort} (written in the {\scm} module language), +(2) switches to the package underlying the \ex{sort-toplevel} structure, +(3) calls \ex{(top '("sort" "foo" "bar"))} in the package, and finally +(4) exits. +% +{\small +\begin{centercode} +sort foo bar +{\evalto} /usr/local/bin/scsh \\ sort foo bar +{\evalto} /usr/local/bin/scsh -dm -m sort-toplevel -e top -s sort foo bar\end{centercode}} + +An alternate method would have used a \ex{-n -o sort-toplevel} +sequence of switches to specify a top-level package. + +\end{itemize} + +Note that the sort example can be compiled into a Unix program by +loading the file into an scsh process, and dumping a heap with top-level +\ex{top}. Even if we don't want to export the sort's functionality as a +subroutine library, it is still useful to write the sort program with the +module language. The command line design allows us to run this program as +either an interpreted script (given the \ex{\#!} args in the header) or as a +compiled heap image. + +\subsection{Process exit values} +Scsh ignores the value produced by its top-level computation when determining +its exit status code. +If the top-level computation completed with no errors, +scsh dies with exit code 0. +For example, a scsh process whose top-level is specified by a \ex{-c \var{exp}} +or a \ex{-e \var{entry}} entry point ignores the value produced +by evaluating \var{exp} and calling \var{entry}, respectively. 
+If these computations terminate with no errors, the scsh process +exits with an exit code of 0. + +To return a specific exit status, use the \ex{exit} procedure explicitly, \eg, + \codex{scsh -c "(exit (run (| (fmt) (mail shivers@lcs.mit.edu))))"} + + +\section{The scsh virtual machine} To run the {\scm} implementation of scsh, you run a specially modified copy of the {\scm} virtual machine with a scsh heap image. -This command starts the vm up with a 1Mword heap (split into two semispaces): - \codex{scshvm -o scshvm -h 1000000 -i scsh.image arg1 arg2 \ldots} -The vm peels off initial vm arguments -up to the \ex{-i} heap image argument, which terminates vm argument parsing. -The rest of the arguments are passed off to the scsh top-level. -Scsh's top-level removes scsh arguments; the rest show up as the value -of \ex{command-line-arguments}. - -Alternatively, you can run the scsh top-level binary. -This is nothing but a small cover program that invokes the -scsh vm on the scsh heap image for you. +The scsh binary is actually nothing but a small cover program that invokes the +byte-code interpreter on the scsh heap image for you. This allows you to simply start up an interactive scsh from a command line, as well as write shell scripts that begin with the simple trigger \codex{\#!/usr/local/bin/scsh -s} -\section{VM arguments} +You can also directly execute the virtual machine, +which takes its own set of command-line switches. +For example, +this command starts the vm up with a 1Mword heap (split into two semispaces): + \codex{scshvm -o scshvm -h 1000000 -i scsh.image arg1 arg2 \ldots} +The vm peels off initial vm arguments +up to the \ex{-i} heap image argument, which terminates vm argument parsing. +The rest of the arguments are passed off to the scsh top-level. +Scsh's top-level removes scsh switches, as discussed in the previous section; +the rest show up as the value of \ex{command-line-arguments}.
+ +Directly executing the vm can be useful to specify non-standard switches, or +invoke the virtual machine on special heap images, which can contain +pre-compiled scsh programs with their own top-level procedures. + +\subsection{VM arguments} \label{sec:vm-args} -Scsh uses a special version of the {\scm} virtual machine. -It takes arguments in the following form: +The vm takes arguments in the following form: \codex{scshvm [\var{meta-arg}] [\var{vm-options}\+] [\var{end-option} \var{scheme-args}]} where \begin{inset} \begin{tabular}{ll} -\var{meta-arg:} & \verb|\ |\var{script} \\ +\var{meta-arg:} & \verb|\ |\var{filename} \\ \\ \var{vm-option}: & \ex{-h }\var{heap-size-in-words} \\ & \ex{-s }\var{stack-size-in-words} \\ @@ -52,156 +626,128 @@ where \end{tabular} \end{inset} -\subsection{The meta argument} -The {\scm} vm takes a special command-line switch, a single backslash called -the ``meta-switch,'' which is useful for shell scripts. -While parsing the command-line arguments, if the vm sees a ``\verb|\|'' -argument, followed by a filename argument \var{fname}, it will open the file -\var{fname}, and read more arguments from the second line of this file. -This list of arguments will then replace the ``\verb|\|'' argument---\ie, -the new arguments are inserted in front of \var{fname}, -and the argument parser resumes argument scanning. -This is used to overcome a limitation of the \ex{\#!} feature: -the \ex{\#!} line can only specify a single argument after the interpreter. 
-For example, we might hope the following scsh script, \ex{ekko}, -would implement a simple-minded version of \ex{echo(1)}: -\begin{code} -#!/bin/scshvm -o /bin/scshvm -i /lib/scsh.image -s -!# -(map (\l{arg} (display arg) (display " ")) - command-line-arguments) -(newline)\end{code} -% -The idea would be that the command - \codex{ekko Hi there.} -would by expanded by \ex{exec(2)} into -% -\begin{code} -/bin/scshvm -o /bin/scshvm -i /lib/scsh.image -s ekko Hi there.\end{code} -% -In theory, this would cause scsh to start up, set \ex{command-line-arguments} -to \ex{("Hi" "there.")}, load the source file \ex{ekko}, and exit. +The vm's meta-switch ``\verb|\ |\var{filename}'' is handled the same +as scsh's meta-switch, and serves the same purpose. -However, the {\Unix} \ex{exec(2)} call will not handle multiple arguments -on the \ex{\#!} line, so this script won't work. We must instead invoke -the {\scm} vm with the single \cd{\\} argument, and put the rest of the -arguments on line two of the script. Here's the correct script:\footnote{ - In fact, I'm playing fast and loose with the actual pathnames - used in this example: \ex{scshvm} is probably not going to be found in - \ex{/bin}. I've abbreviated things so the long argument lists will fit - into one line of text. - See the following sections for the full details.} -% -\begin{code} -#!/bin/scshvm \\ --o /bin/scshvm -i /lib/scsh.image -s -!# -(map (\l{arg} (display arg) (display " ")) - command-line-arguments) -(newline)\end{code} -% -Now, the invocation starts as - \codex{ekko Hi there.} -and is expanded by exec(2) into -\begin{code} -/bin/scshvm \\ ekko Hi there.\end{code} -When scshvm starts up, it expands the ``\cd{\\}'' argument into the arguments -read from line two of \ex{ekko}, producing this argument list: -\begin{code}\cddollar -\underline{-o /bin/scshvm -i /lib/scsh.image -s ekko} Hi there. 
- $\uparrow$ - {\rm{}Expanded from} \cd{\\} ekko\end{code} -% -With this argument list, processing proceeds as we intended. - -\subsection{VM options} +\subsubsection{VM options} The \ex{-o \var{object-file-name}} switch tells the vm where to find relocation information for its foreign-function calls. Scsh will use a pre-compiled default if it is not specified. -Scsh must have this information to run, +Scsh \emph{must} have this information to run, since scsh's syscall interfaces are done with foreign-function calls. The \ex{-h} and \ex{-s} options tell the vm how much space to allocate for the heap and stack. +The heap size value is the total number of words allocated for the heap; +this space is then split into two semi-spaces for {\scm}'s stop-and-copy +collector. -\subsection{End options} +\subsubsection{End options} End options terminate argument parsing. The \ex{-i} switch is followed by the name of a heap image for the -vm to execute, and terminates vm argument parsing; -following arguments are passed off to the heap image's top-level program. +vm to execute. +The \var{image-file-name} string is also taken to be the name of the program +being executed by the VM; this name becomes the head of the argument +list passed to the heap image's top-level entry point. +The tail of the argument list is constructed from all following arguments. + The \ex{--} switch terminates argument parsing without giving -a specific heap image; the vm will start up with using a default +a specific heap image; the vm will start up using a default heap (whose location is compiled into the vm). +All the following arguments comprise the tail of the list passed off to +the heap image's top-level procedure. Notice that you are not allowed to pass arguments to the heap image's -top-level program (\eg, scsh) without delimiting them with \ex{-i} +top-level procedure (\eg, scsh) without delimiting them with \ex{-i} or \ex{--} flags. 
-\section{Scsh arguments} -\label{sec:scsh-args} +\subsection{Inserting interpreter triggers into heap images} +{\scm}'s heap image format allows for an informational header: +when the vm loads in a heap image, it ignores all data occurring before +the first control-L character (\textsc{Ascii} 12). +This means that you can insert a ``\ex{\#!}'' trigger line into a +heap image, making it a form of executable ``shell script.'' +Since the vm requires multiple arguments to be given on the command +line, you must use the meta-switch. +Here's an example heap-image header: +\begin{code} +#!/usr/local/lib/scsh/scshvm \\ +-o /usr/local/lib/scsh/scshvm -i +{\ldots} \textnormal{\emph{Your heap image goes here}} \ldots\end{code} -Scsh's top-level argument parser takes arguments in a simple -format: -\codex{scsh [\var{end-option} \vari{arg}1 {\ldots} \vari{arg}n]} -where -\begin{inset} -\begin{tabular}{ll} -\var{end-option:} & \ex{-s} \var{script} \\ - & \ex{--} -\end{tabular} -\end{inset} -The \ex{-s} argument causes scsh to load a script file and exit. -It also terminates argument parsing; following arguments are passed -to the scsh program as the value of \ex{command-line-arguments}. +\subsection{Inserting a double-level trigger into Scheme programs} +If you're a nerd, you may enjoy doing a double-level machine shift +in the trigger line of your Scheme programs with the following magic: +\begin{code}\small +#!/usr/local/lib/scsh/scshvm \\ +-o /usr/local/lib/scsh/scshvm -i /usr/local/lib/scsh/scsh.image -s +!# +{\ldots} \textnormal{\emph{Your Scheme program goes here}} \ldots\end{code} -If the \ex{-s} argument is not given, scsh runs in interactive mode, -with a standard {\scm} prompt-read-eval-print loop. - -The \ex{--} switch terminates argument parsing without specifying a -script to load; it allows the user to pass arguments to an interactive scsh. - -Shell scripts can be written and invoked with a \ex{\#!} initial line. 
-Scsh defines the sequence \ex{\#!} to be a read-macro similar -to the comment character \ex{;}. -The read-macro causes scsh to skip characters until it reads a newline, -\ex{!}, \ex{\#}, newline sequence. -So an initial \ex{\#!} line is ignored by scsh. - -% Since there is no standalone scsh binary, scsh scripts must do a double -% level-shift, invoking the vm on the scsh heap image on the scsh script. -% The appropriate magic top three lines are: -% \begin{code} -% #!/usr/local/bin/scshvm \\ -% -o /usr/local/bin/scshvm -i /usr/local/lib/scsh/scsh.image -s -% !# -% {\ldots} \textnormal{\emph{Your Scheme code goes here}} \ldots\end{code} - -\section{Compiling shell scripts} -The {\Scheme} implementation of scsh allows you to create a heap image -with your own top-level procedure. +\section{Compiling scsh programs} +Scsh allows you to create a heap image with your own top-level procedure. Adding the pair of lines \begin{code} -#!/usr/local/bin/scshvm \\\\ --o /usr/local/bin/scshvm -i -\end{code} +#!/usr/local/lib/scsh/scshvm \\ +-o /usr/local/lib/scsh/scshvm -i\end{code} to the top of the heap image will turn it into an executable {\Unix} file. +You can create heap images with the following two procedures. + \defun{dump-scsh-program}{main fname}{\undefined} \begin{desc} This procedure writes out a scsh heap image. When the - heap image is executed by the {\Scheme} vm, it will call - the \var{main} procedure on no arguments and then exit. - The {\Scheme} vm will parse command-line arguments as - described in section~\ref{sec:vm-args}, and bind remaining - arguments to the \ex{command-line-arguments} variable before - calling \ex{main}. Further argument parsing (as described for - scsh in section~\ref{sec:scsh-args} is not performed. + heap image is executed by the {\scm} vm, it will call + the \var{main} procedure, passing it the vm's argument list. + When \ex{main} returns an integer value $i$, the vm exits with + exit status $i$. 
+ The {\Scheme} vm will parse command-line switches as + described in section~\ref{sec:vm-args}; remaining arguments + form the tail of the command-line list that is passed to \ex{main}. + (The head of the list is the name of the program being executed + by the vm.) + Further argument parsing + (as described for scsh in section~\ref{sec:scsh-switches}) + is not performed. The heap image created by \ex{dump-scsh-program} has unused code and data pruned out, so small programs compile to much smaller heap images. \end{desc} +\defun{dump-scsh}{fname}{\undefined} +\begin{desc} + This procedure writes out a heap image with the standard + scsh top-level. + When the image is resumed by the vm, it will parse and + execute scsh command-line switches as described in section + \ref{sec:scsh-switches}. + + You can use this procedure to write out custom scsh heap images + that have specific packages preloaded and start up in specific + packages. +\end{desc} + +Unfortunately, {\scm} does not support separate compilation of +Scheme files or Scheme modules. +The only way to compile is to load source and then dump out a +heap image. +One occasionally hears rumours that this is being addressed +by the {\scm} development team. + +\section{Statically linking heap images} +Brian Carlstrom has written code to process {\scm} heap images +into \ex{.o} files that can be linked with a virtual machine +binary to produce a standalone machine-code executable. + +The source code comes with the current distribution, but it has not been +integrated into the system or documented in time for this +release. + +%Either he integrates it into the system and documents it for release +%0.4, or his body will soon be occupying a shallow grave behind Tech Square. 
+ + \section{Standard file locations} Because the scshvm binary is intended to be used for writing shell scripts, it is important that the binary be installed in a standard @@ -219,6 +765,7 @@ with a symbolic link to it from The {\scm} image format allows heap images to have \ex{\#!} triggers, so \ex{scsh.image} should have a \ex{\#!} trigger of the following form: \begin{code} -#!/usr/local/bin/scshvm \\ --o /usr/local/bin/scshvm -i +#!/usr/local/lib/scsh/scshvm \\ +-o /usr/local/lib/scsh/scshvm -i {\ldots} \textnormal{\emph{heap image goes here}} \ldots\end{code} + diff --git a/doc/scsh-manual/strings.tex b/doc/scsh-manual/strings.tex index ed91ff3..7837de0 100644 --- a/doc/scsh-manual/strings.tex +++ b/doc/scsh-manual/strings.tex @@ -1,11 +1,12 @@ +% -*- latex -*- \chapter{Strings and characters} Scsh provides a set of procedures for processing strings and characters. The procedures provided match regular expressions, search strings, parse file-names, and manipulate sets of characters. -Also see chapter \ref{chapt:fr-awk} on record I/O, field parsing, -and the awk loop. +Also see chapters \ref{chapt:rdelim} and \ref{chapt:fr-awk} +on record I/O, field parsing, and the awk loop. The procedures documented there allow you to read character-delimited records from ports, use regular expressions to split the records into fields (for example, splitting a string at every occurrence of colon or white-space), @@ -35,7 +36,7 @@ The code uses Henry Spencer's regular expression package. \end{defundesc} \begin{defundesc} {match:start} {match [match-number]} \fixnum - Returns the start position of the match denoted by \var{match-number} + Returns the start position of the match denoted by \var{match-number}. The whole regexp is 0. Each further number represents positions enclosed by \ex{(\ldots)} sections. \var{Match-number} defaults to 0. \end{defundesc} @@ -50,10 +51,9 @@ The code uses Henry Spencer's regular expression package. 
\var{Match-number} defaults to 0 (the whole match). \end{defundesc} -\remark{ - What do these guys do when there is no match corresponding to - \var{match-number}? - Return {\sharpf} or signal error? {\sharpf} probably best.} +\oops{Scsh regex matching doesn't currently flag un-matched subexpressions +in the \ex{match:begin}, \ex{match:end}, and \ex{match:substring} functions. +This needs to be fixed.} Regular expression matching compiles patterns into special data structures which can be efficiently used to match against strings. @@ -95,13 +95,9 @@ In other words, it quotes the regular expression, prepending backslashes to all the special regexp characters in \var{str}. \begin{code} (regexp-quote "*Hello* world.") - {\evalto}"\\*Hello\\* world\\."\end{code} + {\evalto}"\\\\*Hello\\\\* world\\\\."\end{code} \end{desc} -\oops{Scsh regex matching doesn't currently flag un-matched subexpressions -in the \ex{match:begin}, \ex{match:end}, and \ex{match:substring} functions. -This needs to be fixed.} - \subsection{Other string manipulation facilities} \defun {index} {string char [start]} {{\fixnum} or false} @@ -191,8 +187,8 @@ slashes to a single slash. A file-name in \emph{directory form} is either a file-name terminated by a slash, \eg, ``\ex{/src/des/}'', or the empty string, ``''. -The empty string corresponds to the current working directory, who's -file-name is dot (``\ex{.}''). +The empty string corresponds to the current working directory, +whose file-name is dot (``\ex{.}''). Working backwards from the append-a-slash rule, we extend the syntax of {\Posix} file-names to define the empty string to be a file-name form of the root directory ``\ex{/}''. @@ -223,6 +219,27 @@ interpreted in file-name form, \ie, as root. 
\subsubsection{Procedures} +\defun {file-name-directory?} {fname} \boolean +\defunx {file-name-non-directory?} {fname} \boolean +\begin{desc} +These predicates return true if the string is in directory form, or +file-name form (see the above discussion of these two forms). +Note that they both return true on the ambiguous case of empty string, +which is both a directory (current working directory), and a file name +(the file-system root). +\begin{center} +\begin{tabular}{lll} +File name & \ex{\ldots-directory?} & \ex{\ldots-non-directory?} \\ +\hline +\ex{"src/des"} & \ex{\sharpf} & \ex{\sharpt} \\ +\ex{"src/des/"} & \ex{\sharpt} & \ex{\sharpf} \\ +\ex{"/"} & \ex{\sharpt} & \ex{\sharpf} \\ +\ex{"."} & \ex{\sharpf} & \ex{\sharpt} \\ +\ex{""} & \ex{\sharpt} & \ex{\sharpt} +\end{tabular} +\end{center} +\end{desc} + \begin{defundesc} {file-name-as-directory} {fname} \str Convert a file-name to directory form. Basically, add a trailing slash if needed: @@ -465,14 +482,14 @@ is also frequently useful for expanding file-names. \label{sec:char-sets} Scsh provides a \ex{char-set} type for expressing sets of characters. -These sets are used by some of the delimited input procedures +These sets are used by some of the delimited-input procedures (section~\ref{sec:field-reader}). The character set package that scsh uses was taken from Project Mac's MIT Scheme. \defun{char-set?}{x}\boolean \begin{desc} -Returns true if the object \ex{x} is a character set. +Returns true if the object \var{x} is a character set. \end{desc} \subsection{Creating character sets} @@ -501,7 +518,7 @@ Returns a character set containing every character \var{c} such that \defun{ascii-range->char-set}{lower upper}{char-set} \begin{desc} Returns a character set containing every character whose {\Ascii} -code lies in the range $[\var{lower},\var{upper}]$ inclusive. +code lies in the half-open range $[\var{lower},\var{upper})$. 
\end{desc} \subsection{Querying character sets} @@ -510,9 +527,19 @@ code lies in the range $[\var{lower},\var{upper}]$ inclusive. This procedure returns a list of the members of \var{char-set}. \end{desc} -\defunx{char-set-member?}{char char-set}\boolean +\defunx{char-set-contains?}{char-set char}\boolean \begin{desc} This procedure tests \var{char} for membership in set \var{char-set}. +\remark{Previous releases of scsh called this procedure \ex{char-set-member?}, +reversing the order of the arguments. +This made sense, but was unfortunately the reverse order in which the +arguments appear in MIT Scheme. +A reasonable argument order was not backwards-compatible with MIT Scheme; +on the other hand, the MIT Scheme argument order was counter-intuitive +and at odds with common mathematical notation and the \ex{member} family +of R4RS procedures. + +We sought to escape the dilemma by shifting to a new name.} \end{desc} \subsection{Character set algebra} diff --git a/doc/scsh-manual/syscalls.tex b/doc/scsh-manual/syscalls.tex index 19e0b63..b381845 100644 --- a/doc/scsh-manual/syscalls.tex +++ b/doc/scsh-manual/syscalls.tex @@ -1,13 +1,13 @@ %&latex -*- latex -*- \chapter{System Calls} -\label{chapter:syscalls} +\label{chapt:syscalls} Scsh provides (almost) complete access to the basic {\Unix} kernel services: -processes, files, signals and so forth. These procedures comprise a first cut -at a {\Scheme} binding for {\Posix}, with a few extras thrown in (\eg, -symbolic links, \ex{fchown}, \ex{fstat}). A few have been punted for the -current release (tty control, ioctl, and a few others.) +processes, files, signals and so forth. These procedures comprise a +{\Scheme} binding for {\Posix}, with a few of the more standard extensions +thrown in (\eg, symbolic links, \ex{fchown}, \ex{fstat}, sockets). 
+ \section{Errors} Scsh syscalls never return error codes, and do not use a global @@ -141,8 +141,7 @@ In scsh, most standard {\R4RS} i/o operations (such as \ex{display} or When doing i/o with a file descriptor, the i/o operation is done directly on the file, bypassing any buffered data that may have accumulated in an associated port. -Note that character-at-a-time operations -(\eg, \ex{read-char} and \ex{read-line}) +Note that character-at-a-time operations such as \ex{read-char} are likely to be quite slow when performed directly upon file descriptors. @@ -188,7 +187,15 @@ These special forms are simply syntactic sugar for the {\ttt with\=current\=input\=port*} procedure and friends. \end{desc} -\defun {close} {port/fd} {\undefined} +\defun {set-current-input-port!} {port}{\undefined} +\defunx{set-current-output-port!}{port}{\undefined} +\defunx{set-error-output-port!} {port}{\undefined} +\begin{desc} +These procedures alter the dynamic binding of the current I/O port procedures +to new values. +\end{desc} + +\defun {close} {port/fd} {\boolean} \begin{desc} Close the port or file descriptor. @@ -203,12 +210,21 @@ These special forms are simply syntactic sugar for the \begin{code} (close (fdes->inport fd)) (close (fdes->outport fd))\end{code} + + The procedure returns true if it closed an open port. + If the port was already closed, it returns false; + this is not an error. \end{desc} -\defun {stdports->stdio}{} {\undefined} -\defunx {stdio->stdports} {thunk} {value(s) of thunk} +\defun {stdports->stdio}{} {\undefined} +\defunx {stdio->stdports}{} {\undefined} \begin{desc} - \ex{(stdports->stdio)} is exactly equivalent to the series of + These two procedures are used to synchronise Unix' standard I/O + file descriptors and Scheme's current I/O ports. + + \ex{(stdports->stdio)} causes the standard I/O file descriptors + (0, 1, and 2) to take their values from the current I/O ports. 
+ It is exactly equivalent to the series of redirections:\footnote{Why not \ex{move->fdes}? Because the current output port and error port might be the same port.} @@ -217,7 +233,22 @@ These special forms are simply syntactic sugar for the (dup (current-output-port) 1) (dup (error-output-port) 2)\end{code} % - \ex{stdio->stdports} binds the standard ports \ex{(current-input-port)}, + \ex{stdio->stdports} causes the bindings of the current I/O ports + to be changed to ports constructed over the standard I/O file + descriptors. + It is exactly equivalent to the series of assignments +\begin{code} +(set-current-input-port! (fdes->inport 0)) +(set-current-output-port! (fdes->outport 1)) +(set-error-output-port! (fdes->outport 2))\end{code} +However, you are more likely to find the dynamic-extent variant, +\ex{with-stdio-ports*}, below, to be of use in general programming. +\end{desc} + +\defun{with-stdio-ports*} {thunk} {value(s) of thunk} +\dfnx {with-stdio-ports} {body \ldots} {value(s) of body}{syntax} +\begin{desc} + \ex{with-stdio-ports*} binds the standard ports \ex{(current-input-port)}, \ex{(current-output-port)}, and \ex{(error-output-port)} to be ports on file descriptors 0, 1, 2, and then calls \var{thunk}. It is equivalent to: @@ -226,9 +257,13 @@ These special forms are simply syntactic sugar for the (with-current-output-port (fdes->inport 1) (with-error-output-port (fdes->outport 2) (thunk))))\end{code} +% +The \ex{with-stdio-ports} special form is merely syntactic sugar. \end{desc} + + \subsection{String ports} {\scm} has string ports, which you can use. Scsh has not committed to the particular interface or names that {\scm} uses, so be warned that the @@ -242,7 +277,7 @@ interface described herein may be liable to change.
\defun {make-string-output-port} {} {\port} \defunx {string-output-port-output} {port} {\port} \begin{desc} -A string output port is a port collects the characters given to it into +A string output port is a port that collects the characters given to it into a string. The accumulated string is retrieved by applying \ex{string-output-port-output} to the port. @@ -250,9 +285,10 @@ to the port. \defun {call-with-string-output-port} {procedure} {\str} \begin{desc} - The procedure is called on a port. When it returns, + The \var{procedure} value is called on a port. When it returns, \ex{call-with-string-output-port} returns a string containing the - characters written to the port. + characters that were written to that port during the execution + of \var{procedure}. \end{desc} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -298,22 +334,25 @@ allocated for this file descriptor). This is used to ensure that there is at most one open port for each open file descriptor. The port data structure for file ports has two fields besides the descriptor: -revealed and closed?. When a file port is closed with \ex{(close port)}, the -port's file descriptor is closed, its entry in the port table is cleared, and -the port's closed? field is set to true. +\var{revealed} and \var{closed?}. +When a file port is closed with \ex{(close port)}, +the port's file descriptor is closed, its entry in the port table is cleared, +and the port's \var{closed?} field is set to true. When a file descriptor is closed with \ex{(close fdes)}, any associated port is shifted to a new file descriptor created with \ex{(dup fdes)}. -The port has its revealed count reset to zero. See discussion below. -To really put a stake through a descriptor's heart, you must say one of +The port has its revealed count reset to zero (and hence becomes eligible +for closing on GC). See discussion below. 
+To really put a stake through a descriptor's heart without waiting for +associated ports to be GC'd, you must say one of % \begin{code} (close (fdes->inport fdes)) (close (fdes->output fdes))\end{code} -The revealed field is an aid to garbage collection. It is an integer +The \var{revealed} field is an aid to garbage collection. It is an integer semaphore. If it is zero, the port's file descriptor can be closed when -the port is collected. Essentially, the revealed field reflects whether +the port is collected. Essentially, the \var{revealed} field reflects whether or not the port's file descriptor has escaped to the {\Scheme} user. If the {\Scheme} user doesn't know what file descriptor is associated with a given port, then he can't possibly retain an ``integer handle'' on the @@ -321,22 +360,23 @@ port after dropping pointers to the port itself, so the garbage collector is free to close the file. Ports allocated with \ex{open-output-file} and \ex{open-input-file} are -unrevealed ports---\ie, revealed is initialised to 0. No one knows the port's -file descriptor, so the file descriptor can be closed when the port is -collected. +unrevealed ports---\ie, \var{revealed} is initialised to 0. +No one knows the port's file descriptor, so the file descriptor can be closed +when the port is collected. The functions \ex{fdes->output-port}, \ex{fdes->input-port}, \ex{port->fdes} are used to shift back and forth between file descriptors and ports. When \ex{port->fdes} reveals a port's file descriptor, it increments the port's -revealed field. When the user is through with the file descriptor, he can -call \ex{(release-port-handle port)}, which decrements the count. The function -\ex{(call/fdes fdes/port proc)} automates this protocol. \ex{call/fdes} uses -\ex{dynamic-wind} to enforce the protocol. 
If \ex{proc} throws out of the -\ex{call/fdes}, unwind handler releases the descriptor handle; if the user -subsequently tries to throw \emph{back} into \ex{proc}'s context, the wind handler -raises an error. When the user maps a file descriptor to a port with -\ex{fdes->outport} or \ex{fdes->inport}, the port has its revealed field -incremented. +\var{revealed} field. When the user is through with the file descriptor, he +can call \ex{(release-port-handle \var{port})}, which decrements the count. +The function \ex{(call/fdes fdes/port \var{proc})} automates this protocol. +\ex{call/fdes} uses \ex{dynamic-wind} to enforce the protocol. +If \var{proc} throws out of the \ex{call/fdes} application, +the unwind handler releases the descriptor handle; +if the user subsequently tries to throw \emph{back} into \var{proc}'s +context, the wind handler raises an error. When the user maps a file +descriptor to a port with \ex{fdes->outport} or \ex{fdes->inport}, the port +has its revealed field incremented. Not all file descriptors are created by requests to make ports. Some are inherited on process invocation via \ex{exec(2)}, and are simply part of the @@ -345,14 +385,10 @@ allocated for these file descriptors, is should be considered as a revealed port. For example, when the {\Scheme} shell's process starts up, it opens ports on file descriptors 0, 1, and 2 for the initial values of \ex{(current-input-port)}, \ex{(current-output-port)}, and -\ex{(error-output-port)}. These ports are initialised with revealed set to 1, +\ex{(error-output-port)}. +These ports are initialised with \var{revealed} set to 1, so that stdin, stdout, and stderr are not closed even if the user drops the -port. A fine point: the stdin file descriptor is allocated an unbuffered -port. Because shells frequently share stdin with subprocesses, if the shell -does buffered reads, it might ``steal'' input intended for a subprocess. 
For -this reason, all shells, including sh, csh, and scsh, read stdin unbuffered. -Responsibility for deciding which other files must be opened unbuffered rests -with the shell programmer. +port. Unrevealed file ports have the nice property that they can be closed when all pointers to the port are dropped. This can happen during gc, or at an @@ -367,18 +403,19 @@ the garbage collector. This is critical, since shell programming absolutely requires access to the {\Unix} file descriptors, as their numerical values are a critical part of the process interface. -A port's underlying file descriptor can be shifted around with \ex{dup(2)} when -convenient. That is, the actual fd on top of which a port is constructed can be -shifted around underneath the port by the scsh kernel when necessary. This is -important, because when the user is setting up file descriptors prior to a -\ex{exec(2)}, he may explicitly use a file descriptor that has already been -allocated to some port. In this case, the scsh kernel just shifts the port's -file descriptor to some new location with \ex{dup}, freeing up its old -descriptor. This prevents errors from happening in the following scenario. -Suppose we have a file open on port \ex{f}. Now we want to run a program that -reads input on file 0, writes output to file 1, errors to file 2, and logs -execution information on file 3. We want to run this program with input from -\ex{f}. So we write: +A port's underlying file descriptor can be shifted around with \ex{dup(2)} +when convenient. That is, the actual file descriptor on top of which a port is +constructed can be shifted around underneath the port by the scsh kernel when +necessary. This is important, because when the user is setting up file +descriptors prior to a \ex{exec(2)}, he may explicitly use a file descriptor +that has already been allocated to some port. 
In this case, the scsh kernel +just shifts the port's file descriptor to some new location with \ex{dup}, +freeing up its old descriptor. This prevents errors from happening in the +following scenario. Suppose we have a file open on port \ex{f}. Now we want +to run a program that reads input on file 0, writes output to file 1, errors +to file 2, and logs execution information on file 3. We want to run this +program with input from \ex{f}. +So we write: % \begin{code} (run (/usr/shivers/bin/prog) @@ -491,7 +528,7 @@ Decrement the port's revealed count. \defunx{dup->outport} {port/fd [newfd]} {port} \defunx{dup->fdes} {port/fd [newfd]} {fd} \begin{desc} -These procedures subsume the functionality of C's \ex{dup()} and \ex{dup2()}. +These procedures provide the functionality of C's \ex{dup()} and \ex{dup2()}. The different routines return different types of values: \ex{dup->inport}, \ex{dup->outport}, and \ex{dup->fdes} return input ports, output ports, and integer file descriptors, respectively. @@ -523,11 +560,30 @@ we would like to do output to that tty, we can simply use \ex{(dup->outport p)} to produce an equivalent output port for the tty. \end{desc} -\begin{defundesc} {file-seek} {fd/port offset whence} {\undefined} -\var{whence} is one of \{\ex{seek/set}, \ex{seek/delta}, \ex{seek/end}\}. +\defun {seek} {fd/port offset [whence]} {\integer} +\begin{desc} +Reposition the I/O cursor for a file descriptor or port. +\var{whence} is one of \{\ex{seek/set}, \ex{seek/delta}, \ex{seek/end}\}, +and defaults to \ex{seek/set}. +If \ex{seek/set}, then \var{offset} is an absolute index into the file; +if \ex{seek/delta}, then \var{offset} is a relative offset from the current + I/O cursor; +if \ex{seek/end}, then \var{offset} is a relative offset from the end of file. +The \var{fd/port} argument may be a port or an integer file descriptor. +Not all such values are seekable; +this is dependent on the OS implementation. 
+The return value is the resulting position of the I/O cursor in the I/O stream. \oops{The current implementation doesn't handle \var{offset} arguments that are not immediate integers (\ie, representable in 30 bits).} -\end{defundesc} +\end{desc} + + +\defun {tell} {fd/port} {\integer} +\begin{desc} +Returns the position of the I/O cursor in the I/O stream. +Not all file descriptors or ports support cursor-reporting; +this is dependent on the OS implementation. +\end{desc} \begin{defundesc} {open-file} {fname flags [perms]} {\port} \var{Perms} defaults to \cd{#o666}. @@ -546,7 +602,7 @@ open/exclusive . ; Your Unix may have . ; a few more.\end{code} % -Returns a port. The port is an input port if the \ex{flags} permit it, +Returns a port. The port is an input port if the \var{flags} permit it, otherwise an output port. \R4RS/\scm/scsh do not have input/output ports, so it's one or the other. This should be fixed. (You can hack simultaneous i/o on a file by opening it r/w, taking the result input port, @@ -575,21 +631,10 @@ and duping it to an output port with \ex{dup->outport}.) Returns two ports, the read and write end-points of a {\Unix} pipe. \end{defundesc} -\begin{defundesc} {read-line} {[fd/port retain-newline?]} {{\str} or eof-object} - Reads and returns one line of text; on eof, returns the eof object. - A line is terminated by newline or eof. - - \var{retain-newline?}\ - defaults to {\sharpf}; if true, a terminating newline is included in the - result string, otherwise it is trimmed. - Using this argument allows one to tell whether or not the last line of - input in a file is newline terminated.
-\end{defundesc} - - \defun{read-string}{nbytes [fd/port]} {{\str} or \sharpf} -\begin{defundescx} - {read-string!} {str [fd/port start end]} {nread or \sharpf} +\dfnix{read-string!} {str [fd/port start end]} {nread or \sharpf}{procedure} + {read-string"!@\texttt{read-string"!}} +\begin{desc} These calls read exactly as much data as you requested, unless there is not enough data (eof). \ex{read-string!} reads the data into string \var{str} @@ -612,11 +657,12 @@ Returns two ports, the read and write end-points of a {\Unix} pipe. Any partially-read data is included in the error exception packet. Error returns on non-blocking input are considered an error. -\end{defundescx} +\end{desc} \defun {read-string/partial} {nbytes [fd/port]} {{\str} or \sharpf} -\begin{defundescx} - {read-string!/partial} {str [fd/port start end]} {nread or \sharpf} +\dfnix{read-string!/partial} {str [fd/port start end]} {nread or \sharpf} + {procedure}{read-string"!/partial@\texttt{read-string"!/partial}} +\begin{desc} % These are atomic best-effort/forward-progress calls. Best effort: they may read less than you request if there is a @@ -654,14 +700,55 @@ Returns two ports, the read and write end-points of a {\Unix} pipe. They are also useful when you wish to efficiently process data in large blocks, and your algorithm is insensitive to the block size of any particular read operation. -\end{defundescx} +\end{desc} + +\defun {select }{rvec wvec evec [timeout]}{rvec' wvec' evec'} +\defunx{select!}{rvec wvec evec [timeout]}{nr nw ne} +\begin{desc} +The \ex{select} procedure allows a process to block and wait for events on +multiple I/O channels. +The \var{rvec} and \var{evec} arguments are vectors of input ports and +integer file descriptors; \var{wvec} is a vector of output ports and +integer file descriptors. +The procedure returns three vectors whose elements are subsets of the +corresponding arguments. 
+Every element of \var{rvec'} is ready for input; +every element of \var{wvec'} is ready for output; +every element of \var{evec'} has an exceptional condition pending. + +The \ex{select} call will block until at least one of the I/O channels +passed to it is ready for operation. +The \var{timeout} value can be used to force the call to time-out +after a given number of seconds. It defaults to the special value +\ex{\#f}, meaning wait indefinitely. A zero value can be used to poll +the I/O channels. + +If an I/O channel appears more than once in a given vector---perhaps +occurring once as a Scheme port, and once as the port's underlying +integer file descriptor---only one of these two references may appear +in the returned vector. +Buffered I/O ports are handled specially---if an input port's buffer is +not empty, or an output port's buffer is not yet full, then these +ports are immediately considered eligible for I/O without using +the actual, primitive \ex{select} system call to check the underlying +file descriptor. +This works pretty well for buffered input ports, but is a little +problematic for buffered output ports. + +The \ex{select!} procedure is similar, but indicates the subset +of active I/O channels by side-effecting the argument vectors. +Non-active I/O channels in the argument vectors are overwritten with +{\sharpf} values. +The call returns the number of active elements remaining in each +vector. +As a convenience, the vectors passed in to \ex{select!} are +allowed to contain {\sharpf} values as well as integers and ports. + +\remark{I have found the \ex{select!} interface to be the more + useful of the two. After the system call, it allows you + to check a specific I/O channel in constant time.} +\end{desc} -\begin{defundesc} - {select}{readfds writefds exceptfds timeout}{rfds wfds efds} - % - \remark{Unimplemented.
Should we implement a set-of abstraction first, - Or just use a twos-complement bitvector encoding with bignums?} -\end{defundesc} \begin{defundescx}{write-string}{string [fd/port start end]}\undefined This procedure writes all the data requested. @@ -720,6 +807,48 @@ Returns two ports, the read and write end-points of a {\Unix} pipe. and is an error (the problem is the subsequent flush operation). \end{defundescx} +\subsection{Buffered I/O} + +{\scm} ports use buffered I/O---data is transferred to or from the +OS in blocks. Scsh provides control of this mechanism: the programmer +may force saved-up output data to be transferred to the OS when +he chooses, +and may also choose which I/O buffering policy to employ for a given +port (or turn buffering off completely). + +It can be useful to turn I/O buffering off in some cases, for example +when an I/O stream is to be shared by multiple subprocesses. +For this reason, scsh allocates an unbuffered port for file descriptor 0 +at start-up time. +Because shells frequently share stdin with subprocesses, if the shell +does buffered reads, it might ``steal'' input intended for a subprocess. For +this reason, all shells, including sh, csh, and scsh, read stdin unbuffered. +Applications that can tolerate buffered input on stdin can reset +\ex{(current-input-port)} to block buffering for higher performance. + +\begin{defundesc}{set-port-buffering}{port policy [size]}\undefined +This procedure allows the programmer to assign a particular I/O buffering +policy to a port, and to choose the size of the associated buffer. +It may only be used on new ports, \ie, before I/O is performed on the port. 
+There are three buffering policies that may be chosen: + \begin{inset} + \begin{tabular}{l@{\qquad}l} + \ex{bufpol/block} & General block buffering (general default) \\ + \ex{bufpol/line} & Line buffering (tty default) \\ + \ex{bufpol/none} & Direct I/O---no buffering + \end{tabular} + \end{inset} +The line buffering policy flushes output whenever a newline is output; +whenever the buffer is full; or whenever an input is read from stdin. +Line buffering is the default for ports open on terminal devices. + +The \var{size} argument requests an I/O buffer of \var{size} bytes. +If not given, a reasonable default is used; if given and zero, +buffering is turned off +(\ie, $\var{size} = 0$ for any policy is equivalent to + $\var{policy} = \ex{bufpol/none}$). +\end{defundesc} + \begin{defundesc}{force-output} {[fd/port]}{\noreturn} This procedure does nothing when applied to an integer file descriptor or unbuffered port. @@ -727,6 +856,118 @@ Returns two ports, the read and write end-points of a {\Unix} pipe. and raises a write-error exception on error. Returns no value. \end{defundesc} +\begin{defundesc}{flush-all-ports} {}{\noreturn} + This procedure flushes all open output ports with buffered data. +\end{defundesc} + +\subsection{File locking} + +Scsh provides {\Posix} advisory file locking. +\emph{Advisory} locks are locks that can be checked by user code, +but do not affect other I/O operations. +For example, if a process has an exclusive lock on a region of a file, +other processes will not be able to obtain locks on that region of the file, +but they will still be able to read and write the file with no hindrance. +Using advisory locks requires cooperation amongst the agents accessing +the shared resource. + +\remark{ +Unfortunately, {\Posix} file locks are associated with actual files, +not with associated open file descriptors. 
+Once a process locks a file, using some file descriptor \var{fd}, +the next time \emph{any} file descriptor referencing that file is closed, +all associated locks are released. +Scsh moves Scheme ports from file descriptor to file descriptor with +\ex{dup()} and \ex{close()} as required by the runtime, +so it is impossible to keep file locks open across one of these shifts. +Hence we can only offer {\Posix} advisory file locking directly on raw +integer file descriptors; +regrettably, there are no facilities for locking Scheme ports. + +Note that once a Scheme port is revealed in scsh, the runtime will not +shift the port around with \ex{dup()} and \ex{close()}. +This means the file-locking procedures can then be applied to the port's +associated file descriptor. + +NeXTSTEP users should also note that even minimalist {\Posix} file locking +is not supported for NFS-mounted files in NeXTSTEP; NeXT claims they will +fix this in NS release 4. +} + +{\Posix} allows the user to lock a region of a file with either +an exclusive or shared lock. +Locked regions are described by the \emph{lock-region} record: +\begin{code} +(define-record lock-region + exclusive? + start + len + whence + pid)\end{code} +\index{lock-region?} +\index{lock-region:exclusive?} \index{lock-region:whence} +\index{lock-region:start} \index{lock-region:end} +\index{lock-region:len} \index{lock-region:pid} +% +The \ex{exclusive?} field is true if the lock is exclusive; +false if it is shared. +The \ex{whence} field is one of the values from the \ex{seek} call: +\ex{seek/set}, \ex{seek/delta}, or \ex{seek/end}, +and determines the interpretation of the \ex{start} field: +\begin{itemize} +\item If \ex{seek/set}, the \ex{start} value is simply an absolute index +into the file. +\item If \ex{seek/delta}, the \ex{start} value is an offset from the +file descriptor's current position in the file. +\item If \ex{seek/end}, the \ex{start} value is an offset from the +end of the file. 
+\end{itemize} +The region of the file being locked is given by the \ex{start} and \ex{len} +fields. +The \ex{pid} field gives the process id of the process holding the region +lock, when relevant (see \ex{get-lock-region} below). + +\begin{defundesc}{make-lock-region}{exclusive? start len [whence]}{lock-region} +This procedure makes a lock-region record. +The \ex{whence} field defaults to \ex{seek/set}. +\end{defundesc} + +\defun {lock-region}{fdes lock}{\undefined} +\defunx{lock-region/no-block}{fdes lock}{\boolean} +\begin{desc} +These procedures lock a region of the file referenced by file descriptor +\var{fdes}. +The \ex{lock-region} procedure blocks until the lock is granted; +the non-blocking variant returns a boolean indicating whether or not +the lock was granted. +\end{desc} + +\begin{defundesc}{get-lock-region}{fdes lock}{lock-region or \sharpf} +Return the first lock region on \var{fdes} that overlaps with +the lock region \var{lock}. +If there is no such lock, return false. +This procedure fills out the \ex{pid} field of the returned lock region, +and is the only procedure that has anything to do with this field. +\end{defundesc} + +\begin{defundesc}{unlock-region}{fdes lock}{\undefined} +Release a lock from a file. +\end{defundesc} + +\defun{with-region-lock*}{fdes lock thunk}{value(s) of thunk} +\dfnx{with-region-lock}{fdes lock body \ldots}{value(s) of body}{syntax} +\begin{desc} +This procedure obtains the requested lock, and then calls +\ex{(\var{thunk})}. When \var{thunk} returns, the lock is released. +A non-local exit (\eg, throwing to a saved continuation or raising +an exception) also causes the lock to be released. + +After a normal return from \var{thunk}, its return values are returned +by \ex{with-region-lock*}. +The \ex{with-region-lock} special form is equivalent syntactic sugar. 
+\end{desc} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{File system} @@ -805,6 +1046,17 @@ of any type from the file system: files, (empty) directories, symlinks, fifos, Setting file user or group ownership usually requires root privileges. \end{desc} +\defun {set-file-times} {fname [access-time mod-time]} {\undefined} +\begin{desc} + This procedure sets the access and modified times for the file + \var{fname} to the supplied values (see section~\ref{sec:time} + for the scsh representation of time). + If neither time argument is supplied, they are both taken to be + the current time. You must provide both times or neither. + If the procedure completes successfully, the file's time of last + status-change (\ex{ctime}) is set to the current time. +\end{desc} + \defun {sync-file} {fd/port} \undefined \defunx{sync-file-system}{} \undefined \begin{desc} @@ -825,8 +1077,8 @@ of any type from the file system: files, (empty) directories, symlinks, fifos, The specified file is truncated to \var{len} bytes in length. \end{defundesc} -\begin{defundesc}{file-attributes} {fname/fd/port [chase?]} {file-info-record} - The \ex{file-attributes} procedure +\begin{defundesc}{file-info} {fname/fd/port [chase?]} {file-info-record} + The \ex{file-info} procedure returns a record structure containing everything there is to know about a file. If the \var{chase?} flag is true (the default), then the procedure chases symlinks and reports on @@ -848,10 +1100,9 @@ of any type from the file system: files, (empty) directories, symlinks, fifos, uid ; Owner of file. gid ; File's group id. size ; Size of file, in bytes. - atime ; Last access time. - mtime ; Last status-change time. - ctime) ; Creation time.\end{code} -\index{file-info} + atime ; Time of last access. + mtime ; Time of last mod. 
+ ctime) ; Time of last status change.\end{code} \index{file-info:type}\index{file-info:device}\index{file-info:inode}% \index{file-info:mode}\index{file-info:nlinks}\index{file-info:uid}% \index{file-info:gid}\index{file-info:size}\index{file-info:atime}% @@ -864,10 +1115,10 @@ of any type from the file system: files, (empty) directories, symlinks, fifos, A file-info record is discriminated with the \ex{file-info?} predicate. The following procedures all return selected information about -a file; they are built on top of \ex{file-attributes}, and are +a file; they are built on top of \ex{file-info}, and are called with the same arguments that are passed to it. \begin{inset} -\newcommand{\Ex}[1]{\ex{#1}\index{\tt{#1}}} +\newcommand{\Ex}[1]{\ex{#1}\index{#1@{\tt{#1}}}} \begin{tabular}{ll} Procedure & returns \\\hline \Ex{file-type} & type \\ @@ -888,6 +1139,16 @@ Example: ;; All my files in /usr/tmp: (filter (\l{f} (= (file-owner f) (user-uid))) (directory-files "/usr/tmp")))\end{code} + +\remark{\ex{file-info} was named \ex{file-attributes} in releases of scsh + prior to release 0.4. We changed the name to \ex{file-info} for + consistency with the other information-retrieval procedures in + scsh: \ex{user-info}, \ex{group-info}, \ex{host-info}, + \ex{network-info }, \ex{service-info}, and \ex{protocol-info}. + + The \ex{file-attributes} binding is still supported in the current + release of scsh, but is deprecated, and may go away in a future + release.} \end{defundesc} \defun {file-directory?}{fname/fd/port [chase?]}{\boolean} @@ -899,7 +1160,7 @@ Example: \begin{desc} These procedures are file-type predicates that test the type of a given file. -The are applied to the same arguments to which \ex{file-attributes} is applied; +They are applied to the same arguments to which \ex{file-info} is applied; the sole exception is \ex{file-symlink?}, which does not take the optional \var{chase?} second argument. 
\begin{inset} @@ -912,7 +1173,7 @@ For example, \end{desc} \defun {file-not-readable?} {fname} \boolean -\defunx{file-not-writeable?} {fname} \boolean +\defunx{file-not-writable?} {fname} \boolean \defunx{file-not-executable?} {fname} \boolean \begin{desc} Returns: @@ -1107,6 +1368,11 @@ All wild-card characters in \var{str} are quoted with a backslash. which is used as a match predicate. It will be repeatedly called with a candidate file-name to test. The file-name will be the entire path accumulated. + If the procedure raises an error condition, \ex{file-match} will + catch the error and treat it as a failed match. + This keeps \ex{file-match} from being blown out of the water + by applying tests to dangling symlinks and other similar situations. + \end{itemize} Some examples: @@ -1192,7 +1458,7 @@ delimiter. allocated when the file is opened. This will work if the file only needs to be opened once. \item If the file needs to be opened twice or more, create it in a - protected directory, \ex, \verb|$HOME|. + protected directory, \eg, \verb|$HOME|. \item Ensure that \ex{/usr/tmp} has its sticky bit set. This requires system administrator privileges. \end{enumerate} @@ -1351,10 +1617,11 @@ an exception. \end{desc} -\defun {\%exec} {prog arglist env} \undefined -\defunx {exec-path-search} {fname pathlist} \str +\defun {\%exec} {prog arglist env} \undefined +\defunx{exec-path-search} {fname pathlist} {{\str} or \sharpf} \begin{desc} -\var{Arglist} is a list of arguments; +The \ex{\%exec} procedure is the low-level interface to the system call. +The \var{arglist} parameter is a list of arguments; \var{env} is either a string$\rightarrow$string alist or {\sharpt}. The new program's \cd{argv[0]} will be taken from \ex{(car \var{arglist})}, \emph{not} from \var{prog}. @@ -1362,17 +1629,22 @@ An environment of {\sharpt} means the current process' environment. \verb|%exec| does not flush buffered output (see \ex{flush-all-ports}). 
+All exec procedures, including \verb|%exec|, coerce the \cd{prog} and \cd{arg} +values to strings using the usual conversion rules: numbers are converted to +decimal numerals, and symbols converted to their print-names. + \ex{exec-path-search} searches the directories of \var{pathlist} looking for an occurrence of file \ex{fname}. If no executable file is found, it returns {\sharpf}. If \ex{fname} contains a slash character, the path search is short-circuited, but the procedure still checks to ensure that the file exists and is executable---if not, it still returns {\sharpf}. +Users of this procedure should be aware that it invites a potential race +condition: between checking the file with \ex{exec-path-search} and executing +it with \ex{\%exec}, the file's status might change. +The only atomic way to do the search is to loop over the candidate +file names, exec'ing each one and looping when the exec operation fails. See \cd{$path} and \ex{exec-path-list}, below. - -All exec procedures, including \verb|%exec|, coerce the \cd{prog} and \cd{arg} -values to strings using the usual conversion rules: numbers are converted to -decimal numerals, and symbols converted to their print-names. \end{desc} \defun {exit} {[status]} \noreturn @@ -1384,27 +1656,41 @@ The low-level \verb|%exit| procedure immediately terminates the process without flushing buffered output. \end{desc} +\begin{defundesc} {call-terminally} {thunk} \noreturn + \ex{call-terminally} calls its thunk. When the thunk returns, the process + exits. Although \ex{call-terminally} could be implemented as + \codex{(\l{thunk} (thunk) (exit 0))} + an implementation can take advantage of the fact that this procedure never + returns. For example, the runtime can start with a fresh stack and also + start with a fresh dynamic environment, where shadowed bindings are + discarded. This can allow the old stack and dynamic environment to be + collected (assuming this data is not reachable through some live + continuation). 
+\end{defundesc} + \begin{defundesc}{suspend}{} \undefined Suspend the current process with a SIGSTOP signal. \end{defundesc} -\defun {fork} {[thunk]} {pid or \sharpf} -\defunx {\%fork} {[thunk]} {pid or \sharpf} +\defun {fork} {[thunk]} {proc or \sharpf} +\defunx {\%fork} {[thunk]} {proc or \sharpf} \begin{desc} \ex{fork} with no arguments is like C \ex{fork()}. - In the parent process, it returns - the child's pid. In the child process, it returns {\sharpf}. + In the parent process, it returns the child's \emph{process object} + (see below for more information on process objects). + In the child process, it returns {\sharpf}. \ex{fork} with an argument only returns in the parent process, returning - the child pid. The child process calls \var{thunk} and then exits. + the child's process object. + The child process calls \var{thunk} and then exits. \ex{fork} flushes buffered output before forking, and sets the child process to non-interactive. \verb|%fork| does not perform this bookkeeping; it simply forks. \end{desc} -\defun {fork/pipe} {[thunk]} {pid or \sharpf} -\defunx{\%fork/pipe} {[thunk]} {pid or \sharpf} +\defun {fork/pipe} {[thunk]} {proc or \sharpf} +\defunx{\%fork/pipe} {[thunk]} {proc or \sharpf} \begin{desc} Like \ex{fork} and \ex{\%fork}, but the parent and child communicate via a pipe connecting the parent's stdin to the child's stdout. These procedures @@ -1429,10 +1715,42 @@ Suspend the current process with a SIGSTOP signal. (c)))\end{code} % which returns the pid of \ex{c}'s process. + + Note that these procedures affect file descriptors, not ports. + That is, the pipe is allocated connecting the child's file descriptor + 1 to the parent's file descriptor 0. 
+ \emph{Any previous Scheme port built over these affected file descriptors + is shifted to a new, unused file descriptor with \ex{dup} before + allocating the I/O pipe.} + This means, for example, that the ports bound to \ex{(current-input-port)} + and \ex{(current-output-port)} in either process are not affected---they + still refer to the same I/O sources and sinks as before. + Remember the simple scsh rule: Scheme ports are bound to I/O sources + and sinks, \emph{not} particular file descriptors. + + If the child process wishes to rebind the current output port + to the pipe on file descriptor 1, it can do this using + \ex{with-current-output-port} or a related form. + Similarly, if the parent wishes to change the current input port + to the pipe on file descriptor 0, it can do this using + \ex{set-current-input-port!} or a related form. + Here is an example showing how to set up the I/O ports on both sides + of the pipe: +\begin{code} +(fork/pipe (\l{} + (with-current-output-port (fdes->outport 1) + (display "Hello, world.\\n")))) + +(set-current-input-port! (fdes->inport 0)) +(read-line) ; Read the string output by the child.\end{code} +None of this is necessary when the I/O is performed by an exec'd +program in the child or parent process, only when the pipe will +be referenced by Scheme code through one of the default current I/O +ports. \end{desc} -\defun {fork/pipe+} {conns [thunk]} {pid or \sharpf} -\defunx {\%fork/pipe+} {conns [thunk]} {pid or \sharpf} +\defun {fork/pipe+} {conns [thunk]} {proc or \sharpf} +\defunx {\%fork/pipe+} {conns [thunk]} {proc or \sharpf} \begin{desc} Like \ex{fork/pipe}, but the pipe connections between the child and parent are specified by the connection list \var{conns}. @@ -1441,31 +1759,255 @@ Suspend the current process with a SIGSTOP signal. process form for a description of connection lists.
\end{desc} -\begin{defundesc} {wait} {[pid]} {status [pid]} - Simply calling \ex{(wait)} will wait for any child to die, then - return the child's exit status and pid as multiple values. +\subsection{Process objects and process reaping} +\label{sec:proc-objects} +Scsh uses \emph{process objects} to represent Unix processes. +They are created by the \ex{fork} procedure, and have the following +exposed structure: +\begin{code} +(define-record proc + pid)\end{code} +\index{proc}\index{proc?}\index{proc:pid} +The only exposed slot in a proc record is the process' pid, +the integer id assigned by Unix to the process. +The only exported primitive procedures for manipulating process objects +are \ex{proc?} and \ex{proc:pid}. +Process objects are created with the \ex{fork} procedure. - With an argument, \ex{(wait \var{pid})} waits for that specific process, - then returns its exit status as a single value. - - If a candidate child has already exited but not yet been waited for, - \ex{wait} returns immediately. - - \remark{Describe the way that wait reaps defunct processes into - the internal table. Document all the architected wait machinery.} +\begin{defundesc}{pid->proc}{pid [probe?]}{proc} +This procedure maps integer Unix process ids to scsh process objects. +It is intended for use in interactive and debugging code, +and is deprecated for use in production code. +If there is no process object in the system indexed by the given pid, +\ex{pid->proc}'s action is determined by the \var{probe?} parameter +(default \sharpf): +\begin{center} +\begin{tabular}{|l|l|} +\hline +\var{probe?} & Return \\ \hline\hline +\sharpf & \emph{signal error condition.} \\ \hline +\ex{'create} & Create new proc object. \\ \hline +True value & \sharpf \\ \hline +\end{tabular} +\end{center} \end{defundesc} -When a child process dies, its parent can call the \ex{wait} procedure -to recover the exit status of the child. 
-The exit status is a small integer that can be encodes information +Sometime after a child process terminates, scsh will perform a \ex{wait} +system call on the child in background, caching the process' exit status +in the child's proc object. +This is called ``reaping'' the process. +Once the child has been waited, the Unix kernel can free the storage allocated +for the dead process' exit information, so process reaping prevents the process +table from becoming cluttered with un-waited dead child processes +(a.k.a. ``zombies''). +This can be especially severe if the scsh process never waits on child +processes at all; if the process table overflows with forgotten zombies, +the OS may be unable to fork further processes. + +Reaping a child process moves its exit status information from the kernel +into the scsh process, where it is cached inside the child's process object. +If the scsh user drops all pointers to the process object, it will simply be +garbage collected. +On the other hand, if the scsh program retains a pointer to the process object, +it can use scsh's \ex{wait} system call to synchronise with the child and +retrieve its exit status multiple times (this is not possible with simple +Unix integer pids in C---the programmer can only wait on a pid once). + +Thus, process objects allow the scsh programmer to do two things not allowed +in other programming environments: +\begin{itemize} +\item Subprocesses that are never waited on are still removed from the + process table, and their associated exit status data is eventually + automatically garbage collected. +\item Subprocesses can be waited on multiple times. +\end{itemize} + +However, note that once a child has exited, if the scsh programmer +drops all pointers to the child's proc object, the child's exit status +will be reaped and thrown away. +This is the intended behaviour, and it means that integer pids are not +enough to cause a process's exit status to be retained by the scsh runtime.
+(This is because it is clearly impossible to GC data referenced by integers.) + +As a convenience for interactive use and debugging, all procedures that +take process objects will also accept integer Unix pids as arguments, +coercing them to the corresponding process objects. +Since integer process ids are not reliable ways to keep a child's exit +status from being reaped and garbage collected, programmers are encouraged +to use process objects in production code. + +\begin{defundesc}{autoreap-policy}{[policy]}{old-policy} +The scsh programmer can choose different policies for automatic +process reaping. +The policy is determined by applying this procedure to one of the +values \ex{'early}, \ex{'late}, or {\sharpf} (\ie, no autoreap). +\begin{description} +\item [early] + The child is reaped from the {\Unix} kernel's process table + into scsh as soon as possible after it dies. In the current + release of scsh, this happens at the next call to + \ex{wait}---when scsh is asked to wait for a particular + child to exit, it will reap \emph{all} outstanding zombies. + When signal handlers are added to a future release of scsh, + early autoreaping will use the \ex{SIGCHLD} signal to reap + zombies with minimum delay. + +\item [late] + The child is not autoreaped until it dies \emph{and} the scsh program + drops all pointers to its process object. That is, the process + table is cleaned out during garbage collection. + +\item [\sharpf] + If autoreaping is turned off, process reaping is completely under + control of the programmer, who can force outstanding zombies to + be reaped by manually calling the \ex{reap-zombies} procedure + (see below). +\end{description} +Note that under any of the autoreap policies, a particular process $p$ can +be manually reaped into scsh by simply calling \ex{(wait $p$)}. +\emph{All} zombies can be manually reaped with \ex{reap-zombies}. + +The \ex{autoreap-policy} procedure returns the policy's previous value. 
+Calling \ex{autoreap-policy} with no arguments returns the current +policy without changing it. +\end{defundesc} + + +\begin{defundesc}{reap-zombies}{}{\boolean} +This procedure reaps all outstanding exited child processes into scsh. +It returns true if there are no more child processes to wait on, and +false if there are outstanding processes still running or suspended. +\end{defundesc} + +\subsubsection{Issues with process reaping} +Reaping a process does not reveal its process group at the time of +death; this information is lost when the process is reaped. +This means that a dead, reaped process is \emph{not eligible} as a return +value for a future \ex{wait-process-group} call. +This is not likely to be a problem for most code, as programs almost +never wait on exited processes by process group. +Process group waiting is usually applied to \emph{stopped} processes, +which are never reaped. +So it is unlikely that this will be a problem for most programs. + +%%% Actually, this is *not* a problem if you stick with proc objects, instead +%%% of using pids, so I commented it out. +% +%\paragraph{Pid aliasing} +%Second, once a process has been reaped, its 16-bit process id becomes +%available to Unix for re-use. +%So it is conceivable that a long time in the future, a \ex{fork} operation +%could produce a subprocess with the identical pid, causing \ex{wait} +%operations on the old, dead, reaped child, and the new child to become +%confused. +%This kind of pid aliasing is intrinsic to the nature of Unix's single-use pid +%deallocation policy, +%but is very, very unlikely to happen in practice, +%given the 16-bit size of the pid space. +%Scsh will detect occurrences of pid aliasing, +%in the unlikely event that one occurs. +%When \ex{fork} creates a proc object, it checks to see if the scsh heap +%contains an already existing proc object with the same pid as the newly forked +%process.
+%If so, an exception is raised; if not handled by the program, this will stop +%the program, either killing the process or invoking an interactive debugger. + +Automatic process reaping is a useful programming convenience. +However, if a program is careful to wait for all children, and does not wish +automatic reaping to happen, the programmer can simply turn process +autoreaping off. + +Programs that do not wish to use automatic process reaping should be +aware that some scsh routines create subprocesses but do not return +the child's pid: \ex{run/port*}, and its related procedures and +special forms (\ex{run/strings}, \emph{et al.}). +Automatic process reaping will clean the child processes created by +these procedures out of the kernel's process table. +If a program doesn't use process reaping, it should either avoid these +forms, or use \ex{wait-any} to wait for the children to exit. + +\subsection{Process waiting} + +\defun {wait} {proc/pid [flags]} {status} +\begin{desc} + This procedure waits until a child process exits, and returns its + exit code. The \var{proc/pid} argument is either a process object + (section \ref{sec:proc-objects}) or an integer process id. + \ex{Wait} returns the child's exit status code (or suspension code, + if the \ex{wait/stopped-children} option is used, see below). + Status values can be queried with the procedures in section + \ref{sec:wait-codes}. + + The \var{flags} argument is an integer whose bits specify + additional options. It is composed by or'ing together the following + flags: + \begin{center} + \begin{tabular}{|l|l|} + \hline + Flag & Meaning \\ \hline \hline + \ex{wait/poll} & Return {\sharpf} immediately if + child still active. \\ \hline + \ex{wait/stopped-children} & Wait for suspend as well as exit. \\ \hline + \end{tabular} + \end{center} +\end{desc} + +\begin{defundesc} {wait-any} {[flags]} {[proc status]} + The optional \var{flags} argument is as for \ex{wait}. 
+ This procedure waits for any child process to exit (or stop, if the + \ex{wait/stopped-children} flag is used). + It returns the process' process object and status code. + If there are no children left for which to wait, the two values + \ex{[{\sharpf} {\sharpt}]} are returned. + If the \ex{wait/poll} flag is used, and none of the children + are immediately eligible for waiting, + then the values \ex{[{\sharpf} {\sharpf}]} are returned: + \begin{center} + \begin{tabular}{|l|l|} + \hline + [{\sharpf} {\sharpf}] & Poll, none ready \\ \hline + [{\sharpf} {\sharpt}] & No children \\ \hline + \end{tabular} + \end{center} + + \ex{Wait-any} will not return a process that has been previously waited + by any other process-wait procedure (\ex{wait}, \ex{wait-any}, + and \ex{wait-process-group}). + It will return reaped processes that haven't yet been waited. + + The use of \ex{wait-any} is deprecated. +\end{defundesc} + +\begin{defundesc} {wait-process-group} {proc/pid [flags]} {[proc status]} + This procedure waits for any child whose process group is \var{proc/pid} + (either a process object or a pid). + The \var{flags} argument is as for \ex{wait}. + + Note that if the programmer wishes to wait for exited processes + by process group, the program should take care not to use process + reaping (section \ref{sec:proc-objects}), as this loses + process group information. However, most process-group waiting is + for stopped processes (to implement job control), so this is rarely + an issue, as stopped processes are not subject to reaping. +\end{defundesc} + + +\subsection{Analysing process status codes} +\label{sec:wait-codes} +When a child process dies (or is suspended), its parent can call the \ex{wait} +procedure to recover the exit (or suspension) status of the child. +The exit status is a small integer that encodes information describing how the child terminated.
-The bit-level format of the exit status is not defined by {\Posix} -(you must use the following three functions to decode one). +The bit-level format of the exit status is not defined by {\Posix}; +you must use the following three functions to decode one. However, if a child terminates normally with exit code 0, {\Posix} does require \ex{wait} to return an exit status that is exactly zero. So \ex{(zero? \var{status})} is a correct way to test for non-error, -normal termination. +normal termination, \eg, +\begin{code} +(if (zero? (run (rcp scsh.tar.gz lambda.csd.hku.hk:))) + (delete-file "scsh.tar.gz"))\end{code} \defun {status:exit-val}{status}{{\integer} or \sharpf} \defunx{status:stop-sig}{status}{{\integer} or \sharpf} @@ -1487,19 +2029,6 @@ returns the signal that terminated the child. Otherwise, this function returns false. \end{desc} -\begin{defundesc} {call-terminally} {thunk} \noreturn - \ex{call-terminally} calls its thunk. When the thunk returns, the process - exits. Although \ex{call-terminally} could be implemented as - \codex{(\l{thunk} (thunk) (exit 0))} - an implementation can take advantage of the fact that this procedure never - returns. For example, the runtime can start with a fresh stack and also - start with a fresh dynamic environment, where shadowed bindings are - discarded. This can allow the old stack and dynamic environment to be - collected (assuming this data is not reachable through some live - continuation). -\end{defundesc} - - %% Dereleased until we have a more portable implementation. %\defun{halts?}{proc}\boolean @@ -1550,14 +2079,15 @@ The special form \ex{with-cwd} is simply syntactic sugar for \ex{with-cwd*}. 
\defun {pid}{} \fixnum \defunx {parent-pid}{} \fixnum -\defunx {process-group} {[pid]} \fixnum -\defunx {set-process-group} {[pid] pgrp} \undefined % [not implemented] +\defunx {process-group} {} \fixnum +\defunx {set-process-group} {[proc] pgrp} \undefined % [not implemented] \begin{desc} \ex{(pid)} and \ex{(parent-pid)} retrieve the process id for the current process and its parent. -If the OS supports process groups, a process' process group can be -retrieved and set with \ex{process-group} and \ex{set-process-group}. -The affected process for these two procedures defaults to the current +\ex{(process-group)} returns the process group of the current process. +A process' process-group can be set with \ex{set-process-group}; +the value \var{proc} specifies the affected process. It may be either +a process object or an integer process id, and defaults to the current process. \end{desc} @@ -1589,19 +2119,29 @@ The \ex{set-uid} and \ex{set-gid} routines correspond to the {\Posix} \begin{desc} Returns four values: \begin{tightinset} -\begin{tabular}{l} +\begin{flushleft} user CPU time in clock-ticks \\ system CPU time in clock-ticks \\ user CPU time of all descendant processes \\ system CPU time of all descendant processes -\end{tabular} +\end{flushleft} \end{tightinset} +Note that CPU time clock resolution is not the same as +the real-time clock resolution provided by \ex{time+ticks}. +That's Unix. +\end{desc} + +\defun{cpu-ticks/sec}{} {integer} +\begin{desc} +Returns the resolution of the CPU timer in clock ticks per second. +This can be used to convert the times reported by \ex{process-times} +to seconds.
\end{desc} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{User and group db access} -These procedures are used to access the user and group database +\section{User and group database access} +These procedures are used to access the user and group databases (\eg, the ones traditionally stored in \ex{/etc/passwd} and \ex{/etc/group}.) \defun {user-info} {uid/name} {record} @@ -1630,7 +2170,7 @@ form. \defun {group-info} {gid/name} {record} \begin{desc} Return a \ex{group-info} record giving the recorded information for a -particular user: +particular group: \index{group-info} \index{group-info:name} \index{group-info:gid} @@ -1638,7 +2178,7 @@ particular user: \begin{code} (define-record group-info name gid members)\end{code} -The \var{gid/name} argument is either an integer gid or a string user-name. +The \var{gid/name} argument is either an integer gid or a string group-name. \end{desc} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1650,7 +2190,7 @@ The \var{gid/name} argument is either an integer gid or a string user-name. The list of strings \ex{command-line-arguments} contains the arguments passed to the scsh process on the command line. Calling \ex{(command-line)} returns the complete \ex{argv} -string list, including the program. So if we run a shell script +string list, including the program. So if we run a scsh program \codex{/usr/shivers/bin/myls -CF src} then \ex{command-line-arguments} is \codex{("-CF" "src")} @@ -1698,20 +2238,10 @@ Example: -title ,title -e ,@command-line-arguments))))\end{code} % -A subtlety: there are two ways to invoke a scsh program. -One is as a simple binary, -the other is as an interpreted script via the {\Unix} -\ex{\#!} \ex{exec(2)} feature. -When a binary is running with scsh code, \ex{(command-line)} returns exactly -the command line. 
-However, when the scsh interpreter is invoked with a scsh script -specified on the command line, then the scsh startup code doctors the list -returned by \ex{(command-line)} to make the shell script itself be the program -(\ie, \ex{(argv 0)}), instead of the string \ex{"scsh"}, -or whatever the real \ex{(argv 0)} value is. -In addition, scsh will delete scsh-specific flags from the argument -list. -So if we have a shell script in file \ex{fullecho}: +A subtlety: when the scsh interpreter is used to execute a scsh program, +the program name reported in the head of the \ex{(command-line)} list +is the scsh program, {\em not} the interpreter. +For example, if we have a shell script in file \ex{fullecho}: \begin{code} #!/usr/local/bin/scsh -s !# @@ -1724,46 +2254,40 @@ the program will print out not \codex{/usr/local/bin/scsh -s fullecho hello world} -This argument line processing ensures that if a scsh script is subsequently -compiled into a standalone executable, that its semantics will be +This argument line processing ensures that if a scsh program is subsequently +compiled into a standalone executable or byte-compiled to a heap-image +executable by the {\scm} virtual machine, its semantics will be unchanged---the arglist processing is invariant. In effect, the \codex{/usr/local/bin/scsh -s} is not part of the program; it's a specification for the machine to execute the program on, so it is not properly part of the program's argument list. -\remark{The truth: - The above discussion assumes some things that don't exist: - \begin{itemize} - \item An implementation of scsh that allows scsh scripts to - be compiled to native code binaries. - \item A native code binary implementation of the scsh interpreter. - \end{itemize} - What there is right now is just the {\scm} virtual machine, - invoked with a scsh heap image. 
- } \end{desc} \section{System parameters} -\defun {maximum-fds}{}\fixnum -\defunx {page-size}{} \fixnum -\defunx {system-name}{} \str +%\defun {maximum-fds}{}\fixnum +%\defunx {page-size}{} \fixnum +\defun {system-name}{} \str \begin{desc} -Only \ex{system-name} is implemented. +Returns the name of the host on which we are executing. +This may be a local name, such as ``solar,'' as opposed to a +fully-qualified domain name such as ``solar.csie.ntu.edu.tw.'' \end{desc} - \section{Signal system} Signal numbers are bound to the variables \ex{signal/hup}, \ex{signal/int}, \ldots -\defun {signal-process} {pid sig} \undefined -\defunx {signal-procgroup} {prgrp sig} \undefined +\defun {signal-process} {proc sig} \undefined +\defunx {signal-process-group} {prgrp sig} \undefined \begin{desc} These two procedures send signals to a specific process, and all the processes in a specific process group, respectively. +The \var{proc} and \var{prgrp} arguments are either processes +or integer process ids. \end{desc} I haven't done signal handlers yet. Should be straightforward: a mechanism @@ -1781,14 +2305,14 @@ interval (\eg, a microsecond timer), as this is not in {\Posix}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Time} -This time package, does not currently work with NeXTSTEP, as NeXTSTEP -does not provide a Posix-compliant time interface that will successfully -link. +\label{sec:time} +This time package does not currently work with NeXTSTEP, as NeXTSTEP +does not provide a {\Posix}-compliant time library that will even link. Scsh's time system is fairly sophisticated, particularly with respect to its careful treatment of time zones. However, casual users shouldn't be intimidated; -most of the complexity is optional, +all of the complexity is optional, and defaulting all the optional arguments reduces the system to a simple interface. @@ -1797,9 +2321,9 @@ to a simple interface. 
official name for what is colloquially referred to as ``Greenwich Mean Time.'' -Posix allows a single time zone to specify \emph{two} different offsets from -UTC: one standard one, and one for ``summer time.'' Summer time is frequently -some sort of daylight savings time. +{\Posix} allows a single time zone to specify \emph{two} different offsets +from UTC: one standard one, and one for ``summer time.'' +Summer time is frequently some sort of daylight savings time. The scsh time package consistently uses this terminology: we never say ``gmt'' or ``dst;'' we always say ``utc'' and ``summer time.'' @@ -1809,7 +2333,10 @@ We have two types: \emph{time} and \emph{date}. \index{time} A \emph{time} specifies an instant in the history of the universe. -It is location and time-zone independent. A time is a real value +It is location and time-zone independent.\footnote{Physics pedants please note: + The scsh authors live in a Newtonian universe. We disclaim responsibility + for calculations performed in non-ANSI standard light-cones.} +A time is a real value giving the number of elapsed seconds since the Unix ``epoch'' (Midnight, January 1, 1970 UTC). Time values provide arbitrary time resolution, @@ -1845,7 +2372,7 @@ instant in time. If the \ex{tz-name} field is given, it is a time-zone string such as \ex{"EST"} or \ex{"HKT"} understood by the OS. -Since Posix time-zone strings can specify dual standard/summer time-zones +Since {\Posix} time-zone strings can specify dual standard/summer time-zones (e.g., "EST5EDT" specifies U.S. Eastern Standard/Eastern Daylight Time), the value of the \ex{summer?} field is used to resolve the amiguous boundary cases. For example, on the morning of the Fall daylight savings @@ -1889,7 +2416,7 @@ This is described for each procedure below. Integer & Seconds of offset from UTC. For example, New York City is -18000 (-5 hours), San Francisco is -28800 (-8 hours). 
\\ -String & A Posix time zone string understood by the OS +String & A {\Posix} time zone string understood by the OS (\ie., the sort of time zone assigned to the \ex{\$TZ} environment variable). \end{tabular} @@ -1905,7 +2432,7 @@ String & A Posix time zone string understood by the OS \defunx{ticks/sec} {} \real \begin{desc} The current time, with sub-second resolution. - Sub-second resolution is not provided by Posix, + Sub-second resolution is not provided by {\Posix}, but is available on many systems. The time is returned as elapsed seconds since the Unix epoch, plus a number of sub-second ``ticks.'' @@ -1954,7 +2481,7 @@ String & A Posix time zone string understood by the OS and is as described in the time-zone section. If the \var{tz} argument is an integer, the date's \ex{tz-name} - field is a Posix time zone of the form + field is a {\Posix} time zone of the form ``\ex{UTC+\emph{hh}:\emph{mm}:\emph{ss}}''; the trailing \ex{:\emph{mm}:\emph{ss}} portion is deleted if it is zeroes. \end{desc} @@ -2082,7 +2609,8 @@ true & Resolve an ambiguous time in favor of summer time. % It is ignored if the time zone doesn't have a summer variant. %\end{desc} -\defun {fill-in-date!}{date}{date} +\dfni {fill-in-date!}{date}{date}{procedure} + {fill-in-date"!@\texttt{fill-in-date"!}} \begin{desc} This procedure fills in missing, redundant slots in a date record. In decreasing order of priority: @@ -2310,3 +2838,6 @@ It computes \ex{(user-login-name)} from the system call \ex{(user-uid)}. stores the value in the global variable \ex{exec-path-list}. This list is used for \ex{exec-path} and \ex{exec-path/env} searches. 
\end{desc} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\input{tty} diff --git a/doc/scsh-manual/test.tex b/doc/scsh-manual/test.tex new file mode 100644 index 0000000..e146a95 --- /dev/null +++ b/doc/scsh-manual/test.tex @@ -0,0 +1,17 @@ +%&latex -*- latex -*- + +\documentclass[twoside]{report} +\usepackage{code,boxedminipage,draftfooters,palatino,ct,makeidx, + headings,mantitle,array,matter,mysize10} + +\parskip = 3pt plus 3pt +\sloppy + +\input{decls} +%%% End preamble + +\begin{document} + +\include{args} + +\end{document} diff --git a/doc/scsh-manual/todo.tex b/doc/scsh-manual/todo.tex index b61e002..b9818c6 100644 --- a/doc/scsh-manual/todo.tex +++ b/doc/scsh-manual/todo.tex @@ -1,14 +1,31 @@ -%&latex -*- latex -*- +>%&latex -*- latex -*- \chapter{Todo} {\parindent 0pt -The {\LaTeX} hackery needs yet another serious pass. Most importantly, -long procedure ``declarations'' need to be broken across two lines. +We'd love to have people implement these subsystems and +fold them into the scsh release: +\begin{itemize} +\item More network protocols. Telnet and ftp would be the most important. +\item An ILU interface. +\item An RPC system, with ``tail-recursion.'' +\item Interfaces to relational db's. This would be quite useful for + Web servers. +\end{itemize} -Fix up 0-or-more and 1-or-more parameter typesetting, with subscripts. +Manual hacking: +\begin{itemize} +\item The {\LaTeX} hackery needs yet another serious pass. Most importantly, + long procedure ``declarations'' need to be broken across two lines. -Parameter subscripts need to be made real subscripts. + +\item Fix up 0-or-more and 1-or-more parameter typesetting, with subscripts. + +\item Parameter subscripts need to be made real subscripts. + +\item Soup up the markup processor, and redo manual in markup. Generate + LaTeX, HTML, and info versions. Alternatively, persuade some kind + soul to hand-port manual to HTML or info.
+\end{itemize} Job control, after \ex{jcontrol.scm} @@ -21,11 +38,7 @@ Interrupt system. Make it all coexist with S48 threads as well as can be done for Unix. The DEC SRC tech report gives a good discussion of the issues. -Support for file locking: \ex{(lock-file fd op)}, \ex{with-file-locked}, \ldots - -Testing broken symlinks -- new value for \var{chase?} flag? - -Interactive flag machinery +Testing broken symlinks---new value for \var{chase?} flag? Rename and release \ex{ensure-file-name-is-\{non,\}directory}. @@ -35,18 +48,7 @@ Other things should be available: hash tables, sort, list utils, pattern matchers. But things start to overload. The module system is the appropriate way to use these. -Support for writing scripts that use the module language. - -Need calls to control port i/o buffering. - Need to do file-control (\ie, \ex{fcntl()}). \ex{fcntl} is ugly. Better to have a procedure for each different operation. - -Tty stuff and control tty. - -More documentation for the \ex{wait()} machinery. - -We need a general time/date parser, that can convert strings like -``Thursday after Christmas'' into date records. } diff --git a/doc/scsh-manual/tty.tex b/doc/scsh-manual/tty.tex new file mode 100644 index 0000000..191fa3c --- /dev/null +++ b/doc/scsh-manual/tty.tex @@ -0,0 +1,707 @@ +%&latex -*- latex -*- +% Fix OXTABS footnote bug +% Figures should be dumped out earlier? Pack two to a page? + +\section{Terminal device control} +\label{sect:tty} + +\newcommand{\fr}[1]{\makebox[0pt][r]{#1}} + +% \ex{#1} and also generates an index entry. +\newcommand{\exi}[1]{\index{#1@\texttt{#1}}\ex{#1}} +\newcommand{\indextt}[1]{\index{#1@\texttt{#1}}} + +Scsh provides a complete set of routines for manipulating terminal +devices---putting them in ``raw'' mode, changing and querying their +special characters, modifying their i/o speeds, and so forth. 
+The scsh interface is designed both for generality and portability +across different Unix platforms, so you don't have to rewrite your +program each time you move to a new system. +We've also made an effort to use reasonable, Scheme-like names for +the multitudinous named constants involved, so when you are reading +code, you'll have less likelihood of getting lost in a bewildering +maze of obfuscatory constants named \ex{ICRNL}, \ex{INPCK}, \ex{IUCLC}, +and \ex{ONOCR}. + +This section can only lay out the basic functionality of the terminal +device interface. +For further details, see the termios(3) man page on your system, +or consult one of the standard {\Unix} texts. + +\subsection{Portability across OS variants} +Terminal-control software is inescapably complex, ugly, and low-level. +Unix variants each provide their own way of controlling terminal +devices, making it difficult to provide interfaces that are +portable across different Unix systems. +Scsh's terminal support is based primarily upon the {\Posix} termios +interface. +Programs that can be written using only the {\Posix} interface are likely +to be widely portable. + +The bulk of the documentation that follows consists of several pages worth +of tables defining different named constants that enable and disable different +features of the terminal driver. +Some of these flags are {\Posix}; others are taken from the two common +branches of Unix development, SVR4 and 4.3+ Berkeley. +Scsh guarantees that the non-{\Posix} constants will be bound identifiers. +\begin{itemize} +\item If your OS supports a particular non-{\Posix} flag, + its named constant will be bound to the flag's value. +\item If your OS doesn't support the flag, its named constant + will be present, but bound to \sharpf. 
+\end{itemize} +This means that if you want to use SVR4 or Berkeley features in a program, +your program can portably test the values of the flags before using +them---the flags can reliably be referenced without producing OS-dependent +``unbound variable'' errors. + +Finally, note that although {\Posix}, SVR4, and Berkeley cover the lion's +share of the terminal-driver functionality, +each operating system inevitably has non-standard extensions. +While a particular scsh implementation may provide these extensions, +they are not portable, and so are not documented here. + +\subsection{The tty-info record type} + +The primary data-structure that describes a terminal's mode is +a \ex{tty-info} record, defined as follows: +\index{tty-info record type} +\indextt{tty-info:control-chars} +\indextt{tty-info:input-flags} +\indextt{tty-info:output-flags} +\indextt{tty-info:control-flags} +\indextt{tty-info:local-flags} +\indextt{tty-info:input-speed} +\indextt{tty-info:output-speed} +\indextt{tty-info:min} +\indextt{tty-info:time} +\indextt{tty-info?} +\begin{code} +(define-record tty-info + control-chars ; String: Magic input chars + input-flags ; Int: Input processing + output-flags ; Int: Output processing + control-flags ; Int: Serial-line control + local-flags ; Int: Line-editting UI + input-speed ; Int: Code for input speed + output-speed ; Int: Code for output speed + min ; Int: Raw-mode input policy + time) ; Int: Raw-mode input policy\end{code} + +\subsubsection{The control-characters string} +The \ex{control-chars} field is a character string; +its characters may be indexed by integer values taken from +table~\ref{table:ttychars}. + +As discussed above, +only the {\Posix} entries in table~\ref{table:ttychars} are guaranteed +to be legal, integer indices. +A program can reliably test the OS to see if the non-{\Posix} +characters are supported by checking the index constants. 
+If the control-character function is supported by the terminal driver, +then the corresponding index will be bound to an integer; +if it is not supported, the index will be bound to \sharpf. + +To disable a given control-character function, set its corresponding +entry in the \ex{tty-info:control-chars} string to the +special character \exi{disable-tty-char} +(and then use the \ex{(set-tty-info \var{fd/port} \var{info})} procedure +to update the terminal's state). + +\subsubsection{The flag fields} +The \ex{tty-info} record's \ex{input-flags}, \ex{output-flags}, +\ex{control-flags}, and \ex{local-flags} fields are all bit sets +represented as two's-complement integers. +Their values are composed by or'ing together values taken from +the named constants listed in tables~\ref{table:ttyin} +through \ref{table:ttylocal}. + +As discussed above, +only the {\Posix} entries listed in these tables are guaranteed +to be legal, integer flag values. +A program can reliably test the OS to see if the non-{\Posix} +flags are supported by checking the named constants. +If the feature is supported by the terminal driver, +then the corresponding flag will be bound to an integer; +if it is not supported, the flag will be bound to \sharpf. + +%%%%% I managed to squeeze this into the DEFINE-RECORD's comments. +% Here is a small table classifying the four flag fields by +% the kind of features they determine: +% \begin{center} +% \begin{tabular}{|ll|}\hline +% Field & Affects \\ \hline \hline +% \ex{input-flags} & Processing of input chars \\ +% \ex{output-flags} & Processing of output chars \\ +% \ex{control-flags} & Controlling of terminal's serial line \\ +% \ex{local-flags} & Details of the line-editting user interface \\ +% \hline +% \end{tabular} +% \end{center} + +%%% +%%% The figures used to go here. +%%% + +\subsubsection{The speed fields} +The \ex{input-speed} and \ex{output-speed} fields determine the +I/O rate of the terminal's line. 
+The value of these fields is an integer giving the speed +in bits-per-second. +The following speeds are supported by {\Posix}: +\begin{center} +\begin{tabular}{rrrr} +0 & 134 & 600 & 4800 \\ +50 & 150 & 1200 & 9600 \\ +75 & 200 & 1800 & 19200 \\ +110 & 300 & 2400 & 38400 \\ +\end{tabular} +\end{center} +Your OS may accept others; it may also allow the special symbols +\ex{'exta} and \ex{'extb}. + +\subsubsection{The min and time fields} +The integer \ex{min} and \ex{time} fields determine input blocking +behaviour during non-canonical (raw) input; otherwise, they are ignored. +See the termios(3) man page for further details. + +Be warned that {\Posix} allows the base system call's representation +of the \ex{tty-info} record to share storage for the \ex{min} field +and the \ex{ttychar/eof} element of the control-characters string, +and for the \ex{time} field and the \ex{ttychar/eol} element +of the control-characters string. +Many implementations in fact do this. + +To stay out of trouble, set the \ex{min} and \ex{time} fields only +if you are putting the terminal into raw mode; +set the eof and eol control-characters only if you are putting +the terminal into canonical mode. +It's ugly, but it's {\Unix}. + +\subsection{Using tty-info records} + +\defun{make-tty-info}{if of cf lf ispeed ospeed min time} + {tty-info-record} +\defunx{copy-tty-info}{tty-info-record}{tty-info-record} +\begin{desc} +These procedures make it possible to create new \ex{tty-info} records. +The typical method for creating a new record is to copy one retrieved +by a call to the \ex{tty-info} procedure, then modify the copy as desired. +Note that the \ex{make-tty-info} procedure does not take a parameter +to define the new record's control characters.\footnote{ + Why? Because the length of the string varies from Unix to Unix. 
+ For example, the word-erase control character (typically control-w) + is provided by most Unixes, but not part of the {\Posix} spec.} +Instead, it simply returns a \ex{tty-info} record whose control-character +string has all elements initialised to {\Ascii} nul. +You may then install the special characters by assigning to the string. +Similarly, the control-character string in the record produced by +\ex{copy-tty-info} does not share structure with the string in the record +being copied, so you may mutate it freely. +\end{desc} + + +\defun{tty-info}{fd/port}{tty-info-record} +\begin{desc} +The \var{fd/port} parameter is an integer file descriptor or Scheme I/O port +opened on a terminal device. +This procedure returns a \ex{tty-info} record describing the terminal's +current mode. +\end{desc} + +\defun {set-tty-info/now} {fd/port info}{no-value} +\defunx{set-tty-info/drain}{fd/port info}{no-value} +\defunx{set-tty-info/flush}{fd/port info}{no-value} +\begin{desc} +The \var{fd/port} parameter is an integer file descriptor or Scheme I/O port +opened on a terminal device. +The procedure chosen determines when and how the terminal's mode is altered: +\begin{center} +\begin{tabular}{|ll|} \hline +Procedure & Meaning \\ \hline \hline +\ex{set-tty-info/now} & Make change immediately. \\ +\ex{set-tty-info/drain} & Drain output, then change. \\ +\ex{set-tty-info/flush} & Drain output, flush input, then change. \\ \hline +\end{tabular} +\end{center} +\end{desc} + +\subsection{Other terminal-device procedures} +\defun{send-tty-break}{fd/port [duration]}{no-value} +\begin{desc} +Send a break signal on the terminal associated with file descriptor +or Scheme I/O port \var{fd/port}. +A break signal is a sequence of continuous zeros on the terminal's transmission +line. + +The \var{duration} argument determines the length of the break signal. 
+A zero value (the default) causes a break of between +0.25 and 0.5 seconds to be sent; +other values determine a period in a manner that will depend upon local +community standards. +\end{desc} + +\defun{drain-tty}{fd/port}{no-value} +\begin{desc} +This procedure waits until all the output written to the +terminal device has been transmitted to the device. +If \var{fd/port} is an output port with buffered I/O +enabled, then the port's buffered characters are flushed before +waiting for the device to drain. +\end{desc} + +\defun {flush-tty/input} {fd/port}{no-value} +\defunx{flush-tty/output}{fd/port}{no-value} +\defunx{flush-tty/both} {fd/port}{no-value} +\begin{desc} +These procedures discard the unread input chars or unwritten +output chars in the tty's kernel buffers. +\end{desc} + +\defun {start-tty-output}{fd/port} {no-value} +\defunx{stop-tty-output} {fd/port} {no-value} +\defunx{start-tty-input} {fd/port} {no-value} +\defunx{stop-tty-input} {fd/port} {no-value} +\begin{desc} +These procedures can be used to control a terminal's input and output flow. +The \ex{stop-tty-output} and \ex{start-tty-output} procedures suspend +and resume output from a terminal device. +The \ex{stop-tty-input} and \ex{start-tty-input} procedures transmit +the special STOP and START characters to the terminal with the intention +of stopping and starting terminal input flow. +\end{desc} + +% \defun {encode-baud-rate}{speed}{code} +% \defunx{decode-baud-rate}{code}{speed} +% \begin{desc} +% These procedures can be used to map between the special codes +% that are legal values for the \ex{tty-info:input-speed} and +% \ex{tty-info:output-speed} fields, and actual integer bits-per-second speeds. +% The codes are the values bound to the +% \ex{baud/4800}, \ex{baud/9600}, and other named constants defined above. 
+% For example: +% \begin{code} +% (decode-baud-rate baud/9600) {\evalto} 9600 +% +% ;;; These two expressions are identical: +% (set-tty-info:input-speed ti baud/14400) +% (set-tty-info:input-speed ti (encode-baud-rate 14400))\end{code} +% \end{desc} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Control terminals, sessions, and terminal process groups} + +\defun{open-control-tty}{tty-name [flags]}{port} +\begin{desc} +This procedure opens terminal device \var{tty-name} as the process' +control terminal +(see the \ex{termios} man page for more information on control terminals). +The \var{tty-name} argument is a file-name such as \ex{/dev/ttya}. +The \var{flags} argument is a value suitable as the second argument +to the \ex{open-file} call; it defaults to \ex{open/read+write}, causing +the terminal to be opened for both input and output. + +The port returned is an input port if the \var{flags} permit it, +otherwise an output port. +\R4RS/\scm/scsh do not have input/output ports, +so it's one or the other. +However, you can get both read and write ports open on a terminal +by opening it read/write, taking the result input port, +and duping it to an output port with \ex{dup->outport}. + +This procedure guarantees to make the opened terminal the +process' control terminal only if the process does not have +an assigned control terminal at the time of the call. +If the scsh process already has a control terminal, the results are undefined. + +To arrange for the process to have no control terminal prior to calling +this procedure, use the \ex{become-session-leader} procedure. + +\oops{The control terminal code was added just before release time + for scsh release 0.4. Control terminals are one of the less-standardised + elements of Unix. We can't guarantee that the terminal is definitely + attached as a control terminal; we were only able to test this out + on HP-UX. 
If you intend to use this feature on your OS, you should + test it out first. If your OS requires the use of the \ex{TIOCSCTTY} + \ex{ioctl}, uncomment the appropriate few lines of code in the + file \ex{tty1.c} and send us email.} +\end{desc} + +\defun{become-session-leader}{}{\integer} +\begin{desc} +This is the C \ex{setsid()} call. +{\Posix} job-control has a three-level hierarchy: +session/process-group/process. +Every session has an associated control terminal. +This procedure places the current process into a brand new session, +and disassociates the process from any previous control terminal. +You may subsequently use \ex{open-control-tty} to open a new control +terminal. + +It is an error to call this procedure if the current process is already +a process-group leader. +One way to guarantee this is not the case is only to call this procedure +after forking. +\end{desc} + + +\defun {tty-process-group}{fd/port}{\integer} +\defunx{set-tty-process-group}{fd/port pgrp}{\undefined} +\begin{desc} +This pair of procedures gets and sets the process group of a given +terminal. +\end{desc} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Pseudo-terminals} +Scsh implements an interface to Berkeley-style pseudo-terminals. + +\defun{fork-pty-session}{thunk}{[process pty-in pty-out tty-name]} +\begin{desc} +This procedure gives a convenient high-level interface to pseudo-terminals. +It first allocates a pty/tty pair of devices, and then forks a child +to execute procedure \var{thunk}. +In the child process +\begin{itemize} +\item Stdio and the current I/O ports are bound to the terminal device. +\item The child is placed in its own, new session + (see \ex{become\=session\=leader}). +\item The terminal device becomes the new session's controlling terminal + (see \ex{open-control-tty}). +\item The \ex{(error-output-port)} is unbuffered. 
+\end{itemize} + +The \ex{fork-pty-session} procedure returns four values: +the child's process object, two ports open on the controlling pty device, +and the name of the child's corresponding terminal device. +\end{desc} + +\defun{open-pty}{}{pty-inport tty-name} +\begin{desc} +This procedure finds a free pty/tty pair, and opens the pty device +with read/write access. +It returns a port on the pty, +and the name of the corresponding terminal device. + +The port returned is an input port---Scheme doesn't allow input/output +ports. +However, you can easily use \ex{(dup->outport \var{pty-inport})} +to produce a matching output port. +You may wish to turn off I/O buffering for this output port. +\end{desc} + + +\defun {pty-name->tty-name}{pty-name}{tty-name} +\defunx{tty-name->pty-name}{tty-name}{pty-name} +\begin{desc} +These two procedures map between corresponding terminal and pty controller +names. +For example, +\begin{code} +(pty-name->tty-name "/dev/ptyq3") {\evalto} "/dev/ttyq3" +(tty-name->pty-name "/dev/ttyrc") {\evalto} "/dev/ptyrc"\end{code} + +\remark{This is rather Berkeley-specific. SVR4 ptys are rare enough that + I've no real idea if it generalises across the Unix gap. Experts + are invited to advise. Users should feel free not to worry---the great majority + of current popular Unix systems use Berkeley ptys.} +\end{desc} + +\defun{make-pty-generator}{}{\proc} +\begin{desc} +This procedure returns a generator of candidate pty names. +Each time the returned procedure is called, it produces a +new candidate. +Software that wishes to search through the set of available ptys +can use a pty generator to iterate over them. +After producing all the possible ptys, a generator returns {\sharpf} +every time it is called. 
+Example: +\begin{code} +(define pg (make-pty-generator)) +(pg) {\evalto} "/dev/ptyp0" +(pg) {\evalto} "/dev/ptyp1" + \vdots +(pg) {\evalto} "/dev/ptyqe" +(pg) {\evalto} "/dev/ptyqf" \textit{(Last one)} +(pg) {\evalto} {\sharpf} +(pg) {\evalto} {\sharpf} + \vdots\end{code} +\end{desc} + + +% Flag tables +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +% Control-chars indices +%%%%%%%%%%%%%%%%%%%%%%% +\begin{table}[p] +\begin{center} +\begin{tabular}{|lll|} \hline +Scsh & C & Typical char \\ +\hline\hline +{\Posix} & & \\ +\exi{ttychar/delete-char} & \ex{ERASE} & del \\ +\exi{ttychar/delete-line} & \ex{KILL} & \verb|^U| \\ +\exi{ttychar/eof} & \ex{EOF} & \verb|^D| \\ +\exi{ttychar/eol} & \ex{EOL} & \\ +\exi{ttychar/interrupt} & \ex{INTR} & \verb|^C| \\ +\exi{ttychar/quit} & \ex{QUIT} & \verb|^\| \\ +\exi{ttychar/suspend} & \ex{SUSP} & \verb|^Z| \\ +\exi{ttychar/start} & \ex{START} & \verb|^Q| \\ +\exi{ttychar/stop} & \ex{STOP} & \verb|^S| \\ + +\hline\hline +{SVR4 and 4.3+BSD} & & \\ +\exi{ttychar/delayed-suspend} & \ex{DSUSP} & \verb|^Y| \\ +\exi{ttychar/delete-word} & \ex{WERASE} & \verb|^W| \\ +\exi{ttychar/discard} & \ex{DISCARD} & \verb|^O| \\ +\exi{ttychar/eol2} & \ex{EOL2} & \\ +\exi{ttychar/literal-next} & \ex{LNEXT} & \verb|^V| \\ +\exi{ttychar/reprint} & \ex{REPRINT} & \verb|^R| \\ + +\hline\hline +{4.3+BSD} & & \\ +\exi{ttychar/status} & \ex{STATUS} & \verb|^T| \\ +\hline +\end{tabular} +\end{center} +\caption{Indices into the \protect\ex{tty-info} record's + \protect\var{control-chars} string, + and the character traditionally found at each index. + Only the indices for the {\Posix} entries are guaranteed to + be non-\sharpf.} +\label{table:ttychars} +\end{table} + +% Input flags +%%%%%%%%%%%%% +\begin{table}[p] +\begin{center}\small +\begin{tabular}{|lll|} \hline +Scsh & C & Meaning \\ +\hline\hline +\Posix & & \\ +\exi{ttyin/check-parity} + & \ex{INPCK} & Check parity. 
\\ +\exi{ttyin/ignore-bad-parity-chars} + & \ex{IGNPAR} & Ignore chars with parity errors. \\ +\exi{ttyin/mark-parity-errors} + & \ex{PARMRK} & Insert chars to mark parity errors.\\ +\exi{ttyin/ignore-break} + & \ex{IGNBRK} & Ignore breaks. \\ +\exi{ttyin/interrupt-on-break} + & \ex{BRKINT} & Signal on breaks. \\ +\exi{ttyin/7bits} + & \ex{ISTRIP} & Strip char to seven bits. \\ +\exi{ttyin/cr->nl} + & \ex{ICRNL} & Map carriage-return to newline. \\ +\exi{ttyin/ignore-cr} + & \ex{IGNCR} & Ignore carriage-returns. \\ +\exi{ttyin/nl->cr} + & \ex{INLCR} & Map newline to carriage-return. \\ +\exi{ttyin/input-flow-ctl} + & \ex{IXOFF} & Enable input flow control. \\ +\exi{ttyin/output-flow-ctl} + & \ex{IXON} & Enable output flow control. \\ + +\hline\hline +{SVR4 and 4.3+BSD} & & \\ +\exi{ttyin/xon-any} & \ex{IXANY} & Any char restarts after stop. \\ +\exi{ttyin/beep-on-overflow} & \ex{IMAXBEL} & Ring bell when queue full. \\ + +\hline\hline +{SVR4} & & \\ +\exi{ttyin/lowercase} & \ex{IUCLC} & Map upper case to lower case. \\ +\hline +\end{tabular} +\end{center} +\caption{Input-flags. These are the named flags for the \protect\ex{tty-info} + record's \protect\var{input-flags} field. + These flags generally control the processing of input chars. + Only the {\Posix} entries are guaranteed to be non-\sharpf. + } +\label{table:ttyin} +\end{table} + +% Output flags +%%%%%%%%%%%%%% +\begin{table}[p] +\begin{center}%\small +\begin{tabular}{|lll|} \hline +Scsh & C & Meaning \\ \hline\hline + +\multicolumn{3}{|l|}{\Posix} \\ +\exi{ttyout/enable} & \ex{OPOST} & Enable output processing. \\ + +\hline\hline +\multicolumn{3}{|l|}{SVR4 and 4.3+BSD} \\ +\exi{ttyout/nl->crnl} & \ex{ONLCR} & Map nl to cr-nl. \\ + +\hline\hline +\multicolumn{3}{|l|}{4.3+BSD} \\ +\exi{ttyout/discard-eot} & \ex{ONOEOT} & Discard EOT chars. 
\\ +\exi{ttyout/expand-tabs} & \ex{OXTABS}\footnote{ + Note this is distinct from the SVR4-equivalent + \ex{ttyout/tab-delayx} flag defined in + table~\ref{table:ttydelays}.} + & Expand tabs. \\ + +\hline\hline +\multicolumn{3}{|l|}{SVR4} \\ +\exi{ttyout/cr->nl} & \ex{OCRNL} & Map cr to nl. \\ +\exi{ttyout/nl-does-cr} & \ex{ONLRET}& Nl performs cr as well. \\ +\exi{ttyout/no-col0-cr} & \ex{ONOCR} & No cr output in column 0. \\ +\exi{ttyout/delay-w/fill-char} & \ex{OFILL} & Send fill char to delay. \\ +\exi{ttyout/fill-w/del} & \ex{OFDEL} & Fill char is {\Ascii} DEL. \\ +\exi{ttyout/uppercase} & \ex{OLCUC} & Map lower to upper case. \\ +\hline +\end{tabular} +\end{center} +\caption{Output-flags. These are the named flags for the \protect\ex{tty-info} + record's \protect\var{output-flags} field. + These flags generally control the processing of output chars. + Only the {\Posix} entries are guaranteed to be non-\sharpf.} +\label{table:ttyout} +\end{table} + +% Delay flags +%%%%%%%%%%%%% +\begin{table}[p] +\begin{tabular}{r|ll|} \cline{2-3} +& Value & Comment \\ \cline{2-3} +{Backspace delay} & \exi{ttyout/bs-delay} & Bit-field mask \\ + & \exi{ttyout/bs-delay0} & \\ + & \exi{ttyout/bs-delay1} & \\ + +\cline{2-3} +{Carriage-return delay} & \exi{ttyout/cr-delay} & Bit-field mask \\ + & \exi{ttyout/cr-delay0} & \\ + & \exi{ttyout/cr-delay1} & \\ + & \exi{ttyout/cr-delay2} & \\ + & \exi{ttyout/cr-delay3} & \\ + +\cline{2-3} +{Form-feed delay} & \exi{ttyout/ff-delay} & Bit-field mask \\ + & \exi{ttyout/ff-delay0} & \\ + & \exi{ttyout/ff-delay1} & \\ + +\cline{2-3} +{Horizontal-tab delay} & \exi{ttyout/tab-delay} & Bit-field mask \\ + & \exi{ttyout/tab-delay0} & \\ + & \exi{ttyout/tab-delay1} & \\ + & \exi{ttyout/tab-delay2} & \\ + & \exi{ttyout/tab-delayx} & Expand tabs \\ + +\cline{2-3} +{Newline delay} & \exi{ttyout/nl-delay} & Bit-field mask \\ + & \exi{ttyout/nl-delay0} & \\ + & \exi{ttyout/nl-delay1} & \\ + +\cline{2-3} +{Vertical tab delay} & \exi{ttyout/vtab-delay} 
& Bit-field mask \\ + & \exi{ttyout/vtab-delay0} & \\ + & \exi{ttyout/vtab-delay1} & \\ + +\cline{2-3} +{All} & \exi{ttyout/all-delay} & Total bit-field mask \\ +\cline{2-3} +\end{tabular} + +\caption{Delay constants. These are the named flags for the + \protect\ex{tty-info} record's \protect\var{output-flags} field. + These flags control the output delays associated with printing + special characters. + They are non-{\Posix}, and have non-{\sharpf} values + only on SVR4 systems.} +\label{table:ttydelays} +\end{table} + +% Control flags +%%%%%%%%%%%%%%% +\begin{table}[p] +\begin{center}%\small +\begin{tabular}{|lll|} \hline +Scsh & C & Meaning \\ + +\hline\hline +\multicolumn{3}{|l|}{\Posix} \\ +\exi{ttyc/char-size} & \ex{CSIZE} & Character size mask \\ +\exi{ttyc/char-size5} & \ex{CS5} & 5 bits \\ +\exi{ttyc/char-size6} & \ex{CS6} & 6 bits \\ +\exi{ttyc/char-size7} & \ex{CS7} & 7 bits \\ +\exi{ttyc/char-size8} & \ex{CS8} & 8 bits \\ +\exi{ttyc/enable-parity}& \ex{PARENB} & Generate and detect parity. \\ +\exi{ttyc/odd-parity} & \ex{PARODD} & Odd parity. \\ +\exi{ttyc/enable-read} & \ex{CREAD} & Enable reception of chars. \\ +\exi{ttyc/hup-on-close} & \ex{HUPCL} & Hang up on last close. \\ +\exi{ttyc/no-modem-sync}& \ex{CLOCAL} & Ignore modem lines. \\ +\exi{ttyc/2-stop-bits} & \ex{CSTOPB} & Send two stop bits. \\ + +\hline\hline +\multicolumn{3}{|l|}{4.3+BSD} \\ +\exi{ttyc/ignore-flags} & \ex{CIGNORE} & Ignore control flags. \\ +\exi{ttyc/CTS-output-flow-ctl} & \verb|CCTS_OFLOW| & CTS flow control of output \\ +\exi{ttyc/RTS-input-flow-ctl} & \verb|CRTS_IFLOW| & RTS flow control of input \\ +\exi{ttyc/carrier-flow-ctl} & \ex{MDMBUF} & \\ +\hline +\end{tabular} +\end{center} + +\caption{Control-flags. These are the named flags for the \protect\ex{tty-info} + record's \protect\var{control-flags} field. + These flags generally control the details of the terminal's + serial line. 
+ Only the {\Posix} entries are guaranteed to be non-\sharpf.} +\label{table:ttyctl} +\end{table} + +% Local flags +%%%%%%%%%%%%% +\begin{table}[p] +\begin{center}\small +\begin{tabular}{|lll|} \hline +Scsh & C & Meaning \\ + +\hline\hline +\multicolumn{3}{|l|}{\Posix} \\ +\exi{ttyl/canonical} & \ex{ICANON} & Canonical input processing. \\ +\exi{ttyl/echo} & \ex{ECHO} & Enable echoing. \\ +\exi{ttyl/echo-delete-line} & \ex{ECHOK} & Echo newline after line kill. \\ +\exi{ttyl/echo-nl} & \ex{ECHONL} & Echo newline even if echo is off. \\ +\exi{ttyl/visual-delete}& \ex{ECHOE} & Visually erase chars. \\ +\exi{ttyl/enable-signals} & \ex{ISIG} & Enable \verb|^|C, \verb|^|Z signalling. \\ +\exi{ttyl/extended} & \ex{IEXTEN} & Enable extensions. \\ +\exi{ttyl/no-flush-on-interrupt} + & \ex{NOFLSH} & Don't flush after interrupt. \\ +\exi{ttyl/ttou-signal} & \ex{TOSTOP} & \ex{SIGTTOU} on background output. \\ + +\hline\hline +\multicolumn{3}{|l|}{SVR4 and 4.3+BSD} \\ +\exi{ttyl/echo-ctl} & \ex{ECHOCTL} + & Echo control chars as ``\verb|^X|''. \\ +\exi{ttyl/flush-output} & \ex{FLUSHO} & Output is being flushed. \\ +\exi{ttyl/hardcopy-delete} & \ex{ECHOPRT} & Visual erase for hardcopy. \\ +\exi{ttyl/reprint-unread-chars} & \ex{PENDIN} & Retype pending input. \\ +\exi{ttyl/visual-delete-line} & \ex{ECHOKE} & Visually erase a line-kill. \\ + +\hline\hline +\multicolumn{3}{|l|}{4.3+BSD} \\ +\exi{ttyl/alt-delete-word} & \ex{ALTWERASE} & Alternate word erase algorithm \\ +\exi{ttyl/no-kernel-status} & \ex{NOKERNINFO} & No kernel status on \verb|^T|. \\ + +\hline\hline +\multicolumn{3}{|l|}{SVR4} \\ +\exi{ttyl/case-map} & \ex{XCASE} & Canonical case presentation \\ +\hline +\end{tabular} +\end{center} + +\caption{Local-flags. These are the named flags for the \protect\ex{tty-info} + record's \protect\var{local-flags} field. + These flags generally control the details of the line-editing + user interface. 
+ Only the {\Posix} entries are guaranteed to be non-\sharpf.} +\label{table:ttylocal} +\end{table} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/doc/scsh-manual/xman.tex b/doc/scsh-manual/xman.tex new file mode 100644 index 0000000..2b63f4f --- /dev/null +++ b/doc/scsh-manual/xman.tex @@ -0,0 +1,38 @@ +%&latex -*- latex -*- + +% This is the reference manual for the Scheme Shell. + +\documentclass[twoside]{report} +\usepackage{code,boxedminipage,draftfooters,makeidx,palatino,ct, + headings,mantitle,array,matter,mysize10,a4wide} + +% Style issues +\parskip = 3pt plus 3pt +\sloppy + +\input{decls} +\makeindex +%%% End preamble + +\begin{document} + +\frontmatter +\include{front} + +\mainmatter +\include{intro} +\include{procnotation} +\include{syscalls} +\include{network} +\include{strings} +\include{rdelim} +\include{awk} +\include{miscprocs} +\include{running} +\include{changes} +\include{todo} + +\backmatter +\printindex + +\end{document}