Created scsh-manual from 0.5 tree

This commit is contained in:
mainzelm 2001-07-13 06:59:22 +00:00
parent 8726142251
commit 0af8a890a8
32 changed files with 10787 additions and 0 deletions

40
doc/scsh-manual/Makefile Normal file
View File

@ -0,0 +1,40 @@
# Makefile for the scsh reference manual.
# Pipeline: LaTeX sources -> man.dvi -> man.ps, plus an HTML rendering made
# with tex2page.  The default goal is man.dvi (the first ordinary target).

# Teach make the suffixes used by the suffix rules below.
.SUFFIXES: .idx .ind .tex .dvi .ps $(.SUFFIXES)

# Chapter sources that man.dvi depends on; also the files copied by `install'.
TEX= front.tex intro.tex procnotation.tex syscalls.tex network.tex \
	strings.tex awk.tex miscprocs.tex running.tex todo.tex

# Command used to copy documentation files into place.
INSTALL_DATA= install -c -m 644

# Root of the installed documentation tree.  The default is the location the
# original rules hard-coded; override with `make install DOCDIR=/some/dir'.
DOCDIR= /u/su/scsh/scsh/doc

man.dvi: $(TEX) man.ind

# NOTE(review): no rule here produces man.idx; presumably LaTeX writes it as
# a side effect of formatting man.tex, so a pristine tree may need one manual
# `latex man' run first -- confirm.
man.ind: man.idx

.dvi.ps:
	dvips -o $@ $<

.tex.dvi:
	latex $<
	rm -f $*.log

.idx.ind:
	makeindex $<

# The targets below are commands, not files.  Without this declaration an
# existing file or directory of the same name (e.g. the `html' directory that
# `clean' removes) would make the target look up to date and skip its recipe.
.PHONY: clean tar html install

# -f: succeed quietly when there is nothing to remove, but still report
# genuine failures (the old `-rm' prefix hid every error, and the bare
# `rm -r html' aborted when the directory did not exist).
clean:
	rm -f *.log
	rm -rf html

tar:
	tar cf - *.tex sty | gzip > man.tar.gz

html:
	tex2page man

install: man.ps
	@echo WARNING:
	@echo WARNING: this depends on /u/su/scsh/scsh
	@echo WARNING: pointing to the current release
	@echo WARNING:
	$(INSTALL_DATA) cheat.txt $(DOCDIR)/
	$(INSTALL_DATA) man.ps $(DOCDIR)/scsh-manual.ps
	$(INSTALL_DATA) $(TEX) $(DOCDIR)/scsh-manual/
	$(INSTALL_DATA) sty/* $(DOCDIR)/scsh-manual/sty/

35
doc/scsh-manual/THANKS Normal file
View File

@ -0,0 +1,35 @@
Michel.Schinz@studi.epfl.ch
Documentation error in STRING-OUTPUT-PORT-OUTPUT.
Reported 12/19.
Victor Zandy
character-gobbling in (record-reader) caused by 'trim / 'peek
default misunderstanding in delimited readers. Fixed 4/5/96
Michael Becker
reap-policy = early can still lose if you loop and fork.
fork now reaps & retries if it loses and the policy is early reap.
This is a kludge until I have sigchld handlers.
Fixed 4/5/96
Tod Olson
Reported painfully slow delimited-reader I/O in November.
Michel.Schinz@studi.epfl.ch
Reported some picky little typos in the manual.
Shriram
Doc bugs in defrec.scm
euler@lavielle.COM (Lutz Euler) 2/24/97
Manual bugs and a bug in stdio->stdports.
Alan Bawden 4/97
Lots of good bug reports and fixes.
Jim Blandy 4/97
Fixes for meta.scm
Kevin Esler 4/97
Updated Irix port

32
doc/scsh-manual/ack.txt Normal file
View File

@ -0,0 +1,32 @@
Acknowledgements
Who should I thank? My so-called "colleagues," who laugh at me behind my
back, all the while becoming famous on *my* work? My worthless graduate
students, whose computer skills appear to be limited to downloading bitmaps
off of netnews? My parents, who are still waiting for me to quit "fooling
around with computers," go to med school, and become a radiologist? My
department chairman, a manager who gives one new insight into and sympathy for
disgruntled postal workers?
My God, no one could blame me--no one!--if I went off the edge and just lost
it completely one day. I couldn't get through the day as it is without the
Prozac and Jack Daniels I keep on the shelf, behind my Tops-20 JSYS manuals.
I start getting the shakes real bad around 10am, right before my advisor
meetings. A 10 oz. Jack 'n Zac helps me get through the meetings without one
of my students winding up with his severed head in a bowling-ball bag. They
look at me funny; they think I twitch a lot. I'm not twitching. I'm
controlling my impulse to snag my 9mm Sig-Sauer out from my day-pack and make
a few strong points about the quality of undergraduate education in Amerika.
If I thought anyone cared, if I thought anyone would even be reading this, I'd
probably make an effort to keep up appearances until the last possible
moment. But no one does, and no one will. So I can pretty much say exactly
what I think.
Oh, yes, the *acknowledgements.* I think not. I did it. I did it all,
by myself.
Olin Shivers
Cambridge
September 4, 1994

252
doc/scsh-manual/array.sty Normal file
View File

@ -0,0 +1,252 @@
%%
%% This is file `/usr2/distrib/latex209/nfss/array.sty' generated
%% on <1991/11/22> with the docstrip utility (v1.1k).
%%
%% The original source files were:
%%
%% /usr2/users/latex3/source/array/array.doc
%%
%% Copyright (C) 1989,1990,1991 by Frank Mittelbach, Rainer Schoepf.
%% All rights reserved.
%%
%% This file is part of the NFSS (New Font Selection Scheme) package.
%%
%% IMPORTANT NOTICE:
%%
%% You are not allowed to change this file. You may however copy this file
%% to a file with a different name and then change the copy if you obey
%% the restrictions on file changes described in readme.mz.
%%
%% You are allowed to distribute this file under the condition that it is
%% distributed together with all files mentioned in readme.mz3. If you
%% receive only some of these files from someone, complain!
%%
%% You are NOT ALLOWED to distribute this file alone. You are NOT ALLOWED
%% to take money for the distribution or use of either this file or a
%% changed version, except for a nominal charge for copying etc.
%%
%% For error reports in case of UNCHANGED versions see readme files.
%%
%% Please do not request updates from us directly. Distribution is done
%% through Mail-Servers and TeX organizations.
%%
\def\fileversion{v2.0e}
\def\filedate{91/02/07}
\def\docdate {90/08/20}
%% \CheckSum{681}
%% \CharacterTable
%% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
%% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
%% Digits \0\1\2\3\4\5\6\7\8\9
%% Exclamation \! Double quote \" Hash (number) \#
%% Dollar \$ Percent \% Ampersand \&
%% Acute accent \' Left paren \( Right paren \)
%% Asterisk \* Plus \+ Comma \,
%% Minus \- Point \. Solidus \/
%% Colon \: Semicolon \; Less than \<
%% Equals \= Greater than \> Question mark \?
%% Commercial at \@ Left bracket \[ Backslash \\
%% Right bracket \] Circumflex \^ Underscore \_
%% Grave accent \` Left brace \{ Vertical bar \|
%% Right brace \} Tilde \~}
%%
\@ifundefined{d@llar}{}{\endinput}
\typeout{Style-Option: `array' \fileversion
\space\space <\filedate> (F.M.)}
\typeout{English documentation dated \space <\docdate> (F.M.)}
\def\@addtopreamble#1{\xdef\@preamble{\@preamble #1}}
\def\@testpach#1{\@chclass
\ifnum \@lastchclass=6 \@ne \@chnum \@ne \else
\ifnum \@lastchclass=7 5 \else
\ifnum \@lastchclass=8 \tw@ \else
\ifnum \@lastchclass=9 \thr@@
\else \z@
\ifnum \@lastchclass = 10 \else
\@chnum
\if #1c\z@ \else
\if #1l\@ne \else
\if #1r\tw@ \else
\z@ \@chclass
\if#1|\@ne \else
\if #1!6 \else
\if #1@7 \else
\if #1<8 \else
\if #1>9 \else
10
\@chnum
\if #1m\thr@@\else
\if #1p4 \else
\if #1b5 \else
\z@ \@chclass \z@ \@preamerr \z@ \fi \fi \fi \fi
\fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi}
\def\@xexpast#1*#2#3#4\@@{%
\@tempcnta #2
\toks@={#1}\@temptokena={#3}%
\let\the@toksz\relax \let\the@toks\relax
\def\@tempa{\the@toksz}%
\ifnum\@tempcnta >0 \@whilenum\@tempcnta >0\do
{\edef\@tempa{\@tempa\the@toks}\advance \@tempcnta \m@ne}%
\let \@tempb \@xexpast \else
\let \@tempb \@xexnoop \fi
\def\the@toksz{\the\toks@}\def\the@toks{\the\@temptokena}%
\edef\@tempa{\@tempa}%
\expandafter \@tempb \@tempa #4\@@}
\def\prepnext@tok{\advance \count@ \@ne
\toks\count@={}}
\def\save@decl{\toks\count@ \expandafter{\@nextchar}}
\def\insert@column{%
\the@toks \the \@tempcnta
{\ignorespaces \@sharp \unskip}%
\the@toks \the \count@ \relax}
\newdimen\col@sep
\def\@acol{\@addtopreamble{\hskip\col@sep}}
\def\@mkpream#1{\gdef\@preamble{}\@lastchclass 4 \@firstamptrue
\let\@sharp\relax \let\@startpbox\relax \let\@endpbox\relax
\@xexpast #1*0x\@@
\count@\m@ne
\let\the@toks\relax
\prepnext@tok
\expandafter \@tfor \expandafter \@nextchar
\expandafter :\expandafter =\@tempa \do
{\@testpach\@nextchar
\ifcase \@chclass \@classz \or \@classi \or \@classii
\or \save@decl \or \or \@classv \or \@classvi
\or \@classvii \or \@classviii \or \@classix
\or \@classx \fi
\@lastchclass\@chclass}%
\ifcase\@lastchclass
\@acol \or
\or
\@acol \or
\@preamerr \thr@@ \or
\@preamerr \tw@ \@addtopreamble\@sharp \or
\or
\else \@preamerr \@ne \fi
\def\the@toks{\the\toks}}
\def\@classx{%
\ifcase \@lastchclass
\@acolampacol \or
\@addamp \@acol \or
\@acolampacol \or
\or
\@acol \@firstampfalse \or
\@addamp
\fi}
\def\@classz{\@classx
\@tempcnta \count@
\prepnext@tok
\@addtopreamble{\ifcase \@chnum
\hfil
\d@llar
\insert@column
\d@llar \hfil \or
\d@llar \insert@column \d@llar \hfil \or
\hfil\kern\z@ \d@llar \insert@column \d@llar \or
$\vcenter
\@startpbox{\@nextchar}\insert@column \@endpbox $\or
\vtop \@startpbox{\@nextchar}\insert@column \@endpbox \or
\vbox \@startpbox{\@nextchar}\insert@column \@endpbox
\fi}\prepnext@tok}
\def\@classix{\ifnum \@lastchclass = \thr@@
\@preamerr \thr@@ \fi
\@classx}
\def\@classviii{\ifnum \@lastchclass >\z@
\@preamerr 4\@chclass 6 \@classvi \fi}
\def\@arrayrule{\@addtopreamble \vline}
\def\@classvii{\ifnum \@lastchclass = \thr@@
\@preamerr \thr@@ \fi}
\def\@classvi{\ifcase \@lastchclass
\@acol \or
\@addtopreamble{\hskip \doublerulesep}\or
\@acol \or
\@classvii
\fi}
\def\@classii{\advance \count@ \m@ne
\save@decl\prepnext@tok}
\def\@classv{\save@decl
\@addtopreamble{\d@llar\the@toks\the\count@\relax\d@llar}%
\prepnext@tok}
\def\@classi{\@classvi
\ifcase \@chnum \@arrayrule \or
\@classv \fi}
\def\@startpbox#1{\bgroup
\hsize #1 \@arrayparboxrestore
\vrule \@height \ht\@arstrutbox \@width \z@}
\def\@endpbox{\vrule \@width \z@ \@depth \dp \@arstrutbox \egroup}
\def\@array[#1]#2{%
\@tempdima \ht \strutbox
\advance \@tempdima by\extrarowheight
\setbox \@arstrutbox \hbox{\vrule
\@height \arraystretch \@tempdima
\@depth \arraystretch \dp \strutbox
\@width \z@}%
\begingroup
\@mkpream{#2}%
\xdef\@preamble{\ialign \@halignto
\bgroup \@arstrut \@preamble
\tabskip \z@ \cr}%
\endgroup
\if #1t\vtop \else \if#1b\vbox \else \vcenter \fi \fi
\bgroup
\let \@sharp ##\let \protect \relax
\lineskip \z@
\baselineskip \z@
\m@th
\let\\ \@arraycr \let\par\@empty \@preamble}
\newdimen \extrarowheight
\extrarowheight=0pt
\def\@arstrut{\unhcopy\@arstrutbox}
\def\@arraycr{{\ifnum 0=`}\fi
\@ifstar \@xarraycr \@xarraycr}
\def\@xarraycr{\@ifnextchar [%
\@argarraycr {\ifnum 0=`{\fi}\cr}}
\def\@argarraycr[#1]{\ifnum0=`{\fi}\ifdim #1>\z@
\@xargarraycr{#1}\else \@yargarraycr{#1}\fi}
\def\@xargarraycr#1{\unskip
\@tempdima #1\advance\@tempdima \dp\@arstrutbox
\vrule \@depth\@tempdima \@width\z@ \cr}
\def\@yargarraycr#1{\cr\noalign{\vskip #1}}
\def\multicolumn#1#2#3{%
\multispan{#1}\begingroup
\def\@addamp{\if@firstamp \@firstampfalse \else
\@preamerr 5\fi}%
\@mkpream{#2}\@addtopreamble\@empty
\endgroup
\def\@sharp{#3}%
\@arstrut \@preamble \ignorespaces}
\def\array{\col@sep\arraycolsep
\def\d@llar{$}\gdef\@halignto{}%
\@tabarray}
\def\@tabarray{\@ifnextchar[{\@array}{\@array[c]}}
\def\tabular{\gdef\@halignto{}\@tabular}
\expandafter\def\csname tabular*\endcsname#1{%
\gdef\@halignto{to#1}\@tabular}
\def\@tabular{%
\leavevmode
\hbox \bgroup $\col@sep\tabcolsep \let\d@llar\@empty
\@tabarray}
\def\endarray{\crcr \egroup \egroup \gdef\@preamble{}}
\def\endtabular{\endarray $\egroup}
\expandafter\let\csname endtabular*\endcsname=\endtabular
\let\@ampacol=\relax \let\@expast=\relax
\let\@arrayclassiv=\relax \let\@arrayclassz=\relax
\let\@tabclassiv=\relax \let\@tabclassz=\relax
\let\@arrayacol=\relax \let\@tabacol=\relax
\let\@tabularcr=\relax \let\@@endpbox=\relax
\let\@argtabularcr=\relax \let\@xtabularcr=\relax
\def\@preamerr#1{\def\@tempd{{..} at wrong position: }%
\@latexerr{%
\ifcase #1 Illegal pream-token (\@nextchar): `c' used\or %0
Missing arg: token ignored\or %1
Empty preamble: `l' used\or %2
>\@tempd token ignored\or %3
<\@tempd changed to !{..}\or %4
Only one colum-spec. allowed.\fi}\@ehc} %5
\def\@tfor#1:=#2\do#3{\def\@fortmp{#2}\ifx\@fortmp\@empty
\else\@tforloop#2\@nil\@nil\@@#1{#3}\fi}
\endinput
%%
%% End of file `/usr2/distrib/latex209/nfss/array.sty'.

672
doc/scsh-manual/awk.tex Normal file
View File

@ -0,0 +1,672 @@
%&latex -*- latex -*-
\chapter{Awk, record I/O, and field parsing}
\label{chapt:fr-awk}
{\Unix} programs frequently process streams of records,
where each record is delimited by a newline,
and records are broken into fields with other delimiters
(for example, the colon character in \ex{/etc/passwd}).
Scsh has procedures that allow the programmer to easily
do this kind of processing.
Scsh's field parsers can also be used to parse other kinds
of delimited strings, such as colon-separated \verb|$PATH| lists.
These routines can be used with scsh's \ex{awk} loop construct
to conveniently perform pattern-directed computation over streams
of records.
\section{Record I/O and field parsing}
\label{sec:field-reader}
The procedures in this section are used to read records from
I/O streams and parse them into fields.
A record is defined as text terminated by some delimiter (usually a newline).
A record can be split into fields by using regular expressions in
one of several ways: to \emph{match} fields, to \emph{separate} fields,
or to \emph{terminate} fields.
The field parsers can be applied to arbitrary strings (one common use is
splitting environment variables such as \ex{\$PATH} at colons into its
component elements).
The general delimited-input procedures described in
chapter~\ref{chapt:rdelim} are also useful for reading simple records,
such as single lines, paragraphs of text, or strings terminated by specific
characters.
\subsection{Reading records}
\defun{record-reader} {[delims elide-delims? handle-delim]} {\proc}
\begin{desc}
Returns a procedure that reads records from a port. The
procedure is invoked as follows:
%
\codex{(\var{reader} \var{[port]}) $\longrightarrow$
\textrm{\textit{{\str} or eof}}}
%
A record is a sequence of characters terminated by one of the characters
in \var{delims} or eof. If \var{elide-delims?} is true, then a contiguous
sequence of delimiter chars is taken as a single record delimiter. If
\var{elide-delims?} is false, then a delimiter char coming immediately
after a delimiter char produces an empty-string record. The reader
consumes the delimiting char(s) before returning from a read.
The \var{delims} set defaults to the set $\{\mbox{newline}\}$.
It may be a charset, string, character, or character predicate,
and is coerced to a charset.
The \var{elide-delims?} flag defaults to \ex{\#f}.
The \var{handle-delim} argument controls what is done with the record's
terminating delimiter.
\begin{inset}
\begin{tabular}{lp{0.6\linewidth}}
\ex{'trim} & Delimiters are trimmed. (The default)\\
\ex{'split}& Reader returns delimiter string as a second value.
If record is terminated by EOF, then the eof object is
returned as this second value. \\
\ex{'concat} & The record and its delimiter are returned as
a single string.
\end{tabular}
\end{inset}
The reader procedure returned takes one optional argument, the port
from which to read, which defaults to the current input port. It returns
a string or eof.
\end{desc}
\subsection{Parsing fields}
\label{sec:field-splitter}
\defun {field-splitter} {[field num-fields]} \proc
\defunx {infix-splitter} {[delim num-fields handle-delim]} \proc
\defunx {suffix-splitter} {[delim num-fields handle-delim]} \proc
\defunx {sloppy-suffix-splitter} {[delim num-fields handle-delim]} \proc
\begin{desc}
These functions return a parser function that can be used as follows:
\codex{(\var{parser} \var{string} \var{[start]}) $\longrightarrow$
\var{string-list}}
The returned parsers split strings into fields defined
by regular expressions. You can parse by specifying a pattern that
\emph{separates} fields, a pattern that \emph{terminates} fields, or
a pattern that \emph{matches} fields:
\begin{inset}
\begin{tabular}{l@{\qquad}l}
Procedure & Pattern \\ \hline
\ex{field-splitter} & matches fields \\
\ex{infix-splitter} & separates fields \\
\ex{suffix-splitter}& terminates fields \\
\ex{sloppy-suffix-splitter} & terminates fields
\end{tabular}
\end{inset}
These parser generators are controlled by a range of options, so that you
can precisely specify what kind of parsing you want. However, these
options default to reasonable values for general use.
Defaults:
\begin{tightinset}
\begin{tabular}{l@{\quad=\quad }ll}
\var{delim} & \ex{(rx (| (+ white) eos))} & (suffix delimiter: white space or eos) \\
\multicolumn{1}{l}{} & \ex{(rx (+ white))} & (infix delimiter: white space) \\
\var{field} & \verb|(rx (+ (~ white)))| & (non-white-space) \\
\var{num-fields} & \verb|#f| & (as many fields as possible) \\
\var{handle-delim} & \verb|'trim| & (discard delimiter chars)
\end{tabular}
\end{tightinset}
{\ldots}which means: break the string at white space, discarding the
white space, and parse as many fields as possible.
The \var{delim} parameter is a regular expression matching the text
that occurs between fields.
See chapter~\ref{chapt:sre} for information on regular expressions,
and the \ex{rx} form used to specify them.
In the separator case,
it defaults to a pattern matching white space;
in the terminator case,
it defaults to white space or end-of-string.
The \var{field} parameter is a regular expression used
to match fields. It defaults to non-white-space.
The \var{delim} patterns may also be given as a string,
character, or char-set, which are coerced to regular expressions.
So the following expressions are all equivalent,
each producing a function that splits strings apart at colons:
\begin{inset}
\begin{verbatim}
(infix-splitter (rx ":"))
(infix-splitter ":")
(infix-splitter #\:)
(infix-splitter (char-set #\:))\end{verbatim}
\end{inset}
The \var{handle-delim} argument is a symbol that determines what to do with delimiters.
\begin{tightinset}\begin{tabular}{ll}
\ex{'trim} & Delimiters are thrown away after parsing. (default) \\
\ex{'concat} & Delimiters are appended to the field preceding them. \\
\ex{'split} & Delimiters are returned as separate elements in
the field list.
\end{tabular}
\end{tightinset}
The \var{num-fields} argument used to create the parser specifies how many
fields to parse. If \ex{\#f} (the default), the procedure parses them all.
If a positive integer $n$, exactly that many fields are parsed; it is an
error if there are more or fewer than $n$ fields in the record. If
\var{num-fields} is a negative integer or zero, then $|n|$ fields
are parsed, and the remainder of the string is returned in the last
element of the field list; it is an error if fewer than $|n|$ fields
can be parsed.
The field parser produced is a procedure that can be employed as
follows:
\codex{(\var{parse} \var{string} \var{[start]}) \evalto \var{string-list}}
The optional \var{start} argument (default 0) specifies where in the string
to begin the parse. It is an error if
$\var{start} > \ex{(string-length \var{string})}$.
The parsers returned by the four parser generators implement different
kinds of field parsing:
\begin{description}
\item[\ex{field-splitter}]
The regular expression specifies the actual field.
\item[\ex{suffix-splitter}]
Delimiters are interpreted as element \emph{terminators}.
If vertical-bar is the delimiter, then the string \ex{""}
is the empty record \ex{()}, \ex{"foo|"} produces a one-field record
\ex{("foo")}, and \ex{"foo"} is an error.
The syntax of suffix-delimited records is:
\begin{inset}
\begin{tabular}{lcll}
\synvar{record} & ::= & \ex{""} \qquad (Empty record) \\
& $|$ & \synvar{element} \synvar{delim}
\synvar{record}
\end{tabular}
\end{inset}
It is an error if a non-empty record does not end with a delimiter.
To make the last delimiter optional, make sure the delimiter regexp
matches the end-of-string (sre \ex{eos}).
\item [\ex{infix-splitter}]
Delimiters are interpreted as element \emph{separators}. If comma is the
delimiter, then the string \ex{"foo,"} produces a two-field
record \ex{("foo" "")}.
The syntax of infix-delimited records is:
\begin{inset}
\begin{tabular}{lcll}
\synvar{record} & ::= & \ex{""} \qquad (Forced to be empty record) \\
& $|$ & \synvar{real-infix-record} \\
\\
\synvar{real-infix-record} & ::= & \synvar{element} \synvar{delim}
\synvar{real-infix-record} \\
& $|$ & \synvar{element}
\end{tabular}
\end{inset}
Note that separator semantics doesn't really allow for empty
records---the straightforward grammar (\ie, \synvar{real-infix-record})
parses an empty string as a singleton list whose one field is the empty
string, \ex{("")}, not as the empty record \ex{()}. This is unfortunate,
since it means that infix string parsing doesn't make \ex{string-append}
and \ex{append} isomorphic. For example,
\codex{((infix-splitter ":") (string-append \var{x} ":" \var{y}))}
doesn't always equal
\begin{code}
(append ((infix-splitter ":") \var{x})
((infix-splitter ":") \var{y}))\end{code}
It fails when either \var{x} or \var{y} is the empty string.
Terminator semantics \emph{does} preserve a similar isomorphism.
However, separator semantics is frequently what other Unix software
uses, so to parse their strings, we need to use it. For example,
Unix \verb|$PATH| lists have separator semantics. The path list
\ex{"/bin:"} is broken up into \ex{("/bin" "")}, not \ex{("/bin")}.
Comma-separated lists should also be parsed this way.
\item[\ex{sloppy-suffix-splitter}]
The same as the \ex{suffix-splitter} case, except that the parser will skip an
initial delimiter string if the string begins with one instead of parsing
an initial empty field. This can be used, for example, to field-split a
sequence of English text at white-space boundaries, where the string may
begin or end with white space, by using regex
\begin{code}{(rx (| (+ white) eos))}\end{code}
(But you would be better off using \ex{field-splitter} in this case.)
\end{description}
\end{desc}
Figure~\ref{fig:splitters} shows how the different parser grammars
split apart the same strings.
%
\begin{boxedfigure}{tbp}
\begin{center}\small
\begin{tabular}{lllll}
Record & : suffix & \verb!:|$! suffix & : infix & non-: field \\
\hline
\ex{""} & \ex{()} & \ex{()} & \ex{()} & \ex{()} \\
\ex{":"} & \ex{("")} & \ex{("")} & \ex{("" "")} & \ex{()} \\
\ex{"foo:"} & \ex{("foo")} & \ex{("foo")} & \ex{("foo" "")} & \ex{("foo")} \\
\ex{":foo"}& \emph{error} & \ex{("" "foo")}& \ex{("" "foo")}& \ex{("foo")} \\
\ex{"foo:bar"} & \emph{error} & \ex{("foo" "bar")} & \ex{("foo" "bar")} & \ex{("foo" "bar")}
\end{tabular}
\end{center}
\caption{Using different grammars to split records into fields.}
\label{fig:splitters}
\end{boxedfigure}
%
Having to choose between the different grammars requires you to decide
what you want, but at least you can be precise about what you are parsing.
Take fifteen seconds and think it out. Say what you mean; mean what you
say.
\defun{join-strings} {string-list [delimiter grammar]} \str
\begin{desc}
This procedure is a simple unparser---it pastes strings together using
the delimiter string.
The \var{grammar} argument is one of the symbols \ex{infix} (the default)
or \ex{suffix}; it determines whether the
delimiter string is used as a separator or as a terminator.
The delimiter is the string used to delimit elements; it defaults to
a single space \ex{" "}.
Example:
\begin{code}
(join-strings '("foo" "bar" "baz") ":")
\qquad{\evalto} "foo:bar:baz"\end{code}
\end{desc}
\subsection{Field readers}
\defun{field-reader} {[field-parser rec-reader]} \proc
\begin{desc}
This utility returns a procedure that reads records with field structure
from a port.
The reader's interface is designed to make it useful in the \ex{awk}
loop macro (section~\ref{sec:awk}).
The reader is used as follows:
\codex{(\var{reader} \var{[port]}) {\evalto} \var{[raw-record parsed-record]} or \var{[eof ()]}}
When the reader is applied to an input port (default: the current
input port), it reads a record using \var{rec-reader}. If this record isn't
the eof object, it is parsed with \var{field-parser}. These two
values---the record, and its parsed representation---are returned
as multiple values from the reader.
When called at eof, the reader returns [eof-object \ex{()}].
Although the record reader typically returns a string, and
the field-parser typically takes a string argument, this is not
required. The record reader can produce, and the field-parser consume,
values of any type. However, the empty list returned as the
parsed value on eof is hardwired into the field reader.
For example, if port \ex{p} is open on \ex{/etc/passwd}, then
\codex{((field-reader (infix-splitter ":" 7)) p)}
returns two values:
{\small
\begin{widecode}
"dalbertz:mx3Uaqq0:107:22:David Albertz:/users/dalbertz:/bin/csh"
("dalbertz" "mx3Uaqq0" "107" "22" "David Albertz" "/users/dalbertz"
"/bin/csh")\end{widecode}}
The \var{field-parser} defaults to the value of \ex{(field-splitter)},
a parser that picks out sequences of non-white-space characters.
The \var{rec-reader} defaults to \ex{read-line}.
Figure~\ref{fig:field-readers} shows \ex{field-reader} being
used to read different kinds of Unix records.
\begin{boxedfigure}{tbhp}
\begin{centercode}
;;; /etc/passwd reader
(field-reader (infix-splitter ":" 7))
; wandy:3xuncWdpKhR.:73:22:Wandy Saetan:/usr/wandy:/bin/csh
;;; Two ls -l output readers
(field-reader (infix-splitter (rx (+ white)) 8))
(field-reader (infix-splitter (rx (+ white)) -7))
; -rw-r--r-- 1 shivers 22880 Sep 24 12:45 scsh.scm
;;; Internet hostname reader
(field-reader (field-splitter (rx (+ (~ ".")))))
; stat.sinica.edu.tw
;;; Internet IP address reader
(field-reader (field-splitter (rx (+ (~ "."))) 4))
; 18.24.0.241
;;; Line of integers
(let ((parser (field-splitter (rx (? ("+-")) (+ digit)))))
(field-reader (\l{s} (map string->number (parser s)))))
; 18 24 0 241
;;; Same as above.
(let ((reader (field-reader (field-splitter (rx (? ("+-"))
(+ digit))))))
(\lx{maybe-port} (map string->number (apply reader maybe-port))))
; Yale beat harvard 26 to 7.\end{centercode}
\caption{Some examples of \protect\ex{field-reader}}
\label{fig:field-readers}
\end{boxedfigure}
\end{desc}
\subsection{Forward-progress guarantees and empty-string matches}
A loop that pulls text off a string by repeatedly matching a regexp
against that string can conceivably get stuck in an infinite loop if
the regexp matches the empty string. For example, the SREs
\ex{bos}, \ex{eos}, \ex{(* any)}, and \ex{(| "foo" (* (~ "f")))}
can all match the empty string.
The routines in this package that iterate through strings with regular
expressions are careful to handle this empty-string case.
If a regexp matches the empty string, the next search starts, not from
the end of the match (which in the empty string case is also the
beginning---that's the problem), but from the next character over.
This is the correct behaviour. Regexps match the longest possible
string at a given location, so if the regexp matched the empty string
at location $i$, then it is guaranteed it could not have matched
a longer pattern starting with character $i$. So we can safely begin
our search for the next match at char $i+1$.
With this provision, every iteration through the loop makes some forward
progress, and the loop is guaranteed to terminate.
This has the effect you want with field parsing. For example, if you split
a string with the empty pattern, you will explode the string into its
individual characters:
\codex{((suffix-splitter (rx)) "foo") {\evalto} ("" "f" "o" "o")}
However, even though this boundary case is handled correctly, we don't
recommend using it. Say what you mean---just use a field splitter:
\codex{((field-splitter (rx any)) "foo") {\evalto} ("f" "o" "o")}
Or, more efficiently,
\codex{((\l{s} (map string (string->list s))) "foo")}
\subsection{Reader limitations}
Since all of the readers in this package require the ability to peek
ahead one char in the input stream, they cannot be applied to raw
integer file descriptors, only Scheme input ports. This is because
Unix doesn't support peeking ahead into input streams.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Awk}
\label{sec:awk}
Scsh provides a loop macro and a set of field parsers that can
be used to perform text processing very similar to the Awk programming
language.
The basic functionality of Awk is factored in scsh into its component
parts.
The control structure is provided by the \ex{awk} loop macro;
the text I/O and parsers are provided by the field-reader subroutine library
(section~\ref{sec:field-reader}).
This factoring allows the programmer to compose the basic loop structure
with any parser or input mechanism at all.
If the parsers provided by the field-reader package are insufficient,
the programmer can write a custom parser in Scheme and use it with
equal ease in the awk framework.
Awk-in-scheme is given by a loop macro called \ex{awk}. It looks like
this:
\begin{code}\cdmath
(awk \synvar{next-record} \synvar{record\&field-vars}
{\rm[\synvar{counter}]} \synvar{state-var-decls}
\synvar{clause$_1$} \ldots)\index{awk}\end{code}
The body of the loop is a series of clauses, each one representing
a kind of condition/action pair. The loop repeatedly reads a record,
and then executes each clause whose condition is satisfied by the record.
Here's an example that reads lines from port \ex{p}
and prints the line number and line of every line containing the
string ``\ex{Church-Rosser}'':
\begin{code}
(awk (read-line) (ln) lineno ()
("Church-Rosser" (format #t "~d: ~s~%" lineno ln)))\end{code}
This example has just one clause in the loop body, the one that
tests for matches against the regular expression ``\ex{Church-Rosser}''.
The \synvar{next-record} form is an expression that is evaluated each time
through the loop to produce a record to process.
This expression can return multiple values;
these values are bound to the variables given in the
\synvar{record\&field-vars} list of variables.
The first value returned is assumed to be the record;
when it is the end-of-file object, the loop terminates.
For example, let's suppose we want to read items from \ex{/etc/passwd},
and we use the \ex{field-reader} procedure to define a record parser for
\ex{/etc/passwd} entries:
\codex{(define read-passwd (field-reader (infix-splitter ":" 7)))}
binds \ex{read-passwd} to a procedure that reads in a line of text when
it is called, and splits the text at colons. It returns two values:
the entire line read, and a seven-element list of the split-out fields.
(See section~\ref{sec:field-reader} for more on \ex{field-reader} and
\ex{infix-splitter}.)
So if the \synvar{next-record} form in an \ex{awk} expression is
\ex{(read-passwd)}, then \synvar{record\&field-vars} must be a list of
two variables, \eg,
\codex{(record field-vec)}
since \ex{read-passwd} returns two values.
Note that \ex{awk} allows us to use \emph{any} record reader we want in the
loop, returning whatever number of values we like. These values
don't have to be strings or string lists. The only requirement
is that the record reader return the eof object as its first value
when the loop should terminate.
The \ex{awk} loop allows the programmer to have loop variables. These are
declared and initialised by the \synvar{state-var-decls} form, a
\codex{((\var{var} \var{init-exp}) (\var{var} \var{init-exp}) \ldots)}
list rather like the \ex{let} form. Whenever a clause in the loop body
executes, it evaluates to as many values as there are state variables,
updating them.
The optional \synvar{counter} variable is an iteration counter.
It is bound to 0 when the loop starts.
The counter is incremented each time a non-eof record is read.
There are several kinds of loop clause. When evaluating the body of the
loop, \ex{awk} evaluates \emph{all} the clauses sequentially.
Unlike \ex{cond}, it does not stop after the first clause is satisfied;
it checks them all.
\begin{itemize}
\itum{\ex{(\var{test} \vari{body}1 \vari{body}2 \ldots)}}
If \var{test} is true, execute the body forms. The last body form
is the value of the clause. The test and body forms are evaluated
in the scope of the record and state variables.
The \var{test} form can be one of:
\begin{inset}
\begin{tabular}{lp{0.6\linewidth}}
\var{integer}: & The test is true for that iteration of the loop.
The first iteration is \#1. \\
\var{sre}: & A regular expression, in SRE notation
(see chapter~\ref{chapt:sre}) can be used as
a test. The test is successful if the pattern
matches the record.
In particular, note that any string is an SRE. \\
\ex{(when \var{expr})}: &
The body of a \ex{when} test is evaluated as a
Scheme boolean expression in the inner scope of the
\ex{awk} form. \\
\var{expr}: & If the form is none of the above, it is treated as
a Scheme expression---in practice, the \ex{when}
keyword is only needed in cases where SRE/Scheme
expression ambiguity might occur.
\end{tabular}
\end{inset}
\itum{\begin{tabular}[t]{l}
\ex{(range\ \ \ \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\
\ex{(:range\ \ \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\
\ex{(range:\ \ \var{start-test} \var{stop-test} \vari{body}1 \ldots)} \\
\ex{(:range:\ \var{start-test} \var{stop-test} \vari{body}1 \ldots)}
\end{tabular}}
%
These clauses become activated when \var{start-test} is true;
they stay active on all further iterations until \var{stop-test}
is true.
So, to print out the first ten lines of a file, we use the clause:
\codex{(:range: 1 10 (display record))}
The colons control whether or not the start and stop lines
are processed by the clause. For example:
\begin{inset}\begin{tabular}{l@{\qquad}l}
\ex{(range\ \ \ 1 5\ \ \ldots)} & Lines \phantom{1} 2 3 4 \\
\ex{(:range\ \ 1 5\ \ \ldots)} & Lines 1 2 3 4 \\
\ex{(range:\ \ 1 5\ \ \ldots)} & Lines \phantom{1} 2 3 4 5 \\
\ex{(:range: 1 5\ \ \ldots)} & Lines 1 2 3 4 5
\end{tabular}
\end{inset}
A line can trigger both tests, either simultaneously starting and
stopping an active region, or simultaneously stopping one and starting
a new one, so ranges can abut seamlessly.
\itum{\ex{(else \vari{body}1 \vari{body}2 \ldots)}}
If no other clause has executed since the top of the loop, or
since the last \ex{else} clause, this clause executes.
\itum{\ex{(\var{test} => \var{exp})}}
If evaluating \var{test} produces a true value,
apply \var{exp} to that value.
If \var{test} is a regular expression, then \var{exp} is applied
to the match data structure returned by the regexp match routine.
\itum{\ex{(after \vari{body}1 \ldots)}}
This clause executes when the loop encounters EOF. The body forms
execute in the scope of the state variables and the \synvar{counter}
variable, if there are any. The value of the last body form is the value
of the entire \ex{awk} form.
If there is no \ex{after} clause, \ex{awk} returns the loop's state
variables as multiple values.
\end{itemize}
\subsection{Examples}
Here are some examples of \ex{awk} being used to process various types
of input stream.
\begin{code}
(define $ nth) ; Saves typing.
;;; Print out the name and home-directory of everyone in /etc/passwd:
(let ((read-passwd (field-reader (infix-splitter ":" 7))))
(call-with-input-file "/etc/passwd"
(lambda (port)
(awk (read-passwd port) (record fields) ()
(#t (format #t "~a's home directory is ~a~%"
($ fields 0)
($ fields 5)))))))\end{code}
\begin{code}
;;; Print out the user-name and home-directory of everyone whose
;;; name begins with "S"
(let ((read-passwd (field-reader (infix-splitter ":" 7))))
(call-with-input-file "/etc/passwd"
(lambda (port)
(awk (read-passwd port) (record fields) ()
((: bos "S")
(format #t "~a's home directory is ~a~%"
($ fields 0)
($ fields 5)))))))\end{code}
\begin{code}
;;; Read a series of integers from stdin. This expression evaluates
;;; to the number of positive numbers that were read. Note our
;;; "record-reader" is the standard Scheme READ procedure.
(awk (read) (i) ((npos 0))
((> i 0) (+ npos 1)))\end{code}
\begin{code}
;;; Filter -- pass only lines containing my name.
(awk (read-line) (line) ()
("Olin" (display line) (newline)))\end{code}
\begin{code}
;;; Count the number of non-comment lines of code in my Scheme source.
(awk (read-line) (line) ((nlines 0))
((: bos (* white) ";") nlines) ; A comment line.
(else (+ nlines 1))) ; Not a comment line.\end{code}
\begin{code}
;;; Read numbers, counting the evens and odds.
(awk (read) (val) ((evens 0) (odds 0))
((> val 0) (display "pos ") (values evens odds)) ; Tell me about
((< val 0) (display "neg ") (values evens odds)) ; sign, too.
(else (display "zero ") (values evens odds))
((even? val) (values (+ evens 1) odds))
(else (values evens (+ odds 1))))\end{code}
\begin{code}
;;; Determine the max length of all the lines in the file.
(awk (read-line) (line) ((max-len 0))
(#t (max max-len (string-length line))))\end{code}
\begin{code}
;;; (This could also be done with PORT-FOLD:)
(port-fold (current-input-port) read-line
(lambda (line maxlen) (max (string-length line) maxlen))
0)\end{code}
\begin{code}
;;; Print every line longer than 80 chars.
;;; Prefix each line with its line #.
(awk (read-line) (line) lineno ()
((> (string-length line) 80)
(format #t "~d: ~s~%" lineno line)))\end{code}
\begin{code}
;;; Strip blank lines from input.
(awk (read-line) (line) ()
((~ white) (display line) (newline)))\end{code}
\begin{code}
;;; Sort the entries in /etc/passwd by login name.
(for-each (lambda (entry) (display (cdr entry)) (newline)) ; Out
(sort (lambda (x y) (string<? (car x) (car y))) ; Sort
(let ((read (field-reader (infix-splitter ":" 7)))) ; In
(awk (read) (line fields) ((ans '()))
(#t (cons (cons ($ fields 0) line) ans))))))\end{code}
\begin{code}
;;; Prefix line numbers to the input stream.
(awk (read-line) (line) lineno ()
(#t (format #t "~d:\\t~a~%" lineno line)))\end{code}
\section{Backwards compatibility}
Previous scsh releases provided an \ex{awk} form with a different syntax,
designed around regular expressions written in Posix notation as strings,
rather than SREs.
This form is still available in a separate module for old code.
It'll be documented in the next release of this manual. Dig around
in the sources for it.

View File

@ -0,0 +1,45 @@
% boxedminipage.sty
%
% adds the boxedminipage environment---just like minipage, but has a
% box round it!
%
% The thickness of the rules around the box is controlled by
% \fboxrule, and the distance between the rules and the edges of the
% inner box is governed by \fboxsep.
%
% This code is based on Lamport's minipage code.
% \begin{boxedminipage}[pos]{width}: look for the optional position argument
% and fall back to [c] when it is absent.
\def\boxedminipage{\@ifnextchar [{\@iboxedminipage}{\@iboxedminipage[c]}}
% \@iboxedminipage[pos]{width} opens the nested boxes that make up the frame.
%   #1  b -> \vbox, t -> \vtop, anything else -> \vcenter
%   #2  total width of the box, rules and padding included
% \@pboxsw records whether math mode had to be entered just for the \vcenter,
% so that \endboxedminipage knows to leave it again.
\def\@iboxedminipage[#1]#2{\leavevmode \@pboxswfalse
\if #1b\vbox
\else \if #1t\vtop
\else \ifmmode \vcenter
\else \@pboxswtrue $\vcenter
\fi
\fi
\fi\bgroup % start of outermost vbox/vtop/vcenter
\hsize #2
% Top rule of the frame.
\hrule\@height\fboxrule
\hbox\bgroup % inner hbox
% Left rule, then the gap between rule and contents.
\vrule\@width\fboxrule \hskip\fboxsep \vbox\bgroup % innermost vbox
% Width left for the contents: outer width minus both rules and both gaps.
\advance\hsize -2\fboxrule \advance\hsize-2\fboxsep
\textwidth\hsize \columnwidth\hsize
\@parboxrestore
% Footnotes inside the box use the mpfootnote counter, reset to zero here.
\def\@mpfn{mpfootnote}\def\thempfn{\thempfootnote}\c@mpfootnote\z@
\let\@footnotetext\@mpfootnotetext
\let\@listdepth\@mplistdepth \@mplistdepth\z@
\@minipagerestore\@minipagetrue
\everypar{\global\@minipagefalse\everypar{}}}
% \end{boxedminipage}: finish the contents, append any footnotes collected in
% \@mpfootins, then close the three boxes opened by \@iboxedminipage, adding
% the right-hand and bottom rules on the way out.
\def\endboxedminipage{%
\par\vskip-\lastskip
% Footnotes, if any were collected, go at the bottom of the innermost box.
\ifvoid\@mpfootins\else
\vskip\skip\@mpfootins\footnoterule\unvbox\@mpfootins\fi
\egroup % ends the innermost \vbox
\hskip\fboxsep \vrule\@width\fboxrule
\egroup % ends the \hbox
\hrule\@height\fboxrule
\egroup% ends the vbox/vtop/vcenter
% Leave math mode only if \@iboxedminipage entered it for the \vcenter.
\if@pboxsw $\fi}

296
doc/scsh-manual/code.sty Normal file
View File

@ -0,0 +1,296 @@
% code.sty: -*- latex -*-
% Latex macros for a "weak" verbatim mode.
% -- like verbatim, except \, {, and } have their usual meanings.
% Environments: code, tightcode, codeaux, codebox, centercode
% Commands: \dcd, \cddollar, \cdmath, \cd, \codeallowbreaks, \codeskip, \^
% Already defined in LaTeX, but of some relevance: \#, \$, \%, \&, \_, \{, \}
% Changelog at the end of the file.
% These commands give you an environment, code, that is like verbatim
% except that you can still insert commands in the middle of the environment:
% \begin{code}
% for(x=1; x<loop_bound; x++)
% y += x^3; /* {\em Add in {\tt x} cubed} */
% \end{code}
%
% All characters are ordinary except \{}. To get \{} in your text,
% you use the commands \\, \{, and \}.
% These macros mess with the definition of the special chars (e.g., ^_~%).
% The characters \{} are left alone, so you can still have embedded commands:
% \begin{code} f(a,b,\ldots,y,z) \end{code}
% However, if your embedded commands use the formerly-special chars, as in
% \begin{code} x := x+1 /* \mbox{\em This is $y^3$} */ \end{code}
% then you lose. The $ and ^ chars are scanned in as non-specials,
% so they don't work. If the chars are scanned *outside* the code env,
% then you have no problem:
% \def\ycube{$y^3$}
% \begin{code} x := x+1 /* {\em This is \ycube} */ \end{code}
% If you must put special chars inside the code env, you do it by
% prefixing them with the special \dcd ("decode") command, which
% reverts the chars back to special status:
% \begin{code} x := x+1 /* {\dcd\em This is $y^3$} */ \end{code}
% \dcd's scope is bounded by its enclosing braces. It is only defined within
% the code env. You can also turn on just $ with the \cddollar command;
% you can turn on just $^_ with the \cdmath command. See below.
%
% Alternatively, just use \(...\) for $...$, \sp for ^, and \sb for _.
% WARNING:
% Like \verb, you cannot put a \cd{...} inside an argument to a macro
% or a command. If you try, for example,
% \mbox{\cd{$x^y$}}
% you will lose. That is because the text "\cd{$x^y$}" gets read in
% as \mbox's argument before the \cd executes. But the \cd has to
% have a chance to run before LaTeX ever reads the $x^y$ so it can
% turn off the specialness of $ and ^. So, \cd has to appear at
% top level, not inside an argument. Similarly, you can't have
% a \cd or a \code inside a macro (Although you could use \gdef to
% define a macro *inside* a \cd, which you could then use outside.
% Don't worry about this if you don't understand it.)
% BUG: In the codebox env, the effect of a \dcd, \cddollar, or \cdmath
% command is reset at the end of each line. This can be hacked by
% messing with the \halign's preamble, if you feel up to it.
% Usage note: the initial newline after the \begin{code} or
% \begin{codebox} is eaten, but the last newline is not.
% So,
% \begin{code}
% foo
% bar
% \end{code}
% leaves one more blank line after bar than does
% \begin{code}
% foo
% bar\end{code}
% Moral: get in the habit of terminating code envs without a newline
% (as in the second example).
%
% All this stuff tweaks the meaning of space, tab, and newline.
%===============================================================================
% \cd@obeyspaces
% Turns all spaces into non-breakable spaces.
% Note: this is like \@vobeyspaces except without spurious space in defn.
% @xobeysp is basically a space; it's defined in latex.tex.
%
{\catcode`\ =\active\gdef\cd@obeyspaces{\catcode`\ =\active\let =\@xobeysp}}
% \cd@obeytabs
% Turns all tabs into 8 non-breakable spaces (which is bogus).
%
{\catcode`\^^I=\active %
\gdef\cd@obeytabs{\catcode`\^^I=\active\let^^I=\cd@tab}}
\def\cd@tab{\@xobeysp\@xobeysp\@xobeysp\@xobeysp\@xobeysp\@xobeysp\@xobeysp\@xobeysp}
% \cd@obeylines
% Turns all cr's into linebreaks. Pagebreaks are not permitted between lines.
% This is copied from lplain.tex's \obeylines, with the cr def'n changed.
%
{\catcode`\^^M=\active % these lines must end with %
\gdef\cd@obeylines{\catcode`\^^M=\active\let^^M=\cd@cr}}
% What ^M turns into. This def'n keeps blank lines from being compressed out.
\def\cd@cr{\par\penalty10000\leavevmode} % TeX magicness
%\def\cd@cr{\par\penalty10000\mbox{}} % LaTeX
% \codeallowbreaks
% Same as \cd@obeylines, except pagebreaks are allowed.
% Put this command inside a code env to allow pagebreaks.
{\catcode`\^^M=\active % these lines must end with %
\gdef\codeallowbreaks{\catcode`\^^M\active\let^^M\cd@crbr}}
%\def\cd@crbr{\leavevmode\endgraf} % What ^M turns into.
\def\cd@crbr{\par\leavevmode} % What ^M turns into.
% \cd@obeycrsp
% Turns cr's into non-breakable spaces. Used by \cd.
{\catcode`\^^M=\active % these lines must end with %
\gdef\cd@obeycrsp{\catcode`\^^M=\active\let^^M=\@xobeysp}}
% =============================================================================
% Set up code environment, in which most of the common special characters
% appearing in code are treated verbatim, namely: $&#^_~%
% \ { } are still enabled so that macros can be called in this
% environment. Use \\, \{, and \} to use these characters verbatim
% in this environment.
%
% Inside a group, you can make
% all the hacked chars special with the \dcd command
% $ special with the \cddollar command
% $^_ special with the \cdmath command.
% If you have a bunch of math $..$'s in your code env, then a global \cddollar
% or \cdmath at the beginning of the env can save a lot of trouble.
% When chars are special (e.g., after a \dcd), you can still get #$%&_{} with
% \#, \$, \%, \&, \_, \{, and \} -- this is standard LaTeX.
% Additionally, \\ gives \ inside the code env, and when \cdmath
% makes ^ special, it also defines \^ to give ^.
%The hacked characters can be made special again
% within a group by using the \dcd command.
% Note: this environment allows no breaking of lines whatsoever; not
% at spaces or hyphens. To arrange for a break use the standard \- command,
% or a \discretionary{}{}{} which breaks, but inserts nothing. This is useful,
% for example for allowing hyphenated identifiers to be broken, e.g.
% \def\={\discretionary{}{}{}} %optional break
% FOO-\=BAR.
% \setupcode: the font and catcode set-up used by \@codebox, \@codeaux and \cd.
% In order, it
%  - zeroes \parsep and \parindent;
%  - selects the typewriter font with \frenchspacing, makes ` active and
%    calls \@noligs;
%  - defines \\ to print a backslash character;
%  - points \dcd, \cddollar and \cdmath at their implementations;
%  - applies \@makeother to $ & # ^ _ ~ and %;
%  - makes tabs and spaces active via \cd@obeytabs and \cd@obeyspaces.
% Every line of the definition ends in % so that no space tokens are stored.
\def\setupcode{\parsep=0pt\parindent=0pt%
\normalfont\ttfamily\frenchspacing\catcode``=13\@noligs%
\def\\{\char`\\}%
\let\dcd=\cd@dcd\let\cddollar=\cd@dollarspecial\let\cdmath=\cd@mathspecial%
\@makeother\$\@makeother\&\@makeother\#%
\@makeother\^\@makeother\_\@makeother\~%
\@makeother\%\cd@obeytabs\cd@obeyspaces}
% other: $&#^_~%
% left special: \{}
% unnecessary: @`'"
%% codebox, centercode
%%=============================================================================
%% The codebox env makes a box exactly as wide as it needs to be
%% (i.e., as wide as the longest line of code is). This is useful
%% if you want to center a chunk of code, or flush it right, or
%% something like that. The optional argument to the environment,
%% [t], [c], or [b], specifies how to vertically align the codebox,
%% just as with arrays or other boxes. Default is [c].
%% Must be a newline immediately after "\begin{codebox}[t]"!
{\catcode`\^^M=\active % these lines must end with %
\gdef\cd@obeycr{\catcode`\^^M=\active\let^^M=\cr}}
% If there is a [<letter>] option, then the following newline will
% be read *after* ^M is bound to \cr, so we're cool. If there isn't
% an option given (i.e., default to [c]), then the \@ifnextchar will
% gobble up the newline as it gobbles whitespace. So we insert the
% \cr explicitly. Isn't TeX fun?
% \begin{codebox}[pos]: with an explicit [pos], hand over to \@codebox as is;
% otherwise supply [c] followed by an explicit \cr.
\def\codebox{\leavevmode\@ifnextchar[{\@codebox}{\@codebox[c]\cr}} %]
% \@codebox[pos]: open an \hbox containing a math-mode \vtop (pos = t),
% \vbox (pos = b) or \vcenter (anything else), apply \setupcode with ^M
% bound to \cr, and start an \halign whose template is ##\hfil.
\def\@codebox[#1]%
{\hbox\bgroup$\if #1t\vtop \else \if#1b\vbox \else \vcenter \fi\fi\bgroup%
\tabskip\z@\setupcode\cd@obeycr% just before cd@obey
\halign\bgroup##\hfil\span}
% \end{codebox}: close the \halign, the vertical box, the math shift and
% the enclosing \hbox, in that order.
\def\endcodebox{\crcr\egroup\egroup\m@th$\egroup}
% Center the box on the page:
\newenvironment{centercode}%
{\begin{center}\begin{codebox}[c]}%
{\end{codebox}\end{center}}
%% code, codeaux, tightcode
%%=============================================================================
%% Code environment as described above. Lines are kept on one page.
%% This actually works by setting a huge penalty for breaking
%% between lines of code. Code is indented same as other displayed paras.
%% Note: to increase left margin, use \begin{codeaux}{\leftmargin=1in}.
% To allow pagebreaks, say \codeallowbreaks immediately inside the env.
% You can allow breaks at specific lines with a \pagebreak form.
%% N.B.: The \global\@ignoretrue command must be performed just inside
%% the *last* \end{...} before the following text. If not, you will
%% get an extra space on the following line. Blech.
%% This environment takes two arguments.
%% The second, required argument is the \list parameters to override the
%% \@listi... defaults.
%% - Usefully set by clients: \topsep \leftmargin
%% - Possible, but less useful: \partopsep
%% The first, optional argument is the extra \parskip glue that you get around
%% \list environments. It defaults to the value of \parskip.
% \begin{codeaux}[parskip]{list-params}: default the optional argument to
% the current \parskip.
\def\codeaux{\@ifnextchar[{\@codeaux}{\@codeaux[\parskip]}} %]
% \@codeaux[#1]{#2}: inside a group, set \parskip to #1 and open a label-less
% list whose \parsep, \rightskip, \listparindent and \itemindent are zeroed
% before #2 is applied.  The single \item[] holds the code, which is set up
% by \setupcode with ^M handled by \cd@obeylines.
\def\@codeaux[#1]#2{%
\bgroup\parskip#1%
\begin{list}{}%
{\parsep\z@\rightskip\z@\listparindent\z@\itemindent\z@#2}%
\item[]\setupcode\cd@obeylines}%
% \end{codeaux}: close the list and the group opened by \@codeaux, then set
% \@ignoretrue globally (see the N.B. above about the extra space).
\def\endcodeaux{\end{list}\leavevmode\egroup\ignorespaces\global\@ignoretrue}
%% Code env is codeaux with the default margin and spacing \list params:
\def\code{\codeaux{}} \let\endcode=\endcodeaux
%% Like code, but with no extra vertical space above and below.
\def\tightcode{\codeaux[=0pt]{\topsep\z@}}%
\let\endtightcode\endcodeaux
% {\vspace{-1\parskip}\begin{codeaux}{\partopsep\z@\topsep\z@}}%
% {\end{codeaux}\vspace{-1\parskip}}
% Reasonable separation between lines of code
\newcommand{\codeskip}{\penalty0\vspace{2ex}}
% \cd is used to build a code environment in the middle of text.
% Note: only difference from display code is that cr's are taken
% as unbreakable spaces instead of linebreaks.
% \cd{...}: open a group, choose the math-mode or text-mode reader for the
% argument, apply \setupcode with ^M bound to an unbreakable space, and only
% then let \startcode read the argument (so it is scanned under the new
% catcodes).
\def\cd{\leavevmode\begingroup\ifmmode\let\startcode=\startmcode\else%
\let\startcode\starttcode\fi%
\setupcode\cd@obeycrsp\startcode}
% Text mode: typeset the argument and close the group opened by \cd.
\def\starttcode#1{#1\endgroup}
% Math mode: same, but the argument is wrapped in an \hbox.
\def\startmcode#1{\hbox{#1}\endgroup}
% Restore $&#^_~% to their normal catcodes
% Define \^ to give the ^ char.
% \dcd points to this guy inside a code env.
\def\cd@dcd{\catcode`\$=3\catcode`\&=4\catcode`\#=6\catcode`\^=7%
\catcode`\_=8\catcode`\~=13\catcode`\%=14\def\^{\char`\^}}
% Selectively enable $, and $^_ as special.
% \cd@mathspecial also defines \^ to give the ^ char.
% \cddollar and \cdmath point to these guys inside a code env.
\def\cd@dollarspecial{\catcode`\$=3}
\def\cd@mathspecial{\catcode`\$=3\catcode`\^=7\catcode`\_=8%
\def\^{\char`\^}}
% Change log:
% Started off as some macros found in C. Rich's library.
% Olin 1/90:
% Removed \makeatletter, \makeatother's -- they shouldn't be there,
% because style option files are read with makeatletter. The terminal
% makeatother screwed things up for the following style options.
% Olin 3/91:
% Rewritten.
% - Changed things so blank lines don't get compressed out (the \leavevmode
% in \cd@cr and \cd@crbr).
% - Changed names to somewhat less horrible choices.
% - Added lots of doc, so casual hackers can more easily mess with all this.
% - Removed `'"@ from the set of hacked chars, since they are already
% non-special.
% - Removed the bigcode env, which effect can be had with the \codeallowbreaks
% command.
% - Removed the \@noligs command, since it's already defined in latex.tex.
% - Win big with the new \dcd, \cddollar, and \cdmath commands.
% - Now, *only* the chars \{} are special inside the code env. If you need
% more, use the \dcd command inside a group.
% - \cd now works inside math mode. (But if you use it in a superscript,
% it still comes out full size. You must explicitly put a \scriptsize\tt
% inside the \cd: $x^{\cd{\scriptsize\tt...}}$. A \leavevmode was added
% so that if you begin a paragraph with a \cd{...}, TeX realises you
% are starting a paragraph.
% - Added the codebox env. Tricky bit involving the first line hacked
% with help from David Long.
% Olin 8/94
% Changed the font commands for LaTeX2e.

105
doc/scsh-manual/css.t2p Normal file
View File

@ -0,0 +1,105 @@
% css.t2p
% Dorai Sitaram
% 19 Jan 2001
% A basic style for HTML documents generated
% with tex2page.
\cssblock
/* Page-wide defaults: black text on light grey, with side margins. */
body {
color: black;
background-color: #e5e5e5;
/*background-color: beige;*/
margin-top: 2em;
margin-left: 8%;
margin-right: 8%;
}
/* All heading levels get the same space above. */
h1,h2,h3,h4,h5,h6 {
margin-top: .5em;
}
/* Part and chapter heading lines are set smaller than the surrounding text. */
.partheading {
font-size: 70%;
}
.chapterheading {
font-size: 70%;
}
/* Indent preformatted text. */
pre {
margin-left: 2em;
}
/* Ordered lists change numbering style with nesting depth:
   decimal, lower-alpha, lower-roman, upper-alpha. */
ol {
list-style-type: decimal;
}
ol ol {
list-style-type: lower-alpha;
}
ol ol ol {
list-style-type: lower-roman;
}
ol ol ol ol {
list-style-type: upper-alpha;
}
/* Colours for elements of class "scheme" and the sub-classes nested in it.
   NOTE(review): the class names suggest tex2page's Scheme syntax
   highlighting -- confirm against the generated HTML. */
.scheme {
color: brown;
}
.scheme .keyword {
color: #990000;
font-weight: bold;
}
.scheme .builtin {
color: #990000;
}
.scheme .variable {
color: navy;
}
.scheme .global {
color: purple;
}
.scheme .selfeval {
color: green;
}
.scheme .comment {
color: teal;
}
/* Right-aligned, italic, red text for class "navigation". */
.navigation {
color: red;
text-align: right;
font-style: italic;
}
/* Class "disable" is greyed out. */
.disable {
/* color: #e5e5e5; */
color: gray;
}
.smallcaps {
font-size: 75%;
}
/* Small grey right-aligned text; a rule inside it is left-aligned and
   40% wide. */
.smallprint {
color: gray;
font-size: 75%;
text-align: right;
}
.smallprint hr {
text-align: left;
width: 40%;
}
\endcssblock

6
doc/scsh-manual/ct.sty Normal file
View File

@ -0,0 +1,6 @@
% Loads cmtt fonts in on \tt. -*- latex -*-
% I prefer these to the Courier fonts that latex gives you w/postscript styles.
% Courier is too spidery and too wide -- it's hard to get 80 chars on a line.
% -Olin
% \ttdefault is the font family that \ttfamily and \texttt select.
\renewcommand{\ttdefault}{cmtt}

273
doc/scsh-manual/decls.tex Normal file
View File

@ -0,0 +1,273 @@
\makeatletter
\def\ie{\mbox{\emph{i.e.}}} % \mbox keeps the last period from
\def\Ie{\mbox{\emph{I.e.}}} % looking like an end-of-sentence.
\def\eg{\mbox{\emph{e.g.}}}
\def\Eg{\mbox{\emph{E.g.}}}
\def\etc{{\em etc.}}
\def\Lisp{\textsc{Lisp}}
\def\CommonLisp{\textsc{Common Lisp}}
\def\Ascii{\textsc{Ascii}}
\def\Ansi{\textsc{Ansi}}
\def\Unix{{Unix}} % Not smallcaps, according to Bart.
\def\Scheme{{Scheme}}
\def\scm{{Scheme 48}}
\def\R4RS{R4RS}
\def\Posix{\textsc{Posix}}
\def\sharpf{\normalfont\texttt{\#f}}
\def\sharpt{\normalfont\texttt{\#t}}
\newcommand{\synteq}{\textnormal{::=}}
\def\maketildeother{\catcode`\~=12}
\def\maketildeactive{\catcode`\~=13}
\def\~{\char`\~}
\newcommand{\evalsto}{\ensuremath{\Rightarrow}}
% One-line code examples
%\newcommand{\codex}[1]% One line, centred. Tight spacing.
% {$$\abovedisplayskip=.75ex plus 1ex minus .5ex%
% \belowdisplayskip=\abovedisplayskip%
% \abovedisplayshortskip=0ex plus .5ex%
% \belowdisplayshortskip=\abovedisplayshortskip%
% \hbox{\ttt #1}$$}
%\newcommand{\codex}[1]{\begin{tightinset}\ex{#1}\end{tightinset}\ignorespaces}
\newcommand{\codex}[1]{\begin{leftinset}\ex{#1}\end{leftinset}\ignorespaces}
\def\widecode{\codeaux{\leftmargin=0pt\topsep=0pt}}
\def\endwidecode{\endcodeaux}
% For multiletter vars in math mode:
\newcommand{\var}[1]{\mbox{\frenchspacing\it{#1}}}
\newcommand{\vari}[2]{\ensuremath{\mbox{\it{#1}}_{#2}}}
%% What you frequently want when you say \tt:
\def\ttchars{\catcode``=13\@noligs\frenchspacing}
\def\ttt{\normalfont\ttfamily\ttchars}
% Works in math mode; all special chars remain special; cheaper than \cd.
% Will not be correct size in super and subscripts, though.
\newcommand{\ex}[1]{{\normalfont\texttt{\ttchars #1}}}
% Four label-less list environments for displayed material.  All of them zero
% \topsep, put the contents in a single \item[], and end with
% \global\@ignoretrue.  They differ only in \parskip and the right margin.
%
% inset: \parskip of 1ex plus 1ex; right margin set equal to the left margin.
\newenvironment{inset}
{\bgroup\parskip=1ex plus 1ex\begin{list}{}%
{\topsep=0pt\rightmargin\leftmargin}%
\item[]}%
{\end{list}\leavevmode\egroup\global\@ignoretrue}
% leftinset: like inset, but the right margin is left at the list default.
\newenvironment{leftinset}
{\bgroup\parskip=1ex plus 1ex\begin{list}{}%
{\topsep=0pt}%
\item[]}%
{\end{list}\leavevmode\egroup\global\@ignoretrue}
% tightinset: like inset, but with \parskip set to 0pt.
\newenvironment{tightinset}
{\bgroup\parskip=0pt\begin{list}{}%
{\topsep=0pt\rightmargin\leftmargin}%
\item[]}%
{\end{list}\leavevmode\egroup\global\@ignoretrue}
% tightleftinset: like leftinset, but with \parskip set to 0pt.
\newenvironment{tightleftinset}
{\bgroup\parskip=0pt\begin{list}{}%
{\topsep=0pt}%
\item[]}%
{\end{list}\leavevmode\egroup\global\@ignoretrue}
\long\def\remark#1{\bgroup\small\begin{quote}\textsl{Remark: } #1\end{quote}\egroup}
\newenvironment{remarkenv}{\bgroup\small\begin{quote}\textsl{Remark: }}%
{\end{quote}\egroup}
\newcommand{\oops}[1]{\bgroup\small\begin{quote}\textsl{Oops: } #1\end{quote}\egroup}
\newcommand{\note}[1]{\{Note #1\}}
\newcommand{\itum}[1]{\item{\bf #1}\\*}
% For use in code. The \llap magicness makes the lambda exactly as wide as
% the other chars in \tt; the \hskip shifts it right a bit so it doesn't
% crowd the left paren -- which is necessary if \tt is cmtt.
% Note that (\l{x y} (+ x y)) uses the same number of columns in TeX form
% as it produces when typeset. This makes it easy to line up the columns
% in your input. \l is bound to some useless command in LaTeX, so we have to
% define it w/renewcommand.
\let\oldl\l %Save the old \l on \oldl
\renewcommand{\l}[1]{\ \llap{$\lambda$\hskip-.05em}\ (#1)}
% This one is for the rare (lambda x ...) case -- it doesn't have the
% column-invariant property. Oh, well.
\newcommand{\lx}[1]{\ \llap{$\lambda$\hskip-.05em}\ {#1}}
% For subcaptions
\newcommand{\subcaption}[1]
{\unskip\vspace{-2mm}\begin{center}\unskip\em#1\end{center}}
%%% T release notes stuff
\newlength{\notewidth}
\setlength{\notewidth}{\textwidth}
\addtolength{\notewidth}{-1.25in}
%\newcommand{\remark} [1]
% {\par\vspace{\parskip}
% \parbox[t]{.75in}{\sc Remark:}
% \parbox[t]{\notewidth}{\em #1}
% \vspace{\parskip}
% }
% optiontable: a two-column table inside a tightinset, with rows stretched by
% 1.5 and the first column set in \ttt.
% NOTE(review): the >{...} column prefix is array-package syntax; this file
% does not load that package itself -- confirm it is loaded by the document.
\newenvironment{optiontable}%
{\begin{tightinset}\renewcommand{\arraystretch}{1.5}%
\begin{tabular}{@{}>{\ttt}ll@{}}}%
{\end{tabular}\end{tightinset}}%
% desctable{width}: a two-column table inside an inset, with rows stretched by
% 1.5; the second column is a paragraph column of the given width.
\newenvironment{desctable}[1]%
{\begin{inset}\renewcommand{\arraystretch}{1.5}%
\begin{tabular}{lp{#1}}}%
{\end{tabular}\end{inset}}
\def\*{{\ttt *}}
% Names of things
\newcommand{\keyword} [1]{\index{#1}{\normalfont\textsf{#1}}}
\newcommand{\evalto}{$\Longrightarrow$\ }
\renewcommand{\star}{$^*$\/}
\newcommand{\+}{$^+$}
% Semantic domains, used to indicate the type of a value
\newcommand{\sem}{\normalfont\itshape} %semantic font
\newcommand{\semvar}[1]{\textit{#1}} %semantic font
\newcommand{\synvar}[1]{\textrm{\textit{$<$#1$>$}}} %syntactic font
\newcommand{\type}{\sem}
\newcommand{\zeroormore}[1]{{\sem #1$_1$ \ldots #1$_n$}}
\newcommand{\oneormore}[1]{{\sem #1$_1$ #1$_2$ \ldots #1$_n$}}
\newcommand{\proc} {{\sem procedure}}
\newcommand{\boolean} {{\sem boolean}}
\newcommand{\true} {{\sem true}}
\newcommand{\false} {{\sem false}}
\newcommand{\num} {{\sem number}}
\newcommand{\fixnum} {{\sem fixnum}}
\newcommand{\integer} {{\sem integer}}
\newcommand{\real} {{\sem real}}
\newcommand{\character} {{\sem character}}
\newcommand{\str} {{\sem string}}
\newcommand{\sym} {{\sem symbol}}
\newcommand{\location} {{\sem location}}
\newcommand{\object} {{\sem object}}
\newcommand{\error} {{\sem error}}
\newcommand{\syntaxerror} {{\sem syntax error}}
\newcommand{\readerror} {{\sem read error}}
\newcommand{\undefined} {{\sem undefined}}
\newcommand{\noreturn} {{\sem no return value}}
\newcommand{\port} {{\sem port}}
% semantic variables
\newcommand{\identifier} {{\sem identifier}}
\newcommand{\identifiers} {\zeroormore{\<ident>}}
\newcommand{\expr} {{\sem expression}}
\newcommand{\body} {{\sem body}}
\newcommand{\valueofbody} {{\sem value~of~body}}
\newcommand{\emptylist} {{\sem empty~list}}
\newcommand{\car} {\keyword{car}}
\newcommand{\cdr} {\keyword{cdr}}
% generally useful things
% For line-breaking \tt stuff.
\renewcommand{\=}{\discretionary{-}{}{-}}
\newcommand{\ob}{\discretionary{}{}{}} % Optional break.
\newcommand{\indx}[1]{#1 \index{ #1 }}
%\newcommand{\gloss}[1]{#1 \glossary{ #1 }}
% This lossage produces #2 if #1 is zero length, otw #3.
% We use it to conditionally add a space between the procedure and
% the args in procedure prototypes, but only if there are any args--
% we want to produce "(read)", not "(read )".
\newlength{\voidlen}
\newcommand{\testvoid}[3]{\settowidth\voidlen{#1}\ifdim\voidlen>0in{#3}\else{#2}\fi}
% Typeset a definition prototype line, e.g.:
% (cons <arg1> <arg2>) -> pair procedure
%
% Five args are: proc-name args ret-value(s) type index-entry
% \dfnix{name}{args}{ret}{type}{index-entry}: typeset one prototype line as
% wide as \linewidth.  Only four arguments are declared; the definition ends
% in a bare \index, which consumes the fifth.  \testvoid drops the space
% between name and args (and the italic correction) when args is empty.
\newcommand{\dfnix}[4]% FIVE args, really.
{\hbox to \linewidth{\ttchars%
{\ttt(#1\testvoid{#2}{}{\ }{\sem{#2}}\testvoid{#2}{}{\/})\hskip 1em minus
0.5em$\longrightarrow$\hskip 1em minus 0.5em{\sem{#3}}\hfill\quad\textnormal{#4}}}\index}
% \dfnx: \dfnix with the index entry built from the name (#1@\texttt{#1}).
\newcommand{\dfnx}[4] {\dfnix{#1}{#2}{#3}{#4}{#1@\texttt{#1}}}
% \dfn and \dfni: the same two commands preceded by \par\medskip.
\newcommand{\dfn} {\par\medskip\dfnx} % Takes 4 args, actually.
\newcommand{\dfni} {\par\medskip\dfnix} % Takes 5 args, actually.
% \defvar{name}{text}: \defvarx preceded by \par\medskip.
\newcommand{\defvar} {\par\medskip\defvarx} % Takes 2 args, actually.
% \defvarx{name}{text}: index the name, then typeset it in \ttt at the left
% of a \linewidth-wide line with the second argument pushed to the right.
\newcommand{\defvarx}[2]%
{\index{#1}
\hbox to \linewidth{\ttchars{{\ttt{#1}} \hfill #2}}}%
% Typeset the protocol line, then do the following descriptive text indented.
% If you want to group two procs together, do the first one with a \dfn,
% then the second one, and the documentation, with a dfndescx environment.
% This one doesn't put whitespace above. Use it immediately after a \dfn
% to group two prototype lines together.
\newenvironment{dfndescx}[4]%
{\dfnx{#1}{#2}{#3}{#4}\begin{desc}}{\end{desc}}
\newenvironment{dfndesc}[4] % This one puts whitespace above.
{\par\medskip\begin{dfndescx}{#1}{#2}{#3}{#4}}
{\end{dfndescx}}
\newenvironment{desc}%
{\nopagebreak[2]%
\smallskip
\bgroup\begin{list}{}{\topsep=0pt\parskip=0pt}\item[]}
{\end{list}\leavevmode\egroup\global\@ignoretrue}
\newcommand{\defun} [3] {\dfn{#1}{#2}{#3}{procedure}} % preskip
\newcommand{\defunx}[3]{\dfnx{#1}{#2}{#3}{procedure}} % no skip
\newenvironment{defundescx}[3]%
{\begin{dfndescx}{#1}{#2}{#3}{procedure}}
{\end{dfndescx}}
\newenvironment{defundesc}[3]%
{\begin{dfndesc}{#1}{#2}{#3}{procedure}}
{\end{dfndesc}}
\newenvironment{column}{\begin{tabular}[t]{@{}l@{}}}{\end{tabular}}
% exampletable: a two-column table inside a leftinset, with \evalto set
% between the columns.  Two helpers are defined locally
% for rows that do not fit that layout:
%   \header{text}          a row spanning both columns;
%   \splitline{expr}{val}  the expression on one spanning row and, on the
%                          next, an indented \evalto followed by the value.
\newenvironment{exampletable}%
{\begin{leftinset}%
\newcommand{\header}[1]{\multicolumn{2}{@{}l@{}}{##1}\\}%
\newcommand{\splitline}[2]%
{\multicolumn{2}{@{}l@{}}{##1}\\\multicolumn{2}{@{}l@{}}{\qquad\evalto\quad{##2}}}
\begin{tabular}{@{}l@{\quad\evalto\quad}l@{}}}%
{\end{tabular}\end{leftinset}}
% Put on blank lines in a code env to allow a pagebreak.
\newcommand{\cb}{\pagebreak[0]}
% boxedcode: displayed material framed by a one-column ruled table inside an
% inset.  The body is the contents of a single {|l|} column; separate lines
% with \\.
% The table is opened with \begin{tabular} so that it pairs with the
% \end{tabular} in the closing code.  A bare \tabular does not update
% \@currenvir, so \end{tabular} would fail LaTeX's environment-matching
% check and close the wrong group.
\newenvironment{boxedcode}
{\begin{inset}\begin{tabular}{|l|}\hline}
{\\ \hline \end{tabular}\end{inset}}
% A ragged-right decl that doesn't redefine \\ -- for use in tables.
\newcommand{\raggedrightparbox}{\let\temp=\\\raggedright\let\\=\temp}
\newenvironment{boxedfigure}[1]%
{\begin{figure}[#1]\begin{boxedminipage}{\linewidth}\vskip 1.5ex}
{\end{boxedminipage}\end{figure}}
\makeatother

View File

@ -0,0 +1,76 @@
% Document style option "draftfooter"
% -- usage: \documentstyle[...,draftfooter,...]{...}
% -- puts "DRAFT" with date and time in page footer
%
% Olin Shivers 1/17/94
% - Hacked from code I used in my dissertation and from code in a
% drafthead.sty package written by Stephen Page sdpage@uk.ac.oxford.prg.
%----------------------------------------------------------------------------
%
% compute the time in hours and minutes; make new variables \timehh and \timemm
%
% Allocate the two counters.
\newcount\timehh\newcount\timemm
% \time is TeX's count of minutes since midnight.
\timehh=\time
% \divide truncates, so \timehh becomes the hour; \timemm starts out as the
% full minute count ...
\divide\timehh by 60 \timemm=\time
% ... and is reduced by 60*\timehh, using \count255 as scratch.
\count255=\timehh\multiply\count255 by -60 \advance\timemm by \count255
%
% The framed word DRAFT, small and bold.
\def\draftbox{{\protect\small\bf \fbox{DRAFT}}}
% "<date> -- hh:mm", small and slanted; hours and minutes below 10 get a
% leading zero.
\def\drafttime{%
{\protect\small\sl\today\ -- \ifnum\timehh<10 0\fi%
\number\timehh\,:\,\ifnum\timemm<10 0\fi\number\timemm}}
% Zero-width boxes holding the time stamp or the page number, aligned to the
% right (r) or left (l) edge of the box.
\def\drafttimer{\protect\makebox[0pt][r]{\drafttime}}
\def\drafttimel{\protect\makebox[0pt][l]{\drafttime}}
\def\thepagel{\protect\makebox[0pt][l]{\rm\thepage}}
\def\thepager{\protect\makebox[0pt][r]{\rm\thepage}}
% Header is empty.
% Footer is "date DRAFT pageno"
% Page style "plain": empty headers; odd-page footer is
% "date  DRAFT  pageno", mirrored on even pages when the document is
% two-sided.
% Line ends that follow a brace are commented out with %, so the macro
% stores no space tokens and typesets nothing if the style is selected in
% horizontal mode.
\def\ps@plain{%
\let\@mkboth\@gobbletwo
\let\@oddhead\@empty \let\@evenhead\@empty
\def\@oddfoot{\reset@font\rm\drafttimel\hfil\draftbox\hfil\thepager}%
\if@twoside
\def\@evenfoot{\reset@font\rm\thepagel\hfil\draftbox\hfil\drafttimer}%
\else \let\@evenfoot\@oddfoot
\fi
}
% Aux macro -- sets footer to be "date DRAFT".
% Install the "date  DRAFT" footers (no page number): time stamp at the left
% on odd pages, and at the right on even pages when the document is
% two-sided.
% Line ends that follow a brace are commented out with %, so the macro
% stores no space tokens.
\def\@draftfooters{%
\def\@oddfoot{\reset@font\rm\drafttimel\hfil\draftbox}%
\if@twoside
\def\@evenfoot{\reset@font\rm\draftbox\hfil\drafttimer}%
\else \let\@evenfoot\@oddfoot
\fi
}
% Header is empty.
% Footer is "date DRAFT".
% Page style "empty": empty headers, draft footers.
% The % after the opening brace keeps the line end from being stored as a
% space token in the macro.
\def\ps@empty{%
\let\@mkboth\@gobbletwo
\let\@oddhead\@empty \let\@evenhead\@empty
\@draftfooters
}
% Header is defined by the document style (article, book, etc.).
% Footer is "date DRAFT".
% Keep the \ps@headings that is in force when this file is read, then wrap
% it: run the saved definition first and overwrite the footers afterwards.
% The % after the opening brace keeps the line end from being stored as a
% space token in the macro.
\let\@draftoldhead\ps@headings
\def\ps@headings{%
\@draftoldhead % Do the default \pagestyle{headings} stuff.
\@draftfooters % Then define the draft footers:
}
% Header is defined by the document style (article, book, etc.),
% and filled in by user's \markboth and \markright commands.
% Footer is "date DRAFT".
% Keep the \ps@myheadings that is in force when this file is read, then wrap
% it: run the saved definition first and overwrite the footers afterwards.
% The % after the opening brace keeps the line end from being stored as a
% space token in the macro.
\let\@draftoldmyhead\ps@myheadings
\def\ps@myheadings{%
\@draftoldmyhead % Do the default \pagestyle{myheadings} stuff.
\@draftfooters % Then define the draft footers:
}
\ps@plain

56
doc/scsh-manual/front.tex Normal file
View File

@ -0,0 +1,56 @@
%&latex -*- latex -*-
\title{Scsh Reference Manual}
\subtitle{For scsh release 0.5.3}
\author{Olin Shivers and Brian D.~Carlstrom}
\date{June 2001}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Some code-changes for tex2page and latex output. NF
\texonly
\chapter*{Acknowledgements}
\endtexonly
\htmlonly
\\ \ex{Acknowledgements} \\ \\
\endhtmlonly
Who should I thank?
My so-called ``colleagues,'' who laugh at me behind my back,
all the while becoming famous on {\em my\/} work?
My worthless graduate students, whose computer skills appear to
be limited to downloading bitmaps off of netnews?
My parents, who are still waiting for me to quit ``fooling around with
computers,'' go to med school, and become a radiologist?
My department chairman, a manager who gives one new insight into
and sympathy for disgruntled postal workers?
My God, no one could blame me---no one!---if I went off the edge and just
lost it completely one day.
I couldn't get through the day as it is without the Prozac and Jack Daniels
I keep on the shelf, behind my Tops-20 JSYS manuals.
I start getting the shakes real bad around 10am, right before my
advisor meetings. A 10 oz.\ Jack 'n Zac helps me get through the
meetings without one of my students winding up with his severed head
in a bowling-ball bag. They look at me funny; they think I twitch a
lot. I'm not twitching. I'm controlling my impulse to snag my 9mm
Sig-Sauer out from my day-pack and make a few strong points about
the quality of undergraduate education in Amerika.
If I thought anyone cared, if I thought anyone would even be reading this,
I'd probably make an effort to keep up appearances until the last possible
moment. But no one does, and no one will. So I can pretty much say exactly
what I think.
Oh, yes, the {\em acknowledgements.}
I think not. I did it. I did it all, by myself.
\begin{flushright}
\begin{tabular}{l}
Olin Shivers \\
Cambridge \\
September 4, 1994
\end{tabular}
\end{flushright}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\tableofcontents

View File

@ -0,0 +1,16 @@
% headings.tex -*- latex -*-
% Quieter headings than the ones used in article.sty.
% This is not a style option. Don't say [headings].
% Instead, say \input{headings} after the \documentstyle.
% -Olin 7/91
% @ must be a letter here so that \@startsection and \z@ can be named.
\makeatletter
% Each redefinition hands \@startsection, in order: the sectional unit's
% name, its level number, the indentation (\z@), the glue before the
% heading, the glue after it, and the font commands for the heading text.
% \section: level 1, \large bold.
\def\section{\@startsection {section}{1}{\z@}{-3.5ex plus -1ex minus
-.2ex}{2.3ex plus .2ex}{\large\normalfont\bfseries}}
% \subsection: level 2, \normalsize bold.
\def\subsection{\@startsection{subsection}{2}{\z@}{-3.25ex plus -1ex minus
-.2ex}{1.5ex plus .2ex}{\normalsize\normalfont\bfseries}}
% \subsubsection: level 3, same glue and font as \subsection.
\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-3.25ex plus
-1ex minus -.2ex}{1.5ex plus .2ex}{\normalsize\normalfont\bfseries}}
\makeatother

454
doc/scsh-manual/intro.tex Normal file
View File

@ -0,0 +1,454 @@
%&latex -*- latex -*-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\chapter{Introduction}
This is the reference manual for scsh,
a {\Unix} shell that is embedded within {\Scheme}.
Scsh is a Scheme system designed for writing useful standalone Unix
programs and shell scripts---it spans a wide range of application,
from ``script'' applications usually handled with perl or sh,
to more standard systems applications usually written in C.
Scsh comes built on top of {\scm}, and has two components:
a process notation for running programs and setting up pipelines
and redirections,
and a complete syscall library for low-level access to the operating system.
This manual gives a complete description of scsh.
A general discussion of the design principles behind scsh can be found
in a companion paper, ``A Scheme Shell.''
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Copyright \& source-code license}
Scsh is open source. The complete sources come with the standard
distribution, which can be downloaded off the net.
For years, scsh's underlying Scheme implementation, Scheme 48, did not have an
open-source copyright. However, around 1999/2000, the Scheme 48 authors
graciously retrofitted a BSD-style open-source copyright onto the system.
Swept up by the fervor, we tacked an ideologically hip license onto scsh
source, ourselves (BSD-style, as well). Not that we ever cared before what you
did with the system.
As a result, the whole system is now open source, top-to-bottom.
We note that the code is a rich source for other Scheme implementations
to mine. Not only the \emph{code}, but the \emph{APIs} are available
for implementors working on Scheme environments for systems programming.
These APIs represent years of work, and should provide a big head-start
on any related effort. (Just don't call it ``scsh,'' unless it's
\emph{exactly} compliant with the scsh interfaces.)
Take all the code you like; we'll just write more.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Obtaining scsh}
Scsh is distributed via net publication.
We place new releases at well-known network sites,
and allow them to propagate from there.
We currently release scsh to the following Internet sites:
\begin{inset}\begin{flushleft}
\ex{\urlh{ftp://ftp-swiss.ai.mit.edu/pub/su/}{ftp://ftp-swiss.ai.mit.edu/pub/su/}} \\
\ex{\urlh{http://www-swiss.ai.mit.edu/scsh/scsh.html}{http://www-swiss.ai.mit.edu/scsh/scsh.html}} \\
\ex{\urlh{http://www.cs.indiana.edu/scheme-repository/}{http://www.cs.indiana.edu/scheme-repository/}} \\
\end{flushleft}
\end{inset}
These sites are
the MIT Project Mac ftp server,
the Scheme Shell home page, and
the Indiana Scheme Repository home page,
respectively.
Each should have a compressed tar file of the entire scsh release,
which includes all the source code and the manual,
and a separate file containing just this manual in Postscript form,
for those who simply wish to read about the system.
However, nothing is certain for long on the Net.
Probably the best way to get a copy of scsh is to use a network
resource-discovery tool, such as archie,
to find ftp servers storing scsh tar files.
Take the set of sites storing the most recent release of scsh,
choose one close to your site, and download the tar file.
\section{Building scsh}
Scsh currently runs on a fairly large set of Unix systems, including
Linux, NetBSD, SunOS, Solaris, AIX, NeXTSTEP, Irix, and HP-UX.
We use the GNU project's autoconf tool to generate self-configuring
shell scripts that customise the scsh Makefile for different OS variants.
This means that if you use one of the common Unix implementations,
building scsh should require exactly the following steps:
\begin{inset}
\begin{tabular}{l@{\qquad}l}
\ex{gunzip scsh.tar.gz} & \emph{Uncompress the release tar file.} \\
\ex{tar xfv scsh.tar} & \emph{Unpack the source code.} \\
\ex{cd scsh-0.5} & \emph{Move to the source directory.} \\
\ex{./configure} & \emph{Examine host; build Makefile.} \\
\ex{make} & \emph{Build system.}
\end{tabular}
\end{inset}
When you are done, you should have a virtual machine compiled in
file \ex{scshvm}, and a heap image in file \ex{scsh/scsh.image}.
Typing
\begin{code}
make install
\end{code}
will install these programs in your installation directory
(by default, \ex{/usr/local}), along with a small stub startup
binary, \ex{scsh}.
If you don't have the patience to do this, you can start up
a Scheme shell immediately after the initial make by simply
saying
\codex{./scshvm -o ./scshvm -i scsh/scsh.image}
See chapter~\ref{chapt:running} for full details on installation
locations and startup options.
It is not too difficult to port scsh to another Unix platform if your
OS is not supported by the current release.
See the release notes for more details on how to do this.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Caveats}
It is important to note what scsh is \emph{not}, as well as what it is.
Scsh, in the current release, is primarily designed for the writing of
shell scripts---programming.
It is not a very comfortable system for interactive command use:
the current release lacks job control, command-line editing, a terse,
convenient command syntax, and it does not read in an initialisation
file analogous to \ex{.login} or \ex{.profile}.
We hope to address all of these issues in future releases;
we even have designs for several of these features;
but the system as-released does not currently provide these features.
In the current release, the system has some rough edges.
It is quite slow to start up---loading the initial image into the
{\scm} virtual machine induces a noticeable delay.
This can be fixed with the static heap linker provided with this release.
We welcome parties interested in porting the manual to a more portable
XML or SGML format; please contact us if you are interested in doing so.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Naming conventions}
Scsh follows a general naming scheme that consistently employs a set of
abbreviations.
This is intended to make it easier to remember the names of things.
Some of the common ones are:
\begin{description}
\item [\ex{fdes}]
Means ``file descriptor,'' a small integer used in {\Unix}
to represent I/O channels.
\item [\ex{\ldots*}]
A given bit of functionality sometimes comes in two related forms,
the first being a \emph{special form} that contains a body of
{\Scheme} code to be executed in some context,
and the other being a \emph{procedure} that takes a procedural
argument (a ``thunk'') to be called in the same context.
The procedure variant is named by taking the name of the special form,
and appending an asterisk. For example:
\begin{code}
;;; Special form:
(with-cwd "/etc"
(for-each print-file (directory-files))
(display "All done"))
;;; Procedure:
(with-cwd* "/etc"
(lambda ()
(for-each print-file (directory-files))
(display "All done")))\end{code}
\item [\ex{\var{action}/\var{modifier}}]
The infix ``\ex{/}'' is pronounced ``with,'' as in
\ex{exec/env}---``exec with environment.''
\item [\ex{call/\ldots}]
Procedures that call their argument on some computed value
are usually named ``\ex{call/\ldots},'' \eg,
\ex{(call/fdes \var{port} \var{proc})}, which calls \var{proc}
on \var{port}'s file descriptor, returning whatever \var{proc}
returns. The abbreviated name means ``call with file descriptor.''
\item [\ex{with-\ldots}]
Procedures that call their argument, and special forms that execute
their bodies in some special dynamic context frequently have
names of the form \ex{with-\ldots}. For example,
\ex{(with-env \var{env} \vari{body}1 \ldots)} and
\ex{(with-env* \var{env} \var{thunk})}. These forms set
the process environment, execute their body or thunk,
and then return after resetting the environment to its original
state.
\item[\ex{create-}]
Procedures that create objects in the file system (files, directories,
temp files, fifos, \etc), begin with \ex{create-\ldots}.
\item [\ex{delete-}]
Procedures that delete objects from the file system (files,
directories, temp files, fifos, \etc), begin with \ex{delete-\ldots}.
\item[ \ex{\var{record}:\var{field}} ]
Procedures that access fields of a record are usually written
with a colon between the name of the record and the name of the
field, as in \ex{user-info:home-dir}.
\item[\ex{\%\ldots}]
A percent sign is used to prefix lower-level scsh primitives
that are not commonly used.
\item[\ex{-info}]
Data structures packaging up information about various OS
entities frequently end in \ldots\ex{-info}. Examples:
\ex{user-info}, \ex{file-info}, \ex{group-info}, and \ex{host-info}.
\end{description}
%
Enumerated constants from some set \var{s} are usually named
\ex{\var{s}/\vari{const}1}, \ex{\var{s}/\vari{const}2}, \ldots.
For example, the various {\Unix} signal integers have the names
\ex{signal/cont}, \ex{signal/kill}, \ex{signal/int}, \ex{signal/hup},
and so forth.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Lexical issues}
Scsh's lexical syntax is just {\R4RS} {\Scheme}, with the following
exceptions.
\subsection{Extended symbol syntax}
Scsh's symbol syntax differs from {\R4RS} {\Scheme} in the following ways:
\begin{itemize}
\item In scsh, symbol case is preserved by \ex{read} and is significant on
symbol comparison. This means
\codex{(run (less Readme))}
displays the right file.
\item ``\ex{-}'' and ``\ex{+}'' are allowed to begin symbols.
So the following are legitimate symbols:
\codex{-O2 -geometry +Wn}
\item ``\ex{|}'' and ``\ex{.}'' are symbol constituents.
This allows \ex{|} for the pipe symbol, and \ex{..} for the parent-directory
symbol. (Of course, ``\ex{.}'' alone is not a symbol, but a
dotted-pair marker.)
\item A symbol may begin with a digit.
So the following are legitimate symbols:
\codex{9x15 80x36-3+440}
\end{itemize}
\subsection{Extended string syntax}
Scsh strings are allowed to contain the {\Ansi} C escape sequences
such as \verb|\n| and \verb|\161|.
\subsection{Block comments and executable interpreter-triggers}
Scsh allows source files to begin with a header of the form
\codex{\#!/usr/local/bin/scsh -s}
The Unix operating system treats source files beginning with the headers
of this form specially;
they can be directly executed by the operating system
(see chapter~\ref{chapt:running} for information on how to use this feature).
The scsh interpreter ignores this special header by treating \ex{\#!} as a
comment marker similar to \ex{;}.
When the scsh reader encounters \ex{\#!}, it skips characters until it finds
the closing sequence
new\-line/{\ob}ex\-cla\-ma\-tion-{\ob}point/{\ob}sharp-{\ob}sign/{\ob}new\-line.
Although the form of the \ex{\#!} read-macro was chosen to support
interpreter-triggers for executable Unix scripts,
it is a general block-comment sequence and can be used as such
anywhere in a scsh program.
\subsection{Here-strings}
The read macro \ex{\#<} is used to introduce ``here-strings''
in programs, similar to the \ex{<<} ``here document'' redirections
provided by sh and csh.
There are two kinds of here-string, character-delimited and line-delimited;
they are both introduced by the \ex{\#<} sequence.
\subsubsection{Character-delimited here-strings}
A \emph{character-delimited} here-string has the form
\codex{\#<\emph{x}...stuff...\emph{x}}
where \emph{x} is any single character
(except \ex{<}, see below),
which is used to delimit the string bounds.
Some examples:
\begin{inset}
\begin{tabular}{ll}
Here-string syntax & Ordinary string syntax \\ \hline
\verb:#<|Hello, world.|: & \verb:"Hello, world.": \\
\verb:#<!"Ouch," he said.!: & \verb:"\"Ouch,\" he said.":
\end{tabular}
\end{inset}
%
There is no interpretation of characters within the here-string;
the characters are all copied verbatim.
\subsubsection{Line-delimited here-strings}
If the sequence begins ``\ex{\#<<}'' then it introduces a \emph{line-delimited}
here-string.
These are similar to the ``here documents'' of sh and csh.
Line-delimited here-strings are delimited by the rest of the text line that
follows the ``\ex{\#<<}'' sequence.
For example:
\begin{code}
#<<FOO
Hello, there.
This is read by Scheme as a string,
terminated by the first occurrence
of newline-F-O-O-newline or newline-F-O-O-eof.
FOO\end{code}
%
Thus,
\begin{code}
#<<foo
Hello, world.
foo\end{code}
%
is the same thing as
\codex{"Hello, world."}
Line-delimited here-strings are useful for writing down long, constant
strings---such as long, multi-line \ex{format} strings,
or arguments to Unix programs, \eg,
\begin{code}
;; Free up some disk space for my netnews files.
(run (csh -c #<<EOF
cd /urops
rm -rf *
echo All done.
EOF
))\end{code}
The advantage they have over the double-quote syntax
(\eg, \ex{"Hello, world."})
is that there is no need to backslash-quote special characters internal
to the string, such as the double-quote or backslash characters.
The detailed syntax of line-delimited here-strings is as follows.
The characters ``\ex{\#<<}'' begin the here-string.
The characters between the ``\ex{\#<<}'' and the next newline are the
\emph{delimiter line}.
All characters between the ``\ex{\#<<}'' and the next newline comprise the
delimiter line---including any white space.
The body of the string begins on the following line,
and is terminated by a line of text which exactly matches the
delimiter line.
This terminating line can be ended by either a newline or end-of-file.
Absolutely no interpretation is done on the input string.
Control characters, white space, quotes, backslash---everything
is copied as-is.
The newline immediately preceding the terminating delimiter line is
not included in the result string
(leave an extra blank line if you need to put a final
newline in the here-string---see the example above).
If EOF is encountered before reading the end of the here-string,
an error is signalled.
\subsection{Dot}
It is unfortunate that the single-dot token, ``\ex{.}'', is both
a fundamental {\Unix} file name and a deep, primitive syntactic token
in {\Scheme}---it means the following will not parse correctly in scsh:
\codex{(run/strings (find . -name *.c -print))}
You must instead quote the dot:
\codex{(run/strings (find "." -name *.c -print))}
When you write shell scripts that manipulate the file system,
keep in mind the special status of the dot token.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Record types and the \texttt{define-record} form}
\label{sec:defrec}
\index{define-record@\texttt{define-record}}
Scsh's interfaces occasionally provide data in structured record types;
an example is the \ex{file-info} record whose various fields describe the size,
protection, last date of modification, and other pertinent data for a
particular file.
These record types are described in this manual using the \ex{define-record}
notation, which looks like the following:
%
\begin{code}
(define-record ship
x
y
(size 100))\end{code}
%
This form defines a \var{ship} record, with three fields:
its x and y coordinates, and its size.
The values of the \var{x} and \var{y} fields are specified as parameters
to the ship-building procedure, \ex{(make-ship \var{x} \var{y})},
and the \var{size} field is initialised to 100.
All told, the \ex{define-record} form above defines the following procedures:
%
\begin{center}
\begin{tabular}{|ll|}
\multicolumn{1}{l}{Procedure} & \multicolumn{1}{l}{Definition} \\
\hline
(make-ship \var{x} \var{y}) & Create a new \var{ship} record. \\
\hline
(ship:x \var{ship}) & Retrieve the \var{x} field. \\
(ship:y \var{ship}) & Retrieve the \var{y} field. \\
(ship:size \var{ship}) & Retrieve the \var{size} field. \\
\hline
(set-ship:x \var{ship} \var{new-x}) & Assign the \var{x} field. \\
(set-ship:y \var{ship} \var{new-y}) & Assign the \var{y} field. \\
(set-ship:size \var{ship} \var{new-size}) & Assign the \var{size} field. \\
\hline
(modify-ship:x \var{ship} \var{xfun}) & Modify \var{x} field with \var{xfun}. \\
(modify-ship:y \var{ship} \var{yfun}) & Modify \var{y} field with \var{yfun}. \\
(modify-ship:size \var{ship} \var{sizefun}) & Modify \var{size} field with \var{sizefun}. \\
\hline
(ship? \var{object}) & Type predicate. \\
\hline
(copy-ship \var{ship}) & Shallow-copy of the record. \\
\hline
\end{tabular}
\end{center}
%
An implementation of \ex{define-record} is available as a macro for Scheme
programmers to define their own record types;
the syntax is accessed by opening the package \ex{defrec-package}, which
exports the single syntax form \ex{define-record}.
See the source code for the \ex{defrec-package} module
for further details of the macro.
You must open this package to access the form.
Scsh does not export a record-definition package by default as there are
several from which to choose.
Besides the \ex{define-record} macro, which Shivers prefers\footnote{He wrote
it.}, you might instead wish to employ the notationally-distinct
\ex{define-record-type} macro that Jonathan Rees
prefers,\footnote{He wrote it.}
or the identically named but wholly different \ex{define-record-type}
macro that Richard Kelsey prefers.\footnote{He wrote it.}
The former can be found in file \ex{rts/jar-defrecord.scm} and package
\ex{define-record-types}; the latter can be found in file
\ex{big/defrecord.scm} and package \ex{defrecord}.
Alternatively, you may define your own, of course.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{A word about {\Unix} standards}
``The wonderful thing about {\Unix} standards is that there are so many
to choose from.''
You may be totally bewildered about the multitude of various standards that
exist.
Rest assured that nowhere in this manual will you encounter an attempt
to spell it all out for you;
you could not read and internalise such a twisted account without
bleeding from the nose and ears.
However, you might keep in mind the following simple fact: of all the
standards, {\Posix} is the least common denominator.
So when this manual repeatedly refers to {\Posix}, the point is ``the
thing we are describing should be portable just about anywhere.''
Scsh sticks to {\Posix} when at all possible; its major departure is
symbolic links, which aren't in {\Posix} (see---it
really \emph{is} a least common denominator).

126
doc/scsh-manual/man.t2p Normal file
View File

@ -0,0 +1,126 @@
% man.t2p
% Dorai Sitaram
% Feb 6, 2000
% This file contains the tex2page macros needed to process
% the scsh LaTeX document scsh-n.n.n/doc/scsh-manual/man.tex.
% Copy (or link) this file alongside man.tex and run
%
% tex2page man
% Read in css.t2p.
\input css.t2p
\dontuseimgforhtmlmath
% \pagebreak and \small are made equivalent to \relax for this run.
\let\pagebreak\relax
\let\small\relax
%\let\PRIMtableofcontents\tableofcontents
%\def\tableofcontents{\chapter*{Contents}\PRIMtableofcontents}
% \subtitle{text}: store the subtitle in \savesubtitle for \maketitle.
\def\subtitle#1{\def\savesubtitle{#1}}
% \maketitle: emit the title via \subject, a bold \hr, the saved subtitle
% in a \rightline, four \bigskip's, the author in bold, and a closing
% bold \hr.
% NOTE(review): \TIIPtitle and \TIIPauthor are not defined in this file;
% presumably tex2page sets them from \title and \author -- confirm.
\def\maketitle{
\subject{\TIIPtitle}
{\bf \hr}
\rightline{\savesubtitle}
\bigskip\bigskip
\bigskip\bigskip
{\bf\TIIPauthor}
{\bf\hr}
}
% Wrap \document: the previous definition is saved as \PRIMdocument and run
% first; the rest of the new body then redefines the manual's macros and
% environments (\cd, \ex, \var, \vari, code, inset, desc, ...) for this
% tex2page run.  The body extends to the lone closing brace at the end.
% The \evalh groups contain Scheme code: all-blanks? tests whether a string
% consists only of whitespace, and \spaceifnotempty emits a space unless
% the token it reads is all blanks.
\let\PRIMdocument\document
\def\document{\PRIMdocument
\let\ttchars\relax
\let\ttt\tt
%\def\~{\rawhtml~\endrawhtml}
\def\~{\char`\~}
\def\cd#1{{\tt\def\\{\char`\\}\defcsactive\${\char`\$}\defcsactive\&{\char`\&}#1}}
\def\cddollar{\undefcsactive\$}
\def\cdmath{\undefcsactive\$}
\def\codeallowbreaks{\relax}
\def\defvarx#1#2{\index{#1}\leftline{{\tt #1} \qquad #2}}
\let\PRIMflushright\flushright
\def\flushright{\PRIMflushright\TIIPtabularborder=0 }
\let\PRIMfigure\figure
\let\PRIMendfigure\endfigure
\def\figure{\par\hrule\PRIMfigure}
\def\endfigure{\PRIMendfigure\hrule\par}
\let\PRIMtable\table
\let\PRIMendtable\endtable
\def\table{\par\hrule\PRIMtable}
\def\endtable{\PRIMendtable\hrule\par}
\imgdef\vdots{\bf.\par.\par.}
\evalh{
(define all-blanks?
(lambda (s)
(andmap
char-whitespace?
(string->list s))))
}
\def\spaceifnotempty{\evalh{
(let ((x (ungroup (get-token))))
(unless (all-blanks? x)
(emit #\space)))
}}
\def\dfnix#1#2#3#4{\leftline{{\tt(#1\spaceifnotempty{#2}{\it#2})} \quad $\longrightarrow$ \quad {\it #3} \qquad (#4)} \index}
\def\ex#1{{\tt #1}}
\def\l#1{lambda (#1)}
\def\lx#1{lambda {#1}}
%\def\notenum#1{}
%\def\project#1{}
\def\var#1{{\it #1\/}}
\def\vari#1#2{\mbox{{\it #1\/}\undefcsactive\$$_{#2}$}}
\renewenvironment{boxedfigure}{\def\srecomment#1{\\#1\\}%
\begin{figure}\pagestyle}{\end{figure}}
\newenvironment{centercode}{\begin{code}}{\end{code}}
\def\setupcode{\tt%
\def\\{\char`\\}%
\defcsactive\${\$}%
\def\evalto{==> }%
\defcsactive\%{\%}\obeywhitespace}
\newenvironment{code}{\begin{quote}\bgroup\setupcode\GOBBLEOPTARG}
{\egroup\end{quote}}
\newenvironment{codebox}{\begin{tableplain}\bgroup\setupcode\GOBBLEOPTARG}
{\egroup\end{tableplain}}
\renewenvironment{desc}{\begin{quote}}{\end{quote}}
\renewenvironment{exampletable}{%
\def\header#1{\\\leftline{#1}\\}%
\def\splitline#1#2{\\\leftline{#1}\\\leftline{#2}}%
\begin{tabular}{}}{\end{tabular}}
\newenvironment{tightcode}{\begin{code}}{\end{code}}
\renewenvironment{widecode}{\begin{code}}{\end{code}}
\renewenvironment{inset}{\begin{quote}}{\end{quote}}
\renewenvironment{leftinset}{\begin{quote}}{\end{quote}}
\renewenvironment{tightinset}{\begin{quote}}{\end{quote}}
\renewenvironment{tightleftinset}{\begin{quote}}{\end{quote}}
}

65
doc/scsh-manual/man.tex Normal file
View File

@ -0,0 +1,65 @@
% -*- latex -*-
% This is the reference manual for the Scheme Shell.
\documentclass[twoside]{report}
\usepackage{code,boxedminipage,makeidx,palatino,ct,
headings,mantitle,array,matter,mysize10,tex2page}
\texonly
% Hyperlinks and PDF document information.
% Every key=value option must end in a comma: without the one after
% pdfauthor={...}, the following colorlinks=true is read as part of the
% author string and the colorlinks option is never set.
% NOTE(review): both the dvipdfm and the hypertex driver options are
% given -- confirm which one is intended.
\usepackage[dvipdfm,hyperindex,hypertex,
pdftitle={scsh manual, release 0.5.3},
pdfauthor={Olin Shivers and Brian D.~Carlstrom},
colorlinks=true,linkcolor=blue,pagecolor=blue,urlcolor=blue,
pdfstartview=FitH,pdfview=FitH]{hyperref}
\endtexonly
% These fonts are good choices for screen-readable pdf, but the man needs
% a pass over the layout, since this tt font will blow out the width
% of some examples, making them wrap and generally screwing them up. Maybe this
% should also be a LaTeX option, so we can get palatino on the hardcopy
% runs and these fonts on pdf runs...
%\renewcommand{\rmdefault}{phv}
%\renewcommand{\sfdefault}{phv}
%\renewcommand{\ttdefault}{pcr}
% Style issues
% Paragraph separation: 3pt, stretchable by up to another 3pt.
\parskip = 3pt plus 3pt
\sloppy
% Uncomment to typeset a single chapter only.
%\includeonly{syscalls}
% Read decls.tex.
\input{decls}
\makeindex
%%% End preamble
\begin{document}
% Front matter: front.tex holds the title, the acknowledgements and the
% table of contents.
\frontmatter
\include{front}
% Main matter: one \include per chapter file.
\mainmatter
\include{intro}
\include{procnotation}
\include{syscalls}
\include{network}
\include{strings}
\include{sre}
\include{rdelim}
\include{awk}
\include{miscprocs}
\include{running}
\include{todo}
% Back matter: the index.
\backmatter
\printindex
\end{document}
% General things to do when converting ASCII text to LaTeX:
% Build a set of \breakondash, \breakondot, \breakonslash commands
% that will enable breaking in \tt. This is better than \=, etc.
%
% Check for ..., quote char, double-dashes --
% Double-word check
% lambda -> \l

View File

@ -0,0 +1,76 @@
% This is the title page style stolen from the Texinfo design,
% and expressed as a LaTeX style option. It is useful for manuals.
%
% Note that I play some *really* revolting games here to override
% the vertical and horizontal margins temporarily for the title page.
% The layout assumes you have 8.5" x 11" paper. You'd have to redo this
% for A4 or another size.
% -Olin 7/94
% Fonts for title page:
% Fixed fonts for the title page, all in the default encoding, family and
% shape:
%   \titlefont    series bx, size 20.5pt
%   \authorfnt    series bx, size 14.4pt
%   \subtitlefnt  series m,  size 11
\DeclareFixedFont{\titlefont}%
{\encodingdefault}{\familydefault}{bx}{\shapedefault}{20.5pt}
\DeclareFixedFont{\authorfnt}%
{\encodingdefault}{\familydefault}{bx}{\shapedefault}{14.4pt}
\DeclareFixedFont{\subtitlefnt}%
{\encodingdefault}{\familydefault}{m}{\shapedefault}{11}
%\def\authorrm{\normalfont\selectfont\fontseries{bx}\fontsize{14.4}{14.4}}
%\def\subtitlefnt{\normalfont\selectfont\fontsize{11}{11}}
% Vertical glue inserted (via \vglue in \maketitle) above the title.
\newskip\titlepagetopglue \titlepagetopglue = 2.5in
% Width of the lines set by \wideline.
\newlength{\widewidth}
\setlength{\widewidth}{6.5in}
% Horizontal shift applied by \wideline.  The -1in and +1in below cancel,
% so the resulting value is -\oddsidemargin.
\newlength{\negwidemargin}
\setlength{\negwidemargin}{-\oddsidemargin} % Reset the margin
\addtolength{\negwidemargin}{-1in} % to edge of page
\addtolength{\negwidemargin}{1in} % Then move right one inch.
% \wideline{material}: set the material in a box \widewidth wide, shifted
% horizontally by \negwidemargin, inside a zero-width \makebox.
%\def\wideline#1{\hbox to 0pt{\hspace\negwidemargin\hbox to\widewidth{#1}}}
\def\wideline#1{\hbox{\makebox[0pt][l]{\hspace\negwidemargin\hbox to\widewidth{#1}}}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \maketitle: build the title page inside a titlepage environment.
% From top to bottom it sets: the title (\@title) in \titlefont, a 4pt
% rule, the subtitle (\@subtitle) and date (\@date) flushed right, a
% stretchable gap, the author (\@author) in \authorfont, and a 2pt rule.
% All lines are set with \wideline.  The page uses the "empty" page style.
\def\maketitle{\begin{titlepage}
\thispagestyle{empty}
\let\footnotesize\small \let\footnoterule\relax
\null
\parindent=0pt
\def\subtitlefont{\normalbaselineskip = 13pt \normalbaselines \subtitlefnt}%
\def\authorfont{\normalbaselineskip = 16pt \normalbaselines \authorfnt}%
%
% Leave some space at the very top of the page.
\vspace*{-1in}\vspace*{-\topmargin}\vspace*{-\headheight}\vspace*{-\headsep}
\vglue\titlepagetopglue
%
\wideline{\titlefont \@title \hfill} % title
% \vskip4pt
\vskip -0.3\baselineskip
\wideline{\leaders\hrule height 4pt\hfill}
\wideline{\hfill\subtitlefont\begin{tabular}[t]{@{}r@{}}\@subtitle%
\\\@date%
\end{tabular}} % subtitle
%
% author
\vskip 0pt plus 1filll
\wideline{\authorfont \begin{tabular}[t]{@{}c@{}}\@author
\end{tabular}\hfill}
%
% \vskip4pt
\vskip -0.3\baselineskip
\wideline{\leaders\hrule height 2pt\hfill}
% This weirdness puts the bottom line 2.75 in from the bottom of
% an 11in page.
\vskip \textheight \vskip \headsep \vskip \headheight
\vskip \topmargin \vskip 1in \vskip -11in \vskip 2.75in
% Empty the stored title data and turn \maketitle itself into \relax.
\gdef\@author{}\gdef\@title{}\gdef\@subtitle{}\let\maketitle\relax
\end{titlepage}
% The page counter is set to 2 after the title page.
\setcounter{page}{2}
}
% \subtitle{text}: globally store the subtitle in \@subtitle, which
% \maketitle typesets.
\def\subtitle#1{\gdef\@subtitle{#1}}
% \@subtitle starts out empty.
\def\@subtitle{}

View File

@ -0,0 +1,16 @@
%&latex -*- latex -*-
% Implement the \frontmatter, \mainmatter, and \backmatter macros,
% so I can use them in reports, not just books.
% Switch \if@mainmatter; it starts out true.
\newif\if@mainmatter
\@mainmattertrue
% \frontmatter: start on a fresh right-hand page, clear the switch, and
% number pages in roman numerals.
\newcommand{\frontmatter}{%
  \cleardoublepage
  \@mainmatterfalse
  \pagenumbering{roman}%
}
% \mainmatter: start on a fresh right-hand page, set the switch, and number
% pages in arabic numerals beginning at 1.
\newcommand{\mainmatter}{%
  \cleardoublepage
  \@mainmattertrue
  \pagenumbering{arabic}%
  \setcounter{page}{1}%
}
% \backmatter: end the current page (\cleardoublepage when \if@openright
% is true, \clearpage otherwise) and clear the switch.
\newcommand{\backmatter}{%
  \if@openright
    \cleardoublepage
  \else
    \clearpage
  \fi
  \@mainmatterfalse
}

View File

@ -0,0 +1,45 @@
%&latex -*- latex -*-
\chapter{Miscellaneous routines}
\section{Integer bitwise ops}
\label{sec:bitwise}
\defun{arithmetic-shift} {i j} \integer
\defunx {bitwise-and} {i j} \integer
\defunx {bitwise-ior} {i j} \integer
\defunx {bitwise-not} {i} \integer
\defunx {bitwise-xor} {i j} \integer
\begin{desc}
These operations operate on integers representing semi-infinite
bit strings, using a 2's-complement encoding.
\ex{arithmetic-shift} shifts \var{i} by \var{j} bits.
A left shift is $j > 0$; a right shift is $j < 0$.
\end{desc}
\section{List procedures}
\defun{nth}{list i}\object
\begin{desc}
Returns the $i^{\mathrm{th}}$ element of \var{list}.
The first element (the car) is \ex{(nth \var{list} 0)},
the second element is \ex{(nth \var{list} 1)}, and so on.
This procedure is provided as it is useful for accessing elements
from the lists returned by the field-readers (chapter~\ref{chapt:fr-awk}).
\end{desc}
\section{Top level}
\defun{repl}{}\undefined
\begin{desc}
This runs a {\scm} read-eval-print loop,
reading forms from the current input port,
and writing their values to the current output port.
If you wish to try something dangerous,
and want to be able to recover your shell state, you can
fork off a subshell with the following form:
\codex{(run (begin (repl)))}
{\ldots}or, rephrased for the proceduralists:
\codex{(wait (fork repl))}
\end{desc}

View File

@ -0,0 +1,22 @@
%&latex -*- latex -*-
% Side margins and marginal-note width: one set of values for two-sided
% documents, another for one-sided ones.
\if@twoside
  \setlength{\oddsidemargin}{44pt}
  \setlength{\evensidemargin}{82pt}
  \setlength{\marginparwidth}{107pt}
\else
  \setlength{\oddsidemargin}{63pt}
  \setlength{\evensidemargin}{63pt}
  \setlength{\marginparwidth}{90pt}
\fi
\setlength{\marginparsep}{11pt}
% Vertical layout: top margin, running head, and foot.
\setlength{\topmargin}{27pt}
\setlength{\headheight}{12pt}
\setlength{\headsep}{25pt}
\setlength{\topskip}{10pt}
\setlength{\footskip}{30pt}
% Text block: 43 baselines plus \topskip high, 345pt wide.
\setlength{\textheight}{43\baselineskip}
\addtolength{\textheight}{\topskip}
\setlength{\textwidth}{345pt}
\endinput

426
doc/scsh-manual/network.tex Normal file
View File

@ -0,0 +1,426 @@
%&latex -*- latex -*-
\chapter{Networking}
The Scheme Shell provides a BSD-style sockets interface.
There is not an official standard for a network interface
for scsh to adopt (this is the subject of the forthcoming Posix.8
standard).
However, Berkeley sockets are a \emph{de facto} standard,
being found on most Unix workstations and PC operating systems.
It is fairly straightforward to add higher-level network protocols
such as smtp, telnet, or http on top of the basic socket-level
support scsh provides.
The Scheme Underground has also released a network library with
many of these protocols as a companion to the current release of scsh.
See this code for examples showing the use of the sockets interface.
\section{High-level interface}
For convenience, and to avoid some of the messy details of the socket
interface, we provide a high level socket interface. These routines
attempt to make it easy to write simple clients and servers without
having to think of many of the details of initiating socket connections.
We welcome suggested improvements to this interface, including better
names, which right now are solely descriptions of the procedure's action.
This might be fine for people who already understand sockets,
but does not help the new networking programmer.
\defun {socket-connect} {protocol-family socket-type . args} {socket}
\begin{desc}
\ex{socket-connect} is intended for creating client applications.
\var{protocol-family} is specified as either
\ex{protocol-family/internet} or \ex{protocol-family/unix}.
\var{socket-type} is specified as either \ex{socket-type/stream} or
\ex{socket-type/datagram}. See \ex{socket} for a more complete
description of these terms.
The variable \var{args} list is meant to specify protocol family
specific information. For Internet sockets, this consists of two
arguments: a host name and a port number. For {\Unix} sockets, this
consists of a pathname.
\ex{socket-connect} returns a \ex{socket} which can be used for input
and output from a remote server. See \ex{socket} for a description of
the \emph{socket record}.
\end{desc}
\defun {bind-listen-accept-loop} {protocol-family proc arg} {does-not-return}
\begin{desc}
\ex{bind-listen-accept-loop} is intended for creating server
applications. \var{protocol-family} is specified as either
\ex{protocol-family/internet} or \ex{protocol-family/unix}.
\var{proc} is a procedure of two arguments: a \ex{socket} and a
\ex{socket-address}. \var{arg} specifies a port number for Internet sockets
or a pathname for {\Unix} sockets. See \ex{socket} for a more complete
description of these terms.
\var{proc} is called with a socket and a socket address each time there
is a connection from a client application. The socket allows
communications with the client. The socket address specifies the
address of the remote client.
This procedure does not return, but loops indefinitely accepting
connections from client programs.
\end{desc}
\section{Sockets}
\defun {create-socket} {protocol-family type [protocol]} {socket}
\defunx {create-socket-pair} {type} {[socket$_{1}$ socket$_{2}$]}
\defunx {close-socket} {socket} \undefined
\begin{desc}
A socket is one end of a network connection. Three specific properties
of sockets are specified at creation time: the protocol-family, type,
and protocol.
The \var{protocol-family} specifies the protocol family to be used with
the socket. This also determines the address family of socket addresses,
which are described in more detail below. Scsh currently supports the
{\Unix} internal protocols and the Internet protocols using the
following constants:
\begin{code}\codeallowbreaks
protocol-family/unspecified
protocol-family/unix
protocol-family/internet\end{code}
The \var{type} specifies the style of communication. Examples that your
operating system probably provides are stream and datagram sockets.
Others may be available depending on your system. Typical values are:
\begin{code}\codeallowbreaks
socket-type/stream
socket-type/datagram
socket-type/raw\end{code}
The \var{protocol} specifies a particular protocol to use within a
protocol family and type. Usually only one choice exists, but it's
probably safest to set this explicitly. See the protocol database
routines for information on looking up protocol constants.
New sockets are typically created with \ex{create-socket}. However,
\ex{create-socket-pair} can also be used to create a pair of connected
sockets in the \ex{protocol-family/unix} protocol-family. The value of a
returned socket is a \emph{socket record}, defined to have the following
structure:
\begin{code}
(define-record socket
family ; protocol family
inport ; input-port
outport) ; output-port\end{code}
The \ex{family} specifies the protocol family of the socket. The
\ex{inport} and \ex{outport} fields are ports that can be used for input
and output, respectively. For a stream socket, they are only usable
after a connection has been established via \ex{connect-socket} or
\ex{accept-connection}. For a datagram socket, \var{outport} can be
used immediately with \ex{send-message}, and \var{inport} can be used after
\ex{bind-socket} has created a local address.
\ex{close-socket} provides a convenient way to close a socket's port. It
is preferred to explicitly closing the inport and outport because using
\ex{close} on sockets is not currently portable across operating systems.
\end{desc}
\section{Socket addresses}
The format of a socket-address depends on the address family of the
socket. Address-family-specific routines are provided to convert
protocol-specific addresses to socket addresses. The value returned by
these routines is a \emph{socket-address record}, defined to have the
following visible structure:
\begin{code}
(define-record socket-address
family) ; address family\end{code}
The \ex{family} is one of the following constants:
\begin{code}\codeallowbreaks
address-family/unspecified
address-family/unix
address-family/internet\end{code}
\defun {unix-address->socket-address} {pathname} {socket-address}
\begin{desc}
\ex{unix-address->socket-address} returns a \var{socket-address} based
on the string \var{pathname}. There is a system dependent limit on the
length of \var{pathname}.
\end{desc}
\defun {internet-address->socket-address} {host-address service-port} {socket-address}
\begin{desc}
\ex{internet-address->socket-address} returns a \var{socket-address} based
on an integer \var{host-address} and an integer \var{service-port}.
Besides being a 32-bit host address, an Internet host address can also
be one of the following constants:
\begin{code}\codeallowbreaks
internet-address/any
internet-address/loopback
internet-address/broadcast\end{code}
The use of \ex{internet-address/any} is described below in
\ex{bind-socket}. \ex{internet-address/loopback} is an address that
always specifies the local machine. \ex{internet-address/broadcast} is
used for network broadcast communications.
For information on obtaining a host's address, see the \ex{host-info}
function.
\end{desc}
\defun {socket-address->unix-address} {socket-address} {pathname}
\defunx {socket-address->internet-address} {socket-address} {[host-address service-port]}
\begin{desc}
The routines \ex{socket-address->internet-address} and
\ex{socket-address->unix-address} return the address-family-specific addresses.
Be aware that most implementations don't correctly return anything more
than an empty string for addresses in the {\Unix} address-family.
\end{desc}
\section{Socket primitives}
The procedures in this section are presented in the order in which a
typical program will use them. Consult a text on network systems
programming for more information on sockets.\footnote{
Some recommended ones are:
\begin{itemize}
\item ``Unix Network Programming'' by W. Richard Stevens
\item ``An Introductory 4.3BSD Interprocess Communication Tutorial.''
(reprinted in UNIX Programmer's Supplementary Documents Volume 1, PS1:7)
\item ``An Advanced 4.3BSD Interprocess Communication Tutorial.''
(reprinted in UNIX Programmer's Supplementary Documents Volume 1, PS1:8)
\end{itemize}
}
The last two tutorials are freely available as part of BSD. In the
absence of these, your {\Unix} manual pages for socket might be a good
starting point for information.
\defun {connect-socket} {socket socket-address} \undefined
\begin{desc}
\ex{connect-socket} sets up a connection from a \var{socket}
to a remote \var{socket-address}. A connection has different meanings
depending on the socket type. A stream socket must be connected before
use. A datagram socket can be connected multiple times, but need not be
connected at all if the remote address is specified with each
\ex{send-message}, described below. Also, datagram sockets
may be disassociated from a remote address by connecting to a null
remote address.
\end{desc}
\defun {bind-socket} {socket socket-address} \undefined
\begin{desc}
\ex{bind-socket} assigns a certain local \var{socket-address} to a
\var{socket}. Binding a socket reserves the local address. To receive
connections after binding the socket, use \ex{listen-socket} for stream
sockets and \ex{receive-message} for datagram sockets.
Binding an Internet socket with a host address of
\ex{internet-address/any} indicates that the caller does
not care to specify from which local network interface connections are
received. Binding an Internet socket with a service port number of zero
indicates that the caller has no preference as to the port number
assigned.
Binding a socket in the {\Unix} address family creates a socket special
file in the file system that must be deleted before the address can be
reused. See \ex{delete-file}.
\end{desc}
\defun {listen-socket} {socket backlog} \undefined
\begin{desc}
\ex{listen-socket} allows a stream \var{socket} to start receiving connections,
allowing a queue of up to \var{backlog} connection requests. Queued
connections may be accepted by \ex{accept-connection}.
\end{desc}
\defun {accept-connection} {socket} {[new-socket socket-address]}
\begin{desc}
\ex{accept-connection} receives a connection on a \var{socket}, returning
a new socket that can be used for this connection and the remote socket
address associated with the connection.
\end{desc}
\defun {socket-local-address} {socket} {socket-address}
\defunx {socket-remote-address} {socket} {socket-address}
\begin{desc}
Sockets can be associated with a local address or a remote address or
both. \ex{socket-local-address} returns the local \var{socket-address}
record associated with \var{socket}. \ex{socket-remote-address} returns
the remote \var{socket-address} record associated with \var{socket}.
\end{desc}
\defun {shutdown-socket} {socket how-to} \undefined
\begin{desc}
\ex{shutdown-socket} shuts down part of a full-duplex socket.
The method of shutting down is specified by the \var{how-to} argument,
one of:
\begin{code}\codeallowbreaks
shutdown/receives
shutdown/sends
shutdown/sends+receives\end{code}
\end{desc}
\section{Performing input and output on sockets}
\defun {receive-message} {socket length [flags]} {[string-or-\sharpf socket-address]}
\dfnix {receive-message!} {socket string [start] [end] [flags]}
{[count-or-\sharpf socket-address]}{procedure}
{receive-message"!@\texttt{receive-message"!}}
\defunx {receive-message/partial} {socket length [flags]}
{[string-or-\sharpf socket-address]}
\dfnix {receive-message!/partial} {socket string [start] [end] [flags]}
{[count-or-\sharpf socket-address]}{procedure}
{receive-message"!/partial@\texttt{receive-message"!/partial}}
\defun {send-message} {socket string [start] [end] [flags] [socket-address]}
\undefined
\defunx {send-message/partial}
{socket string [start] [end] [flags] [socket-address]} {count}
\begin{desc}
For most uses, standard input and output routines such as
\ex{read-string} and \ex{write-string} should suffice. However, in some
cases an extended interface is required. The \ex{receive-message} and
\ex{send-message} calls parallel the \ex{read-string} and
\ex{write-string} calls with a similar naming scheme.
One additional feature of these routines is that \ex{receive-message}
returns the remote \var{socket-address} and \ex{send-message} takes an
optional remote
\ex{socket-address}. This allows a program to know the source of input
from a datagram socket and to use a datagram socket for output without
first connecting it.
All of these procedures take an optional \var{flags} field. This
argument is an integer bit-mask, composed by or'ing together the
following constants:
\begin{code}\codeallowbreaks
message/out-of-band
message/peek
message/dont-route\end{code}
See \ex{read-string} and \ex{write-string} for a more detailed
description of the arguments and return values.
\end{desc}
\section{Socket options}
\defun {socket-option} {socket level option} {value}
\defunx {set-socket-option} {socket level option value} \undefined
\begin{desc}
\ex{socket-option} and \ex{set-socket-option} allow the inspection and
modification, respectively, of several options available on sockets. The
\var{level} argument specifies what protocol level is to be examined or
affected. A level of \ex{level/socket} specifies the highest possible
level that is available on all socket types. A specific protocol number
can also be used as provided by \ex{protocol-info}, described below.
There are several different classes of socket options. The first class
consists of boolean options which can be either true or false. Examples
of this option type are:
\begin{code}\codeallowbreaks
socket/debug
socket/accept-connect
socket/reuse-address
socket/keep-alive
socket/dont-route
socket/broadcast
socket/use-loop-back
socket/oob-inline
socket/use-privileged
socket/cant-signal
tcp/no-delay\end{code}
Value options are another category of socket options. Options of this
type are an integer value. Examples of this option type are:
\begin{code}\codeallowbreaks
socket/send-buffer
socket/receive-buffer
socket/send-low-water
socket/receive-low-water
socket/error
socket/type
ip/time-to-live
tcp/max-segment\end{code}
A third option type specifies how long data lingers after a socket
has been closed. There is only one option of this type:
\ex{socket/linger}. It is set with either \sharpf to disable it or an
integer number of seconds to linger and returns a value of the same type
upon inspection.
The fourth and final option type is a timeout option. There
are two examples of this option type: \ex{socket/send-timeout} and
\ex{socket/receive-timeout}. These are set with a real number with
microsecond resolution and return a value of the same type upon
inspection.
\end{desc}
\section{Database-information entries}
\defun {host-info} {name-or-socket-address} {host-info}
\defunx {network-info} {name-or-socket-address} {network-info}
\defunx {service-info} {name-or-number [protocol-name]} {service-info}
\defunx {protocol-info} {name-or-number} {protocol-info}
\begin{desc}
\ex{host-info} allows a program to look up a host entry based on either
its string \var{name} or \var{socket-address}. The value returned by this
routine is a \emph{host-info record}, defined to have the following
structure:
\begin{code}
(define-record host-info
name ; Host name
aliases ; Alternative names
addresses) ; Host addresses\end{code}
\ex{host-info} could fail and raise an error for one of the following
reasons:
\begin{code}\codeallowbreaks
herror/host-not-found
herror/try-again
herror/no-recovery
herror/no-data
herror/no-address\end{code}
\ex{network-info} allows a program to look up a network entry based on either
its string \var{name} or \var{socket-address}. The value returned by this
routine is a \emph{network-info record}, defined to have the following
structure:
\begin{code}
(define-record network-info
name ; Network name
aliases ; Alternative names
net) ; Network number\end{code}
\ex{service-info} allows a program to look up a service entry based
on either its string \var{name} or integer \var{port}. The value returned
by this routine is a \emph{service-info record}, defined to have the
following structure:
\begin{code}
(define-record service-info
name ; Service name
aliases ; Alternative names
port ; Port number
protocol) ; Protocol name\end{code}
\ex{protocol-info} allows a program to look up a protocol entry based
on either its string \var{name} or integer \var{number}. The value returned
by this routine is a \emph{protocol-info record}, defined to have the
following structure:
\begin{code}
(define-record protocol-info
name ; Protocol name
aliases ; Alternative names
number) ; Protocol number\end{code}
\end{desc}

View File

@ -0,0 +1,543 @@
%&latex -*- latex -*-
\chapter{Process notation}
\label{sec:proc-forms}
Scsh has a notation for controlling {\Unix} processes that takes the
form of s-expressions; this notation can then be embedded inside of
standard {\Scheme} code.
The basic elements of this notation are \emph{process forms},
\emph{extended process forms}, and \emph{redirections}.
\section{Extended process forms and i/o redirections}
An \emph{extended process form} is a specification of a {\Unix} process to
run, in a particular I/O environment:
\codex{\var{epf} {\synteq} (\var{pf} $ \var{redir}_1$ {\ldots} $ \var{redir}_n $)}
where \var{pf} is a process form and the $\var{redir}_i$ are redirection specs.
A \emph{redirection spec} is one of:
\begin{inset}
\begin{tabular}{@{}l@{\qquad{\tt; }}l@{}}
\ex{(< \var{[fdes]} \var{file-name})} & \ex{Open file for read.}
\\\ex{(> \var{[fdes]} \var{file-name})} & \ex{Open file create/truncate.}
\\\ex{(<< \var{[fdes]} \var{object})} & \ex{Use \var{object}'s printed rep.}
\\\ex{(>> \var{[fdes]} \var{file-name})} & \ex{Open file for append.}
\\\ex{(= \var{fdes} \var{fdes/port})} & \ex{Dup2}
\\\ex{(- \var{fdes/port})} & \ex{Close \var{fdes/port}.}
\\\ex{stdports} & \ex{0,1,2 dup'd from standard ports.}
\end{tabular}
\end{inset}
The input redirections default to file descriptor 0;
the output redirections default to file descriptor 1.
The subforms of a redirection are implicitly backquoted,
and symbols stand for their print-names.
So \ex{(> ,x)} means
``output to the file named by {\Scheme} variable \ex{x},''
and \ex{(< /usr/shivers/.login)} means ``read from \ex{/usr/shivers/.login}.''
\pagebreak
Here are two more examples of i/o redirection:
%
\begin{center}
\begin{codebox}
(< ,(vector-ref fv i))
(>> 2 /tmp/buf)\end{codebox}
\end{center}
%
These two redirections cause the file \ex{fv[i]} to be opened on stdin, and
\ex{/tmp/buf} to be opened for append writes on stderr.
The redirection \ex{(<< \var{object})} causes input to come from the
printed representation of \var{object}.
For example,
\codex{(<< "The quick brown fox jumped over the lazy dog.")}
causes reads from stdin to produce the characters of the above string.
The object is converted to its printed representation using the \ex{display}
procedure, so
\codex{(<< (A five element list))}
is the same as
\codex{(<< "(A five element list)")}
is the same as
\codex{(<< ,(reverse '(list element five A))){\rm.}}
(Here we use the implicit backquoting feature to compute the list to
be printed.)
The redirection \ex{(= \var{fdes} \var{fdes/port})} causes \var{fdes/port}
to be dup'd into file descriptor \var{fdes}.
For example, the redirection
\codex{(= 2 1)}
causes stderr to be the same as stdout.
\var{fdes/port} can also be a port, for example:
\codex{(= 2 ,(current-output-port))}
causes stderr to be dup'd from the current output port.
In this case, it is an error if the port is not a file port
(\eg, a string port).
More complex redirections can be accomplished using the \ex{begin}
process form, discussed below, which gives the programmer full control
of i/o redirection from {\Scheme}.
\subsection{Port and file descriptor sync}
\begin{sloppypar}
It's important to remember that rebinding Scheme's current I/O ports
(\eg, using \ex{call-with-input-file} to rebind the value of
\ex{(current-input-port)})
does \emph{not} automatically ``rebind'' the file referenced by the
{\Unix} stdio file descriptors 0, 1, and 2.
This is impossible to do in general, since some {\Scheme} ports are
not representable as {\Unix} file descriptors.
For example, many {\Scheme} implementations provide ``string ports,''
that is, ports that collect characters sent to them into memory buffers.
The accumulated string can later be retrieved from the port as a string.
If a user were to bind \ex{(current-output-port)} to such a port, it would
be impossible to associate file descriptor 1 with this port, as it
cannot be represented in {\Unix}.
So, if the user subsequently forked off some other program as a subprocess,
that program would of course not see the {\Scheme} string port as its standard
output.
\end{sloppypar}
To keep stdio synced with the values of {\Scheme}'s current i/o ports,
use the special redirection \ex{stdports}.
This causes 0, 1, 2 to be redirected from the current {\Scheme} standard ports.
It is equivalent to the three redirections:
\begin{code}
(= 0 ,(current-input-port))
(= 1 ,(current-output-port))
(= 2 ,(error-output-port))\end{code}
%
The redirections are done in the indicated order. This will cause an error if
one of the current i/o ports isn't a {\Unix} port (\eg, if one is a string
port).
This {\Scheme}/{\Unix} i/o synchronisation can also be had in {\Scheme} code
(as opposed to a redirection spec) with the \ex{(stdports->stdio)}
procedure.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Process forms}
A \emph{process form} specifies a computation to perform as an independent
{\Unix} process. It can be one of the following:
%
\begin{leftinset}
\begin{codebox}
(begin . \var{scheme-code})
(| \vari{pf}{\!1} {\ldots} \vari{pf}{\!n})
(|+ \var{connect-list} \vari{pf}{\!1} {\ldots} \vari{pf}{\!n})
(epf . \var{epf})
(\var{prog} \vari{arg}{1} {\ldots} \vari{arg}{n})
\end{codebox}
\qquad
\begin{codebox}
; Run \var{scheme-code} in a fork.
; Simple pipeline
; Complex pipeline
; An extended process form.
; Default: exec the program.
\end{codebox}
\end{leftinset}
%
The default case \ex{(\var{prog} \vari{arg}1 {\ldots} \vari{arg}n)}
is also implicitly backquoted.
That is, it is equivalent to:
%
\codex{(begin (apply exec-path `(\var{prog} \vari{arg}1 {\ldots} \vari{arg}n)))}
%
\ex{Exec-path} is the version of the \ex{\urlh{http://www.FreeBSD.org/cgi/man.cgi?query=exec&apropos=0&sektion=0&manpath=FreeBSD+4.3-RELEASE&format=html}{exec()}} system call that
uses scsh's path list to search for an executable.
The program and the arguments must be either strings, symbols, or integers.
Symbols and integers are coerced to strings.
A symbol's print-name is used.
Integers are converted to strings in base 10.
Using symbols instead of strings is convenient, since it suppresses the
clutter of the surrounding \ex{"\ldots"} quotation marks.
To aid this purpose, scsh reads symbols in a case-sensitive manner,
so that you can say
\codex{(more Readme)}
and get the right file.
A \var{connect-list} is a specification of how two processes are to be wired
together by pipes.
It has the form \ex{((\vari{from}1 \vari{from}2 {\ldots} \var{to}) \ldots)}
and is implicitly backquoted.
For example,
%
\codex{(|+ ((1 2 0) (3 1)) \vari{pf}{\!1} \vari{pf}{\!2})}
%
runs \vari{pf}{\!1} and \vari{pf}{\!2}.
The first clause \ex{(1 2 0)} causes \vari{pf}{\!1}'s
stdout (1) and stderr (2) to be connected via pipe
to \vari{pf}{\!2}'s stdin (0).
The second clause \ex{(3 1)} causes \vari{pf}{\!1}'s file descriptor 3 to be
connected to \vari{pf}{\!2}'s file descriptor 1.
%---this is unusual, and not expected to occur very often.
The \ex{begin} process form does a \ex{stdio->stdports} synchronisation
in the child process before executing the body of the form.
This guarantees that the \ex{begin} form, like all other process forms,
``sees'' the effects of any associated I/O redirections.
Note that {\R4RS} does not specify whether or not \ex{|} and \ex{|+}
are readable symbols. Scsh does.
\section{Using extended process forms in \Scheme}
Process forms and extended process forms are \emph{not} {\Scheme}.
They are a different notation for expressing computation that, like {\Scheme},
is based upon s-expressions.
Extended process forms are used in {\Scheme} programs by embedding them inside
special {\Scheme} forms.
There are three basic {\Scheme} forms that use extended process forms:
\ex{exec-epf}, \cd{&}, and \ex{run}.
\dfn {exec-epf} {. \var{epf}} {\noreturn} {syntax}
\dfnx {\&} {. \var{epf}} {proc} {syntax}
\dfnx {run} {. \var{epf}} {proc} {syntax}
\begin{desc}
\index{exec-epf} \index{\&} \index{run}
The \ex{(exec-epf . \var{epf})} form nukes the current process: it establishes
the i/o redirections and then overlays the current process with the requested
computation.
The \ex{(\& . \var{epf})} form is similar, except that the process is forked
off in background. The form returns the subprocess' process object.
The \ex{(run . \var{epf})} form runs the process in foreground:
after forking off the computation, it waits for the subprocess to exit,
and returns its exit status.
These special forms are macros that expand into the equivalent
series of system calls.
The definition of the \ex{exec-epf} macro is non-trivial,
as it produces the code to handle i/o redirections and set up pipelines.
However, the definitions of the \cd{&} and \ex{run} macros are very simple:
\begin{leftinset}
\begin{tabular}{@{}l@{\quad$\equiv$\quad}l@{}}
\cd{(& . \var{epf})} & \ex{(fork (\l{} (exec-epf . \var{epf})))} \\
\ex{(run . \var{epf})} & \cd{(wait (& . \var{epf}))}
\end{tabular}
\end{leftinset}
\end{desc}
\subsection{Procedures and special forms}
It is a general design principle in scsh that all functionality
made available through special syntax is also available in a
straightforward procedural form.
So there are procedural equivalents for all of the process notation.
In this way, the programmer is not restricted by the particular details of
the syntax.
Here are some of the syntax/procedure equivalents:
\begin{inset}
\begin{tabular}{@{}|ll|@{}}
\hline
Notation & Procedure \\ \hline \hline
\ex{|} & \ex{fork/pipe} \\
\ex{|+} & \ex{fork/pipe+} \\
\ex{exec-epf} & \ex{exec-path} \\
redirection & \ex{open}, \ex{dup} \\
\cd{&} & \ex{fork} \\
\ex{run} & $\ex{wait} + \ex{fork}$ \\
\hline
\end{tabular}
\end{inset}
%
Having a solid procedural foundation also allows for general notational
experimentation using {\Scheme}'s macros.
For example, the programmer can build his own pipeline notation on top of the
\ex{fork} and \ex{fork/pipe} procedures.
Chapter~\ref{chapt:syscalls} gives the full story on all the procedures
in the syscall library.
\subsection{Interfacing process output to {\Scheme}}
\label{sec:io-interface}
There is a family of procedures and special forms that can be used
to capture the output of processes as {\Scheme} data.
%
\dfn {run/port} {. \var{epf}} {port} {syntax}
\dfnx{run/file} {. \var{epf}} {\str} {syntax}
\dfnx{run/string} {. \var{epf}} {\str} {syntax}
\dfnx{run/strings} {. \var{epf}} {{\str} list} {syntax}
\dfnx{run/sexp} {. \var{epf}} {object} {syntax}
\dfnx{run/sexps} {. \var{epf}} {list} {syntax}
\begin{desc}
These forms all fork off subprocesses, collecting the process' output
to stdout in some form or another.
The subprocess runs with file descriptor 1 and the current output port
bound to a pipe.
\begin{desctable}{0.7\linewidth}
\ex{run/port} & Value is a port open on process's stdout.
Returns immediately after forking child. \\
\ex{run/file} & Value is name of a temp file containing process's output.
Returns when process exits. \\
\ex{run/string} & Value is a string containing process' output.
Returns when eof read. \\
\ex{run/strings}& Splits process' output into a list of
newline-delimited strings. Returns when eof read. \\
\ex{run/sexp} & Reads a single object from process' stdout with \ex{read}.
Returns as soon as the read completes. \\
\ex{run/sexps} & Repeatedly reads objects from process' stdout with \ex{read}.
Returns accumulated list upon eof.
\end{desctable}
The delimiting newlines are not included in the strings returned by
\ex{run/strings}.
These special forms just expand into calls to the following analogous
procedures.
\end{desc}
\defun {run/port*} {thunk} {port}
\defunx {run/file*} {thunk} {\str}
\defunx {run/string*} {thunk} {\str}
\defunx {run/strings*} {thunk} {{\str} list}
\defunx {run/sexp*} {thunk} {object}
\defunx {run/sexps*} {thunk} {object list}
\begin{desc}
For example, \ex{(run/port . \var{epf})} expands into
\codex{(run/port* (\l{} (exec-epf . \var{epf}))).}
\end{desc}
The following procedures are also of utility for generally parsing
input streams in scsh:
\defun {port->string} {port} {\str}
\defunx {port->sexp-list} {port} {list}
\defunx {port->string-list} {port} {{\str} list}
\defunx {port->list} {reader port} {list}
\begin{desc}
\ex{Port->string} reads the port until eof,
then returns the accumulated string.
\ex{Port->sexp-list} repeatedly reads data from the port until eof,
then returns the accumulated list of items.
\ex{Port->string-list} repeatedly reads newline-terminated strings from the
port until eof, then returns the accumulated list of strings.
The delimiting newlines are not part of the returned strings.
\ex{Port->list} generalises these two procedures.
It uses \var{reader} to repeatedly read objects from a port.
It accumulates these objects into a list, which is returned upon eof.
The \ex{port->string-list} and \ex{port->sexp-list} procedures
are trivial to define, being merely \ex{port->list} curried with
the appropriate parsers:
\begin{code}\cddollar
(port->string-list \var{port}) $\equiv$ (port->list read-line \var{port})
(port->sexp-list \var{port}) $\equiv$ (port->list read \var{port})\end{code}
%
The following compositions also hold:
\begin{code}\cddollar
run/string* $\equiv$ port->string $\circ$ run/port*
run/strings* $\equiv$ port->string-list $\circ$ run/port*
run/sexp* $\equiv$ read $\circ$ run/port*
run/sexps* $\equiv$ port->sexp-list $\circ$ run/port*\end{code}
\end{desc}
\defun{port-fold}{port reader op . seeds} {\object\star}
\begin{desc}
This procedure can be used to perform a variety of iterative operations
over an input stream.
It repeatedly uses \var{reader} to read an object from \var{port}.
If the first read returns eof, then the entire \ex{port-fold}
operation returns the seeds as multiple values.
If the first read operation returns some other value $v$, then
\var{op} is applied to $v$ and the seeds:
\ex{(\var{op} \var{v} . \var{seeds})}.
This should return a new set of seed values, and the reduction then loops,
reading a new value from the port, and so forth.
(If multiple seed values are used, then \var{op} must return multiple values.)
For example, \ex{(port->list \var{reader} \var{port})}
could be defined as
\codex{(reverse (port-fold \var{port} \var{reader} cons '()))}
An imperative way to look at \ex{port-fold} is to say that it
abstracts the idea of a loop over a stream of values read from
some port, where the seed values express the loop state.
\remark{This procedure was formerly named \texttt{\indx{reduce-port}}.
The old binding is still provided, but is deprecated and will
probably vanish in a future release.}
\end{desc}
\section{More complex process operations}
The procedures and special forms in the previous section provide for the
common case, where the programmer is only interested in the output of the
process.
These special forms and procedures provide more complicated facilities
for manipulating processes.
\subsection{Pids and ports together}
\dfn {run/port+proc} {. \var{epf}} {[port proc]} {syntax}
\defunx {run/port+proc*} {thunk} {[port proc]}
\begin{desc}
This special form and its analogous procedure can be used
if the programmer also wishes access to the process' pid, exit status,
or other information.
They both fork off a subprocess, returning two values:
a port open on the process' stdout (and current output port),
and the subprocess's process object.
A process object encapsulates the subprocess' process id and exit code;
it is the value passed to the \ex{wait} system call.
For example, to uncompress a tech report, reading the uncompressed
data into scsh, and also be able to track the exit status of
the decompression process, use the following:
\begin{code}
(receive (port child) (run/port+proc (zcat tr91-145.tex.Z))
(let* ((paper (port->string port))
(status (wait child)))
{\rm\ldots{}use \ex{paper}, \ex{status}, and \ex{child} here\ldots}))\end{code}
%
Note that you must \emph{first} do the \ex{port->string} and
\emph{then} do the wait---the other way around may lock up when the
zcat fills up its output pipe buffer.
\end{desc}
\subsection{Multiple stream capture}
Occasionally, the programmer may want to capture multiple distinct output
streams from a process. For instance, he may wish to read the stdout and
stderr streams into two distinct strings. This is accomplished with the
\ex{run/collecting} form and its analogous procedure, \ex{run/collecting*}.
%
\dfn {run/collecting} {fds . epf} {[port\ldots]} {syntax}
\defunx {run/collecting*} {fds thunk} {[port\ldots]}
\begin{desc}
\ex{Run/collecting} and \ex{run/collecting*} run processes that produce
multiple output streams and return ports open on these streams. To avoid
issues of deadlock, \ex{run/collecting} doesn't use pipes. Instead, it first
runs the process with output to temp files, then returns ports open on the
temp files. For example,
%
\codex{(run/collecting (1 2) (ls))}
%
runs \ex{ls} with stdout (fd 1) and stderr (fd 2) redirected to temporary
files.
When the \ex{ls} is done, \ex{run/collecting} returns three values: the
\ex{ls} process' exit status, and two ports open on the temporary files. The
files are deleted before \ex{run/collecting} returns, so when the ports are
closed, they vanish. The \ex{fds} list of file descriptors is implicitly
backquoted by the special-form version.
For example, if Kaiming has his mailbox protected, then
\begin{code}
(receive (status out err)
(run/collecting (1 2) (cat /usr/kmshea/mbox))
(list status (port->string out) (port->string err)))\end{code}
%
might produce the list
\codex{(256 "" "cat: /usr/kmshea/mbox: Permission denied")}
What is the deadlock hazard that causes \ex{run/collecting} to use temp files?
Processes with multiple output streams can lock up if they use pipes
to communicate with {\Scheme} i/o readers. For example, suppose
some {\Unix} program \ex{myprog} does the following:
\begin{enumerate}
\item First, outputs a single ``\ex{(}'' to stderr.
\item Then, outputs a megabyte of data to stdout.
\item Finally, outputs a single ``\ex{)}'' to stderr, and exits.
\end{enumerate}
Our scsh programmer decides to run \ex{myprog} with stdout and stderr redirected
\emph{via {\Unix} pipes} to the ports \ex{port1} and \ex{port2}, respectively.
He gets into trouble when he subsequently says \ex{(read port2)}.
The {\Scheme} \ex{read} routine reads the open paren, and then hangs in a
\ex{\urlh{http://www.FreeBSD.org/cgi/man.cgi?query=read&apropos=0&sektion=0&manpath=FreeBSD+4.3-RELEASE&format=html}{read()}} system call trying to read a matching close paren.
But before \ex{myprog} sends the close paren down the stderr
pipe, it first tries to write a megabyte of data to the stdout pipe.
However, {\Scheme} is not reading that pipe---it's stuck waiting for input on
stderr.
So the stdout pipe quickly fills up, and \ex{myprog} hangs, waiting for the
pipe to drain.
The \ex{myprog} child is stuck in a stdout/\ex{port1} write;
the {\Scheme} parent is stuck in a stderr/\ex{port2} read.
Deadlock.
Here's a concrete example that does exactly the above:
\begin{code}
(receive (status port1 port2)
(run/collecting (1 2)
(begin
;; Write an open paren to stderr.
(run (echo "(") (= 1 2))
;; Copy a lot of stuff to stdout.
(run (cat /usr/dict/words))
;; Write a close paren to stderr.
(run (echo ")") (= 1 2))))
;; OK. Here, I have a port PORT1 built over a pipe
;; connected to the BEGIN subproc's stdout, and
;; PORT2 built over a pipe connected to the BEGIN
;; subproc's stderr.
(read port2) ; Should return the empty list.
(port->string port1)) ; Should return a big string.\end{code}
%
In order to avoid this problem, \ex{run/collecting} and \ex{run/collecting*}
first run the child process to completion, buffering all the output
streams in temp files (using the \ex{temp-file-channel} procedure, see below).
When the child process exits, ports open on the buffered output are returned.
This approach has two disadvantages over using pipes:
\begin{itemize}
\item The total output from the child is temporarily written
to the disk before returning from \ex{run/collecting}. If this output
is some large intermediate result, the disk could fill up.
\item The child producer and {\Scheme} consumer are serialised; there is
no concurrency overlap in their execution.
\end{itemize}
%
However, it remains a simple solution that avoids deadlock. More
sophisticated solutions can easily be programmed up as
needed---\ex{run/collecting*} itself is only 12 lines of simple code.
See \ex{temp-file-channel} for more information on creating temp files
as communication channels.
\end{desc}
\section{Conditional process sequencing forms}
These forms allow conditional execution of a sequence of processes.
\dfn{||} {\vari{pf}1 \ldots \vari{pf}n} {\boolean} {syntax}
\begin{desc}
Run each proc until one completes successfully (\ie, exit status zero).
Return true if some proc completes successfully; otherwise \sharpf.
\end{desc}
\dfn{\&\&} {\vari{pf}1 \ldots \vari{pf}n} {\boolean} {syntax}
\begin{desc}
Run each proc until one fails (\ie, exit status non-zero).
Return true if all procs complete successfully; otherwise \sharpf.
\end{desc}
\section{Process filters}
These procedures are useful for forking off processes to filter
text streams.
\begin{defundesc}{char-filter}{filter}{\proc}
The \var{filter} argument is a character$\rightarrow$character procedure.
Returns a procedure that when called, repeatedly reads a character
from the current input port, applies \var{filter} to the character,
and writes the result to the current output port.
The procedure returns upon reaching eof on the input port.
For example, to downcase a stream of text in a spell-checking pipeline,
instead of using the {\Unix} \ex{tr A-Z a-z} command, we can say:
\begin{code}
(run (| (delatex)
(begin ((char-filter char-downcase))) ; tr A-Z a-z
(spell)
(sort)
(uniq))
(< scsh.tex)
(> spell-errors.txt))\end{code}
\end{defundesc}
\begin{defundesc}{string-filter}{filter [buflen]}{\proc}
The \var{filter} argument is a string$\rightarrow$string procedure.
Returns a procedure that when called, repeatedly reads a string
from the current input port, applies \var{filter} to the string,
and writes the result to the current output port.
The procedure returns upon reaching eof on the input port.
The optional \var{buflen} argument controls the number of characters
each internal read operation requests; this means that \var{filter}
will never be applied to a string longer than \var{buflen} chars.
The default \var{buflen} value is 1024.
\end{defundesc}

148
doc/scsh-manual/rdelim.tex Normal file
View File

@ -0,0 +1,148 @@
%&latex -*- latex -*-
\chapter{Reading delimited strings}
\label{chapt:rdelim}
Scsh provides a set of procedures that read delimited strings from
input ports.
There are procedures to read a single line of text
(terminated by a newline character),
a single paragraph (terminated by a blank line),
and general delimited strings
(terminated by a character belonging to an arbitrary character set).
These procedures can be applied to any Scheme input port.
However, the scsh virtual machine has native-code support for performing
delimited reads on Unix ports, and these input operations should be
particularly fast---much faster than doing the equivalent character-at-a-time
operation from Scheme code.
All of the delimited input operations described below take a \ex{handle-delim}
parameter, which determines what the procedure does with the terminating
delimiter character.
There are four possible choices for a \ex{handle-delim} parameter:
\begin{inset}
\begin{tabular}{|l|l|} \hline
\ex{handle-delim} & Meaning \\ \hline\hline
\ex{'trim} & Ignore delimiter character. \\
\ex{'peek} & Leave delimiter character in input stream. \\
\ex{'concat} & Append delimiter character to returned value. \\
\ex{'split} & Return delimiter as second value. \\
\hline
\end{tabular}
\end{inset}
The first case, \ex{'trim}, is the standard default for all the routines
described in this section.
The last three cases allow the programmer to distinguish between strings
that are terminated by a delimiter character, and strings that are
terminated by an end-of-file.
\begin{defundesc} {read-line} {[port handle-newline]} {{\str} or eof-object}
Reads and returns one line of text; on eof, returns the eof object.
A line is terminated by newline or eof.
\var{handle-newline} determines what \ex{read-line} does with the
newline or EOF that terminates the line; it takes the general set
of values described for the general \ex{handle-delim} case above,
and defaults to \ex{'trim} (discard the newline).
Using this argument allows one to tell whether or not the last line of
input in a file is newline terminated.
\end{defundesc}
\defun{read-paragraph} {[port handle-delim]} {{\str} or eof}
\begin{desc}
This procedure skips blank lines,
then reads text from a port until a blank line or eof is found.
A ``blank line'' is a (possibly empty) line composed only of white space.
The \var{handle-delim} parameter determines how the terminating
blank line is handled.
It is described above, and defaults to \ex{'trim}.
The \ex{'peek} option is not available.
\end{desc}
The following procedures read in strings from ports delimited by characters
belonging to a specific set.
See section~\ref{sec:char-sets} for information on character set manipulation.
\defun{read-delimited}{char-set [port handle-delim]} {{\str} or eof}
\begin{desc}
Read until we encounter one of the chars in \var{char-set} or eof.
The \var{handle-delim} parameter determines how the terminating character
is handled. It is described above, and defaults to \ex{'trim}.
The \var{char-set} argument may be a charset, a string, a character, or a
character predicate; it is coerced to a charset.
\end{desc}
\dfni{read-delimited!} {char-set buf [port handle-delim start end]}
{nchars or eof or \#f}{procedure}
{read-delimited"!@\texttt{read-delimited"!}}
\begin{desc}
A side-effecting variant of \ex{read-delimited}.
The data is written into the string \var{buf} at the indices in the
half-open interval $[\var{start},\var{end})$; the default interval is the
whole string: $\var{start}=0$ and $\var{end}=\ex{(string-length
\var{buf})}$. The values of \var{start} and \var{end} must specify a
well-defined interval in \var{buf}, \ie, $0 \le \var{start} \le \var{end}
\le \ex{(string-length \var{buf})}$.
It returns \var{nchars}, the number of characters read. If the buffer filled up
without a delimiter character being found, \ex{\#f} is returned. If
the port is at eof when the read starts, the eof object is returned.
If an integer is returned (\ie, the read is successfully terminated by
reading a delimiter character), then the \var{handle-delim} parameter
determines how the terminating character is handled.
It is described above, and defaults to \ex{'trim}.
\end{desc}
\dfni{\%read-delimited!} {char-set buf gobble? [port start end]}
{[char-or-eof-or-\#f \integer]}{procedure}
{"%read-delimited"!@\verb:"%read-delimited"!:}
\begin{desc}
This low-level delimited reader uses an alternate interface.
It returns two values: \var{terminator} and \var{num-read}.
\begin{description}
\item [terminator]
A value describing why the read was terminated:
\begin{flushleft}
\begin{tabular}{l@{\qquad$\Rightarrow$\qquad}l}
Character or eof-object & Read terminated by this value. \\
\ex{\#f} & Filled buffer without finding a delimiter.
\end{tabular}
\end{flushleft}
\item [num-read]
Number of characters read into \var{buf}.
\end{description}
If the read is successfully terminated by reading a delimiter character,
then the \var{gobble?} parameter determines what to do with the terminating
character.
If true, the character is removed from the input stream;
if false, the character is left in the input stream where a subsequent
read operation will retrieve it.
In either case, the character is also the first value returned by
the procedure call.
\end{desc}
%Note:
%- Invariant: TERMINATOR = #f => NUM-READ = END - START.
%- Invariant: TERMINATOR = eof-object and NUM-READ = 0 => at EOF.
%- When determining the TERMINATOR return value, ties are broken
% favoring character or the eof-object over #f. That is, if the buffer
% fills up, %READ-DELIMITED! will peek at one more character from the
% input stream to determine if it terminates the input. If so, that
% is returned, not #f.
\begin{defundesc} {skip-char-set} {skip-chars [port]} {\integer}
Skip characters occurring in the set \var{skip-chars};
return the number of characters skipped.
The \var{skip-chars} argument may be a charset, a string, a character, or a
character predicate; it is coerced to a charset.
\end{defundesc}

934
doc/scsh-manual/running.tex Normal file
View File

@ -0,0 +1,934 @@
%&latex -*- latex -*-
\chapter{Running scsh}
\label{chapt:running}
Scsh is currently implemented on top of {\scm}, a freely-available
{\Scheme} implementation written by Jonathan Rees and Richard Kelsey.
{\scm} uses a byte-code interpreter for good code density, portability
and medium efficiency. It is {\R4RS} compliant.
It also has a module system designed by Jonathan Rees.
Scsh's design is not {\scm} specific, although the current implementation
is necessarily so.
Scsh is intended to be implementable in other {\Scheme} implementations.
The {\scm} virtual machine that scsh uses is a specially modified version;
standard {\scm} virtual machines cannot be used with the scsh heap image.
There are several different ways to invoke scsh.
You can run it as an interactive Scheme system, with a standard
read-eval-print interaction loop.
Scsh can also be invoked as the interpreter for a shell script by putting
a ``\verb|#!/usr/local/bin/scsh -s|'' line at the top of the shell script.
Descending a level, it is also possible to invoke the underlying virtual
machine byte-code interpreter directly on dumped heap images.
Scsh programs can be pre-compiled to byte-codes and dumped as raw,
binary heap images.
Writing heap images strips out unused portions of the scsh runtime
(such as the compiler, the debugger, and other complex subsystems),
reducing memory demands and saving loading and compilation times.
The heap image format allows for an initial \verb|#!/usr/local/lib/scsh/scshvm| trigger
on the first line of the image, making heap images directly executable as
another kind of shell script.
Finally, scsh's static linker system allows dumped heap images to be compiled
to a raw Unix a.out(5) format, which can be linked into the text section
of the vm binary.
This produces a true Unix executable binary file.
Since the byte codes comprising the program are in the file's text section,
they are not traced or copied by the garbage collector, do not occupy space
in the vm's heap, and do not need to be loaded and linked at startup time.
This reduces the program's startup time, memory requirements,
and paging overhead.
This chapter will cover these various ways of invoking scsh programs.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Scsh command-line switches}
When the scsh top-level starts up, it scans the command line
for switches that control its behaviour.
These arguments are removed from the command line;
the remaining arguments can be accessed as the value of
the scsh variable \ex{command-line-arguments}.
\subsection{Scripts and programs}
The scsh command-line switches provide sophisticated support for
the authors of shell scripts and programs;
they also allow the programmer to write programs
that use the {\scm} module system.
There is a difference between a \emph{script}, which performs its action
\emph{as it is loaded}, and a \emph{program}, which is loaded/linked,
and then performs its action by having control transferred to an entry point
(\eg, the \ex{main()} function in C programs) that was defined by the
load/link operation.
A \emph{script}, by the above definition, cannot be compiled by the simple
mechanism of loading it into a scsh process and dumping out a heap image---it
executes as it loads. It does not have a top-level \ex{main()}-type entry
point.
It is more flexible and useful to implement a system
as a program than as a script.
Programs can be compiled straightforwardly;
they can also export procedural interfaces for use by other Scheme packages.
However, scsh supports both the script and the program style of programming.
\subsection{Inserting interpreter triggers into scsh programs}
When Unix tries to execute an executable file whose first 16 bits are
the character pair ``\ex{\#!}'', it treats the file not as machine-code
to be directly executed by the native processor, but as source code to
be executed by some interpreter.
The interpreter to use is specified immediately after the ``\ex{\#!}''
sequence on the first line of the source file
(along with one optional initial argument).
The kernel reads in the name of the interpreter, and executes that instead.
The interpreter is passed the source filename as its first argument, with
the original arguments following.
Consult the Unix man page for the \ex{exec} system call for more information.
Scsh allows Scheme programs to have these triggers placed on
their first line.
Scsh treats the character sequence ``\ex{\#!}'' as a block-comment sequence,%
\footnote{Why a block-comment instead of an end-of-line delimited comment?
See the section on meta-args.}
and skips all following characters until it reads the comment-terminating
sequence newline/exclamation-point/sharp-sign/newline (\ie, the
sequence ``\ex{!\#}'' occurring on its own line).
In this way, the programmer can arrange for an initial
\begin{code}
#!/usr/local/bin/scsh -s
!#\end{code}
header appearing in a Scheme program
to be ignored when the program is loaded into scsh.
\subsection{Module system}
Scsh uses the {\scm} module system, which defines
\emph{packages}, \emph{structures}, and \emph{interfaces}.
%
\begin{description}
\item [Package] A package is an environment---that is, a set of
variable/value bindings.
You can evaluate Scheme forms inside a package, or load a file into a package.
Packages export sets of bindings; these sets are called \emph{structures}.
\item [Structure] A structure is a named view on a package---a set of
bindings. Other packages can \emph{open} the structure, importing its
bindings into their environment. Packages can provide more than one
structure, revealing different portions of the package's environment.
\item [Interface] An interface is the ``type'' of a structure. An
interface is the set of names exported by a structure. These names
can also be marked with other static information (\eg, advisory type
declarations, or syntax information).
\end{description}
More information on the {\scm} module system can be found in the
file \ex{module.ps} in the \ex{doc} directory of the {\scm} and scsh releases.
Programming Scheme with a module system is different from programming
in older Scheme implementations,
and the associated development problems are consequently different.
In Schemes that lack modular abstraction mechanisms,
everything is accessible; the major problem is preventing name-space conflicts.
In Scheme 48, name-space conflicts vanish; the major problem is that not
all bindings are accessible from every place.
It takes a little extra work to specify which packages export which values.
It may take you a little while to get used to the new style of program
development.
Although scsh can be used without referring to the module system at
all, we recommend taking the time to learn and use it.
The effort will pay off in the construction of modular, factorable programs.
\subsubsection{Module warning}
Programmers who open both the \ex{scheme} and \ex{scsh} structures in their
own packages should make sure to always put the \ex{scsh} reference first.
\begin{center}
\begin{tabular}{l@{\qquad}l}
Do this: & Not this: \strut \\
\quad{\begin{codebox}[b]
(define-structure web-server
(open scsh
scheme
net-hax
\vdots)
(file web))\end{codebox}}
&
\quad{\begin{codebox}[b]
(define-structure web-server
(open scheme
scsh
net-hax
\vdots)
(file web))\end{codebox}}\\
%
Open \ex{scsh} before \ex{scheme}. &
Not \ex{scsh} after \ex{scheme}.
\end{tabular}
\end{center}
Ordering the two packages like this is necessary because scsh overrides
some of the standard R4RS Scheme definitions exported by the \ex{scheme}
package with its own definitions.
For example, scsh's versions of the R4RS I/O functions such as \ex{display}
and \ex{write} take integer file descriptors as arguments, as well as Scheme
ports.
If you open the \ex{scheme} structure before the \ex{scsh} structure,
you'll get the standard {\scm} definitions, which is not what you want.
\subsection{Switches}
\label{sec:scsh-switches}
The scsh top-level takes command-line switches in the following format:
%
\codex{scsh [\var{meta-arg}] [\vari{switch}i {\ldots}]
[\var{end-option} \vari{arg}1 {\ldots} \vari{arg}n]}
where
\begin{inset}
\begin{flushleft}
\begin{tabular}{ll@{\qquad}l}
\var{meta-arg:} & \verb|\| \var{script-file-name} \\
\\
\var{switch:} & \ex{-e} \var{entry-point}
& Specify top-level entry-point. \\
& \ex{-o} \var{structure}
& Open structure in current package. \\
& \ex{-m} \var{structure}
& Switch to package. \\
& \ex{-n} \var{new-package}
& Switch to new package. \\ \\
& \ex{-lm} \var{module-file-name}
& Load module into config package. \\
& \ex{-l} \var{file-name}
& Load file into current package. \\
& \ex{-dm} & Do script module. \\
& \ex{-ds} & Do script. \\
\\
\var{end-option:} & \ex{-s} \var{script} \\
& \ex{-sfd} \var{num} \\
& \ex{-c} \var{exp} \\
& \ex{--}
\end{tabular}
\end{flushleft}
\end{inset}
%
These command-line switches
essentially provide a little linker language for linking a shell script or a
program together with {\scm} modules.
The command-line processor serially opens structures and loads code into a
given package.
Switches that side-effect a package operate on a particular ``current''
package; there are switches to change this package.
(These switches provide functionality equivalent to the interactive
\ex{,open} \ex{,load} \ex{,in} and \ex{,new} commands.)
Except where indicated, switches specify actions that are executed in a
left-to-right order.
The initial current package is the user package, which is completely
empty and opens (imports the bindings of) the R4RS and scsh structures.
If the Scheme process is started up in an interactive mode, then the current
package in force at the end of switch scanning is the one inside which
the interactive read-eval-print loop is started.
The command-line switch processor works in two passes:
it first parses the switches, building a list of actions to perform,
then the actions are performed serially.
The switch list is terminated by one of the \var{end-option} switches.
The \vari{arg}{i} arguments occurring after an end-option switch are
passed to the scsh program as the value of \ex{command-line-arguments}
and the tail of the list returned by \ex{(command-line)}.
That is, an \var{end-option} switch separates switches that control
the scsh ``machine'' from the actual arguments being passed to the scsh
program that runs on that machine.
The following switches and end options are defined:
\begin{itemize}
\def\Item#1{\item{\ex{#1}}\\}
\Item{-o \var{struct}}
Open the structure in the current package.
\Item{-n \var{package}}
Make and enter a new package. The package has an associated structure
named \var{package} with an empty export list.
If \var{package} is the string ``\ex{\#f}'',
the new package is anonymous, with no associated named structure.
The new package initially opens no other structures,
not even the R4RS bindings. You must follow a ``\ex{-n foo}''
switch with ``\ex{-o scheme}'' to access the standard identifiers such
as \ex{car} and \ex{define}.
\Item{-m \var{struct}}
Change the current package to the package underlying
structure \var{struct}.
(The \ex{-m} stands for ``module.'')
\Item{-lm \var{module-file-name}}
Load the specified file into scsh's config package --- the file
must contain source written in the Scheme 48 module language
(``load module''). Does not alter the current package.
\Item{-l \var{file-name}}
Load the specified file into the current package.
\Item{-c \var{exp}}
Evaluate expression \var{exp} in the current package and exit.
This is called \ex{-c} after a common shell convention (see sh and csh).
The expression is evaluated in the current package (and hence is
affected by \ex{-m}'s and \ex{-n}'s).
When the scsh top-level constructs the scsh command-line in this case,
it takes \ex{"scsh"} to be the program name.
This switch terminates argument scanning; following args become
the tail of the command-line list.
\Item{-e \var{entry-point}}
Specify an entry point for a program. The \var{entry-point} is
a variable that is taken from the current package in force at the end
of switch evaluation. The entry point does not have to be exported
by the package in a structure; it can be internal to the package.
The top level passes control to the entry point by applying it to
the command-line list (so programs executing in private
packages can reference their command-line arguments without opening
the \ex{scsh} package to access the \ex{(command-line)} procedure).
Note that, like the list returned by the \ex{(command-line)} procedure,
the list passed to the entry point includes the name
of the program being executed (as the first element of the list),
not just the arguments to the program.
A \ex{-e} switch can occur anywhere in the switch list, but it is the
\emph{last} action performed by switch scanning if it occurs.
(We violate ordering here as the shell-script \ex{\#!} mechanism
prevents you from putting the \ex{-e} switch last, where it belongs.)
\Item{-s \var{script}}
Specify a file to load.
A \ex{-ds} (do-script) or \ex{-dm} (do-module) switch occurring earlier in
the switch list gives the place where the script should be loaded. If
there is no \ex{-ds} or \ex{-dm} switch, then the script is loaded at the
end of switch scanning, into the module that is current at the end of
switch scanning.
We use the \ex{-ds} switch to violate left-to-right switch execution order
as the \ex{-s} switch is \emph{required} to be last
(because of the \ex{\#!} machinery),
independent of when/where in the switch-processing order
it should be loaded.
When the scsh top-level constructs the scsh command-line in this case,
it takes \var{script} to be the program name.
This switch terminates switch parsing; following args are ignored
by the switch-scanner and are passed through to the program as
the tail of the command-line list.
\Item{-sfd \var{num}}
Loads the script from file descriptor \var{num}.
This switch is like the \ex{-s} switch,
except that the script is loaded from one of the process' open input
file descriptors.
For example, to have the script loaded from standard input, specify
\ex{-sfd 0}.
\Item{--}
Terminate argument scanning and start up scsh in interactive mode.
If the argument list just runs out, without either a terminating
\ex{-s} or \ex{--} arg, then scsh also starts up in interactive mode,
with an empty \ex{command-line-arguments} list
(for example, simply entering \ex{scsh} at a shell prompt with no
args at all).
When the scsh top-level constructs the scsh command-line in this case,
it takes \ex{"scsh"} to be the program name.
This switch terminates switch parsing; following args are ignored
by the switch-scanner and are passed through to the program as
the tail of the command-line list.
\Item{-ds}
Specify when to load the script (``do-script''). If this switch occurs,
the switch list \emph{must} be terminated by a \ex{-s \var{script}}
switch. The script is loaded into the package that is current at the
\ex{-ds} switch.
\Item{-dm}
As above, but the current module is ignored. The script is loaded into the
\ex{config} package (``do-module''), and hence must be written in the
{\scm} module language.
This switch doesn't affect the current module---after executing this
switch, the current module is the same as it was before.
This switch is provided to make it easy to write shell scripts in the
{\scm} module language.
\end{itemize}
\subsection{The meta argument}
\label{sec:meta-arg}
The scsh switch parser takes a special command-line switch,
a single backslash called the ``meta-argument,'' which is useful for
shell scripts.
If the initial command-line argument is a ``\verb|\|''
argument, followed by a filename argument \var{fname}, scsh will open the file
\var{fname} and read more arguments from the second line of this file.
This list of arguments will then replace the ``\verb|\|'' argument---\ie,
the new arguments are inserted in front of \var{fname},
and the argument parser resumes argument scanning.
This is used to overcome a limitation of the \ex{\#!} feature:
the \ex{\#!} line can only specify a single argument after the interpreter.
For example, we might hope the following scsh script, \ex{ekko},
would implement a simple-minded version of the Unix \ex{echo} program:
\begin{code}
#!/usr/local/bin/scsh -e main -s
!#
(define (main args)
(map (\l{arg} (display arg) (display " "))
(cdr args))
(newline))\end{code}
%
The idea would be that the command
\codex{ekko Hi there.}
would be expanded by the \ex{\urlh{http://www.FreeBSD.org/cgi/man.cgi?query=exec&apropos=0&sektion=0&manpath=FreeBSD+4.3-RELEASE&format=html}{exec(2)}} kernel call into
%
\begin{code}
/usr/local/bin/scsh -e main -s ekko Hi there.\end{code}
%
In theory, this would cause scsh to start up, load in file \ex{ekko},
call the entry point on the command-line list
\codex{(main '("ekko" "Hi" "there."))}
and exit.
Unfortunately, the {\Unix} \ex{\urlh{http://www.FreeBSD.org/cgi/man.cgi?query=exec&apropos=0&sektion=0&manpath=FreeBSD+4.3-RELEASE&format=html}{exec(2)}} syscall's support for scripts is
not very general or well-designed.
It will not handle multiple arguments;
the \ex{\#!} line is usually required to contain no more than 32 characters;
it is not recursive.
If these restrictions are violated, most Unix systems will not provide accurate
error reporting, but either fail silently, or simply incorrectly implement
the desired functionality.
These are the facts of Unix life.
In the \ex{ekko} example above, our \ex{\#!} trigger line has three
arguments (``\ex{-e}'', ``\ex{main}'', and ``\ex{-s}''), so it will not
work.
The meta-argument is how we work around this problem.
We must instead invoke the scsh interpreter with the single \cd{\\} argument,
and put the rest of the arguments on line two of the program.
Here's the correct program:
%
\begin{code}
#!/usr/local/bin/scsh \\
-e main -s
!#
(define (main args)
(map (\l{arg} (display arg) (display " "))
(cdr args))
(newline))\end{code}
%
Now, the invocation starts as
\codex{ekko Hi there.}
and is expanded by exec(2) into
\begin{code}
/usr/local/bin/scsh \\ ekko Hi there.\end{code}
When scsh starts up, it expands the ``\cd{\\}'' argument into the arguments
read from line two of \ex{ekko}, producing this argument list:
\begin{code}\cddollar
\underline{-e main -s ekko} Hi there.
$\uparrow$
{\rm{}Expanded from} \cd{\\} ekko\end{code}
%
With this argument list, processing proceeds as we intended.
\subsubsection{Secondary argument syntax}
Scsh uses a very simple grammar to encode the extra arguments on
the second line of the scsh script.
The only special characters are space, tab, newline, and backslash.
\begin{itemize}
\item Each space character terminates an argument.
This means that two spaces in a row introduce an empty-string argument.
\item The tab character is not permitted
(unless you quote it with the backslash character described below).
This is to prevent the insidious bug where you believe you have
six space characters, but you really have a tab character,
and \emph{vice-versa}.
\item The newline character terminates an argument, like the space character,
and also terminates the argument sequence.
This means that an empty line parses to the singleton list whose one
element is the empty string: \ex{("")}.
The grammar doesn't admit the empty list.
\item The backslash character is the escape character.
It escapes backslash, space, tab, and newline, turning off their
special functions, and allowing them to be included in arguments.
The {\Ansi} C escape sequences (\verb|\b|, \verb|\n|, \verb|\r|
and \verb|\t|) are also supported;
these also produce argument-constituents---\verb|\n| doesn't act
like a terminating newline.
The escape sequence \verb|\|\emph{nnn} for \emph{exactly} three
octal digits reads as the character whose {\Ascii} code is \emph{nnn}.
It is an error if backslash is followed by just one or two octal digits:
\verb|\3Q| is an error.
Octal escapes are always constituent chars.
Backslash followed by other chars is not allowed
(so we can extend the escape-code space later if we like).
\end{itemize}
You have to construct these line-two argument lines carefully.
In particular, beware of trailing spaces at the end of the line---they'll
give you extra trailing empty-string arguments.
Here's an example:
%
\begin{inset}
\begin{verbatim}
#!/bin/interpreter \
foo bar  quux\ yow\end{verbatim}
\end{inset}
%
would produce the arguments
%
\codex{("foo" "bar" "" "quux yow")}
\subsection{Examples}
\begin{itemize}
\def\Item#1{\item{\ex{#1}}\\}
\def\progItem#1{\item{Program \ex{#1}}\\}
\Item{scsh -dm -m myprog -e top -s myprog.scm}
Load \ex{myprog.scm} into the \ex{config} package, then shift to the
\ex{myprog} package and call \ex{(top '("myprog.scm"))}, then exit.
This sort of invocation is typically used in \ex{\#!} script lines
(see below).
\Item{scsh -c '(display "Hello, world.")'}
A simple program.
\Item{scsh -o bigscheme}
Start up interactively in the user package after opening
structure \ex{bigscheme}.
\Item{scsh -o bigscheme -- Three args passed}
Start up interactively in the user package after opening \ex{bigscheme}.
The \ex{command-line-args} variable in the scsh package is bound to the
list \ex{("Three" "args" "passed")}, and the \ex{(command-line)}
procedure returns the list \ex{("scsh" "Three" "args" "passed")}.
\progItem{ekko}
This shell script, called \ex{ekko}, implements a version of
the Unix \ex{echo} program:
\begin{code}
#!/usr/local/bin/scsh -s
!#
(for-each (\l{arg} (display arg) (display " "))
command-line-args)\end{code}
Note this short program is an example of a \emph{script}---it
executes as it loads.
The Unix rule for executing \ex{\#!} shell scripts causes
\codex{ekko Hello, world.}
to expand as
\codex{/usr/local/bin/scsh -s ekko Hello, world.}
\progItem{ekko}
This is the same program, \emph{not} as a script.
Writing it this way makes it possible to compile the program
(and then, for instance, dump it out as a heap image).
%
\begin{code}
#!/usr/local/bin/scsh \\
-e top -s
!#
(define (top args)
(for-each (\l{arg} (display arg) (display " "))
(cdr args)))\end{code}
%
The \ex{\urlh{http://www.FreeBSD.org/cgi/man.cgi?query=exec&apropos=0&sektion=0&manpath=FreeBSD+4.3-RELEASE&format=html}{exec(2)}} expansion of the \ex{\#!} line together with
the scsh expansion of the ``\verb|\ ekko|'' meta-argument
(see section~\ref{sec:meta-arg}) gives the following command-line expansion:
\begin{code}
ekko Hello, world.
{\evalto} /usr/local/bin/scsh \\ ekko Hello, world.
{\evalto} /usr/local/bin/scsh -e top -s ekko Hello, world.\end{code}
\progItem{sort}
This is a program to replace the Unix \ex{sort} utility---sorting lines
read from stdin, and printing the results on stdout.
Note that the source code defines a general sorting package,
which is useful (1) as a Scheme module exporting sort procedures
to other Scheme code, and (2) as a standalone program invoked from
the \ex{top} procedure.
\begin{code}
#!/usr/local/bin/scsh \\
-dm -m sort-toplevel -e top -s
!#
;;; This is a sorting module. TOP procedure exports
;;; the functionality as a Unix program akin to sort(1).
(define-structures ((sort-struct (export sort-list
sort-vector!))
(sort-toplevel (export top)))
(open scheme)
(begin (define (sort-list elts <=) {\ldots})
(define (sort-vector! vec <=) {\ldots})
;; Parse the command line and
;; sort stdin to stdout.
(define (top args)
{\ldots})))\end{code}
The expansion below shows how the command-line scanner
(1) loads the config file \ex{sort} (written in the {\scm} module language),
(2) switches to the package underlying the \ex{sort-toplevel} structure,
(3) calls \ex{(top '("sort" "foo" "bar"))} in the package, and finally
(4) exits.
%
{\small
\begin{centercode}
sort foo bar
{\evalto} /usr/local/bin/scsh \\ sort foo bar
{\evalto} /usr/local/bin/scsh -dm -m sort-toplevel -e top -s sort foo bar\end{centercode}}
An alternate method would have used a
\begin{code}
-n #f -o sort-toplevel\end{code}
sequence of switches to specify a top-level package.
\end{itemize}
Note that the sort example can be compiled into a Unix program by
loading the file into an scsh process, and dumping a heap with top-level
\ex{top}. Even if we don't want to export the sort's functionality as a
subroutine library, it is still useful to write the sort program with the
module language. The command line design allows us to run this program as
either an interpreted script (given the \ex{\#!} args in the header) or as a
compiled heap image.
\subsection{Process exit values}
Scsh ignores the value produced by its top-level computation when determining
its exit status code.
If the top-level computation completed with no errors,
scsh dies with exit code 0.
For example, a scsh process whose top-level is specified by a \ex{-c \var{exp}}
or a \ex{-e \var{entry}} entry point ignores the value produced
by evaluating \var{exp} and calling \var{entry}, respectively.
If these computations terminate with no errors, the scsh process
exits with an exit code of 0.
To return a specific exit status, use the \ex{exit} procedure explicitly, \eg,
\begin{tightcode}
scsh -c \\
"(exit (status:exit-val (run (| (fmt) (mail shivers)))))"\end{tightcode}
\section{The scsh virtual machine}
To run the {\scm} implementation of scsh, you run a specially modified
copy of the {\scm} virtual machine with a scsh heap image.
The scsh binary is actually nothing but a small cover program that invokes the
byte-code interpreter on the scsh heap image for you.
This allows you to simply start up an interactive scsh from a command
line, as well as write shell scripts that begin with the simple trigger
\codex{\#!/usr/local/bin/scsh -s}
You can also directly execute the virtual machine,
which takes its own set of command-line switches.
For example,
this command starts the vm up with a 1Mword heap (split into two semispaces):
\codex{scshvm -o scshvm -h 1000000 -i scsh.image arg1 arg2 \ldots}
The vm peels off initial vm arguments
up to the \ex{-i} heap image argument, which terminates vm argument parsing.
The rest of the arguments are passed off to the scsh top-level.
Scsh's top-level removes scsh switches, as discussed in the previous section;
the rest show up as the value of \ex{command-line-arguments}.
Directly executing the vm can be useful to specify non-standard switches, or
invoke the virtual machine on special heap images, which can contain
pre-compiled scsh programs with their own top-level procedures.
\subsection{VM arguments}
\label{sec:vm-args}
The vm takes arguments in the following form:
\codex{scshvm [\var{meta-arg}] [\var{vm-options}\+] [\var{end-option} \var{scheme-args}]}
where
\begin{inset}
\begin{tabular}{ll}
\var{meta-arg:} & \verb|\ |\var{filename} \\
\\
\var{vm-option}: & \ex{-h }\var{heap-size-in-words} \\
& \ex{-s }\var{stack-size-in-words} \\
& \ex{-o }\var{object-file-name} \\
\\
\var{end-option:} & \ex{-i }\var{image-file-name} \\
& \ex{--}
\end{tabular}
\end{inset}
The vm's meta-switch ``\verb|\ |\var{filename}'' is handled the same
as scsh's meta-switch, and serves the same purpose.
\subsubsection{VM options}
The \ex{-o \var{object-file-name}} switch tells the vm where to find
relocation information for its foreign-function calls.
Scsh will use a pre-compiled default if it is not specified.
Scsh \emph{must} have this information to run,
since scsh's syscall interfaces are done with foreign-function calls.
The \ex{-h} and \ex{-s} options tell the vm how much space to allocate
for the heap and stack.
The heap size value is the total number of words allocated for the heap;
this space is then split into two semi-spaces for {\scm}'s stop-and-copy
collector.
\subsubsection{End options}
End options terminate argument parsing.
The \ex{-i} switch is followed by the name of a heap image for the
vm to execute.
The \var{image-file-name} string is also taken to be the name of the program
being executed by the VM; this name becomes the head of the argument
list passed to the heap image's top-level entry point.
The tail of the argument list is constructed from all following arguments.
The \ex{--} switch terminates argument parsing without giving
a specific heap image; the vm will start up using a default
heap (whose location is compiled into the vm).
All the following arguments comprise the tail of the list passed off to
the heap image's top-level procedure.
Notice that you are not allowed to pass arguments to the heap image's
top-level procedure (\eg, scsh) without delimiting them with \ex{-i}
or \ex{--} flags.
\subsection{Inserting interpreter triggers into heap images}
{\scm}'s heap image format allows for an informational header:
when the vm loads in a heap image, it ignores all data occurring before
the first control-L character (\textsc{Ascii} 12).
This means that you can insert a ``\ex{\#!}'' trigger line into a
heap image, making it a form of executable ``shell script.''
Since the vm requires multiple arguments to be given on the command
line, you must use the meta-switch.
Here's an example heap-image header:
\begin{code}
#!/usr/local/lib/scsh/scshvm \\
-o /usr/local/lib/scsh/scshvm -i
{\ldots} \textnormal{\emph{Your heap image goes here}} \ldots\end{code}
\subsection{Inserting a double-level trigger into Scheme programs}
If you're a nerd, you may enjoy doing a double-level machine shift
in the trigger line of your Scheme programs with the following magic:
\begin{code}\small
#!/usr/local/lib/scsh/scshvm \\
-o /usr/local/lib/scsh/scshvm -i /usr/local/lib/scsh/scsh.image -s
!#
{\ldots} \textnormal{\emph{Your Scheme program goes here}} \ldots\end{code}
\section{Compiling scsh programs}
Scsh allows you to create a heap image with your own top-level procedure.
Adding the pair of lines
\begin{code}
#!/usr/local/lib/scsh/scshvm \\
-o /usr/local/lib/scsh/scshvm -i\end{code}
to the top of the heap image will turn it into an executable {\Unix} file.
You can create heap images with the following two procedures.
\defun{dump-scsh-program}{main fname}{\undefined}
\begin{desc}
This procedure writes out a scsh heap image. When the
heap image is executed by the {\scm} vm, it will call
the \var{main} procedure, passing it the vm's argument list.
When \ex{main} returns an integer value $i$, the vm exits with
exit status $i$.
The {\Scheme} vm will parse command-line switches as
described in section~\ref{sec:vm-args}; remaining arguments
form the tail of the command-line list that is passed to \ex{main}.
(The head of the list is the name of the program being executed
by the vm.)
Further argument parsing
(as described for scsh in section~\ref{sec:scsh-switches})
is not performed.
The heap image created by \ex{dump-scsh-program} has unused
code and data pruned out, so small programs compile to much smaller
heap images.
\end{desc}
\defun{dump-scsh}{fname}{\undefined}
\begin{desc}
This procedure writes out a heap image with the standard
scsh top-level.
When the image is resumed by the vm, it will parse and
execute scsh command-line switches as described in section
\ref{sec:scsh-switches}.
You can use this procedure to write out custom scsh heap images
that have specific packages preloaded and start up in specific
packages.
\end{desc}
Unfortunately, {\scm} does not support separate compilation of
Scheme files or Scheme modules.
The only way to compile is to load source and then dump out a
heap image.
One occasionally hears rumours that this is being addressed
by the {\scm} development team.
\section{Statically linking heap images}
The static heap linker converts a {\scm} bytecode image contained
in a .image file to a C representation. This C code is then compiled and
linked in with a virtual machine, producing a single executable.
Some of the benefits are:
\begin{itemize}
\item Instantaneous start-up time.
\item Improved paging; scsh images can be shared between different
processes.
\item Vastly reduced GC copying---the whole initial image
is moved out of the heap, and neither traced nor copied.
\item Result program no longer depends on the filesystem for its
initial image.
\end{itemize}
The static heap linker takes arguments in the following form:
\codex{scsh-hlink \var{image} \var{executable} [\var{option} \ldots]}
It reads in the heap image \var{image}, translates it into C code,
compiles the C code, and links it against the scsh vm, producing the
standalone binary file \var{executable}.
Each C file represents part of the heap image as a constant C \ex{long} vector
that looks something like this:
{\small\begin{verbatim}
const long p116[]={0x882,0x24,0x19,
0x882,(long)(&p19[785])+7,(long)(&p119[125])+7,
0x882,(long)(&p119[128])+7,(long)(&p119[131])+7,
0x882,(long)(&p102[348])+7,(long)(&p3[114])+7,
0xfc2,0x2030200,0x7100209,0x1091002,0x1c075a,
0x882,(long)(&p29[1562])+7,(long)(&p119[137])+7,
0x882,(long)(&p78[692])+7,(long)(&p119[140])+7,
.
.
.
};
\end{verbatim}}%
%
Translating to a C declaration gives us freedom from the various
object-file formats.\footnote{This idea is due to Jonathan Rees.}
Note that the const declaration allows the compiler to put this array in the
text pages of the executable.
The heap is split into parts because many C compilers cannot handle
multi-megabyte initialised vector declarations.
The allowed options to the heap linker are:
\begin{itemize}
\def\Item#1{\item{\ex{#1}}\\}
\Item{--temp \var{dir}} The temporary directory to hold .c and .o files.
The default is typically configured to be
\ex{/usr/tmp}, and can be overridden by the
environment variable \ex{TMPDIR}.
Example:
\codex{--temp /tmp}
\Item{--cc \var{command}} The command to run the C compiler.
The default can be overridden by the environment
variable \ex{CC}.
Example:
\codex{--cc "gcc -g -O"}
\Item{--ld \var{command}} The arguments to run the C compiler as a linker.
The default can be overridden by the
environment variable \ex{LDFLAGS}.
Example:
\codex{--ld "-Wl,-E"}
\Item{--libs \var{libs}} The libraries needed to link the VM and heap.
The default can be overridden by the
environment variable \ex{LIBS}.
Example:
\codex{--libs "-ldld -lld -lm"}
\end{itemize}
Be warned that the current heap linker has many shortcomings.
\begin{itemize}
\item It is extremely slow. Really, really slow. Translating the standard
scsh heap image into a standalone binary takes well over an hour on a
40MB/133MHz Pentium system.
A memory-starved 486 could take all night.
\item It cannot be applied to itself. The current implementation
works by replacing some of the heap-dumping code. This means
you cannot load the heap-linker code into a scsh system and
subsequently use \ex{dump-scsh-program} to create a heap-linker
heap image.
\item The interface leaves a lot to be desired.
\begin{itemize}
\item It requires the heap image to be referenced by a file-name;
the linker will not allow you to feed it the input heap image
on a port.
\item The heap-image is linked against the vm contained in
\begin{tightcode}
/usr/local/lib/scsh/libscshvm.a\end{tightcode}
This is wired in at the time scsh is installed on your system.
\item There is no Scheme procedural interface.
\end{itemize}
\item The program produced uses the default VM argv parser \verb|process_args|
from the scsh source file \ex{main.c} to process the command line
before handing it off to the heap image's top-level procedure.
This is not what you want for many programs.
The system needs to be changed to allow users to override this default
with their own VM argument parsers.
\item A possible problem is the Unix limits on the number of command
line arguments. The heap-linker calls the C linker with a large number of
object files. It's conceivable that on some Unix systems this could fail
now or if scsh grows in the future. The solution could be to create
library archives of a few dozen files and then link the resulting few dozen
library archives to make the executable.
\end{itemize}
In spite of these many shortcomings, we are providing the static linker
as it stands in this release so that people may get some experience with
it.
Here is an example of how one might use the heap linker:
\begin{code}
scsh-hlink scsh.image fastscsh\end{code}
We'd love it if someone would dive into the source and improve it.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Standard file locations}
Because the scshvm binary is intended to be used for writing shell
scripts, it is important that the binary be installed in a standard
place, so that shell scripts can dependably refer to it.
The standard directory for the scsh tree should be \ex{/usr/local/lib/scsh/}.
Whenever possible, the vm should be located in
\codex{/usr/local/lib/scsh/scshvm}
and a scsh heap image should be located in
\codex{/usr/local/lib/scsh/scsh.image}
The top-level scsh program should be located in
\codex{/usr/local/lib/scsh/scsh}
with a symbolic link to it from
\codex{/usr/local/bin/scsh}
The {\scm} image format allows heap images to have \ex{\#!} triggers,
so \ex{scsh.image} should have a \ex{\#!} trigger of the following form:
\begin{code}
#!/usr/local/lib/scsh/scshvm \\
-o /usr/local/lib/scsh/scshvm -i
{\ldots} \textnormal{\emph{heap image goes here}} \ldots\end{code}

1477
doc/scsh-manual/sre.tex Normal file

File diff suppressed because it is too large Load Diff

496
doc/scsh-manual/strings.tex Normal file
View File

@ -0,0 +1,496 @@
% -*- latex -*-
\chapter{Strings and characters}
Strings are the basic communication medium for {\Unix} processes, so a
Unix programming environment must have reasonable facilities for manipulating
them.
Scsh provides a powerful set of procedures for processing strings and
characters.
Besides the facilities described in this chapter, scsh also provides
\begin{itemize}
\itum{Regular expressions (chapter~\ref{chapt:sre})}
A complete regular-expression system.
\itum{Field parsing, delimited record I/O and the awk loop
(chapter~\ref{chapt:fr-awk})}
These procedures let you read in chunks of text delimited by selected
characters, and
parse each record into fields based on regular expressions
(for example, splitting a string at every occurrence of colon or
white-space).
The \ex{awk} form allows you to loop over streams of these records
in a convenient way.
\itum{The SRFI-13 string libraries}
This pair of libraries contains procedures that create, fold, iterate over,
search, compare, assemble, cut, hash, case-map, and otherwise manipulate
strings.
They are provided by the \ex{string-lib} and \ex{string-lib-internals}
packages, and are also available in the default \ex{scsh} package.
More documentation on these procedures can be found at URLs
\begin{tightinset}
% The gratuitous mbox makes xdvi render the hyperlinks better.
\texonly
\mbox{\url{http://srfi.schemers.org/srfi-13/srfi-13.html}}\\
\url{http://srfi.schemers.org/srfi-13/srfi-13.txt}
\endtexonly
% Changed the \mbox into \urlh for tex2page to avoid problems running tex2page
\htmlonly
\urlh{http://srfi.schemers.org/srfi-13/srfi-13.html}{http://srfi.schemers.org/srfi-13/srfi-13.html}\\
\urlh{http://srfi.schemers.org/srfi-13/srfi-13.txt}{http://srfi.schemers.org/srfi-13/srfi-13.txt}
\endhtmlonly
\end{tightinset}
\itum{The SRFI-14 character-set library}
This library provides a set-of-characters abstraction, which is frequently
useful when searching, parsing, filtering or otherwise operating on
strings and character data. The SRFI is provided by the \ex{char-set-lib}
package; its bindings are also available in the default \ex{scsh} package.
More documentation on this library can be found at URLs
\begin{tightinset}
% The gratuitous mbox makes xdvi render the hyperlinks better.
\texonly
\mbox{\url{http://srfi.schemers.org/srfi-14/srfi-14.html}}\\
\url{http://srfi.schemers.org/srfi-14/srfi-14.txt}
\endtexonly
% Changed the \mbox into \urlh for tex2page to avoid problems running tex2page
\htmlonly
\urlh{http://srfi.schemers.org/srfi-14/srfi-14.html}{http://srfi.schemers.org/srfi-14/srfi-14.html}\\
\urlh{http://srfi.schemers.org/srfi-14/srfi-14.txt}{http://srfi.schemers.org/srfi-14/srfi-14.txt}
\endhtmlonly
\end{tightinset}
\end{itemize}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Manipulating file names}
\label{sec:filenames}
These procedures do not access the file-system at all; they merely operate
on file-name strings. Much of this structure is patterned after the gnu emacs
design. Perhaps a more sophisticated system would be better, something
like the pathname abstractions of {\CommonLisp} or MIT Scheme. However,
being {\Unix}-specific, we can be a little less general.
\subsection{Terminology}
These procedures carefully adhere to the {\Posix} standard for file-name
resolution, which occasionally entails some slightly odd things.
This section will describe these rules, and give some basic terminology.
A \emph{file-name} is either the file-system root (``/''),
or a series of slash-terminated directory components, followed by
a file component.
Root is the only file-name that may end in slash.
Some examples:
\begin{center}
\begin{tabular}{lll}
File name & Dir components & File component \\\hline
\ex{src/des/main.c} & \ex{("src" "des")} & \ex{"main.c"} \\
\ex{/src/des/main.c} & \ex{("" "src" "des")} & \ex{"main.c"} \\
\ex{main.c} & \ex{()} & \ex{"main.c"} \\
\end{tabular}
\end{center}
Note that the relative filename \ex{src/des/main.c} and the absolute filename
\ex{/src/des/main.c} are distinguished by the presence of the root component
\ex{""} in the absolute path.
Multiple embedded slashes within a path have the same meaning as
a single slash.
More than two leading slashes at the beginning of a path have the same
meaning as a single leading slash---they indicate that the file-name
is an absolute one, with the path leading from root.
However, {\Posix} permits the OS to give special meaning to
\emph{two} leading slashes.
For this reason, the routines in this section do not simplify two leading
slashes to a single slash.
A file-name in \emph{directory form} is either a file-name terminated by
a slash, \eg, ``\ex{/src/des/}'', or the empty string, ``''.
The empty string corresponds to the current working directory,
whose file-name is dot (``\ex{.}'').
Working backwards from the append-a-slash rule,
we extend the syntax of {\Posix} file-names to define the empty string
to be a file-name form of the root directory ``\ex{/}''.
(However, ``\ex{/}'' is also acceptable as a file-name form for root.)
So the empty string has two interpretations:
as a file-name form, it is the file-system root;
as a directory form, it is the current working directory.
Slash is also an ambiguous form: \ex{/} is both a directory-form and
a file-name form.
The directory form of a file-name is very rarely used.
Almost all of the procedures in scsh name directories by giving
their file-name form (without the trailing slash), not their directory form.
So, you say ``\ex{/usr/include}'', and ``\ex{.}'', not
``\ex{/usr/include/}'' and ``''.
The sole exceptions are
\ex{file-name-as-directory} and \ex{directory-as-file-name},
whose jobs are to convert back-and-forth between these forms,
and \ex{file-name-directory}, whose job it is to split out the
directory portion of a file-name.
However, most procedures that expect a directory argument will coerce
a file-name in directory form to file-name form if it does not have
a trailing slash.
Bear in mind that the ambiguous case, empty string, will be
interpreted in file-name form, \ie, as root.
\subsection{Procedures}
\defun {file-name-directory?} {fname} \boolean
\defunx {file-name-non-directory?} {fname} \boolean
\begin{desc}
These predicates return true if the string is in directory form, or
file-name form (see the above discussion of these two forms).
Note that they both return true on the ambiguous case of empty string,
which is both a directory (current working directory), and a file name
(the file-system root).
\begin{center}
\begin{tabular}{lll}
File name & \ex{\ldots-directory?} & \ex{\ldots-non-directory?} \\
\hline
\ex{"src/des"} & \ex{\sharpf} & \ex{\sharpt} \\
\ex{"src/des/"} & \ex{\sharpt} & \ex{\sharpf} \\
\ex{"/"} & \ex{\sharpt} & \ex{\sharpf} \\
\ex{"."} & \ex{\sharpf} & \ex{\sharpt} \\
\ex{""} & \ex{\sharpt} & \ex{\sharpt}
\end{tabular}
\end{center}
\end{desc}
\begin{defundesc} {file-name-as-directory} {fname} \str
Convert a file-name to directory form.
Basically, add a trailing slash if needed:
\begin{exampletable}
\ex{(file-name-as-directory "src/des")} & \ex{"src/des/"} \\
\ex{(file-name-as-directory "src/des/")} & \ex{"src/des/"} \\[2ex]
%
\header{\ex{.}, \ex{/}, and \ex{""} are special:}
\ex{(file-name-as-directory ".")} & \ex{""} \\
\ex{(file-name-as-directory "/")} & \ex{"/"} \\
\ex{(file-name-as-directory "")} & \ex{"/"}
\end{exampletable}
\end{defundesc}
\begin{defundesc} {directory-as-file-name} {fname} \str
Convert a directory to a simple file-name.
Basically, kill a trailing slash if one is present:
\begin{exampletable}
\ex{(directory-as-file-name "foo/bar/")} & \ex{"foo/bar"} \\[2ex]
%
\header{\ex{/} and \ex{""} are special:}
\ex{(directory-as-file-name "/")} & \ex{"/"} \\
\ex{(directory-as-file-name "")} & \ex{"."} (\ie, the cwd) \\
\end{exampletable}
\end{defundesc}
\begin{defundesc} {file-name-absolute?} {fname} \boolean
Does \var{fname} begin with a root or \ex{\~} component?
(Recognising \ex{\~} as a home-directory specification
is an extension of {\Posix} rules.)
%
\begin{exampletable}
\ex{(file-name-absolute? "/usr/shivers")} & {\sharpt} \\
\ex{(file-name-absolute? "src/des")} & {\sharpf} \\
\ex{(file-name-absolute? "\~/src/des")} & {\sharpt} \\[2ex]
%
\header{Non-obvious case:}
\ex{(file-name-absolute? "")} & {\sharpt} (\ie, root)
\end{exampletable}
\end{defundesc}
\begin{defundesc} {file-name-directory} {fname} {{\str} or false}
Return the directory component of \var{fname} in directory form.
If the file-name is already in directory form, return it as-is.
%
\begin{exampletable}
\ex{(file-name-directory "/usr/bdc")} & \ex{"/usr/"} \\
{\ex{(file-name-directory "/usr/bdc/")}} &
{\ex{"/usr/bdc/"}} \\
\ex{(file-name-directory "bdc/.login")} & \ex{"bdc/"} \\
\ex{(file-name-directory "main.c")} & \ex{""} \\[2ex]
%
\header{Root has no directory component:}
\ex{(file-name-directory "/")} & \ex{""} \\
\ex{(file-name-directory "")} & \ex{""}
\end{exampletable}
\end{defundesc}
\begin{defundesc} {file-name-nondirectory} {fname} \str
Return non-directory component of fname.
%
\begin{exampletable}
{\ex{(file-name-nondirectory "/usr/ian")}} &
{\ex{"ian"}} \\
\ex{(file-name-nondirectory "/usr/ian/")} & \ex{""} \\
{\ex{(file-name-nondirectory "ian/.login")}} &
{\ex{".login"}} \\
\ex{(file-name-nondirectory "main.c")} & \ex{"main.c"} \\
\ex{(file-name-nondirectory "")} & \ex{""} \\
\ex{(file-name-nondirectory "/")} & \ex{"/"}
\end{exampletable}
\end{defundesc}
\begin{defundesc} {split-file-name} {fname} {{\str} list}
Split a file-name into its components.
%
\begin{exampletable}
\splitline{\ex{(split-file-name "src/des/main.c")}}
{\ex{("src" "des" "main.c")}} \\[1.5ex]
%
\splitline{\ex{(split-file-name "/src/des/main.c")}}
{\ex{("" "src" "des" "main.c")}} \\[1.5ex]
%
\splitline{\ex{(split-file-name "main.c")}} {\ex{("main.c")}} \\[1.5ex]
%
\splitline{\ex{(split-file-name "/")}} {\ex{("")}}
\end{exampletable}
\end{defundesc}
\begin{defundesc} {path-list->file-name} {path-list [dir]} \str
Inverse of \ex{split-file-name}.
\begin{code}
(path-list->file-name '("src" "des" "main.c"))
{\evalto} "src/des/main.c"
(path-list->file-name '("" "src" "des" "main.c"))
{\evalto} "/src/des/main.c"
\cb
{\rm{}Optional \var{dir} arg anchors relative path-lists:}
(path-list->file-name '("src" "des" "main.c")
"/usr/shivers")
{\evalto} "/usr/shivers/src/des/main.c"\end{code}
%
The optional \var{dir} argument is usefully \ex{(cwd)}.
\end{defundesc}
\begin{defundesc} {file-name-extension} {fname} \str
Return the file-name's extension.
%
\begin{exampletable}
\ex{(file-name-extension "main.c")} & \ex{".c"} \\
\ex{(file-name-extension "main.c.old")} & \ex{".old"} \\
\ex{(file-name-extension "/usr/shivers")} & \ex{""}
\end{exampletable}
%
\begin{exampletable}
\header{Weird cases:}
\ex{(file-name-extension "foo.")} & \ex{"."} \\
\ex{(file-name-extension "foo..")} & \ex{"."}
\end{exampletable}
%
\begin{exampletable}
\header{Dot files are not extensions:}
\ex{(file-name-extension "/usr/shivers/.login")} & \ex{""}
\end{exampletable}
\end{defundesc}
\begin{defundesc} {file-name-sans-extension} {fname} \str
Return everything but the extension.
%
\begin{exampletable}
\ex{(file-name-sans-extension "main.c")} & \ex{"main"} \\
\ex{(file-name-sans-extension "main.c.old")} & \ex{"main.c"} \\
\splitline{\ex{(file-name-sans-extension "/usr/shivers")}}
{\ex{"/usr/shivers"}}
\end{exampletable}
%
\begin{exampletable}
\header{Weird cases:}
\ex{(file-name-sans-extension "foo.")} & \ex{"foo"} \\
\ex{(file-name-sans-extension "foo..")} & \ex{"foo."} \\[2ex]
%
\header{Dot files are not extensions:}
\splitline{\ex{(file-name-sans-extension "/usr/shivers/.login")}}
{\ex{"/usr/shivers/.login"}}
\end{exampletable}
Note that appending the results of \ex{file-name-extension} and
{\ttt file\=name\=sans\=extension} in all cases produces the original file-name.
\end{defundesc}
\begin{defundesc} {parse-file-name} {fname} {[dir name extension]}
Let $f$ be \ex{(file-name-nondirectory \var{fname})}.
This function returns the three values:
\begin{itemize}
\item \ex{(file-name-directory \var{fname})}
\item \ex{(file-name-sans-extension \var{f})}
\item \ex{(file-name-extension \var{f}\/)}
\end{itemize}
The inverse of \ex{parse-file-name}, in all cases, is \ex{string-append}.
The boundary case of \ex{/} was chosen to preserve this inverse.
\end{defundesc}
\begin{defundesc} {replace-extension} {fname ext} \str
This procedure replaces \var{fname}'s extension with \var{ext}.
It is exactly equivalent to
\codex{(string-append (file-name-sans-extension \var{fname}) \var{ext})}
\end{defundesc}
\defun{simplify-file-name}{fname}\str
\begin{desc}
Removes leading and internal occurrences of dot.
A trailing dot is left alone, as the parent could be a symlink.
Removes internal and trailing double-slashes.
A leading double-slash is left alone, in accordance with {\Posix}.
However, triple and more leading slashes are reduced to a single slash,
in accordance with {\Posix}.
Double-dots (parent directory) are left alone, in case they come after
symlinks or appear in a \ex{/../\var{machine}/\ldots} ``super-root'' form
(which {\Posix} permits).
\end{desc}
\defun{resolve-file-name}{fname [dir]}\str
\begin{desc}
\begin{itemize}
\item Do \ex{\~} expansion.
\item If \var{dir} is given,
convert a relative file-name to an absolute file-name,
relative to directory \var{dir}.
\end{itemize}
\end{desc}
\begin{defundesc} {expand-file-name} {fname [dir]} \str
Resolve and simplify the file-name.
\end{defundesc}
\begin{defundesc} {absolute-file-name} {fname [dir]} \str
Convert file-name \var{fname} into an absolute file name,
relative to directory \var{dir}, which defaults to the current
working directory. The file name is simplified before being
returned.
This procedure does not treat a leading tilde character specially.
\end{defundesc}
\begin{defundesc} {home-dir} {[user]} \str
\ex{home-dir} returns \var{user}'s home directory.
\var{User} defaults to the current user.
\begin{exampletable}
\ex{(home-dir)} & \ex{"/user1/lecturer/shivers"} \\
\ex{(home-dir "ctkwan")} & \ex{"/user0/research/ctkwan"}
\end{exampletable}
\end{defundesc}
\begin{defundesc} {home-file} {[user] fname} \str
Returns file-name \var{fname} relative to \var{user}'s home directory;
\var{user} defaults to the current user.
%
\begin{exampletable}
\ex{(home-file "man")} & \ex{"/usr/shivers/man"} \\
\ex{(home-file "fcmlau" "man")} & \ex{"/usr/fcmlau/man"}
\end{exampletable}
\end{defundesc}
The general \ex{substitute-env-vars} string procedure,
defined in the next section,
is also frequently useful for expanding file-names.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Other string manipulation facilities}
\begin{defundesc} {substitute-env-vars} {fname} \str
Replace occurrences of environment variables with their values.
An environment variable is denoted by a dollar sign followed either by a
name made of alphanumeric chars and underscores, or by a name surrounded by braces.
\begin{exampletable}
\splitline{\ex{(substitute-env-vars "\$USER/.login")}}
{\ex{"shivers/.login"}} \\
\cd{(substitute-env-vars "$\{USER\}_log")} & \cd{"shivers_log"}
\end{exampletable}
\end{defundesc}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{ASCII encoding}
\defun {char->ascii}{\character} \integer
\defunx {ascii->char}{\integer} \character
\begin{desc}
These are identical to \ex{char->integer} and \ex{integer->char} except that
they use the {\Ascii} encoding.
\end{desc}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Character predicates}
\defun {char-letter?}\character\boolean
\defunx{char-lower-case?}\character\boolean
\defunx{char-upper-case?}\character\boolean
\defunx{char-title-case?}\character\boolean
\defunx{char-digit?}\character\boolean
\defunx{char-letter+digit?}\character\boolean
\defunx{char-graphic?}\character\boolean
\defunx{char-printing?}\character\boolean
\defunx{char-whitespace?}\character\boolean
\defunx{char-blank?}\character\boolean
\defunx{char-iso-control?}\character\boolean
\defunx{char-punctuation?}\character\boolean
\defunx{char-hex-digit?}\character\boolean
\defunx{char-ascii?}\character\boolean
\begin{desc}
Each of these predicates tests for membership in one of the standard
character sets provided by the SRFI-14 character-set library.
Additionally, the following redundant bindings are provided for {R5RS}
compatibility:
\begin{inset}
\begin{tabular}{ll}
{R5RS} name & scsh definition \\ \hline
\ex{char-alphabetic?} & \ex{char-letter?} \\
\ex{char-numeric?} & \ex{char-digit?} \\
\ex{char-alphanumeric?} & \ex{char-letter+digit?}
\end{tabular}
\end{inset}
\end{desc}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Deprecated character-set procedures}
\label{sec:char-sets}
The SRFI-14 character-set library grew out of an earlier library developed
for scsh.
However, the SRFI standardisation process introduced incompatibilities with
the original scsh bindings.
The current version of scsh provides the library
\ex{obsolete-char-set-lib}, which contains the old bindings found in
previous releases of scsh.
The following table lists the members of this library, along with
the equivalent SRFI-14 binding. This obsolete library is deprecated and
\emph{not} open by default in the standard \ex{scsh} environment;
new code should use the SRFI-14 bindings.
\begin{inset}
\begin{tabular}{ll}
Old \ex{obsolete-char-set-lib} & SRFI-14 \ex{char-set-lib} \\ \hline
\ex{char-set-members} & \ex{char-set->list} \\
\ex{chars->char-set} & \ex{list->char-set} \\
\ex{ascii-range->char-set} & \ex{ucs-range->char-set} (not exact) \\
\ex{predicate->char-set} & \ex{char-set-filter} (not exact) \\
\ex{char-set-every?} & \ex{char-set-every} \\
\ex{char-set-any?} & \ex{char-set-any} \\
\\
\ex{char-set-invert} & \ex{char-set-complement} \\
\ex{char-set-invert!} & \ex{char-set-complement!} \\
\\
\ex{char-set:alphabetic} & \ex{char-set:letter} \\
\ex{char-set:numeric} & \ex{char-set:digit} \\
\ex{char-set:alphanumeric} & \ex{char-set:letter+digit} \\
\ex{char-set:control} & \ex{char-set:iso-control}
\end{tabular}
\end{inset}
Note also that the \ex{->char-set} procedure no longer handles a predicate
argument.

3184
doc/scsh-manual/syscalls.tex Normal file

File diff suppressed because it is too large Load Diff

33
doc/scsh-manual/test.tex Normal file
View File

@ -0,0 +1,33 @@
%&latex -*- latex -*-
\documentclass{report}
\usepackage{code,boxedminipage,draftfooters,palatino,ct,makeidx,
headings,mantitle,array,matter,mysize10}
\parskip = 3pt plus 3pt
\sloppy
\input{decls}
%%% End preamble
\begin{document}
\begin{tabular}{ll}
{\begin{codebox}[b]
define structure web server
open scsh
scheme
net-hax
file web\end{codebox}}
&
{\begin{codebox}[b]
(define-structure web-server
(open scheme
scsh
net-hax
\vdots)
(file web))\end{codebox}}\\
\end{tabular}
\end{document}

View File

@ -0,0 +1,9 @@
% tex2page.sty
% Dorai Sitaram
% Loading this file in a LaTeX document
% gives it all the macros of tex2page.tex,
% but via a more LaTeX-convenient filename.
\input{tex2page}

40
doc/scsh-manual/todo.tex Normal file
View File

@ -0,0 +1,40 @@
%&latex -*- latex -*-
\chapter{Todo}
There are always many, many improvements and extensions that could be
made to scsh.
We invite interested hackers to do any of them, and send us the code;
we'll put you on the team.
Visit the Scheme Underground Web page for more information on good hacks at
\begin{inset} \begin{flushleft}
\ex{\urlh{http://www.ai.mit.edu/projects/su/}{http://www.ai.mit.edu/projects/su/}}
\end{flushleft}
\end{inset}
Scsh is a tool that lets you write fun programs that do real things in
an elegant language; go wild.
\begin{itemize}
\item Threads.
\item An X gui interface. (Needs threads.)
\item A better C function/data-structure interface. This is not easy.
\item More network protocols. Telnet and ftp would be the most important.
\item Port Edwin, an emacs text editor written in MIT Scheme, to scsh.
Combine it with scsh's OS interfaces to make a visual shell.
\item Manual hacking.
\begin{itemize}
\item The {\LaTeX} hackery needs yet another serious pass. Most importantly,
long procedure ``declarations'' need to be broken across two lines.
\item Soup up the markup processor, and redo manual in markup. Generate
{\LaTeX}, HTML, and info versions. Alternatively, persuade some kind
soul to hand-port manual to HTML or info.
\end{itemize}
\item Job control, after \ex{jcontrol.scm}
\item Better static heap linker.
\item Gnu readline lib.
\end{itemize}

751
doc/scsh-manual/tty.tex Normal file
View File

@ -0,0 +1,751 @@
%&latex -*- latex -*-
% Fix OXTABS footnote bug
% Figures should be dumped out earlier? Pack two to a page?
\section{Terminal device control}
\label{sect:tty}
\newcommand{\fr}[1]{\makebox[0pt][r]{#1}}
% \ex{#1} and also generates an index entry.
\newcommand{\exi}[1]{\index{#1@\texttt{#1}}\ex{#1}}
\newcommand{\indextt}[1]{\index{#1@\texttt{#1}}}
Scsh provides a complete set of routines for manipulating terminal
devices---putting them in ``raw'' mode, changing and querying their
special characters, modifying their i/o speeds, and so forth.
The scsh interface is designed both for generality and portability
across different Unix platforms, so you don't have to rewrite your
program each time you move to a new system.
We've also made an effort to use reasonable, Scheme-like names for
the multitudinous named constants involved, so when you are reading
code, you'll have less likelihood of getting lost in a bewildering
maze of obfuscatory constants named \ex{ICRNL}, \ex{INPCK}, \ex{IUCLC},
and \ex{ONOCR}.
This section can only lay out the basic functionality of the terminal
device interface.
For further details, see the termios(3) man page on your system,
or consult one of the standard {\Unix} texts.
\subsection{Portability across OS variants}
Terminal-control software is inescapably complex, ugly, and low-level.
Unix variants each provide their own way of controlling terminal
devices, making it difficult to provide interfaces that are
portable across different Unix systems.
Scsh's terminal support is based primarily upon the {\Posix} termios
interface.
Programs that can be written using only the {\Posix} interface are likely
to be widely portable.
The bulk of the documentation that follows consists of several pages worth
of tables defining different named constants that enable and disable different
features of the terminal driver.
Some of these flags are {\Posix}; others are taken from the two common
branches of Unix development, SVR4 and 4.3+ Berkeley.
Scsh guarantees that the non-{\Posix} constants will be bound identifiers.
\begin{itemize}
\item If your OS supports a particular non-{\Posix} flag,
its named constant will be bound to the flag's value.
\item If your OS doesn't support the flag, its named constant
will be present, but bound to \sharpf.
\end{itemize}
This means that if you want to use SVR4 or Berkeley features in a program,
your program can portably test the values of the flags before using
them---the flags can reliably be referenced without producing OS-dependent
``unbound variable'' errors.
Finally, note that although {\Posix}, SVR4, and Berkeley cover the lion's
share of the terminal-driver functionality,
each operating system inevitably has non-standard extensions.
While a particular scsh implementation may provide these extensions,
they are not portable, and so are not documented here.
\subsection{Miscellaneous procedures}
\defun{tty?}{fd/port}{\boolean}
\begin{desc}
Return true if the argument is a tty.
\end{desc}
\defun{tty-file-name}{fd/port}{\str}
\begin{desc}
The argument \var{fd/port} must be a file descriptor or port open on a tty.
Return the file-name of the tty.
\end{desc}
\subsection{The tty-info record type}
The primary data-structure that describes a terminal's mode is
a \ex{tty-info} record, defined as follows:
\index{tty-info record type}
\indextt{tty-info:control-chars}
\indextt{tty-info:input-flags}
\indextt{tty-info:output-flags}
\indextt{tty-info:control-flags}
\indextt{tty-info:local-flags}
\indextt{tty-info:input-speed}
\indextt{tty-info:output-speed}
\indextt{tty-info:min}
\indextt{tty-info:time}
\indextt{tty-info?}
\begin{code}
(define-record tty-info
control-chars ; String: Magic input chars
input-flags ; Int: Input processing
output-flags ; Int: Output processing
control-flags ; Int: Serial-line control
local-flags ; Int: Line-editing UI
input-speed ; Int: Code for input speed
output-speed ; Int: Code for output speed
min ; Int: Raw-mode input policy
time) ; Int: Raw-mode input policy\end{code}
\subsubsection{The control-characters string}
The \ex{control-chars} field is a character string;
its characters may be indexed by integer values taken from
table~\ref{table:ttychars}.
As discussed above,
only the {\Posix} entries in table~\ref{table:ttychars} are guaranteed
to be legal, integer indices.
A program can reliably test the OS to see if the non-{\Posix}
characters are supported by checking the index constants.
If the control-character function is supported by the terminal driver,
then the corresponding index will be bound to an integer;
if it is not supported, the index will be bound to \sharpf.
To disable a given control-character function, set its corresponding
entry in the \ex{tty-info:control-chars} string to the
special character \exi{disable-tty-char}
(and then use the \ex{(set-tty-info \var{fd/port} \var{info})} procedure
to update the terminal's state).
\subsubsection{The flag fields}
The \ex{tty-info} record's \ex{input-flags}, \ex{output-flags},
\ex{control-flags}, and \ex{local-flags} fields are all bit sets
represented as two's-complement integers.
Their values are composed by or'ing together values taken from
the named constants listed in tables~\ref{table:ttyin}
through \ref{table:ttylocal}.
As discussed above,
only the {\Posix} entries listed in these tables are guaranteed
to be legal, integer flag values.
A program can reliably test the OS to see if the non-{\Posix}
flags are supported by checking the named constants.
If the feature is supported by the terminal driver,
then the corresponding flag will be bound to an integer;
if it is not supported, the flag will be bound to \sharpf.
%%%%% I managed to squeeze this into the DEFINE-RECORD's comments.
% Here is a small table classifying the four flag fields by
% the kind of features they determine:
% \begin{center}
% \begin{tabular}{|ll|}\hline
% Field & Affects \\ \hline \hline
% \ex{input-flags} & Processing of input chars \\
% \ex{output-flags} & Processing of output chars \\
% \ex{control-flags} & Controlling of terminal's serial line \\
% \ex{local-flags} & Details of the line-editting user interface \\
% \hline
% \end{tabular}
% \end{center}
%%%
%%% The figures used to go here.
%%%
\subsubsection{The speed fields}
The \ex{input-speed} and \ex{output-speed} fields determine the
I/O rate of the terminal's line.
The value of these fields is an integer giving the speed
in bits-per-second.
The following speeds are supported by {\Posix}:
\begin{center}
\begin{tabular}{rrrr}
0 & 134 & 600 & 4800 \\
50 & 150 & 1200 & 9600 \\
75 & 200 & 1800 & 19200 \\
110 & 300 & 2400 & 38400 \\
\end{tabular}
\end{center}
Your OS may accept others; it may also allow the special symbols
\ex{'exta} and \ex{'extb}.
\subsubsection{The min and time fields}
The integer \ex{min} and \ex{time} fields determine input blocking
behaviour during non-canonical (raw) input; otherwise, they are ignored.
See the termios(3) man page for further details.
Be warned that {\Posix} allows the base system call's representation
of the \ex{tty-info} record to share storage for the \ex{min} field
and the \ex{ttychar/eof} element of the control-characters string,
and for the \ex{time} field and the \ex{ttychar/eol} element
of the control-characters string.
Many implementations in fact do this.
To stay out of trouble, set the \ex{min} and \ex{time} fields only
if you are putting the terminal into raw mode;
set the eof and eol control-characters only if you are putting
the terminal into canonical mode.
It's ugly, but it's {\Unix}.
\subsection{Using tty-info records}
\defun{make-tty-info}{if of cf lf ispeed ospeed min time}
{tty-info-record}
\defunx{copy-tty-info}{tty-info-record}{tty-info-record}
\begin{desc}
These procedures make it possible to create new \ex{tty-info} records.
The typical method for creating a new record is to copy one retrieved
by a call to the \ex{tty-info} procedure, then modify the copy as desired.
Note that the \ex{make-tty-info} procedure does not take a parameter
to define the new record's control characters.\footnote{
Why? Because the length of the string varies from Unix to Unix.
For example, the word-erase control character (typically control-w)
is provided by most Unixes, but not part of the {\Posix} spec.}
Instead, it simply returns a \ex{tty-info} record whose control-character
string has all elements initialised to {\Ascii} nul.
You may then install the special characters by assigning to the string.
Similarly, the control-character string in the record produced by
\ex{copy-tty-info} does not share structure with the string in the record
being copied, so you may mutate it freely.
\end{desc}
\defun{tty-info}{[fd/port/fname]}{tty-info-record}
\begin{desc}
The \var{fd/port/fname} parameter is an integer file descriptor or
Scheme I/O port opened on a terminal device,
or a file-name for a terminal device; it defaults to the current input port.
This procedure returns a \ex{tty-info} record describing the terminal's
current mode.
\end{desc}
\defun {set-tty-info/now} {fd/port/fname info}{no-value}
\defunx{set-tty-info/drain}{fd/port/fname info}{no-value}
\defunx{set-tty-info/flush}{fd/port/fname info}{no-value}
\begin{desc}
The \var{fd/port/fname} parameter is an integer file descriptor or
Scheme I/O port opened on a terminal device,
or a file-name for a terminal device.
The procedure chosen determines when and how the terminal's mode is altered:
\begin{center}
\begin{tabular}{|ll|} \hline
Procedure & Meaning \\ \hline \hline
\ex{set-tty-info/now} & Make change immediately. \\
\ex{set-tty-info/drain} & Drain output, then change. \\
\ex{set-tty-info/flush} & Drain output, flush input, then change. \\ \hline
\end{tabular}
\end{center}
\oops{If I had defined these with the parameters in the reverse order,
I could have made \var{fd/port/fname} optional. Too late now.}
\end{desc}
\subsection{Other terminal-device procedures}
\defun{send-tty-break}{[fd/port/fname duration]}{no-value}
\begin{desc}
The \var{fd/port/fname} parameter is an integer file descriptor or
Scheme I/O port opened on a terminal device,
or a file-name for a terminal device; it defaults to the current output port.
Send a break signal to the designated terminal.
A break signal is a sequence of continuous zeros on the terminal's transmission
line.
The \var{duration} argument determines the length of the break signal.
A zero value (the default) causes a break of between
0.25 and 0.5 seconds to be sent;
other values determine a period in a manner that will depend upon local
community standards.
\end{desc}
\defun{drain-tty}{[fd/port/fname]}{no-value}
\begin{desc}
The \var{fd/port/fname} parameter is an integer file descriptor or
Scheme I/O port opened on a terminal device,
or a file-name for a terminal device; it defaults to the current output port.
This procedure waits until all the output written to the
terminal device has been transmitted to the device.
If \var{fd/port/fname} is an output port with buffered I/O
enabled, then the port's buffered characters are flushed before
waiting for the device to drain.
\end{desc}
\defun {flush-tty/input} {[fd/port/fname]}{no-value}
\defunx{flush-tty/output}{[fd/port/fname]}{no-value}
\defunx{flush-tty/both} {[fd/port/fname]}{no-value}
\begin{desc}
The \var{fd/port/fname} parameter is an integer file descriptor or
Scheme I/O port opened on a terminal device,
or a file-name for a terminal device; it defaults to the current input
port (\ex{flush-tty/input} and \ex{flush-tty/both}),
or output port (\ex{flush-tty/output}).
These procedures discard the unread input chars or unwritten
output chars in the tty's kernel buffers.
\end{desc}
\defun {start-tty-output}{[fd/port/fname]} {no-value}
\defunx{stop-tty-output} {[fd/port/fname]} {no-value}
\defunx{start-tty-input} {[fd/port/fname]} {no-value}
\defunx{stop-tty-input} {[fd/port/fname]} {no-value}
\begin{desc}
These procedures can be used to control a terminal's input and output flow.
The \var{fd/port/fname} parameter is an integer file descriptor or
Scheme I/O port opened on a terminal device,
or a file-name for a terminal device; it defaults to the current input
or output port.
The \ex{stop-tty-output} and \ex{start-tty-output} procedures suspend
and resume output from a terminal device.
The \ex{stop-tty-input} and \ex{start-tty-input} procedures transmit
the special STOP and START characters to the terminal with the intention
of stopping and starting terminal input flow.
\end{desc}
% --- Obsolete ---
% \defun {encode-baud-rate}{speed}{code}
% \defunx{decode-baud-rate}{code}{speed}
% \begin{desc}
% These procedures can be used to map between the special codes
% that are legal values for the \ex{tty-info:input-speed} and
% \ex{tty-info:output-speed} fields, and actual integer bits-per-second speeds.
% The codes are the values bound to the
% \ex{baud/4800}, \ex{baud/9600}, and other named constants defined above.
% For example:
% \begin{code}
% (decode-baud-rate baud/9600) {\evalto} 9600
%
% ;;; These two expressions are identical:
% (set-tty-info:input-speed ti baud/14400)
% (set-tty-info:input-speed ti (encode-baud-rate 14400))\end{code}
% \end{desc}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Control terminals, sessions, and terminal process groups}
\defun{open-control-tty}{tty-name [flags]}{port}
\begin{desc}
This procedure opens terminal device \var{tty-name} as the process'
control terminal
(see the \ex{termios} man page for more information on control terminals).
The \var{tty-name} argument is a file-name such as \ex{/dev/ttya}.
The \var{flags} argument is a value suitable as the second argument
to the \ex{open-file} call; it defaults to \ex{open/read+write}, causing
the terminal to be opened for both input and output.
The port returned is an input port if the \var{flags} permit it,
otherwise an output port.
\R4RS/\scm/scsh do not have input/output ports,
so it's one or the other.
However, you can get both read and write ports open on a terminal
by opening it read/write, taking the result input port,
and duping it to an output port with \ex{dup->outport}.
This procedure guarantees to make the opened terminal the
process' control terminal only if the process does not have
an assigned control terminal at the time of the call.
If the scsh process already has a control terminal, the results are undefined.
To arrange for the process to have no control terminal prior to calling
this procedure, use the \ex{become-session-leader} procedure.
%\oops{The control terminal code was added just before release time
% for scsh release 0.4. Control terminals are one of the less-standardised
% elements of Unix. We can't guarantee that the terminal is definitely
% attached as a control terminal; we were only able to test this out
% on HP-UX. If you intend to use this feature on your OS, you should
% test it out first. If your OS requires the use of the \ex{TIOCSCTTY}
% \ex{ioctl}, uncomment the appropriate few lines of code in the
% file \ex{tty1.c} and send us email.}
\end{desc}
\defun{become-session-leader}{}{\integer}
\begin{desc}
This is the C \ex{setsid()} call.
{\Posix} job-control has a three-level hierarchy:
session/process-group/process.
Every session has an associated control terminal.
This procedure places the current process into a brand new session,
and disassociates the process from any previous control terminal.
You may subsequently use \ex{open-control-tty} to open a new control
terminal.
It is an error to call this procedure if the current process is already
a process-group leader.
One way to guarantee this is not the case is only to call this procedure
after forking.
\end{desc}
\defun {tty-process-group}{fd/port/fname}{\integer}
\defunx{set-tty-process-group}{fd/port/fname pgrp}{\undefined}
\begin{desc}
This pair of procedures gets and sets the process group of a given
terminal.
\end{desc}
\defun{control-tty-file-name}{}{\str}
\begin{desc}
Return the file-name of the process' control tty.
On every version of Unix of which we are aware, this is just the string
\ex{"/dev/tty"}.
However, this procedure uses the official {\Posix} interface, so it is more
portable than simply using a constant string.
\end{desc}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Pseudo-terminals}
Scsh implements an interface to Berkeley-style pseudo-terminals.
\defun{fork-pty-session}{thunk}{[process pty-in pty-out tty-name]}
\begin{desc}
This procedure gives a convenient high-level interface to pseudo-terminals.
It first allocates a pty/tty pair of devices, and then forks a child
to execute procedure \var{thunk}.
In the child process
\begin{itemize}
\item Stdio and the current I/O ports are bound to the terminal device.
\item The child is placed in its own, new session
(see \ex{become\=session\=leader}).
\item The terminal device becomes the new session's controlling terminal
(see \ex{open-control-tty}).
\item The \ex{(error-output-port)} is unbuffered.
\end{itemize}
The \ex{fork-pty-session} procedure returns four values:
the child's process object, two ports open on the controlling pty device,
and the name of the child's corresponding terminal device.
\end{desc}
\defun{open-pty}{}{[pty-inport tty-name]}
\begin{desc}
This procedure finds a free pty/tty pair, and opens the pty device
with read/write access.
It returns a port on the pty,
and the name of the corresponding terminal device.
The port returned is an input port---Scheme doesn't allow input/output
ports.
However, you can easily use \ex{(dup->outport \var{pty-inport})}
to produce a matching output port.
You may wish to turn off I/O buffering for this output port.
\end{desc}
\defun {pty-name->tty-name}{pty-name}{tty-name}
\defunx{tty-name->pty-name}{tty-name}{pty-name}
\begin{desc}
These two procedures map between corresponding terminal and pty controller
names.
For example,
\begin{code}
(pty-name->tty-name "/dev/ptyq3") {\evalto} "/dev/ttyq3"
(tty-name->pty-name "/dev/ttyrc") {\evalto} "/dev/ptyrc"\end{code}
\remark{This is rather Berkeley-specific. SVR4 ptys are rare enough that
I've no real idea if it generalises across the Unix gap. Experts
are invited to advise. Users feel free to not worry---the majority
of current popular Unix systems use Berkeley ptys.}
\end{desc}
\defun{make-pty-generator}{}{\proc}
\begin{desc}
This procedure returns a generator of candidate pty names.
Each time the returned procedure is called, it produces a
new candidate.
Software that wishes to search through the set of available ptys
can use a pty generator to iterate over them.
After producing all the possible ptys, a generator returns {\sharpf}
every time it is called.
Example:
\begin{code}
(define pg (make-pty-generator))
(pg) {\evalto} "/dev/ptyp0"
(pg) {\evalto} "/dev/ptyp1"
\vdots
(pg) {\evalto} "/dev/ptyqe"
(pg) {\evalto} "/dev/ptyqf" \textit{(Last one)}
(pg) {\evalto} {\sharpf}
(pg) {\evalto} {\sharpf}
\vdots\end{code}
\end{desc}
% Flag tables
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Control-chars indices
%%%%%%%%%%%%%%%%%%%%%%%
\begin{table}[p]
\begin{center}
\begin{tabular}{|lll|} \hline
Scsh & C & Typical char \\
\hline\hline
{\Posix} & & \\
\exi{ttychar/delete-char} & \ex{ERASE} & del \\
\exi{ttychar/delete-line} & \ex{KILL} & \verb|^U| \\
\exi{ttychar/eof} & \ex{EOF} & \verb|^D| \\
\exi{ttychar/eol} & \ex{EOL} & \\
\exi{ttychar/interrupt} & \ex{INTR} & \verb|^C| \\
\exi{ttychar/quit} & \ex{QUIT} & \verb|^\| \\
\exi{ttychar/suspend} & \ex{SUSP} & \verb|^Z| \\
\exi{ttychar/start} & \ex{START} & \verb|^Q| \\
\exi{ttychar/stop} & \ex{STOP} & \verb|^S| \\
\hline\hline
{SVR4 and 4.3+BSD} & & \\
\exi{ttychar/delayed-suspend} & \ex{DSUSP} & \verb|^Y| \\
\exi{ttychar/delete-word} & \ex{WERASE} & \verb|^W| \\
\exi{ttychar/discard} & \ex{DISCARD} & \verb|^O| \\
\exi{ttychar/eol2} & \ex{EOL2} & \\
\exi{ttychar/literal-next} & \ex{LNEXT} & \verb|^V| \\
\exi{ttychar/reprint} & \ex{REPRINT} & \verb|^R| \\
\hline\hline
{4.3+BSD} & & \\
\exi{ttychar/status} & \ex{STATUS} & \verb|^T| \\
\hline
\end{tabular}
\end{center}
\caption{Indices into the \protect\ex{tty-info} record's
\protect\var{control-chars} string,
and the character traditionally found at each index.
Only the indices for the {\Posix} entries are guaranteed to
be non-\sharpf.}
\label{table:ttychars}
\end{table}
% Input flags
%%%%%%%%%%%%%
\begin{table}[p]
\begin{center}\small
\begin{tabular}{|lll|} \hline
Scsh & C & Meaning \\
\hline\hline
\Posix & & \\
\exi{ttyin/check-parity}
& \ex{INPCK} & Check parity. \\
\exi{ttyin/ignore-bad-parity-chars}
& \ex{IGNPAR} & Ignore chars with parity errors. \\
\exi{ttyin/mark-parity-errors}
& \ex{PARMRK} & Insert chars to mark parity errors.\\
\exi{ttyin/ignore-break}
& \ex{IGNBRK} & Ignore breaks. \\
\exi{ttyin/interrupt-on-break}
& \ex{BRKINT} & Signal on breaks. \\
\exi{ttyin/7bits}
& \ex{ISTRIP} & Strip char to seven bits. \\
\exi{ttyin/cr->nl}
& \ex{ICRNL} & Map carriage-return to newline. \\
\exi{ttyin/ignore-cr}
& \ex{IGNCR} & Ignore carriage-returns. \\
\exi{ttyin/nl->cr}
& \ex{INLCR} & Map newline to carriage-return. \\
\exi{ttyin/input-flow-ctl}
& \ex{IXOFF} & Enable input flow control. \\
\exi{ttyin/output-flow-ctl}
& \ex{IXON} & Enable output flow control. \\
\hline\hline
{SVR4 and 4.3+BSD} & & \\
\exi{ttyin/xon-any} & \ex{IXANY} & Any char restarts after stop. \\
\exi{ttyin/beep-on-overflow} & \ex{IMAXBEL} & Ring bell when queue full. \\
\hline\hline
{SVR4} & & \\
\exi{ttyin/lowercase} & \ex{IUCLC} & Map upper case to lower case. \\
\hline
\end{tabular}
\end{center}
\caption{Input-flags. These are the named flags for the \protect\ex{tty-info}
record's \protect\var{input-flags} field.
These flags generally control the processing of input chars.
Only the {\Posix} entries are guaranteed to be non-\sharpf.
}
\label{table:ttyin}
\end{table}
% Output flags
%%%%%%%%%%%%%%
\begin{table}[p]
\begin{center}%\small
\begin{tabular}{|lll|} \hline
Scsh & C & Meaning \\ \hline\hline
\multicolumn{3}{|l|}{\Posix} \\
\exi{ttyout/enable} & \ex{OPOST} & Enable output processing. \\
\hline\hline
\multicolumn{3}{|l|}{SVR4 and 4.3+BSD} \\
\exi{ttyout/nl->crnl} & \ex{ONLCR} & Map nl to cr-nl. \\
\hline\hline
\multicolumn{3}{|l|}{4.3+BSD} \\
\exi{ttyout/discard-eot} & \ex{ONOEOT} & Discard EOT chars. \\
\exi{ttyout/expand-tabs} & \ex{OXTABS}\footnote{
Note this is distinct from the SVR4-equivalent
\ex{ttyout/tab-delayx} flag defined in
table~\ref{table:ttydelays}.}
& Expand tabs. \\
\hline\hline
\multicolumn{3}{|l|}{SVR4} \\
\exi{ttyout/cr->nl} & \ex{OCRNL} & Map cr to nl. \\
\exi{ttyout/nl-does-cr} & \ex{ONLRET}& Nl performs cr as well. \\
\exi{ttyout/no-col0-cr} & \ex{ONOCR} & No cr output in column 0. \\
\exi{ttyout/delay-w/fill-char} & \ex{OFILL} & Send fill char to delay. \\
\exi{ttyout/fill-w/del} & \ex{OFDEL} & Fill char is {\Ascii} DEL. \\
\exi{ttyout/uppercase} & \ex{OLCUC} & Map lower to upper case. \\
\hline
\end{tabular}
\end{center}
\caption{Output-flags. These are the named flags for the \protect\ex{tty-info}
record's \protect\var{output-flags} field.
These flags generally control the processing of output chars.
Only the {\Posix} entries are guaranteed to be non-\sharpf.}
\label{table:ttyout}
\end{table}
% Delay flags
%%%%%%%%%%%%%
\begin{table}[p]
\begin{tabular}{r|ll|} \cline{2-3}
& Value & Comment \\ \cline{2-3}
{Backspace delay} & \exi{ttyout/bs-delay} & Bit-field mask \\
& \exi{ttyout/bs-delay0} & \\
& \exi{ttyout/bs-delay1} & \\
\cline{2-3}
{Carriage-return delay} & \exi{ttyout/cr-delay} & Bit-field mask \\
& \exi{ttyout/cr-delay0} & \\
& \exi{ttyout/cr-delay1} & \\
& \exi{ttyout/cr-delay2} & \\
& \exi{ttyout/cr-delay3} & \\
\cline{2-3}
{Form-feed delay} & \exi{ttyout/ff-delay} & Bit-field mask \\
& \exi{ttyout/ff-delay0} & \\
& \exi{ttyout/ff-delay1} & \\
\cline{2-3}
{Horizontal-tab delay} & \exi{ttyout/tab-delay} & Bit-field mask \\
& \exi{ttyout/tab-delay0} & \\
& \exi{ttyout/tab-delay1} & \\
& \exi{ttyout/tab-delay2} & \\
& \exi{ttyout/tab-delayx} & Expand tabs \\
\cline{2-3}
{Newline delay} & \exi{ttyout/nl-delay} & Bit-field mask \\
& \exi{ttyout/nl-delay0} & \\
& \exi{ttyout/nl-delay1} & \\
\cline{2-3}
{Vertical tab delay} & \exi{ttyout/vtab-delay} & Bit-field mask \\
& \exi{ttyout/vtab-delay0} & \\
& \exi{ttyout/vtab-delay1} & \\
\cline{2-3}
{All} & \exi{ttyout/all-delay} & Total bit-field mask \\
\cline{2-3}
\end{tabular}
\caption{Delay constants. These are the named flags for the
\protect\ex{tty-info} record's \protect\var{output-flags} field.
These flags control the output delays associated with printing
special characters.
They are non-{\Posix}, and have non-{\sharpf} values
only on SVR4 systems.}
\label{table:ttydelays}
\end{table}
% Control flags
%%%%%%%%%%%%%%%
\begin{table}[p]
\begin{center}%\small
\begin{tabular}{|lll|} \hline
Scsh & C & Meaning \\
\hline\hline
\multicolumn{3}{|l|}{\Posix} \\
\exi{ttyc/char-size} & \ex{CSIZE} & Character size mask \\
\exi{ttyc/char-size5} & \ex{CS5} & 5 bits \\
\exi{ttyc/char-size6} & \ex{CS6} & 6 bits \\
\exi{ttyc/char-size7} & \ex{CS7} & 7 bits \\
\exi{ttyc/char-size8} & \ex{CS8} & 8 bits \\
\exi{ttyc/enable-parity}& \ex{PARENB} & Generate and detect parity. \\
\exi{ttyc/odd-parity} & \ex{PARODD} & Odd parity. \\
\exi{ttyc/enable-read} & \ex{CREAD} & Enable reception of chars. \\
\exi{ttyc/hup-on-close} & \ex{HUPCL} & Hang up on last close. \\
\exi{ttyc/no-modem-sync}& \ex{CLOCAL} & Ignore modem lines. \\
\exi{ttyc/2-stop-bits} & \ex{CSTOPB} & Send two stop bits. \\
\hline\hline
\multicolumn{3}{|l|}{4.3+BSD} \\
\exi{ttyc/ignore-flags} & \ex{CIGNORE} & Ignore control flags. \\
\exi{ttyc/CTS-output-flow-ctl} & \verb|CCTS_OFLOW| & CTS flow control of output \\
\exi{ttyc/RTS-input-flow-ctl} & \verb|CRTS_IFLOW| & RTS flow control of input \\
\exi{ttyc/carrier-flow-ctl} & \ex{MDMBUF} & \\
\hline
\end{tabular}
\end{center}
\caption{Control-flags. These are the named flags for the \protect\ex{tty-info}
record's \protect\var{control-flags} field.
These flags generally control the details of the terminal's
serial line.
Only the {\Posix} entries are guaranteed to be non-\sharpf.}
\label{table:ttyctl}
\end{table}
% Local flags
%%%%%%%%%%%%%
\begin{table}[p]
\begin{center}\small
\begin{tabular}{|lll|} \hline
Scsh & C & Meaning \\
\hline\hline
\multicolumn{3}{|l|}{\Posix} \\
\exi{ttyl/canonical} & \ex{ICANON} & Canonical input processing. \\
\exi{ttyl/echo} & \ex{ECHO} & Enable echoing. \\
\exi{ttyl/echo-delete-line} & \ex{ECHOK} & Echo newline after line kill. \\
\exi{ttyl/echo-nl} & \ex{ECHONL} & Echo newline even if echo is off. \\
\exi{ttyl/visual-delete}& \ex{ECHOE} & Visually erase chars. \\
\exi{ttyl/enable-signals} & \ex{ISIG} & Enable \verb|^|C, \verb|^|Z signalling. \\
\exi{ttyl/extended} & \ex{IEXTEN} & Enable extensions. \\
\exi{ttyl/no-flush-on-interrupt}
& \ex{NOFLSH} & Don't flush after interrupt. \\
\exi{ttyl/ttou-signal} & \ex{TOSTOP} & \ex{SIGTTOU} on background output. \\
\hline\hline
\multicolumn{3}{|l|}{SVR4 and 4.3+BSD} \\
\exi{ttyl/echo-ctl} & \ex{ECHOCTL}
& Echo control chars as ``\verb|^X|''. \\
\exi{ttyl/flush-output} & \ex{FLUSHO} & Output is being flushed. \\
\exi{ttyl/hardcopy-delete} & \ex{ECHOPRT} & Visual erase for hardcopy. \\
\exi{ttyl/reprint-unread-chars} & \ex{PENDIN} & Retype pending input. \\
\exi{ttyl/visual-delete-line} & \ex{ECHOKE} & Visually erase a line-kill. \\
\hline\hline
\multicolumn{3}{|l|}{4.3+BSD} \\
\exi{ttyl/alt-delete-word} & \ex{ALTWERASE} & Alternate word erase algorithm \\
\exi{ttyl/no-kernel-status} & \ex{NOKERNINFO} & No kernel status on \verb|^T|. \\
\hline\hline
\multicolumn{3}{|l|}{SVR4} \\
\exi{ttyl/case-map} & \ex{XCASE} & Canonical case presentation \\
\hline
\end{tabular}
\end{center}
\caption{Local-flags. These are the named flags for the \protect\ex{tty-info}
record's \protect\var{local-flags} field.
These flags generally control the details of the line-editing
user interface.
Only the {\Posix} entries are guaranteed to be non-\sharpf.}
\label{table:ttylocal}
\end{table}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

38
doc/scsh-manual/xman.tex Normal file
View File

@ -0,0 +1,38 @@
%&latex -*- latex -*-
% This is the reference manual for the Scheme Shell.
\documentclass[twoside]{report}
\usepackage{code,boxedminipage,draftfooters,makeidx,palatino,ct,
headings,mantitle,array,matter,a4,tex2page}
% Style issues
\parskip = 3pt plus 3pt
\sloppy
\input{decls}
\makeindex
%%% End preamble
\begin{document}
\frontmatter
\include{front}
\mainmatter
\include{intro}
\include{procnotation}
\include{syscalls}
\include{network}
\include{strings}
\include{rdelim}
\include{awk}
\include{miscprocs}
\include{running}
\include{changes}
\include{todo}
\backmatter
\printindex
\end{document}