diff --git a/doc/scsh-paper/boxedminipage.sty b/doc/scsh-paper/boxedminipage.sty new file mode 100644 index 0000000..19e3e9d --- /dev/null +++ b/doc/scsh-paper/boxedminipage.sty @@ -0,0 +1,45 @@ +% boxedminipage.sty +% +% adds the boxedminipage environment---just like minipage, but has a +% box round it! +% +% The thickness of the rules around the box is controlled by +% \fboxrule, and the distance between the rules and the edges of the +% inner box is governed by \fboxsep. +% +% This code is based on Lamport's minipage code. + +\def\boxedminipage{\@ifnextchar [{\@iboxedminipage}{\@iboxedminipage[c]}} + +\def\@iboxedminipage[#1]#2{\leavevmode \@pboxswfalse + \if #1b\vbox + \else \if #1t\vtop + \else \ifmmode \vcenter + \else \@pboxswtrue $\vcenter + \fi + \fi + \fi\bgroup % start of outermost vbox/vtop/vcenter + \hsize #2 + \hrule\@height\fboxrule + \hbox\bgroup % inner hbox + \vrule\@width\fboxrule \hskip\fboxsep \vbox\bgroup % innermost vbox + \advance\hsize -2\fboxrule \advance\hsize-2\fboxsep + \textwidth\hsize \columnwidth\hsize + \@parboxrestore + \def\@mpfn{mpfootnote}\def\thempfn{\thempfootnote}\c@mpfootnote\z@ + \let\@footnotetext\@mpfootnotetext + \let\@listdepth\@mplistdepth \@mplistdepth\z@ + \@minipagerestore\@minipagetrue + \everypar{\global\@minipagefalse\everypar{}}} + +\def\endboxedminipage{% + \par\vskip-\lastskip + \ifvoid\@mpfootins\else + \vskip\skip\@mpfootins\footnoterule\unvbox\@mpfootins\fi + \egroup % ends the innermost \vbox + \hskip\fboxsep \vrule\@width\fboxrule + \egroup % ends the \hbox + \hrule\@height\fboxrule + \egroup% ends the vbox/vtop/vcenter + \if@pboxsw $\fi} + diff --git a/doc/scsh-paper/code.sty b/doc/scsh-paper/code.sty new file mode 100644 index 0000000..2786d61 --- /dev/null +++ b/doc/scsh-paper/code.sty @@ -0,0 +1,296 @@ +% code.sty: -*- latex -*- +% Latex macros for a "weak" verbatim mode. +% -- like verbatim, except \, {, and } have their usual meanings. 
+ +% Environments: code, tightcode, codeaux, codebox, centercode +% Commands: \dcd, \cddollar, \cdmath, \cd, \codeallowbreaks, \codeskip, \^ +% Already defined in LaTeX, but of some relevance: \#, \$, \%, \&, \_, \{, \} + +% Changelog at the end of the file. + +% These commands give you an environment, code, that is like verbatim +% except that you can still insert commands in the middle of the environment: +% \begin{code} +% for(x=1; x] option, then the following newline will +% be read *after* ^M is bound to \cr, so we're cool. If there isn't +% an option given (i.e., default to [c]), then the @\ifnextchar will +% gobble up the newline as it gobbles whitespace. So we insert the +% \cr explicitly. Isn't TeX fun? +\def\codebox{\leavevmode\@ifnextchar[{\@codebox}{\@codebox[c]\cr}} %] + +\def\@codebox[#1]% + {\hbox\bgroup$\if #1t\vtop \else \if#1b\vbox \else \vcenter \fi\fi\bgroup% + \tabskip\z@\setupcode\cd@obeycr% just before cd@obey + \halign\bgroup##\hfil\span} + +\def\endcodebox{\crcr\egroup\egroup\m@th$\egroup} + +% Center the box on the page: +\newenvironment{centercode}% + {\begin{center}\begin{codebox}[c]}% + {\end{codebox}\end{center}} + + +%% code, codeaux, tightcode +%%============================================================================= +%% Code environment as described above. Lines are kept on one page. +%% This actually works by setting a huge penalty for breaking +%% between lines of code. Code is indented same as other displayed paras. +%% Note: to increase left margin, use \begin{codeaux}{\leftmargin=1in}. + +% To allow pagebreaks, say \codeallowbreaks immediately inside the env. +% You can allow breaks at specific lines with a \pagebreak form. + +%% N.B.: The \global\@ignoretrue command must be performed just inside +%% the *last* \end{...} before the following text. If not, you will +%% get an extra space on the following line. Blech. + +%% This environment takes two arguments. 
+%% The second, required argument is the \list parameters to override the +%% \@listi... defaults. +%% - Usefully set by clients: \topsep \leftmargin +%% - Possible, but less useful: \partopsep +%% The first, optional argument is the extra \parskip glue that you get around +%% \list environments. It defaults to the value of \parskip. +\def\codeaux{\@ifnextchar[{\@codeaux}{\@codeaux[\parskip]}} %] +\def\@codeaux[#1]#2{% + \bgroup\parskip#1% + \begin{list}{}% + {\parsep\z@\rightskip\z@\listparindent\z@\itemindent\z@#2}% + \item[]\setupcode\cd@obeylines}% +\def\endcodeaux{\end{list}\leavevmode\egroup\ignorespaces\global\@ignoretrue} + +%% Code env is codeaux with the default margin and spacing \list params: +\def\code{\codeaux{}} \let\endcode=\endcodeaux + +%% Like code, but with no extra vertical space above and below. +\def\tightcode{\codeaux[=0pt]{\topsep\z@}}% +\let\endtightcode\endcodeaux +% {\vspace{-1\parskip}\begin{codeaux}{\partopsep\z@\topsep\z@}}% +% {\end{codeaux}\vspace{-1\parskip}} + + + +% Reasonable separation between lines of code +\newcommand{\codeskip}{\penalty0\vspace{2ex}} + + +% \cd is used to build a code environment in the middle of text. +% Note: only difference from display code is that cr's are taken +% as unbreakable spaces instead of linebreaks. + +\def\cd{\leavevmode\begingroup\ifmmode\let\startcode=\startmcode\else% + \let\startcode\starttcode\fi% + \setupcode\cd@obeycrsp\startcode} + +\def\starttcode#1{#1\endgroup} +\def\startmcode#1{\hbox{#1}\endgroup} + + +% Restore $&#^_~% to their normal catcodes +% Define \^ to give the ^ char. +% \dcd points to this guy inside a code env. +\def\cd@dcd{\catcode`\$=3\catcode`\&=4\catcode`\#=6\catcode`\^=7% + \catcode`\_=8\catcode`\~=13\catcode`\%=14\def\^{\char`\^}} + +% Selectively enable $, and $^_ as special. +% \cd@mathspecial also defines \^ to give the ^ char. +% \cddollar and \cdmath point to these guys inside a code env. 
+\def\cd@dollarspecial{\catcode`\$=3} +\def\cd@mathspecial{\catcode`\$=3\catcode`\^=7\catcode`\_=8% + \def\^{\char`\^}} + + +% Change log: +% Started off as some macros found in C. Rich's library. +% Olin 1/90: +% Removed \makeatletter, \makeatother's -- they shouldn't be there, +% because style option files are read with makeatletter. The terminal +% makeatother screwed things up for the following style options. +% Olin 3/91: +% Rewritten. +% - Changed things so blank lines don't get compressed out (the \leavevmode +% in \cd@cr and \cd@crwb). +% - Changed names to somewhat less horrible choices. +% - Added lots of doc, so casual hackers can more easily mess with all this. +% - Removed `'"@ from the set of hacked chars, since they are already +% non-special. +% - Removed the bigcode env, which effect can be had with the \codeallowbreaks +% command. +% - Removed the \@noligs command, since it's already defined in latex.tex. +% - Win big with the new \dcd, \cddollar, and \cdmath commands. +% - Now, *only* the chars \{} are special inside the code env. If you need +% more, use the \dcd command inside a group. +% - \cd now works inside math mode. (But if you use it in a superscript, +% it still comes out full size. You must explicitly put a \scriptsize\tt +% inside the \cd: $x^{\cd{\scriptsize\tt...}}$.) A \leavevmode was added +% so that if you begin a paragraph with a \cd{...}, TeX realises you +% are starting a paragraph. +% - Added the codebox env. Tricky bit involving the first line hacked +% with help from David Long. +% Olin 8/94 +% Changed the font commands for LaTeX2e. diff --git a/doc/scsh-paper/ct.sty b/doc/scsh-paper/ct.sty new file mode 100644 index 0000000..1edfbc0 --- /dev/null +++ b/doc/scsh-paper/ct.sty @@ -0,0 +1,6 @@ +% Loads cmtt fonts in on \tt. -*- latex -*- +% I prefer these to the Courier fonts that latex gives you w/postscript styles. +% Courier is too spidery and too wide -- it's hard to get 80 chars on a line. 
+% -Olin + +\renewcommand{\ttdefault}{cmtt} diff --git a/doc/scsh-paper/lcs-note.sty b/doc/scsh-paper/lcs-note.sty new file mode 100644 index 0000000..36c7b78 --- /dev/null +++ b/doc/scsh-paper/lcs-note.sty @@ -0,0 +1,84 @@ +% LCS note style modification of title -*- latex -*- +% +% To use this, you should have the companion postscript file mitlogo.ps +% somewhere latex can find it. If you can't do it, then just say \nologo +% and you'll get a logo-less alternative. + +% This is useful for a little paper or note you're writing that isn't +% a T.R. or a conference submission, just a paper. Maybe an early draft. +% Something to hand out to your friends for comments. That kind of thing. +% An example is the three "Scheme Flow-Analysis Working Notes" I wrote +% that later got folded into my dissertation. +% -Olin + +\typeout{LCS Note style option -- 12 August 1994} + +\newcount\notenumber +\newif\if@logo \@logotrue + +% +% Here are the basic parameters and their defaults +% Note that \author, \title, and \date still work, too. +% To get the time in the date, try \date{\Time, \today} +% +\def\notenum#1{\notenumber=#1} +\def\project#1{\def\@project{#1}} +\def\dept#1{\def\@dept{#1}} +\def\university#1{\def\@univ{#1}} + +\def\@project{Personal Information Architecture Project} +\def\@dept{Laboratory for Computer Science} +\def\@note{Note} +% The 'tute: +\def\@univ{\normalfont\scshape massachusetts institute of technology} + +% +% The following "Time" macro will return the current (24 hour) time. 
+% +\def\Time{{\count1=\time \count2=\count1 \divide\count1 by 60 +\the\count1 :\multiply \count1 by 60 \advance\count2 by -\count1 +\count1=\count2 \divide\count1 by 10 \the\count1 +\multiply \count1 by 10 \advance\count2 by -\count1 \the\count2}} + +\def\@logofile{mitlogo.ps} +\def\nologo{\@logofalse} + + +\def\@maketitle{ + \newdimen\noteheadwidth + \noteheadwidth = \textwidth + \def\@headline##1{\hbox to\noteheadwidth{##1}} + + \newpage + \null + \vskip -\topmargin \vskip -0.5in + \vskip -\headsep + \vskip -\headheight + + \if@logo + \@headline{\fontsize{11}{11}\selectfont\@univ\hfil}% + \kern .72in + \@headline{\fontsize{17}{17}\selectfont\special{psfile=\@logofile} + \hskip 23mm plus 1fil + {\@dept} + \hskip 23mm plus 1fil minus 23mm} + \else + \hbox to\textwidth{\hfill {\LARGE {\bf \@univ}} \hfill} + \vskip.5em + \hbox to\textwidth{\hfill {\Large {\bf \@dept}} \hfill} + \fi + + \vskip 1.5em + \@headline{\@project\ \@note\ \the\notenumber \hfill \@date} + \vskip 0.5em + \hrule width \noteheadwidth + \bigskip + \begin{center} + {\LARGE \@title \par} + \vskip .5em + {\lineskip .5em + {\em \begin{tabular}[t]{c} \@author \end{tabular}} + \par} + \vskip 1em + \end{center} + \par \vskip 1.5em} diff --git a/doc/scsh-paper/mitlogo.ps b/doc/scsh-paper/mitlogo.ps new file mode 100644 index 0000000..d92da94 --- /dev/null +++ b/doc/scsh-paper/mitlogo.ps @@ -0,0 +1,613 @@ +%! 
+/m {moveto} def % x y m - + +/l {lineto} def % x y l - + +/S {stroke} def % - S - + +/s {closepath S} def % - s - + +/c {curveto} def % x1 y1 x2 y2 x3 y3 c - + +/v {currentpoint 6 2 roll curveto} def % x2 y2 x3 y3 v - + +/y {2 copy curveto} def % x1 y1 x2 y2 y - + +/j {setlinejoin} def % linejoin j - + +/J {setlinecap} def % linecap J - + +/w {setlinewidth} def % linewidth w - + +/b {closepath B} def % - b - + +/B {gsave F grestore S} def % - B - + +/F {fill} def % - F - + +/f {closepath F} def % - f - + +.8125 .75 scale +-242 -314 translate + +0 -3.5 translate + +%%Note: +newpath +272.8118 317.4634 m +272.8118 328.6408 l +257.9086 328.6408 l +257.9086 317.4634 l +272.8118 317.4634 l +s +265.3602 323.0521 m +S +271.8804 328.6408 m +271.8804 331.8585 l +261.6344 331.8585 l +261.6344 328.6408 l +271.8804 328.6408 l +s +266.7574 330.2496 m +S +263.1163 339.9452 m +265.0215 339.9452 l +S +265.0215 339.9452 m +268.7473 339.9452 l +268.7473 336.4734 l +256.9772 336.4734 l +261.6344 331.8585 l +S +268.7473 336.4734 m +272.8118 336.4734 l +272.8118 331.8585 l +271.8804 331.8585 l +S +257.9086 317.4634 m +245.8421 317.4634 l +251.4732 322.798 l +251.4732 335.2456 l +257.9086 335.2456 l +257.9086 328.6408 l +S +251.4732 322.798 m +251.4732 322.798 l +256.3421 319.1569 l +S +265.0215 341.9351 m +265.5719 341.9774 l +266.08 342.0407 l +266.609 342.1891 l +267.0538 342.2948 l +267.5195 342.4855 l +268.5356 343.0992 l +269.488 343.8613 l +269.7421 344.1153 l +269.9114 344.3694 l +269.9538 344.7081 l +269.9538 357.7483 l +S +265.0215 356.2032 m +265.0215 339.9452 l +S +263.1163 339.9452 m +260.0256 339.9452 l +257.7816 338.8444 l +260.3219 336.4734 l +S +260.3219 336.4734 m +S +263.1163 355.187 m +263.1305 339.9452 l +263.1305 341.9879 l +262.5801 342.0313 l +262.0724 342.0956 l +261.5433 342.245 l +261.0989 342.3517 l +260.6335 342.543 l +259.6188 343.1588 l +258.6677 343.9229 l +258.414 344.1773 l +258.245 344.4317 l +258.2033 344.7704 l +258.205 358.5741 l +270.3985 366.2797 
l +273.5316 367.4229 274.7763 365.4245 274.9287 365.2213 c +275.1828 364.8826 278.4184 359.9652 272.9388 357.6427 c +266.588 357.6427 l +265.4026 355.9068 260.6183 353.8322 258.205 354.1286 c +S +258.205 350.1274 m +S +252.8703 317.421 m +253.7594 318.2678 l +254.7332 317.421 l +S +251.4732 335.2456 m +249.1445 335.2456 l +254.4369 357.9814 l +S +257.9086 328.9371 m +257.8663 327.4129 256.4691 317.8021 255.2836 317.4634 c +255.707 317.4634 l +S +274.8441 358.4894 m +272.8118 336.4734 l +272.8118 357.7274 l +S +267.0538 364.2052 m +267.0538 368.9471 l +265.8683 371.445 262.9469 371.4027 261.8885 369.9632 c +261.1816 369.0019 260.4066 368.0579 260.3643 366.2797 c +260.3148 364.2055 260.0466 364.2261 261.8461 362.4269 c +263.0316 362.1305 l +263.2433 362.0882 263.7514 362.2576 v +264.2594 362.4269 264.7675 362.7233 y +S +263.0316 362.1305 m +S +258.205 358.5741 m +257.5022 358.1057 256.4942 357.871 254.4369 357.9814 c +248.5095 361.4108 254.3099 365.8987 v +255.1143 366.2797 255.7917 366.2374 v +260.3643 366.2374 l +S +1 J 1 j +254.4369 357.9814 m +252.0236 359.8443 250.7534 361.3261 250.076 363.9088 c +249.3986 366.4914 248.7212 370.7676 250.7534 373.9007 c +253.8587 378.6883 257.6607 378.7801 258.1837 378.9813 c +258.1627 366.2797 l +S +0 j +258.1837 378.9813 m +262.8622 378.9813 l +262.8622 377.0337 l +S +0 J +262.7776 373.2233 m +261.8038 373.562 260.7453 373.816 259.8562 374.9591 c +260.6816 382.8341 l +259.0304 382.8341 l +260.4913 389.3966 l +261.4227 390.0317 264.9368 391.3442 267.5618 390.1163 c +269.34 389.0579 269.5941 386.6022 v +269.8481 384.1466 268.959 380.7595 269.5517 380.0398 c +270.1445 379.32 271.9227 379.32 273.3622 379.0236 c +274.8017 378.7273 276.1989 377.3724 276.7916 376.7374 c +277.3844 376.1023 280.6868 371.6991 279.84 366.068 c +278.9932 360.437 275.0557 359.2515 274.8441 358.8282 c +S +262.9893 370.7676 m +262.9893 375.1285 l +265.1062 375.1285 l +265.1062 370.7253 l +S +260.2796 378.4732 m +260.576 377.6265 262.3118 376.9067 262.7776 
376.7797 c +263.2433 376.6527 263.7933 376.5714 264.7675 376.6527 c +265.7836 376.7374 266.3049 377.0141 266.5457 377.2878 c +267.4771 378.3462 267.0961 378.9813 267.1808 379.32 c +267.9005 379.447 l +267.9005 374.3664 l +267.5195 374.1124 266.3763 373.3079 265.0638 373.2233 c +S +0.6774 w +260.0256 387.2797 m +260.8723 387.3643 261.4651 387.322 261.9731 387.0256 c +S +260.6606 387.2797 m +S +0.3387 w +261.1897 386.377 m +261.3055 386.377 261.3993 386.5917 261.3993 386.8563 c +261.3993 387.1208 261.3055 387.3352 261.1897 387.3352 c +261.0738 387.3352 260.98 387.1208 260.98 386.8563 c +260.98 386.5917 261.0738 386.377 261.1897 386.377 c +b +261.1897 386.8563 m +B +0.6774 w +260.4913 389.3966 m +262.3118 389.5659 262.7352 388.7615 263.1163 388.4651 c +263.4973 388.1688 263.963 388.0418 264.1747 386.4329 c +264.2594 381.4579 l +264.5558 380.3995 264.8945 380.1031 265.0215 380.0608 c +265.8104 379.7976 266.0376 379.574 267.6042 379.4894 c +269.9751 379.4894 l +269.9751 365.9834 l +S +263.2856 388.3381 m +264.3864 388.4651 265.4449 388.4228 266.461 388.2958 c +267.4771 388.1688 268.832 387.4914 269.3824 387.1103 c +S +262.8622 390.2857 m +264.3441 390.1163 264.5134 389.1426 264.8098 388.7192 c +265.1062 388.2958 265.4872 387.576 265.4872 387.195 c +265.4872 381.7333 l +265.9953 380.3148 266.207 380.6325 266.5034 380.3785 c +266.7997 380.1244 269.4247 379.574 269.9751 379.4894 c +S +264.6828 390.5821 m +265.9106 390.4551 266.1647 389.4813 266.4187 389.2272 c +266.6727 388.9732 266.8844 387.576 266.9267 387.1526 c +266.9267 382.9611 l +266.8844 380.5902 267.6465 380.4632 268.1969 380.2091 c +269.0078 379.8348 269.7604 379.5131 269.9751 379.4894 c +S +267.1808 390.2434 m +267.9852 389.3119 268.1969 388.7192 268.1969 388.5075 c +268.1969 388.2958 268.2392 386.8986 y +268.2392 383.2152 l +267.8159 379.9974 269.297 379.9209 269.9114 379.6374 c +S +267.6042 379.4894 m +269.6364 379.4894 l +S +269.9751 365.9834 m +S +267.9429 379.4894 m +269.9751 379.4894 l +S +270.3138 
365.9834 m +S +1 J +267.0538 367.8886 m +266.08 368.3543 265.5719 368.82 264.1747 368.7777 c +S +267.0538 364.7342 m +266.295 365.097 265.2437 365.6558 264.1747 365.6233 c +S +267.0114 366.2584 m +266.2527 366.6212 265.2014 367.18 264.1324 367.1475 c +S +266.168 363.6575 m +265.4097 364.0202 265.1167 364.2163 264.0477 364.1838 c +S +258.205 357.1346 m +262.7352 359.5056 l +S +258.205 356.1185 m +262.7776 357.939 l +S +258.205 354.9753 m +262.8622 356.5842 l +S +0 J 0.3387 w +252.9974 376.568 m +252.8703 373.3503 257.1889 370.7676 258.1627 374.6204 c +S +252.6163 376.2293 m +252.574 372.6305 253.4207 368.2273 258.1203 369.6245 c +S +251.0498 374.197 m +250.7957 370.3019 253.124 366.4108 258.1627 366.9148 c +S +249.6103 366.0257 m +249.8609 366.1354 251.7584 367.4821 256.2998 366.9148 c +S +270.0175 378.1769 m +270.9066 375.3825 274.5477 372.5035 276.7916 376.7374 c +S +277.469 375.7636 m +277.5496 372.1692 275.9486 368.378 270.0598 368.9047 c +S +279.967 367.9309 m +278.1041 369.4551 275.9449 369.4975 273.8703 369.2858 c +S +278.8239 362.808 m +278.1464 366.1527 276.5376 368.2696 273.4045 369.1588 c +S +1.0161 w +292.5839 335.7113 m +298.0879 335.7113 l +295.717 330.2073 294.9549 323.4331 301.8137 317.6751 c +294.2774 317.6751 l +292.0758 323.0944 291.4831 327.4976 292.5839 335.7113 c +s +1 J 1 j +297.6645 322.9251 m +293.6 319.3686 l +S +0 J 0 j +295.6533 317.6751 m +296.4367 318.3735 l +297.2835 317.6327 l +S +305.7089 335.7113 m +311.2129 335.7113 l +308.8419 330.2073 308.0798 323.4331 314.9387 317.6751 c +307.4024 317.6751 l +305.2008 323.0944 304.6081 327.4976 305.7089 335.7113 c +s +1 J 1 j +310.7895 322.9251 m +306.725 319.3686 l +S +0 J 0 j +308.7782 317.6751 m +309.5617 318.3735 l +310.4084 317.6327 l +S +301.0093 317.6751 m +307.8258 317.6751 l +S +300.7976 359.5902 m +300.7552 339.0137 l +300.7552 337.0888 300.0494 336.3518 298.0879 335.7113 c +290.0436 335.7113 l +288.9004 357.2193 l +S +2 J +289.9166 337.7012 m +296.6907 337.7012 l +299.0194 338.294 
298.723 339.8181 298.7653 340.1568 c +298.7653 365.687 l +302.7452 365.687 l +290.2976 365.687 l +302.7452 365.687 l +302.7452 365.687 l +302.7452 365.687 l +302.7452 365.687 l +302.7452 365.687 l +S +0 J +311.2129 335.7113 m +S +305.7089 335.7113 m +S +303.9306 362.0459 m +S +1 J 2 j +304.6081 361.8765 m +304.6236 338.947 l +304.561 337.0231 304.8909 336.5632 306.8307 335.8593 c +311.5726 335.8593 l +306.3863 361.9189 l +S +0 J 0 j +299.2311 336.177 m +305.6242 336.177 l +300.6706 336.177 l +S +300.6706 338.1246 m +304.6081 338.1246 l +S +304.6081 338.5056 m +300.7129 343.7133 l +S +310.7895 337.6165 m +307.6988 337.6165 l +306.5556 338.0399 306.8097 338.675 306.725 339.4794 c +306.725 360.0983 l +S +302.7452 374.07 m +302.7452 360.9451 l +298.9347 358.4894 l +S +302.7452 362.3846 m +322.8983 362.3846 l +S +314.0072 362.3846 m +314.0072 374.07 l +302.7452 374.07 l +302.7452 362.3846 l +314.0072 362.3846 l +s +308.3762 368.2273 m +S +316.1241 362.3846 m +316.1241 360.8604 l +311.975 358.5741 l +311.975 362.3846 l +311.975 359.7596 l +307.3177 357.0499 l +S +302.8298 378.6426 m +300.0355 365.687 l +300.0355 382.1144 l +301.221 382.1144 l +300.7976 385.1204 l +300.7976 385.1204 l +S +304.5234 378.2192 m +303.5073 374.07 l +S +298.6807 359.4209 m +296.4791 356.7112 288.2654 357.3886 287.2492 358.1507 c +286.2331 358.9128 281.7452 360.9451 282.4226 367.6346 c +283.0628 373.9562 285.0283 375.5102 291.3137 378.3039 c +S +294.3621 379.4047 m +300.0355 365.687 l +S +300.6706 385.1627 m +302.1948 385.1627 303.0415 386.0095 303.3379 386.6869 c +303.6343 387.3643 303.5073 387.7877 303.3379 388.2111 c +303.1685 388.6345 302.5758 389.3542 301.8984 389.5236 c +301.221 389.693 300.9246 389.7353 300.4165 389.7353 c +289.7896 389.7353 l +286.5295 389.7353 286.2331 383.4692 289.5779 383.4692 c +290.3823 383.4692 290.594 383.4692 v +290.8057 383.4692 291.7371 383.7656 y +S +287.0799 376.1446 m +288.9004 377.6265 289.9166 377.8382 290.848 378.939 c +291.7795 380.0398 291.7371 380.6748 
291.7795 381.5216 c +291.8218 382.3684 291.7795 384.697 291.7795 385.0781 c +291.7795 385.4591 291.7795 385.8825 y +S +1 J 0.6774 w +289.9589 377.9228 m +291.1444 378.4732 292.7109 379.1083 293.6 379.4047 c +294.4891 379.7011 294.7855 379.7857 295.3359 379.9974 c +295.8863 380.2091 296.6061 380.8019 296.7331 381.0982 c +296.8601 381.3946 297.1697 383.0468 297.2411 384.6123 c +297.2835 385.5438 297.1565 385.8401 y +S +0 J 1.0161 w +300.0355 379.2777 m +301.094 379.2777 307.0637 378.4309 308.7996 374.0277 c +S +0.6774 w +295.844 385.8825 m +295.844 384.443 295.8016 381.2252 295.5899 380.6748 c +295.3783 380.1244 294.0234 379.7434 293.219 379.3623 c +292.4145 378.9813 290.7633 378.2615 290.4246 378.1345 c +S +294.5315 385.8401 m +294.5315 384.951 294.5315 381.1829 294.4468 380.8019 c +294.3621 380.4208 293.7694 379.6587 293.219 379.3623 c +S +293.1343 385.8401 m +293.1343 384.8664 293.1766 380.7595 293.0496 380.2938 c +292.9226 379.8281 292.5416 379.2777 291.9488 378.9813 c +291.3561 378.6849 290.2976 378.0922 y +S +1 J +299.4428 377.4571 m +298.7653 378.1769 298.723 379.1083 299.1887 380.6325 c +299.4004 381.3733 l +S +0 J +298.1303 380.8442 m +S +296.7754 381.3523 m +296.5214 381.1829 296.3944 380.2938 v +296.2674 379.4047 296.0557 379.1083 296.352 378.3886 c +296.6484 377.6688 296.9024 377.2031 297.4528 377.0337 c +298.0032 376.8644 298.1303 376.7374 298.85 376.8644 c +299.5698 376.9914 299.9932 377.4571 y +S +1 J +298.85 376.8644 m +298.1726 377.4148 l +297.4952 378.1345 297.4528 379.066 297.9186 380.5902 c +297.9186 380.5902 l +S +300.5859 387.1526 m +300.7552 385.8825 l +291.6525 385.8825 l +291.6101 387.1526 l +300.5859 387.1526 l +s +296.0343 386.5386 m +S +1.0161 w +300.8399 384.697 m +300.6282 385.7978 l +S +0.6774 w +291.6101 386.0518 m +291.0174 385.4168 l +290.6363 385.1204 l +290.2976 384.951 l +S +291.6525 387.0256 m +291.3137 387.195 l +290.9327 387.5337 l +290.5093 387.9994 l +S +299.6968 387.1526 m +300.2048 387.1526 l +300.5859 387.1526 l +300.4589 
387.9571 l +S +0.3387 w +314.0072 373.816 m +316.9709 373.816 l +314.0072 362.3846 l +316.7169 372.8422 l +319.4266 372.8422 l +314.0072 362.3846 l +318.7068 371.4027 l +321.2048 371.4027 l +314.0072 362.3846 l +319.8923 369.7092 l +322.0092 369.7092 l +314.0072 362.3846 l +322.7713 367.5499 l +319.7653 367.5499 l +314.0072 362.3846 l +319.2149 365.433 l +322.856 365.433 l +314.1766 362.4693 l +S +302.7028 367.2112 m +308.5455 367.2112 l +308.3338 366.3644 l +311.7633 366.3644 l +311.7633 362.4269 l +S +311.7419 365.433 m +307.3811 365.433 l +S +311.7633 364.5015 m +307.4447 364.5015 l +S +311.7419 363.5911 m +307.3811 363.5911 l +S +308.3338 366.3644 m +305.3278 366.3644 l +S +285.7674 375.2132 m +285.8944 373.1386 286.6565 371.7837 290.2976 371.8684 c +S +2 J 1.0161 w +290.2976 365.687 m +290.2976 376.2716 l +297.7492 365.687 l +300.0355 365.687 l +291.3137 378.3039 l +S +0.3387 w +290.5517 365.56 m +288.3924 364.7979 282.592 365.1789 283.3964 371.9531 c +S +290.1706 365.6023 m +288.9851 363.2737 286.1908 360.7334 282.719 364.1205 c +S +290.2553 365.2636 m +290.0436 362.5963 289.3238 359.8443 287.1222 358.6588 c +S +291.2291 374.8321 m +292.1605 373.3079 293.3037 367.9309 292.7956 365.7293 c +S +291.9912 365.56 m +292.6686 363.7394 293.2613 359.6749 291.9912 357.5157 c +S +293.8541 365.6023 m +297.1002 355.1677 291.077 348.8786 289.4508 347.8625 c +S +295.717 365.6023 m +298.8226 356.2851 296.7744 339.7077 295.6746 337.7435 c +S +0.6774 w +299.7815 385.3321 m +299.3533 385.4445 299.1792 385.4733 298.8287 385.3954 c +S +299.5061 384.6333 m +299.6466 384.6333 299.7605 384.8074 299.7605 385.0225 c +299.7605 385.2376 299.6466 385.4117 299.5061 385.4117 c +299.3655 385.4117 299.2517 385.2376 299.2517 385.0225 c +299.2517 384.8074 299.3655 384.6333 299.5061 384.6333 c +f +299.5061 385.0225 m +F +0 J 2 j 0.3387 w +304.4387 367.2112 m +303.6129 367.2112 l +302.7452 366.1951 l +302.9779 366.9781 l +S +1 J 0.6774 w +302.7452 363.3584 m +303.7823 362.4056 l +S +297.9186 
380.5902 m +298.2359 381.3733 l +S + +%showpage diff --git a/doc/scsh-paper/scsh-paper.tex b/doc/scsh-paper/scsh-paper.tex new file mode 100644 index 0000000..3d0dd36 --- /dev/null +++ b/doc/scsh-paper/scsh-paper.tex @@ -0,0 +1,2004 @@ +%&latex -*- latex -*- +\documentstyle[code,11pt,lcs-note,boxedminipage,openbib,twoside, + palatino,ct]{article} +\input{headings} + +% Squeeeeeeze those figures onto the page. +\renewcommand{\floatpagefraction}{0.7} +\renewcommand{\topfraction}{.9} +\renewcommand{\bottomfraction}{.9} +\renewcommand{\textfraction}{.1} + +\raggedbottom + +\makeatletter +%% For chapter and section quotes: +\newcommand{\headingquote}[2] +{\begin{flushright}\em\begin{tabular}{@{}l@{}}#1 \\ + {\rm \qquad --- \begin{tabular}[t]{@{}l@{}}#2\end{tabular}}\end{tabular} + \end{flushright}\par\noindent} + +\newcommand{\halfpage}[1]{\parbox[t]{0.5\linewidth}{#1}} + +\def\ie{\mbox{\em i.e.}} % \mbox keeps the last period from +\def\Ie{\mbox{\em I.e.}} % looking like an end-of-sentence. +\def\eg{\mbox{\em e.g.}} +\def\Eg{\mbox{\em E.g.}} +\def\etc{\mbox{\em etc.}} + +\def\Lisp{{\sc Lisp}} +\def\CommonLisp{{\sc Common Lisp}} +\def\Ascii{{\sc Ascii}} +\def\Unix{{Unix}} % No \sc, according to Bart. +\def\Scheme{{Scheme}} % No \sc. +\def\scm{{Scheme 48}} +\def\R4RS{R4RS} + +\newcommand{\synteq}{{\rm ::=}} + +% One-line code examples +%\newcommand{\codex}[1]% One line, centred. Tight spacing. 
+% {$$\abovedisplayskip=.75ex plus 1ex minus .5ex% +% \belowdisplayskip=\abovedisplayskip% +% \abovedisplayshortskip=0ex plus .5ex% +% \belowdisplayshortskip=\abovedisplayshortskip% +% \hbox{\ttt #1}$$} +%\newcommand{\codex}[1]{\begin{tightinset}\ex{#1}\end{tightinset}\ignorespaces} +\newcommand{\codex}[1]{\begin{leftinset}\ex{#1}\end{leftinset}\ignorespaces} + + +% For multiletter vars in math mode: +\newcommand{\var}[1]{{\it #1}} +\newcommand{\vari}[2]{${\it #1}_{#2}$} + +%% What you frequently want when you say \tt: +\def\ttt{\tt\catcode``=13\@noligs\frenchspacing} + +% Works in math mode; all special chars remain special; cheaper than \cd. +% Will not be correct size in super and subscripts, though. +\newcommand{\ex}[1]{\mbox{\ttt #1}} + +\newenvironment{inset} + {\bgroup\parskip=1ex plus 1ex\begin{list}{}% + {\topsep=0pt\rightmargin\leftmargin}% + \item[]}% + {\end{list}\leavevmode\egroup\global\@ignoretrue} + +\newenvironment{leftinset} + {\bgroup\parskip=1ex plus 1ex\begin{list}{}% + {\topsep=0pt}% + \item[]}% + {\end{list}\leavevmode\egroup\global\@ignoretrue} + +\newenvironment{tightinset} + {\bgroup\parskip=0pt\begin{list}{}% + {\topsep=0pt\rightmargin\leftmargin}% + \item[]}% + {\end{list}\leavevmode\egroup\ignorespaces} + +\newcommand{\remark}[1]{\mbox{$<<$}{\bf #1}\mbox{$>>$}} +\newcommand{\note}[1]{\{Note #1\}} + +% For use in code. The \llap magicness makes the lambda exactly as wide as +% the other chars in \tt; the \hskip shifts it right a bit so it doesn't +% crowd the left paren -- which is necessary if \tt is cmtt. +% Note that (\l{x y} (+ x y)) uses the same number of columns in TeX form +% as it produces when typeset. This makes it easy to line up the columns +% in your input. \l is bound to some useless command in LaTeX, so we have to +% define it w/renewcommand. +\let\oldl\l %Save the old \l on \oldl +\renewcommand{\l}[1]{\ \llap{$\lambda$\hskip-.05em}\ (#1)} + +% This horrible hack is for typesetting procedure doc. 
+\newcommand{\proto}[3] {\makebox[\protowidth][l]{{\ttt(#1 {\it #2}\/)} \hfill{\sl #3}}} +\newcommand{\protoitem}[3]{\item[\proto{#1}{#2}{#3}]} +\newlength{\protowidth} \protowidth \linewidth +\newenvironment{protos}{\protowidth \linewidth \begin{description}} +{\end{description}} +\newenvironment{column}{\protowidth \linewidth\begin{tabular}{@{}l@{}}}{\end{tabular}} + +% For subcaptions +\newcommand{\subcaption}[1] +{\unskip\vspace{-2mm}\begin{center}\unskip\em#1\end{center}} + +\makeatother +%%% End preamble + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{document} + +\notenum{3} +\project{Personal Information Architecture} +\title{A {\Scheme} Shell} +\author{Olin Shivers \\ {\ttt shivers@lcs.mit.edu}} +\date{4/94} + +\maketitle +\pagestyle{empty} +\thispagestyle{empty} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\headingquote{ + Although robust enough for general use, adventures \\ + into the esoteric periphery of the C shell may reveal \\ + unexpected quirks.} + {SunOS 4.1 csh(1) man page, 10/2/89} +\vspace{-2em} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section*{Prologue} +%\addcontentsline{toc}{section}{Prologue} +Shell programming terrifies me. There is something about writing a simple +shell script that is just much, much more unpleasant than writing a simple C +program, or a simple {\CommonLisp} program, or a simple Mips assembler program. +Is it trying to remember what the rules are for all the different quotes? Is +it having to look up the multi-phased interaction between filename expansion, +shell variables, quotation, backslashes and alias expansion? Maybe it's having +to subsequently look up which of the twenty or thirty flags I need for my +grep, sed, and awk invocations. 
Maybe it just gets on my nerves that I have to +run two complete programs simply to count the number of files in a directory +(\ex{ls | wc -l}), which seems like several orders of magnitude more cycles +than was really needed. + +Whatever it is, it's an object lesson in angst. Furthermore, during late-night +conversations with office mates and graduate students, I have formed the +impression that I am not alone. In late February\footnote{February 1992, that +is.}, I got embroiled in a multi-way email flamefest about just exactly what it +was about Unix that drove me nuts. In the midst of the debate, I did a rash +thing. I claimed that it would be easy and so much nicer to do shell +programming from {\Scheme}. Some functions to interface to the OS and a few +straightforward macros would suffice to remove the spectre of \cd{#!/bin/csh} +from my life forever. The serious Unix-philes in the debate expressed their +doubts. So I decided to go do it. + +Probably only take a week or two. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Keywords page for the MIT TR +{ +\clearpage +\vspace*{\fill} + +\newcommand{\keywords}[1]% + {\newlength{\kwlength}\settowidth{\kwlength}{\bf Keywords: }% + \setlength{\kwlength}{-\kwlength}\addtolength{\kwlength}{\linewidth}% + \noindent{\bf Keywords: }\parbox[t]{\kwlength}{\raggedright{}#1.}} + + +\keywords{operating systems, programming languages, Scheme, + Unix, shells, functional languages, systems programming} +} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\cleardoublepage +\tableofcontents +\cleardoublepage +\setcounter{page}{1} +\pagestyle{plain} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Introduction} +The central artifact of this paper is a new {\Unix} shell called scsh. +However, I have a larger purpose beyond simply giving a description of +the new system. 
+It has become fashionable recently to claim that ``language doesn't matter.'' +After twenty years of research, operating systems and systems +applications are still mainly written in C and its complex successor, C++. +Perhaps advanced programming languages offer too little for the price they +demand in efficiency and formal rigor. + +I disagree strongly with this position, and +I would like to use scsh, in comparison to other {\Unix} systems programming +languages, to make the point that language {\em does\/} matter. +After presenting scsh in the initial sections of the paper, +I will describe its design principles, +and make a series of points concerning the effect language design has +upon systems programming. +I will use scsh, C, and the traditional shells as linguistic exemplars, +and show how their various notational and semantic tradeoffs affect +the programmer's task. +In particular, I wish to show that a functional language such as Scheme is an +excellent tool for systems programming. +Many of the linguistic points I will make are well-known to the members of +the systems programming community that employ modern programming +languages, such as DEC SRC's Modula-3 \cite{Nelson}. +In this respect, I will merely be serving to recast these ideas in +a different perspective, and perhaps diffuse them more widely. + +The rest of this paper is divided into four parts: +\begin{itemize} +\item In part one, I will motivate the design of scsh + (section~\ref{sec:shells}), and then give a brief + tutorial on the system + (\ref{sec:proc-forms}, \ref{sec:syscall-lib}). +\item In part two, I discuss the design issues behind scsh, + and cover some of the relevant implementation details + (\ref{sec:zen}--\ref{sec:size}). +\item Part three concerns systems programming with advanced languages. + I will illustrate my points by comparing scsh to other {\Unix} + programming systems (\ref{sec:scm-sysprog}, \ref{sec:opl}). 
+\item Finally, we conclude, with some indication of future directions + and a few final thoughts. +\end{itemize} + + +%\part{Shell Programming} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Unix shells} +\label{sec:shells} +Unix shells, such as sh or csh, provide two things at once: an interactive +command language and a programming language. Let us focus on the latter +function: the writing of ``shell scripts''---interpreted programs +that perform small tasks or assemble a collection of Unix tools into +a single application. + +Unix shells are real programming languages. They have variables, if/then +conditionals, and loops. But they are terrible programming languages. The +data structures typically consist only of integers and vectors of strings. +The facilities for procedural abstraction are non-existent to minimal. The +lexical and syntactic structures are multi-phased, unprincipled, and baroque. + +If most shell languages are so awful, why does anyone use them? +There are a few important reasons. +\begin{itemize} +\item + A programming language is a notation for expressing computation. Shells + have a notation that is specifically tuned for running Unix programs and + hooking them together. For example, suppose you want to run programs + \ex{foo} and \ex{bar} with \ex{foo} feeding output into \ex{bar}. If you do + this in C, you must write: two calls to \ex{fork()}, two calls to + \ex{exec()}, one call to \ex{pipe()}, several calls to \ex{close()}, + two calls to \ex{dup()}, and a lot of error checks (fig.~\ref{fig:C-pipe}). + This is a lot of picky bookkeeping: tedious to write, tedious to read, + and easy to get wrong on the first try. In sh, on the other hand, + you simply write ``\ex{foo | bar}'' which is much easier to write and + much clearer to read. + One can look at this expression and instantly understand it; + one can write it and instantly be sure that it is correct. 
+ +\begin{figure} +\begin{boxedminipage}{\linewidth}\vskip 1.5ex +\footnotesize +\begin{verbatim} +int fork_foobar(void) /* foo | bar in C */ +{ + int pid1 = fork(); + int pid2, fds[2]; + + if( pid1 == -1 ) { + perror("foo|bar"); + return -1; + } + + if( pid1 ) { + int status; + if( -1 == waitpid(pid1, &status, 0) ) { + perror("foo|bar"); + return -1; + } + return status; + } + + if( -1 == pipe(fds) ) { + perror("foo|bar"); + exit(-1); + } + + pid2 = fork(); + if( pid2 == -1 ) { + perror("foo|bar"); + exit(-1); + } + + if( !pid2 ) { + close(fds[0]); + dup2(fds[1], 1); + execlp("foo", "foo", NULL); + perror("foo|bar"); + exit(-1); + } + + close(fds[1]); + dup2(fds[0], 0); + execlp("bar", "bar", NULL); + perror("foo|bar"); + exit(-1); + }\end{verbatim} +\caption{Why we program with shells.} +\label{fig:C-pipe} +\end{boxedminipage} +\end{figure} + +\item + They are interpreted. Debugging is easy and interactive; programs are small. + On my workstation, the ``hello, world'' program is 16kb as a compiled C + program, and 29 bytes as an interpreted sh script. + + In fact, \ex{/bin/sh} is just about the only language interpreter + that a programmer can absolutely rely upon having available + on the system, so this is just about the only reliable way to + get interpreted-code density and know that one's program + will run on any Unix system. + +\item + Because the shell is the programmer's command language, the programmer + is usually very familiar with its commonly-used command-language + subset (this familiarity tails off rapidly, however, as the demands + of shell programming move the programmer out into the dustier recesses + of the language's definition.) +\end{itemize} + +There is a tension between the shell's dual role as interactive command +language and shell-script programming language. A command language should be +terse and convenient to type. It doesn't have to be comprehensible.
Users +don't have to maintain or understand a command they typed into a shell a month +ago. A command language can be ``write-only,'' because commands are thrown +away after they are used. However, it is important that most commands fit on +one line, because most interaction is through tty drivers that don't let the +user back up and edit a line after its terminating newline has been entered. +This seems like a trivial point, but imagine how irritating it would be if +typical shell commands required several lines of input. Terse notation is +important for interactive tasks. + +Shell syntax is also carefully designed to allow it to be parsed +on-line---that is, to allow parsing and interpretation to be interleaved. +This usually penalizes the syntax in other ways (for example, consider +rc's clumsy if/then/else syntax \cite{rc}). + +Programming languages, on the other hand, can be a little more verbose, in +return for generality and readability. The programmer enters programs into a +text editor, so the language can spread out a little more. + +The constraints of the shell's role as command language are one of the +things that make it unpleasant as a programming language. + +The really compelling advantage of shell languages over other programming +languages is the first one mentioned above. Shells provide a powerful +notation for connecting processes and files together. In this respect, +shell languages are extremely well-adapted to the general paradigm of +the Unix operating system. +In Unix, the fundamental computational agents are programs, running +as processes in individual address spaces. +These agents cooperate and communicate among themselves to solve a problem +by communicating over directed byte streams called pipes. +Viewed at this level, Unix is a data-flow architecture. +From this perspective, the shell serves a critical role +as the language designed to assemble the individual computational +agents to solve a particular task. 
+ +As a programming language, this interprocess ``glue'' aspect of the +shell is its key desirable feature. +This leads us to a fairly obvious idea: instead of adding weak +programming features to a Unix process-control language, +why not add process invocation features to a strong programming language? + +What programming language would make a good base? +We would want a language that was powerful and high-level. +It should allow for implementations based on interactive interpreters, for +ease of debugging and to keep programs small. +Since we want to add new notation to the language, it would help if the +language was syntactically extensible. +High-level features such as automatic storage allocation would help keep +programs small and simple. +{\Scheme} is an obvious choice. +It has all of the desired features, and its weak points, such as its lack of a +module system or its poor performance relative to compiled C on certain +classes of program, do not apply to the writing of shell scripts. + +I have designed and implemented a {\Unix} shell called scsh that is +embedded inside {\Scheme}. +I had the following design goals and non-goals: +\begin{itemize} +\item + The general systems architecture of {\Unix} is cooperating computational + agents that are realised as processes running in separate, protected address + spaces, communicating via byte streams. + The point of a shell language is to act as the glue to connect up these + computational agents. + That is the goal of scsh. + I resisted the temptation to delve into other programming models. + Perhaps cooperating lightweight threads communicating through shared memory + is a better way to live, but it is not {\Unix}. + The goal here was not to come up with a better systems architecture, but + simply to provide a better way to drive {\Unix}.
+ \note{Agenda} + +\item + I wanted a programming language, not a command language, and I was + unwilling to compromise the quality of the programming language to + make it a better command language. I was not trying to replace use of + the shell as an interactive command language. I was trying to provide + a better alternative for writing shell scripts. So I did not focus + on issues that might be important for a command language, such as job + control, command history, or command-line editing. There are no write-only + notational conveniences. I made no effort to hide the + base {\Scheme} syntax, even though an interactive user might find all + the necessary parentheses irritating. + (However, see section \ref{sec:future-work}.) + +\item + I wanted the result to fit naturally within {\Scheme}. For example, + this ruled out complex non-standard control-flow paradigms, + such as awk's or sed's. +\end{itemize} + +The resulting design, scsh, has two dependent components, embedded +within a very portable {\Scheme} system: +\begin{itemize} +\item A high-level process-control notation. +\item A complete library of {\Unix} system calls. +\end{itemize} +The process-control notation allows the user to control {\Unix} programs +with a compact notation. +The syscall library gives the programmer full low-level access to the kernel +for tasks that cannot be handled by the high-level notation. +In this way, scsh's functionality spans a spectrum of detail that is +not available to either C or sh. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Process notation} +\label{sec:proc-forms} +Scsh has a notation for controlling {\Unix} processes that takes the +form of s-expressions; this notation can then be embedded inside of +standard {\Scheme} code. +The basic elements of this notation are {\em process forms}, +{\em extended process forms}, and {\em redirections}.
+ +\subsection{Extended process forms and i/o redirections} +An {\em extended process form\/} is a specification of a {\Unix} process to +run, in a particular I/O environment: + \codex{\var{epf} {\synteq} (\var{pf} $\var{redir}_1$ {\ldots} $\var{redir}_n$)} +where \var{pf} is a process form and the $\var{redir}_i$ are redirection specs. +A {\em redirection spec} is one of: +\begin{inset} +\begin{tabular}{@{}l@{\qquad{\tt; }}l@{}} + \ex{(< \var{[fdes]} \var{file-name})} & \ex{Open file for read.} +\\\ex{(> \var{[fdes]} \var{file-name})} & \ex{Open file create/truncate.} +\\\ex{(<< \var{[fdes]} \var{object})} & \ex{Use \var{object}'s printed rep.} +\\\ex{(>> \var{[fdes]} \var{file-name})} & \ex{Open file for append.} +\\\ex{(= \var{fdes} \var{fdes/port})} & \ex{Dup2} +\\\ex{(- \var{fdes/port})} & \ex{Close \var{fdes/port}.} +\\\ex{stdports} & \ex{0,1,2 dup'd from standard ports.} +\end{tabular} +\end{inset} +The \var{fdes} file descriptors have these defaults: +\begin{center} +{\ttt +\begin{tabular}{|cccc|}\hline < & << & > & >> \\ + 0 & 0 & 1 & 1 \\ \hline +\end{tabular} +} +\end{center} + +The subforms of a redirection are implicitly backquoted, +and symbols stand for their print-names. +So \ex{(> ,x)} means +``output to the file named by {\Scheme} variable \ex{x},'' +and \ex{(< /usr/shivers/.login)} means ``read from \ex{/usr/shivers/.login}.'' +This implicit backquoting is an important feature of the process notation, +as we'll see later (sections~\ref{sec:zen} and \ref{sec:sexp}). + +Here are two more examples of i/o redirection: +% +\begin{center} +\begin{codebox} +(< ,(vector-ref fv i)) +(>> 2 /tmp/buf)\end{codebox} +\end{center} +% +These two redirections cause the file \ex{fv[i]} to be opened on stdin, and +\ex{/tmp/buf} to be opened for append writes on stderr. + +The redirection \ex{(<< \var{object})} causes input to come from the +printed representation of \var{object}. 
+For example, + \codex{(<< "The quick brown fox jumped over the lazy dog.")} +causes reads from stdin to produce the characters of the above string. +The object is converted to its printed representation using the \ex{display} +procedure, so + \codex{(<< (A five element list))} +is the same as + \codex{(<< "(A five element list)")} +is the same as + \codex{(<< ,(reverse '(list element five A))){\rm.}} +(Here we use the implicit backquoting feature to compute the list to +be printed.) + +The redirection \ex{(= \var{fdes} \var{fdes/port})} causes \var{fdes/port} +to be dup'd into file descriptor \var{fdes}. +For example, the redirection + \codex{(= 2 1)} +causes stderr to be the same as stdout. +\var{fdes/port} can also be a port, for example: + \codex{(= 2 ,(current-output-port))} +causes stderr to be dup'd from the current output port. +In this case, it is an error if the port is not a file port +(\eg, a string port). \note{No port sync} + +More complex redirections can be accomplished using the \ex{begin} +process form, discussed below, which gives the programmer full control +of i/o redirection from {\Scheme}. + +\subsection{Process forms} +A {\em process form\/} specifies a computation to perform as an independent +{\Unix} process. It can be one of the following: +% +\begin{leftinset} +\begin{codebox} +(begin . \var{scheme-code}) +(| \vari{pf}{\!1} {\ldots} \vari{pf}{\!n}) +(|+ \var{connect-list} \vari{pf}{\!1} {\ldots} \vari{pf}{\!n}) +(epf . \var{epf}) +(\var{prog} \vari{arg}{1} {\ldots} \vari{arg}{n}) +\end{codebox} +\qquad +\begin{codebox} +; Run \var{scheme-code} in a fork. +; Simple pipeline +; Complex pipeline +; An extended process form. +; Default: exec the program. +\end{codebox} +\end{leftinset} +% +The default case \ex{(\var{prog} \vari{arg}1 {\ldots} \vari{arg}n)} +is also implicitly backquoted. 
+That is, it is equivalent to: +% +\codex{(begin (apply exec-path `(\var{prog} \vari{arg}1 {\ldots} \vari{arg}n)))} +% +\ex{Exec-path} is the version of the \ex{exec()} system call that +uses scsh's path list to search for an executable. +The program and the arguments must be either strings, symbols, or integers. +Symbols and integers are coerced to strings. +A symbol's print-name is used. +Integers are converted to strings in base 10. +Using symbols instead of strings is convenient, since it suppresses the +clutter of the surrounding \ex{"{\ldots}"} quotation marks. +To aid this purpose, scsh reads symbols in a case-sensitive manner, +so that you can say +\codex{(more Readme)} +and get the right file. +(See section \ref{sec:lex} for further details on lexical issues.) + +A \var{connect-list} is a specification of how two processes are to be wired +together by pipes. +It has the form \ex{((\vari{from}1 \vari{from}2 {\ldots} \var{to}) \ldots)} +and is implicitly backquoted. +For example, +% +\codex{(|+ ((1 2 0) (3 3)) \vari{pf}{\!1} \vari{pf}{\!2})} +% +runs \vari{pf}{\!1} and \vari{pf}{\!2}. +The first clause \ex{(1 2 0)} causes \vari{pf}{\!1}'s +stdout (1) and stderr (2) to be connected via pipe +to \vari{pf}{\!2}'s stdin (0). +The second clause \ex{(3 3)} causes \vari{pf}{\!1}'s file descriptor 3 to be +connected to \vari{pf}{\!2}'s file descriptor 3. +%---this is unusual, and not expected to occur very often. + +%[Note that {\R4RS} does not specify whether or not | and |+ are readable +%symbols. Scsh does.] + +\subsection{Using extended process forms in \Scheme} +Process forms and extended process forms are {\em not\/} {\Scheme}. +They are a different notation for expressing computation that, like {\Scheme}, +is based upon s-expressions. +Extended process forms are used in {\Scheme} programs by embedding them inside +special Scheme forms. 
+\pagebreak +There are three basic {\Scheme} forms that use extended process forms: +\ex{exec-epf}, \cd{&}, and \ex{run}: +\begin{inset} +\begin{codebox}[t] +(exec-epf . \var{epf}) +(& . \var{epf}) +(run . \var{epf}) +\end{codebox} +\quad +\begin{codebox}[t] +; Nuke the current process. +; Run \var{epf} in background; return pid. +; Run \var{epf}; wait for termination. +; Returns exit status.\end{codebox} +\end{inset} +These special forms are macros that expand into the equivalent +series of system calls. +The definition of the \ex{exec-epf} macro is non-trivial, +as it produces the code to handle i/o redirections and set up pipelines. +However, the definitions of the \cd{&} and \ex{run} macros are very simple: +\begin{leftinset} +\begin{tabular}{@{}l@{\quad$\Rightarrow$\quad}l@{}} +\cd{(& . \var{epf})} & \ex{(fork (\l{} (exec-epf . \var{epf})))} \\ +\ex{(run . \var{epf})} & \cd{(wait (& . \var{epf}))} +\end{tabular} +\end{leftinset} + +Figures \ref{fig:ex1} and \ref{fig:ex2} show a series of examples +employing a mix of the process notation and the syscall library. +Note that regular Scheme is used to provide the control structure, +variables, and other linguistic machinery needed by the script fragments. +% +\begin{figure}[bp]\footnotesize +\begin{boxedminipage}{\linewidth}\vskip 1.5ex +\begin{center}\begin{codebox} +;; If the resource file exists, load it into X. +(if (file-exists? f) + (run (xrdb -merge ,f))) + +;; Decrypt my mailbox; key is "xyzzy". +(run (crypt xyzzy) (< mbox.crypt) (> mbox)) + +;; Dump the output from ls, fortune, and from into log.txt. +(run (begin (run (ls)) + (run (fortune)) + (run (from))) + (> log.txt)) + +;; Compile FILE with FLAGS. +(run (cc ,file ,@flags)) + +;; Delete every file in DIR containing the string "/bin/perl": +(with-cwd dir + (for-each (\l{file} + (if (zero?
(run (grep -s /bin/perl ,file))) + (delete-file file))) + (directory-files)))\end{codebox} +\end{center} +\caption{Example shell script fragments (a)} +\label{fig:ex1} +\end{boxedminipage} +\end{figure} + +\begin{figure}\footnotesize +\begin{boxedminipage}{\linewidth}\vskip 1.5ex +\begin{center}\begin{codebox} +;; M4 preprocess each file in the current directory, then pipe +;; the input into cc. Errlog is foo.err, binary is foo.exe. +;; Run compiles in parallel. +(for-each (\l{file} + (let ((outfile (replace-extension file ".exe")) + (errfile (replace-extension file ".err"))) + (& (| (m4) (cc -o ,outfile)) + (< ,file) + (> 2 ,errfile)))) + (directory-files)) + +;; Same as above, but parallelise even the computation +;; of the filenames. +(for-each (\l{file} + (& (begin (let ((outfile (replace-extension file ".exe")) + (errfile (replace-extension file ".err"))) + (exec-epf (| (m4) (cc -o ,outfile)) + (< ,file) + (> 2 ,errfile)))))) + (directory-files)) + +;; DES encrypt string PLAINTEXT with password KEY. My DES program +;; reads the input from fdes 0, and the key from fdes 3. We want to +;; collect the ciphertext into a string and return that, with error +;; messages going to our stderr. Notice we are redirecting Scheme data +;; structures (the strings PLAINTEXT and KEY) from our program into +;; the DES process, instead of redirecting from files. RUN/STRING is +;; like the RUN form, but it collects the output into a string and +;; returns it (see following section). + +(run/string (/usr/shivers/bin/des -e -3) + (<< ,plaintext) (<< 3 ,key)) + +;; Delete the files matching regular expression PAT. +;; Note we aren't actually using any of the process machinery here -- +;; just pure Scheme. +(define (dsw pat) + (for-each (\l{file} + (if (y-or-n? 
(string-append "Delete " file)) + (delete-file file))) + (file-match #f pat)))\end{codebox} +\end{center} +\caption{Example shell script fragments (b)} +\label{fig:ex2} +\end{boxedminipage} +\end{figure} + + +\subsection{Procedures and special forms} +It is a general design principle in scsh that all functionality +made available through special syntax is also available in a +straightforward procedural form. +So there are procedural equivalents for all of the process notation. +In this way, the programmer is not restricted by the particular details of +the syntax. +Here are some of the syntax/procedure equivalents: +\begin{inset} +\begin{tabular}{@{}|ll|@{}} +\hline +Notation & Procedure \\ \hline \hline +\ex{|} & \ex{fork/pipe} \\ +\ex{|+} & \ex{fork/pipe+} \\ +\ex{exec-epf} & \ex{exec-path} \\ +redirection & \ex{open}, \ex{dup} \\ +\cd{&} & \ex{fork} \\ +\ex{run} & $\ex{wait} + \ex{fork}$ \\ +\hline +\end{tabular} +\end{inset} +% +Having a solid procedural foundation also allows for general notational +experimentation using Scheme's macros. +For example, the programmer can build his own pipeline notation on top of the +\ex{fork} and \ex{fork/pipe} procedures. +%Because the shell notation has {\Scheme} escapes +%(\eg, the \ex{begin} process form), +%the programmer can move back and forth easily, using the simple notation +%where possible, and escaping to general {\Scheme} only where necessary. + +\begin{protos} +\protoitem{fork}{[thunk]}{procedure} + \ex{Fork} spawns a {\Unix} subprocess. + Its exact behavior depends on whether it is called with the optional + \var{thunk} argument. + + With the \var{thunk} argument, \ex{fork} spawns off a subprocess that + calls \var{thunk}, exiting when \var{thunk} returns. + \ex{Fork} returns the subprocess' pid to the parent process. + + Without the \var{thunk} argument, \ex{fork} behaves like the C \ex{fork()} + routine. + It returns in both the parent and child process. 
+ In the parent, \ex{fork} returns the child's pid; + in the child, \ex{fork} returns \cd{#f}. + +\protoitem{fork/pipe}{[thunk]}{procedure} + Like \ex{fork}, but the parent and child communicate via a pipe + connecting the parent's stdin to the child's stdout. This function + side-effects the parent by changing his stdin. + + In effect, \ex{fork/pipe} splices a process into the data stream + immediately upstream of the current process. + This is the basic function for creating pipelines. + Long pipelines are built by performing a sequence of \ex{fork/pipe} calls. +\pagebreak + For example, to create a background two-process pipe \ex{a | b}, we write: +% +\begin{tightcode} +(fork (\l{} (fork/pipe a) (b)))\end{tightcode} +% + which returns the pid of \ex{b}'s process. + + To create a background three-process pipe \ex{a | b | c}, we write: +% +\begin{code} +(fork (\l{} (fork/pipe a) + (fork/pipe b) + (c)))\end{code} +% + which returns the pid of \ex{c}'s process. + + +\protoitem{fork/pipe+}{conns [thunk]}{procedure} + Like \ex{fork/pipe}, but the pipe connections between the child and parent + are specified by the connection list \var{conns}. + See the + \codex{(|+ \var{conns} \vari{pf}{\!1} \ldots{} \vari{pf}{\!n})} + process form for a description of connection lists. +\end{protos} + +\subsection{Interfacing process output to {\Scheme}} +\label{sec:io-interface} +There is a family of procedures and special forms that can be used +to capture the output of processes as {\Scheme} data. +Here are the special forms for the simple variants: +\\[2ex]%\begin{center} +\begin{codebox} +(run/port . \var{epf}) ; Return port open on process's stdout. +(run/file . \var{epf}) ; Process > temp file; return file name. +(run/string . \var{epf}) ; Collect stdout into a string and return. +(run/strings . \var{epf}) ; Stdout->list of newline-delimited strings. +(run/sexp . \var{epf}) ; Read one sexp from stdout with READ. +(run/sexps . 
\var{epf}) ; Read list of sexps from stdout with READ.\end{codebox} +\\[2ex]%\end{center} +% +\ex{Run/port} returns immediately after forking off the process; +other forms wait for either the process to die (\ex{run/file}), +or eof on the communicating pipe +(\ex{run/string}, \ex{run/strings}, \ex{run/sexps}). +These special forms just expand into calls to the following analogous +procedures: +% +\begin{center} +\begin{column} +\proto{run/port*} {thunk}{procedure} \\ +\proto{run/file*} {thunk}{procedure} \\ +\proto{run/string*} {thunk}{procedure} \\ +\proto{run/strings*} {thunk}{procedure} \\ +\proto{run/sexp*} {thunk}{procedure} \\ +\proto{run/sexps*} {thunk}{procedure} +\end{column} +\end{center} +% +For example, \ex{(run/port . \var{epf})} expands into +\codex{(run/port* (\l{} (exec-epf . \var{epf}))).} + +These procedures can be used to manipulate the output of {\Unix} +programs with {\Scheme} code. For example, the output of the \ex{xhost(1)} +program can be manipulated with the following code: +\begin{code} +;;; Before asking host REMOTE to do X stuff, +;;; make sure it has permission. +(while (not (member remote (run/strings (xhost)))) + (display "Pausing for xhost...") + (read-char))\end{code} + +The following procedures are also of utility for generally parsing +input streams in scsh: +%(port->string \var{port}) +%(port->sexp-list \var{port}) +%(port->string-list \var{port}) +%(port->list \var{reader} \var{port}) +\begin{center} +\begin{column} +\proto{port->string}{port}{procedure} \\ +\proto{port->sexp-list}{port}{procedure} \\ +\proto{port->string-list}{port}{procedure} \\ +\proto{port->list}{reader port}{procedure} +\end{column} +\end{center} +\ex{Port->string} reads the port until eof, +then returns the accumulated string. +\ex{Port->sexp-list} repeatedly reads data from the port until eof, +then returns the accumulated list of items. 
+\ex{Port->string-list} repeatedly reads newline-terminated strings from the +port until eof, then returns the accumulated list of strings. +The delimiting newlines are not part of the returned strings. +\ex{Port->list} generalises these two procedures. +It uses \var{reader} to repeatedly read objects from a port. +It accumulates these objects into a list, which is returned upon eof. +The \ex{port->string-list} and \ex{port->sexp-list} procedures +are trivial to define, being merely \ex{port->list} curried with +the appropriate parsers: +\begin{code}\cddollar +(port->string-list \var{port}) $\equiv$ (port->list read-line \var{port}) +(port->sexp-list \var{port}) $\equiv$ (port->list read \var{port})\end{code} +% +The following compositions also hold: +\begin{code}\cddollar +run/string* $\equiv$ port->string $\circ$ run/port* +run/strings* $\equiv$ port->string-list $\circ$ run/port* +run/sexp* $\equiv$ read $\circ$ run/port* +run/sexps* $\equiv$ port->sexp-list $\circ$ run/port*\end{code} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{System calls} +\label{sec:syscall-lib} +We've just seen scsh's high-level process-form notation, +for running programs, creating pipelines, and performing I/O redirection. +This notation is at roughly the same level as traditional {\Unix} shells. +The process-form notation is convenient, but does not provide detailed, +low-level access to the operating system. +This is provided by the second component of scsh: its system-call library. + +Scsh's system-call library is a nearly-complete set of {\sc Posix} bindings, +with some extras, such as symbolic links. +As of this writing, network and terminal i/o controls have still not yet +been implemented; work on them is underway. 
+Scsh also provides a convenient set of systems programming utility procedures, +such as routines to perform pattern matching on file-names and general strings, +manipulate {\Unix} environment variables, and parse file pathnames. +Although some of the procedures have been described in passing, +a detailed description of the system-call library is beyond the scope of +this note. +The reference manual \cite{ref-man} contains the full details. + +%\part{Design Notes} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{The Tao of {\Scheme} and {\Unix}} +\label{sec:zen} +Most attempts at embedding shells in functional programming languages +\cite{fsh,ellis} +try to hide the difference between running a program and calling a procedure. +That is, if the user tries +\codex{(lpr "notes.txt")} +the shell will first treat \ex{lpr} as a procedure to be called. +If \ex{lpr} isn't found in the variable environment, the shell will then +do a path search of the file system for a program. +This sort of transparency is in analogy to the function-binding mechanisms +of traditional shells, such as ksh. + +This is a fundamental error that has hindered these previous designs. +Scsh, in contrast, is explicit about the distinction between +procedures and programs. +In scsh, the programmer must know which are which---the mechanisms +for invocation are different for the two cases + (procedure call {\em versus\/} the \ex{(run . \var{epf})} special form), +and the namespaces are different + (the program's lexical environment {\em versus\/} + \ex{\$PATH} search in the file system). + +Linguistically separating these two mechanisms was an important design +decision in the language. +It was done because the two computational models are fundamentally different; +any attempt to gloss over the distinctions would have made the semantics +ugly and inconsistent.
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{figure} +\begin{boxedminipage}{\linewidth}\vskip 1.5ex +\begin{center} +\begin{tabular}{ll} +\bf Unix: & +\begin{tabular}[t]{l} +Computational agents are processes, \\ communicate via byte streams. +\end{tabular} \\ +\\ +\bf Scheme: & +\begin{tabular}[t]{l} +Computational agents are procedures, \\ communicate via procedure call/return. +\end{tabular} +\end{tabular} +\end{center} +\caption{The Tao of {\Scheme} and {\Unix}} +\label{fig:tao} +\end{boxedminipage} +\end{figure} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +There are two computational worlds here (figure~\ref{fig:tao}), +where the basic computational agents are procedures or processes. +These agents are composed differently. +In the world of applicative-order procedures, agents execute serially, +and are composed with function composition: \ex{(g (f x))}. +In the world of processes, agents execute concurrently +and are composed with pipes, in a data-flow network: \ex{f | g}. +A language with both of these computational structures, such as scsh, +must provide a way to interface them. \note{Normal order} +In scsh, we have ``adapters'' for crossing between these paradigms: +%(figure~\ref{fig:cross-connect}). 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\begin{figure}[bhp] +%\begin{center} +\begin{inset} +\def\foo{\rule[-1.5ex]{0in}{4ex}} +\begin{tabular}{l|c|c|} +\multicolumn{1}{l}{} & \multicolumn{1}{c}{Scheme} + & \multicolumn{1}{c}{Unix} \\ \cline{2-3} +\foo Scheme & \ex{(g (f x))} & \ex{(<< ,x)} \\ \cline{2-3} +\foo Unix & \ex{run/string},\ldots & \ex{f | g} \\ \cline{2-3} +\end{tabular} +\end{inset} +%\end{center} +%\caption{Scheme/Unix cross-connectors} +%\label{fig:cross-connect} +%\end{figure} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +The \ex{run/string} form and its cousins (section~\ref{sec:io-interface}) +map process output to procedure input; +the \ex{<<} i/o redirection maps procedure output to process input. +For example: +\begin{code} +(run/string (nroff -ms) + (<< ,(texinfo->nroff doc-string)))\end{code} +By separating the two worlds, and then providing ways for them to +cross-connect, scsh can cleanly accommodate the two paradigms within +one notational framework. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{I/O} +\label{sec:io} +Perhaps the most difficult part of the design of scsh was the integration +of {\Scheme} ports and {\Unix} file descriptors. +Dealing with {\Unix} file descriptors in a {\Scheme} environment is difficult. +In {\Unix}, open files are part of the process state, and are referenced +by small integers called {\em file descriptors}. +Open file descriptors are the fundamental way i/o redirections are passed to +subprocesses, since file descriptors are preserved across \ex{fork()} +and \ex{exec()} calls. + +{\Scheme}, on the other hand, uses ports for specifying i/o sources. +Ports are anonymous, garbage-collected Scheme objects, not integers. +When a port is collected, it is also closed. 
Because file +descriptors are just integers, it's impossible to garbage collect them---in +order to close file descriptor 3, you must prove that the process will never +again pass a 3 as a file descriptor to a system call doing I/O, and that it +will never \ex{exec()} a program that will refer to file descriptor 3. + +This is difficult at best. + +If a {\Scheme} program only used {\Scheme} ports, and never directly used +file descriptors, this would not be a problem. +But {\Scheme} code must descend to the file-descriptor level in at least two +circumstances: +\begin{itemize} + \item when interfacing to foreign code; + \item when interfacing to a subprocess. +\end{itemize} +This causes problems. Suppose we have a {\Scheme} port constructed +on top of file descriptor 2. We intend to fork off a C program that +will inherit this file descriptor. If we drop references to the port, +the garbage collector may prematurely close file 2 before we exec +the C program. + +Another difficulty, stemming from the mismatch between the anonymity of ports +and the explicit naming of file descriptors, arises when the +user explicitly manipulates file descriptors, as is required by +{\Unix}. +For example, when a file port is opened in {\Scheme}, the underlying run-time +{\Scheme} kernel must open a file and allocate an integer file descriptor. +When the user subsequently explicitly manipulates particular file descriptors, +perhaps preparatory to executing some {\Unix} subprocess, the port's +underlying file descriptor could be silently redirected to some new file. + +Scsh's {\Unix} i/o interface is intended to fix this and +other problems arising from the mismatch between ports and file descriptors. +The fundamental principle is that in scsh, most ports are attached to files, +not to particular file descriptors.
+When the user does an i/o redirection (\eg, with \ex{dup2()}) +that must allocate a particular file descriptor \var{fd}, there is a chance +that \var{fd} has already been inadvertently allocated to a port by a prior +operation (\eg, an \ex{open-input-file} call). +If so, \var{fd}'s original port will be shifted to some new file descriptor +with a \ex{dup(\var{fd})} operation, freeing up \var{fd} for use. +The port machinery is allowed to do this as it does not in general +reveal which file descriptors are allocated to particular {\Scheme} ports. +Not revealing the particular file descriptors allocated to {\Scheme} +ports allows the system two important freedoms: +\begin{itemize} +\item When the user explicitly allocates a particular file descriptor, + the run-time system is free to shuffle around the port/file-descriptor + associations as required to free up that descriptor. +\item When all pointers to an unrevealed file port have been dropped, + the run-time system is free to close the underlying file descriptor. + If the user doesn't know which file descriptor was associated with the + port, then there is no way he could refer to that i/o channel by its + file-descriptor name. + This allows scsh to close file descriptors during gc or when + performing an \ex{exec()}. +\end{itemize} +Users {\em can\/} explicitly manipulate file descriptors, if so desired. +In this case, the associated ports are marked by the run time as ``revealed,'' +and are no longer subject to automatic collection. +The machinery for handling this is carefully marked in the documentation, +and, with some simple invariants in mind, follows the user's intuitions. +This facility preserves the transparent close-on-collect property +for file ports that are used in straightforward ways, yet allows +access to the underlying {\Unix} substrate without interference from +the garbage collector. 
This is critical, since shell programming +absolutely requires access to the {\Unix} file descriptors, as their +numerical values are a critical part of the process interface. + +Under normal circumstances, all this machinery just works behind the scenes +to keep things straightened out. The only time the user has to think about +it is when he starts accessing file descriptors from ports, which he should +almost never have to do. If a user starts asking what file descriptors +have been allocated to what ports, he has to take responsibility for managing +this information. + +Further details on the port mechanisms in scsh are beyond the scope of +this note; for more information, see the reference manual \cite{ref-man}. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Lexical issues} +\label{sec:lex} + +Scsh's lexical syntax is not fully {\R4RS}-compliant in two ways: +\begin{itemize} +\item In scsh, symbol case is preserved by \ex{read} and is significant on + symbol comparison. This means + \codex{(run (less Readme))} + displays the right file. + +\item ``\ex{-}'' and ``\ex{+}'' are allowed to begin symbols. + So the following are legitimate symbols: + \codex{-O2 -geometry +Wn} +\end{itemize} +% +Scsh also extends {\R4RS} lexical syntax in the following ways: +\begin{itemize} +\item ``\ex{|}'' and ``\ex{.}'' are symbol constituents. + This allows \ex{|} for the pipe symbol, and \ex{..} for the parent-directory + symbol. (Of course, ``\ex{.}'' alone is not a symbol, but a + dotted-pair marker.) + +\item A symbol may begin with a digit. + So the following are legitimate symbols: +\codex{9x15 80x36-3+440} + +\item Strings are allowed to contain the {\sc Ansi} C escape sequences + such as \verb|\n| and \verb|\161|. + +\item \cd{#!} is a comment read-macro similar to \ex{;}. + This is important for writing shell scripts. +\end{itemize} + +The lexical details of scsh are perhaps a bit contentious. 
+Extending the symbol syntax remains backwards compatible +with existing correct {\R4RS} code. +Since flags to {\Unix} programs always begin with a dash, +not extending the syntax would have required the user to explicitly +quote every flag to a program, as in +\codex{(run (cc "-O" "-o" "-c" main.c)).} +This is unacceptably obfuscatory, so the change was made to cover +these sorts of common {\Unix} flags. + +More serious was the decision to make symbols read case-sensitively, +which introduces a true backwards incompatibility with {\R4RS} {\Scheme}. +This was a true case of clashing world-views: +{\Unix}'s tokens are case-sensitive; {\Scheme}'s are not. + +It is also unfortunate that the single-dot token, ``\ex{.}'', is both +a fundamental {\Unix} file name and a deep, primitive syntactic token +in {\Scheme}---it means the following will not parse correctly in scsh: +\codex{(run/strings (find . -name *.c -print))} +You must instead quote the dot: +\codex{(run/strings (find "." -name *.c -print))} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Implementation} +\label{sec:impl} + +Scsh is currently implemented on top of {\scm}, a freely-available +{\Scheme} implementation written by Kelsey and Rees \cite{S48}. +{\scm} uses a byte-code interpreter for portability, good code density, +and medium efficiency. It is {\R4RS}-compliant, and includes a module +system designed by Rees. + +The scsh design is not {\scm}-specific, although the current implementation +is necessarily so. Scsh is intended to be implementable in other {\Scheme} +implementations---although such a port may require some work. (I would +be very interested to see scsh ported to some of the {\Scheme} systems designed +to serve as embedded command languages---\eg, elk, esh, or any of the other +C-friendly interpreters.) + +Scsh scripts currently have a few problems owing to the current +{\scm} implementation technology. 
+\begin{itemize} +\item Before running even the smallest shell script, the {\scm} vm must first + load in a 1.4Mb heap image. This i/o load adds a few seconds to the startup + time of even trivial shell scripts. + +\item Since the entire {\scm} and scsh runtime is in the form of byte-code + data in the {\Scheme} heap, the heap is fairly large. As the {\scm} vm + uses a non-generational gc, all of this essentially permanent data + gets copied back and forth by the collector. + +\item The large heap size is compounded by {\Unix} forking. +If you run a +four-stage pipeline, \eg, +\begin{code} +(run (| (zcat paper.tex.Z) + (detex) + (spell) + (enscript -2r)))\end{code} + then, for a brief instant, you could have up to five copies of scsh + forked into existence. This would briefly quintuple the virtual memory + demand placed by a single scsh heap, which is fairly large to begin with. + Since all the code is actually in the data pages of the process, the OS + can't trivially share pages between the processes. Even if the OS is clever + enough to do copy-on-write page sharing, it may insist on reserving enough + backing store on disk for worst-case swapping requirements. If disk space + is limited, this may overflow the paging area, causing the \ex{fork()} + operations to fail. +\end{itemize} +% +Byte-coded virtual machines are intended to be a technology +that provides memory savings through improved code density. +It is ironic that the straightforward implementation of such a byte-code +interpreter actually has high memory cost through bad interactions with +{\Unix} \ex{fork()} and the virtual memory system. + +The situation is not irretrievable, however. A recent release of {\scm} +allows the pure portion of a heap image to be statically linked with the +text pages of the vm binary. 
Putting static data---such as all the code for +the runtime---into the text pages should drastically shorten start-up time, +move a large amount of data out of the heap, improve paging, +and greatly shrink the dynamic size. This should all lessen +the impact of \ex{fork()} on the virtual memory system. + +Arranging for the garbage collector to communicate with the virtual memory +system with the near-standard \ex{madvise()} system call would further improve +the system. Also, breaking the system run-time into separate modules (\eg, +bignums, list operations, i/o, string operations, scsh operations, compiler, +\etc), each of which can be demand-loaded shared-text by the {\scm} vm +(using \ex{mmap()}), will allow for a full-featured system with a surprisingly +small memory footprint. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Size} +\label{sec:size} +Scsh can justifiably be criticised for being a florid design. +There are a lot of features---perhaps too many. +The optional arguments to many procedures, the implicit backquoting, and +the syntax/procedure equivalents are all easily synthesized by the user. +For example, \ex{port->strings}, \ex{run/strings*}, \ex{run/sexp*}, +and \ex{run/sexps*} are all trivial compositions and curries of other base +procedures. +The \ex{run/strings} and \ex{run/sexps} forms are easily +written as macros, or simply written out by hand. +Not only does scsh provide the basic \ex{file-attributes} procedure +(\ie, the \ex{stat()} system call), +it also provides a host of derived procedures: \ex{file-owner}, \ex{file-mode}, +\ex{file-directory?}, and so forth. +Still, my feeling is that it is easier and clearer to read +\codex{(filter file-directory? (directory-files))} + than +\begin{code} +(filter (\l{fname} + (eq? 'directory + (fileinfo:type (file-attributes fname)))) + (directory-files))\end{code} +A full library can make for clearer user code. 
+ +One measure of scsh's design is that the source code consists of +a large number of small procedures: the source code for scsh has 448 +top-level definitions; the definitions have an average length of 5 lines of +code. +That is, scsh is constructed by connecting together a lot of +small, composable parts, instead of designing one inflexible monolithic +structure. +These small parts can also be composed and abstracted by the programmer +into his own computational structures. +Thus the total functionality of scsh is greater than that of more traditional +large systems. + + +%\part{Systems Programming} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Systems programming in {\Scheme}} +\label{sec:scm-sysprog} +{\Unix} systems programming in {\Scheme} is a much more pleasant experience +than {\Unix} systems programming in C. +Several features of the language remove a lot of the painful or error-prone +problems C systems programmers are accustomed to suffering. +The most important of these features are: +\begin{itemize} +\item exceptions +\item automatic storage management +\item real strings +\item higher-order procedures +\item S-expression syntax and backquote +\end{itemize} +% +Many of these features are available in other advanced programming languages, +such as Modula-3 or ML. None are available in C. + +\subsection{Exceptions and robust error handling} +In scsh, system calls never return the error codes that make careful +systems programming in C so difficult. Errors are signaled by raising +exceptions. +Exceptions are usually handled by default handlers that either abort the +program or invoke a run-time debugger; the programmer can override these when +desired by using exception-handler expressions. +Not having to return error codes frees up procedures to return useful values, +which encourages procedural composition. 
+It also keeps the programmer from cluttering up his code with +(or, as is all too often the case, just forgetting to include) +error checks for every system call. +In scsh, the programmer can assume that if a system call returns at all, it +returns successfully. +This greatly simplifies the flow of the code from the programmer's point +of view, as well as greatly increasing the robustness of the program. + +\subsection{Automatic storage management} +Further, {\Scheme}'s automatic storage allocation removes the +``result'' parameters from the procedure argument lists. +When composite data is returned, it is simply returned in a +freshly-allocated data structure. +Again, this helps make it possible for procedures to return useful values. + +For example, the C system call \ex{readlink()} +dereferences a symbolic link in the file system. +A working definition for the system call is given in +figure~\ref{fig:symlink}b. +It is complicated by many small bookkeeping details, +made necessary by C's weak linguistic facilities. + +In contrast, scsh's equivalent procedure, \ex{read-symlink}, +has a much simpler definition (fig.~\ref{fig:symlink}a). +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{figure}\fboxsep=1.5em +\renewcommand{\subcaption}[1] +{\unskip\begin{center}\unskip\em#1\end{center}} + +\begin{boxedminipage}{\linewidth} \vskip 1.5 ex +\ex{(read-symlink fname)}\\[1.5ex] +\ex{read-symlink} returns the filename referenced by symbolic link +\ex{fname}. +An exception is raised if there is an error. +\subcaption{(a) {\Scheme} definition of \ex{readlink}} +\end{boxedminipage} + +\vskip 3ex plus 1fil + +\begin{boxedminipage}{\linewidth}\vskip 1.5ex +\ex{readlink(char *path, char *buf, int bufsiz)}\\[1.5ex] +\ex{readlink} dereferences the symbolic link \ex{path}. 
+If the referenced filename is less than or equal to \ex{bufsiz} characters +in length, +it is written into the \ex{buf} array, which we fondly hope the +programmer has arranged to be at least of size \ex{bufsiz} characters. +If the referenced filename is longer than \ex{bufsiz} characters, +the system call returns an error code; +presumably the programmer should then reallocate a larger buffer and try +again. +If the system call succeeds, it returns the length of the result filename. +When the referenced filename is written into \ex{buf}, it is {\em not\/} +nul-terminated; it is the programmer's responsibility to leave space +in the buffer for the terminating nul +(remembering to subtract one from the actual buffer length when passing it to +the system call), and deposit the terminal nul after the system call returns. + +If there is a real error, +the procedure will, in most cases, return an error code. + (We will gloss over the error-code mechanism for the sake of + brevity.) +% I will gloss over the -1/\ex{errno} mechanism involved, with its +% dependency upon a global, shared variable, for the sake of +% brevity. +However, if the length of \ex{buf} does not actually match the argument +\ex{bufsiz}, +the system call may either% +\begin{itemize}% +\item succeed anyway, +\item dump core, +\item overwrite other storage and silently proceed, +\item report an error, +\item or perform some fifth action. +\end{itemize}% +It all depends. +\subcaption{(b) C definition of \ex{readlink}} +\end{boxedminipage} + +\caption{Two definitions of \protect\ex{readlink}} +\label{fig:symlink} +\end{figure} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +With the scsh version, there is no possibility that the result buffer will be +too small. +There is no possibility that the programmer will misrepresent the +size of the result buffer with an incorrect \ex{bufsiz} argument. 
+These sorts of issues are completely eliminated by the {\Scheme} programming +model. +Instead of having to worry about seven or eight trivial but potentially +fatal issues, and write the necessary 10 or 15 lines of code to correctly +handle the operation, the programmer can write a single function call and +get on with his task. + +\subsection{Return values and procedural composition} +Exceptions and automatic storage allocation make it easier for +procedures to return useful values. +This increases the odds that the programmer can use the compact notation +of function composition---\ex{f(g(x))}---to connect producers and consumers +of data, which is surprisingly difficult in C. +%Making it possible for procedures to return useful values is quite +%useful, as it encourages programmers to use the compact notation of function +%composition---\ex{f(g(x))}---to indicate data flow, which is surprisingly +%difficult in C. + +In C, if we wish to compose two procedure calls, we frequently must write: +\begin{code} +/* C style: */ +g(x,&y); +{\ldots}f(y)\ldots\end{code} +Procedures that compute composite data structures for a result +commonly return them by storing them into a data structure passed +by-reference as a parameter. +If \ex{g} does this, we cannot nest calls, but must write the code as shown. + +In fact, the above code is not quite what we want; we forgot to check \ex{g} +for an error return. +What we really wanted was: +\begin{code} +/* Worse/better: */ +err=g(x,&y); +if( err ) \{ + <{\it{handle error on {\tt{g}} call}}> + \} +{\ldots}f(y)\ldots\end{code} +The person who writes this code has to remember to check for the error; +the person who reads it has to visually link up the data flow by +connecting \ex{y}'s def and use points. +% puzzle out the data flow that goes from \ex{g}'s output value \ex{y} to +% \ex{f}'s input value. +% This is the data-flow equivalent of puzzling out the control flow +% of a program by tracing its \ex{goto}'s. 
+This is the data-flow equivalent of \ex{goto}'s, +with equivalent effects on program clarity. + +In {\Scheme}, none of this is necessary. We simply write +\codex{(f (g x)) ; Scheme} +Easy to write; easy to read and understand. +Figure \ref{fig:stat-file} shows an example of this problem, where the +task is determining if a given file is owned by root. +\begin{figure}[bthp] +\begin{boxedminipage}{\linewidth}\vskip 1.5ex +\begin{tightcode} +(if (zero? (fileinfo:owner (file-attributes fname))) + \ldots)\end{tightcode} +\subcaption{\Scheme} + +\medskip + +\begin{tightinset} +\begin{verbatim} +if( stat(fname,&statbuf) ) { + perror(progname); + exit(-1); + } +if( statbuf.st_uid == 0 ) ...\end{verbatim} +\end{tightinset} +\subcaption{C} +\caption{Why we program with Scheme.} +\label{fig:stat-file} +\end{boxedminipage} +\end{figure} + +\subsection{Strings} +Having a true string datatype turns out to be surprisingly valuable +in making systems programs simpler and more robust. +The programmer never has to expend effort to make sure that a string +length kept in a variable matches the actual length of the string; +never has to expend effort wondering how it will affect his program if +a nul byte gets stored into his string. +This is a minor feature, but like garbage collection, it eliminates a whole +class of common C programming bugs. + +\subsection{Higher-order procedures} +Scheme's first-class procedures are very convenient for systems programming. +Scsh uses them to parameterise the action of procedures that create +{\Unix} processes. +The ability to package up an arbitrary computation as a thunk turns +out to be as useful in the domain of {\Unix} processes as it is in the domain +of {\Scheme} computation. +Being able to pass computations in this way to the procedures that create +{\Unix} processes, such as \ex{fork}, \ex{fork/pipe} and \ex{run/port*} is a +powerful programming technique. 
+ +First-class procedures allow us to parameterise port readers over different +parsers, with the +\codex{(port->list \var{parser} \var{port})} +procedure. +This is the essential {\Scheme} ability to capture abstraction in a procedure +definition. +If the user wants to read a list of objects written in some syntax from an +i/o source, he need only write a parser capable of parsing a single +object. +The \ex{port->list} procedure can work with the user's parser as easily as it +works with \ex{read} or \ex{read-line}. +\note{On-line streams} + +First-class procedures also allow iterators such as \ex{for-each} and +\ex{filter} to loop over lists of data. +For example, to build the list of all my files in \ex{/usr/tmp}, I write: +\begin{code} +(filter (\l{f} (= (file-owner f) (user-uid))) + (glob "/usr/tmp/*"))\end{code} +To delete every C file in my directory, I write: +\codex{(for-each delete-file (glob "*.c"))} + +\subsection{S-expression syntax and backquote} +\label{sec:sexp} +In general, {\Scheme}'s s-expression syntax is much, much simpler to +understand and use than most shells' complex syntax, with their embedded +pattern matching, variable expansion, alias substitution, and multiple +rounds of parsing. +This costs scsh's notation some compactness, at the gain of comprehensibility. + +\subsubsection*{Recursive embeddings and balls of mud} +Scsh's ability to cover a high-level/low-level spectrum of expressiveness +is a function of its uniform s-expression notational framework. +Since scsh's process notation is embedded within Scheme, +and Scheme escapes are embedded within the process notation, +the programmer can easily switch back and forth as needed, +using the simple notation where possible, +and escaping to system calls and general {\Scheme} where necessary. 
+This recursive embedding is what gives scsh its broad-spectrum coverage +of systems functionality not available to either shells or traditional +systems programming languages; +it is essentially related to the ``ball of mud'' extensibility of the +Lisp and Scheme family of languages. + +\subsubsection*{Backquote and reliable argument lists} +Scsh's use of implicit backquoting in the process notation is a particularly +nice feature of the s-expression syntax. +%Most {\Unix} shells provide the user with a way to compute a list of strings +%and use these strings as arguments to a program. +Most {\Unix} shells provide the user with a way to take a computed string, +split it into pieces, and pass them as arguments to a program. +This usually requires the introduction of some sort of \ex{\$IFS} separator +variable to control how the string is parsed into separate arguments. +This makes things error prone in the cases where a single argument +might contain a space or other parser delimiter. +Worse than error prone, \ex{\$IFS} rescanning is in fact the source of a +famous security hole in {\Unix} \cite{Reeds}. + +In scsh, data are used to construct argument lists using the implicit backquote +feature of process forms, \eg: +\begin{tightcode} +(run (cc ,file -o ,binary ,@flags)).\end{tightcode} +Backquote completely avoids the parsing issue because it deals +with pre-parsed data: it constructs expressions from lists, not character +strings. +When the programmer computes a list of arguments, he has complete +confidence that they will be passed to the program exactly as is, +without running the risk of being re-parsed by the shell. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Other programming languages} +\label{sec:opl} +Having seen the design of scsh, +we can now compare it to other approaches in some detail. 
+ +\subsection{Functional languages} +The design of scsh could be ported without much difficulty +to any language that provides first-class procedures, GC, and exceptions, +such as {\CommonLisp} or ML. +However, {\Scheme}'s syntactic extensibility (macros) plays an important +role in making the shell features convenient to use. +In this respect, {\Scheme} and {\CommonLisp} are better choices than ML. +Using the \ex{fork/pipe} procedure with a series of closures +involves more low-level detail than +using scsh's \ex{(| \vari{pf}{\!1} {\ldots} \vari{pf}{\!n})} +process form with the closures implied. +Good notations suppress unnecessary detail. + +The payoff for using a language such as ML would come not with small +shell scripts, but with larger programs, where the power provided by the +module system and the static type checking would come into play. + +\subsection{Shells} +Traditional {\Unix} shells, such as sh, have no advantage at all as +scripting languages. + +\subsubsection*{Escaping the least common denominator trap} +One of the attractions of scsh is that it is a {\Unix} shell that isn't +constrained by the limits of {\Unix}'s uniform ``least common denominator'' +representation of data as a text string. +Since the standard medium of interchange at the shell level is {\Ascii} +byte strings, shell programmers are forced to parse and reparse data, often +with tools of limited power. +For example, to determine the number of files in a directory, a shell +programmer typically uses an expression of the form \ex{ls | wc -l}. +This traditional idiom is in fact buggy: {\Unix} files are allowed to contain +newlines in their names, which would defeat the simple \ex{wc} parser. +Scsh, on the other hand, gives the programmer direct access to the system +calls, and employs a much richer set of data structures. +Scsh's \ex{directory-files} procedure returns a {\em list\/} of strings, +directly taken from the system call. +There is no possibility of a parsing error. 
+ +As another example, consider the problem of determining if a file has its +setuid bit set. +The shell programmer must grep the text-string output of \ex{ls -l} +for the ``s'' character in the right position. +Scsh gives the programmer direct access to the \ex{stat()} system call, +so that the question can be directly answered. + +\subsubsection*{Computation granularity and impedance matching} +Sh and csh provide minimal computation facilities on the assumption that all +real computation will happen in C programs invoked from the shell. +This is a granularity assumption. +As long as the individual units of computation are large, then the cost of +starting up a separate program is amortised over the actual computation. +However, when the user wants to do something simple---\eg, split an X +\verb|$DISPLAY| string at the colon, +count the number of files in a directory, +or lowercase a string---then the overhead of program invocation +swamps the trivial computation being performed. +One advantage of using a real programming language for the shell language is +that we can get a wider-range ``impedance match'' of computation to process +overhead. +Simple computations can be done in the shell; +large grain computations can still be spawned off +to other programs if necessary. + +\subsection{New-generation scripting languages} +A newer generation of scripting languages has been supplanting sh in {\Unix}. +Systems such as perl and tcl provide many of the advantages of scsh for +programming shell scripts \cite{perl, tcl}. +However, they are still limited by weak linguistic features. +Perl and tcl still deal with the world primarily in terms of strings, +which is both inefficient and expressively limiting. +Scsh makes the full range of Scheme data types available to the programmer: +lists, records, floating point numbers, procedures, and so forth. 
+Further, the abstraction mechanisms in perl and tcl are also much more limited +than Scheme's lexically scoped, first-class procedures and lambda expressions. +As convenient as tcl and perl are, they are in no sense full-fledged +general systems-programming languages: you would not, for example, want +to write an optimizing compiler in tcl. +Scsh is Scheme, hence a powerful, full-featured general programming tool. + +It is, however, instructive to consider the reasons for the popular success of +tcl and perl. +I would argue that good design is necessary but insufficient for +a successful tool. +Tcl and perl are successful because they are more than just competently +designed; +critically, they are also available on the Net in turn-key forms, +with solid documentation. +A potential user can just down-load and compile them. +Scheme, on the other hand, has existed in multiple mutually-incompatible +implementations that are not widely portable, do not portably address +systems issues, and are frequently poorly documented. +A contentious and standards-cautious Scheme community has not standardised +on a record datatype or exception facility for the language, +features critical for systems programming. +Scheme solves the hard problems, but punts the necessary, simpler ones. +This has made Scheme an impractical systems tool, +banishing it to the realm of pedagogical programming languages. +Scsh, together with Scheme 48, fills in these lacunae. +Its facilities may not be the ultimate solutions, +but they are useable technology: clean, consistent, portable and documented. + +%\part{Conclusion} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Future work} +\label{sec:future-work} +Several extensions to scsh are being considered or implemented. +\subsection{Command language features} +The primary design effort of scsh was for programming. 
+We are now designing and implementing features to make scsh +a better interactive command language, such as job control. +A top-level parser for an sh-like notation has been designed; +the parser will allow the user to switch back to {\Scheme} notation +when desired. + +We are also considering a display-oriented interactive shell, +to be created by merging the edwin screen editor and scsh. +The user will interact with the operating system using single-keystroke +commands, defining these commands using scsh, and reverting to +{\Scheme} when necessary for complex tasks. +Given a reasonable set of GUI widgets, the same trick could be played +directly in X. + +\subsection{Little languages} +Many {\Unix} tools are built around the idea of ``little languages,'' that is, +custom, limited-purpose languages that are designed to fit the area of +application. The problem with the little-languages approach is that these +languages are usually ugly, idiosyncratic, and limited in expressiveness. +The syntactic quirks of these little languages are notorious. +The well-known problem with \ex{make}'s syntax distinguishing tab and +space has been tripping up programmers for years. +Because each little language is different +from the next, the user is required to master a handful of languages, +unnecessarily increasing the cognitive burden to use these tools. + +An alternate approach is to embed the tool's primitive operations inside +{\Scheme}, +and use the rest of {\Scheme} as the procedural glue to connect the +primitives into complex systems. This sort of approach doesn't require the +re-invention of all the basic functionality needed by a language---{\Scheme} +provides variables, procedures, conditionals, data structures, and so +forth. This means there is a greater chance of the designer ``getting it +right'' since he is really leveraging off of the enormous design effort that +was put into designing the {\Scheme} language. 
It also means the user doesn't +have to learn five or six different little languages---just {\Scheme} plus the +set of base primitives for each application. Finally, it means the base +language is not limited because the designer didn't have the time or resources +to implement all the features of a real programming language. + +With the scsh {\Unix} library, these ``little language'' {\Unix} tools could +easily be redesigned from a {\Scheme} perspective and have their interface and +functionality significantly improved. +Some examples under consideration are: +\begin{itemize} +\item The awk pattern-matching language can be implemented in + scsh by adding a single record-input procedure to the existing code. + +\item Expect is a scripting language used for automating the + use of interactive programs, such as ftp. With the exception of the tty + control syscalls currently under construction, all the pieces needed to + design an alternate scsh-based {\Unix} scripting tool already exist in scsh. + +\item A dependency-directed system for controlling recompilation such + as make could easily be implemented on top of scsh. Here, instead of + embedding the system inside of {\Scheme}, we embed {\Scheme} inside + of the system. The dependency language would use s-expression notation, + and the embedded compilation actions would be specified as {\Scheme} + expressions, including scsh notation for running {\Unix} programs. +\end{itemize} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Conclusion} +Scsh is a system with several faces. +From one perspective, +it is not much more than a system-call library and +a few macros. +Yet, there is power in this minimalist description---it points up the +utility of embedding systems in languages such as {\Scheme}. +{\Scheme} is at core what makes scsh a successful design. 
+Which leads us to three final thoughts on the subject of scsh and +systems programming in {\Unix}: +\begin{itemize} +\item A Scheme shell wins because it is broad-spectrum. +\item A functional language is an excellent tool for systems programming. +\item Hacking Unix isn't so bad, actually, if you don't have to use C. +\end{itemize} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Acknowledgements} +John Ellis' 1980 {\em SIGPLAN Notices\/} paper \cite{ellis} got me thinking +about this entire area. +Some of the design for the system calls was modeled after +Richard Stallman's emacs \cite{emacs}, +Project MAC's MIT {\Scheme} \cite{c-scheme}, and {\CommonLisp} \cite{cltl2}. +Tom Duff's {\Unix} shell, rc, was also inspirational; +his is the only elegant {\Unix} shell I've seen \cite{rc}. +Flames with Bennet Yee and Scott Draves drove me to design scsh in the +first place; +polite discussions with John Ellis and Scott Nettles subsequently improved it. +Douglas Orr was my private {\Unix} kernel consultant. +Richard Kelsey and Jonathan Rees provided me with twenty-four hour +turnaround time on requested modifications to {\scm}, and +spent a great deal of time explaining the internals of the implementation +to me. +Their elegant {\Scheme} implementation was a superb platform for development. +The design and the major portion of the implementation of scsh were completed +while I was visiting on the faculty of the University of Hong Kong +in 1992. +It was very pleasant to work in such a congenial atmosphere. +Doug Kwan was a cooperative sounding-board during the design phase. +Hsu Suchu has patiently waited quite a while for this document to +be finished. +Members of the MIT LCS and AI Lab community encouraged me to polish +the research prototype version of the shell into something releasable +to the net. 
+Henry Minsky and Ian Horswill did a lot of the encouraging; +my students Dave Albertz and Brian Carlstrom did a lot of the polishing. + +Finally, +the unix-haters list helped a great deal to maintain my perspective. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\cleardoublepage +\begin{thebibliography}{MIT Scheme} +\addcontentsline{toc}{section}{References} +\sloppy +\def\\{\newblock} + +\renewcommand{\=}{\discretionary{/}{}{/}} +\renewcommand{\.}{\discretionary{.}{}{.}} +\newcommand{\ob}{\linebreak[0]} + +\itemsep= 2ex plus 1fil +\let\Bibitem=\bibitem + +\Bibitem[CLtL2]{cltl2} Guy L.~Steele Jr. \\ + {\em Common Lisp: The Language.} \\ + Digital Press, Maynard, Mass., second edition 1990. + +\Bibitem[Ellis]{ellis} John R.~Ellis. \\ A {\sc Lisp} shell. \\ + {\em SIGPLAN Notices}, 15(5):24--34, May 1980. + +\Bibitem[emacs]{emacs} Bil Lewis, Dan LaLiberte, Richard M.~Stallman, + {\em et al.} \\ + {\em The GNU Emacs Lisp Reference Manual, vol.~2.} \\ + Free Software Foundation, Cambridge, Mass., edition 2.1 September 1993. + (Also available from many ftp sites.) + +\Bibitem[fsh]{fsh} Chris S.~McDonald. \\ + {\em fsh}---A functional {\Unix} command interpreter. \\ + {\em Software---Practice and Experience}, 17(10):685--700, + October 1987. + +\Bibitem[MIT Scheme]{c-scheme} Chris Hanson. \\ + {\em MIT Scheme Reference Manual.} \\ + MIT Artificial Intelligence Laboratory Technical Report 1281, + January 1991. + (Also URL + {\tt http://zurich\.ai\.mit\.edu\=emacs-html\.local\=scheme\_toc.html}) + +\Bibitem[Nelson]{Nelson} Greg Nelson, ed. \\ + {\em Systems Programming with Modula-3.} \\ + Prentice Hall, Englewood Cliffs, New Jersey, 1991. + +\Bibitem[perl]{perl} Larry Wall and Randal Schwartz. \\ + {\em Programming Perl.} \\ + O'Reilly \& Associates. + +\Bibitem[rc]{rc} Tom Duff. \\ Rc---A shell for Plan 9 and {\Unix} systems. \\ + In {\em Proceedings of the Summer 1990 UKUUG Conference}, + pages 21--33, July 1990, London.
+ (A revised version is reprinted in ``Plan 9: The early papers,'' + Computing Science Technical Report 158, AT\&T Bell Laboratories. + Also available in Postscript form as URL + \ex{ftp:{\ob}/\=research.att.com/dist/plan9doc/7}.) + +\Bibitem[Reeds]{Reeds} J.~Reeds. \\ + \ex{/bin/sh}: the biggest UNIX security loophole. \\ + 11217-840302-04TM, AT\&T Bell Laboratories (1988). + +\Bibitem[refman]{ref-man} Olin Shivers. \\ Scsh reference manual. \\ + In preparation. + +\Bibitem[S48]{S48} Richard A.~Kelsey and Jonathan A.~Rees. \\ + A tractable Scheme implementation. \\ + To appear, {\em Lisp and Symbolic Computation}, + Kluwer Academic Publishers, The Netherlands. + (Also URL {\tt ftp:/\=altdorf\.ai\.mit\.edu\=pub\=jar\=lsc.ps}) + +\Bibitem[tcl]{tcl} John~K.~Ousterhout. \\ + Tcl: An embeddable command language. \\ + In {\em The Proceedings of the 1990 Winter USENIX Conference}, + pp.~133--146. + (Also URL + {\tt ftp:{\ob}/\=ftp\.cs\.berkeley\.edu\=ucb\=tcl\=tclUsenix90.ps}) +\vfill +\end{thebibliography} + +\appendix +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\cleardoublepage +\section*{Notes} +\addcontentsline{toc}{section}{Notes} +\newcommand{\notetext}[1]{\subsection*{\{Note #1\}}} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\notetext{Agenda} +In fact, I have an additional hidden agenda. +I do believe that computational agents should be expressed as procedures +or procedure libraries, not as programs. +Scsh is intended to be an incremental step in this direction, one that +is integrated with {\Unix}. +Writing a program as a Scheme 48 module should allow the user to make it +available as both a subroutine library callable from other Scheme 48 +programs or the interactive read-eval-print loop, and, by adding a small +top-level, as a standalone {\Unix} program.
+So {\Unix} programs written this way will also be useable as linkable +subroutine libraries---giving the programmer module interfaces superior +to {\Unix}'s ``least common denominator'' of {\sc Ascii} byte streams +sent over pipes. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\notetext{No port sync} +\begin{sloppypar} +In scsh, {\Unix}' stdio file descriptors and {\Scheme}'s standard i/o ports +(\ie, the values of \ex{(current-input-port)}, \ex{(current-output-port)} and +\ex{(error-output-port)}) are not necessarily synchronised. +This is impossible to do in general, since some {\Scheme} ports are +not representable as {\Unix} file descriptors. +For example, many Scheme implementations provide ``string ports,'' +that is, ports that collect characters sent to them into memory buffers. +The accumulated string can later be retrieved from the port as a string. +If a user were to bind \ex{(current-output-port)} to such a port, it would +be impossible to associate file descriptor 1 with this port, as it +cannot be represented in {\Unix}. +So, if the user subsequently forked off some other program as a subprocess, +that program would of course not see the Scheme string port as its standard +output. +\end{sloppypar} + +To keep stdio synced with the values of {\Scheme}'s current i/o ports, +use the special redirection \ex{stdports}. +This causes 0, 1, 2 to be redirected from the current {\Scheme} standard ports. +It is equivalent to the three redirections: +\begin{code} +(= 0 ,(current-input-port)) +(= 1 ,(current-output-port)) +(= 2 ,(error-output-port))\end{code} +% +The redirections are done in the indicated order. This will cause an error if +one of the current i/o ports isn't a {\Unix} port (\eg, if one is a string +port). +This {\Scheme}/{\Unix} i/o synchronisation can also be had in {\Scheme} code +(as opposed to a redirection spec) with the \ex{(stdports->stdio)} +procedure.
+ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\notetext{Normal order} +Having to explicitly shift between processes and functions in scsh is in part +due to the arbitrary-size nature of a {\Unix} stream. +A better, more integrated approach might be to use a lazy, normal-order +language as the glue or shell language. +Then files and process output streams could be regarded as first-class values, +and treated like any other sequence in the language. +However, I suspect that the realities of {\Unix}, such as side-effects, will +interfere with this simple model. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\notetext{On-line streams} +The \ex{(port->list \var{reader} \var{port})} procedure is a batch processor: +it reads the port all the way to eof before returning a value. +As an alternative, we might write a procedure to take a port and a reader, +and return a lazily-evaluated list of values, +so that I/O can be interleaved with element processing. +A nice example of the power of Scheme's abstraction facilities is the +ease with which we can write this procedure: +it can be done with five lines of code. +\begin{code} +;;; A <lazy-list> is either +;;; (delay '()) or +;;; (delay (cons data <lazy-list>)). + +(define (port->lazy-list reader port) + (let collector () + (delay (let ((x (reader port))) + (if (eof-object? x) '() + (cons x (collector)))))))\end{code} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\notetext{Tempfile example} +For a more detailed example showing the advantages of higher-order procedures +in {\Unix} systems programming, consider the task of making random temporary +objects (files, directories, fifos, \etc) in the file system. +Most {\Unix}'s simply provide a function such as \ex{tmpnam()} that creates a +file with an unusual name, and hope for the best.
+Other {\Unix}'s provide functions that avoid the race condition between +determining the temporary file's name and creating it, but they do not +provide equivalent features for non-file objects, such as directories or +symbolic links. +\pagebreak +This functionality is easily generalised with the procedure +\codex{(temp-file-iterate \var{maker} \var{[template]})} +This procedure can be used to perform atomic transactions on +the file system involving filenames, \eg: +\begin{itemize} +\item Linking a file to a fresh backup temporary name. +\item Creating and opening an unused, secure temporary file. +\item Creating an unused temporary directory.% +\end{itemize} +% +The string \var{template} is a \ex{format} control string used to generate +a series of trial filenames; it defaults to +% +\begin{tightinset}\verb|"/usr/tmp/<pid>.~a"|\end{tightinset}\ignorespaces +% +where \ex{<pid>} is the current process' process id. +Filenames are generated by calling \ex{format} to instantiate the +template's \verb|~a| field with a varying string. + (It is not necessary for the process' pid to be a part of the filename + for the uniqueness guarantees to hold. The pid component of the default + prefix simply serves to scatter the name searches into sparse regions, so + that collisions are less likely to occur. This speeds things up, but does + not affect correctness.) + +The \ex{maker} procedure is serially called on each filename generated. +It must return at least one value; it may return multiple values. If +the first return value is \ex{\#f} or if \ex{maker} raises the ``file already +exists'' syscall error exception, \ex{temp-file-iterate} will loop, +generating a new filename and calling \ex{maker} again. +If the first return value is true, the loop is terminated, +returning whatever \ex{maker} returned. + +After a number of unsuccessful trials, \ex{temp-file-iterate} may give up +and signal an error.
+ +To rename a file to a temporary name, we write: +\begin{code} +(temp-file-iterate (\l{backup-name} + (create-hard-link old-file + backup-name) + backup-name) + ".#temp.~a") ; Keep link in cwd. +(delete-file old-file)\end{code} +Note the guarantee: if \ex{temp-file-iterate} returns successfully, +then the hard link was definitely created, so we can safely delete the +old link with the following \ex{delete-file}. + +To create a unique temporary directory, we write: +% +\codex{(temp-file-iterate (\l{dir} (create-directory dir) dir))} +% +Similar operations can be used to generate unique symlinks and fifos, +or to return values other than the new filename (\eg, an open file +descriptor or port). +\end{document} + +% LocalWords: Mips grep sed awk ls wc email flamefest philes SRC's dup int pid +% LocalWords: foobar fds perror waitpid execlp kb rc's epf pf fdes fv +% LocalWords: stdports dup'd subforms backquoted usr backquoting ref tmp +% LocalWords: buf stdin stderr stdout sync prog arg Readme xrdb xyzzy SunOS +% LocalWords: mbox txt cc preprocess Errlog exe outfile errfile PLAINTEXT des +% LocalWords: plaintext DIR perl cwd dir dsw ll conns xhost lpr ksh namespaces +% LocalWords: ms texinfo doc fd RS Wn Ansi esh zcat tex detex enscript madvise +% LocalWords: mmap stat fname eq fileinfo backquote readlink symlink fil nul +% LocalWords: bufsiz def bthp statbuf progname uid Tempfile IFS pre Ascii bp +% LocalWords: reparse setuid