ikarus/doc/ikarus-scheme-users-guide.tex

3314 lines
120 KiB
TeX
Raw Normal View History

%!TEX TS-program = xelatex
\documentclass[onecolumn, 12pt, twoside, openright, dvipdfm]{book}
\usepackage{fontspec}
\usepackage{hanging}
\usepackage{xltxtra}
\defaultfontfeatures{Scale=MatchLowercase}
2007-10-22 16:47:51 -04:00
%\setmainfont[Mapping=tex-text]{Cochin}
%\setmainfont[Mapping=tex-text]{Palatino}
2007-10-22 16:47:51 -04:00
% Main (serif) text font.  Exactly one \setmainfont line is active
% (Charis SIL); the commented-out lines are alternative choices kept for
% reference.  NOTE(review): fontspec looks these fonts up by name, so they
% presumably have to be installed on the machine building the document.
%\setmainfont[Mapping=tex-text]{Baskerville}
%\setmainfont[Mapping=tex-text]{Perpetua}
%\setmainfont[Mapping=tex-text]{Lido STF}
%\setmainfont[Mapping=tex-text]{Perpetua}
\setmainfont[Mapping=tex-text]{Charis SIL}
%\setmainfont[Mapping=tex-text]{Gentium}
%\setmainfont[Mapping=tex-text]{DejaVu Serif}
%\setmainfont[Mapping=tex-text]{Palatino}
%\setmainfont[Mapping=tex-text]{URWPalladioL}
%\setmainfont[Mapping=tex-text]{GentiumAlt}
% Sans-serif font.
\setsansfont[Mapping=tex-text]{Geneva}
% Monospaced font used for code and \texttt; the explicit Scale=0.95 is
% given here instead of relying on the Scale=MatchLowercase default.
%\setmonofont{DejaVu Sans Mono}
%\setmonofont{Monaco}
\setmonofont[Scale=0.95]{Inconsolata}
\usepackage{fancyhdr}
\usepackage{makeidx}
2007-10-22 16:47:51 -04:00
\usepackage{fancyvrb}
\makeindex
% hyperref: PDF bookmarks, document metadata and coloured links.
% The dvipdfm driver option mirrors the one passed to \documentclass.
% NOTE(review): hyperref documents pagebackref as a load-time package
% option; confirm that setting it through \hypersetup takes effect.
\usepackage[dvipdfm,CJKbookmarks,bookmarks=true,bookmarksopen=true]{hyperref}
\hypersetup{
pdftitle={Ikarus Scheme User's Guide},
pdfauthor={Abdulaziz Ghuloum},
pdfkeywords={Scheme, R6RS, Compiler, Ikarus},
bookmarksnumbered=true,
pagebackref=true,
breaklinks=true,
% pdfview=FitH, % Or try pdfstartview={FitV}. This led to incorrect bookmarks.
urlcolor=blue,
colorlinks=true,
citecolor=blue, % colour of citation links
linkcolor=blue,
}
\usepackage{rotating}
\usepackage{multicol,ragged2e}
% Running heads (fancyhdr) for the two-sided layout.
% \sectionmark makes the right mark read "<section number> <title>".
\renewcommand{\sectionmark}[1]{\markright{\thesection\ #1}}
% Start from empty headers and footers, then fill in the header fields:
% page number on the outer edge, section mark on odd pages, chapter mark
% on even pages.
\fancyhf{}
\fancyhead[LE,RO]{\bfseries\thepage}
\fancyhead[LO]{\bfseries\rightmark}
\fancyhead[RE]{\bfseries\leftmark}
% No rules under the header or above the footer.
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}
% \headheight is a length register, not a macro: it must be set with
% \setlength.  Redefining it with \renewcommand replaces the register by a
% text macro and breaks any code that assigns to or compares the length.
\setlength{\headheight}{15pt}
% Cross-reference helpers.  Code listings are numbered as figures, so the
% \code... variants simply forward to the \fig... ones.
%   \figref{label}      -> "figure N"
%   \figrefpage{label}  -> "figure N, p. P"
%   \secref{label}      -> "section N"
\newcommand{\figref}[1]{figure~\ref{#1}}
\newcommand{\coderef}[1]{\figref{#1}}
\newcommand{\figrefpage}[1]{\figref{#1}, p.~\pageref{#1}}
\newcommand{\coderefpage}[1]{\figrefpage{#1}}
\newcommand{\secref}[1]{section~\ref{#1}}
% \deflabelref{name}{label} prints the name in typewriter type followed by
% the page of the given label; \defref{name} is the common case in which
% the label is the name itself.
\newcommand{\deflabelref}[2]{\texttt{#1}~(page~\pageref{#2})}
\newcommand{\defref}[1]{\texttt{#1}~(page~\pageref{#1})}
% \rnrs{n} typesets the report name "RnRS" with n as an upright superscript.
% The macro is used inside \chapter and \section titles, which hyperref
% also turns into PDF bookmarks; math is not allowed there, so a plain-text
% form ("R6RS") is supplied for the bookmark string via \texorpdfstring.
\newcommand{\rnrs}[1]{\texorpdfstring{R$^{\mathrm{#1}}$RS}{R#1RS}}
% \BoxedText{lead-in}{text}: a centered call-out.
%   #1  lead-in, set large and emphasized (e.g. "Note:")
%   #2  body text following the lead-in
% The content is set in a single 4.6in-wide paragraph column with a
% vertical rule on each side, with .05in of vertical space above and below.
\newcommand{\BoxedText}[2]{
\vspace{.05in}
\begin{center}
\begin{tabular}{|p{4.6in}|} {\large \emph{#1}} #2 \end{tabular}
\end{center}
\vspace{.05in}
}
% \testfile{label}{n}: the file name "tests-<number of label>-<n>.ss",
% set in typewriter type.
\newcommand{\testfile}[2]{\texttt{tests-\ref{#1}-{#2}.ss}}
2007-10-22 16:47:51 -04:00
% \idxtt{name}: print name in typewriter type and add it to the index,
% sorted under the plain name and displayed in typewriter type.
% No spaces are placed around "@": makeindex treats blanks in the sort key
% as significant, so "name @ ..." would not merge with the "name@..."
% entries generated by \idxlabeldefun.
\newcommand{\idxtt}[1]{\index{#1@\texttt{#1}}\texttt{#1}}
% Language environment: a centered 4.5in-wide column with side rules,
% headed "Input Language:", whose body is a centered three-column table
% (right-aligned, centered, left-aligned, with a single space between
% columns and no outer padding).
% NOTE(review): presumably intended for grammar productions of the form
% "lhs ::= rhs" -- confirm against its uses.
\newenvironment{Language}
{
\begin{center}
\begin{tabular}{|p{4.5in}|} {\large \emph{Input Language:}}
\begin{center}
\begin{tabular}{@{}r@{ }c@{ }l@{}}
}{
\end{tabular}
\end{center}
\end{tabular}
\end{center}
}
2007-10-22 16:47:51 -04:00
% CodeInline: verbatim listing indented by 2em, with 1pt rules above and
% below.
\DefineVerbatimEnvironment{CodeInline}{Verbatim}{%
  gobble=0,
  xleftmargin=2em,
  xrightmargin=0em,
  %numbers=left, numbersep=2mm,
  frame=lines,
  framerule=1pt}
% CodeInlineIdx: same as CodeInline, but indented by 3em and with line
% numbers in the left margin.
\DefineVerbatimEnvironment{CodeInlineIdx}{Verbatim}{%
  gobble=0,
  xleftmargin=3em,
  xrightmargin=0em,
  numbers=left,
  numbersep=1ex,
  frame=lines,
  framerule=1pt}
2007-11-19 23:19:42 -05:00
% \idxlabeldefun{key}{index-text}{label}{heading}{kind}
% Starts the description of a definition: a full-width 2pt rule followed
% by a heading line.
%   #1  index sort key
%   #2  text shown in the index (typewriter type)
%   #3  label name, for \ref / \pageref
%   #4  heading text, set in \Large typewriter type
%   #5  bold text pushed to the right-hand end of the heading line
% \phantomsection gives hyperref an anchor for the index entry and label.
% NOTE(review): the line break after \rule{...} inside the body becomes a
% space, which appears to be the break point that moves the heading onto
% its own line below the rule -- do not "protect" that line end with %.
\newcommand{\idxlabeldefun}[5]{
\vspace{1ex}
\rule{\textwidth}{2pt}
{\phantomsection\index{#1@\texttt{#2}}\label{#3}{\Large\texttt{#4}}\hfill\textbf{#5}}\\}
2007-11-19 23:19:42 -05:00
% Shorthands for \idxlabeldefun.
%   \idxdefun{key}{text}{kind}: the index key doubles as the label, and
%                               text is both index text and heading.
%   \defun{name}{kind}:         name is used for key, index text, label
%                               and heading alike.
\newcommand{\idxdefun}[3]{\idxlabeldefun{#1}{#2}{#1}{#2}{#3}}
\newcommand{\defun}[2]{\idxlabeldefun{#1}{#1}{#1}{#1}{#2}}
\begin{document}
\frontmatter
\VerbatimFootnotes
\title{Ikarus Scheme User's Guide}
\author{Abdulaziz Ghuloum}
% Hand-built title page, without running heads or page numbers.
\pagestyle{empty}
\mbox{}
\vspace{3in}
% \fstpagefont selects the font used for every title-page element.
\newcommand{\fstpagefont}[0]
{\fontspec{Charis SIL}}
% Document title at 66pt.
{ \fontsize{66}{66} \fstpagefont{}
\noindent Ikarus Scheme\\ User's Guide\\ }
% Full-width 6pt rule, then the version number at 18pt, flushed right.
% NOTE: the version string is hard-coded here.
\noindent \rule{\textwidth}{6pt}
{\fontsize{18}{18} \fstpagefont{}
\mbox{} \hfill Version~0.0.4 }
\vfill
% Author at 24pt and build date at 18pt, both flushed right.
{ \fontsize{24}{24} \fstpagefont{}
\hfill{} Abdulaziz Ghuloum}
{ \fontsize{18}{18} \fstpagefont{}
\hfill{} \today \\}
\newpage
\mbox{}
\vfill{}
%\addcontentsline{toc}{section}{Copyrights}
\noindent
Ikarus Scheme User's Guide \hfill{} (Version 0.0.4)\\
Copyright \copyright{} 2007,2008 Abdulaziz Ghuloum\\
{\small
\noindent
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 3 as
published by the Free Software Foundation.
\\ \\
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
\\ \\
You should have received a copy of the GNU General Public License
along with this program. If not, see
\url{http://www.gnu.org/licenses/}.
}
% Permission is granted to copy, distribute and/or modify this
% document under the terms of the GNU Free Documentation License,
% Version 1.2 published by the Free Software
% Foundation; with no Invariant Sections, the Front-Cover Texts
% being \emph{``Ikarus Scheme User's Guide''}, and
% no Back-Cover Texts. A copy of the license is included in the
% section entitled ``GNU Free Documentation License''.
\newpage
\pagestyle{fancy}
\phantomsection
\addcontentsline{toc}{section}{Contents}
\tableofcontents
\newpage
\mainmatter
\setlength{\parindent}{0pt}
\setlength{\parskip}{2.0ex plus 0ex minus 0ex}
\chapter{Getting Started}
\section{Introduction}
Ikarus Scheme is an implementation of the Scheme programming
language. The preliminary release of Ikarus implements the majority
of the features found in the current standard, the
Revised$^\mathrm{6}$ report on the algorithmic language
Scheme\cite{r6rs} including full \rnrs{6} library and script syntax,
syntax-case, unicode strings, bytevectors, user-defined record
types, exception handling, conditions, and enumerations.
User-defined \rnrs{6} libraries can be compiled in-memory on the fly
or compiled to disk and loaded in subsequent runs.
In addition to supporting \rnrs{6} (and most of the features found
in the previous \rnrs{n} standards), Ikarus supports powerful
interprocess communication facilities including support for
communication with child processes via pipes and with remote
processes via TCP and UDP sockets. The facility also allows for
both synchronous and asynchronous communication so that a Scheme
program running in Ikarus can communicate with many processes
concurrently.
Ikarus also supports basic foreign function interface (FFI)
facilities. This allows the programmer to define accessors and
mutators for native data structures. It also allows for Scheme
programs to dynamically load any library found on the host machine.
Native procedures and Scheme procedures can call each other through
the call-out and call-back facilities of Ikarus.
%The main purpose behind releasing Ikarus early is to give Scheme
%programmers the opportunity to experiment with the various new
%features that were newly introduced in \rnrs{6}. The most important
%of such features is the ability to structure large programs into
%libraries; where each library extends the language through
%procedural and syntactic abstractions. Many useful libraries can be
%written using the currently supported set of \rnrs{6} features
%including text processing tools, symbolic logic systems,
%interpreters and compilers, and many mathematical and scientific
%packages. It is my hope that this release will encourage the
%Scheme community to write and to share their most useful \rnrs{6}
%libraries.
2007-10-22 16:47:51 -04:00
\newpage
\section{Technology overview}
Ikarus Scheme provides the programmer with many advantages:
\textbf{Optimizing code generator:} The compiler's backend employs
state of the art technologies in code generation that produce fast
efficient machine code. When developing computationally intensive
programs, one is not constrained by using a slow interpreter.
\textbf{Fast incremental compilation:} Every library and script is
quickly compiled to native machine code. When developing large
software, one is not constrained by how slow the batch compiler
runs.
\textbf{Robust and fine-tuned standard libraries:} The standard
libraries are written such that they perform as much error checking
as required to provide a safe and fast runtime environment.
\textbf{Multi-generational garbage collector:} The
BiBOP\cite{dybvig:sm} based garbage collector used in Ikarus allows
the runtime system to expand its memory footprint as needed. The
entire 32-bit virtual address space could be used and unneeded
memory is released back to the operating system.
\textbf{32-bit and 64-bit computing:} Ikarus supports both the
Intel-x86 and the AMD-64 architectures. 64-bit computing allows the
programmer to utilize larger address space (larger than 4GB) and
provides a greater range for fixnums (61-bit fixnums). Running in
32-bit mode, however, makes more efficient utilization of resources
due to the smaller memory footprint for most data structures.\\
(64-bit support is experimental at this stage of development.)
\textbf{Supports many operating systems:} Ikarus runs on the most
popular and widely used operating systems for servers and personal
computers. The supported systems include Mac~OS~X,
GNU/Linux, FreeBSD, NetBSD, and Microsoft Windows.
\section{System requirements}
This section provides an overview of the hardware and software
requirements needed for running Ikarus.
\subsection{Hardware}
Ikarus Scheme runs in 32-bit mode on the IA-32 (\emph{x86})
architecture supporting SSE2 extensions. It also runs in 64-bit
mode on platforms supporting the AMD-64 architecture. This includes
the Athlon 64, Sempron 64, and Turion 64 processors from AMD and the
Pentium 4, Xeon, Celeron, Pentium M, Core, and Core2 processors from
Intel. Ikarus does not run on Intel Pentium III or earlier
processors.
The Ikarus compiler generates SSE2 instructions to handle Scheme's
IEEE floating point representation (\emph{flonums}) for inexact
numbers.
\subsection{Operating systems}
Ikarus is tested under the following operating systems:
\begin{itemize}
\item Mac OS X versions 10.4 and 10.5.
\item Linux 2.6.18 (Debian, Fedora, Gentoo, and Ubuntu).
\item FreeBSD version 6.2.
\item NetBSD version 3.1.
\item Microsoft Windows XP (using Cygwin 1.5.24).
\end{itemize}
\subsection{Additional software}
\begin{itemize}
\item\textbf{GMP:} Ikarus uses the GNU Multiple Precision Arithmetic
Library (GMP) for some bignum arithmetic operations. To build
Ikarus from scratch, GMP version 4.2 or better must be installed
along with the required header files. Pre-built GMP packages are
available for most operating systems. Alternatively, GMP can be
downloaded from \\
\url{http://gmplib.org/}.
\item\textbf{FFI:}
The \texttt{libffi} library (version \texttt{3.0.6}) can be utilized
to enable Scheme procedures to call and be called from native
procedures (see Chapter~\ref{chapter:foreign} for details).
To enable
\texttt{libffi} while building Ikarus, one must pass
2008-11-15 11:03:22 -05:00
\texttt{--enable-libffi} to the \texttt{configure} script along with
the paths to \texttt{libffi}'s include and lib directory.
The \texttt{libffi} library can be obtained from
\url{http://sourceware.org/libffi/}.
FFI support is optional. It is not required if all one needs is
writing pure Scheme code.
\item\textbf{GCC:} The GNU C Compiler is required to build the Ikarus
executable (e.g. the garbage collector, loader, and OS-related
runtime). GCC versions 4.1 and 4.2 were successfully used to build
Ikarus.
2007-11-19 23:19:42 -05:00
\item\textbf{Autoconf and Automake:} The GNU Autoconf (version 2.61)
and GNU Automake (version 1.10) tools are required if one
wishes to modify the Ikarus source base. They are not
required to build the official release of Ikarus.
\item\textbf{\XeLaTeX{}:} The \XeLaTeX\ typesetting system is
required for building the documentation. \XeLaTeX\ (and \XeTeX) is
an implementation of the \LaTeX\ (and \TeX) typesetting system.
\XeLaTeX\ can be obtained from \url{http://scripts.sil.org/xetex}
and is included with \TeX-Live\footnote{
\url{http://tug.org/texlive/}} and
Mac-\TeX\footnote{\url{http://tug.org/mactex/}} distributions.
\end{itemize}
\section{Installation}
If you are familiar with installing Unix software on your system,
then all you need to know is that Ikarus uses the standard
installation method found in most other Unix software. Simply run
the following commands from the shell:
\begin{verbatim}
$ tar -zxf ikarus-n.n.n.tar.gz
$ cd ikarus-n.n.n
$ ./configure [--prefix=path] [CFLAGS=-I/dir] [LDFLAGS=-L/dir]
$ make
$ make install
$
\end{verbatim}
The rest of this section describes the build process in more
detail. It is targeted at users who are unfamiliar with the steps
mentioned above.
\subsection{Installation details}
\begin{enumerate}
\item Download the Ikarus source distribution. The source is
distributed as a \texttt{gzip}-compressed \texttt{tar} file
(\texttt{ikarus-n.n.n.tar.gz} where \texttt{n.n.n} is a 3-digit
number indicating the current revision). The latest revision can be
downloaded from the following URL:\\
\url{http://www.cs.indiana.edu/~aghuloum/ikarus/}
\item Unpack the source distribution package. From your shell
command, type:
\begin{verbatim}
$ tar -zxf ikarus-n.n.n.tar.gz
$
\end{verbatim}
This creates the base directory \texttt{ikarus-n.n.n}.
\item Configure the build system by running the \texttt{configure}
script located in the base directory. To do this, type the
following commands:
\begin{verbatim}
$ cd ikarus-n.n.n
$ ./configure
checking build system type... i386-apple-darwin8.10.1
checking host system type... i386-apple-darwin8.10.1
...
configure: creating ./config.status
config.status: creating Makefile
config.status: creating src/Makefile
config.status: creating scheme/Makefile
config.status: creating doc/Makefile
config.status: executing depfiles commands
$
\end{verbatim}
This configures the system to be built then installed in the
system-wide location (binaries are installed in
\texttt{/usr/local/bin}). If you wish to install it
in another location (e.g. in your home directory), you can supply
a \texttt{--prefix} location to the \texttt{configure} script as
follows:
\begin{verbatim}
$ ./configure --prefix=/path/to/installation/location
\end{verbatim}
The \texttt{configure} script will fail if it cannot locate the
location where GMP is installed. If running \texttt{configure}
fails to locate GMP, you should supply the location in which the GMP
header file, \texttt{gmp.h}, and the GMP library file,
\texttt{libgmp.so}, are installed. This is done by supplying the
two paths in the \texttt{CFLAGS} and \texttt{LDFLAGS} arguments:
\begin{verbatim}
$ ./configure CFLAGS=-I/path/to/include LDFLAGS=-L/path/to/lib
\end{verbatim}
If you wish to enable support for accessing and calling to/from
foreign libraries, you need to configure Ikarus with the
\texttt{--enable-libffi} option and supply the appropriate
\texttt{CFLAGS} and \texttt{LDFLAGS} as needed.
\begin{verbatim}
$ ./configure --enable-libffi \
[CFLAGS=-I/path/to/libffi/include] \
[LDFLAGS=-L/path/to/libffi/lib]
\end{verbatim}
\item Build the system by running:
\begin{verbatim}
$ make
\end{verbatim}
This performs two
tasks. First, it builds the \texttt{ikarus} executable from the C
files located in the \texttt{src} directory. It then uses the
\texttt{ikarus} executable and the pre-built
\texttt{ikarus.boot.orig} boot file to rebuild the Scheme boot image
file \texttt{ikarus.boot} from the Scheme sources located in the
\texttt{scheme} directory.
\item Install Ikarus by typing:
\begin{verbatim}
$ make install
\end{verbatim}
If you are installing Ikarus in a system-wide location, you might
need to have administrator privileges (use the \texttt{sudo} or
\texttt{su} commands).
\item Test that Ikarus runs from the command line.
\begin{verbatim}
$ ikarus
Ikarus Scheme version 0.0.4
Copyright (c) 2006-2008 Abdulaziz Ghuloum
>
\end{verbatim}
If you get the prompt, then Ikarus was successfully installed on
your system. You may need to update the \texttt{PATH} variable in
your environment to contain the directory in which the
\texttt{ikarus} executable was installed.
Do not delete the \texttt{ikarus-n.n.n} directory from which you
configured, built, and installed Ikarus. It will be needed if you
decide at a later time to uninstall Ikarus.
\end{enumerate}
\subsection{Uninstalling Ikarus}
To uninstall Ikarus, use the following steps:
\begin{verbatim}
$ cd path/to/ikarus-n.n.n
$ make uninstall
$
\end{verbatim}
\section{\index{Command-line switches}Command-line switches}
The \texttt{ikarus} executable recognizes a few command-line
switches that influence how Ikarus starts.
\begin{itemize}
\item \texttt{ikarus -h}
The presence of the \texttt{-h} flag causes \texttt{ikarus} to
display a help message and then exit. The help message summarizes the
command-line switches. No further action is performed.
\item \texttt{ikarus -b path/to/boot/file.boot}
The \texttt{-b} flag (which requires an extra argument) directs
\texttt{ikarus} to use the specified boot file as the initial system
boot file. \index{Boot files} The boot file is a binary file that
contains all the code and data of the Scheme system. In the absence
2007-11-19 23:19:42 -05:00
of \texttt{-b} flag, the executable will use the default boot file.
Running \texttt{ikarus~-h} shows the location where the default boot
file was installed.
The rest of the command-line arguments are recognized by the
standard Scheme run time system. They are processed after the
boot file is loaded.
\item \texttt{ikarus files ... --r6rs-script script-file arguments ...}
\index{R6RS Script@\rnrs{6} Script} The \texttt{--r6rs-script}
argument instructs Ikarus that the supplied file is an \rnrs{6}
script. The optional list of \texttt{files} must be paths to files,
each containing a set of libraries that Ikarus must load,
sequentially, before running the \rnrs{6} script
\texttt{script-file}. See Section~\ref{sec:scripts} for a short
introduction to writing \rnrs{6} scripts. The script file name and
any additional optional \texttt{arguments} can be obtained by
calling the \idxtt{command-line} procedure.
\begin{verbatim}
$ cat test.ss
(import (rnrs))
(write (command-line))
(newline)
$ ikarus --r6rs-script test.ss hi there
("test.ss" "hi" "there")
$
\end{verbatim}
\item \texttt{ikarus files ... [-- arguments ...]}
The lack of an \texttt{--r6rs-script} argument causes Ikarus to
start in interactive mode. Each of the \texttt{files} is first
loaded, in the interaction environment. The interaction environment
initially contains all the bindings exported from the
\texttt{(ikarus)} library (see Chapter~\ref{chapter:ikarus}). The
optional \texttt{arguments} following the \texttt{--} marker can be
obtained by calling the \texttt{command-line} procedure. In
interactive mode, the first element of the returned list will be the
string \texttt{"*interactive*"}, corresponding to the script name in
\rnrs{6}-script mode.
\BoxedText{Note:}{The interactive mode is intended for quickly
experimenting with the built-in features. It is intended neither
for developing applications nor for writing any substantial pieces
of code.}
\end{itemize}
\section{Using \texttt{scheme-script}}
2007-11-19 23:19:42 -05:00
Scheme scripts can be executed using the
\texttt{ikarus~--r6rs-script~\textit{script-name}} command as
described in the previous section. For convenience, Ikarus
follows the \rnrs{6} recommendations and installs a wrapper program
called \texttt{scheme-script}. Typically, a script you write would
start with a \texttt{\#!}\ line that directs your operating system
to the interpreter used to evaluate the script file. The following
example shows a very simple script that uses the
\texttt{scheme-script} command.
\begin{CodeInline}
#!/usr/bin/env scheme-script
(import (rnrs))
(display "Hello World\n")
\end{CodeInline}
If the above script was placed in a file called
\texttt{hello-world}, then one can make it executable using the
\texttt{chmod} Unix command.
2007-11-19 23:19:42 -05:00
\begin{verbatim}
$ cat hello-world
#!/usr/bin/env scheme-script
(import (rnrs))
(display "Hello World\n")
$ chmod 755 hello-world
$ ./hello-world
Hello World
$
\end{verbatim}
\BoxedText{Under Mac OS X,}{if a script name ends with the
\texttt{.command} extension, then it can be executed from the Finder
by double-clicking on it. This brings up a terminal window in which
the script is executed. The \texttt{.command} extension can be
hidden from the \emph{Get Info} item from the Finder's File menu.}
2007-11-19 23:19:42 -05:00
\section{Mapping library names to file names}
2007-11-19 23:19:42 -05:00
The name of an \rnrs{6} library consists of a non-empty list of
identifiers (symbols), followed by an optional version number. All
of the standard \rnrs{6} libraries are built into Ikarus, thus
importing any one of them does not require any special action other
than listing the library name in the \texttt{import} part of a
library or a script. The same holds for the \texttt{(ikarus)}
library (chapter~\ref{chapter:ikarus},
page~\pageref{chapter:ikarus}).
When importing a user library, Ikarus uses a simple mechanism
to map library names to file names. A library name is converted to
a file path by joining the library identifiers with a path
separator, e.g. \verb|"/"|.
\begin{center}
\begin{tabular}{lcl}
Library Name & \hspace{2em}$\Rightarrow$\hspace{2em} & File name \\
\hline
\verb|(foo)| & $\Rightarrow$ & \verb|foo| \\
\verb|(foo bar)| & $\Rightarrow$ & \verb|foo/bar| \\
\verb|(foo bar baz)| & $\Rightarrow$ & \verb|foo/bar/baz|
\end{tabular}
\end{center}
Having mapped a library name to a file path, Ikarus attempts to
locate that file in one of several locations. The locations
attempted depend on two settings: the search path and the file
extension set (e.g., \verb|.sls|, \verb|.ss|, \verb|.scm|, etc.).
First, Ikarus attempts to locate the file in the current working
directory from which Ikarus was invoked. In the current working
directory, Ikarus enumerates all file extensions first before
searching other locations. If the file is not found in the current
directory, Ikarus tries to find it in the Ikarus library directory.
The Ikarus library directory is determined when Ikarus is installed
(based on the \texttt{--prefix} argument that was passed to the
\texttt{configure} script). If Ikarus fails to locate the library
file, it raises an exception and exits.
%See
%Chapter~\ref{chapter:contributed} for more details about the library
%locations.
\BoxedText{Tip:}{Use simple library names for the libraries that
you define. Library names that contain non-printable characters,
complex punctuations, or unicode may pose a challenge for some
operating systems. If Ikarus cannot find a library, it will raise
an error listing the locations in which it looked, helping you move
the library file to a place where Ikarus can find it.}
\section{Writing cross-implementation libraries}
When searching for a library, Ikarus appends an extension (e.g.,
\verb|.ss|) to the appropriate file name (e.g., \verb|foo/bar|).
The initial set of file extensions is: \\
\verb|/main.ikarus.sls|,
\verb|/main.ikarus.ss|, \verb|/main.ikarus.scm|,
\verb|/main.sls|, \verb|/main.ss|, \verb|/main.scm|,
\verb|.ikarus.sls|,
\verb|.ikarus.ss|, \verb|.ikarus.scm|,
\verb|.sls|, \verb|.ss|, and \verb|.scm|.
The list of file extensions is searched sequentially. As a
consequence, files ending with the \verb|.ikarus.*| extensions are
given precedence over files that have generic Scheme extensions.
The rationale for this behavior is to facilitate writing
cross-implementation libraries: ones that take advantage of
implementation-specific features, while at the same time
provide a fail-safe alternative for other \rnrs{6}
implementations.
Consider for example a program which would like to use the
\verb|pretty-print| procedure to format some code, and suppose
2008-11-15 11:03:22 -05:00
further that pretty printing is just a nice add-on (e.g., using
\verb|write| suffices, but pretty-printing is \emph{just prettier}).
Ikarus exports a good pretty-printing facility in its
\verb|(ikarus)| library. However, since \verb|pretty-print| is not
a standard procedure, a program that uses it would be rendered
unportable to other \rnrs{6} Scheme implementations.
The programmer can put the \verb|.ikarus.*| extensions to use in
this situation. First, the programmer writes two versions of a
\verb|(pretty-printing)| library: one for use by Ikarus, and one
portable for other implementations.
\begin{CodeInline}
(library (pretty-printing) ;;; this is pretty-printing.ikarus.ss
(export pretty-print) ;;; can only be used by Ikarus
(import (only (ikarus) pretty-print)))
\end{CodeInline}
\begin{CodeInline}
(library (pretty-printing) ;;; this is pretty-printing.sls
(export pretty-print) ;;; *portable* though not very pretty.
(import (rnrs)) ;;; for any other implementation
(define (pretty-print x port)
(write x port)
(newline port)))
\end{CodeInline}
The \verb|/main.*| extensions serve a different purpose. Oftentimes,
a set of libraries is distributed together as a package and
it is convenient for the programmer to group related files in
directories. If a package contains the libraries \verb|(foo)|,
\verb|(foo core)|, and \verb|(foo compat)|, then putting all such
library files together in one directory makes it easier to
package, install, and remove these libraries en masse. The layout
of the package would look like:
% Code: verbatim environment in which \, { and } keep their usual meaning
% (commandchars), so that commands such as \textrm{...} can be used to
% annotate the listing.  The commented-out options are unused alternatives.
\DefineVerbatimEnvironment{Code}{Verbatim}
{%baselinestretch=1.1,
%frame=single,
%framerule=0.5pt,
commandchars=\\\{\}}
\begin{Code}
foo/README : {\textrm{ignored by Ikarus}}
foo/COPYING :
foo/main.ss : (foo) {\textrm{implementation independent}}
foo/core.ss : (foo core)
foo/compat.ss : (foo compat) {\textrm{default \rnrs{6} library}}
foo/compat.ikarus.ss : {\textrm{specific for Ikarus}}
foo/compat.mzscheme.ss : {\textrm{specific for MzScheme}}
\end{Code}
By default, installing Ikarus
places a set of contributed libraries in the
\verb|/usr/local/lib/ikarus| directory. If a \verb|--prefix DIR|
argument was supplied to \texttt{configure}, then the libraries are
installed in the \verb|DIR/lib/ikarus| directory.
You may install additional libraries into the Ikarus library
directory. Doing so makes them available for \texttt{import} into
other libraries and scripts regardless of where the importing code is
located or the current directory in which it is executed.
\section{Defining \texttt{IKARUS\_LIBRARY\_PATH}}
\index{ikarus library path@\texttt{IKARUS\_LIBRARY\_PATH}}
There may be situations in which you may wish to install your own
libraries into a different location. For example, you may not have
sufficient administrative privileges to write to the system
directory, or you may wish to keep your own libraries separate from
the standard libraries. Whatever the reason is, you can store your
library files in any location you want and set up the
\verb|IKARUS_LIBRARY_PATH| environment variable to point to these
locations. The value of \verb|IKARUS_LIBRARY_PATH| is a
colon-separated list of directories in which Ikarus will search.
For example, suppose your script imports the
\texttt{(streams~derived)} library. First, Ikarus will map the
library name to the file path \verb|streams/derived.ss|. Suppose
that Ikarus was installed using the \verb|--prefix /usr/local|
configuration option, and suppose further that the value of
\verb|IKARUS_LIBRARY_PATH| is set by the user to be
\verb|/home/john/ikarus-libraries:/home/john/srfis|. Ikarus will
search in the following locations in sequence until it finds the
file it is looking for.
\begin{verbatim}
./streams/derived.ss
/home/john/ikarus-libraries/streams/derived.ss
/home/john/srfis/streams/derived.ss
/usr/local/lib/ikarus/streams/derived.ss
\end{verbatim}
The method in which the value of \verb|IKARUS_LIBRARY_PATH| is
defined is typically shell dependent. If you use GNU Bash, you
typically set the values of environment variables in the
\verb|~/.bash_profile| or \verb|~/.bashrc| file by adding the
following lines:
\begin{verbatim}
IKARUS_LIBRARY_PATH=/path/to/some/directory:/and/another
export IKARUS_LIBRARY_PATH
\end{verbatim}
2007-10-22 16:47:51 -04:00
\chapter{\rnrs{6} Crash Course}
The major difference between \rnrs{5} and \rnrs{6} is the way
in which programs are loaded and evaluated.
In \rnrs{5}, Scheme implementations typically start as an
interactive session (often referred to as the REPL, or
read-eval-print-loop). Inside the interactive session, the user
enters definitions and expressions one at a time using the keyboard.
Files, which also contain definitions and expressions, can be loaded
and reloaded by calling the \texttt{load} procedure. The
environment in which the interactive session starts often contains
implementation-specific bindings that are not found in \rnrs{5} and
users may redefine any of the initial bindings. The semantics of
2007-10-22 16:47:51 -04:00
loading a file depends on the state of the environment at the time
the file contents are evaluated.
2007-11-19 23:19:42 -05:00
\index{R6RS Script@\rnrs{6} Script!Import}
%
2007-10-22 16:47:51 -04:00
\rnrs{6} differs from \rnrs{5} in that it specifies how \emph{whole
programs}, or scripts, are compiled and evaluated. An \rnrs{6}
script is closed in the sense that all the identifiers found in the
body of the script must either be defined in the script or imported
from a library. \rnrs{6} also specifies how \emph{libraries} can be
defined and used. While files in \rnrs{5} are typically
\emph{loaded} imperatively into the top-level environments, \rnrs{6}
libraries are \emph{imported} declaratively in scripts and in other
\rnrs{6} libraries.
2007-10-22 16:47:51 -04:00
\section{\label{sec:scripts}Writing a simple script}
An \rnrs{6} script is a set of definitions and expressions preceded
by an \texttt{import} form. The \texttt{import} form specifies
the language (i.e. the variable and keyword bindings) in which the
script body is written. A very simple example of an \rnrs{6}
script is listed below.
\index{Examples!Hello World}
2007-10-22 16:47:51 -04:00
\begin{CodeInline}
#!/usr/bin/env scheme-script
(import (rnrs))
(display "Hello World!\n")
\end{CodeInline}
The \texttt{import} form imports the \texttt{(rnrs)} library. All the
bindings exported from the \texttt{(rnrs)} library are made
available to be used within the body of the script.
2007-10-22 16:47:51 -04:00
The exports of the \texttt{(rnrs)} library include variables
(e.g. \texttt{cons}, \texttt{car}, \texttt{display}, etc.) and
keywords (e.g. \texttt{define}, \texttt{lambda}, \texttt{quote},
etc.). The call to \texttt{display} prints the string \texttt{Hello World!}
followed by a new line character.
In addition to expressions, such as the call to \texttt{display} in
the previous example, a script may define some variables. The
script below defines the variable \texttt{greeting} and calls the
procedure bound to it.
\begin{CodeInline}
#!/usr/bin/env scheme-script
(import (rnrs))
(define greeting
  (lambda ()
    (display "Hello World!\n")))
(greeting)
\end{CodeInline}
Additional keywords may be defined within a script. In the example
below, we define the \texttt{(do-times n exprs ...)} macro that
evaluates the expressions \texttt{exprs} \texttt{n} times. Running
the script displays \texttt{Hello World} 3 times.
\newpage
2007-10-22 16:47:51 -04:00
\begin{CodeInline}
#!/usr/bin/env scheme-script
(import (rnrs))
(define greeting
  (lambda ()
    (display "Hello World!\n")))
(define-syntax do-times
  (syntax-rules ()
    [(_ n exprs ...)
     (let f ([i n])
       (unless (zero? i)
         exprs ...
         (f (- i 1))))]))
(do-times 3 (greeting))
\end{CodeInline}
2007-10-22 16:47:51 -04:00
\section{Writing simple libraries}
2007-10-22 16:47:51 -04:00
A script is intended to be a small piece of the program---useful
abstractions belong to libraries. The \texttt{do-times} macro that
was defined in the previous section may be useful in places other
than printing greeting messages. So, we can create a small library,
\texttt{(iteration)}, that contains common iteration forms.
2007-10-22 16:47:51 -04:00
An \rnrs{6} library form is made of four essential parts: (1) the
library name, (2) the set of identifiers that the library exports,
(3) the set of libraries that the library imports, and (4) the body
of the library.
2007-10-22 16:47:51 -04:00
The library name can be any non-empty list of identifiers.
\rnrs{6}-defined libraries include \texttt{(rnrs)},
\texttt{(rnrs~unicode)}, \texttt{(rnrs~bytevectors)}, and so on.
2007-10-22 16:47:51 -04:00
The library exports are a set of identifiers that are made available
to importing libraries. Every exported identifier must be bound: it
may either be defined in the library or imported using the
\texttt{import} form. Library exports include variables, keywords,
record names, and condition names.
2007-10-22 16:47:51 -04:00
Library imports are similar to script imports: they specify the set
of libraries whose exports are made visible within the body of the
library.
2007-10-22 16:47:51 -04:00
\index{Invoke}
The body of a library contains definitions (variable, keyword,
record, condition, etc.) followed by an optional set of expressions.
The expressions are evaluated for side effect when needed.
2007-10-22 16:47:51 -04:00
The \texttt{(iteration)} library may be written as follows:
\begin{CodeInline}
(library (iteration)
  (export do-times)
  (import (rnrs))
  (define-syntax do-times
    (syntax-rules ()
      [(_ n exprs ...)
       (let f ([i n])
         (unless (zero? i)
           exprs ...
           (f (- i 1))))])))
\end{CodeInline}
To use the \texttt{(iteration)} library in our script, we add the
name of the library to the script's \texttt{import} form. This
makes all of \texttt{(iteration)}'s exported identifiers, e.g.
\texttt{do-times}, visible in the body of the script.
\begin{CodeInline}
#!/usr/bin/env scheme-script
(import (rnrs) (iteration))
(define greeting
  (lambda ()
    (display "Hello World!\n")))
(do-times 3 (greeting))
\end{CodeInline}
\section{\rnrs{6} record types}
2007-10-22 16:47:51 -04:00
\rnrs{6} provides ways for users to define new types, called record
types. A record is a fixed-size data structure with a unique type
(called a record type). A record may have any finite number of
fields that hold arbitrary values. This section briefly describes
what we expect to be the most commonly used features of the record
system. Full details are in the \rnrs{6} Standard Libraries
document\cite{r6rs:lib}.
2007-10-22 16:47:51 -04:00
\subsection{Defining new record types}
To define a new record type, use the \texttt{define-record-type}
form. For example, suppose we want to define a new record type for
describing points, where a point is a data structure that has two
fields to hold the point's $x$ and $y$ coordinates. The following
definition achieves just that:
\begin{CodeInline}
(define-record-type point
(fields x y))
\end{CodeInline}
The above use of \texttt{define-record-type} defines the following
procedures automatically for you:
\begin{itemize}
\item The constructor \texttt{make-point} that takes two arguments,
\texttt{x} and \texttt{y} and returns a new record whose type is
point.
\item The predicate \texttt{point?}\ that takes an arbitrary value
and returns \texttt{\#t} if that value is a point, \texttt{\#f}
otherwise.
\item The accessors \texttt{point-x} and \texttt{point-y} that,
given a record of type point, return the value stored in the
\texttt{x} and \texttt{y} fields.
\end{itemize}
Both the \texttt{x} and \texttt{y} fields of the \texttt{point}
record type are \emph{immutable}, meaning that once a record is
created with specific \texttt{x} and \texttt{y} values, they cannot
be changed later. If you want the fields to be \emph{mutable}, then
you need to specify that explicitly as in the following example.
2007-10-22 16:47:51 -04:00
\newpage
\begin{CodeInline}
(define-record-type point
(fields (mutable x) (mutable y)))
\end{CodeInline}
This definition gives us, in addition to the constructor, predicate,
and accessors, two additional procedures:
\begin{itemize}
\item The mutators \texttt{point-x-set!} and \texttt{point-y-set!} that,
given a record of type point and a new value, set the value stored in the
\texttt{x} field or \texttt{y} field to the new value.
\end{itemize}
\BoxedText{Note:}{Records in Ikarus have a printable representation
in order to enable debugging programs that use records. Records are
printed in the \texttt{\#[type-name field-values ...]} notation.
For example, \texttt{(write (make-point 1 2))} produces
\texttt{\#[point 1 2]}.}
\subsection{Extending existing record types}
A record type may be extended by defining new variants of a record
with additional fields. In our running example, suppose we want
to define a colored-point record type, \texttt{cpoint}, that, in
addition to being a \texttt{point}, has an additional field: a \emph{color}.
A simple way of achieving that is by using the following record
definition:
\begin{CodeInline}
(define-record-type cpoint
(parent point)
(fields color))
\end{CodeInline}
Here, the definition of \texttt{cpoint} gives us:
\begin{itemize}
\item A constructor \texttt{make-cpoint} that takes three arguments
(\texttt{x}, \texttt{y}, and \texttt{color} in that order) and returns a
\texttt{cpoint} record.
\item A predicate \texttt{cpoint?}\ that takes a single argument and
determines whether the argument is a \texttt{cpoint} record.
\item An accessor \texttt{cpoint-color} that returns the value of
the \texttt{color} field of a \texttt{cpoint} object.
\end{itemize}
All procedures that are applicable to records of type
\texttt{point} (\texttt{point?}, \texttt{point-x},
\texttt{point-y}) are also applicable to records of type
\texttt{cpoint} since a \texttt{cpoint} is also a \texttt{point}.
\subsection{Specifying custom constructors}
The record type definitions explained so far use the default
constructor that takes as many arguments as there are fields and
returns a new record with the values of the fields initialized
to the arguments' values. It is sometimes necessary or convenient
to provide a constructor that performs more than the default
constructor. For example, we can modify the definition of our
\texttt{point} record so that the constructor takes either
no arguments, in which case it would return a point located at the
origin, or two arguments specifying the $x$ and $y$ coordinates. We
use the \texttt{protocol} keyword for specifying such a constructor as
in the following example:
\begin{CodeInline}
(define-record-type point
(fields x y)
(protocol
(lambda (new)
(case-lambda
[(x y) (new x y)]
[() (new 0 0)]))))
\end{CodeInline}
The protocol here is a procedure that takes a constructor procedure
\texttt{new} (\texttt{new} takes as many arguments as there are
fields.) and returns the desired custom constructor that we want
(The actual constructor will be the value of the
\texttt{case-lambda} expression in the example above).
Now the constructor \texttt{make-point} would either take two
arguments which constructs a \texttt{point} record as before, or no
arguments, in which case \texttt{(new 0 0)} is called to construct a
point at the origin.
Another reason why one might want to use custom constructors is to
precompute the initial values of some fields based on the values of
other fields. An example of this case is adding a \texttt{distance}
field to the record type which is computed as
$d = \sqrt{x^2+y^2}$. The protocol in this case may be defined as:
\begin{CodeInline}
(define-record-type point
(fields x y distance)
(protocol
(lambda (new)
(lambda (x y)
(new x y (sqrt (+ (expt x 2) (expt y 2))))))))
\end{CodeInline}
Note that derived record types need not be modified when additional
fields are added to the parent record type. For example, our
\texttt{cpoint} record type still works unmodified even after we
added the new \texttt{distance} field to the parent.
Calling \texttt{(point-distance (make-cpoint 3 4 \#xFF0000))}
returns \texttt{5.0} as expected.
\subsection{Custom constructors for derived record types}
Just like how base record types (e.g. \texttt{point} in the running
example) may have a custom constructor, derived record types can
also have custom constructors that do other actions. Suppose that
you want to construct \texttt{cpoint} records using an optional
color that, if not supplied, defaults to the value 0. To do so, we
supply a \texttt{protocol} argument to \texttt{define-record-type}.
The only difference here is that the procedure \texttt{new} is a
\emph{curried} constructor. It first takes as many arguments as the
constructor of the parent record type, and returns a procedure that
takes the initial values of the new fields.
In our example, the constructor for the \texttt{point} record type
takes two arguments. \texttt{cpoint} extends \texttt{point} with
one new field. Therefore, \texttt{new} in the definition below
first takes the arguments for \texttt{point}'s constructor, then
takes the initial color value. The definition below shows how the
custom constructor may be defined.
\newpage
\begin{CodeInline}
(define-record-type cpoint
(parent point)
(fields color)
(protocol
(lambda (new)
(case-lambda
[(x y c) ((new x y) c)]
[(x y) ((new x y) 0)]))))
\end{CodeInline}
\section{Exception handling}
The procedure \texttt{with-exception-handler} allows the programmer
to specify how to handle exceptional situations. It takes two
procedures as arguments:
\begin{itemize}
\item An exception handler which is a procedure that takes a
single argument, the object that was raised.
\item A body thunk which is a procedure with no arguments whose body
is evaluated with the exception handler installed.
\end{itemize}
In addition to installing exception handlers, \rnrs{6} provides two
ways of raising exceptions: \texttt{raise} and
\texttt{raise-continuable}. We describe the
\texttt{raise-continuable} procedure
first since it's the simpler of the two.
For the code below, assume that \texttt{print} is defined as:
\begin{CodeInline}
(define (print who obj)
(display who)
(display ": ")
(display obj)
(newline))
\end{CodeInline}
The first example, below, shows how a simple exception handler is
installed. Here, the exception handler prints the object it
receives and returns the symbol \texttt{there}. The call to
\texttt{raise-continuable} calls the exception handler, passing it
the symbol \texttt{here}. When the handler returns, the returned
value becomes the value of the call to \texttt{raise-continuable}.
\begin{CodeInline}
(with-exception-handler
(lambda (obj) ;;; prints
(print "handling" obj) ;;; handling: here
'there) ;;; returned: there
(lambda ()
(print "returned" (raise-continuable 'here))))
\end{CodeInline}
Exception handlers may nest, and in that case, if an exception is
raised while evaluating an inner handler, the outer handler is
called as the following example illustrates:
\begin{CodeInline}
(with-exception-handler
(lambda (obj) ;;; prints
(print "outer" obj) ;;; inner: here
'outer) ;;; outer: there
(lambda () ;;; returned: outer
(with-exception-handler
(lambda (obj)
(print "inner" obj)
(raise-continuable 'there))
(lambda ()
(print "returned" (raise-continuable 'here))))))
\end{CodeInline}
In short, \texttt{with-exception-handler} binds an exception handler
within the dynamic context of evaluating the thunk, and
\texttt{raise-continuable} calls it.
The procedure \texttt{raise} is similar to
\texttt{raise-continuable} except that if the handler returns, a new
exception is raised, calling the next handler in sequence until the
list of handlers is exhausted.
2007-10-22 16:47:51 -04:00
\begin{CodeInline}
(call/cc ;;; prints
(lambda (escape) ;;; inner: here
(with-exception-handler ;;; outer: #[condition ---]
(lambda (obj) ;;; returns
(print "outer" obj) ;;; 12
(escape 12))
(lambda ()
(with-exception-handler
(lambda (obj)
(print "inner" obj)
'there)
(lambda ()
(print "returned" (raise 'here))))))))
\end{CodeInline}
Here, the call to \texttt{raise} calls the inner exception handler,
which returns, causing \texttt{raise} to re-raise a non-continuable
exception to the outer exception handler. The outer exception
handler then calls the escape continuation.
The following procedure provides a useful example of using the
exception handling mechanism. Consider a simple definition of the
procedure \texttt{configuration-option} which returns the value
associated with a key where the key/value pairs are stored in an
association list in a configuration file.
\begin{CodeInline}
(define (configuration-option filename key)
(cdr (assq key (call-with-input-file filename read))))
\end{CodeInline}
Many things may go wrong with calling
\texttt{configuration-option} including errors opening the file,
errors reading from the file (file may be corrupt), error in
\texttt{assq} since what's read may not be an association list, and
error in \texttt{cdr} since the key may not be in the association
list. Handling all error possibilities is tedious and error prone.
Exceptions provide a clean way of solving the problem. Instead of
guarding against all possible errors, we install a handler that
suppresses all errors and returns a default value if things go
wrong. Error handling for \texttt{configuration-option} may be
added as follows:
\begin{CodeInline}
(define (configuration-option filename key default)
(define (getopt)
(cdr (assq key (call-with-input-file filename read))))
(call/cc
(lambda (k)
(with-exception-handler
(lambda (_) (k default))
getopt))))
\end{CodeInline}
\chapter{\label{chapter:ikarus}The \texttt{(ikarus)} library}
In addition to the libraries listed in the \rnrs{6} standard, Ikarus
contains the \texttt{(ikarus)} library which provides additional
useful features. The \texttt{(ikarus)} library is a composite
library---it exports a superset of all the supported bindings of
\rnrs{6}. While not all of the exports of \texttt{(ikarus)} are
documented at this time, this chapter attempts to describe a few of
these useful extensions. Extensions to Scheme's lexical syntax are
also documented.
\idxlabeldefun{\#"!ikarus}{\#"!ikarus}{shebang-ikarus}{\#!ikarus}{reader syntax}
Ikarus extends Scheme's lexical syntax (\rnrs{6}~Chapter~4) in a
variety of ways including:\\
$\bullet$ end-of-file marker, \deflabelref{\#!eof}{shebang-eof}\\
$\bullet$ gensym syntax, \deflabelref{\#\{gensym\}}{gensym syntax}\\
$\bullet$ graph syntax, \deflabelref{\#nn= \#nn\#}{graph-syntax}
The syntax extensions are made available by default on all input
ports, until the \texttt{\#!r6rs} token is read. Thus, reading the
\texttt{\#!r6rs} token disables all extensions to the lexical syntax
on the specific port, and the \texttt{\#!ikarus} token enables them again.
If you are writing code that is intended to be portable across
different Scheme implementations, we recommend adding the
\texttt{\#!r6rs} token to the top of every script and library that
you write. This allows Ikarus to alert you when using non-portable
features. If you're writing code that's intended to be
Ikarus-specific, we recommend adding the \texttt{\#!ikarus} token in
order to get an immediate error when your code is run under other
implementations.
\defun{port-mode}{procedure}
\texttt{(port-mode ip)}
The \texttt{port-mode} procedure accepts an input port as an
argument and returns one of \texttt{r6rs-mode} or
\texttt{ikarus-mode} as a result. All input ports initially start
in the \texttt{ikarus-mode} and thus accept Ikarus-specific reader
extensions. When the \texttt{\#!r6rs} token is read from a port,
its mode changes to \texttt{r6rs-mode}.
\begin{verbatim}
> (port-mode (current-input-port))
ikarus-mode
> #!r6rs (port-mode (current-input-port))
r6rs-mode
> #!ikarus (port-mode (current-input-port))
ikarus-mode
\end{verbatim}
\idxlabeldefun{set-port-mode"!}{set-port-mode"!}{set-port-mode-bang}{set-port-mode!}{procedure}
%\defun{set-port-mode!}{procedure}
%\index{set-port-mode@\texttt{set-port-mode"!}}
\texttt{(set-port-mode!\ ip mode)}
The \texttt{set-port-mode!} procedure modifies the lexical syntax
accepted by subsequent calls to \texttt{read} on the input port.
The mode is a symbol which should be one of \texttt{r6rs-mode} or
\texttt{ikarus-mode}. The effect of setting the port mode is
similar to that of reading the \texttt{\#!r6rs} or \texttt{\#!ikarus}
token from that port.
\begin{verbatim}
> (set-port-mode! (current-input-port) 'r6rs-mode)
> (port-mode (current-input-port))
r6rs-mode
\end{verbatim}
\newpage
\idxlabeldefun{\#"!eof}{\#"!eof}{shebang-eof}{\#!eof}{reader syntax}
The end-of-file marker, \texttt{\#!eof}, is an extension to the
\rnrs{6} syntax. The primary utility of the \texttt{\#!eof} marker
is to stop the reader (e.g. \texttt{read} and \texttt{get-datum})
from reading the rest of the file.
\begin{verbatim}
#!/usr/bin/env scheme-script
(import (ikarus))
<some code>
(display "goodbye\n")
#!eof
<some junk>
\end{verbatim}
The \texttt{\#!eof} marker also serves as a datum in Ikarus, much
like \texttt{\#t} and \texttt{\#f}, when it is found inside other
expressions.
\begin{verbatim}
> (eof-object)
#!eof
> (read (open-input-string ""))
#!eof
> (read (open-input-string "#!eof"))
#!eof
> (quote #!eof)
#!eof
> (eof-object? '#!eof)
#t
> #!r6rs #!eof
Unhandled exception
Condition components:
1. &error
2. &who: tokenize
3. &message: "invalid syntax: #!e"
> #!ikarus #!eof
$
\end{verbatim}
2007-11-19 23:19:42 -05:00
\newpage
\section{Parameters}
Parameters in Ikarus\footnote{Parameters are found in many Scheme
implementations such as Chez Scheme and MzScheme.} are intended for
customizing the behavior of a procedure during the dynamic execution
of some piece of code. Parameters are first class entities
(represented as procedures) that hold the parameter value. A
parameter procedure accepts either zero or one argument. If given
no arguments, it returns the current value of the parameter. If
given a single argument, it must set the state to the value of the
argument. Parameters replace the older concept of using starred
\texttt{*global*} customization variables. For example, instead of
writing:
\begin{verbatim}
(define *screen-width* 72)
\end{verbatim}
and then mutating the variable \texttt{*screen-width*} with
\texttt{set!}, we could wrap the variable \texttt{*screen-width*} with a
\texttt{screen-width} parameter as follows:
\begin{verbatim}
(define *screen-width* 72)
(define screen-width
(case-lambda
[() *screen-width*]
[(x) (set! *screen-width* x)]))
\end{verbatim}
The value of \texttt{screen-width} can now be passed as argument,
returned as a value, and exported from libraries.
\defun{make-parameter}{procedure}
\texttt{
(make-parameter x)\\
(make-parameter x f)
}
As parameters are common in Ikarus, the procedure
\texttt{make-parameter} is defined to model the common usage pattern
of parameter construction.
\paragraph{\texttt{(make-parameter x)}} constructs a parameter
with \texttt{x} as the initial value. For example, the code above
could be written succinctly as:
\begin{verbatim}
(define screen-width (make-parameter 72))
\end{verbatim}
\paragraph{\texttt{(make-parameter x f)}} constructs a parameter
which filters the assigned values through the procedure \texttt{f}.
The initial value of the parameter is the result of calling
\texttt{(f~x)}. Typical uses of the filter procedure include
checking some constraints on the passed argument or converting it to
a different data type. The \texttt{screen-width} parameter may be
constructed more robustly as:
\begin{verbatim}
(define screen-width
(make-parameter 72
(lambda (w)
(assert (and (integer? w) (exact? w)))
(max w 1))))
\end{verbatim}
This definition ensures, through \texttt{assert}, that the argument
passed is an exact integer. It also ensures, through \texttt{max},
that the assigned value is always positive.
\defun{parameterize}{syntax}
\texttt{(parameterize ([lhs* rhs*] ...) body body* ...)}
Parameters can be assigned to by simply calling the parameter
procedure with a single argument. The \texttt{parameterize} syntax
is used to set the value of a parameter within the dynamic extent of
the \texttt{body~body*~...} expressions.
The \texttt{lhs* ...} are expressions, each of which must evaluate
to a parameter. Such parameters are not necessarily constructed by
\texttt{make-parameter}---any procedure that follows the parameters
protocol works.
The advantage of using \texttt{parameterize} over explicitly
assigning to parameters (same argument applies to global variables)
is that you're guaranteed that whenever control exits the body of a
\texttt{parameterize} expression, the value of the parameter is
reset back to what it was before the body expressions were entered.
This is true even in the presence of \texttt{call/cc}, errors, and
exceptions.
The following example shows how to set the text property of a
terminal window. The parameter \texttt{terminal-property} sends an
ANSI escape sequence to the terminal whenever the parameter value is
changed. The use of \texttt{terminal-property} within
\texttt{parameterize} changes the property before
\texttt{(display~"RED!")} is called and resets it back to normal
when the body returns.
2007-10-22 16:47:51 -04:00
\begin{CodeInline}
(define terminal-property
(make-parameter "0"
(lambda (x)
(display "\x1b;[")
(display x)
(display "m")
x)))
(display "Normal and ")
(parameterize ([terminal-property "41;37"])
(display "RED!"))
(newline)
\end{CodeInline}
2007-10-22 16:47:51 -04:00
\newpage
\section{Local library imports}
\defun{import}{syntax}
\texttt{(import import-spec* ...)}
The \texttt{import} keyword which is exported from the
\texttt{(ikarus)} library can be used anywhere definitions can
occur: at a script body, library's top-level, or in internal
definitions context. The syntax of the local \texttt{import} form
is similar to the \texttt{import} that appears at the top of a
library or a script form, and carries with it the same restrictions:
no identifier name may be imported twice unless it denotes the same
identifier; no identifier may be both imported and defined; and
imported identifiers are immutable.
Local \texttt{import} forms are useful for two reasons: (1) they
minimize the namespace clutter that usually occurs when many
libraries are imported at the top level, and (2) they limit the
scope of the import and thus help modularize a library's
dependencies.
Suppose you are constructing a large library and at some point you
realize that one of your procedures needs to make use of some other
library for performing a specific task. Importing that library at
top level makes it available for the entire library. Consequently,
even if that library is no longer used anywhere in the code (say
when the code that uses it is deleted), it becomes very hard to
delete the import without first examining the entire library body
for potential usage leaks. By locally importing a library into the
appropriate scope, we gain the ability to delete the \texttt{import}
form when the procedure that was using it is deleted.
2007-10-22 16:47:51 -04:00
\newpage
\section{Local modules}
This section is not documented yet.
Please refer to Section~10.5 of Chez Scheme
User's Guide~\cite{csug7}, Chapter~3 of Oscar Waddell's Ph.D.
Thesis~\cite{waddell-thesis}, and its POPL99
paper~\cite{waddell-extending} for details on using the
\texttt{module} and \texttt{import} keywords. Ikarus's internal
module system is similar in spirit to that of Chez Scheme.
\defun{module}{syntax}
\texttt{(module M definitions ... expressions ...)}\\
\texttt{(module definitions ... expressions ...)}
\defun{import}{syntax}
\texttt{(import M)}
2007-11-22 14:26:54 -05:00
\newpage
2007-11-22 14:26:54 -05:00
\section{\label{sec:gensyms}Gensyms}
Gensym stands for a \emph{generated symbol}---a fresh symbol that is
generated at run time and is guaranteed to be \emph{not}
\texttt{eq?} to any other symbol present in the system. Gensyms are
useful in many applications including expanders, compilers, and
interpreters when generating an arbitrary number of unique names is
needed.
Ikarus is similar to Chez Scheme in that the readers (including the
\texttt{read} procedure) and writers (including \texttt{write} and
\texttt{pretty-print}) maintain the read/write invariance on
gensyms. When a gensym is written to an output port, the system
automatically generates a random unique identifier for the gensym.
When the gensym is read back through the \verb|#{gensym}| read
syntax, a new gensym is \emph{not} regenerated, but instead, it is
looked up in the global symbol table.
A gensym's name is composed of two parts: a \emph{pretty} string and
a \emph{unique} string. The Scheme procedure
\texttt{symbol->string} returns the pretty string of the gensym and
not its unique string. Gensyms are printed by default as \\
\verb|#{pretty-string unique-string}|.
\defun{gensym}{procedure}
\texttt{(gensym)}\\
\texttt{(gensym string)}\\
\texttt{(gensym symbol)}
The procedure \texttt{gensym} constructs a new gensym. If passed no
arguments, it constructs a gensym with no pretty name. The pretty
name is constructed when and if the pretty name of the resulting
gensym is needed. If \texttt{gensym} is passed a string, that
string is used as the pretty name. If \texttt{gensym} is passed a
symbol, the pretty name of the symbol is used as the pretty name of
the returned gensym.
See \defref{gensym-prefix} and \defref{gensym-count} for details.
\begin{verbatim}
> (gensym)
#{g0 |y0zf>GlFvcTJE0xw|}
> (gensym)
#{g1 |U%X&sF6kX!YC8LW=|}
> (eq? (gensym) (gensym))
#f
\end{verbatim}
\texttt{(gensym string)} constructs a new gensym with
\texttt{string} as its pretty name. Similarly,
\texttt{(gensym~symbol)} constructs a new gensym with the pretty
name of \texttt{symbol}, if it has one, as its pretty name.
\begin{verbatim}
> (gensym "foo")
#{foo |>VgOllCM&$dSvRN=|}
> (gensym 'foo)
#{foo |!TqQLmtw2hoEYfU>|}
> (gensym (gensym 'foo))
#{foo |N2C>5O0>C?OROUBU|}
\end{verbatim}
\defun{gensym?}{procedure}
\texttt{(gensym? x)}
The \texttt{gensym?}\ predicate returns \texttt{\#t} if its argument
is a gensym, and returns \texttt{\#f} otherwise.
\begin{verbatim}
> (gensym? (gensym))
#t
> (gensym? 'foo)
#f
> (gensym? 12)
#f
\end{verbatim}
\defun{gensym->unique-string}{procedure}
\texttt{(gensym->unique-string gensym)}
The \texttt{gensym->unique-string} procedure returns the unique name
associated with the gensym argument.
\begin{verbatim}
> (gensym->unique-string (gensym))
"YukrolLMgP?%ElcR"
\end{verbatim}
\idxdefun{gensym syntax}{\#\{gensym\}}{reader syntax}
\texttt{\#\{unique-name\}}
\index{\#\{pretty unique\}@\texttt{\#\{pretty unique\}} reader syntax}
\\
\texttt{\#\{pretty-name unique-name\}}
\index{\#\{unique\}@\texttt{\#\{unique\}} reader syntax}
\\
\texttt{\#:pretty-name}
\index{\#:pretty@\texttt{\#:pretty} reader syntax}
Ikarus's \texttt{read} and \texttt{write} procedures extend the
lexical syntax of Scheme by the ability to read and write gensyms
using one of the three forms listed above.
\verb|#{unique-name}| constructs, at read time, a gensym whose
unique name is the one specified. If a gensym with the same unique
name already exists in the system's symbol table, that gensym is
returned.
\begin{verbatim}
> '#{some-long-name}
#{g0 |some-long-name|}
> (gensym? '#{some-long-unique-name})
#t
> (eq? '#{another-unique-name} '#{another-unique-name})
#t
\end{verbatim}
The two-part \verb|#{pretty-name unique-name}| gensym syntax is
similar to the syntax shown above with the exception that if a new
gensym is constructed (that is, if the gensym did not already exist
in the symbol table), the pretty name of the constructed gensym is
set to \texttt{pretty-name}.
\begin{verbatim}
> '#{foo unique-identifier}
#{foo |unique-identifier|}
> '#{unique-identifier}
#{foo |unique-identifier|}
> '#{bar unique-identifier}
#{foo |unique-identifier|}
\end{verbatim}
The \texttt{\#:pretty-name} form constructs, at read time, a gensym
whose pretty name is \texttt{pretty-name} and whose unique name is
fresh. This form guarantees that the resulting gensym is not
\texttt{eq?}\ to any other symbol in the system.
\begin{verbatim}
> '#:foo
#{foo |j=qTGlEwS/Zlp2Dj|}
> (eq? '#:foo '#:foo)
#f
\end{verbatim}
\defun{generate-temporaries}{example}
\index{Examples!generate-temporaries@\texttt{generate-temporaries}}
The \texttt{(rnrs syntax-case)} library provides a
\texttt{generate-temporaries} procedure, which takes a syntax object
(representing a list of things) and returns a list of fresh
identifiers. Using \texttt{gensym}, that procedure can be defined
as follows:
\begin{CodeInline}
(define (generate-temporaries* stx)
(syntax-case stx ()
[(x* ...)
(map (lambda (x)
(datum->syntax #'unimportant
(gensym
(if (identifier? x)
(syntax->datum x)
't))))
#'(x* ...))]))
\end{CodeInline}
The above definition works by taking the input \texttt{stx} and
destructuring it into the list of syntax objects \texttt{x*~...}.
The inner procedure maps each \texttt{x} into a new syntax object
(constructed with \texttt{datum->syntax}). The datum is a gensym,
whose name is the same name as \texttt{x} if \texttt{x} is an
identifier, or the symbol \texttt{t} if \texttt{x} is not an
identifier. The identifiers returned by \texttt{generate-temporaries*}
have names similar to their input counterparts:
\begin{verbatim}
> (print-gensym #f)
> (generate-temporaries* #'(x y z 1 2))
(#<syntax x> #<syntax y> #<syntax z> #<syntax t> #<syntax t>)
\end{verbatim}
\newpage
\section{Printing}
\defun{pretty-print}{procedure}
\texttt{(pretty-print datum)}\\
\texttt{(pretty-print datum output-port)}
The procedure \texttt{pretty-print} is intended for printing Scheme
data, typically Scheme programs, in a format close to how a Scheme
programmer would write it. Unlike \texttt{write}, which writes its
input all in one line, \texttt{pretty-print} inserts spaces and new
lines in order to produce more pleasant output.
\begin{verbatim}
(define fact-code
'(letrec ([fact (lambda (n) (if (zero? n) 1 (* n (fact (- n 1)))))])
(fact 5)))
> (pretty-print fact-code)
(letrec ((fact
(lambda (n) (if (zero? n) 1 (* n (fact (- n 1)))))))
(fact 5))
\end{verbatim}
The second argument to \texttt{pretty-print}, if supplied, must be
an output port. If not supplied, the \texttt{current-output-port}
is used.
\BoxedText{Limitations:}{As shown in the output above, the current
implementation of \texttt{pretty-print} does not handle printing of
square brackets properly.}
\defun{pretty-width}{parameter}
\texttt{(pretty-width)}\\
\texttt{(pretty-width n)}
The parameter \texttt{pretty-width} controls the number of
characters after which the \texttt{pretty-print} starts breaking
long lines into multiple lines. The initial value of
\texttt{pretty-width} is set to 60 characters, which is suitable for most
terminals and printed material.
\begin{verbatim}
> (parameterize ([pretty-width 40])
(pretty-print fact-code))
(letrec ((fact
(lambda (n)
(if (zero? n)
1
(* n (fact (- n 1)))))))
(fact 5))
\end{verbatim}
Note that \texttt{pretty-width} does not guarantee that
the output will not extend beyond the specified number. Very long
symbols, for example, cannot be split into multiple lines and may
force the printer to go beyond the value of \texttt{pretty-width}.
\defun{format}{procedure}
\texttt{(format fmt-string args ...)}
The procedure \texttt{format} produces a string formatted according
to \texttt{fmt-string} and the supplied
arguments. The format string contains markers in which the string
representation of each argument is placed. The markers include:
\hangpara{2em}{1}
\verb|"~s"| instructs the formatter to place the next argument
as if the procedure \texttt{write} has printed it. If the argument
contains a string, the string will be quoted and all quotes and
backslashes in the string will be escaped. Similarly, characters
will be printed using the \verb|#\x| notation.
\hangpara{2em}{1}
\verb|"~a"| instructs the formatter to place the next argument
as if the procedure \texttt{display} has printed it. Strings and
characters are placed as they are in the output.
\hangpara{2em}{1}
\verb|"~b"| instructs the formatter to convert the next
argument to its binary (base 2) representation. The argument must be an
exact number. Note that the \texttt{\#b} numeric prefix is not
produced in the output.
\hangpara{2em}{1}
\verb|"~o"| is similar to \verb|"~b"| except that
the number is printed in octal (base 8).
\hangpara{2em}{1}
\verb|"~x"| is similar to \verb|"~b"| except that
the number is printed in hexadecimal (base 16).
\hangpara{2em}{1}
\verb|"~d"| outputs the next argument, which can be an
exact or inexact number, in its decimal (base 10) representation.
\hangpara{2em}{1}
\verb|"~~"| instructs the formatter to place a tilde
character, \verb|~|, in the output without consuming an
argument.
Note that the \texttt{\#b}, \texttt{\#o}, and \texttt{\#x} numeric
prefixes are not added to the output when \verb|~b|, \verb|~o|, and
\verb|~x| are used.
\begin{verbatim}
> (format "message: ~s, ~s, and ~s" 'symbol "string" #\c)
"message: symbol, \"string\", and #\\c"
> (format "message: ~a, ~a, and ~a" 'symbol "string" #\c)
"message: symbol, string, and c"
\end{verbatim}
\defun{printf}{procedure}
\texttt{(printf fmt-string args ...)}
The procedure \texttt{printf} is similar to \texttt{format} except
that the output is sent to the \texttt{current-output-port} instead
of being collected in a string.
\begin{verbatim}
> (let ([n (+ (expt 2 32) #b11001)])
(printf "~d = #b~b = #x~x\n" n n n))
4294967321 = #b100000000000000000000000000011001 = #x100000019
\end{verbatim}
\defun{fprintf}{procedure}
\texttt{(fprintf output-port fmt-string args ...)}
The procedure \texttt{fprintf} is similar to \texttt{printf} except
that the output port to which the output is sent is specified as the
first argument.
\defun{print-graph}{parameter}
\texttt{(print-graph)} \\
\texttt{(print-graph \#t)}\\
\texttt{(print-graph \#f)}
\phantomsection
\label{graph-syntax}
The graph notation is a way of marking and referencing parts of a
data structure and, consequently, creating shared and cyclic data
structures at read time instead of resorting to explicit mutation at
run time. The \texttt{\#$n$=} marks the following data structure with
mark $n$, where $n$ is a nonnegative integer. The \texttt{\#$n$\#}
references the data structure marked $n$. Marks can be assigned and
referenced in any order, but each mark must be assigned exactly
once in an expression.
\begin{verbatim}
> (let ([x '#0=(1 2 3)])
(eq? x '#0#))
#t
> (let ([x '#0#] [y '#0=(1 2 3)])
(eq? x y))
#t
> (eq? (cdr '(12 . #1#)) '#1=(1 2 3))
#t
> (let ([x '#1=(#1# . #1#)])
(and (eq? x (car x))
(eq? x (cdr x))))
#t
\end{verbatim}
The \texttt{print-graph} parameter controls how the writers (e.g.
\texttt{pretty-print} and \texttt{write}) handle shared and cyclic
data structures. In Ikarus, all writers detect cyclic data
structures and they all terminate on all input, cyclic or otherwise.
If the value of \texttt{print-graph} is set to \texttt{\#f} (the
default), then the writers do not attempt to detect shared data
structures. Any part of the input that is shared is printed as if
no sharing is present.
If the value of \texttt{print-graph} is set to \texttt{\#t}, all
sharing of data structures is marked using the \texttt{\#$n$=} and
\texttt{\#$n$\#} notation.
\begin{verbatim}
> (parameterize ([print-graph #f])
(let ([x (list 1 2 3 4)])
(pretty-print (list x x x))))
((1 2 3 4) (1 2 3 4) (1 2 3 4))
> (parameterize ([print-graph #t])
(let ([x (list 1 2 3 4)])
(pretty-print (list x x x))))
(#0=(1 2 3 4) #0# #0#)
> (parameterize ([print-graph #f])
(let ([x (list 1 2)])
(let ([y (list x x x x)])
(set-car! (last-pair y) y)
(pretty-print (list y y)))))
(#0=((1 2) (1 2) (1 2) #0#) #0#)
> (parameterize ([print-graph #t])
(let ([x (list 1 2)])
(let ([y (list x x x x)])
(set-car! (last-pair y) y)
(pretty-print (list y y)))))
(#0=(#1=(1 2) #1# #1# #0#) #0#)
\end{verbatim}
% \defun{print-unicode}{parameter}
% \texttt{(print-unicode)} \\
% \texttt{(print-unicode \#t)} \\
% \texttt{(print-unicode \#f)}
\defun{print-gensym}{parameter}
\texttt{(print-gensym)}\\
\texttt{(print-gensym \#t)}\\
\texttt{(print-gensym \#f)}\\
\texttt{(print-gensym 'pretty)}
The parameter \texttt{print-gensym} controls how gensyms are printed
by the various writers.
If the value of \texttt{print-gensym} is \texttt{\#f}, then gensym
syntax is suppressed by the writers and only the gensyms' pretty
names are printed. If the value of \texttt{print-gensym} is
\texttt{\#t}, then the full \verb|#{pretty unique}| syntax is
printed. Finally, if the value of \texttt{print-gensym} is the
symbol \texttt{pretty}, then gensyms are printed using the
\texttt{\#:pretty} notation.
\begin{verbatim}
> (parameterize ([print-gensym #f])
(pretty-print (list (gensym) (gensym))))
(g0 g1)
> (parameterize ([print-gensym #t])
(pretty-print (list (gensym) (gensym))))
(#{g2 |KR1M2&CTt1<B0n/m|} #{g3 |FBAb&7NC6&=c82!O|})
> (parameterize ([print-gensym 'pretty])
(pretty-print (list (gensym) (gensym))))
(#:g4 #:g5)
\end{verbatim}
The initial value of \texttt{print-gensym} is \texttt{\#t}.
\defun{gensym-prefix}{parameter}
\texttt{(gensym-prefix)}\\
\texttt{(gensym-prefix string)}
The parameter \texttt{gensym-prefix} specifies the string to be used
as the prefix to generated pretty names. The default value of
\texttt{gensym-prefix} is the string \texttt{"g"}, which causes
generated gensyms to have pretty names in the sequence \texttt{g0},
\texttt{g1}, \texttt{g2}, etc.
\begin{verbatim}
> (parameterize ([gensym-prefix "var"]
[print-gensym #f])
(pretty-print (list (gensym) (gensym) (gensym))))
(var0 var1 var2)
\end{verbatim}
Beware that the \texttt{gensym-prefix} controls how pretty names are
generated, and has nothing to do with how \texttt{gensym} constructs
a new gensym. In particular, notice the difference between the
output of the first example and the output of the examples below:
\begin{verbatim}
> (pretty-print
(parameterize ([gensym-prefix "var"] [print-gensym #f])
(list (gensym) (gensym) (gensym))))
(g3 g4 g5)
> (let ([ls (list (gensym) (gensym) (gensym))])
(parameterize ([gensym-prefix "var"] [print-gensym #f])
(pretty-print ls)))
(var5 var6 var7)
\end{verbatim}
\defun{gensym-count}{parameter}
\texttt{(gensym-count)}\\
\texttt{(gensym-count n)}
The parameter \texttt{gensym-count} determines the number
which is attached to the \texttt{gensym-prefix} when gensyms'
pretty names are generated. The initial value of \texttt{gensym-count}
is 0 and is incremented every time a
pretty name is generated. It might be set to any non-negative
integer value.
\begin{verbatim}
> (let ([x (gensym)])
(parameterize ([gensym-count 100] [print-gensym #f])
(pretty-print (list (gensym) x (gensym)))))
(g100 g101 g102)
\end{verbatim}
Notice from all the examples so far that pretty names are generated
in the order in which the gensyms are printed, not in the order in
which gensyms were created.
\newpage
\section{Tracing}
\defun{trace-define}{syntax}
\texttt{(trace-define (name . args) body body* ...)}\\
\texttt{(trace-define name expression)}
The \texttt{trace-define} syntax is similar to \texttt{define}
except that the bound value, which must be a procedure, becomes a
traced procedure. A traced procedure prints its arguments when it
is called and prints its values when it returns.
\begin{verbatim}
> (trace-define (fact n)
(if (zero? n) 1 (* n (fact (- n 1)))))
> (fact 5)
|(fact 5)
| (fact 4)
| |(fact 3)
| | (fact 2)
| | |(fact 1)
| | | (fact 0)
| | | 1
| | |1
| | 2
| |6
| 24
|120
120
\end{verbatim}
The tracing facility in Ikarus preserves and shows tail recursion
and distinguishes it from non-tail recursion by showing tail calls
starting at the same line in which their parent was called.
\begin{verbatim}
> (trace-define (fact n)
(trace-define (fact-aux n m)
(if (zero? n) m (fact-aux (- n 1) (* n m))))
(fact-aux n 1))
> (fact 5)
|(fact 5)
|(fact-aux 5 1)
|(fact-aux 4 5)
|(fact-aux 3 20)
|(fact-aux 2 60)
|(fact-aux 1 120)
|(fact-aux 0 120)
|120
120
\end{verbatim}
Moreover, the tracing facility interacts well with continuations and
exceptions.
\begin{verbatim}
> (call/cc
(lambda (k)
(trace-define (loop n)
(if (zero? n)
(k 'done)
(+ (loop (- n 1)) 1)))
(loop 5)))
|(loop 5)
| (loop 4)
| |(loop 3)
| | (loop 2)
| | |(loop 1)
| | | (loop 0)
done
\end{verbatim}
\defun{trace-lambda}{syntax}
\texttt{(trace-lambda name args body body* ...)}
The \texttt{trace-lambda} macro is similar to \texttt{lambda} except
that the resulting procedure is traced: it prints the arguments it
receives and the results it returns.
\newpage
\defun{make-traced-procedure}{procedure}
\texttt{(make-traced-procedure name proc)}
The procedure \texttt{make-traced-procedure} takes a name (typically
a symbol) and a procedure. It returns a procedure similar to
\texttt{proc} except that it traces its arguments and values.
\begin{verbatim}
> (define (fact n)
(if (zero? n)
(lambda (k) (k 1))
(lambda (k)
((fact (- n 1))
(make-traced-procedure `(k ,n)
(lambda (v)
(k (* v n))))))))
> (call/cc
(lambda (k)
((fact 5) (make-traced-procedure 'K k))))
|((k 1) 1)
|((k 2) 1)
|((k 3) 2)
|((k 4) 6)
|((k 5) 24)
|(K 120)
120
\end{verbatim}
\newpage
\section{Timing}
This section describes some of Ikarus's timing facilities which may
be useful for benchmarking and performance tuning.
\defun{time}{syntax}
\texttt{(time expression)}
The \texttt{time} macro performs the following: it evaluates
\texttt{expression}, then prints a summary of the run time
statistics, then returns the values returned by \texttt{expression}.
The run-time summary includes the number of bytes allocated, the
number of garbage collection runs, and the time spent in both the
mutator and the collector.
\begin{verbatim}
> (let () ;;; 10 million
(define ls (time (vector->list (make-vector 10000000))))
(time (append ls ls))
(values))
running stats for (vector->list (make-vector 10000000)):
3 collections
672 ms elapsed cpu time, including 547 ms collecting
674 ms elapsed real time, including 549 ms collecting
120012328 bytes allocated
running stats for (append ls ls):
4 collections
1536 ms elapsed cpu time, including 1336 ms collecting
1538 ms elapsed real time, including 1337 ms collecting
160000040 bytes allocated
\end{verbatim}
Note: The output listed above is \emph{just a sample} that was
taken at some point on some machine. The output on your
machine at the time you read this may vary.
\newpage
\defun{time-it}{procedure}
\texttt{(time-it who thunk)}
The procedure \texttt{time-it} takes a datum denoting the name of
the computation and a thunk (i.e. a
procedure with no arguments), invokes the thunk, prints the stats,
and returns the values obtained from invoking the thunk.
If the value of \texttt{who} is non-false, \texttt{who}
is used when displaying the run-time statistics. If the value of
\texttt{who} is \texttt{\#f}, then no name for the computation is
displayed.
\begin{verbatim}
> (time-it "a very fast computation"
(lambda () (values 1 2 3)))
running stats for a very fast computation:
no collections
0 ms elapsed cpu time, including 0 ms collecting
0 ms elapsed real time, including 0 ms collecting
24 bytes allocated
1
2
3
> (time-it #f (lambda () 12))
running stats:
no collections
0 ms elapsed cpu time, including 0 ms collecting
0 ms elapsed real time, including 0 ms collecting
0 bytes allocated
12
\end{verbatim}
\chapter{The \texttt{(ikarus ipc)} library}
\ref{sec:environment-variables}
\ref{sec:subprocess}
\ref{sec:sockets}
\newpage
\section{\label{sec:environment-variables}Environment variables}
When the operating system starts a process, it starts the process in
some environment that maps environment variables to values. Typical
keys found in the environment are \texttt{HOME} (pointing to the
home directory of the user), \texttt{PATH} (containing a
colon-separated list of directories to be searched when running a
command from the shell), \texttt{SHELL}, \texttt{EDITOR}, and
\texttt{PAGER}. This section describes the procedures provided by
Ikarus for manipulating this environment.
The environment procedures are placed in the \texttt{(ikarus~ipc)}
library because they provide a (limited) way for one process to
communicate to a subprocess, akin to parameter passing.
\defun{getenv}{procedure}
\texttt{(getenv key)}
The procedure \texttt{getenv} retrieves the value associated with
\texttt{key} (which must be a string) in the environment. The value
returned is a (utf8-decoded) string, or \texttt{\#f} if there is no
mapping for \texttt{key} in the environment.
\defun{setenv}{procedure}
\texttt{(setenv key value)}\\
\texttt{(setenv key value overwrite?)}
The procedure \texttt{setenv} sets the mapping of \texttt{key} to
\texttt{value} in the environment. Both \texttt{key} and
\texttt{value} must be strings. If the \texttt{overwrite?} argument
is provided and is \texttt{\#f}, \texttt{setenv} does not overwrite
a value associated with \texttt{key} if one already exists. The
procedure \texttt{setenv} may raise an exception if the operating
system cannot allocate enough memory to hold the new mapping.
\defun{unsetenv}{procedure}
\texttt{(unsetenv key)}
The procedure \texttt{unsetenv} removes \texttt{key} and its
associated value (if one exists) from the environment.
\BoxedText{Caveat:}{The underlying system procedure \texttt{setenv}
may leak some memory in some operating systems when passed some
values. Ikarus has no way of getting around this system limitation
and thus may leak some memory for some calls to \texttt{setenv}.
Use sparingly.}
\newpage
\section{\label{sec:subprocess}Subprocess communication}
This section describes the facilities that Ikarus provides for
starting subprocesses and sending and receiving data through the
subprocesses' standard input, output, and error ports.
\defun{system}{procedure}
\texttt{(system string)}
The \texttt{system} procedure takes a string representing an external
shell command and arguments and invokes the shell (typically
\texttt{sh} on Unix systems) on this command. The returned value
from \texttt{system} is the exit status of the external command.
Ikarus's \texttt{system} procedure is a thin wrapper around the
\texttt{system} procedure in the Standard C Library \texttt{libc}.
\begin{verbatim}
> (system "ls M*")
Makefile Makefile.am Makefile.in
0
\end{verbatim}
\defun{process}{procedure}
\texttt{(process program-name args ...)}
The \texttt{process} procedure takes as input a string representing
the path to an external program and a set of strings that are the
arguments to the external program. It invokes the program with the
given arguments, and returns four values: (1) a process identifier
(\texttt{pid}),
(2) an output port which pipes to the process's \texttt{stdin}, (3)
an input port wired to the process's \texttt{stdout}, and (4) an
input port wired to the process's \texttt{stderr}. All three ports
are blocking: reading and writing to any one of them blocks Ikarus
until some bytes are available for reading or writing.
Attempting to read from the process's \texttt{stdout} port may block
indefinitely if the external program does not write anything
(e.g.,~if it attempts to read from \texttt{stdin} instead).
Communicating with an external process must therefore be done
according to the protocol in which the external process
communicates.
\defun{process-nonblocking}{procedure}
\texttt{(process-nonblocking program-name args ...)}
The procedure \texttt{process-nonblocking} is similar to the
\texttt{process} procedure except that the three returned ports are
put in nonblocking mode. Attempting to perform a read or write
operation on a nonblocking port in which bytes are not available for
reading or writing causes Ikarus to enqueue the port with the
continuation in which the read/write operation occurs and attempt to
dispatch previously enqueued ports on which some bytes are ready for
read or write. See Section~\ref{sec:sockets} for more details on
blocking and nonblocking operations.
\defun{waitpid}{procedure}
\texttt{(waitpid)}\\
\texttt{(waitpid pid)}\\
\texttt{(waitpid pid block?)}\\
\texttt{(waitpid pid block? want-error?)}
The \texttt{waitpid} procedure waits for the process with the given
\texttt{pid} to terminate and, if successful, returns a
\texttt{wstatus} object encapsulating the wait status of the
process. Without arguments, \texttt{waitpid} defaults the
\texttt{pid} to \texttt{-1} which allows one to wait for any child
process to exit. If the \texttt{block?} argument is true (the
default), \texttt{waitpid} blocks indefinitely waiting for a child
process to exit. When \texttt{block?} is false, \texttt{waitpid}
returns immediately regardless of whether or not a child process has
exited. The \texttt{want-error?} argument controls what happens if
\texttt{block?} was specified to be \texttt{\#f} and no child had
exited. If \texttt{want-error?} is true (the default), an error is
signaled. Otherwise, \texttt{waitpid} returns \texttt{\#f} if no
process has exited. Operations on the wait status result are listed
below.
\defun{wstatus-pid}{procedure}
\texttt{(wstatus-pid wstatus)}
The \texttt{wstatus-pid} procedure returns the \texttt{pid} of the process
whose status is recorded in the \texttt{wstatus} object. This
\texttt{pid} is most useful when the default \texttt{pid} of
\texttt{-1} is given to \texttt{waitpid} and thus the pid of the
exiting process is not known beforehand.
\defun{wstatus-exit-status}{procedure}
\texttt{(wstatus-exit-status wstatus)}
The procedure \texttt{wstatus-exit-status} returns the exit status
of the child process. It is typically \texttt{0} if the child
exited normally and has other numeric values if the child process
encountered an error.
\defun{wstatus-received-signal}{procedure}
\texttt{(wstatus-received-signal wstatus)}
The procedure \texttt{wstatus-received-signal} returns the
name of the signal (or the number of the signal if the name is not
known) that caused the child process to exit.
The signal name is one of the following symbols:
\begin{verbatim}
SIGABRT SIGALRM SIGBUS SIGCHLD SIGCONT SIGFPE SIGHUP
SIGILL SIGINT SIGKILL SIGPIPE SIGQUIT SIGSEGV SIGSTOP
SIGTERM SIGTSTP SIGTTIN SIGTTOU SIGUSR1 SIGUSR2 SIGPOLL
SIGPROF SIGSYS SIGTRAP SIGURG SIGVTALRM SIGXCPU SIGXFSZ
\end{verbatim}
\defun{kill}{procedure}
\texttt{(kill pid signal-name)}
The \texttt{kill} procedure takes a \texttt{pid} and a signal name
(a symbol from the list above) and asks the operating system to send
the signal to the given process.
\newpage
\section{\label{sec:sockets}TCP and UDP sockets}
Ikarus supports synchronous (blocking) and asynchronous
(multiplexing) communication facilities over both TCP/IP and UDP/IP
channels. It facilitates writing client and server applications
that serve a variety of purposes, e.g., web servers,
chat clients, mail, news, et cetera.
The synchronous model is simple and is ideal for noninteractive
command-line applications that communicate with a single host at a
time. FTP clients, HTTP spiders, and off-line netnews caching
programs typically use synchronous communication. The basic
operations start with connecting (via \texttt{tcp-connect}) to an
internet service (identified by a port number or a service name)
located on some host (identified by its host name or IP number). By
connecting to a server, we obtain an input port and an output port
forming a bidirectional channel of communication. Depending on the
service protocol, the client exchanges information with the server
by reading and writing to the designated ports. Read and write
operations in this model may block indefinitely until an appropriate
number of bytes is read/written, or until the operation times out.
Communication ends when the client closes both ports.
The asynchronous model allows for communicating with many hosts
simultaneously. Ikarus maintains a queue of pending ports, the
blocking operation performed on these ports, and their respective
continuations. Whenever the operating system indicates that a
read/write operation may block, Ikarus schedules the port and a
restarting continuation into the queue and then dispatches one of
the \emph{ready} operations. This is reminiscent of how
multitasking operating systems schedule I/O-bound threads, except
that in Ikarus, threads are lightweight, represented by ordinary
continuations. Thus, reading or writing to a nonblocking port
causes Ikarus to transparently capture a continuation, enlist it in
the queue, and dispatch another continuation captured earlier.
Multiple read and write operations from multiple connections are
fulfilled concurrently, dispatching whichever one is ready and
without one operation blocking the rest.
Because asynchronous scheduling and dispatching involves switching
continuations, winders that maintain the dynamic environment
(e.g.,~those established by \texttt{dynamic-wind},
\texttt{parameterize}, \texttt{with-output-to-file},
\texttt{with-exception-handler}, etc.) are properly invoked when
leaving a dynamic context and entering another. Care must be taken
when using winders that perform externally-visible side effects upon
entering/leaving a dynamic context.
\defun{tcp-connect}{procedure}
\texttt{(tcp-connect host service)}
The procedure \texttt{tcp-connect} attempts to connect to the
\texttt{service} located on the remote \texttt{host} through the
TCP/IP protocol. The \texttt{host} argument is a string
representing either the IP address (e.g., \texttt{"127.0.0.1"}) or a
fully-qualified domain name (e.g., \texttt{"www.example.com"}), in
which case name to address resolution is performed automatically.
The \texttt{service} argument is also a string which can be either a
port number (e.g., \texttt{"80"}) or a service name (e.g.,
\texttt{"http"}) in which case the service name is mapped to the
canonical port number for the service.
Upon success, \texttt{tcp-connect} returns two values: a binary
\emph{input} port and a binary \emph{output} port. Writing and
reading from the obtained ports may block indefinitely until an
appropriate number of bytes is read/written. Closing both ports
closes the communication channel and frees the underlying
operating-system resources.
\defun{tcp-connect-nonblocking}{procedure}
\texttt{(tcp-connect-nonblocking host service)}
The procedure \texttt{tcp-connect-nonblocking} is similar to
\texttt{tcp-connect} except that the two returned ports are put in
\emph{nonblocking} mode. If an attempt to perform a read (write)
operation on the input (output) port may block, a restart
continuation is captured and scheduled in the I/O queue and a
previously blocked operation may be restarted (when its blocking
operation can progress).
\defun{udp-connect}{procedure}
\texttt{(udp-connect host service)}
The procedure \texttt{udp-connect} is similar to
\texttt{tcp-connect} except that it connects to the remote server
through the UDP protocol (as implied by the name).
\defun{udp-connect-nonblocking}{procedure}
\texttt{(udp-connect-nonblocking host-name service-name)}
The procedure \texttt{udp-connect-nonblocking} is similar to
\texttt{tcp-connect-nonblocking} except that it connects to the
remote server through the UDP protocol.
\defun{tcp-server-socket}{procedure}
\texttt{(tcp-server-socket port-number)}
The procedure \texttt{tcp-server-socket} attempts to \emph{listen}
on the given port number for incoming connections. On success,
\texttt{tcp-server-socket} returns an abstract \emph{tcp-server}
object encapsulating the underlying operating-system server socket.
The server socket is placed in \emph{blocking} mode: an attempt to
accept a connection on such server blocks indefinitely
until a remote client attempts to establish a connection.
\defun{tcp-server-socket-nonblocking}{procedure}
\texttt{(tcp-server-socket-nonblocking port-number)}
This procedure is similar to \texttt{tcp-server-socket} except that
the returned server socket is placed in \emph{nonblocking} mode. An
attempt to accept a connection from a nonblocking server socket does
not block the entire process; instead, a restarting continuation is
scheduled and is invoked when an incoming connection is available
(and another I/O-bound operation blocks).
\defun{accept-connection}{procedure}
\texttt{(accept-connection tcp-server)}
The procedure \texttt{accept-connection} takes a tcp-server socket
(e.g., one obtained from \texttt{tcp-server-socket}) and returns two
values: a binary \emph{input} port and a binary \emph{output} port
through which the server communicates with the connecting client.
If the \texttt{tcp-server} object is in blocking mode,
\texttt{accept-connection} may block the entire process until an
incoming connection is obtained. If the server is in nonblocking
mode, an (otherwise) blocking operation would be rescheduled and
invoked later when a connection occurs.
The input and output ports that \texttt{accept-connection} returns
are put in blocking mode.
\newpage
\defun{accept-connection-nonblocking}{procedure}
\texttt{(accept-connection-nonblocking tcp-server)}
The procedure \texttt{accept-connection-nonblocking} is similar to
\texttt{accept-connection} except that the two returned ports are
put in nonblocking mode.
\defun{close-tcp-server-socket}{procedure}
\texttt{(close-tcp-server-socket tcp-server)}
This procedure closes the server socket (so that no more incoming
connections can be accepted) and frees the underlying
operating-system resources associated with the socket.
\defun{register-callback}{procedure}
\texttt{(register-callback input-port thunk)}\\
\texttt{(register-callback output-port thunk)}\\
\texttt{(register-callback tcp-server thunk)}
The procedure \texttt{register-callback} takes a nonblocking port or
server socket and a callback procedure. It enqueues the port/socket
and the thunk into the event queue. The given procedure is called
when another I/O operation blocks and data is ready to be read (for
an input port argument), written (for an output port argument), or
an incoming connection is available (for a tcp-server argument).
The \texttt{register-callback} procedure returns immediately. It
does not block and does not attempt to perform any read, write, or
accept operation on the given argument.
\chapter{\label{chapter:foreign}The \texttt{(ikarus foreign)} library}
This chapter describes the facilities through which Ikarus
interfaces with the host operating system and other external
libraries. The facilities of the \texttt{(ikarus~foreign)}
library give the Scheme program unrestricted access to the computer
memory, allowing one to allocate, access, modify, and free memory as
needed. The facilities also allow the Scheme program to \emph{call
out} to system procedures as well as allow the native procedures to
\emph{call back} into Scheme.
This chapter is organized as follows: Section~\ref{sec:ffi-overview}
gives an overview of the basic concepts such as shared libraries,
external symbols, foreign data types, pointers, and procedures.
Section~\ref{sec:ffi-memory} describes the primitives that
\texttt{(ikarus~foreign)} provides for direct manipulation of
memory. Section~\ref{sec:ffi-procedures} deals with loading
external libraries and calling out to native library procedures and
calling back into Scheme. To demonstrate the usefulness of the
foreign facilities, Ikarus ships with two libraries that also serve
as extended examples for using the system.
Section~\ref{sec:ffi-opengl} describes the OpenGL library
\texttt{(ikarus~opengl)}, which allows the programmer to produce 2D
and 3D computer graphics. Section~\ref{sec:ffi-objc} describes the
\texttt{(ikarus~objc)} library, which allows the programmer to access
libraries and frameworks written in the Objective-C programming
language and thus provides full access to the Mac OS X system
(e.g.,~making graphical user interfaces with Cocoa and drawing
graphics with Quartz all from Scheme).
Ikarus version \texttt{0.0.4} is the first version of Ikarus to
support the described foreign interfaces.
\newpage
\section{\label{sec:ffi-overview}Overview}
In order to make full use of the computer, it is important for a
programming environment (e.g., Ikarus Scheme) to facilitate access
to the underlying architecture on which it runs. The underlying
architecture includes the API provided by the host operating system
kernel (e.g., Linux), the system libraries (e.g., \texttt{libc}),
and other site-installed libraries (e.g., \texttt{sqlite3}).
Providing direct access to such API from within Scheme allows the
programmer to write Scheme libraries that have few or no
dependencies on external programs (such as a \texttt{C} development
toolchain). When dealing with system libraries, the programmer
must have a thorough understanding of many aspects of the targeted
system. This section attempts to provide answers to many questions
that are frequently encountered when interfacing to external
libraries.
\section{Memory management}
Ikarus Scheme is a managed environment. Like in many programming
environments, Ikarus manages its own memory. Scheme objects are
allocated in a special memory region (the Scheme heap) and have
type-specific object layout that allows the run time system to
distinguish object types and allows the garbage collector to locate
all potentially live objects and reclaim the memory of dead objects.
Scheme objects are also \emph{opaque} in the sense that the data
structures used to represent Scheme objects (e.g., pairs) are not
exposed to the programmer, who can only interact with objects
through an interface (e.g., \texttt{car}, \texttt{cdr}).
Unmanaged environments, such as the operating system on which Ikarus
runs, require that the programmer manages the allocation and
deallocation of system resources herself. Memory regions, file
handles, external devices, the screen, etc., are all examples of
resources whose management must be coordinated among the different
parts of the system, and this becomes the responsibility of the
programmer who is wiring the different subsystems together.
Memory, from a system's point of view, is \emph{transparent}. A
pointer is an integer denoting an address of memory. This memory
address may contain a value that requires interpretation. At the
lowest-level, each byte of memory contains eight bits, each of which
may be toggled on or off. A level higher, contiguous sequences of
bytes are grouped together and are interpreted as integers, floating
point numbers, or pointers to other memory addresses. These are the
basic data types that are often interpreted atomically. Yet a level
higher, groups of basic types form data structures such as arrays,
linked lists, trees, and so on. Objects, as found in
object-oriented programming languages, are at an even higher level
of abstraction since they are treated as opaque references that
retain state and know how to respond to messages.
The procedures in the \texttt{(ikarus~foreign)} library are meant to
provide a way to interface with the low level memory operations such
as setting and getting bytes from specific locations in memory.
Although they do not provide high-level operations, the basic
procedures make implementing high-level operations (such as the
Objective-C system presented in Chapter~\ref{chapter:objc})
possible. Programmers are encouraged to define their own
abstractions that are most suitable for the specific target library
rather than using the low-level operations directly. This results
in writing more robust and more easily maintainable libraries. To
put it more boldly: \textbf{Do not sprinkle your code with low-level
memory operations}.
\section{\label{sec:ffi-memory}Memory operations}
\defun{malloc}{procedure}
\texttt{(malloc n)}
The \texttt{malloc} procedure allocates \texttt{n} bytes of memory
and returns a pointer to the allocated memory. The \texttt{malloc}
Scheme procedure is implemented using the host-provided
\texttt{malloc} system procedure (often found in \texttt{libc}).
The number of bytes, \texttt{n}, must be a positive exact integer.
\begin{verbatim}
> (malloc 10)
#<pointer #x00300320>
> (malloc 10000)
#<pointer #x01800400>
\end{verbatim}
\newpage
\defun{free}{procedure}
\texttt{(free p)}
The \texttt{free} procedure takes a pointer and frees the memory
region at the given address. The memory region must be allocated
with \texttt{malloc}, \texttt{calloc}, or a similar system
procedure. Once freed, memory operations on the given address are
invalid and may cause the system to crash at unpredictable times.
Ikarus cannot check for such errors since the memory may be freed by
procedures that are external to Ikarus.
\defun{pointer->integer}{procedure}
\texttt{(pointer->integer p)}
The procedure \texttt{pointer->integer} converts the value of the
pointer \texttt{p} to an exact integer value. The result may be a
fixnum or a bignum depending on the pointer.
\defun{integer->pointer}{procedure}
\texttt{(integer->pointer n)}
The procedure \texttt{integer->pointer} converts the exact integer
\texttt{n} to a pointer value. The lower 32 bits (or 64 bits on
64-bit systems) of the value of \texttt{n} are significant in
computing the pointer value. It is guaranteed that
\texttt{(integer->pointer (pointer->integer p))} points to the same
address as \texttt{p}.
\defun{pointer?}{procedure}
\texttt{(pointer? x)}
The predicate \texttt{pointer?} returns \texttt{\#t} if the value
of \texttt{x} is a pointer, and returns \texttt{\#f} otherwise.
\BoxedText{Note:}{The result of calling the procedures
\texttt{eq?}, \texttt{eqv?} and \texttt{equal?} on pointer values is
unspecified.}
\newpage
\defun{pointer-set-c-char!}{procedure}
\texttt{(pointer-set-c-char! p i n)}
The procedure \texttt{pointer-set-c-char!} sets a single byte of memory
located at offset \texttt{i} from the pointer \texttt{p} to the
value of \texttt{n}. The pointer \texttt{p} must be a valid
pointer. The index \texttt{i} must be an exact integer. The value
of \texttt{n} must be an exact integer. Only the 8 lowermost
bits of \texttt{n} are used in the operation and the remaining bits
are ignored.
\defun{pointer-set-c-short!}{procedure}
\texttt{(pointer-set-c-short! p i n)}
The procedure \texttt{pointer-set-c-short!} sets two bytes located at
offsets \texttt{i} and \texttt{(+ i 1)} to the 16 lowermost bits of
the exact integer \texttt{n}. Note that the offset \texttt{i} is a
byte offset; \texttt{pointer-set-c-short!} does not perform any pointer
arithmetic such as scaling the offset by the size of the memory
location.
\defun{pointer-set-c-int!}{procedure}
\texttt{(pointer-set-c-int! p i n)}
The procedure \texttt{pointer-set-c-int!} sets four bytes located at
offset \texttt{i} to \texttt{(+ i 3)} to the 32 lowermost bits of
the exact integer \texttt{n}. Like \texttt{pointer-set-c-short!},
\texttt{pointer-set-c-int!} does not scale the offset \texttt{i}.
\defun{pointer-set-c-long!}{procedure}
\texttt{(pointer-set-c-long! p i n)}
On 64-bit systems, the procedure \texttt{pointer-set-c-long!} sets
eight bytes located at offset \texttt{i} to \texttt{(+ i 7)} to the
64 lowermost bits of the exact integer \texttt{n}. Like the
previous procedures, \texttt{pointer-set-c-long!} does not scale the
offset \texttt{i}. On 32-bit systems, \texttt{pointer-set-c-long!}
performs the same task as \texttt{pointer-set-c-int!}.
\defun{pointer-set-c-float!}{procedure}
\texttt{(pointer-set-c-float! p i fl)}
The procedure \texttt{pointer-set-c-float!} converts the Scheme
floating point number \texttt{fl} (represented in Ikarus as an
IEEE-754 double precision floating point number) to a float (an
IEEE-754 single precision floating point number) and stores the
result in the four bytes at offset \texttt{i} of the pointer
\texttt{p}.
\defun{pointer-set-c-double!}{procedure}
\texttt{(pointer-set-c-double! p i fl)}
The procedure \texttt{pointer-set-c-double!} stores the double
precision IEEE-754 floating point value of the Scheme flonum
\texttt{fl} in the eight bytes at offset \texttt{i} of the pointer
\texttt{p}.
\defun{pointer-set-c-pointer!}{procedure}
\texttt{(pointer-set-c-pointer! p i pv)}
On 64-bit systems, the procedure \texttt{pointer-set-c-pointer!} sets
eight bytes located at offset \texttt{i} to \texttt{(+ i 7)} to the
64-bit pointer value of \texttt{pv}. On 32-bit systems, the
procedure \texttt{pointer-set-c-pointer!} sets four bytes located at
offset \texttt{i} to \texttt{(+ i 3)} to the 32-bit pointer value of
\texttt{pv}. Like the previous procedures,
\texttt{pointer-set-c-pointer!} does not scale the offset \texttt{i}.
\defun{pointer-ref-c-signed-char}{procedure}
\texttt{(pointer-ref-c-signed-char p i)}
The procedure \texttt{pointer-ref-c-signed-char} loads a single byte located
at offset \texttt{i} from the pointer \texttt{p} and returns an
exact integer representing the sign-extended integer value of that
byte. The resulting value is in the range of $[-128, 127]$ inclusive.
\defun{pointer-ref-c-unsigned-char}{procedure}
\texttt{(pointer-ref-c-unsigned-char p i)}
The procedure \texttt{pointer-ref-c-unsigned-char} loads a single byte
located at offset \texttt{i} from the pointer \texttt{p} and returns
an exact integer representing the unsigned integer value of that
byte. The resulting value is in the range $[0, 255]$ inclusive.
The following example shows the difference between
\texttt{pointer-ref-c-signed-char} and
\texttt{pointer-ref-c-unsigned-char}.
\begin{verbatim}
> (let ([p (malloc 3)])
(pointer-set-c-char! p 0 #b01111111)
(pointer-set-c-char! p 1 #b10000000)
(pointer-set-c-char! p 2 #b11111111)
(let ([result
(list (pointer-ref-c-signed-char p 0)
(pointer-ref-c-signed-char p 1)
(pointer-ref-c-signed-char p 2)
(pointer-ref-c-unsigned-char p 0)
(pointer-ref-c-unsigned-char p 1)
(pointer-ref-c-unsigned-char p 2))])
(free p)
result))
(127 -128 -1 127 128 255)
\end{verbatim}
\defun{pointer-ref-c-signed-short}{procedure}
\texttt{(pointer-ref-c-signed-short p i)}
The procedure \texttt{pointer-ref-c-signed-short} loads two bytes
located at offsets \texttt{i} and \texttt{(+ i 1)} from the pointer
\texttt{p} and returns an exact integer representing the
sign-extended
integer value of the sequence. The resulting value is in the range
$[-32768, 32767]$ inclusive.
\defun{pointer-ref-c-unsigned-short}{procedure}
\texttt{(pointer-ref-c-unsigned-short p i)}
The procedure \texttt{pointer-ref-c-unsigned-short} loads two bytes
located at offsets \texttt{i} and \texttt{(+ i 1)} from the pointer
\texttt{p} and returns an exact integer representing the unsigned
integer value of the sequence. The resulting value is in the range
$[0, 65535]$ inclusive.
\newpage
\defun{pointer-ref-c-signed-int}{procedure}
\texttt{(pointer-ref-c-signed-int p i)}
The procedure \texttt{pointer-ref-c-signed-int} loads four bytes
starting at offset \texttt{i} of pointer \texttt{p} and returns an
exact integer in the range of $[-2^{31},2^{31}-1]$ inclusive.
\defun{pointer-ref-c-unsigned-int}{procedure}
\texttt{(pointer-ref-c-unsigned-int p i)}
The procedure \texttt{pointer-ref-c-unsigned-int} loads four bytes
starting at offset \texttt{i} of pointer \texttt{p} and returns an
exact integer in the range of $[0,2^{32}-1]$ inclusive.
\defun{pointer-ref-c-signed-long}{procedure}
\texttt{(pointer-ref-c-signed-long p i)}
On 64-bit systems, the procedure \texttt{pointer-ref-c-signed-long}
loads eight bytes starting at offset \texttt{i} of pointer
\texttt{p} and returns an integer in the range of
$[-2^{63},2^{63}-1]$ inclusive. On 32-bit systems, the procedure
\texttt{pointer-ref-c-signed-long} performs the same task as
\texttt{pointer-ref-c-signed-int}.
\defun{pointer-ref-c-unsigned-long}{procedure}
\texttt{(pointer-ref-c-unsigned-long p i)}
On 64-bit systems, the procedure \texttt{pointer-ref-c-unsigned-long}
loads eight bytes starting at offset \texttt{i} of pointer
\texttt{p} and returns an integer in the range of
$[0,2^{64}-1]$ inclusive. On 32-bit systems, the procedure
\texttt{pointer-ref-c-unsigned-long} performs the same task as
\texttt{pointer-ref-c-unsigned-int}.
\defun{pointer-ref-c-float}{procedure}
\texttt{(pointer-ref-c-float p i)}
The procedure \texttt{pointer-ref-c-float} returns the four-byte
float (represented as IEEE-754 single precision floating point
number) stored at offset \texttt{i} of the pointer \texttt{p}.
The value is extended to an IEEE-754 double precision floating
point number that Ikarus uses to represent inexact numbers.
\defun{pointer-ref-c-double}{procedure}
\texttt{(pointer-ref-c-double p i)}
The procedure \texttt{pointer-ref-c-double} returns the eight-byte
float (represented as IEEE-754 double precision floating point
number) stored at offset \texttt{i} of the pointer \texttt{p}.
\defun{pointer-ref-c-pointer}{procedure}
\texttt{(pointer-ref-c-pointer p i)}
The procedure \texttt{pointer-ref-c-pointer} returns the pointer
stored at offset \texttt{i} from the pointer \texttt{p}. The size
of the pointer (also the number of bytes loaded) depends on the
architecture: it is 4 bytes on 32-bit systems and 8 bytes on 64-bit
systems.
\section{\label{sec:foreign-objects}Accessing foreign objects from
Scheme}
\defun{dlopen}{procedure}
\texttt{(dlopen)}\\
\texttt{(dlopen library-name)}\\
\texttt{(dlopen library-name lazy? global?)}
The procedure \texttt{dlopen} takes a string \texttt{library-name}
representing a system library and calls the system procedure
\texttt{dlopen} which dynamically loads the given library into the
running process. The name of the library is system-dependent and
must include the appropriate suffix (e.g., \texttt{*.so} on Linux,
\texttt{*.dylib} on Darwin and \texttt{*.dll} on Cygwin). The
\texttt{library-name} may include a full path which identifies the
location of the library, or may be just the name of the library in
which case the system will look up the library name using the
\texttt{LD\_LIBRARY\_PATH} environment variable.
The argument \texttt{lazy?} specifies how library dependencies are
loaded. If true, \texttt{dlopen} delays the resolution and loading
of dependent libraries until they are actually used. If false, all
library dependencies are loaded before the call to \texttt{dlopen}
returns.
The argument \texttt{global?} specifies the scope of the symbols
exported from the loaded library. If true, all exported symbols
become part of the running image, and subsequent \texttt{dlsym}
calls may not need to specify the library from which the symbol is
loaded. If false, the exported symbols are not global and the
library pointer needs to be specified for \texttt{dlsym}.
Calling \texttt{(dlopen library-name)} is equivalent to
\texttt{(dlopen library-name \#f \#f)}. Calling \texttt{(dlopen)}
without arguments returns a pointer to the current process.
If successful, \texttt{dlopen} returns a pointer to the external
library which can be used subsequently by \texttt{dlsym} and
\texttt{dlclose}. If the library cannot be loaded, \texttt{dlopen}
returns \texttt{\#f} and the procedure \texttt{dlerror} can be used
to obtain the cause of the failure.
Consult the \texttt{dlopen(3)} page in your system manual for
further details.
\defun{dlclose}{procedure}
\texttt{(dlclose library-pointer)}
The procedure \texttt{dlclose} is a wrapper around the system
procedure with the same name. It receives a library pointer
(e.g.,~one obtained from \texttt{dlopen}) and releases the resources
loaded from that library. Closing a library renders all symbols and
static data structures that the library exports invalid and the
program may crash or corrupt its memory if such symbols are used
after a library is closed.
Most system implementations of dynamic loading employ reference
counting for \texttt{dlopen} and \texttt{dlclose} in that library
resources are not freed until the number of calls to
\texttt{dlclose} matches the number of calls to \texttt{dlopen}.
The procedure \texttt{dlclose} returns a boolean value indicating
the success status of the operation. If \texttt{dlclose}
returns \texttt{\#f}, the procedure \texttt{dlerror} can be used to
obtain the cause of the error.
Consult the \texttt{dlclose(3)} page in your system manual for
further details.
\defun{dlerror}{procedure}
\texttt{(dlerror)}
If any of the dynamic loading operations (i.e., \texttt{dlopen},
\texttt{dlclose}, \texttt{dlsym}) fails, the cause of the error can
be obtained by calling \texttt{dlerror} which returns a string
describing the error. The procedure \texttt{dlerror} returns
\texttt{\#f} if there was no dynamic loading error.
Consult the \texttt{dlerror(3)} page in your system manual for
further details.
\defun{dlsym}{procedure}
\texttt{(dlsym library-pointer string)}
The procedure \texttt{dlsym} takes a library pointer (e.g., one
obtained by a call to \texttt{dlopen}) and a string representing the
name of a symbol that the library exports and returns a pointer to
the location of that symbol in memory. If \texttt{dlsym} fails, it
returns \texttt{\#f} and the cause of the error can be obtained
using the procedure \texttt{dlerror}.
Consult the \texttt{dlsym(3)} page in your system manual for
further details.
\section{\label{sec:callout}Calling out to foreign procedures}
Ikarus provides the means to call out from Scheme to foreign
procedures. This allows the programmers to extend Ikarus to access
system-specific facilities that are available on the host machine.
In order to call out to a foreign procedure, one must provide two
pieces of information: the signature of the foreign procedure (e.g.,
its type declaration if it is a \texttt{C} procedure) and the
address of the procedure in memory. The address of the procedure
can be easily obtained using \texttt{dlsym} if the name of the
procedure and its exporting library are known. The signature of the
procedure cannot, in general, be obtained dynamically, and therefore
must be hard coded into the program.
The signature of the foreign procedure is required for proper
linkage between the Scheme system and the foreign system. Using
the signature, Ikarus determines how Scheme values are converted
into native values, and where (e.g., in which registers and stack
slots) to put these arguments. The signature also determines where
the returned values are placed and how they are converted from the
system data types to the corresponding Scheme data types.
A procedure's signature is composed of two parts: the return type
and the parameter types. The return type is a symbol that can be
any one of the type specifiers listed in
Figure~\ref{fig:foreign-types}, page~\pageref{fig:foreign-types}.
The parameter types is a list of type specifier symbols. The symbol
\texttt{void} can appear as a return type but cannot appear as a
parameter type.
\defun{make-c-callout}{procedure}
\texttt{((make-c-callout return-type parameter-types) native-pointer)}
The procedure \texttt{make-c-callout} is the primary facility for
making foreign procedures callable from Scheme. It works as
follows. First, \texttt{make-c-callout} receives two arguments
denoting the signature of the procedure to be called. It prepares a
bridge that converts from Scheme's calling conventions and data
structures to their foreign counterparts. It returns a procedure
$p_1$. Second, the procedure $p_1$ accepts a pointer to a foreign
procedure (e.g., one obtained from \texttt{dlsym}) and returns a
Scheme procedure $p_2$ that encapsulates the foreign procedure. The
final procedure $p_2$ can be called with as many arguments as the
ones specified in the \texttt{parameter-types}. The parameters
supplied to $p_2$ must match the types supplied as the
\texttt{parameter-types} according to the ``Valid Scheme types''
column in the table in Figure~\ref{fig:foreign-types}. The
procedure $p_2$ converts the parameters from Scheme types to native
types, calls the foreign procedure, obtains the result, and converts
it to the appropriate Scheme value (depending on the
\texttt{return-type}).
The interface of \texttt{make-c-callout} is broken down into three
stages in order to accommodate common usage patterns. Oftentimes, a
function signature can be used by many foreign procedures and
therefore, \texttt{make-c-callout} can be called once per signature
and each signature can be used multiple times. Similarly,
separating the foreign procedure preparation from parameter passing
allows for preparing the foreign procedure once and calling it many
times.
The types listed in Figure~\ref{fig:foreign-types} are restricted to
basic types and provide no automatic conversion from composite
Scheme data structures (such as strings, symbols, vectors, and
lists) to native types. The restriction is intentional in order for
Ikarus to avoid making invalid assumptions about the memory
management of the targeted library. For example, while Ikarus
\emph{can} convert a Scheme string to a native byte array (e.g.,
using \texttt{string->bytevector} to encode the string, then using
\texttt{malloc} to allocate a temporary buffer, and copying the
bytes from the bytevector to the allocated memory), it cannot decide
when this allocated byte array is no longer needed and should be
freed. This knowledge is library-dependent and is often
procedure-dependent. Therefore, Ikarus leaves it to the programmer
to manage all memory related issues.
Outgoing parameters to foreign procedures are checked against the
declared types. For example, if a callout is prepared to expect a
parameter of type \texttt{signed-int}, only exact integers are
allowed to be passed out. For integer types, only a fixed number of
bits is used and the remaining bits are ignored. For floating point
types, the argument is checked to be a Scheme flonum. No implicit
conversion between exact and inexact numbers is performed.
{
\begin{figure}[b!]
\begin{center}
\begin{tabular}{@{}llll@{}}
\hline
Type specifier & Size & Valid Scheme types & Corresponding \texttt{C} types\\
\hline
\texttt{signed-char} & 1 byte & exact integer & \texttt{char}\\
\texttt{unsigned-char} & 1 byte & exact integer & \texttt{unsigned char}\\
\texttt{signed-short} & 2 bytes & exact integer & \texttt{short}\\
\texttt{unsigned-short} & 2 bytes & exact integer & \texttt{unsigned short}\\
\texttt{signed-int} & 4 bytes & exact integer & \texttt{int}\\
\texttt{unsigned-int} & 4 bytes & exact integer & \texttt{unsigned int}\\
\texttt{signed-long} & 4/8 bytes & exact integer & \texttt{long}\\
\texttt{unsigned-long} & 4/8 bytes & exact integer & \texttt{unsigned long}\\
\texttt{float} & 4 bytes & flonum & \texttt{float}\\
\texttt{double} & 8 bytes & flonum & \texttt{double}\\
\texttt{pointer} & 4/8 bytes & pointer &
\texttt{void*}, \texttt{char*}, \texttt{int*}, \texttt{int**}, \\
&&& \texttt{int(*)(int,int,int)}, etc. \\
\texttt{void} & -- & -- & \texttt{void}\\
\hline
\end{tabular}
\end{center}
\caption{\label{fig:foreign-types}The above table lists valid type
specifiers that can be used in callout and callback signatures.
Specifiers with ``4/8 bytes'' have size that depends on the system:
it is 4 bytes on 32-bit systems and 8 bytes on 64-bit systems. The
\texttt{void} specifier can only be used as a return value
specifier to mean ``no useful value is returned''.}
\end{figure}
}
The following example illustrates the use of the
\texttt{make-c-callout} procedure in combination with \texttt{dlopen}
and \texttt{dlsym}. The session was run on a 32-bit Ikarus running
under Mac OS X 10.4. First, the \texttt{libc.dylib} foreign library
is loaded and is bound to the variable \texttt{libc}. Next, we
obtain a pointer to the \texttt{atan} foreign procedure that is
defined in \texttt{libc}. The native procedure \texttt{atan} takes
a \texttt{double} as an argument and returns a \texttt{double} and
that's the signature that we use for \texttt{make-c-callout}.
Finally, we call the foreign procedure interface with one argument,
\texttt{1.0}, which is a flonum and thus matches the required
parameter type. The native procedure returns a \texttt{double}
value which is converted to the Scheme flonum with value
\texttt{0.7853981633974483}.
\begin{verbatim}
> (import (ikarus foreign))
> (define libc (dlopen "libc.dylib"))
> libc
#<pointer #x00100770>
> (define libc-atan-ptr (dlsym libc "atan"))
> libc-atan-ptr
#<pointer #x9006CB1F>
> (define libc-atan
((make-c-callout 'double '(double)) libc-atan-ptr))
> libc-atan
#<procedure>
> (libc-atan 1.0)
0.7853981633974483
> (libc-atan 1)
Unhandled exception
Condition components:
1. &assertion
2. &who: callout-procedure
3. &message: "argument does not match type double"
4. &irritants: (1)
\end{verbatim}
\section{\label{sec:callback}Calling back to Scheme}
In order to provide full interoperability with native procedures,
Ikarus allows native procedures to call back into Scheme just as it
allows Scheme to call out to native procedures. This is important
for many system libraries that provide graphical user interfaces
with event handling (e.g., Cocoa, GTK+, GLUT, etc.), database
engines (e.g., libsqlite, libmysql, etc.), among others.
The native calling site for the call back is compiled with a
specific callback signature encoding the expected parameter types
and return type. Therefore, a Scheme procedure used for a call back
must be wrapped with a proper adapter that converts the incoming
parameters from native format to Scheme values as well as converts
the value that the Scheme procedure returns back to native format.
The signature format is similar to the one used for call outs (see
Figure~\ref{fig:foreign-types} on page~\pageref{fig:foreign-types} for
details).
\defun{make-c-callback}{procedure}
\texttt{((make-c-callback return-type parameter-types) scheme-procedure)}
The procedure \texttt{make-c-callback} is similar to the procedure
\texttt{make-c-callout} except that it provides a bridge from native
procedures back into Scheme. While the procedure
\texttt{make-c-callout} takes a native pointer and returns a Scheme
procedure, \texttt{make-c-callback} takes a Scheme procedure and
returns a native pointer. The native pointer can be called by
foreign procedures. The native parameters are converted to Scheme
data (according to \texttt{parameter-types}), the Scheme procedure
is called with these parameters, and the returned value is converted
back into native format (according to \texttt{return-type}) before
control returns to the native call site.
Note that the native procedure pointer obtained from
\texttt{make-c-callback} is indistinguishable from other native
procedures that are obtained using \texttt{dlsym} or similar means.
In particular, such native pointers can be passed to
\texttt{make-c-callout} resulting in a Scheme procedure that calls out
to the native procedure that in turn calls back into Scheme. The
following segment illustrates a very inefficient way of extracting
the lowermost 32 bits from an exact integer.
\begin{verbatim}
> (format "~x"
(((make-c-callout 'unsigned-int '(unsigned-int))
((make-c-callback 'unsigned-int '(unsigned-int))
values))
#xfedcba09876543210fedcba09876543210))
"76543210"
\end{verbatim}
\BoxedText{Caveat emptor:}{Preparing each call out and call back
procedure leaks a small amount of memory. This is because the
system cannot track such pointers that go into native code
(which may retain such pointers indefinitely). Use judiciously.}
\nocite{ghuloum-implicit}
\nocite{ghuloum-generation}
\appendix
\chapter{Missing Features}
Ikarus does not fully conform to \rnrs{6} yet. Although it
implements most of \rnrs{6}'s macros and procedures, some are still
missing. This section summarizes the set of missing features and
procedures.
\begin{itemize}
\item \texttt{number->string} does not accept the third argument
(precision). Similarly, \texttt{string->number} and the reader do
not recognize the \texttt{|p} notation.
\item The following procedures are missing from \texttt{(rnrs arithmetic
bitwise)}:
\begin{Verbatim}
bitwise-reverse-bit-field bitwise-rotate-bit-field
\end{Verbatim}
\item The following procedures are missing from \texttt{(rnrs arithmetic
fixnum)}:
\begin{Verbatim}
fxreverse-bit-field fxrotate-bit-field
\end{Verbatim}
\item The following procedures are missing from \texttt{(rnrs hashtables)}:
\begin{Verbatim}
equal-hash
\end{Verbatim}
\item The following procedures are missing from \texttt{(rnrs io ports)}:
\begin{Verbatim}
make-custom-binary-input/output-port
make-custom-textual-input/output-port
open-file-input/output-port
\end{Verbatim}
\end{itemize}
\newpage
\backmatter
%\appendix
\phantomsection
%\addcontentsline{toc}{chapter}{Bibliogaraphy}
\addcontentsline{toc}{chapter}{\bibname}
\bibliographystyle{plain}
\bibliography{ikarus-scheme-users-guide}
\newpage
\phantomsection
\addcontentsline{toc}{chapter}{Index}
\printindex
\end{document}