vx-scheme/src/vx-scheme.h

1224 lines
38 KiB
C++

//----------------------------------------------------------------------
// vx-scheme : Scheme interpreter.
// Copyright (c) 2002,2003,2006 and onwards Colin Smith.
//
// You may distribute under the terms of the Artistic License,
// as specified in the LICENSE file.
//
// vx-scheme.h : class definitions
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#ifndef WIN32
#include <unistd.h>
#else
#include <io.h>
// We need to do bit manipulations on pointers in order to
// implement our storage model (garbage collection bits, etc.)
// MSVC quite properly complains about this, but since it's
// necessary in this case we squelch the warnings.
#pragma warning (disable : 4311)
#pragma warning (disable : 4312)
#endif
#if __GNUG__ >= 3
using namespace std; // so sue me
#endif
#if defined (__GNUC__)
// Statically allocated cells must lie upon an 8-byte
// boundary, so that the lower three bits of pointers
// to such objects are free for our use.
#define ALIGN8 __attribute__ ((aligned (8)))
#define PACKED __attribute__ ((packed))
#elif defined (WIN32)
#define PACKED
#define ALIGN8 __declspec(align(8))
#else
#error "must have a way of aligning Cells to 8-byte boundary"
#endif
class OS;
class Cell;
class Slab;
class Context;
// OS abstraction layer
class OS
{
public:
static double get_time(); // get timestamp
static bool interactive (int fd); // terminal input?
// supply value for undef symbol
static Cell * undef (Context *, const char *);
// report exception and restart
static void exception (const char *);
// manage debug flags
static unsigned int flags ();
static bool flag (int bit)
{ return (flags () & bit) != 0; }
};
typedef Cell * (* subr_f) (Context * ctx, Cell * arglist);
typedef void (* magic_set_f) (Context *, void * key, Cell * rhs);
typedef Cell * (* magic_get_f) (Context *, void * key);
extern Cell * nil;
extern Cell * unspecified;
extern Cell * unassigned;
extern Cell * unimplemented;
void error (const char *, const char * = 0);
// FOR_EACH is a macro that can be used to traverse a standard Scheme
// list. The variable `var' is bound for the duration of the traversal
// to each node in the list.
#define FOR_EACH(var,list) \
for (Cell * var = list; var != nil; var = Cell::cdr (var))
#define INTERN_SYM(sym, symname) \
psymbol sym = intern (symname);
class cellvector
{
public:
// Acquire from Freelist
static cellvector* alloc(int size);
static cellvector* alloc(int size, int allocate);
// Return to freelist
void free();
cellvector (int size = 0);
cellvector (int size, int alloc);
~cellvector ();
Cell * get(int ix)
{ if (ix < 0 || ix >= sz) vref_error (); return v [ix]; }
void set (int, Cell *);
// used when you know the reference is in bounds.
Cell *get_unchecked(int ix) { return v[ix]; }
void set_unchecked(int ix, Cell* c) { v[ix] = c; }
Cell *& operator [] (int);
Cell * top ()
{ if (sz <= 0) vref_error (); return v [sz-1]; }
void push (Cell * c)
{ if (sz == allocated) expand (); v [sz++] = c; }
Cell * pop ()
{ if (sz <= 0) vref_error (); return v [--sz]; }
Cell * shift ();
void unshift (Cell *);
int size () {return sz;}
void discard (int n = 1)
{ if (n < 0 || n > sz) vref_error (); sz -= n; }
void clear();
private:
void make_cv (int size, int alloc);
void expand ();
void vref_error ();
int sz;
int allocated;
friend class Context; // Context::gc needs to see our gc_* members
int gc_index;
union {
Cell * gc_uplink;
cellvector* next_free;
};
Cell ** v;
// Freelist. We keep allocated storage for "short" cellvectors.
static const int keep_size = 4;
static const int keep_count = 100;
static cellvector* freelist_head[keep_size+1];
static int freelist_count[keep_size+1];
};
// The symbol table is implemented as an AVL tree of these nodes.
// There's no repetition, so the address of one of these nodes can
// serve as a unique hashcode for a symbol for equality-testing
// purposes. There's one call, intern(), for introducing a new
// string to the collection.
//
// The Scheme standard, however, introduces one complication: the
// requirement that symbols be stored in a "standard case." This
// is in conflict with our desire to have case-sensitive symbol
// matching (for integration with underlying symbol tables). (Scheme
// also provides the primitive string->symbol, which can be used to
// create symbols outside of standard case, but the REPL is not
// expected to use this.)
//
// In the end I decided to spend some extra memory to achieve standard
// compliance and VxWorks symbol table integration at the same time.
// We choose to consider lower-case symbols as "canonical". (The standard
// says we must choose upper or lower case, but not which one). In the event
// that a symbol arrives in mixed case, we store it both ways: canonically
// (that is, with lowered case) for Scheme symbol lookup, and unmolested
// so that, when we try the VxWorks symbol table after all else has failed,
// we can respect the case of the sybmol as written.
typedef struct _symbol
{
struct _symbol * llink; // Left binary tree link */
struct _symbol * rlink; // Right binary tree link */
const char * key; // Search key (symbol name) */
const char * truename; // case-sensitive name, if diff. */
cellvector * plist; // property list */
short b; // Balance factor */
} symbol, *psymbol;
psymbol intern (const char * name);
psymbol intern_stet (const char * name);
Cell * vector_from_list (Context * ctx, Cell *);
Cell * vector_to_list (Context * ctx, Cell *);
// ------------------------------------------------------------------------
// class sio: input/output behavior we expect from strings or streams.
// interface class.
class sio
{
public:
virtual ~sio() {}
virtual int get() = 0;
virtual int peek() = 0;
virtual void unget() = 0;
virtual void ignore() = 0;
};
// ------------------------------------------------------------------------
// class file_sio: This wraps a FILE* into an object that answers to
// the above interface.
class file_sio : public sio
{
public:
file_sio (FILE * _fp) : fp (_fp), lastch (-1) {};
virtual int get () { return lastch = fgetc (fp); }
virtual int peek () { int c = get (); ungetc (c, fp); return c; }
virtual void unget () { ungetc (lastch, fp); }
virtual void ignore () { get (); }
private:
FILE * fp;
int lastch;
};
// ------------------------------------------------------------------------
// An "sstring" is a simple extensible string. It reallocates storage
// as necessary to support arbitrary growth. It is a poor cousin to
// STL's string, but with considerably less code-bloat since there's no
// template expansion or nontrivial inlining.
//
// In order to avoid involving the strstream class, we also extend
// our sstream with a small amount of I/O semantics. This allows
// sstrings to be passed to the lexical analyzer.
class sstring : public sio
{
public:
sstring ();
virtual ~sstring ();
char * str ()
{ return base; }
char & operator [] (size_t ix)
{ return base [ix]; }
void append (const char *);
void append (const char *, size_t len);
void append (const char);
size_t length ()
{ return sz; }
void claim (); // claim dynamic storage
bool operator == (const char * s)
{ return !strcmp (base, s); }
// I/O behavior
int get ();
int peek ();
bool eof ();
void unget ();
void ignore ();
private:
static const int stat_size = 32;
char c [stat_size];
size_t sz;
size_t alloc;
char * base;
char * end;
char * pos; // I/O read position
bool claimed;
};
//----------------------------------------------------------------------
// class Cell
//
// The Cell is the heart of the Scheme implementation. It is the
// universal container for all Scheme data types and also the central
// structure supporting Scheme's garbage-collected memory model.
// The economy of a Cell's realization is the single most significant
// factor influencing the speed and space efficiency of a Scheme
// system (with the possible exception of compilation, beyind the
// scope of this header file).
//
// We consider it imperative that an ordinary cell be no larger than
// two machine pointers (car and cdr); if a data object requires
// more storage than this, we allocate extension words.
//
// For our implementation, we expect that a machine pointer is at
// least four bytes, so that two of these (car,cdr) will occupy
// eight bytes. In consequence, we may therefore insist that the
// storage for cells be 8-byte aligned, which gives us three bits at
// the least-significant end of a cell pointer to use as type-tagging
// information.
class Cell
{
friend class Context;
friend class Slab;
friend class InterpreterExt;
public:
typedef Cell * ptr;
void display (FILE *);
void write(FILE *) const;
void write(sstring&) const;
bool eq (Cell * c);
bool eqv (Cell * c)
{
return eq (c);
}
bool equal (Cell * c);
bool is_symbol (psymbol s)
{
return type () == Cell::Symbol && SymbolValue () == s;
}
struct Procedure
{
Procedure (Cell * _envt, Cell * _body, Cell * _arglist)
: body (_body),
arglist (_arglist),
envt (_envt)
{}
Procedure ()
: body (nil),
arglist (nil),
envt (nil)
{}
Cell * body;
Cell * arglist;
Cell * envt;
};
// Certain cells we have heard of
ALIGN8 static Cell Nil;
ALIGN8 static Cell Unspecified;
ALIGN8 static Cell Unassigned;
ALIGN8 static Cell Eof_Object;
ALIGN8 static Cell Bool_T;
ALIGN8 static Cell Bool_F;
ALIGN8 static Cell Apply;
ALIGN8 static Cell Error;
ALIGN8 static Cell Halt;
ALIGN8 static Cell Unimplemented;
// Access/Mutate Cons Cells. These are checked calls, in
// that they will verify that they are traversing a set of
// cons cells at each step, using "assert_cons", which
// throws a C++ exception if this is not found to be true.
static void setcar (Cell * c, Cell * car)
{ atomic (c) ? notcons() : (c->ca.p = car); }
static void setcdr (Cell * c, Cell * cdr)
{ atomic (c) ? notcons() : (c->cd.p = cdr); }
static Cell * car (const Cell * c)
{ return atomic (c) ? notcons() : c->ca.p; }
static Cell * cdr (const Cell * c)
{ return atomic (c) ? notcons() : c->cd.p; }
static Cell * caar (Cell * c);
static Cell * cadr (Cell * c);
static Cell * cdar (Cell * c);
static Cell * cddr (Cell * c);
static Cell * caaar (Cell * c);
static Cell * caadr (Cell * c);
static Cell * cadar (Cell * c);
static Cell * caddr (Cell * c);
static Cell * cdaar (Cell * c);
static Cell * cdadr (Cell * c);
static Cell * cddar (Cell * c);
static Cell * cdddr (Cell * c);
static Cell * caaaar (Cell * c);
static Cell * caaadr (Cell * c);
static Cell * caadar (Cell * c);
static Cell * caaddr (Cell * c);
static Cell * cadaar (Cell * c);
static Cell * cadadr (Cell * c);
static Cell * caddar (Cell * c);
static Cell * cadddr (Cell * c);
static Cell * cdaaar (Cell * c);
static Cell * cdaadr (Cell * c);
static Cell * cdadar (Cell * c);
static Cell * cdaddr (Cell * c);
static Cell * cddaar (Cell * c);
static Cell * cddadr (Cell * c);
static Cell * cdddar (Cell * c);
static Cell * cddddr (Cell * c);
// "Boxes" to hold things related to atoms that won't fit in a cell.
// We need one of these whenever the atom has two words or more of
// data. They are allocated from the heap and are freed when a gc'd
// atom is finalized.
struct SubrBox
{
subr_f subr;
const char * name;
};
struct MagicBox
{
void* key;
magic_set_f set_f;
magic_get_f get_f;
};
// We store length with strings. When these are allocated we
// preallocate the string space; freeing this object discards
// both box and string.
struct StringBox
{
size_t length;
char s[1];
};
// Value extractors
int IntValue () const;
char CharValue () const;
SubrBox * SubrValue () const;
char * StringValue () const;
size_t StringLength () const;
FILE * IportValue () const;
FILE * OportValue () const;
void * ContValue () const;
cellvector * VectorValue () const;
cellvector * CProcValue () const;
Cell * PromiseValue () const;
psymbol SymbolValue () const;
psymbol BuiltinValue () const;
Procedure LambdaValue () const;
double RealValue () const;
const char * name () const;
// unsafe accessors: use when you have prior knowledge that the
// cell contains an atom of the proper type.
cellvector * unsafe_vector_value() const {
return cd.cv;
}
static void real_to_string (double, char *, int);
double asReal () const {
if (type () == Cell::Int)
return (double) IntValue();
else
return RealValue();
}
// In scheme, the only two values of type `boolean' are #t and
// #f. However, from the point of view of truth valuation,
// anything other than #f is considered `true'. We follow the
// Scheme standard strictly, and so do not consider nil to have
// a false connotation as it would in other dialects of Lisp.
bool isBoolean () {return this == &Bool_T || this == &Bool_F;}
bool istrue () {return this != &Bool_F; }
bool ispair();
static Cell * untagged (Cell *);
// Utilities
int length ()
{
int i = 0;
FOR_EACH (p, this)
++i;
return i;
}
class List
{
public:
List () : h (&Nil), t (&Nil)
{}
void append (Cell * c)
{
if (t == &Nil)
h = t = c;
else
{
Cell::setcdr (t, c);
t = c;
}
}
void append_list(Cell* list_head, Cell* list_tail) {
if (h == &Nil) {
h = list_head;
t = list_tail;
} else {
Cell::setcdr(t, list_head);
t = list_tail;
}
}
Cell * head () { return h; }
Cell * tail () { return t; }
private:
Cell * h; // head
Cell * t; // tail
};
void list_append (Cell *& head, Cell *& tail)
{
if (tail == &Nil)
{
head = tail = this;
}
else
{
setcdr (tail, this);
tail = this;
}
}
static void stats ();
static void sanity_check ();
enum Type
{
//------------
Int = 0, // The Atoms...
Symbol = 1,
Unique = 2,
String = 3,
Real = 4,
Subr = 5,
Lambda = 6,
Vec = 7,
Char = 8,
Iport = 9,
Oport = 10,
Promise = 11,
Cont = 12,
Builtin = 13,
Magic = 14,
Insn = 15,
Cproc = 16,
Cpromise = 17,
NUM_ATOMS = 18,
//------------
Cons = NUM_ATOMS, // A cell.
NUM_TYPES = Cons + 1
//------------
};
// If the ATOM bit is clear, it's a cons. Otherwise, the type
// is stored in the TYPEBITS field, unless it's a short integer.
Type type () const
{
if (short_atom (this))
return Int;
return (Type) (((ca.i & (ATOM|SHORT)) == ATOM)
? ((ca.i >> TAGBITS) & TYPEMASK)
: Cons);
}
void typecheck (Type t) const
{
if (type () != t)
typefail (type (), t);
}
bool macro () const
{
return flag (MACRO);
}
private:
static inline bool short_atom (const Cell * c)
{ return (reinterpret_cast <unsigned int> (c) & (ATOM|SHORT))
== (ATOM|SHORT); }
static inline bool long_atom (const Cell* c)
{ return (reinterpret_cast <unsigned int> (c) & (ATOM|SHORT)) == ATOM; }
static inline bool atomic (const Cell * c)
{ return short_atom (c) || ((c->ca.i & (ATOM|SHORT)) == ATOM); }
void gc_set_car (Cell *);
void gc_set_cdr (Cell *);
static Cell * notcons ();
Cell ()
{
ca.p = cd.p = &Nil;
}
Cell (const char * unique_name)
{
ca.i = 0;
set_type (Unique);
cd.u = unique_name;
}
void typefail (Type t1, Type t2) const;
// The lowest order three bits of a pointer are called the
// tagbits. They are always free for our use, since a cell
// consists of two words, each at least 32 bits, with the
// natural alignment (8 bytes for a 32-bit machine).
static const unsigned int TAGBITS = 3;
static const unsigned int ATOM = 0x1;
static const unsigned int MARK = 0x2;
static const unsigned int SHORT = 0x4;
static const unsigned int TYPEBITS = 5;
static const unsigned int TYPEMASK = (1 << TYPEBITS) - 1;
static const unsigned int TAGMASK = (1 << TAGBITS) - 1;
// Make sure flag bits are disjoint from TYPE and TAG bits.
static const unsigned int FLAGBASE = 1 << (TYPEBITS + TAGBITS);
static const unsigned int FORCED = FLAGBASE;
static const unsigned int QUICK = FLAGBASE << 1;
static const unsigned int GLOBAL = FLAGBASE << 2;
static const unsigned int MACRO = FLAGBASE << 3;
static const unsigned int VREF = FLAGBASE << 4;
static const unsigned int FREE = FLAGBASE << 5;
static const unsigned int FLAGBITS = 6;
static const int GLOBAL_ENV = -1;
// Warning! The virtual machine instructions use the upper
// 16 bits of the car for the opcode, and count field,
// so space for types, tags, and flags is limited to 16 bits.
#if TAGBITS + TYPEBITS + FLAGBITS > 16
#error too many atom bits used
#endif
inline int e_skip () {
// If global symbol, return -1. Else number of environments
// to skip is in highest-order byte
return (ca.i & GLOBAL) ? GLOBAL_ENV
: (int)((ca.i >> (8*(sizeof(ca.i)-1))) & 0xff);
}
inline int b_skip () {
// If global symbol, number of bindings to skip is in upper 16
// bits; else, it's in 2nd-highest-order byte
return (ca.i & GLOBAL) ? (ca.i >> (8*(sizeof(ca.i)-2)) & 0xffff)
: ((ca.i >> (8*(sizeof(ca.i)-2))) & 0xff);
}
void set_lexaddr (int e_skip, int b_skip) {
// If global, set flag and store b_skip in upper 16 bits.
// Else set e_skip in upper 8 bits, and set b_skip in
// next 8 bits.
const int start_bit = 8*(sizeof(ca.i)-2);
const int two_bytes = (1 << 16) - 1;
ca.i &= ~(two_bytes << start_bit);
if (e_skip == -1)
ca.i |= (b_skip << start_bit) | GLOBAL | QUICK;
else
ca.i |= ((e_skip << 8 | b_skip) << start_bit) | QUICK;
}
// The set of bits which should be ignored when
// comparing two cells in the sense of "eq?". We ignore the
// pieces having to do with lexical addresses.
static const unsigned int IGNORE = QUICK | GLOBAL | (~0 << 16);
static const unsigned int IGN_MASK = ~IGNORE;
static const char * typeName [NUM_TYPES];
static int typeCount [NUM_TYPES];
void flag (unsigned int f, bool b)
{
if (b)
ca.i |= f;
else
ca.i &= ~f;
}
void dump (FILE *);
bool flag (unsigned int f) const
{
// only non-short atoms can have flags. All requested bits must be set
return (ca.i & (f | SHORT | ATOM)) == (f | ATOM);
}
void set_type (Type t)
{
if (t != Cons)
ca.i |= (t << TAGBITS) | ATOM;
++typeCount [t];
}
// The actual data for an Atom/Cell is here.
union _car
{
unsigned int i;
Cell * p;
} ca;
union _cdr
{
unsigned int i;
double * d;
Cell * p;
const char * u;
SubrBox * f;
MagicBox * m;
StringBox * s;
psymbol y;
Cell * e;
cellvector * cv;
FILE * ip;
FILE * op;
char c;
void * vp;
void * j;
} cd;
};
//----------------------------------------------------------------------
// class Environment
//
// At the simplest level, an Environment is a mapping from symbols
// to values. Symbols are the hash codes maintained by the SymbolTable
// class, and the value of any symbol is simply a pointer to a Scheme
// cell. To implement this simple data structure, we use an STL vector
// of <symbol, Cell*> pairs. This choice of data structure is guided
// by some particularities of evaluation in Scheme (discussed below).
//
// Environments are created by binding constructs (like let and lambda),
// and a new environment is always linked to the environment in force
// when it was created (this is called the "enclosing environment").
// The enclosure chain always terminates at the global environment, which
// is where the symbols representing the language's standard features
// are bound.
//
// In Scheme, all variables are "lexically bound." This means that
// when a variable is mentioned in source code, one can determine the
// binding for that variable at "compile time" by looking through the
// stack of bindings crated by special forms capable of creating such
// bindings (e.g., lambda, let, et al.). The innermost matching binding
// found represents the storage for the value of the variable, and
// this can never change.
//
// This binding model creates the possibility of lexcial addressing, a
// system in which a variable reference can be replaced by the "index"
// of the storage in terms of the number of enclosing environments
// that must be traversed together with the index of the target
// variable within that environment. This represents an extremely
// efficient shortcut for variable value lookup. This is why we
// choose the vector data structure rather than an STL map: while a
// vector is slower to search the first time a variable is referenced,
// that initial search will reveal the "lexical address" of the
// variable, which we can then store in place of the referring symbol.
// It is therefore necessary that variable storage in an environment
// never move, once allocated. The simplest way to guarantee this is
// to manage the bindings ourselves in a vector; the lexical address
// can then be stored in the simple form of two integers and does not
// depend on peculiarities of the data-structure implementation.
//
// We overload the concept of Environment with other data needed to
// evaluate Scheme expressions. For example, Scheme I/O primitives
// like `with-input-from-file' provide for the presence of a stack
// of open files which we maintain in this structure.
class Context
{
public:
friend class Cell;
friend class Slab;
friend class VmLibExtension;
Context ();
// Argument and environment manipulation for the VM.
Cell * extend (Cell * env);
Cell * extend (Cell * env, Cell * blist);
Cell * extend_from_vector (Cell * env, cellvector * cv, int n);
void adjoin (Cell * env, Cell * val);
Cell * pop_list (int n);
int push_list (Cell*);
// "Binding" is the process of asserting a value for a
// variable in the given environment. That is, we do
// not search upward in the enclosure chain for an
// existing binding; we create one in the current environement.
// (The contrast is with `set', which does perform such
// a search.
void bind (Cell * env, Cell * c, Cell * value);
void bind_arguments (Cell * env, Cell * vars, Cell * values);
void bind_subr (const char * name, subr_f subr);
Cell * find_var (Cell * env, psymbol var, unsigned int* index);
void set_var (Cell * env, psymbol var, Cell * value) {
set_var(env, var, value, 0);
}
void set_var (Cell * env, psymbol var, Cell * value, unsigned int* index);
void set_var (psymbol var, Cell * value, unsigned int* index) {
set_var(root_envt, var, value, index);
}
// When new bindings are created, the existing environment
// is _extended_ with a vector of new {variable,value} bindings
// provided in parallel-list form.
// Getting and Setting values in an environment is slightly
// different from binding: `get' will search the enclosure
// chain if necessary, returning the innermost matching binding.
// Set does the same. Both of these will signal an error if
// a binding cannot be found (they will not establish one: only
// bind can do that).
Cell * get (Cell * env, Cell * c);
void set (Cell * env, Cell * var, Cell * value);
// root : find the "root" (i.e., parentless) environment
// which contains this one.
bool read_eval_print (FILE * in, FILE * out, bool);
Cell * root () { return root_envt; }
void gc ();
void gc_if_needed ();
void print_mem_stats (FILE *);
// "Switching" evaluator: calls the interpreter to evaluate if
// present; else the compiler.
Cell * eval (Cell* form);
// Returns true if we are using the bytecode VM.
bool using_vm() const;
// Interpreting evaluator
Cell* interp_evaluator(Cell* form);
Cell* (Context::*interp_eval)(Cell* form);
// VM for compiled code.
// It might not be linked in, in an interpreter-only
// build. The function pointer is used to connect it
// if it is present.
Cell * execute (Cell* form, Cell* args);
Cell * (Context::*vm_execute)(Cell* form, Cell* args);
Cell * vm_evaluator(Cell* form);
Cell * (Context::*vm_eval)(Cell* form);
// Convert text to live cells
Cell * read (sio &);
Cell * read (FILE *);
// Manufacture Cells and Atoms
Cell * make ();
Cell * make_int (int i);
Cell * make_char (char ch);
Cell * make_real (double d);
Cell * make_string (size_t len);
Cell * make_string (int len, char ch);
Cell * make_string (const char * s);
Cell * make_string (const char * s, size_t len);
Cell * make_subr (subr_f s, const char * name);
Cell * make_builtin (psymbol y);
Cell * make_symbol (psymbol y);
Cell * make_boolean (bool b);
Cell * make_vector (int n, Cell * init = &Cell::Unspecified);
Cell * make_iport (const char * fname);
Cell * make_iport (FILE *);
Cell * make_oport (const char * fname);
Cell * make_oport (FILE * op);
Cell * make_procedure (Cell * env, Cell * body, Cell * arglist);
Cell * make_promise (Cell * env, Cell * body);
Cell * make_macro (Cell * env, Cell * body, Cell * arglist);
Cell * make_magic (void *, magic_set_f, magic_get_f);
Cell * make (Cell * ca, Cell * cd = &Cell::Nil);
Cell * make_list1 (Cell *);
Cell * make_list2 (Cell *, Cell *);
Cell * make_list3 (Cell *, Cell *, Cell *);
Cell * make_instruction (Cell *insn);
Cell * make_instruction (int opcode, Cell *operands);
Cell * make_compiled_procedure (Cell * insns, Cell * literals,
Cell * envt, int start);
Cell * make_compiled_promise(Cell* procedure);
Cell * force_compiled_promise(Cell* promise);
Cell * make_continuation ();
void load_continuation (Cell * cont);
void print_insn(int pc, Cell* insn);
Cell* write_compiled_procedure (Cell * arglist);
Cell* load_compiled_procedure(struct vm_cproc*);
Cell* load_instructions(vm_cproc*);
Cell * cons (Cell * _car, Cell * _cdr) { return make (_car, _cdr); }
// ------------------------------------------------------------
void with_input (const char * fname)
{
istack.push (make_iport (fname));
}
void with_output (const char * fname)
{
ostack.push (make_oport (fname));
}
void without_output ()
{
fflush (ostack.pop ()->OportValue ());
}
void without_input ()
{
istack.pop ();
}
Cell * current_output () {return ostack.top ();}
Cell * current_input () {return istack.top ();}
// Protection from garbage collection (cell pointers not contained
// in "register machine" variables need to be treated this way.
// The variables are protected/unprotected in strict LIFO order.
Cell * gc_protect (Cell * c)
{ r_gcp.push (c); return c; }
void gc_unprotect (int ncells = 1)
{ r_gcp.discard (ncells); }
// If the VM has a main procedure linked in, run it and return
// the result; otherwise return NULL (a signal that the driver
// program should enter interactive mode). In the event that
// a value is returned, the caller will probably want to print
// it.
Cell* RunMain();
private:
Cell * alloc (Cell::Type t);
void mark (Cell *);
Cell * find (Cell * env, Cell * s);
void quicken (Cell *, int, int);
Cell * eval_list (Cell * list);
void provision ();
void init_machine ();
void print_vm_state ();
void * xmalloc (size_t);
// ===========================
// Machine Stack Operations
// The machine stack is just a cellvector, with one difference:
// it can hold integers (marked with the ATOM flag) as well as
// cell pointers. (There are thus only 31 bits in these integers,
// but that's way more than enough to hold the virtual machine
// state.
void save (Cell * c) { m_stack.push (c); }
void save (Cell & rc) { m_stack.push (rc.ca.p);
m_stack.push (rc.cd.p); }
void save (int i)
{ m_stack.push (reinterpret_cast <Cell *> ((i << 1) | Cell::ATOM)); }
void restore (Cell *& c) { c = m_stack.pop (); }
void restore (Cell & rc) { rc.cd.p = m_stack.pop ();
rc.ca.p = m_stack.pop (); }
void restore (int & i)
{ i = (reinterpret_cast <int> (m_stack.pop ()) &
static_cast<int>(~Cell::ATOM)) >> 1; }
// ===========================
// REGISTER MACHINE
// ===========================
Cell * r_exp; // expression to evaluate
Cell * r_env; // evaluation environment
Cell * r_unev; // args awaiting evaluation
Cell r_argl; // (head,tail) of argument list
Cell r_varl; // (head,tail) of binding list
Cell * r_proc; // procedure to apply
Cell * r_val; // value resulting from evaluation
Cell * r_tmp; // temporary values
Cell * r_elt; // elements assembled into lists
Cell * r_nu; // reference to objects being created
int r_qq; // quasiquotation depth
cellvector r_gcp; // extra cells protected from GC
int r_cont; // current continuation
cellvector m_stack; // recursion/evaluation stack
int state; // current machine state
// We added a different set of registers for the compiler VM.
// this avoids GC collisions when the interpreter is invoking
// compiled procedures. In the event vx-scheme is configured
// to use only one of the interpreter or compiler, there are
// some slots here that will be unused, but only one per execution
// context.
Cell * r_envt; // environment
Cell * r_cproc; // current compiled procedure.
// The assembled instructions to resume a saved continuation
Cell* cc_procedure;
Cell* empty_vector;
// ===========================
// routines to append elements to lists (used with r_argl and r_varl).
// Note: r_argl and r_varl MUST be maintained as correctly-formed
// lists, since we use unsafe car/cdr to traverse them.
void l_appendtail (Cell & l, Cell * t)
{
if (l.ca.p == nil)
l.ca.p = l.cd.p = t;
else
{
l.cd.p->cd.p = t; // l.cd.p->setcdr (t);
l.cd.p = t;
}
}
void l_append (Cell & l, Cell * t)
{
r_elt = make (t);
l_appendtail (l, r_elt);
}
void clear (Cell & c)
{
c.ca.p = c.cd.p = nil;
}
Cell * envt;
Cell * root_envt;
Cell * eval_cproc;
cellvector istack; // stack of input ports (with-input...)
cellvector ostack; // stack of output ports (with-output...)
struct Memory
{
cellvector active; // list of allocated Slabs
Cell * free; // freelist of cells
int c_free; // count of free cells
Slab * current () { return (Slab *) active.top (); }
bool low_water; // true if next exhaustion should alloc
bool last_alloc_gc; // true if last allocation provoked gc
bool no_inline_gc; // don't try gc on allocation failure
Memory ()
: active ()
{
free = 0;
c_free = 0;
low_water = last_alloc_gc = no_inline_gc = false;
}
};
bool ok_to_gc;
Memory mem;
int cellsAlloc;
int cellsTotal;
};
class VxSchemeInit
{
public:
VxSchemeInit ()
{
// Do sanity checks before scheme runs
Cell::sanity_check ();
}
~VxSchemeInit ()
{
// Print statistics when scheme exits.
Cell::stats ();
}
};
class SchemeExtension
{
public:
virtual ~SchemeExtension() {}
static void Register(SchemeExtension* ext);
static void RunInstall(Context*, Cell*);
static void MainProcedure(SchemeExtension* m) { main = m; }
static bool HaveMain() { return main != NULL; }
static Cell* RunMain(Context* ctx) { return main->Run(ctx); }
virtual void Install(Context*, Cell*) = 0;
private:
virtual Cell* Run(Context*) { return &Cell::Bool_F; }
static cellvector* extensions;
static SchemeExtension* main;
};
// Simple accessors to avoid the Cell:: scope, which we don't
// really need for simple things like 'car'.
inline Cell * car (Cell * c) {return Cell::car (c);}
inline Cell * caar (Cell * c) {return Cell::caar (c);}
inline Cell * cdr (Cell * c) {return Cell::cdr (c);}
inline Cell * cdar (Cell * c) {return Cell::cdar (c);}
inline Cell * cadr (Cell * c) {return Cell::cadr (c);}
inline Cell * cddr (Cell * c) {return Cell::cddr (c);}
inline Cell * cadar (Cell * c) {return Cell::cadar (c);}
inline Cell * caddr (Cell * c) {return Cell::caddr (c);}
inline Cell * caadr (Cell * c) {return Cell::caadr (c);}
inline Cell * cdadr (Cell * c) {return Cell::cdadr (c);}
inline Cell * cddar (Cell * c) {return Cell::cddar (c);}
inline Cell * caddar (Cell * c) {return Cell::caddar (c);}
inline Cell * cadaar (Cell * c) {return Cell::cadaar (c);}
// Certain syntactic features of Scheme (so-called "syntactic sugar"
// like the `else' clause in a cond statement, the use of `.' to
// construct improper lists and "varargs lambdas", and some of the
// mechanics of quasiquotation) are most easily implemented if we have
// predefined symbols for these tokens. They are not part of the
// global environment, however, and have no definitions themselves.
// We create them with global scope (in the `C' sense) as they can
// serve as invariant hashcodes throughout any universe of Scheme
// execution: there is never any need to compute their values more
// than once, even for multiple threads.
extern psymbol s_dot;
extern psymbol s_quote;
extern psymbol s_quasiquote;
extern psymbol s_unquote;
extern psymbol s_unquote_splicing;
extern psymbol s_passto;
extern psymbol s_else;
extern psymbol s_time;
extern psymbol s_eval;
extern psymbol s_foreach;
extern psymbol s_load;
extern psymbol s_map;
extern psymbol s_apply;
extern psymbol s_force;
extern psymbol s_delay;
extern psymbol s_defmacro;
extern psymbol s_withinput;
extern psymbol s_withoutput;
extern psymbol s_callwof;
extern psymbol s_callwif;
// We treat special forms similarly.
extern psymbol s_if;
extern psymbol s_define;
extern psymbol s_quote;
extern psymbol s_begin;
extern psymbol s_set;
extern psymbol s_or;
extern psymbol s_and;
extern psymbol s_lambda;
extern psymbol s_let;
extern psymbol s_letstar;
extern psymbol s_letrec;
extern psymbol s_do;
extern psymbol s_cond;
extern psymbol s_case;
extern psymbol s_callcc;
// Execution flags
#define TRACE_EVAL 0x01
#define TRACE_GC 0x02
#define DEBUG_NO_INLINE_GC 0x04
#define DEBUG_MEMSTATS_AT_EXIT 0x08
#define DEBUG_PRINT_PROCEDURES 0x10
#define TRACE_GC_ALL 0x20
#define TRACE_VM 0x40
#define TRACE_VMSTACK 0x80
#define COUNT_INSNS 0x100
// Typedefs for compiled procedures in C form. It's possible to serialize
// a compiled procedure into a C data structure that can be used to load
// the bytecode.
typedef unsigned char byte;
#if defined(WIN32)
#pragma pack(push, 1)
#endif
struct vm_insn {
byte opcode;
byte count;
const void* operand;
} PACKED;
#if defined(WIN32)
#pragma pack(pop)
#endif
struct vm_cproc {
vm_insn* insns;
unsigned int n_insns;
const char** literals;
unsigned int n_literals;
int entry;
};