vx-scheme/src/symtab.cpp

411 lines
8.1 KiB
C++

//----------------------------------------------------------------------
// vx-scheme : Scheme interpreter.
// Copyright (c) 2002,2003,2006 and onwards Colin Smith.
//
// You may distribute under the terms of the Artistic License,
// as specified in the LICENSE file.
//
// symtab.cpp : symbol table with copied strings in an AVL tree.
#include <stddef.h>
#include "vx-scheme.h"
static inline char * string_dup (const char * s)
{
char * x = (char *) malloc (strlen (s) + 1);
strcpy (x, s);
return x;
}
static psymbol newnode (const char * key)
{
psymbol n = (psymbol) malloc (sizeof (symbol));
memset (n, 0, sizeof (symbol));
n->key = string_dup (key);
return n;
}
// This is unguarded global data. Symbols stored in this tree
// could be shared among multiple threads, though, so it would
// be easy to protect this table with a mutex.
static symbol _head = { 0, 0, 0, 0 };
static psymbol head = &_head;
static psymbol symtab_insert (const char *);
// intern: place the string in the symbol table in standard case.
psymbol intern
(
const char * name
)
{
sstring ss;
size_t l;
psymbol q;
ss.append (name);
l = ss.length ();
for (size_t ix = 0; ix < l; ++ix)
ss [ix] = tolower (ss [ix]);
if (strcmp (ss.str (), name))
{
// Name was not given in standard case! We store it both as
// it was given and in standard case.
q = intern_stet (ss.str ());
q->truename = string_dup (name);
return q;
}
q = intern_stet (name);
q->truename = q->key;
return q;
}
// intern_stet: place the string in the symbol table exactly as given.
psymbol intern_stet
(
const char * name
)
{
return symtab_insert (name);
}
// An implementation of Knuth's Algorithm 6.2.3A "Balanced Tree Search
// and Insertion," from [TAoCP (3ed.) vol III p.462]. The insert
// function follows the structure of Knuth's algorithm closely, even
// using the same variable names and labels he chooses. Hence, we
// refer the reader his book for further documentation of this
// routine.
static psymbol symtab_insert
(
const char * K
)
{
psymbol P, Q, R, S, T;
int c, a;
if (!head->rlink)
{
return (head->rlink = newnode (K));
}
//A1: /* Initialize. */
T = head;
S = P = head->rlink;
A2: c = strcmp (K, P->key); /* Compare. */
if (c < 0) goto A3;
if (c > 0) goto A4;
return P;
A3: Q = P->llink; /* Move left. */
if (Q == 0)
{
Q = newnode (K);
P->llink = Q;
goto A5;
}
proceed:
if (Q->b != 0)
{
T = P;
S = Q;
}
P = Q;
goto A2;
A4: Q = P->rlink; /* Move right. */
if (Q == 0)
{
Q = newnode (K);
P->rlink = Q;
goto A5;
}
goto proceed;
A5: /* The "Insert" step is handled in the newnode function. */
//A6: /* Adjust balance factors. */
a = (strcmp (K, S->key) < 0) ? -1 : +1;
R = P = (a < 0 ? S->llink : S->rlink);
while (P != Q)
{
c = strcmp (K, P->key);
if (c < 0)
{
P->b = -1;
P = P->llink;
}
else if (c > 0)
{
P->b = +1;
P = P->rlink;
}
}
//A7:
if (S->b == 0) /* Balancing act. */
{
S->b = a;
++head->b; /* Keep track of tree height: */
return Q; /* Knuth uses LLINK but we use B. */
}
if (S->b == -a)
{
S->b = 0;
return Q;
}
if (S->b == a)
{
if (R->b == a)
goto A8;
else if (R->b == -a)
goto A9;
}
A8: P = R; /* Single rotation. */
if (a < 0)
{
S->llink = R->rlink;
R->rlink = S;
}
else
{
S->rlink = R->llink;
R->llink = S;
}
S->b = R->b = 0;
goto A10;
A9: if (a < 0) /* Double rotation. */
{
P = R->rlink;
R->rlink = P->llink;
P->llink = R;
S->llink = P->rlink;
P->rlink = S;
}
else
{
P = R->llink;
R->llink = P->rlink;
P->rlink = R;
S->rlink = P->llink;
P->llink = S;
}
if (P->b == a)
{
S->b = -a;
R->b = 0;
}
else if (P->b == -a)
{
S->b = 0;
R->b = a;
}
else
{
S->b = R->b = 0;
}
P->b = 0;
A10: if (S == T->rlink) /* Finishing touch. */
T->rlink = P;
else
T->llink = P;
return Q;
}
// sstring class
//
// simple, extensible string. Tries to work efficiently for small
// strings by using a static buffer, which "spills" into a region on
// the heap if necessary. Aims for compactness. We maintain
// null-termination at all times. A "claim" operation is supported,
// which means that malloc'd string storage won't be freed when the
// sstring is destructed. This can help avoid excess strdup's for the
// consumers of strings created this way. STL strings are fine, but
// their template-based implementation leads to "bloat" (in the
// context of an application like this which is aiming for embedded
// compactness).
//
// ** We maintain null termination at all times.
sstring::sstring ()
{
// Initially, we use our static buffer, abandoning it for storage
// obtained with malloc if we need to.
sz = 0;
base = c; // base points to base of allocation.
*base = '\0';
alloc = stat_size; // How much allocated (statically or otherwise).
end = base; // End will point to the first free character.
pos = base; // read position
claimed = false; // freeing the storage is our job
}
sstring::~sstring ()
{
// If we've spilled, and the caller hasn't claimed the buffer,
// free it.
if (base != c && !claimed)
free (base);
}
void sstring::append (const char ch)
{
append (&ch, 1);
}
void sstring::append (const char * s)
{
append (s, strlen (s));
}
void sstring::append (const char * s, size_t len)
{
size_t required = sz + len + 1;
ptrdiff_t read_offset = pos - base;
// Will it fit in the current allocation?
TOP: if (required < alloc)
{
memcpy (end, s, len);
end += len;
sz += len;
end [0] = '\0';
return;
}
// No. We will have to allocate more storage (perhaps for
// the first time). Double the size (unless that won't be
// enough to accept this append.
size_t new_alloc = 2 * alloc;
if (required > new_alloc)
new_alloc = required;
char * new_buf = (char *) malloc (new_alloc);
if (!new_buf)
error ("out of memory");
memcpy (new_buf, base, sz);
if (base != c)
free (base);
base = new_buf;
end = base + sz;
end [0] = '\0';
alloc = new_alloc;
pos = base + read_offset;
// It should work now.
goto TOP;
}
void sstring::claim ()
{
if (base == c)
{
// We were getting away with using the little static buffer,
// but now the user wants to hand off the buffer as if it were
// malloc'd. We have to clone it now.
base = (char *) malloc (sz + 1);
strcpy (base, c);
}
claimed = true;
}
int sstring::get ()
{
if (pos >= base && pos < end)
return *pos++;
return EOF;
}
int sstring::peek ()
{
if (pos >= base && pos < end)
return *pos;
return EOF;
}
void sstring::unget ()
{
if (pos > base)
--pos;
}
void sstring::ignore ()
{
if (pos < end)
++pos;
}
#ifdef TEST
// Unit test: with TEST defined, this module becomes a standalone
// program which will sort its input. It prints the height of the tree
// at the end, which ought to be about log2 (number of input records).
void print (psymbol n)
{
// traverse the tree inorder, and print.
if (!n)
return;
print (n->llink);
printf ("%s\n", n->key);
print (n->rlink);
}
int main ()
{
char buf [80];
int i = 1;
psymbol Q;
while (fgets (buf, sizeof (buf), stdin))
{
int l = strlen (buf);
if (buf [l-1] == '\n')
buf [l-1] = '\0'; /* chomp newline */
Q = intern_stet (buf);
}
print (head->rlink);
printf ("height: %d\n", head->b);
return 0;
}
void error (const char * s1, const char * s2)
{
fputs (s1, stderr);
fputs (s2, stderr);
exit (1);
}
#endif