2008-06-30 21:54:22 -04:00
|
|
|
enum {
|
|
|
|
TOK_NONE, TOK_OPEN, TOK_CLOSE, TOK_DOT, TOK_QUOTE, TOK_SYM, TOK_NUM,
|
|
|
|
TOK_BQ, TOK_COMMA, TOK_COMMAAT, TOK_COMMADOT,
|
|
|
|
TOK_SHARPDOT, TOK_LABEL, TOK_BACKREF, TOK_SHARPQUOTE, TOK_SHARPOPEN,
|
2009-03-12 23:30:10 -04:00
|
|
|
TOK_OPENB, TOK_CLOSEB, TOK_SHARPSYM, TOK_GENSYM, TOK_DOUBLEQUOTE
|
2008-06-30 21:54:22 -04:00
|
|
|
};
|
|
|
|
|
2009-02-23 21:21:16 -05:00
|
|
|
#define F value2c(ios_t*,readstate->source)
|
|
|
|
|
2008-06-30 21:54:22 -04:00
|
|
|
// defines which characters are ordinary symbol characters.
|
|
|
|
// exceptions are '.', which is an ordinary symbol character
|
|
|
|
// unless it's the only character in the symbol, and '#', which is
|
|
|
|
// an ordinary symbol character unless it's the first character.
|
2010-05-05 01:31:46 -04:00
|
|
|
static inline int symchar(char c)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
2009-08-09 16:34:07 -04:00
|
|
|
static char *special = "()[]'\";`,\\| \f\n\r\t\v";
|
|
|
|
return !strchr(special, c);
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
|
|
|
|
2009-03-23 15:49:08 -04:00
|
|
|
int isnumtok_base(char *tok, value_t *pval, int base)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
char *end;
|
|
|
|
int64_t i64;
|
|
|
|
uint64_t ui64;
|
|
|
|
double d;
|
|
|
|
if (*tok == '\0')
|
|
|
|
return 0;
|
2009-02-24 13:31:05 -05:00
|
|
|
if (!((tok[0]=='0' && tok[1]=='x') || (base >= 15)) &&
|
|
|
|
strpbrk(tok, ".eEpP")) {
|
2008-06-30 21:54:22 -04:00
|
|
|
d = strtod(tok, &end);
|
|
|
|
if (*end == '\0') {
|
|
|
|
if (pval) *pval = mk_double(d);
|
|
|
|
return 1;
|
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
// floats can end in f or f0
|
|
|
|
if (end > tok && end[0] == 'f' &&
|
|
|
|
(end[1] == '\0' ||
|
|
|
|
(end[1] == '0' && end[2] == '\0'))) {
|
2008-08-16 17:15:36 -04:00
|
|
|
if (pval) *pval = mk_float((float)d);
|
|
|
|
return 1;
|
|
|
|
}
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2008-11-23 02:12:37 -05:00
|
|
|
|
|
|
|
if (tok[0] == '+') {
|
2009-01-31 20:53:58 -05:00
|
|
|
if (!strcmp(tok,"+NaN") || !strcasecmp(tok,"+nan.0")) {
|
2008-11-23 02:12:37 -05:00
|
|
|
if (pval) *pval = mk_double(D_PNAN);
|
|
|
|
return 1;
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
if (!strcmp(tok,"+Inf") || !strcasecmp(tok,"+inf.0")) {
|
2008-11-23 02:12:37 -05:00
|
|
|
if (pval) *pval = mk_double(D_PINF);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (tok[0] == '-') {
|
2009-01-31 20:53:58 -05:00
|
|
|
if (!strcmp(tok,"-NaN") || !strcasecmp(tok,"-nan.0")) {
|
2008-11-23 02:12:37 -05:00
|
|
|
if (pval) *pval = mk_double(D_NNAN);
|
|
|
|
return 1;
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
if (!strcmp(tok,"-Inf") || !strcasecmp(tok,"-inf.0")) {
|
2008-11-23 02:12:37 -05:00
|
|
|
if (pval) *pval = mk_double(D_NINF);
|
2008-06-30 21:54:22 -04:00
|
|
|
return 1;
|
2008-11-23 02:12:37 -05:00
|
|
|
}
|
2011-08-16 15:59:46 -04:00
|
|
|
errno = 0;
|
2009-01-31 20:53:58 -05:00
|
|
|
i64 = strtoll(tok, &end, base);
|
2011-08-16 15:59:46 -04:00
|
|
|
if (errno)
|
|
|
|
return 0;
|
2008-11-23 02:12:37 -05:00
|
|
|
if (pval) *pval = return_from_int64(i64);
|
|
|
|
return (*end == '\0');
|
|
|
|
}
|
2011-08-16 15:59:46 -04:00
|
|
|
errno = 0;
|
2009-01-31 20:53:58 -05:00
|
|
|
ui64 = strtoull(tok, &end, base);
|
2011-08-16 15:59:46 -04:00
|
|
|
if (errno)
|
|
|
|
return 0;
|
2008-11-23 02:12:37 -05:00
|
|
|
if (pval) *pval = return_from_uint64(ui64);
|
|
|
|
return (*end == '\0');
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
|
|
|
|
2009-01-31 20:53:58 -05:00
|
|
|
static int isnumtok(char *tok, value_t *pval)
|
|
|
|
{
|
|
|
|
return isnumtok_base(tok, pval, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int read_numtok(char *tok, value_t *pval, int base)
|
|
|
|
{
|
|
|
|
int result;
|
|
|
|
errno = 0;
|
|
|
|
result = isnumtok_base(tok, pval, base);
|
2009-08-28 20:54:51 -04:00
|
|
|
if (errno == ERANGE)
|
|
|
|
lerrorf(ParseError, "read: overflow in numeric constant %s", tok);
|
2009-01-31 20:53:58 -05:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2008-06-30 21:54:22 -04:00
|
|
|
static u_int32_t toktype = TOK_NONE;
|
|
|
|
static value_t tokval;
|
|
|
|
static char buf[256];
|
|
|
|
|
2013-06-08 19:29:15 -04:00
|
|
|
static char nextchar(void)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
int ch;
|
|
|
|
char c;
|
2009-08-09 16:34:07 -04:00
|
|
|
ios_t *f = F;
|
2008-06-30 21:54:22 -04:00
|
|
|
|
|
|
|
do {
|
2009-08-09 16:34:07 -04:00
|
|
|
if (f->bpos < f->size) {
|
|
|
|
ch = f->buf[f->bpos++];
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
ch = ios_getc(f);
|
|
|
|
if (ch == IOS_EOF)
|
|
|
|
return 0;
|
|
|
|
}
|
2008-06-30 21:54:22 -04:00
|
|
|
c = (char)ch;
|
|
|
|
if (c == ';') {
|
|
|
|
// single-line comment
|
|
|
|
do {
|
2009-08-09 16:34:07 -04:00
|
|
|
ch = ios_getc(f);
|
2008-08-17 14:16:31 -04:00
|
|
|
if (ch == IOS_EOF)
|
2008-06-30 21:54:22 -04:00
|
|
|
return 0;
|
|
|
|
} while ((char)ch != '\n');
|
|
|
|
c = (char)ch;
|
|
|
|
}
|
2009-08-09 16:34:07 -04:00
|
|
|
} while (c==' ' || isspace(c));
|
2008-06-30 21:54:22 -04:00
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void take(void)
|
|
|
|
{
|
|
|
|
toktype = TOK_NONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void accumchar(char c, int *pi)
|
|
|
|
{
|
|
|
|
buf[(*pi)++] = c;
|
|
|
|
if (*pi >= (int)(sizeof(buf)-1))
|
|
|
|
lerror(ParseError, "read: token too long");
|
|
|
|
}
|
|
|
|
|
|
|
|
// return: 1 if escaped (forced to be symbol)
|
2009-02-23 21:21:16 -05:00
|
|
|
static int read_token(char c, int digits)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
int i=0, ch, escaped=0, issym=0, first=1;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
if (!first) {
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
if (ch == IOS_EOF)
|
2008-06-30 21:54:22 -04:00
|
|
|
goto terminate;
|
|
|
|
c = (char)ch;
|
|
|
|
}
|
|
|
|
first = 0;
|
|
|
|
if (c == '|') {
|
|
|
|
issym = 1;
|
|
|
|
escaped = !escaped;
|
|
|
|
}
|
|
|
|
else if (c == '\\') {
|
|
|
|
issym = 1;
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
if (ch == IOS_EOF)
|
2008-06-30 21:54:22 -04:00
|
|
|
goto terminate;
|
|
|
|
accumchar((char)ch, &i);
|
|
|
|
}
|
|
|
|
else if (!escaped && !(symchar(c) && (!digits || isdigit(c)))) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
accumchar(c, &i);
|
|
|
|
}
|
|
|
|
}
|
2009-02-23 21:21:16 -05:00
|
|
|
ios_ungetc(c, F);
|
2008-06-30 21:54:22 -04:00
|
|
|
terminate:
|
|
|
|
buf[i++] = '\0';
|
|
|
|
return issym;
|
|
|
|
}
|
|
|
|
|
2009-03-12 23:30:10 -04:00
|
|
|
static value_t do_read_sexpr(value_t label);
|
|
|
|
|
2013-06-08 19:29:15 -04:00
|
|
|
static u_int32_t peek(void)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
char c, *end;
|
|
|
|
fixnum_t x;
|
2009-01-31 20:53:58 -05:00
|
|
|
int ch, base;
|
2008-06-30 21:54:22 -04:00
|
|
|
|
|
|
|
if (toktype != TOK_NONE)
|
|
|
|
return toktype;
|
2009-02-23 21:21:16 -05:00
|
|
|
c = nextchar();
|
|
|
|
if (ios_eof(F)) return TOK_NONE;
|
2008-06-30 21:54:22 -04:00
|
|
|
if (c == '(') {
|
|
|
|
toktype = TOK_OPEN;
|
|
|
|
}
|
|
|
|
else if (c == ')') {
|
|
|
|
toktype = TOK_CLOSE;
|
|
|
|
}
|
|
|
|
else if (c == '[') {
|
|
|
|
toktype = TOK_OPENB;
|
|
|
|
}
|
|
|
|
else if (c == ']') {
|
|
|
|
toktype = TOK_CLOSEB;
|
|
|
|
}
|
|
|
|
else if (c == '\'') {
|
|
|
|
toktype = TOK_QUOTE;
|
|
|
|
}
|
|
|
|
else if (c == '`') {
|
|
|
|
toktype = TOK_BQ;
|
|
|
|
}
|
|
|
|
else if (c == '"') {
|
|
|
|
toktype = TOK_DOUBLEQUOTE;
|
|
|
|
}
|
|
|
|
else if (c == '#') {
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F); c = (char)ch;
|
2008-08-17 14:16:31 -04:00
|
|
|
if (ch == IOS_EOF)
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: invalid read macro");
|
2009-01-31 20:53:58 -05:00
|
|
|
if (c == '.') {
|
2008-06-30 21:54:22 -04:00
|
|
|
toktype = TOK_SHARPDOT;
|
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == '\'') {
|
2008-06-30 21:54:22 -04:00
|
|
|
toktype = TOK_SHARPQUOTE;
|
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == '\\') {
|
2008-08-17 14:16:31 -04:00
|
|
|
uint32_t cval;
|
2009-02-23 21:21:16 -05:00
|
|
|
if (ios_getutf8(F, &cval) == IOS_EOF)
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: end of input in character constant");
|
2009-03-24 17:27:38 -04:00
|
|
|
if (cval == (uint32_t)'u' || cval == (uint32_t)'U' ||
|
|
|
|
cval == (uint32_t)'x') {
|
2009-03-21 22:05:26 -04:00
|
|
|
read_token('u', 0);
|
2009-03-24 17:27:38 -04:00
|
|
|
if (buf[1] != '\0') { // not a solitary 'u','U','x'
|
2009-03-21 22:05:26 -04:00
|
|
|
if (!read_numtok(&buf[1], &tokval, 16))
|
|
|
|
lerror(ParseError,
|
|
|
|
"read: invalid hex character constant");
|
|
|
|
cval = numval(tokval);
|
|
|
|
}
|
|
|
|
}
|
2009-08-08 17:44:14 -04:00
|
|
|
else if (cval >= 'a' && cval <= 'z') {
|
|
|
|
read_token((char)cval, 0);
|
|
|
|
tokval = symbol(buf);
|
|
|
|
if (buf[1] == '\0') /* one character */;
|
|
|
|
else if (tokval == nulsym) cval = 0x00;
|
|
|
|
else if (tokval == alarmsym) cval = 0x07;
|
|
|
|
else if (tokval == backspacesym) cval = 0x08;
|
|
|
|
else if (tokval == tabsym) cval = 0x09;
|
|
|
|
else if (tokval == linefeedsym) cval = 0x0A;
|
|
|
|
else if (tokval == newlinesym) cval = 0x0A;
|
|
|
|
else if (tokval == vtabsym) cval = 0x0B;
|
|
|
|
else if (tokval == pagesym) cval = 0x0C;
|
|
|
|
else if (tokval == returnsym) cval = 0x0D;
|
|
|
|
else if (tokval == escsym) cval = 0x1B;
|
|
|
|
else if (tokval == spacesym) cval = 0x20;
|
|
|
|
else if (tokval == deletesym) cval = 0x7F;
|
|
|
|
else
|
|
|
|
lerrorf(ParseError, "read: unknown character #\\%s", buf);
|
|
|
|
}
|
2008-06-30 21:54:22 -04:00
|
|
|
toktype = TOK_NUM;
|
2008-12-23 23:43:36 -05:00
|
|
|
tokval = mk_wchar(cval);
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == '(') {
|
2008-06-30 21:54:22 -04:00
|
|
|
toktype = TOK_SHARPOPEN;
|
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == '<') {
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: unreadable object");
|
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (isdigit(c)) {
|
2009-02-23 21:21:16 -05:00
|
|
|
read_token(c, 1);
|
|
|
|
c = (char)ios_getc(F);
|
2008-06-30 21:54:22 -04:00
|
|
|
if (c == '#')
|
|
|
|
toktype = TOK_BACKREF;
|
|
|
|
else if (c == '=')
|
|
|
|
toktype = TOK_LABEL;
|
|
|
|
else
|
|
|
|
lerror(ParseError, "read: invalid label");
|
|
|
|
errno = 0;
|
|
|
|
x = strtol(buf, &end, 10);
|
|
|
|
if (*end != '\0' || errno)
|
|
|
|
lerror(ParseError, "read: invalid label");
|
|
|
|
tokval = fixnum(x);
|
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == '!') {
|
2008-06-30 21:54:22 -04:00
|
|
|
// #! single line comment for shbang script support
|
|
|
|
do {
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
} while (ch != IOS_EOF && (char)ch != '\n');
|
2009-02-23 21:21:16 -05:00
|
|
|
return peek();
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == '|') {
|
2008-06-30 21:54:22 -04:00
|
|
|
// multiline comment
|
2009-01-25 21:03:37 -05:00
|
|
|
int commentlevel=1;
|
2008-06-30 21:54:22 -04:00
|
|
|
while (1) {
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2009-01-25 21:03:37 -05:00
|
|
|
hashpipe_gotc:
|
2008-08-17 14:16:31 -04:00
|
|
|
if (ch == IOS_EOF)
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: eof within comment");
|
|
|
|
if ((char)ch == '|') {
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2009-01-25 21:03:37 -05:00
|
|
|
if ((char)ch == '#') {
|
|
|
|
commentlevel--;
|
|
|
|
if (commentlevel == 0)
|
|
|
|
break;
|
|
|
|
else
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
goto hashpipe_gotc;
|
|
|
|
}
|
|
|
|
else if ((char)ch == '#') {
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2009-01-25 21:03:37 -05:00
|
|
|
if ((char)ch == '|')
|
|
|
|
commentlevel++;
|
|
|
|
else
|
|
|
|
goto hashpipe_gotc;
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// this was whitespace, so keep peeking
|
2009-02-23 21:21:16 -05:00
|
|
|
return peek();
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == ';') {
|
2009-03-12 23:30:10 -04:00
|
|
|
// datum comment
|
|
|
|
(void)do_read_sexpr(UNBOUND); // skip
|
|
|
|
return peek();
|
2009-01-25 21:03:37 -05:00
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (c == ':') {
|
2008-06-30 21:54:22 -04:00
|
|
|
// gensym
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2008-06-30 21:54:22 -04:00
|
|
|
if ((char)ch == 'g')
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
|
|
|
read_token((char)ch, 0);
|
2008-06-30 21:54:22 -04:00
|
|
|
errno = 0;
|
|
|
|
x = strtol(buf, &end, 10);
|
|
|
|
if (*end != '\0' || buf[0] == '\0' || errno)
|
|
|
|
lerror(ParseError, "read: invalid gensym label");
|
|
|
|
toktype = TOK_GENSYM;
|
|
|
|
tokval = fixnum(x);
|
|
|
|
}
|
2009-01-31 20:53:58 -05:00
|
|
|
else if (symchar(c)) {
|
2009-02-23 21:21:16 -05:00
|
|
|
read_token(ch, 0);
|
2009-01-31 20:53:58 -05:00
|
|
|
|
|
|
|
if (((c == 'b' && (base= 2)) ||
|
|
|
|
(c == 'o' && (base= 8)) ||
|
|
|
|
(c == 'd' && (base=10)) ||
|
2009-02-27 20:59:01 -05:00
|
|
|
(c == 'x' && (base=16))) &&
|
|
|
|
(isdigit_base(buf[1],base) ||
|
|
|
|
buf[1]=='-')) {
|
2009-01-31 20:53:58 -05:00
|
|
|
if (!read_numtok(&buf[1], &tokval, base))
|
2009-03-24 22:28:21 -04:00
|
|
|
lerrorf(ParseError, "read: invalid base %d constant", base);
|
2009-01-31 20:53:58 -05:00
|
|
|
return (toktype=TOK_NUM);
|
|
|
|
}
|
|
|
|
|
2008-06-30 21:54:22 -04:00
|
|
|
toktype = TOK_SHARPSYM;
|
|
|
|
tokval = symbol(buf);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
lerror(ParseError, "read: unknown read macro");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (c == ',') {
|
|
|
|
toktype = TOK_COMMA;
|
2009-02-23 21:21:16 -05:00
|
|
|
ch = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
if (ch == IOS_EOF)
|
2008-06-30 21:54:22 -04:00
|
|
|
return toktype;
|
|
|
|
if ((char)ch == '@')
|
|
|
|
toktype = TOK_COMMAAT;
|
|
|
|
else if ((char)ch == '.')
|
|
|
|
toktype = TOK_COMMADOT;
|
|
|
|
else
|
2009-02-23 21:21:16 -05:00
|
|
|
ios_ungetc((char)ch, F);
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
|
|
|
else {
|
2009-02-23 21:21:16 -05:00
|
|
|
if (!read_token(c, 0)) {
|
2008-06-30 21:54:22 -04:00
|
|
|
if (buf[0]=='.' && buf[1]=='\0') {
|
|
|
|
return (toktype=TOK_DOT);
|
|
|
|
}
|
|
|
|
else {
|
2009-01-31 20:53:58 -05:00
|
|
|
if (read_numtok(buf, &tokval, 0))
|
2008-06-30 21:54:22 -04:00
|
|
|
return (toktype=TOK_NUM);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
toktype = TOK_SYM;
|
|
|
|
tokval = symbol(buf);
|
|
|
|
}
|
|
|
|
return toktype;
|
|
|
|
}
|
|
|
|
|
2009-05-13 00:03:13 -04:00
|
|
|
// NOTE: this is NOT an efficient operation. it is only used by the
|
|
|
|
// reader, and requires at least 1 and up to 3 garbage collections!
|
|
|
|
static value_t vector_grow(value_t v)
|
|
|
|
{
|
2009-05-13 21:30:25 -04:00
|
|
|
size_t i, s = vector_size(v);
|
2009-05-13 00:03:13 -04:00
|
|
|
size_t d = vector_grow_amt(s);
|
|
|
|
PUSH(v);
|
2010-05-04 14:17:55 -04:00
|
|
|
assert(s+d > s);
|
2009-05-13 00:03:13 -04:00
|
|
|
value_t newv = alloc_vector(s+d, 1);
|
|
|
|
v = Stack[SP-1];
|
|
|
|
for(i=0; i < s; i++)
|
|
|
|
vector_elt(newv, i) = vector_elt(v, i);
|
|
|
|
// use gc to rewrite references from the old vector to the new
|
|
|
|
Stack[SP-1] = newv;
|
|
|
|
if (s > 0) {
|
|
|
|
((size_t*)ptr(v))[0] |= 0x1;
|
|
|
|
vector_elt(v, 0) = newv;
|
|
|
|
gc(0);
|
|
|
|
}
|
|
|
|
return POP();
|
|
|
|
}
|
|
|
|
|
2009-02-23 21:21:16 -05:00
|
|
|
static value_t read_vector(value_t label, u_int32_t closer)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
2009-04-20 20:56:05 -04:00
|
|
|
value_t v=the_empty_vector, elt;
|
2008-06-30 21:54:22 -04:00
|
|
|
u_int32_t i=0;
|
|
|
|
PUSH(v);
|
|
|
|
if (label != UNBOUND)
|
|
|
|
ptrhash_put(&readstate->backrefs, (void*)label, (void*)v);
|
2009-02-23 21:21:16 -05:00
|
|
|
while (peek() != closer) {
|
|
|
|
if (ios_eof(F))
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: unexpected end of input");
|
2009-05-13 00:03:13 -04:00
|
|
|
if (i >= vector_size(v)) {
|
|
|
|
v = Stack[SP-1] = vector_grow(v);
|
2009-04-20 20:56:05 -04:00
|
|
|
if (label != UNBOUND)
|
|
|
|
ptrhash_put(&readstate->backrefs, (void*)label, (void*)v);
|
|
|
|
}
|
2009-02-23 21:21:16 -05:00
|
|
|
elt = do_read_sexpr(UNBOUND);
|
2008-06-30 21:54:22 -04:00
|
|
|
v = Stack[SP-1];
|
2010-05-04 14:17:55 -04:00
|
|
|
assert(i < vector_size(v));
|
2008-06-30 21:54:22 -04:00
|
|
|
vector_elt(v,i) = elt;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
take();
|
2009-04-20 20:56:05 -04:00
|
|
|
if (i > 0)
|
|
|
|
vector_setsize(v, i);
|
2008-06-30 21:54:22 -04:00
|
|
|
return POP();
|
|
|
|
}
|
|
|
|
|
2013-06-08 19:29:15 -04:00
|
|
|
static value_t read_string(void)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
char *buf, *temp;
|
|
|
|
char eseq[10];
|
|
|
|
size_t i=0, j, sz = 64, ndig;
|
|
|
|
int c;
|
|
|
|
value_t s;
|
2013-06-08 19:29:15 -04:00
|
|
|
u_int32_t wc=0;
|
2008-06-30 21:54:22 -04:00
|
|
|
|
2010-05-03 01:07:22 -04:00
|
|
|
buf = malloc(sz);
|
2008-06-30 21:54:22 -04:00
|
|
|
while (1) {
|
|
|
|
if (i >= sz-4) { // -4: leaves room for longest utf8 sequence
|
|
|
|
sz *= 2;
|
2010-05-03 01:07:22 -04:00
|
|
|
temp = realloc(buf, sz);
|
2008-06-30 21:54:22 -04:00
|
|
|
if (temp == NULL) {
|
2010-05-03 01:07:22 -04:00
|
|
|
free(buf);
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: out of memory reading string");
|
|
|
|
}
|
|
|
|
buf = temp;
|
|
|
|
}
|
2009-02-23 21:21:16 -05:00
|
|
|
c = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
if (c == IOS_EOF) {
|
2010-05-03 01:07:22 -04:00
|
|
|
free(buf);
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: unexpected end of input in string");
|
|
|
|
}
|
|
|
|
if (c == '"')
|
|
|
|
break;
|
|
|
|
else if (c == '\\') {
|
2009-02-23 21:21:16 -05:00
|
|
|
c = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
if (c == IOS_EOF) {
|
2010-05-03 01:07:22 -04:00
|
|
|
free(buf);
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: end of input in escape sequence");
|
|
|
|
}
|
|
|
|
j=0;
|
|
|
|
if (octal_digit(c)) {
|
|
|
|
do {
|
|
|
|
eseq[j++] = c;
|
2009-02-23 21:21:16 -05:00
|
|
|
c = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
} while (octal_digit(c) && j<3 && (c!=IOS_EOF));
|
2009-02-23 21:21:16 -05:00
|
|
|
if (c!=IOS_EOF) ios_ungetc(c, F);
|
2008-06-30 21:54:22 -04:00
|
|
|
eseq[j] = '\0';
|
|
|
|
wc = strtol(eseq, NULL, 8);
|
2008-12-10 23:04:17 -05:00
|
|
|
// \DDD and \xXX read bytes, not characters
|
|
|
|
buf[i++] = ((char)wc);
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
|
|
|
else if ((c=='x' && (ndig=2)) ||
|
|
|
|
(c=='u' && (ndig=4)) ||
|
|
|
|
(c=='U' && (ndig=8))) {
|
2009-02-23 21:21:16 -05:00
|
|
|
c = ios_getc(F);
|
2008-08-17 14:16:31 -04:00
|
|
|
while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
|
2008-06-30 21:54:22 -04:00
|
|
|
eseq[j++] = c;
|
2009-02-23 21:21:16 -05:00
|
|
|
c = ios_getc(F);
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2009-02-23 21:21:16 -05:00
|
|
|
if (c!=IOS_EOF) ios_ungetc(c, F);
|
2008-06-30 21:54:22 -04:00
|
|
|
eseq[j] = '\0';
|
|
|
|
if (j) wc = strtol(eseq, NULL, 16);
|
2013-06-08 19:29:15 -04:00
|
|
|
if (!j || wc > 0x10ffff) {
|
2010-05-03 01:07:22 -04:00
|
|
|
free(buf);
|
2008-11-05 23:04:04 -05:00
|
|
|
lerror(ParseError, "read: invalid escape sequence");
|
|
|
|
}
|
2008-12-10 23:04:17 -05:00
|
|
|
if (ndig == 2)
|
|
|
|
buf[i++] = ((char)wc);
|
|
|
|
else
|
|
|
|
i += u8_wc_toutf8(&buf[i], wc);
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2008-11-05 23:04:04 -05:00
|
|
|
else {
|
|
|
|
buf[i++] = read_escape_control_char((char)c);
|
|
|
|
}
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
buf[i++] = c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s = cvalue_string(i);
|
|
|
|
memcpy(cvalue_data(s), buf, i);
|
2010-05-03 01:07:22 -04:00
|
|
|
free(buf);
|
2008-06-30 21:54:22 -04:00
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
// build a list of conses. this is complicated by the fact that all conses
|
|
|
|
// can move whenever a new cons is allocated. we have to refer to every cons
|
|
|
|
// through a handle to a relocatable pointer (i.e. a pointer on the stack).
|
2009-02-23 21:21:16 -05:00
|
|
|
static void read_list(value_t *pval, value_t label)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
value_t c, *pc;
|
|
|
|
u_int32_t t;
|
|
|
|
|
|
|
|
PUSH(NIL);
|
|
|
|
pc = &Stack[SP-1]; // to keep track of current cons cell
|
2009-02-23 21:21:16 -05:00
|
|
|
t = peek();
|
2008-06-30 21:54:22 -04:00
|
|
|
while (t != TOK_CLOSE) {
|
2009-02-23 21:21:16 -05:00
|
|
|
if (ios_eof(F))
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: unexpected end of input");
|
|
|
|
c = mk_cons(); car_(c) = cdr_(c) = NIL;
|
|
|
|
if (iscons(*pc)) {
|
|
|
|
cdr_(*pc) = c;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*pval = c;
|
|
|
|
if (label != UNBOUND)
|
|
|
|
ptrhash_put(&readstate->backrefs, (void*)label, (void*)c);
|
|
|
|
}
|
|
|
|
*pc = c;
|
2009-02-23 21:21:16 -05:00
|
|
|
c = do_read_sexpr(UNBOUND); // must be on separate lines due to
|
2010-05-04 14:17:55 -04:00
|
|
|
car_(*pc) = c; // undefined evaluation order
|
2008-06-30 21:54:22 -04:00
|
|
|
|
2009-02-23 21:21:16 -05:00
|
|
|
t = peek();
|
2008-06-30 21:54:22 -04:00
|
|
|
if (t == TOK_DOT) {
|
|
|
|
take();
|
2009-02-23 21:21:16 -05:00
|
|
|
c = do_read_sexpr(UNBOUND);
|
2008-06-30 21:54:22 -04:00
|
|
|
cdr_(*pc) = c;
|
2009-02-23 21:21:16 -05:00
|
|
|
t = peek();
|
|
|
|
if (ios_eof(F))
|
2008-06-30 21:54:22 -04:00
|
|
|
lerror(ParseError, "read: unexpected end of input");
|
|
|
|
if (t != TOK_CLOSE)
|
|
|
|
lerror(ParseError, "read: expected ')'");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
take();
|
|
|
|
(void)POP();
|
|
|
|
}
|
|
|
|
|
|
|
|
// label is the backreference we'd like to fix up with this read
|
2009-02-23 21:21:16 -05:00
|
|
|
static value_t do_read_sexpr(value_t label)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
value_t v, sym, oldtokval, *head;
|
|
|
|
value_t *pv;
|
|
|
|
u_int32_t t;
|
switching to scheme #t, #f, and () values
porting code to sort out which NILs are false and which are
empty lists
switching to scheme-style special forms. however you feel about
scheme names vs. CL names, using both is silly.
mostly switching to scheme predicate names, with compatibility
aliases for now. adding set-constant! to make this efficient.
adding null?, eqv?, assq, assv, assoc, memq, memv, member
adding 2-argument form of if
allowing else as final cond condition
looking for init file in same directory as executable, so flisp
can be started from anywhere
renaming T to FL_T, since exporting a 1-character symbol is
not very nice
adding opaque type boilerplate example file
adding correctness checking for the pattern-lambda benchmark
bugfix in int2str
2009-01-28 20:04:23 -05:00
|
|
|
char c;
|
2008-06-30 21:54:22 -04:00
|
|
|
|
2009-02-23 21:21:16 -05:00
|
|
|
t = peek();
|
2008-06-30 21:54:22 -04:00
|
|
|
take();
|
|
|
|
switch (t) {
|
|
|
|
case TOK_CLOSE:
|
|
|
|
lerror(ParseError, "read: unexpected ')'");
|
|
|
|
case TOK_CLOSEB:
|
|
|
|
lerror(ParseError, "read: unexpected ']'");
|
|
|
|
case TOK_DOT:
|
|
|
|
lerror(ParseError, "read: unexpected '.'");
|
|
|
|
case TOK_SYM:
|
|
|
|
case TOK_NUM:
|
|
|
|
return tokval;
|
|
|
|
case TOK_COMMA:
|
|
|
|
head = &COMMA; goto listwith;
|
|
|
|
case TOK_COMMAAT:
|
|
|
|
head = &COMMAAT; goto listwith;
|
|
|
|
case TOK_COMMADOT:
|
|
|
|
head = &COMMADOT; goto listwith;
|
|
|
|
case TOK_BQ:
|
|
|
|
head = &BACKQUOTE; goto listwith;
|
|
|
|
case TOK_QUOTE:
|
|
|
|
head = "E;
|
|
|
|
listwith:
|
|
|
|
v = cons_reserve(2);
|
|
|
|
car_(v) = *head;
|
|
|
|
cdr_(v) = tagptr(((cons_t*)ptr(v))+1, TAG_CONS);
|
|
|
|
car_(cdr_(v)) = cdr_(cdr_(v)) = NIL;
|
|
|
|
PUSH(v);
|
|
|
|
if (label != UNBOUND)
|
|
|
|
ptrhash_put(&readstate->backrefs, (void*)label, (void*)v);
|
2009-02-23 21:21:16 -05:00
|
|
|
v = do_read_sexpr(UNBOUND);
|
2008-06-30 21:54:22 -04:00
|
|
|
car_(cdr_(Stack[SP-1])) = v;
|
|
|
|
return POP();
|
|
|
|
case TOK_SHARPQUOTE:
|
|
|
|
// femtoLisp doesn't need symbol-function, so #' does nothing
|
2009-02-23 21:21:16 -05:00
|
|
|
return do_read_sexpr(label);
|
2008-06-30 21:54:22 -04:00
|
|
|
case TOK_OPEN:
|
|
|
|
PUSH(NIL);
|
2009-02-23 21:21:16 -05:00
|
|
|
read_list(&Stack[SP-1], label);
|
2008-06-30 21:54:22 -04:00
|
|
|
return POP();
|
|
|
|
case TOK_SHARPSYM:
|
|
|
|
sym = tokval;
|
switching to scheme #t, #f, and () values
porting code to sort out which NILs are false and which are
empty lists
switching to scheme-style special forms. however you feel about
scheme names vs. CL names, using both is silly.
mostly switching to scheme predicate names, with compatibility
aliases for now. adding set-constant! to make this efficient.
adding null?, eqv?, assq, assv, assoc, memq, memv, member
adding 2-argument form of if
allowing else as final cond condition
looking for init file in same directory as executable, so flisp
can be started from anywhere
renaming T to FL_T, since exporting a 1-character symbol is
not very nice
adding opaque type boilerplate example file
adding correctness checking for the pattern-lambda benchmark
bugfix in int2str
2009-01-28 20:04:23 -05:00
|
|
|
if (sym == tsym || sym == Tsym)
|
|
|
|
return FL_T;
|
|
|
|
else if (sym == fsym || sym == Fsym)
|
|
|
|
return FL_F;
|
|
|
|
// constructor notation
|
2009-02-23 21:21:16 -05:00
|
|
|
c = nextchar();
|
switching to scheme #t, #f, and () values
porting code to sort out which NILs are false and which are
empty lists
switching to scheme-style special forms. however you feel about
scheme names vs. CL names, using both is silly.
mostly switching to scheme predicate names, with compatibility
aliases for now. adding set-constant! to make this efficient.
adding null?, eqv?, assq, assv, assoc, memq, memv, member
adding 2-argument form of if
allowing else as final cond condition
looking for init file in same directory as executable, so flisp
can be started from anywhere
renaming T to FL_T, since exporting a 1-character symbol is
not very nice
adding opaque type boilerplate example file
adding correctness checking for the pattern-lambda benchmark
bugfix in int2str
2009-01-28 20:04:23 -05:00
|
|
|
if (c != '(') {
|
|
|
|
take();
|
2009-03-24 22:28:21 -04:00
|
|
|
lerrorf(ParseError, "read: expected argument list for %s",
|
|
|
|
symbol_name(tokval));
|
switching to scheme #t, #f, and () values
porting code to sort out which NILs are false and which are
empty lists
switching to scheme-style special forms. however you feel about
scheme names vs. CL names, using both is silly.
mostly switching to scheme predicate names, with compatibility
aliases for now. adding set-constant! to make this efficient.
adding null?, eqv?, assq, assv, assoc, memq, memv, member
adding 2-argument form of if
allowing else as final cond condition
looking for init file in same directory as executable, so flisp
can be started from anywhere
renaming T to FL_T, since exporting a 1-character symbol is
not very nice
adding opaque type boilerplate example file
adding correctness checking for the pattern-lambda benchmark
bugfix in int2str
2009-01-28 20:04:23 -05:00
|
|
|
}
|
2008-06-30 21:54:22 -04:00
|
|
|
PUSH(NIL);
|
2009-02-23 21:21:16 -05:00
|
|
|
read_list(&Stack[SP-1], UNBOUND);
|
2009-04-21 11:35:46 -04:00
|
|
|
if (sym == vu8sym) {
|
|
|
|
sym = arraysym;
|
|
|
|
Stack[SP-1] = fl_cons(uint8sym, Stack[SP-1]);
|
|
|
|
}
|
2009-08-08 17:44:14 -04:00
|
|
|
else if (sym == fnsym) {
|
|
|
|
sym = FUNCTION;
|
|
|
|
}
|
2009-04-19 18:22:17 -04:00
|
|
|
v = symbol_value(sym);
|
|
|
|
if (v == UNBOUND)
|
2010-04-29 14:01:26 -04:00
|
|
|
fl_raise(fl_list2(UnboundError, sym));
|
|
|
|
return fl_apply(v, POP());
|
2008-06-30 21:54:22 -04:00
|
|
|
case TOK_OPENB:
|
2009-02-23 21:21:16 -05:00
|
|
|
return read_vector(label, TOK_CLOSEB);
|
2008-06-30 21:54:22 -04:00
|
|
|
case TOK_SHARPOPEN:
|
2009-02-23 21:21:16 -05:00
|
|
|
return read_vector(label, TOK_CLOSE);
|
2008-06-30 21:54:22 -04:00
|
|
|
case TOK_SHARPDOT:
|
|
|
|
// eval-when-read
|
|
|
|
// evaluated expressions can refer to existing backreferences, but they
|
|
|
|
// cannot see pending labels. in other words:
|
|
|
|
// (... #2=#.#0# ... ) OK
|
|
|
|
// (... #2=#.(#2#) ... ) DO NOT WANT
|
2009-04-22 20:55:03 -04:00
|
|
|
sym = do_read_sexpr(UNBOUND);
|
|
|
|
if (issymbol(sym)) {
|
|
|
|
v = symbol_value(sym);
|
|
|
|
if (v == UNBOUND)
|
2010-04-29 14:01:26 -04:00
|
|
|
fl_raise(fl_list2(UnboundError, sym));
|
2009-04-22 20:55:03 -04:00
|
|
|
return v;
|
|
|
|
}
|
2010-04-29 14:01:26 -04:00
|
|
|
return fl_toplevel_eval(sym);
|
2008-06-30 21:54:22 -04:00
|
|
|
case TOK_LABEL:
|
|
|
|
// create backreference label
|
|
|
|
if (ptrhash_has(&readstate->backrefs, (void*)tokval))
|
2009-03-24 22:28:21 -04:00
|
|
|
lerrorf(ParseError, "read: label %ld redefined", numval(tokval));
|
2008-06-30 21:54:22 -04:00
|
|
|
oldtokval = tokval;
|
2009-02-23 21:21:16 -05:00
|
|
|
v = do_read_sexpr(tokval);
|
2008-06-30 21:54:22 -04:00
|
|
|
ptrhash_put(&readstate->backrefs, (void*)oldtokval, (void*)v);
|
|
|
|
return v;
|
|
|
|
case TOK_BACKREF:
|
|
|
|
// look up backreference
|
|
|
|
v = (value_t)ptrhash_get(&readstate->backrefs, (void*)tokval);
|
2008-11-23 02:12:37 -05:00
|
|
|
if (v == (value_t)HT_NOTFOUND)
|
2009-03-24 22:28:21 -04:00
|
|
|
lerrorf(ParseError, "read: undefined label %ld", numval(tokval));
|
2008-06-30 21:54:22 -04:00
|
|
|
return v;
|
|
|
|
case TOK_GENSYM:
|
|
|
|
pv = (value_t*)ptrhash_bp(&readstate->gensyms, (void*)tokval);
|
2008-11-23 02:12:37 -05:00
|
|
|
if (*pv == (value_t)HT_NOTFOUND)
|
2009-05-12 21:13:40 -04:00
|
|
|
*pv = fl_gensym(NULL, 0);
|
2008-06-30 21:54:22 -04:00
|
|
|
return *pv;
|
|
|
|
case TOK_DOUBLEQUOTE:
|
2009-02-23 21:21:16 -05:00
|
|
|
return read_string();
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
2010-01-06 13:27:28 -05:00
|
|
|
return FL_UNSPECIFIED;
|
2008-06-30 21:54:22 -04:00
|
|
|
}
|
|
|
|
|
2010-04-29 14:01:26 -04:00
|
|
|
value_t fl_read_sexpr(value_t f)
|
2008-06-30 21:54:22 -04:00
|
|
|
{
|
|
|
|
value_t v;
|
2010-05-02 14:17:47 -04:00
|
|
|
fl_readstate_t state;
|
2008-06-30 21:54:22 -04:00
|
|
|
state.prev = readstate;
|
2008-12-27 01:02:53 -05:00
|
|
|
htable_new(&state.backrefs, 8);
|
|
|
|
htable_new(&state.gensyms, 8);
|
2009-02-23 21:21:16 -05:00
|
|
|
state.source = f;
|
2008-06-30 21:54:22 -04:00
|
|
|
readstate = &state;
|
2009-08-09 16:34:07 -04:00
|
|
|
assert(toktype == TOK_NONE);
|
2010-05-04 14:17:55 -04:00
|
|
|
fl_gc_handle(&tokval);
|
2008-06-30 21:54:22 -04:00
|
|
|
|
2009-02-23 21:21:16 -05:00
|
|
|
v = do_read_sexpr(UNBOUND);
|
2008-06-30 21:54:22 -04:00
|
|
|
|
2010-05-04 14:17:55 -04:00
|
|
|
fl_free_gc_handles(1);
|
2008-06-30 21:54:22 -04:00
|
|
|
readstate = state.prev;
|
|
|
|
free_readstate(&state);
|
|
|
|
return v;
|
|
|
|
}
|