557 lines
16 KiB
C
557 lines
16 KiB
C
enum {
|
|
TOK_NONE, TOK_OPEN, TOK_CLOSE, TOK_DOT, TOK_QUOTE, TOK_SYM, TOK_NUM,
|
|
TOK_BQ, TOK_COMMA, TOK_COMMAAT, TOK_COMMADOT,
|
|
TOK_SHARPDOT, TOK_LABEL, TOK_BACKREF, TOK_SHARPQUOTE, TOK_SHARPOPEN,
|
|
TOK_OPENB, TOK_CLOSEB, TOK_SHARPSYM, TOK_GENSYM, TOK_DOUBLEQUOTE
|
|
};
|
|
|
|
// defines which characters are ordinary symbol characters.
|
|
// exceptions are '.', which is an ordinary symbol character
|
|
// unless it's the only character in the symbol, and '#', which is
|
|
// an ordinary symbol character unless it's the first character.
|
|
static int symchar(char c)
|
|
{
|
|
static char *special = "()[]'\";`,\\|";
|
|
return (!isspace(c) && !strchr(special, c));
|
|
}
|
|
|
|
static int isnumtok(char *tok, value_t *pval)
|
|
{
|
|
char *end;
|
|
int64_t i64;
|
|
uint64_t ui64;
|
|
double d;
|
|
if (*tok == '\0')
|
|
return 0;
|
|
if (!(tok[0]=='0' && isdigit(tok[1])) &&
|
|
strpbrk(tok, ".eEpP")) {
|
|
d = strtod(tok, &end);
|
|
if (*end == '\0') {
|
|
if (pval) *pval = mk_double(d);
|
|
return 1;
|
|
}
|
|
if (end > tok && end[0] == 'f' && end[1] == '\0') {
|
|
if (pval) *pval = mk_float((float)d);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (tok[0] == '+') {
|
|
if (!strcmp(tok,"+NaN")) {
|
|
if (pval) *pval = mk_double(D_PNAN);
|
|
return 1;
|
|
}
|
|
if (!strcmp(tok,"+Inf")) {
|
|
if (pval) *pval = mk_double(D_PINF);
|
|
return 1;
|
|
}
|
|
}
|
|
else if (tok[0] == '-') {
|
|
if (!strcmp(tok,"-NaN")) {
|
|
if (pval) *pval = mk_double(D_NNAN);
|
|
return 1;
|
|
}
|
|
if (!strcmp(tok,"-Inf")) {
|
|
if (pval) *pval = mk_double(D_NINF);
|
|
return 1;
|
|
}
|
|
i64 = strtoll(tok, &end, 0);
|
|
if (pval) *pval = return_from_int64(i64);
|
|
return (*end == '\0');
|
|
}
|
|
else if (!isdigit(tok[0])) {
|
|
return 0;
|
|
}
|
|
ui64 = strtoull(tok, &end, 0);
|
|
if (pval) *pval = return_from_uint64(ui64);
|
|
return (*end == '\0');
|
|
}
|
|
|
|
static u_int32_t toktype = TOK_NONE;
|
|
static value_t tokval;
|
|
static char buf[256];
|
|
|
|
static char nextchar(ios_t *f)
|
|
{
|
|
int ch;
|
|
char c;
|
|
|
|
do {
|
|
ch = ios_getc(f);
|
|
if (ch == IOS_EOF)
|
|
return 0;
|
|
c = (char)ch;
|
|
if (c == ';') {
|
|
// single-line comment
|
|
do {
|
|
ch = ios_getc(f);
|
|
if (ch == IOS_EOF)
|
|
return 0;
|
|
} while ((char)ch != '\n');
|
|
c = (char)ch;
|
|
}
|
|
} while (isspace(c));
|
|
return c;
|
|
}
|
|
|
|
static void take(void)
|
|
{
|
|
toktype = TOK_NONE;
|
|
}
|
|
|
|
static void accumchar(char c, int *pi)
|
|
{
|
|
buf[(*pi)++] = c;
|
|
if (*pi >= (int)(sizeof(buf)-1))
|
|
lerror(ParseError, "read: token too long");
|
|
}
|
|
|
|
// return: 1 if escaped (forced to be symbol)
|
|
static int read_token(ios_t *f, char c, int digits)
|
|
{
|
|
int i=0, ch, escaped=0, issym=0, first=1;
|
|
|
|
while (1) {
|
|
if (!first) {
|
|
ch = ios_getc(f);
|
|
if (ch == IOS_EOF)
|
|
goto terminate;
|
|
c = (char)ch;
|
|
}
|
|
first = 0;
|
|
if (c == '|') {
|
|
issym = 1;
|
|
escaped = !escaped;
|
|
}
|
|
else if (c == '\\') {
|
|
issym = 1;
|
|
ch = ios_getc(f);
|
|
if (ch == IOS_EOF)
|
|
goto terminate;
|
|
accumchar((char)ch, &i);
|
|
}
|
|
else if (!escaped && !(symchar(c) && (!digits || isdigit(c)))) {
|
|
break;
|
|
}
|
|
else {
|
|
accumchar(c, &i);
|
|
}
|
|
}
|
|
ios_ungetc(c, f);
|
|
terminate:
|
|
buf[i++] = '\0';
|
|
return issym;
|
|
}
|
|
|
|
static u_int32_t peek(ios_t *f)
|
|
{
|
|
char c, *end;
|
|
fixnum_t x;
|
|
int ch;
|
|
|
|
if (toktype != TOK_NONE)
|
|
return toktype;
|
|
c = nextchar(f);
|
|
if (ios_eof(f)) return TOK_NONE;
|
|
if (c == '(') {
|
|
toktype = TOK_OPEN;
|
|
}
|
|
else if (c == ')') {
|
|
toktype = TOK_CLOSE;
|
|
}
|
|
else if (c == '[') {
|
|
toktype = TOK_OPENB;
|
|
}
|
|
else if (c == ']') {
|
|
toktype = TOK_CLOSEB;
|
|
}
|
|
else if (c == '\'') {
|
|
toktype = TOK_QUOTE;
|
|
}
|
|
else if (c == '`') {
|
|
toktype = TOK_BQ;
|
|
}
|
|
else if (c == '"') {
|
|
toktype = TOK_DOUBLEQUOTE;
|
|
}
|
|
else if (c == '#') {
|
|
ch = ios_getc(f);
|
|
if (ch == IOS_EOF)
|
|
lerror(ParseError, "read: invalid read macro");
|
|
if ((char)ch == '.') {
|
|
toktype = TOK_SHARPDOT;
|
|
}
|
|
else if ((char)ch == '\'') {
|
|
toktype = TOK_SHARPQUOTE;
|
|
}
|
|
else if ((char)ch == '\\') {
|
|
uint32_t cval;
|
|
if (ios_getutf8(f, &cval) == IOS_EOF)
|
|
lerror(ParseError, "read: end of input in character constant");
|
|
toktype = TOK_NUM;
|
|
tokval = fixnum(cval);
|
|
if (cval > 0x7f) {
|
|
tokval = cvalue_wchar(&tokval, 1);
|
|
}
|
|
else {
|
|
tokval = cvalue_char(&tokval, 1);
|
|
}
|
|
}
|
|
else if ((char)ch == '(') {
|
|
toktype = TOK_SHARPOPEN;
|
|
}
|
|
else if ((char)ch == '<') {
|
|
lerror(ParseError, "read: unreadable object");
|
|
}
|
|
else if (isdigit((char)ch)) {
|
|
read_token(f, (char)ch, 1);
|
|
c = (char)ios_getc(f);
|
|
if (c == '#')
|
|
toktype = TOK_BACKREF;
|
|
else if (c == '=')
|
|
toktype = TOK_LABEL;
|
|
else
|
|
lerror(ParseError, "read: invalid label");
|
|
errno = 0;
|
|
x = strtol(buf, &end, 10);
|
|
if (*end != '\0' || errno)
|
|
lerror(ParseError, "read: invalid label");
|
|
tokval = fixnum(x);
|
|
}
|
|
else if ((char)ch == '!') {
|
|
// #! single line comment for shbang script support
|
|
do {
|
|
ch = ios_getc(f);
|
|
} while (ch != IOS_EOF && (char)ch != '\n');
|
|
return peek(f);
|
|
}
|
|
else if ((char)ch == '|') {
|
|
// multiline comment
|
|
while (1) {
|
|
ch = ios_getc(f);
|
|
hashpipe_got:
|
|
if (ch == IOS_EOF)
|
|
lerror(ParseError, "read: eof within comment");
|
|
if ((char)ch == '|') {
|
|
ch = ios_getc(f);
|
|
if ((char)ch == '#')
|
|
break;
|
|
goto hashpipe_got;
|
|
}
|
|
}
|
|
// this was whitespace, so keep peeking
|
|
return peek(f);
|
|
}
|
|
else if ((char)ch == ':') {
|
|
// gensym
|
|
ch = ios_getc(f);
|
|
if ((char)ch == 'g')
|
|
ch = ios_getc(f);
|
|
read_token(f, (char)ch, 0);
|
|
errno = 0;
|
|
x = strtol(buf, &end, 10);
|
|
if (*end != '\0' || buf[0] == '\0' || errno)
|
|
lerror(ParseError, "read: invalid gensym label");
|
|
toktype = TOK_GENSYM;
|
|
tokval = fixnum(x);
|
|
}
|
|
else if (symchar((char)ch)) {
|
|
read_token(f, ch, 0);
|
|
toktype = TOK_SHARPSYM;
|
|
tokval = symbol(buf);
|
|
c = nextchar(f);
|
|
if (c != '(') {
|
|
take();
|
|
lerror(ParseError, "read: expected argument list for %s",
|
|
symbol_name(tokval));
|
|
}
|
|
}
|
|
else {
|
|
lerror(ParseError, "read: unknown read macro");
|
|
}
|
|
}
|
|
else if (c == ',') {
|
|
toktype = TOK_COMMA;
|
|
ch = ios_getc(f);
|
|
if (ch == IOS_EOF)
|
|
return toktype;
|
|
if ((char)ch == '@')
|
|
toktype = TOK_COMMAAT;
|
|
else if ((char)ch == '.')
|
|
toktype = TOK_COMMADOT;
|
|
else
|
|
ios_ungetc((char)ch, f);
|
|
}
|
|
else {
|
|
if (!read_token(f, c, 0)) {
|
|
if (buf[0]=='.' && buf[1]=='\0') {
|
|
return (toktype=TOK_DOT);
|
|
}
|
|
else {
|
|
errno = 0;
|
|
if (isnumtok(buf, &tokval)) {
|
|
if (errno)
|
|
lerror(ParseError,"read: overflow in numeric constant");
|
|
return (toktype=TOK_NUM);
|
|
}
|
|
}
|
|
}
|
|
toktype = TOK_SYM;
|
|
tokval = symbol(buf);
|
|
}
|
|
return toktype;
|
|
}
|
|
|
|
static value_t do_read_sexpr(ios_t *f, value_t label);
|
|
|
|
static value_t read_vector(ios_t *f, value_t label, u_int32_t closer)
|
|
{
|
|
value_t v=alloc_vector(4, 1), elt;
|
|
u_int32_t i=0;
|
|
PUSH(v);
|
|
if (label != UNBOUND)
|
|
ptrhash_put(&readstate->backrefs, (void*)label, (void*)v);
|
|
while (peek(f) != closer) {
|
|
if (ios_eof(f))
|
|
lerror(ParseError, "read: unexpected end of input");
|
|
if (i >= vector_size(v))
|
|
Stack[SP-1] = vector_grow(v);
|
|
elt = do_read_sexpr(f, UNBOUND);
|
|
v = Stack[SP-1];
|
|
vector_elt(v,i) = elt;
|
|
i++;
|
|
}
|
|
take();
|
|
vector_setsize(v, i);
|
|
return POP();
|
|
}
|
|
|
|
static value_t read_string(ios_t *f)
|
|
{
|
|
char *buf, *temp;
|
|
char eseq[10];
|
|
size_t i=0, j, sz = 64, ndig;
|
|
int c;
|
|
value_t s;
|
|
u_int32_t wc;
|
|
|
|
buf = malloc(sz);
|
|
while (1) {
|
|
if (i >= sz-4) { // -4: leaves room for longest utf8 sequence
|
|
sz *= 2;
|
|
temp = realloc(buf, sz);
|
|
if (temp == NULL) {
|
|
free(buf);
|
|
lerror(ParseError, "read: out of memory reading string");
|
|
}
|
|
buf = temp;
|
|
}
|
|
c = ios_getc(f);
|
|
if (c == IOS_EOF) {
|
|
free(buf);
|
|
lerror(ParseError, "read: unexpected end of input in string");
|
|
}
|
|
if (c == '"')
|
|
break;
|
|
else if (c == '\\') {
|
|
c = ios_getc(f);
|
|
if (c == IOS_EOF) {
|
|
free(buf);
|
|
lerror(ParseError, "read: end of input in escape sequence");
|
|
}
|
|
j=0;
|
|
if (octal_digit(c)) {
|
|
do {
|
|
eseq[j++] = c;
|
|
c = ios_getc(f);
|
|
} while (octal_digit(c) && j<3 && (c!=IOS_EOF));
|
|
if (c!=IOS_EOF) ios_ungetc(c, f);
|
|
eseq[j] = '\0';
|
|
wc = strtol(eseq, NULL, 8);
|
|
// \DDD and \xXX read bytes, not characters
|
|
buf[i++] = ((char)wc);
|
|
}
|
|
else if ((c=='x' && (ndig=2)) ||
|
|
(c=='u' && (ndig=4)) ||
|
|
(c=='U' && (ndig=8))) {
|
|
c = ios_getc(f);
|
|
while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
|
|
eseq[j++] = c;
|
|
c = ios_getc(f);
|
|
}
|
|
if (c!=IOS_EOF) ios_ungetc(c, f);
|
|
eseq[j] = '\0';
|
|
if (j) wc = strtol(eseq, NULL, 16);
|
|
else {
|
|
free(buf);
|
|
lerror(ParseError, "read: invalid escape sequence");
|
|
}
|
|
if (ndig == 2)
|
|
buf[i++] = ((char)wc);
|
|
else
|
|
i += u8_wc_toutf8(&buf[i], wc);
|
|
}
|
|
else {
|
|
buf[i++] = read_escape_control_char((char)c);
|
|
}
|
|
}
|
|
else {
|
|
buf[i++] = c;
|
|
}
|
|
}
|
|
s = cvalue_string(i);
|
|
memcpy(cvalue_data(s), buf, i);
|
|
free(buf);
|
|
return s;
|
|
}
|
|
|
|
// build a list of conses. this is complicated by the fact that all conses
|
|
// can move whenever a new cons is allocated. we have to refer to every cons
|
|
// through a handle to a relocatable pointer (i.e. a pointer on the stack).
|
|
static void read_list(ios_t *f, value_t *pval, value_t label)
|
|
{
|
|
value_t c, *pc;
|
|
u_int32_t t;
|
|
|
|
PUSH(NIL);
|
|
pc = &Stack[SP-1]; // to keep track of current cons cell
|
|
t = peek(f);
|
|
while (t != TOK_CLOSE) {
|
|
if (ios_eof(f))
|
|
lerror(ParseError, "read: unexpected end of input");
|
|
c = mk_cons(); car_(c) = cdr_(c) = NIL;
|
|
if (iscons(*pc)) {
|
|
cdr_(*pc) = c;
|
|
}
|
|
else {
|
|
*pval = c;
|
|
if (label != UNBOUND)
|
|
ptrhash_put(&readstate->backrefs, (void*)label, (void*)c);
|
|
}
|
|
*pc = c;
|
|
c = do_read_sexpr(f,UNBOUND); // must be on separate lines due to
|
|
car_(*pc) = c; // undefined evaluation order
|
|
|
|
t = peek(f);
|
|
if (t == TOK_DOT) {
|
|
take();
|
|
c = do_read_sexpr(f,UNBOUND);
|
|
cdr_(*pc) = c;
|
|
t = peek(f);
|
|
if (ios_eof(f))
|
|
lerror(ParseError, "read: unexpected end of input");
|
|
if (t != TOK_CLOSE)
|
|
lerror(ParseError, "read: expected ')'");
|
|
}
|
|
}
|
|
take();
|
|
(void)POP();
|
|
}
|
|
|
|
// label is the backreference we'd like to fix up with this read
|
|
static value_t do_read_sexpr(ios_t *f, value_t label)
|
|
{
|
|
value_t v, sym, oldtokval, *head;
|
|
value_t *pv;
|
|
u_int32_t t;
|
|
|
|
t = peek(f);
|
|
take();
|
|
switch (t) {
|
|
case TOK_CLOSE:
|
|
lerror(ParseError, "read: unexpected ')'");
|
|
case TOK_CLOSEB:
|
|
lerror(ParseError, "read: unexpected ']'");
|
|
case TOK_DOT:
|
|
lerror(ParseError, "read: unexpected '.'");
|
|
case TOK_SYM:
|
|
case TOK_NUM:
|
|
return tokval;
|
|
case TOK_COMMA:
|
|
head = &COMMA; goto listwith;
|
|
case TOK_COMMAAT:
|
|
head = &COMMAAT; goto listwith;
|
|
case TOK_COMMADOT:
|
|
head = &COMMADOT; goto listwith;
|
|
case TOK_BQ:
|
|
head = &BACKQUOTE; goto listwith;
|
|
case TOK_QUOTE:
|
|
head = "E;
|
|
listwith:
|
|
v = cons_reserve(2);
|
|
car_(v) = *head;
|
|
cdr_(v) = tagptr(((cons_t*)ptr(v))+1, TAG_CONS);
|
|
car_(cdr_(v)) = cdr_(cdr_(v)) = NIL;
|
|
PUSH(v);
|
|
if (label != UNBOUND)
|
|
ptrhash_put(&readstate->backrefs, (void*)label, (void*)v);
|
|
v = do_read_sexpr(f,UNBOUND);
|
|
car_(cdr_(Stack[SP-1])) = v;
|
|
return POP();
|
|
case TOK_SHARPQUOTE:
|
|
// femtoLisp doesn't need symbol-function, so #' does nothing
|
|
return do_read_sexpr(f, label);
|
|
case TOK_OPEN:
|
|
PUSH(NIL);
|
|
read_list(f, &Stack[SP-1], label);
|
|
return POP();
|
|
case TOK_SHARPSYM:
|
|
// constructor notation
|
|
sym = tokval;
|
|
PUSH(NIL);
|
|
read_list(f, &Stack[SP-1], UNBOUND);
|
|
v = POP();
|
|
return apply(symbol_value(sym), v);
|
|
case TOK_OPENB:
|
|
return read_vector(f, label, TOK_CLOSEB);
|
|
case TOK_SHARPOPEN:
|
|
return read_vector(f, label, TOK_CLOSE);
|
|
case TOK_SHARPDOT:
|
|
// eval-when-read
|
|
// evaluated expressions can refer to existing backreferences, but they
|
|
// cannot see pending labels. in other words:
|
|
// (... #2=#.#0# ... ) OK
|
|
// (... #2=#.(#2#) ... ) DO NOT WANT
|
|
v = do_read_sexpr(f,UNBOUND);
|
|
return toplevel_eval(v);
|
|
case TOK_LABEL:
|
|
// create backreference label
|
|
if (ptrhash_has(&readstate->backrefs, (void*)tokval))
|
|
lerror(ParseError, "read: label %ld redefined", numval(tokval));
|
|
oldtokval = tokval;
|
|
v = do_read_sexpr(f, tokval);
|
|
ptrhash_put(&readstate->backrefs, (void*)oldtokval, (void*)v);
|
|
return v;
|
|
case TOK_BACKREF:
|
|
// look up backreference
|
|
v = (value_t)ptrhash_get(&readstate->backrefs, (void*)tokval);
|
|
if (v == (value_t)HT_NOTFOUND)
|
|
lerror(ParseError, "read: undefined label %ld", numval(tokval));
|
|
return v;
|
|
case TOK_GENSYM:
|
|
pv = (value_t*)ptrhash_bp(&readstate->gensyms, (void*)tokval);
|
|
if (*pv == (value_t)HT_NOTFOUND)
|
|
*pv = gensym(NULL, 0);
|
|
return *pv;
|
|
case TOK_DOUBLEQUOTE:
|
|
return read_string(f);
|
|
}
|
|
return NIL;
|
|
}
|
|
|
|
value_t read_sexpr(ios_t *f)
|
|
{
|
|
value_t v;
|
|
readstate_t state;
|
|
state.prev = readstate;
|
|
htable_new(&state.backrefs, 16);
|
|
htable_new(&state.gensyms, 16);
|
|
readstate = &state;
|
|
|
|
v = do_read_sexpr(f, UNBOUND);
|
|
|
|
readstate = state.prev;
|
|
free_readstate(&state);
|
|
return v;
|
|
}
|