initial read implementation

This commit is contained in:
Yuichi Nishiwaki 2014-06-25 15:36:36 +09:00
parent 88a4943148
commit 219b244743
1 changed files with 492 additions and 225 deletions

View File

@ -2,84 +2,427 @@
* See Copyright Notice in picrin.h * See Copyright Notice in picrin.h
*/ */
#include <ctype.h>
#include <math.h>
#include "picrin.h" #include "picrin.h"
#include "picrin/parse.h" #include "picrin/error.h"
#include "picrin/pair.h" #include "picrin/pair.h"
#include "picrin/string.h" #include "picrin/string.h"
#include "picrin/vector.h" #include "picrin/vector.h"
#include "picrin/blob.h" #include "picrin/blob.h"
#include "picrin/port.h" #include "picrin/port.h"
#define YY_NO_UNISTD_H typedef pic_value (*read_func_t)(pic_state *, struct pic_port *, char);
#include "lex.yy.h"
static pic_value read(int, yyscan_t); static pic_value read(pic_state *pic, struct pic_port *port, char c);
#define pic (yyget_extra(scanner)->pic) static noreturn void
#define yylval (yyget_extra(scanner)->yylval) read_error(pic_state *pic, const char *msg)
#define yylabels (yyget_extra(scanner)->labels)
#define yymsg (yyget_extra(scanner)->msg)
#define yyjmp (yyget_extra(scanner)->jmp)
static void
error(const char *msg, yyscan_t scanner)
{ {
yymsg = msg; pic_error(pic, msg);
longjmp(yyjmp, 1);
} }
static int static char
gettok(yyscan_t scanner) skip(struct pic_port *port, char c)
{ {
int tok; while (isspace(c)) {
c = xfgetc(port->file);
while ((tok = yylex(scanner)) == tDATUM_COMMENT) {
read(gettok(scanner), scanner); /* discard */
} }
return tok; return c;
}
static char
next(struct pic_port *port)
{
char c;
c = xfgetc(port->file);
// printf("%c", c);
return c;
}
static char
peek(struct pic_port *port)
{
char c;
xungetc((c = xfgetc(port->file)), port->file);
return c;
} }
static pic_value static pic_value
read_label_set(int i, yyscan_t scanner) read_comment(pic_state *pic, struct pic_port *port, char c)
{
do {
c = next(port);
} while (! (c == EOF || c == '\n'));
return read(pic, port, c);
}
static pic_value
read_block_comment(pic_state *pic, struct pic_port *port, char c)
{
char x, y;
UNUSED(c);
x = next(port);
y = next(port);
while (! (x == '|' && y == '#')) {
x = y;
y = next(port);
if (y == EOF) {
break;
}
}
if (y != EOF) {
y = next(port);
}
return read(pic, port, y);
}
static pic_value
read_quote(pic_state *pic, struct pic_port *port, char c)
{
UNUSED(c);
return pic_list2(pic, pic_sym_value(pic->sQUOTE), read(pic, port, next(port)));
}
static pic_value
read_quasiquote(pic_state *pic, struct pic_port *port, char c)
{
UNUSED(c);
return pic_list2(pic, pic_sym_value(pic->sQUASIQUOTE), read(pic, port, next(port)));
}
static pic_value
read_comma(pic_state *pic, struct pic_port *port, char c)
{
c = next(port);
if (c == '@') {
return pic_list2(pic, pic_sym_value(pic->sUNQUOTE_SPLICING), read(pic, port, next(port)));
} else {
return pic_list2(pic, pic_sym_value(pic->sUNQUOTE), read(pic, port, c));
}
}
static pic_value
read_datum_comment(pic_state *pic, struct pic_port *port, char c)
{
UNUSED(c);
read(pic, port, next(port));
return read(pic, port, next(port));
}
static pic_value
read_symbol(pic_state *pic, struct pic_port *port, char c)
{
static const char TRAIL_SYMBOL[] = "+/*!$%&:@^~?<=>_.-";
size_t len;
char *buf;
pic_sym sym;
len = 0;
buf = NULL;
do {
if (len != 0) {
c = next(port);
}
len += 1;
buf = pic_realloc(pic, buf, len);
buf[len - 1] = c;
} while (isalnum(peek(port)) || strchr(TRAIL_SYMBOL, peek(port)));
buf[len] = '\0';
sym = pic_intern_cstr(pic, buf);
pic_free(pic, buf);
return pic_sym_value(sym);
}
static int
read_uinteger(pic_state *pic, struct pic_port *port, char c)
{
int n;
c = skip(port, c);
if (! isdigit(c)) {
read_error(pic, "expected one or more digits");
}
n = c - '0';
while (isdigit(c = peek(port))) {
next(port);
n = n * 10 + c - '0';
}
return n;
}
static pic_value
read_number(pic_state *pic, struct pic_port *port, char c)
{
int i, j;
i = read_uinteger(pic, port, c);
if (peek(port) == '.') {
next(port);
j = read_uinteger(pic, port, next(port));
return pic_float_value(i + (double)j * pow(10, -snprintf(NULL, 0, "%d", j)));
}
else {
return pic_int_value(i);
}
}
static pic_value
negate(pic_value n)
{
if (pic_int_p(n)) {
return pic_int_value(-pic_int(n));
} else {
return pic_float_value(-pic_float(n));
}
}
static pic_value
read_minus(pic_state *pic, struct pic_port *port, char c)
{
static const char DIGITS[] = "0123456789";
/* TODO: -inf.0, -nan.0 */
if (strchr(DIGITS, peek(port))) {
return negate(read_number(pic, port, c));
}
else {
return read_symbol(pic, port, c);
}
}
static pic_value
read_plus(pic_state *pic, struct pic_port *port, char c)
{
static const char DIGITS[] = "0123456789";
/* TODO: +inf.0, +nan.0 */
if (strchr(DIGITS, peek(port))) {
return read_number(pic, port, c);
}
else {
return read_symbol(pic, port, c);
}
}
static pic_value
read_boolean(pic_state *pic, struct pic_port *port, char c)
{
UNUSED(pic);
UNUSED(port);
/* TODO: support #true and #false */
if (c == 't') {
return pic_true_value();
} else {
return pic_false_value();
}
}
static pic_value
read_char(pic_state *pic, struct pic_port *port, char c)
{
UNUSED(pic);
UNUSED(c);
/* TODO: #\alart, #\space, so on and so on */
return pic_char_value(next(port));
}
static pic_value
read_string(pic_state *pic, struct pic_port *port, char c)
{
char *buf;
size_t size, cnt;
pic_str *str;
size = 256;
buf = pic_alloc(pic, size);
cnt = 0;
/* TODO: intraline whitespaces */
while ((c = next(port)) != '"') {
if (c == '\\') {
switch (c = next(port)) {
case 'a': c = '\a'; break;
case 'b': c = '\b'; break;
case 't': c = '\t'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
}
}
buf[cnt++] = c;
if (cnt >= size) {
buf = pic_realloc(pic, buf, size *= 2);
}
}
buf[cnt] = '\0';
str = pic_str_new(pic, buf, size);
pic_free(pic, buf);
return pic_obj_value(str);
}
static pic_value
read_unsigned_blob(pic_state *pic, struct pic_port *port, char c)
{
int nbits, n;
size_t len;
char *buf;
pic_blob *blob;
nbits = 0;
while (isdigit(c = next(port))) {
nbits = 10 * nbits + c - '0';
}
if (nbits != 8) {
read_error(pic, "unsupported bytevector bit width");
}
if (c != '(') {
read_error(pic, "expected '(' character");
}
len = 0;
buf = NULL;
c = next(port);
while ((c = skip(port, c)) != ')') {
n = read_uinteger(pic, port, c);
if (n < 0 || (1 << nbits) <= n) {
read_error(pic, "invalid element in bytevector literal");
}
len += 1;
buf = pic_realloc(pic, buf, len);
buf[len - 1] = n;
c = next(port);
}
blob = pic_blob_new(pic, buf, len);
pic_free(pic, buf);
return pic_obj_value(blob);
}
static pic_value
read_pair(pic_state *pic, struct pic_port *port, char c)
{
char tOPEN = c, tCLOSE = (tOPEN == '(') ? ')' : ']';
pic_value car, cdr;
c = skip(port, ' ');
if (c == tCLOSE) {
return pic_nil_value();
}
if (c == '.') {
cdr = read(pic, port, next(port));
if ((c = skip(port, ' ')) != tCLOSE) {
read_error(pic, "unmatched parenthesis");
}
return cdr;
}
else {
car = read(pic, port, c);
cdr = read_pair(pic, port, tOPEN); /* FIXME: don't use recursion */
return pic_cons(pic, car, cdr);
}
}
static pic_value
read_vector(pic_state *pic, struct pic_port *port, char c)
{ {
int tok;
pic_value val; pic_value val;
switch (tok = gettok(scanner)) { val = pic_nil_value();
case tLPAREN: while ((c = skip(port, c)) != ')') {
case tLBRACKET: val = pic_cons(pic, read(pic, port, c), val);
c = next(port);
}
return pic_obj_value(pic_vec_new_from_list(pic, pic_reverse(pic, val)));
}
static pic_value
read_label_set(pic_state *pic, struct pic_port *port, int i)
{
pic_value val;
char c;
switch (c = skip(port, ' ')) {
case '(': case '[':
{ {
pic_value tmp; pic_value tmp;
val = pic_cons(pic, pic_none_value(), pic_none_value()); val = pic_cons(pic, pic_none_value(), pic_none_value());
xh_put_int(&yylabels, i, &val); xh_put_int(&pic->rlabels, i, &val);
tmp = read(tok, scanner); tmp = read(pic, port, c);
pic_pair_ptr(val)->car = pic_car(pic, tmp); pic_pair_ptr(val)->car = pic_car(pic, tmp);
pic_pair_ptr(val)->cdr = pic_cdr(pic, tmp); pic_pair_ptr(val)->cdr = pic_cdr(pic, tmp);
return val; return val;
} }
case tVPAREN: case '#':
{ {
pic_vec *tmp; bool vect;
val = pic_obj_value(pic_vec_new(pic, 0)); if (peek(port) == '(') {
vect = true;
} else {
vect = false;
}
xh_put_int(&yylabels, i, &val); if (vect) {
pic_vec *tmp;
tmp = pic_vec_ptr(read(tok, scanner)); val = pic_obj_value(pic_vec_new(pic, 0));
SWAP(pic_value *, tmp->data, pic_vec_ptr(val)->data);
SWAP(size_t, tmp->len, pic_vec_ptr(val)->len);
return val; xh_put_int(&pic->rlabels, i, &val);
tmp = pic_vec_ptr(read(pic, port, c));
SWAP(pic_value *, tmp->data, pic_vec_ptr(val)->data);
SWAP(size_t, tmp->len, pic_vec_ptr(val)->len);
return val;
}
FALLTHROUGH;
} }
default: default:
{ {
val = read(tok, scanner); val = read(pic, port, c);
xh_put_int(&yylabels, i, &val); xh_put_int(&pic->rlabels, i, &val);
return val; return val;
} }
@ -87,239 +430,163 @@ read_label_set(int i, yyscan_t scanner)
} }
static pic_value static pic_value
read_label_ref(int i, yyscan_t scanner) read_label_ref(pic_state *pic, struct pic_port *port, int i)
{ {
xh_entry *e; xh_entry *e;
e = xh_get_int(&yylabels, i); UNUSED(port);
e = xh_get_int(&pic->rlabels, i);
if (! e) { if (! e) {
error("label of given index not defined", scanner); read_error(pic, "label of given index not defined");
} }
return xh_val(e, pic_value); return xh_val(e, pic_value);
} }
static pic_value static pic_value
read_pair(int tOPEN, yyscan_t scanner) read_label(pic_state *pic, struct pic_port *port, char c)
{ {
int tok, tCLOSE = (tOPEN == tLPAREN) ? tRPAREN : tRBRACKET; int i;
pic_value car, cdr;
tok = gettok(scanner); i = 0;
if (tok == tCLOSE) { do {
return pic_nil_value(); i = i * 10 + c;
} } while (isdigit(c = next(port)));
if (tok == tDOT) {
cdr = read(gettok(scanner), scanner);
if (gettok(scanner) != tCLOSE) { if (c == '=') {
error("unmatched parenthesis", scanner); return read_label_set(pic, port, i);
}
return cdr;
} }
else { if (c == '#') {
car = read(tok, scanner); return read_label_ref(pic, port, i);
cdr = read_pair(tOPEN, scanner);
return pic_cons(pic, car, cdr);
} }
} read_error(pic, "broken label expression");
static pic_vec *
read_vect(yyscan_t scanner)
{
int tok;
pic_value val;
val = pic_nil_value();
while ((tok = gettok(scanner)) != tRPAREN) {
val = pic_cons(pic, read(tok, scanner), val);
}
return pic_vec_new_from_list(pic, pic_reverse(pic, val));
} }
static pic_value static pic_value
read_abbrev(pic_sym sym, yyscan_t scanner) read_dispatch(pic_state *pic, struct pic_port *port, char c)
{ {
return pic_cons(pic, pic_sym_value(sym), pic_cons(pic, read(gettok(scanner), scanner), pic_nil_value())); c = next(port);
switch (c) {
case '!':
return read_comment(pic, port, c);
case '|':
return read_block_comment(pic, port, c);
case ';':
return read_datum_comment(pic, port, c);
case 't': case 'f':
return read_boolean(pic, port, c);
case '\\':
return read_char(pic, port, c);
case '(':
return read_vector(pic, port, c);
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return read_label(pic, port, c);
case 'u':
return read_unsigned_blob(pic, port, c);
default:
read_error(pic, "unexpected dispatch character");
}
} }
static pic_value static pic_value
read_datum(int tok, yyscan_t scanner) read(pic_state *pic, struct pic_port *port, char c)
{ {
pic_value val; c = skip(port, c);
switch (tok) { if (c == EOF) {
case tLABEL_SET: read_error(pic, "unexpected EOF");
return read_label_set(yylval.i, scanner);
case tLABEL_REF:
return read_label_ref(yylval.i, scanner);
case tSYMBOL:
return pic_symbol_value(pic_intern(pic, yylval.buf.dat, yylval.buf.len));
case tINT:
return pic_int_value(yylval.i);
case tFLOAT:
return pic_float_value(yylval.f);
case tBOOLEAN:
return pic_bool_value(yylval.i);
case tCHAR:
return pic_char_value(yylval.c);
case tSTRING:
val = pic_obj_value(pic_str_new(pic, yylval.buf.dat, yylval.buf.len));
pic_free(pic, yylval.buf.dat);
return val;
case tBYTEVECTOR:
val = pic_obj_value(pic_blob_new(pic, yylval.buf.dat, yylval.buf.len));
pic_free(pic, yylval.buf.dat);
return val;
case tLPAREN:
case tLBRACKET:
return read_pair(tok, scanner);
case tVPAREN:
return pic_obj_value(read_vect(scanner));
case tQUOTE:
return read_abbrev(pic->sQUOTE, scanner);
case tQUASIQUOTE:
return read_abbrev(pic->sQUASIQUOTE, scanner);
case tUNQUOTE:
return read_abbrev(pic->sUNQUOTE, scanner);
case tUNQUOTE_SPLICING:
return read_abbrev(pic->sUNQUOTE_SPLICING, scanner);
case tRPAREN:
error("unexpected close parenthesis", scanner);
case tRBRACKET:
error("unexpected close bracket", scanner);
case tDOT:
error("unexpected '.'", scanner);
case tEOF:
error(NULL, scanner);
} }
UNREACHABLE(); switch (c) {
} case ';':
return read_comment(pic, port, c);
static pic_value case '#':
read(int tok, yyscan_t scanner) return read_dispatch(pic, port, c);
{ case '\'':
size_t ai = pic_gc_arena_preserve(pic); return read_quote(pic, port, c);
pic_value val; case '`':
return read_quasiquote(pic, port, c);
val = read_datum(tok, scanner); case ',':
return read_comma(pic, port, c);
pic_gc_arena_restore(pic, ai); case '"':
pic_gc_protect(pic, val); return read_string(pic, port, c);
return val; case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return read_number(pic, port, c);
case '+':
return read_plus(pic, port, c);
case '-':
return read_minus(pic, port, c);
case '(': case '[':
return read_pair(pic, port, c);
default:
return read_symbol(pic, port, c);
}
} }
pic_value pic_value
read_one(yyscan_t scanner) pic_read(pic_state *pic, struct pic_port *port)
{ {
int tok; char c;
if (setjmp(yyjmp) != 0) { c = next(port);
pic_errorf(pic, "%s", yymsg ? yymsg : "unexpected EOF");
if (c == EOF) {
return pic_eof_object();
} }
if ((tok = gettok(scanner)) == tEOF) { return read(pic, port, c);
}
pic_value
pic_read_cstr(pic_state *pic, const char *str)
{
struct pic_port *port;
port = pic_open_input_string(pic, str);
return pic_read(pic, port);
}
static pic_value
pic_parse(pic_state *pic, struct pic_port *port)
{
pic_value val, acc;
pic_try {
acc = pic_nil_value();
while (! pic_eof_p(val = pic_read(pic, port))) {
pic_push(pic, val, acc);
}
}
pic_catch {
return pic_undef_value(); return pic_undef_value();
} }
return read(tok, scanner);
}
pic_list return pic_reverse(pic, acc);
read_many(yyscan_t scanner)
{
int tok;
pic_value vals;
if (setjmp(yyjmp) != 0) {
if (yymsg) {
pic_errorf(pic, "%s", yymsg);
}
return pic_undef_value(); /* incomplete string */
}
vals = pic_nil_value();
while ((tok = gettok(scanner)) != tEOF) {
vals = pic_cons(pic, read(tok, scanner), vals);
}
return pic_reverse(pic, vals);
}
#undef pic
pic_value
pic_read(pic_state *pic, const char *cstr)
{
yyscan_t scanner;
struct parser_control ctrl;
pic_value val;
ctrl.pic = pic;
xh_init_int(&ctrl.labels, sizeof(pic_value));
yylex_init_extra(&ctrl, &scanner);
yy_scan_string(cstr, scanner);
val = read_one(scanner);
yylex_destroy(scanner);
xh_destroy(&ctrl.labels);
return val;
} }
pic_list pic_list
pic_parse_file(pic_state *pic, FILE *file) pic_parse_file(pic_state *pic, FILE *file)
{ {
yyscan_t scanner; struct pic_port *port;
struct parser_control ctrl;
pic_value vals;
ctrl.pic = pic; port = (struct pic_port *)pic_obj_alloc(pic, sizeof(struct pic_port *), PIC_TT_PORT);
xh_init_int(&ctrl.labels, sizeof(pic_value)); port->file = xfpopen(file);
yylex_init_extra(&ctrl, &scanner); port->flags = PIC_PORT_OUT | PIC_PORT_TEXT;
yyset_in(file, scanner); port->status = PIC_PORT_OPEN;
vals = read_many(scanner); return pic_parse(pic, port);
yylex_destroy(scanner);
xh_destroy(&ctrl.labels);
return vals;
} }
pic_list pic_list
pic_parse_cstr(pic_state *pic, const char *cstr) pic_parse_cstr(pic_state *pic, const char *str)
{ {
yyscan_t scanner; struct pic_port *port;
struct parser_control ctrl;
pic_value vals;
ctrl.pic = pic; port = pic_open_input_string(pic, str);
xh_init_int(&ctrl.labels, sizeof(pic_value));
yylex_init_extra(&ctrl, &scanner);
yy_scan_string(cstr, scanner);
vals = read_many(scanner); return pic_parse(pic, port);
yylex_destroy(scanner);
xh_destroy(&ctrl.labels);
return vals;
} }