refactored escape sequence handling a bit, added error for invalid hex

discarding rest of input line after a parse error
made compare() do less work for unordered comparison
added peekc and purge to ios
This commit is contained in:
JeffBezanson 2008-11-06 04:04:04 +00:00
parent 120522c212
commit c89111f7cb
12 changed files with 180 additions and 99 deletions

View File

@ -30,10 +30,8 @@ static void eq_union(ptrhash_t *table, value_t a, value_t b,
ptrhash_put(table, (void*)b, (void*)ca);
}
// ordered comparison
// a is a fixnum, b is a cvalue
static value_t compare_num_cvalue(value_t a, value_t b)
static value_t compare_num_cvalue(value_t a, value_t b, int eq)
{
cvalue_t *bcv = (cvalue_t*)ptr(b);
numerictype_t bt;
@ -42,6 +40,7 @@ static value_t compare_num_cvalue(value_t a, value_t b)
void *bptr = cv_data(bcv);
if (cmp_eq(&ia, T_FIXNUM, bptr, bt))
return fixnum(0);
if (eq) return fixnum(1);
if (cmp_lt(&ia, T_FIXNUM, bptr, bt))
return fixnum(-1);
}
@ -51,17 +50,19 @@ static value_t compare_num_cvalue(value_t a, value_t b)
return fixnum(1);
}
static value_t bounded_compare(value_t a, value_t b, int bound);
static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table);
static value_t bounded_compare(value_t a, value_t b, int bound, int eq);
static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq);
static value_t bounded_vector_compare(value_t a, value_t b, int bound)
static value_t bounded_vector_compare(value_t a, value_t b, int bound, int eq)
{
size_t la = vector_size(a);
size_t lb = vector_size(b);
size_t m, i;
if (eq && (la!=lb)) return fixnum(1);
m = la < lb ? la : lb;
for (i = 0; i < m; i++) {
value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i), bound-1);
value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i),
bound-1, eq);
if (d==NIL || numval(d)!=0) return d;
}
if (la < lb) return fixnum(-1);
@ -71,7 +72,7 @@ static value_t bounded_vector_compare(value_t a, value_t b, int bound)
// strange comparisons are resolved arbitrarily but consistently.
// ordering: number < builtin < cvalue < vector < symbol < cons
static value_t bounded_compare(value_t a, value_t b, int bound)
static value_t bounded_compare(value_t a, value_t b, int bound, int eq)
{
value_t d;
@ -88,16 +89,17 @@ static value_t bounded_compare(value_t a, value_t b, int bound)
return (numval(a) < numval(b)) ? fixnum(-1) : fixnum(1);
}
if (iscvalue(b)) {
return compare_num_cvalue(a, b);
return compare_num_cvalue(a, b, eq);
}
return fixnum(-1);
case TAG_SYM:
if (eq) return fixnum(1);
if (tagb < TAG_SYM) return fixnum(1);
if (tagb > TAG_SYM) return fixnum(-1);
return fixnum(strcmp(symbol_name(a), symbol_name(b)));
case TAG_VECTOR:
if (isvector(b))
return bounded_vector_compare(a, b, bound);
return bounded_vector_compare(a, b, bound, eq);
break;
case TAG_CVALUE:
if (iscvalue(b)) {
@ -109,6 +111,7 @@ static value_t bounded_compare(value_t a, value_t b, int bound)
void *bptr = cv_data(bcv);
if (cmp_eq(aptr, at, bptr, bt))
return fixnum(0);
if (eq) return fixnum(1);
if (cmp_lt(aptr, at, bptr, bt))
return fixnum(-1);
return fixnum(1);
@ -116,7 +119,7 @@ static value_t bounded_compare(value_t a, value_t b, int bound)
return cvalue_compare(a, b);
}
else if (isfixnum(b)) {
return fixnum(-numval(compare_num_cvalue(b, a)));
return fixnum(-numval(compare_num_cvalue(b, a, eq)));
}
break;
case TAG_BUILTIN:
@ -126,7 +129,7 @@ static value_t bounded_compare(value_t a, value_t b, int bound)
break;
case TAG_CONS:
if (tagb < TAG_CONS) return fixnum(1);
d = bounded_compare(car_(a), car_(b), bound-1);
d = bounded_compare(car_(a), car_(b), bound-1, eq);
if (d==NIL || numval(d) != 0) return d;
a = cdr_(a); b = cdr_(b);
bound--;
@ -135,7 +138,8 @@ static value_t bounded_compare(value_t a, value_t b, int bound)
return (taga < tagb) ? fixnum(-1) : fixnum(1);
}
static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table)
static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table,
int eq)
{
size_t la = vector_size(a);
size_t lb = vector_size(b);
@ -143,12 +147,13 @@ static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table)
value_t d, xa, xb, ca, cb;
// first try to prove them different with no recursion
if (eq && (la!=lb)) return fixnum(1);
m = la < lb ? la : lb;
for (i = 0; i < m; i++) {
xa = vector_elt(a,i);
xb = vector_elt(b,i);
if (leafp(xa) || leafp(xb)) {
d = bounded_compare(xa, xb, 1);
d = bounded_compare(xa, xb, 1, eq);
if (numval(d)!=0) return d;
}
else if (cmptag(xa) < cmptag(xb)) {
@ -170,7 +175,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table)
xa = vector_elt(a,i);
xb = vector_elt(b,i);
if (!leafp(xa) && !leafp(xb)) {
d = cyc_compare(xa, xb, table);
d = cyc_compare(xa, xb, table, eq);
if (numval(d)!=0)
return d;
}
@ -181,7 +186,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table)
return fixnum(0);
}
static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table)
static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq)
{
if (a==b)
return fixnum(0);
@ -193,7 +198,7 @@ static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table)
int tagab = cmptag(ab); int tagdb = cmptag(db);
value_t d, ca, cb;
if (leafp(aa) || leafp(ab)) {
d = bounded_compare(aa, ab, 1);
d = bounded_compare(aa, ab, 1, eq);
if (numval(d)!=0) return d;
}
else if (tagaa < tagab)
@ -201,7 +206,7 @@ static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table)
else if (tagaa > tagab)
return fixnum(1);
if (leafp(da) || leafp(db)) {
d = bounded_compare(da, db, 1);
d = bounded_compare(da, db, 1, eq);
if (numval(d)!=0) return d;
}
else if (tagda < tagdb)
@ -215,18 +220,18 @@ static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table)
return fixnum(0);
eq_union(table, a, b, ca, cb);
d = cyc_compare(aa, ab, table);
d = cyc_compare(aa, ab, table, eq);
if (numval(d)!=0) return d;
return cyc_compare(da, db, table);
return cyc_compare(da, db, table, eq);
}
else {
return fixnum(1);
}
}
else if (isvector(a) && isvector(b)) {
return cyc_vector_compare(a, b, table);
return cyc_vector_compare(a, b, table, eq);
}
return bounded_compare(a, b, 1);
return bounded_compare(a, b, 1, eq);
}
static ptrhash_t equal_eq_hashtable;
@ -235,21 +240,27 @@ void comparehash_init()
ptrhash_new(&equal_eq_hashtable, 512);
}
value_t compare(value_t a, value_t b)
// 'eq' means unordered comparison is sufficient
static value_t compare_(value_t a, value_t b, int eq)
{
value_t guess = bounded_compare(a, b, 2048);
value_t guess = bounded_compare(a, b, 2048, eq);
if (guess == NIL) {
guess = cyc_compare(a, b, &equal_eq_hashtable);
guess = cyc_compare(a, b, &equal_eq_hashtable, eq);
ptrhash_reset(&equal_eq_hashtable, 512);
}
return guess;
}
value_t compare(value_t a, value_t b)
{
return compare_(a, b, 0);
}
value_t equal(value_t a, value_t b)
{
if (eq_comparable(a, b))
return (a == b) ? T : NIL;
return (numval(compare(a,b))==0 ? T : NIL);
return (numval(compare_(a,b,1))==0 ? T : NIL);
}
/*

View File

@ -1509,7 +1509,13 @@ int main(int argc, char *argv[])
repl:
while (1) {
ios_puts("> ", ios_stdout); ios_flush(ios_stdout);
v = read_sexpr(ios_stdin);
FL_TRY {
v = read_sexpr(ios_stdin);
}
FL_CATCH {
ios_purge(ios_stdin);
raise(lasterror);
}
if (ios_eof(ios_stdin)) break;
print(ios_stdout, v=toplevel_eval(v), 0);
set(symbol("that"), v);

View File

@ -357,7 +357,6 @@ static value_t read_string(ios_t *f)
else if ((c=='x' && (ndig=2)) ||
(c=='u' && (ndig=4)) ||
(c=='U' && (ndig=8))) {
wc = c;
c = ios_getc(f);
while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
eseq[j++] = c;
@ -366,24 +365,15 @@ static value_t read_string(ios_t *f)
if (c!=IOS_EOF) ios_ungetc(c, f);
eseq[j] = '\0';
if (j) wc = strtol(eseq, NULL, 16);
else {
free(buf);
lerror(ParseError, "read: invalid escape sequence");
}
i += u8_wc_toutf8(&buf[i], wc);
}
else if (c == 'n')
buf[i++] = '\n';
else if (c == 't')
buf[i++] = '\t';
else if (c == 'r')
buf[i++] = '\r';
else if (c == 'b')
buf[i++] = '\b';
else if (c == 'f')
buf[i++] = '\f';
else if (c == 'v')
buf[i++] = '\v';
else if (c == 'a')
buf[i++] = '\a';
else
buf[i++] = c;
else {
buf[i++] = read_escape_control_char((char)c);
}
}
else {
buf[i++] = c;

View File

@ -47,11 +47,10 @@
(defun nconc lsts
(cond ((null lsts) ())
((null (cdr lsts)) (car lsts))
(T ((lambda (l d) (if (null l) d
(prog1 l
(while (consp (cdr l)) (setq l (cdr l)))
(rplacd l d))))
(car lsts) (apply nconc (cdr lsts))))))
((null (car lsts)) (apply nconc (cdr lsts)))
(T (prog1 (car lsts)
(rplacd (last (car lsts))
(apply nconc (cdr lsts)))))))
(defun append lsts
(cond ((null lsts) ())
@ -211,10 +210,21 @@
(defun transpose (M) (apply mapcar (cons list M)))
(defun filter (pred lst)
(cond ((null lst) ())
((pred (car lst)) (cons (car lst) (filter pred (cdr lst))))
(T (filter pred (cdr lst)))))
(defun filter (pred lst) (filter- pred lst nil))
(defun filter- (pred lst accum)
(cond ((null lst) accum)
((pred (car lst))
(filter- pred (cdr lst) (cons (car lst) accum)))
(T
(filter- pred (cdr lst) accum))))
(defun separate (pred lst) (separate- pred lst nil nil))
(defun separate- (pred lst yes no)
(cond ((null lst) (cons yes no))
((pred (car lst))
(separate- pred (cdr lst) (cons (car lst) yes) no))
(T
(separate- pred (cdr lst) yes (cons (car lst) no)))))
(define (foldr f zero lst)
(if (null lst) zero

View File

@ -79,22 +79,30 @@ value_t fl_hashtablep(value_t *args, u_int32_t nargs)
return NIL;
}
// (put table key value)
value_t fl_hash_put(value_t *args, u_int32_t nargs)
{
argcount("put", nargs, 3);
return NIL;
}
// (get table key)
value_t fl_hash_get(value_t *args, u_int32_t nargs)
{
argcount("get", nargs, 2);
return NIL;
}
// (has table key)
value_t fl_hash_has(value_t *args, u_int32_t nargs)
{
argcount("has", nargs, 2);
return NIL;
}
// (del table key)
value_t fl_hash_delete(value_t *args, u_int32_t nargs)
{
argcount("del", nargs, 2);
return NIL;
}

View File

@ -43,10 +43,11 @@
(defun sort (l)
(if (or (null l) (null (cdr l))) l
(let ((piv (car l)))
(nconc (sort (filter (lambda (x) (<= x piv)) (cdr l)))
(let* ((piv (car l))
(halves (separate (lambda (x) (< x piv)) (cdr l))))
(nconc (sort (car halves))
(list piv)
(sort (filter (lambda (x) (> x piv)) (cdr l)))))))
(sort (cdr halves))))))
(defmacro dotimes (var . body)
(let ((v (car var))

View File

@ -928,3 +928,35 @@ consolidated todo list as of 8/30:
- remaining cvalues functions
- special efficient reader for #array
- finish ios
-----------------------------------------------------------------------------
cvalues redesign
goals:
. allow custom types with vtables
. use less space, share types more
. simplify access to important metadata like length
. unify vectors and arrays
typedef struct {
fltype_t *type;
void *data;
size_t len; // length of *data in bytes
value_t parent; // optional
char data[1]; // variable size
} cvalue_t;
typedef struct {
fltype_t *type;
void *data;
} cprim_t;
typedef struct _fltype_t {
value_t type;
int numtype;
size_t sz;
cvtable_t *vtable;
struct _fltype_t *eltype; // for arrays
} fltype_t;

View File

@ -715,6 +715,16 @@ int ios_getc(ios_t *s)
return (int)ch;
}
int ios_peekc(ios_t *s)
{
if (s->bpos < s->size)
return s->buf[s->bpos];
if (s->_eof) return IOS_EOF;
size_t n = ios_readprep(s, 1);
if (n == 0) return IOS_EOF;
return s->buf[s->bpos];
}
int ios_ungetc(int c, ios_t *s)
{
if (s->state == bst_wr)
@ -761,6 +771,13 @@ int ios_getutf8(ios_t *s, uint32_t *pwc)
return 1;
}
void ios_purge(ios_t *s)
{
if (s->state == bst_rd) {
s->bpos = s->size;
}
}
int ios_printf(ios_t *s, char *format, ...)
{
char *str=NULL;

View File

@ -112,6 +112,9 @@ int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars);
int ios_readline(ios_t *dest, ios_t *s, char delim);
int ios_getline(ios_t *s, char **pbuf, size_t *psz);
// discard data buffered for reading
void ios_purge(ios_t *s);
// seek by utf8 sequence increments
int ios_nextutf8(ios_t *s);
int ios_prevutf8(ios_t *s);
@ -121,6 +124,7 @@ int ios_prevutf8(ios_t *s);
int ios_putc(int c, ios_t *s);
//wint_t ios_putwc(ios_t *s, wchar_t wc);
int ios_getc(ios_t *s);
int ios_peekc(ios_t *s);
//wint_t ios_getwc(ios_t *s);
int ios_ungetc(int c, ios_t *s);
//wint_t ios_ungetwc(ios_t *s, wint_t wc);

View File

@ -70,7 +70,7 @@ static void **ptrhash_lookup_bp(ptrhash_t *h, void *key)
orig = index;
do {
if (tab[index] == PH_NOTFOUND) {
if (tab[index+1] == PH_NOTFOUND) {
tab[index] = key;
return &tab[index+1];
}

View File

@ -313,56 +313,56 @@ int hex_digit(char c)
(c >= 'a' && c <= 'f'));
}
/* assumes that src points to the character after a backslash
returns number of input characters processed */
int u8_read_escape_sequence(const char *str, u_int32_t *dest)
char read_escape_control_char(char c)
{
u_int32_t ch;
char digs[9]="\0\0\0\0\0\0\0\0\0";
int dno=0, i=1;
if (c == 'n')
return '\n';
else if (c == 't')
return '\t';
else if (c == 'r')
return '\r';
else if (c == 'b')
return '\b';
else if (c == 'f')
return '\f';
else if (c == 'v')
return '\v';
else if (c == 'a')
return '\a';
return c;
}
ch = (u_int32_t)str[0]; /* take literal character */
if (str[0] == 'n')
ch = L'\n';
else if (str[0] == 't')
ch = L'\t';
else if (str[0] == 'r')
ch = L'\r';
else if (str[0] == 'b')
ch = L'\b';
else if (str[0] == 'f')
ch = L'\f';
else if (str[0] == 'v')
ch = L'\v';
else if (str[0] == 'a')
ch = L'\a';
else if (octal_digit(str[0])) {
/* assumes that src points to the character after a backslash
returns number of input characters processed, 0 if error */
size_t u8_read_escape_sequence(const char *str, size_t ssz, u_int32_t *dest)
{
assert(ssz > 0);
u_int32_t ch;
char digs[10];
int dno=0, ndig;
size_t i=1;
char c0 = str[0];
if (octal_digit(c0)) {
i = 0;
do {
digs[dno++] = str[i++];
} while (octal_digit(str[i]) && dno < 3);
} while (i<ssz && octal_digit(str[i]) && dno<3);
digs[dno] = '\0';
ch = strtol(digs, NULL, 8);
}
else if (str[0] == 'x') {
while (hex_digit(str[i]) && dno < 2) {
else if ((c0=='x' && (ndig=2)) ||
(c0=='u' && (ndig=4)) ||
(c0=='U' && (ndig=8))) {
while (i<ssz && hex_digit(str[i]) && dno<ndig) {
digs[dno++] = str[i++];
}
if (dno > 0)
ch = strtol(digs, NULL, 16);
if (dno == 0) return 0;
digs[dno] = '\0';
ch = strtol(digs, NULL, 16);
}
else if (str[0] == 'u') {
while (hex_digit(str[i]) && dno < 4) {
digs[dno++] = str[i++];
}
if (dno > 0)
ch = strtol(digs, NULL, 16);
}
else if (str[0] == 'U') {
while (hex_digit(str[i]) && dno < 8) {
digs[dno++] = str[i++];
}
if (dno > 0)
ch = strtol(digs, NULL, 16);
else {
ch = (u_int32_t)read_escape_control_char(c0);
}
*dest = ch;
@ -381,7 +381,7 @@ size_t u8_unescape(char *buf, size_t sz, const char *src)
while (*src && c < sz) {
if (*src == '\\') {
src++;
amt = u8_read_escape_sequence(src, &ch);
amt = u8_read_escape_sequence(src, 1000, &ch);
}
else {
ch = (u_int32_t)*src;

View File

@ -55,10 +55,12 @@ size_t u8_charlen(u_int32_t ch);
/* computes the # of bytes needed to encode a WC string as UTF-8 */
size_t u8_codingsize(u_int32_t *wcstr, size_t n);
char read_escape_control_char(char c);
/* assuming src points to the character after a backslash, read an
escape sequence, storing the result in dest and returning the number of
input characters processed */
int u8_read_escape_sequence(const char *src, u_int32_t *dest);
size_t u8_read_escape_sequence(const char *src, size_t ssz, u_int32_t *dest);
/* given a wide character, convert it to an ASCII escape sequence stored in
buf, where buf is "sz" bytes. returns the number of characters output.