From c89111f7cb0844696014e6061bc843c3cf315344 Mon Sep 17 00:00:00 2001 From: JeffBezanson Date: Thu, 6 Nov 2008 04:04:04 +0000 Subject: [PATCH] refactored escape sequence handling a bit, added error for invalid hex discarding rest of input line after a parse error made compare() do less work for unordered comparison added peekc and purge to ios --- femtolisp/equal.c | 63 ++++++++++++++++++++-------------- femtolisp/flisp.c | 8 ++++- femtolisp/read.c | 24 ++++--------- femtolisp/system.lsp | 28 ++++++++++----- femtolisp/table.c | 8 +++++ femtolisp/test.lsp | 7 ++-- femtolisp/todo | 32 +++++++++++++++++ llt/ios.c | 17 +++++++++ llt/ios.h | 4 +++ llt/ptrhash.c | 2 +- llt/utf8.c | 82 ++++++++++++++++++++++---------------------- llt/utf8.h | 4 ++- 12 files changed, 180 insertions(+), 99 deletions(-) diff --git a/femtolisp/equal.c b/femtolisp/equal.c index a2f64f2..2fd889e 100644 --- a/femtolisp/equal.c +++ b/femtolisp/equal.c @@ -30,10 +30,8 @@ static void eq_union(ptrhash_t *table, value_t a, value_t b, ptrhash_put(table, (void*)b, (void*)ca); } -// ordered comparison - // a is a fixnum, b is a cvalue -static value_t compare_num_cvalue(value_t a, value_t b) +static value_t compare_num_cvalue(value_t a, value_t b, int eq) { cvalue_t *bcv = (cvalue_t*)ptr(b); numerictype_t bt; @@ -42,6 +40,7 @@ static value_t compare_num_cvalue(value_t a, value_t b) void *bptr = cv_data(bcv); if (cmp_eq(&ia, T_FIXNUM, bptr, bt)) return fixnum(0); + if (eq) return fixnum(1); if (cmp_lt(&ia, T_FIXNUM, bptr, bt)) return fixnum(-1); } @@ -51,17 +50,19 @@ static value_t compare_num_cvalue(value_t a, value_t b) return fixnum(1); } -static value_t bounded_compare(value_t a, value_t b, int bound); -static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table); +static value_t bounded_compare(value_t a, value_t b, int bound, int eq); +static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq); -static value_t bounded_vector_compare(value_t a, value_t b, int bound) +static value_t bounded_vector_compare(value_t a, value_t b, int bound, int eq) { size_t la = vector_size(a); size_t lb = vector_size(b); size_t m, i; + if (eq && (la!=lb)) return fixnum(1); m = la < lb ? la : lb; for (i = 0; i < m; i++) { - value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i), bound-1); + value_t d = bounded_compare(vector_elt(a,i), vector_elt(b,i), + bound-1, eq); if (d==NIL || numval(d)!=0) return d; } if (la < lb) return fixnum(-1); @@ -71,7 +72,7 @@ static value_t bounded_vector_compare(value_t a, value_t b, int bound) // strange comparisons are resolved arbitrarily but consistently. // ordering: number < builtin < cvalue < vector < symbol < cons -static value_t bounded_compare(value_t a, value_t b, int bound) +static value_t bounded_compare(value_t a, value_t b, int bound, int eq) { value_t d; @@ -88,16 +89,17 @@ static value_t bounded_compare(value_t a, value_t b, int bound) return (numval(a) < numval(b)) ? fixnum(-1) : fixnum(1); } if (iscvalue(b)) { - return compare_num_cvalue(a, b); + return compare_num_cvalue(a, b, eq); } return fixnum(-1); case TAG_SYM: + if (eq) return fixnum(1); if (tagb < TAG_SYM) return fixnum(1); if (tagb > TAG_SYM) return fixnum(-1); return fixnum(strcmp(symbol_name(a), symbol_name(b))); case TAG_VECTOR: if (isvector(b)) - return bounded_vector_compare(a, b, bound); + return bounded_vector_compare(a, b, bound, eq); break; case TAG_CVALUE: if (iscvalue(b)) { @@ -109,6 +111,7 @@ static value_t bounded_compare(value_t a, value_t b, int bound) void *bptr = cv_data(bcv); if (cmp_eq(aptr, at, bptr, bt)) return fixnum(0); + if (eq) return fixnum(1); if (cmp_lt(aptr, at, bptr, bt)) return fixnum(-1); return fixnum(1); @@ -116,7 +119,7 @@ static value_t bounded_compare(value_t a, value_t b, int bound) return cvalue_compare(a, b); } else if (isfixnum(b)) { - return fixnum(-numval(compare_num_cvalue(b, a))); + return fixnum(-numval(compare_num_cvalue(b, a, eq))); } break; case TAG_BUILTIN: @@ -126,7 +129,7 @@ static value_t bounded_compare(value_t a, value_t b, int bound) break; case TAG_CONS: if (tagb < TAG_CONS) return fixnum(1); - d = bounded_compare(car_(a), car_(b), bound-1); + d = bounded_compare(car_(a), car_(b), bound-1, eq); if (d==NIL || numval(d) != 0) return d; a = cdr_(a); b = cdr_(b); bound--; @@ -135,7 +138,8 @@ static value_t bounded_compare(value_t a, value_t b, int bound) return (taga < tagb) ? fixnum(-1) : fixnum(1); } -static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table) +static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table, + int eq) { size_t la = vector_size(a); size_t lb = vector_size(b); @@ -143,12 +147,13 @@ static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table) value_t d, xa, xb, ca, cb; // first try to prove them different with no recursion + if (eq && (la!=lb)) return fixnum(1); m = la < lb ? la : lb; for (i = 0; i < m; i++) { xa = vector_elt(a,i); xb = vector_elt(b,i); if (leafp(xa) || leafp(xb)) { - d = bounded_compare(xa, xb, 1); + d = bounded_compare(xa, xb, 1, eq); if (numval(d)!=0) return d; } else if (cmptag(xa) < cmptag(xb)) { @@ -170,7 +175,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table) xa = vector_elt(a,i); xb = vector_elt(b,i); if (!leafp(xa) && !leafp(xb)) { - d = cyc_compare(xa, xb, table); + d = cyc_compare(xa, xb, table, eq); if (numval(d)!=0) return d; } @@ -181,7 +186,7 @@ static value_t cyc_vector_compare(value_t a, value_t b, ptrhash_t *table) return fixnum(0); } -static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table) +static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table, int eq) { if (a==b) return fixnum(0); @@ -193,7 +198,7 @@ static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table) int tagab = cmptag(ab); int tagdb = cmptag(db); value_t d, ca, cb; if (leafp(aa) || leafp(ab)) { - d = bounded_compare(aa, ab, 1); + d = bounded_compare(aa, ab, 1, eq); if (numval(d)!=0) return d; } else if (tagaa < tagab) @@ -201,7 +206,7 @@ static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table) else if (tagaa > tagab) return fixnum(1); if (leafp(da) || leafp(db)) { - d = bounded_compare(da, db, 1); + d = bounded_compare(da, db, 1, eq); if (numval(d)!=0) return d; } else if (tagda < tagdb) @@ -215,18 +220,18 @@ static value_t cyc_compare(value_t a, value_t b, ptrhash_t *table) return fixnum(0); eq_union(table, a, b, ca, cb); - d = cyc_compare(aa, ab, table); + d = cyc_compare(aa, ab, table, eq); if (numval(d)!=0) return d; - return cyc_compare(da, db, table); + return cyc_compare(da, db, table, eq); } else { return fixnum(1); } } else if (isvector(a) && isvector(b)) { - return cyc_vector_compare(a, b, table); + return cyc_vector_compare(a, b, table, eq); } - return bounded_compare(a, b, 1); + return bounded_compare(a, b, 1, eq); } static ptrhash_t equal_eq_hashtable; @@ -235,21 +240,27 @@ void comparehash_init() ptrhash_new(&equal_eq_hashtable, 512); } -value_t compare(value_t a, value_t b) +// 'eq' means unordered comparison is sufficient +static value_t compare_(value_t a, value_t b, int eq) { - value_t guess = bounded_compare(a, b, 2048); + value_t guess = bounded_compare(a, b, 2048, eq); if (guess == NIL) { - guess = cyc_compare(a, b, &equal_eq_hashtable); + guess = cyc_compare(a, b, &equal_eq_hashtable, eq); ptrhash_reset(&equal_eq_hashtable, 512); } return guess; } +value_t compare(value_t a, value_t b) +{ + return compare_(a, b, 0); +} + value_t equal(value_t a, value_t b) { if (eq_comparable(a, b)) return (a == b) ? T : NIL; - return (numval(compare(a,b))==0 ? T : NIL); + return (numval(compare_(a,b,1))==0 ? T : NIL); } /* diff --git a/femtolisp/flisp.c b/femtolisp/flisp.c index 9665454..9726a0b 100644 --- a/femtolisp/flisp.c +++ b/femtolisp/flisp.c @@ -1509,7 +1509,13 @@ int main(int argc, char *argv[]) repl: while (1) { ios_puts("> ", ios_stdout); ios_flush(ios_stdout); - v = read_sexpr(ios_stdin); + FL_TRY { + v = read_sexpr(ios_stdin); + } + FL_CATCH { + ios_purge(ios_stdin); + raise(lasterror); + } if (ios_eof(ios_stdin)) break; print(ios_stdout, v=toplevel_eval(v), 0); set(symbol("that"), v); diff --git a/femtolisp/read.c b/femtolisp/read.c index 2b8a3be..8629fba 100644 --- a/femtolisp/read.c +++ b/femtolisp/read.c @@ -357,7 +357,6 @@ static value_t read_string(ios_t *f) else if ((c=='x' && (ndig=2)) || (c=='u' && (ndig=4)) || (c=='U' && (ndig=8))) { - wc = c; c = ios_getc(f); while (hex_digit(c) && j x piv)) (cdr l))))))) + (sort (cdr halves)))))) (defmacro dotimes (var . body) (let ((v (car var)) diff --git a/femtolisp/todo b/femtolisp/todo index 6195dad..8cda4f3 100644 --- a/femtolisp/todo +++ b/femtolisp/todo @@ -928,3 +928,35 @@ consolidated todo list as of 8/30: - remaining cvalues functions - special efficient reader for #array - finish ios + +----------------------------------------------------------------------------- + +cvalues redesign + +goals: +. allow custom types with vtables +. use less space, share types more +. simplify access to important metadata like length +. unify vectors and arrays + +typedef struct { + fltype_t *type; + void *data; + size_t len; // length of *data in bytes + + value_t parent; // optional + char data[1]; // variable size +} cvalue_t; + +typedef struct { + fltype_t *type; + void *data; +} cprim_t; + +typedef struct _fltype_t { + value_t type; + int numtype; + size_t sz; + cvtable_t *vtable; + struct _fltype_t *eltype; // for arrays +} fltype_t; diff --git a/llt/ios.c b/llt/ios.c index 7acd75f..ea04dc8 100644 --- a/llt/ios.c +++ b/llt/ios.c @@ -715,6 +715,16 @@ int ios_getc(ios_t *s) return (int)ch; } +int ios_peekc(ios_t *s) +{ + if (s->bpos < s->size) + return s->buf[s->bpos]; + if (s->_eof) return IOS_EOF; + size_t n = ios_readprep(s, 1); + if (n == 0) return IOS_EOF; + return s->buf[s->bpos]; +} + int ios_ungetc(int c, ios_t *s) { if (s->state == bst_wr) @@ -761,6 +771,13 @@ int ios_getutf8(ios_t *s, uint32_t *pwc) return 1; } +void ios_purge(ios_t *s) +{ + if (s->state == bst_rd) { + s->bpos = s->size; + } +} + int ios_printf(ios_t *s, char *format, ...) { char *str=NULL; diff --git a/llt/ios.h b/llt/ios.h index 2076e55..44e801f 100644 --- a/llt/ios.h +++ b/llt/ios.h @@ -112,6 +112,9 @@ int ios_getstringn(ios_t *dest, ios_t *src, size_t nchars); int ios_readline(ios_t *dest, ios_t *s, char delim); int ios_getline(ios_t *s, char **pbuf, size_t *psz); +// discard data buffered for reading +void ios_purge(ios_t *s); + // seek by utf8 sequence increments int ios_nextutf8(ios_t *s); int ios_prevutf8(ios_t *s); @@ -121,6 +124,7 @@ int ios_prevutf8(ios_t *s); int ios_putc(int c, ios_t *s); //wint_t ios_putwc(ios_t *s, wchar_t wc); int ios_getc(ios_t *s); +int ios_peekc(ios_t *s); //wint_t ios_getwc(ios_t *s); int ios_ungetc(int c, ios_t *s); //wint_t ios_ungetwc(ios_t *s, wint_t wc); diff --git a/llt/ptrhash.c b/llt/ptrhash.c index 84b294a..1a1ebcc 100644 --- a/llt/ptrhash.c +++ b/llt/ptrhash.c @@ -70,7 +70,7 @@ static void **ptrhash_lookup_bp(ptrhash_t *h, void *key) orig = index; do { - if (tab[index] == PH_NOTFOUND) { + if (tab[index+1] == PH_NOTFOUND) { tab[index] = key; return &tab[index+1]; } diff --git a/llt/utf8.c b/llt/utf8.c index c670270..a42158f 100644 --- a/llt/utf8.c +++ b/llt/utf8.c @@ -313,56 +313,56 @@ int hex_digit(char c) (c >= 'a' && c <= 'f')); } -/* assumes that src points to the character after a backslash - returns number of input characters processed */ -int u8_read_escape_sequence(const char *str, u_int32_t *dest) +char read_escape_control_char(char c) { - u_int32_t ch; - char digs[9]="\0\0\0\0\0\0\0\0\0"; - int dno=0, i=1; + if (c == 'n') + return '\n'; + else if (c == 't') + return '\t'; + else if (c == 'r') + return '\r'; + else if (c == 'b') + return '\b'; + else if (c == 'f') + return '\f'; + else if (c == 'v') + return '\v'; + else if (c == 'a') + return '\a'; + return c; +} - ch = (u_int32_t)str[0]; /* take literal character */ - if (str[0] == 'n') - ch = L'\n'; - else if (str[0] == 't') - ch = L'\t'; - else if (str[0] == 'r') - ch = L'\r'; - else if (str[0] == 'b') - ch = L'\b'; - else if (str[0] == 'f') - ch = L'\f'; - else if (str[0] == 'v') - ch = L'\v'; - else if (str[0] == 'a') - ch = L'\a'; - else if (octal_digit(str[0])) { +/* assumes that src points to the character after a backslash + returns number of input characters processed, 0 if error */ +size_t u8_read_escape_sequence(const char *str, size_t ssz, u_int32_t *dest) +{ + assert(ssz > 0); + u_int32_t ch; + char digs[10]; + int dno=0, ndig; + size_t i=1; + char c0 = str[0]; + + if (octal_digit(c0)) { i = 0; do { digs[dno++] = str[i++]; - } while (octal_digit(str[i]) && dno < 3); + } while (i 0) - ch = strtol(digs, NULL, 16); + if (dno == 0) return 0; + digs[dno] = '\0'; + ch = strtol(digs, NULL, 16); } - else if (str[0] == 'u') { - while (hex_digit(str[i]) && dno < 4) { - digs[dno++] = str[i++]; - } - if (dno > 0) - ch = strtol(digs, NULL, 16); - } - else if (str[0] == 'U') { - while (hex_digit(str[i]) && dno < 8) { - digs[dno++] = str[i++]; - } - if (dno > 0) - ch = strtol(digs, NULL, 16); + else { + ch = (u_int32_t)read_escape_control_char(c0); } *dest = ch; @@ -381,7 +381,7 @@ size_t u8_unescape(char *buf, size_t sz, const char *src) while (*src && c < sz) { if (*src == '\\') { src++; - amt = u8_read_escape_sequence(src, &ch); + amt = u8_read_escape_sequence(src, 1000, &ch); } else { ch = (u_int32_t)*src; diff --git a/llt/utf8.h b/llt/utf8.h index 1a5db95..39b7fc6 100644 --- a/llt/utf8.h +++ b/llt/utf8.h @@ -55,10 +55,12 @@ size_t u8_charlen(u_int32_t ch); /* computes the # of bytes needed to encode a WC string as UTF-8 */ size_t u8_codingsize(u_int32_t *wcstr, size_t n); +char read_escape_control_char(char c); + /* assuming src points to the character after a backslash, read an escape sequence, storing the result in dest and returning the number of input characters processed */ -int u8_read_escape_sequence(const char *src, u_int32_t *dest); +size_t u8_read_escape_sequence(const char *src, size_t ssz, u_int32_t *dest); /* given a wide character, convert it to an ASCII escape sequence stored in buf, where buf is "sz" bytes. returns the number of characters output.