renaming 'char' type to 'byte' to avoid confusion

wchar will be used for all individual characters

adding string.find function

fixing bug in #sym(...) if sym was undefined
This commit is contained in:
JeffBezanson 2008-12-24 04:43:36 +00:00
parent 8e4ba69a7b
commit 830e1c986c
10 changed files with 110 additions and 88 deletions

View File

@ -9,7 +9,7 @@ static int ALIGN2, ALIGN4, ALIGN8, ALIGNPTR;
value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym; value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
value_t int64sym, uint64sym; value_t int64sym, uint64sym;
value_t longsym, ulongsym, charsym, wcharsym; value_t longsym, ulongsym, bytesym, wcharsym;
value_t floatsym, doublesym; value_t floatsym, doublesym;
value_t gftypesym, stringtypesym, wcstringtypesym; value_t gftypesym, stringtypesym, wcstringtypesym;
value_t emptystringsym; value_t emptystringsym;
@ -25,7 +25,7 @@ static fltype_t *int32type, *uint32type;
static fltype_t *int64type, *uint64type; static fltype_t *int64type, *uint64type;
static fltype_t *longtype, *ulongtype; static fltype_t *longtype, *ulongtype;
static fltype_t *floattype, *doubletype; static fltype_t *floattype, *doubletype;
fltype_t *chartype, *wchartype; fltype_t *bytetype, *wchartype;
fltype_t *stringtype, *wcstringtype; fltype_t *stringtype, *wcstringtype;
fltype_t *builtintype; fltype_t *builtintype;
@ -231,11 +231,11 @@ static void cv_pin(cvalue_t *cv)
} }
*/ */
#define num_ctor(typenam, cnvt, tag) \ #define num_ctor(typenam, ctype, cnvt, tag) \
static void cvalue_##typenam##_init(fltype_t *type, value_t arg, \ static void cvalue_##typenam##_init(fltype_t *type, value_t arg, \
void *dest) \ void *dest) \
{ \ { \
typenam##_t n=0; \ ctype##_t n=0; \
(void)type; \ (void)type; \
if (isfixnum(arg)) { \ if (isfixnum(arg)) { \
n = numval(arg); \ n = numval(arg); \
@ -244,14 +244,14 @@ static void cvalue_##typenam##_init(fltype_t *type, value_t arg, \
cvalue_t *cv = (cvalue_t*)ptr(arg); \ cvalue_t *cv = (cvalue_t*)ptr(arg); \
void *p = cv_data(cv); \ void *p = cv_data(cv); \
if (valid_numtype(cv_numtype(cv))) \ if (valid_numtype(cv_numtype(cv))) \
n = (typenam##_t)conv_to_##cnvt(p, cv_numtype(cv)); \ n = (ctype##_t)conv_to_##cnvt(p, cv_numtype(cv)); \
else \ else \
goto cnvt_error; \ goto cnvt_error; \
} \ } \
else { \ else { \
goto cnvt_error; \ goto cnvt_error; \
} \ } \
*((typenam##_t*)dest) = n; \ *((ctype##_t*)dest) = n; \
return; \ return; \
cnvt_error: \ cnvt_error: \
type_error(#typenam, "number", arg); \ type_error(#typenam, "number", arg); \
@ -259,37 +259,37 @@ static void cvalue_##typenam##_init(fltype_t *type, value_t arg, \
value_t cvalue_##typenam(value_t *args, u_int32_t nargs) \ value_t cvalue_##typenam(value_t *args, u_int32_t nargs) \
{ \ { \
if (nargs==0) { PUSH(fixnum(0)); args = &Stack[SP-1]; } \ if (nargs==0) { PUSH(fixnum(0)); args = &Stack[SP-1]; } \
value_t cv = cvalue(typenam##type, sizeof(typenam##_t)); \ value_t cv = cvalue(typenam##type, sizeof(ctype##_t)); \
cvalue_##typenam##_init(typenam##type, \ cvalue_##typenam##_init(typenam##type, \
args[0], &((cvalue_t*)ptr(cv))->_space[0]); \ args[0], &((cvalue_t*)ptr(cv))->_space[0]); \
return cv; \ return cv; \
} \ } \
value_t mk_##typenam(typenam##_t n) \ value_t mk_##typenam(ctype##_t n) \
{ \ { \
value_t cv = cvalue(typenam##type, sizeof(typenam##_t)); \ value_t cv = cvalue(typenam##type, sizeof(ctype##_t)); \
*(typenam##_t*)&((cvalue_t*)ptr(cv))->_space[0] = n; \ *(ctype##_t*)&((cvalue_t*)ptr(cv))->_space[0] = n; \
return cv; \ return cv; \
} }
num_ctor(int8, int32, T_INT8) num_ctor(int8, int8, int32, T_INT8)
num_ctor(uint8, uint32, T_UINT8) num_ctor(uint8, uint8, uint32, T_UINT8)
num_ctor(int16, int32, T_INT16) num_ctor(int16, int16, int32, T_INT16)
num_ctor(uint16, uint32, T_UINT16) num_ctor(uint16, uint16, uint32, T_UINT16)
num_ctor(int32, int32, T_INT32) num_ctor(int32, int32, int32, T_INT32)
num_ctor(uint32, uint32, T_UINT32) num_ctor(uint32, uint32, uint32, T_UINT32)
num_ctor(int64, int64, T_INT64) num_ctor(int64, int64, int64, T_INT64)
num_ctor(uint64, uint64, T_UINT64) num_ctor(uint64, uint64, uint64, T_UINT64)
num_ctor(char, uint32, T_UINT8) num_ctor(byte, uint8, uint32, T_UINT8)
num_ctor(wchar, int32, T_INT32) num_ctor(wchar, int32, int32, T_INT32)
#ifdef BITS64 #ifdef BITS64
num_ctor(long, int64, T_INT64) num_ctor(long, long, int64, T_INT64)
num_ctor(ulong, uint64, T_UINT64) num_ctor(ulong, ulong, uint64, T_UINT64)
#else #else
num_ctor(long, int32, T_INT32) num_ctor(long, long, int32, T_INT32)
num_ctor(ulong, uint32, T_UINT32) num_ctor(ulong, ulong, uint32, T_UINT32)
#endif #endif
num_ctor(float, double, T_FLOAT) num_ctor(float, float, double, T_FLOAT)
num_ctor(double, double, T_DOUBLE) num_ctor(double, double, double, T_DOUBLE)
value_t size_wrap(size_t sz) value_t size_wrap(size_t sz)
{ {
@ -313,14 +313,6 @@ size_t toulong(value_t n, char *fname)
return 0; return 0;
} }
value_t char_from_code(uint32_t code)
{
value_t ccode = fixnum(code);
if (code > 0x7f)
return cvalue_wchar(&ccode, 1);
return cvalue_char(&ccode, 1);
}
static void cvalue_enum_init(fltype_t *ft, value_t arg, void *dest) static void cvalue_enum_init(fltype_t *ft, value_t arg, void *dest)
{ {
int n=0; int n=0;
@ -457,7 +449,7 @@ static void cvalue_array_init(fltype_t *ft, value_t arg, void *dest)
static value_t alloc_array(fltype_t *type, size_t sz) static value_t alloc_array(fltype_t *type, size_t sz)
{ {
value_t cv; value_t cv;
if (type->eltype == chartype) { if (type->eltype == bytetype) {
cv = cvalue_string(sz); cv = cvalue_string(sz);
} }
else { else {
@ -556,7 +548,7 @@ static size_t cvalue_union_size(value_t type, int *palign)
// *palign is an output argument giving the alignment required by type // *palign is an output argument giving the alignment required by type
size_t ctype_sizeof(value_t type, int *palign) size_t ctype_sizeof(value_t type, int *palign)
{ {
if (type == int8sym || type == uint8sym || type == charsym) { if (type == int8sym || type == uint8sym || type == bytesym) {
*palign = 1; *palign = 1;
return 1; return 1;
} }
@ -672,7 +664,7 @@ static numerictype_t sym_to_numtype(value_t type)
{ {
if (type == int8sym) if (type == int8sym)
return T_INT8; return T_INT8;
else if (type == uint8sym || type == charsym) else if (type == uint8sym || type == bytesym)
return T_UINT8; return T_UINT8;
else if (type == int16sym) else if (type == int16sym)
return T_INT16; return T_INT16;
@ -868,7 +860,7 @@ void cvalues_init()
ctor_cv_intern(uint32); ctor_cv_intern(uint32);
ctor_cv_intern(int64); ctor_cv_intern(int64);
ctor_cv_intern(uint64); ctor_cv_intern(uint64);
ctor_cv_intern(char); ctor_cv_intern(byte);
ctor_cv_intern(wchar); ctor_cv_intern(wchar);
ctor_cv_intern(long); ctor_cv_intern(long);
ctor_cv_intern(ulong); ctor_cv_intern(ulong);
@ -890,7 +882,7 @@ void cvalues_init()
// todo: autorelease // todo: autorelease
stringtypesym = symbol("*string-type*"); stringtypesym = symbol("*string-type*");
setc(stringtypesym, list2(arraysym, charsym)); setc(stringtypesym, list2(arraysym, bytesym));
wcstringtypesym = symbol("*wcstring-type*"); wcstringtypesym = symbol("*wcstring-type*");
setc(wcstringtypesym, list2(arraysym, wcharsym)); setc(wcstringtypesym, list2(arraysym, wcharsym));

View File

@ -904,7 +904,7 @@ static value_t eval_sexpr(value_t e, uint32_t penv, int tail)
v = size_wrap(cvalue_arraylen(Stack[SP-1])); v = size_wrap(cvalue_arraylen(Stack[SP-1]));
break; break;
} }
else if (v == charsym) { else if (v == bytesym) {
v = fixnum(1); v = fixnum(1);
break; break;
} }

View File

@ -212,7 +212,7 @@ typedef struct {
#define cv_type(cv) (cv_class(cv)->type) #define cv_type(cv) (cv_class(cv)->type)
#define cv_data(cv) ((cv)->data) #define cv_data(cv) ((cv)->data)
#define cv_numtype(cv) (cv_class(cv)->numtype) #define cv_numtype(cv) (cv_class(cv)->numtype)
#define cv_isstr(cv) (cv_class(cv)->eltype == chartype) #define cv_isstr(cv) (cv_class(cv)->eltype == bytetype)
#define cvalue_data(v) cv_data((cvalue_t*)ptr(v)) #define cvalue_data(v) cv_data((cvalue_t*)ptr(v))
@ -232,11 +232,11 @@ typedef value_t (*builtin_t)(value_t*, uint32_t);
extern value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym; extern value_t int8sym, uint8sym, int16sym, uint16sym, int32sym, uint32sym;
extern value_t int64sym, uint64sym; extern value_t int64sym, uint64sym;
extern value_t longsym, ulongsym, charsym, ucharsym, wcharsym; extern value_t longsym, ulongsym, bytesym, wcharsym;
extern value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym; extern value_t structsym, arraysym, enumsym, cfunctionsym, voidsym, pointersym;
extern value_t stringtypesym, wcstringtypesym, emptystringsym; extern value_t stringtypesym, wcstringtypesym, emptystringsym;
extern value_t unionsym, floatsym, doublesym, builtinsym; extern value_t unionsym, floatsym, doublesym, builtinsym;
extern fltype_t *chartype, *wchartype; extern fltype_t *bytetype, *wchartype;
extern fltype_t *stringtype, *wcstringtype; extern fltype_t *stringtype, *wcstringtype;
extern fltype_t *builtintype; extern fltype_t *builtintype;
@ -266,9 +266,9 @@ value_t mk_double(double_t n);
value_t mk_float(float_t n); value_t mk_float(float_t n);
value_t mk_uint32(uint32_t n); value_t mk_uint32(uint32_t n);
value_t mk_uint64(uint64_t n); value_t mk_uint64(uint64_t n);
value_t mk_wchar(int32_t n);
value_t return_from_uint64(uint64_t Uaccum); value_t return_from_uint64(uint64_t Uaccum);
value_t return_from_int64(int64_t Saccum); value_t return_from_int64(int64_t Saccum);
value_t char_from_code(uint32_t code);
typedef struct { typedef struct {
char *name; char *name;
@ -279,7 +279,7 @@ void assign_global_builtins(builtinspec_t *b);
/* builtins */ /* builtins */
value_t fl_hash(value_t *args, u_int32_t nargs); value_t fl_hash(value_t *args, u_int32_t nargs);
value_t cvalue_char(value_t *args, uint32_t nargs); value_t cvalue_byte(value_t *args, uint32_t nargs);
value_t cvalue_wchar(value_t *args, uint32_t nargs); value_t cvalue_wchar(value_t *args, uint32_t nargs);
#endif #endif

View File

@ -411,45 +411,28 @@ static void cvalue_printdata(ios_t *f, void *data, size_t len, value_t type,
{ {
int64_t tmp=0; int64_t tmp=0;
if (type == charsym) { if (type == bytesym) {
// print chars as characters when possible
unsigned char ch = *(unsigned char*)data; unsigned char ch = *(unsigned char*)data;
if (princ) if (princ)
outc(ch, f); outc(ch, f);
else if (weak) else if (weak)
HPOS+=ios_printf(f, "%hhu", ch); HPOS+=ios_printf(f, "0x%hhx", ch);
else if (isprint(ch))
HPOS+=ios_printf(f, "#\\%c", ch);
else else
HPOS+=ios_printf(f, "#char(%hhu)", ch); HPOS+=ios_printf(f, "#byte(0x%hhx)", ch);
} }
/*
else if (type == ucharsym) {
uchar ch = *(uchar*)data;
if (princ)
outc(ch, f);
else {
if (!weak)
ios_printf(f, "#uchar(");
ios_printf(f, "%hhu", ch);
if (!weak)
outs(")", f);
}
}
*/
else if (type == wcharsym) { else if (type == wcharsym) {
uint32_t wc = *(uint32_t*)data; uint32_t wc = *(uint32_t*)data;
char seq[8]; char seq[8];
if (weak) if (princ || iswprint(wc)) {
HPOS+=ios_printf(f, "%d", (int)wc);
else if (princ || (iswprint(wc) && wc>0x7f)) {
// reader only reads #\c syntax as wchar if the code is >0x7f
size_t nb = u8_toutf8(seq, sizeof(seq), &wc, 1); size_t nb = u8_toutf8(seq, sizeof(seq), &wc, 1);
seq[nb] = '\0'; seq[nb] = '\0';
// TODO: better multibyte handling // TODO: better multibyte handling
if (!princ) outs("#\\", f); if (!princ) outs("#\\", f);
outs(seq, f); outs(seq, f);
} }
else if (weak) {
HPOS+=ios_printf(f, "%d", (int)wc);
}
else { else {
HPOS+=ios_printf(f, "#%s(%d)", symbol_name(type), (int)wc); HPOS+=ios_printf(f, "#%s(%d)", symbol_name(type), (int)wc);
} }
@ -544,7 +527,7 @@ static void cvalue_printdata(ios_t *f, void *data, size_t len, value_t type,
elsize = ctype_sizeof(eltype, &junk); elsize = ctype_sizeof(eltype, &junk);
cnt = elsize ? len/elsize : 0; cnt = elsize ? len/elsize : 0;
} }
if (eltype == charsym) { if (eltype == bytesym) {
if (princ) { if (princ) {
ios_write(f, data, len); ios_write(f, data, len);
} }

View File

@ -189,13 +189,7 @@ static u_int32_t peek(ios_t *f)
if (ios_getutf8(f, &cval) == IOS_EOF) if (ios_getutf8(f, &cval) == IOS_EOF)
lerror(ParseError, "read: end of input in character constant"); lerror(ParseError, "read: end of input in character constant");
toktype = TOK_NUM; toktype = TOK_NUM;
tokval = fixnum(cval); tokval = mk_wchar(cval);
if (cval > 0x7f) {
tokval = cvalue_wchar(&tokval, 1);
}
else {
tokval = cvalue_char(&tokval, 1);
}
} }
else if ((char)ch == '(') { else if ((char)ch == '(') {
toktype = TOK_SHARPOPEN; toktype = TOK_SHARPOPEN;
@ -501,7 +495,7 @@ static value_t do_read_sexpr(ios_t *f, value_t label)
PUSH(NIL); PUSH(NIL);
read_list(f, &Stack[SP-1], UNBOUND); read_list(f, &Stack[SP-1], UNBOUND);
v = POP(); v = POP();
return apply(symbol_value(sym), v); return apply(toplevel_eval(sym), v);
case TOK_OPENB: case TOK_OPENB:
return read_vector(f, label, TOK_CLOSEB); return read_vector(f, label, TOK_CLOSEB);
case TOK_SHARPOPEN: case TOK_SHARPOPEN:

View File

@ -109,7 +109,7 @@ value_t fl_string(value_t *args, u_int32_t nargs)
else if (iscvalue(args[i])) { else if (iscvalue(args[i])) {
temp = (cvalue_t*)ptr(args[i]); temp = (cvalue_t*)ptr(args[i]);
t = cv_type(temp); t = cv_type(temp);
if (t == charsym) { if (t == bytesym) {
sz++; sz++;
continue; continue;
} }
@ -136,7 +136,7 @@ value_t fl_string(value_t *args, u_int32_t nargs)
temp = (cvalue_t*)ptr(args[i]); temp = (cvalue_t*)ptr(args[i]);
t = cv_type(temp); t = cv_type(temp);
data = cvalue_data(args[i]); data = cvalue_data(args[i]);
if (t == charsym) { if (t == bytesym) {
*ptr++ = *(char*)data; *ptr++ = *(char*)data;
} }
else if (t == wcharsym) { else if (t == wcharsym) {
@ -225,7 +225,59 @@ value_t fl_string_char(value_t *args, u_int32_t nargs)
size_t sl = u8_seqlen(&s[i]); size_t sl = u8_seqlen(&s[i]);
if (sl > len || i > len-sl) if (sl > len || i > len-sl)
bounds_error("string.char", args[0], args[1]); bounds_error("string.char", args[0], args[1]);
return char_from_code(u8_nextchar(s, &i)); return mk_wchar(u8_nextchar(s, &i));
}
static value_t mem_find_byte(char *s, char c, size_t start, size_t len)
{
char *p = memchr(s+start, c, len-start);
if (p == NULL)
return NIL;
return size_wrap((size_t)(p - s));
}
value_t fl_string_find(value_t *args, u_int32_t nargs)
{
char cbuf[8];
size_t start = 0;
if (nargs == 3)
start = toulong(args[2], "string.find");
else
argcount("string.find", nargs, 2);
char *s = tostring(args[0], "string.find");
size_t len = cv_len((cvalue_t*)ptr(args[0]));
if (start > len)
bounds_error("string.find", args[0], args[2]);
char *needle=NULL; size_t needlesz=0;
if (!iscvalue(args[1]))
type_error("string.find", "string", args[1]);
cvalue_t *cv = (cvalue_t*)ptr(args[1]);
if (isstring(args[1])) {
needlesz = cv_len(cv);
needle = (char*)cv_data(cv);
}
else if (cv_class(cv) == wchartype) {
uint32_t c = *(uint32_t*)cv_data(cv);
if (c <= 0x7f)
return mem_find_byte(s, (char)c, start, len);
needlesz = u8_toutf8(cbuf, sizeof(cbuf), &c, 1);
needle = cbuf;
}
else if (cv_class(cv) == bytetype) {
return mem_find_byte(s, *(char*)cv_data(cv), start, len);
}
if (needlesz == 0)
return fixnum(start);
if (needlesz > len-start)
return NIL;
size_t i;
for(i=start; i < len; i++) {
if (s[i] == needle[0]) {
if (!memcmp(&s[i], needle, needlesz))
return size_wrap(i);
}
}
return NIL;
} }
value_t fl_string_inc(value_t *args, u_int32_t nargs) value_t fl_string_inc(value_t *args, u_int32_t nargs)
@ -274,6 +326,7 @@ static builtinspec_t stringfunc_info[] = {
{ "string.length", fl_string_length }, { "string.length", fl_string_length },
{ "string.split", fl_string_split }, { "string.split", fl_string_split },
{ "string.sub", fl_string_sub }, { "string.sub", fl_string_sub },
{ "string.find", fl_string_find },
{ "string.char", fl_string_char }, { "string.char", fl_string_char },
{ "string.inc", fl_string_inc }, { "string.inc", fl_string_inc },
{ "string.dec", fl_string_dec }, { "string.dec", fl_string_dec },

View File

@ -87,8 +87,8 @@
(define (cadr x) (car (cdr x))) (define (cadr x) (car (cdr x)))
;(setq *special-forms* '(quote cond if and or while lambda label trycatch ;(setq *special-forms* '(quote cond if and or while lambda trycatch
; %top progn)) ; setq progn))
(defun macroexpand (e) (defun macroexpand (e)
((label mexpand ((label mexpand
@ -101,8 +101,7 @@
(cond ((and (consp e) (cond ((and (consp e)
(not (eq (car e) 'quote))) (not (eq (car e) 'quote)))
(let ((newenv (let ((newenv
(if (and (or (eq (car e) 'lambda) (if (and (eq (car e) 'lambda)
(eq (car e) 'label))
(consp (cdr e))) (consp (cdr e)))
(append.2 (cadr e) env) (append.2 (cadr e) env)
env))) env)))

View File

@ -818,7 +818,8 @@ String API
*string.split - (string.split s sep-chars) *string.split - (string.split s sep-chars)
string.trim - (string.trim s chars-at-start chars-at-end) string.trim - (string.trim s chars-at-start chars-at-end)
*string.reverse *string.reverse
string.find - (string.find s str|char), or nil if not found *string.find - (string.find s str|char [offs]), or nil if not found
string.rfind
string.map - (string.map f s) string.map - (string.map f s)
*string.encode - to utf8 *string.encode - to utf8
*string.decode - from utf8 to UCS *string.decode - from utf8 to UCS

View File

@ -114,7 +114,7 @@ void types_init()
mk_primtype(uint64); mk_primtype(uint64);
mk_primtype(long); mk_primtype(long);
mk_primtype(ulong); mk_primtype(ulong);
mk_primtype(char); mk_primtype(byte);
mk_primtype(wchar); mk_primtype(wchar);
mk_primtype(float); mk_primtype(float);
mk_primtype(double); mk_primtype(double);

View File

@ -64,7 +64,7 @@
(assert (equal (uint64 (double -123)) #uint64(0xffffffffffffff85))) (assert (equal (uint64 (double -123)) #uint64(0xffffffffffffff85)))
(assert (equal (string 'sym #char(65) #wchar(945) "blah") "symA\u03B1blah")) (assert (equal (string 'sym #byte(65) #wchar(945) "blah") "symA\u03B1blah"))
; this crashed once ; this crashed once
(for 1 10 (lambda (i) 0)) (for 1 10 (lambda (i) 0))