From 62e5c359d0101a763613f294f4847d3f7c8d012b Mon Sep 17 00:00:00 2001 From: JeffBezanson Date: Tue, 5 Aug 2008 04:34:14 +0000 Subject: [PATCH] adding string.inc and string.dec moving string functions to their own file --- femtolisp/Makefile | 2 +- femtolisp/builtins.c | 235 +---------------------------------- femtolisp/cvalues.c | 5 + femtolisp/flisp.c | 3 +- femtolisp/string.c | 285 +++++++++++++++++++++++++++++++++++++++++++ femtolisp/todo | 8 +- 6 files changed, 301 insertions(+), 237 deletions(-) create mode 100644 femtolisp/string.c diff --git a/femtolisp/Makefile b/femtolisp/Makefile index 2b9db26..7ee8a3e 100644 --- a/femtolisp/Makefile +++ b/femtolisp/Makefile @@ -1,7 +1,7 @@ CC = gcc NAME = flisp -SRCS = $(NAME).c builtins.c equal.c +SRCS = $(NAME).c equal.c builtins.c string.c OBJS = $(SRCS:%.c=%.o) DOBJS = $(SRCS:%.c=%.do) EXENAME = $(NAME) diff --git a/femtolisp/builtins.c b/femtolisp/builtins.c index c4e8f7f..ce08b02 100644 --- a/femtolisp/builtins.c +++ b/femtolisp/builtins.c @@ -234,226 +234,6 @@ value_t fl_vector_alloc(value_t *args, u_int32_t nargs) return v; } -int isstring(value_t v) -{ - return (iscvalue(v) && ((cvalue_t*)ptr(v))->flags.cstring); -} - -value_t fl_intern(value_t *args, u_int32_t nargs) -{ - argcount("intern", nargs, 1); - if (!isstring(args[0])) - type_error("intern", "string", args[0]); - return symbol(cvalue_data(args[0])); -} - -value_t fl_stringp(value_t *args, u_int32_t nargs) -{ - argcount("stringp", nargs, 1); - return isstring(args[0]) ? T : NIL; -} - -value_t fl_string_length(value_t *args, u_int32_t nargs) -{ - argcount("string.length", nargs, 1); - if (!isstring(args[0])) - type_error("string.length", "string", args[0]); - size_t len = cv_len((cvalue_t*)ptr(args[0])); - return size_wrap(u8_charnum(cvalue_data(args[0]), len)); -} - -value_t fl_string_reverse(value_t *args, u_int32_t nargs) -{ - argcount("string.reverse", nargs, 1); - if (!isstring(args[0])) - type_error("string.reverse", "string", args[0]); - size_t len = cv_len((cvalue_t*)ptr(args[0])); - value_t ns = cvalue_string(len); - u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len); - return ns; -} - -value_t fl_string_encode(value_t *args, u_int32_t nargs) -{ - argcount("string.encode", nargs, 1); - if (iscvalue(args[0])) { - cvalue_t *cv = (cvalue_t*)ptr(args[0]); - value_t t = cv_type(cv); - if (iscons(t) && car_(t) == arraysym && - iscons(cdr_(t)) && car_(cdr_(t)) == wcharsym) { - size_t nc = cv_len(cv) / sizeof(uint32_t); - uint32_t *ptr = (uint32_t*)cv_data(cv); - size_t nbytes = u8_codingsize(ptr, nc); - value_t str = cvalue_string(nbytes); - ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer - u8_toutf8(cvalue_data(str), nbytes, ptr, nc); - return str; - } - } - type_error("string.encode", "wide character array", args[0]); -} - -value_t fl_string_decode(value_t *args, u_int32_t nargs) -{ - int term=0; - if (nargs == 2) { - term = (POP() != NIL); - nargs--; - } - argcount("string.decode", nargs, 1); - if (!isstring(args[0])) - type_error("string.decode", "string", args[0]); - cvalue_t *cv = (cvalue_t*)ptr(args[0]); - char *ptr = (char*)cv_data(cv); - size_t nb = cv_len(cv); - size_t nc = u8_charnum(ptr, nb); - size_t newsz = nc*sizeof(uint32_t); - if (term) newsz += sizeof(uint32_t); - value_t wcstr = cvalue(symbol_value(wcstringtypesym), newsz); - ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer - uint32_t *pwc = cvalue_data(wcstr); - u8_toucs(pwc, nc, ptr, nb); - if (term) pwc[nc] = 0; - return wcstr; -} - -value_t fl_string(value_t *args, u_int32_t nargs) -{ - value_t cv, t; - u_int32_t i; - size_t len, sz = 0; - cvalue_t *temp; - char *data; - wchar_t wc; - - for(i=0; i < nargs; i++) { - if (issymbol(args[i])) { - sz += strlen(symbol_name(args[i])); - continue; - } - else if (iscvalue(args[i])) { - temp = (cvalue_t*)ptr(args[i]); - t = cv_type(temp); - if (t == charsym) { - sz++; - continue; - } - else if (t == wcharsym) { - wc = *(wchar_t*)cv_data(temp); - sz += u8_charlen(wc); - continue; - } - else if (temp->flags.cstring) { - sz += cv_len(temp); - continue; - } - } - lerror(ArgError, "string: expected string, symbol or character"); - } - cv = cvalue_string(sz); - char *ptr = cvalue_data(cv); - for(i=0; i < nargs; i++) { - if (issymbol(args[i])) { - char *name = symbol_name(args[i]); - while (*name) *ptr++ = *name++; - } - else { - temp = (cvalue_t*)ptr(args[i]); - t = cv_type(temp); - data = cvalue_data(args[i]); - if (t == charsym) { - *ptr++ = *(char*)data; - } - else if (t == wcharsym) { - ptr += u8_wc_toutf8(ptr, *(wchar_t*)data); - } - else { - len = cv_len(temp); - memcpy(ptr, data, len); - ptr += len; - } - } - } - return cv; -} - -value_t fl_string_split(value_t *args, u_int32_t nargs) -{ - argcount("string.split", nargs, 2); - char *s = tostring(args[0], "string.split"); - char *delim = tostring(args[1], "string.split"); - size_t len = cv_len((cvalue_t*)ptr(args[0])); - size_t dlen = cv_len((cvalue_t*)ptr(args[1])); - PUSH(NIL); - size_t ssz, tokend=0, tokstart=0, i=0; - value_t c=NIL; - size_t junk; - do { - // find and allocate next token - tokstart = tokend = i; - while (i < len && - !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk)) - tokend = i; - ssz = tokend - tokstart; - PUSH(c); // save previous cons cell - c = fl_cons(cvalue_string(ssz), NIL); - - // we've done allocation; reload movable pointers - s = cv_data((cvalue_t*)ptr(args[0])); - delim = cv_data((cvalue_t*)ptr(args[1])); - - if (ssz) memcpy(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz); - - // link new cell - if (Stack[SP-1] == NIL) { - Stack[SP-2] = c; // first time, save first cons - (void)POP(); - } - else { - ((cons_t*)ptr(POP()))->cdr = c; - } - - // note this tricky condition: if the string ends with a - // delimiter, we need to go around one more time to add an - // empty string. this happens when (i==len && tokend len) - bounds_error("string.sub", args[0], args[1]); - i2 = toulong(args[2], "string.sub"); - if (i2 > len) - bounds_error("string.sub", args[0], args[2]); - if (i2 <= i1) - return cvalue_string(0); - value_t ns = cvalue_string(i2-i1); - memcpy(cv_data((cvalue_t*)ptr(ns)), &s[i1], i2-i1); - return ns; -} - -value_t fl_string_char(value_t *args, u_int32_t nargs) -{ - argcount("string.char", nargs, 2); - char *s = tostring(args[0], "string.char"); - size_t len = cv_len((cvalue_t*)ptr(args[0])); - size_t i; - i = toulong(args[1], "string.char"); - if (i > len) - bounds_error("string.char", args[0], args[1]); - size_t sl = u8_seqlen(&s[i]); - if (sl > len || i > len-sl) - bounds_error("string.char", args[0], args[1]); - return char_from_code(u8_nextchar(s, &i)); -} - value_t fl_time_now(value_t *args, u_int32_t nargs) { argcount("time.now", nargs, 0); @@ -559,6 +339,8 @@ value_t fl_randd(value_t *args, u_int32_t nargs) return mk_double(rand_double()); } +extern void stringfuncs_init(); + void builtins_init() { set(symbol("set-syntax"), guestfunc(fl_setsyntax)); @@ -572,22 +354,11 @@ void builtins_init() set(symbol("read"), guestfunc(fl_read)); set(symbol("load"), guestfunc(fl_load)); set(symbol("exit"), guestfunc(fl_exit)); - set(symbol("intern"), guestfunc(fl_intern)); set(symbol("fixnum"), guestfunc(fl_fixnum)); set(symbol("truncate"), guestfunc(fl_truncate)); set(symbol("vector.alloc"), guestfunc(fl_vector_alloc)); - set(symbol("string"), guestfunc(fl_string)); - set(symbol("stringp"), guestfunc(fl_stringp)); - set(symbol("string.length"), guestfunc(fl_string_length)); - set(symbol("string.split"), guestfunc(fl_string_split)); - set(symbol("string.sub"), guestfunc(fl_string_sub)); - set(symbol("string.char"), guestfunc(fl_string_char)); - set(symbol("string.reverse"), guestfunc(fl_string_reverse)); - set(symbol("string.encode"), guestfunc(fl_string_encode)); - set(symbol("string.decode"), guestfunc(fl_string_decode)); - set(symbol("time.now"), guestfunc(fl_time_now)); set(symbol("time.string"), guestfunc(fl_time_string)); @@ -600,4 +371,6 @@ void builtins_init() set(symbol("os.getenv"), guestfunc(fl_os_getenv)); set(symbol("os.setenv"), guestfunc(fl_os_setenv)); + + stringfuncs_init(); } diff --git a/femtolisp/cvalues.c b/femtolisp/cvalues.c index 5d07dd7..1e0b1d4 100644 --- a/femtolisp/cvalues.c +++ b/femtolisp/cvalues.c @@ -183,6 +183,11 @@ value_t cvalue_pinned_cstring(char *str) return v; } +int isstring(value_t v) +{ + return (iscvalue(v) && ((cvalue_t*)ptr(v))->flags.cstring); +} + // convert to malloc representation (fixed address) /* static void cv_pin(cvalue_t *cv) diff --git a/femtolisp/flisp.c b/femtolisp/flisp.c index c48c95f..8eef772 100644 --- a/femtolisp/flisp.c +++ b/femtolisp/flisp.c @@ -397,7 +397,8 @@ static value_t relocate(value_t v) else if (iscvalue(v)) { return cvalue_relocate(v); } - else if (ismanaged(v) && issymbol(v)) { + else if (ismanaged(v)) { + assert(issymbol(v)); gensym_t *gs = (gensym_t*)ptr(v); if (gs->id == 0xffffffff) return gs->binding; diff --git a/femtolisp/string.c b/femtolisp/string.c new file mode 100644 index 0000000..beda106 --- /dev/null +++ b/femtolisp/string.c @@ -0,0 +1,285 @@ +/* + string functions +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "llt.h" +#include "flisp.h" + +value_t fl_intern(value_t *args, u_int32_t nargs) +{ + argcount("intern", nargs, 1); + if (!isstring(args[0])) + type_error("intern", "string", args[0]); + return symbol(cvalue_data(args[0])); +} + +value_t fl_stringp(value_t *args, u_int32_t nargs) +{ + argcount("stringp", nargs, 1); + return isstring(args[0]) ? T : NIL; +} + +value_t fl_string_length(value_t *args, u_int32_t nargs) +{ + argcount("string.length", nargs, 1); + if (!isstring(args[0])) + type_error("string.length", "string", args[0]); + size_t len = cv_len((cvalue_t*)ptr(args[0])); + return size_wrap(u8_charnum(cvalue_data(args[0]), len)); +} + +value_t fl_string_reverse(value_t *args, u_int32_t nargs) +{ + argcount("string.reverse", nargs, 1); + if (!isstring(args[0])) + type_error("string.reverse", "string", args[0]); + size_t len = cv_len((cvalue_t*)ptr(args[0])); + value_t ns = cvalue_string(len); + u8_reverse(cvalue_data(ns), cvalue_data(args[0]), len); + return ns; +} + +value_t fl_string_encode(value_t *args, u_int32_t nargs) +{ + argcount("string.encode", nargs, 1); + if (iscvalue(args[0])) { + cvalue_t *cv = (cvalue_t*)ptr(args[0]); + value_t t = cv_type(cv); + if (iscons(t) && car_(t) == arraysym && + iscons(cdr_(t)) && car_(cdr_(t)) == wcharsym) { + size_t nc = cv_len(cv) / sizeof(uint32_t); + uint32_t *ptr = (uint32_t*)cv_data(cv); + size_t nbytes = u8_codingsize(ptr, nc); + value_t str = cvalue_string(nbytes); + ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer + u8_toutf8(cvalue_data(str), nbytes, ptr, nc); + return str; + } + } + type_error("string.encode", "wide character array", args[0]); +} + +value_t fl_string_decode(value_t *args, u_int32_t nargs) +{ + int term=0; + if (nargs == 2) { + term = (POP() != NIL); + nargs--; + } + argcount("string.decode", nargs, 1); + if (!isstring(args[0])) + type_error("string.decode", "string", args[0]); + cvalue_t *cv = (cvalue_t*)ptr(args[0]); + char *ptr = (char*)cv_data(cv); + size_t nb = cv_len(cv); + size_t nc = u8_charnum(ptr, nb); + size_t newsz = nc*sizeof(uint32_t); + if (term) newsz += sizeof(uint32_t); + value_t wcstr = cvalue(symbol_value(wcstringtypesym), newsz); + ptr = cv_data((cvalue_t*)ptr(args[0])); // relocatable pointer + uint32_t *pwc = cvalue_data(wcstr); + u8_toucs(pwc, nc, ptr, nb); + if (term) pwc[nc] = 0; + return wcstr; +} + +value_t fl_string(value_t *args, u_int32_t nargs) +{ + value_t cv, t; + u_int32_t i; + size_t len, sz = 0; + cvalue_t *temp; + char *data; + wchar_t wc; + + for(i=0; i < nargs; i++) { + if (issymbol(args[i])) { + sz += strlen(symbol_name(args[i])); + continue; + } + else if (iscvalue(args[i])) { + temp = (cvalue_t*)ptr(args[i]); + t = cv_type(temp); + if (t == charsym) { + sz++; + continue; + } + else if (t == wcharsym) { + wc = *(wchar_t*)cv_data(temp); + sz += u8_charlen(wc); + continue; + } + else if (temp->flags.cstring) { + sz += cv_len(temp); + continue; + } + } + lerror(ArgError, "string: expected string, symbol or character"); + } + cv = cvalue_string(sz); + char *ptr = cvalue_data(cv); + for(i=0; i < nargs; i++) { + if (issymbol(args[i])) { + char *name = symbol_name(args[i]); + while (*name) *ptr++ = *name++; + } + else { + temp = (cvalue_t*)ptr(args[i]); + t = cv_type(temp); + data = cvalue_data(args[i]); + if (t == charsym) { + *ptr++ = *(char*)data; + } + else if (t == wcharsym) { + ptr += u8_wc_toutf8(ptr, *(wchar_t*)data); + } + else { + len = cv_len(temp); + memcpy(ptr, data, len); + ptr += len; + } + } + } + return cv; +} + +value_t fl_string_split(value_t *args, u_int32_t nargs) +{ + argcount("string.split", nargs, 2); + char *s = tostring(args[0], "string.split"); + char *delim = tostring(args[1], "string.split"); + size_t len = cv_len((cvalue_t*)ptr(args[0])); + size_t dlen = cv_len((cvalue_t*)ptr(args[1])); + PUSH(NIL); + size_t ssz, tokend=0, tokstart=0, i=0; + value_t c=NIL; + size_t junk; + do { + // find and allocate next token + tokstart = tokend = i; + while (i < len && + !u8_memchr(delim, u8_nextmemchar(s, &i), dlen, &junk)) + tokend = i; + ssz = tokend - tokstart; + PUSH(c); // save previous cons cell + c = fl_cons(cvalue_string(ssz), NIL); + + // we've done allocation; reload movable pointers + s = cv_data((cvalue_t*)ptr(args[0])); + delim = cv_data((cvalue_t*)ptr(args[1])); + + if (ssz) memcpy(cv_data((cvalue_t*)ptr(car_(c))), &s[tokstart], ssz); + + // link new cell + if (Stack[SP-1] == NIL) { + Stack[SP-2] = c; // first time, save first cons + (void)POP(); + } + else { + ((cons_t*)ptr(POP()))->cdr = c; + } + + // note this tricky condition: if the string ends with a + // delimiter, we need to go around one more time to add an + // empty string. this happens when (i==len && tokend len) + bounds_error("string.sub", args[0], args[1]); + i2 = toulong(args[2], "string.sub"); + if (i2 > len) + bounds_error("string.sub", args[0], args[2]); + if (i2 <= i1) + return cvalue_string(0); + value_t ns = cvalue_string(i2-i1); + memcpy(cv_data((cvalue_t*)ptr(ns)), &s[i1], i2-i1); + return ns; +} + +value_t fl_string_char(value_t *args, u_int32_t nargs) +{ + argcount("string.char", nargs, 2); + char *s = tostring(args[0], "string.char"); + size_t len = cv_len((cvalue_t*)ptr(args[0])); + size_t i = toulong(args[1], "string.char"); + if (i >= len) + bounds_error("string.char", args[0], args[1]); + size_t sl = u8_seqlen(&s[i]); + if (sl > len || i > len-sl) + bounds_error("string.char", args[0], args[1]); + return char_from_code(u8_nextchar(s, &i)); +} + +value_t fl_string_inc(value_t *args, u_int32_t nargs) +{ + if (nargs < 2 || nargs > 3) + argcount("string.inc", nargs, 2); + char *s = tostring(args[0], "string.inc"); + size_t len = cv_len((cvalue_t*)ptr(args[0])); + size_t i = toulong(args[1], "string.inc"); + size_t cnt = 1; + if (nargs == 3) + cnt = toulong(args[2], "string.inc"); + while (cnt--) { + if (i >= len) + bounds_error("string.inc", args[0], args[1]); + u8_inc(s, &i); + } + return size_wrap(i); +} + +value_t fl_string_dec(value_t *args, u_int32_t nargs) +{ + if (nargs < 2 || nargs > 3) + argcount("string.dec", nargs, 2); + char *s = tostring(args[0], "string.dec"); + size_t len = cv_len((cvalue_t*)ptr(args[0])); + size_t i = toulong(args[1], "string.dec"); + size_t cnt = 1; + if (nargs == 3) + cnt = toulong(args[2], "string.dec"); + // note: i is allowed to start at index len + if (i > len) + bounds_error("string.dec", args[0], args[1]); + while (cnt--) { + if (i == 0) + bounds_error("string.dec", args[0], args[1]); + u8_dec(s, &i); + } + return size_wrap(i); +} + +void stringfuncs_init() +{ + set(symbol("intern"), guestfunc(fl_intern)); + + set(symbol("string"), guestfunc(fl_string)); + set(symbol("stringp"), guestfunc(fl_stringp)); + set(symbol("string.length"), guestfunc(fl_string_length)); + set(symbol("string.split"), guestfunc(fl_string_split)); + set(symbol("string.sub"), guestfunc(fl_string_sub)); + set(symbol("string.char"), guestfunc(fl_string_char)); + set(symbol("string.inc"), guestfunc(fl_string_inc)); + set(symbol("string.dec"), guestfunc(fl_string_dec)); + set(symbol("string.reverse"), guestfunc(fl_string_reverse)); + set(symbol("string.encode"), guestfunc(fl_string_encode)); + set(symbol("string.decode"), guestfunc(fl_string_decode)); +} diff --git a/femtolisp/todo b/femtolisp/todo index 94f5514..5c69fd0 100644 --- a/femtolisp/todo +++ b/femtolisp/todo @@ -768,8 +768,8 @@ raise `(,e ,S). first argument e should be a symbol. String API *string - append/construct - string.inc - (string.inc s i [nchars]) - string.dec +*string.inc - (string.inc s i [nchars]) +*string.dec string.count - # of chars between 2 byte offsets string.width - # columns *string.char - char at byte offset @@ -798,8 +798,8 @@ IOStream API stream.copy - (stream.copy to from [nbytes]) stream.copyuntil - (stream.copy to from byte) stream.flush - stream.pos - stream.seek + stream.pos - (stream.pos s [set-pos]) + stream.seek - (stream.seek s offset) stream.trunc stream.getc - get utf8 character(s)