diff --git a/bin/ikarus b/bin/ikarus index 2012bf6..4f08424 100755 Binary files a/bin/ikarus and b/bin/ikarus differ diff --git a/bin/ikarus-runtime.c b/bin/ikarus-runtime.c index 2e11127..13e00b0 100644 --- a/bin/ikarus-runtime.c +++ b/bin/ikarus-runtime.c @@ -773,6 +773,29 @@ ikrt_strftime(ikp outstr, ikp fmtstr){ return fix(rv); } +ikp +ikrt_bvftime(ikp outbv, ikp fmtbv){ + time_t t; + struct tm* tmp; + t = time(NULL); + tmp = localtime(&t); + if(tmp == NULL){ + fprintf(stderr, "Error in time: %s\n", strerror(errno)); + } + int rv = + strftime((char*)outbv+off_bytevector_data, + unfix(ref(outbv, off_bytevector_length)) + 1, + (char*)fmtbv+off_bytevector_data, + tmp); + if(rv == 0){ + fprintf(stderr, "Error in strftime: %s\n", strerror(errno)); + } + return fix(rv); +} + + + + ikp ikrt_close_file(ikp fd, ikpcb* pcb){ int err = close(unfix(fd)); diff --git a/src/ikarus.boot b/src/ikarus.boot index caf8f53..4f17c54 100644 Binary files a/src/ikarus.boot and b/src/ikarus.boot differ diff --git a/src/ikarus.compiler.ss b/src/ikarus.compiler.ss index f559d41..77db7b7 100644 --- a/src/ikarus.compiler.ss +++ b/src/ikarus.compiler.ss @@ -1943,7 +1943,7 @@ $fx= $fx< $fx<= $fx> $fx>= $fxquotient $fxmodulo $fxsll $fxsra $fxlogxor $exit) (andmap (check op fixnum?) rand*)] [($fixnum->char) - (andmap (check op byte?) rand*)] + (andmap (check op fixnum?) rand*)] [($char->fixnum $char= $char< $char<= $char> $char>= $string) (andmap (check op char?) rand*)] [($make-vector $make-string) diff --git a/src/ikarus.intel-assembler.ss b/src/ikarus.intel-assembler.ss index 113448c..62d0cfa 100644 --- a/src/ikarus.intel-assembler.ss +++ b/src/ikarus.intel-assembler.ss @@ -868,6 +868,8 @@ (vector-set! vec (fx+ reloc-idx 1) v) (set! reloc-idx (fx+ reloc-idx 2))] [(foreign-label) + ;;; FIXME: converted strings should be memoized. + ;;; wait for equal? hash tables. (let ([name (if (string? v) (string->utf8-bytevector v) diff --git a/src/ikarus.transcoders.ss b/src/ikarus.transcoders.ss index aca8ade..c7db14d 100644 --- a/src/ikarus.transcoders.ss +++ b/src/ikarus.transcoders.ss @@ -1,14 +1,12 @@ (library (ikarus transcoders) (export string->utf8-bytevector - #;utf8-bytevector->string) - (import (except (ikarus) string->utf8-bytevector) + utf8-bytevector->string) + (import (except (ikarus) string->utf8-bytevector utf8-bytevector->string) (ikarus system $strings) (ikarus system $bytevectors) (ikarus system $fx) - (ikarus system $chars) - ) - + (ikarus system $chars)) ;;; From http://en.wikipedia.org/wiki/UTF-8 ;;; hexadecimal binary scalar value UTF8 ;;; 000000-00007F 00000000_00000000_0zzzzzzz 0zzzzzzz @@ -24,71 +22,230 @@ ;;; replace: places a U+FFFD in place of the malformed bytes ;;; raise: raises an error - (define (utf8-string-size str) - (let f ([str str] [i 0] [j ($string-length str)] [n 0]) - (cond - [($fx= i j) n] - [else - (let ([c ($string-ref str i)]) - (let ([b ($char->fixnum c)]) - (f str ($fxadd1 i) j - ($fx+ n - (cond - [($fx<= b #x7F) 1] - [($fx<= b #x7FF) 2] - [($fx<= b #xFFFF) 3] - [else 4])))))]))) - - (define (fill-utf8-bytevector bv str) - (let f ([bv bv] [str str] [i 0] [j 0] [n ($string-length str)]) - (cond - [($fx= i n) bv] - [else - (let ([c ($string-ref str i)]) - (let ([b ($char->fixnum c)]) - (cond - [($fx<= b #x7F) - ($bytevector-set! bv j b) - (f bv str ($fxadd1 i) ($fxadd1 j) n)] - [($fx<= b #x7FF) - ($bytevector-set! bv j - ($fxlogor #b11000000 ($fxsra b 6))) - ($bytevector-set! bv ($fx+ j 1) - ($fxlogor #b10000000 ($fxlogand b #b111111))) - (f bv str ($fxadd1 i) ($fx+ j 2) n)] - [($fx<= b #xFFFF) - ($bytevector-set! bv j - ($fxlogor #b11100000 ($fxsra b 12))) - ($bytevector-set! bv ($fx+ j 1) - ($fxlogor #b10000000 ($fxlogand ($fxsra b 6) #b111111))) - ($bytevector-set! bv ($fx+ j 2) - ($fxlogor #b10000000 ($fxlogand b #b111111))) - (f bv str ($fxadd1 i) ($fx+ j 3) n)] - [else - ($bytevector-set! bv j - ($fxlogor #b11110000 ($fxsra b 18))) - ($bytevector-set! bv ($fx+ j 1) - ($fxlogor #b10000000 ($fxlogand ($fxsra b 12) #b111111))) - ($bytevector-set! bv ($fx+ j 2) - ($fxlogor #b10000000 ($fxlogand ($fxsra b 6) #b111111))) - ($bytevector-set! bv ($fx+ j 3) - ($fxlogor #b10000000 ($fxlogand b #b111111))) - (f bv str ($fxadd1 i) ($fx+ j 4) n)])))]))) - - (define string->utf8-bytevector (lambda (str) + (define (utf8-string-size str) + (let f ([str str] [i 0] [j ($string-length str)] [n 0]) + (cond + [($fx= i j) n] + [else + (let ([c ($string-ref str i)]) + (let ([b ($char->fixnum c)]) + (f str ($fxadd1 i) j + ($fx+ n + (cond + [($fx<= b #x7F) 1] + [($fx<= b #x7FF) 2] + [($fx<= b #xFFFF) 3] + [else 4])))))]))) + (define (fill-utf8-bytevector bv str) + (let f ([bv bv] [str str] [i 0] [j 0] [n ($string-length str)]) + (cond + [($fx= i n) bv] + [else + (let ([c ($string-ref str i)]) + (let ([b ($char->fixnum c)]) + (cond + [($fx<= b #x7F) + ($bytevector-set! bv j b) + (f bv str ($fxadd1 i) ($fxadd1 j) n)] + [($fx<= b #x7FF) + ($bytevector-set! bv j + ($fxlogor #b11000000 ($fxsra b 6))) + ($bytevector-set! bv ($fx+ j 1) + ($fxlogor #b10000000 ($fxlogand b #b111111))) + (f bv str ($fxadd1 i) ($fx+ j 2) n)] + [($fx<= b #xFFFF) + ($bytevector-set! bv j + ($fxlogor #b11100000 ($fxsra b 12))) + ($bytevector-set! bv ($fx+ j 1) + ($fxlogor #b10000000 ($fxlogand ($fxsra b 6) #b111111))) + ($bytevector-set! bv ($fx+ j 2) + ($fxlogor #b10000000 ($fxlogand b #b111111))) + (f bv str ($fxadd1 i) ($fx+ j 3) n)] + [else + ($bytevector-set! bv j + ($fxlogor #b11110000 ($fxsra b 18))) + ($bytevector-set! bv ($fx+ j 1) + ($fxlogor #b10000000 ($fxlogand ($fxsra b 12) #b111111))) + ($bytevector-set! bv ($fx+ j 2) + ($fxlogor #b10000000 ($fxlogand ($fxsra b 6) #b111111))) + ($bytevector-set! bv ($fx+ j 3) + ($fxlogor #b10000000 ($fxlogand b #b111111))) + (f bv str ($fxadd1 i) ($fx+ j 4) n)])))]))) (unless (string? str) (error 'string->utf8-bytevector "~s is not a string" str)) (fill-utf8-bytevector ($make-bytevector (utf8-string-size str)) str))) + (define utf8-bytevector->string + (let () + (define who 'utf8-bytevector->string) + (define (count bv mode) + (let f ([x bv] [i 0] [j ($bytevector-length bv)] [n 0] [mode mode]) + (cond + [($fx= i j) n] + [else + (let ([b0 ($bytevector-u8-ref x i)]) + (cond + [($fx<= b0 #x7F) + (f x ($fxadd1 i) j ($fxadd1 n) mode)] + [($fx= ($fxsra b0 5) #b110) + (let ([i ($fxadd1 i)]) + (cond + [($fx< i j) + (let ([b1 ($bytevector-u8-ref x i)]) + (cond + [($fx= ($fxsra b1 6) #b10) + (f x ($fxadd1 i) j ($fxadd1 n) mode)] + [(eq? mode 'ignore) + (f x i j n mode)] + [(eq? mode 'replace) + (f x i j ($fxadd1 n) mode)] + [else + (error who "invalid byte sequence ~s ~s" b0 b1)]))] + [(eq? mode 'ignore) n] + [(eq? mode 'replace) ($fxadd1 n)] + [else + (error who "invalid byte ~s near end of bytevector" b0)]))] + [($fx= ($fxsra b0 4) #b1110) + (cond + [($fx< ($fx+ i 2) j) + (let ([b1 ($bytevector-u8-ref x ($fx+ i 1))] + [b2 ($bytevector-u8-ref x ($fx+ i 2))]) + (cond + [($fx= ($fxsra ($fxlogor b1 b2) 6) #b10) + (f x ($fx+ i 3) j ($fxadd1 n) mode)] + [(eq? mode 'ignore) + (f x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) + (f x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "invalid sequence ~s ~s ~s" b0 b1 b2)]))] + [(eq? mode 'ignore) (f x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) (f x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "incomplete char sequence")])] + [($fx= ($fxsra b0 3) #b11110) + (cond + [($fx< ($fx+ i 3) j) + (let ([b1 ($bytevector-u8-ref x ($fx+ i 1))] + [b2 ($bytevector-u8-ref x ($fx+ i 2))] + [b3 ($bytevector-u8-ref x ($fx+ i 3))]) + (cond + [($fx= ($fxsra ($fxlogor b1 ($fxlogor b2 b3)) 6) #b10) + (f x ($fx+ i 4) j ($fxadd1 n) mode)] + [(eq? mode 'ignore) + (f x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) + (f x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "invalid sequence ~s ~s ~s ~s" b0 b1 b2 b3)]))] + [(eq? mode 'ignore) (f x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) (f x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "incomplete char sequence")])] + [(eq? mode 'ignore) (f x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) (f x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "invalid byte ~s" b0)]))]))) + (define (fill str bv mode) + (let f ([str str] [x bv] [i 0] [j ($bytevector-length bv)] [n 0] [mode mode]) + (cond + [($fx= i j) str] + [else + (let ([b0 ($bytevector-u8-ref x i)]) + (cond + [($fx<= b0 #x7F) + ($string-set! str n ($fixnum->char b0)) + (f str x ($fxadd1 i) j ($fxadd1 n) mode)] + [($fx= ($fxsra b0 5) #b110) + (let ([i ($fxadd1 i)]) + (cond + [($fx< i j) + (let ([b1 ($bytevector-u8-ref x i)]) + (cond + [($fx= ($fxsra b1 6) #b10) + ($string-set! str n + ($fixnum->char + ($fx+ ($fxlogand b1 #b111111) + ($fxsll ($fxlogand b0 #b11111) 6)))) + (f str x ($fxadd1 i) j ($fxadd1 n) mode)] + [(eq? mode 'ignore) + (f str x i j n mode)] + [(eq? mode 'replace) + ($string-set! str n ($fixnum->char #xFFFD)) + (f str x i j ($fxadd1 n) mode)] + [else (error who "BUG")]))] + [(eq? mode 'ignore) str] + [(eq? mode 'replace) + ($string-set! str n ($fixnum->char #xFFFD)) + str] + [else (error who "BUG")]))] + [($fx= ($fxsra b0 4) #b1110) + (cond + [($fx< ($fx+ i 2) j) + (let ([b1 ($bytevector-u8-ref x ($fx+ i 1))] + [b2 ($bytevector-u8-ref x ($fx+ i 2))]) + (cond + [($fx= ($fxsra ($fxlogor b1 b2) 6) #b10) + ($string-set! str n + ($fixnum->char + ($fx+ ($fxlogand b2 #b111111) + ($fx+ ($fxsll ($fxlogand b1 #b111111) 6) + ($fxsll ($fxlogand b0 #b1111) 12))))) + (f str x ($fx+ i 3) j ($fxadd1 n) mode)] + [(eq? mode 'ignore) + (f str x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) + ($string-set! str n ($fixnum->char #xFFFD)) + (f str x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "BUG")]))] + [(eq? mode 'ignore) (f str x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) + ($string-set! str n ($fixnum->char #xFFFD)) + (f str x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "BUG: incomplete char sequence")])] + [($fx= ($fxsra b0 3) #b11110) + (cond + [($fx< ($fx+ i 3) j) + (let ([b1 ($bytevector-u8-ref x ($fx+ i 1))] + [b2 ($bytevector-u8-ref x ($fx+ i 2))] + [b3 ($bytevector-u8-ref x ($fx+ i 3))]) + (cond + [($fx= ($fxsra ($fxlogor b1 ($fxlogor b2 b3)) 6) #b10) + ($string-set! str n + ($fixnum->char + ($fx+ ($fxlogand b3 #b111111) + ($fx+ ($fxsll ($fxlogand b2 #b111111) 6) + ($fx+ ($fxsll ($fxlogand b1 #b111111) 12) + ($fxsll ($fxlogand b0 #b111) 18)))))) + (f str x ($fx+ i 4) j ($fxadd1 n) mode)] + [(eq? mode 'ignore) + (f str x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) + ($string-set! str n ($fixnum->char #xFFFD)) + (f str x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "BUG")]))] + [(eq? mode 'ignore) (f str x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) + ($string-set! str n ($fixnum->char #xFFFD)) + (f str x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "BUG")])] + [(eq? mode 'ignore) (f str x ($fxadd1 i) j n mode)] + [(eq? mode 'replace) + ($string-set! str n ($fixnum->char #xFFFD)) + (f str x ($fxadd1 i) j ($fxadd1 n) mode)] + [else (error who "BUG")]))]))) + (define (convert bv mode) + (fill ($make-string (count bv mode)) bv mode)) + (case-lambda + [(bv) (convert bv 'error)] + [(bv handling-mode) + (unless (memq handling-mode '(ignore replace raise)) + (error 'utf8-bytevector->string + "~s is not a valid handling mode" + handling-mode)) + (convert bv handling-mode)]))) ) #!eof - (unless (memq handling-mode '(ignore replace raise)) - (error 'string->utf8-bytevector "~s is not a valid hanlding-mode, shound be one of ignore, replace, or raise" handling-mode)) diff --git a/src/makefile.ss b/src/makefile.ss index 69e4e72..421d776 100755 --- a/src/makefile.ss +++ b/src/makefile.ss @@ -39,6 +39,7 @@ "ikarus.chars.ss" "ikarus.records.ss" "ikarus.strings.ss" + "ikarus.transcoders.ss" "ikarus.date-string.ss" "ikarus.symbols.ss" "ikarus.vectors.ss" @@ -69,7 +70,6 @@ "ikarus.posix.ss" "ikarus.timer.ss" "ikarus.bytevectors.ss" - "ikarus.transcoders.ss" "ikarus.main.ss")) (define ikarus-system-macros @@ -293,6 +293,7 @@ [uint-list->bytevector i] [sint-list->bytevector i] [string->utf8-bytevector i] + [utf8-bytevector->string i] [for-each i r] [map i r]