* Added utf8-bytevector->string decoder
This commit is contained in:
parent
10f5e53338
commit
85944a2cff
BIN
bin/ikarus
BIN
bin/ikarus
Binary file not shown.
|
@ -773,6 +773,29 @@ ikrt_strftime(ikp outstr, ikp fmtstr){
|
|||
return fix(rv);
|
||||
}
|
||||
|
||||
ikp
|
||||
ikrt_bvftime(ikp outbv, ikp fmtbv){
|
||||
time_t t;
|
||||
struct tm* tmp;
|
||||
t = time(NULL);
|
||||
tmp = localtime(&t);
|
||||
if(tmp == NULL){
|
||||
fprintf(stderr, "Error in time: %s\n", strerror(errno));
|
||||
}
|
||||
int rv =
|
||||
strftime((char*)outbv+off_bytevector_data,
|
||||
unfix(ref(outbv, off_bytevector_length)) + 1,
|
||||
(char*)fmtbv+off_bytevector_data,
|
||||
tmp);
|
||||
if(rv == 0){
|
||||
fprintf(stderr, "Error in strftime: %s\n", strerror(errno));
|
||||
}
|
||||
return fix(rv);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
ikp
|
||||
ikrt_close_file(ikp fd, ikpcb* pcb){
|
||||
int err = close(unfix(fd));
|
||||
|
|
BIN
src/ikarus.boot
BIN
src/ikarus.boot
Binary file not shown.
|
@ -1943,7 +1943,7 @@
|
|||
$fx= $fx< $fx<= $fx> $fx>= $fxquotient $fxmodulo $fxsll $fxsra $fxlogxor $exit)
|
||||
(andmap (check op fixnum?) rand*)]
|
||||
[($fixnum->char)
|
||||
(andmap (check op byte?) rand*)]
|
||||
(andmap (check op fixnum?) rand*)]
|
||||
[($char->fixnum $char= $char< $char<= $char> $char>= $string)
|
||||
(andmap (check op char?) rand*)]
|
||||
[($make-vector $make-string)
|
||||
|
|
|
@ -868,6 +868,8 @@
|
|||
(vector-set! vec (fx+ reloc-idx 1) v)
|
||||
(set! reloc-idx (fx+ reloc-idx 2))]
|
||||
[(foreign-label)
|
||||
;;; FIXME: converted strings should be memoized.
|
||||
;;; wait for equal? hash tables.
|
||||
(let ([name
|
||||
(if (string? v)
|
||||
(string->utf8-bytevector v)
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
|
||||
(library (ikarus transcoders)
|
||||
(export string->utf8-bytevector
|
||||
#;utf8-bytevector->string)
|
||||
(import (except (ikarus) string->utf8-bytevector)
|
||||
utf8-bytevector->string)
|
||||
(import (except (ikarus) string->utf8-bytevector utf8-bytevector->string)
|
||||
(ikarus system $strings)
|
||||
(ikarus system $bytevectors)
|
||||
(ikarus system $fx)
|
||||
(ikarus system $chars)
|
||||
)
|
||||
|
||||
(ikarus system $chars))
|
||||
;;; From http://en.wikipedia.org/wiki/UTF-8
|
||||
;;; hexadecimal binary scalar value UTF8
|
||||
;;; 000000-00007F 00000000_00000000_0zzzzzzz 0zzzzzzz
|
||||
|
@ -24,6 +22,8 @@
|
|||
;;; replace: places a U+FFFD in place of the malformed bytes
|
||||
;;; raise: raises an error
|
||||
|
||||
(define string->utf8-bytevector
|
||||
(lambda (str)
|
||||
(define (utf8-string-size str)
|
||||
(let f ([str str] [i 0] [j ($string-length str)] [n 0])
|
||||
(cond
|
||||
|
@ -38,7 +38,6 @@
|
|||
[($fx<= b #x7FF) 2]
|
||||
[($fx<= b #xFFFF) 3]
|
||||
[else 4])))))])))
|
||||
|
||||
(define (fill-utf8-bytevector bv str)
|
||||
(let f ([bv bv] [str str] [i 0] [j 0] [n ($string-length str)])
|
||||
(cond
|
||||
|
@ -74,21 +73,179 @@
|
|||
($bytevector-set! bv ($fx+ j 3)
|
||||
($fxlogor #b10000000 ($fxlogand b #b111111)))
|
||||
(f bv str ($fxadd1 i) ($fx+ j 4) n)])))])))
|
||||
|
||||
|
||||
(define string->utf8-bytevector
|
||||
(lambda (str)
|
||||
(unless (string? str)
|
||||
(error 'string->utf8-bytevector "~s is not a string" str))
|
||||
(fill-utf8-bytevector
|
||||
($make-bytevector (utf8-string-size str))
|
||||
str)))
|
||||
|
||||
(define utf8-bytevector->string
|
||||
(let ()
|
||||
(define who 'utf8-bytevector->string)
|
||||
(define (count bv mode)
|
||||
(let f ([x bv] [i 0] [j ($bytevector-length bv)] [n 0] [mode mode])
|
||||
(cond
|
||||
[($fx= i j) n]
|
||||
[else
|
||||
(let ([b0 ($bytevector-u8-ref x i)])
|
||||
(cond
|
||||
[($fx<= b0 #x7F)
|
||||
(f x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[($fx= ($fxsra b0 5) #b110)
|
||||
(let ([i ($fxadd1 i)])
|
||||
(cond
|
||||
[($fx< i j)
|
||||
(let ([b1 ($bytevector-u8-ref x i)])
|
||||
(cond
|
||||
[($fx= ($fxsra b1 6) #b10)
|
||||
(f x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[(eq? mode 'ignore)
|
||||
(f x i j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
(f x i j ($fxadd1 n) mode)]
|
||||
[else
|
||||
(error who "invalid byte sequence ~s ~s" b0 b1)]))]
|
||||
[(eq? mode 'ignore) n]
|
||||
[(eq? mode 'replace) ($fxadd1 n)]
|
||||
[else
|
||||
(error who "invalid byte ~s near end of bytevector" b0)]))]
|
||||
[($fx= ($fxsra b0 4) #b1110)
|
||||
(cond
|
||||
[($fx< ($fx+ i 2) j)
|
||||
(let ([b1 ($bytevector-u8-ref x ($fx+ i 1))]
|
||||
[b2 ($bytevector-u8-ref x ($fx+ i 2))])
|
||||
(cond
|
||||
[($fx= ($fxsra ($fxlogor b1 b2) 6) #b10)
|
||||
(f x ($fx+ i 3) j ($fxadd1 n) mode)]
|
||||
[(eq? mode 'ignore)
|
||||
(f x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
(f x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "invalid sequence ~s ~s ~s" b0 b1 b2)]))]
|
||||
[(eq? mode 'ignore) (f x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace) (f x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "incomplete char sequence")])]
|
||||
[($fx= ($fxsra b0 3) #b11110)
|
||||
(cond
|
||||
[($fx< ($fx+ i 3) j)
|
||||
(let ([b1 ($bytevector-u8-ref x ($fx+ i 1))]
|
||||
[b2 ($bytevector-u8-ref x ($fx+ i 2))]
|
||||
[b3 ($bytevector-u8-ref x ($fx+ i 3))])
|
||||
(cond
|
||||
[($fx= ($fxsra ($fxlogor b1 ($fxlogor b2 b3)) 6) #b10)
|
||||
(f x ($fx+ i 4) j ($fxadd1 n) mode)]
|
||||
[(eq? mode 'ignore)
|
||||
(f x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
(f x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "invalid sequence ~s ~s ~s ~s" b0 b1 b2 b3)]))]
|
||||
[(eq? mode 'ignore) (f x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace) (f x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "incomplete char sequence")])]
|
||||
[(eq? mode 'ignore) (f x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace) (f x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "invalid byte ~s" b0)]))])))
|
||||
(define (fill str bv mode)
|
||||
(let f ([str str] [x bv] [i 0] [j ($bytevector-length bv)] [n 0] [mode mode])
|
||||
(cond
|
||||
[($fx= i j) str]
|
||||
[else
|
||||
(let ([b0 ($bytevector-u8-ref x i)])
|
||||
(cond
|
||||
[($fx<= b0 #x7F)
|
||||
($string-set! str n ($fixnum->char b0))
|
||||
(f str x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[($fx= ($fxsra b0 5) #b110)
|
||||
(let ([i ($fxadd1 i)])
|
||||
(cond
|
||||
[($fx< i j)
|
||||
(let ([b1 ($bytevector-u8-ref x i)])
|
||||
(cond
|
||||
[($fx= ($fxsra b1 6) #b10)
|
||||
($string-set! str n
|
||||
($fixnum->char
|
||||
($fx+ ($fxlogand b1 #b111111)
|
||||
($fxsll ($fxlogand b0 #b11111) 6))))
|
||||
(f str x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[(eq? mode 'ignore)
|
||||
(f str x i j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
($string-set! str n ($fixnum->char #xFFFD))
|
||||
(f str x i j ($fxadd1 n) mode)]
|
||||
[else (error who "BUG")]))]
|
||||
[(eq? mode 'ignore) str]
|
||||
[(eq? mode 'replace)
|
||||
($string-set! str n ($fixnum->char #xFFFD))
|
||||
str]
|
||||
[else (error who "BUG")]))]
|
||||
[($fx= ($fxsra b0 4) #b1110)
|
||||
(cond
|
||||
[($fx< ($fx+ i 2) j)
|
||||
(let ([b1 ($bytevector-u8-ref x ($fx+ i 1))]
|
||||
[b2 ($bytevector-u8-ref x ($fx+ i 2))])
|
||||
(cond
|
||||
[($fx= ($fxsra ($fxlogor b1 b2) 6) #b10)
|
||||
($string-set! str n
|
||||
($fixnum->char
|
||||
($fx+ ($fxlogand b2 #b111111)
|
||||
($fx+ ($fxsll ($fxlogand b1 #b111111) 6)
|
||||
($fxsll ($fxlogand b0 #b1111) 12)))))
|
||||
(f str x ($fx+ i 3) j ($fxadd1 n) mode)]
|
||||
[(eq? mode 'ignore)
|
||||
(f str x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
($string-set! str n ($fixnum->char #xFFFD))
|
||||
(f str x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "BUG")]))]
|
||||
[(eq? mode 'ignore) (f str x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
($string-set! str n ($fixnum->char #xFFFD))
|
||||
(f str x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "BUG: incomplete char sequence")])]
|
||||
[($fx= ($fxsra b0 3) #b11110)
|
||||
(cond
|
||||
[($fx< ($fx+ i 3) j)
|
||||
(let ([b1 ($bytevector-u8-ref x ($fx+ i 1))]
|
||||
[b2 ($bytevector-u8-ref x ($fx+ i 2))]
|
||||
[b3 ($bytevector-u8-ref x ($fx+ i 3))])
|
||||
(cond
|
||||
[($fx= ($fxsra ($fxlogor b1 ($fxlogor b2 b3)) 6) #b10)
|
||||
($string-set! str n
|
||||
($fixnum->char
|
||||
($fx+ ($fxlogand b3 #b111111)
|
||||
($fx+ ($fxsll ($fxlogand b2 #b111111) 6)
|
||||
($fx+ ($fxsll ($fxlogand b1 #b111111) 12)
|
||||
($fxsll ($fxlogand b0 #b111) 18))))))
|
||||
(f str x ($fx+ i 4) j ($fxadd1 n) mode)]
|
||||
[(eq? mode 'ignore)
|
||||
(f str x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
($string-set! str n ($fixnum->char #xFFFD))
|
||||
(f str x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "BUG")]))]
|
||||
[(eq? mode 'ignore) (f str x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
($string-set! str n ($fixnum->char #xFFFD))
|
||||
(f str x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "BUG")])]
|
||||
[(eq? mode 'ignore) (f str x ($fxadd1 i) j n mode)]
|
||||
[(eq? mode 'replace)
|
||||
($string-set! str n ($fixnum->char #xFFFD))
|
||||
(f str x ($fxadd1 i) j ($fxadd1 n) mode)]
|
||||
[else (error who "BUG")]))])))
|
||||
(define (convert bv mode)
|
||||
(fill ($make-string (count bv mode)) bv mode))
|
||||
(case-lambda
|
||||
[(bv) (convert bv 'error)]
|
||||
[(bv handling-mode)
|
||||
(unless (memq handling-mode '(ignore replace raise))
|
||||
(error 'utf8-bytevector->string
|
||||
"~s is not a valid handling mode"
|
||||
handling-mode))
|
||||
(convert bv handling-mode)])))
|
||||
|
||||
)
|
||||
|
||||
|
||||
#!eof
|
||||
(unless (memq handling-mode '(ignore replace raise))
|
||||
(error 'string->utf8-bytevector "~s is not a valid hanlding-mode, shound be one of ignore, replace, or raise" handling-mode))
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
"ikarus.chars.ss"
|
||||
"ikarus.records.ss"
|
||||
"ikarus.strings.ss"
|
||||
"ikarus.transcoders.ss"
|
||||
"ikarus.date-string.ss"
|
||||
"ikarus.symbols.ss"
|
||||
"ikarus.vectors.ss"
|
||||
|
@ -69,7 +70,6 @@
|
|||
"ikarus.posix.ss"
|
||||
"ikarus.timer.ss"
|
||||
"ikarus.bytevectors.ss"
|
||||
"ikarus.transcoders.ss"
|
||||
"ikarus.main.ss"))
|
||||
|
||||
(define ikarus-system-macros
|
||||
|
@ -293,6 +293,7 @@
|
|||
[uint-list->bytevector i]
|
||||
[sint-list->bytevector i]
|
||||
[string->utf8-bytevector i]
|
||||
[utf8-bytevector->string i]
|
||||
|
||||
[for-each i r]
|
||||
[map i r]
|
||||
|
|
Loading…
Reference in New Issue