utf16->string, utf32->string, string->utf16, string->utf32 are all
working now.
This commit is contained in:
parent
4efdebb3ac
commit
d878bd3934
|
@ -15,7 +15,8 @@
|
||||||
|
|
||||||
|
|
||||||
(library (ikarus transcoders)
|
(library (ikarus transcoders)
|
||||||
(export string->utf8 utf8->string string->utf16 string->utf32)
|
(export string->utf8 utf8->string string->utf16 string->utf32
|
||||||
|
utf16->string utf32->string)
|
||||||
(import (except (ikarus) string->utf8 utf8->string string->utf16
|
(import (except (ikarus) string->utf8 utf8->string string->utf16
|
||||||
utf16->string string->utf32 utf32->string)
|
utf16->string string->utf32 utf32->string)
|
||||||
(ikarus system $strings)
|
(ikarus system $strings)
|
||||||
|
@ -38,6 +39,15 @@
|
||||||
;;; replace: places a U+FFFD in place of the malformed bytes
|
;;; replace: places a U+FFFD in place of the malformed bytes
|
||||||
;;; raise: raises an error
|
;;; raise: raises an error
|
||||||
|
|
||||||
|
(define integer->char/invalid
|
||||||
|
(lambda (n)
|
||||||
|
(cond
|
||||||
|
[(not (fixnum? n)) #\xFFFD]
|
||||||
|
[($fx<= n #xD7FF) ($fixnum->char n)]
|
||||||
|
[($fx< n #xE000) #\xFFFD]
|
||||||
|
[($fx<= n #x10FFFF) ($fixnum->char n)]
|
||||||
|
[else #\xFFFD])))
|
||||||
|
|
||||||
(define string->utf8
|
(define string->utf8
|
||||||
(lambda (str)
|
(lambda (str)
|
||||||
(define (utf8-string-size str)
|
(define (utf8-string-size str)
|
||||||
|
@ -313,6 +323,9 @@
|
||||||
;;; 4) Construct a 20-bit unsigned integer U', taking the 10 low-order
|
;;; 4) Construct a 20-bit unsigned integer U', taking the 10 low-order
|
||||||
;;; bits of W1 as its 10 high-order bits and the 10 low-order bits of
|
;;; bits of W1 as its 10 high-order bits and the 10 low-order bits of
|
||||||
;;; W2 as its 10 low-order bits.
|
;;; W2 as its 10 low-order bits.
|
||||||
|
;;; 5) Add 0x10000 to U' to obtain the character value U.
|
||||||
|
;;; Terminate.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(module (string->utf16)
|
(module (string->utf16)
|
||||||
|
@ -342,7 +355,7 @@
|
||||||
(fxlogor (fxsll #b110110 10) (fxsra u^ 10))
|
(fxlogor (fxsll #b110110 10) (fxsra u^ 10))
|
||||||
endianness)
|
endianness)
|
||||||
(bytevector-u16-set! bv (fx+ j 2)
|
(bytevector-u16-set! bv (fx+ j 2)
|
||||||
(fxlogor (fxsll #b110111 10) (fxlogand u^ #xFFFF))
|
(fxlogor (fxsll #b110111 10) (fxlogand u^ #x3FF))
|
||||||
endianness))
|
endianness))
|
||||||
(bvfill str bv (fx+ i 1) (fx+ j 4) len endianness)]))]))
|
(bvfill str bv (fx+ i 1) (fx+ j 4) len endianness)]))]))
|
||||||
(let ([len ($string-length str)])
|
(let ([len ($string-length str)])
|
||||||
|
@ -376,7 +389,7 @@
|
||||||
(cond
|
(cond
|
||||||
[(or (fx< w1 #xD800) (fx> w1 #xDFFF))
|
[(or (fx< w1 #xD800) (fx> w1 #xDFFF))
|
||||||
(count-size bv endianness (+ i 2) len (+ n 1))]
|
(count-size bv endianness (+ i 2) len (+ n 1))]
|
||||||
[(not (fx<= #xD800 w1 #xDFFF)) ;;; error sequence
|
[(not (fx<= #xD800 w1 #xDBFF)) ;;; error sequence
|
||||||
(count-size bv endianness (+ i 2) len (+ n 1))]
|
(count-size bv endianness (+ i 2) len (+ n 1))]
|
||||||
[(<= (+ i 4) (bytevector-length bv))
|
[(<= (+ i 4) (bytevector-length bv))
|
||||||
(let ([w2 (bytevector-u16-ref bv (+ i 2) endianness)])
|
(let ([w2 (bytevector-u16-ref bv (+ i 2) endianness)])
|
||||||
|
@ -401,9 +414,9 @@
|
||||||
(let ([w1 (bytevector-u16-ref bv i endianness)])
|
(let ([w1 (bytevector-u16-ref bv i endianness)])
|
||||||
(cond
|
(cond
|
||||||
[(or (fx< w1 #xD800) (fx> w1 #xDFFF))
|
[(or (fx< w1 #xD800) (fx> w1 #xDFFF))
|
||||||
(string-set! str n (integer->char w1))
|
(string-set! str n (integer->char/invalid w1))
|
||||||
(fill bv endianness str (+ i 2) len (+ n 1))]
|
(fill bv endianness str (+ i 2) len (+ n 1))]
|
||||||
[(not (fx<= #xD800 w1 #xDFFF)) ;;; error sequence
|
[(not (fx<= #xD800 w1 #xDBFF)) ;;; error sequence
|
||||||
(string-set! str n #\xFFFD)
|
(string-set! str n #\xFFFD)
|
||||||
(fill bv endianness str (+ i 2) len (+ n 1))]
|
(fill bv endianness str (+ i 2) len (+ n 1))]
|
||||||
[(<= (+ i 4) (bytevector-length bv))
|
[(<= (+ i 4) (bytevector-length bv))
|
||||||
|
@ -416,14 +429,15 @@
|
||||||
(fill bv endianness str (+ i 2) len (+ n 1))]
|
(fill bv endianness str (+ i 2) len (+ n 1))]
|
||||||
[else
|
[else
|
||||||
(string-set! str n
|
(string-set! str n
|
||||||
(integer->char
|
(integer->char/invalid
|
||||||
(fxlogor (fxsll (fxlogand w1 #x3FF) 10)
|
(+ #x10000
|
||||||
(fxlogand w2 #x3FF))))
|
(fxlogor (fxsll (fxlogand w1 #x3FF) 10)
|
||||||
(count-size bv endianness (+ i 4) len (+ n 1))]))]
|
(fxlogand w2 #x3FF)))))
|
||||||
|
(fill bv endianness str (+ i 4) len (+ n 1))]))]
|
||||||
[else
|
[else
|
||||||
;;; error again
|
;;; error again
|
||||||
(string-set! str n #\xFFFD)
|
(string-set! str n #\xFFFD)
|
||||||
(count-size bv endianness str (+ i 2) len (+ n 1))]))]))
|
(fill bv endianness str (+ i 2) len (+ n 1))]))]))
|
||||||
(define (decode bv endianness start)
|
(define (decode bv endianness start)
|
||||||
(let ([len (fxand (bytevector-length bv) -2)])
|
(let ([len (fxand (bytevector-length bv) -2)])
|
||||||
(let ([n (count-size bv endianness start len 0)])
|
(let ([n (count-size bv endianness start len 0)])
|
||||||
|
@ -495,8 +509,8 @@
|
||||||
str]
|
str]
|
||||||
[else
|
[else
|
||||||
(string-set! str n
|
(string-set! str n
|
||||||
(let ([w (bytevector-u32-ref bv i endianness)])
|
(integer->char/invalid
|
||||||
(integer->char w)))
|
(bytevector-u32-ref bv i endianness)))
|
||||||
(fill bv endianness str (fx+ i 4) j (fx+ n 1))]))
|
(fill bv endianness str (fx+ i 4) j (fx+ n 1))]))
|
||||||
(define (decode bv endianness start)
|
(define (decode bv endianness start)
|
||||||
(let ([bvlen (bytevector-length bv)])
|
(let ([bvlen (bytevector-length bv)])
|
||||||
|
@ -508,8 +522,8 @@
|
||||||
(and (fx>= (bytevector-length bv) 4)
|
(and (fx>= (bytevector-length bv) 4)
|
||||||
(let ([n (bytevector-u16-ref bv 0 'big)])
|
(let ([n (bytevector-u16-ref bv 0 'big)])
|
||||||
(cond
|
(cond
|
||||||
[(fx= n #x0000FEFF) 'big]
|
[(= n #x0000FEFF) 'big]
|
||||||
[(fx= n #xFFFE0000) 'little]
|
[(= n #xFFFE0000) 'little]
|
||||||
[else #f]))))
|
[else #f]))))
|
||||||
(unless (bytevector? bv)
|
(unless (bytevector? bv)
|
||||||
(error who "not a bytevector" bv))
|
(error who "not a bytevector" bv))
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
1177
|
1178
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
;;; You should have received a copy of the GNU General Public License
|
;;; You should have received a copy of the GNU General Public License
|
||||||
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
;;; vim:syntax=scheme
|
||||||
(import (only (ikarus) import))
|
(import (only (ikarus) import))
|
||||||
(import (except (ikarus) assembler-output))
|
(import (except (ikarus) assembler-output))
|
||||||
(import (ikarus compiler))
|
(import (ikarus compiler))
|
||||||
|
@ -935,8 +936,8 @@
|
||||||
[u8-list->bytevector i r bv]
|
[u8-list->bytevector i r bv]
|
||||||
[uint-list->bytevector i r bv]
|
[uint-list->bytevector i r bv]
|
||||||
[utf8->string i r bv]
|
[utf8->string i r bv]
|
||||||
[utf16->string r bv]
|
[utf16->string i r bv]
|
||||||
[utf32->string r bv]
|
[utf32->string i r bv]
|
||||||
[print-condition i]
|
[print-condition i]
|
||||||
[condition? i r co]
|
[condition? i r co]
|
||||||
[&assertion i r co]
|
[&assertion i r co]
|
||||||
|
|
Loading…
Reference in New Issue