600 lines
22 KiB
Plaintext
600 lines
22 KiB
Plaintext
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;
|
||
|
; Copyright 2007 William D Clinger.
|
||
|
;
|
||
|
; Permission to copy this software, in whole or in part, to use this
|
||
|
; software for any lawful purpose, and to redistribute this software
|
||
|
; is granted subject to the restriction that all copies made of this
|
||
|
; software must include this copyright notice in full.
|
||
|
;
|
||
|
; I also request that you send me a copy of any improvements that you
|
||
|
; make to this software so that they may be incorporated within it to
|
||
|
; the benefit of the Scheme community.
|
||
|
;
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;
|
||
|
; Tests of string <-> bytevector conversions.
|
||
|
;
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
(import (rnrs base)
|
||
|
(rnrs unicode)
|
||
|
(rnrs bytevectors)
|
||
|
(rnrs control)
|
||
|
(rnrs io simple)
|
||
|
(rnrs mutable-strings))
|
||
|
|
||
|
; Crude test rig, just for benchmarking.
|
||
|
|
||
|
(define failed-tests '())
|
||
|
|
||
|
(define (test name actual expected)
|
||
|
(if (not (equal? actual expected))
|
||
|
(begin (display "******** FAILED TEST ******** ")
|
||
|
(display name)
|
||
|
(newline)
|
||
|
(set! failed-tests (cons name failed-tests)))))
|
||
|
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;
|
||
|
; The R6RS doesn't specify exactly how many replacement
|
||
|
; characters get generated by an encoding or decoding error,
|
||
|
; so the results of some tests are compared by treating any
|
||
|
; sequence of consecutive replacement characters the same as
|
||
|
; a single replacement character.
|
||
|
;
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
(define (string~? s1 s2)
|
||
|
(define (replacement? c)
|
||
|
(char=? c #\xfffd))
|
||
|
(define (canonicalized s)
|
||
|
(let loop ((rchars (reverse (string->list s)))
|
||
|
(cchars '()))
|
||
|
(cond ((or (null? rchars) (null? (cdr rchars)))
|
||
|
(list->string cchars))
|
||
|
((and (replacement? (car rchars))
|
||
|
(replacement? (cadr rchars)))
|
||
|
(loop (cdr rchars) cchars))
|
||
|
(else
|
||
|
(loop (cdr rchars) (cons (car rchars) cchars))))))
|
||
|
(string=? (canonicalized s1) (canonicalized s2)))
|
||
|
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;
|
||
|
; Basic sanity tests, followed by stress tests on random inputs.
|
||
|
;
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
(define (string-bytevector-tests
|
||
|
*random-stress-tests* *random-stress-test-max-size*)
|
||
|
|
||
|
(define (test-roundtrip bvec tostring tobvec)
|
||
|
(let* ((s1 (tostring bvec))
|
||
|
(b2 (tobvec s1))
|
||
|
(s2 (tostring b2)))
|
||
|
(test "round trip of string conversion" (string=? s1 s2) #t)))
|
||
|
|
||
|
; This random number generator doesn't have to be good.
|
||
|
; It just has to be fast.
|
||
|
|
||
|
(define random
|
||
|
(letrec ((random14
|
||
|
(lambda (n)
|
||
|
(set! x (mod (+ (* a x) c) (+ m 1)))
|
||
|
(mod (div x 8) n)))
|
||
|
(a 701)
|
||
|
(x 1)
|
||
|
(c 743483)
|
||
|
(m 524287)
|
||
|
(loop
|
||
|
(lambda (q r n)
|
||
|
(if (zero? q)
|
||
|
(mod r n)
|
||
|
(loop (div q 16384)
|
||
|
(+ (* 16384 r) (random14 16384))
|
||
|
n)))))
|
||
|
(lambda (n)
|
||
|
(if (< n 16384)
|
||
|
(random14 n)
|
||
|
(loop (div n 16384) (random14 16384) n)))))
|
||
|
|
||
|
; Returns a random bytevector of length up to n.
|
||
|
|
||
|
(define (random-bytevector n)
|
||
|
(let* ((n (random n))
|
||
|
(bv (make-bytevector n)))
|
||
|
(do ((i 0 (+ i 1)))
|
||
|
((= i n) bv)
|
||
|
(bytevector-u8-set! bv i (random 256)))))
|
||
|
|
||
|
; Returns a random bytevector of even length up to n.
|
||
|
|
||
|
(define (random-bytevector2 n)
|
||
|
(let* ((n (random n))
|
||
|
(n (if (odd? n) (+ n 1) n))
|
||
|
(bv (make-bytevector n)))
|
||
|
(do ((i 0 (+ i 1)))
|
||
|
((= i n) bv)
|
||
|
(bytevector-u8-set! bv i (random 256)))))
|
||
|
|
||
|
; Returns a random bytevector of multiple-of-4 length up to n.
|
||
|
|
||
|
(define (random-bytevector4 n)
|
||
|
(let* ((n (random n))
|
||
|
(n (* 4 (round (/ n 4))))
|
||
|
(bv (make-bytevector n)))
|
||
|
(do ((i 0 (+ i 1)))
|
||
|
((= i n) bv)
|
||
|
(bytevector-u8-set! bv i (random 256)))))
|
||
|
|
||
|
(test "utf-8, BMP"
|
||
|
(bytevector=? (string->utf8 "k\x007f;\x0080;\x07ff;\x0800;\xffff;")
|
||
|
'#vu8(#x6b
|
||
|
#x7f
|
||
|
#b11000010 #b10000000
|
||
|
#b11011111 #b10111111
|
||
|
#b11100000 #b10100000 #b10000000
|
||
|
#b11101111 #b10111111 #b10111111))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-8, supplemental"
|
||
|
(bytevector=? (string->utf8 "\x010000;\x10ffff;")
|
||
|
'#vu8(#b11110000 #b10010000 #b10000000 #b10000000
|
||
|
#b11110100 #b10001111 #b10111111 #b10111111))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-8, errors 1"
|
||
|
(string~? (utf8->string '#vu8(#x61 ; a
|
||
|
#xc0 #x62 ; ?b
|
||
|
#xc1 #x63 ; ?c
|
||
|
#xc2 #x64 ; ?d
|
||
|
#x80 #x65 ; ?e
|
||
|
#xc0 #xc0 #x66 ; ??f
|
||
|
#xe0 #x67 ; ?g
|
||
|
))
|
||
|
"a\xfffd;b\xfffd;c\xfffd;d\xfffd;e\xfffd;\xfffd;f\xfffd;g")
|
||
|
#t)
|
||
|
|
||
|
(test "utf-8, errors 2"
|
||
|
(string~? (utf8->string '#vu8(#xe0 #x80 #x80 #x68 ; ???h
|
||
|
#xe0 #xc0 #x80 #x69 ; ???i
|
||
|
#xf0 #x6a ; ?j
|
||
|
))
|
||
|
"\xfffd;\xfffd;\xfffd;h\xfffd;\xfffd;\xfffd;i\xfffd;j")
|
||
|
#t)
|
||
|
|
||
|
(test "utf-8, errors 3"
|
||
|
(string~? (utf8->string '#vu8(#x61 ; a
|
||
|
#xf0 #x80 #x80 #x80 #x62 ; ????b
|
||
|
#xf0 #x90 #x80 #x80 #x63 ; .c
|
||
|
))
|
||
|
"a\xfffd;\xfffd;\xfffd;\xfffd;b\x10000;c")
|
||
|
#t)
|
||
|
|
||
|
(test "utf-8, errors 4"
|
||
|
(string~? (utf8->string '#vu8(#x61 ; a
|
||
|
#xf0 #xbf #xbf #xbf #x64 ; .d
|
||
|
#xf0 #xbf #xbf #x65 ; ?e
|
||
|
#xf0 #xbf #x66 ; ?f
|
||
|
))
|
||
|
"a\x3ffff;d\xfffd;e\xfffd;f")
|
||
|
#t)
|
||
|
|
||
|
(test "utf-8, errors 5"
|
||
|
(string~? (utf8->string '#vu8(#x61 ; a
|
||
|
#xf4 #x8f #xbf #xbf #x62 ; .b
|
||
|
#xf4 #x90 #x80 #x80 #x63 ; ????c
|
||
|
))
|
||
|
|
||
|
"a\x10ffff;b\xfffd;\xfffd;\xfffd;\xfffd;c")
|
||
|
#t)
|
||
|
|
||
|
(test "utf-8, errors 6"
|
||
|
(string~? (utf8->string '#vu8(#x61 ; a
|
||
|
#xf5 #x80 #x80 #x80 #x64 ; ????d
|
||
|
))
|
||
|
|
||
|
"a\xfffd;\xfffd;\xfffd;\xfffd;d")
|
||
|
#t)
|
||
|
|
||
|
; ignores BOM signature
|
||
|
; Officially, there is no BOM signature for UTF-8,
|
||
|
; so this test is commented out.
|
||
|
|
||
|
#;(test "utf-8, BOM"
|
||
|
(string=? (utf8->string '#vu8(#xef #xbb #xbf #x61 #x62 #x63 #x64))
|
||
|
"abcd")
|
||
|
#t)
|
||
|
|
||
|
(test-roundtrip (random-bytevector 10) utf8->string string->utf8)
|
||
|
|
||
|
(do ((i 0 (+ i 1)))
|
||
|
((= i *random-stress-tests*))
|
||
|
(test-roundtrip (random-bytevector *random-stress-test-max-size*)
|
||
|
utf8->string string->utf8))
|
||
|
|
||
|
(test "utf-16, BMP"
|
||
|
(bytevector=? (string->utf16 "k\x007f;\x0080;\x07ff;\x0800;\xffff;")
|
||
|
'#vu8(#x00 #x6b
|
||
|
#x00 #x7f
|
||
|
#x00 #x80
|
||
|
#x07 #xff
|
||
|
#x08 #x00
|
||
|
#xff #xff))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16le, BMP"
|
||
|
(bytevector=? (string->utf16 "k\x007f;\x0080;\x07ff;\x0800;\xffff;"
|
||
|
'little)
|
||
|
'#vu8(#x6b #x00
|
||
|
#x7f #x00
|
||
|
#x80 #x00
|
||
|
#xff #x07
|
||
|
#x00 #x08
|
||
|
#xff #xff))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16, supplemental"
|
||
|
(bytevector=? (string->utf16 "\x010000;\xfdcba;\x10ffff;")
|
||
|
'#vu8(#xd8 #x00 #xdc #x00
|
||
|
#xdb #xb7 #xdc #xba
|
||
|
#xdb #xff #xdf #xff))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16le, supplemental"
|
||
|
(bytevector=? (string->utf16 "\x010000;\xfdcba;\x10ffff;" 'little)
|
||
|
'#vu8(#x00 #xd8 #x00 #xdc
|
||
|
#xb7 #xdb #xba #xdc
|
||
|
#xff #xdb #xff #xdf))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16be"
|
||
|
(bytevector=? (string->utf16 "ab\x010000;\xfdcba;\x10ffff;cd")
|
||
|
(string->utf16 "ab\x010000;\xfdcba;\x10ffff;cd" 'big))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16, errors 1"
|
||
|
(string~? "k\x007f;\x0080;\x07ff;\x0800;\xffff;"
|
||
|
(utf16->string
|
||
|
'#vu8(#x00 #x6b
|
||
|
#x00 #x7f
|
||
|
#x00 #x80
|
||
|
#x07 #xff
|
||
|
#x08 #x00
|
||
|
#xff #xff)
|
||
|
'big))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16, errors 2"
|
||
|
(string~? "k\x007f;\x0080;\x07ff;\x0800;\xffff;"
|
||
|
(utf16->string
|
||
|
'#vu8(#x00 #x6b
|
||
|
#x00 #x7f
|
||
|
#x00 #x80
|
||
|
#x07 #xff
|
||
|
#x08 #x00
|
||
|
#xff #xff)
|
||
|
'big #t))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16, errors 3"
|
||
|
(string~? "k\x007f;\x0080;\x07ff;\x0800;\xffff;"
|
||
|
(utf16->string
|
||
|
'#vu8(#xfe #xff ; big-endian BOM
|
||
|
#x00 #x6b
|
||
|
#x00 #x7f
|
||
|
#x00 #x80
|
||
|
#x07 #xff
|
||
|
#x08 #x00
|
||
|
#xff #xff)
|
||
|
'big))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16, errors 4"
|
||
|
(string~? "k\x007f;\x0080;\x07ff;\x0800;\xffff;"
|
||
|
(utf16->string
|
||
|
'#vu8(#x6b #x00
|
||
|
#x7f #x00
|
||
|
#x80 #x00
|
||
|
#xff #x07
|
||
|
#x00 #x08
|
||
|
#xff #xff)
|
||
|
'little #t))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-16, errors 5"
|
||
|
(string~? "k\x007f;\x0080;\x07ff;\x0800;\xffff;"
|
||
|
(utf16->string
|
||
|
'#vu8(#xff #xfe ; little-endian BOM
|
||
|
#x6b #x00
|
||
|
#x7f #x00
|
||
|
#x80 #x00
|
||
|
#xff #x07
|
||
|
#x00 #x08
|
||
|
#xff #xff)
|
||
|
'big))
|
||
|
#t)
|
||
|
|
||
|
(let ((tostring (lambda (bv) (utf16->string bv 'big)))
|
||
|
(tostring-big (lambda (bv) (utf16->string bv 'big #t)))
|
||
|
(tostring-little (lambda (bv) (utf16->string bv 'little #t)))
|
||
|
(tobvec string->utf16)
|
||
|
(tobvec-big (lambda (s) (string->utf16 s 'big)))
|
||
|
(tobvec-little (lambda (s) (string->utf16 s 'little))))
|
||
|
|
||
|
(do ((i 0 (+ i 1)))
|
||
|
((= i *random-stress-tests*))
|
||
|
(test-roundtrip (random-bytevector2 *random-stress-test-max-size*)
|
||
|
tostring tobvec)
|
||
|
(test-roundtrip (random-bytevector2 *random-stress-test-max-size*)
|
||
|
tostring-big tobvec-big)
|
||
|
(test-roundtrip (random-bytevector2 *random-stress-test-max-size*)
|
||
|
tostring-little tobvec-little)))
|
||
|
|
||
|
(test "utf-32"
|
||
|
(bytevector=? (string->utf32 "abc")
|
||
|
'#vu8(#x00 #x00 #x00 #x61
|
||
|
#x00 #x00 #x00 #x62
|
||
|
#x00 #x00 #x00 #x63))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32be"
|
||
|
(bytevector=? (string->utf32 "abc" 'big)
|
||
|
'#vu8(#x00 #x00 #x00 #x61
|
||
|
#x00 #x00 #x00 #x62
|
||
|
#x00 #x00 #x00 #x63))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32le"
|
||
|
(bytevector=? (string->utf32 "abc" 'little)
|
||
|
'#vu8(#x61 #x00 #x00 #x00
|
||
|
#x62 #x00 #x00 #x00
|
||
|
#x63 #x00 #x00 #x00))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32, errors 1"
|
||
|
(string~? "a\xfffd;b\xfffd;c\xfffd;d\xfffd;e"
|
||
|
(utf32->string
|
||
|
'#vu8(#x00 #x00 #x00 #x61
|
||
|
#x00 #x00 #xd9 #x00
|
||
|
#x00 #x00 #x00 #x62
|
||
|
#x00 #x00 #xdd #xab
|
||
|
#x00 #x00 #x00 #x63
|
||
|
#x00 #x11 #x00 #x00
|
||
|
#x00 #x00 #x00 #x64
|
||
|
#x01 #x00 #x00 #x65
|
||
|
#x00 #x00 #x00 #x65)
|
||
|
'big))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32, errors 2"
|
||
|
(string~? "a\xfffd;b\xfffd;c\xfffd;d\xfffd;e"
|
||
|
(utf32->string
|
||
|
'#vu8(#x00 #x00 #x00 #x61
|
||
|
#x00 #x00 #xd9 #x00
|
||
|
#x00 #x00 #x00 #x62
|
||
|
#x00 #x00 #xdd #xab
|
||
|
#x00 #x00 #x00 #x63
|
||
|
#x00 #x11 #x00 #x00
|
||
|
#x00 #x00 #x00 #x64
|
||
|
#x01 #x00 #x00 #x65
|
||
|
#x00 #x00 #x00 #x65)
|
||
|
'big #t))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32, errors 3"
|
||
|
(string~? "a\xfffd;b\xfffd;c\xfffd;d\xfffd;e"
|
||
|
(utf32->string
|
||
|
'#vu8(#x00 #x00 #xfe #xff ; big-endian BOM
|
||
|
#x00 #x00 #x00 #x61
|
||
|
#x00 #x00 #xd9 #x00
|
||
|
#x00 #x00 #x00 #x62
|
||
|
#x00 #x00 #xdd #xab
|
||
|
#x00 #x00 #x00 #x63
|
||
|
#x00 #x11 #x00 #x00
|
||
|
#x00 #x00 #x00 #x64
|
||
|
#x01 #x00 #x00 #x65
|
||
|
#x00 #x00 #x00 #x65)
|
||
|
'big))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32, errors 4"
|
||
|
(string~? "\xfeff;a\xfffd;b\xfffd;c\xfffd;d\xfffd;e"
|
||
|
(utf32->string
|
||
|
'#vu8(#x00 #x00 #xfe #xff ; big-endian BOM
|
||
|
#x00 #x00 #x00 #x61
|
||
|
#x00 #x00 #xd9 #x00
|
||
|
#x00 #x00 #x00 #x62
|
||
|
#x00 #x00 #xdd #xab
|
||
|
#x00 #x00 #x00 #x63
|
||
|
#x00 #x11 #x00 #x00
|
||
|
#x00 #x00 #x00 #x64
|
||
|
#x01 #x00 #x00 #x65
|
||
|
#x00 #x00 #x00 #x65)
|
||
|
'big #t))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32, errors 5"
|
||
|
(string~? "a\xfffd;b\xfffd;c\xfffd;d\xfffd;e"
|
||
|
(utf32->string
|
||
|
'#vu8(#x61 #x00 #x00 #x00
|
||
|
#x00 #xd9 #x00 #x00
|
||
|
#x62 #x00 #x00 #x00
|
||
|
#xab #xdd #x00 #x00
|
||
|
#x63 #x00 #x00 #x00
|
||
|
#x00 #x00 #x11 #x00
|
||
|
#x64 #x00 #x00 #x00
|
||
|
#x65 #x00 #x00 #x01
|
||
|
#x65 #x00 #x00 #x00)
|
||
|
'little #t))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32, errors 6"
|
||
|
(string~? "a\xfffd;b\xfffd;c\xfffd;d\xfffd;e"
|
||
|
(utf32->string
|
||
|
'#vu8(#xff #xfe #x00 #x00 ; little-endian BOM
|
||
|
#x61 #x00 #x00 #x00
|
||
|
#x00 #xd9 #x00 #x00
|
||
|
#x62 #x00 #x00 #x00
|
||
|
#xab #xdd #x00 #x00
|
||
|
#x63 #x00 #x00 #x00
|
||
|
#x00 #x00 #x11 #x00
|
||
|
#x64 #x00 #x00 #x00
|
||
|
#x65 #x00 #x00 #x01
|
||
|
#x65 #x00 #x00 #x00)
|
||
|
'big))
|
||
|
#t)
|
||
|
|
||
|
(test "utf-32, errors 7"
|
||
|
(string~? "\xfeff;a\xfffd;b\xfffd;c\xfffd;d\xfffd;e"
|
||
|
(utf32->string
|
||
|
'#vu8(#xff #xfe #x00 #x00 ; little-endian BOM
|
||
|
#x61 #x00 #x00 #x00
|
||
|
#x00 #xd9 #x00 #x00
|
||
|
#x62 #x00 #x00 #x00
|
||
|
#xab #xdd #x00 #x00
|
||
|
#x63 #x00 #x00 #x00
|
||
|
#x00 #x00 #x11 #x00
|
||
|
#x64 #x00 #x00 #x00
|
||
|
#x65 #x00 #x00 #x01
|
||
|
#x65 #x00 #x00 #x00)
|
||
|
'little #t))
|
||
|
#t)
|
||
|
|
||
|
(let ((tostring (lambda (bv) (utf32->string bv 'big)))
|
||
|
(tostring-big (lambda (bv) (utf32->string bv 'big #t)))
|
||
|
(tostring-little (lambda (bv) (utf32->string bv 'little #t)))
|
||
|
(tobvec string->utf32)
|
||
|
(tobvec-big (lambda (s) (string->utf32 s 'big)))
|
||
|
(tobvec-little (lambda (s) (string->utf32 s 'little))))
|
||
|
|
||
|
(do ((i 0 (+ i 1)))
|
||
|
((= i *random-stress-tests*))
|
||
|
(test-roundtrip (random-bytevector4 *random-stress-test-max-size*)
|
||
|
tostring tobvec)
|
||
|
(test-roundtrip (random-bytevector4 *random-stress-test-max-size*)
|
||
|
tostring-big tobvec-big)
|
||
|
(test-roundtrip (random-bytevector4 *random-stress-test-max-size*)
|
||
|
tostring-little tobvec-little)))
|
||
|
|
||
|
)
|
||
|
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;
|
||
|
; Exhaustive tests.
|
||
|
;
|
||
|
; Tests string <-> bytevector conversion on strings
|
||
|
; that contain every Unicode scalar value.
|
||
|
;
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
(define (exhaustive-string-bytevector-tests)
|
||
|
|
||
|
; Tests throughout an inclusive range.
|
||
|
|
||
|
(define (test-char-range lo hi tostring tobytevector)
|
||
|
(let* ((n (+ 1 (- hi lo)))
|
||
|
(s (make-string n))
|
||
|
(replacement-character (integer->char #xfffd)))
|
||
|
(do ((i lo (+ i 1)))
|
||
|
((> i hi))
|
||
|
(let ((c (if (or (<= 0 i #xd7ff)
|
||
|
(<= #xe000 i #x10ffff))
|
||
|
(integer->char i)
|
||
|
replacement-character)))
|
||
|
(string-set! s (- i lo) c)))
|
||
|
(test "test of long string conversion"
|
||
|
(string=? (tostring (tobytevector s)) s) #t)))
|
||
|
|
||
|
(define (test-exhaustively name tostring tobytevector)
|
||
|
;(display "Testing ")
|
||
|
;(display name)
|
||
|
;(display " conversions...")
|
||
|
;(newline)
|
||
|
(test-char-range 0 #xffff tostring tobytevector)
|
||
|
(test-char-range #x10000 #x1ffff tostring tobytevector)
|
||
|
(test-char-range #x20000 #x2ffff tostring tobytevector)
|
||
|
(test-char-range #x30000 #x3ffff tostring tobytevector)
|
||
|
(test-char-range #x40000 #x4ffff tostring tobytevector)
|
||
|
(test-char-range #x50000 #x5ffff tostring tobytevector)
|
||
|
(test-char-range #x60000 #x6ffff tostring tobytevector)
|
||
|
(test-char-range #x70000 #x7ffff tostring tobytevector)
|
||
|
(test-char-range #x80000 #x8ffff tostring tobytevector)
|
||
|
(test-char-range #x90000 #x9ffff tostring tobytevector)
|
||
|
(test-char-range #xa0000 #xaffff tostring tobytevector)
|
||
|
(test-char-range #xb0000 #xbffff tostring tobytevector)
|
||
|
(test-char-range #xc0000 #xcffff tostring tobytevector)
|
||
|
(test-char-range #xd0000 #xdffff tostring tobytevector)
|
||
|
(test-char-range #xe0000 #xeffff tostring tobytevector)
|
||
|
(test-char-range #xf0000 #xfffff tostring tobytevector)
|
||
|
(test-char-range #x100000 #x10ffff tostring tobytevector))
|
||
|
|
||
|
; Feel free to replace this with your favorite timing macro.
|
||
|
|
||
|
(define (timeit x) x)
|
||
|
|
||
|
(timeit (test-exhaustively "UTF-8" utf8->string string->utf8))
|
||
|
|
||
|
; NOTE: An unfortunate misunderstanding led to a late deletion
|
||
|
; of single-argument utf16->string from the R6RS. To get the
|
||
|
; correct effect of single-argument utf16->string, you have to
|
||
|
; use two arguments, as below.
|
||
|
;
|
||
|
;(timeit (test-exhaustively "UTF-16" utf16->string string->utf16))
|
||
|
|
||
|
(timeit (test-exhaustively "UTF-16"
|
||
|
(lambda (bv) (utf16->string bv 'big))
|
||
|
string->utf16))
|
||
|
|
||
|
; NOTE: To get the correct effect of two-argument utf16->string,
|
||
|
; you have to use three arguments, as below.
|
||
|
|
||
|
(timeit (test-exhaustively "UTF-16BE"
|
||
|
(lambda (bv) (utf16->string bv 'big #t))
|
||
|
(lambda (s) (string->utf16 s 'big))))
|
||
|
|
||
|
(timeit (test-exhaustively "UTF-16LE"
|
||
|
(lambda (bv) (utf16->string bv 'little #t))
|
||
|
(lambda (s) (string->utf16 s 'little))))
|
||
|
|
||
|
; NOTE: An unfortunate misunderstanding led to a late deletion
|
||
|
; of single-argument utf32->string from the R6RS. To get the
|
||
|
; correct effect of single-argument utf32->string, you have to
|
||
|
; use two arguments, as below.
|
||
|
;
|
||
|
;(timeit (test-exhaustively "UTF-32" utf32->string string->utf32))
|
||
|
|
||
|
(timeit (test-exhaustively "UTF-32"
|
||
|
(lambda (bv) (utf32->string bv 'big))
|
||
|
string->utf32))
|
||
|
|
||
|
; NOTE: To get the correct effect of two-argument utf32->string,
|
||
|
; you have to use three arguments, as below.
|
||
|
|
||
|
(timeit (test-exhaustively "UTF-32BE"
|
||
|
(lambda (bv) (utf32->string bv 'big #t))
|
||
|
(lambda (s) (string->utf32 s 'big))))
|
||
|
|
||
|
(timeit (test-exhaustively "UTF-32LE"
|
||
|
(lambda (bv) (utf32->string bv 'little #t))
|
||
|
(lambda (s) (string->utf32 s 'little)))))
|
||
|
|
||
|
(define (main)
|
||
|
(let* ((count (read))
|
||
|
(input1 (read))
|
||
|
(input2 (read))
|
||
|
(output (read))
|
||
|
(s3 (number->string count))
|
||
|
(s2 (number->string input2))
|
||
|
(s1 (number->string input1))
|
||
|
(name "bv2string"))
|
||
|
(run-r6rs-benchmark
|
||
|
(string-append name ":" s1 ":" s2 ":" s3)
|
||
|
count
|
||
|
(lambda ()
|
||
|
(string-bytevector-tests (hide count count) (hide count input1))
|
||
|
(exhaustive-string-bytevector-tests)
|
||
|
(length failed-tests))
|
||
|
(lambda (result) (equal? result output)))))
|