Transcoding to latin-1 now works in the new IO layer.

This commit is contained in:
Abdulaziz Ghuloum 2007-12-06 08:14:05 -05:00
parent 1469932f3d
commit 1f352825f5
4 changed files with 197 additions and 18 deletions

View File

@ -4,13 +4,15 @@
input-port? textual-port? port-eof?
open-bytevector-input-port
make-custom-binary-input-port
get-char lookahead-char get-u8 lookahead-u8 close-port)
get-char lookahead-char get-u8 lookahead-u8 close-port
transcoded-port)
(import
(except (ikarus)
input-port? textual-port? port-eof?
open-bytevector-input-port
make-custom-binary-input-port
get-char lookahead-char get-u8 lookahead-u8 close-port))
get-char lookahead-char get-u8 lookahead-u8 close-port
transcoded-port))
(define-struct $port
(index size buffer base-index codec closed? attrs
@ -72,31 +74,31 @@
($make-custom-binary-input-port id read! get-position
set-position! close 256))
(define (transcoder-attrs x)
(import (ikarus system $transcoders))
(define (input-transcoder-attrs x)
(cond
[(not x) ;;; binary
[(not x) ;;; binary input port
(fxior fast-get-tag fast-get-byte-tag)]
[else
(error 'transcoder-attrs "not handled" x)]))
[(and (eq? 'latin-1-codec (transcoder-codec x))
(eq? 'none (transcoder-eol-style x)))
(fxior fast-get-tag fast-get-latin-tag)]
[else 0]))
(define open-bytevector-input-port
(case-lambda
[(bv) (open-bytevector-input-port bv #f)]
[(bv maybe-transcoder)
(import (ikarus system $transcoders))
(unless (bytevector? bv)
(error 'open-bytevector-input-port
"not a bytevector" bv))
(when (and maybe-transcoder
(not ($transcoder? maybe-transcoder)))
(not (transcoder? maybe-transcoder)))
(error 'open-bytevector-input-port
"not a transcoder" maybe-transcoder))
($make-port 0 (bytevector-length bv) bv 0
maybe-transcoder
#f ;;; closed?
(transcoder-attrs maybe-transcoder)
(input-transcoder-attrs maybe-transcoder)
"*bytevector-input-port*"
(lambda (bv i c) 0) ;;; read!
#f ;;; write!
@ -105,6 +107,32 @@
#f ;;; close
)]))
(define (transcoded-port p transcoder)
(define who 'transcoded-port)
(unless (transcoder? transcoder)
(error who "not a transcoder" transcoder))
(unless ($port? p) (error who "not a port" p))
(when ($port-codec p) (error who "not a binary port" p))
(let ([read! ($port-read! p)]
[closed? ($port-closed? p)])
($set-port-closed?! p #t)
($make-port
($port-index p)
($port-size p)
($port-buffer p)
($port-base-index p)
transcoder
closed?
(if read! (input-transcoder-attrs transcoder) 0)
($port-id p)
read!
($port-write! p)
($port-get-position p)
($port-set-position! p)
($port-close p))))
(define (input-port? p)
(and ($port? p)
($port-read! p)
@ -136,11 +164,66 @@
;;; ----------------------------------------------------------
(module (get-char lookahead-char)
(define (refill-bv-start p who)
(when ($port-closed? p) (error who "port is closed" p))
(let* ([bv ($port-buffer p)]
[n (bytevector-length bv)])
(let ([j (($port-read! p) bv 0 n)])
(unless (fixnum? j)
(error who "invalid return value from read! procedure" j))
(cond
[(fx>= j 0)
(unless (fx<= j n)
(error who "read! returned a value out of range" j))
($set-port-index! p 0)
($set-port-size! p j)
j]
[else
(error who "read! returned a value out of range" j)]))))
(define (get-char-latin-mode p who idx)
(let ([n (refill-bv-start p who)])
(cond
[(fx= n 0) (eof-object)]
[else
($set-port-index! p idx)
(integer->char (bytevector-u8-ref ($port-buffer p) 0))])))
(define-rrr get-char-utf8-mode)
(define-rrr get-char-latin-mode)
(define-rrr get-char-char-mode)
(define-rrr slow-get-char)
(define-rrr lookahead-char)
(define-rrr slow-lookahead-char)
(define-rrr lookahead-char-utf8-mode)
(define-rrr lookahead-char-char-mode)
;;;
(define (lookahead-char p)
(define who 'lookahead-char)
(let ([m ($port-get-mode p)])
(cond
[(eq? m fast-get-utf8-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(let ([b (bytevector-u8-ref ($port-buffer p) i)])
(cond
[(fx< b 128) (integer->char b)]
[else (lookahead-char-utf8-mode p)]))]
[else
(lookahead-char-utf8-mode p who)]))]
[(eq? m fast-get-char-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(string-ref ($port-buffer p) i)]
[else
(lookahead-char-char-mode p who)]))]
[(eq? m fast-get-latin-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(integer->char
(bytevector-u8-ref ($port-buffer p) i))]
[else
(get-char-latin-mode p who 0)]))]
[else (slow-lookahead-char p who)])))
;;;
(define (get-char p)
(define who 'get-char)
@ -174,7 +257,7 @@
(integer->char
(bytevector-u8-ref ($port-buffer p) i))]
[else
(get-char-latin-mode p who)]))]
(get-char-latin-mode p who 1)]))]
[else (slow-get-char p who)]))))
;;; ----------------------------------------------------------

View File

@ -34,6 +34,25 @@
(f (+ i 1))))
(open-bytevector-input-port bv)))
(define (make-ascii-range-bytevector)
(let ([bv (make-bytevector 128)])
(let f ([i 0])
(unless (= i 128)
(bytevector-u8-set! bv i i)
(f (+ i 1))))
bv))
(define (make-ascii-range-bytevector+utf8-bom)
(let ([bv (make-bytevector (+ 128 3))])
(bytevector-u8-set! bv 0 #xEF)
(bytevector-u8-set! bv 1 #xBB)
(bytevector-u8-set! bv 2 #xBF)
(let f ([i 0])
(unless (= i 128)
(bytevector-u8-set! bv (+ i 3) i)
(f (+ i 1))))
bv))
(define (test-get-u8-1 p n)
(let f ([i 0])
(let ([x (get-u8 p)])
@ -45,6 +64,18 @@
[else
(error 'test0 "incorrect value returned" x)]))))
(define (test-get-char-1 p n)
(let f ([i 0])
(let ([x (get-char p)])
(cond
[(eof-object? x)
(unless (= i n)
(error 'test0 "premature termination" i))]
[(= (char->integer x) i) (f (+ i 1))]
[else
(error 'test0 "incorrect value returned" x)]))))
(define (test-peek-u8-1 p n)
(let f ([i 0])
(let* ([px (lookahead-u8 p)]
@ -58,7 +89,20 @@
[else
(error #f "incorrect value returned" x i)]))))
(define (test-port-eof?-1 p n)
(define (test-peek-char-1 p n)
(let f ([i 0])
(let* ([px (lookahead-char p)]
[x (get-char p)])
(cond
[(not (eqv? px x)) (error #f "peek invalid" px x)]
[(eof-object? x)
(unless (= i n)
(error #f "premature termination" i))]
[(= (char->integer x) i) (f (+ i 1))]
[else
(error #f "incorrect value returned" x i)]))))
(define (test-binary-port-eof?-1 p n)
(let f ([i 0])
(cond
[(port-eof? p)
@ -70,6 +114,18 @@
[else
(error #f "incorrect value returned" i)])))
(define (test-textual-port-eof?-1 p n)
(let f ([i 0])
(cond
[(port-eof? p)
(unless (= i n)
(error #f "premature termination" i))
(assert (eof-object? (lookahead-char p)))
(assert (eof-object? (get-char p)))]
[(= (char->integer (get-char p)) i) (f (+ i 1))]
[else
(error #f "incorrect value returned" i)])))
(test "reading 256 bytes in ascending order"
(test-get-u8-1 (make-n-byte-custom-binary-input-port 256) 256))
@ -93,7 +149,7 @@
(test-peek-u8-1 (make-n-byte-custom-binary-input-port 256) 256))
(test "custom-binary-port port-eof?"
(test-port-eof?-1 (make-n-byte-custom-binary-input-port 256) 256))
(test-binary-port-eof?-1 (make-n-byte-custom-binary-input-port 256) 256))
;;;
(test "reading 256 bytes from bytevector-input-port"
@ -103,6 +159,46 @@
(test-peek-u8-1 (make-n-byte-bytevector-binary-input-port 256) 256))
(test "bytevector-binary-port port-eof?"
(test-port-eof?-1 (make-n-byte-bytevector-binary-input-port 256) 256))
(test-binary-port-eof?-1 (make-n-byte-bytevector-binary-input-port 256) 256))
;;;
(test "reading 256 latin1 chars from bytevector-input-port"
(test-get-char-1
(transcoded-port (make-n-byte-bytevector-binary-input-port 256)
(make-transcoder (latin-1-codec) 'none 'raise))
256))
(test "peeking 256 bytes from latin1 transcoded port"
(test-peek-char-1
(transcoded-port (make-n-byte-bytevector-binary-input-port 256)
(make-transcoder (latin-1-codec) 'none 'raise))
256))
(test "latin1 transcoded port port-eof?"
(test-textual-port-eof?-1
(transcoded-port (make-n-byte-bytevector-binary-input-port 256)
(make-transcoder (latin-1-codec) 'none 'raise))
256))
;;;
(test "reading 128 utf8 chars from bytevector-input-port"
(test-get-char-1
(open-bytevector-input-port (make-ascii-range-bytevector)
(make-transcoder (utf-8-codec) 'none 'raise))
128))
(test "peeking 128 chars from utf8 port"
(test-peek-char-1
(open-bytevector-input-port (make-ascii-range-bytevector)
(make-transcoder (utf-8-codec) 'none 'raise))
128))
(test "utf8 transcoded port port-eof?"
(test-textual-port-eof?-1
(open-bytevector-input-port (make-ascii-range-bytevector)
(make-transcoder (utf-8-codec) 'none 'raise))
128))

View File

@ -1 +1 @@
1190
1191

View File

@ -855,7 +855,7 @@
(define (print-ids ls)
(unless (null? ls)
(let-values ([(ls rest)
(split ls 80)])
(split ls 72)])
(for-each display ls)
(newline)
(print-ids rest))))