From 1f352825f5547524e2998c8df9cc666ba9739ae9 Mon Sep 17 00:00:00 2001 From: Abdulaziz Ghuloum Date: Thu, 6 Dec 2007 08:14:05 -0500 Subject: [PATCH] Transcoding to latin-1 now works in the new IO layer. --- lab/io-spec.ss | 109 +++++++++++++++++++++++++++++++++++++------ lab/io-test.ss | 102 ++++++++++++++++++++++++++++++++++++++-- scheme/last-revision | 2 +- scheme/todo-r6rs.ss | 2 +- 4 files changed, 197 insertions(+), 18 deletions(-) diff --git a/lab/io-spec.ss b/lab/io-spec.ss index 94199a3..aacab1f 100644 --- a/lab/io-spec.ss +++ b/lab/io-spec.ss @@ -4,13 +4,15 @@ input-port? textual-port? port-eof? open-bytevector-input-port make-custom-binary-input-port - get-char lookahead-char get-u8 lookahead-u8 close-port) + get-char lookahead-char get-u8 lookahead-u8 close-port + transcoded-port) (import (except (ikarus) input-port? textual-port? port-eof? open-bytevector-input-port make-custom-binary-input-port - get-char lookahead-char get-u8 lookahead-u8 close-port)) + get-char lookahead-char get-u8 lookahead-u8 close-port + transcoded-port)) (define-struct $port (index size buffer base-index codec closed? attrs @@ -72,31 +74,31 @@ ($make-custom-binary-input-port id read! get-position set-position! close 256)) - (define (transcoder-attrs x) - (import (ikarus system $transcoders)) + (define (input-transcoder-attrs x) (cond - [(not x) ;;; binary + [(not x) ;;; binary input port (fxior fast-get-tag fast-get-byte-tag)] - [else - (error 'transcoder-attrs "not handled" x)])) + [(and (eq? 'latin-1-codec (transcoder-codec x)) + (eq? 'none (transcoder-eol-style x))) + (fxior fast-get-tag fast-get-latin-tag)] + [else 0])) (define open-bytevector-input-port (case-lambda [(bv) (open-bytevector-input-port bv #f)] [(bv maybe-transcoder) - (import (ikarus system $transcoders)) (unless (bytevector? bv) (error 'open-bytevector-input-port "not a bytevector" bv)) (when (and maybe-transcoder - (not ($transcoder? maybe-transcoder))) + (not (transcoder? maybe-transcoder))) (error 'open-bytevector-input-port "not a transcoder" maybe-transcoder)) ($make-port 0 (bytevector-length bv) bv 0 maybe-transcoder #f ;;; closed? - (transcoder-attrs maybe-transcoder) + (input-transcoder-attrs maybe-transcoder) "*bytevector-input-port*" (lambda (bv i c) 0) ;;; read! #f ;;; write! @@ -105,6 +107,32 @@ #f ;;; close )])) + (define (transcoded-port p transcoder) + (define who 'transcoded-port) + (unless (transcoder? transcoder) + (error who "not a transcoder" transcoder)) + (unless ($port? p) (error who "not a port" p)) + (when ($port-codec p) (error who "not a binary port" p)) + (let ([read! ($port-read! p)] + [closed? ($port-closed? p)]) + ($set-port-closed?! p #t) + ($make-port + ($port-index p) + ($port-size p) + ($port-buffer p) + ($port-base-index p) + transcoder + closed? + (if read! (input-transcoder-attrs transcoder) 0) + ($port-id p) + read! + ($port-write! p) + ($port-get-position p) + ($port-set-position! p) + ($port-close p)))) + + + (define (input-port? p) (and ($port? p) ($port-read! p) @@ -136,11 +164,66 @@ ;;; ---------------------------------------------------------- (module (get-char lookahead-char) + (define (refill-bv-start p who) + (when ($port-closed? p) (error who "port is closed" p)) + (let* ([bv ($port-buffer p)] + [n (bytevector-length bv)]) + (let ([j (($port-read! p) bv 0 n)]) + (unless (fixnum? j) + (error who "invalid return value from read! procedure" j)) + (cond + [(fx>= j 0) + (unless (fx<= j n) + (error who "read! returned a value out of range" j)) + ($set-port-index! p 0) + ($set-port-size! p j) + j] + [else + (error who "read! returned a value out of range" j)])))) + (define (get-char-latin-mode p who idx) + (let ([n (refill-bv-start p who)]) + (cond + [(fx= n 0) (eof-object)] + [else + ($set-port-index! p idx) + (integer->char (bytevector-u8-ref ($port-buffer p) 0))]))) (define-rrr get-char-utf8-mode) - (define-rrr get-char-latin-mode) (define-rrr get-char-char-mode) (define-rrr slow-get-char) - (define-rrr lookahead-char) + (define-rrr slow-lookahead-char) + (define-rrr lookahead-char-utf8-mode) + (define-rrr lookahead-char-char-mode) + ;;; + (define (lookahead-char p) + (define who 'lookahead-char) + (let ([m ($port-get-mode p)]) + (cond + [(eq? m fast-get-utf8-tag) + (let ([i ($port-index p)]) + (cond + [(fx< i ($port-size p)) + (let ([b (bytevector-u8-ref ($port-buffer p) i)]) + (cond + [(fx< b 128) (integer->char b)] + [else (lookahead-char-utf8-mode p)]))] + [else + (lookahead-char-utf8-mode p who)]))] + [(eq? m fast-get-char-tag) + (let ([i ($port-index p)]) + (cond + [(fx< i ($port-size p)) + (string-ref ($port-buffer p) i)] + [else + (lookahead-char-char-mode p who)]))] + [(eq? m fast-get-latin-tag) + (let ([i ($port-index p)]) + (cond + [(fx< i ($port-size p)) + (integer->char + (bytevector-u8-ref ($port-buffer p) i))] + [else + (get-char-latin-mode p who 0)]))] + [else (slow-lookahead-char p who)]))) ;;; (define (get-char p) (define who 'get-char) @@ -174,7 +257,7 @@ (integer->char (bytevector-u8-ref ($port-buffer p) i))] [else - (get-char-latin-mode p who)]))] + (get-char-latin-mode p who 1)]))] [else (slow-get-char p who)])))) ;;; ---------------------------------------------------------- diff --git a/lab/io-test.ss b/lab/io-test.ss index b9af090..0fc4365 100755 --- a/lab/io-test.ss +++ b/lab/io-test.ss @@ -34,6 +34,25 @@ (f (+ i 1)))) (open-bytevector-input-port bv))) +(define (make-ascii-range-bytevector) + (let ([bv (make-bytevector 128)]) + (let f ([i 0]) + (unless (= i 128) + (bytevector-u8-set! bv i i) + (f (+ i 1)))) + bv)) + +(define (make-ascii-range-bytevector+utf8-bom) + (let ([bv (make-bytevector (+ 128 3))]) + (bytevector-u8-set! bv 0 #xEF) + (bytevector-u8-set! bv 1 #xBB) + (bytevector-u8-set! bv 2 #xBF) + (let f ([i 0]) + (unless (= i 128) + (bytevector-u8-set! bv (+ i 3) i) + (f (+ i 1)))) + bv)) + (define (test-get-u8-1 p n) (let f ([i 0]) (let ([x (get-u8 p)]) @@ -45,6 +64,18 @@ [else (error 'test0 "incorrect value returned" x)])))) +(define (test-get-char-1 p n) + (let f ([i 0]) + (let ([x (get-char p)]) + (cond + [(eof-object? x) + (unless (= i n) + (error 'test0 "premature termination" i))] + [(= (char->integer x) i) (f (+ i 1))] + [else + (error 'test0 "incorrect value returned" x)])))) + + (define (test-peek-u8-1 p n) (let f ([i 0]) (let* ([px (lookahead-u8 p)] @@ -58,7 +89,20 @@ [else (error #f "incorrect value returned" x i)])))) -(define (test-port-eof?-1 p n) +(define (test-peek-char-1 p n) + (let f ([i 0]) + (let* ([px (lookahead-char p)] + [x (get-char p)]) + (cond + [(not (eqv? px x)) (error #f "peek invalid" px x)] + [(eof-object? x) + (unless (= i n) + (error #f "premature termination" i))] + [(= (char->integer x) i) (f (+ i 1))] + [else + (error #f "incorrect value returned" x i)])))) + +(define (test-binary-port-eof?-1 p n) (let f ([i 0]) (cond [(port-eof? p) @@ -70,6 +114,18 @@ [else (error #f "incorrect value returned" i)]))) +(define (test-textual-port-eof?-1 p n) + (let f ([i 0]) + (cond + [(port-eof? p) + (unless (= i n) + (error #f "premature termination" i)) + (assert (eof-object? (lookahead-char p))) + (assert (eof-object? (get-char p)))] + [(= (char->integer (get-char p)) i) (f (+ i 1))] + [else + (error #f "incorrect value returned" i)]))) + (test "reading 256 bytes in ascending order" (test-get-u8-1 (make-n-byte-custom-binary-input-port 256) 256)) @@ -93,7 +149,7 @@ (test-peek-u8-1 (make-n-byte-custom-binary-input-port 256) 256)) (test "custom-binary-port port-eof?" - (test-port-eof?-1 (make-n-byte-custom-binary-input-port 256) 256)) + (test-binary-port-eof?-1 (make-n-byte-custom-binary-input-port 256) 256)) ;;; (test "reading 256 bytes from bytevector-input-port" @@ -103,6 +159,46 @@ (test-peek-u8-1 (make-n-byte-bytevector-binary-input-port 256) 256)) (test "bytevector-binary-port port-eof?" - (test-port-eof?-1 (make-n-byte-bytevector-binary-input-port 256) 256)) + (test-binary-port-eof?-1 (make-n-byte-bytevector-binary-input-port 256) 256)) + +;;; + +(test "reading 256 latin1 chars from bytevector-input-port" + (test-get-char-1 + (transcoded-port (make-n-byte-bytevector-binary-input-port 256) + (make-transcoder (latin-1-codec) 'none 'raise)) + 256)) + +(test "peeking 256 bytes from latin1 transcoded port" + (test-peek-char-1 + (transcoded-port (make-n-byte-bytevector-binary-input-port 256) + (make-transcoder (latin-1-codec) 'none 'raise)) + 256)) + +(test "latin1 transcoded port port-eof?" + (test-textual-port-eof?-1 + (transcoded-port (make-n-byte-bytevector-binary-input-port 256) + (make-transcoder (latin-1-codec) 'none 'raise)) + 256)) + +;;; + +(test "reading 128 utf8 chars from bytevector-input-port" + (test-get-char-1 + (open-bytevector-input-port (make-ascii-range-bytevector) + (make-transcoder (utf-8-codec) 'none 'raise)) + 128)) + +(test "peeking 128 chars from utf8 port" + (test-peek-char-1 + (open-bytevector-input-port (make-ascii-range-bytevector) + (make-transcoder (utf-8-codec) 'none 'raise)) + 128)) + +(test "utf8 transcoded port port-eof?" + (test-textual-port-eof?-1 + (open-bytevector-input-port (make-ascii-range-bytevector) + (make-transcoder (utf-8-codec) 'none 'raise)) + 128)) diff --git a/scheme/last-revision b/scheme/last-revision index 0b2a2c0..f37c021 100644 --- a/scheme/last-revision +++ b/scheme/last-revision @@ -1 +1 @@ -1190 +1191 diff --git a/scheme/todo-r6rs.ss b/scheme/todo-r6rs.ss index 4c6ac2d..a1bd5b4 100755 --- a/scheme/todo-r6rs.ss +++ b/scheme/todo-r6rs.ss @@ -855,7 +855,7 @@ (define (print-ids ls) (unless (null? ls) (let-values ([(ls rest) - (split ls 80)]) + (split ls 72)]) (for-each display ls) (newline) (print-ids rest))))