ikarus/lab/io-spec.ss

651 lines
24 KiB
Scheme

(library (io-spec)
(export
input-port? textual-port? port-eof?
open-bytevector-input-port
open-string-input-port
make-custom-binary-input-port
get-char lookahead-char get-u8 lookahead-u8 close-port
transcoded-port)
(import
(except (ikarus)
input-port? textual-port? port-eof?
open-bytevector-input-port
open-string-input-port
make-custom-binary-input-port
get-char lookahead-char get-u8 lookahead-u8 close-port
transcoded-port))
(define-struct $port
(index size buffer base-index transcoder closed? attrs
id read! write! get-position set-position! close))
(define $set-port-index! set-$port-index!)
(define $set-port-size! set-$port-size!)
(define $set-port-attrs! set-$port-attrs!)
(define $set-port-closed?! set-$port-closed?!)
(define $make-port make-$port)
(define fast-get-tag #x0001)
(define fast-put-tag #x0002)
(define fast-get-position-tag #x0004)
(define fast-get-mask #x00F0)
(define fast-get-utf8-tag #x0010)
(define fast-get-latin-tag #x0030)
(define fast-get-byte-tag #x0040)
(define fast-get-char-tag #x0080)
(define fast-put-mask #x0F00)
(define fast-put-utf8-tag #x0100)
(define fast-put-latin-tag #x0300)
(define fast-put-byte-tag #x0400)
(define fast-put-char-tag #x0800)
(define r6rs-mode-tag #x1000)
(define ($port-get-mode x)
(and ($port? x) (fxand ($port-attrs x) fast-get-mask)))
(define ($port-put-mode x)
(and ($port? x) (fxand ($port-attrs x) fast-put-mask)))
(define (u8? x)
(and (fixnum? x) (fx>= x 0) (fx< x 256)))
;;; everything above this line will turn into primitive
;;; ----------------------------------------------------------
(define ($make-custom-binary-input-port id
read! get-position set-position! close buffer-size)
(let ([bv (make-bytevector buffer-size)])
($make-port 0 0 bv 0 #f #f 0 id read! #f get-position
set-position! close)))
(define (make-custom-binary-input-port id
read! get-position set-position! close)
;;; FIXME: get-position and set-position! are ignored for now
(define who 'make-custom-binary-input-port)
(unless (string? id)
(error who "id is not a string" id))
(unless (procedure? read!)
(error who "read! is not a procedure" read!))
(unless (or (procedure? close) (not close))
(error who "close should be either a procedure or #f" close))
($make-custom-binary-input-port id read! get-position
set-position! close 256))
(define (input-transcoder-attrs x)
(cond
[(not x) ;;; binary input port
(fxior fast-get-tag fast-get-byte-tag)]
[(and (eq? 'latin-1-codec (transcoder-codec x))
(eq? 'none (transcoder-eol-style x)))
(fxior fast-get-tag fast-get-latin-tag)]
[else 0]))
(define open-bytevector-input-port
(case-lambda
[(bv) (open-bytevector-input-port bv #f)]
[(bv maybe-transcoder)
(unless (bytevector? bv)
(error 'open-bytevector-input-port
"not a bytevector" bv))
(when (and maybe-transcoder
(not (transcoder? maybe-transcoder)))
(error 'open-bytevector-input-port
"not a transcoder" maybe-transcoder))
($make-port 0 (bytevector-length bv) bv 0
maybe-transcoder
#f ;;; closed?
(input-transcoder-attrs maybe-transcoder)
"*bytevector-input-port*"
(lambda (bv i c) 0) ;;; read!
#f ;;; write!
#f ;;; FIXME: get-position
#f ;;; FIXME: set-position!
#f ;;; close
)]))
(define (transcoded-port p transcoder)
(define who 'transcoded-port)
(unless (transcoder? transcoder)
(error who "not a transcoder" transcoder))
(unless ($port? p) (error who "not a port" p))
(when ($port-transcoder p) (error who "not a binary port" p))
(let ([read! ($port-read! p)]
[closed? ($port-closed? p)])
($set-port-closed?! p #t)
($make-port
($port-index p)
($port-size p)
($port-buffer p)
($port-base-index p)
transcoder
closed?
(if read! (input-transcoder-attrs transcoder) 0)
($port-id p)
read!
($port-write! p)
($port-get-position p)
($port-set-position! p)
($port-close p))))
(define (input-port? p)
(and ($port? p)
($port-read! p)
#t))
(define (textual-port? p)
(and ($port? p)
($port-transcoder p)
#t))
(define (close-port p)
(cond
[(not ($port? p))
(error 'close-port "not a port" p)]
[($port-closed? p) (void)]
[else
(when ($port-write! p)
(flush-output-port p))
($set-port-closed?! p #t)
(let ([close ($port-close p)])
(when (procedure? close)
(close)))]))
(define-syntax define-rrr
(syntax-rules ()
[(_ name)
(define (name . args)
(apply error 'name "not implemented" args))]))
;;; ----------------------------------------------------------
(module (get-char lookahead-char)
(define (refill-bv-start p who)
(when ($port-closed? p) (error who "port is closed" p))
(let* ([bv ($port-buffer p)]
[n (bytevector-length bv)])
(let ([j (($port-read! p) bv 0 n)])
(unless (fixnum? j)
(error who "invalid return value from read! procedure" j))
(cond
[(fx>= j 0)
(unless (fx<= j n)
(error who "read! returned a value out of range" j))
($set-port-index! p 0)
($set-port-size! p j)
j]
[else
(error who "read! returned a value out of range" j)]))))
(define (refill-bv-buffer p who)
(when ($port-closed? p) (error who "port is closed" p))
(let ([bv ($port-buffer p)] [i ($port-index p)] [j ($port-size p)])
(let ([c0 (fx- j i)])
(bytevector-copy! bv i bv 0 c0)
(let* ([max (fx- (bytevector-length bv) c0)]
[c1 (($port-read! p) bv c0 max)])
(unless (fixnum? c1)
(error who "invalid return value from read! procedure" c1))
(cond
[(fx>= j 0)
(unless (fx<= j max)
(error who "read! returned a value out of range" j))
($set-port-index! p c0)
($set-port-size! p (fx+ c1 c0))
c1]
[else
(error who "read! returned a value out of range" c1)])))))
(define (get-char-latin-mode p who idx)
(let ([n (refill-bv-start p who)])
(cond
[(fx= n 0) (eof-object)]
[else
($set-port-index! p idx)
(integer->char (bytevector-u8-ref ($port-buffer p) 0))])))
(define (get-char-utf8-mode p who)
(define (do-error p who)
(case (transcoder-error-handling-mode ($port-transcoder p))
[(ignore) (get-char p)]
[(replace) #\xFFFD]
[(raise)
(raise (make-i/o-decoding-error p))]
[else (error who "cannot happen")]))
(let ([i ($port-index p)]
[j ($port-size p)]
[buf ($port-buffer p)])
(cond
[(fx= i j) ;;; exhausted
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0) (eof-object)]
[else (get-char p)]))]
[else
(let ([b0 (bytevector-u8-ref buf i)])
(cond
[(fx= (fxsra b0 5) #b110) ;;; two-byte-encoding
(let ([i (fx+ i 1)])
(cond
[(fx< i j)
(let ([b1 (bytevector-u8-ref buf i)])
(cond
[(fx= (fxsra b1 6) #b10)
($set-port-index! p (fx+ i 1))
(integer->char
(fxior (fxand b1 #b111111)
(fxsll (fxand b0 #b11111) 6)))]
[else
($set-port-index! p i)
(do-error p who)]))]
[else
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0)
($set-port-index! p (fx+ ($port-index p) 1))
(do-error p who)]
[else (get-char-utf8-mode p who)]))]))]
[(fx= (fxsra b0 4) #b1110) ;;; three-byte-encoding
(cond
[(fx< (fx+ i 2) j)
(let ([b1 (bytevector-u8-ref buf (fx+ i 1))]
[b2 (bytevector-u8-ref buf (fx+ i 2))])
(cond
[(fx= (fxsra (fxlogor b1 b2) 6) #b10)
(let ([n (fxlogor
(fxsll (fxand b0 #b1111) 12)
(fxsll (fxand b1 #b111111) 6)
(fxand b2 #b111111))])
(cond
[(fx<= #xD800 n #xDFFF)
($set-port-index! p (fx+ i 1))
(do-error p who)]
[else
($set-port-index! p (fx+ i 3))
(integer->char n)]))]
[else
($set-port-index! p (fx+ i 1))
(do-error p who)]))]
[else
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0)
($set-port-index! p (fx+ ($port-index p) 1))
(do-error p who)]
[else (get-char-utf8-mode p who)]))])]
[(fx= (fxsra b0 3) #b11110) ;;; four-byte-encoding
(cond
[(fx< (fx+ i 3) j)
(let ([b1 (bytevector-u8-ref buf (fx+ i 1))]
[b2 (bytevector-u8-ref buf (fx+ i 2))]
[b3 (bytevector-u8-ref buf (fx+ i 3))])
(cond
[(fx= (fxsra (fxlogor b1 b2 b3) 6) #b10)
(let ([n (fxlogor
(fxsll (fxand b0 #b111) 18)
(fxsll (fxand b1 #b111111) 12)
(fxsll (fxand b2 #b111111) 6)
(fxand b3 #b111111))])
(cond
[(fx<= #x10000 n #x10FFFF)
($set-port-index! p (fx+ i 4))
(integer->char n)]
[else
($set-port-index! p (fx+ i 1))
(do-error p who)]))]
[else
($set-port-index! p (fx+ i 1))
(do-error p who)]))]
[else
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0)
($set-port-index! p (fx+ ($port-index p) 1))
(do-error p who)]
[else (get-char-utf8-mode p who)]))])]
[else
($set-port-index! p (fx+ i 1))
(do-error p who)]))])))
(define (lookahead-char-utf8-mode p who)
(define (do-error p who)
(case (transcoder-error-handling-mode ($port-transcoder p))
[(ignore) (get-char p)]
[(replace) #\xFFFD]
[(raise)
(raise (make-i/o-decoding-error p))]
[else (error who "cannot happen")]))
(let ([i ($port-index p)]
[j ($port-size p)]
[buf ($port-buffer p)])
(cond
[(fx= i j) ;;; exhausted
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0) (eof-object)]
[else (lookahead-char p)]))]
[else
(let ([b0 (bytevector-u8-ref buf i)])
(cond
[(fx= (fxsra b0 5) #b110) ;;; two-byte-encoding
(let ([i (fx+ i 1)])
(cond
[(fx< i j)
(let ([b1 (bytevector-u8-ref buf i)])
(cond
[(fx= (fxsra b1 6) #b10)
(integer->char
(fxior (fxand b1 #b111111)
(fxsll (fxand b0 #b11111) 6)))]
[else
(do-error p who)]))]
[else
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0) (do-error p who)]
[else (lookahead-char-utf8-mode p who)]))]))]
[(fx= (fxsra b0 4) #b1110) ;;; three-byte-encoding
(cond
[(fx< (fx+ i 2) j)
(let ([b1 (bytevector-u8-ref buf (fx+ i 1))]
[b2 (bytevector-u8-ref buf (fx+ i 2))])
(cond
[(fx= (fxsra (fxlogor b1 b2) 6) #b10)
(let ([n (fxlogor
(fxsll (fxand b0 #b1111) 12)
(fxsll (fxand b1 #b111111) 6)
(fxand b2 #b111111))])
(cond
[(fx<= #xD800 n #xDFFF) (do-error p who)]
[else (integer->char n)]))]
[else (do-error p who)]))]
[else
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0) (do-error p who)]
[else (lookahead-char-utf8-mode p who)]))])]
[(fx= (fxsra b0 3) #b11110) ;;; four-byte-encoding
(cond
[(fx< (fx+ i 3) j)
(let ([b1 (bytevector-u8-ref buf (fx+ i 1))]
[b2 (bytevector-u8-ref buf (fx+ i 2))]
[b3 (bytevector-u8-ref buf (fx+ i 3))])
(cond
[(fx= (fxsra (fxlogor b1 b2 b3) 6) #b10)
(let ([n (fxlogor
(fxsll (fxand b0 #b111) 18)
(fxsll (fxand b1 #b111111) 12)
(fxsll (fxand b2 #b111111) 6)
(fxand b3 #b111111))])
(cond
[(fx<= #x10000 n #x10FFFF)
(integer->char n)]
[else
(do-error p who)]))]
[else
(do-error p who)]))]
[else
(let ([bytes (refill-bv-buffer p who)])
(cond
[(fx= bytes 0)
(do-error p who)]
[else (get-char-utf8-mode p who)]))])]
[else (do-error p who)]))])))
(define-rrr get-char-char-mode)
(define (advance-utf8-bom p who)
(let ([i ($port-index p)]
[j ($port-size p)]
[buf ($port-buffer p)])
(cond
[(fx< (fx+ i 3) j)
(when (and (fx=? (bytevector-u8-ref buf i) #xEF)
(fx=? (bytevector-u8-ref buf i) #xBB)
(fx=? (bytevector-u8-ref buf i) #xBF))
($set-port-index! p (fx+ i 3)))]
[else
(let ([c (fx- j i)])
(bytevector-copy! buf i buf 0 c)
(let ([read! ($port-read! p)])
(let ([c1 (read! buf c (fx- (bytevector-length buf) c))])
($set-port-index! p c)
($set-port-size! p (fx+ c c1))
(unless (fx= c1 0)
(advance-utf8-bom p who)))))])))
(define (speedup-input-port p who)
(unless (input-port? p)
(error who "not an input port" p))
(let ([tr ($port-transcoder p)])
(unless tr
(error who "not a textual port" p))
(case (transcoder-codec tr)
[(utf-8-codec)
;;;
(advance-utf8-bom p who)
($set-port-attrs! p
(fxior fast-get-tag fast-get-utf8-tag))]
[else (error 'slow-get-char "codec not handled")])))
(define-rrr slow-lookahead-char)
(define-rrr lookahead-char-char-mode)
;;;
(define (lookahead-char p)
(define who 'lookahead-char)
(let ([m ($port-get-mode p)])
(cond
[(eq? m fast-get-utf8-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(let ([b (bytevector-u8-ref ($port-buffer p) i)])
(cond
[(fx< b 128) (integer->char b)]
[else (lookahead-char-utf8-mode p who)]))]
[else
(lookahead-char-utf8-mode p who)]))]
[(eq? m fast-get-char-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(string-ref ($port-buffer p) i)]
[else
(lookahead-char-char-mode p who)]))]
[(eq? m fast-get-latin-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(integer->char
(bytevector-u8-ref ($port-buffer p) i))]
[else
(get-char-latin-mode p who 0)]))]
[else
(speedup-input-port p who)
(lookahead-char p)])))
;;;
(define (get-char p)
(define who 'get-char)
(let ([m ($port-get-mode p)])
(cond
[(eq? m fast-get-utf8-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(let ([b (bytevector-u8-ref ($port-buffer p) i)])
(cond
[(fx< b 128)
($set-port-index! p (fx+ i 1))
(integer->char b)]
[else (get-char-utf8-mode p who)]))]
[else
(get-char-utf8-mode p who)]))]
[(eq? m fast-get-char-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
($set-port-index! p (fx+ i 1))
(string-ref ($port-buffer p) i)]
[else
(get-char-char-mode p who)]))]
[(eq? m fast-get-latin-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
($set-port-index! p (fx+ i 1))
(integer->char
(bytevector-u8-ref ($port-buffer p) i))]
[else
(get-char-latin-mode p who 1)]))]
[else
(speedup-input-port p who)
(get-char p)]))))
;;; ----------------------------------------------------------
(define (assert-binary-input-port p who)
(unless ($port? p) (error who "not a port" p))
(when ($port-closed? p) (error who "port is closed" p))
(when ($port-transcoder p) (error who "port is not binary" p))
(unless ($port-read! p)
(error who "port is not an input port" p)))
(module (get-u8 lookahead-u8)
(define (get-u8-byte-mode p who start)
(when ($port-closed? p) (error who "port is closed" p))
(let* ([bv ($port-buffer p)]
[n (bytevector-length bv)])
(let ([j (($port-read! p) bv 0 n)])
(unless (fixnum? j)
(error who "invalid return value from read! procedure" j))
(cond
[(fx> j 0)
(unless (fx<= j n)
(error who "read! returned a value out of range" j))
($set-port-index! p start)
($set-port-size! p j)
(bytevector-u8-ref bv 0)]
[(fx= j 0) (eof-object)]
[else
(error who "read! returned a value out of range" j)]))))
(define (slow-get-u8 p who start)
(assert-binary-input-port p who)
($set-port-attrs! p (fxior fast-get-tag fast-get-byte-tag))
(get-u8-byte-mode p who start))
;;;
(define (get-u8 p)
(define who 'get-u8)
(let ([m ($port-get-mode p)])
(cond
[(eq? m fast-get-byte-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
($set-port-index! p (fx+ i 1))
(bytevector-u8-ref ($port-buffer p) i)]
[else (get-u8-byte-mode p who 1)]))]
[else (slow-get-u8 p who 1)])))
(define (lookahead-u8 p)
(define who 'lookahead-u8)
(let ([m ($port-get-mode p)])
(cond
[(eq? m fast-get-byte-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
(bytevector-u8-ref ($port-buffer p) i)]
[else (get-u8-byte-mode p who 0)]))]
[else (slow-get-u8 p who 0)]))))
(define (port-eof? p)
(define who 'port-eof?)
(let ([m ($port-get-mode p)])
(cond
[(not (eq? m 0))
(if (fx< ($port-index p) ($port-size p))
#f
(if ($port-transcoder p)
(eof-object? (lookahead-char p))
(eof-object? (lookahead-u8 p))))]
[(input-port? p)
(when ($port-closed? p)
(error 'port-eof? "port is closed" p))
(if (textual-port? p)
(eof-object? (lookahead-char p))
(eof-object? (lookahead-u8 p)))]
[else (error 'port-eof? "not an input port" p)])))
)
#!eof
;;; ----------------------------------------------------------
;;; do input ports first.
;;; ----------------------------------------------------------
(module (put-char)
(define-rrr put-char-utf8-mode)
(define-rrr put-char-latin-mode)
(define-rrr put-char-char-mode)
(define-rrr slow-put-char)
;;;
(define (put-char p c)
(define who 'put-char)
(unless (char? c) (error who "not a char" c))
(let ([m ($port-put-mode p)])
(cond
[(eq? m fast-put-utf8-tag)
(let ([i ($port-index p)])
(let ([b (char->integer c)])
(cond
[(and (fx< i ($port-size p)) (fx< b 128))
($set-port-index! p (fx+ i 1))
(bytevector-u8-set! ($port-buffer p) i b)]
[else
(put-char-utf8-mode p b who)])))]
[(eq? m fast-put-char-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
($set-port-index! p (fx+ i 1))
(string-set! ($port-buffer p) i c)]
[else
(put-char-char-mode p c who)]))]
[(eq? m fast-put-latin-tag)
(let ([i ($port-index p)])
(let ([b (char->integer c)])
(cond
[(and (fx< i ($port-size p)) (fx< b 256))
($set-port-index! p (fx+ i 1))
(bytevector-u8-set! ($port-buffer p) i b)]
[else
(put-char-latin-mode p b who)])))]
[else (slow-put-char p c who)]))))
(module (put-u8)
(define-rrr put-u8-byte-mode)
(define-rrr slow-put-u8)
;;;
(define (put-u8 p b)
(define who 'put-u8)
(unless (u8? b) (error who "not a u8" b))
(let ([m ($port-put-mode p)])
(cond
[(eq? m fast-put-byte-tag)
(let ([i ($port-index p)])
(cond
[(fx< i ($port-size p))
($set-port-index! p (fx+ i 1))
(bytevector-u8-set! ($port-buffer p) i b)]
[else
(put-u8-byte-mode p b who)]))]
[else (slow-put-u8 p b who)]))))