From aac4a4bdecf390bfde022ebd517d181e18bddd23 Mon Sep 17 00:00:00 2001 From: Lassi Kortela Date: Sun, 12 May 2019 17:21:48 +0300 Subject: [PATCH] Skip non-ASCII bytes altogether This permits UTF-16, with or without byte order mark --- encoding-reader.scm | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/encoding-reader.scm b/encoding-reader.scm index 9edf3e4..00be003 100644 --- a/encoding-reader.scm +++ b/encoding-reader.scm @@ -10,17 +10,19 @@ (lambda (port) (read-bytevector 1000 port))))) (if (eof-object? bytes) (make-bytevector 0) bytes))) (i 0)) + (define (peek-next-ascii-byte) + (if (not (< i (bytevector-length bytes))) + (eof-object) + (let ((next-byte (bytevector-u8-ref bytes i))) + (if (<= 1 next-byte 126) + next-byte + (begin (set! i (+ i 1)) + (peek-next-ascii-byte)))))) (define (read-char? k) - (let* ((remain? (< i (bytevector-length bytes))) - (next-byte (if remain? - (bytevector-u8-ref bytes i) - (eof-object))) - (next-char (cond ((eof-object? next-byte) - next-byte) - ((<= 1 next-byte 126) - (integer->char next-byte)) - (else - next-byte))) + (let* ((next-byte (peek-next-ascii-byte)) + (next-char (if (eof-object? next-byte) + next-byte + (integer->char next-byte))) (consume? (cond ((procedure? k) (k next-char)) ((char? k) (eqv? k next-char)) (else #f))))