Skip non-ASCII bytes altogether
This permits UTF-16 input, with or without a byte order mark.
This commit is contained in:
parent
0a9c678e64
commit
aac4a4bdec
|
@ -10,17 +10,19 @@
|
||||||
(lambda (port) (read-bytevector 1000 port)))))
|
(lambda (port) (read-bytevector 1000 port)))))
|
||||||
(if (eof-object? bytes) (make-bytevector 0) bytes)))
|
(if (eof-object? bytes) (make-bytevector 0) bytes)))
|
||||||
(i 0))
|
(i 0))
|
||||||
|
;; Return the byte at the current read position `i` of `bytes` if its value
;; lies in the range 1..126; otherwise advance `i` past it and try again.
;; Returns an eof-object once `i` reaches the end of `bytes`.
;;
;; Side effect: `i` is left pointing at the returned byte (or at the end of
;; the bytevector), so every skipped byte is permanently consumed. The byte
;; that is returned is NOT consumed.
;;
;; Bytes with value 0 and values 127..255 are the ones skipped, so zero
;; bytes interleaved with ASCII text are stepped over as well.
(define (peek-next-ascii-byte)
  (let skip-non-ascii ()
    (if (< i (bytevector-length bytes))
        (let ((candidate (bytevector-u8-ref bytes i)))
          (cond ((<= 1 candidate 126) candidate)
                (else
                 ;; Not in the accepted range: drop it and look at the next one.
                 (set! i (+ i 1))
                 (skip-non-ascii))))
        (eof-object))))
|
||||||
(define (read-char? k)
|
(define (read-char? k)
|
||||||
(let* ((remain? (< i (bytevector-length bytes)))
|
(let* ((next-byte (peek-next-ascii-byte))
|
||||||
(next-byte (if remain?
|
(next-char (if (eof-object? next-byte)
|
||||||
(bytevector-u8-ref bytes i)
|
next-byte
|
||||||
(eof-object)))
|
(integer->char next-byte)))
|
||||||
(next-char (cond ((eof-object? next-byte)
|
|
||||||
next-byte)
|
|
||||||
((<= 1 next-byte 126)
|
|
||||||
(integer->char next-byte))
|
|
||||||
(else
|
|
||||||
next-byte)))
|
|
||||||
(consume? (cond ((procedure? k) (k next-char))
|
(consume? (cond ((procedure? k) (k next-char))
|
||||||
((char? k) (eqv? k next-char))
|
((char? k) (eqv? k next-char))
|
||||||
(else #f))))
|
(else #f))))
|
||||||
|
|
Loading…
Reference in New Issue