diff --git a/encoding-reader-simple.scm b/encoding-reader-simple.scm new file mode 100644 index 0000000..384da36 --- /dev/null +++ b/encoding-reader-simple.scm @@ -0,0 +1,61 @@ +(import (scheme base) (scheme char) (scheme file) + (scheme read) (scheme write)) + +(define (read-declarations-from-file filename) + (let ((bytes (let ((bytes (call-with-port + (open-binary-input-file filename) + (lambda (port) (read-bytevector 1000 port))))) + (if (eof-object? bytes) (make-bytevector 0) bytes))) + (i 0)) + (define (peek-next-ascii-byte) + (if (not (< i (bytevector-length bytes))) + (eof-object) + (let ((next-byte (bytevector-u8-ref bytes i))) + (if (<= 1 next-byte 126) + next-byte + (begin (set! i (+ i 1)) + (peek-next-ascii-byte)))))) + (define (read-char? k) + (let* ((next-byte (peek-next-ascii-byte)) + (next-char (if (eof-object? next-byte) + next-byte (integer->char next-byte))) + (consume? (cond ((procedure? k) (k next-char)) + ((char? k) (eqv? k next-char)) + (else #f)))) + (cond (consume? (set! i (+ i 1)) next-char) + (else #f)))) + (define (read-char* k) + (let loop ((chars '())) + (let ((c (read-char? k))) + (if c (loop (append chars (list c))) + (if (null? chars) #f (list->string chars)))))) + (define (symbol-char? c) + (not (or (eof-object? c) (char-whitespace? c) + (eqv? c #\") (eqv? c #\() (eqv? c #\))))) + (define (read->eol) + (read-char* (lambda (c) (not (eqv? c #\newline))))) + (define (skip-white-comm) + (cond ((read-char? #\;) (read->eol) (skip-white-comm)) + ((read-char* char-whitespace?) (skip-white-comm)) + (else #f))) + (define (read-lexeme) + (skip-white-comm) + (cond ((read-char? #\()) ((read-char? #\))) ((read-char* symbol-char?)) + (else #f))) + (let* ((shebang (and (read-char? #\#) (read-char? #\!) (read->eol)))) + (cons shebang + (and (eqv? (read-lexeme) #\() (equal? (read-lexeme) "declare-file") + (eqv? (read-lexeme) #\() (equal? (read-lexeme) "coding") + (let ((coding (read-lexeme))) + (and (string? coding) (eqv? (read-lexeme) #\)) + (string-downcase coding)))))))) + +(define (writeln x) + (write x) + (newline)) + +(writeln (read-declarations-from-file "test-big5.scm")) +(writeln (read-declarations-from-file "test-euc-jp.scm")) +(writeln (read-declarations-from-file "test-shift-jis.scm")) +(writeln (read-declarations-from-file "test-utf16-be.scm")) +(writeln (read-declarations-from-file "test-utf16-le.scm"))