Compare commits
10 Commits
4edcffdc3a
...
1190f32ab4
Author | SHA1 | Date |
---|---|---|
|
1190f32ab4 | |
|
7f70dd8906 | |
|
d5ea8c21ec | |
|
eeeea2c93e | |
|
0f7db91954 | |
|
298a4337a8 | |
|
aac4a4bdec | |
|
0a9c678e64 | |
|
2ab02b063f | |
|
000a898778 |
|
@ -0,0 +1,61 @@
|
||||||
|
(import (scheme base) (scheme char) (scheme file)
|
||||||
|
(scheme read) (scheme write))
|
||||||
|
|
||||||
|
(define (read-declarations-from-file filename)
|
||||||
|
(let ((bytes (let ((bytes (call-with-port
|
||||||
|
(open-binary-input-file filename)
|
||||||
|
(lambda (port) (read-bytevector 1000 port)))))
|
||||||
|
(if (eof-object? bytes) (make-bytevector 0) bytes)))
|
||||||
|
(i 0))
|
||||||
|
(define (peek-next-ascii-byte)
|
||||||
|
(if (not (< i (bytevector-length bytes)))
|
||||||
|
(eof-object)
|
||||||
|
(let ((next-byte (bytevector-u8-ref bytes i)))
|
||||||
|
(if (<= 1 next-byte 126)
|
||||||
|
next-byte
|
||||||
|
(begin (set! i (+ i 1))
|
||||||
|
(peek-next-ascii-byte))))))
|
||||||
|
(define (read-char? k)
|
||||||
|
(let* ((next-byte (peek-next-ascii-byte))
|
||||||
|
(next-char (if (eof-object? next-byte)
|
||||||
|
next-byte (integer->char next-byte)))
|
||||||
|
(consume? (cond ((procedure? k) (k next-char))
|
||||||
|
((char? k) (eqv? k next-char))
|
||||||
|
(else #f))))
|
||||||
|
(cond (consume? (set! i (+ i 1)) next-char)
|
||||||
|
(else #f))))
|
||||||
|
(define (read-char* k)
|
||||||
|
(let loop ((chars '()))
|
||||||
|
(let ((c (read-char? k)))
|
||||||
|
(if c (loop (append chars (list c)))
|
||||||
|
(if (null? chars) #f (list->string chars))))))
|
||||||
|
(define (symbol-char? c)
|
||||||
|
(not (or (eof-object? c) (char-whitespace? c)
|
||||||
|
(eqv? c #\") (eqv? c #\() (eqv? c #\)))))
|
||||||
|
(define (read->eol)
|
||||||
|
(read-char* (lambda (c) (not (eqv? c #\newline)))))
|
||||||
|
(define (skip-white-comm)
|
||||||
|
(cond ((read-char? #\;) (read->eol) (skip-white-comm))
|
||||||
|
((read-char? char-whitespace?) (skip-white-comm))
|
||||||
|
(else #f)))
|
||||||
|
(define (read-lexeme)
|
||||||
|
(skip-white-comm)
|
||||||
|
(cond ((read-char? #\()) ((read-char? #\))) ((read-char* symbol-char?))
|
||||||
|
(else #f)))
|
||||||
|
(let* ((shebang (and (read-char? #\#) (read-char? #\!) (read->eol))))
|
||||||
|
(cons shebang
|
||||||
|
(and (eqv? (read-lexeme) #\() (equal? (read-lexeme) "declare-file")
|
||||||
|
(eqv? (read-lexeme) #\() (equal? (read-lexeme) "coding")
|
||||||
|
(let ((coding (read-lexeme)))
|
||||||
|
(and (string? coding) (eqv? (read-lexeme) #\))
|
||||||
|
(string-downcase coding))))))))
|
||||||
|
|
||||||
|
(define (writeln x)
|
||||||
|
(write x)
|
||||||
|
(newline))
|
||||||
|
|
||||||
|
(writeln (read-declarations-from-file "test-big5.scm"))
|
||||||
|
(writeln (read-declarations-from-file "test-euc-jp.scm"))
|
||||||
|
(writeln (read-declarations-from-file "test-shift-jis.scm"))
|
||||||
|
(writeln (read-declarations-from-file "test-utf16-be.scm"))
|
||||||
|
(writeln (read-declarations-from-file "test-utf16-le.scm"))
|
|
@ -10,17 +10,19 @@
|
||||||
(lambda (port) (read-bytevector 1000 port)))))
|
(lambda (port) (read-bytevector 1000 port)))))
|
||||||
(if (eof-object? bytes) (make-bytevector 0) bytes)))
|
(if (eof-object? bytes) (make-bytevector 0) bytes)))
|
||||||
(i 0))
|
(i 0))
|
||||||
|
(define (peek-next-ascii-byte)
|
||||||
|
(if (not (< i (bytevector-length bytes)))
|
||||||
|
(eof-object)
|
||||||
|
(let ((next-byte (bytevector-u8-ref bytes i)))
|
||||||
|
(if (<= 1 next-byte 126)
|
||||||
|
next-byte
|
||||||
|
(begin (set! i (+ i 1))
|
||||||
|
(peek-next-ascii-byte))))))
|
||||||
(define (read-char? k)
|
(define (read-char? k)
|
||||||
(let* ((remain? (< i (bytevector-length bytes)))
|
(let* ((next-byte (peek-next-ascii-byte))
|
||||||
(next-byte (if remain?
|
(next-char (if (eof-object? next-byte)
|
||||||
(bytevector-u8-ref bytes i)
|
next-byte
|
||||||
(eof-object)))
|
(integer->char next-byte)))
|
||||||
(next-char (cond ((eof-object? next-byte)
|
|
||||||
next-byte)
|
|
||||||
((<= 1 next-byte 126)
|
|
||||||
(integer->char next-byte))
|
|
||||||
(else
|
|
||||||
next-byte)))
|
|
||||||
(consume? (cond ((procedure? k) (k next-char))
|
(consume? (cond ((procedure? k) (k next-char))
|
||||||
((char? k) (eqv? k next-char))
|
((char? k) (eqv? k next-char))
|
||||||
(else #f))))
|
(else #f))))
|
||||||
|
@ -40,6 +42,13 @@
|
||||||
(eqv? c #\")
|
(eqv? c #\")
|
||||||
(eqv? c #\()
|
(eqv? c #\()
|
||||||
(eqv? c #\)))))
|
(eqv? c #\)))))
|
||||||
|
(define (string-char? c)
|
||||||
|
(not (or (eof-object? c)
|
||||||
|
(eqv? c #\tab)
|
||||||
|
(eqv? c #\newline)
|
||||||
|
(eqv? c #\return)
|
||||||
|
(eqv? c #\")
|
||||||
|
(eqv? c #\\))))
|
||||||
(define (skip-char* k)
|
(define (skip-char* k)
|
||||||
(when (read-char? k) (skip-char* k)))
|
(when (read-char? k) (skip-char* k)))
|
||||||
(define (read-rest-of-line)
|
(define (read-rest-of-line)
|
||||||
|
@ -60,6 +69,14 @@
|
||||||
#f
|
#f
|
||||||
(list->string chars))
|
(list->string chars))
|
||||||
(loop (append chars (list c)))))))
|
(loop (append chars (list c)))))))
|
||||||
|
(define (read-string)
|
||||||
|
(let loop ((chars '()))
|
||||||
|
(if (read-char? #\")
|
||||||
|
(list->string chars)
|
||||||
|
(let ((c (read-char? string-char?)))
|
||||||
|
(if (not c)
|
||||||
|
(eof-object)
|
||||||
|
(loop (append chars (list c))))))))
|
||||||
(define (read-list)
|
(define (read-list)
|
||||||
(let loop ((xs '()))
|
(let loop ((xs '()))
|
||||||
(skip-whitespace-and-comments)
|
(skip-whitespace-and-comments)
|
||||||
|
@ -71,19 +88,23 @@
|
||||||
(loop (append xs (list x))))))))
|
(loop (append xs (list x))))))))
|
||||||
(define (read-form)
|
(define (read-form)
|
||||||
(skip-whitespace-and-comments)
|
(skip-whitespace-and-comments)
|
||||||
(if (read-char? #\()
|
(cond ((read-char? #\()
|
||||||
(read-list)
|
(read-list))
|
||||||
(let ((symbol-name (read-char* not-special-char?)))
|
((read-char? #\")
|
||||||
(if symbol-name
|
(read-string))
|
||||||
(string->symbol symbol-name)
|
(else
|
||||||
(eof-object)))))
|
(let ((symbol-name (read-char* not-special-char?)))
|
||||||
|
(if symbol-name
|
||||||
|
(string->symbol symbol-name)
|
||||||
|
(eof-object))))))
|
||||||
(let* ((shebang (if (and (read-char? #\#) (read-char? #\!))
|
(let* ((shebang (if (and (read-char? #\#) (read-char? #\!))
|
||||||
(read-rest-of-line)
|
(read-rest-of-line)
|
||||||
#f))
|
#f))
|
||||||
(first-form (read-form))
|
(first-form (read-form))
|
||||||
(declarations (and (list? first-form)
|
(declarations (or (and (list? first-form)
|
||||||
(eqv? 'declare-file (car first-form))
|
(eqv? 'declare-file (car first-form))
|
||||||
(cdr first-form))))
|
(cdr first-form))
|
||||||
|
'())))
|
||||||
(if shebang
|
(if shebang
|
||||||
(cons (list 'shebang shebang) declarations)
|
(cons (list 'shebang shebang) declarations)
|
||||||
declarations))))
|
declarations))))
|
||||||
|
@ -92,4 +113,12 @@
|
||||||
(write x)
|
(write x)
|
||||||
(newline))
|
(newline))
|
||||||
|
|
||||||
(for-each writeln (read-declarations-from-file "test.scm"))
|
(define (try-file filename)
|
||||||
|
(for-each writeln (read-declarations-from-file filename))
|
||||||
|
(newline))
|
||||||
|
|
||||||
|
(try-file "test-big5.scm")
|
||||||
|
(try-file "test-euc-jp.scm")
|
||||||
|
(try-file "test-shift-jis.scm")
|
||||||
|
(try-file "test-utf16-be.scm")
|
||||||
|
(try-file "test-utf16-le.scm")
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
#! /usr/bin/env gosh
|
||||||
|
|
||||||
|
(declare-file
|
||||||
|
(coding Big5)
|
||||||
|
(language scheme r7rs)
|
||||||
|
(copyright 2019 "Lassi Kortela")
|
||||||
|
(spdx-license-identifier "ISC"))
|
||||||
|
|
||||||
|
(display "你好世界")
|
||||||
|
(newline)
|
|
@ -0,0 +1,10 @@
|
||||||
|
#! /usr/bin/env gosh
|
||||||
|
|
||||||
|
(declare-file
|
||||||
|
(coding EUC-JP)
|
||||||
|
(language scheme r7rs)
|
||||||
|
(copyright 2019 "Lassi Kortela")
|
||||||
|
(spdx-license-identifier "ISC"))
|
||||||
|
|
||||||
|
(display "こんにちは世界")
|
||||||
|
(newline)
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/bin/sh
|
||||||
|
kawa encoding-reader.scm
|
|
@ -0,0 +1,10 @@
|
||||||
|
#! /usr/bin/env gosh
|
||||||
|
|
||||||
|
(declare-file
|
||||||
|
(coding shift_jis)
|
||||||
|
(language scheme r7rs)
|
||||||
|
(copyright 2019 "Lassi Kortela")
|
||||||
|
(spdx-license-identifier "ISC"))
|
||||||
|
|
||||||
|
(display "こんにちは世界")
|
||||||
|
(newline)
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue