514 lines
18 KiB
Scheme
514 lines
18 KiB
Scheme
(let ()
|
|
(define char-whitespace?
|
|
(lambda (c)
|
|
(or ($char= #\space c)
|
|
(memq ($char->fixnum c) '(9 10 11 12 13)))))
|
|
(define delimiter?
|
|
(lambda (c)
|
|
(or (char-whitespace? c)
|
|
(memq c '(#\( #\) #\[ #\] #\' #\` #\, #\")))))
|
|
(define digit?
|
|
(lambda (c)
|
|
(and ($char<= #\0 c) ($char<= c #\9))))
|
|
(define char->num
|
|
(lambda (c)
|
|
(fx- ($char->fixnum c) ($char->fixnum #\0))))
|
|
(define initial?
|
|
(lambda (c)
|
|
(or (letter? c) (special-initial? c))))
|
|
(define letter?
|
|
(lambda (c)
|
|
(or (and ($char<= #\a c) ($char<= c #\z))
|
|
(and ($char<= #\A c) ($char<= c #\Z)))))
|
|
(define af?
|
|
(lambda (c)
|
|
(or (and ($char<= #\a c) ($char<= c #\f))
|
|
(and ($char<= #\A c) ($char<= c #\F)))))
|
|
(define af->num
|
|
(lambda (c)
|
|
(if (and ($char<= #\a c) ($char<= c #\f))
|
|
(fx+ 10 (fx- ($char->fixnum c) ($char->fixnum #\a)))
|
|
(fx+ 10 (fx- ($char->fixnum c) ($char->fixnum #\A))))))
|
|
(define special-initial?
|
|
(lambda (c)
|
|
(memq c '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\^ #\_ #\~))))
|
|
(define subsequent?
|
|
(lambda (c)
|
|
(or (initial? c) (digit? c) (special-subsequent? c))))
|
|
(define special-subsequent?
|
|
(lambda (c)
|
|
(memq c '(#\+ #\- #\. #\@))))
|
|
(define tokenize-number
|
|
(lambda (n p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) n]
|
|
[(digit? c)
|
|
(tokenize-number (fx+ (fx* n 10) (char->num c)) p)]
|
|
[(delimiter? c)
|
|
(unread-char c p)
|
|
n]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid number syntax: ~a~a" n c)]))))
|
|
(define tokenize-hex
|
|
(lambda (n p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) n]
|
|
[(digit? c)
|
|
(tokenize-hex (fx+ (fx* n 16) (char->num c)) p)]
|
|
[(af? c)
|
|
(tokenize-hex (fx+ (fx* n 16) (af->num c)) p)]
|
|
[(delimiter? c)
|
|
(unread-char c p)
|
|
n]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid hex number sequence: ~a~a" n c)]))))
|
|
(define tokenize-hex-init
|
|
(lambda (p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid #x near end of file")]
|
|
[(digit? c)
|
|
(cons 'datum (tokenize-hex (char->num c) p))]
|
|
[(af? c)
|
|
(cons 'datum (tokenize-hex (af->num c) p))]
|
|
[($char= c #\-)
|
|
(cons 'datum (fx- 0 (tokenize-hex 0 p)))]
|
|
[($char= c #\+)
|
|
(cons 'datum (tokenize-hex 0 p))]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid number syntax: #x~a" c)]))))
|
|
(define tokenize-identifier
|
|
(lambda (ls p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) ls]
|
|
[(subsequent? c)
|
|
(tokenize-identifier (cons c ls) p)]
|
|
[(delimiter? c)
|
|
(unread-char c p)
|
|
ls]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid identifier syntax: ~a"
|
|
(list->string (reverse (cons c ls))))]))))
|
|
(define tokenize-string
|
|
(lambda (ls p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "end-of-file while inside a string")]
|
|
[($char= #\" c) ls]
|
|
[($char= #\\ c)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[($char= #\" c) (tokenize-string (cons #\" ls) p)]
|
|
[($char= #\\ c) (tokenize-string (cons #\\ ls) p)]
|
|
[($char= #\n c) (tokenize-string (cons #\newline ls) p)]
|
|
[($char= #\t c) (tokenize-string (cons #\tab ls) p)]
|
|
[else (error 'tokenize "invalid string escape \\~a" c)]))]
|
|
[else
|
|
(tokenize-string (cons c ls) p)]))))
|
|
(define skip-comment
|
|
(lambda (p)
|
|
(let ([c (read-char p)])
|
|
(unless (eof-object? c)
|
|
(let ([i ($char->fixnum c)])
|
|
(unless (or (fx= i 10) (fx= i 13))
|
|
(skip-comment p)))))))
|
|
(define tokenize-plus
|
|
(lambda (p)
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) '(datum . +)]
|
|
[(delimiter? c) '(datum . +)]
|
|
[(digit? c)
|
|
(read-char p)
|
|
(cons 'datum (tokenize-number (char->num c) p))]
|
|
[else (error 'tokenize "invalid sequence +~a" c)]))))
|
|
(define tokenize-minus
|
|
(lambda (p)
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) '(datum . -)]
|
|
[(delimiter? c) '(datum . -)]
|
|
[(digit? c)
|
|
(read-char p)
|
|
(cons 'datum (fx- 0 (tokenize-number (char->num c) p)))]
|
|
[else (error 'tokenize "invalid sequence -~a" c)]))))
|
|
(define tokenize-dot
|
|
(lambda (p)
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) 'dot]
|
|
[(delimiter? c) 'dot]
|
|
[($char= c #\.) ; this is second dot
|
|
(read-char p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "invalid syntax .. near end of file")]
|
|
[($char= c #\.) ; this is the third
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) '(datum . ...)]
|
|
[(delimiter? c) '(datum . ...)]
|
|
[else
|
|
(error 'tokenize "invalid syntax ...~a" c)]))]
|
|
[else
|
|
(unread-char c)
|
|
(error 'tokenize "invalid syntax ..~a" c)]))]
|
|
[else
|
|
(error 'tokenize "invalid syntax .~a" c)]))))
|
|
(define tokenize-char*
|
|
(lambda (i str p d)
|
|
(cond
|
|
[(fx= i (string-length str))
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) d]
|
|
[(delimiter? c) d]
|
|
[else (error 'tokenize "invalid character after #\\~a" str)]))]
|
|
[else
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "invalid eof in the middle of #\\~a" str)]
|
|
[($char= c (string-ref str i))
|
|
(tokenize-char* (fxadd1 i) str p d)]
|
|
[else
|
|
(error 'tokenize
|
|
"invalid char ~a while scanning #\\~a" c str)]))])))
|
|
(define tokenize-char-seq
|
|
(lambda (p str d)
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) (cons 'datum (string-ref str 0))]
|
|
[(delimiter? c) (cons 'datum (string-ref str 0))]
|
|
[($char= (string-ref str 1) c)
|
|
(read-char p)
|
|
(tokenize-char* 2 str p d)]
|
|
[else (error 'tokenize "invalid syntax near #\\~a~a"
|
|
(string-ref str 0) c)]))))
|
|
(define tokenize-char
|
|
(lambda (p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "invalid #\\ near end of file")]
|
|
[($char= #\s c)
|
|
(tokenize-char-seq p "space" '(datum . #\space))]
|
|
[($char= #\n c)
|
|
(tokenize-char-seq p "newline" '(datum . #\newline))]
|
|
[($char= #\t c)
|
|
(tokenize-char-seq p "tab" '(datum . #\tab))]
|
|
[($char= #\r c)
|
|
(tokenize-char-seq p "return" '(datum . #\return))]
|
|
[else
|
|
(let ([n (peek-char p)])
|
|
(cond
|
|
[(eof-object? n) (cons 'datum c)]
|
|
[(delimiter? n) (cons 'datum c)]
|
|
[else
|
|
(error 'tokenize "invalid syntax #\\~a~a" c n)]))]))))
|
|
(define multiline-error
|
|
(lambda ()
|
|
(error 'tokenize
|
|
"end of file encountered while inside a #|-style comment")))
|
|
(define multiline-comment
|
|
(lambda (p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) (multiline-error)]
|
|
[($char= #\| c)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) (multiline-error)]
|
|
[($char= #\# c) (void)]
|
|
[else (multiline-comment p)]))]
|
|
[($char= #\# c)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) (multiline-error)]
|
|
[($char= #\| c)
|
|
(multiline-comment p)
|
|
(multiline-comment p)]
|
|
[else
|
|
(multiline-comment p)]))]
|
|
[else (multiline-comment p)]))))
|
|
(define read-binary
|
|
(lambda (ac chars p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) ac]
|
|
[($char= #\0 c) (read-binary (fxsll ac 1) (cons c chars) p)]
|
|
[($char= #\1 c) (read-binary (fx+ (fxsll ac 1) 1) (cons c chars) p)]
|
|
[(delimiter? c) (unread-char c p) ac]
|
|
[else
|
|
(unread-char c)
|
|
(error 'tokenize "invalid syntax #b~a"
|
|
(list->string (reverse (cons c chars))))]))))
|
|
(define tokenize-hash
|
|
(lambda (p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) (error 'tokenize "invalid # near end of file")]
|
|
[($char= c #\t)
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) '(datum . #t)]
|
|
[(delimiter? c) '(datum . #t)]
|
|
[else (error 'tokenize "invalid syntax near #t")]))]
|
|
[($char= c #\f)
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) '(datum . #f)]
|
|
[(delimiter? c) '(datum . #f)]
|
|
[else (error 'tokenize "invalid syntax near #f")]))]
|
|
[($char= #\\ c) (tokenize-char p)]
|
|
[($char= #\( c) 'vparen]
|
|
[($char= #\x c) (tokenize-hex-init p)]
|
|
[($char= #\' c) '(macro . syntax)]
|
|
[($char= #\; c) 'hash-semi]
|
|
[($char= #\% c) '(macro . |#primitive|)]
|
|
[($char= #\| c) (multiline-comment p) (tokenize p)]
|
|
[($char= #\b c)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "invalid eof while reading #b")]
|
|
[($char= #\- c)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "invalid eof while reading #b-")]
|
|
[($char= #\0 c)
|
|
(cons 'datum
|
|
(fx- 0 (read-binary 0 '(#\0 #\-) p)))]
|
|
[($char= #\1 c)
|
|
(cons 'datum
|
|
(fx- 0 (read-binary 1 '(#\1 #\-) p)))]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid binary syntax #b-~a" c)]))]
|
|
[($char= #\0 c)
|
|
(cons 'datum (read-binary 0 '(#\0) p))]
|
|
[($char= #\1 c)
|
|
(cons 'datum (read-binary 1 '(#\1) p))]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid syntax #b~a" c)]
|
|
))]
|
|
[($char= #\! c)
|
|
(let ([e (read-char p)])
|
|
(when (eof-object? e)
|
|
(error 'tokenize "invalid eof near #!"))
|
|
(unless ($char= #\e e)
|
|
(error 'tokenize "invalid syntax near #!~a" e))
|
|
(let ([o (read-char p)])
|
|
(when (eof-object? o)
|
|
(error 'tokenize "invalid eof near #!e"))
|
|
(unless ($char= #\o o)
|
|
(error 'tokenize "invalid syntax near #!e~a" o))
|
|
(let ([f (read-char p)])
|
|
(when (eof-object? f)
|
|
(error 'tokenize "invalid syntax near #!eo"))
|
|
(unless ($char= #\f f)
|
|
(error 'tokenize "invalid syntax near #!eo~a" f))
|
|
(cons 'datum (eof-object)))))]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid syntax #~a" c)]))))
|
|
(define tokenize-bar
|
|
(lambda (p ac)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "unexpected eof while reading symbol")]
|
|
[($char= #\\ c)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c)
|
|
(error 'tokenize "unexpected eof while reading symbol")]
|
|
[else (tokenize-bar p (cons c ac))]))]
|
|
[($char= #\| c) ac]
|
|
[else (tokenize-bar p (cons c ac))]))))
|
|
(define tokenize
|
|
(lambda (p)
|
|
(let ([c (read-char p)])
|
|
(cond
|
|
[(eof-object? c) (eof-object)]
|
|
[(char-whitespace? c) (tokenize p)]
|
|
[($char= #\( c) 'lparen]
|
|
[($char= #\) c) 'rparen]
|
|
[($char= #\[ c) 'lbrack]
|
|
[($char= #\] c) 'rbrack]
|
|
[($char= #\' c) '(macro . quote)]
|
|
[($char= #\` c) '(macro . quasiquote)]
|
|
[($char= #\, c)
|
|
(let ([c (peek-char p)])
|
|
(cond
|
|
[(eof-object? c) '(macro . unquote)]
|
|
[($char= c #\@)
|
|
(read-char p)
|
|
'(macro . unquote-splicing)]
|
|
[else '(macro . unquote)]))]
|
|
[($char= #\# c) (tokenize-hash p)]
|
|
[(digit? c)
|
|
(cons 'datum (tokenize-number (char->num c) p))]
|
|
[(initial? c)
|
|
(let ([ls (reverse (tokenize-identifier (cons c '()) p))])
|
|
(cons 'datum (string->symbol (list->string ls))))]
|
|
[($char= #\" c)
|
|
(let ([ls (tokenize-string '() p)])
|
|
(cons 'datum (list->string (reverse ls))))]
|
|
[($char= #\; c)
|
|
(skip-comment p)
|
|
(tokenize p)]
|
|
[($char= #\+ c)
|
|
(tokenize-plus p)]
|
|
[($char= #\- c)
|
|
(tokenize-minus p)]
|
|
[($char= #\. c)
|
|
(tokenize-dot p)]
|
|
[($char= #\| c)
|
|
(let ([ls (reverse (tokenize-bar p '()))])
|
|
(cons 'datum (string->symbol (list->string ls))))]
|
|
[else
|
|
(unread-char c p)
|
|
(error 'tokenize "invalid syntax ~a" c)]))))
|
|
|
|
;;;
|
|
;;;--------------------------------------------------------------* READ *---
|
|
;;;
|
|
(define read-list-rest
|
|
(lambda (p end mis)
|
|
(let ([t (read-token p)])
|
|
(cond
|
|
[(eof-object? t)
|
|
(error 'read "end of file encountered while reading list")]
|
|
[(eq? t end) '()]
|
|
[(eq? t mis)
|
|
(error 'read "paren mismatch")]
|
|
[(eq? t 'dot)
|
|
(let ([d (read p)])
|
|
(let ([t (read-token p)])
|
|
(cond
|
|
[(eq? t end) d]
|
|
[(eq? t mis)
|
|
(error 'read "paren mismatch")]
|
|
[(eq? t 'dot)
|
|
(error 'read "cannot have two dots in a list")]
|
|
[else
|
|
(error 'read "expecting ~a, got ~a" end t)])))]
|
|
[(eq? t 'hash-semi)
|
|
(read p)
|
|
(read-list-rest p end mis)]
|
|
[else
|
|
(let ([a (parse-token p t)])
|
|
(let ([d (read-list-rest p end mis)])
|
|
(cons a d)))]))))
|
|
(define read-list-init
|
|
(lambda (p end mis)
|
|
(let ([t (read-token p)])
|
|
(cond
|
|
[(eof-object? t)
|
|
(error 'read "end of file encountered while reading list")]
|
|
[(eq? t end) '()]
|
|
[(eq? t mis)
|
|
(error 'read "paren mismatch")]
|
|
[(eq? t 'dot)
|
|
(error 'read "invalid dot while reading list")]
|
|
[(eq? t 'hash-semi)
|
|
(read p)
|
|
(read-list-init p end mis)]
|
|
[else
|
|
(let ([a (parse-token p t)])
|
|
(cons a (read-list-rest p end mis)))]))))
|
|
(define vector-put!
|
|
(lambda (v i ls)
|
|
(cond
|
|
[(null? ls) v]
|
|
[else
|
|
(vector-set! v i (car ls))
|
|
(vector-put! v (fxsub1 i) (cdr ls))])))
|
|
(define read-vector
|
|
(lambda (p count ls)
|
|
(let ([t (read-token p)])
|
|
(cond
|
|
[(eof-object? t)
|
|
(error 'read "end of file encountered while reading a vector")]
|
|
[(eq? t 'rparen)
|
|
(let ([v (make-vector count)])
|
|
(vector-put! v (fxsub1 count) ls))]
|
|
[(eq? t 'rbrack)
|
|
(error 'read "unexpected ] while reading a vector")]
|
|
[(eq? t 'dot)
|
|
(error 'read "unexpected . while reading a vector")]
|
|
[(eq? t 'hash-semi)
|
|
(read p)
|
|
(read-vector p count ls)]
|
|
[else
|
|
(let ([a (parse-token p t)])
|
|
(read-vector p (fxadd1 count) (cons a ls)))]))))
|
|
(define parse-token
|
|
(lambda (p t)
|
|
(cond
|
|
[(eof-object? t) (eof-object)]
|
|
[(eq? t 'lparen) (read-list-init p 'rparen 'rbrack)]
|
|
[(eq? t 'lbrack) (read-list-init p 'rbrack 'rparen)]
|
|
[(eq? t 'vparen) (read-vector p 0 '())]
|
|
[(eq? t 'hash-semi)
|
|
(read p) ; ignored expression
|
|
(read p)]
|
|
[(pair? t)
|
|
(cond
|
|
[(eq? (car t) 'datum) (cdr t)]
|
|
[(eq? (car t) 'macro)
|
|
(cons (cdr t) (cons (read p) '()))]
|
|
[else (error 'read "invalid token! ~s" t)])]
|
|
[else
|
|
(error 'read "unexpected ~s found" t)])))
|
|
(define read
|
|
(lambda (p) (parse-token p (read-token p))))
|
|
|
|
;;;
|
|
;;;--------------------------------------------------------------* INIT *---
|
|
;;;
|
|
(primitive-set! 'read-token
|
|
(case-lambda
|
|
[() (tokenize (current-input-port))]
|
|
[(p)
|
|
(if (input-port? p)
|
|
(tokenize p)
|
|
(error 'read-token "~s is not an input port" p))]))
|
|
(primitive-set! 'read
|
|
(case-lambda
|
|
[() (read (current-input-port))]
|
|
[(p)
|
|
(if (input-port? p)
|
|
(read p)
|
|
(error 'read "~s is not an input port" p))]))
|
|
(let ()
|
|
(define read-and-eval
|
|
(lambda (p)
|
|
(let ([x (read p)])
|
|
(unless (eof-object? x)
|
|
(eval x)
|
|
(read-and-eval p)))))
|
|
(primitive-set! 'load
|
|
(lambda (x)
|
|
(unless (string? x)
|
|
(error 'load "~s is not a string" x))
|
|
(let ([p (open-input-file x)])
|
|
(read-and-eval p)
|
|
(close-input-port p)))))
|
|
)
|
|
|