(let () (define char-whitespace? (lambda (c) (or ($char= #\space c) (memq ($char->fixnum c) '(9 10 11 12 13))))) (define delimiter? (lambda (c) (or (char-whitespace? c) (memq c '(#\( #\) #\[ #\] #\' #\` #\, #\"))))) (define digit? (lambda (c) (and ($char<= #\0 c) ($char<= c #\9)))) (define char->num (lambda (c) (fx- ($char->fixnum c) ($char->fixnum #\0)))) (define initial? (lambda (c) (or (letter? c) (special-initial? c)))) (define letter? (lambda (c) (or (and ($char<= #\a c) ($char<= c #\z)) (and ($char<= #\A c) ($char<= c #\Z))))) (define af? (lambda (c) (or (and ($char<= #\a c) ($char<= c #\f)) (and ($char<= #\A c) ($char<= c #\F))))) (define af->num (lambda (c) (if (and ($char<= #\a c) ($char<= c #\f)) (fx+ 10 (fx- ($char->fixnum c) ($char->fixnum #\a))) (fx+ 10 (fx- ($char->fixnum c) ($char->fixnum #\A)))))) (define special-initial? (lambda (c) (memq c '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\^ #\_ #\~)))) (define subsequent? (lambda (c) (or (initial? c) (digit? c) (special-subsequent? c)))) (define special-subsequent? (lambda (c) (memq c '(#\+ #\- #\. #\@)))) (define tokenize-number (lambda (n p) (let ([c (read-char p)]) (cond [(eof-object? c) n] [(digit? c) (tokenize-number (fx+ (fx* n 10) (char->num c)) p)] [(delimiter? c) (unread-char c p) n] [else (unread-char c p) (error 'tokenize "invalid number syntax: ~a~a" n c)])))) (define tokenize-hex (lambda (n p) (let ([c (read-char p)]) (cond [(eof-object? c) n] [(digit? c) (tokenize-hex (fx+ (fx* n 16) (char->num c)) p)] [(af? c) (tokenize-hex (fx+ (fx* n 16) (af->num c)) p)] [(delimiter? c) (unread-char c p) n] [else (unread-char c p) (error 'tokenize "invalid hex number sequence: ~a~a" n c)])))) (define tokenize-hex-init (lambda (p) (let ([c (read-char p)]) (cond [(eof-object? c) (unread-char c p) (error 'tokenize "invalid #x near end of file")] [(digit? c) (cons 'datum (tokenize-hex (char->num c) p))] [(af? c) (cons 'datum (tokenize-hex (af->num c) p))] [($char= c #\-) (cons 'datum (fx- 0 (tokenize-hex 0 p)))] [($char= c #\+) (cons 'datum (tokenize-hex 0 p))] [else (unread-char c p) (error 'tokenize "invalid number syntax: #x~a" c)])))) (define tokenize-identifier (lambda (ls p) (let ([c (read-char p)]) (cond [(eof-object? c) ls] [(subsequent? c) (tokenize-identifier (cons c ls) p)] [(delimiter? c) (unread-char c p) ls] [else (unread-char c p) (error 'tokenize "invalid identifier syntax: ~a" (list->string (reverse (cons c ls))))])))) (define tokenize-string (lambda (ls p) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "end-of-file while inside a string")] [($char= #\" c) ls] [($char= #\\ c) (let ([c (read-char p)]) (cond [($char= #\" c) (tokenize-string (cons #\" ls) p)] [($char= #\\ c) (tokenize-string (cons #\\ ls) p)] [($char= #\n c) (tokenize-string (cons #\newline ls) p)] [($char= #\t c) (tokenize-string (cons #\tab ls) p)] [else (error 'tokenize "invalid string escape \\~a" c)]))] [else (tokenize-string (cons c ls) p)])))) (define skip-comment (lambda (p) (let ([c (read-char p)]) (unless (eof-object? c) (let ([i ($char->fixnum c)]) (unless (or (fx= i 10) (fx= i 13)) (skip-comment p))))))) (define tokenize-plus (lambda (p) (let ([c (peek-char p)]) (cond [(eof-object? c) '(datum . +)] [(delimiter? c) '(datum . +)] [(digit? c) (read-char p) (cons 'datum (tokenize-number (char->num c) p))] [else (error 'tokenize "invalid sequence +~a" c)])))) (define tokenize-minus (lambda (p) (let ([c (peek-char p)]) (cond [(eof-object? c) '(datum . -)] [(delimiter? c) '(datum . -)] [(digit? c) (read-char p) (cons 'datum (fx- 0 (tokenize-number (char->num c) p)))] [else (error 'tokenize "invalid sequence -~a" c)])))) (define tokenize-dot (lambda (p) (let ([c (peek-char p)]) (cond [(eof-object? c) 'dot] [(delimiter? c) 'dot] [($char= c #\.) ; this is second dot (read-char p) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "invalid syntax .. near end of file")] [($char= c #\.) ; this is the third (let ([c (peek-char p)]) (cond [(eof-object? c) '(datum . ...)] [(delimiter? c) '(datum . ...)] [else (error 'tokenize "invalid syntax ...~a" c)]))] [else (unread-char c) (error 'tokenize "invalid syntax ..~a" c)]))] [else (error 'tokenize "invalid syntax .~a" c)])))) (define tokenize-char* (lambda (i str p d) (cond [(fx= i (string-length str)) (let ([c (peek-char p)]) (cond [(eof-object? c) d] [(delimiter? c) d] [else (error 'tokenize "invalid character after #\\~a" str)]))] [else (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "invalid eof in the middle of #\\~a" str)] [($char= c (string-ref str i)) (tokenize-char* (fxadd1 i) str p d)] [else (error 'tokenize "invalid char ~a while scanning #\\~a" c str)]))]))) (define tokenize-char-seq (lambda (p str d) (let ([c (peek-char p)]) (cond [(eof-object? c) (cons 'datum (string-ref str 0))] [(delimiter? c) (cons 'datum (string-ref str 0))] [($char= (string-ref str 1) c) (read-char p) (tokenize-char* 2 str p d)] [else (error 'tokenize "invalid syntax near #\\~a~a" (string-ref str 0) c)])))) (define tokenize-char (lambda (p) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "invalid #\\ near end of file")] [($char= #\s c) (tokenize-char-seq p "space" '(datum . #\space))] [($char= #\n c) (tokenize-char-seq p "newline" '(datum . #\newline))] [($char= #\t c) (tokenize-char-seq p "tab" '(datum . #\tab))] [($char= #\r c) (tokenize-char-seq p "return" '(datum . #\return))] [else (let ([n (peek-char p)]) (cond [(eof-object? n) (cons 'datum c)] [(delimiter? n) (cons 'datum c)] [else (error 'tokenize "invalid syntax #\\~a~a" c n)]))])))) (define multiline-error (lambda () (error 'tokenize "end of file encountered while inside a #|-style comment"))) (define multiline-comment (lambda (p) (let ([c (read-char p)]) (cond [(eof-object? c) (multiline-error)] [($char= #\| c) (let ([c (read-char p)]) (cond [(eof-object? c) (multiline-error)] [($char= #\# c) (void)] [else (multiline-comment p)]))] [($char= #\# c) (let ([c (read-char p)]) (cond [(eof-object? c) (multiline-error)] [($char= #\| c) (multiline-comment p) (multiline-comment p)] [else (multiline-comment p)]))] [else (multiline-comment p)])))) (define read-binary (lambda (ac chars p) (let ([c (read-char p)]) (cond [(eof-object? c) ac] [($char= #\0 c) (read-binary (fxsll ac 1) (cons c chars) p)] [($char= #\1 c) (read-binary (fx+ (fxsll ac 1) 1) (cons c chars) p)] [(delimiter? c) (unread-char c p) ac] [else (unread-char c) (error 'tokenize "invalid syntax #b~a" (list->string (reverse (cons c chars))))])))) (define tokenize-hash (lambda (p) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "invalid # near end of file")] [($char= c #\t) (let ([c (peek-char p)]) (cond [(eof-object? c) '(datum . #t)] [(delimiter? c) '(datum . #t)] [else (error 'tokenize "invalid syntax near #t")]))] [($char= c #\f) (let ([c (peek-char p)]) (cond [(eof-object? c) '(datum . #f)] [(delimiter? c) '(datum . #f)] [else (error 'tokenize "invalid syntax near #f")]))] [($char= #\\ c) (tokenize-char p)] [($char= #\( c) 'vparen] [($char= #\x c) (tokenize-hex-init p)] [($char= #\' c) '(macro . syntax)] [($char= #\; c) 'hash-semi] [($char= #\% c) '(macro . |#primitive|)] [($char= #\| c) (multiline-comment p) (tokenize p)] [($char= #\b c) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "invalid eof while reading #b")] [($char= #\- c) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "invalid eof while reading #b-")] [($char= #\0 c) (cons 'datum (fx- 0 (read-binary 0 '(#\0 #\-) p)))] [($char= #\1 c) (cons 'datum (fx- 0 (read-binary 1 '(#\1 #\-) p)))] [else (unread-char c p) (error 'tokenize "invalid binary syntax #b-~a" c)]))] [($char= #\0 c) (cons 'datum (read-binary 0 '(#\0) p))] [($char= #\1 c) (cons 'datum (read-binary 1 '(#\1) p))] [else (unread-char c p) (error 'tokenize "invalid syntax #b~a" c)] ))] [($char= #\! c) (let ([e (read-char p)]) (when (eof-object? e) (error 'tokenize "invalid eof near #!")) (unless ($char= #\e e) (error 'tokenize "invalid syntax near #!~a" e)) (let ([o (read-char p)]) (when (eof-object? o) (error 'tokenize "invalid eof near #!e")) (unless ($char= #\o o) (error 'tokenize "invalid syntax near #!e~a" o)) (let ([f (read-char p)]) (when (eof-object? f) (error 'tokenize "invalid syntax near #!eo")) (unless ($char= #\f f) (error 'tokenize "invalid syntax near #!eo~a" f)) (cons 'datum (eof-object)))))] [else (unread-char c p) (error 'tokenize "invalid syntax #~a" c)])))) (define tokenize-bar (lambda (p ac) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "unexpected eof while reading symbol")] [($char= #\\ c) (let ([c (read-char p)]) (cond [(eof-object? c) (error 'tokenize "unexpected eof while reading symbol")] [else (tokenize-bar p (cons c ac))]))] [($char= #\| c) ac] [else (tokenize-bar p (cons c ac))])))) (define tokenize (lambda (p) (let ([c (read-char p)]) (cond [(eof-object? c) (eof-object)] [(char-whitespace? c) (tokenize p)] [($char= #\( c) 'lparen] [($char= #\) c) 'rparen] [($char= #\[ c) 'lbrack] [($char= #\] c) 'rbrack] [($char= #\' c) '(macro . quote)] [($char= #\` c) '(macro . quasiquote)] [($char= #\, c) (let ([c (peek-char p)]) (cond [(eof-object? c) '(macro . unquote)] [($char= c #\@) (read-char p) '(macro . unquote-splicing)] [else '(macro . unquote)]))] [($char= #\# c) (tokenize-hash p)] [(digit? c) (cons 'datum (tokenize-number (char->num c) p))] [(initial? c) (let ([ls (reverse (tokenize-identifier (cons c '()) p))]) (cons 'datum (string->symbol (list->string ls))))] [($char= #\" c) (let ([ls (tokenize-string '() p)]) (cons 'datum (list->string (reverse ls))))] [($char= #\; c) (skip-comment p) (tokenize p)] [($char= #\+ c) (tokenize-plus p)] [($char= #\- c) (tokenize-minus p)] [($char= #\. c) (tokenize-dot p)] [($char= #\| c) (let ([ls (reverse (tokenize-bar p '()))]) (cons 'datum (string->symbol (list->string ls))))] [else (unread-char c p) (error 'tokenize "invalid syntax ~a" c)])))) ;;; ;;;--------------------------------------------------------------* READ *--- ;;; (define read-list-rest (lambda (p end mis) (let ([t (read-token p)]) (cond [(eof-object? t) (error 'read "end of file encountered while reading list")] [(eq? t end) '()] [(eq? t mis) (error 'read "paren mismatch")] [(eq? t 'dot) (let ([d (read p)]) (let ([t (read-token p)]) (cond [(eq? t end) d] [(eq? t mis) (error 'read "paren mismatch")] [(eq? t 'dot) (error 'read "cannot have two dots in a list")] [else (error 'read "expecting ~a, got ~a" end t)])))] [(eq? t 'hash-semi) (read p) (read-list-rest p end mis)] [else (let ([a (parse-token p t)]) (let ([d (read-list-rest p end mis)]) (cons a d)))])))) (define read-list-init (lambda (p end mis) (let ([t (read-token p)]) (cond [(eof-object? t) (error 'read "end of file encountered while reading list")] [(eq? t end) '()] [(eq? t mis) (error 'read "paren mismatch")] [(eq? t 'dot) (error 'read "invalid dot while reading list")] [(eq? t 'hash-semi) (read p) (read-list-init p end mis)] [else (let ([a (parse-token p t)]) (cons a (read-list-rest p end mis)))])))) (define vector-put! (lambda (v i ls) (cond [(null? ls) v] [else (vector-set! v i (car ls)) (vector-put! v (fxsub1 i) (cdr ls))]))) (define read-vector (lambda (p count ls) (let ([t (read-token p)]) (cond [(eof-object? t) (error 'read "end of file encountered while reading a vector")] [(eq? t 'rparen) (let ([v (make-vector count)]) (vector-put! v (fxsub1 count) ls))] [(eq? t 'rbrack) (error 'read "unexpected ] while reading a vector")] [(eq? t 'dot) (error 'read "unexpected . while reading a vector")] [(eq? t 'hash-semi) (read p) (read-vector p count ls)] [else (let ([a (parse-token p t)]) (read-vector p (fxadd1 count) (cons a ls)))])))) (define parse-token (lambda (p t) (cond [(eof-object? t) (eof-object)] [(eq? t 'lparen) (read-list-init p 'rparen 'rbrack)] [(eq? t 'lbrack) (read-list-init p 'rbrack 'rparen)] [(eq? t 'vparen) (read-vector p 0 '())] [(eq? t 'hash-semi) (read p) ; ignored expression (read p)] [(pair? t) (cond [(eq? (car t) 'datum) (cdr t)] [(eq? (car t) 'macro) (cons (cdr t) (cons (read p) '()))] [else (error 'read "invalid token! ~s" t)])] [else (error 'read "unexpected ~s found" t)]))) (define read (lambda (p) (parse-token p (read-token p)))) ;;; ;;;--------------------------------------------------------------* INIT *--- ;;; (primitive-set! 'read-token (case-lambda [() (tokenize (current-input-port))] [(p) (if (input-port? p) (tokenize p) (error 'read-token "~s is not an input port" p))])) (primitive-set! 'read (case-lambda [() (read (current-input-port))] [(p) (if (input-port? p) (read p) (error 'read "~s is not an input port" p))])) (let () (define read-and-eval (lambda (p) (let ([x (read p)]) (unless (eof-object? x) (eval x) (read-and-eval p))))) (primitive-set! 'load (lambda (x) (unless (string? x) (error 'load "~s is not a string" x)) (let ([p (open-input-file x)]) (read-and-eval p) (close-input-port p))))) )