2007-10-25 16:27:34 -04:00
|
|
|
;;; Ikarus Scheme -- A compiler for R6RS Scheme.
|
2008-01-29 00:34:34 -05:00
|
|
|
;;; Copyright (C) 2006,2007,2008 Abdulaziz Ghuloum
|
2007-10-25 16:27:34 -04:00
|
|
|
;;;
|
|
|
|
;;; This program is free software: you can redistribute it and/or modify
|
|
|
|
;;; it under the terms of the GNU General Public License version 3 as
|
|
|
|
;;; published by the Free Software Foundation.
|
|
|
|
;;;
|
|
|
|
;;; This program is distributed in the hope that it will be useful, but
|
|
|
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
;;; General Public License for more details.
|
|
|
|
;;;
|
|
|
|
;;; You should have received a copy of the GNU General Public License
|
|
|
|
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2006-11-23 19:48:14 -05:00
|
|
|
|
2008-05-21 02:21:37 -04:00
|
|
|
(library (ikarus.reader)
|
2007-12-19 00:40:25 -05:00
|
|
|
(export read read-initial read-token comment-handler get-datum
|
|
|
|
read-annotated read-script-annotated annotation?
|
|
|
|
annotation-expression annotation-source
|
2008-05-21 02:21:37 -04:00
|
|
|
annotation-stripped)
|
2007-05-05 20:47:31 -04:00
|
|
|
(import
|
2008-06-02 03:01:59 -04:00
|
|
|
(only (ikarus.string-to-number) define-string->number-parser)
|
2007-05-06 18:43:04 -04:00
|
|
|
(ikarus system $chars)
|
2007-05-15 23:57:35 -04:00
|
|
|
(ikarus system $fx)
|
|
|
|
(ikarus system $pairs)
|
|
|
|
(ikarus system $bytevectors)
|
2008-05-21 02:21:37 -04:00
|
|
|
;(only (ikarus unicode-data) unicode-printable-char?)
|
2007-12-19 00:40:25 -05:00
|
|
|
(except (ikarus) read-char read read-token comment-handler get-datum
|
|
|
|
read-annotated read-script-annotated annotation?
|
|
|
|
annotation-expression annotation-source annotation-stripped))
|
2007-12-13 05:57:15 -05:00
|
|
|
|
2007-12-18 19:52:15 -05:00
|
|
|
(define (die/pos p off who msg arg*)
|
|
|
|
(define-condition-type &lexical-position &condition
|
|
|
|
make-lexical-position-condition lexical-position?
|
|
|
|
(file-name lexical-position-filename)
|
|
|
|
(character lexical-position-character))
|
|
|
|
(raise
|
|
|
|
(condition
|
|
|
|
(make-lexical-violation)
|
|
|
|
(make-message-condition msg)
|
|
|
|
(if (null? arg*)
|
|
|
|
(condition)
|
|
|
|
(make-irritants-condition arg*))
|
|
|
|
(make-lexical-position-condition
|
|
|
|
(port-id p)
|
|
|
|
(let ([pos (input-port-byte-position p)])
|
|
|
|
(and pos (+ pos off)))))))
|
|
|
|
|
|
|
|
(define (die/p p who msg . arg*)
|
|
|
|
(die/pos p 0 who msg arg*))
|
2007-12-19 19:33:05 -05:00
|
|
|
(define (die/p-1 p who msg . arg*)
|
|
|
|
(die/pos p -1 who msg arg*))
|
2007-12-18 19:52:15 -05:00
|
|
|
|
|
|
|
|
2008-04-28 15:01:45 -04:00
|
|
|
(define (checked-integer->char n ac p)
|
|
|
|
(define (valid-integer-char? n)
|
|
|
|
(cond
|
|
|
|
[(<= n #xD7FF) #t]
|
|
|
|
[(< n #xE000) #f]
|
|
|
|
[(<= n #x10FFFF) #t]
|
|
|
|
[else #f]))
|
|
|
|
(if (valid-integer-char? n)
|
|
|
|
($fixnum->char n)
|
|
|
|
(die/p p 'tokenize
|
|
|
|
"invalid numeric value for character"
|
|
|
|
(list->string (reverse ac)))))
|
|
|
|
|
2007-12-13 05:57:15 -05:00
|
|
|
(define-syntax read-char
|
|
|
|
(syntax-rules ()
|
|
|
|
[(_ p) (get-char p)]))
|
2007-04-29 22:29:42 -04:00
|
|
|
|
2006-11-23 19:33:45 -05:00
|
|
|
(define delimiter?
|
|
|
|
(lambda (c)
|
|
|
|
(or (char-whitespace? c)
|
2007-12-01 03:18:28 -05:00
|
|
|
(memq c '(#\( #\) #\[ #\] #\" #\# #\;)))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define digit?
|
|
|
|
(lambda (c)
|
2006-11-23 19:48:14 -05:00
|
|
|
(and ($char<= #\0 c) ($char<= c #\9))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define char->num
|
|
|
|
(lambda (c)
|
2006-11-23 19:38:26 -05:00
|
|
|
(fx- ($char->fixnum c) ($char->fixnum #\0))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define initial?
|
|
|
|
(lambda (c)
|
2007-06-01 22:17:22 -04:00
|
|
|
(cond
|
|
|
|
[($char<= c ($fixnum->char 127))
|
|
|
|
(or (letter? c) (special-initial? c))]
|
|
|
|
[else (unicode-printable-char? c)])))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define letter?
|
|
|
|
(lambda (c)
|
2006-11-23 19:48:14 -05:00
|
|
|
(or (and ($char<= #\a c) ($char<= c #\z))
|
|
|
|
(and ($char<= #\A c) ($char<= c #\Z)))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define af?
|
|
|
|
(lambda (c)
|
2006-11-23 19:48:14 -05:00
|
|
|
(or (and ($char<= #\a c) ($char<= c #\f))
|
|
|
|
(and ($char<= #\A c) ($char<= c #\F)))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define af->num
|
|
|
|
(lambda (c)
|
2006-11-23 19:48:14 -05:00
|
|
|
(if (and ($char<= #\a c) ($char<= c #\f))
|
2006-11-23 19:38:26 -05:00
|
|
|
(fx+ 10 (fx- ($char->fixnum c) ($char->fixnum #\a)))
|
|
|
|
(fx+ 10 (fx- ($char->fixnum c) ($char->fixnum #\A))))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define special-initial?
|
|
|
|
(lambda (c)
|
|
|
|
(memq c '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\^ #\_ #\~))))
|
|
|
|
(define subsequent?
|
|
|
|
(lambda (c)
|
|
|
|
(or (initial? c) (digit? c) (special-subsequent? c))))
|
|
|
|
(define special-subsequent?
|
|
|
|
(lambda (c)
|
|
|
|
(memq c '(#\+ #\- #\. #\@))))
|
|
|
|
(define tokenize-identifier
|
|
|
|
(lambda (ls p)
|
2007-12-05 05:01:56 -05:00
|
|
|
(let ([c (peek-char p)])
|
2006-11-23 19:33:45 -05:00
|
|
|
(cond
|
|
|
|
[(eof-object? c) ls]
|
|
|
|
[(subsequent? c)
|
2007-12-05 05:01:56 -05:00
|
|
|
(tokenize-identifier (cons (read-char p) ls) p)]
|
2006-11-23 19:33:45 -05:00
|
|
|
[(delimiter? c)
|
|
|
|
ls]
|
2007-12-05 05:01:56 -05:00
|
|
|
[(char=? c #\\)
|
|
|
|
(read-char p)
|
|
|
|
(tokenize-backslash ls p)]
|
2006-11-23 19:33:45 -05:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid identifier syntax"
|
|
|
|
(list->string (reverse (cons c ls))))]))))
|
2007-12-02 06:58:33 -05:00
|
|
|
(define (tokenize-string ls p)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside string")]
|
2007-12-02 06:58:33 -05:00
|
|
|
[else (tokenize-string-char ls p c)])))
|
|
|
|
(define (tokenize-string-char ls p c)
|
|
|
|
(define (intraline-whitespace? c)
|
|
|
|
(or (eqv? c #\x9)
|
|
|
|
(eq? (char-general-category c) 'Zs)))
|
|
|
|
(define (tokenize-string-continue ls p c)
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside string")]
|
2007-12-02 06:58:33 -05:00
|
|
|
[(intraline-whitespace? c)
|
|
|
|
(let f ()
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside string")]
|
2007-12-02 06:58:33 -05:00
|
|
|
[(intraline-whitespace? c) (f)]
|
|
|
|
[else (tokenize-string-char ls p c)])))]
|
|
|
|
[else (tokenize-string-char ls p c)]))
|
|
|
|
(cond
|
|
|
|
[($char= #\" c) ls]
|
|
|
|
[($char= #\\ c)
|
2006-11-23 19:33:45 -05:00
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
2007-12-02 06:58:33 -05:00
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof after string escape")]
|
2007-12-02 06:58:33 -05:00
|
|
|
[($char= #\a c) (tokenize-string (cons #\x7 ls) p)]
|
|
|
|
[($char= #\b c) (tokenize-string (cons #\x8 ls) p)]
|
|
|
|
[($char= #\t c) (tokenize-string (cons #\x9 ls) p)]
|
|
|
|
[($char= #\n c) (tokenize-string (cons #\xA ls) p)]
|
|
|
|
[($char= #\v c) (tokenize-string (cons #\xB ls) p)]
|
|
|
|
[($char= #\f c) (tokenize-string (cons #\xC ls) p)]
|
|
|
|
[($char= #\r c) (tokenize-string (cons #\xD ls) p)]
|
|
|
|
[($char= #\" c) (tokenize-string (cons #\x22 ls) p)]
|
|
|
|
[($char= #\\ c) (tokenize-string (cons #\x5C ls) p)]
|
|
|
|
[($char= #\x c) ;;; unicode escape \xXXX;
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside string")]
|
2007-12-02 06:58:33 -05:00
|
|
|
[(hex c) =>
|
|
|
|
(lambda (n)
|
2008-04-28 15:01:45 -04:00
|
|
|
(let f ([n n] [ac (cons c '(#\x))])
|
2007-12-02 06:58:33 -05:00
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? n)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside string")]
|
2007-12-02 06:58:33 -05:00
|
|
|
[(hex c) =>
|
2008-04-28 15:01:45 -04:00
|
|
|
(lambda (v) (f (+ (* n 16) v) (cons c ac)))]
|
2007-12-02 06:58:33 -05:00
|
|
|
[($char= c #\;)
|
|
|
|
(tokenize-string
|
2008-04-28 15:01:45 -04:00
|
|
|
(cons (checked-integer->char n ac p) ls) p)]
|
2007-12-02 06:58:33 -05:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-12-02 06:58:33 -05:00
|
|
|
"invalid char in escape sequence"
|
2008-04-28 15:01:45 -04:00
|
|
|
(list->string (reverse (cons c ac))))]))))]
|
2007-12-02 06:58:33 -05:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-12-02 06:58:33 -05:00
|
|
|
"invalid char in escape sequence" c)]))]
|
|
|
|
[(intraline-whitespace? c)
|
|
|
|
(let f ()
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside string")]
|
2007-12-02 06:58:33 -05:00
|
|
|
[(intraline-whitespace? c) (f)]
|
|
|
|
[(memv c '(#\xA #\x85 #\x2028))
|
|
|
|
(tokenize-string-continue ls p (read-char p))]
|
|
|
|
[(memv c '(#\xD))
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(memv c '(#\xA #\x85))
|
|
|
|
(tokenize-string-continue ls p (read-char p))]
|
|
|
|
[else
|
|
|
|
(tokenize-string-continue ls p c)]))]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-12-02 06:58:33 -05:00
|
|
|
"non-whitespace character after escape")])))]
|
|
|
|
[(memv c '(#\xA #\x85 #\x2028))
|
|
|
|
(tokenize-string-continue ls p (read-char p))]
|
|
|
|
[(memv c '(#\xD))
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(memv c '(#\xA #\x85))
|
|
|
|
(tokenize-string-continue ls p (read-char p))]
|
|
|
|
[else
|
|
|
|
(tokenize-string-continue ls p c)]))]
|
2007-12-18 19:52:15 -05:00
|
|
|
[else (die/p-1 p 'tokenize "invalid string escape" c)]))]
|
2007-12-02 06:58:33 -05:00
|
|
|
[(memv c '(#\xA #\x85 #\x2028))
|
|
|
|
(tokenize-string (cons #\linefeed ls) p)]
|
|
|
|
[(memv c '(#\xD))
|
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(when (memv c '(#\xA #\x85))
|
|
|
|
(read-char p))
|
|
|
|
(tokenize-string (cons #\linefeed ls) p))]
|
|
|
|
[else
|
|
|
|
(tokenize-string (cons c ls) p)]))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define skip-comment
|
|
|
|
(lambda (p)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(unless (eof-object? c)
|
2006-11-23 19:38:26 -05:00
|
|
|
(let ([i ($char->fixnum c)])
|
2006-11-23 19:33:45 -05:00
|
|
|
(unless (or (fx= i 10) (fx= i 13))
|
|
|
|
(skip-comment p)))))))
|
|
|
|
(define tokenize-dot
|
|
|
|
(lambda (p)
|
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c) 'dot]
|
|
|
|
[(delimiter? c) 'dot]
|
2006-11-23 19:48:14 -05:00
|
|
|
[($char= c #\.) ; this is second dot
|
2006-11-23 19:33:45 -05:00
|
|
|
(read-char p)
|
2007-12-05 05:01:56 -05:00
|
|
|
(let ([c (peek-char p)])
|
2006-11-23 19:33:45 -05:00
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid syntax .. near end of file")]
|
2006-11-23 19:48:14 -05:00
|
|
|
[($char= c #\.) ; this is the third
|
2007-12-05 05:01:56 -05:00
|
|
|
(read-char p)
|
2006-11-23 19:33:45 -05:00
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(cond
|
2007-01-20 19:26:17 -05:00
|
|
|
[(eof-object? c) '(datum . ...)]
|
|
|
|
[(delimiter? c) '(datum . ...)]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid syntax"
|
|
|
|
(string-append "..." (string c)))]))]
|
2006-11-23 19:33:45 -05:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid syntax"
|
|
|
|
(string-append ".." (string c)))]))]
|
2007-06-14 11:56:47 -04:00
|
|
|
[else
|
2008-06-02 03:01:59 -04:00
|
|
|
(cons 'datum
|
|
|
|
(dot p '(#\.) 10 #f #f))]))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define tokenize-char*
|
|
|
|
(lambda (i str p d)
|
|
|
|
(cond
|
|
|
|
[(fx= i (string-length str))
|
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c) d]
|
|
|
|
[(delimiter? c) d]
|
2007-12-18 19:52:15 -05:00
|
|
|
[else (die/p p 'tokenize "invalid character after sequence"
|
2008-04-28 15:01:45 -04:00
|
|
|
(string-append str (string c)))]))]
|
2006-11-23 19:33:45 -05:00
|
|
|
[else
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof in the middle of expected sequence" str)]
|
2006-11-23 19:48:14 -05:00
|
|
|
[($char= c (string-ref str i))
|
2006-11-23 19:33:45 -05:00
|
|
|
(tokenize-char* (fxadd1 i) str p d)]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
|
|
|
"invalid char while scanning string"
|
|
|
|
c str)]))])))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define tokenize-char-seq
|
|
|
|
(lambda (p str d)
|
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c) (cons 'datum (string-ref str 0))]
|
|
|
|
[(delimiter? c) (cons 'datum (string-ref str 0))]
|
2006-11-23 19:48:14 -05:00
|
|
|
[($char= (string-ref str 1) c)
|
2006-11-23 19:33:45 -05:00
|
|
|
(read-char p)
|
|
|
|
(tokenize-char* 2 str p d)]
|
2007-12-18 19:52:15 -05:00
|
|
|
[else (die/p p 'tokenize "invalid syntax"
|
|
|
|
(string-ref str 0) c)]))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define tokenize-char
|
|
|
|
(lambda (p)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid #\\ near end of file")]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\n c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(let ([c (peek-char p)])
|
2007-11-07 11:24:18 -05:00
|
|
|
(cond
|
2007-11-07 11:41:28 -05:00
|
|
|
[(eof-object? c)
|
|
|
|
(read-char p)
|
|
|
|
'(datum . #\n)]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\u c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(read-char p)
|
|
|
|
(tokenize-char-seq p "ul" '(datum . #\x0))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\e c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(read-char p)
|
|
|
|
(tokenize-char-seq p "ewline" '(datum . #\xA))]
|
|
|
|
[(delimiter? c)
|
|
|
|
'(datum . #\n)]
|
2007-11-07 11:24:18 -05:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid syntax"
|
2007-11-07 11:24:18 -05:00
|
|
|
(string #\# #\\ #\n c))]))]
|
|
|
|
[(eqv? #\a c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "alarm" '(datum . #\x7))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\b c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "backspace" '(datum . #\x8))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\t c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "tab" '(datum . #\x9))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\l c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "linefeed" '(datum . #\xA))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\v c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "vtab" '(datum . #\xB))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\p c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "page" '(datum . #\xC))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\r c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "return" '(datum . #\xD))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\e c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "esc" '(datum . #\x1B))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\s c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "space" '(datum . #\x20))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\d c)
|
2007-11-07 11:41:28 -05:00
|
|
|
(tokenize-char-seq p "delete" '(datum . #\x7F))]
|
2007-11-07 11:24:18 -05:00
|
|
|
[(eqv? #\x c)
|
2007-06-01 22:17:22 -04:00
|
|
|
(let ([n (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(or (eof-object? n) (delimiter? n))
|
|
|
|
'(datum . #\x)]
|
|
|
|
[(hex n) =>
|
2008-04-28 15:01:45 -04:00
|
|
|
(lambda (v)
|
2007-06-01 22:17:22 -04:00
|
|
|
(read-char p)
|
2008-04-28 15:01:45 -04:00
|
|
|
(let f ([v v] [ac (cons n '(#\x))])
|
2007-12-05 05:01:56 -05:00
|
|
|
(let ([c (peek-char p)])
|
2007-06-01 22:17:22 -04:00
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2008-04-28 15:01:45 -04:00
|
|
|
(cons 'datum (checked-integer->char v ac p))]
|
2007-06-01 22:17:22 -04:00
|
|
|
[(delimiter? c)
|
2008-04-28 15:01:45 -04:00
|
|
|
(cons 'datum (checked-integer->char v ac p))]
|
2007-06-01 22:17:22 -04:00
|
|
|
[(hex c) =>
|
|
|
|
(lambda (v0)
|
2007-12-05 05:01:56 -05:00
|
|
|
(read-char p)
|
2008-04-28 15:01:45 -04:00
|
|
|
(f (+ (* v 16) v0) (cons c ac)))]
|
2007-06-01 22:17:22 -04:00
|
|
|
[else
|
2008-04-28 15:01:45 -04:00
|
|
|
(die/p p 'tokenize
|
|
|
|
"invalid character sequence"
|
|
|
|
(list->string (reverse (cons c ac))))]))))]
|
2007-06-01 22:17:22 -04:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid character sequence"
|
2007-10-25 14:32:26 -04:00
|
|
|
(string-append "#\\" (string n)))]))]
|
2006-11-23 19:33:45 -05:00
|
|
|
[else
|
|
|
|
(let ([n (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? n) (cons 'datum c)]
|
|
|
|
[(delimiter? n) (cons 'datum c)]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid syntax"
|
|
|
|
(string-append "#\\" (string c n)))]))]))))
|
2007-06-01 22:17:22 -04:00
|
|
|
(define (hex x)
|
|
|
|
(cond
|
|
|
|
[(and ($char<= #\0 x) ($char<= x #\9))
|
|
|
|
($fx- ($char->fixnum x) ($char->fixnum #\0))]
|
2008-04-28 15:01:45 -04:00
|
|
|
[(and ($char<= #\a x) ($char<= x #\f))
|
2007-06-01 22:17:22 -04:00
|
|
|
($fx- ($char->fixnum x)
|
|
|
|
($fx- ($char->fixnum #\a) 10))]
|
2008-04-28 15:01:45 -04:00
|
|
|
[(and ($char<= #\A x) ($char<= x #\F))
|
|
|
|
($fx- ($char->fixnum x)
|
2007-06-01 22:17:22 -04:00
|
|
|
($fx- ($char->fixnum #\A) 10))]
|
|
|
|
[else #f]))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define multiline-error
|
2007-12-18 19:52:15 -05:00
|
|
|
(lambda (p)
|
|
|
|
(die/p p 'tokenize
|
|
|
|
"end of file encountered while inside a #|-style comment")))
|
2006-12-02 10:11:57 -05:00
|
|
|
(define apprev
|
|
|
|
(lambda (str i ac)
|
|
|
|
(cond
|
|
|
|
[(fx= i (string-length str)) ac]
|
|
|
|
[else
|
|
|
|
(apprev str (fx+ i 1) (cons (string-ref str i) ac))])))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define multiline-comment
|
|
|
|
(lambda (p)
|
2006-12-02 10:11:57 -05:00
|
|
|
(define f
|
|
|
|
(lambda (p ac)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
2007-12-18 19:52:15 -05:00
|
|
|
[(eof-object? c) (multiline-error p)]
|
2006-12-02 10:11:57 -05:00
|
|
|
[($char= #\| c)
|
2008-03-15 21:12:43 -04:00
|
|
|
(let g ([c (read-char p)] [ac ac])
|
2006-12-02 10:11:57 -05:00
|
|
|
(cond
|
2007-12-18 19:52:15 -05:00
|
|
|
[(eof-object? c) (multiline-error p)]
|
2006-12-02 10:11:57 -05:00
|
|
|
[($char= #\# c) ac]
|
2008-03-15 21:12:43 -04:00
|
|
|
[($char= #\| c)
|
|
|
|
(g (read-char p) (cons c ac))]
|
2006-12-02 10:11:57 -05:00
|
|
|
[else (f p (cons c ac))]))]
|
|
|
|
[($char= #\# c)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
2007-12-18 19:52:15 -05:00
|
|
|
[(eof-object? c) (multiline-error p)]
|
2006-12-02 10:11:57 -05:00
|
|
|
[($char= #\| c)
|
|
|
|
(let ([v (multiline-comment p)])
|
|
|
|
(if (string? v)
|
|
|
|
(f p (apprev v 0 ac))
|
|
|
|
(f p ac)))]
|
|
|
|
[else
|
|
|
|
(f p (cons c (cons #\# ac)))]))]
|
|
|
|
[else (f p (cons c ac))]))))
|
|
|
|
(let ([ac (f p '())])
|
|
|
|
((comment-handler)
|
|
|
|
(list->string (reverse ac))))))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define tokenize-hash
|
|
|
|
(lambda (p)
|
2006-12-02 05:02:05 -05:00
|
|
|
(tokenize-hash/c (read-char p) p)))
|
2006-12-25 03:18:37 -05:00
|
|
|
(define (skip-whitespace p caller)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside" caller)]
|
2006-12-25 03:18:37 -05:00
|
|
|
[(char-whitespace? c)
|
|
|
|
(skip-whitespace p caller)]
|
|
|
|
[else c])))
|
2006-12-02 05:02:05 -05:00
|
|
|
(define tokenize-hash/c
|
|
|
|
(lambda (c p)
|
|
|
|
(cond
|
2007-12-18 19:52:15 -05:00
|
|
|
[(eof-object? c) (die/p p 'tokenize "invalid # near end of file")]
|
2007-05-20 22:16:57 -04:00
|
|
|
[(memq c '(#\t #\T))
|
2006-12-02 05:02:05 -05:00
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c) '(datum . #t)]
|
|
|
|
[(delimiter? c) '(datum . #t)]
|
2007-12-18 19:52:15 -05:00
|
|
|
[else (die/p p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
(format "invalid syntax near #~a" c))]))]
|
2007-05-20 22:16:57 -04:00
|
|
|
[(memq c '(#\f #\F))
|
2006-12-02 05:02:05 -05:00
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c) '(datum . #f)]
|
|
|
|
[(delimiter? c) '(datum . #f)]
|
2007-12-18 19:52:15 -05:00
|
|
|
[else (die/p p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
(format "invalid syntax near #~a" c))]))]
|
2006-12-02 05:02:05 -05:00
|
|
|
[($char= #\\ c) (tokenize-char p)]
|
|
|
|
[($char= #\( c) 'vparen]
|
|
|
|
[($char= #\' c) '(macro . syntax)]
|
2007-08-30 20:17:23 -04:00
|
|
|
[($char= #\` c) '(macro . quasisyntax)]
|
|
|
|
[($char= #\, c)
|
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(cond
|
|
|
|
[(eqv? c #\@) (read-char p)
|
|
|
|
'(macro . unsyntax-splicing)]
|
|
|
|
[else '(macro . unsyntax)]))]
|
2006-12-02 05:02:05 -05:00
|
|
|
[($char= #\! c)
|
|
|
|
(let ([e (read-char p)])
|
|
|
|
(when (eof-object? e)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof near #!"))
|
2007-11-18 19:53:32 -05:00
|
|
|
(case e
|
|
|
|
[(#\e)
|
2007-11-25 16:23:39 -05:00
|
|
|
(when (eq? (port-mode p) 'r6rs-mode)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize "invalid syntax: #!e"))
|
2007-11-18 19:53:32 -05:00
|
|
|
(read-char* p '(#\e) "of" "eof sequence" #f #f)
|
|
|
|
(cons 'datum (eof-object))]
|
|
|
|
[(#\r)
|
|
|
|
(read-char* p '(#\r) "6rs" "#!r6rs comment" #f #f)
|
|
|
|
(set-port-mode! p 'r6rs-mode)
|
2007-12-18 22:06:58 -05:00
|
|
|
(tokenize/1 p)]
|
2007-11-18 19:53:32 -05:00
|
|
|
[(#\i)
|
|
|
|
(read-char* p '(#\i) "karus" "#!ikarus comment" #f #f)
|
|
|
|
(set-port-mode! p 'ikarus-mode)
|
2007-12-18 22:06:58 -05:00
|
|
|
(tokenize/1 p)]
|
2007-11-18 19:53:32 -05:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-11-18 19:53:32 -05:00
|
|
|
(format "invalid syntax near #!~a" e))]))]
|
2006-12-02 05:02:05 -05:00
|
|
|
[(digit? c)
|
2007-11-18 19:53:32 -05:00
|
|
|
(when (eq? (port-mode p) 'r6rs-mode)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize "graph syntax is invalid in #!r6rs mode"
|
2007-11-18 19:53:32 -05:00
|
|
|
(format "#~a" c)))
|
2006-12-02 05:02:05 -05:00
|
|
|
(tokenize-hashnum p (char->num c))]
|
2006-12-25 03:33:03 -05:00
|
|
|
[($char= #\: c)
|
2007-11-18 19:53:32 -05:00
|
|
|
(when (eq? (port-mode p) 'r6rs-mode)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize "gensym syntax is invalid in #!r6rs mode"
|
2007-11-18 19:53:32 -05:00
|
|
|
(format "#~a" c)))
|
2006-12-25 03:33:03 -05:00
|
|
|
(let* ([c (skip-whitespace p "gensym")]
|
|
|
|
[id0
|
|
|
|
(cond
|
|
|
|
[(initial? c)
|
|
|
|
(list->string
|
|
|
|
(reverse (tokenize-identifier (cons c '()) p)))]
|
|
|
|
[($char= #\| c)
|
|
|
|
(list->string
|
|
|
|
(reverse (tokenize-bar p '())))]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
"invalid char inside gensym" c)])])
|
2006-12-25 03:33:03 -05:00
|
|
|
(cons 'datum (gensym id0)))]
|
2006-12-25 03:18:37 -05:00
|
|
|
[($char= #\{ c)
|
2007-11-18 19:53:32 -05:00
|
|
|
(when (eq? (port-mode p) 'r6rs-mode)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize "gensym syntax is invalid in #!r6rs mode"
|
2007-11-18 19:53:32 -05:00
|
|
|
(format "#~a" c)))
|
2006-12-25 03:18:37 -05:00
|
|
|
(let* ([c (skip-whitespace p "gensym")]
|
|
|
|
[id0
|
|
|
|
(cond
|
|
|
|
[(initial? c)
|
|
|
|
(list->string
|
|
|
|
(reverse (tokenize-identifier (cons c '()) p)))]
|
|
|
|
[($char= #\| c)
|
|
|
|
(list->string
|
|
|
|
(reverse (tokenize-bar p '())))]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
"invalid char inside gensym" c)])]
|
2006-12-25 03:18:37 -05:00
|
|
|
[c (skip-whitespace p "gensym")])
|
|
|
|
(cond
|
|
|
|
[($char= #\} c)
|
2007-10-30 17:45:08 -04:00
|
|
|
(cons 'datum
|
|
|
|
(foreign-call "ikrt_strings_to_gensym" #f id0))]
|
2006-12-25 03:18:37 -05:00
|
|
|
[else
|
|
|
|
(let ([id1
|
|
|
|
(cond
|
|
|
|
[(initial? c)
|
|
|
|
(list->string
|
|
|
|
(reverse
|
|
|
|
(tokenize-identifier
|
|
|
|
(cons c '()) p)))]
|
|
|
|
[($char= #\| c)
|
|
|
|
(list->string
|
|
|
|
(reverse (tokenize-bar p '())))]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
"invalid char inside gensym" c)])])
|
2006-12-25 03:18:37 -05:00
|
|
|
(let ([c (skip-whitespace p "gensym")])
|
|
|
|
(cond
|
|
|
|
[($char= #\} c)
|
|
|
|
(cons 'datum
|
|
|
|
(foreign-call "ikrt_strings_to_gensym"
|
|
|
|
id0 id1))]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
"invalid char inside gensym" c)])))]))]
|
2007-05-15 23:57:35 -04:00
|
|
|
[($char= #\v c)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[($char= #\u c)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[($char= c #\8)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[($char= c #\() 'vu8]
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof object after #vu8")]
|
|
|
|
[else (die/p-1 p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
(format "invalid sequence #vu8~a" c))]))]
|
2007-05-15 23:57:35 -04:00
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof object after #vu")]
|
|
|
|
[else (die/p-1 p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
(format "invalid sequence #vu~a" c))]))]
|
2007-05-15 23:57:35 -04:00
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof object after #v")]
|
|
|
|
[else (die/p p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
(format "invalid sequence #v~a" c))]))]
|
2007-06-14 11:56:47 -04:00
|
|
|
[(memq c '(#\e #\E))
|
2008-06-02 03:01:59 -04:00
|
|
|
(cons 'datum (parse-string p (list c #\#) 10 #f 'e))]
|
2007-06-14 11:56:47 -04:00
|
|
|
[(memq c '(#\i #\I))
|
2008-06-02 03:01:59 -04:00
|
|
|
(cons 'datum (parse-string p (list c #\#) 10 #f 'i))]
|
2007-06-14 11:56:47 -04:00
|
|
|
[(memq c '(#\b #\B))
|
2008-06-02 03:01:59 -04:00
|
|
|
(cons 'datum (parse-string p (list c #\#) 2 2 #f))]
|
2007-06-14 11:56:47 -04:00
|
|
|
[(memq c '(#\x #\X))
|
2008-06-02 03:01:59 -04:00
|
|
|
(cons 'datum (parse-string p (list c #\#) 16 16 #f))]
|
2007-06-14 11:56:47 -04:00
|
|
|
[(memq c '(#\o #\O))
|
2008-06-02 03:01:59 -04:00
|
|
|
(cons 'datum (parse-string p (list c #\#) 8 8 #f))]
|
2007-06-14 11:56:47 -04:00
|
|
|
[(memq c '(#\d #\D))
|
2008-06-02 03:01:59 -04:00
|
|
|
(cons 'datum (parse-string p (list c #\#) 10 10 #f))]
|
2006-12-25 22:21:07 -05:00
|
|
|
[($char= #\@ c)
|
2007-11-18 19:53:32 -05:00
|
|
|
(when (eq? (port-mode p) 'r6rs-mode)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize "fasl syntax is invalid in #!r6rs mode"
|
2007-11-18 19:53:32 -05:00
|
|
|
(format "#~a" c)))
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'read "FIXME: fasl read disabled")
|
2007-04-29 22:29:42 -04:00
|
|
|
'(cons 'datum ($fasl-read p))]
|
2006-12-02 05:02:05 -05:00
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-10-25 14:32:26 -04:00
|
|
|
(format "invalid syntax #~a" c))])))
|
2008-06-02 03:01:59 -04:00
|
|
|
|
|
|
|
(define (num-error p str ls)
|
|
|
|
(die/p-1 p 'read "invalid numeric sequence"
|
|
|
|
(list->string (reverse ls))))
|
|
|
|
|
|
|
|
(define-syntax port-config
|
|
|
|
(syntax-rules (GEN-TEST GEN-ARGS FAIL)
|
|
|
|
[(_ GEN-ARGS k . rest) (k (p ac) . rest)]
|
|
|
|
[(_ FAIL (p ac))
|
|
|
|
(num-error p "invalid numeric sequence" ac)]
|
|
|
|
[(_ GEN-TEST var next (p ac) eof-case char-case)
|
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(if (or (eof-object? c) (delimiter? c))
|
|
|
|
eof-case
|
|
|
|
(let ([var c])
|
|
|
|
(define-syntax next
|
|
|
|
(syntax-rules ()
|
|
|
|
[(_ who args (... ...))
|
|
|
|
(who p (cons (get-char p) ac) args (... ...))]))
|
|
|
|
char-case)))]))
|
|
|
|
|
|
|
|
(define-string->number-parser port-config
|
|
|
|
(parse-string digit+ sign dot))
|
|
|
|
|
2007-11-18 19:53:32 -05:00
|
|
|
(define (read-char* p ls str who ci? delimited?)
|
|
|
|
(let f ([i 0] [ls ls])
|
|
|
|
(cond
|
|
|
|
[(fx= i (string-length str))
|
|
|
|
(when delimited?
|
|
|
|
(let ([c (peek-char p)])
|
|
|
|
(when (and (not (eof-object? c)) (not (delimiter? c)))
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize
|
2007-11-18 19:53:32 -05:00
|
|
|
(format "invalid ~a: ~s" who
|
|
|
|
(list->string (reverse (cons c ls))))))))]
|
|
|
|
[else
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
2007-12-05 05:01:56 -05:00
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize
|
2007-12-05 05:01:56 -05:00
|
|
|
(format "invalid eof inside ~a" who))]
|
|
|
|
[(or (and (not ci?) (char=? c (string-ref str i)))
|
|
|
|
(and ci? (char=? (char-downcase c) (string-ref str i))))
|
|
|
|
(f (add1 i) (cons c ls))]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize
|
2007-12-05 05:01:56 -05:00
|
|
|
(format "invalid ~a: ~s" who
|
|
|
|
(list->string (reverse (cons c ls)))))]))])))
|
2006-11-23 19:48:14 -05:00
|
|
|
(define (tokenize-hashnum p n)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof inside #n mark/ref")]
|
2006-11-23 19:48:14 -05:00
|
|
|
[($char= #\= c) (cons 'mark n)]
|
|
|
|
[($char= #\# c) (cons 'ref n)]
|
|
|
|
[(digit? c)
|
|
|
|
(tokenize-hashnum p (fx+ (fx* n 10) (char->num c)))]
|
|
|
|
[else
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p-1 p 'tokenize "invalid char while inside a #n mark/ref" c)])))
|
2006-11-23 19:33:45 -05:00
|
|
|
(define tokenize-bar
|
|
|
|
(lambda (p ac)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "unexpected eof while reading symbol")]
|
2006-11-23 19:48:14 -05:00
|
|
|
[($char= #\\ c)
|
2006-11-23 19:33:45 -05:00
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "unexpected eof while reading symbol")]
|
2006-11-23 19:33:45 -05:00
|
|
|
[else (tokenize-bar p (cons c ac))]))]
|
2006-11-23 19:48:14 -05:00
|
|
|
[($char= #\| c) ac]
|
2006-11-23 19:33:45 -05:00
|
|
|
[else (tokenize-bar p (cons c ac))]))))
|
2007-11-19 02:00:26 -05:00
|
|
|
(define (tokenize-backslash main-ac p)
|
2007-06-01 22:17:22 -04:00
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof after symbol escape")]
|
2007-06-01 22:17:22 -04:00
|
|
|
[($char= #\x c)
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize "invalid eof after \\x")]
|
2007-06-01 22:17:22 -04:00
|
|
|
[(hex c) =>
|
|
|
|
(lambda (v)
|
|
|
|
(let f ([v v] [ac `(,c #\x #\\)])
|
|
|
|
(let ([c (read-char p)])
|
|
|
|
(cond
|
|
|
|
[(eof-object? c)
|
2007-12-18 19:52:15 -05:00
|
|
|
(die/p p 'tokenize
|
|