* Added an R6RS todo list

* fixed some problems (and introduced others) in the reader.
This commit is contained in:
Abdulaziz Ghuloum 2007-05-20 22:16:57 -04:00
parent aa98df6c4c
commit d062baee17
4 changed files with 244 additions and 189 deletions

89
R6RS-TODO.txt Normal file
View File

@ -0,0 +1,89 @@
TODO for (R6RS BASE)
* Compiler + libraries:
- letrec and letrec* restrictions (references and multiple returns)
- prohibit export of mutable bindings
- library versioning
- library phases (simply ignore)
- internal imports
- Recognize (define x)
- Add let-syntax and letrec-syntax
- Add identifier-syntax
- Add do, let*-values.
* Numerics:
- Add rational, complex, and single-precision numbers
- Make sure the following primitives work:
rationalize
numerator denominator
make-rectangular make-polar
real-part imag-part
magnitude angle
expt log
sin cos tan
asin acos atan
sqrt exact-integer-sqrt
number? complex? real? rational? integer?
real-valued? rational-valued? integer-values?
exact? inexact? ->exact ->inexact
real->flonum real->single real->double
= < <= > >=
zero? positive? negative?
odd? even?
finite? infinite? nan?
min max abs
+ - * /
div mod div-and-mod
div0 mod0 div0-and-mod0
gcd lcm
floor ceiling truncate round
number->string(radix,precision) string->number(radix)
* Read:
- recognize rational, complex and flonum syntax
- recognize inline-hex sequences (strings, chars, and symbols)
- #!r6rs
- #\ sequnces: nul, alarm, backspace, tab, linefeed, vtab, page,
return, esc, space, delete
- respect unicode constituents
- recognize \a, \b, \t, \n, \v, \f, \r, \", \\, \<lf>, \<spc>,
\xXXX; sequences in strings.
- Add unsyntax, unsyntax-splicing, and quasisyntax.
* Bytevectors:
- equal? for bytevectors
* Strings:
- string-copy
- string-fill!
* Vectors:
- vector-fill!
- vector-map
- vector-for-each
* Errors:
- (error who msg irritants ...)
- (assertion-violation who message irritants ...)
Completed for (R6RS BASE):

Binary file not shown.

View File

@ -43,54 +43,6 @@
(define special-subsequent? (define special-subsequent?
(lambda (c) (lambda (c)
(memq c '(#\+ #\- #\. #\@)))) (memq c '(#\+ #\- #\. #\@))))
(define tokenize-number
(lambda (n p)
(let ([c (read-char p)])
(cond
[(eof-object? c) n]
[(digit? c)
(tokenize-number (+ (* n 10) (char->num c)) p)]
[($char= c #\.)
(tokenize-flonum/with-digits n p)]
[(delimiter? c)
(unread-char c p)
n]
[else
(unread-char c p)
(error 'tokenize "invalid number syntax: ~a~a" n c)]))))
(define tokenize-hex
(lambda (n p)
(let ([c (read-char p)])
(cond
[(eof-object? c) n]
[(digit? c)
(tokenize-hex (+ (* n 16) (char->num c)) p)]
[(af? c)
(tokenize-hex (+ (* n 16) (af->num c)) p)]
[(delimiter? c)
(unread-char c p)
n]
[else
(unread-char c p)
(error 'tokenize "invalid hex number sequence: ~a~a" n c)]))))
(define tokenize-hex-init
(lambda (p)
(let ([c (read-char p)])
(cond
[(eof-object? c)
(unread-char c p)
(error 'tokenize "invalid #x near end of file")]
[(digit? c)
(cons 'datum (tokenize-hex (char->num c) p))]
[(af? c)
(cons 'datum (tokenize-hex (af->num c) p))]
[($char= c #\-)
(cons 'datum (* -1 (tokenize-hex 0 p)))]
[($char= c #\+)
(cons 'datum (tokenize-hex 0 p))]
[else
(unread-char c p)
(error 'tokenize "invalid number syntax: #x~a" c)]))))
(define tokenize-identifier (define tokenize-identifier
(lambda (ls p) (lambda (ls p)
(let ([c (read-char p)]) (let ([c (read-char p)])
@ -130,77 +82,6 @@
(let ([i ($char->fixnum c)]) (let ([i ($char->fixnum c)])
(unless (or (fx= i 10) (fx= i 13)) (unless (or (fx= i 10) (fx= i 13))
(skip-comment p))))))) (skip-comment p)))))))
(define (ls->flonum ls pos?)
(let ([str (if pos?
(list->string
(cons #\. (reverse ls)))
(list->string
(list* #\- #\. (reverse ls))))])
(string->flonum str)))
(define (tokenize-flonum ls pos? p)
(let ([c (read-char p)])
(cond
[(eof-object? c) (ls->flonum ls pos?)]
[(digit? c) (tokenize-flonum (cons c ls) pos? p)]
[(delimiter? c)
(unread-char c p)
(ls->flonum ls pos?)]
[else
(unread-char c p)
(error 'tokenize "invalid char ~a after flonum" c)])))
(define (tokenize-flonum/no-digits pos? p)
(let ([c (read-char p)])
(cond
[(eof-object? c)
(error 'tokenize "invalid eof")]
[(digit? c)
(tokenize-flonum (list c) pos? p)]
[else
(unread-char c p)
(error 'tokenize "invalid char ~a after decimal point" c)])))
(define (tokenize-flonum/with-digits n p)
(let ([c (read-char p)])
(cond
[(eof-object? c) (+ n (string->flonum "0.0"))]
[(digit? c)
(+ n (tokenize-flonum (list c) #t p))]
[(delimiter? c)
(unread-char c p)
(+ n (string->flonum "0.0"))]
[else
(unread-char c p)
(error 'tokenize "invalid char ~a after decimal point" c)])))
(define tokenize-plus
(lambda (p)
(let ([c (peek-char p)])
(cond
[(eof-object? c) '(datum . +)]
[(delimiter? c) '(datum . +)]
[(digit? c)
(read-char p)
(cons 'datum (tokenize-number (char->num c) p))]
[($char= c #\.)
(read-char p)
(cons 'datum (tokenize-flonum/no-digits #t p))]
[else (error 'tokenize "invalid sequence +~a" c)]))))
(define tokenize-minus
(lambda (p)
(let ([c (peek-char p)])
(cond
[(eof-object? c) '(datum . -)]
[(delimiter? c) '(datum . -)]
[(digit? c)
(read-char p)
(cons 'datum (* -1 (tokenize-number (char->num c) p)))]
[($char= c #\.)
(read-char p)
(cons 'datum (tokenize-flonum/no-digits #f p))]
[($char= c #\>)
(read-char p)
(let ([ls (tokenize-identifier '() p)])
(let ([str (list->string (list* #\- #\> (reverse ls)))])
(cons 'datum (string->symbol str))))]
[else (error 'tokenize "invalid sequence -~a" c)]))))
(define tokenize-dot (define tokenize-dot
(lambda (p) (lambda (p)
(let ([c (peek-char p)]) (let ([c (peek-char p)])
@ -223,11 +104,12 @@
[else [else
(unread-char c p) (unread-char c p)
(error 'tokenize "invalid syntax ..~a" c)]))] (error 'tokenize "invalid syntax ..~a" c)]))]
[(digit? c) [else (tokenize-real p (cons #\. (tok-real-decpt p)))]))))
(read-char p) (define (tokenize-real p ls)
(cons 'datum (tokenize-flonum (list c) #t p))] (let ([str (list->string ls)])
[else (cond
(error 'tokenize "invalid syntax .~a" c)])))) [(string->number str) => (lambda (n) (cons 'datum n))]
[else (error 'tokenize "invalid numeric sequence ~a" str)])))
(define tokenize-char* (define tokenize-char*
(lambda (i str p d) (lambda (i str p d)
(cond (cond
@ -317,18 +199,6 @@
(let ([ac (f p '())]) (let ([ac (f p '())])
((comment-handler) ((comment-handler)
(list->string (reverse ac)))))) (list->string (reverse ac))))))
(define read-binary
(lambda (ac chars p)
(let ([c (read-char p)])
(cond
[(eof-object? c) ac]
[($char= #\0 c) (read-binary (* ac 2) (cons c chars) p)]
[($char= #\1 c) (read-binary (+ (* ac 2) 1) (cons c chars) p)]
[(delimiter? c) (unread-char c p) ac]
[else
(unread-char c p)
(error 'tokenize "invalid syntax #b~a"
(list->string (reverse (cons c chars))))]))))
(define tokenize-hash (define tokenize-hash
(lambda (p) (lambda (p)
(tokenize-hash/c (read-char p) p))) (tokenize-hash/c (read-char p) p)))
@ -340,20 +210,62 @@
[(char-whitespace? c) [(char-whitespace? c)
(skip-whitespace p caller)] (skip-whitespace p caller)]
[else c]))) [else c])))
(module (tok-exact tok-radix) (module (tok-exact tok-radix tok-real tok-real-sign tok-real-digit tok-real-decpt)
(define (eof-error) (define (eof-error)
(error 'tokenize "eof encountered while reading a number")) (error 'tokenize "eof encountered while reading a number"))
(define (digit? c) (define (digit? c)
(memq c '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 (memq c '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\a #\b #\c #\d #\e #\f #\A #\B #\C #\D #\E #\F))) #\a #\b #\c #\d #\e #\f #\A #\B #\C #\D #\E #\F)))
(define (exponent-marker? c)
(memq c '(#\e #\E #\s #\S #\f #\F #\d #\D #\l #\L)))
(define (tok-complex-sign p) (define (tok-complex-sign p)
(error 'tok-complex-sign "not yet")) (error 'tok-complex-sign "not yet"))
(define (tok-real-decimal p) (define (tok-real-exponent-digit p) ;;; one digit of the exponent of a real part was read
(error 'tok-real-decimal "not yet")) (let ([c (read-char p)])
(define (tok-real-decpt p) (cond
(error 'tok-real-decpt "not yet")) [(eof-object? c) (eof-error)]
(define (tok-real-sign p) [(digit? c) (cons c (tok-real-exponent-digit p))]
;;; we read the sign part of a real number [(delimiter? c) (unread-char c p) '()]
[(memq c '(#\+ #\-))
(cons c (tok-complex-sign p))]
[else (list c)])))
(define (tok-real-exponent-sign p) ;;; an exponent marker and a sign of the real part were read
(let ([c (read-char p)])
(cond
[(eof-object? c) (eof-error)]
[(digit? c) (cons c (tok-real-exponent-digit p))]
[(delimiter? c) (unread-char c p) '()]
[else (list c)])))
(define (tok-real-exponent p) ;;; read after an exponent marker of the real part is read
(let ([c (read-char p)])
(cond
[(eof-object? c) (eof-error)]
[(memq c '(#\- #\+))
(cons c (tok-real-exponent-sign p))]
[(digit? c)
(cons c (tok-real-exponent-digit p))]
[(delimiter? c) (unread-char c p) '()]
[else (list c)])))
(define (tok-real-decimal p) ;;; called after a prefix, a sign, a decimal point, and at least one digit were read
(let ([c (read-char p)])
(cond
[(eof-object? c) '()]
[(digit? c) (cons c (tok-real-decimal p))]
[(delimiter? c) (unread-char c p) '()]
[(memq c '(#\- #\+))
(cons c (tok-complex-sign p))]
[(exponent-marker? c)
(cons c (tok-real-exponent p))]
[else (list c)])))
(define (tok-real-decpt p) ;;; called after a prefix, a sign, and a decimal point with no digits were read
(let ([c (read-char p)])
(cond
[(eof-object? c) (eof-error)]
[(digit? c)
(cons c (tok-real-decimal p))]
[(delimiter? c) (unread-char c p) '()]
[else (list c)])))
(define (tok-real-sign p) ;;; called after a prefix and a sign are read
(let ([c0 (read-char p)]) (let ([c0 (read-char p)])
(cond (cond
[(eof-object? c0) (eof-error)] [(eof-object? c0) (eof-error)]
@ -362,11 +274,10 @@
[(digit? c0) [(digit? c0)
(cons c0 (tok-real-digit p))] (cons c0 (tok-real-digit p))]
[(delimiter? c0) (unread-char c0 p) '()] [(delimiter? c0) (unread-char c0 p) '()]
;[(memq c0 '(#\i)) ;;; +i and -i ;[(memq c0 '(#\i)) ;;; FIXME +i and -i
; (list c0)] ; (list c0)]
[else (list c0)]))) [else (list c0)])))
(define (tok-real-digit p) (define (tok-real-digit p) ;;; called after a prefix, a sign, and at least one digit was read
;;; we did read one digit.
(let ([c0 (read-char p)]) (let ([c0 (read-char p)])
(cond (cond
[(eof-object? c0) '()] [(eof-object? c0) '()]
@ -376,9 +287,11 @@
(cons c0 (tok-complex-sign p))] (cons c0 (tok-complex-sign p))]
[(digit? c0) [(digit? c0)
(cons c0 (tok-real-digit p))] (cons c0 (tok-real-digit p))]
[(exponent-marker? c0)
(cons c0 (tok-real-exponent p))]
[(delimiter? c0) (unread-char c0 p) '()] [(delimiter? c0) (unread-char c0 p) '()]
[else (list c0)]))) [else (list c0)])))
(define (tok-real p) (define (tok-real p) ;;; called after the prefix is read
(let ([c0 (read-char p)]) (let ([c0 (read-char p)])
(cond (cond
[(eof-object? c0) (eof-error)] [(eof-object? c0) (eof-error)]
@ -390,9 +303,20 @@
(cons c0 (tok-real-digit p))] (cons c0 (tok-real-digit p))]
[(delimiter? c0) (unread-char c0 p) '()] [(delimiter? c0) (unread-char c0 p) '()]
[else (list c0)]))) [else (list c0)])))
(define (tok-exact p) (define (tok-exact p) ;;; called after an exactness mark is read
(error 'tokenize-exact "not yet")) (let ([c0 (read-char p)])
(define (tok-radix p) (cond
[(eof-object? c0) (eof-error)]
[(memq c0 '(#\#))
(let ([c1 (read-char p)])
(cond
[(eof-object? c0) (eof-error)]
[(memq c1 '(#\x #\X #\b #\B #\d #\D #\o #\O))
(list* c0 c1 (tok-real p))]
[(delimiter? c1) (unread-char c1 p) (list c0)]
[else (list c0 c1)]))]
[else (unread-char c0 p) (tok-real p)])))
(define (tok-radix p) ;;; called after a radix mark is read
(let ([c0 (peek-char p)]) (let ([c0 (peek-char p)])
(cond (cond
[(eof-object? c0) (eof-error)] [(eof-object? c0) (eof-error)]
@ -410,18 +334,18 @@
(lambda (c p) (lambda (c p)
(cond (cond
[(eof-object? c) (error 'tokenize "invalid # near end of file")] [(eof-object? c) (error 'tokenize "invalid # near end of file")]
[($char= c #\t) [(memq c '(#\t #\T))
(let ([c (peek-char p)]) (let ([c (peek-char p)])
(cond (cond
[(eof-object? c) '(datum . #t)] [(eof-object? c) '(datum . #t)]
[(delimiter? c) '(datum . #t)] [(delimiter? c) '(datum . #t)]
[else (error 'tokenize "invalid syntax near #t")]))] [else (error 'tokenize "invalid syntax near #~a" c)]))]
[($char= c #\f) [(memq c '(#\f #\F))
(let ([c (peek-char p)]) (let ([c (peek-char p)])
(cond (cond
[(eof-object? c) '(datum . #f)] [(eof-object? c) '(datum . #f)]
[(delimiter? c) '(datum . #f)] [(delimiter? c) '(datum . #f)]
[else (error 'tokenize "invalid syntax near #f")]))] [else (error 'tokenize "invalid syntax near #~a" c)]))]
[($char= #\\ c) (tokenize-char p)] [($char= #\\ c) (tokenize-char p)]
[($char= #\( c) 'vparen] [($char= #\( c) 'vparen]
[($char= #\' c) '(macro . syntax)] [($char= #\' c) '(macro . syntax)]
@ -530,34 +454,6 @@
(cond (cond
[(string->number str) => (lambda (n) (cons 'datum n))] [(string->number str) => (lambda (n) (cons 'datum n))]
[else (error 'tokenize "invalid numeric sequence ~a" str)]))] [else (error 'tokenize "invalid numeric sequence ~a" str)]))]
;[($char= #\x c) (tokenize-hex-init p)]
;[(or ($char= #\b c) ($char= #\B c))
; (let ([c (read-char p)])
; (cond
; [(eof-object? c)
; (error 'tokenize "invalid eof while reading #b")]
; [($char= #\- c)
; (let ([c (read-char p)])
; (cond
; [(eof-object? c)
; (error 'tokenize "invalid eof while reading #b-")]
; [($char= #\0 c)
; (cons 'datum
; (* -1 (read-binary 0 '(#\0 #\-) p)))]
; [($char= #\1 c)
; (cons 'datum
; (* -1 (read-binary 1 '(#\1 #\-) p)))]
; [else
; (unread-char c p)
; (error 'tokenize "invalid binary syntax #b-~a" c)]))]
; [($char= #\0 c)
; (cons 'datum (read-binary 0 '(#\0) p))]
; [($char= #\1 c)
; (cons 'datum (read-binary 1 '(#\1) p))]
; [else
; (unread-char c p)
; (error 'tokenize "invalid syntax #b~a" c)]
; ))]
[($char= #\@ c) [($char= #\@ c)
(error 'read "FIXME: fasl read disabled") (error 'read "FIXME: fasl read disabled")
'(cons 'datum ($fasl-read p))] '(cons 'datum ($fasl-read p))]
@ -610,8 +506,7 @@
'(macro . unquote-splicing)] '(macro . unquote-splicing)]
[else '(macro . unquote)]))] [else '(macro . unquote)]))]
[($char= #\# c) (tokenize-hash p)] [($char= #\# c) (tokenize-hash p)]
[(digit? c) [(digit? c) (tokenize-real p (cons c (tok-real-digit p)))]
(cons 'datum (tokenize-number (char->num c) p))]
[(initial? c) [(initial? c)
(let ([ls (reverse (tokenize-identifier (cons c '()) p))]) (let ([ls (reverse (tokenize-identifier (cons c '()) p))])
(cons 'datum (string->symbol (list->string ls))))] (cons 'datum (string->symbol (list->string ls))))]
@ -621,10 +516,23 @@
[($char= #\; c) [($char= #\; c)
(skip-comment p) (skip-comment p)
(tokenize p)] (tokenize p)]
[($char= #\+ c) [(memq c '(#\+))
(tokenize-plus p)] (let ([c (peek-char p)])
[($char= #\- c) (cond
(tokenize-minus p)] [(eof-object? c) '(datum . +)]
[(delimiter? c) '(datum . +)]
[else (tokenize-real p (cons #\+ (tok-real-sign p)))]))]
[(memq c '(#\-))
(let ([c (peek-char p)])
(cond
[(eof-object? c) '(datum . -)]
[(delimiter? c) '(datum . -)]
[($char= c #\>)
(read-char p)
(let ([ls (tokenize-identifier '() p)])
(let ([str (list->string (list* #\- #\> (reverse ls)))])
(cons 'datum (string->symbol str))))]
[else (tokenize-real p (cons #\- (tok-real-sign p)))]))]
[($char= #\. c) [($char= #\. c)
(tokenize-dot p)] (tokenize-dot p)]
[($char= #\| c) [($char= #\| c)

View File

@ -15,5 +15,63 @@
[(t str*) (string->number str*)] ...)])) [(t str*) (string->number str*)] ...)]))
(reader-tests test-reader (reader-tests test-reader
"12")) "12"
"+12"
"3427384783264876238746784234"
"0"
"+0"
"-12"
"-3498738947983748939478347834"
"-0"
"#x-238973897AAAAAFFFFbb00bbdddcc"
"#x238973897AAAAA000FFFFbbbbdddcc"
"#x+07edf387"
"#x+0"
"#x-0"
"#x0"
"#b01010101010000000111111111110000"
"#b-01010101010000000111111111110000"
"#b+01010101010000000111111111110000"
"#b+0"
"#b-0"
"#b0"
"#d2398128321308912830912830912839"
"#d-2398128321308912830912830912839"
"#d+2398128321308912830912830912839"
"#d+0"
"#d-0"
"#d0"
"#o237612036721631263126371263712"
"#o-2376120036721631263126371263712"
"#o+23761236721631263126371263712"
"#o+0"
"#o-0"
"#o0"
"#X-238973897AAAAAFFFFbb00bbdddcc"
"#X238973897AAAAA000FFFFbbbbdddcc"
"#X+07edf387"
"#X+0"
"#X-0"
"#X0"
"#B01010101010000000111111111110000"
"#B-01010101010000000111111111110000"
"#B+01010101010000000111111111110000"
"#B+0"
"#B-0"
"#B0"
"#D2398128321308912830912830912839"
"#D-2398128321308912830912830912839"
"#D+2398128321308912830912830912839"
"#D+0"
"#D-0"
"#D0"
"#O237612036721631263126371263712"
"#O-2376120036721631263126371263712"
"#O+23761236721631263126371263712"
"#O+0"
"#O-0"
"#O0"
))