moving to a new numeric reading algorithm.

added:
  src/tests/reader.ss
This commit is contained in:
Abdulaziz Ghuloum 2007-05-20 13:11:33 -04:00
parent 3c520ea9d3
commit aa98df6c4c
7 changed files with 204 additions and 33 deletions

Binary file not shown.

View File

@ -1,6 +1,6 @@
(library (ikarus io input-strings) (library (ikarus io input-strings)
(export open-input-string) (export open-input-string with-input-from-string)
(import (import
(ikarus system $strings) (ikarus system $strings)
(ikarus system $bytevectors) (ikarus system $bytevectors)
@ -8,7 +8,7 @@
(ikarus system $pairs) (ikarus system $pairs)
(ikarus system $ports) (ikarus system $ports)
(ikarus system $io) (ikarus system $io)
(except (ikarus) open-input-string)) (except (ikarus) open-input-string with-input-from-string))
(define-syntax message-case (define-syntax message-case
(syntax-rules (else) (syntax-rules (else)
@ -75,5 +75,17 @@
(make-input-string-handler str) (make-input-string-handler str)
'#vu8())]) '#vu8())])
port))) port)))
(define with-input-from-string
(lambda (str proc)
(unless (string? str)
(error 'with-input-from-string "~s is not a string" str))
(unless (procedure? proc)
(error 'with-input-from-string "~s is not a procedure" proc))
(let ([p (open-input-string str)])
(parameterize ([current-input-port p])
(proc)))))
) )

View File

@ -832,6 +832,66 @@
[($fx> len 1) [($fx> len 1)
(convert-data-init str len pos? 2 ($string-ref str 1))] (convert-data-init str len pos? 2 ($string-ref str 1))]
[else #f])) [else #f]))
(define (digit c radix)
(cond
[(and ($char<= #\0 c) ($char<= c #\9))
(let ([n ($fx- ($char->fixnum c) ($char->fixnum #\0))])
(and
(or ($fx>= radix 10)
(and ($fx= radix 8) ($char<= c #\7))
(and ($fx= radix 2) ($char<= c #\1)))
n))]
[(and ($char<= #\a c) ($char<= c #\f))
(let ([n ($fx+ 10 ($fx- ($char->fixnum c) ($char->fixnum #\a)))])
(and ($fx= radix 16) n))]
[(and ($char<= #\A c) ($char<= c #\F))
(let ([n ($fx+ 10 ($fx- ($char->fixnum c) ($char->fixnum #\A)))])
(and ($fx= radix 16) n))]
[else #f]))
(define (convert-subseq str idx len radix ac)
(cond
[($fx< idx len)
(let ([c (string-ref str idx)])
(cond
[(digit c radix) =>
(lambda (n)
(convert-subseq str ($fxadd1 idx) len radix
(+ (* ac radix) n)))]
[else #f]))]
[else ac]))
(define (convert-init str idx len radix)
(cond
[($fx< idx len)
(let ([c (string-ref str idx)])
(cond
[(digit c radix) =>
(lambda (n)
(convert-subseq str ($fxadd1 idx) len radix n))]
[else #f]))]
[else #f]))
(define (convert-init-sign str idx len radix)
(cond
[($fx< idx len)
(let ([c (string-ref str idx)])
(cond
[(char=? c #\+)
(convert-init str ($fxadd1 idx) len radix)]
[(char=? c #\-)
(let ([n (convert-init str ($fxadd1 idx) len radix)])
(and n (- n)))]
[else (convert-init str idx len radix)]))]
[else #f]))
(define (convert-radix str len)
(cond
[($fx>= len 2)
(let ([c (string-ref str 1)])
(case c
[(#\x #\X) (convert-init-sign str 2 len 16)]
[(#\b #\B) (convert-init-sign str 2 len 2)]
[(#\d #\D) (convert-init-sign str 2 len 10)]
[(#\o #\O) (convert-init-sign str 2 len 8)]
[else #f]))]
[else #f]))
(define (convert-sign str len) (define (convert-sign str len)
(cond (cond
[($fx> len 0) [($fx> len 0)
@ -839,6 +899,7 @@
(case c (case c
[(#\+) (convert-num str len #t)] [(#\+) (convert-num str len #t)]
[(#\-) (convert-num str len #f)] [(#\-) (convert-num str len #f)]
[(#\#) (convert-radix str len)]
[else [else
(convert-data-init str len #t 1 c)]))] (convert-data-init str len #t 1 c)]))]
[else #f])) [else #f]))

View File

@ -340,6 +340,72 @@
[(char-whitespace? c) [(char-whitespace? c)
(skip-whitespace p caller)] (skip-whitespace p caller)]
[else c]))) [else c])))
(module (tok-exact tok-radix)
(define (eof-error)
(error 'tokenize "eof encountered while reading a number"))
(define (digit? c)
(memq c '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\a #\b #\c #\d #\e #\f #\A #\B #\C #\D #\E #\F)))
(define (tok-complex-sign p)
(error 'tok-complex-sign "not yet"))
(define (tok-real-decimal p)
(error 'tok-real-decimal "not yet"))
(define (tok-real-decpt p)
(error 'tok-real-decpt "not yet"))
(define (tok-real-sign p)
;;; we read the sign part of a real number
(let ([c0 (read-char p)])
(cond
[(eof-object? c0) (eof-error)]
[(memq c0 '(#\.))
(cons c0 (tok-real-decpt p))]
[(digit? c0)
(cons c0 (tok-real-digit p))]
[(delimiter? c0) (unread-char c0 p) '()]
;[(memq c0 '(#\i)) ;;; +i and -i
; (list c0)]
[else (list c0)])))
(define (tok-real-digit p)
;;; we did read one digit.
(let ([c0 (read-char p)])
(cond
[(eof-object? c0) '()]
[(memq c0 '(#\.))
(cons c0 (tok-real-decimal p))]
[(memq c0 '(#\- #\+))
(cons c0 (tok-complex-sign p))]
[(digit? c0)
(cons c0 (tok-real-digit p))]
[(delimiter? c0) (unread-char c0 p) '()]
[else (list c0)])))
(define (tok-real p)
(let ([c0 (read-char p)])
(cond
[(eof-object? c0) (eof-error)]
[(memq c0 '(#\- #\+))
(cons c0 (tok-real-sign p))]
[(memq c0 '(#\.))
(cons c0 (tok-real-decpt p))]
[(digit? c0)
(cons c0 (tok-real-digit p))]
[(delimiter? c0) (unread-char c0 p) '()]
[else (list c0)])))
(define (tok-exact p)
(error 'tokenize-exact "not yet"))
(define (tok-radix p)
(let ([c0 (peek-char p)])
(cond
[(eof-object? c0) (eof-error)]
[($char= c0 #\#)
(read-char p)
(let ([c1 (read-char p)])
(cond
[(eof-object? c1) (eof-error)]
[(memq c1 '(#\e #\i #\E #\I))
(list* c0 c1 (tok-real p))]
[(delimiter? c1) (unread-char c1 p) (list c0)]
[else (list c0 c1)]))]
[else (tok-real p)]))))
(define tokenize-hash/c (define tokenize-hash/c
(lambda (c p) (lambda (c p)
(cond (cond
@ -358,38 +424,10 @@
[else (error 'tokenize "invalid syntax near #f")]))] [else (error 'tokenize "invalid syntax near #f")]))]
[($char= #\\ c) (tokenize-char p)] [($char= #\\ c) (tokenize-char p)]
[($char= #\( c) 'vparen] [($char= #\( c) 'vparen]
[($char= #\x c) (tokenize-hex-init p)]
[($char= #\' c) '(macro . syntax)] [($char= #\' c) '(macro . syntax)]
[($char= #\; c) 'hash-semi] [($char= #\; c) 'hash-semi]
[($char= #\% c) '(macro . |#primitive|)] [($char= #\% c) '(macro . |#primitive|)]
[($char= #\| c) (multiline-comment p) (tokenize p)] [($char= #\| c) (multiline-comment p) (tokenize p)]
[($char= #\b c)
(let ([c (read-char p)])
(cond
[(eof-object? c)
(error 'tokenize "invalid eof while reading #b")]
[($char= #\- c)
(let ([c (read-char p)])
(cond
[(eof-object? c)
(error 'tokenize "invalid eof while reading #b-")]
[($char= #\0 c)
(cons 'datum
(* -1 (read-binary 0 '(#\0 #\-) p)))]
[($char= #\1 c)
(cons 'datum
(* -1 (read-binary 1 '(#\1 #\-) p)))]
[else
(unread-char c p)
(error 'tokenize "invalid binary syntax #b-~a" c)]))]
[($char= #\0 c)
(cons 'datum (read-binary 0 '(#\0) p))]
[($char= #\1 c)
(cons 'datum (read-binary 1 '(#\1) p))]
[else
(unread-char c p)
(error 'tokenize "invalid syntax #b~a" c)]
))]
[($char= #\! c) [($char= #\! c)
(let ([e (read-char p)]) (let ([e (read-char p)])
(when (eof-object? e) (when (eof-object? e)
@ -482,6 +520,44 @@
[(eof-object? c) [(eof-object? c)
(error 'tokenize "invalid eof object after #v")] (error 'tokenize "invalid eof object after #v")]
[else (error 'tokenize "invalid sequence #v~a" c)]))] [else (error 'tokenize "invalid sequence #v~a" c)]))]
[(memq c '(#\e #\i #\E #\I))
(let ([str (list->string (list* #\# c (tok-exact p)))])
(cond
[(string->number str) => (lambda (n) (cons 'datum n))]
[else (error 'tokenize "invalid numeric sequence ~a" str)]))]
[(memq c '(#\b #\B #\x #\X #\o #\O #\d #\D))
(let ([str (list->string (list* #\# c (tok-radix p)))])
(cond
[(string->number str) => (lambda (n) (cons 'datum n))]
[else (error 'tokenize "invalid numeric sequence ~a" str)]))]
;[($char= #\x c) (tokenize-hex-init p)]
;[(or ($char= #\b c) ($char= #\B c))
; (let ([c (read-char p)])
; (cond
; [(eof-object? c)
; (error 'tokenize "invalid eof while reading #b")]
; [($char= #\- c)
; (let ([c (read-char p)])
; (cond
; [(eof-object? c)
; (error 'tokenize "invalid eof while reading #b-")]
; [($char= #\0 c)
; (cons 'datum
; (* -1 (read-binary 0 '(#\0 #\-) p)))]
; [($char= #\1 c)
; (cons 'datum
; (* -1 (read-binary 1 '(#\1 #\-) p)))]
; [else
; (unread-char c p)
; (error 'tokenize "invalid binary syntax #b-~a" c)]))]
; [($char= #\0 c)
; (cons 'datum (read-binary 0 '(#\0) p))]
; [($char= #\1 c)
; (cons 'datum (read-binary 1 '(#\1) p))]
; [else
; (unread-char c p)
; (error 'tokenize "invalid syntax #b~a" c)]
; ))]
[($char= #\@ c) [($char= #\@ c)
(error 'read "FIXME: fasl read disabled") (error 'read "FIXME: fasl read disabled")
'(cons 'datum ($fasl-read p))] '(cons 'datum ($fasl-read p))]

View File

@ -380,6 +380,7 @@
[output-port-name i] [output-port-name i]
[open-input-file i r] [open-input-file i r]
[with-input-from-file i r] [with-input-from-file i r]
[with-input-from-string i]
[with-output-to-file i r] [with-output-to-file i r]
[open-output-file i r] [open-output-file i r]
[open-output-string i] [open-output-string i]

View File

@ -1,7 +1,9 @@
#!/usr/bin/env ikarus --r6rs-script #!/usr/bin/env ikarus --r6rs-script
(import (ikarus) (import (ikarus)
(tests reader)
(tests bytevectors)) (tests bytevectors))
(test-reader)
(test-bytevectors) (test-bytevectors)
(printf "Happy Happy Joy Joy\n") (printf "Happy Happy Joy Joy\n")

19
src/tests/reader.ss Normal file
View File

@ -0,0 +1,19 @@
(library (tests reader)
(export test-reader)
(import (ikarus) (tests framework))
(define t
(lambda (str)
(lambda (n?)
(and (number? n?)
(= (with-input-from-string str read) n?)))))
(define-syntax reader-tests
(syntax-rules ()
[(_ name str* ...)
(define-tests name
[(t str*) (string->number str*)] ...)]))
(reader-tests test-reader
"12"))