small cleanup to the reader.
This commit is contained in:
parent
5e02972e7f
commit
3d17aa7cf8
|
@ -74,7 +74,7 @@
|
||||||
(define delimiter?
|
(define delimiter?
|
||||||
(lambda (c)
|
(lambda (c)
|
||||||
(or (char-whitespace? c)
|
(or (char-whitespace? c)
|
||||||
(memq c '(#\( #\) #\[ #\] #\" #\# #\;)))))
|
(memq c '(#\( #\) #\[ #\] #\" #\# #\; #\{ #\})))))
|
||||||
(define digit?
|
(define digit?
|
||||||
(lambda (c)
|
(lambda (c)
|
||||||
(and ($char<= #\0 c) ($char<= c #\9))))
|
(and ($char<= #\0 c) ($char<= c #\9))))
|
||||||
|
@ -131,9 +131,12 @@
|
||||||
[(eof-object? c)
|
[(eof-object? c)
|
||||||
(die/p p 'tokenize "invalid eof inside string")]
|
(die/p p 'tokenize "invalid eof inside string")]
|
||||||
[else (tokenize-string-char ls p c)])))
|
[else (tokenize-string-char ls p c)])))
|
||||||
|
(define LF1 '(#\xA #\x85 #\x2028)) ;;; these are considered newlines
|
||||||
|
(define LF2 '(#\xA #\x85)) ;;; these are not newlines if they
|
||||||
|
;;; appear after CR
|
||||||
(define (tokenize-string-char ls p c)
|
(define (tokenize-string-char ls p c)
|
||||||
(define (intraline-whitespace? c)
|
(define (intraline-whitespace? c)
|
||||||
(or (eqv? c #\x9)
|
(or (eqv? c #\x9)
|
||||||
(eq? (char-general-category c) 'Zs)))
|
(eq? (char-general-category c) 'Zs)))
|
||||||
(define (tokenize-string-continue ls p c)
|
(define (tokenize-string-continue ls p c)
|
||||||
(cond
|
(cond
|
||||||
|
@ -195,44 +198,41 @@
|
||||||
[(eof-object? c)
|
[(eof-object? c)
|
||||||
(die/p p 'tokenize "invalid eof inside string")]
|
(die/p p 'tokenize "invalid eof inside string")]
|
||||||
[(intraline-whitespace? c) (f)]
|
[(intraline-whitespace? c) (f)]
|
||||||
[(memv c '(#\xA #\x85 #\x2028))
|
[(memv c LF1)
|
||||||
(tokenize-string-continue ls p (read-char p))]
|
(tokenize-string-continue ls p (read-char p))]
|
||||||
[(memv c '(#\xD))
|
[(eqv? c #\return)
|
||||||
(let ([c (read-char p)])
|
(let ([c (read-char p)])
|
||||||
(cond
|
(cond
|
||||||
[(memv c '(#\xA #\x85))
|
[(memv c LF2)
|
||||||
(tokenize-string-continue ls p (read-char p))]
|
(tokenize-string-continue ls p (read-char p))]
|
||||||
[else
|
[else
|
||||||
(tokenize-string-continue ls p c)]))]
|
(tokenize-string-continue ls p c)]))]
|
||||||
[else
|
[else
|
||||||
(die/p-1 p 'tokenize
|
(die/p-1 p 'tokenize
|
||||||
"non-whitespace character after escape")])))]
|
"non-whitespace character after escape")])))]
|
||||||
[(memv c '(#\xA #\x85 #\x2028))
|
[(memv c LF1)
|
||||||
(tokenize-string-continue ls p (read-char p))]
|
(tokenize-string-continue ls p (read-char p))]
|
||||||
[(memv c '(#\xD))
|
[(eqv? c #\return)
|
||||||
(let ([c (read-char p)])
|
(let ([c (read-char p)])
|
||||||
(cond
|
(cond
|
||||||
[(memv c '(#\xA #\x85))
|
[(memv c LF2)
|
||||||
(tokenize-string-continue ls p (read-char p))]
|
(tokenize-string-continue ls p (read-char p))]
|
||||||
[else
|
[else
|
||||||
(tokenize-string-continue ls p c)]))]
|
(tokenize-string-continue ls p c)]))]
|
||||||
[else (die/p-1 p 'tokenize "invalid string escape" c)]))]
|
[else (die/p-1 p 'tokenize "invalid string escape" c)]))]
|
||||||
[(memv c '(#\xA #\x85 #\x2028))
|
[(memv c LF1)
|
||||||
(tokenize-string (cons #\linefeed ls) p)]
|
(tokenize-string (cons #\linefeed ls) p)]
|
||||||
[(memv c '(#\xD))
|
[(eqv? c #\return)
|
||||||
(let ([c (peek-char p)])
|
(let ([c (peek-char p)])
|
||||||
(when (memv c '(#\xA #\x85))
|
(when (memv c LF2) (read-char p))
|
||||||
(read-char p))
|
|
||||||
(tokenize-string (cons #\linefeed ls) p))]
|
(tokenize-string (cons #\linefeed ls) p))]
|
||||||
[else
|
[else
|
||||||
(tokenize-string (cons c ls) p)]))
|
(tokenize-string (cons c ls) p)]))
|
||||||
(define skip-comment
|
(define skip-comment
|
||||||
(lambda (p)
|
(lambda (p)
|
||||||
(let ([c (read-char p)])
|
(let ([c (read-char p)])
|
||||||
(unless (eof-object? c)
|
(unless (or (eof-object? c) (memv c LF1) (eqv? c #\return))
|
||||||
(let ([i ($char->fixnum c)])
|
(skip-comment p)))))
|
||||||
(unless (or (fx= i 10) (fx= i 13))
|
|
||||||
(skip-comment p)))))))
|
|
||||||
(define tokenize-dot
|
(define tokenize-dot
|
||||||
(lambda (p)
|
(lambda (p)
|
||||||
(let ([c (peek-char p)])
|
(let ([c (peek-char p)])
|
||||||
|
@ -268,13 +268,15 @@
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? c) d]
|
[(eof-object? c) d]
|
||||||
[(delimiter? c) d]
|
[(delimiter? c) d]
|
||||||
[else (die/p p 'tokenize "invalid character after sequence"
|
[else
|
||||||
(string-append str (string c)))]))]
|
(die/p p 'tokenize "invalid character after sequence"
|
||||||
|
(string-append str (string c)))]))]
|
||||||
[else
|
[else
|
||||||
(let ([c (read-char p)])
|
(let ([c (read-char p)])
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? c)
|
[(eof-object? c)
|
||||||
(die/p p 'tokenize "invalid eof in the middle of expected sequence" str)]
|
(die/p p 'tokenize
|
||||||
|
"invalid eof in the middle of expected sequence" str)]
|
||||||
[($char= c (string-ref str i))
|
[($char= c (string-ref str i))
|
||||||
(tokenize-char* (fxadd1 i) str p d)]
|
(tokenize-char* (fxadd1 i) str p d)]
|
||||||
[else
|
[else
|
||||||
|
@ -290,15 +292,16 @@
|
||||||
[($char= (string-ref str 1) c)
|
[($char= (string-ref str 1) c)
|
||||||
(read-char p)
|
(read-char p)
|
||||||
(tokenize-char* 2 str p d)]
|
(tokenize-char* 2 str p d)]
|
||||||
[else (die/p p 'tokenize "invalid syntax"
|
[else
|
||||||
(string-ref str 0) c)]))))
|
(die/p p 'tokenize "invalid syntax"
|
||||||
|
(string-ref str 0) c)]))))
|
||||||
(define tokenize-char
|
(define tokenize-char
|
||||||
(lambda (p)
|
(lambda (p)
|
||||||
(let ([c (read-char p)])
|
(let ([c (read-char p)])
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? c)
|
[(eof-object? c)
|
||||||
(die/p p 'tokenize "invalid #\\ near end of file")]
|
(die/p p 'tokenize "invalid #\\ near end of file")]
|
||||||
[(eqv? #\n c)
|
[(eqv? #\n c)
|
||||||
(let ([c (peek-char p)])
|
(let ([c (peek-char p)])
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? c)
|
[(eof-object? c)
|
||||||
|
@ -580,12 +583,15 @@
|
||||||
(cons 'datum (parse-string p (list c #\#) 8 8 #f))]
|
(cons 'datum (parse-string p (list c #\#) 8 8 #f))]
|
||||||
[(memq c '(#\d #\D))
|
[(memq c '(#\d #\D))
|
||||||
(cons 'datum (parse-string p (list c #\#) 10 10 #f))]
|
(cons 'datum (parse-string p (list c #\#) 10 10 #f))]
|
||||||
[($char= #\@ c)
|
;[($char= #\@ c) DEAD: Unfixable due to port encoding
|
||||||
(when (eq? (port-mode p) 'r6rs-mode)
|
; that does not allow mixing binary and
|
||||||
(die/p-1 p 'tokenize "fasl syntax is invalid in #!r6rs mode"
|
; textual data in the same port.
|
||||||
(format "#~a" c)))
|
; Left here for historical value
|
||||||
(die/p-1 p 'read "FIXME: fasl read disabled")
|
; (when (eq? (port-mode p) 'r6rs-mode)
|
||||||
'(cons 'datum ($fasl-read p))]
|
; (die/p-1 p 'tokenize "fasl syntax is invalid in #!r6rs mode"
|
||||||
|
; (format "#~a" c)))
|
||||||
|
; (die/p-1 p 'read "FIXME: fasl read disabled")
|
||||||
|
; '(cons 'datum ($fasl-read p))]
|
||||||
[else
|
[else
|
||||||
(die/p-1 p 'tokenize
|
(die/p-1 p 'tokenize
|
||||||
(format "invalid syntax #~a" c))])))
|
(format "invalid syntax #~a" c))])))
|
||||||
|
|
Loading…
Reference in New Issue