Cases in bug 173201 are closed. Still failing read0 though.

This commit is contained in:
Abdulaziz Ghuloum 2007-12-02 06:58:33 -05:00
parent bc2e88e4e7
commit 9d9735ce2a
3 changed files with 100 additions and 75 deletions

Binary file not shown.

View File

@ -77,82 +77,107 @@
(unread-char c p) (unread-char c p)
(error 'tokenize "invalid identifier syntax" (error 'tokenize "invalid identifier syntax"
(list->string (reverse (cons c ls))))])))) (list->string (reverse (cons c ls))))]))))
(define tokenize-string (define (tokenize-string ls p)
(lambda (ls p) (let ([c (read-char p)])
(define (intraline-whitespace? c) (cond
(or (eqv? c #\x9) [(eof-object? c)
(eq? (char-general-category c) 'Zs))) (error 'tokenize "invalid eof inside string")]
[else (tokenize-string-char ls p c)])))
(define (tokenize-string-char ls p c)
(define (intraline-whitespace? c)
(or (eqv? c #\x9)
(eq? (char-general-category c) 'Zs)))
(define (tokenize-string-continue ls p c)
(cond
[(eof-object? c)
(error 'tokenize "invalid eof inside string")]
[(intraline-whitespace? c)
(let f ()
(let ([c (read-char p)])
(cond
[(eof-object? c)
(error 'tokenize "invalid eof inside string")]
[(intraline-whitespace? c) (f)]
[else (tokenize-string-char ls p c)])))]
[else (tokenize-string-char ls p c)]))
(cond
[($char= #\" c) ls]
[($char= #\\ c)
(let ([c (read-char p)]) (let ([c (read-char p)])
(cond (cond
[(eof-object? c) [(eof-object? c)
(error 'tokenize "end-of-file while inside a string")] (error 'tokenize "invalid eof after string escape")]
[($char= #\" c) ls] [($char= #\a c) (tokenize-string (cons #\x7 ls) p)]
[($char= #\\ c) [($char= #\b c) (tokenize-string (cons #\x8 ls) p)]
(let ([c (read-char p)]) [($char= #\t c) (tokenize-string (cons #\x9 ls) p)]
(cond [($char= #\n c) (tokenize-string (cons #\xA ls) p)]
[($char= #\a c) (tokenize-string (cons #\x7 ls) p)] [($char= #\v c) (tokenize-string (cons #\xB ls) p)]
[($char= #\b c) (tokenize-string (cons #\x8 ls) p)] [($char= #\f c) (tokenize-string (cons #\xC ls) p)]
[($char= #\t c) (tokenize-string (cons #\x9 ls) p)] [($char= #\r c) (tokenize-string (cons #\xD ls) p)]
[($char= #\n c) (tokenize-string (cons #\xA ls) p)] [($char= #\" c) (tokenize-string (cons #\x22 ls) p)]
[($char= #\v c) (tokenize-string (cons #\xB ls) p)] [($char= #\\ c) (tokenize-string (cons #\x5C ls) p)]
[($char= #\f c) (tokenize-string (cons #\xC ls) p)] [($char= #\x c) ;;; unicode escape \xXXX;
[($char= #\r c) (tokenize-string (cons #\xD ls) p)] (let ([c (read-char p)])
[($char= #\" c) (tokenize-string (cons #\x22 ls) p)] (cond
[($char= #\\ c) (tokenize-string (cons #\x5C ls) p)] [(eof-object? c)
[($char= #\x c) ;;; unicode escape \xXXX; (error 'tokenize "invalid eof inside string")]
(let ([c (read-char p)]) [(hex c) =>
(cond (lambda (n)
[(eof-object? c) (let f ([n n])
(error 'tokenize "invalid eof inside string")] (let ([c (read-char p)])
[(hex c) => (cond
(lambda (n) [(eof-object? n)
(let f ([n n]) (error 'tokenize "invalid eof inside string")]
(let ([c (read-char p)]) [(hex c) =>
(cond (lambda (v) (f (+ (* n 16) v)))]
[(eof-object? n) [($char= c #\;)
(error 'tokenize "invalid eof inside string")] (tokenize-string
[(hex c) => (cons (integer->char n) ls) p)]
(lambda (v) (f (+ (* n 16) v)))] [else
[($char= c #\;) (error 'tokenize
(tokenize-string "invalid char in escape sequence"
(cons (integer->char n) ls) p)] c)]))))]
[else [else
(error 'tokenize (error 'tokenize
"invalid char in escape sequence" "invalid char in escape sequence" c)]))]
c)]))))] [(intraline-whitespace? c)
[else (let f ()
(error 'tokenize (let ([c (read-char p)])
"invalid char in escape sequence" c)]))] (cond
[(eof-object? c) [(eof-object? c)
(error 'tokenize "invalid eof after string escape")] (error 'tokenize "invalid eof inside string")]
[(intraline-whitespace? c) [(intraline-whitespace? c) (f)]
(let ([c [(memv c '(#\xA #\x85 #\x2028))
(let ([c (read-char p)]) (tokenize-string-continue ls p (read-char p))]
(cond [(memv c '(#\xD))
[(memv c '(#\xA #\x85 #\x2028)) (read-char p)] (let ([c (read-char p)])
[(memv c '(#\xD)) (cond
(let ([c (read-char p)]) [(memv c '(#\xA #\x85))
(cond (tokenize-string-continue ls p (read-char p))]
[(memv c '(#\A #\x85)) [else
(read-char p)] (tokenize-string-continue ls p c)]))]
[else c]))] [else
[else (error 'tokenize
(error 'tokenize "non-whitespace character after escape")])))]
"expected line ending inside string")]))]) [(memv c '(#\xA #\x85 #\x2028))
(unless (and (char? c) (intraline-whitespace? c)) (tokenize-string-continue ls p (read-char p))]
(error 'tokenize [(memv c '(#\xD))
"expected an intraline whitespace inside a string")) (let ([c (read-char p)])
(tokenize-string ls p))] (cond
[else (error 'tokenize "invalid string escape" c)]))] [(memv c '(#\xA #\x85))
[(memv c '(#\xA #\x85 #\x2028)) (tokenize-string-continue ls p (read-char p))]
(tokenize-string (cons #\linefeed ls) p)] [else
[(memv c '(#\xD)) (tokenize-string-continue ls p c)]))]
(let ([c (peek-char p)]) [else (error 'tokenize "invalid string escape" c)]))]
(when (memv c '(#\xA #\x85)) [(memv c '(#\xA #\x85 #\x2028))
(read-char p)) (tokenize-string (cons #\linefeed ls) p)]
(tokenize-string (cons #\linefeed ls) p))] [(memv c '(#\xD))
[else (let ([c (peek-char p)])
(tokenize-string (cons c ls) p)])))) (when (memv c '(#\xA #\x85))
(read-char p))
(tokenize-string (cons #\linefeed ls) p))]
[else
(tokenize-string (cons c ls) p)]))
(define skip-comment (define skip-comment
(lambda (p) (lambda (p)
(let ([c (read-char p)]) (let ([c (read-char p)])

View File

@ -1 +1 @@
1168 1169