Reader now explicitly checks for valid unicode range when reading

#\xHHHH  "\xHHHH;" and foo\xHHHH;bar sequences.
This commit is contained in:
Abdulaziz Ghuloum 2008-04-28 15:01:45 -04:00
parent 89def78c3c
commit b97b568e36
2 changed files with 33 additions and 16 deletions

View File

@ -52,6 +52,19 @@
(die/pos p -1 who msg arg*)) (die/pos p -1 who msg arg*))
(define (checked-integer->char n ac p)
(define (valid-integer-char? n)
(cond
[(<= n #xD7FF) #t]
[(< n #xE000) #f]
[(<= n #x10FFFF) #t]
[else #f]))
(if (valid-integer-char? n)
($fixnum->char n)
(die/p p 'tokenize
"invalid numeric value for character"
(list->string (reverse ac)))))
(define-syntax read-char (define-syntax read-char
(syntax-rules () (syntax-rules ()
[(_ p) (get-char p)])) [(_ p) (get-char p)]))
@ -155,20 +168,20 @@
(die/p p 'tokenize "invalid eof inside string")] (die/p p 'tokenize "invalid eof inside string")]
[(hex c) => [(hex c) =>
(lambda (n) (lambda (n)
(let f ([n n]) (let f ([n n] [ac (cons c '(#\x))])
(let ([c (read-char p)]) (let ([c (read-char p)])
(cond (cond
[(eof-object? n) [(eof-object? n)
(die/p p 'tokenize "invalid eof inside string")] (die/p p 'tokenize "invalid eof inside string")]
[(hex c) => [(hex c) =>
(lambda (v) (f (+ (* n 16) v)))] (lambda (v) (f (+ (* n 16) v) (cons c ac)))]
[($char= c #\;) [($char= c #\;)
(tokenize-string (tokenize-string
(cons (integer->char n) ls) p)] (cons (checked-integer->char n ac p) ls) p)]
[else [else
(die/p-1 p 'tokenize (die/p-1 p 'tokenize
"invalid char in escape sequence" "invalid char in escape sequence"
c)]))))] (list->string (reverse (cons c ac))))]))))]
[else [else
(die/p-1 p 'tokenize (die/p-1 p 'tokenize
"invalid char in escape sequence" c)]))] "invalid char in escape sequence" c)]))]
@ -253,7 +266,7 @@
[(eof-object? c) d] [(eof-object? c) d]
[(delimiter? c) d] [(delimiter? c) d]
[else (die/p p 'tokenize "invalid character after sequence" [else (die/p p 'tokenize "invalid character after sequence"
(string-append (string c) str))]))] (string-append str (string c)))]))]
[else [else
(let ([c (read-char p)]) (let ([c (read-char p)])
(cond (cond
@ -325,21 +338,23 @@
[(or (eof-object? n) (delimiter? n)) [(or (eof-object? n) (delimiter? n))
'(datum . #\x)] '(datum . #\x)]
[(hex n) => [(hex n) =>
(lambda (v) (lambda (v)
(read-char p) (read-char p)
(let f ([v v]) (let f ([v v] [ac (cons n '(#\x))])
(let ([c (peek-char p)]) (let ([c (peek-char p)])
(cond (cond
[(eof-object? c) [(eof-object? c)
(cons 'datum (integer->char v))] (cons 'datum (checked-integer->char v ac p))]
[(delimiter? c) [(delimiter? c)
(cons 'datum (integer->char v))] (cons 'datum (checked-integer->char v ac p))]
[(hex c) => [(hex c) =>
(lambda (v0) (lambda (v0)
(read-char p) (read-char p)
(f (+ (* v 16) v0)))] (f (+ (* v 16) v0) (cons c ac)))]
[else [else
(die/p p 'tokenize "invalid character sequence")]))))] (die/p p 'tokenize
"invalid character sequence"
(list->string (reverse (cons c ac))))]))))]
[else [else
(die/p p 'tokenize "invalid character sequence" (die/p p 'tokenize "invalid character sequence"
(string-append "#\\" (string n)))]))] (string-append "#\\" (string n)))]))]
@ -355,11 +370,11 @@
(cond (cond
[(and ($char<= #\0 x) ($char<= x #\9)) [(and ($char<= #\0 x) ($char<= x #\9))
($fx- ($char->fixnum x) ($char->fixnum #\0))] ($fx- ($char->fixnum x) ($char->fixnum #\0))]
[(and ($char<= #\a x) ($char<= x #\z)) [(and ($char<= #\a x) ($char<= x #\f))
($fx- ($char->fixnum x) ($fx- ($char->fixnum x)
($fx- ($char->fixnum #\a) 10))] ($fx- ($char->fixnum #\a) 10))]
[(and ($char<= #\A x) ($char<= x #\Z)) [(and ($char<= #\A x) ($char<= x #\F))
($fx- ($char->fixnum x) ($fx- ($char->fixnum x)
($fx- ($char->fixnum #\A) 10))] ($fx- ($char->fixnum #\A) 10))]
[else #f])) [else #f]))
(define multiline-error (define multiline-error
@ -897,7 +912,9 @@
(format "invalid eof after ~a" (format "invalid eof after ~a"
(list->string (reverse ac))))] (list->string (reverse ac))))]
[($char= #\; c) [($char= #\; c)
(tokenize-identifier (cons (integer->char v) main-ac) p)] (tokenize-identifier
(cons (checked-integer->char v ac p) main-ac)
p)]
[(hex c) => [(hex c) =>
(lambda (v0) (lambda (v0)
(f (+ (* v 16) v0) (cons c ac)))] (f (+ (* v 16) v0) (cons c ac)))]

View File

@ -1 +1 @@
1452 1453