Reader now explicitly checks for valid unicode range when reading
#\xHHHH "\xHHHH;" and foo\xHHHH;bar sequences.
This commit is contained in:
parent
89def78c3c
commit
b97b568e36
|
@ -52,6 +52,19 @@
|
||||||
(die/pos p -1 who msg arg*))
|
(die/pos p -1 who msg arg*))
|
||||||
|
|
||||||
|
|
||||||
|
(define (checked-integer->char n ac p)
|
||||||
|
(define (valid-integer-char? n)
|
||||||
|
(cond
|
||||||
|
[(<= n #xD7FF) #t]
|
||||||
|
[(< n #xE000) #f]
|
||||||
|
[(<= n #x10FFFF) #t]
|
||||||
|
[else #f]))
|
||||||
|
(if (valid-integer-char? n)
|
||||||
|
($fixnum->char n)
|
||||||
|
(die/p p 'tokenize
|
||||||
|
"invalid numeric value for character"
|
||||||
|
(list->string (reverse ac)))))
|
||||||
|
|
||||||
(define-syntax read-char
|
(define-syntax read-char
|
||||||
(syntax-rules ()
|
(syntax-rules ()
|
||||||
[(_ p) (get-char p)]))
|
[(_ p) (get-char p)]))
|
||||||
|
@ -155,20 +168,20 @@
|
||||||
(die/p p 'tokenize "invalid eof inside string")]
|
(die/p p 'tokenize "invalid eof inside string")]
|
||||||
[(hex c) =>
|
[(hex c) =>
|
||||||
(lambda (n)
|
(lambda (n)
|
||||||
(let f ([n n])
|
(let f ([n n] [ac (cons c '(#\x))])
|
||||||
(let ([c (read-char p)])
|
(let ([c (read-char p)])
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? n)
|
[(eof-object? n)
|
||||||
(die/p p 'tokenize "invalid eof inside string")]
|
(die/p p 'tokenize "invalid eof inside string")]
|
||||||
[(hex c) =>
|
[(hex c) =>
|
||||||
(lambda (v) (f (+ (* n 16) v)))]
|
(lambda (v) (f (+ (* n 16) v) (cons c ac)))]
|
||||||
[($char= c #\;)
|
[($char= c #\;)
|
||||||
(tokenize-string
|
(tokenize-string
|
||||||
(cons (integer->char n) ls) p)]
|
(cons (checked-integer->char n ac p) ls) p)]
|
||||||
[else
|
[else
|
||||||
(die/p-1 p 'tokenize
|
(die/p-1 p 'tokenize
|
||||||
"invalid char in escape sequence"
|
"invalid char in escape sequence"
|
||||||
c)]))))]
|
(list->string (reverse (cons c ac))))]))))]
|
||||||
[else
|
[else
|
||||||
(die/p-1 p 'tokenize
|
(die/p-1 p 'tokenize
|
||||||
"invalid char in escape sequence" c)]))]
|
"invalid char in escape sequence" c)]))]
|
||||||
|
@ -253,7 +266,7 @@
|
||||||
[(eof-object? c) d]
|
[(eof-object? c) d]
|
||||||
[(delimiter? c) d]
|
[(delimiter? c) d]
|
||||||
[else (die/p p 'tokenize "invalid character after sequence"
|
[else (die/p p 'tokenize "invalid character after sequence"
|
||||||
(string-append (string c) str))]))]
|
(string-append str (string c)))]))]
|
||||||
[else
|
[else
|
||||||
(let ([c (read-char p)])
|
(let ([c (read-char p)])
|
||||||
(cond
|
(cond
|
||||||
|
@ -325,21 +338,23 @@
|
||||||
[(or (eof-object? n) (delimiter? n))
|
[(or (eof-object? n) (delimiter? n))
|
||||||
'(datum . #\x)]
|
'(datum . #\x)]
|
||||||
[(hex n) =>
|
[(hex n) =>
|
||||||
(lambda (v)
|
(lambda (v)
|
||||||
(read-char p)
|
(read-char p)
|
||||||
(let f ([v v])
|
(let f ([v v] [ac (cons n '(#\x))])
|
||||||
(let ([c (peek-char p)])
|
(let ([c (peek-char p)])
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? c)
|
[(eof-object? c)
|
||||||
(cons 'datum (integer->char v))]
|
(cons 'datum (checked-integer->char v ac p))]
|
||||||
[(delimiter? c)
|
[(delimiter? c)
|
||||||
(cons 'datum (integer->char v))]
|
(cons 'datum (checked-integer->char v ac p))]
|
||||||
[(hex c) =>
|
[(hex c) =>
|
||||||
(lambda (v0)
|
(lambda (v0)
|
||||||
(read-char p)
|
(read-char p)
|
||||||
(f (+ (* v 16) v0)))]
|
(f (+ (* v 16) v0) (cons c ac)))]
|
||||||
[else
|
[else
|
||||||
(die/p p 'tokenize "invalid character sequence")]))))]
|
(die/p p 'tokenize
|
||||||
|
"invalid character sequence"
|
||||||
|
(list->string (reverse (cons c ac))))]))))]
|
||||||
[else
|
[else
|
||||||
(die/p p 'tokenize "invalid character sequence"
|
(die/p p 'tokenize "invalid character sequence"
|
||||||
(string-append "#\\" (string n)))]))]
|
(string-append "#\\" (string n)))]))]
|
||||||
|
@ -355,11 +370,11 @@
|
||||||
(cond
|
(cond
|
||||||
[(and ($char<= #\0 x) ($char<= x #\9))
|
[(and ($char<= #\0 x) ($char<= x #\9))
|
||||||
($fx- ($char->fixnum x) ($char->fixnum #\0))]
|
($fx- ($char->fixnum x) ($char->fixnum #\0))]
|
||||||
[(and ($char<= #\a x) ($char<= x #\z))
|
[(and ($char<= #\a x) ($char<= x #\f))
|
||||||
($fx- ($char->fixnum x)
|
($fx- ($char->fixnum x)
|
||||||
($fx- ($char->fixnum #\a) 10))]
|
($fx- ($char->fixnum #\a) 10))]
|
||||||
[(and ($char<= #\A x) ($char<= x #\Z))
|
[(and ($char<= #\A x) ($char<= x #\F))
|
||||||
($fx- ($char->fixnum x)
|
($fx- ($char->fixnum x)
|
||||||
($fx- ($char->fixnum #\A) 10))]
|
($fx- ($char->fixnum #\A) 10))]
|
||||||
[else #f]))
|
[else #f]))
|
||||||
(define multiline-error
|
(define multiline-error
|
||||||
|
@ -897,7 +912,9 @@
|
||||||
(format "invalid eof after ~a"
|
(format "invalid eof after ~a"
|
||||||
(list->string (reverse ac))))]
|
(list->string (reverse ac))))]
|
||||||
[($char= #\; c)
|
[($char= #\; c)
|
||||||
(tokenize-identifier (cons (integer->char v) main-ac) p)]
|
(tokenize-identifier
|
||||||
|
(cons (checked-integer->char v ac p) main-ac)
|
||||||
|
p)]
|
||||||
[(hex c) =>
|
[(hex c) =>
|
||||||
(lambda (v0)
|
(lambda (v0)
|
||||||
(f (+ (* v 16) v0) (cons c ac)))]
|
(f (+ (* v 16) v0) (cons c ac)))]
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
1452
|
1453
|
||||||
|
|
Loading…
Reference in New Issue