Reader now explicitly checks for valid unicode range when reading
#\xHHHH "\xHHHH;" and foo\xHHHH;bar sequences.
This commit is contained in:
		
							parent
							
								
									89def78c3c
								
							
						
					
					
						commit
						b97b568e36
					
				| 
						 | 
					@ -52,6 +52,19 @@
 | 
				
			||||||
    (die/pos p -1 who msg arg*))
 | 
					    (die/pos p -1 who msg arg*))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  (define (checked-integer->char n ac p) 
 | 
				
			||||||
 | 
					    (define (valid-integer-char? n)
 | 
				
			||||||
 | 
					      (cond
 | 
				
			||||||
 | 
					        [(<= n #xD7FF)   #t]
 | 
				
			||||||
 | 
					        [(< n #xE000)    #f]
 | 
				
			||||||
 | 
					        [(<= n #x10FFFF) #t]
 | 
				
			||||||
 | 
					        [else            #f]))
 | 
				
			||||||
 | 
					    (if (valid-integer-char? n) 
 | 
				
			||||||
 | 
					        ($fixnum->char n)
 | 
				
			||||||
 | 
					        (die/p p 'tokenize 
 | 
				
			||||||
 | 
					          "invalid numeric value for character"
 | 
				
			||||||
 | 
					          (list->string (reverse ac)))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  (define-syntax read-char 
 | 
					  (define-syntax read-char 
 | 
				
			||||||
    (syntax-rules ()
 | 
					    (syntax-rules ()
 | 
				
			||||||
      [(_ p) (get-char p)]))
 | 
					      [(_ p) (get-char p)]))
 | 
				
			||||||
| 
						 | 
					@ -155,20 +168,20 @@
 | 
				
			||||||
                (die/p p 'tokenize "invalid eof inside string")]
 | 
					                (die/p p 'tokenize "invalid eof inside string")]
 | 
				
			||||||
               [(hex c) =>
 | 
					               [(hex c) =>
 | 
				
			||||||
                (lambda (n)
 | 
					                (lambda (n)
 | 
				
			||||||
                  (let f ([n n])
 | 
					                  (let f ([n n] [ac (cons c '(#\x))])
 | 
				
			||||||
                    (let ([c (read-char p)])
 | 
					                    (let ([c (read-char p)])
 | 
				
			||||||
                      (cond
 | 
					                      (cond
 | 
				
			||||||
                        [(eof-object? n) 
 | 
					                        [(eof-object? n) 
 | 
				
			||||||
                         (die/p p 'tokenize "invalid eof inside string")]
 | 
					                         (die/p p 'tokenize "invalid eof inside string")]
 | 
				
			||||||
                        [(hex c) =>
 | 
					                        [(hex c) =>
 | 
				
			||||||
                         (lambda (v) (f (+ (* n 16) v)))]
 | 
					                         (lambda (v) (f (+ (* n 16) v) (cons c ac)))]
 | 
				
			||||||
                        [($char= c #\;) 
 | 
					                        [($char= c #\;) 
 | 
				
			||||||
                         (tokenize-string
 | 
					                         (tokenize-string
 | 
				
			||||||
                           (cons (integer->char n) ls) p)]
 | 
					                           (cons (checked-integer->char n ac p) ls) p)]
 | 
				
			||||||
                        [else
 | 
					                        [else
 | 
				
			||||||
                         (die/p-1 p 'tokenize
 | 
					                         (die/p-1 p 'tokenize
 | 
				
			||||||
                           "invalid char in escape sequence"
 | 
					                           "invalid char in escape sequence"
 | 
				
			||||||
                           c)]))))]
 | 
					                           (list->string (reverse (cons c ac))))]))))]
 | 
				
			||||||
               [else 
 | 
					               [else 
 | 
				
			||||||
                (die/p-1 p 'tokenize
 | 
					                (die/p-1 p 'tokenize
 | 
				
			||||||
                  "invalid char in escape sequence" c)]))]
 | 
					                  "invalid char in escape sequence" c)]))]
 | 
				
			||||||
| 
						 | 
					@ -253,7 +266,7 @@
 | 
				
			||||||
           [(eof-object? c) d]
 | 
					           [(eof-object? c) d]
 | 
				
			||||||
           [(delimiter? c)  d]
 | 
					           [(delimiter? c)  d]
 | 
				
			||||||
           [else (die/p p 'tokenize "invalid character after sequence"
 | 
					           [else (die/p p 'tokenize "invalid character after sequence"
 | 
				
			||||||
                    (string-append (string c) str))]))]
 | 
					                    (string-append str (string c)))]))]
 | 
				
			||||||
       [else
 | 
					       [else
 | 
				
			||||||
        (let ([c (read-char p)])
 | 
					        (let ([c (read-char p)])
 | 
				
			||||||
          (cond
 | 
					          (cond
 | 
				
			||||||
| 
						 | 
					@ -325,21 +338,23 @@
 | 
				
			||||||
               [(or (eof-object? n) (delimiter? n))
 | 
					               [(or (eof-object? n) (delimiter? n))
 | 
				
			||||||
                '(datum . #\x)]
 | 
					                '(datum . #\x)]
 | 
				
			||||||
               [(hex n) =>
 | 
					               [(hex n) =>
 | 
				
			||||||
                (lambda (v) 
 | 
					                (lambda (v)
 | 
				
			||||||
                  (read-char p)
 | 
					                  (read-char p)
 | 
				
			||||||
                  (let f ([v v])
 | 
					                  (let f ([v v] [ac (cons n '(#\x))])
 | 
				
			||||||
                    (let ([c (peek-char p)])
 | 
					                    (let ([c (peek-char p)])
 | 
				
			||||||
                      (cond
 | 
					                      (cond
 | 
				
			||||||
                        [(eof-object? c)
 | 
					                        [(eof-object? c)
 | 
				
			||||||
                         (cons 'datum (integer->char v))]
 | 
					                         (cons 'datum (checked-integer->char v ac p))]
 | 
				
			||||||
                        [(delimiter? c)
 | 
					                        [(delimiter? c)
 | 
				
			||||||
                         (cons 'datum (integer->char v))]
 | 
					                         (cons 'datum (checked-integer->char v ac p))]
 | 
				
			||||||
                        [(hex c) =>
 | 
					                        [(hex c) =>
 | 
				
			||||||
                         (lambda (v0)
 | 
					                         (lambda (v0)
 | 
				
			||||||
                           (read-char p)
 | 
					                           (read-char p)
 | 
				
			||||||
                           (f (+ (* v 16) v0)))]
 | 
					                           (f (+ (* v 16) v0) (cons c ac)))]
 | 
				
			||||||
                        [else
 | 
					                        [else
 | 
				
			||||||
                         (die/p p 'tokenize "invalid character sequence")]))))]
 | 
					                         (die/p p 'tokenize 
 | 
				
			||||||
 | 
					                           "invalid character sequence"
 | 
				
			||||||
 | 
					                           (list->string (reverse (cons c ac))))]))))]
 | 
				
			||||||
               [else
 | 
					               [else
 | 
				
			||||||
                (die/p p 'tokenize "invalid character sequence"
 | 
					                (die/p p 'tokenize "invalid character sequence"
 | 
				
			||||||
                       (string-append "#\\" (string n)))]))]
 | 
					                       (string-append "#\\" (string n)))]))]
 | 
				
			||||||
| 
						 | 
					@ -355,11 +370,11 @@
 | 
				
			||||||
    (cond
 | 
					    (cond
 | 
				
			||||||
      [(and ($char<= #\0 x) ($char<= x #\9))
 | 
					      [(and ($char<= #\0 x) ($char<= x #\9))
 | 
				
			||||||
       ($fx- ($char->fixnum x) ($char->fixnum #\0))]
 | 
					       ($fx- ($char->fixnum x) ($char->fixnum #\0))]
 | 
				
			||||||
      [(and ($char<= #\a x) ($char<= x #\z))
 | 
					      [(and ($char<= #\a x) ($char<= x #\f))
 | 
				
			||||||
       ($fx- ($char->fixnum x) 
 | 
					       ($fx- ($char->fixnum x) 
 | 
				
			||||||
             ($fx- ($char->fixnum #\a) 10))]
 | 
					             ($fx- ($char->fixnum #\a) 10))]
 | 
				
			||||||
      [(and ($char<= #\A x) ($char<= x #\Z))
 | 
					      [(and ($char<= #\A x) ($char<= x #\F))
 | 
				
			||||||
       ($fx- ($char->fixnum x) 
 | 
					       ($fx- ($char->fixnum x)
 | 
				
			||||||
             ($fx- ($char->fixnum #\A) 10))]
 | 
					             ($fx- ($char->fixnum #\A) 10))]
 | 
				
			||||||
      [else #f]))
 | 
					      [else #f]))
 | 
				
			||||||
  (define multiline-error
 | 
					  (define multiline-error
 | 
				
			||||||
| 
						 | 
					@ -897,7 +912,9 @@
 | 
				
			||||||
                         (format "invalid eof after ~a"
 | 
					                         (format "invalid eof after ~a"
 | 
				
			||||||
                           (list->string (reverse ac))))]
 | 
					                           (list->string (reverse ac))))]
 | 
				
			||||||
                      [($char= #\; c)
 | 
					                      [($char= #\; c)
 | 
				
			||||||
                       (tokenize-identifier (cons (integer->char v) main-ac) p)]
 | 
					                       (tokenize-identifier 
 | 
				
			||||||
 | 
					                         (cons (checked-integer->char v ac p) main-ac)
 | 
				
			||||||
 | 
					                         p)]
 | 
				
			||||||
                      [(hex c) =>
 | 
					                      [(hex c) =>
 | 
				
			||||||
                       (lambda (v0)
 | 
					                       (lambda (v0)
 | 
				
			||||||
                         (f (+ (* v 16) v0) (cons c ac)))]
 | 
					                         (f (+ (* v 16) v0) (cons c ac)))]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1 +1 @@
 | 
				
			||||||
1452
 | 
					1453
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue