* reader now accepts string escape sequences like "\xDF;".

This commit is contained in:
Abdulaziz Ghuloum 2007-09-02 22:37:24 -04:00
parent 89786ff4d0
commit aa9f5e3ad1
3 changed files with 28 additions and 23 deletions

Binary file not shown.

View File

@ -76,6 +76,30 @@
[($char= #\n c) (tokenize-string (cons #\newline ls) p)]
[($char= #\r c) (tokenize-string (cons #\return ls) p)]
[($char= #\t c) (tokenize-string (cons #\tab ls) p)]
[($char= #\x c) ;;; unicode escape \xXXX;
(let ([c (read-char p)])
(cond
[(eof-object? c)
(error 'tokenize "invalid eof inside string")]
[(hex c) =>
(lambda (n)
(let f ([n n])
(let ([c (read-char p)])
(cond
[(eof-object? n)
(error 'tokenize "invalid eof inside string")]
[(hex c) =>
(lambda (v) (f (+ (* n 16) v)))]
[($char= c #\;)
(tokenize-string
(cons (integer->char n) ls) p)]
[else
(error 'tokenize
"invalid char ~a in escape sequence"
c)]))))]
[else
(error 'tokenize
"invalid char ~a in escape sequence" c)]))]
[else (error 'tokenize "invalid string escape \\~a" c)]))]
[else
(tokenize-string (cons c ls) p)]))))

View File

@ -16,6 +16,7 @@
(let ([n (hex->num num)])
(define (f x)
(if (string=? x "") 0 (- (hex->num x) n)))
;#(UC LC TC FC string-FC)
(cons n (vector (f uc) (f lc) (f tc) #f 0))))))
(define (remove-dups ls)
@ -98,25 +99,12 @@
(convert-full-fold-fields (cdr ls)))))]
[else (convert-full-fold-fields (cdr ls))])))]))
#;
(define (convert-index ls)
(let ([alist
(let f ([i 0] [ls ls])
(cond
[(null? ls) '()]
[else
(cons (cons (car ls) (- i (car ls)))
(f (add1 i) (cdr ls)))]))])
(map
(lambda (x)
(cons (car x) (+ (car x) (cdr x))))
(remove-dups alist))))
(let ([ls
;;; get initial table
(compute-foldcase
(map data-case
(get-unicode-data "UNIDATA/UnicodeData.txt")))])
;;; compute the string-foldcase data
(for-each
(lambda (x)
(let ([n (car x)] [chars (cdr x)])
@ -127,6 +115,7 @@
[else (error #f "~s is not there" n)])))
(convert-full-fold-fields
(get-unicode-data "UNIDATA/CaseFolding.txt")))
;;; done
(let ([ls (remove-dups ls)])
(define (p name idx)
(pretty-print
@ -137,14 +126,6 @@
(with-output-to-file "unicode-char-cases.ss"
(lambda ()
(printf ";;; DO NOT EDIT\n;;; automatically generated\n")
#;
(let ([ls (convert-index (map car ls))])
(pretty-print
`(define char-search-index-vector
',(list->vector (map car ls))))
(pretty-print
`(define char-adjustment-index-vector
',(list->vector (map cdr ls)))))
(printf ";;; ~s entries in table\n" (vector-length v0))
(pretty-print `(define charcase-search-vector ',v0))
(p 'char-upcase-adjustment-vector 0)