diff --git a/src/ikarus.boot b/src/ikarus.boot index 0235de3..9fa4972 100644 Binary files a/src/ikarus.boot and b/src/ikarus.boot differ diff --git a/src/ikarus.reader.ss b/src/ikarus.reader.ss index 4523afd..cea015b 100644 --- a/src/ikarus.reader.ss +++ b/src/ikarus.reader.ss @@ -76,6 +76,30 @@ [($char= #\n c) (tokenize-string (cons #\newline ls) p)] [($char= #\r c) (tokenize-string (cons #\return ls) p)] [($char= #\t c) (tokenize-string (cons #\tab ls) p)] + [($char= #\x c) ;;; unicode escape \xXXX; + (let ([c (read-char p)]) + (cond + [(eof-object? c) + (error 'tokenize "invalid eof inside string")] + [(hex c) => + (lambda (n) + (let f ([n n]) + (let ([c (read-char p)]) + (cond + [(eof-object? n) + (error 'tokenize "invalid eof inside string")] + [(hex c) => + (lambda (v) (f (+ (* n 16) v)))] + [($char= c #\;) + (tokenize-string + (cons (integer->char n) ls) p)] + [else + (error 'tokenize + "invalid char ~a in escape sequence" + c)]))))] + [else + (error 'tokenize + "invalid char ~a in escape sequence" c)]))] [else (error 'tokenize "invalid string escape \\~a" c)]))] [else (tokenize-string (cons c ls) p)])))) diff --git a/src/unicode/extract-char-cases.ss b/src/unicode/extract-char-cases.ss index 6762e69..7e9c7fa 100755 --- a/src/unicode/extract-char-cases.ss +++ b/src/unicode/extract-char-cases.ss @@ -16,6 +16,7 @@ (let ([n (hex->num num)]) (define (f x) (if (string=? x "") 0 (- (hex->num x) n))) + ;#(UC LC TC FC string-FC) (cons n (vector (f uc) (f lc) (f tc) #f 0)))))) (define (remove-dups ls) @@ -98,25 +99,12 @@ (convert-full-fold-fields (cdr ls)))))] [else (convert-full-fold-fields (cdr ls))])))])) - -#; -(define (convert-index ls) - (let ([alist - (let f ([i 0] [ls ls]) - (cond - [(null? ls) '()] - [else - (cons (cons (car ls) (- i (car ls))) - (f (add1 i) (cdr ls)))]))]) - (map - (lambda (x) - (cons (car x) (+ (car x) (cdr x)))) - (remove-dups alist)))) - (let ([ls + ;;; get initial table (compute-foldcase (map data-case (get-unicode-data "UNIDATA/UnicodeData.txt")))]) + ;;; compute the string-foldcase data (for-each (lambda (x) (let ([n (car x)] [chars (cdr x)]) @@ -127,6 +115,7 @@ [else (error #f "~s is not there" n)]))) (convert-full-fold-fields (get-unicode-data "UNIDATA/CaseFolding.txt"))) + ;;; done (let ([ls (remove-dups ls)]) (define (p name idx) (pretty-print @@ -137,14 +126,6 @@ (with-output-to-file "unicode-char-cases.ss" (lambda () (printf ";;; DO NOT EDIT\n;;; automatically generated\n") - #; - (let ([ls (convert-index (map car ls))]) - (pretty-print - `(define char-search-index-vector - ',(list->vector (map car ls)))) - (pretty-print - `(define char-adjustment-index-vector - ',(list->vector (map cdr ls))))) (printf ";;; ~s entries in table\n" (vector-length v0)) (pretty-print `(define charcase-search-vector ',v0)) (p 'char-upcase-adjustment-vector 0)