- reduced latency of construction of unicode composition tables.
This commit is contained in:
parent
9a3666d3ea
commit
15e8775c67
Binary file not shown.
|
@ -3037,14 +3037,6 @@ procedures.
|
|||
not recognize the \texttt{|p} notation.
|
||||
|
||||
|
||||
\item The following procedures are missing from \texttt{(rnrs unicode)}:
|
||||
\begin{Verbatim}
|
||||
string-titlecase
|
||||
string-normalize-nfc string-normalize-nfd
|
||||
string-normalize-nfkc string-normalize-nfkd
|
||||
\end{Verbatim}
|
||||
|
||||
|
||||
\item The following procedures are missing from \texttt{(rnrs arithmetic
|
||||
bitwise)}:
|
||||
\begin{Verbatim}
|
||||
|
@ -3060,8 +3052,7 @@ fxreverse-bit-field fxrotate-bit-field
|
|||
|
||||
\item The following procedures are missing from \texttt{(rnrs hashtables)}:
|
||||
\begin{Verbatim}
|
||||
make-eqv-hashtable make-hashtable equal-hash
|
||||
hashtable-hash-function hashtable-equivalence-function
|
||||
make-eqv-hashtable equal-hash
|
||||
\end{Verbatim}
|
||||
|
||||
\item The following procedures are missing from \texttt{(rnrs io ports)}:
|
||||
|
|
|
@ -98,13 +98,6 @@
|
|||
(not (fxzero? (fxand x y))))
|
||||
(define (char- x y)
|
||||
(fx- (char->integer x) (char->integer y)))
|
||||
(define (iota n)
|
||||
(let f ([n n] [ac '()])
|
||||
(cond
|
||||
[(= n 0) ac]
|
||||
[else
|
||||
(let ([n (- n 1)])
|
||||
(f n (cons n ac)))])))
|
||||
|
||||
(include "unicode/unicode-char-cases.ss")
|
||||
(include "unicode/unicode-charinfo.ss")
|
||||
|
@ -180,7 +173,7 @@
|
|||
(chars (cdr ac)
|
||||
(let f ([p (cdar ac)] [n n])
|
||||
(cond
|
||||
[(pair? p) (f (cdr p) (+ n 1))]
|
||||
[(pair? p) (f (cdr p) (fx+ n 1))]
|
||||
[else n])))]))
|
||||
(define (extend src ac src-len dst-len)
|
||||
(let f ([str str] [dst (make-string dst-len)] [i 0] [j 0] [ac (reverse ac)] [sigma* '()])
|
||||
|
@ -212,7 +205,7 @@
|
|||
(and (not (fx= i n))
|
||||
(or ($char-cased? (string-ref str i))
|
||||
(and ($char-case-ignorable? (string-ref str i))
|
||||
(scan (+ i incr) incr n)))))
|
||||
(scan (fx+ i incr) incr n)))))
|
||||
(and (scan (fx- i 1) -1 -1) (not (scan (fx+ i 1) +1 (string-length str)))))
|
||||
; scanning requires we have some character in place...guess nonfinal sigma
|
||||
(for-each (lambda (i) (string-set! str i nonfinal-sigma)) sigma*)
|
||||
|
@ -548,13 +541,10 @@
|
|||
(lambda (x y)
|
||||
(and (char=? (car x) (car y))
|
||||
(char=? (cdr x) (cdr y))))))
|
||||
(for-each
|
||||
(lambda (i)
|
||||
(unless (and (fx<= #xD800 i) (fx<= i #xDFFF))
|
||||
(unless (memv i ($composition-exclusions))
|
||||
(let* ([c (integer->char i)] [c* ($str-decomp-canon c)])
|
||||
(when (pair? c*) (hashtable-set! comp-table c* c))))))
|
||||
(iota #x110000)))
|
||||
(vector-for-each
|
||||
(lambda (c* c) (hashtable-set! comp-table c* c))
|
||||
(car ($composition-pairs))
|
||||
(cdr ($composition-pairs))))
|
||||
(with-output-to-string
|
||||
(lambda ()
|
||||
(let ([n (string-length s)])
|
||||
|
@ -607,23 +597,15 @@
|
|||
(s0 0)))))))
|
||||
|
||||
(define-string-op string-normalize-nfd
|
||||
(lambda (s)
|
||||
; need string? check
|
||||
($decompose s #t)))
|
||||
(lambda (s) ($decompose s #t)))
|
||||
|
||||
(define-string-op string-normalize-nfkd
|
||||
(lambda (s)
|
||||
; need string? check
|
||||
($decompose s #f)))
|
||||
(lambda (s) ($decompose s #f)))
|
||||
|
||||
(define-string-op string-normalize-nfc
|
||||
(lambda (s)
|
||||
; need string? check
|
||||
($compose ($decompose s #t))))
|
||||
(lambda (s) ($compose ($decompose s #t))))
|
||||
|
||||
(define-string-op string-normalize-nfkc
|
||||
(lambda (s)
|
||||
; need string? check
|
||||
($compose ($decompose s #f))))
|
||||
(lambda (s) ($compose ($decompose s #f))))
|
||||
|
||||
))
|
||||
|
|
|
@ -1 +1 @@
|
|||
1648
|
||||
1650
|
||||
|
|
|
@ -25,7 +25,13 @@
|
|||
|
||||
; dropping support for s16 inner vectors for now
|
||||
(include "extract-common.ss")
|
||||
|
||||
(define (iota n)
|
||||
(let f ([n n] [ac '()])
|
||||
(cond
|
||||
[(= n 0) ac]
|
||||
[else
|
||||
(let ([n (- n 1)])
|
||||
(f n (cons n ac)))])))
|
||||
(define ptr-bytes 4)
|
||||
|
||||
(define code-point-limit #x110000) ; as of Unicode 5.1
|
||||
|
@ -161,12 +167,39 @@
|
|||
(verify-identity! n cdrec)
|
||||
(table-set! table n (acc cdrec)))))
|
||||
ls)
|
||||
(time (commonize* table))
|
||||
(commonize* table)
|
||||
table)))
|
||||
|
||||
(define (get-composition-exclusions)
|
||||
(define (get-composition-pairs decomp-canon-table)
|
||||
(define ($str-decomp-canon c)
|
||||
(define (strop tbl c)
|
||||
(let ([n (char->integer c)])
|
||||
(if (and (fx< table-limit code-point-limit)
|
||||
(fx>= n table-limit))
|
||||
c
|
||||
(let ([x (table-ref tbl n)])
|
||||
(if (fixnum? x)
|
||||
(integer->char (fx+ x n))
|
||||
x)))))
|
||||
(strop decomp-canon-table c))
|
||||
(let ([exclusions
|
||||
(map hex->num
|
||||
(map car (get-unicode-data "UNIDATA/CompositionExclusions.txt"))))
|
||||
(map car (get-unicode-data
|
||||
"UNIDATA/CompositionExclusions.txt")))]
|
||||
[pairs '()])
|
||||
(for-each
|
||||
(lambda (i)
|
||||
(unless (and (fx<= #xD800 i) (fx<= i #xDFFF))
|
||||
(unless (memv i exclusions)
|
||||
(let* ([c (integer->char i)] [c* ($str-decomp-canon c)])
|
||||
(when (pair? c*)
|
||||
(set! pairs (cons (cons c* c) pairs)))))))
|
||||
(iota #x110000))
|
||||
(cons (list->vector (map car pairs))
|
||||
(list->vector (map cdr pairs)))))
|
||||
|
||||
|
||||
|
||||
|
||||
(let ([ls (map data-case (get-unicode-data "UNIDATA/UnicodeData.txt"))])
|
||||
(insert-foldcase-data! ls (get-unicode-data "UNIDATA/CaseFolding.txt"))
|
||||
|
@ -179,7 +212,8 @@
|
|||
(pretty-print
|
||||
`(module ($char-upcase $char-downcase $char-titlecase $char-foldcase
|
||||
$str-upcase $str-downcase $str-titlecase $str-foldcase
|
||||
$str-decomp-canon $str-decomp-compat $composition-exclusions)
|
||||
$str-decomp-canon $str-decomp-compat
|
||||
$composition-pairs)
|
||||
(define char-upcase-table ',(build-table chardata-ucchar ls))
|
||||
(define char-downcase-table ',(build-table chardata-lcchar ls))
|
||||
(define char-titlecase-table ',(build-table chardata-tcchar ls))
|
||||
|
@ -218,7 +252,8 @@
|
|||
(define ($str-foldcase c) (strop string-foldcase-table c))
|
||||
(define ($str-decomp-canon c) (strop decomp-canon-table c))
|
||||
(define ($str-decomp-compat c) (strop decomp-compat-table c))
|
||||
(define ($composition-exclusions)
|
||||
',(get-composition-exclusions))))))))
|
||||
(define ($composition-pairs)
|
||||
',(get-composition-pairs
|
||||
(build-table chardata-decomp-canon ls)))))))))
|
||||
|
||||
(printf "Happy Happy Joy Joy ~a\n" (sizeof cache))
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue