- completed all unicode primitives (thanks to Kent Dybvig)

This commit is contained in:
Abdulaziz Ghuloum 2008-10-28 19:59:40 -04:00
parent 7fa2aa75ab
commit 9a3666d3ea
43 changed files with 8990 additions and 87474 deletions

View File

@ -19,7 +19,7 @@ EXTRA_DIST=ikarus.boot.4.prebuilt ikarus.boot.8.prebuilt \
ikarus.records.procedural.ss ikarus.conditions.ss \
ikarus.singular-objects.ss ikarus.sort.ss ikarus.strings.ss \
ikarus.structs.ss ikarus.symbols.ss ikarus.timer.ss ikarus.trace.ss \
ikarus.unicode-conversion.ss ikarus.unicode-data.ss \
ikarus.unicode-conversion.ss ikarus.unicode.ss \
ikarus.vectors.ss ikarus.writer.ss makefile.ss \
pass-specify-rep-primops.ss pass-specify-rep.ss psyntax.builders.ss \
psyntax.compat.ss psyntax.config.ss psyntax.expander.ss \

View File

@ -174,7 +174,7 @@ EXTRA_DIST = ikarus.boot.4.prebuilt ikarus.boot.8.prebuilt \
ikarus.records.procedural.ss ikarus.conditions.ss \
ikarus.singular-objects.ss ikarus.sort.ss ikarus.strings.ss \
ikarus.structs.ss ikarus.symbols.ss ikarus.timer.ss ikarus.trace.ss \
ikarus.unicode-conversion.ss ikarus.unicode-data.ss \
ikarus.unicode-conversion.ss ikarus.unicode.ss \
ikarus.vectors.ss ikarus.writer.ss makefile.ss \
pass-specify-rep-primops.ss pass-specify-rep.ss psyntax.builders.ss \
psyntax.compat.ss psyntax.config.ss psyntax.expander.ss \

View File

@ -9,8 +9,6 @@
port-has-set-port-position!?
set-port-position!
make-eqv-hashtable equal-hash
string-normalize-nfc string-normalize-nfd
string-normalize-nfkc string-normalize-nfkd string-titlecase
)
(import (except (ikarus)
@ -22,8 +20,6 @@
port-has-set-port-position!?
set-port-position!
make-eqv-hashtable equal-hash
string-normalize-nfc string-normalize-nfd
string-normalize-nfkc string-normalize-nfkd string-titlecase
))
(define-syntax not-yet
@ -50,13 +46,10 @@
(not-yet
;;; should be implemented
string-titlecase
bitwise-rotate-bit-field bitwise-reverse-bit-field
fxreverse-bit-field
;;; not top priority at the moment
make-eqv-hashtable equal-hash
string-normalize-nfc string-normalize-nfd
string-normalize-nfkc string-normalize-nfkd
;;; won't be implemented
make-custom-binary-input/output-port
make-custom-textual-input/output-port

View File

@ -1,346 +0,0 @@
;;; Ikarus Scheme -- A compiler for R6RS Scheme.
;;; Copyright (C) 2006,2007,2008 Abdulaziz Ghuloum
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License version 3 as
;;; published by the Free Software Foundation.
;;;
;;; This program is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
(library (ikarus unicode-data)
(export
unicode-printable-char? char-downcase char-upcase
char-titlecase char-foldcase char-ci=? char-ci<? char-ci<=?
char-ci>? char-ci>=? string-ci=? string-ci<? string-ci<=?
string-ci>? string-ci>=? string-foldcase char-general-category
char-alphabetic? char-numeric? char-whitespace?
char-upper-case? char-lower-case? char-title-case?
string-upcase string-downcase)
(import
(ikarus system $fx)
(ikarus system $vectors)
(ikarus system $chars)
(ikarus system $pairs)
(ikarus system $strings)
(except (ikarus)
unicode-printable-char?
char-downcase char-upcase char-titlecase char-foldcase
char-ci=? char-ci<? char-ci<=? char-ci>? char-ci>=?
string-ci=? string-ci<? string-ci<=? string-ci>?
string-ci>=? string-foldcase char-general-category
char-alphabetic? char-numeric? char-whitespace?
char-upper-case? char-lower-case? char-title-case?
string-upcase string-downcase))
(include "unicode/unicode-char-cases.ss")
(include "unicode/unicode-charinfo.ss")
(define (binary-search n v)
(let ([k ($fx- ($vector-length v) 1)])
(let f ([i 0] [k k] [n n] [v v])
(cond
[($fx= i k) i]
[else
(let ([j ($fxsra ($fx+ i ($fx+ k 1)) 1)])
(cond
[($fx<= ($vector-ref v j) n) (f j k n v)]
[else (f i ($fx- j 1) n v)]))]))))
(define (lookup-char-info c)
(let ([v unicode-categories-lookup-vector]
[t unicode-categories-values-vector])
(define (f i k n)
(cond
[($fx= i k)
(let ([idx ($vector-ref t i)])
(if (fixnum? idx)
idx
(let ([idx2 ($fx- n ($vector-ref v i))])
($vector-ref idx idx2))))]
[else
(let ([j ($fxsra ($fx+ i ($fx+ k 1)) 1)])
(cond
[($fx<= ($vector-ref v j) n) (f j k n)]
[else (f i ($fx- j 1) n)]))]))
(f 0 (fx- (vector-length v) 1) (char->integer c))))
(define (char-general-category c)
(if (char? c)
(vector-ref unicode-categories-name-vector
(fxlogand 63 (lookup-char-info c)))
(die 'char-general-category "not a char" c)))
(define (char-has-property? c prop-val who)
(if (char? c)
(not (fxzero? (fxlogand (lookup-char-info c) prop-val)))
(die who "not a char" c)))
(define (unicode-printable-char? c)
(char-has-property? c constituent-property 'unicode-printable-char?))
(define (char-alphabetic? c)
(char-has-property? c alphabetic-property 'char-alphabetic?))
(define (char-numeric? c)
(char-has-property? c numeric-property 'char-numeric?))
(define (char-whitespace? c)
(char-has-property? c whitespace-property 'char-whitespace?))
(define (char-upper-case? c)
(char-has-property? c uppercase-property 'char-upper-case?))
(define (char-lower-case? c)
(char-has-property? c lowercase-property 'char-lower-case?))
(define (char-title-case? c)
(char-has-property? c titlecase-property 'char-title-case?))
(define (convert-char x adjustment-vec)
(let ([n ($char->fixnum x)])
(let ([idx (binary-search n charcase-search-vector)])
(let ([adj ($vector-ref adjustment-vec idx)])
($fx+ adj n)))))
(define (char-downcase x)
(if (char? x)
($fixnum->char
(convert-char x char-downcase-adjustment-vector))
(die 'char-downcase "not a character" x)))
(define (char-upcase x)
(if (char? x)
($fixnum->char
(convert-char x char-upcase-adjustment-vector))
(die 'char-downcase "not a character" x)))
(define (char-titlecase x)
(if (char? x)
($fixnum->char
(convert-char x char-titlecase-adjustment-vector))
(die 'char-downcase "not a character" x)))
(define (char-foldcase x)
(if (char? x)
($fixnum->char ($fold x))
(die 'char-downcase "not a character" x)))
(define ($fold x)
(convert-char x char-foldcase-adjustment-vector))
(define (char-ci-loop c0 ls p? who)
(or (null? ls)
(let ([c1 (car ls)])
(unless (char? c1) (die who "not a char" c1))
(let ([c1 ($fold c1)])
(if (p? c0 c1)
(char-ci-loop c1 (cdr ls) p? who)
(let f ([ls (cdr ls)] [who who])
(cond
[(null? ls) #f]
[(char? (car ls))
(f (cdr ls) who)]
[else (die who "not a char" (car ls))])))))))
(define char-ci=?
(case-lambda
[(x y)
(if (char? x)
(or (eq? x y)
(if (char? y)
($fx= ($fold x) ($fold y))
(die 'char-ci=? "not a char" y)))
(die 'char-ci=? "not a char" x))]
[(x)
(or (char? x) (die 'char-ci=? "not a char" x))]
[(x . x*)
(if (char? x)
(char-ci-loop ($fold x) x* = 'char-ci=?)
(die 'char-ci=? "not a char" x))]))
(define char-ci<?
(case-lambda
[(x y)
(if (char? x)
(or (eq? x y)
(if (char? y)
($fx< ($fold x) ($fold y))
(die 'char-ci<? "not a char" y)))
(die 'char-ci<? "not a char" x))]
[(x)
(or (char? x) (die 'char-ci<? "not a char" x))]
[(x . x*)
(if (char? x)
(char-ci-loop ($fold x) x* < 'char-ci<?)
(die 'char-ci<? "not a char" x))]))
(define char-ci<=?
(case-lambda
[(x y)
(if (char? x)
(or (eq? x y)
(if (char? y)
($fx<= ($fold x) ($fold y))
(die 'char-ci<=? "not a char" y)))
(die 'char-ci<=? "not a char" x))]
[(x)
(or (char? x) (die 'char-ci<=? "not a char" x))]
[(x . x*)
(if (char? x)
(char-ci-loop ($fold x) x* <= 'char-ci<=?)
(die 'char-ci<=? "not a char" x))]))
(define char-ci>?
(case-lambda
[(x y)
(if (char? x)
(or (eq? x y)
(if (char? y)
($fx> ($fold x) ($fold y))
(die 'char-ci>? "not a char" y)))
(die 'char-ci>? "not a char" x))]
[(x)
(or (char? x) (die 'char-ci>? "not a char" x))]
[(x . x*)
(if (char? x)
(char-ci-loop ($fold x) x* > 'char-ci>?)
(die 'char-ci>? "not a char" x))]))
(define char-ci>=?
(case-lambda
[(x y)
(if (char? x)
(or (eq? x y)
(if (char? y)
($fx>= ($fold x) ($fold y))
(die 'char-ci>=? "not a char" y)))
(die 'char-ci>=? "not a char" x))]
[(x)
(or (char? x) (die 'char-ci>=? "not a char" x))]
[(x . x*)
(if (char? x)
(char-ci-loop ($fold x) x* >= 'char-ci>=?)
(die 'char-ci>=? "not a char" x))]))
(define ($string-change-case str adjustment-vector)
(define (extend-length str ac)
(define (chars ac n)
(cond
[(null? ac) n]
[else
(chars (cdr ac)
(let f ([p (cdar ac)] [n n])
(cond
[(pair? p) (f (cdr p) (+ n 1))]
[else n])))]))
(let ([dst-len (chars ac (string-length str))])
(let f ([str str] [dst (make-string dst-len)] [i 0] [j 0] [ac (reverse ac)])
(cond
[(null? ac)
(string-copy! str i dst j (fx- (string-length str) i))
dst]
[else
(let ([idx (caar ac)] [c* (cdar ac)] [ac (cdr ac)])
(let ([cnt (fx- idx i)])
(string-copy! str i dst j cnt)
(let g ([str str] [dst dst]
[i (fx+ i cnt)] [j (fx+ j cnt)]
[ac ac] [c* c*])
(cond
[(pair? c*)
(string-set! dst j (car c*))
(g str dst i (fx+ j 1) ac (cdr c*))]
[else
(string-set! dst j c*)
(f str dst (fx+ i 1) (fx+ j 1) ac)]))))]))))
(let ([n (string-length str)])
(let f ([str str] [dst (make-string n)] [i 0] [n n] [ac '()])
(cond
[($fx= i n)
(if (null? ac)
dst
(extend-length dst ac))]
[else
(let* ([cn ($char->fixnum ($string-ref str i))])
(let ([n/ls
(vector-ref adjustment-vector
(binary-search cn charcase-search-vector))])
(cond
[(fixnum? n/ls)
(string-set! dst i ($fixnum->char ($fx+ cn n/ls)))
(f str dst ($fxadd1 i) n ac)]
[else
(f str dst (fxadd1 i) n
(cons (cons i n/ls) ac))])))]))))
(define ($string-foldcase str)
($string-change-case str string-foldcase-adjustment-vector))
(define (string-foldcase str)
(if (string? str)
($string-foldcase str)
(die 'string-foldcase "not a string" str)))
(define (string-upcase x)
(if (string? x)
($string-change-case x string-upcase-adjustment-vector)
(die 'string-upcase "not a string" x)))
(define (string-downcase x)
(if (string? x)
($string-change-case x string-downcase-adjustment-vector)
(die 'string-downcase "not a string" x)))
;;; FIXME: case-insensitive comparison procedures are slow.
(define string-ci-cmp
(lambda (who cmp)
(case-lambda
[(s1 s2)
(if (string? s1)
(if (string? s2)
(cmp ($string-foldcase s1) ($string-foldcase s2))
(die who "not a string" s2))
(die who "not a string" s1))]
[(s1 . s*)
(if (string? s1)
(let ([s1 ($string-foldcase s1)])
(let f ([s1 s1] [s* s*])
(cond
[(null? s*) #t]
[else
(let ([s2 (car s*)])
(if (string? s2)
(let ([s2 ($string-foldcase s2)])
(if (cmp s1 s2)
(f s2 (cdr s*))
(let f ([s* (cdr s*)])
(cond
[(null? s*) #f]
[(string? (car s*))
(f (cdr s*))]
[else
(die who "not a string"
(car s*))]))))
(die who "not a string" s2)))])))
(die who "not a string" s1))])))
(define string-ci=? (string-ci-cmp 'string-ci=? string=?))
(define string-ci<? (string-ci-cmp 'string-ci<? string<?))
(define string-ci<=? (string-ci-cmp 'string-ci<=? string<=?))
(define string-ci>? (string-ci-cmp 'string-ci>? string>?))
(define string-ci>=? (string-ci-cmp 'string-ci>=? string>=?))
)

629
scheme/ikarus.unicode.ss Normal file
View File

@ -0,0 +1,629 @@
;;; Copyright (C) 2008 Abdulaziz Ghuloum, R. Kent Dybvig
;;; Copyright (C) 2006,2007 Abdulaziz Ghuloum
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a
;;; copy of this software and associated documentation files (the "Software"),
;;; to deal in the Software without restriction, including without limitation
;;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
;;; and/or sell copies of the Software, and to permit persons to whom the
;;; Software is furnished to do so, subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in
;;; all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
;;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
;;; DEALINGS IN THE SOFTWARE.
(library (ikarus.unicode)
(export
unicode-printable-char?
char-upcase char-downcase char-titlecase char-foldcase
char-whitespace? char-lower-case? char-upper-case?
char-title-case? char-numeric?
char-alphabetic? char-general-category char-ci<? char-ci<=?
char-ci=? char-ci>? char-ci>=? string-upcase string-downcase
string-foldcase string-titlecase string-ci<? string-ci<=?
string-ci=? string-ci>? string-ci>=? string-normalize-nfd
string-normalize-nfkd string-normalize-nfc string-normalize-nfkc )
(import
(except (ikarus)
unicode-printable-char?
char-upcase char-downcase char-titlecase char-foldcase
char-whitespace? char-lower-case? char-upper-case?
char-title-case? char-numeric?
char-alphabetic? char-general-category char-ci<? char-ci<=?
char-ci=? char-ci>? char-ci>=? string-upcase string-downcase
string-foldcase string-titlecase string-ci<? string-ci<=?
string-ci=? string-ci>? string-ci>=? string-normalize-nfd
string-normalize-nfkd string-normalize-nfc string-normalize-nfkc ))
(module UNSAFE
(fx< fx<= fx> fx>= fx= fx+ fx-
fxior fxand fxsra fxsll fxzero?
integer->char char->integer
char<? char<=? char=? char>? char>=?
string-ref string-set! string-length
vector-ref vector-set! vector-length)
(import
(rename (ikarus system $strings)
($string-length string-length)
($string-ref string-ref)
($string-set! string-set!))
(rename (ikarus system $vectors)
($vector-length vector-length)
($vector-ref vector-ref)
($vector-set! vector-set!))
(rename (ikarus system $chars)
($char->fixnum char->integer)
($fixnum->char integer->char)
($char< char<?)
($char<= char<=?)
($char= char=?)
($char> char>?)
($char>= char>=?))
(rename (ikarus system $fx)
($fxzero? fxzero?)
($fxsra fxsra)
($fxsll fxsll)
($fxlogor fxior)
($fxlogand fxand)
($fx+ fx+)
($fx- fx-)
($fx< fx<)
($fx> fx>)
($fx>= fx>=)
($fx<= fx<=)
($fx= fx=))))
(module
(unicode-printable-char?
char-upcase char-downcase char-titlecase char-foldcase
char-whitespace? char-lower-case? char-upper-case?
char-title-case? char-numeric?
char-alphabetic? char-general-category char-ci<? char-ci<=?
char-ci=? char-ci>? char-ci>=? string-upcase string-downcase
string-foldcase string-titlecase string-ci<? string-ci<=?
string-ci=? string-ci>? string-ci>=? string-normalize-nfd
string-normalize-nfkd string-normalize-nfc string-normalize-nfkc)
(import UNSAFE)
(define (fxlogtest x y)
(not (fxzero? (fxand x y))))
(define (char- x y)
(fx- (char->integer x) (char->integer y)))
(define (iota n)
(let f ([n n] [ac '()])
(cond
[(= n 0) ac]
[else
(let ([n (- n 1)])
(f n (cons n ac)))])))
(include "unicode/unicode-char-cases.ss")
(include "unicode/unicode-charinfo.ss")
(define-syntax define-char-op
(syntax-rules ()
[(_ name unsafe-op)
(define name
(lambda (c)
(if (char? c)
(unsafe-op c)
(assertion-violation 'name "not a char" c))))]))
(define-char-op char-upcase $char-upcase)
(define-char-op char-downcase $char-downcase)
(define-char-op char-titlecase $char-titlecase)
(define-char-op char-foldcase $char-foldcase)
(define-char-op char-whitespace? $char-whitespace?)
(define-char-op char-lower-case? $char-lower-case?)
(define-char-op char-upper-case? $char-upper-case?)
(define-char-op char-title-case? $char-title-case?)
(define-char-op char-numeric? $char-numeric?)
(define-char-op unicode-printable-char? $char-constituent?)
(define-char-op char-alphabetic? $char-alphabetic?)
(define-char-op char-general-category $char-category)
(define (do-char-cmp a ls cmp who)
(if (char? a)
(let f ([a ($char-foldcase a)] [ls ls])
(cond
[(null? ls) #t]
[else
(let ([b (car ls)])
(if (char? b)
(let ([b ($char-foldcase b)])
(if (cmp a b)
(f b (cdr ls))
(let f ([ls (cdr ls)])
(if (null? ls)
#f
(if (char? (car ls))
(f (cdr ls))
(assertion-violation who
"not a char" (car ls)))))))
(assertion-violation who "not a char" b)))]))
(assertion-violation who "not a char" a)))
(define-syntax define-char-cmp
(syntax-rules ()
[(_ name cmp)
(define name
(case-lambda
[(c1 c2)
(if (char? c1)
(if (char? c2)
(cmp ($char-foldcase c1) ($char-foldcase c2))
(assertion-violation 'name "not a char" c2))
(assertion-violation 'name "not a char" c1))]
[(c1 . rest)
(do-char-cmp c1 rest (lambda (x y) (cmp x y)) 'name)]))]))
(define-char-cmp char-ci<? char<?)
(define-char-cmp char-ci<=? char<=?)
(define-char-cmp char-ci=? char=?)
(define-char-cmp char-ci>? char>?)
(define-char-cmp char-ci>=? char>=?)
(define (handle-special str ac)
(define (chars ac n)
(cond
[(null? ac) n]
[else
(chars (cdr ac)
(let f ([p (cdar ac)] [n n])
(cond
[(pair? p) (f (cdr p) (+ n 1))]
[else n])))]))
(define (extend src ac src-len dst-len)
(let f ([str str] [dst (make-string dst-len)] [i 0] [j 0] [ac (reverse ac)] [sigma* '()])
(cond
[(null? ac)
(string-copy! str i dst j (fx- src-len i))
(do-sigmas dst sigma*)]
[else
(let ([idx (caar ac)] [c* (cdar ac)] [ac (cdr ac)])
(let ([cnt (fx- idx i)])
(string-copy! str i dst j cnt)
(let g ([str str] [dst dst]
[i (fx+ i cnt)] [j (fx+ j cnt)]
[ac ac] [c* c*])
(cond
[(pair? c*)
(string-set! dst j (car c*))
(g str dst i (fx+ j 1) ac (cdr c*))]
[(char? c*)
(string-set! dst j c*)
(f str dst (fx+ i 1) (fx+ j 1) ac sigma*)]
[else ; assume c* = sigma
(f str dst (fx+ i 1) (fx+ j 1) ac (cons j sigma*))]))))])))
(define (do-sigmas str sigma*)
(define nonfinal-sigma #\x3c3)
(define final-sigma #\x3c2)
(define (final? i)
(define (scan i incr n)
(and (not (fx= i n))
(or ($char-cased? (string-ref str i))
(and ($char-case-ignorable? (string-ref str i))
(scan (+ i incr) incr n)))))
(and (scan (fx- i 1) -1 -1) (not (scan (fx+ i 1) +1 (string-length str)))))
; scanning requires we have some character in place...guess nonfinal sigma
(for-each (lambda (i) (string-set! str i nonfinal-sigma)) sigma*)
(for-each (lambda (i) (when (final? i) (string-set! str i final-sigma))) sigma*)
str)
(let* ([src-len (string-length str)]
[dst-len (chars ac src-len)])
(if (fx= dst-len src-len)
(do-sigmas str (map car ac))
(extend str ac src-len dst-len))))
(define ($string-change-case str cvt-char)
(let ([n (string-length str)])
(let f ([str str] [dst (make-string n)] [i 0] [n n] [ac '()])
(cond
[(fx= i n)
(if (null? ac)
dst
(handle-special dst ac))]
[else
(let ([c/ls (cvt-char (string-ref str i))])
(cond
[(char? c/ls)
(string-set! dst i c/ls)
(f str dst (fx+ i 1) n ac)]
[else
(f str dst (fx+ i 1) n
(cons (cons i c/ls) ac))]))]))))
(define-syntax define-string-op
(syntax-rules ()
[(_ name unsafe-op)
(define name
(lambda (s)
(if (string? s)
(unsafe-op s)
(assertion-violation 'name "not a string" s))))]))
(define-string-op string-upcase
(lambda (s) ($string-change-case s $str-upcase)))
(define-string-op string-foldcase
(lambda (s) ($string-change-case s $str-foldcase)))
(define-string-op string-downcase
(lambda (s) ($string-change-case s $str-downcase)))
(define-string-op string-titlecase
(lambda (str)
(let* ([n (string-length str)] [dst (make-string n)])
(define (trans2 s i seen-cased? ac)
(if (fx= i n)
(handle-special dst ac)
(s i seen-cased? ac)))
(define (trans1 s i c/ls seen-cased? ac)
(cond
[(char? c/ls)
(string-set! dst i c/ls)
(trans2 s (fx+ i 1) seen-cased? ac)]
[else
(trans2 s (fx+ i 1) seen-cased? (cons (cons i c/ls) ac))]))
(define (trans s i c seen-cased? ac)
(if seen-cased?
(trans1 s i ($str-downcase c) #t ac)
(if ($char-cased? c)
(trans1 s i ($str-titlecase c) #t ac)
(trans1 s i c #f ac))))
(define (s0 i ac)
(let ([c (string-ref str i)])
(cond
[($wb-aletter? c) (trans sAletter i c #f ac)]
[($wb-numeric? c) (trans sNumeric i c #f ac)]
[($wb-katakana? c) (trans sKatakana i c #f ac)]
[($wb-extendnumlet? c) (trans sExtendnumlet i c #f ac)]
[else (string-set! dst i c)
(let ([i (fx+ i 1)])
(if (fx= i n)
(handle-special dst ac)
(s0 i ac)))])))
(define (sAletter i seen-cased? ac)
(let ([c (string-ref str i)])
(cond
[(or ($wb-aletter? c) ($wb-extend? c) ($wb-format? c))
(trans sAletter i c seen-cased? ac)]
[(or ($wb-midletter? c) ($wb-midnumlet? c))
(trans sAletterMid i c seen-cased? ac)]
[($wb-numeric? c) (trans sNumeric i c seen-cased? ac)]
[($wb-extendnumlet? c) (trans sExtendnumlet i c seen-cased? ac)]
[else (s0 i ac)])))
(define (sAletterMid i seen-cased? ac)
(let ([c (string-ref str i)])
(cond
[(or ($wb-extend? c) ($wb-format? c))
(trans sAletterMid i c seen-cased? ac)]
[($wb-aletter? c) (trans sAletter i c seen-cased? ac)]
[else (s0 i ac)])))
(define (sNumeric i seen-cased? ac)
(let ([c (string-ref str i)])
(cond
[(or ($wb-numeric? c) ($wb-extend? c) ($wb-format? c))
(trans sNumeric c i seen-cased? ac)]
[(or ($wb-midnum? c) ($wb-midnumlet? c))
(trans sNumericMid i c seen-cased? ac)]
[($wb-aletter? c) (trans sAletter i c seen-cased? ac)]
[($wb-extendnumlet? c) (trans sExtendnumlet i c seen-cased? ac)]
[else (s0 i ac)])))
(define (sNumericMid i seen-cased? ac)
(let ([c (string-ref str i)])
(cond
[(or ($wb-extend? c) ($wb-format? c))
(trans sNumericMid i c seen-cased? ac)]
[($wb-numeric? c) (trans sNumeric i c seen-cased? ac)]
[else (s0 i ac)])))
(define (sKatakana i seen-cased? ac)
(let ([c (string-ref str i)])
(cond
[(or ($wb-katakana? c) ($wb-extend? c) ($wb-format? c))
(trans sKatakana i c seen-cased? ac)]
[($wb-extendnumlet? c) (trans sExtendnumlet i c seen-cased? ac)]
[else (s0 i ac)])))
(define (sExtendnumlet i seen-cased? ac)
(let ([c (string-ref str i)])
(cond
[(or ($wb-extendnumlet? c) ($wb-extend? c) ($wb-format? c))
(trans sExtendnumlet i c seen-cased? ac)]
[($wb-aletter? c) (trans sAletter i c seen-cased? ac)]
[($wb-numeric? c) (trans sNumeric i c seen-cased? ac)]
[($wb-katakana? c) (trans sKatakana i c seen-cased? ac)]
[else (s0 i ac)])))
(if (fx= n 0) dst (s0 0 '())))))
(define $string-ci=? ; two arguments, no string? checks
(lambda (s1 s2)
(let ([n1 (string-length s1)] [n2 (string-length s2)])
(if (fx= n1 0)
(fx= n2 0)
(and (not (fx= n2 0))
(let f ([i1 1]
[i2 1]
[c1* ($str-foldcase (string-ref s1 0))]
[c2* ($str-foldcase (string-ref s2 0))])
(if (char? c1*)
(if (char? c2*)
(and (char=? c1* c2*)
(if (fx= i1 n1)
(fx= i2 n2)
(and (not (fx= i2 n2))
(f (fx+ i1 1) (fx+ i2 1)
($str-foldcase (string-ref s1 i1))
($str-foldcase (string-ref s2 i2))))))
(and (char=? c1* (car c2*))
(not (fx= i1 n1))
(f (fx+ i1 1) i2
($str-foldcase (string-ref s1 i1))
(cdr c2*))))
(if (char? c2*)
(and (char=? (car c1*) c2*)
(not (fx= i2 n2))
(f i1 (fx+ i2 1) (cdr c1*)
($str-foldcase (string-ref s2 i2))))
(and (char=? (car c1*) (car c2*))
(f i1 i2 (cdr c1*) (cdr c2*)))))))))))
(define $string-ci<? ; two arguments, no string? checks
(lambda (s1 s2)
(let ([n1 (string-length s1)] [n2 (string-length s2)])
(and (not (fx= n2 0))
(or (fx= n1 0)
(let f ([i1 1]
[i2 1]
[c1* ($str-foldcase (string-ref s1 0))]
[c2* ($str-foldcase (string-ref s2 0))])
(if (char? c1*)
(if (char? c2*)
(or (char<? c1* c2*)
(and (char=? c1* c2*)
(not (fx= i2 n2))
(or (fx= i1 n1)
(f (fx+ i1 1) (fx+ i2 1)
($str-foldcase (string-ref s1 i1))
($str-foldcase (string-ref s2 i2))))))
(or (char<? c1* (car c2*))
(and (char=? c1* (car c2*))
(or (fx= i1 n1)
(f (fx+ i1 1) i2
($str-foldcase (string-ref s1 i1))
(cdr c2*))))))
(if (char? c2*)
(or (char<? (car c1*) c2*)
(and (char=? (car c1*) c2*)
(not (fx= i2 n2))
(f i1 (fx+ i2 1) (cdr c1*)
($str-foldcase (string-ref s2 i2)))))
(or (char<? (car c1*) (car c2*))
(and (char=? (car c1*) (car c2*))
(f i1 i2 (cdr c1*) (cdr c2*))))))))))))
(define (do-string-cmp a ls cmp who)
(if (string? a)
(let f ([a a] [ls ls])
(cond
[(null? ls) #t]
[else
(let ([b (car ls)])
(if (string? b)
(if (cmp a b)
(f b (cdr ls))
(let f ([ls (cdr ls)])
(if (null? ls)
#f
(if (string? (car ls))
(f (cdr ls))
(assertion-violation who
"not a string" (car ls)))))))
(assertion-violation who "not a string" b))]))
(assertion-violation who "not a string" a)))
(define-syntax define-string-cmp
(syntax-rules ()
[(_ name cmp)
(define name
(case-lambda
[(s1 s2)
(if (string? s1)
(if (string? s2)
(cmp s1 s2)
(assertion-violation 'name "not a string" s2))
(assertion-violation 'name "not a string" s2))]
[(s1 . rest)
(do-string-cmp s1 rest cmp 'name)]))]))
(define-string-cmp string-ci=? $string-ci=?)
(define-string-cmp string-ci<?
(lambda (s1 s2) ($string-ci<? s1 s2)))
(define-string-cmp string-ci<=?
(lambda (s1 s2) (not ($string-ci<? s2 s1))))
(define-string-cmp string-ci>=?
(lambda (s1 s2) (not ($string-ci<? s1 s2))))
(define-string-cmp string-ci>?
(lambda (s1 s2) ($string-ci<? s2 s1)))
(module (hangul-sbase hangul-slimit $hangul-decomp
hangul-lbase hangul-llimit
hangul-vbase hangul-vlimit
hangul-tbase hangul-tlimit
hangul-vcount hangul-tcount)
; adapted from UAX #15
(define SBase #xAC00)
(define LBase #x1100)
(define VBase #x1161)
(define TBase #x11A7)
(define LCount 19)
(define VCount 21)
(define TCount 28)
(define NCount (* VCount TCount))
(define SCount (* LCount NCount))
(define hangul-sbase (integer->char SBase))
(define hangul-slimit (integer->char (+ SBase SCount -1)))
(define hangul-lbase (integer->char LBase))
(define hangul-llimit (integer->char (+ LBase LCount -1)))
(define hangul-vbase (integer->char VBase))
(define hangul-vlimit (integer->char (+ VBase VCount -1)))
(define hangul-tbase (integer->char TBase))
(define hangul-tlimit (integer->char (+ TBase TCount -1)))
(define hangul-vcount VCount)
(define hangul-tcount TCount)
(define ($hangul-decomp c)
(let ([SIndex (char- c hangul-sbase)])
(let ([L (integer->char (fx+ LBase (fxdiv SIndex NCount)))]
[V (integer->char (fx+ VBase (fxdiv (fxmod SIndex NCount) TCount)))]
[adj (fxmod SIndex TCount)])
(if (fx= adj 0)
(cons* L V)
(cons* L V (integer->char (fx+ TBase adj))))))))
(define $decompose
; might should optimize for sequences of ascii characters
(lambda (s canonical?)
(define (canonical<? c1 c2)
(fx< ($char-combining-class c1) ($char-combining-class c2)))
(define (sort-and-flush comb*)
(for-each write-char
(list-sort canonical<? (reverse comb*))))
(define ($char-decomp c)
(if (and (char<=? hangul-sbase c) (char<=? c hangul-slimit))
($hangul-decomp c)
(if canonical?
($str-decomp-canon c)
($str-decomp-compat c))))
(with-output-to-string
(lambda ()
(let ([n (string-length s)])
(define (push-and-go c* c** i comb*)
(if (char? c*)
(go c* c** i comb*)
(go (car c*) (cons (cdr c*) c**) i comb*)))
(define (pop-and-go c** i comb*)
(if (null? c**)
(if (fx= i n)
(sort-and-flush comb*)
(go (string-ref s i) '() (fx+ i 1) comb*))
(push-and-go (car c**) (cdr c**) i comb*)))
(define (go c c** i comb*)
(let ([c* ($char-decomp c)])
(if (eq? c c*) ; should be eqv?
(if (fxzero? ($char-combining-class c))
(begin
(sort-and-flush comb*)
(write-char c)
(pop-and-go c** i '()))
(pop-and-go c** i (cons c comb*)))
(push-and-go c* c** i comb*))))
(pop-and-go '() 0 '()))))))
(define $compose
(let ([comp-table #f])
(define (lookup-composite c1 c2)
; needs to handle HANGUL
(hashtable-ref comp-table (cons c1 c2) #f))
(lambda (s)
(unless comp-table
(set! comp-table
(make-hashtable
(lambda (x)
(fxxor
(fxsll (char->integer (car x)) 7)
(char->integer (cdr x))))
(lambda (x y)
(and (char=? (car x) (car y))
(char=? (cdr x) (cdr y))))))
(for-each
(lambda (i)
(unless (and (fx<= #xD800 i) (fx<= i #xDFFF))
(unless (memv i ($composition-exclusions))
(let* ([c (integer->char i)] [c* ($str-decomp-canon c)])
(when (pair? c*) (hashtable-set! comp-table c* c))))))
(iota #x110000)))
(with-output-to-string
(lambda ()
(let ([n (string-length s)])
(define (dump c acc)
(write-char c)
(for-each write-char (reverse acc)))
(define (s0 i)
(unless (fx= i n)
(let ([c (string-ref s i)])
(if (fxzero? ($char-combining-class c))
(s1 (fx+ i 1) c)
(begin (write-char c) (s0 (fx+ i 1)))))))
(define (s1 i c)
(if (fx= i n)
(write-char c)
(let ([c1 (string-ref s i)])
(cond
[(and (and (char<=? hangul-lbase c)
(char<=? c hangul-llimit))
(and (char<=? hangul-vbase c1)
(char<=? c1 hangul-vlimit)))
(s1 (fx+ i 1)
(let ([lindex (char- c hangul-lbase)]
[vindex (char- c1 hangul-vbase)])
(integer->char
(fx+ (char->integer hangul-sbase)
(fx* (fx+ (fx* lindex hangul-vcount) vindex)
hangul-tcount)))))]
[(and (and (char<=? hangul-sbase c)
(char<=? c hangul-slimit))
(and (char<=? hangul-tbase c1)
(char<=? c1 hangul-tlimit))
(let ([sindex (char- c hangul-sbase)])
(fxzero? (fxmod sindex hangul-tcount))))
(let ([tindex (char- c1 hangul-tbase)])
(s1 (fx+ i 1) (integer->char (fx+ (char->integer c) tindex))))]
[else (s2 i c -1 '())]))))
(define (s2 i c class acc)
(if (fx= i n)
(dump c acc)
(let ([c1 (string-ref s i)])
(let ([class1 ($char-combining-class c1)])
(cond
[(and (fx< class class1) (lookup-composite c c1)) =>
(lambda (c) (s2 (fx+ i 1) c class acc))]
[(fx= class1 0)
(dump c acc)
(s1 (fx+ i 1) c1)]
[else (s2 (fx+ i 1) c class1 (cons c1 acc))])))))
(s0 0)))))))
(define-string-op string-normalize-nfd
(lambda (s)
; need string? check
($decompose s #t)))
(define-string-op string-normalize-nfkd
(lambda (s)
; need string? check
($decompose s #f)))
(define-string-op string-normalize-nfc
(lambda (s)
; need string? check
($compose ($decompose s #t))))
(define-string-op string-normalize-nfkc
(lambda (s)
; need string? check
($compose ($decompose s #f))))
))

View File

@ -69,7 +69,7 @@
"ikarus.date-string.ss"
"ikarus.symbols.ss"
"ikarus.vectors.ss"
"ikarus.unicode-data.ss"
"ikarus.unicode.ss"
"ikarus.string-to-number.ss"
"ikarus.numerics.ss"
"ikarus.conditions.ss"

View File

@ -116,7 +116,7 @@
((or (char<=? #\a c #\z)
(char<=? #\A c #\Z)
(char<=? #\0 c #\9)
(memv c '(#\- #\+ #\_)))
(memv c '(#\. #\- #\+ #\_)))
(write-char c p))
(else
(write-char #\% p)

View File

@ -1,338 +0,0 @@
# ArabicShaping-5.0.0.txt
# Date: 2006-07-14, 11:23:00 PST [KW]
#
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2006 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This file defines the shaping classes for Arabic and Syriac
# positional shaping, repeating in machine readable form the
# information printed in Tables 8-3, 8-7, 8-8, 8-11, 8-12, and
# 8-13 of The Unicode Standard, Version 4.0.
#
# See sections 8.2 and 8.3 of The Unicode Standard, Version 4.0
# for more information.
#
# Each line contains four fields, separated by a semicolon.
#
# Field 0: the code point, in 4-digit hexadecimal
# form, of an Arabic or Syriac character.
# Field 1: gives a short schematic name for that character,
# abbreviated from the normative Unicode character name.
# Field 2: defines the joining type (property name: Joining_Type)
# R Right_Joining
# L Left_Joining
# D Dual_Joining
# C Join_Causing
# U Non_Joining
# T Transparent
# See the Arabic block description for more information on these types.
# Field 3: defines the joining group (property name: Joining_Group)
#
# The values of the joining group are based schematically on character
# names. Where a schematic character name consists of two or more parts separated
# by spaces, the formal Joining_Group property value, as specified in
# PropertyValueAliases.txt, consists of the same name parts joined by
# underscores. Hence, the entry:
#
# 0629; TEH MARBUTA; R; TEH MARBUTA
#
# corresponds to [Joining_Group = Teh_Marbuta].
#
# Note: For historical reasons, the property value [Joining_Group = Hamza_On_Heh_Goal]
# is anachronistically named. It used to apply to both of the following characters
# in earlier versions of the standard:
#
# U+06C2 ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
# U+06C3 ARABIC LETTER TEH MARBUTA GOAL
#
# However, it currently applies only to U+06C3, and *not* to U+06C2.
# To avoid destabilizing existing Joining_Group property aliases, the
# value Hamza_On_Heh_Goal has not been changed, despite the fact that it
# no longer applies to Hamza On Heh Goal, but only to Teh Marbuta Goal.
#
# Note: Code points that are not explicitly listed in this file are
# either of joining type T or U:
#
# - Those that not explicitly listed that are of General Category Mn, Me, or Cf
# have joining type T.
# - All others not explicitly listed have type U.
#
# For an explicit listing of characters of joining type T, see
# the derived property file DerivedJoiningType.txt.
#
# There are currently no characters of type L defined in Unicode.
#
# #############################################################
# Unicode; Schematic Name; Joining Type; Joining Group
# Arabic characters
0600; ARABIC NUMBER SIGN; U; No_Joining_Group
0601; ARABIC SIGN SANAH; U; No_Joining_Group
0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group
0603; ARABIC SIGN SAFHA; U; No_Joining_Group
060B; AFGHANI SIGN; U; No_Joining_Group
0621; HAMZA; U; No_Joining_Group
0622; MADDA ON ALEF; R; ALEF
0623; HAMZA ON ALEF; R; ALEF
0624; HAMZA ON WAW; R; WAW
0625; HAMZA UNDER ALEF; R; ALEF
0626; HAMZA ON YEH; D; YEH
0627; ALEF; R; ALEF
0628; BEH; D; BEH
0629; TEH MARBUTA; R; TEH MARBUTA
062A; TEH; D; BEH
062B; THEH; D; BEH
062C; JEEM; D; HAH
062D; HAH; D; HAH
062E; KHAH; D; HAH
062F; DAL; R; DAL
0630; THAL; R; DAL
0631; REH; R; REH
0632; ZAIN; R; REH
0633; SEEN; D; SEEN
0634; SHEEN; D; SEEN
0635; SAD; D; SAD
0636; DAD; D; SAD
0637; TAH; D; TAH
0638; ZAH; D; TAH
0639; AIN; D; AIN
063A; GHAIN; D; AIN
0640; TATWEEL; C; No_Joining_Group
0641; FEH; D; FEH
0642; QAF; D; QAF
0643; KAF; D; KAF
0644; LAM; D; LAM
0645; MEEM; D; MEEM
0646; NOON; D; NOON
0647; HEH; D; HEH
0648; WAW; R; WAW
0649; ALEF MAKSURA; D; YEH
064A; YEH; D; YEH
066E; DOTLESS BEH; D; BEH
066F; DOTLESS QAF; D; QAF
0671; HAMZAT WASL ON ALEF; R; ALEF
0672; WAVY HAMZA ON ALEF; R; ALEF
0673; WAVY HAMZA UNDER ALEF; R; ALEF
0674; HIGH HAMZA; U; No_Joining_Group
0675; HIGH HAMZA ALEF; R; ALEF
0676; HIGH HAMZA WAW; R; WAW
0677; HIGH HAMZA WAW WITH DAMMA; R; WAW
0678; HIGH HAMZA YEH; D; YEH
0679; TEH WITH SMALL TAH; D; BEH
067A; TEH WITH 2 DOTS VERTICAL ABOVE; D; BEH
067B; BEH WITH 2 DOTS VERTICAL BELOW; D; BEH
067C; TEH WITH RING; D; BEH
067D; TEH WITH 3 DOTS ABOVE DOWNWARD; D; BEH
067E; TEH WITH 3 DOTS BELOW; D; BEH
067F; TEH WITH 4 DOTS ABOVE; D; BEH
0680; BEH WITH 4 DOTS BELOW; D; BEH
0681; HAMZA ON HAH; D; HAH
0682; HAH WITH 2 DOTS VERTICAL ABOVE; D; HAH
0683; HAH WITH MIDDLE 2 DOTS; D; HAH
0684; HAH WITH MIDDLE 2 DOTS VERTICAL; D; HAH
0685; HAH WITH 3 DOTS ABOVE; D; HAH
0686; HAH WITH MIDDLE 3 DOTS DOWNWARD; D; HAH
0687; HAH WITH MIDDLE 4 DOTS; D; HAH
0688; DAL WITH SMALL TAH; R; DAL
0689; DAL WITH RING; R; DAL
068A; DAL WITH DOT BELOW; R; DAL
068B; DAL WITH DOT BELOW AND SMALL TAH; R; DAL
068C; DAL WITH 2 DOTS ABOVE; R; DAL
068D; DAL WITH 2 DOTS BELOW; R; DAL
068E; DAL WITH 3 DOTS ABOVE; R; DAL
068F; DAL WITH 3 DOTS ABOVE DOWNWARD; R; DAL
0690; DAL WITH 4 DOTS ABOVE; R; DAL
0691; REH WITH SMALL TAH; R; REH
0692; REH WITH SMALL V; R; REH
0693; REH WITH RING; R; REH
0694; REH WITH DOT BELOW; R; REH
0695; REH WITH SMALL V BELOW; R; REH
0696; REH WITH DOT BELOW AND DOT ABOVE; R; REH
0697; REH WITH 2 DOTS ABOVE; R; REH
0698; REH WITH 3 DOTS ABOVE; R; REH
0699; REH WITH 4 DOTS ABOVE; R; REH
069A; SEEN WITH DOT BELOW AND DOT ABOVE; D; SEEN
069B; SEEN WITH 3 DOTS BELOW; D; SEEN
069C; SEEN WITH 3 DOTS BELOW AND 3 DOTS ABOVE; D; SEEN
069D; SAD WITH 2 DOTS BELOW; D; SAD
069E; SAD WITH 3 DOTS ABOVE; D; SAD
069F; TAH WITH 3 DOTS ABOVE; D; TAH
06A0; AIN WITH 3 DOTS ABOVE; D; AIN
06A1; DOTLESS FEH; D; FEH
06A2; FEH WITH DOT MOVED BELOW; D; FEH
06A3; FEH WITH DOT BELOW; D; FEH
06A4; FEH WITH 3 DOTS ABOVE; D; FEH
06A5; FEH WITH 3 DOTS BELOW; D; FEH
06A6; FEH WITH 4 DOTS ABOVE; D; FEH
06A7; QAF WITH DOT ABOVE; D; QAF
06A8; QAF WITH 3 DOTS ABOVE; D; QAF
06A9; KEHEH; D; GAF
06AA; SWASH KAF; D; SWASH KAF
06AB; KAF WITH RING; D; GAF
06AC; KAF WITH DOT ABOVE; D; KAF
06AD; KAF WITH 3 DOTS ABOVE; D; KAF
06AE; KAF WITH 3 DOTS BELOW; D; KAF
06AF; GAF; D; GAF
06B0; GAF WITH RING; D; GAF
06B1; GAF WITH 2 DOTS ABOVE; D; GAF
06B2; GAF WITH 2 DOTS BELOW; D; GAF
06B3; GAF WITH 2 DOTS VERTICAL BELOW; D; GAF
06B4; GAF WITH 3 DOTS ABOVE; D; GAF
06B5; LAM WITH SMALL V; D; LAM
06B6; LAM WITH DOT ABOVE; D; LAM
06B7; LAM WITH 3 DOTS ABOVE; D; LAM
06B8; LAM WITH 3 DOTS BELOW; D; LAM
06B9; NOON WITH DOT BELOW; D; NOON
06BA; DOTLESS NOON; D; NOON
06BB; DOTLESS NOON WITH SMALL TAH; D; NOON
06BC; NOON WITH RING; D; NOON
06BD; NOON WITH 3 DOTS ABOVE; D; NOON
06BE; KNOTTED HEH; D; KNOTTED HEH
06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH
06C0; HAMZA ON HEH; R; TEH MARBUTA
06C1; HEH GOAL; D; HEH GOAL
06C2; HAMZA ON HEH GOAL; D; HEH GOAL
06C3; TEH MARBUTA GOAL; R; HAMZA ON HEH GOAL
06C4; WAW WITH RING; R; WAW
06C5; WAW WITH BAR; R; WAW
06C6; WAW WITH SMALL V; R; WAW
06C7; WAW WITH DAMMA; R; WAW
06C8; WAW WITH ALEF ABOVE; R; WAW
06C9; WAW WITH INVERTED SMALL V; R; WAW
06CA; WAW WITH 2 DOTS ABOVE; R; WAW
06CB; WAW WITH 3 DOTS ABOVE; R; WAW
06CC; DOTLESS YEH; D; YEH
06CD; YEH WITH TAIL; R; YEH WITH TAIL
06CE; YEH WITH SMALL V; D; YEH
06CF; WAW WITH DOT ABOVE; R; WAW
06D0; YEH WITH 2 DOTS VERTICAL BELOW; D; YEH
06D1; YEH WITH 3 DOTS BELOW; D; YEH
06D2; YEH BARREE; R; YEH BARREE
06D3; HAMZA ON YEH BARREE; R; YEH BARREE
06D5; AE; R; TEH MARBUTA
06DD; ARABIC END OF AYAH; U; No_Joining_Group
06EE; DAL WITH INVERTED V; R; DAL
06EF; REH WITH INVERTED V; R; REH
06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
06FB; DAD WITH DOT BELOW; D; SAD
06FC; GHAIN WITH DOT BELOW; D; AIN
06FF; HEH WITH INVERTED V; D; KNOTTED HEH
# Syriac characters
0710; ALAPH; R; ALAPH
0712; BETH; D; BETH
0713; GAMAL; D; GAMAL
0714; GAMAL GARSHUNI; D; GAMAL
0715; DALATH; R; DALATH RISH
0716; DOTLESS DALATH RISH; R; DALATH RISH
0717; HE; R; HE
0718; WAW; R; SYRIAC WAW
0719; ZAIN; R; ZAIN
071A; HETH; D; HETH
071B; TETH; D; TETH
071C; TETH GARSHUNI; D; TETH
071D; YUDH; D; YUDH
071E; YUDH HE; R; YUDH HE
071F; KAPH; D; KAPH
0720; LAMADH; D; LAMADH
0721; MIM; D; MIM
0722; NUN; D; NUN
0723; SEMKATH; D; SEMKATH
0724; FINAL SEMKATH; D; FINAL SEMKATH
0725; E; D; E
0726; PE; D; PE
0727; REVERSED PE; D; REVERSED PE
0728; SADHE; R; SADHE
0729; QAPH; D; QAPH
072A; RISH; R; DALATH RISH
072B; SHIN; D; SHIN
072C; TAW; R; TAW
072D; PERSIAN BHETH; D; BETH
072E; PERSIAN GHAMAL; D; GAMAL
072F; PERSIAN DHALATH; R; DALATH RISH
074D; SOGDIAN ZHAIN; R; ZHAIN
074E; SOGDIAN KHAPH; D; KHAPH
074F; SOGDIAN FE; D; FE
# Arabic supplement characters
0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH
0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH
0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH
0753; BEH WITH 3 DOTS POINTING UPWARDS BELOW AND 2 DOTS ABOVE; D; BEH
0754; BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH
0755; BEH WITH INVERTED SMALL V BELOW; D; BEH
0756; BEH WITH SMALL V; D; BEH
0757; HAH WITH 2 DOTS ABOVE; D; HAH
0758; HAH WITH 3 DOTS POINTING UPWARDS BELOW; D; HAH
0759; DAL WITH 2 DOTS VERTICALLY BELOW AND SMALL TAH; R; DAL
075A; DAL WITH INVERTED SMALL V BELOW; R; DAL
075B; REH WITH STROKE; R; REH
075C; SEEN WITH 4 DOTS ABOVE; D; SEEN
075D; AIN WITH 2 DOTS ABOVE; D; AIN
075E; AIN WITH 3 DOTS POINTING DOWNWARDS ABOVE; D; AIN
075F; AIN WITH 2 DOTS VERTICALLY ABOVE; D; AIN
0760; FEH WITH 2 DOTS BELOW; D; FEH
0761; FEH WITH 3 DOTS POINTING UPWARDS BELOW; D; FEH
0762; KEHEH WITH DOT ABOVE; D; GAF
0763; KEHEH WITH 3 DOTS ABOVE; D; GAF
0764; KEHEH WITH 3 DOTS POINTING UPWARDS BELOW; D; GAF
0765; MEEM WITH DOT ABOVE; D; MEEM
0766; MEEM WITH DOT BELOW; D; MEEM
0767; NOON WITH 2 DOTS BELOW; D; NOON
0768; NOON WITH SMALL TAH; D; NOON
0769; NOON WITH SMALL V; D; NOON
076A; LAM WITH BAR; D; LAM
076B; REH WITH 2 DOTS VERTICALLY ABOVE; R; REH
076C; REH WITH HAMZA ABOVE; R; REH
076D; SEEN WITH 2 DOTS VERTICALLY ABOVE; D; SEEN
# N'Ko Characters
07CA; NKO A; D; No_Joining_Group
07CB; NKO EE; D; No_Joining_Group
07CC; NKO I; D; No_Joining_Group
07CD; NKO E; D; No_Joining_Group
07CE; NKO U; D; No_Joining_Group
07CF; NKO OO; D; No_Joining_Group
07D0; NKO O; D; No_Joining_Group
07D1; NKO DAGBASINNA; D; No_Joining_Group
07D2; NKO N; D; No_Joining_Group
07D3; NKO BA; D; No_Joining_Group
07D4; NKO PA; D; No_Joining_Group
07D5; NKO TA; D; No_Joining_Group
07D6; NKO JA; D; No_Joining_Group
07D7; NKO CHA; D; No_Joining_Group
07D8; NKO DA; D; No_Joining_Group
07D9; NKO RA; D; No_Joining_Group
07DA; NKO RRA; D; No_Joining_Group
07DB; NKO SA; D; No_Joining_Group
07DC; NKO GBA; D; No_Joining_Group
07DD; NKO FA; D; No_Joining_Group
07DE; NKO KA; D; No_Joining_Group
07DF; NKO LA; D; No_Joining_Group
07E0; NKO NA WOLOSO; D; No_Joining_Group
07E1; NKO MA; D; No_Joining_Group
07E2; NKO NYA; D; No_Joining_Group
07E3; NKO NA; D; No_Joining_Group
07E4; NKO HA; D; No_Joining_Group
07E5; NKO WA; D; No_Joining_Group
07E6; NKO YA; D; No_Joining_Group
07E7; NKO NYA WOLOSO; D; No_Joining_Group
07E8; NKO JONA JA; D; No_Joining_Group
07E9; NKO JONA CHA; D; No_Joining_Group
07EA; NKO JONA RA; D; No_Joining_Group
07FA; NKO LAJANYALAN; C; No_Joining_Group
# Other
200D; ZERO WIDTH JOINER; C; No_Joining_Group
200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group
# EOF

View File

@ -1,582 +0,0 @@
# BidiMirroring-5.0.0.txt
# Date: 2006-02-16, 16:11:00 PST [KW]
#
# Bidi_Mirroring_Glyph Property
#
# This file is an informative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2006 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# This data file lists characters that have the mirrored property
# where there is another Unicode character that typically has a glyph
# that is the mirror image of the original character's glyph.
# The repertoire covered by the file is Unicode 5.0.0.
#
# The file contains a list of lines with mappings from one code point
# to another one for character-based mirroring.
# Note that for "real" mirroring, a rendering engine needs to select
# appropriate alternative glyphs, and that many Unicode characters do not
# have a mirror-image Unicode character.
#
# Each mapping line contains two fields, separated by a semicolon (';').
# Each of the two fields contains a code point represented as a
# variable-length hexadecimal value with 4 to 6 digits.
# A comment indicates where the characters are "BEST FIT" mirroring.
#
# Code points with the "mirrored" property but no appropriate mirrors are
# listed as comments at the end of the file.
#
# For information on bidi mirroring, see UAX #9: Bidirectional Algorithm,
# at http://www.unicode.org/unicode/reports/tr9/
#
# This file was originally created by Markus Scherer.
# Extended for Unicode 3.2, 4.0, 4.1, and 5.0 by Ken Whistler.
#
# ############################################################
0028; 0029 # LEFT PARENTHESIS
0029; 0028 # RIGHT PARENTHESIS
003C; 003E # LESS-THAN SIGN
003E; 003C # GREATER-THAN SIGN
005B; 005D # LEFT SQUARE BRACKET
005D; 005B # RIGHT SQUARE BRACKET
007B; 007D # LEFT CURLY BRACKET
007D; 007B # RIGHT CURLY BRACKET
00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0F3A; 0F3B # TIBETAN MARK GUG RTAGS GYON
0F3B; 0F3A # TIBETAN MARK GUG RTAGS GYAS
0F3C; 0F3D # TIBETAN MARK ANG KHANG GYON
0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS
169B; 169C # OGHAM FEATHER MARK
169C; 169B # OGHAM REVERSED FEATHER MARK
2018; 2019 # [BEST FIT] LEFT SINGLE QUOTATION MARK
2019; 2018 # [BEST FIT] RIGHT SINGLE QUOTATION MARK
201C; 201D # [BEST FIT] LEFT DOUBLE QUOTATION MARK
201D; 201C # [BEST FIT] RIGHT DOUBLE QUOTATION MARK
2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
208D; 208E # SUBSCRIPT LEFT PARENTHESIS
208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
2208; 220B # ELEMENT OF
2209; 220C # NOT AN ELEMENT OF
220A; 220D # SMALL ELEMENT OF
220B; 2208 # CONTAINS AS MEMBER
220C; 2209 # DOES NOT CONTAIN AS MEMBER
220D; 220A # SMALL CONTAINS AS MEMBER
2215; 29F5 # DIVISION SLASH
223C; 223D # TILDE OPERATOR
223D; 223C # REVERSED TILDE
2243; 22CD # ASYMPTOTICALLY EQUAL TO
2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
2254; 2255 # COLON EQUALS
2255; 2254 # EQUALS COLON
2264; 2265 # LESS-THAN OR EQUAL TO
2265; 2264 # GREATER-THAN OR EQUAL TO
2266; 2267 # LESS-THAN OVER EQUAL TO
2267; 2266 # GREATER-THAN OVER EQUAL TO
2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
226A; 226B # MUCH LESS-THAN
226B; 226A # MUCH GREATER-THAN
226E; 226F # [BEST FIT] NOT LESS-THAN
226F; 226E # [BEST FIT] NOT GREATER-THAN
2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
2276; 2277 # LESS-THAN OR GREATER-THAN
2277; 2276 # GREATER-THAN OR LESS-THAN
2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN