* Added string-foldcase and string-ci=? (inefficiently)

This commit is contained in:
Abdulaziz Ghuloum 2007-06-18 02:49:40 +03:00
parent 551e5aa088
commit ab6162d2a7
15 changed files with 2542 additions and 184 deletions

Binary file not shown.

View File

@ -74,7 +74,7 @@
/* Character tagging constants.
 * NOTE(review): IK_CHAR_MASK presumably selects the tag bits of a tagged
 * character word -- its use is not visible in this excerpt; confirm. */
#define IK_CHAR_MASK 0xFF
/* Number of low bits that sit below the character value in a tagged word. */
#define IK_CHAR_SHIFT 8
/* Recover the character value from a tagged word: cast the word to int, then
 * shift the low IK_CHAR_SHIFT bits away. */
#define IK_CHAR_VAL(x) (((int)(x)) >> IK_CHAR_SHIFT)
/* Build a tagged character: shift the value left by IK_CHAR_SHIFT and OR in
 * IK_CHAR_TAG (defined outside this excerpt).
 * NOTE(review): x is shifted at its promoted type, so a negative argument
 * (e.g. a plain `char` holding a byte >= 0x80 on a signed-char platform) is
 * left-shifted as a negative int; callers should pass a non-negative value. */
#define byte_to_scheme_char(x) ((ikp)(((x) << IK_CHAR_SHIFT) | IK_CHAR_TAG))
/* Same expansion as byte_to_scheme_char, named for integer arguments; the
 * same non-negative-argument caveat applies. */
#define int_to_scheme_char(x) ((ikp)(((x) << IK_CHAR_SHIFT) | IK_CHAR_TAG))
/* NOTE(review): IK_PAIR_SIZE and pair_size carry the same value; presumably
 * the size of a pair in bytes, with pair_tag as the pair's pointer tag --
 * confirm against the allocator. */
#define IK_PAIR_SIZE 8
#define pair_size 8
#define pair_tag 1

View File

@ -377,9 +377,9 @@ static ikp do_read(ikpcb* pcb, fasl_port* p){
else if(c == 'N'){
return IK_NULL_OBJECT;
}
else if(c == 'C'){
char x = fasl_read_byte(p);
return byte_to_scheme_char(x);
else if(c == 'c'){
unsigned char x = (unsigned char) fasl_read_byte(p);
return int_to_scheme_char(x);
}
else if(c == 'G'){
/* G is for gensym */
@ -523,6 +523,11 @@ static ikp do_read(ikpcb* pcb, fasl_port* p){
}
return x;
}
else if(c == 'C'){
int n;
fasl_read_buf(p, &n, sizeof(int));
return int_to_scheme_char(n);
}
else {
fprintf(stderr, "invalid type '%c' (0x%02x) found in fasl file\n", c, c);
exit(-1);

Binary file not shown.

View File

@ -14,7 +14,8 @@
;;; "E" : denoting the end of file object
;;; "U" : denoting the unspecified value
;;; "I" + 4-bytes : denoting a fixnum (in host byte order)
;;; "C" + 1-byte : denoting a character
;;; "c" + 1-byte : denoting a small character (<= 255)
;;; "C" + 4-bytes : denoting a large character (> 255), stored as a 4-byte integer
;;; "P" + object1 + object2 : a pair
;;; "V" + 4-bytes(n) + object ... : a vector of length n followed by n objects
;;; "v" + 4-byte(n) + octet ... : a bytevector of length n followed by n octets

View File

@ -37,8 +37,16 @@
(write-char #\I p)
(write-fixnum x p)]
[(char? x)
(write-char #\C p)
(write-char x p)]
(let ([n ($char->fixnum x)])
(if ($fx<= n 255)
(begin
(write-char #\c p)
(write-byte n p))
(begin
(write-char #\C p)
(write-int n p))))]
; (write-char #\C p)
; (write-char x p)]
[(boolean? x)
(write-char (if x #\T #\F) p)]
[(eof-object? x) (write-char #\E p)]

View File

@ -1,7 +1,7 @@
(library (ikarus writer)
(export write display format printf print-error error-handler
error)
error print-unicode)
(import
(ikarus system $chars)
(ikarus system $strings)
@ -12,9 +12,10 @@
(ikarus system $bytevectors)
(only (ikarus unicode-data) unicode-printable-char?)
(except (ikarus) write display format printf print-error
error-handler error))
error-handler error print-unicode))
;;; print-unicode: parameter, initially #t.  The character writer consults
;;; it before emitting a unicode-printable character literally after "#\";
;;; when it is false that branch is skipped and the writer falls through to
;;; its remaining clauses.
(define print-unicode
(make-parameter #t))
(define char-table ; first nonprintable chars
'#("nul" "x1" "x2" "x3" "x4" "x5" "x6" "alarm"
@ -53,7 +54,7 @@
[(fx= i 127)
(write-char #\\ p)
(write-char* "delete" p)]
[(unicode-printable-char? x)
[(and (print-unicode) (unicode-printable-char? x))
(write-char #\\ p)
(write-char x p)]
[else

View File

@ -317,10 +317,12 @@
[string-set! i r]
[string-length i r]
[string=? i r]
[string-ci=? i r]
[substring i r]
[string-append i r]
[string->list i r]
[list->string i r]
[string-foldcase i unicode]
[uuid i]
[date-string i]
[vector i r]
@ -516,6 +518,7 @@
[pretty-print i]
[comment-handler i]
[print-gensym i symbols]
[print-unicode i]
[gensym-count i symbols]
[gensym-prefix i symbols]
[make-hash-table i]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,521 @@
# WordBreakProperty-5.0.0.txt
# Date: 2006-06-07, 23:23:03 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2006 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# ================================================
# Property: Word_Break
# All code points not explicitly listed for Word_Break
# have the value Other (XX).
# @missing: 0000..10FFFF; Other
# ================================================
00AD ; Format # Cf SOFT HYPHEN
0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
200B ; Format # Cf ZERO WIDTH SPACE
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2063 ; Format # Cf [4] WORD JOINER..INVISIBLE SEPARATOR
206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001 ; Format # Cf LANGUAGE TAG
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 136
# ================================================
3031..3035 ; Katakana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
309B..309C ; Katakana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
30A0 ; Katakana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
30FC..30FE ; Katakana # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO
31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
FF70 ; Katakana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
FF9E..FF9F ; Katakana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
# Total code points: 176
# ================================================
0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA ; ALetter # L& FEMININE ORDINAL INDICATOR
00B5 ; ALetter # L& MICRO SIGN
00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR
00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB ; ALetter # Lo LATIN LETTER TWO WITH STROKE
01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP
0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE
037A ; ALetter # Lm GREEK YPOGEGRAMMENI
037B..037D ; ALetter # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0386 ; ALetter # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A ; ALetter # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C ; ALetter # L& GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03CE ; ALetter # L& [44] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER OMEGA WITH TONOS
03D0..03F5 ; ALetter # L& [38] GREEK BETA SYMBOL..GREEK LUNATE EPSILON SYMBOL
03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
048A..0513 ; ALetter # L& [138] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH HOOK
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
05D0..05EA ; ALetter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 ; ALetter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH
0621..063A ; ALetter # Lo [26] ARABIC LETTER HAMZA..ARABIC LETTER GHAIN
0640 ; ALetter # Lm ARABIC TATWEEL
0641..064A ; ALetter # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
066E..066F ; ALetter # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0671..06D3 ; ALetter # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D5 ; ALetter # Lo ARABIC LETTER AE
06E5..06E6 ; ALetter # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
06EE..06EF ; ALetter # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06FA..06FC ; ALetter # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FF ; ALetter # Lo ARABIC LETTER HEH WITH INVERTED V
0710 ; ALetter # Lo SYRIAC LETTER ALAPH
0712..072F ; ALetter # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
074D..076D ; ALetter # Lo [33] SYRIAC LETTER SOGDIAN ZHAIN..ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE
0780..07A5 ; ALetter # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU
07B1 ; ALetter # Lo THAANA LETTER NAA
07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA
07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07FA ; ALetter # Lm NKO LAJANYALAN
0903 ; ALetter # Mc DEVANAGARI SIGN VISARGA
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
093E..0940 ; ALetter # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0949..094C ; ALetter # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
0950 ; ALetter # Lo DEVANAGARI OM
0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
0982..0983 ; ALetter # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
09AA..09B0 ; ALetter # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
09B2 ; ALetter # Lo BENGALI LETTER LA
09B6..09B9 ; ALetter # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
09BD ; ALetter # Lo BENGALI SIGN AVAGRAHA
09BF..09C0 ; ALetter # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II
09C7..09C8 ; ALetter # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
09CB..09CC ; ALetter # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
09CE ; ALetter # Lo BENGALI LETTER KHANDA TA
09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
0A03 ; ALetter # Mc GURMUKHI SIGN VISARGA
0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
0A2A..0A30 ; ALetter # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
0A32..0A33 ; ALetter # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
0A35..0A36 ; ALetter # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
0A38..0A39 ; ALetter # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
0A3E..0A40 ; ALetter # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
0A59..0A5C ; ALetter # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
0A5E ; ALetter # Lo GURMUKHI LETTER FA
0A72..0A74 ; ALetter # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
0A83 ; ALetter # Mc GUJARATI SIGN VISARGA
0A85..0A8D ; ALetter # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
0A8F..0A91 ; ALetter # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
0A93..0AA8 ; ALetter # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
0AAA..0AB0 ; ALetter # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
0AB2..0AB3 ; ALetter # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
0AB5..0AB9 ; ALetter # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
0ABD ; ALetter # Lo GUJARATI SIGN AVAGRAHA
0ABE..0AC0 ; ALetter # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
0AC9 ; ALetter # Mc GUJARATI VOWEL SIGN CANDRA O
0ACB..0ACC ; ALetter # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
0AD0 ; ALetter # Lo GUJARATI OM
0AE0..0AE1 ; ALetter # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
0B02..0B03 ; ALetter # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
0B05..0B0C ; ALetter # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
0B0F..0B10 ; ALetter # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
0B13..0B28 ; ALetter # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
0B2A..0B30 ; ALetter # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
0B32..0B33 ; ALetter # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
0B35..0B39 ; ALetter # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
0B3D ; ALetter # Lo ORIYA SIGN AVAGRAHA
0B40 ; ALetter # Mc ORIYA VOWEL SIGN II
0B47..0B48 ; ALetter # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
0B4B..0B4C ; ALetter # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
0B5C..0B5D ; ALetter # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
0B5F..0B61 ; ALetter # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
0B71 ; ALetter # Lo ORIYA LETTER WA
0B83 ; ALetter # Lo TAMIL SIGN VISARGA
0B85..0B8A ; ALetter # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
0B8E..0B90 ; ALetter # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
0B92..0B95 ; ALetter # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
0B99..0B9A ; ALetter # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
0B9C ; ALetter # Lo TAMIL LETTER JA
0B9E..0B9F ; ALetter # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
0BA3..0BA4 ; ALetter # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
0BA8..0BAA ; ALetter # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
0BAE..0BB9 ; ALetter # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
0BBF ; ALetter # Mc TAMIL VOWEL SIGN I
0BC1..0BC2 ; ALetter # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
0BC6..0BC8 ; ALetter # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
0BCA..0BCC ; ALetter # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
0C01..0C03 ; ALetter # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
0C05..0C0C ; ALetter # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
0C0E..0C10 ; ALetter # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
0C12..0C28 ; ALetter # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
0C2A..0C33 ; ALetter # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
0C35..0C39 ; ALetter # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
0C41..0C44 ; ALetter # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C82..0C83 ; ALetter # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA
0CBE ; ALetter # Mc KANNADA VOWEL SIGN AA
0CC0..0CC1 ; ALetter # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U
0CC3..0CC4 ; ALetter # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR
0CC7..0CC8 ; ALetter # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
0CCA..0CCB ; ALetter # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CDE ; ALetter # Lo KANNADA LETTER FA
0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0D02..0D03 ; ALetter # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
0D05..0D0C ; ALetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
0D12..0D28 ; ALetter # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
0D2A..0D39 ; ALetter # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
0D3F..0D40 ; ALetter # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II
0D46..0D48 ; ALetter # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
0D4A..0D4C ; ALetter # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
0D60..0D61 ; ALetter # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
0D82..0D83 ; ALetter # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
0D9A..0DB1 ; ALetter # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
0DB3..0DBB ; ALetter # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
0DBD ; ALetter # Lo SINHALA LETTER DANTAJA LAYANNA
0DC0..0DC6 ; ALetter # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
0DD0..0DD1 ; ALetter # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
0DD8..0DDE ; ALetter # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA
0DF2..0DF3 ; ALetter # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
0F00 ; ALetter # Lo TIBETAN SYLLABLE OM
0F40..0F47 ; ALetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
0F49..0F6A ; ALetter # Lo [34] TIBETAN LETTER NYA..TIBETAN LETTER FIXED-FORM RA
0F7F ; ALetter # Mc TIBETAN SIGN RNAM BCAD
0F88..0F8B ; ALetter # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR
1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA
125A..125D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
1260..1288 ; ALetter # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
128A..128D ; ALetter # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
1290..12B0 ; ALetter # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
12B2..12B5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
12B8..12BE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
12C0 ; ALetter # Lo ETHIOPIC SYLLABLE KXWA
12C2..12C5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
12C8..12D6 ; ALetter # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
12D8..1310 ; ALetter # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
1312..1315 ; ALetter # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
1318..135A ; ALetter # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
1700..170C ; ALetter # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
170E..1711 ; ALetter # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
1720..1731 ; ALetter # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
1740..1751 ; ALetter # Lo [18] BUHID LETTER A..BUHID LETTER HA
1760..176C ; ALetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
176E..1770 ; ALetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1923..1926 ; ALetter # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
1929..192B ; ALetter # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
1930..1931 ; ALetter # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
1933..1938 ; ALetter # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1A19..1A1B ; ALetter # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1B04 ; ALetter # Mc BALINESE SIGN BISAH
1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
1B35 ; ALetter # Mc BALINESE VOWEL SIGN TEDUNG
1B3B ; ALetter # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
1B3D..1B41 ; ALetter # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
1B43 ; ALetter # Mc BALINESE VOWEL SIGN PEPET TEDUNG
1B45..1B4B ; ALetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN
1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
1E00..1E9B ; ALetter # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE
1EA0..1EF9 ; ALetter # L& [90] LATIN CAPITAL LETTER A WITH DOT BELOW..LATIN SMALL LETTER Y WITH TILDE
1F00..1F15 ; ALetter # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
1F18..1F1D ; ALetter # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F20..1F45 ; ALetter # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
1F48..1F4D ; ALetter # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50..1F57 ; ALetter # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F59 ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F..1F7D ; ALetter # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
1F80..1FB4 ; ALetter # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6..1FBC ; ALetter # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBE ; ALetter # L& GREEK PROSGEGRAMMENI
1FC2..1FC4 ; ALetter # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6..1FCC ; ALetter # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FD0..1FD3 ; ALetter # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6..1FDB ; ALetter # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I
207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C
2107 ; ALetter # L& EULER CONSTANT
210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
2115 ; ALetter # L& DOUBLE-STRUCK CAPITAL N
2119..211D ; ALetter # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
2124 ; ALetter # L& DOUBLE-STRUCK CAPITAL Z
2126 ; ALetter # L& OHM SIGN
2128 ; ALetter # L& BLACK-LETTER CAPITAL Z
212A..212D ; ALetter # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C
212F..2134 ; ALetter # L& [6] SCRIPT SMALL E..SCRIPT SMALL O
2135..2138 ; ALetter # Lo [4] ALEF SYMBOL..DALET SYMBOL
2139 ; ALetter # L& INFORMATION SOURCE
213C..213F ; ALetter # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
2145..2149 ; ALetter # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
214E ; ALetter # L& TURNED SMALL F
2160..2182 ; ALetter # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
2183..2184 ; ALetter # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
2C60..2C6C ; ALetter # L& [13] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SMALL LETTER Z WITH DESCENDER
2C74..2C77 ; ALetter # L& [4] LATIN SMALL LETTER V WITH CURL..LATIN SMALL LETTER TAILLESS PHI
2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
2DA8..2DAE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
2DB0..2DB6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
2DB8..2DBE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
2DC0..2DC6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
2DC8..2DCE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
2DD0..2DD6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
2DD8..2DDE ; ALetter # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK
303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
303C ; ALetter # Lo MASU MARK
3105..312C ; ALetter # Lo [40] BOPOMOFO LETTER B..BOPOMOFO LETTER GN
3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
31A0..31B7 ; ALetter # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; ALetter # Lm YI SYLLABLE WU
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A717..A71A ; ALetter # Lm [4] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOWER RIGHT CORNER ANGLE
A800..A801 ; ALetter # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
A823..A824 ; ALetter # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
A827 ; ALetter # Mc SYLOTI NAGRI VOWEL SIGN OO
A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
FA30..FA6A ; ALetter # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
FB1F..FB28 ; ALetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
FB2A..FB36 ; ALetter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
FB38..FB3C ; ALetter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
FB3E ; ALetter # Lo HEBREW LETTER MEM WITH DAGESH
FB40..FB41 ; ALetter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
FB43..FB44 ; ALetter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
FB46..FBB1 ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
FDF0..FDFB ; ALetter # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
FE70..FE74 ; ALetter # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
FF21..FF3A ; ALetter # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; ALetter # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
FFA0..FFBE ; ALetter # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
FFC2..FFC7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
FFCA..FFCF ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
FFD2..FFD7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
10000..1000B ; ALetter # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
1000D..10026 ; ALetter # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
10028..1003A ; ALetter # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
1003C..1003D ; ALetter # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
1003F..1004D ; ALetter # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
10050..1005D ; ALetter # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
10080..100FA ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
10140..10174 ; ALetter # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10300..1031E ; ALetter # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
10330..10340 ; ALetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
10341 ; ALetter # Nl GOTHIC LETTER NINETY
10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED
10380..1039D ; ALetter # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
103A0..103C3 ; ALetter # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
103C8..103CF ; ALetter # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
10808 ; ALetter # Lo CYPRIOT SYLLABLE JO
1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA
1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO
10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10A00 ; ALetter # Lo KHAROSHTHI LETTER A
10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
1D4A2 ; ALetter # L& MATHEMATICAL SCRIPT CAPITAL G
1D4A5..1D4A6 ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
1D4A9..1D4AC ; ALetter # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
1D4AE..1D4B9 ; ALetter # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
1D4BB ; ALetter # L& MATHEMATICAL SCRIPT SMALL F
1D4BD..1D4C3 ; ALetter # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
1D4C5..1D505 ; ALetter # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
1D507..1D50A ; ALetter # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
1D50D..1D514 ; ALetter # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
1D516..1D51C ; ALetter # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
1D51E..1D539 ; ALetter # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
1D53B..1D53E ; ALetter # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
1D540..1D544 ; ALetter # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
1D546 ; ALetter # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
1D54A..1D550 ; ALetter # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
1D552..1D6A5 ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
1D6A8..1D6C0 ; ALetter # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
1D6C2..1D6DA ; ALetter # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
1D6DC..1D6FA ; ALetter # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
1D6FC..1D714 ; ALetter # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
1D716..1D734 ; ALetter # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
1D736..1D74E ; ALetter # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
1D750..1D76E ; ALetter # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
1D770..1D788 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
# Total code points: 21149
# ================================================
0027 ; MidLetter # Po APOSTROPHE
003A ; MidLetter # Po COLON
00B7 ; MidLetter # Po MIDDLE DOT
05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM
2019 ; MidLetter # Pf RIGHT SINGLE QUOTATION MARK
2027 ; MidLetter # Po HYPHENATION POINT
# Total code points: 6
# ================================================
002C ; MidNum # Po COMMA
002E ; MidNum # Po FULL STOP
003B ; MidNum # Po SEMICOLON
037E ; MidNum # Po GREEK QUESTION MARK
0589 ; MidNum # Po ARMENIAN FULL STOP
060D ; MidNum # Po ARABIC DATE SEPARATOR
07F8 ; MidNum # Po NKO COMMA
2044 ; MidNum # Sm FRACTION SLASH
FE10 ; MidNum # Po PRESENTATION FORM FOR VERTICAL COMMA
FE13..FE14 ; MidNum # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON
# Total code points: 11
# ================================================
0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE
0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
066B..066C ; Numeric # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR
06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
07C0..07C9 ; Numeric # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
0AE6..0AEF ; Numeric # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
0B66..0B6F ; Numeric # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
0BE6..0BEF ; Numeric # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
0C66..0C6F ; Numeric # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0CE6..0CEF ; Numeric # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0D66..0D6F ; Numeric # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
0E50..0E59 ; Numeric # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
0ED0..0ED9 ; Numeric # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
0F20..0F29 ; Numeric # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
1040..1049 ; Numeric # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
# Total code points: 282
# ================================================
005F ; ExtendNumLet # Pc LOW LINE
203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE
2054 ; ExtendNumLet # Pc INVERTED UNDERTIE
FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE
FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE
# Total code points: 10
# EOF

View File

@ -1,80 +0,0 @@
#!/usr/bin/env ikarus --r6rs-script
(import
(ikarus)
(unicode-data))
(define (hex->num x)
  ;; Interpret the string X as a hexadecimal numeral.
  ;; The text is prefixed with "#x" and handed to the reader, so the
  ;; result is whatever the first datum of "#x<X>" reads as.
  (let* ([literal (format "#x~a" x)]
         [port (open-input-string literal)])
    (read port)))
(define data-case
  ;; Turn one split record (a list of field strings) into
  ;;   (code-point . #(upper-offset lower-offset title-offset #f))
  ;; Each offset is mapping - code-point, or 0 when the mapping field is
  ;; the empty string.  The #f slot is filled in by a later pass.
  (lambda (fields)
    (let* ([mappings (map (lambda (k) (list-ref fields k))
                          (list uc-index lc-index tc-index))]
           [n (hex->num (car fields))]
           [offsets (map (lambda (x)
                           (if (string=? x "") 0 (- (hex->num x) n)))
                         mappings)])
      (cons n
            (vector (car offsets) (cadr offsets) (caddr offsets) #f)))))
(define (remove-dups ls)
  ;; Collapse runs of consecutive entries whose cdr is equal? to the cdr
  ;; of the previously kept entry; only the first entry of each run
  ;; survives.  The comparison value starts out as #f, so a leading entry
  ;; whose cdr is #f is dropped as well.
  (let loop ([rest ls] [prev #f] [kept '()])
    (if (null? rest)
        (reverse kept)
        (let ([entry (car rest)])
          (if (equal? (cdr entry) prev)
              (loop (cdr rest) prev kept)
              (loop (cdr rest) (cdr entry) (cons entry kept)))))))
(define (compute-foldcase ls)
  ;; Fill slot 3 (the simple case-folding offset) of every entry in LS.
  ;;
  ;; LS is an association list of (code-point . vector); slots 0 and 1 of
  ;; each vector hold the offsets to the upper- and lower-case mappings.
  ;; The folding offset is lower(upper(cp)) - cp, except for the two code
  ;; points listed at the end, which are forced to fold to themselves.
  ;; The vectors are mutated in place and LS itself is returned.
  ;; Signals an error if a needed code point has no entry in LS.
  (define (find-vec idx)
    ;; Linear assq lookup: simple, but makes the whole pass quadratic.
    (cond
      [(assq idx ls) => cdr]
      [else (error 'find-vec "~s is missing" idx)]))
  (define (lower i)
    (+ i (vector-ref (find-vec i) 1)))
  (for-each
    (lambda (x)
      (let* ([idx (car x)]
             [vec (cdr x)]
             ;; The entry's own vector is already in hand, so its
             ;; upper-case offset needs no second lookup.
             [up (+ idx (vector-ref vec 0))])
        (vector-set! vec 3 (- (lower up) idx))))
    ls)
  (for-each
    (lambda (idx)
      (vector-set! (find-vec idx) 3 0))
    ;; turkic chars
    '(#x130 #x131))
  ls)
;; Zero-based positions, within one split record, of the fields that
;; data-case reads with list-ref: the upper-, lower- and title-case
;; mappings respectively.
;; NOTE(review): 12/13/14 match the simple case-mapping fields of
;; UnicodeData.txt as described in UAX #44 -- confirm if the input changes.
(define uc-index 12)
(define lc-index 13)
(define tc-index 14)
;; Script body: build the case table and write it out as Scheme source.
;; Records from get-unicode-data are converted by data-case, given a
;; folding offset by compute-foldcase, and then run-compressed by
;; remove-dups so only the first code point of each run of identical
;; offset vectors is kept.
(let ([ls
(remove-dups
(compute-foldcase
(map data-case
(get-unicode-data))))])
;; Print `(define NAME '#(...))` holding slot IDX of every kept entry.
(define (p name idx)
(pretty-print
`(define ,name
',(list->vector (map (lambda (x) (vector-ref (cdr x) idx)) ls)))))
;; v0 holds the kept code points; the vectors printed by p are parallel to it.
(let ([v0 (list->vector (map car ls))])
;; Everything printed inside the thunk goes to unicode-char-cases.ss.
;; NOTE(review): the trailing 'replace presumably asks for an existing
;; file to be overwritten -- confirm against the Ikarus documentation.
(with-output-to-file "unicode-char-cases.ss"
(lambda ()
(printf ";;; DO NOT EDIT\n;;; automatically generated\n")
(printf ";;; ~s entries in table\n" (vector-length v0))
(pretty-print `(define charcase-search-vector ',v0))
(p 'char-upcase-adjustment-vector 0)
(p 'char-downcase-adjustment-vector 1)
(p 'char-titlecase-adjustment-vector 2)
(p 'char-foldcase-adjustment-vector 3))
'replace)))
;; Completion message, printed after the file has been written.
(printf "Happy Happy Joy Joy\n")

View File

@ -68,7 +68,7 @@
(let ([ls (map cat (get-unicode-data))])
(let ([ls (map cat (get-unicode-data "UNIDATA/UnicodeData.txt"))])
(let ([wanted
(codes-in-cats ls
'(Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pd Pc Po Sc Sm Sk So Co))])

159
src/unicode/extract-char-cases.ss Executable file
View File

@ -0,0 +1,159 @@
#!/usr/bin/env ikarus --r6rs-script
(import
(ikarus)
(unicode-data))
(define (hex->num x)
  ;; Interpret the string X as a hexadecimal numeral.
  ;; The text is prefixed with "#x" and handed to the reader, so the
  ;; result is whatever the first datum of "#x<X>" reads as.
  (let* ([literal (format "#x~a" x)]
         [port (open-input-string literal)])
    (read port)))
(define data-case
  ;; Turn one split record (a list of field strings) into
  ;;   (code-point . #(upper-offset lower-offset title-offset #f 0))
  ;; Each offset is mapping - code-point, or 0 when the mapping field is
  ;; the empty string.  Slot 3 (#f) and slot 4 (0) are placeholders that
  ;; later passes overwrite.
  (lambda (fields)
    (let* ([mappings (map (lambda (k) (list-ref fields k))
                          (list uc-index lc-index tc-index))]
           [n (hex->num (car fields))]
           [offsets (map (lambda (x)
                           (if (string=? x "") 0 (- (hex->num x) n)))
                         mappings)])
      (cons n
            (vector (car offsets) (cadr offsets) (caddr offsets) #f 0)))))
(define (remove-dups ls)
  ;; Collapse runs of consecutive entries whose cdr is equal? to the cdr
  ;; of the previously kept entry; only the first entry of each run
  ;; survives.  The comparison value starts out as #f, so a leading entry
  ;; whose cdr is #f is dropped as well.
  (let loop ([rest ls] [prev #f] [kept '()])
    (if (null? rest)
        (reverse kept)
        (let ([entry (car rest)])
          (if (equal? (cdr entry) prev)
              (loop (cdr rest) prev kept)
              (loop (cdr rest) (cdr entry) (cons entry kept)))))))
(define (compute-foldcase ls)
  ;; Fill slot 3 (the simple case-folding offset) of every entry in LS.
  ;;
  ;; LS is an association list of (code-point . vector); slots 0 and 1 of
  ;; each vector hold the offsets to the upper- and lower-case mappings.
  ;; The folding offset is lower(upper(cp)) - cp, except for the two code
  ;; points listed at the end, which are forced to fold to themselves.
  ;; The vectors are mutated in place and LS itself is returned.
  ;; Signals an error if a needed code point has no entry in LS.
  (define (find-vec idx)
    ;; Linear assq lookup: simple, but makes the whole pass quadratic.
    (cond
      [(assq idx ls) => cdr]
      [else (error 'find-vec "~s is missing" idx)]))
  (define (lower i)
    (+ i (vector-ref (find-vec i) 1)))
  (for-each
    (lambda (x)
      (let* ([idx (car x)]
             [vec (cdr x)]
             ;; The entry's own vector is already in hand, so its
             ;; upper-case offset needs no second lookup.
             [up (+ idx (vector-ref vec 0))])
        (vector-set! vec 3 (- (lower up) idx))))
    ls)
  (for-each
    (lambda (idx)
      (vector-set! (find-vec idx) 3 0))
    ;; turkic chars
    '(#x130 #x131))
  ls)
;; Zero-based positions, within one split record, of the fields that
;; data-case reads with list-ref: the upper-, lower- and title-case
;; mappings respectively.
;; NOTE(review): 12/13/14 match the simple case-mapping fields of
;; UnicodeData.txt as described in UAX #44 -- confirm if the input changes.
(define uc-index 12)
(define lc-index 13)
(define tc-index 14)
(define (remove-spaces str)
  ;; Drop leading #\space characters from STR.  Despite the name, spaces
  ;; in the middle or at the end are kept.  STR itself is returned when
  ;; it has no leading space.
  (let ([len (string-length str)])
    (let skip ([i 0])
      (cond
        [(and (< i len) (char=? (string-ref str i) #\space))
         (skip (+ i 1))]
        [(= i 0) str]
        [else (substring str i len)]))))
(define (split str)
  ;; Break STR at every #\space and return the pieces as a list of
  ;; strings.  Adjacent, leading or trailing spaces produce empty-string
  ;; pieces; a string without spaces yields a one-element list.
  (let ([len (string-length str)])
    (let scan ([start 0] [pos 0])
      (cond
        [(= pos len) (list (substring str start len))]
        [(char=? (string-ref str pos) #\space)
         (cons (substring str start pos)
               (scan (+ pos 1) (+ pos 1)))]
        [else (scan start (+ pos 1))]))))
(define (improperize ls)
  ;; Convert the non-empty proper list LS into an improper one whose
  ;; final cdr is the last element: (a b c) => (a b . c).  A one-element
  ;; list yields the element itself.
  (let ([rev (reverse ls)])
    (let build ([acc (car rev)] [todo (cdr rev)])
      (if (null? todo)
          acc
          (build (cons (car todo) acc) (cdr todo))))))
(define (convert-full-fold-fields ls)
  ;; LS is a list of split records (lists of field strings).  Records
  ;; whose second field, after leading spaces are dropped, is "C" or "F"
  ;; are converted; all others are ignored.  The result keeps the input
  ;; order and holds one pair per converted record:
  ;;   (code-point . offset)       when the third field names one code
  ;;                               point (offset = target - code-point)
  ;;   (code-point . (c1 ... . cn)) an improper list of characters when it
  ;;                               names several code points.
  ;; NOTE(review): "C"/"F" are presumably the common/full status codes of
  ;; CaseFolding.txt -- confirm against the data file header.
  (define (entry fields)
    (let* ([n (hex->num (remove-spaces (car fields)))]
           [c* (map hex->num
                    (map remove-spaces
                         (split (remove-spaces (caddr fields)))))])
      (cons n
            (if (= (length c*) 1)
                (- (car c*) n)
                (improperize (map integer->char c*))))))
  (let loop ([rest ls] [acc '()])
    (if (null? rest)
        (reverse acc)
        (let* ([fields (car rest)]
               [status (remove-spaces (cadr fields))])
          (loop (cdr rest)
                (if (member status '("C" "F"))
                    (cons (entry fields) acc)
                    acc))))))
;; Disabled: the `#;` datum comment below makes the reader skip this
;; whole definition, so convert-index is not defined when the script runs.
;; As written it pairs each element of LS with (position - element),
;; drops consecutive pairs with the same difference via remove-dups, and
;; returns (element . position) for each pair that remains.
#;
(define (convert-index ls)
(let ([alist
(let f ([i 0] [ls ls])
(cond
[(null? ls) '()]
[else
(cons (cons (car ls) (- i (car ls)))
(f (add1 i) (cdr ls)))]))])
(map
(lambda (x)
(cons (car x) (+ (car x) (cdr x))))
(remove-dups alist))))
;; Script body: build the case table and write it out as Scheme source.
;; Step 1: one (code-point . vector) entry per UnicodeData.txt record,
;; with the simple folding offset filled in by compute-foldcase.
(let ([ls
(compute-foldcase
(map data-case
(get-unicode-data "UNIDATA/UnicodeData.txt")))])
;; Step 2: store each CaseFolding.txt result in slot 4 of the matching
;; entry.  chars is either an integer offset or an improper list of
;; characters (see convert-full-fold-fields).  A code point with no
;; entry in ls is an error.
(for-each
(lambda (x)
(let ([n (car x)] [chars (cdr x)])
(cond
[(assq n ls) =>
(lambda (p)
(vector-set! (cdr p) 4 chars))]
[else (error #f "~s is not there" n)])))
(convert-full-fold-fields
(get-unicode-data "UNIDATA/CaseFolding.txt")))
;; Step 3: run-compress only after slot 4 is set, so the equal? test in
;; remove-dups sees the complete vectors.
(let ([ls (remove-dups ls)])
;; Print `(define NAME '#(...))` holding slot IDX of every kept entry.
(define (p name idx)
(pretty-print
`(define ,name
',(list->vector (map (lambda (x) (vector-ref (cdr x) idx)) ls)))))
;; NOTE(review): print-unicode #f presumably makes non-ASCII characters
;; print as #\xNNNN escapes -- confirm against the Ikarus documentation.
(parameterize ([print-unicode #f])
;; v0 holds the kept code points; the vectors printed by p are parallel to it.
(let ([v0 (list->vector (map car ls))])
;; Everything printed inside the thunk goes to unicode-char-cases.ss.
(with-output-to-file "unicode-char-cases.ss"
(lambda ()
(printf ";;; DO NOT EDIT\n;;; automatically generated\n")
;; The next form is skipped by the reader (`#;` datum comment).
#;
(let ([ls (convert-index (map car ls))])
(pretty-print
`(define char-search-index-vector
',(list->vector (map car ls))))
(pretty-print
`(define char-adjustment-index-vector
',(list->vector (map cdr ls)))))
(printf ";;; ~s entries in table\n" (vector-length v0))
(pretty-print `(define charcase-search-vector ',v0))
(p 'char-upcase-adjustment-vector 0)
(p 'char-downcase-adjustment-vector 1)
(p 'char-titlecase-adjustment-vector 2)
(p 'char-foldcase-adjustment-vector 3)
(p 'string-foldcase-adjustment-vector 4)
)
;; NOTE(review): 'replace presumably asks for an existing file to be
;; overwritten -- confirm against the Ikarus documentation.
'replace)))))
;; Completion message, printed after the file has been written.
(printf "Happy Happy Joy Joy\n")

View File

@ -1,6 +1,6 @@
;;; DO NOT EDIT
;;; automatically generated
;;; 1080 entries in table
;;; 1153 entries in table
(define charcase-search-vector
'#(0 65 91 97 123 181 182 192 215 216 223 224 247 248 255 256 257 258 259 260
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
@ -37,40 +37,45 @@
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
1297 1298 1299 1329 1369 1377 1415 4256 4304 7549 7550 7680 7681 7682 7683
7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698
7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713
7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728
7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743
7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758
7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773
7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788
7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803
7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818
7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7835 7840 7841
7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856
7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871
7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886
7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901
7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916
7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7936 7944
7952 7960 7968 7976 7984 7992 8000 8008 8016 8017 8018 8019 8020 8021 8022
8023 8025 8032 8040 8048 8050 8054 8056 8058 8060 8064 8072 8080 8088 8096
8104 8112 8114 8115 8116 8120 8122 8124 8125 8126 8127 8131 8132 8136 8140
8141 8144 8146 8152 8154 8157 8160 8162 8165 8166 8168 8170 8172 8173 8179
8180 8184 8186 8188 8189 8486 8487 8490 8491 8492 8498 8499 8526 8531 8544
8560 8576 8579 8580 8592 9398 9424 9450 11264 11312 11360 11361 11362 11363
11364 11365 11366 11367 11368 11369 11370 11371 11372 11380 11381 11382
11383 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402
11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414
11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426
11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438
11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450
11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462
11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474
11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486
11487 11488 11489 11490 11491 11492 11520 11568 65313 65339 65345 65371
66560 66600 66640))
1297 1298 1299 1329 1369 1377 1415 1417 4256 4304 7549 7550 7680 7681 7682
7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697
7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712
7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727
7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742
7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757
7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772
7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787
7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802
7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817
7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832
7833 7834 7835 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851
7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866
7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881
7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896
7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911
7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926
7927 7928 7929 7936 7944 7952 7960 7968 7976 7984 7992 8000 8008 8016 8017
8018 8019 8020 8021 8022 8023 8025 8032 8040 8048 8050 8054 8056 8058 8060
8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078
8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093
8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108
8109 8110 8111 8112 8114 8115 8116 8118 8119 8120 8122 8124 8125 8126 8127
8130 8131 8132 8134 8135 8136 8140 8141 8144 8146 8147 8150 8151 8152 8154
8157 8160 8162 8163 8164 8165 8166 8167 8168 8170 8172 8173 8178 8179 8180
8182 8183 8184 8186 8188 8189 8486 8487 8490 8491 8492 8498 8499 8526 8531
8544 8560 8576 8579 8580 8592 9398 9424 9450 11264 11312 11360 11361 11362
11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11380 11381
11382 11383 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401
11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413
11414 11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425
11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437
11438 11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449
11450 11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461
11462 11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473
11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485
11486 11487 11488 11489 11490 11491 11492 11520 11568 64256 64257 64258
64259 64260 64261 64275 64276 64277 64278 64279 64285 65313 65339 65345
65371 66560 66600 66640))
(define char-upcase-adjustment-vector
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
@ -93,22 +98,24 @@
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 -15 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -48 0 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 0 0 -59 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0
8 0 8 0 0 8 0 8 0 8 0 8 0 8 0 74 86 100 128 112 126 8 8 8 8 8 8 8 8 0 0 0 0
0 0 0 0 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 0
9 0 0 0 0 0 0 0 -7205 0 0 9 0 0 0 0 0 0 8 0 0 0 0 0 0 0 8 0 0 0 7 0 0 0 0 0
0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26 0 0 -48 0 -1 0
0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0
8 0 8 0 8 0 8 0 8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205
0 9 0 0 0 0 8 0 0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16
0 0 -1 0 0 -26 0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -7264
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -32 0 0 -40 0))
(define char-downcase-adjustment-vector
'#(0 32 0 0 0 0 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -199 0 1 0 1 0 1 0 0
@ -127,20 +134,22 @@
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0 0 0 0 0 -8 0 -8 0 -8
0 0 0 0 -8 -74 -9 0 0 0 0 0 -86 -9 0 0 0 -8 -100 0 0 0 0 0 -8 -112 -7 0 0 0
-128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0 0 0 0 0 0
0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8
0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 -8 -74 -9 0 0 0 0 0 0 0
0 -86 -9 0 0 0 0 0 0 -8 -100 0 0 0 0 0 0 0 0 -8 -112 -7 0 0 0 0 0 0 -128
-126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0
-10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))
1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 32 0 0 0 40 0 0))
(define char-titlecase-adjustment-vector
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
@ -163,22 +172,24 @@
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -15 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 0 -48 0 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 0 0 -59 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0 8 0 8 0 8 0 8 0
8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205 0 9 0 0 0 0 8 0
0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26
0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0 8 0 8
0 8 0 8 0 8 0 74 86 100 128 112 126 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 8 8 8
8 8 8 8 0 0 0 0 0 0 0 0 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 0 9 0 0 0 0 0 0 0
-7205 0 0 9 0 0 0 0 0 0 8 0 0 0 0 0 0 0 8 0 0 0 7 0 0 0 0 0 0 0 9 0 0 0 0 0
0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26 0 0 -48 0 -1 0 0 0 -10795
-10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -7264 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 -32 0 0 -40 0))
(define char-foldcase-adjustment-vector
'#(0 32 0 0 0 775 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1
@ -197,17 +208,79 @@
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 0 7264 0 0 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0
-58 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8
0 -8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 0 0 -8
-8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 -8
-74 -9 0 -7173 0 0 0 0 0 0 -86 -9 0 0 0 0 0 0 -8 -100 0 0 0 0 0 0 0 0 -8
-112 -7 0 0 0 0 0 0 -128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1
0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 32 0 0 0
40 0 0))
(define string-foldcase-adjustment-vector
'#(0 32 0 0 0 775 0 32 0 32 (#\s . #\s) 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
(#\i . #\x307) 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
(#\x2BC . #\n) 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -121 1 0 1 0 1 0 -268 0 210 1 0 1 0 206 1 0
205 1 0 0 79 202 203 1 0 205 207 0 211 209 1 0 0 0 211 213 0 214 1 0 1 0 1
0 218 1 0 218 0 1 0 218 1 0 217 1 0 1 0 219 1 0 0 1 0 0 0 0 2 1 0 2 1 0 2 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
(#\j . #\x30C) 2 1 0 1 0 -97 -56 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -130 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 0 10795 1 0 -163 10792 0 1 0 -195 69 71 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 116 0 0 0
38 0 37 64 63 (#\x3B9 #\x308 . #\x301) 32 0 0 (#\x3C5 #\x308 . #\x301) 0 1
0 0 0 -30 -25 0 -15 -22 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
-54 -48 0 0 -60 -64 0 1 0 -7 1 0 0 -130 80 32 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -58 1 0 1 0
0 1 0 1 0 1 0 1 0 48 0 0 (#\x565 . #\x582) 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0
0 0 0 0 -8 0 -8 0 -8 0 0 0 0 -8 -74 -9 0 -7173 0 0 0 -86 -9 0 0 0 -8 -100 0
0 0 0 0 -8 -112 -7 0 0 0 -128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0
0 1 0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 (#\h . #\x331)
(#\t . #\x308) (#\w . #\x30A) (#\y . #\x30A) (#\a . #\x2BE) -58 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))
1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 (#\x3C5 . #\x313) 0
(#\x3C5 #\x313 . #\x300) 0 (#\x3C5 #\x313 . #\x301) 0
(#\x3C5 #\x313 . #\x342) 0 -8 0 -8 0 0 0 0 0 0 (#\x1F00 . #\x3B9)
(#\x1F01 . #\x3B9) (#\x1F02 . #\x3B9) (#\x1F03 . #\x3B9) (#\x1F04 . #\x3B9)
(#\x1F05 . #\x3B9) (#\x1F06 . #\x3B9) (#\x1F07 . #\x3B9) (#\x1F00 . #\x3B9)
(#\x1F01 . #\x3B9) (#\x1F02 . #\x3B9) (#\x1F03 . #\x3B9) (#\x1F04 . #\x3B9)
(#\x1F05 . #\x3B9) (#\x1F06 . #\x3B9) (#\x1F07 . #\x3B9) (#\x1F20 . #\x3B9)
(#\x1F21 . #\x3B9) (#\x1F22 . #\x3B9) (#\x1F23 . #\x3B9) (#\x1F24 . #\x3B9)
(#\x1F25 . #\x3B9) (#\x1F26 . #\x3B9) (#\x1F27 . #\x3B9) (#\x1F20 . #\x3B9)
(#\x1F21 . #\x3B9) (#\x1F22 . #\x3B9) (#\x1F23 . #\x3B9) (#\x1F24 . #\x3B9)
(#\x1F25 . #\x3B9) (#\x1F26 . #\x3B9) (#\x1F27 . #\x3B9) (#\x1F60 . #\x3B9)
(#\x1F61 . #\x3B9) (#\x1F62 . #\x3B9) (#\x1F63 . #\x3B9) (#\x1F64 . #\x3B9)
(#\x1F65 . #\x3B9) (#\x1F66 . #\x3B9) (#\x1F67 . #\x3B9) (#\x1F60 . #\x3B9)
(#\x1F61 . #\x3B9) (#\x1F62 . #\x3B9) (#\x1F63 . #\x3B9) (#\x1F64 . #\x3B9)
(#\x1F65 . #\x3B9) (#\x1F66 . #\x3B9) (#\x1F67 . #\x3B9) 0
(#\x1F70 . #\x3B9) (#\x3B1 . #\x3B9) (#\x3AC . #\x3B9) (#\x3B1 . #\x342)
(#\x3B1 #\x342 . #\x3B9) -8 -74 (#\x3B1 . #\x3B9) 0 -7173 0
(#\x1F74 . #\x3B9) (#\x3B7 . #\x3B9) (#\x3AE . #\x3B9) (#\x3B7 . #\x342)
(#\x3B7 #\x342 . #\x3B9) -86 (#\x3B7 . #\x3B9) 0 0 (#\x3B9 #\x308 . #\x300)
(#\x3B9 #\x308 . #\x301) (#\x3B9 . #\x342) (#\x3B9 #\x308 . #\x342) -8 -100
0 0 (#\x3C5 #\x308 . #\x300) (#\x3C5 #\x308 . #\x301) (#\x3C1 . #\x313) 0
(#\x3C5 . #\x342) (#\x3C5 #\x308 . #\x342) -8 -112 -7 0 (#\x1F7C . #\x3B9)
(#\x3C9 . #\x3B9) (#\x3CE . #\x3B9) (#\x3C9 . #\x342)
(#\x3C9 #\x342 . #\x3B9) -128 -126 (#\x3C9 . #\x3B9) 0 -7517 0 -8383 -8262
0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0
0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 (#\f . #\f)
(#\f . #\i) (#\f . #\l) (#\f #\f . #\i) (#\f #\f . #\l) (#\s . #\t)
(#\x574 . #\x576) (#\x574 . #\x565) (#\x574 . #\x56B) (#\x57E . #\x576)
(#\x574 . #\x56D) 0 32 0 0 0 40 0 0))

View File

@ -13,23 +13,24 @@
(eof-object)
(list->string (reverse ac)))]
[(char=? x #\newline)
(if (null? ac) (f) (list->string (reverse ac)))]
(if (null? ac) (f '()) (list->string (reverse ac)))]
[else (f (cons x ac))]))))
;; Scan STR from index I and return the index of the first delimiter, or
;; N when index N is reached first.  This span shows both sides of the
;; change without diff markers: find-semi (old) stops at #\; only, while
;; find-semi/hash (new) stops at #\; or #\#.
(define (find-semi str i n)
(define (find-semi/hash str i n)
(cond
[(or (fx= i n)
(char=? (string-ref str i) #\;)) i]
[else (find-semi str (+ i 1) n)]))
[(or (fx= i n) (memv (string-ref str i) '(#\; #\#))) i]
[else (find-semi/hash str (+ i 1) n)]))
;; Split one input line into its #\;-separated fields, treating #\# as
;; the start of a trailing comment.  This span shows both sides of the
;; change without diff markers; the old lines are left in place.
(define (split str)
(let f ([i 0] [n (string-length str)])
(cond
[(= i n) '("")]
[(or (= i n) (memv (string-ref str i) '(#\#)))
'("")]
[else
(let ([j (find-semi str i n)])
(let ([j (find-semi/hash str i n)])
(cond
[(= j n) (list (substring str i j))]
;; Test the delimiter found at j.  Index i was already checked for #\#
;; by the first clause, so testing i here could never stop at a comment
;; and the comment text would be parsed as further fields.
[(or (= j n) (memv (string-ref str j) '(#\#)))
(list (substring str i j))]
[else
(cons (substring str i j)
(f (+ j 1) n))]))])))
@ -42,9 +43,11 @@
(reverse ls)]
[else
(let ([fields (split line)])
(f (cons fields ls)))]))))
(if (or (null? fields) (equal? fields '("")))
(f ls)
(f (cons fields ls))))]))))
;; Read a data file by running extract-uni-data with the file as current
;; input.  This span shows both sides of the change without diff markers:
;; the old version takes no argument and always opens
;; "UNIDATA/UnicodeData.txt"; the new one opens the FILENAME it is given.
(define (get-unicode-data)
(define (get-unicode-data filename)
(with-input-from-file
"UNIDATA/UnicodeData.txt"
filename
extract-uni-data)))