* Added string-foldcase and string-ci=? (inefficiently)
This commit is contained in:
parent
551e5aa088
commit
ab6162d2a7
BIN
bin/ikarus
BIN
bin/ikarus
Binary file not shown.
|
@ -74,7 +74,7 @@
|
|||
#define IK_CHAR_MASK 0xFF
|
||||
#define IK_CHAR_SHIFT 8
|
||||
#define IK_CHAR_VAL(x) (((int)(x)) >> IK_CHAR_SHIFT)
|
||||
#define byte_to_scheme_char(x) ((ikp)(((x) << IK_CHAR_SHIFT) | IK_CHAR_TAG))
|
||||
#define int_to_scheme_char(x) ((ikp)(((x) << IK_CHAR_SHIFT) | IK_CHAR_TAG))
|
||||
#define IK_PAIR_SIZE 8
|
||||
#define pair_size 8
|
||||
#define pair_tag 1
|
||||
|
|
|
@ -377,9 +377,9 @@ static ikp do_read(ikpcb* pcb, fasl_port* p){
|
|||
else if(c == 'N'){
|
||||
return IK_NULL_OBJECT;
|
||||
}
|
||||
else if(c == 'C'){
|
||||
char x = fasl_read_byte(p);
|
||||
return byte_to_scheme_char(x);
|
||||
else if(c == 'c'){
|
||||
unsigned char x = (unsigned char) fasl_read_byte(p);
|
||||
return int_to_scheme_char(x);
|
||||
}
|
||||
else if(c == 'G'){
|
||||
/* G is for gensym */
|
||||
|
@ -523,6 +523,11 @@ static ikp do_read(ikpcb* pcb, fasl_port* p){
|
|||
}
|
||||
return x;
|
||||
}
|
||||
else if(c == 'C'){
|
||||
int n;
|
||||
fasl_read_buf(p, &n, sizeof(int));
|
||||
return int_to_scheme_char(n);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "invalid type '%c' (0x%02x) found in fasl file\n", c, c);
|
||||
exit(-1);
|
||||
|
|
BIN
src/ikarus.boot
BIN
src/ikarus.boot
Binary file not shown.
|
@ -14,7 +14,8 @@
|
|||
;;; "E" : denoting the end of file object
|
||||
;;; "U" : denoting the unspecified value
|
||||
;;; "I" + 4-bytes : denoting a fixnum (in host byte order)
|
||||
;;; "C" + 1-byte : denoting a character
|
||||
;;; "c" + 1-byte : denoting a small character (<= 255)
|
||||
;;; "C" + 4-byte word: big char.
|
||||
;;; "P" + object1 + object2 : a pair
|
||||
;;; "V" + 4-bytes(n) + object ... : a vector of length n followed by n objects
|
||||
;;; "v" + 4-byte(n) + octet ... : a bytevector of length n followed by n octets
|
||||
|
|
|
@ -37,8 +37,16 @@
|
|||
(write-char #\I p)
|
||||
(write-fixnum x p)]
|
||||
[(char? x)
|
||||
(write-char #\C p)
|
||||
(write-char x p)]
|
||||
(let ([n ($char->fixnum x)])
|
||||
(if ($fx<= n 255)
|
||||
(begin
|
||||
(write-char #\c p)
|
||||
(write-byte n p))
|
||||
(begin
|
||||
(write-char #\C p)
|
||||
(write-int n p))))]
|
||||
; (write-char #\C p)
|
||||
; (write-char x p)]
|
||||
[(boolean? x)
|
||||
(write-char (if x #\T #\F) p)]
|
||||
[(eof-object? x) (write-char #\E p)]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
(library (ikarus writer)
|
||||
(export write display format printf print-error error-handler
|
||||
error)
|
||||
error print-unicode)
|
||||
(import
|
||||
(ikarus system $chars)
|
||||
(ikarus system $strings)
|
||||
|
@ -12,9 +12,10 @@
|
|||
(ikarus system $bytevectors)
|
||||
(only (ikarus unicode-data) unicode-printable-char?)
|
||||
(except (ikarus) write display format printf print-error
|
||||
error-handler error))
|
||||
|
||||
error-handler error print-unicode))
|
||||
|
||||
(define print-unicode
|
||||
(make-parameter #t))
|
||||
|
||||
(define char-table ; first nonprintable chars
|
||||
'#("nul" "x1" "x2" "x3" "x4" "x5" "x6" "alarm"
|
||||
|
@ -53,7 +54,7 @@
|
|||
[(fx= i 127)
|
||||
(write-char #\\ p)
|
||||
(write-char* "delete" p)]
|
||||
[(unicode-printable-char? x)
|
||||
[(and (print-unicode) (unicode-printable-char? x))
|
||||
(write-char #\\ p)
|
||||
(write-char x p)]
|
||||
[else
|
||||
|
|
|
@ -317,10 +317,12 @@
|
|||
[string-set! i r]
|
||||
[string-length i r]
|
||||
[string=? i r]
|
||||
[string-ci=? i r]
|
||||
[substring i r]
|
||||
[string-append i r]
|
||||
[string->list i r]
|
||||
[list->string i r]
|
||||
[string-foldcase i unicode]
|
||||
[uuid i]
|
||||
[date-string i]
|
||||
[vector i r]
|
||||
|
@ -516,6 +518,7 @@
|
|||
[pretty-print i]
|
||||
[comment-handler i]
|
||||
[print-gensym i symbols]
|
||||
[print-unicode i]
|
||||
[gensym-count i symbols]
|
||||
[gensym-prefix i symbols]
|
||||
[make-hash-table i]
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,521 @@
|
|||
# WordBreakProperty-5.0.0.txt
|
||||
# Date: 2006-06-07, 23:23:03 GMT [MD]
|
||||
#
|
||||
# Unicode Character Database
|
||||
# Copyright (c) 1991-2006 Unicode, Inc.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
# For documentation, see UCD.html
|
||||
|
||||
# ================================================
|
||||
|
||||
# Property: Word_Break
|
||||
|
||||
# All code points not explicitly listed for Word_Break
|
||||
# have the value Other (XX).
|
||||
|
||||
# @missing: 0000..10FFFF; Other
|
||||
|
||||
# ================================================
|
||||
|
||||
00AD ; Format # Cf SOFT HYPHEN
|
||||
0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
|
||||
06DD ; Format # Cf ARABIC END OF AYAH
|
||||
070F ; Format # Cf SYRIAC ABBREVIATION MARK
|
||||
17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
|
||||
200B ; Format # Cf ZERO WIDTH SPACE
|
||||
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
|
||||
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
|
||||
2060..2063 ; Format # Cf [4] WORD JOINER..INVISIBLE SEPARATOR
|
||||
206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
|
||||
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
|
||||
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
|
||||
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
|
||||
E0001 ; Format # Cf LANGUAGE TAG
|
||||
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
|
||||
|
||||
# Total code points: 136
|
||||
|
||||
# ================================================
|
||||
|
||||
3031..3035 ; Katakana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
|
||||
309B..309C ; Katakana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
30A0 ; Katakana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
|
||||
30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
|
||||
30FC..30FE ; Katakana # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
|
||||
30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO
|
||||
31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
|
||||
FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
FF70 ; Katakana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
|
||||
FF9E..FF9F ; Katakana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
|
||||
# Total code points: 176
|
||||
|
||||
# ================================================
|
||||
|
||||
0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
|
||||
0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
|
||||
00AA ; ALetter # L& FEMININE ORDINAL INDICATOR
|
||||
00B5 ; ALetter # L& MICRO SIGN
|
||||
00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR
|
||||
00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
|
||||
00F8..01BA ; ALetter # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
|
||||
01BB ; ALetter # Lo LATIN LETTER TWO WITH STROKE
|
||||
01BC..01BF ; ALetter # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
|
||||
01C0..01C3 ; ALetter # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
|
||||
01C4..0293 ; ALetter # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
|
||||
0294 ; ALetter # Lo LATIN LETTER GLOTTAL STOP
|
||||
0295..02AF ; ALetter # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
|
||||
02B0..02C1 ; ALetter # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
|
||||
02C6..02D1 ; ALetter # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
|
||||
02E0..02E4 ; ALetter # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
|
||||
02EE ; ALetter # Lm MODIFIER LETTER DOUBLE APOSTROPHE
|
||||
037A ; ALetter # Lm GREEK YPOGEGRAMMENI
|
||||
037B..037D ; ALetter # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
|
||||
0386 ; ALetter # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||
0388..038A ; ALetter # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||
038C ; ALetter # L& GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||
038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
|
||||
03A3..03CE ; ALetter # L& [44] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER OMEGA WITH TONOS
|
||||
03D0..03F5 ; ALetter # L& [38] GREEK BETA SYMBOL..GREEK LUNATE EPSILON SYMBOL
|
||||
03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
|
||||
048A..0513 ; ALetter # L& [138] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH HOOK
|
||||
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
|
||||
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
|
||||
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
05D0..05EA ; ALetter # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
|
||||
05F0..05F2 ; ALetter # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
|
||||
05F3 ; ALetter # Po HEBREW PUNCTUATION GERESH
|
||||
0621..063A ; ALetter # Lo [26] ARABIC LETTER HAMZA..ARABIC LETTER GHAIN
|
||||
0640 ; ALetter # Lm ARABIC TATWEEL
|
||||
0641..064A ; ALetter # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
|
||||
066E..066F ; ALetter # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
|
||||
0671..06D3 ; ALetter # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
|
||||
06D5 ; ALetter # Lo ARABIC LETTER AE
|
||||
06E5..06E6 ; ALetter # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
|
||||
06EE..06EF ; ALetter # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
|
||||
06FA..06FC ; ALetter # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
|
||||
06FF ; ALetter # Lo ARABIC LETTER HEH WITH INVERTED V
|
||||
0710 ; ALetter # Lo SYRIAC LETTER ALAPH
|
||||
0712..072F ; ALetter # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
|
||||
074D..076D ; ALetter # Lo [33] SYRIAC LETTER SOGDIAN ZHAIN..ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE
|
||||
0780..07A5 ; ALetter # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU
|
||||
07B1 ; ALetter # Lo THAANA LETTER NAA
|
||||
07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA
|
||||
07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
|
||||
07FA ; ALetter # Lm NKO LAJANYALAN
|
||||
0903 ; ALetter # Mc DEVANAGARI SIGN VISARGA
|
||||
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
|
||||
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
|
||||
093E..0940 ; ALetter # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
|
||||
0949..094C ; ALetter # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
|
||||
0950 ; ALetter # Lo DEVANAGARI OM
|
||||
0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
|
||||
097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
|
||||
0982..0983 ; ALetter # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
|
||||
0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
|
||||
098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
|
||||
0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
|
||||
09AA..09B0 ; ALetter # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
|
||||
09B2 ; ALetter # Lo BENGALI LETTER LA
|
||||
09B6..09B9 ; ALetter # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
|
||||
09BD ; ALetter # Lo BENGALI SIGN AVAGRAHA
|
||||
09BF..09C0 ; ALetter # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II
|
||||
09C7..09C8 ; ALetter # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
|
||||
09CB..09CC ; ALetter # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
|
||||
09CE ; ALetter # Lo BENGALI LETTER KHANDA TA
|
||||
09DC..09DD ; ALetter # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
|
||||
09DF..09E1 ; ALetter # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
|
||||
09F0..09F1 ; ALetter # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
|
||||
0A03 ; ALetter # Mc GURMUKHI SIGN VISARGA
|
||||
0A05..0A0A ; ALetter # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
|
||||
0A0F..0A10 ; ALetter # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
|
||||
0A13..0A28 ; ALetter # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
|
||||
0A2A..0A30 ; ALetter # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
|
||||
0A32..0A33 ; ALetter # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
|
||||
0A35..0A36 ; ALetter # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
|
||||
0A38..0A39 ; ALetter # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
|
||||
0A3E..0A40 ; ALetter # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
|
||||
0A59..0A5C ; ALetter # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
|
||||
0A5E ; ALetter # Lo GURMUKHI LETTER FA
|
||||
0A72..0A74 ; ALetter # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
|
||||
0A83 ; ALetter # Mc GUJARATI SIGN VISARGA
|
||||
0A85..0A8D ; ALetter # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
|
||||
0A8F..0A91 ; ALetter # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
|
||||
0A93..0AA8 ; ALetter # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
|
||||
0AAA..0AB0 ; ALetter # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
|
||||
0AB2..0AB3 ; ALetter # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
|
||||
0AB5..0AB9 ; ALetter # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
|
||||
0ABD ; ALetter # Lo GUJARATI SIGN AVAGRAHA
|
||||
0ABE..0AC0 ; ALetter # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
|
||||
0AC9 ; ALetter # Mc GUJARATI VOWEL SIGN CANDRA O
|
||||
0ACB..0ACC ; ALetter # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
|
||||
0AD0 ; ALetter # Lo GUJARATI OM
|
||||
0AE0..0AE1 ; ALetter # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
|
||||
0B02..0B03 ; ALetter # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
|
||||
0B05..0B0C ; ALetter # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
|
||||
0B0F..0B10 ; ALetter # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
|
||||
0B13..0B28 ; ALetter # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
|
||||
0B2A..0B30 ; ALetter # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
|
||||
0B32..0B33 ; ALetter # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
|
||||
0B35..0B39 ; ALetter # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
|
||||
0B3D ; ALetter # Lo ORIYA SIGN AVAGRAHA
|
||||
0B40 ; ALetter # Mc ORIYA VOWEL SIGN II
|
||||
0B47..0B48 ; ALetter # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
|
||||
0B4B..0B4C ; ALetter # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
|
||||
0B5C..0B5D ; ALetter # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
|
||||
0B5F..0B61 ; ALetter # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
|
||||
0B71 ; ALetter # Lo ORIYA LETTER WA
|
||||
0B83 ; ALetter # Lo TAMIL SIGN VISARGA
|
||||
0B85..0B8A ; ALetter # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
|
||||
0B8E..0B90 ; ALetter # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
|
||||
0B92..0B95 ; ALetter # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
|
||||
0B99..0B9A ; ALetter # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
|
||||
0B9C ; ALetter # Lo TAMIL LETTER JA
|
||||
0B9E..0B9F ; ALetter # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
|
||||
0BA3..0BA4 ; ALetter # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
|
||||
0BA8..0BAA ; ALetter # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
|
||||
0BAE..0BB9 ; ALetter # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
|
||||
0BBF ; ALetter # Mc TAMIL VOWEL SIGN I
|
||||
0BC1..0BC2 ; ALetter # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
|
||||
0BC6..0BC8 ; ALetter # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
|
||||
0BCA..0BCC ; ALetter # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
|
||||
0C01..0C03 ; ALetter # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
|
||||
0C05..0C0C ; ALetter # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
|
||||
0C0E..0C10 ; ALetter # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
|
||||
0C12..0C28 ; ALetter # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
|
||||
0C2A..0C33 ; ALetter # Lo [10] TELUGU LETTER PA..TELUGU LETTER LLA
|
||||
0C35..0C39 ; ALetter # Lo [5] TELUGU LETTER VA..TELUGU LETTER HA
|
||||
0C41..0C44 ; ALetter # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
|
||||
0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
|
||||
0C82..0C83 ; ALetter # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
|
||||
0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
|
||||
0C8E..0C90 ; ALetter # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
|
||||
0C92..0CA8 ; ALetter # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
|
||||
0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
|
||||
0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
|
||||
0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA
|
||||
0CBE ; ALetter # Mc KANNADA VOWEL SIGN AA
|
||||
0CC0..0CC1 ; ALetter # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U
|
||||
0CC3..0CC4 ; ALetter # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR
|
||||
0CC7..0CC8 ; ALetter # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
|
||||
0CCA..0CCB ; ALetter # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
|
||||
0CDE ; ALetter # Lo KANNADA LETTER FA
|
||||
0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
|
||||
0D02..0D03 ; ALetter # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
|
||||
0D05..0D0C ; ALetter # Lo [8] MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
|
||||
0D0E..0D10 ; ALetter # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
|
||||
0D12..0D28 ; ALetter # Lo [23] MALAYALAM LETTER O..MALAYALAM LETTER NA
|
||||
0D2A..0D39 ; ALetter # Lo [16] MALAYALAM LETTER PA..MALAYALAM LETTER HA
|
||||
0D3F..0D40 ; ALetter # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II
|
||||
0D46..0D48 ; ALetter # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
|
||||
0D4A..0D4C ; ALetter # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
|
||||
0D60..0D61 ; ALetter # Lo [2] MALAYALAM LETTER VOCALIC RR..MALAYALAM LETTER VOCALIC LL
|
||||
0D82..0D83 ; ALetter # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
|
||||
0D85..0D96 ; ALetter # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
|
||||
0D9A..0DB1 ; ALetter # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
|
||||
0DB3..0DBB ; ALetter # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
|
||||
0DBD ; ALetter # Lo SINHALA LETTER DANTAJA LAYANNA
|
||||
0DC0..0DC6 ; ALetter # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
|
||||
0DD0..0DD1 ; ALetter # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
|
||||
0DD8..0DDE ; ALetter # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA
|
||||
0DF2..0DF3 ; ALetter # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
|
||||
0F00 ; ALetter # Lo TIBETAN SYLLABLE OM
|
||||
0F40..0F47 ; ALetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
|
||||
0F49..0F6A ; ALetter # Lo [34] TIBETAN LETTER NYA..TIBETAN LETTER FIXED-FORM RA
|
||||
0F7F ; ALetter # Mc TIBETAN SIGN RNAM BCAD
|
||||
0F88..0F8B ; ALetter # Lo [4] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN GRU MED RGYINGS
|
||||
10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
|
||||
10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
|
||||
10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR
|
||||
1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
|
||||
115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
|
||||
11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
|
||||
1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
|
||||
124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
|
||||
1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
|
||||
1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA
|
||||
125A..125D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
|
||||
1260..1288 ; ALetter # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
|
||||
128A..128D ; ALetter # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
|
||||
1290..12B0 ; ALetter # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
|
||||
12B2..12B5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
|
||||
12B8..12BE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
|
||||
12C0 ; ALetter # Lo ETHIOPIC SYLLABLE KXWA
|
||||
12C2..12C5 ; ALetter # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
|
||||
12C8..12D6 ; ALetter # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
|
||||
12D8..1310 ; ALetter # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
|
||||
1312..1315 ; ALetter # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
|
||||
1318..135A ; ALetter # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
|
||||
1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
|
||||
13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
|
||||
1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
|
||||
166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
|
||||
1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
|
||||
16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
|
||||
16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
|
||||
1700..170C ; ALetter # Lo [13] TAGALOG LETTER A..TAGALOG LETTER YA
|
||||
170E..1711 ; ALetter # Lo [4] TAGALOG LETTER LA..TAGALOG LETTER HA
|
||||
1720..1731 ; ALetter # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
|
||||
1740..1751 ; ALetter # Lo [18] BUHID LETTER A..BUHID LETTER HA
|
||||
1760..176C ; ALetter # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
|
||||
176E..1770 ; ALetter # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
|
||||
1820..1842 ; ALetter # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
|
||||
1843 ; ALetter # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
|
||||
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
|
||||
1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
|
||||
1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
|
||||
1923..1926 ; ALetter # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
|
||||
1929..192B ; ALetter # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
|
||||
1930..1931 ; ALetter # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
|
||||
1933..1938 ; ALetter # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
|
||||
1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
|
||||
1A19..1A1B ; ALetter # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
|
||||
1B04 ; ALetter # Mc BALINESE SIGN BISAH
|
||||
1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
|
||||
1B35 ; ALetter # Mc BALINESE VOWEL SIGN TEDUNG
|
||||
1B3B ; ALetter # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
|
||||
1B3D..1B41 ; ALetter # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
|
||||
1B43 ; ALetter # Mc BALINESE VOWEL SIGN PEPET TEDUNG
|
||||
1B45..1B4B ; ALetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK
|
||||
1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
|
||||
1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
|
||||
1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
|
||||
1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN
|
||||
1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
|
||||
1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
|
||||
1E00..1E9B ; ALetter # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE
|
||||
1EA0..1EF9 ; ALetter # L& [90] LATIN CAPITAL LETTER A WITH DOT BELOW..LATIN SMALL LETTER Y WITH TILDE
|
||||
1F00..1F15 ; ALetter # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
|
||||
1F18..1F1D ; ALetter # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
|
||||
1F20..1F45 ; ALetter # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
|
||||
1F48..1F4D ; ALetter # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
|
||||
1F50..1F57 ; ALetter # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
|
||||
1F59 ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA
|
||||
1F5B ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
|
||||
1F5D ; ALetter # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
|
||||
1F5F..1F7D ; ALetter # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
|
||||
1F80..1FB4 ; ALetter # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FB6..1FBC ; ALetter # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
1FBE ; ALetter # L& GREEK PROSGEGRAMMENI
|
||||
1FC2..1FC4 ; ALetter # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FC6..1FCC ; ALetter # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
1FD0..1FD3 ; ALetter # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
1FD6..1FDB ; ALetter # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
|
||||
1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
|
||||
1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I
|
||||
207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N
|
||||
2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
|
||||
2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C
|
||||
2107 ; ALetter # L& EULER CONSTANT
|
||||
210A..2113 ; ALetter # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
|
||||
2115 ; ALetter # L& DOUBLE-STRUCK CAPITAL N
|
||||
2119..211D ; ALetter # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
|
||||
2124 ; ALetter # L& DOUBLE-STRUCK CAPITAL Z
|
||||
2126 ; ALetter # L& OHM SIGN
|
||||
2128 ; ALetter # L& BLACK-LETTER CAPITAL Z
|
||||
212A..212D ; ALetter # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C
|
||||
212F..2134 ; ALetter # L& [6] SCRIPT SMALL E..SCRIPT SMALL O
|
||||
2135..2138 ; ALetter # Lo [4] ALEF SYMBOL..DALET SYMBOL
|
||||
2139 ; ALetter # L& INFORMATION SOURCE
|
||||
213C..213F ; ALetter # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
|
||||
2145..2149 ; ALetter # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
|
||||
214E ; ALetter # L& TURNED SMALL F
|
||||
2160..2182 ; ALetter # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
|
||||
2183..2184 ; ALetter # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
|
||||
24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
|
||||
2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
|
||||
2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
|
||||
2C60..2C6C ; ALetter # L& [13] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SMALL LETTER Z WITH DESCENDER
|
||||
2C74..2C77 ; ALetter # L& [4] LATIN SMALL LETTER V WITH CURL..LATIN SMALL LETTER TAILLESS PHI
|
||||
2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
|
||||
2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
|
||||
2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
|
||||
2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
|
||||
2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
|
||||
2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
|
||||
2DA8..2DAE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
|
||||
2DB0..2DB6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
|
||||
2DB8..2DBE ; ALetter # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
|
||||
2DC0..2DC6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
|
||||
2DC8..2DCE ; ALetter # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
|
||||
2DD0..2DD6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
|
||||
2DD8..2DDE ; ALetter # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
|
||||
3005 ; ALetter # Lm IDEOGRAPHIC ITERATION MARK
|
||||
303B ; ALetter # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
|
||||
303C ; ALetter # Lo MASU MARK
|
||||
3105..312C ; ALetter # Lo [40] BOPOMOFO LETTER B..BOPOMOFO LETTER GN
|
||||
3131..318E ; ALetter # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
|
||||
31A0..31B7 ; ALetter # Lo [24] BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
|
||||
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
|
||||
A015 ; ALetter # Lm YI SYLLABLE WU
|
||||
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
|
||||
A717..A71A ; ALetter # Lm [4] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOWER RIGHT CORNER ANGLE
|
||||
A800..A801 ; ALetter # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
|
||||
A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
|
||||
A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
|
||||
A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
|
||||
A823..A824 ; ALetter # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
|
||||
A827 ; ALetter # Mc SYLOTI NAGRI VOWEL SIGN OO
|
||||
A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
|
||||
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
|
||||
FA30..FA6A ; ALetter # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
|
||||
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
|
||||
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
|
||||
FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
|
||||
FB1F..FB28 ; ALetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
|
||||
FB2A..FB36 ; ALetter # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
|
||||
FB38..FB3C ; ALetter # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
|
||||
FB3E ; ALetter # Lo HEBREW LETTER MEM WITH DAGESH
|
||||
FB40..FB41 ; ALetter # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
|
||||
FB43..FB44 ; ALetter # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
|
||||
FB46..FBB1 ; ALetter # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
|
||||
FBD3..FD3D ; ALetter # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
|
||||
FD50..FD8F ; ALetter # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
|
||||
FD92..FDC7 ; ALetter # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
|
||||
FDF0..FDFB ; ALetter # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
|
||||
FE70..FE74 ; ALetter # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
|
||||
FE76..FEFC ; ALetter # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
|
||||
FF21..FF3A ; ALetter # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
FF41..FF5A ; ALetter # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
|
||||
FFA0..FFBE ; ALetter # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
|
||||
FFC2..FFC7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
|
||||
FFCA..FFCF ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
|
||||
FFD2..FFD7 ; ALetter # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
|
||||
FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
|
||||
10000..1000B ; ALetter # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
|
||||
1000D..10026 ; ALetter # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
|
||||
10028..1003A ; ALetter # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
|
||||
1003C..1003D ; ALetter # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
|
||||
1003F..1004D ; ALetter # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
|
||||
10050..1005D ; ALetter # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
|
||||
10080..100FA ; ALetter # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
|
||||
10140..10174 ; ALetter # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
|
||||
10300..1031E ; ALetter # Lo [31] OLD ITALIC LETTER A..OLD ITALIC LETTER UU
|
||||
10330..10340 ; ALetter # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
|
||||
10341 ; ALetter # Nl GOTHIC LETTER NINETY
|
||||
10342..10349 ; ALetter # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
|
||||
1034A ; ALetter # Nl GOTHIC LETTER NINE HUNDRED
|
||||
10380..1039D ; ALetter # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
|
||||
103A0..103C3 ; ALetter # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
|
||||
103C8..103CF ; ALetter # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
|
||||
103D1..103D5 ; ALetter # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
|
||||
10400..1044F ; ALetter # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
|
||||
10450..1049D ; ALetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
|
||||
10800..10805 ; ALetter # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
|
||||
10808 ; ALetter # Lo CYPRIOT SYLLABLE JO
|
||||
1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
|
||||
10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
|
||||
1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA
|
||||
1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO
|
||||
10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
|
||||
10A00 ; ALetter # Lo KHAROSHTHI LETTER A
|
||||
10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
|
||||
10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
|
||||
10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
|
||||
12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
|
||||
12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
|
||||
1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
|
||||
1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
|
||||
1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
|
||||
1D4A2 ; ALetter # L& MATHEMATICAL SCRIPT CAPITAL G
|
||||
1D4A5..1D4A6 ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
|
||||
1D4A9..1D4AC ; ALetter # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
|
||||
1D4AE..1D4B9 ; ALetter # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
|
||||
1D4BB ; ALetter # L& MATHEMATICAL SCRIPT SMALL F
|
||||
1D4BD..1D4C3 ; ALetter # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
|
||||
1D4C5..1D505 ; ALetter # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
|
||||
1D507..1D50A ; ALetter # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
|
||||
1D50D..1D514 ; ALetter # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
|
||||
1D516..1D51C ; ALetter # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
|
||||
1D51E..1D539 ; ALetter # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
|
||||
1D53B..1D53E ; ALetter # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
|
||||
1D540..1D544 ; ALetter # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
|
||||
1D546 ; ALetter # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
|
||||
1D54A..1D550 ; ALetter # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
|
||||
1D552..1D6A5 ; ALetter # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
|
||||
1D6A8..1D6C0 ; ALetter # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
|
||||
1D6C2..1D6DA ; ALetter # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
|
||||
1D6DC..1D6FA ; ALetter # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
|
||||
1D6FC..1D714 ; ALetter # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
|
||||
1D716..1D734 ; ALetter # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
|
||||
1D736..1D74E ; ALetter # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
|
||||
1D750..1D76E ; ALetter # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
|
||||
1D770..1D788 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
|
||||
1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
|
||||
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
|
||||
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
|
||||
|
||||
# Total code points: 21149
|
||||
|
||||
# ================================================
|
||||
|
||||
0027 ; MidLetter # Po APOSTROPHE
|
||||
003A ; MidLetter # Po COLON
|
||||
00B7 ; MidLetter # Po MIDDLE DOT
|
||||
05F4 ; MidLetter # Po HEBREW PUNCTUATION GERSHAYIM
|
||||
2019 ; MidLetter # Pf RIGHT SINGLE QUOTATION MARK
|
||||
2027 ; MidLetter # Po HYPHENATION POINT
|
||||
|
||||
# Total code points: 6
|
||||
|
||||
# ================================================
|
||||
|
||||
002C ; MidNum # Po COMMA
|
||||
002E ; MidNum # Po FULL STOP
|
||||
003B ; MidNum # Po SEMICOLON
|
||||
037E ; MidNum # Po GREEK QUESTION MARK
|
||||
0589 ; MidNum # Po ARMENIAN FULL STOP
|
||||
060D ; MidNum # Po ARABIC DATE SEPARATOR
|
||||
07F8 ; MidNum # Po NKO COMMA
|
||||
2044 ; MidNum # Sm FRACTION SLASH
|
||||
FE10 ; MidNum # Po PRESENTATION FORM FOR VERTICAL COMMA
|
||||
FE13..FE14 ; MidNum # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON
|
||||
|
||||
# Total code points: 11
|
||||
|
||||
# ================================================
|
||||
|
||||
0030..0039 ; Numeric # Nd [10] DIGIT ZERO..DIGIT NINE
|
||||
0660..0669 ; Numeric # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
066B..066C ; Numeric # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR
|
||||
06F0..06F9 ; Numeric # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
07C0..07C9 ; Numeric # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
|
||||
0966..096F ; Numeric # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
09E6..09EF ; Numeric # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
|
||||
0A66..0A6F ; Numeric # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
|
||||
0AE6..0AEF ; Numeric # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
|
||||
0B66..0B6F ; Numeric # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
|
||||
0BE6..0BEF ; Numeric # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
|
||||
0C66..0C6F ; Numeric # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
|
||||
0CE6..0CEF ; Numeric # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
0D66..0D6F ; Numeric # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
|
||||
0E50..0E59 ; Numeric # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
|
||||
0ED0..0ED9 ; Numeric # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
|
||||
0F20..0F29 ; Numeric # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
|
||||
1040..1049 ; Numeric # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
|
||||
17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
|
||||
1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
|
||||
1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
|
||||
19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
|
||||
1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
|
||||
104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
|
||||
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
|
||||
|
||||
# Total code points: 282
|
||||
|
||||
# ================================================
|
||||
|
||||
005F ; ExtendNumLet # Pc LOW LINE
|
||||
203F..2040 ; ExtendNumLet # Pc [2] UNDERTIE..CHARACTER TIE
|
||||
2054 ; ExtendNumLet # Pc INVERTED UNDERTIE
|
||||
FE33..FE34 ; ExtendNumLet # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
|
||||
FE4D..FE4F ; ExtendNumLet # Pc [3] DASHED LOW LINE..WAVY LOW LINE
|
||||
FF3F ; ExtendNumLet # Pc FULLWIDTH LOW LINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# EOF
|
|
@ -1,80 +0,0 @@
|
|||
#!/usr/bin/env ikarus --r6rs-script
|
||||
|
||||
(import
|
||||
(ikarus)
|
||||
(unicode-data))
|
||||
|
||||
;;; hex->num: parse X, a string of hexadecimal digits, into a number by
;;; handing the text "#x<X>" to the reader.
(define hex->num
  (lambda (x)
    (let* ([literal (format "#x~a" x)]
           [port (open-input-string literal)])
      (read port))))
|
||||
|
||||
;;; data-case: turn one record (a list of field strings) into
;;;   (code-point . #(upcase-delta downcase-delta titlecase-delta #f))
;;; Each delta is the mapped code point minus the record's own code point,
;;; or 0 when the corresponding field is the empty string.  Slot 3 is a
;;; placeholder that compute-foldcase overwrites later.
(define (data-case fields)
  (let* ([raw-code (car fields)]
         [raw-upper (list-ref fields uc-index)]
         [raw-lower (list-ref fields lc-index)]
         [raw-title (list-ref fields tc-index)]
         [code (hex->num raw-code)])
    (define (delta field)
      (cond
        [(string=? field "") 0]
        [else (- (hex->num field) code)]))
    (cons code
          (vector (delta raw-upper) (delta raw-lower) (delta raw-title) #f))))
|
||||
|
||||
;;; remove-dups: collapse runs of consecutive association pairs whose
;;; cdrs are equal?, keeping only the first pair of every run.  The
;;; "previous cdr" starts out as #f, so a leading pair whose cdr is #f
;;; is dropped as well.
(define (remove-dups ls)
  (let loop ([rest ls] [prev #f] [kept '()])
    (if (null? rest)
        (reverse kept)
        (let ([entry (car rest)])
          (if (equal? (cdr entry) prev)
              (loop (cdr rest) prev kept)
              (loop (cdr rest) (cdr entry) (cons entry kept)))))))
|
||||
|
||||
;;; compute-foldcase: fill in slot 3 (the simple case-folding delta) of
;;; every entry in LS and return LS.
;;;
;;; LS is an association list of (code-point . vector) pairs where slot 0
;;; of the vector holds the upcase delta and slot 1 the downcase delta.
;;; The fold of a code point is computed as lower(upper(cp)) and stored as
;;; a delta relative to cp.  The vectors are mutated in place.
;;;
;;; Code points #x130 and #x131 (marked "turkic chars" by the original
;;; author) are forced to fold to themselves; both must be present in LS.
;;;
;;; Keys are integers, so lookups use assv: eq?-based assq is not
;;; guaranteed to find numbers.
(define (compute-foldcase ls)
  ;; Vector associated with code point IDX; an error if it is absent.
  (define (find-vec idx)
    (cond
      [(assv idx ls) => cdr]
      [else (error 'find-vec "~s is missing" idx)]))
  ;; Code point obtained by applying the upcase delta of I.
  (define (upper i)
    (+ i (vector-ref (find-vec i) 0)))
  ;; Code point obtained by applying the downcase delta of I.
  (define (lower i)
    (+ i (vector-ref (find-vec i) 1)))
  (for-each
    (lambda (x)
      (let ([idx (car x)] [vec (cdr x)])
        (vector-set! vec 3
          (- (lower (upper idx)) idx))))
    ls)
  (for-each
    (lambda (idx)
      (vector-set! (find-vec idx) 3 0))
    ;; turkic chars
    '(#x130 #x131))
  ls)
|
||||
|
||||
;;; Positions, within one record's field list, of the three case-mapping
;;; fields read by data-case.  The fields are adjacent, so the later
;;; positions are expressed relative to the first.
(define uc-index 12)               ; upcase mapping field
(define lc-index (+ uc-index 1))   ; downcase mapping field (13)
(define tc-index (+ lc-index 1))   ; titlecase mapping field (14)
|
||||
|
||||
;;; Driver: build the case table from the Unicode data, drop consecutive
;;; entries that carry identical adjustment vectors, and write the search
;;; vector plus one adjustment vector per slot to "unicode-char-cases.ss".
(let* ([table
        (remove-dups
          (compute-foldcase
            (map data-case
                 (get-unicode-data))))]
       [keys (list->vector (map car table))])
  ;; Print (define NAME '#(...)) holding slot SLOT of every table entry.
  (define (emit name slot)
    (pretty-print
      `(define ,name
         ',(list->vector
             (map (lambda (entry) (vector-ref (cdr entry) slot))
                  table)))))
  (with-output-to-file "unicode-char-cases.ss"
    (lambda ()
      (printf ";;; DO NOT EDIT\n;;; automatically generated\n")
      (printf ";;; ~s entries in table\n" (vector-length keys))
      (pretty-print `(define charcase-search-vector ',keys))
      (emit 'char-upcase-adjustment-vector 0)
      (emit 'char-downcase-adjustment-vector 1)
      (emit 'char-titlecase-adjustment-vector 2)
      (emit 'char-foldcase-adjustment-vector 3))
    'replace))

(printf "Happy Happy Joy Joy\n")
|
|
@ -68,7 +68,7 @@
|
|||
|
||||
|
||||
|
||||
(let ([ls (map cat (get-unicode-data))])
|
||||
(let ([ls (map cat (get-unicode-data "UNIDATA/UnicodeData.txt"))])
|
||||
(let ([wanted
|
||||
(codes-in-cats ls
|
||||
'(Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pd Pc Po Sc Sm Sk So Co))])
|
||||
|
|
|
@ -0,0 +1,159 @@
|
|||
#!/usr/bin/env ikarus --r6rs-script
|
||||
|
||||
(import
|
||||
(ikarus)
|
||||
(unicode-data))
|
||||
|
||||
;;; hex->num: parse X, a string of hexadecimal digits, into a number by
;;; handing the text "#x<X>" to the reader.
(define hex->num
  (lambda (x)
    (let* ([literal (format "#x~a" x)]
           [port (open-input-string literal)])
      (read port))))
|
||||
|
||||
;;; data-case: turn one record (a list of field strings) into
;;;   (code-point . #(upcase-delta downcase-delta titlecase-delta #f 0))
;;; Each delta is the mapped code point minus the record's own code point,
;;; or 0 when the corresponding field is the empty string.  Slot 3 is a
;;; placeholder overwritten by compute-foldcase; slot 4 starts at 0 and is
;;; overwritten by the driver for code points listed in the folding data.
(define (data-case fields)
  (let* ([raw-code (car fields)]
         [raw-upper (list-ref fields uc-index)]
         [raw-lower (list-ref fields lc-index)]
         [raw-title (list-ref fields tc-index)]
         [code (hex->num raw-code)])
    (define (delta field)
      (cond
        [(string=? field "") 0]
        [else (- (hex->num field) code)]))
    (cons code
          (vector (delta raw-upper) (delta raw-lower) (delta raw-title) #f 0))))
|
||||
|
||||
;;; remove-dups: collapse runs of consecutive association pairs whose
;;; cdrs are equal?, keeping only the first pair of every run.  The
;;; "previous cdr" starts out as #f, so a leading pair whose cdr is #f
;;; is dropped as well.
(define (remove-dups ls)
  (let loop ([rest ls] [prev #f] [kept '()])
    (if (null? rest)
        (reverse kept)
        (let ([entry (car rest)])
          (if (equal? (cdr entry) prev)
              (loop (cdr rest) prev kept)
              (loop (cdr rest) (cdr entry) (cons entry kept)))))))
|
||||
|
||||
;;; compute-foldcase: fill in slot 3 (the simple case-folding delta) of
;;; every entry in LS and return LS.
;;;
;;; LS is an association list of (code-point . vector) pairs where slot 0
;;; of the vector holds the upcase delta and slot 1 the downcase delta.
;;; The fold of a code point is computed as lower(upper(cp)) and stored as
;;; a delta relative to cp.  The vectors are mutated in place.
;;;
;;; Code points #x130 and #x131 (marked "turkic chars" by the original
;;; author) are forced to fold to themselves; both must be present in LS.
;;;
;;; Keys are integers, so lookups use assv: eq?-based assq is not
;;; guaranteed to find numbers.
(define (compute-foldcase ls)
  ;; Vector associated with code point IDX; an error if it is absent.
  (define (find-vec idx)
    (cond
      [(assv idx ls) => cdr]
      [else (error 'find-vec "~s is missing" idx)]))
  ;; Code point obtained by applying the upcase delta of I.
  (define (upper i)
    (+ i (vector-ref (find-vec i) 0)))
  ;; Code point obtained by applying the downcase delta of I.
  (define (lower i)
    (+ i (vector-ref (find-vec i) 1)))
  (for-each
    (lambda (x)
      (let ([idx (car x)] [vec (cdr x)])
        (vector-set! vec 3
          (- (lower (upper idx)) idx))))
    ls)
  (for-each
    (lambda (idx)
      (vector-set! (find-vec idx) 3 0))
    ;; turkic chars
    '(#x130 #x131))
  ls)
|
||||
|
||||
;;; Positions, within one record's field list, of the three case-mapping
;;; fields read by data-case.  The fields are adjacent, so the later
;;; positions are expressed relative to the first.
(define uc-index 12)               ; upcase mapping field
(define lc-index (+ uc-index 1))   ; downcase mapping field (13)
(define tc-index (+ lc-index 1))   ; titlecase mapping field (14)
|
||||
|
||||
|
||||
;;; remove-spaces: strip leading #\space characters from STR.  Only the
;;; front of the string is trimmed; interior and trailing spaces stay.
;;; When there is nothing to strip, STR itself is returned.
(define (remove-spaces str)
  (let ([len (string-length str)])
    (let skip ([i 0])
      (cond
        [(and (< i len) (char=? (string-ref str i) #\space))
         (skip (+ i 1))]
        [(= i 0) str]
        [else (substring str i len)]))))
|
||||
|
||||
;;; split: break STR at every #\space and return the list of pieces in
;;; order.  Each space is a separator, so adjacent, leading or trailing
;;; spaces yield empty-string pieces, and the empty string yields ("").
(define (split str)
  (let ([len (string-length str)])
    ;; Walk right to left; END is the exclusive end of the piece being
    ;; scanned, ACC the pieces already found to its right.
    (let scan ([end len] [i (- len 1)] [acc '()])
      (cond
        [(< i 0) (cons (substring str 0 end) acc)]
        [(char=? (string-ref str i) #\space)
         (scan i (- i 1) (cons (substring str (+ i 1) end) acc))]
        [else (scan end (- i 1) acc)]))))
|
||||
|
||||
;;; improperize: convert the non-empty proper list (a b ... y z) into the
;;; improper list (a b ... y . z).  A one-element list yields the element
;;; itself.
(define (improperize ls)
  (let ([head (car ls)] [tail (cdr ls)])
    (if (null? tail)
        head
        (cons head (improperize tail)))))
|
||||
|
||||
;;; convert-full-fold-fields: LS is a list of records, each a list of
;;; field strings (code point, status, mapping, ...).  Records whose
;;; status, after stripping leading spaces, is "C" or "F" are converted
;;; to (code-point . folding); all other records are skipped.
;;;
;;; The folding is
;;;   - an integer delta (target - code-point) when the mapping names a
;;;     single code point, or
;;;   - an improper list of characters (c1 c2 ... . cN) when it names
;;;     several.
(define (convert-full-fold-fields ls)
  ;; Does this record carry one of the statuses we keep?
  (define (wanted? fields)
    (member (remove-spaces (cadr fields)) '("C" "F")))
  ;; Build the (code-point . folding) pair for one record.
  (define (entry fields)
    (let* ([n (hex->num (remove-spaces (car fields)))]
           [c* (map hex->num
                    (map remove-spaces
                         (split (remove-spaces (caddr fields)))))])
      (cons n
            (if (= (length c*) 1)
                (- (car c*) n)
                (improperize (map integer->char c*))))))
  (let loop ([rest ls] [acc '()])
    (cond
      [(null? rest) (reverse acc)]
      [(wanted? (car rest))
       (loop (cdr rest) (cons (entry (car rest)) acc))]
      [else (loop (cdr rest) acc)])))
|
||||
|
||||
|
||||
;;; Disabled via the datum comment below; kept for reference.
;;; convert-index: pair every key in LS with (position - key), drop
;;; consecutive pairs that share the same offset, and return the
;;; survivors as (key . position).
#;
(define (convert-index ls)
  (define (offsets pos keys)
    (if (null? keys)
        '()
        (cons (cons (car keys) (- pos (car keys)))
              (offsets (add1 pos) (cdr keys)))))
  (map
    (lambda (entry)
      (cons (car entry) (+ (car entry) (cdr entry))))
    (remove-dups (offsets 0 ls))))
|
||||
|
||||
;;; Driver.
;;;  1. Build the per-code-point case table from UNIDATA/UnicodeData.txt
;;;     and compute the simple fold deltas (slot 3).
;;;  2. For every entry produced from UNIDATA/CaseFolding.txt, store its
;;;     folding (an integer delta or an improper list of characters) in
;;;     slot 4 of the matching table entry; a code point missing from the
;;;     table is an error.
;;;  3. Drop consecutive entries with identical vectors and write the
;;;     search vector and the five adjustment vectors to
;;;     "unicode-char-cases.ss".
(let ([ls
       (compute-foldcase
         (map data-case
              (get-unicode-data "UNIDATA/UnicodeData.txt")))])
  (for-each
    (lambda (x)
      ;; CHARS is either an integer delta or an improper list of chars.
      (let ([n (car x)] [chars (cdr x)])
        (cond
          ;; Keys are integers: use assv, since eq?-based assq is not
          ;; guaranteed to match numbers.
          [(assv n ls) =>
           (lambda (p)
             (vector-set! (cdr p) 4 chars))]
          [else (error #f "~s is not there" n)])))
    (convert-full-fold-fields
      (get-unicode-data "UNIDATA/CaseFolding.txt")))
  (let ([ls (remove-dups ls)])
    ;; Print (define NAME '#(...)) holding slot IDX of every entry.
    (define (p name idx)
      (pretty-print
        `(define ,name
           ',(list->vector (map (lambda (x) (vector-ref (cdr x) idx)) ls)))))
    ;; NOTE(review): print-unicode #f presumably makes the printer emit
    ;; non-ASCII characters in escaped form -- confirm against Ikarus.
    (parameterize ([print-unicode #f])
      (let ([v0 (list->vector (map car ls))])
        (with-output-to-file "unicode-char-cases.ss"
          (lambda ()
            (printf ";;; DO NOT EDIT\n;;; automatically generated\n")
            ;; Disabled experiment: emit an index over the search keys.
            #;
            (let ([ls (convert-index (map car ls))])
              (pretty-print
                `(define char-search-index-vector
                   ',(list->vector (map car ls))))
              (pretty-print
                `(define char-adjustment-index-vector
                   ',(list->vector (map cdr ls)))))
            (printf ";;; ~s entries in table\n" (vector-length v0))
            (pretty-print `(define charcase-search-vector ',v0))
            (p 'char-upcase-adjustment-vector 0)
            (p 'char-downcase-adjustment-vector 1)
            (p 'char-titlecase-adjustment-vector 2)
            (p 'char-foldcase-adjustment-vector 3)
            (p 'string-foldcase-adjustment-vector 4)
            )
          'replace)))))

(printf "Happy Happy Joy Joy\n")
|
|
@ -1,6 +1,6 @@
|
|||
;;; DO NOT EDIT
|
||||
;;; automatically generated
|
||||
;;; 1080 entries in table
|
||||
;;; 1153 entries in table
|
||||
(define charcase-search-vector
|
||||
'#(0 65 91 97 123 181 182 192 215 216 223 224 247 248 255 256 257 258 259 260
|
||||
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
|
||||
|
@ -37,40 +37,45 @@
|
|||
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
|
||||
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281
|
||||
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
|
||||
1297 1298 1299 1329 1369 1377 1415 4256 4304 7549 7550 7680 7681 7682 7683
|
||||
7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698
|
||||
7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713
|
||||
7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728
|
||||
7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743
|
||||
7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758
|
||||
7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773
|
||||
7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788
|
||||
7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803
|
||||
7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818
|
||||
7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7835 7840 7841
|
||||
7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856
|
||||
7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871
|
||||
7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886
|
||||
7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901
|
||||
7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916
|
||||
7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7936 7944
|
||||
7952 7960 7968 7976 7984 7992 8000 8008 8016 8017 8018 8019 8020 8021 8022
|
||||
8023 8025 8032 8040 8048 8050 8054 8056 8058 8060 8064 8072 8080 8088 8096
|
||||
8104 8112 8114 8115 8116 8120 8122 8124 8125 8126 8127 8131 8132 8136 8140
|
||||
8141 8144 8146 8152 8154 8157 8160 8162 8165 8166 8168 8170 8172 8173 8179
|
||||
8180 8184 8186 8188 8189 8486 8487 8490 8491 8492 8498 8499 8526 8531 8544
|
||||
8560 8576 8579 8580 8592 9398 9424 9450 11264 11312 11360 11361 11362 11363
|
||||
11364 11365 11366 11367 11368 11369 11370 11371 11372 11380 11381 11382
|
||||
11383 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402
|
||||
11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414
|
||||
11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426
|
||||
11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438
|
||||
11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450
|
||||
11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462
|
||||
11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474
|
||||
11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486
|
||||
11487 11488 11489 11490 11491 11492 11520 11568 65313 65339 65345 65371
|
||||
66560 66600 66640))
|
||||
1297 1298 1299 1329 1369 1377 1415 1417 4256 4304 7549 7550 7680 7681 7682
|
||||
7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697
|
||||
7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712
|
||||
7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727
|
||||
7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742
|
||||
7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757
|
||||
7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772
|
||||
7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787
|
||||
7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802
|
||||
7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817
|
||||
7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832
|
||||
7833 7834 7835 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851
|
||||
7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866
|
||||
7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881
|
||||
7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896
|
||||
7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911
|
||||
7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926
|
||||
7927 7928 7929 7936 7944 7952 7960 7968 7976 7984 7992 8000 8008 8016 8017
|
||||
8018 8019 8020 8021 8022 8023 8025 8032 8040 8048 8050 8054 8056 8058 8060
|
||||
8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078
|
||||
8079 8080 8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093
|
||||
8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108
|
||||
8109 8110 8111 8112 8114 8115 8116 8118 8119 8120 8122 8124 8125 8126 8127
|
||||
8130 8131 8132 8134 8135 8136 8140 8141 8144 8146 8147 8150 8151 8152 8154
|
||||
8157 8160 8162 8163 8164 8165 8166 8167 8168 8170 8172 8173 8178 8179 8180
|
||||
8182 8183 8184 8186 8188 8189 8486 8487 8490 8491 8492 8498 8499 8526 8531
|
||||
8544 8560 8576 8579 8580 8592 9398 9424 9450 11264 11312 11360 11361 11362
|
||||
11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11380 11381
|
||||
11382 11383 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401
|
||||
11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413
|
||||
11414 11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425
|
||||
11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437
|
||||
11438 11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449
|
||||
11450 11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461
|
||||
11462 11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473
|
||||
11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485
|
||||
11486 11487 11488 11489 11490 11491 11492 11520 11568 64256 64257 64258
|
||||
64259 64260 64261 64275 64276 64277 64278 64279 64285 65313 65339 65345
|
||||
65371 66560 66600 66640))
|
||||
(define char-upcase-adjustment-vector
|
||||
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
|
@ -93,22 +98,24 @@
|
|||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 -15 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -48 0 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 0 0 -59 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0
|
||||
8 0 8 0 0 8 0 8 0 8 0 8 0 8 0 74 86 100 128 112 126 8 8 8 8 8 8 8 8 0 0 0 0
|
||||
0 0 0 0 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 0
|
||||
9 0 0 0 0 0 0 0 -7205 0 0 9 0 0 0 0 0 0 8 0 0 0 0 0 0 0 8 0 0 0 7 0 0 0 0 0
|
||||
0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26 0 0 -48 0 -1 0
|
||||
0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0
|
||||
8 0 8 0 8 0 8 0 8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205
|
||||
0 9 0 0 0 0 8 0 0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16
|
||||
0 0 -1 0 0 -26 0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -7264
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -32 0 0 -40 0))
|
||||
(define char-downcase-adjustment-vector
|
||||
'#(0 32 0 0 0 0 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -199 0 1 0 1 0 1 0 0
|
||||
|
@ -127,20 +134,22 @@
|
|||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0 0 0 0 0 -8 0 -8 0 -8
|
||||
0 0 0 0 -8 -74 -9 0 0 0 0 0 -86 -9 0 0 0 -8 -100 0 0 0 0 0 -8 -112 -7 0 0 0
|
||||
-128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8
|
||||
0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 -8 -74 -9 0 0 0 0 0 0 0
|
||||
0 -86 -9 0 0 0 0 0 0 -8 -100 0 0 0 0 0 0 0 0 -8 -112 -7 0 0 0 0 0 0 -128
|
||||
-126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0
|
||||
-10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))
|
||||
1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 32 0 0 0 40 0 0))
|
||||
(define char-titlecase-adjustment-vector
|
||||
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
|
@ -163,22 +172,24 @@
|
|||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -15 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 0 -48 0 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 0 0 -59 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0 8 0 8 0 8 0 8 0
|
||||
8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205 0 9 0 0 0 0 8 0
|
||||
0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26
|
||||
0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0 8 0 8
|
||||
0 8 0 8 0 8 0 74 86 100 128 112 126 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 8 8 8
|
||||
8 8 8 8 0 0 0 0 0 0 0 0 8 8 8 8 8 8 8 8 0 0 0 0 0 0 0 0 8 0 9 0 0 0 0 0 0 0
|
||||
-7205 0 0 9 0 0 0 0 0 0 8 0 0 0 0 0 0 0 8 0 0 0 7 0 0 0 0 0 0 0 9 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26 0 0 -48 0 -1 0 0 0 -10795
|
||||
-10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -7264 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 -32 0 0 -40 0))
|
||||
(define char-foldcase-adjustment-vector
|
||||
'#(0 32 0 0 0 775 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 0 1 0 0 1
|
||||
|
@ -197,17 +208,79 @@
|
|||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 0 7264 0 0 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0
|
||||
-58 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8
|
||||
0 -8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 0 0 -8
|
||||
-8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 0 0 -8 -8 -8 -8 -8 -8 -8 -8 0 0 0 0 0 0 -8
|
||||
-74 -9 0 -7173 0 0 0 0 0 0 -86 -9 0 0 0 0 0 0 -8 -100 0 0 0 0 0 0 0 0 -8
|
||||
-112 -7 0 0 0 0 0 0 -128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1
|
||||
0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 32 0 0 0
|
||||
40 0 0))
|
||||
(define string-foldcase-adjustment-vector
|
||||
'#(0 32 0 0 0 775 0 32 0 32 (#\s . #\s) 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
(#\i . #\x307) 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
(#\x2BC . #\n) 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -121 1 0 1 0 1 0 -268 0 210 1 0 1 0 206 1 0
|
||||
205 1 0 0 79 202 203 1 0 205 207 0 211 209 1 0 0 0 211 213 0 214 1 0 1 0 1
|
||||
0 218 1 0 218 0 1 0 218 1 0 217 1 0 1 0 219 1 0 0 1 0 0 0 0 2 1 0 2 1 0 2 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
(#\j . #\x30C) 2 1 0 1 0 -97 -56 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -130 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 0 10795 1 0 -163 10792 0 1 0 -195 69 71 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 116 0 0 0
|
||||
38 0 37 64 63 (#\x3B9 #\x308 . #\x301) 32 0 0 (#\x3C5 #\x308 . #\x301) 0 1
|
||||
0 0 0 -30 -25 0 -15 -22 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
-54 -48 0 0 -60 -64 0 1 0 -7 1 0 0 -130 80 32 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -58 1 0 1 0
|
||||
0 1 0 1 0 1 0 1 0 48 0 0 (#\x565 . #\x582) 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0
|
||||
0 0 0 0 -8 0 -8 0 -8 0 0 0 0 -8 -74 -9 0 -7173 0 0 0 -86 -9 0 0 0 -8 -100 0
|
||||
0 0 0 0 -8 -112 -7 0 0 0 -128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0
|
||||
0 1 0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 (#\h . #\x331)
|
||||
(#\t . #\x308) (#\w . #\x30A) (#\y . #\x30A) (#\a . #\x2BE) -58 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))
|
||||
1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 (#\x3C5 . #\x313) 0
|
||||
(#\x3C5 #\x313 . #\x300) 0 (#\x3C5 #\x313 . #\x301) 0
|
||||
(#\x3C5 #\x313 . #\x342) 0 -8 0 -8 0 0 0 0 0 0 (#\x1F00 . #\x3B9)
|
||||
(#\x1F01 . #\x3B9) (#\x1F02 . #\x3B9) (#\x1F03 . #\x3B9) (#\x1F04 . #\x3B9)
|
||||
(#\x1F05 . #\x3B9) (#\x1F06 . #\x3B9) (#\x1F07 . #\x3B9) (#\x1F00 . #\x3B9)
|
||||
(#\x1F01 . #\x3B9) (#\x1F02 . #\x3B9) (#\x1F03 . #\x3B9) (#\x1F04 . #\x3B9)
|
||||
(#\x1F05 . #\x3B9) (#\x1F06 . #\x3B9) (#\x1F07 . #\x3B9) (#\x1F20 . #\x3B9)
|
||||
(#\x1F21 . #\x3B9) (#\x1F22 . #\x3B9) (#\x1F23 . #\x3B9) (#\x1F24 . #\x3B9)
|
||||
(#\x1F25 . #\x3B9) (#\x1F26 . #\x3B9) (#\x1F27 . #\x3B9) (#\x1F20 . #\x3B9)
|
||||
(#\x1F21 . #\x3B9) (#\x1F22 . #\x3B9) (#\x1F23 . #\x3B9) (#\x1F24 . #\x3B9)
|
||||
(#\x1F25 . #\x3B9) (#\x1F26 . #\x3B9) (#\x1F27 . #\x3B9) (#\x1F60 . #\x3B9)
|
||||
(#\x1F61 . #\x3B9) (#\x1F62 . #\x3B9) (#\x1F63 . #\x3B9) (#\x1F64 . #\x3B9)
|
||||
(#\x1F65 . #\x3B9) (#\x1F66 . #\x3B9) (#\x1F67 . #\x3B9) (#\x1F60 . #\x3B9)
|
||||
(#\x1F61 . #\x3B9) (#\x1F62 . #\x3B9) (#\x1F63 . #\x3B9) (#\x1F64 . #\x3B9)
|
||||
(#\x1F65 . #\x3B9) (#\x1F66 . #\x3B9) (#\x1F67 . #\x3B9) 0
|
||||
(#\x1F70 . #\x3B9) (#\x3B1 . #\x3B9) (#\x3AC . #\x3B9) (#\x3B1 . #\x342)
|
||||
(#\x3B1 #\x342 . #\x3B9) -8 -74 (#\x3B1 . #\x3B9) 0 -7173 0
|
||||
(#\x1F74 . #\x3B9) (#\x3B7 . #\x3B9) (#\x3AE . #\x3B9) (#\x3B7 . #\x342)
|
||||
(#\x3B7 #\x342 . #\x3B9) -86 (#\x3B7 . #\x3B9) 0 0 (#\x3B9 #\x308 . #\x300)
|
||||
(#\x3B9 #\x308 . #\x301) (#\x3B9 . #\x342) (#\x3B9 #\x308 . #\x342) -8 -100
|
||||
0 0 (#\x3C5 #\x308 . #\x300) (#\x3C5 #\x308 . #\x301) (#\x3C1 . #\x313) 0
|
||||
(#\x3C5 . #\x342) (#\x3C5 #\x308 . #\x342) -8 -112 -7 0 (#\x1F7C . #\x3B9)
|
||||
(#\x3C9 . #\x3B9) (#\x3CE . #\x3B9) (#\x3C9 . #\x342)
|
||||
(#\x3C9 #\x342 . #\x3B9) -128 -126 (#\x3C9 . #\x3B9) 0 -7517 0 -8383 -8262
|
||||
0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0
|
||||
0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 (#\f . #\f)
|
||||
(#\f . #\i) (#\f . #\l) (#\f #\f . #\i) (#\f #\f . #\l) (#\s . #\t)
|
||||
(#\x574 . #\x576) (#\x574 . #\x565) (#\x574 . #\x56B) (#\x57E . #\x576)
|
||||
(#\x574 . #\x56D) 0 32 0 0 0 40 0 0))
|
||||
|
|
|
@ -13,23 +13,24 @@
|
|||
(eof-object)
|
||||
(list->string (reverse ac)))]
|
||||
[(char=? x #\newline)
|
||||
(if (null? ac) (f) (list->string (reverse ac)))]
|
||||
(if (null? ac) (f '()) (list->string (reverse ac)))]
|
||||
[else (f (cons x ac))]))))
|
||||
|
||||
(define (find-semi str i n)
|
||||
(define (find-semi/hash str i n)
|
||||
(cond
|
||||
[(or (fx= i n)
|
||||
(char=? (string-ref str i) #\;)) i]
|
||||
[else (find-semi str (+ i 1) n)]))
|
||||
[(or (fx= i n) (memv (string-ref str i) '(#\; #\#))) i]
|
||||
[else (find-semi/hash str (+ i 1) n)]))
|
||||
|
||||
(define (split str)
|
||||
(let f ([i 0] [n (string-length str)])
|
||||
(cond
|
||||
[(= i n) '("")]
|
||||
[(or (= i n) (memv (string-ref str i) '(#\#)))
|
||||
'("")]
|
||||
[else
|
||||
(let ([j (find-semi str i n)])
|
||||
(let ([j (find-semi/hash str i n)])
|
||||
(cond
|
||||
[(= j n) (list (substring str i j))]
|
||||
[(or (= j n) (memv (string-ref str i) '(#\#)))
|
||||
(list (substring str i j))]
|
||||
[else
|
||||
(cons (substring str i j)
|
||||
(f (+ j 1) n))]))])))
|
||||
|
@ -42,9 +43,11 @@
|
|||
(reverse ls)]
|
||||
[else
|
||||
(let ([fields (split line)])
|
||||
(f (cons fields ls)))]))))
|
||||
(if (or (null? fields) (equal? fields '("")))
|
||||
(f ls)
|
||||
(f (cons fields ls))))]))))
|
||||
|
||||
(define (get-unicode-data)
|
||||
(define (get-unicode-data filename)
|
||||
(with-input-from-file
|
||||
"UNIDATA/UnicodeData.txt"
|
||||
filename
|
||||
extract-uni-data)))
|
||||
|
|
Loading…
Reference in New Issue