/* utf.c - UTF-8 support code ; ; Copyright (c) 2022-2022, The CHICKEN Team ; All rights reserved. ; ; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following ; conditions are met: ; ; Redistributions of source code must retain the above copyright notice, this list of conditions and the following ; disclaimer. ; Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following ; disclaimer in the documentation and/or other materials provided with the distribution. ; Neither the name of the author nor the names of its contributors may be used to endorse or promote ; products derived from this software without specific prior written permission. ; ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS ; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR ; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ; POSSIBILITY OF SUCH DAMAGE. 
*/ #include "chicken.h" #if defined(_WIN32) && !defined(__CYGWIN__) # include #endif /* partially generated by mkrunetype.awk from http://git.suckless.org/ubase/ see LICENSE for more information */ #define nelem(x) (sizeof (x) / sizeof *(x)) static int rune1cmp(const void *v1, const void *v2) { int r1 = *(int *)v1, r2 = *(int *)v2; return r1 - r2; } static int rune2cmp(const void *v1, const void *v2) { int r = *(int *)v1, *p = (int *)v2; if(r >= p[0] && r <= p[1]) return 0; else return r - p[0]; } static int runemapcmp(const void *v1, const void *v2) { return *(int *)v1 - *(int *)v2; } /* generated by mkrunetype.awk from http://git.suckless.org/ubase/ see LICENSE for more information */ static int upper3[][2] = { { 0x0100, 0x012E }, { 0x0132, 0x0136 }, { 0x0139, 0x0147 }, { 0x014A, 0x0176 }, { 0x0179, 0x017D }, { 0x0182, 0x0184 }, { 0x01A0, 0x01A4 }, { 0x01B3, 0x01B5 }, { 0x01CD, 0x01DB }, { 0x01DE, 0x01EE }, { 0x01F8, 0x021E }, { 0x0222, 0x0232 }, { 0x0246, 0x024E }, { 0x0370, 0x0372 }, { 0x03D8, 0x03EE }, { 0x0460, 0x0480 }, { 0x048A, 0x04BE }, { 0x04C1, 0x04CD }, { 0x04D0, 0x052E }, { 0x1E00, 0x1E94 }, { 0x1EA0, 0x1EFE }, { 0x2C67, 0x2C6B }, { 0x2C80, 0x2CE2 }, { 0x2CEB, 0x2CED }, { 0xA640, 0xA66C }, { 0xA680, 0xA69A }, { 0xA722, 0xA72E }, { 0xA732, 0xA76E }, { 0xA779, 0xA77B }, { 0xA77E, 0xA786 }, { 0xA790, 0xA792 }, { 0xA796, 0xA7A8 }, { 0xA7B4, 0xA7C2 }, { 0xA7C7, 0xA7C9 }, { 0xA7D6, 0xA7D8 }, }; static int upper2[][3] = { { 0x0041, 0x005A, 0x0061 }, { 0x00C0, 0x00D6, 0x00E0 }, { 0x00D8, 0x00DE, 0x00F8 }, { 0x0189, 0x018A, 0x0256 }, { 0x01B1, 0x01B2, 0x028A }, { 0x0388, 0x038A, 0x03AD }, { 0x038E, 0x038F, 0x03CD }, { 0x0391, 0x03A1, 0x03B1 }, { 0x03A3, 0x03AB, 0x03C3 }, { 0x03D2, 0x03D4, 0x03D2 }, { 0x03FD, 0x03FF, 0x037B }, { 0x0400, 0x040F, 0x0450 }, { 0x0410, 0x042F, 0x0430 }, { 0x0531, 0x0556, 0x0561 }, { 0x10A0, 0x10C5, 0x2D00 }, { 0x13A0, 0x13EF, 0xAB70 }, { 0x13F0, 0x13F5, 0x13F8 }, { 0x1C90, 0x1CBA, 0x10D0 }, { 0x1CBD, 0x1CBF, 0x10FD }, { 0x1F08, 
0x1F0F, 0x1F00 }, { 0x1F18, 0x1F1D, 0x1F10 }, { 0x1F28, 0x1F2F, 0x1F20 }, { 0x1F38, 0x1F3F, 0x1F30 }, { 0x1F48, 0x1F4D, 0x1F40 }, { 0x1F68, 0x1F6F, 0x1F60 }, { 0x1FB8, 0x1FB9, 0x1FB0 }, { 0x1FBA, 0x1FBB, 0x1F70 }, { 0x1FC8, 0x1FCB, 0x1F72 }, { 0x1FD8, 0x1FD9, 0x1FD0 }, { 0x1FDA, 0x1FDB, 0x1F76 }, { 0x1FE8, 0x1FE9, 0x1FE0 }, { 0x1FEA, 0x1FEB, 0x1F7A }, { 0x1FF8, 0x1FF9, 0x1F78 }, { 0x1FFA, 0x1FFB, 0x1F7C }, { 0x210B, 0x210D, 0x210B }, { 0x2110, 0x2112, 0x2110 }, { 0x2119, 0x211D, 0x2119 }, { 0x212C, 0x212D, 0x212C }, { 0x2130, 0x2131, 0x2130 }, { 0x213E, 0x213F, 0x213E }, { 0x2C00, 0x2C2F, 0x2C30 }, { 0x2C7E, 0x2C7F, 0x023F }, { 0xFF21, 0xFF3A, 0xFF41 }, { 0x10400, 0x10427, 0x10428 }, { 0x104B0, 0x104D3, 0x104D8 }, { 0x10570, 0x1057A, 0x10597 }, { 0x1057C, 0x1058A, 0x105A3 }, { 0x1058C, 0x10592, 0x105B3 }, { 0x10594, 0x10595, 0x105BB }, { 0x10C80, 0x10CB2, 0x10CC0 }, { 0x118A0, 0x118BF, 0x118C0 }, { 0x16E40, 0x16E5F, 0x16E60 }, { 0x1D400, 0x1D419, 0x1D400 }, { 0x1D434, 0x1D44D, 0x1D434 }, { 0x1D468, 0x1D481, 0x1D468 }, { 0x1D49E, 0x1D49F, 0x1D49E }, { 0x1D4A5, 0x1D4A6, 0x1D4A5 }, { 0x1D4A9, 0x1D4AC, 0x1D4A9 }, { 0x1D4AE, 0x1D4B5, 0x1D4AE }, { 0x1D4D0, 0x1D4E9, 0x1D4D0 }, { 0x1D504, 0x1D505, 0x1D504 }, { 0x1D507, 0x1D50A, 0x1D507 }, { 0x1D50D, 0x1D514, 0x1D50D }, { 0x1D516, 0x1D51C, 0x1D516 }, { 0x1D538, 0x1D539, 0x1D538 }, { 0x1D53B, 0x1D53E, 0x1D53B }, { 0x1D540, 0x1D544, 0x1D540 }, { 0x1D54A, 0x1D550, 0x1D54A }, { 0x1D56C, 0x1D585, 0x1D56C }, { 0x1D5A0, 0x1D5B9, 0x1D5A0 }, { 0x1D5D4, 0x1D5ED, 0x1D5D4 }, { 0x1D608, 0x1D621, 0x1D608 }, { 0x1D63C, 0x1D655, 0x1D63C }, { 0x1D670, 0x1D689, 0x1D670 }, { 0x1D6A8, 0x1D6C0, 0x1D6A8 }, { 0x1D6E2, 0x1D6FA, 0x1D6E2 }, { 0x1D71C, 0x1D734, 0x1D71C }, { 0x1D756, 0x1D76E, 0x1D756 }, { 0x1D790, 0x1D7A8, 0x1D790 }, { 0x1E900, 0x1E921, 0x1E922 }, }; static int upper1[][2] = { { 0x0130, 0x0069 }, { 0x0178, 0x00FF }, { 0x0181, 0x0253 }, { 0x0186, 0x0254 }, { 0x0187, 0x0188 }, { 0x018B, 0x018C }, { 0x018E, 0x01DD }, { 0x018F, 0x0259 }, 
{ 0x0190, 0x025B }, { 0x0191, 0x0192 }, { 0x0193, 0x0260 }, { 0x0194, 0x0263 },
  { 0x0196, 0x0269 }, { 0x0197, 0x0268 }, { 0x0198, 0x0199 }, { 0x019C, 0x026F },
  { 0x019D, 0x0272 }, { 0x019F, 0x0275 }, { 0x01A6, 0x0280 }, { 0x01A7, 0x01A8 },
  { 0x01A9, 0x0283 }, { 0x01AC, 0x01AD }, { 0x01AE, 0x0288 }, { 0x01AF, 0x01B0 },
  { 0x01B7, 0x0292 }, { 0x01B8, 0x01B9 }, { 0x01BC, 0x01BD }, { 0x01C4, 0x01C6 },
  { 0x01C7, 0x01C9 }, { 0x01CA, 0x01CC }, { 0x01F1, 0x01F3 }, { 0x01F4, 0x01F5 },
  { 0x01F6, 0x0195 }, { 0x01F7, 0x01BF }, { 0x0220, 0x019E }, { 0x023A, 0x2C65 },
  { 0x023B, 0x023C }, { 0x023D, 0x019A }, { 0x023E, 0x2C66 }, { 0x0241, 0x0242 },
  { 0x0243, 0x0180 }, { 0x0244, 0x0289 }, { 0x0245, 0x028C }, { 0x0376, 0x0377 },
  { 0x037F, 0x03F3 }, { 0x0386, 0x03AC }, { 0x038C, 0x03CC }, { 0x03CF, 0x03D7 },
  { 0x03F4, 0x03B8 }, { 0x03F7, 0x03F8 }, { 0x03F9, 0x03F2 }, { 0x03FA, 0x03FB },
  { 0x04C0, 0x04CF }, { 0x10C7, 0x2D27 }, { 0x10CD, 0x2D2D }, { 0x1E9E, 0x00DF },
  { 0x1F59, 0x1F51 }, { 0x1F5B, 0x1F53 }, { 0x1F5D, 0x1F55 }, { 0x1F5F, 0x1F57 },
  { 0x1FEC, 0x1FE5 }, { 0x2102, 0x2102 }, { 0x2107, 0x2107 }, { 0x2115, 0x2115 },
  { 0x2124, 0x2124 }, { 0x2126, 0x03C9 }, { 0x2128, 0x2128 }, { 0x212A, 0x006B },
  { 0x212B, 0x00E5 }, { 0x2132, 0x214E }, { 0x2133, 0x2133 }, { 0x2145, 0x2145 },
  { 0x2183, 0x2184 }, { 0x2C60, 0x2C61 }, { 0x2C62, 0x026B }, { 0x2C63, 0x1D7D },
  { 0x2C64, 0x027D }, { 0x2C6D, 0x0251 }, { 0x2C6E, 0x0271 }, { 0x2C6F, 0x0250 },
  { 0x2C70, 0x0252 }, { 0x2C72, 0x2C73 }, { 0x2C75, 0x2C76 }, { 0x2CF2, 0x2CF3 },
  { 0xA77D, 0x1D79 }, { 0xA78B, 0xA78C }, { 0xA78D, 0x0265 }, { 0xA7AA, 0x0266 },
  { 0xA7AB, 0x025C }, { 0xA7AC, 0x0261 }, { 0xA7AD, 0x026C }, { 0xA7AE, 0x026A },
  { 0xA7B0, 0x029E }, { 0xA7B1, 0x0287 }, { 0xA7B2, 0x029D }, { 0xA7B3, 0xAB53 },
  { 0xA7C4, 0xA794 }, { 0xA7C5, 0x0282 }, { 0xA7C6, 0x1D8E }, { 0xA7D0, 0xA7D1 },
  { 0xA7F5, 0xA7F6 }, { 0x1D49C, 0x1D49C }, { 0x1D4A2, 0x1D4A2 }, { 0x1D546, 0x1D546 },
  { 0x1D7CA, 0x1D7CA },
};

/* Upper-case test for the code point r.
   Inside an upper3 range only members at an even offset from the range
   start count (result 1, otherwise 0); any member of upper2 or upper1
   yields 1; everything else yields 0. */
C_regparm int C_utf_isupper(int r)
{
  const int *alt = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp);

  if(alt != NULL)
    return (r - alt[0]) % 2 == 0;

  return bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp) != NULL
      || bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp) != NULL;
}

/* Map r to its lower-case counterpart; r is returned unchanged when no
   table lists it. */
C_regparm int C_utf_char_downcase(int r)
{
  const int *row;

  /* alternating ranges: an even offset from the range start maps to the
     next code point, an odd offset is left alone */
  row = bsearch(&r, upper3, nelem(upper3), sizeof *upper3, &rune2cmp);
  if(row != NULL) {
    if((r - row[0]) % 2 != 0) return r;
    return r + 1;
  }

  /* contiguous ranges: keep the distance from the range start and rebase
     it on the third column */
  row = bsearch(&r, upper2, nelem(upper2), sizeof *upper2, &rune2cmp);
  if(row != NULL)
    return row[2] + (r - row[0]);

  /* single code points with an explicit mapping */
  row = bsearch(&r, upper1, nelem(upper1), sizeof *upper1, &rune1cmp);
  return row != NULL ? row[1] : r;
}

/* Inclusive { first, last } ranges searched with rune2cmp; users look at
   the parity of the offset from `first`. */
static int lower4[][2] = {
  { 0x0101, 0x012F }, { 0x0133, 0x0137 }, { 0x013A, 0x0148 }, { 0x014B, 0x0177 }, { 0x017A, 0x017E },
  { 0x0183, 0x0185 }, { 0x01A1, 0x01A5 }, { 0x01B4, 0x01B6 }, { 0x01CE, 0x01DC }, { 0x01DF, 0x01EF },
  { 0x01F9, 0x021F }, { 0x0223, 0x0233 }, { 0x0247, 0x024F }, { 0x0371, 0x0373 }, { 0x03D9, 0x03EF },
  { 0x0461, 0x0481 }, { 0x048B, 0x04BF }, { 0x04C2, 0x04CE }, { 0x04D1, 0x052F }, { 0x1E01, 0x1E95 },
  { 0x1EA1, 0x1EFF }, { 0x2C68, 0x2C6C }, { 0x2C81, 0x2CE3 }, { 0x2CEC, 0x2CEE }, { 0xA641, 0xA66D },
  { 0xA681, 0xA69B }, { 0xA723, 0xA72F }, { 0xA733, 0xA76F }, { 0xA77A, 0xA77C }, { 0xA77F, 0xA787 },
  { 0xA791, 0xA793 }, { 0xA797, 0xA7A9 }, { 0xA7B5, 0xA7C3 }, { 0xA7C8, 0xA7CA }, { 0xA7D7, 0xA7D9 },
};

/* Rows are { first, last, value for first }. */
static int lower2[][3] = {
  { 0x0061, 0x007A, 0x0041 }, { 0x00E0, 0x00F6, 0x00C0 }, { 0x00F8, 0x00FE, 0x00D8 }, { 0x01AA, 0x01AB, 0x01AA },
  { 0x0234, 0x0239, 0x0234 }, { 0x023F, 0x0240, 0x2C7E }, { 0x0256, 0x0257, 0x0189 }, { 0x025D, 0x025F, 0x025D },
  { 0x026D, 0x026E, 0x026D }, { 0x0273, 0x0274, 0x0273 }, { 0x0276, 0x027C, 0x0276 }, { 0x027E, 0x027F, 0x027E },
  { 0x0284, 0x0286, 0x0284 }, { 0x028A, 0x028B, 0x01B1 }, { 0x028D, 0x0291, 0x028D }, { 0x0295, 0x029C, 0x0295 },
  { 0x029F, 0x02AF, 0x029F }, { 0x037B, 0x037D, 0x03FD }, { 0x03AD, 0x03AF, 0x0388 }, { 0x03B1, 0x03C1, 0x0391 },
  {
0x03C3, 0x03CB, 0x03A3 }, { 0x03CD, 0x03CE, 0x038E }, { 0x0430, 0x044F, 0x0410 }, { 0x0450, 0x045F, 0x0400 }, { 0x0561, 0x0586, 0x0531 }, { 0x0587, 0x0588, 0x0587 }, { 0x10D0, 0x10FA, 0x1C90 }, { 0x10FD, 0x10FF, 0x1CBD }, { 0x13F8, 0x13FD, 0x13F0 }, { 0x1C83, 0x1C84, 0x0421 }, { 0x1D00, 0x1D2B, 0x1D00 }, { 0x1D6B, 0x1D77, 0x1D6B }, { 0x1D7A, 0x1D7C, 0x1D7A }, { 0x1D7E, 0x1D8D, 0x1D7E }, { 0x1D8F, 0x1D9A, 0x1D8F }, { 0x1E96, 0x1E9A, 0x1E96 }, { 0x1E9C, 0x1E9D, 0x1E9C }, { 0x1F00, 0x1F07, 0x1F08 }, { 0x1F10, 0x1F15, 0x1F18 }, { 0x1F20, 0x1F27, 0x1F28 }, { 0x1F30, 0x1F37, 0x1F38 }, { 0x1F40, 0x1F45, 0x1F48 }, { 0x1F60, 0x1F67, 0x1F68 }, { 0x1F70, 0x1F71, 0x1FBA }, { 0x1F72, 0x1F75, 0x1FC8 }, { 0x1F76, 0x1F77, 0x1FDA }, { 0x1F78, 0x1F79, 0x1FF8 }, { 0x1F7A, 0x1F7B, 0x1FEA }, { 0x1F7C, 0x1F7D, 0x1FFA }, { 0x1F80, 0x1F87, 0x1F88 }, { 0x1F90, 0x1F97, 0x1F98 }, { 0x1FA0, 0x1FA7, 0x1FA8 }, { 0x1FB0, 0x1FB1, 0x1FB8 }, { 0x1FB6, 0x1FB7, 0x1FB6 }, { 0x1FC6, 0x1FC7, 0x1FC6 }, { 0x1FD0, 0x1FD1, 0x1FD8 }, { 0x1FD2, 0x1FD3, 0x1FD2 }, { 0x1FD6, 0x1FD7, 0x1FD6 }, { 0x1FE0, 0x1FE1, 0x1FE8 }, { 0x1FE2, 0x1FE4, 0x1FE2 }, { 0x1FE6, 0x1FE7, 0x1FE6 }, { 0x1FF6, 0x1FF7, 0x1FF6 }, { 0x210E, 0x210F, 0x210E }, { 0x213C, 0x213D, 0x213C }, { 0x2146, 0x2149, 0x2146 }, { 0x2C30, 0x2C5F, 0x2C00 }, { 0x2C77, 0x2C7B, 0x2C77 }, { 0x2D00, 0x2D25, 0x10A0 }, { 0xA730, 0xA731, 0xA730 }, { 0xA771, 0xA778, 0xA771 }, { 0xAB30, 0xAB52, 0xAB30 }, { 0xAB54, 0xAB5A, 0xAB54 }, { 0xAB60, 0xAB68, 0xAB60 }, { 0xAB70, 0xABBF, 0x13A0 }, { 0xFB00, 0xFB06, 0xFB00 }, { 0xFB13, 0xFB17, 0xFB13 }, { 0xFF41, 0xFF5A, 0xFF21 }, { 0x10428, 0x1044F, 0x10400 }, { 0x104D8, 0x104FB, 0x104B0 }, { 0x10597, 0x105A1, 0x10570 }, { 0x105A3, 0x105B1, 0x1057C }, { 0x105B3, 0x105B9, 0x1058C }, { 0x105BB, 0x105BC, 0x10594 }, { 0x10CC0, 0x10CF2, 0x10C80 }, { 0x118C0, 0x118DF, 0x118A0 }, { 0x16E60, 0x16E7F, 0x16E40 }, { 0x1D41A, 0x1D433, 0x1D41A }, { 0x1D44E, 0x1D454, 0x1D44E }, { 0x1D456, 0x1D467, 0x1D456 }, { 0x1D482, 0x1D49B, 0x1D482 }, { 
0x1D4B6, 0x1D4B9, 0x1D4B6 }, { 0x1D4BD, 0x1D4C3, 0x1D4BD }, { 0x1D4C5, 0x1D4CF, 0x1D4C5 }, { 0x1D4EA, 0x1D503, 0x1D4EA }, { 0x1D51E, 0x1D537, 0x1D51E }, { 0x1D552, 0x1D56B, 0x1D552 }, { 0x1D586, 0x1D59F, 0x1D586 }, { 0x1D5BA, 0x1D5D3, 0x1D5BA }, { 0x1D5EE, 0x1D607, 0x1D5EE }, { 0x1D622, 0x1D63B, 0x1D622 }, { 0x1D656, 0x1D66F, 0x1D656 }, { 0x1D68A, 0x1D6A5, 0x1D68A }, { 0x1D6C2, 0x1D6DA, 0x1D6C2 }, { 0x1D6DC, 0x1D6E1, 0x1D6DC }, { 0x1D6FC, 0x1D714, 0x1D6FC }, { 0x1D716, 0x1D71B, 0x1D716 }, { 0x1D736, 0x1D74E, 0x1D736 }, { 0x1D750, 0x1D755, 0x1D750 }, { 0x1D770, 0x1D788, 0x1D770 }, { 0x1D78A, 0x1D78F, 0x1D78A }, { 0x1D7AA, 0x1D7C2, 0x1D7AA }, { 0x1D7C4, 0x1D7C9, 0x1D7C4 }, { 0x1DF00, 0x1DF09, 0x1DF00 }, { 0x1DF0B, 0x1DF1E, 0x1DF0B }, { 0x1E922, 0x1E943, 0x1E900 }, }; static int lower1[][2] = { { 0x00B5, 0x039C }, { 0x00DF, 0x00DF }, { 0x00FF, 0x0178 }, { 0x0131, 0x0049 }, { 0x0138, 0x0138 }, { 0x0149, 0x0149 }, { 0x017F, 0x0053 }, { 0x0180, 0x0243 }, { 0x0188, 0x0187 }, { 0x018C, 0x018B }, { 0x018D, 0x018D }, { 0x0192, 0x0191 }, { 0x0195, 0x01F6 }, { 0x0199, 0x0198 }, { 0x019A, 0x023D }, { 0x019B, 0x019B }, { 0x019E, 0x0220 }, { 0x01A8, 0x01A7 }, { 0x01AD, 0x01AC }, { 0x01B0, 0x01AF }, { 0x01B9, 0x01B8 }, { 0x01BA, 0x01BA }, { 0x01BD, 0x01BC }, { 0x01BE, 0x01BE }, { 0x01BF, 0x01F7 }, { 0x01C6, 0x01C4 }, { 0x01C9, 0x01C7 }, { 0x01CC, 0x01CA }, { 0x01DD, 0x018E }, { 0x01F0, 0x01F0 }, { 0x01F3, 0x01F1 }, { 0x01F5, 0x01F4 }, { 0x0221, 0x0221 }, { 0x023C, 0x023B }, { 0x0242, 0x0241 }, { 0x0250, 0x2C6F }, { 0x0251, 0x2C6D }, { 0x0252, 0x2C70 }, { 0x0253, 0x0181 }, { 0x0254, 0x0186 }, { 0x0255, 0x0255 }, { 0x0258, 0x0258 }, { 0x0259, 0x018F }, { 0x025A, 0x025A }, { 0x025B, 0x0190 }, { 0x025C, 0xA7AB }, { 0x0260, 0x0193 }, { 0x0261, 0xA7AC }, { 0x0262, 0x0262 }, { 0x0263, 0x0194 }, { 0x0264, 0x0264 }, { 0x0265, 0xA78D }, { 0x0266, 0xA7AA }, { 0x0267, 0x0267 }, { 0x0268, 0x0197 }, { 0x0269, 0x0196 }, { 0x026A, 0xA7AE }, { 0x026B, 0x2C62 }, { 0x026C, 0xA7AD }, { 0x026F, 0x019C 
}, { 0x0270, 0x0270 }, { 0x0271, 0x2C6E }, { 0x0272, 0x019D }, { 0x0275, 0x019F }, { 0x027D, 0x2C64 }, { 0x0280, 0x01A6 }, { 0x0281, 0x0281 }, { 0x0282, 0xA7C5 }, { 0x0283, 0x01A9 }, { 0x0287, 0xA7B1 }, { 0x0288, 0x01AE }, { 0x0289, 0x0244 }, { 0x028C, 0x0245 }, { 0x0292, 0x01B7 }, { 0x0293, 0x0293 }, { 0x029D, 0xA7B2 }, { 0x029E, 0xA7B0 }, { 0x0377, 0x0376 }, { 0x0390, 0x0390 }, { 0x03AC, 0x0386 }, { 0x03B0, 0x03B0 }, { 0x03C2, 0x03A3 }, { 0x03CC, 0x038C }, { 0x03D0, 0x0392 }, { 0x03D1, 0x0398 }, { 0x03D5, 0x03A6 }, { 0x03D6, 0x03A0 }, { 0x03D7, 0x03CF }, { 0x03F0, 0x039A }, { 0x03F1, 0x03A1 }, { 0x03F2, 0x03F9 }, { 0x03F3, 0x037F }, { 0x03F5, 0x0395 }, { 0x03F8, 0x03F7 }, { 0x03FB, 0x03FA }, { 0x03FC, 0x03FC }, { 0x04CF, 0x04C0 }, { 0x0560, 0x0560 }, { 0x1C80, 0x0412 }, { 0x1C81, 0x0414 }, { 0x1C82, 0x041E }, { 0x1C85, 0x0422 }, { 0x1C86, 0x042A }, { 0x1C87, 0x0462 }, { 0x1C88, 0xA64A }, { 0x1D79, 0xA77D }, { 0x1D7D, 0x2C63 }, { 0x1D8E, 0xA7C6 }, { 0x1E9B, 0x1E60 }, { 0x1E9F, 0x1E9F }, { 0x1F50, 0x1F50 }, { 0x1F51, 0x1F59 }, { 0x1F52, 0x1F52 }, { 0x1F53, 0x1F5B }, { 0x1F54, 0x1F54 }, { 0x1F55, 0x1F5D }, { 0x1F56, 0x1F56 }, { 0x1F57, 0x1F5F }, { 0x1FB2, 0x1FB2 }, { 0x1FB3, 0x1FBC }, { 0x1FB4, 0x1FB4 }, { 0x1FBE, 0x0399 }, { 0x1FC2, 0x1FC2 }, { 0x1FC3, 0x1FCC }, { 0x1FC4, 0x1FC4 }, { 0x1FE5, 0x1FEC }, { 0x1FF2, 0x1FF2 }, { 0x1FF3, 0x1FFC }, { 0x1FF4, 0x1FF4 }, { 0x210A, 0x210A }, { 0x2113, 0x2113 }, { 0x212F, 0x212F }, { 0x2134, 0x2134 }, { 0x2139, 0x2139 }, { 0x214E, 0x2132 }, { 0x2184, 0x2183 }, { 0x2C61, 0x2C60 }, { 0x2C65, 0x023A }, { 0x2C66, 0x023E }, { 0x2C71, 0x2C71 }, { 0x2C73, 0x2C72 }, { 0x2C74, 0x2C74 }, { 0x2C76, 0x2C75 }, { 0x2CE4, 0x2CE4 }, { 0x2CF3, 0x2CF2 }, { 0x2D27, 0x10C7 }, { 0x2D2D, 0x10CD }, { 0xA78C, 0xA78B }, { 0xA78E, 0xA78E }, { 0xA794, 0xA7C4 }, { 0xA795, 0xA795 }, { 0xA7AF, 0xA7AF }, { 0xA7D1, 0xA7D0 }, { 0xA7D3, 0xA7D3 }, { 0xA7D5, 0xA7D5 }, { 0xA7F6, 0xA7F5 }, { 0xA7FA, 0xA7FA }, { 0xAB53, 0xA7B3 }, { 0x1D4BB, 0x1D4BB }, { 0x1D7CB, 
0x1D7CB }, }; C_regparm int C_utf_islower(int r) { int *match; if((match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp))) return !((r - match[0]) % 2); if(bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp)) return 1; if(bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp)) return 1; return 0; } C_regparm int C_utf_char_upcase(int r) { int *match; match = bsearch(&r, lower4, nelem(lower4), sizeof *lower4, &rune2cmp); if (match) return ((r - match[0]) % 2) ? r : r - 1; match = bsearch(&r, lower2, nelem(lower2), sizeof *lower2, &rune2cmp); if (match) return match[2] + (r - match[0]); match = bsearch(&r, lower1, nelem(lower1), sizeof *lower1, &rune1cmp); if (match) return match[1]; return r; } static int digit2[][2] = { { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06F0, 0x06F9 }, { 0x07C0, 0x07C9 }, { 0x0966, 0x096F }, { 0x09E6, 0x09EF }, { 0x0A66, 0x0A6F }, { 0x0AE6, 0x0AEF }, { 0x0B66, 0x0B6F }, { 0x0BE6, 0x0BEF }, { 0x0C66, 0x0C6F }, { 0x0CE6, 0x0CEF }, { 0x0D66, 0x0D6F }, { 0x0DE6, 0x0DEF }, { 0x0E50, 0x0E59 }, { 0x0ED0, 0x0ED9 }, { 0x0F20, 0x0F29 }, { 0x1040, 0x1049 }, { 0x1090, 0x1099 }, { 0x17E0, 0x17E9 }, { 0x1810, 0x1819 }, { 0x1946, 0x194F }, { 0x19D0, 0x19D9 }, { 0x1A80, 0x1A89 }, { 0x1A90, 0x1A99 }, { 0x1B50, 0x1B59 }, { 0x1BB0, 0x1BB9 }, { 0x1C40, 0x1C49 }, { 0x1C50, 0x1C59 }, { 0xA620, 0xA629 }, { 0xA8D0, 0xA8D9 }, { 0xA900, 0xA909 }, { 0xA9D0, 0xA9D9 }, { 0xA9F0, 0xA9F9 }, { 0xAA50, 0xAA59 }, { 0xABF0, 0xABF9 }, { 0xFF10, 0xFF19 }, { 0x104A0, 0x104A9 }, { 0x10D30, 0x10D39 }, { 0x11066, 0x1106F }, { 0x110F0, 0x110F9 }, { 0x11136, 0x1113F }, { 0x111D0, 0x111D9 }, { 0x112F0, 0x112F9 }, { 0x11450, 0x11459 }, { 0x114D0, 0x114D9 }, { 0x11650, 0x11659 }, { 0x116C0, 0x116C9 }, { 0x11730, 0x11739 }, { 0x118E0, 0x118E9 }, { 0x11950, 0x11959 }, { 0x11C50, 0x11C59 }, { 0x11D50, 0x11D59 }, { 0x11DA0, 0x11DA9 }, { 0x16A60, 0x16A69 }, { 0x16AC0, 0x16AC9 }, { 0x16B50, 0x16B59 }, { 0x1D7CE, 0x1D7FF }, { 0x1E140, 0x1E149 }, { 0x1E2F0, 
0x1E2F9 }, { 0x1E950, 0x1E959 }, { 0x1FBF0, 0x1FBF9 }, }; C_regparm int C_utf_isdigit(int r) { int *dp = bsearch(&r, digit2, nelem(digit2), sizeof *digit2, &rune2cmp); if(dp != NULL) return 1 + r - dp[ 0 ]; return 0; } static int alpha3[][2] = { { 0x00D6, 0x00D8 }, { 0x00F6, 0x00F8 }, { 0x02EC, 0x02EE }, { 0x0374, 0x0376 }, { 0x037D, 0x037F }, { 0x0386, 0x0388 }, { 0x038A, 0x038E }, { 0x03A1, 0x03A3 }, { 0x03F5, 0x03F7 }, { 0x052F, 0x0531 }, { 0x066F, 0x0671 }, { 0x06D3, 0x06D5 }, { 0x0710, 0x0712 }, { 0x0887, 0x0889 }, { 0x09A8, 0x09AA }, { 0x09B0, 0x09B2 }, { 0x09DD, 0x09DF }, { 0x0A28, 0x0A2A }, { 0x0A30, 0x0A32 }, { 0x0A33, 0x0A35 }, { 0x0A36, 0x0A38 }, { 0x0A5C, 0x0A5E }, { 0x0A8D, 0x0A8F }, { 0x0A91, 0x0A93 }, { 0x0AA8, 0x0AAA }, { 0x0AB0, 0x0AB2 }, { 0x0AB3, 0x0AB5 }, { 0x0B28, 0x0B2A }, { 0x0B30, 0x0B32 }, { 0x0B33, 0x0B35 }, { 0x0B5D, 0x0B5F }, { 0x0B83, 0x0B85 }, { 0x0B90, 0x0B92 }, { 0x0B9A, 0x0B9E }, { 0x0C0C, 0x0C0E }, { 0x0C10, 0x0C12 }, { 0x0C28, 0x0C2A }, { 0x0C8C, 0x0C8E }, { 0x0C90, 0x0C92 }, { 0x0CA8, 0x0CAA }, { 0x0CB3, 0x0CB5 }, { 0x0CDE, 0x0CE0 }, { 0x0D0C, 0x0D0E }, { 0x0D10, 0x0D12 }, { 0x0DB1, 0x0DB3 }, { 0x0DBB, 0x0DBD }, { 0x0E30, 0x0E32 }, { 0x0E82, 0x0E86 }, { 0x0E8A, 0x0E8C }, { 0x0EA3, 0x0EA7 }, { 0x0EB0, 0x0EB2 }, { 0x0EC4, 0x0EC6 }, { 0x0F47, 0x0F49 }, { 0x10C5, 0x10C7 }, { 0x10FA, 0x10FC }, { 0x1248, 0x124A }, { 0x1256, 0x125A }, { 0x1288, 0x128A }, { 0x12B0, 0x12B2 }, { 0x12BE, 0x12C2 }, { 0x12D6, 0x12D8 }, { 0x1310, 0x1312 }, { 0x167F, 0x1681 }, { 0x176C, 0x176E }, { 0x18A8, 0x18AA }, { 0x1CEC, 0x1CEE }, { 0x1CF3, 0x1CF5 }, { 0x1F57, 0x1F5F }, { 0x1FB4, 0x1FB6 }, { 0x1FBC, 0x1FBE }, { 0x1FC4, 0x1FC6 }, { 0x1FF4, 0x1FF6 }, { 0x2113, 0x2115 }, { 0x2124, 0x212A }, { 0x212D, 0x212F }, { 0x2D25, 0x2D27 }, { 0x2DA6, 0x2DA8 }, { 0x2DAE, 0x2DB0 }, { 0x2DB6, 0x2DB8 }, { 0x2DBE, 0x2DC0 }, { 0x2DC6, 0x2DC8 }, { 0x2DCE, 0x2DD0 }, { 0x2DD6, 0x2DD8 }, { 0x309F, 0x30A1 }, { 0x30FA, 0x30FC }, { 0x312F, 0x3131 }, { 0xA7D1, 0xA7D5 }, { 0xA801, 
0xA803 }, { 0xA805, 0xA807 }, { 0xA80A, 0xA80C }, { 0xA8FB, 0xA8FD }, { 0xA9E4, 0xA9E6 }, { 0xA9FE, 0xAA00 }, { 0xAA42, 0xAA44 }, { 0xAAAF, 0xAAB1 }, { 0xAAC0, 0xAAC2 }, { 0xAB26, 0xAB28 }, { 0xAB2E, 0xAB30 }, { 0xAB5A, 0xAB5C }, { 0xFB1D, 0xFB1F }, { 0xFB28, 0xFB2A }, { 0xFB36, 0xFB38 }, { 0xFB3C, 0xFB40 }, { 0xFB41, 0xFB43 }, { 0xFB44, 0xFB46 }, { 0xFE74, 0xFE76 }, { 0x1000B, 0x1000D }, { 0x10026, 0x10028 }, { 0x1003A, 0x1003C }, { 0x1003D, 0x1003F }, { 0x10340, 0x10342 }, { 0x1057A, 0x1057C }, { 0x1058A, 0x1058C }, { 0x10592, 0x10594 }, { 0x10595, 0x10597 }, { 0x105A1, 0x105A3 }, { 0x105B1, 0x105B3 }, { 0x105B9, 0x105BB }, { 0x10785, 0x10787 }, { 0x107B0, 0x107B2 }, { 0x10808, 0x1080A }, { 0x10835, 0x10837 }, { 0x108F2, 0x108F4 }, { 0x10A13, 0x10A15 }, { 0x10A17, 0x10A19 }, { 0x10AC7, 0x10AC9 }, { 0x111DA, 0x111DC }, { 0x11211, 0x11213 }, { 0x11286, 0x1128A }, { 0x1128D, 0x1128F }, { 0x1129D, 0x1129F }, { 0x11328, 0x1132A }, { 0x11330, 0x11332 }, { 0x11333, 0x11335 }, { 0x114C5, 0x114C7 }, { 0x11913, 0x11915 }, { 0x11916, 0x11918 }, { 0x1193F, 0x11941 }, { 0x119E1, 0x119E3 }, { 0x11C08, 0x11C0A }, { 0x11D06, 0x11D08 }, { 0x11D09, 0x11D0B }, { 0x11D65, 0x11D67 }, { 0x11D68, 0x11D6A }, { 0x16FE1, 0x16FE3 }, { 0x1AFF3, 0x1AFF5 }, { 0x1AFFB, 0x1AFFD }, { 0x1AFFE, 0x1B000 }, { 0x1D454, 0x1D456 }, { 0x1D49C, 0x1D49E }, { 0x1D4AC, 0x1D4AE }, { 0x1D4B9, 0x1D4BD }, { 0x1D4C3, 0x1D4C5 }, { 0x1D505, 0x1D507 }, { 0x1D514, 0x1D516 }, { 0x1D51C, 0x1D51E }, { 0x1D539, 0x1D53B }, { 0x1D53E, 0x1D540 }, { 0x1D544, 0x1D546 }, { 0x1D550, 0x1D552 }, { 0x1D6C0, 0x1D6C2 }, { 0x1D6DA, 0x1D6DC }, { 0x1D6FA, 0x1D6FC }, { 0x1D714, 0x1D716 }, { 0x1D734, 0x1D736 }, { 0x1D74E, 0x1D750 }, { 0x1D76E, 0x1D770 }, { 0x1D788, 0x1D78A }, { 0x1D7A8, 0x1D7AA }, { 0x1D7C2, 0x1D7C4 }, { 0x1E7E6, 0x1E7E8 }, { 0x1E7EB, 0x1E7ED }, { 0x1E7EE, 0x1E7F0 }, { 0x1E7FE, 0x1E800 }, { 0x1EE03, 0x1EE05 }, { 0x1EE1F, 0x1EE21 }, { 0x1EE22, 0x1EE24 }, { 0x1EE27, 0x1EE29 }, { 0x1EE32, 0x1EE34 }, { 0x1EE37, 0x1EE3B }, { 
0x1EE47, 0x1EE4D }, { 0x1EE4F, 0x1EE51 }, { 0x1EE52, 0x1EE54 }, { 0x1EE57, 0x1EE61 }, { 0x1EE62, 0x1EE64 }, { 0x1EE6A, 0x1EE6C }, { 0x1EE72, 0x1EE74 }, { 0x1EE77, 0x1EE79 }, { 0x1EE7C, 0x1EE80 }, { 0x1EE89, 0x1EE8B }, { 0x1EEA3, 0x1EEA5 }, { 0x1EEA9, 0x1EEAB }, }; static int alpha2[][2] = { { 0x0041, 0x005A }, { 0x0061, 0x007A }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 }, { 0x00F8, 0x02C1 }, { 0x02C6, 0x02D1 }, { 0x02E0, 0x02E4 }, { 0x0370, 0x0374 }, { 0x0376, 0x0377 }, { 0x037A, 0x037D }, { 0x0388, 0x038A }, { 0x038E, 0x03A1 }, { 0x03A3, 0x03F5 }, { 0x03F7, 0x0481 }, { 0x048A, 0x052F }, { 0x0531, 0x0556 }, { 0x0560, 0x0588 }, { 0x05D0, 0x05EA }, { 0x05EF, 0x05F2 }, { 0x0620, 0x064A }, { 0x066E, 0x066F }, { 0x0671, 0x06D3 }, { 0x06E5, 0x06E6 }, { 0x06EE, 0x06EF }, { 0x06FA, 0x06FC }, { 0x0712, 0x072F }, { 0x074D, 0x07A5 }, { 0x07CA, 0x07EA }, { 0x07F4, 0x07F5 }, { 0x0800, 0x0815 }, { 0x0840, 0x0858 }, { 0x0860, 0x086A }, { 0x0870, 0x0887 }, { 0x0889, 0x088E }, { 0x08A0, 0x08C9 }, { 0x0904, 0x0939 }, { 0x0958, 0x0961 }, { 0x0971, 0x0980 }, { 0x0985, 0x098C }, { 0x098F, 0x0990 }, { 0x0993, 0x09A8 }, { 0x09AA, 0x09B0 }, { 0x09B6, 0x09B9 }, { 0x09DC, 0x09DD }, { 0x09DF, 0x09E1 }, { 0x09F0, 0x09F1 }, { 0x0A05, 0x0A0A }, { 0x0A0F, 0x0A10 }, { 0x0A13, 0x0A28 }, { 0x0A2A, 0x0A30 }, { 0x0A32, 0x0A33 }, { 0x0A35, 0x0A36 }, { 0x0A38, 0x0A39 }, { 0x0A59, 0x0A5C }, { 0x0A72, 0x0A74 }, { 0x0A85, 0x0A8D }, { 0x0A8F, 0x0A91 }, { 0x0A93, 0x0AA8 }, { 0x0AAA, 0x0AB0 }, { 0x0AB2, 0x0AB3 }, { 0x0AB5, 0x0AB9 }, { 0x0AE0, 0x0AE1 }, { 0x0B05, 0x0B0C }, { 0x0B0F, 0x0B10 }, { 0x0B13, 0x0B28 }, { 0x0B2A, 0x0B30 }, { 0x0B32, 0x0B33 }, { 0x0B35, 0x0B39 }, { 0x0B5C, 0x0B5D }, { 0x0B5F, 0x0B61 }, { 0x0B85, 0x0B8A }, { 0x0B8E, 0x0B90 }, { 0x0B92, 0x0B95 }, { 0x0B99, 0x0B9A }, { 0x0B9E, 0x0B9F }, { 0x0BA3, 0x0BA4 }, { 0x0BA8, 0x0BAA }, { 0x0BAE, 0x0BB9 }, { 0x0C05, 0x0C0C }, { 0x0C0E, 0x0C10 }, { 0x0C12, 0x0C28 }, { 0x0C2A, 0x0C39 }, { 0x0C58, 0x0C5A }, { 0x0C60, 0x0C61 }, { 0x0C85, 0x0C8C }, { 
0x0C8E, 0x0C90 }, { 0x0C92, 0x0CA8 }, { 0x0CAA, 0x0CB3 }, { 0x0CB5, 0x0CB9 }, { 0x0CDD, 0x0CDE }, { 0x0CE0, 0x0CE1 }, { 0x0CF1, 0x0CF2 }, { 0x0D04, 0x0D0C }, { 0x0D0E, 0x0D10 }, { 0x0D12, 0x0D3A }, { 0x0D54, 0x0D56 }, { 0x0D5F, 0x0D61 }, { 0x0D7A, 0x0D7F }, { 0x0D85, 0x0D96 }, { 0x0D9A, 0x0DB1 }, { 0x0DB3, 0x0DBB }, { 0x0DC0, 0x0DC6 }, { 0x0E01, 0x0E30 }, { 0x0E32, 0x0E33 }, { 0x0E40, 0x0E46 }, { 0x0E81, 0x0E82 }, { 0x0E86, 0x0E8A }, { 0x0E8C, 0x0EA3 }, { 0x0EA7, 0x0EB0 }, { 0x0EB2, 0x0EB3 }, { 0x0EC0, 0x0EC4 }, { 0x0EDC, 0x0EDF }, { 0x0F40, 0x0F47 }, { 0x0F49, 0x0F6C }, { 0x0F88, 0x0F8C }, { 0x1000, 0x102A }, { 0x1050, 0x1055 }, { 0x105A, 0x105D }, { 0x1065, 0x1066 }, { 0x106E, 0x1070 }, { 0x1075, 0x1081 }, { 0x10A0, 0x10C5 }, { 0x10D0, 0x10FA }, { 0x10FC, 0x1248 }, { 0x124A, 0x124D }, { 0x1250, 0x1256 }, { 0x125A, 0x125D }, { 0x1260, 0x1288 }, { 0x128A, 0x128D }, { 0x1290, 0x12B0 }, { 0x12B2, 0x12B5 }, { 0x12B8, 0x12BE }, { 0x12C2, 0x12C5 }, { 0x12C8, 0x12D6 }, { 0x12D8, 0x1310 }, { 0x1312, 0x1315 }, { 0x1318, 0x135A }, { 0x1380, 0x138F }, { 0x13A0, 0x13F5 }, { 0x13F8, 0x13FD }, { 0x1401, 0x166C }, { 0x166F, 0x167F }, { 0x1681, 0x169A }, { 0x16A0, 0x16EA }, { 0x16F1, 0x16F8 }, { 0x1700, 0x1711 }, { 0x171F, 0x1731 }, { 0x1740, 0x1751 }, { 0x1760, 0x176C }, { 0x176E, 0x1770 }, { 0x1780, 0x17B3 }, { 0x1820, 0x1878 }, { 0x1880, 0x1884 }, { 0x1887, 0x18A8 }, { 0x18B0, 0x18F5 }, { 0x1900, 0x191E }, { 0x1950, 0x196D }, { 0x1970, 0x1974 }, { 0x1980, 0x19AB }, { 0x19B0, 0x19C9 }, { 0x1A00, 0x1A16 }, { 0x1A20, 0x1A54 }, { 0x1B05, 0x1B33 }, { 0x1B45, 0x1B4C }, { 0x1B83, 0x1BA0 }, { 0x1BAE, 0x1BAF }, { 0x1BBA, 0x1BE5 }, { 0x1C00, 0x1C23 }, { 0x1C4D, 0x1C4F }, { 0x1C5A, 0x1C7D }, { 0x1C80, 0x1C88 }, { 0x1C90, 0x1CBA }, { 0x1CBD, 0x1CBF }, { 0x1CE9, 0x1CEC }, { 0x1CEE, 0x1CF3 }, { 0x1CF5, 0x1CF6 }, { 0x1D00, 0x1DBF }, { 0x1E00, 0x1F15 }, { 0x1F18, 0x1F1D }, { 0x1F20, 0x1F45 }, { 0x1F48, 0x1F4D }, { 0x1F50, 0x1F57 }, { 0x1F5F, 0x1F7D }, { 0x1F80, 0x1FB4 }, { 0x1FB6, 0x1FBC }, { 
0x1FC2, 0x1FC4 }, { 0x1FC6, 0x1FCC }, { 0x1FD0, 0x1FD3 }, { 0x1FD6, 0x1FDB }, { 0x1FE0, 0x1FEC }, { 0x1FF2, 0x1FF4 }, { 0x1FF6, 0x1FFC }, { 0x2090, 0x209C }, { 0x210A, 0x2113 }, { 0x2119, 0x211D }, { 0x212A, 0x212D }, { 0x212F, 0x2139 }, { 0x213C, 0x213F }, { 0x2145, 0x2149 }, { 0x2183, 0x2184 }, { 0x2C00, 0x2CE4 }, { 0x2CEB, 0x2CEE }, { 0x2CF2, 0x2CF3 }, { 0x2D00, 0x2D25 }, { 0x2D30, 0x2D67 }, { 0x2D80, 0x2D96 }, { 0x2DA0, 0x2DA6 }, { 0x2DA8, 0x2DAE }, { 0x2DB0, 0x2DB6 }, { 0x2DB8, 0x2DBE }, { 0x2DC0, 0x2DC6 }, { 0x2DC8, 0x2DCE }, { 0x2DD0, 0x2DD6 }, { 0x2DD8, 0x2DDE }, { 0x3005, 0x3006 }, { 0x3031, 0x3035 }, { 0x303B, 0x303C }, { 0x3041, 0x3096 }, { 0x309D, 0x309F }, { 0x30A1, 0x30FA }, { 0x30FC, 0x30FF }, { 0x3105, 0x312F }, { 0x3131, 0x318E }, { 0x31A0, 0x31BF }, { 0x31F0, 0x31FF }, { 0x9FFF, 0xA48C }, { 0xA4D0, 0xA4FD }, { 0xA500, 0xA60C }, { 0xA610, 0xA61F }, { 0xA62A, 0xA62B }, { 0xA640, 0xA66E }, { 0xA67F, 0xA69D }, { 0xA6A0, 0xA6E5 }, { 0xA717, 0xA71F }, { 0xA722, 0xA788 }, { 0xA78B, 0xA7CA }, { 0xA7D0, 0xA7D1 }, { 0xA7D5, 0xA7D9 }, { 0xA7F2, 0xA801 }, { 0xA803, 0xA805 }, { 0xA807, 0xA80A }, { 0xA80C, 0xA822 }, { 0xA840, 0xA873 }, { 0xA882, 0xA8B3 }, { 0xA8F2, 0xA8F7 }, { 0xA8FD, 0xA8FE }, { 0xA90A, 0xA925 }, { 0xA930, 0xA946 }, { 0xA960, 0xA97C }, { 0xA984, 0xA9B2 }, { 0xA9E0, 0xA9E4 }, { 0xA9E6, 0xA9EF }, { 0xA9FA, 0xA9FE }, { 0xAA00, 0xAA28 }, { 0xAA40, 0xAA42 }, { 0xAA44, 0xAA4B }, { 0xAA60, 0xAA76 }, { 0xAA7E, 0xAAAF }, { 0xAAB5, 0xAAB6 }, { 0xAAB9, 0xAABD }, { 0xAADB, 0xAADD }, { 0xAAE0, 0xAAEA }, { 0xAAF2, 0xAAF4 }, { 0xAB01, 0xAB06 }, { 0xAB09, 0xAB0E }, { 0xAB11, 0xAB16 }, { 0xAB20, 0xAB26 }, { 0xAB28, 0xAB2E }, { 0xAB30, 0xAB5A }, { 0xAB5C, 0xAB69 }, { 0xAB70, 0xABE2 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB }, { 0xF900, 0xFA6D }, { 0xFA70, 0xFAD9 }, { 0xFB00, 0xFB06 }, { 0xFB13, 0xFB17 }, { 0xFB1F, 0xFB28 }, { 0xFB2A, 0xFB36 }, { 0xFB38, 0xFB3C }, { 0xFB40, 0xFB41 }, { 0xFB43, 0xFB44 }, { 0xFB46, 0xFBB1 }, { 0xFBD3, 0xFD3D }, { 0xFD50, 0xFD8F }, { 
0xFD92, 0xFDC7 }, { 0xFDF0, 0xFDFB }, { 0xFE70, 0xFE74 }, { 0xFE76, 0xFEFC }, { 0xFF21, 0xFF3A }, { 0xFF41, 0xFF5A }, { 0xFF66, 0xFFBE }, { 0xFFC2, 0xFFC7 }, { 0xFFCA, 0xFFCF }, { 0xFFD2, 0xFFD7 }, { 0xFFDA, 0xFFDC }, { 0x10000, 0x1000B }, { 0x1000D, 0x10026 }, { 0x10028, 0x1003A }, { 0x1003C, 0x1003D }, { 0x1003F, 0x1004D }, { 0x10050, 0x1005D }, { 0x10080, 0x100FA }, { 0x10280, 0x1029C }, { 0x102A0, 0x102D0 }, { 0x10300, 0x1031F }, { 0x1032D, 0x10340 }, { 0x10342, 0x10349 }, { 0x10350, 0x10375 }, { 0x10380, 0x1039D }, { 0x103A0, 0x103C3 }, { 0x103C8, 0x103CF }, { 0x10400, 0x1049D }, { 0x104B0, 0x104D3 }, { 0x104D8, 0x104FB }, { 0x10500, 0x10527 }, { 0x10530, 0x10563 }, { 0x10570, 0x1057A }, { 0x1057C, 0x1058A }, { 0x1058C, 0x10592 }, { 0x10594, 0x10595 }, { 0x10597, 0x105A1 }, { 0x105A3, 0x105B1 }, { 0x105B3, 0x105B9 }, { 0x105BB, 0x105BC }, { 0x10600, 0x10736 }, { 0x10740, 0x10755 }, { 0x10760, 0x10767 }, { 0x10780, 0x10785 }, { 0x10787, 0x107B0 }, { 0x107B2, 0x107BA }, { 0x10800, 0x10805 }, { 0x1080A, 0x10835 }, { 0x10837, 0x10838 }, { 0x1083F, 0x10855 }, { 0x10860, 0x10876 }, { 0x10880, 0x1089E }, { 0x108E0, 0x108F2 }, { 0x108F4, 0x108F5 }, { 0x10900, 0x10915 }, { 0x10920, 0x10939 }, { 0x10980, 0x109B7 }, { 0x109BE, 0x109BF }, { 0x10A10, 0x10A13 }, { 0x10A15, 0x10A17 }, { 0x10A19, 0x10A35 }, { 0x10A60, 0x10A7C }, { 0x10A80, 0x10A9C }, { 0x10AC0, 0x10AC7 }, { 0x10AC9, 0x10AE4 }, { 0x10B00, 0x10B35 }, { 0x10B40, 0x10B55 }, { 0x10B60, 0x10B72 }, { 0x10B80, 0x10B91 }, { 0x10C00, 0x10C48 }, { 0x10C80, 0x10CB2 }, { 0x10CC0, 0x10CF2 }, { 0x10D00, 0x10D23 }, { 0x10E80, 0x10EA9 }, { 0x10EB0, 0x10EB1 }, { 0x10F00, 0x10F1C }, { 0x10F30, 0x10F45 }, { 0x10F70, 0x10F81 }, { 0x10FB0, 0x10FC4 }, { 0x10FE0, 0x10FF6 }, { 0x11003, 0x11037 }, { 0x11071, 0x11072 }, { 0x11083, 0x110AF }, { 0x110D0, 0x110E8 }, { 0x11103, 0x11126 }, { 0x11150, 0x11172 }, { 0x11183, 0x111B2 }, { 0x111C1, 0x111C4 }, { 0x11200, 0x11211 }, { 0x11213, 0x1122B }, { 0x11280, 0x11286 }, { 0x1128A, 0x1128D }, 
{ 0x1128F, 0x1129D }, { 0x1129F, 0x112A8 }, { 0x112B0, 0x112DE }, { 0x11305, 0x1130C }, { 0x1130F, 0x11310 }, { 0x11313, 0x11328 }, { 0x1132A, 0x11330 }, { 0x11332, 0x11333 }, { 0x11335, 0x11339 }, { 0x1135D, 0x11361 }, { 0x11400, 0x11434 }, { 0x11447, 0x1144A }, { 0x1145F, 0x11461 }, { 0x11480, 0x114AF }, { 0x114C4, 0x114C5 }, { 0x11580, 0x115AE }, { 0x115D8, 0x115DB }, { 0x11600, 0x1162F }, { 0x11680, 0x116AA }, { 0x11700, 0x1171A }, { 0x11740, 0x11746 }, { 0x11800, 0x1182B }, { 0x118A0, 0x118DF }, { 0x118FF, 0x11906 }, { 0x1190C, 0x11913 }, { 0x11915, 0x11916 }, { 0x11918, 0x1192F }, { 0x119A0, 0x119A7 }, { 0x119AA, 0x119D0 }, { 0x11A0B, 0x11A32 }, { 0x11A5C, 0x11A89 }, { 0x11AB0, 0x11AF8 }, { 0x11C00, 0x11C08 }, { 0x11C0A, 0x11C2E }, { 0x11C72, 0x11C8F }, { 0x11D00, 0x11D06 }, { 0x11D08, 0x11D09 }, { 0x11D0B, 0x11D30 }, { 0x11D60, 0x11D65 }, { 0x11D67, 0x11D68 }, { 0x11D6A, 0x11D89 }, { 0x11EE0, 0x11EF2 }, { 0x12000, 0x12399 }, { 0x12480, 0x12543 }, { 0x12F90, 0x12FF0 }, { 0x13000, 0x1342E }, { 0x14400, 0x14646 }, { 0x16800, 0x16A38 }, { 0x16A40, 0x16A5E }, { 0x16A70, 0x16ABE }, { 0x16AD0, 0x16AED }, { 0x16B00, 0x16B2F }, { 0x16B40, 0x16B43 }, { 0x16B63, 0x16B77 }, { 0x16B7D, 0x16B8F }, { 0x16E40, 0x16E7F }, { 0x16F00, 0x16F4A }, { 0x16F93, 0x16F9F }, { 0x16FE0, 0x16FE1 }, { 0x18800, 0x18CD5 }, { 0x1AFF0, 0x1AFF3 }, { 0x1AFF5, 0x1AFFB }, { 0x1AFFD, 0x1AFFE }, { 0x1B000, 0x1B122 }, { 0x1B150, 0x1B152 }, { 0x1B164, 0x1B167 }, { 0x1B170, 0x1B2FB }, { 0x1BC00, 0x1BC6A }, { 0x1BC70, 0x1BC7C }, { 0x1BC80, 0x1BC88 }, { 0x1BC90, 0x1BC99 }, { 0x1D400, 0x1D454 }, { 0x1D456, 0x1D49C }, { 0x1D49E, 0x1D49F }, { 0x1D4A5, 0x1D4A6 }, { 0x1D4A9, 0x1D4AC }, { 0x1D4AE, 0x1D4B9 }, { 0x1D4BD, 0x1D4C3 }, { 0x1D4C5, 0x1D505 }, { 0x1D507, 0x1D50A }, { 0x1D50D, 0x1D514 }, { 0x1D516, 0x1D51C }, { 0x1D51E, 0x1D539 }, { 0x1D53B, 0x1D53E }, { 0x1D540, 0x1D544 }, { 0x1D54A, 0x1D550 }, { 0x1D552, 0x1D6A5 }, { 0x1D6A8, 0x1D6C0 }, { 0x1D6C2, 0x1D6DA }, { 0x1D6DC, 0x1D6FA }, { 0x1D6FC, 0x1D714 
}, { 0x1D716, 0x1D734 }, { 0x1D736, 0x1D74E }, { 0x1D750, 0x1D76E }, { 0x1D770, 0x1D788 },
  { 0x1D78A, 0x1D7A8 }, { 0x1D7AA, 0x1D7C2 }, { 0x1D7C4, 0x1D7CB }, { 0x1DF00, 0x1DF1E },
  { 0x1E100, 0x1E12C }, { 0x1E137, 0x1E13D }, { 0x1E290, 0x1E2AD }, { 0x1E2C0, 0x1E2EB },
  { 0x1E7E0, 0x1E7E6 }, { 0x1E7E8, 0x1E7EB }, { 0x1E7ED, 0x1E7EE }, { 0x1E7F0, 0x1E7FE },
  { 0x1E800, 0x1E8C4 }, { 0x1E900, 0x1E943 }, { 0x1EE00, 0x1EE03 }, { 0x1EE05, 0x1EE1F },
  { 0x1EE21, 0x1EE22 }, { 0x1EE29, 0x1EE32 }, { 0x1EE34, 0x1EE37 }, { 0x1EE4D, 0x1EE4F },
  { 0x1EE51, 0x1EE52 }, { 0x1EE61, 0x1EE62 }, { 0x1EE67, 0x1EE6A }, { 0x1EE6C, 0x1EE72 },
  { 0x1EE74, 0x1EE77 }, { 0x1EE79, 0x1EE7C }, { 0x1EE80, 0x1EE89 }, { 0x1EE8B, 0x1EE9B },
  { 0x1EEA1, 0x1EEA3 }, { 0x1EEA5, 0x1EEA9 }, { 0x1EEAB, 0x1EEBB }, { 0x2F800, 0x2FA1D },
};

/* Single alphabetic code points, searched with rune1cmp. */
static int alpha1[] = {
  0x00AA, 0x00B5, 0x00BA, 0x0559, 0x06FF, 0x07B1, 0x07FA, 0x081A,
  0x0824, 0x0828, 0x093D, 0x0950, 0x09BD, 0x09CE, 0x09FC, 0x0ABD,
  0x0AD0, 0x0AF9, 0x0B3D, 0x0B71, 0x0BD0, 0x0C3D, 0x0C5D, 0x0C80,
  0x0CBD, 0x0D3D, 0x0D4E, 0x0EBD, 0x0F00, 0x103F, 0x1061, 0x108E,
  0x10CD, 0x17D7, 0x17DC, 0x1AA7, 0x1CFA, 0x2071, 0x207F, 0x2102,
  0x2107, 0x214E, 0x2D2D, 0x2D6F, 0x2E2F, 0x3400, 0x4DBF, 0x4E00,
  0xA9CF, 0xAA7A, 0xAC00, 0xD7A3, 0x1083C, 0x10A00, 0x10F27, 0x11075,
  0x11144, 0x11147, 0x11176, 0x1133D, 0x11350, 0x11644, 0x116B8, 0x11909,
  0x11A00, 0x11A3A, 0x11A50, 0x11A9D, 0x11C40, 0x11D46, 0x11D98, 0x11FB0,
  0x16F50, 0x17000, 0x187F7, 0x18D00, 0x18D08, 0x1D4A2, 0x1E14E, 0x1E94B,
  0x1EE42, 0x20000, 0x2A6DF, 0x2A700, 0x2B738, 0x2B740, 0x2B81D, 0x2B820,
  0x2CEA1, 0x2CEB0, 0x2EBE0, 0x30000, 0x3134A,
};

/* Large letter blocks whose interior is not enumerated in the generated
   tables: alpha1 (and, for 0x9FFF, alpha2) carry only the first and last
   code point of each block, so without this table every code point
   strictly inside them (e.g. 0x4E2D or 0xAC01) was reported as
   non-alphabetic.  Rows are inclusive { first, last } pairs in ascending
   order, searched with rune2cmp. */
static int alpha_blocks[][2] = {
  { 0x3400, 0x4DBF }, { 0x4E00, 0x9FFF }, { 0xAC00, 0xD7A3 }, { 0x17000, 0x187F7 },
  { 0x18D00, 0x18D08 }, { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B738 }, { 0x2B740, 0x2B81D },
  { 0x2B820, 0x2CEA1 }, { 0x2CEB0, 0x2EBE0 }, { 0x30000, 0x3134A },
};

/* Alphabetic test for the code point r.
   Inside an alpha3 range the result is 1 for an even offset from the
   range start and 0 for an odd one.  Otherwise the result is 1 when r is
   in an alpha2 range, in one of the alpha_blocks ranges, or listed in
   alpha1, and 0 in all other cases. */
C_regparm int C_utf_isalpha(int r)
{
  const int *alt = bsearch(&r, alpha3, nelem(alpha3), sizeof *alpha3, &rune2cmp);

  if(alt != NULL)
    return (r - alt[0]) % 2 == 0;

  if(bsearch(&r, alpha2, nelem(alpha2), sizeof *alpha2, &rune2cmp) != NULL)
    return 1;

  /* none of the alpha3 ranges overlaps these blocks, so checking them
     after alpha3 cannot change an alpha3 verdict */
  if(bsearch(&r, alpha_blocks, nelem(alpha_blocks), sizeof *alpha_blocks, &rune2cmp) != NULL)
    return 1;

  if(bsearch(&r, alpha1, nelem(alpha1), sizeof *alpha1, &rune1cmp) != NULL)
    return 1;

  return 0;
}

/* Inclusive { first, last } ranges of whitespace code points. */
static int space2[][2] = {
  { 0x0009, 0x000D }, { 0x001C, 0x0020 }, {
0x2000, 0x200A }, { 0x2028, 0x2029 }, }; static int space1[] = { 0x0085, 0x00A0, 0x1680, 0x202F, 0x205F, 0x3000, }; C_regparm int C_utf_isspace(int r) { if(bsearch(&r, space2, nelem(space2), sizeof *space2, &rune2cmp)) return 1; if(bsearch(&r, space1, nelem(space1), sizeof *space1, &rune1cmp)) return 1; return 0; } static int fold1[][ 2 ] = { {0x0041, 0x0061}, {0x0042, 0x0062}, {0x0043, 0x0063}, {0x0044, 0x0064}, {0x0045, 0x0065}, {0x0046, 0x0066}, {0x0047, 0x0067}, {0x0048, 0x0068}, {0x0049, 0x0069}, {0x004A, 0x006A}, {0x004B, 0x006B}, {0x004C, 0x006C}, {0x004D, 0x006D}, {0x004E, 0x006E}, {0x004F, 0x006F}, {0x0050, 0x0070}, {0x0051, 0x0071}, {0x0052, 0x0072}, {0x0053, 0x0073}, {0x0054, 0x0074}, {0x0055, 0x0075}, {0x0056, 0x0076}, {0x0057, 0x0077}, {0x0058, 0x0078}, {0x0059, 0x0079}, {0x005A, 0x007A}, {0x00B5, 0x03BC}, {0x00C0, 0x00E0}, {0x00C1, 0x00E1}, {0x00C2, 0x00E2}, {0x00C3, 0x00E3}, {0x00C4, 0x00E4}, {0x00C5, 0x00E5}, {0x00C6, 0x00E6}, {0x00C7, 0x00E7}, {0x00C8, 0x00E8}, {0x00C9, 0x00E9}, {0x00CA, 0x00EA}, {0x00CB, 0x00EB}, {0x00CC, 0x00EC}, {0x00CD, 0x00ED}, {0x00CE, 0x00EE}, {0x00CF, 0x00EF}, {0x00D0, 0x00F0}, {0x00D1, 0x00F1}, {0x00D2, 0x00F2}, {0x00D3, 0x00F3}, {0x00D4, 0x00F4}, {0x00D5, 0x00F5}, {0x00D6, 0x00F6}, {0x00D8, 0x00F8}, {0x00D9, 0x00F9}, {0x00DA, 0x00FA}, {0x00DB, 0x00FB}, {0x00DC, 0x00FC}, {0x00DD, 0x00FD}, {0x00DE, 0x00FE}, {0x0100, 0x0101}, {0x0102, 0x0103}, {0x0104, 0x0105}, {0x0106, 0x0107}, {0x0108, 0x0109}, {0x010A, 0x010B}, {0x010C, 0x010D}, {0x010E, 0x010F}, {0x0110, 0x0111}, {0x0112, 0x0113}, {0x0114, 0x0115}, {0x0116, 0x0117}, {0x0118, 0x0119}, {0x011A, 0x011B}, {0x011C, 0x011D}, {0x011E, 0x011F}, {0x0120, 0x0121}, {0x0122, 0x0123}, {0x0124, 0x0125}, {0x0126, 0x0127}, {0x0128, 0x0129}, {0x012A, 0x012B}, {0x012C, 0x012D}, {0x012E, 0x012F}, {0x0132, 0x0133}, {0x0134, 0x0135}, {0x0136, 0x0137}, {0x0139, 0x013A}, {0x013B, 0x013C}, {0x013D, 0x013E}, {0x013F, 0x0140}, {0x0141, 0x0142}, {0x0143, 0x0144}, {0x0145, 0x0146}, {0x0147, 
0x0148}, {0x014A, 0x014B}, {0x014C, 0x014D}, {0x014E, 0x014F}, {0x0150, 0x0151}, {0x0152, 0x0153}, {0x0154, 0x0155}, {0x0156, 0x0157}, {0x0158, 0x0159}, {0x015A, 0x015B}, {0x015C, 0x015D}, {0x015E, 0x015F}, {0x0160, 0x0161}, {0x0162, 0x0163}, {0x0164, 0x0165}, {0x0166, 0x0167}, {0x0168, 0x0169}, {0x016A, 0x016B}, {0x016C, 0x016D}, {0x016E, 0x016F}, {0x0170, 0x0171}, {0x0172, 0x0173}, {0x0174, 0x0175}, {0x0176, 0x0177}, {0x0178, 0x00FF}, {0x0179, 0x017A}, {0x017B, 0x017C}, {0x017D, 0x017E}, {0x017F, 0x0073}, {0x0181, 0x0253}, {0x0182, 0x0183}, {0x0184, 0x0185}, {0x0186, 0x0254}, {0x0187, 0x0188}, {0x0189, 0x0256}, {0x018A, 0x0257}, {0x018B, 0x018C}, {0x018E, 0x01DD}, {0x018F, 0x0259}, {0x0190, 0x025B}, {0x0191, 0x0192}, {0x0193, 0x0260}, {0x0194, 0x0263}, {0x0196, 0x0269}, {0x0197, 0x0268}, {0x0198, 0x0199}, {0x019C, 0x026F}, {0x019D, 0x0272}, {0x019F, 0x0275}, {0x01A0, 0x01A1}, {0x01A2, 0x01A3}, {0x01A4, 0x01A5}, {0x01A6, 0x0280}, {0x01A7, 0x01A8}, {0x01A9, 0x0283}, {0x01AC, 0x01AD}, {0x01AE, 0x0288}, {0x01AF, 0x01B0}, {0x01B1, 0x028A}, {0x01B2, 0x028B}, {0x01B3, 0x01B4}, {0x01B5, 0x01B6}, {0x01B7, 0x0292}, {0x01B8, 0x01B9}, {0x01BC, 0x01BD}, {0x01C4, 0x01C6}, {0x01C5, 0x01C6}, {0x01C7, 0x01C9}, {0x01C8, 0x01C9}, {0x01CA, 0x01CC}, {0x01CB, 0x01CC}, {0x01CD, 0x01CE}, {0x01CF, 0x01D0}, {0x01D1, 0x01D2}, {0x01D3, 0x01D4}, {0x01D5, 0x01D6}, {0x01D7, 0x01D8}, {0x01D9, 0x01DA}, {0x01DB, 0x01DC}, {0x01DE, 0x01DF}, {0x01E0, 0x01E1}, {0x01E2, 0x01E3}, {0x01E4, 0x01E5}, {0x01E6, 0x01E7}, {0x01E8, 0x01E9}, {0x01EA, 0x01EB}, {0x01EC, 0x01ED}, {0x01EE, 0x01EF}, {0x01F1, 0x01F3}, {0x01F2, 0x01F3}, {0x01F4, 0x01F5}, {0x01F6, 0x0195}, {0x01F7, 0x01BF}, {0x01F8, 0x01F9}, {0x01FA, 0x01FB}, {0x01FC, 0x01FD}, {0x01FE, 0x01FF}, {0x0200, 0x0201}, {0x0202, 0x0203}, {0x0204, 0x0205}, {0x0206, 0x0207}, {0x0208, 0x0209}, {0x020A, 0x020B}, {0x020C, 0x020D}, {0x020E, 0x020F}, {0x0210, 0x0211}, {0x0212, 0x0213}, {0x0214, 0x0215}, {0x0216, 0x0217}, {0x0218, 0x0219}, {0x021A, 0x021B}, {0x021C, 
0x021D}, {0x021E, 0x021F}, {0x0220, 0x019E}, {0x0222, 0x0223}, {0x0224, 0x0225}, {0x0226, 0x0227}, {0x0228, 0x0229}, {0x022A, 0x022B}, {0x022C, 0x022D}, {0x022E, 0x022F}, {0x0230, 0x0231}, {0x0232, 0x0233}, {0x023A, 0x2C65}, {0x023B, 0x023C}, {0x023D, 0x019A}, {0x023E, 0x2C66}, {0x0241, 0x0242}, {0x0243, 0x0180}, {0x0244, 0x0289}, {0x0245, 0x028C}, {0x0246, 0x0247}, {0x0248, 0x0249}, {0x024A, 0x024B}, {0x024C, 0x024D}, {0x024E, 0x024F}, {0x0345, 0x03B9}, {0x0370, 0x0371}, {0x0372, 0x0373}, {0x0376, 0x0377}, {0x037F, 0x03F3}, {0x0386, 0x03AC}, {0x0388, 0x03AD}, {0x0389, 0x03AE}, {0x038A, 0x03AF}, {0x038C, 0x03CC}, {0x038E, 0x03CD}, {0x038F, 0x03CE}, {0x0391, 0x03B1}, {0x0392, 0x03B2}, {0x0393, 0x03B3}, {0x0394, 0x03B4}, {0x0395, 0x03B5}, {0x0396, 0x03B6}, {0x0397, 0x03B7}, {0x0398, 0x03B8}, {0x0399, 0x03B9}, {0x039A, 0x03BA}, {0x039B, 0x03BB}, {0x039C, 0x03BC}, {0x039D, 0x03BD}, {0x039E, 0x03BE}, {0x039F, 0x03BF}, {0x03A0, 0x03C0}, {0x03A1, 0x03C1}, {0x03A3, 0x03C3}, {0x03A4, 0x03C4}, {0x03A5, 0x03C5}, {0x03A6, 0x03C6}, {0x03A7, 0x03C7}, {0x03A8, 0x03C8}, {0x03A9, 0x03C9}, {0x03AA, 0x03CA}, {0x03AB, 0x03CB}, {0x03C2, 0x03C3}, {0x03CF, 0x03D7}, {0x03D0, 0x03B2}, {0x03D1, 0x03B8}, {0x03D5, 0x03C6}, {0x03D6, 0x03C0}, {0x03D8, 0x03D9}, {0x03DA, 0x03DB}, {0x03DC, 0x03DD}, {0x03DE, 0x03DF}, {0x03E0, 0x03E1}, {0x03E2, 0x03E3}, {0x03E4, 0x03E5}, {0x03E6, 0x03E7}, {0x03E8, 0x03E9}, {0x03EA, 0x03EB}, {0x03EC, 0x03ED}, {0x03EE, 0x03EF}, {0x03F0, 0x03BA}, {0x03F1, 0x03C1}, {0x03F4, 0x03B8}, {0x03F5, 0x03B5}, {0x03F7, 0x03F8}, {0x03F9, 0x03F2}, {0x03FA, 0x03FB}, {0x03FD, 0x037B}, {0x03FE, 0x037C}, {0x03FF, 0x037D}, {0x0400, 0x0450}, {0x0401, 0x0451}, {0x0402, 0x0452}, {0x0403, 0x0453}, {0x0404, 0x0454}, {0x0405, 0x0455}, {0x0406, 0x0456}, {0x0407, 0x0457}, {0x0408, 0x0458}, {0x0409, 0x0459}, {0x040A, 0x045A}, {0x040B, 0x045B}, {0x040C, 0x045C}, {0x040D, 0x045D}, {0x040E, 0x045E}, {0x040F, 0x045F}, {0x0410, 0x0430}, {0x0411, 0x0431}, {0x0412, 0x0432}, {0x0413, 0x0433}, {0x0414, 
0x0434}, {0x0415, 0x0435}, {0x0416, 0x0436}, {0x0417, 0x0437}, {0x0418, 0x0438}, {0x0419, 0x0439}, {0x041A, 0x043A}, {0x041B, 0x043B}, {0x041C, 0x043C}, {0x041D, 0x043D}, {0x041E, 0x043E}, {0x041F, 0x043F}, {0x0420, 0x0440}, {0x0421, 0x0441}, {0x0422, 0x0442}, {0x0423, 0x0443}, {0x0424, 0x0444}, {0x0425, 0x0445}, {0x0426, 0x0446}, {0x0427, 0x0447}, {0x0428, 0x0448}, {0x0429, 0x0449}, {0x042A, 0x044A}, {0x042B, 0x044B}, {0x042C, 0x044C}, {0x042D, 0x044D}, {0x042E, 0x044E}, {0x042F, 0x044F}, {0x0460, 0x0461}, {0x0462, 0x0463}, {0x0464, 0x0465}, {0x0466, 0x0467}, {0x0468, 0x0469}, {0x046A, 0x046B}, {0x046C, 0x046D}, {0x046E, 0x046F}, {0x0470, 0x0471}, {0x0472, 0x0473}, {0x0474, 0x0475}, {0x0476, 0x0477}, {0x0478, 0x0479}, {0x047A, 0x047B}, {0x047C, 0x047D}, {0x047E, 0x047F}, {0x0480, 0x0481}, {0x048A, 0x048B}, {0x048C, 0x048D}, {0x048E, 0x048F}, {0x0490, 0x0491}, {0x0492, 0x0493}, {0x0494, 0x0495}, {0x0496, 0x0497}, {0x0498, 0x0499}, {0x049A, 0x049B}, {0x049C, 0x049D}, {0x049E, 0x049F}, {0x04A0, 0x04A1}, {0x04A2, 0x04A3}, {0x04A4, 0x04A5}, {0x04A6, 0x04A7}, {0x04A8, 0x04A9}, {0x04AA, 0x04AB}, {0x04AC, 0x04AD}, {0x04AE, 0x04AF}, {0x04B0, 0x04B1}, {0x04B2, 0x04B3}, {0x04B4, 0x04B5}, {0x04B6, 0x04B7}, {0x04B8, 0x04B9}, {0x04BA, 0x04BB}, {0x04BC, 0x04BD}, {0x04BE, 0x04BF}, {0x04C0, 0x04CF}, {0x04C1, 0x04C2}, {0x04C3, 0x04C4}, {0x04C5, 0x04C6}, {0x04C7, 0x04C8}, {0x04C9, 0x04CA}, {0x04CB, 0x04CC}, {0x04CD, 0x04CE}, {0x04D0, 0x04D1}, {0x04D2, 0x04D3}, {0x04D4, 0x04D5}, {0x04D6, 0x04D7}, {0x04D8, 0x04D9}, {0x04DA, 0x04DB}, {0x04DC, 0x04DD}, {0x04DE, 0x04DF}, {0x04E0, 0x04E1}, {0x04E2, 0x04E3}, {0x04E4, 0x04E5}, {0x04E6, 0x04E7}, {0x04E8, 0x04E9}, {0x04EA, 0x04EB}, {0x04EC, 0x04ED}, {0x04EE, 0x04EF}, {0x04F0, 0x04F1}, {0x04F2, 0x04F3}, {0x04F4, 0x04F5}, {0x04F6, 0x04F7}, {0x04F8, 0x04F9}, {0x04FA, 0x04FB}, {0x04FC, 0x04FD}, {0x04FE, 0x04FF}, {0x0500, 0x0501}, {0x0502, 0x0503}, {0x0504, 0x0505}, {0x0506, 0x0507}, {0x0508, 0x0509}, {0x050A, 0x050B}, {0x050C, 0x050D}, {0x050E, 
0x050F}, {0x0510, 0x0511}, {0x0512, 0x0513}, {0x0514, 0x0515}, {0x0516, 0x0517}, {0x0518, 0x0519}, {0x051A, 0x051B}, {0x051C, 0x051D}, {0x051E, 0x051F}, {0x0520, 0x0521}, {0x0522, 0x0523}, {0x0524, 0x0525}, {0x0526, 0x0527}, {0x0528, 0x0529}, {0x052A, 0x052B}, {0x052C, 0x052D}, {0x052E, 0x052F}, {0x0531, 0x0561}, {0x0532, 0x0562}, {0x0533, 0x0563}, {0x0534, 0x0564}, {0x0535, 0x0565}, {0x0536, 0x0566}, {0x0537, 0x0567}, {0x0538, 0x0568}, {0x0539, 0x0569}, {0x053A, 0x056A}, {0x053B, 0x056B}, {0x053C, 0x056C}, {0x053D, 0x056D}, {0x053E, 0x056E}, {0x053F, 0x056F}, {0x0540, 0x0570}, {0x0541, 0x0571}, {0x0542, 0x0572}, {0x0543, 0x0573}, {0x0544, 0x0574}, {0x0545, 0x0575}, {0x0546, 0x0576}, {0x0547, 0x0577}, {0x0548, 0x0578}, {0x0549, 0x0579}, {0x054A, 0x057A}, {0x054B, 0x057B}, {0x054C, 0x057C}, {0x054D, 0x057D}, {0x054E, 0x057E}, {0x054F, 0x057F}, {0x0550, 0x0580}, {0x0551, 0x0581}, {0x0552, 0x0582}, {0x0553, 0x0583}, {0x0554, 0x0584}, {0x0555, 0x0585}, {0x0556, 0x0586}, {0x10A0, 0x2D00}, {0x10A1, 0x2D01}, {0x10A2, 0x2D02}, {0x10A3, 0x2D03}, {0x10A4, 0x2D04}, {0x10A5, 0x2D05}, {0x10A6, 0x2D06}, {0x10A7, 0x2D07}, {0x10A8, 0x2D08}, {0x10A9, 0x2D09}, {0x10AA, 0x2D0A}, {0x10AB, 0x2D0B}, {0x10AC, 0x2D0C}, {0x10AD, 0x2D0D}, {0x10AE, 0x2D0E}, {0x10AF, 0x2D0F}, {0x10B0, 0x2D10}, {0x10B1, 0x2D11}, {0x10B2, 0x2D12}, {0x10B3, 0x2D13}, {0x10B4, 0x2D14}, {0x10B5, 0x2D15}, {0x10B6, 0x2D16}, {0x10B7, 0x2D17}, {0x10B8, 0x2D18}, {0x10B9, 0x2D19}, {0x10BA, 0x2D1A}, {0x10BB, 0x2D1B}, {0x10BC, 0x2D1C}, {0x10BD, 0x2D1D}, {0x10BE, 0x2D1E}, {0x10BF, 0x2D1F}, {0x10C0, 0x2D20}, {0x10C1, 0x2D21}, {0x10C2, 0x2D22}, {0x10C3, 0x2D23}, {0x10C4, 0x2D24}, {0x10C5, 0x2D25}, {0x10C7, 0x2D27}, {0x10CD, 0x2D2D}, {0x13F8, 0x13F0}, {0x13F9, 0x13F1}, {0x13FA, 0x13F2}, {0x13FB, 0x13F3}, {0x13FC, 0x13F4}, {0x13FD, 0x13F5}, {0x1C80, 0x0432}, {0x1C81, 0x0434}, {0x1C82, 0x043E}, {0x1C83, 0x0441}, {0x1C84, 0x0442}, {0x1C85, 0x0442}, {0x1C86, 0x044A}, {0x1C87, 0x0463}, {0x1C88, 0xA64B}, {0x1C90, 0x10D0}, {0x1C91, 
0x10D1}, {0x1C92, 0x10D2}, {0x1C93, 0x10D3}, {0x1C94, 0x10D4}, {0x1C95, 0x10D5}, {0x1C96, 0x10D6}, {0x1C97, 0x10D7}, {0x1C98, 0x10D8}, {0x1C99, 0x10D9}, {0x1C9A, 0x10DA}, {0x1C9B, 0x10DB}, {0x1C9C, 0x10DC}, {0x1C9D, 0x10DD}, {0x1C9E, 0x10DE}, {0x1C9F, 0x10DF}, {0x1CA0, 0x10E0}, {0x1CA1, 0x10E1}, {0x1CA2, 0x10E2}, {0x1CA3, 0x10E3}, {0x1CA4, 0x10E4}, {0x1CA5, 0x10E5}, {0x1CA6, 0x10E6}, {0x1CA7, 0x10E7}, {0x1CA8, 0x10E8}, {0x1CA9, 0x10E9}, {0x1CAA, 0x10EA}, {0x1CAB, 0x10EB}, {0x1CAC, 0x10EC}, {0x1CAD, 0x10ED}, {0x1CAE, 0x10EE}, {0x1CAF, 0x10EF}, {0x1CB0, 0x10F0}, {0x1CB1, 0x10F1}, {0x1CB2, 0x10F2}, {0x1CB3, 0x10F3}, {0x1CB4, 0x10F4}, {0x1CB5, 0x10F5}, {0x1CB6, 0x10F6}, {0x1CB7, 0x10F7}, {0x1CB8, 0x10F8}, {0x1CB9, 0x10F9}, {0x1CBA, 0x10FA}, {0x1CBD, 0x10FD}, {0x1CBE, 0x10FE}, {0x1CBF, 0x10FF}, {0x1E00, 0x1E01}, {0x1E02, 0x1E03}, {0x1E04, 0x1E05}, {0x1E06, 0x1E07}, {0x1E08, 0x1E09}, {0x1E0A, 0x1E0B}, {0x1E0C, 0x1E0D}, {0x1E0E, 0x1E0F}, {0x1E10, 0x1E11}, {0x1E12, 0x1E13}, {0x1E14, 0x1E15}, {0x1E16, 0x1E17}, {0x1E18, 0x1E19}, {0x1E1A, 0x1E1B}, {0x1E1C, 0x1E1D}, {0x1E1E, 0x1E1F}, {0x1E20, 0x1E21}, {0x1E22, 0x1E23}, {0x1E24, 0x1E25}, {0x1E26, 0x1E27}, {0x1E28, 0x1E29}, {0x1E2A, 0x1E2B}, {0x1E2C, 0x1E2D}, {0x1E2E, 0x1E2F}, {0x1E30, 0x1E31}, {0x1E32, 0x1E33}, {0x1E34, 0x1E35}, {0x1E36, 0x1E37}, {0x1E38, 0x1E39}, {0x1E3A, 0x1E3B}, {0x1E3C, 0x1E3D}, {0x1E3E, 0x1E3F}, {0x1E40, 0x1E41}, {0x1E42, 0x1E43}, {0x1E44, 0x1E45}, {0x1E46, 0x1E47}, {0x1E48, 0x1E49}, {0x1E4A, 0x1E4B}, {0x1E4C, 0x1E4D}, {0x1E4E, 0x1E4F}, {0x1E50, 0x1E51}, {0x1E52, 0x1E53}, {0x1E54, 0x1E55}, {0x1E56, 0x1E57}, {0x1E58, 0x1E59}, {0x1E5A, 0x1E5B}, {0x1E5C, 0x1E5D}, {0x1E5E, 0x1E5F}, {0x1E60, 0x1E61}, {0x1E62, 0x1E63}, {0x1E64, 0x1E65}, {0x1E66, 0x1E67}, {0x1E68, 0x1E69}, {0x1E6A, 0x1E6B}, {0x1E6C, 0x1E6D}, {0x1E6E, 0x1E6F}, {0x1E70, 0x1E71}, {0x1E72, 0x1E73}, {0x1E74, 0x1E75}, {0x1E76, 0x1E77}, {0x1E78, 0x1E79}, {0x1E7A, 0x1E7B}, {0x1E7C, 0x1E7D}, {0x1E7E, 0x1E7F}, {0x1E80, 0x1E81}, {0x1E82, 0x1E83}, {0x1E84, 
0x1E85}, {0x1E86, 0x1E87}, {0x1E88, 0x1E89}, {0x1E8A, 0x1E8B}, {0x1E8C, 0x1E8D}, {0x1E8E, 0x1E8F}, {0x1E90, 0x1E91}, {0x1E92, 0x1E93}, {0x1E94, 0x1E95}, {0x1E9B, 0x1E61}, {0x1E9E, 0x00DF}, {0x1EA0, 0x1EA1}, {0x1EA2, 0x1EA3}, {0x1EA4, 0x1EA5}, {0x1EA6, 0x1EA7}, {0x1EA8, 0x1EA9}, {0x1EAA, 0x1EAB}, {0x1EAC, 0x1EAD}, {0x1EAE, 0x1EAF}, {0x1EB0, 0x1EB1}, {0x1EB2, 0x1EB3}, {0x1EB4, 0x1EB5}, {0x1EB6, 0x1EB7}, {0x1EB8, 0x1EB9}, {0x1EBA, 0x1EBB}, {0x1EBC, 0x1EBD}, {0x1EBE, 0x1EBF}, {0x1EC0, 0x1EC1}, {0x1EC2, 0x1EC3}, {0x1EC4, 0x1EC5}, {0x1EC6, 0x1EC7}, {0x1EC8, 0x1EC9}, {0x1ECA, 0x1ECB}, {0x1ECC, 0x1ECD}, {0x1ECE, 0x1ECF}, {0x1ED0, 0x1ED1}, {0x1ED2, 0x1ED3}, {0x1ED4, 0x1ED5}, {0x1ED6, 0x1ED7}, {0x1ED8, 0x1ED9}, {0x1EDA, 0x1EDB}, {0x1EDC, 0x1EDD}, {0x1EDE, 0x1EDF}, {0x1EE0, 0x1EE1}, {0x1EE2, 0x1EE3}, {0x1EE4, 0x1EE5}, {0x1EE6, 0x1EE7}, {0x1EE8, 0x1EE9}, {0x1EEA, 0x1EEB}, {0x1EEC, 0x1EED}, {0x1EEE, 0x1EEF}, {0x1EF0, 0x1EF1}, {0x1EF2, 0x1EF3}, {0x1EF4, 0x1EF5}, {0x1EF6, 0x1EF7}, {0x1EF8, 0x1EF9}, {0x1EFA, 0x1EFB}, {0x1EFC, 0x1EFD}, {0x1EFE, 0x1EFF}, {0x1F08, 0x1F00}, {0x1F09, 0x1F01}, {0x1F0A, 0x1F02}, {0x1F0B, 0x1F03}, {0x1F0C, 0x1F04}, {0x1F0D, 0x1F05}, {0x1F0E, 0x1F06}, {0x1F0F, 0x1F07}, {0x1F18, 0x1F10}, {0x1F19, 0x1F11}, {0x1F1A, 0x1F12}, {0x1F1B, 0x1F13}, {0x1F1C, 0x1F14}, {0x1F1D, 0x1F15}, {0x1F28, 0x1F20}, {0x1F29, 0x1F21}, {0x1F2A, 0x1F22}, {0x1F2B, 0x1F23}, {0x1F2C, 0x1F24}, {0x1F2D, 0x1F25}, {0x1F2E, 0x1F26}, {0x1F2F, 0x1F27}, {0x1F38, 0x1F30}, {0x1F39, 0x1F31}, {0x1F3A, 0x1F32}, {0x1F3B, 0x1F33}, {0x1F3C, 0x1F34}, {0x1F3D, 0x1F35}, {0x1F3E, 0x1F36}, {0x1F3F, 0x1F37}, {0x1F48, 0x1F40}, {0x1F49, 0x1F41}, {0x1F4A, 0x1F42}, {0x1F4B, 0x1F43}, {0x1F4C, 0x1F44}, {0x1F4D, 0x1F45}, {0x1F59, 0x1F51}, {0x1F5B, 0x1F53}, {0x1F5D, 0x1F55}, {0x1F5F, 0x1F57}, {0x1F68, 0x1F60}, {0x1F69, 0x1F61}, {0x1F6A, 0x1F62}, {0x1F6B, 0x1F63}, {0x1F6C, 0x1F64}, {0x1F6D, 0x1F65}, {0x1F6E, 0x1F66}, {0x1F6F, 0x1F67}, {0x1F88, 0x1F80}, {0x1F89, 0x1F81}, {0x1F8A, 0x1F82}, {0x1F8B, 0x1F83}, {0x1F8C, 
0x1F84}, {0x1F8D, 0x1F85}, {0x1F8E, 0x1F86}, {0x1F8F, 0x1F87}, {0x1F98, 0x1F90}, {0x1F99, 0x1F91}, {0x1F9A, 0x1F92}, {0x1F9B, 0x1F93}, {0x1F9C, 0x1F94}, {0x1F9D, 0x1F95}, {0x1F9E, 0x1F96}, {0x1F9F, 0x1F97}, {0x1FA8, 0x1FA0}, {0x1FA9, 0x1FA1}, {0x1FAA, 0x1FA2}, {0x1FAB, 0x1FA3}, {0x1FAC, 0x1FA4}, {0x1FAD, 0x1FA5}, {0x1FAE, 0x1FA6}, {0x1FAF, 0x1FA7}, {0x1FB8, 0x1FB0}, {0x1FB9, 0x1FB1}, {0x1FBA, 0x1F70}, {0x1FBB, 0x1F71}, {0x1FBC, 0x1FB3}, {0x1FBE, 0x03B9}, {0x1FC8, 0x1F72}, {0x1FC9, 0x1F73}, {0x1FCA, 0x1F74}, {0x1FCB, 0x1F75}, {0x1FCC, 0x1FC3}, {0x1FD8, 0x1FD0}, {0x1FD9, 0x1FD1}, {0x1FDA, 0x1F76}, {0x1FDB, 0x1F77}, {0x1FE8, 0x1FE0}, {0x1FE9, 0x1FE1}, {0x1FEA, 0x1F7A}, {0x1FEB, 0x1F7B}, {0x1FEC, 0x1FE5}, {0x1FF8, 0x1F78}, {0x1FF9, 0x1F79}, {0x1FFA, 0x1F7C}, {0x1FFB, 0x1F7D}, {0x1FFC, 0x1FF3}, {0x2126, 0x03C9}, {0x212A, 0x006B}, {0x212B, 0x00E5}, {0x2132, 0x214E}, {0x2160, 0x2170}, {0x2161, 0x2171}, {0x2162, 0x2172}, {0x2163, 0x2173}, {0x2164, 0x2174}, {0x2165, 0x2175}, {0x2166, 0x2176}, {0x2167, 0x2177}, {0x2168, 0x2178}, {0x2169, 0x2179}, {0x216A, 0x217A}, {0x216B, 0x217B}, {0x216C, 0x217C}, {0x216D, 0x217D}, {0x216E, 0x217E}, {0x216F, 0x217F}, {0x2183, 0x2184}, {0x24B6, 0x24D0}, {0x24B7, 0x24D1}, {0x24B8, 0x24D2}, {0x24B9, 0x24D3}, {0x24BA, 0x24D4}, {0x24BB, 0x24D5}, {0x24BC, 0x24D6}, {0x24BD, 0x24D7}, {0x24BE, 0x24D8}, {0x24BF, 0x24D9}, {0x24C0, 0x24DA}, {0x24C1, 0x24DB}, {0x24C2, 0x24DC}, {0x24C3, 0x24DD}, {0x24C4, 0x24DE}, {0x24C5, 0x24DF}, {0x24C6, 0x24E0}, {0x24C7, 0x24E1}, {0x24C8, 0x24E2}, {0x24C9, 0x24E3}, {0x24CA, 0x24E4}, {0x24CB, 0x24E5}, {0x24CC, 0x24E6}, {0x24CD, 0x24E7}, {0x24CE, 0x24E8}, {0x24CF, 0x24E9}, {0x2C00, 0x2C30}, {0x2C01, 0x2C31}, {0x2C02, 0x2C32}, {0x2C03, 0x2C33}, {0x2C04, 0x2C34}, {0x2C05, 0x2C35}, {0x2C06, 0x2C36}, {0x2C07, 0x2C37}, {0x2C08, 0x2C38}, {0x2C09, 0x2C39}, {0x2C0A, 0x2C3A}, {0x2C0B, 0x2C3B}, {0x2C0C, 0x2C3C}, {0x2C0D, 0x2C3D}, {0x2C0E, 0x2C3E}, {0x2C0F, 0x2C3F}, {0x2C10, 0x2C40}, {0x2C11, 0x2C41}, {0x2C12, 0x2C42}, {0x2C13, 
0x2C43}, {0x2C14, 0x2C44}, {0x2C15, 0x2C45}, {0x2C16, 0x2C46}, {0x2C17, 0x2C47}, {0x2C18, 0x2C48}, {0x2C19, 0x2C49}, {0x2C1A, 0x2C4A}, {0x2C1B, 0x2C4B}, {0x2C1C, 0x2C4C}, {0x2C1D, 0x2C4D}, {0x2C1E, 0x2C4E}, {0x2C1F, 0x2C4F}, {0x2C20, 0x2C50}, {0x2C21, 0x2C51}, {0x2C22, 0x2C52}, {0x2C23, 0x2C53}, {0x2C24, 0x2C54}, {0x2C25, 0x2C55}, {0x2C26, 0x2C56}, {0x2C27, 0x2C57}, {0x2C28, 0x2C58}, {0x2C29, 0x2C59}, {0x2C2A, 0x2C5A}, {0x2C2B, 0x2C5B}, {0x2C2C, 0x2C5C}, {0x2C2D, 0x2C5D}, {0x2C2E, 0x2C5E}, {0x2C2F, 0x2C5F}, {0x2C60, 0x2C61}, {0x2C62, 0x026B}, {0x2C63, 0x1D7D}, {0x2C64, 0x027D}, {0x2C67, 0x2C68}, {0x2C69, 0x2C6A}, {0x2C6B, 0x2C6C}, {0x2C6D, 0x0251}, {0x2C6E, 0x0271}, {0x2C6F, 0x0250}, {0x2C70, 0x0252}, {0x2C72, 0x2C73}, {0x2C75, 0x2C76}, {0x2C7E, 0x023F}, {0x2C7F, 0x0240}, {0x2C80, 0x2C81}, {0x2C82, 0x2C83}, {0x2C84, 0x2C85}, {0x2C86, 0x2C87}, {0x2C88, 0x2C89}, {0x2C8A, 0x2C8B}, {0x2C8C, 0x2C8D}, {0x2C8E, 0x2C8F}, {0x2C90, 0x2C91}, {0x2C92, 0x2C93}, {0x2C94, 0x2C95}, {0x2C96, 0x2C97}, {0x2C98, 0x2C99}, {0x2C9A, 0x2C9B}, {0x2C9C, 0x2C9D}, {0x2C9E, 0x2C9F}, {0x2CA0, 0x2CA1}, {0x2CA2, 0x2CA3}, {0x2CA4, 0x2CA5}, {0x2CA6, 0x2CA7}, {0x2CA8, 0x2CA9}, {0x2CAA, 0x2CAB}, {0x2CAC, 0x2CAD}, {0x2CAE, 0x2CAF}, {0x2CB0, 0x2CB1}, {0x2CB2, 0x2CB3}, {0x2CB4, 0x2CB5}, {0x2CB6, 0x2CB7}, {0x2CB8, 0x2CB9}, {0x2CBA, 0x2CBB}, {0x2CBC, 0x2CBD}, {0x2CBE, 0x2CBF}, {0x2CC0, 0x2CC1}, {0x2CC2, 0x2CC3}, {0x2CC4, 0x2CC5}, {0x2CC6, 0x2CC7}, {0x2CC8, 0x2CC9}, {0x2CCA, 0x2CCB}, {0x2CCC, 0x2CCD}, {0x2CCE, 0x2CCF}, {0x2CD0, 0x2CD1}, {0x2CD2, 0x2CD3}, {0x2CD4, 0x2CD5}, {0x2CD6, 0x2CD7}, {0x2CD8, 0x2CD9}, {0x2CDA, 0x2CDB}, {0x2CDC, 0x2CDD}, {0x2CDE, 0x2CDF}, {0x2CE0, 0x2CE1}, {0x2CE2, 0x2CE3}, {0x2CEB, 0x2CEC}, {0x2CED, 0x2CEE}, {0x2CF2, 0x2CF3}, {0xA640, 0xA641}, {0xA642, 0xA643}, {0xA644, 0xA645}, {0xA646, 0xA647}, {0xA648, 0xA649}, {0xA64A, 0xA64B}, {0xA64C, 0xA64D}, {0xA64E, 0xA64F}, {0xA650, 0xA651}, {0xA652, 0xA653}, {0xA654, 0xA655}, {0xA656, 0xA657}, {0xA658, 0xA659}, {0xA65A, 0xA65B}, {0xA65C, 
0xA65D}, {0xA65E, 0xA65F}, {0xA660, 0xA661}, {0xA662, 0xA663}, {0xA664, 0xA665}, {0xA666, 0xA667}, {0xA668, 0xA669}, {0xA66A, 0xA66B}, {0xA66C, 0xA66D}, {0xA680, 0xA681}, {0xA682, 0xA683}, {0xA684, 0xA685}, {0xA686, 0xA687}, {0xA688, 0xA689}, {0xA68A, 0xA68B}, {0xA68C, 0xA68D}, {0xA68E, 0xA68F}, {0xA690, 0xA691}, {0xA692, 0xA693}, {0xA694, 0xA695}, {0xA696, 0xA697}, {0xA698, 0xA699}, {0xA69A, 0xA69B}, {0xA722, 0xA723}, {0xA724, 0xA725}, {0xA726, 0xA727}, {0xA728, 0xA729}, {0xA72A, 0xA72B}, {0xA72C, 0xA72D}, {0xA72E, 0xA72F}, {0xA732, 0xA733}, {0xA734, 0xA735}, {0xA736, 0xA737}, {0xA738, 0xA739}, {0xA73A, 0xA73B}, {0xA73C, 0xA73D}, {0xA73E, 0xA73F}, {0xA740, 0xA741}, {0xA742, 0xA743}, {0xA744, 0xA745}, {0xA746, 0xA747}, {0xA748, 0xA749}, {0xA74A, 0xA74B}, {0xA74C, 0xA74D}, {0xA74E, 0xA74F}, {0xA750, 0xA751}, {0xA752, 0xA753}, {0xA754, 0xA755}, {0xA756, 0xA757}, {0xA758, 0xA759}, {0xA75A, 0xA75B}, {0xA75C, 0xA75D}, {0xA75E, 0xA75F}, {0xA760, 0xA761}, {0xA762, 0xA763}, {0xA764, 0xA765}, {0xA766, 0xA767}, {0xA768, 0xA769}, {0xA76A, 0xA76B}, {0xA76C, 0xA76D}, {0xA76E, 0xA76F}, {0xA779, 0xA77A}, {0xA77B, 0xA77C}, {0xA77D, 0x1D79}, {0xA77E, 0xA77F}, {0xA780, 0xA781}, {0xA782, 0xA783}, {0xA784, 0xA785}, {0xA786, 0xA787}, {0xA78B, 0xA78C}, {0xA78D, 0x0265}, {0xA790, 0xA791}, {0xA792, 0xA793}, {0xA796, 0xA797}, {0xA798, 0xA799}, {0xA79A, 0xA79B}, {0xA79C, 0xA79D}, {0xA79E, 0xA79F}, {0xA7A0, 0xA7A1}, {0xA7A2, 0xA7A3}, {0xA7A4, 0xA7A5}, {0xA7A6, 0xA7A7}, {0xA7A8, 0xA7A9}, {0xA7AA, 0x0266}, {0xA7AB, 0x025C}, {0xA7AC, 0x0261}, {0xA7AD, 0x026C}, {0xA7AE, 0x026A}, {0xA7B0, 0x029E}, {0xA7B1, 0x0287}, {0xA7B2, 0x029D}, {0xA7B3, 0xAB53}, {0xA7B4, 0xA7B5}, {0xA7B6, 0xA7B7}, {0xA7B8, 0xA7B9}, {0xA7BA, 0xA7BB}, {0xA7BC, 0xA7BD}, {0xA7BE, 0xA7BF}, {0xA7C0, 0xA7C1}, {0xA7C2, 0xA7C3}, {0xA7C4, 0xA794}, {0xA7C5, 0x0282}, {0xA7C6, 0x1D8E}, {0xA7C7, 0xA7C8}, {0xA7C9, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D6, 0xA7D7}, {0xA7D8, 0xA7D9}, {0xA7F5, 0xA7F6}, {0xAB70, 0x13A0}, {0xAB71, 0x13A1}, {0xAB72, 
0x13A2}, {0xAB73, 0x13A3}, {0xAB74, 0x13A4}, {0xAB75, 0x13A5}, {0xAB76, 0x13A6}, {0xAB77, 0x13A7}, {0xAB78, 0x13A8}, {0xAB79, 0x13A9}, {0xAB7A, 0x13AA}, {0xAB7B, 0x13AB}, {0xAB7C, 0x13AC}, {0xAB7D, 0x13AD}, {0xAB7E, 0x13AE}, {0xAB7F, 0x13AF}, {0xAB80, 0x13B0}, {0xAB81, 0x13B1}, {0xAB82, 0x13B2}, {0xAB83, 0x13B3}, {0xAB84, 0x13B4}, {0xAB85, 0x13B5}, {0xAB86, 0x13B6}, {0xAB87, 0x13B7}, {0xAB88, 0x13B8}, {0xAB89, 0x13B9}, {0xAB8A, 0x13BA}, {0xAB8B, 0x13BB}, {0xAB8C, 0x13BC}, {0xAB8D, 0x13BD}, {0xAB8E, 0x13BE}, {0xAB8F, 0x13BF}, {0xAB90, 0x13C0}, {0xAB91, 0x13C1}, {0xAB92, 0x13C2}, {0xAB93, 0x13C3}, {0xAB94, 0x13C4}, {0xAB95, 0x13C5}, {0xAB96, 0x13C6}, {0xAB97, 0x13C7}, {0xAB98, 0x13C8}, {0xAB99, 0x13C9}, {0xAB9A, 0x13CA}, {0xAB9B, 0x13CB}, {0xAB9C, 0x13CC}, {0xAB9D, 0x13CD}, {0xAB9E, 0x13CE}, {0xAB9F, 0x13CF}, {0xABA0, 0x13D0}, {0xABA1, 0x13D1}, {0xABA2, 0x13D2}, {0xABA3, 0x13D3}, {0xABA4, 0x13D4}, {0xABA5, 0x13D5}, {0xABA6, 0x13D6}, {0xABA7, 0x13D7}, {0xABA8, 0x13D8}, {0xABA9, 0x13D9}, {0xABAA, 0x13DA}, {0xABAB, 0x13DB}, {0xABAC, 0x13DC}, {0xABAD, 0x13DD}, {0xABAE, 0x13DE}, {0xABAF, 0x13DF}, {0xABB0, 0x13E0}, {0xABB1, 0x13E1}, {0xABB2, 0x13E2}, {0xABB3, 0x13E3}, {0xABB4, 0x13E4}, {0xABB5, 0x13E5}, {0xABB6, 0x13E6}, {0xABB7, 0x13E7}, {0xABB8, 0x13E8}, {0xABB9, 0x13E9}, {0xABBA, 0x13EA}, {0xABBB, 0x13EB}, {0xABBC, 0x13EC}, {0xABBD, 0x13ED}, {0xABBE, 0x13EE}, {0xABBF, 0x13EF}, {0xFF21, 0xFF41}, {0xFF22, 0xFF42}, {0xFF23, 0xFF43}, {0xFF24, 0xFF44}, {0xFF25, 0xFF45}, {0xFF26, 0xFF46}, {0xFF27, 0xFF47}, {0xFF28, 0xFF48}, {0xFF29, 0xFF49}, {0xFF2A, 0xFF4A}, {0xFF2B, 0xFF4B}, {0xFF2C, 0xFF4C}, {0xFF2D, 0xFF4D}, {0xFF2E, 0xFF4E}, {0xFF2F, 0xFF4F}, {0xFF30, 0xFF50}, {0xFF31, 0xFF51}, {0xFF32, 0xFF52}, {0xFF33, 0xFF53}, {0xFF34, 0xFF54}, {0xFF35, 0xFF55}, {0xFF36, 0xFF56}, {0xFF37, 0xFF57}, {0xFF38, 0xFF58}, {0xFF39, 0xFF59}, {0xFF3A, 0xFF5A}, {0x10400, 0x10428}, {0x10401, 0x10429}, {0x10402, 0x1042A}, {0x10403, 0x1042B}, {0x10404, 0x1042C}, {0x10405, 0x1042D}, {0x10406, 
0x1042E}, {0x10407, 0x1042F}, {0x10408, 0x10430}, {0x10409, 0x10431}, {0x1040A, 0x10432}, {0x1040B, 0x10433}, {0x1040C, 0x10434}, {0x1040D, 0x10435}, {0x1040E, 0x10436}, {0x1040F, 0x10437}, {0x10410, 0x10438}, {0x10411, 0x10439}, {0x10412, 0x1043A}, {0x10413, 0x1043B}, {0x10414, 0x1043C}, {0x10415, 0x1043D}, {0x10416, 0x1043E}, {0x10417, 0x1043F}, {0x10418, 0x10440}, {0x10419, 0x10441}, {0x1041A, 0x10442}, {0x1041B, 0x10443}, {0x1041C, 0x10444}, {0x1041D, 0x10445}, {0x1041E, 0x10446}, {0x1041F, 0x10447}, {0x10420, 0x10448}, {0x10421, 0x10449}, {0x10422, 0x1044A}, {0x10423, 0x1044B}, {0x10424, 0x1044C}, {0x10425, 0x1044D}, {0x10426, 0x1044E}, {0x10427, 0x1044F}, {0x104B0, 0x104D8}, {0x104B1, 0x104D9}, {0x104B2, 0x104DA}, {0x104B3, 0x104DB}, {0x104B4, 0x104DC}, {0x104B5, 0x104DD}, {0x104B6, 0x104DE}, {0x104B7, 0x104DF}, {0x104B8, 0x104E0}, {0x104B9, 0x104E1}, {0x104BA, 0x104E2}, {0x104BB, 0x104E3}, {0x104BC, 0x104E4}, {0x104BD, 0x104E5}, {0x104BE, 0x104E6}, {0x104BF, 0x104E7}, {0x104C0, 0x104E8}, {0x104C1, 0x104E9}, {0x104C2, 0x104EA}, {0x104C3, 0x104EB}, {0x104C4, 0x104EC}, {0x104C5, 0x104ED}, {0x104C6, 0x104EE}, {0x104C7, 0x104EF}, {0x104C8, 0x104F0}, {0x104C9, 0x104F1}, {0x104CA, 0x104F2}, {0x104CB, 0x104F3}, {0x104CC, 0x104F4}, {0x104CD, 0x104F5}, {0x104CE, 0x104F6}, {0x104CF, 0x104F7}, {0x104D0, 0x104F8}, {0x104D1, 0x104F9}, {0x104D2, 0x104FA}, {0x104D3, 0x104FB}, {0x10570, 0x10597}, {0x10571, 0x10598}, {0x10572, 0x10599}, {0x10573, 0x1059A}, {0x10574, 0x1059B}, {0x10575, 0x1059C}, {0x10576, 0x1059D}, {0x10577, 0x1059E}, {0x10578, 0x1059F}, {0x10579, 0x105A0}, {0x1057A, 0x105A1}, {0x1057C, 0x105A3}, {0x1057D, 0x105A4}, {0x1057E, 0x105A5}, {0x1057F, 0x105A6}, {0x10580, 0x105A7}, {0x10581, 0x105A8}, {0x10582, 0x105A9}, {0x10583, 0x105AA}, {0x10584, 0x105AB}, {0x10585, 0x105AC}, {0x10586, 0x105AD}, {0x10587, 0x105AE}, {0x10588, 0x105AF}, {0x10589, 0x105B0}, {0x1058A, 0x105B1}, {0x1058C, 0x105B3}, {0x1058D, 0x105B4}, {0x1058E, 0x105B5}, {0x1058F, 0x105B6}, {0x10590, 
0x105B7}, {0x10591, 0x105B8}, {0x10592, 0x105B9}, {0x10594, 0x105BB}, {0x10595, 0x105BC}, {0x10C80, 0x10CC0}, {0x10C81, 0x10CC1}, {0x10C82, 0x10CC2}, {0x10C83, 0x10CC3}, {0x10C84, 0x10CC4}, {0x10C85, 0x10CC5}, {0x10C86, 0x10CC6}, {0x10C87, 0x10CC7}, {0x10C88, 0x10CC8}, {0x10C89, 0x10CC9}, {0x10C8A, 0x10CCA}, {0x10C8B, 0x10CCB}, {0x10C8C, 0x10CCC}, {0x10C8D, 0x10CCD}, {0x10C8E, 0x10CCE}, {0x10C8F, 0x10CCF}, {0x10C90, 0x10CD0}, {0x10C91, 0x10CD1}, {0x10C92, 0x10CD2}, {0x10C93, 0x10CD3}, {0x10C94, 0x10CD4}, {0x10C95, 0x10CD5}, {0x10C96, 0x10CD6}, {0x10C97, 0x10CD7}, {0x10C98, 0x10CD8}, {0x10C99, 0x10CD9}, {0x10C9A, 0x10CDA}, {0x10C9B, 0x10CDB}, {0x10C9C, 0x10CDC}, {0x10C9D, 0x10CDD}, {0x10C9E, 0x10CDE}, {0x10C9F, 0x10CDF}, {0x10CA0, 0x10CE0}, {0x10CA1, 0x10CE1}, {0x10CA2, 0x10CE2}, {0x10CA3, 0x10CE3}, {0x10CA4, 0x10CE4}, {0x10CA5, 0x10CE5}, {0x10CA6, 0x10CE6}, {0x10CA7, 0x10CE7}, {0x10CA8, 0x10CE8}, {0x10CA9, 0x10CE9}, {0x10CAA, 0x10CEA}, {0x10CAB, 0x10CEB}, {0x10CAC, 0x10CEC}, {0x10CAD, 0x10CED}, {0x10CAE, 0x10CEE}, {0x10CAF, 0x10CEF}, {0x10CB0, 0x10CF0}, {0x10CB1, 0x10CF1}, {0x10CB2, 0x10CF2}, {0x118A0, 0x118C0}, {0x118A1, 0x118C1}, {0x118A2, 0x118C2}, {0x118A3, 0x118C3}, {0x118A4, 0x118C4}, {0x118A5, 0x118C5}, {0x118A6, 0x118C6}, {0x118A7, 0x118C7}, {0x118A8, 0x118C8}, {0x118A9, 0x118C9}, {0x118AA, 0x118CA}, {0x118AB, 0x118CB}, {0x118AC, 0x118CC}, {0x118AD, 0x118CD}, {0x118AE, 0x118CE}, {0x118AF, 0x118CF}, {0x118B0, 0x118D0}, {0x118B1, 0x118D1}, {0x118B2, 0x118D2}, {0x118B3, 0x118D3}, {0x118B4, 0x118D4}, {0x118B5, 0x118D5}, {0x118B6, 0x118D6}, {0x118B7, 0x118D7}, {0x118B8, 0x118D8}, {0x118B9, 0x118D9}, {0x118BA, 0x118DA}, {0x118BB, 0x118DB}, {0x118BC, 0x118DC}, {0x118BD, 0x118DD}, {0x118BE, 0x118DE}, {0x118BF, 0x118DF}, {0x16E40, 0x16E60}, {0x16E41, 0x16E61}, {0x16E42, 0x16E62}, {0x16E43, 0x16E63}, {0x16E44, 0x16E64}, {0x16E45, 0x16E65}, {0x16E46, 0x16E66}, {0x16E47, 0x16E67}, {0x16E48, 0x16E68}, {0x16E49, 0x16E69}, {0x16E4A, 0x16E6A}, {0x16E4B, 0x16E6B}, {0x16E4C, 
0x16E6C}, {0x16E4D, 0x16E6D}, {0x16E4E, 0x16E6E}, {0x16E4F, 0x16E6F}, {0x16E50, 0x16E70}, {0x16E51, 0x16E71}, {0x16E52, 0x16E72}, {0x16E53, 0x16E73}, {0x16E54, 0x16E74}, {0x16E55, 0x16E75}, {0x16E56, 0x16E76}, {0x16E57, 0x16E77}, {0x16E58, 0x16E78}, {0x16E59, 0x16E79}, {0x16E5A, 0x16E7A}, {0x16E5B, 0x16E7B}, {0x16E5C, 0x16E7C}, {0x16E5D, 0x16E7D}, {0x16E5E, 0x16E7E}, {0x16E5F, 0x16E7F}, {0x1E900, 0x1E922}, {0x1E901, 0x1E923}, {0x1E902, 0x1E924}, {0x1E903, 0x1E925}, {0x1E904, 0x1E926}, {0x1E905, 0x1E927}, {0x1E906, 0x1E928}, {0x1E907, 0x1E929}, {0x1E908, 0x1E92A}, {0x1E909, 0x1E92B}, {0x1E90A, 0x1E92C}, {0x1E90B, 0x1E92D}, {0x1E90C, 0x1E92E}, {0x1E90D, 0x1E92F}, {0x1E90E, 0x1E930}, {0x1E90F, 0x1E931}, {0x1E910, 0x1E932}, {0x1E911, 0x1E933}, {0x1E912, 0x1E934}, {0x1E913, 0x1E935}, {0x1E914, 0x1E936}, {0x1E915, 0x1E937}, {0x1E916, 0x1E938}, {0x1E917, 0x1E939}, {0x1E918, 0x1E93A}, {0x1E919, 0x1E93B}, {0x1E91A, 0x1E93C}, {0x1E91B, 0x1E93D}, {0x1E91C, 0x1E93E}, {0x1E91D, 0x1E93F}, {0x1E91E, 0x1E940}, {0x1E91F, 0x1E941}, {0x1E920, 0x1E942}, {0x1E921, 0x1E943}, };
/* Multi-character case folding table.  Each row is
 *   { code point, replacement 1, replacement 2, replacement 3 }
 * where an unused third replacement slot is 0x0 (e.g. 0x00DF folds to
 * 0x0073 0x0073).  Rows are in ascending order of the first column.
 * Some code points (e.g. 0x1E9E, 0x1F88) also have a single-character
 * entry in fold1; how the two tables are combined is decided by code
 * outside this part of the file -- NOTE(review): presumably fold2 is the
 * "full" folding and fold1 the "simple" one; confirm at the lookup site. */
static int fold2[][ 4 ] = { {0x00DF, 0x0073, 0x0073, 0x0}, {0x0130, 0x0069, 0x0307, 0x0}, {0x0149, 0x02BC, 0x006E, 0x0}, {0x01F0, 0x006A, 0x030C, 0x0}, {0x0390, 0x03B9, 0x0308, 0x0301}, {0x03B0, 0x03C5, 0x0308, 0x0301}, {0x0587, 0x0565, 0x0582, 0x0}, {0x1E96, 0x0068, 0x0331, 0x0}, {0x1E97, 0x0074, 0x0308, 0x0}, {0x1E98, 0x0077, 0x030A, 0x0}, {0x1E99, 0x0079, 0x030A, 0x0}, {0x1E9A, 0x0061, 0x02BE, 0x0}, {0x1E9E, 0x0073, 0x0073, 0x0}, {0x1F50, 0x03C5, 0x0313, 0x0}, {0x1F52, 0x03C5, 0x0313, 0x0300}, {0x1F54, 0x03C5, 0x0313, 0x0301}, {0x1F56, 0x03C5, 0x0313, 0x0342}, {0x1F80, 0x1F00, 0x03B9, 0x0}, {0x1F81, 0x1F01, 0x03B9, 0x0}, {0x1F82, 0x1F02, 0x03B9, 0x0}, {0x1F83, 0x1F03, 0x03B9, 0x0}, {0x1F84, 0x1F04, 0x03B9, 0x0}, {0x1F85, 0x1F05, 0x03B9, 0x0}, {0x1F86, 0x1F06, 0x03B9, 0x0}, {0x1F87, 0x1F07, 0x03B9, 0x0}, {0x1F88, 0x1F00, 0x03B9, 0x0}, {0x1F89, 0x1F01, 0x03B9, 0x0}, {0x1F8A, 0x1F02, 0x03B9, 0x0}, {0x1F8B,
0x1F03, 0x03B9, 0x0}, {0x1F8C, 0x1F04, 0x03B9, 0x0}, {0x1F8D, 0x1F05, 0x03B9, 0x0}, {0x1F8E, 0x1F06, 0x03B9, 0x0}, {0x1F8F, 0x1F07, 0x03B9, 0x0}, {0x1F90, 0x1F20, 0x03B9, 0x0}, {0x1F91, 0x1F21, 0x03B9, 0x0}, {0x1F92, 0x1F22, 0x03B9, 0x0}, {0x1F93, 0x1F23, 0x03B9, 0x0}, {0x1F94, 0x1F24, 0x03B9, 0x0}, {0x1F95, 0x1F25, 0x03B9, 0x0}, {0x1F96, 0x1F26, 0x03B9, 0x0}, {0x1F97, 0x1F27, 0x03B9, 0x0}, {0x1F98, 0x1F20, 0x03B9, 0x0}, {0x1F99, 0x1F21, 0x03B9, 0x0}, {0x1F9A, 0x1F22, 0x03B9, 0x0}, {0x1F9B, 0x1F23, 0x03B9, 0x0}, {0x1F9C, 0x1F24, 0x03B9, 0x0}, {0x1F9D, 0x1F25, 0x03B9, 0x0}, {0x1F9E, 0x1F26, 0x03B9, 0x0}, {0x1F9F, 0x1F27, 0x03B9, 0x0}, {0x1FA0, 0x1F60, 0x03B9, 0x0}, {0x1FA1, 0x1F61, 0x03B9, 0x0}, {0x1FA2, 0x1F62, 0x03B9, 0x0}, {0x1FA3, 0x1F63, 0x03B9, 0x0}, {0x1FA4, 0x1F64, 0x03B9, 0x0}, {0x1FA5, 0x1F65, 0x03B9, 0x0}, {0x1FA6, 0x1F66, 0x03B9, 0x0}, {0x1FA7, 0x1F67, 0x03B9, 0x0}, {0x1FA8, 0x1F60, 0x03B9, 0x0}, {0x1FA9, 0x1F61, 0x03B9, 0x0}, {0x1FAA, 0x1F62, 0x03B9, 0x0}, {0x1FAB, 0x1F63, 0x03B9, 0x0}, {0x1FAC, 0x1F64, 0x03B9, 0x0}, {0x1FAD, 0x1F65, 0x03B9, 0x0}, {0x1FAE, 0x1F66, 0x03B9, 0x0}, {0x1FAF, 0x1F67, 0x03B9, 0x0}, {0x1FB2, 0x1F70, 0x03B9, 0x0}, {0x1FB3, 0x03B1, 0x03B9, 0x0}, {0x1FB4, 0x03AC, 0x03B9, 0x0}, {0x1FB6, 0x03B1, 0x0342, 0x0}, {0x1FB7, 0x03B1, 0x0342, 0x03B9}, {0x1FBC, 0x03B1, 0x03B9, 0x0}, {0x1FC2, 0x1F74, 0x03B9, 0x0}, {0x1FC3, 0x03B7, 0x03B9, 0x0}, {0x1FC4, 0x03AE, 0x03B9, 0x0}, {0x1FC6, 0x03B7, 0x0342, 0x0}, {0x1FC7, 0x03B7, 0x0342, 0x03B9}, {0x1FCC, 0x03B7, 0x03B9, 0x0}, {0x1FD2, 0x03B9, 0x0308, 0x0300}, {0x1FD3, 0x03B9, 0x0308, 0x0301}, {0x1FD6, 0x03B9, 0x0342, 0x0}, {0x1FD7, 0x03B9, 0x0308, 0x0342}, {0x1FE2, 0x03C5, 0x0308, 0x0300}, {0x1FE3, 0x03C5, 0x0308, 0x0301}, {0x1FE4, 0x03C1, 0x0313, 0x0}, {0x1FE6, 0x03C5, 0x0342, 0x0}, {0x1FE7, 0x03C5, 0x0308, 0x0342}, {0x1FF2, 0x1F7C, 0x03B9, 0x0}, {0x1FF3, 0x03C9, 0x03B9, 0x0}, {0x1FF4, 0x03CE, 0x03B9, 0x0}, {0x1FF6, 0x03C9, 0x0342, 0x0}, {0x1FF7, 0x03C9, 0x0342, 0x03B9}, {0x1FFC, 0x03C9, 0x03B9, 
0x0}, {0xFB00, 0x0066, 0x0066, 0x0}, {0xFB01, 0x0066, 0x0069, 0x0}, {0xFB02, 0x0066, 0x006C, 0x0}, {0xFB03, 0x0066, 0x0066, 0x0069}, {0xFB04, 0x0066, 0x0066, 0x006C}, {0xFB05, 0x0073, 0x0074, 0x0}, {0xFB06, 0x0073, 0x0074, 0x0}, {0xFB13, 0x0574, 0x0576, 0x0}, {0xFB14, 0x0574, 0x0565, 0x0}, {0xFB15, 0x0574, 0x056B, 0x0}, {0xFB16, 0x057E, 0x0576, 0x0}, {0xFB17, 0x0574, 0x056D, 0x0}, };

/* Branchless UTF-8 decoder
 * https://raw.githubusercontent.com/skeeto/branchless-utf8/
 * This is free and unencumbered software released into the public domain.
 */

/* Decode the next character, C, from BUF, reporting errors in E.
 *
 * Since this is a branchless decoder, four bytes will be read from the
 * buffer regardless of the actual length of the next character. This
 * means the buffer _must_ have at least three bytes of zero padding
 * following the end of the data stream.
 *
 * Errors are reported in E, which will be non-zero if the parsed
 * character was somehow invalid: invalid byte sequence, non-canonical
 * encoding, or a surrogate half.
 *
 * The function returns a pointer to the next character. When an error
 * occurs, this pointer will be a guess that depends on the particular
 * error, but it will always advance at least one byte.
 *
 * Local deviation from the upstream decoder: on error, *C is set to
 * 0xdc00 | <first byte> and exactly one byte is consumed, so an
 * undecodable byte survives as a code point in 0xDC00..0xDCFF that
 * utf8_encode() turns back into the same raw byte.
 *
 * NOTE(review): the string buffers handled in this file are only shown to
 * carry a single trailing 0 byte (see the "include 0 byte" copies in
 * C_utf_setsubchar), not three.  Bytes beyond the decoded length never
 * influence *C or *E, but they are still loaded -- confirm that the
 * allocator guarantees the extra readable bytes.
 */

/* Sequence length indexed by the top five bits of the lead byte;
 * 0 marks a byte that cannot start a sequence. */
static const char lengths[] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 4, 0
};

static C_char *utf8_decode(C_char *buf, C_u32 *c, int *e)
{
    static const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07};
    static const uint32_t mins[] = {4194304, 0, 128, 2048, 65536};
    static const int shiftc[] = {0, 18, 12, 6, 0};
    static const int shifte[] = {0, 6, 4, 2, 0};

#ifdef DEBUGBUILD
    if(buf == NULL) C_panic_hook(C_text("possibly invalid string index"));
#endif

    unsigned char *s = (unsigned char *)buf;
    int len = lengths[s[0] >> 3];

    /* Compute the pointer to the next character early so that the next
     * iteration can start working on the next character. Neither Clang
     * nor GCC figure out this reordering on their own.
     */
    unsigned char *next = s + len + !len;

    /* Assume a four-byte character and load four bytes. Unused bits are
     * shifted out.
     */
    *c  = (C_u32)(s[0] & masks[len]) << 18;
    *c |= (C_u32)(s[1] & 0x3f) << 12;
    *c |= (C_u32)(s[2] & 0x3f) <<  6;
    *c |= (C_u32)(s[3] & 0x3f) <<  0;
    *c >>= shiftc[len];

    /* Accumulate the various error conditions. */
    *e  = (*c < mins[len]) << 6;      /* non-canonical encoding */
    *e |= ((*c >> 11) == 0x1b) << 7;  /* surrogate half? */
    *e |= (*c > 0x10FFFF) << 8;       /* out of range? */
    *e |= (s[1] & 0xc0) >> 2;
    *e |= (s[2] & 0xc0) >> 4;
    *e |= (s[3]       ) >> 6;
    *e ^= 0x2a;                       /* top two bits of each tail byte correct? */
    *e >>= shifte[len];

    /* now make all that optimization pointless... */
    if(*e) {
        /* keep the offending byte as an escape code point, skip one byte */
        *c = 0xdc00 | *s;
        return (C_char *)s + 1;
    }

    return (C_char *)next;
}

/* Encode code point U as UTF-8 at P1 and return the pointer just past the
 * last byte written.  At most four bytes are written; nothing is written
 * (P1 is returned unchanged) when U is 0x110000 or larger.
 *
 * Code points 0xDC00..0xDCFF are the escape values produced by
 * utf8_decode() for undecodable input and are written back as the single
 * raw byte held in their low eight bits.
 *
 * The escape test compares the whole value.  It used to look at bits 8..15
 * only, so every code point of the form 0x?DCxx above the BMP (for example
 * U+2DC41) was wrongly emitted as one raw byte instead of a four-byte
 * sequence.
 */
static C_char *utf8_encode(C_u32 u, C_char *p1)
{
    unsigned char *p = (unsigned char *)p1;

    if(u < 0x80) *(p++) = u;
    else if(u >= 0xdc00 && u <= 0xdcff) {
        *(p++) = u & 0xff;
    }
    else if(u < 0x800) {
        *(p++) = (u >> 6) | 0xC0;
        *(p++) = (u & 0x3F) | 0x80;
    }
    else if(u < 0x10000) {
        *(p++) = (u >> 12) | 0xE0;
        *(p++) = ((u >> 6) & 0x3F) | 0x80;
        *(p++) = (u & 0x3F) | 0x80;
    }
    else if(u < 0x110000) {
        *(p++) = (u >> 18) | 0xF0;
        *(p++) = ((u >> 12) & 0x3F) | 0x80;
        *(p++) = ((u >> 6) & 0x3F) | 0x80;
        *(p++) = (u & 0x3F) | 0x80;
    }

    return (C_char *)p;
}

/* Slow path of utf_index(): walk the encoded bytes of string S until
 * character index I is reached and return a pointer to its first byte.
 *
 * Slots of S as used here: 0 = byte buffer, 1 = number of characters,
 * 2 = character index of the last lookup, 3 = byte offset of that
 * character.  The walk starts from the remembered position when I is not
 * before it, otherwise from the beginning.  On success slots 2 and 3 are
 * updated to (I, byte offset of I).  I may equal the character count (the
 * position just past the last character).  Returns NULL when I lies
 * beyond that.
 */
static C_char *utf_index1(C_word s, C_word i)
{
    C_word i0 = C_unfix(C_block_item(s, 2));
    C_word count = C_unfix(C_block_item(s, 1));
    C_word off = 0, index = 0;
    C_char *p1, *p = C_c_string(C_block_item(s, 0));
    int e;
    C_u32 c;

    if(i >= i0) {
        /* resume from the cached position */
        p += off = C_unfix(C_block_item(s, 3));
        index = i0;
    }

    while(index <= count) {
        if(index == i) {
            C_set_block_item(s, 2, C_fix(index));
            C_set_block_item(s, 3, C_fix(off));
            return p;
        }

        p1 = p;
        p = utf8_decode(p, &c, &e);
        ++index;
        off += p - p1;
    }

    return NULL;
}

/* Return a pointer to the first byte of character I of string S and
 * remember the position in slots 2 (index) and 3 (byte offset) of S.
 *
 * Index 0 and strings whose byte length equals their character count
 * (every character occupies one byte) are answered directly; everything
 * else goes through utf_index1(), which may return NULL.
 */
static C_char *utf_index(C_word s, C_word i)
{
    C_word bv = C_block_item(s, 0);

    if(i == 0) {
        C_set_block_item(s, 2, C_fix(0));
        C_set_block_item(s, 3, C_fix(0));
        return C_c_string(bv);
    }
    else if(C_header_size(bv) - 1 == C_unfix(C_block_item(s, 1))) {
        /* len == codepoints */
        C_set_block_item(s, 2, C_fix(i));
        C_set_block_item(s, 3, C_fix(i));
        return C_c_string(bv) + i;
    }

    return utf_index1(s, i);
}

/* Return the character at index I (a fixnum) of string S.
 *
 * NOTE(review): the result of utf_index() is decoded without a NULL
 * check; callers are presumably expected to range-check I beforehand.
 */
C_regparm C_word C_utf_subchar(C_word s, C_word i)
{
    C_char *p = utf_index(s, C_unfix(i));
    int e;
    C_u32 c;

    utf8_decode(p, &c, &e);
    return C_make_character(c);
}

/* Replace the character at index I (a fixnum) of string S by character C.
 *
 * The new character is encoded into a local buffer and compared with the
 * encoded length of the character it replaces:
 *   - longer:  a new byte buffer is allocated in scratch space, filled
 *              with prefix + new encoding + suffix (including the
 *              trailing 0 byte) and stored in slot 0 of S;
 *   - shorter: the tail is moved down in place and the buffer header is
 *              rewritten with the reduced size;
 *   - equal:   the bytes are overwritten in place.
 * Returns C_SCHEME_UNDEFINED.
 */
C_regparm C_word C_utf_setsubchar(C_word s, C_word i, C_word c)
{
    C_char buf[ 4 ];
    C_char *p = utf8_encode(C_character_code(c), buf);
    int e;
    C_u32 old;
    C_char *p1 = utf_index(s, C_unfix(i));
    C_char *p2 = utf8_decode(p1, &old, &e);
    int nl = p - buf, ol = p2 - p1;              /* new / old encoded length */
    C_word bv = C_block_item(s, 0);
    C_word bvlen = C_header_size(bv) - 1;        /* bytes without the 0 byte */
    int prefix = C_unfix(C_block_item(s, 3)); /* offset */
    int suffix = bvlen - prefix - ol;

    if(nl > ol) {
        int tl = bvlen + nl - ol;

        if(C_in_scratchspacep(bv)) C_mutate_scratch_slot(NULL, bv);

        C_word bvn = C_scratch_alloc(C_SIZEOF_BYTEVECTOR(tl + 1));
        C_block_header_init(bvn, C_make_header(C_BYTEVECTOR_TYPE, tl + 1));

        if(prefix) C_memcpy(C_c_string(bvn), C_c_string(bv), prefix);

        C_memcpy((C_char *)C_data_pointer(bvn) + prefix, buf, nl);
        C_memcpy((C_char *)C_data_pointer(bvn) + prefix + nl,
                 (C_char *)C_data_pointer(bv) + prefix + ol,
                 suffix + 1); /* include 0 byte */
        C_mutate_slot(&C_block_item(s, 0), bvn);
        C_mutate_scratch_slot(&C_block_item(s, 0), bvn);
    }
    else if(nl < ol) {
        C_memcpy(p1, buf, nl);
        C_memmove(p1 + nl, p1 + ol, suffix + 1); /* include 0 byte */
        C_block_header_init(bv, C_make_header(C_BYTEVECTOR_TYPE, bvlen - (ol - nl) + 1));
    }
    else C_memcpy(p1, buf, nl);

    return C_SCHEME_UNDEFINED;
}

/* copy c bytes of bv into s at position i, occupying len characters */ C_regparm C_word C_utf_overwrite(C_word s, C_word i, C_word len, C_word bv, C_word c) { C_word bvs = C_block_item(s, 0); C_word bvlen = C_header_size(bvs) - 1; C_char *p1 = utf_index(s, C_unfix(i)); C_char *p2 = utf_index(s, C_unfix(i) + C_unfix(len)); int count = C_unfix(c); int d = p2 - p1; int prefix = p1 - (C_char
*)C_data_pointer(bvs); int suffix = bvlen - prefix - d; if(count > d) { int tl = bvlen + count - d; C_word bvn = C_scratch_alloc(C_SIZEOF_BYTEVECTOR(tl + 1)); if(C_in_scratchspacep(bvs)) C_mutate_scratch_slot(NULL, bvs); C_block_header_init(bvn, C_make_header(C_BYTEVECTOR_TYPE, tl + 1)); if(prefix) C_memcpy(C_c_string(bvn), C_data_pointer(bvs), prefix); C_memcpy((C_char *)C_data_pointer(bvn) + prefix, (C_char *)C_data_pointer(bv), count); C_memcpy((C_char *)C_data_pointer(bvn) + prefix + count, p2, suffix + 1); /* include 0 byte */ C_mutate_slot(&C_block_item(s, 0), bvn); C_mutate_scratch_slot(&C_block_item(s, 0), bvn); } else if(count < d && count) { C_memcpy(p1, C_data_pointer(bv), count); C_memmove(p1 + count, p2, suffix + 1); /* include 0 byte */ C_block_header_init(bvs, C_make_header(C_BYTEVECTOR_TYPE, bvlen - (d - count) + 1)); } else if(count) C_memcpy(p1, C_data_pointer(bv), count); return C_SCHEME_UNDEFINED; } C_regparm C_word C_utf_compare(C_word s1, C_word s2, C_word start1, C_word start2, C_word len) { C_char *p1 = utf_index(s1, C_unfix(start1)); C_char *p2 = utf_index(s2, C_unfix(start2)); int e, n = C_unfix(len); while(n--) { C_u32 c1, c2; p1 = utf8_decode(p1, &c1, &e); p2 = utf8_decode(p2, &c2, &e); if(c1 != c2) return C_fix((C_word)c1 - (C_word)c2); } return C_fix(0); } C_regparm C_word C_utf_compare_ci(C_word s1, C_word s2, C_word start1, C_word start2, C_word len) { C_char *p1 = utf_index(s1, C_unfix(start1)); C_char *p2 = utf_index(s2, C_unfix(start2)); int e, n = C_unfix(len); while(n--) { C_u32 c1, c2; int *m, r1, r2, i; p1 = utf8_decode(p1, &c1, &e); p2 = utf8_decode(p2, &c2, &e); if(c1 >= 'A' && c1 <= 'Z') r1 = c1 + 32; else r1 = c1; if(c2 >= 'A' && c2 <= 'Z') r2 = c2 + 32; else r2 = c2; if(r1 == r2) continue; if(r1 < 128 || r2 < 128) goto fail; m = bsearch(&r1, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp); if(m) { for(i = 1; i < 3; ++i) { if(m[ i ] == 0) break; if(m[ i ] != c2) return C_fix(m[ i ] - c2); if(i != 2 && m[ i + 1 ] != 0) p2 
= utf8_decode(p2, &c2, &e); } } else { m = bsearch(&r1, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp); if(m) { if(m[ 1 ] != c2) return C_fix(m[ 1 ] - c2); } } m = bsearch(&r2, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp); if(m) { for(i = 1; i < 3; ++i) { if(m[ i ] == 0) break; if(c1 != m[ i ]) return C_fix(c1 - m[ i ]); if(i != 2 && m[ i + 1 ]) p1 = utf8_decode(p1, &c1, &e); } } else { m = bsearch(&r2, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp); if(m) { if(c1 != m[ 1 ]) return C_fix(c1 - m[ 1 ]); } } continue; fail: return C_fix(r1 - r2); } return C_fix(0); } /* XXX inline this? */ C_regparm C_word C_utf_equal(C_word s1, C_word s2) { C_word b1 = C_block_item(s1, 0); C_word b2 = C_block_item(s2, 0); int n1 = C_header_size(b1); int n2 = C_header_size(b2); if(n1 != n2) return C_SCHEME_FALSE; return C_mk_bool(C_memcmp(C_c_string(b1), C_c_string(b2), n1) == 0); } /* XXX inline this? */ C_regparm C_word C_utf_equal_ci(C_word s1, C_word s2) { C_word n1 = C_block_item(s1, 1); if(n1 != C_block_item(s2, 1)) return C_SCHEME_FALSE; return C_mk_bool(C_utf_compare_ci(s1, s2, C_fix(0), C_fix(0), n1) == C_fix(0)); } C_regparm C_word C_utf_copy(C_word from, C_word to, C_word start1, C_word end1, C_word start2) { C_char *p1 = utf_index(from, C_unfix(start1)); C_char *p2 = utf_index(to, C_unfix(start2)); C_char *p3 = utf_index(from, C_unfix(end1)); C_memcpy(p2, p1, p3 - p1); return C_SCHEME_UNDEFINED; } /* compute byte-index from char-index */ C_regparm C_word C_utf_position(C_word str, C_word index) { C_char *p1 = utf_index(str, C_unfix(index)); return C_fix(p1 - C_c_string(C_block_item(str, 0))); } /* compute char-index from byte-index (slow, uncached) */ C_regparm int C_utf_char_position(C_word bv, int pos) { int p = 0; C_u32 c; int e; C_char *ptr = C_c_string(bv), *ptr2; while(pos > 0) { ptr2 = utf8_decode(ptr, &c, &e); pos -= ptr2 - ptr; ptr = ptr2; ++p; } return p; } /* compute byte-offset between two char-indices */ C_regparm C_word C_utf_range(C_word str, 
C_word start, C_word end) { C_char *p1 = utf_index(str, C_unfix(start)); C_char *p2 = utf_index(str, C_unfix(end)); return C_fix(p2 - p1); } /* Count characters - slow variant, handles invalid sequences */ C_regparm int C_utf_count(C_char *s, int len) { int i = 0; C_u32 c; int e; C_char *s2; while (len > 0) { s2 = utf8_decode(s, &c, &e); len -= (s2 - s); s = s2; i++; } return i; } /* Count characters - slow variant, detects invalid sequences */ C_regparm C_word C_utf_validate(C_word bv, C_word blen) { int i = 0; C_u32 c; int e; C_char *s = C_c_string(bv), *s2; int len = C_unfix(blen); while (len > 0) { s2 = utf8_decode(s, &c, &e); if(e) return C_SCHEME_FALSE; len -= (s2 - s); s = s2; i++; } return C_fix(i); } /* count characters, fast, unsafe variant http://canonical.org/~kragen/strlen-utf8.html */ C_regparm int C_utf_fast_count(C_char *s, int len) { int i = 0, j = 0; while (len--) { if ((s[i] & 0xc0) != 0x80) j++; i++; } return j; } C_regparm C_word C_utf_bytes(C_word chr) { int e; char buf[ 5 ]; C_char *p1 = utf8_encode(C_character_code(chr), buf); return C_fix(p1 - buf); } C_regparm C_char * C_utf_encode(C_char *str, int chr) { return utf8_encode(chr, str); } C_regparm C_word C_utf_decode(C_word bv, C_word pos) { C_u32 c; int e; utf8_decode(C_c_string(bv) + C_unfix(pos), &c, &e); return C_make_character(c); } C_regparm C_word C_utf_decode_ptr(C_char *bv) { C_u32 c; int e; utf8_decode(bv, &c, &e); return C_make_character(c); } C_regparm C_word C_utf_advance(C_word bv, C_word pos) { C_char *p1 = (C_char *)C_data_pointer(bv) + C_unfix(pos); C_u32 c; int e; C_char *p2 = utf8_decode(p1, &c, &e); return C_fix(C_unfix(pos) + p2 - p1); } C_regparm C_word C_utf_insert(C_word bv, C_word pos, C_word c) { C_char *p1 = C_c_string(bv) + C_unfix(pos); C_char *p2 = utf8_encode(C_character_code(c), p1); return C_fix(C_unfix(pos) + p2 - p1); } C_regparm C_word C_utf_fill(C_word bv, C_word chr) { char buf[ 5 ]; int size = C_header_size(bv) - 1; int len = C_utf_encode(buf, 
C_character_code(chr)) - buf; C_char *p; int n; if(len == 1) { C_memset(C_data_pointer(bv), *buf, size); return bv; } p = C_data_pointer(bv); n = size / len; while(n--) { C_memcpy(p, buf, len); p += len; } ((C_char *)C_data_pointer(bv))[ size ] = 0; /* terminating zero */ return bv; } C_regparm int C_utf_expect(int byte) { int len = lengths[ byte >> 3 ]; return len + !len; } /* take bytevector section and compute full + incomplete codepoints */ C_regparm C_word C_utf_fragment_counts(C_word bv, C_word pos, C_word len) { int full = 0; C_uchar *ptr = C_data_pointer(bv) + C_unfix(pos); int count = C_unfix(len); while(count) { unsigned int byte = *(ptr++); int n = lengths[ byte >> 3 ]; int bn = n + !n; if(count >= bn) { ++full; count -= bn; ptr += bn - 1; } else return C_fix((full << 4) | (bn - count)); } return C_fix(full << 4); } C_regparm void C_utf_putc(int chr, C_FILEPTR fp) { C_char buf[ 5 ]; C_char *p = utf8_encode(chr, buf); *p = '\0'; C_fputs(buf, fp); } C_regparm C_word C_utf_list_size(C_word lst) { int n = 0; while(!C_immediatep(lst) && C_header_bits(lst) == C_PAIR_TYPE) { C_word x = C_block_item(lst, 0); if(((x) & C_IMMEDIATE_TYPE_BITS) == C_CHARACTER_BITS) n += C_unfix(C_utf_bytes(x)); lst = C_block_item(lst, 1); } return C_fix(n); } C_regparm C_word C_latin_to_utf(C_word from, C_word to, C_word start, C_word len) { int n = C_unfix(len); C_uchar *pf = (C_uchar *)C_c_string(from) + C_unfix(start); C_char *pt = C_c_string(to), *pt0 = pt; while(n-- > 0) { C_u32 c = *(pf++); pt = utf8_encode(c, pt); } return C_fix(pt - pt0); } C_regparm C_word C_utf_to_latin(C_word from, C_word to, C_word start, C_word blen) { int n = C_unfix(blen); C_char *pf = C_c_string(from) + C_unfix(start), *pf2; C_char *pt = C_c_string(to), *pt0 = pt; C_u32 c; int e; while(n > 0) { pf2 = utf8_decode(pf, &c, &e); n -= pf2 - pf; pf = pf2; *(pt++) = c & 0xff; } *pt = '\0'; return C_fix(pt - pt0); } C_regparm C_word C_utf_char_foldcase(C_word c) { int r = C_character_code(c); int *m = 
bsearch(&r, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp); if(m) return C_make_character(m[ 1 ]); return c; } C_regparm C_word C_utf_string_foldcase(C_word from, C_word to, C_word len) { C_u32 c; int e; C_char *pf = C_c_string(from), *pf2; C_char *pt = C_c_string(to), *pt0 = pt; int count = C_unfix(len); while(count > 0) { pf2 = utf8_decode(pf, &c, &e); if(!e) { int r = c; int *m = bsearch(&r, fold2, nelem(fold2), sizeof(*fold2), &runemapcmp); if(m) { pt = utf8_encode(m[ 1 ], pt); if(m[ 3 ] != 0) { pt = utf8_encode(m[ 2 ], pt); c = m[ 3 ]; } else c = m[ 2 ]; } else { m = bsearch(&r, fold1, nelem(fold1), sizeof(*fold1), &runemapcmp); if(m) c = m[ 1 ]; } } pt = utf8_encode(c, pt); count -= pf2 - pf; pf = pf2; } return C_fix(pt - pt0); } #if defined(_WIN32) && !defined(__CYGWIN__) #define C_WCHAR_FNBUF_SIZE 2048 static C_WCHAR fnbuf[ C_WCHAR_FNBUF_SIZE ], *pfnbuf; C_regparm C_WCHAR *C_utf16(C_word bv, int cont) { int len = C_header_size(bv) - 1; C_WCHAR *p; if(!cont) pfnbuf = fnbuf; p = pfnbuf; int n = MultiByteToWideChar(CP_UTF8, 0, C_c_string(bv), -1, pfnbuf, C_WCHAR_FNBUF_SIZE - (pfnbuf - fnbuf)); if(n == 0) C_decoding_error(bv, C_fix(0)); pfnbuf += n; return p; } C_regparm C_char *C_utf8(C_WCHAR *str) { int n = WideCharToMultiByte(CP_UTF8, 0, str, -1, (C_char *)fnbuf, C_WCHAR_FNBUF_SIZE, NULL, NULL); if(n == 0) C_decoding_error(C_SCHEME_UNDEFINED, C_fix(0)); ((C_char *)fnbuf)[ n ] = '\0'; return (C_char *)fnbuf; } #endif