* Added a unicode-char-cases file to the unicode directory; it
  contains the char-downcase, char-upcase, char-titlecase, and
  char-foldcase tables.
This commit is contained in:
Abdulaziz Ghuloum 2007-06-17 15:04:01 +03:00
parent 1750aba832
commit e28c1a6de7
5 changed files with 367 additions and 53 deletions

View File

@ -0,0 +1,16 @@
Fields:
Name
General Category
Canonical Combining Class
Bidi Class
Decomposition type/mapping
Numeric type/value
Bidi Mirrored
Unicode 1 name
ISO Comment
Simple Uppercase Mapping (field 12)
Simple Lowercase Mapping (field 13)
Simple Titlecase Mapping (field 14)

77
src/unicode/extract-cases.ss Executable file
View File

@ -0,0 +1,77 @@
#!/usr/bin/env ikarus --r6rs-script
(import
(ikarus)
(unicode-data))
;;; Converts a string of hexadecimal digits (for example "00C0") to the
;;; exact integer it denotes.
;;;
;;; The conversion uses string->number with radix 16 rather than gluing
;;; "#x" in front of the text and running the result through the
;;; reader, so the input is only ever interpreted as a number.
;;;
;;; Signals an error when X is not a valid hexadecimal numeral.
(define (hex->num x)
  (or (string->number x 16)
      (error 'hex->num "invalid hexadecimal string ~s" x)))
;;; Turns one record (a list of field strings) into a pair
;;;   (code-point . #(up-delta down-delta title-delta #f)).
;;; The code point is the first field, read as hexadecimal.  Each delta
;;; is the mapped code point minus the record's own code point, or 0
;;; when the corresponding mapping field is the empty string.  The
;;; fourth vector slot is created holding #f.
(define (data-case fields)
  (let* ([code-field (car fields)]
         [upper-field (list-ref fields uc-index)]
         [lower-field (list-ref fields lc-index)]
         [title-field (list-ref fields tc-index)]
         [code (hex->num code-field)]
         ;; Offset from CODE to the code point named by FIELD.
         [delta (lambda (field)
                  (if (string=? field "")
                      0
                      (- (hex->num field) code)))])
    (cons code
          (vector (delta upper-field)
                  (delta lower-field)
                  (delta title-field)
                  #f))))
;;; Collapses runs of adjacent entries that carry the same payload.
;;; LS is a list of pairs.  An entry is dropped when its cdr is equal?
;;; to the cdr of the most recently kept entry, so only the first entry
;;; of each run survives.  The value compared against starts out as #f.
(define (remove-dups ls)
  (let loop ([rest ls] [prev #f] [kept '()])
    (if (null? rest)
        (reverse kept)
        (let ([entry (car rest)])
          (if (equal? (cdr entry) prev)
              (loop (cdr rest) prev kept)
              (loop (cdr rest) (cdr entry) (cons entry kept)))))))
;;; Fills in the case-folding slot of every entry in LS and returns LS.
;;;
;;; LS is a list of pairs (code-point . vec), where vec is a mutable
;;; vector: slot 0 holds the upcase delta, slot 1 the downcase delta,
;;; and slot 3 receives the foldcase delta.  The fold of a code point
;;; is computed as downcase(upcase(code-point)) and, like the other
;;; slots, stored as an offset from the code point itself.
;;;
;;; Signals an error (from find-vec) when the upcase target of some
;;; entry does not itself appear in LS.
;;;
;;; Assumes each code point occurs at most once in LS -- TODO confirm
;;; against the data source.
;;;
;;; NOTE(review): R6RS specifies char-foldcase as the identity on
;;; U+0130 and U+0131; no such exception is made here -- confirm
;;; whether the consumer of the generated table handles those two.
(define (compute-foldcase ls)
  ;; Linear search for the delta vector that belongs to code point IDX.
  (define (find-vec idx)
    (let f ([ls ls])
      (cond
        [(null? ls) (error 'find-vec "cannot find ~s" idx)]
        [(= (caar ls) idx) (cdar ls)]
        [else (f (cdr ls))])))
  (for-each
    (lambda (x)
      (let* ([idx (car x)]
             [vec (cdr x)]
             ;; VEC already is the entry for IDX, so its upcase delta
             ;; is read directly instead of searching the list for it.
             [up (+ idx (vector-ref vec 0))]
             ;; Only the upcase target may need a lookup; when the
             ;; code point is its own upcase, the target is VEC again.
             [up-vec (if (= up idx) vec (find-vec up))]
             [down (+ up (vector-ref up-vec 1))])
        (vector-set! vec 3 (- down idx))))
    ls)
  ls)
;;; Zero-based positions, within the list of field strings of one
;;; record, of the simple case-mapping fields used by data-case.
;;; Position 0 of that list is the code point itself.
(define uc-index 12) ; Simple Uppercase Mapping
(define lc-index 13) ; Simple Lowercase Mapping
(define tc-index 14) ; Simple Titlecase Mapping
;;; Driver: builds the case table from the Unicode data, drops adjacent
;;; entries whose delta vectors are equal, and writes the result to
;;; "unicode-char-cases.ss" as five vector definitions: the code points
;;; at which each run starts, followed by one vector per delta slot.
(let* ([table (remove-dups
                (compute-foldcase
                  (map data-case (get-unicode-data))))]
       [starts (list->vector (map car table))])
  ;; Pretty-prints (define NAME '#(...)) where the vector holds slot
  ;; SLOT of every entry's delta vector, in table order.
  (define (emit-column name slot)
    (let ([column (map (lambda (entry) (vector-ref (cdr entry) slot))
                       table)])
      (pretty-print `(define ,name ',(list->vector column)))))
  (with-output-to-file "unicode-char-cases.ss"
    (lambda ()
      (printf ";;; DO NOT EDIT\n;;; automatically generated\n")
      (printf ";;; ~s entries in table\n" (vector-length starts))
      (pretty-print `(define charcase-search-vector ',starts))
      (emit-column 'char-upcase-adjustment-vector 0)
      (emit-column 'char-downcase-adjustment-vector 1)
      (emit-column 'char-titlecase-adjustment-vector 2)
      (emit-column 'char-foldcase-adjustment-vector 3))
    ;; presumably asks for an existing output file to be overwritten
    ;; -- confirm against the Ikarus documentation
    'replace))
(printf "Happy Happy Joy Joy\n")

View File

@ -1,55 +1,10 @@
#!/usr/bin/env ikarus --r6rs-script
(import (ikarus))
(import
(ikarus)
(unicode-data))
;;; Reads one non-empty line from the current input port.
;;; Returns the line's characters as a string, without the newline, or
;;; the eof object when the input is exhausted.  Empty lines are
;;; skipped.
(define (read-line)
  (let f ([ac '()])
    (let ([x (read-char)])
      (cond
        [(eof-object? x)
         ;; End of input: hand back a final unterminated line, if any.
         (if (null? ac)
             (eof-object)
             (list->string (reverse ac)))]
        [(char=? x #\newline)
         ;; An empty line restarts the loop.  The loop takes one
         ;; argument, so the fresh accumulator is passed explicitly.
         (if (null? ac) (f '()) (list->string (reverse ac)))]
        [else (f (cons x ac))]))))
;;; Scans STR from index I towards N and returns the index of the first
;;; semicolon found, or N when the scan reaches N without finding one.
(define (find-semi str i n)
  (let scan ([pos i])
    (if (or (fx= pos n)
            (char=? (string-ref str pos) #\;))
        pos
        (scan (+ pos 1)))))
;;; Splits STR at every semicolon and returns the list of fields.
;;; A string containing k semicolons yields k+1 fields; empty fields,
;;; including the one after a trailing semicolon, come back as "".
(define (split str)
  (let f ([i 0] [n (string-length str)])
    (cond
      ;; The end was reached right after a semicolon (or STR is empty):
      ;; the last field is empty and is kept, so that field positions
      ;; stay the same for every record.
      [(= i n) '("")]
      [else
       (let ([j (find-semi str i n)])
         (cond
           ;; No further semicolon: the rest of STR is the last field.
           [(= j n) (list (substring str i j))]
           [else
            (cons (substring str i j)
                  (f (+ j 1) n))]))])))
;;; Reads lines from the current input port until end of file and
;;; returns, in input order, a list of pairs (code-point . category):
;;; the first field of each line read as a hexadecimal number, the
;;; third field converted to a symbol.
(define (extract-uni-data)
  ;; Converts one line of text into its (code-point . category) pair.
  (define (parse-record line)
    (let* ([fields (split line)]
           [num (car fields)]
           [category-name (caddr fields)])
      (cons (read (open-input-string (format "#x~a" num)))
            (string->symbol category-name))))
  (let loop ([acc '()])
    (let ([line (read-line)])
      (if (eof-object? line)
          (reverse acc)
          (loop (cons (parse-record line) acc))))))
(define (codes-in-cats ls cats)
(let f ([ls ls] [ac '()])
(cond
@ -78,7 +33,6 @@
(cons i (f (+ i 1) #f ls))
(f (+ i 1) #f ls))]))))
;;; True when the lowest bit of N is set.
(define odd?
  (lambda (n)
    (let ([low-bit (fxlogand n 1)])
      (= low-bit 1))))
(define (search-on? n v)
(let ([k (- (vector-length v) 1)])
@ -105,12 +59,16 @@
(f (+ i 1) ls)))))
;;; Maps one record (a list of field strings) to the pair
;;; (code-point . category): the first field read as a hexadecimal
;;; number, the third field converted to a symbol.
(define (cat fields)
  (let* ([num (car fields)]
         [category-name (caddr fields)]
         [hex-literal (format "#x~a" num)]
         [code (read (open-input-string hex-literal))])
    (cons code (string->symbol category-name))))
(let ([ls
(with-input-from-file
"UNIDATA/UnicodeData.txt"
extract-uni-data)])
(let ([ls (map cat (get-unicode-data))])
(let ([wanted
(codes-in-cats ls
'(Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pd Pc Po Sc Sm Sk So Co))])

View File

@ -0,0 +1,213 @@
;;; DO NOT EDIT
;;; automatically generated
;;; 1080 entries in table
(define charcase-search-vector
'#(0 65 91 97 123 181 182 192 215 216 223 224 247 248 255 256 257 258 259 260
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
414 415 416 417 418 419 420 421 422 423 424 425 426 428 429 430 431 432 433
435 436 437 438 439 440 441 442 444 445 446 447 448 452 453 454 455 456 457
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533
534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
553 554 555 556 557 558 559 560 561 562 563 564 570 571 572 573 574 575 577
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 595 596 597 598
600 601 602 603 604 608 609 611 612 616 617 618 619 620 623 624 626 627 629
630 637 638 640 641 643 644 648 649 650 652 653 658 659 837 838 891 894 902
903 904 908 910 912 913 940 941 944 945 962 963 972 973 976 977 978 981 982
983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015
1016 1017 1018 1019 1020 1021 1024 1040 1072 1104 1120 1121 1122 1123 1124
1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139
1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154
1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191
1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206
1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221
1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236
1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
1297 1298 1299 1329 1369 1377 1415 4256 4304 7549 7550 7680 7681 7682 7683
7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698
7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713
7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728
7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743
7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758
7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773
7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788
7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803
7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818
7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7835 7840 7841
7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856
7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871
7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886
7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901
7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916
7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7936 7944
7952 7960 7968 7976 7984 7992 8000 8008 8016 8017 8018 8019 8020 8021 8022
8023 8025 8032 8040 8048 8050 8054 8056 8058 8060 8064 8072 8080 8088 8096
8104 8112 8114 8115 8116 8120 8122 8124 8125 8126 8127 8131 8132 8136 8140
8141 8144 8146 8152 8154 8157 8160 8162 8165 8166 8168 8170 8172 8173 8179
8180 8184 8186 8188 8189 8486 8487 8490 8491 8492 8498 8499 8526 8531 8544
8560 8576 8579 8580 8592 9398 9424 9450 11264 11312 11360 11361 11362 11363
11364 11365 11366 11367 11368 11369 11370 11371 11372 11380 11381 11382
11383 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402
11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414
11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426
11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438
11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450
11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462
11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474
11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486
11487 11488 11489 11490 11491 11492 11520 11568 65313 65339 65345 65371
66560 66600 66640))
(define char-upcase-adjustment-vector
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -232 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 -300 195 0 0 -1 0
-1 0 0 -1 0 0 -1 0 0 0 0 0 -1 0 0 97 0 0 0 -1 163 0 0 0 130 0 0 -1 0 -1 0
-1 0 0 -1 0 0 0 -1 0 0 -1 0 0 -1 0 -1 0 0 -1 0 0 -1 0 56 0 0 -1 -2 0 -1 -2
0 -1 -2 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -79 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 0 -1 -2 0 -1 0 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0
-1 0 -1 0 -1 0 -1 0 -210 -206 0 -205 0 -202 0 -203 0 -205 0 -207 0 -209
-211 0 10743 0 -211 0 -213 0 -214 0 10727 0 -218 0 -218 0 -218 -69 -217 -71
0 -219 0 84 0 130 0 0 0 0 0 0 0 0 -38 -37 0 -32 -31 -32 -64 -63 -62 -57 0
-47 -54 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -86
-80 7 0 0 -96 0 0 -1 0 0 -1 0 0 0 0 -32 -80 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 -15 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0
8 0 8 0 8 0 8 0 8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205
0 9 0 0 0 0 8 0 0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16
0 0 -1 0 0 -26 0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
(define char-downcase-adjustment-vector
'#(0 32 0 0 0 0 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -199 0 1 0 1 0 1 0 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -121 1 0 1 0 1 0 0 0 210
1 0 1 0 206 1 0 205 1 0 0 79 202 203 1 0 205 207 0 211 209 1 0 0 0 211 213
0 214 1 0 1 0 1 0 218 1 0 218 0 1 0 218 1 0 217 1 0 1 0 219 1 0 0 1 0 0 0 0
2 1 0 2 1 0 2 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 0 2 1 0 1 0 -97 -56 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -130 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 0 10795 1 0 -163 10792 0 1 0 -195 69 71 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 38
0 37 64 63 0 32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 0 0 0 0 -60 0 0 1 0 -7 1 0 0 -130 80 32 0 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0 0 0 0 0 -8 0 -8 0 -8
0 0 0 0 -8 -74 -9 0 0 0 0 0 -86 -9 0 0 0 -8 -100 0 0 0 0 0 -8 -112 -7 0 0 0
-128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0
-10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))
(define char-titlecase-adjustment-vector
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -232 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 -300 195 0 0 -1 0
-1 0 0 -1 0 0 -1 0 0 0 0 0 -1 0 0 97 0 0 0 -1 163 0 0 0 130 0 0 -1 0 -1 0
-1 0 0 -1 0 0 0 -1 0 0 -1 0 0 -1 0 -1 0 0 -1 0 0 -1 0 56 0 1 0 -1 1 0 -1 1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -79 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 1 0 -1 0 -1 0 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0 -1 0 -1 0
-1 0 -1 0 -210 -206 0 -205 0 -202 0 -203 0 -205 0 -207 0 -209 -211 0 10743
0 -211 0 -213 0 -214 0 10727 0 -218 0 -218 0 -218 -69 -217 -71 0 -219 0 84
0 130 0 0 0 0 0 0 0 0 -38 -37 0 -32 -31 -32 -64 -63 -62 -57 0 -47 -54 0 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -86 -80 7 0 0 -96
0 0 -1 0 0 -1 0 0 0 0 -32 -80 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -15 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0 8 0 8 0 8 0 8 0
8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205 0 9 0 0 0 0 8 0
0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26
0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
-1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
(define char-foldcase-adjustment-vector
'#(0 32 0 0 0 775 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -199 -200 1 0 1 0 1
0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -121 1 0 1 0 1 0 -268
0 210 1 0 1 0 206 1 0 205 1 0 0 79 202 203 1 0 205 207 0 211 209 1 0 0 0
211 213 0 214 1 0 1 0 1 0 218 1 0 218 0 1 0 218 1 0 217 1 0 1 0 219 1 0 0 1
0 0 0 0 2 1 0 2 1 0 2 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 0 2 1 0 1 0 -97 -56 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -130 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 0 10795 1 0 -163 10792 0 1 0 -195 69 71 1 0 1 0 1 0 1 0 1 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 116
0 0 0 38 0 37 64 63 0 32 0 0 0 0 1 0 0 0 -30 -25 0 -15 -22 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -54 -48 0 0 -60 -64 0 1 0 -7 1 0 0 -130
80 32 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -58
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0
-8 0 0 0 0 0 0 0 -8 0 -8 0 -8 0 0 0 0 -8 -74 -9 0 -7173 0 0 0 -86 -9 0 0 0
-8 -100 0 0 0 0 0 -8 -112 -7 0 0 0 -128 -126 -9 0 -7517 0 -8383 -8262 0 28
0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1
0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))

50
src/unicode/unicode-data.ss Executable file
View File

@ -0,0 +1,50 @@
;;; (unicode-data): reads "UNIDATA/UnicodeData.txt" and returns its
;;; contents as a list of records, each record being the list of
;;; semicolon-separated field strings of one line.
(library (unicode-data)
  (export get-unicode-data)
  (import (ikarus))

  ;; Reads one non-empty line from the current input port.  Returns
  ;; the line as a string, without the newline, or the eof object when
  ;; the input is exhausted.  Empty lines are skipped.
  (define (read-line)
    (let f ([ac '()])
      (let ([x (read-char)])
        (cond
          [(eof-object? x)
           ;; End of input: hand back a final unterminated line, if any.
           (if (null? ac)
               (eof-object)
               (list->string (reverse ac)))]
          [(char=? x #\newline)
           ;; An empty line restarts the loop.  The loop takes one
           ;; argument, so the fresh accumulator is passed explicitly
           ;; (calling it with no arguments is an arity error).
           (if (null? ac) (f '()) (list->string (reverse ac)))]
          [else (f (cons x ac))]))))

  ;; Returns the index of the first semicolon in STR at or after I, or
  ;; N when index N is reached without finding one.
  (define (find-semi str i n)
    (cond
      [(or (fx= i n)
           (char=? (string-ref str i) #\;)) i]
      [else (find-semi str (+ i 1) n)]))

  ;; Splits STR at every semicolon.  Empty fields, including the one
  ;; after a trailing semicolon, are returned as "", so a string with
  ;; k semicolons always yields k+1 fields.
  (define (split str)
    (let f ([i 0] [n (string-length str)])
      (cond
        [(= i n) '("")]
        [else
         (let ([j (find-semi str i n)])
           (cond
             [(= j n) (list (substring str i j))]
             [else
              (cons (substring str i j)
                    (f (+ j 1) n))]))])))

  ;; Reads lines from the current input port until end of file and
  ;; returns the list of their field lists, in input order.
  (define (extract-uni-data)
    (let f ([ls '()])
      (let ([line (read-line)])
        (cond
          [(eof-object? line)
           (reverse ls)]
          [else
           (let ([fields (split line)])
             (f (cons fields ls)))]))))

  ;; Opens "UNIDATA/UnicodeData.txt" as the current input port and
  ;; returns every record in it as a list of field strings.
  (define (get-unicode-data)
    (with-input-from-file
      "UNIDATA/UnicodeData.txt"
      extract-uni-data)))