* Added unicode-char-cases file to the unicode directory that
contains char-downcase, char-upcase, char-titlecase, and char-foldcase tables.
This commit is contained in:
parent
1750aba832
commit
e28c1a6de7
|
@ -0,0 +1,16 @@
|
|||
|
||||
Fields:
|
||||
|
||||
Name
|
||||
General Category
|
||||
Canonical Combining Class
|
||||
Bidi Class
|
||||
Decomposition type/mapping
|
||||
Numeric type/value
|
||||
Bidi Mirrored
|
||||
Unicode 1 name
|
||||
ISO Comment
|
||||
Simple Uppercase Mapping (field 12)
|
||||
Simple Lowercase Mapping (field 13)
|
||||
Simple Titlecase Mapping (field 14)
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
#!/usr/bin/env ikarus --r6rs-script
|
||||
|
||||
(import
|
||||
(ikarus)
|
||||
(unicode-data))
|
||||
|
||||
;; Converts a string of hexadecimal digits (e.g. "0041") into a number by
;; prefixing it with "#x" and handing the result to the reader.
(define (hex->num x)
  (let* ([literal (format "#x~a" x)]
         [port (open-input-string literal)])
    (read port)))
|
||||
|
||||
;; Turns one record (a list of field strings) into
;;   (code . #(upcase-delta downcase-delta titlecase-delta #f))
;; where each delta is the mapped code point minus the record's own code
;; point, or 0 when the mapping field is the empty string.  The fourth
;; vector slot is created as #f.
(define (data-case fields)
  (let* ([code-field (car fields)]
         [upper-field (list-ref fields uc-index)]
         [lower-field (list-ref fields lc-index)]
         [title-field (list-ref fields tc-index)]
         [code (hex->num code-field)])
    ;; Offset from `code` to the mapping named by `field`; 0 if absent.
    (define (offset field)
      (if (string=? field "")
          0
          (- (hex->num field) code)))
    (cons code
          (vector (offset upper-field)
                  (offset lower-field)
                  (offset title-field)
                  #f))))
|
||||
|
||||
;; Collapses runs of entries that carry the same payload: an entry is
;; dropped when its cdr is `equal?` to the cdr of the most recently kept
;; entry.  The comparison value starts out as #f, and the relative order
;; of the surviving entries is unchanged.
(define (remove-dups ls)
  (let loop ([rest ls] [prev #f] [kept '()])
    (if (null? rest)
        (reverse kept)
        (let ([entry (car rest)])
          (if (equal? (cdr entry) prev)
              (loop (cdr rest) prev kept)
              (loop (cdr rest) (cdr entry) (cons entry kept)))))))
|
||||
|
||||
;; Fills in slot 3 of every entry's vector with a fold-case offset and
;; returns the same list `ls` (the vectors are mutated in place).
;;
;; ls : list of (code . vec) pairs, where slot 0 of vec is the offset to
;;      the upper-case code point and slot 1 the offset to the lower-case
;;      code point.
;;
;; For an entry with code point c the stored value is
;;   lower(upper(c)) - c
;; i.e. the offset reached by first applying the upper-case offset and
;; then the lower-case offset of the resulting code point.
;;
;; Raises an error from `find-vec` when the upper-case target of an entry
;; has no entry of its own in `ls`.
(define (compute-foldcase ls)
  ;; Linear search for the vector belonging to code point `idx`.
  (define (find-vec idx)
    (let f ([ls ls])
      (cond
        [(null? ls) (error 'find-vec "cannot find ~s" idx)]
        [(= (caar ls) idx) (cdar ls)]
        [else (f (cdr ls))])))
  (for-each
    (lambda (x)
      (let* ([idx (car x)]
             [vec (cdr x)]
             ;; The entry's own vector is already in hand, so its
             ;; upper-case offset is read directly instead of searching
             ;; the list for `idx` again.
             [up (+ idx (vector-ref vec 0))]
             [down (+ up (vector-ref (find-vec up) 1))])
        (vector-set! vec 3 (- down idx))))
    ls)
  ls)
|
||||
|
||||
;; Zero-based positions, within a record's field list, of the simple
;; upper-case, lower-case and title-case mapping fields read by data-case.
(define uc-index 12)
(define lc-index 13)
(define tc-index 14)
|
||||
|
||||
;; Driver: builds the case table from the Unicode data records, then
;; writes "unicode-char-cases.ss" (replacing any existing file) containing
;; the search vector of code points followed by one adjustment vector per
;; case operation.
(let* ([table (remove-dups
                (compute-foldcase
                  (map data-case (get-unicode-data))))]
       [keys (list->vector (map car table))])
  ;; Vector made of slot `idx` of every entry's adjustment vector.
  (define (column idx)
    (list->vector
      (map (lambda (entry) (vector-ref (cdr entry) idx)) table)))
  ;; Pretty-prints the form (define name 'value).
  (define (emit name value)
    (pretty-print `(define ,name ',value)))
  (with-output-to-file "unicode-char-cases.ss"
    (lambda ()
      (printf ";;; DO NOT EDIT\n;;; automatically generated\n")
      (printf ";;; ~s entries in table\n" (vector-length keys))
      (emit 'charcase-search-vector keys)
      (for-each
        (lambda (spec)
          (emit (car spec) (column (cdr spec))))
        '((char-upcase-adjustment-vector . 0)
          (char-downcase-adjustment-vector . 1)
          (char-titlecase-adjustment-vector . 2)
          (char-foldcase-adjustment-vector . 3))))
    'replace))

(printf "Happy Happy Joy Joy\n")
|
|
@ -1,55 +1,10 @@
|
|||
#!/usr/bin/env ikarus --r6rs-script
|
||||
|
||||
(import (ikarus))
|
||||
(import
|
||||
(ikarus)
|
||||
(unicode-data))
|
||||
|
||||
|
||||
;; Reads one line from the current input port.
;;
;; Returns the characters up to (not including) the next #\newline as a
;; string.  Empty lines are skipped, so the result is never "".  Returns
;; the end-of-file object when the input is exhausted before any
;; character of a line has been read; a final line without a trailing
;; newline is still returned.
(define (read-line)
  (let f ([ac '()])
    (let ([x (read-char)])
      (cond
        [(eof-object? x)
         (if (null? ac)
             (eof-object)
             (list->string (reverse ac)))]
        [(char=? x #\newline)
         ;; An empty accumulator means a blank line: restart with a fresh
         ;; accumulator.  The loop takes exactly one argument, so it must
         ;; be passed explicitly.
         (if (null? ac)
             (f '())
             (list->string (reverse ac)))]
        [else (f (cons x ac))]))))
|
||||
|
||||
;; Returns the index of the first #\; in `str` at or after position `i`,
;; or `n` when none is found before index `n`.
(define (find-semi str i n)
  (let scan ([pos i])
    (if (or (fx= pos n)
            (char=? (string-ref str pos) #\;))
        pos
        (scan (+ pos 1)))))
|
||||
|
||||
;; Splits `str` at every #\; and returns the pieces as a list of strings.
;; When the scan position reaches the end of the string right after a
;; semicolon (or the string is empty) no further field is produced, so
;; "a;b;" yields ("a" "b") and "" yields ().
(define (split str)
  (let ([end (string-length str)])
    (let walk ([start 0])
      (if (= start end)
          '()
          (let* ([stop (find-semi str start end)]
                 [field (substring str start stop)])
            (if (= stop end)
                (list field)
                (cons field (walk (+ stop 1)))))))))
|
||||
|
||||
;; Reads every line from the current input port and returns, in input
;; order, a list of (code . category) pairs: the first field of each line
;; read as a hexadecimal number and the third field converted to a symbol.
(define (extract-uni-data)
  (let collect ([acc '()])
    (let ([line (read-line)])
      (if (eof-object? line)
          (reverse acc)
          (let* ([fields (split line)]
                 [num (car fields)]
                 [category (caddr fields)]
                 [code (read (open-input-string (format "#x~a" num)))])
            (collect (cons (cons code (string->symbol category))
                           acc)))))))
|
||||
|
||||
(define (codes-in-cats ls cats)
|
||||
(let f ([ls ls] [ac '()])
|
||||
(cond
|
||||
|
@ -78,7 +33,6 @@
|
|||
(cons i (f (+ i 1) #f ls))
|
||||
(f (+ i 1) #f ls))]))))
|
||||
|
||||
;; True when the lowest bit of the fixnum `n` is set.
(define (odd? n)
  (let ([low-bit (fxlogand n 1)])
    (= low-bit 1)))
|
||||
|
||||
(define (search-on? n v)
|
||||
(let ([k (- (vector-length v) 1)])
|
||||
|
@ -105,12 +59,16 @@
|
|||
(f (+ i 1) ls)))))
|
||||
|
||||
|
||||
;; Maps one record (a list of field strings) to a (code . category) pair:
;; the first field read as a hexadecimal number, and the third field
;; converted to a symbol.
(define (cat fields)
  (let* ([code-field (car fields)]
         [category-field (caddr fields)]
         [port (open-input-string (format "#x~a" code-field))])
    (cons (read port)
          (string->symbol category-field))))
|
||||
|
||||
|
||||
(let ([ls
|
||||
(with-input-from-file
|
||||
"UNIDATA/UnicodeData.txt"
|
||||
extract-uni-data)])
|
||||
|
||||
(let ([ls (map cat (get-unicode-data))])
|
||||
(let ([wanted
|
||||
(codes-in-cats ls
|
||||
'(Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pd Pc Po Sc Sm Sk So Co))])
|
||||
|
|
|
@ -0,0 +1,213 @@
|
|||
;;; DO NOT EDIT
|
||||
;;; automatically generated
|
||||
;;; 1080 entries in table
|
||||
(define charcase-search-vector
|
||||
'#(0 65 91 97 123 181 182 192 215 216 223 224 247 248 255 256 257 258 259 260
|
||||
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
|
||||
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
|
||||
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
|
||||
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
|
||||
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
|
||||
356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374
|
||||
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
|
||||
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
|
||||
414 415 416 417 418 419 420 421 422 423 424 425 426 428 429 430 431 432 433
|
||||
435 436 437 438 439 440 441 442 444 445 446 447 448 452 453 454 455 456 457
|
||||
458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
|
||||
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
|
||||
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
|
||||
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533
|
||||
534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
|
||||
553 554 555 556 557 558 559 560 561 562 563 564 570 571 572 573 574 575 577
|
||||
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 595 596 597 598
|
||||
600 601 602 603 604 608 609 611 612 616 617 618 619 620 623 624 626 627 629
|
||||
630 637 638 640 641 643 644 648 649 650 652 653 658 659 837 838 891 894 902
|
||||
903 904 908 910 912 913 940 941 944 945 962 963 972 973 976 977 978 981 982
|
||||
983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000
|
||||
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015
|
||||
1016 1017 1018 1019 1020 1021 1024 1040 1072 1104 1120 1121 1122 1123 1124
|
||||
1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139
|
||||
1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154
|
||||
1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
|
||||
1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191
|
||||
1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206
|
||||
1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221
|
||||
1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236
|
||||
1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251
|
||||
1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
|
||||
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281
|
||||
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296
|
||||
1297 1298 1299 1329 1369 1377 1415 4256 4304 7549 7550 7680 7681 7682 7683
|
||||
7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698
|
||||
7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713
|
||||
7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728
|
||||
7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743
|
||||
7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758
|
||||
7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773
|
||||
7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788
|
||||
7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803
|
||||
7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818
|
||||
7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7835 7840 7841
|
||||
7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856
|
||||
7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871
|
||||
7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886
|
||||
7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901
|
||||
7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916
|
||||
7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7936 7944
|
||||
7952 7960 7968 7976 7984 7992 8000 8008 8016 8017 8018 8019 8020 8021 8022
|
||||
8023 8025 8032 8040 8048 8050 8054 8056 8058 8060 8064 8072 8080 8088 8096
|
||||
8104 8112 8114 8115 8116 8120 8122 8124 8125 8126 8127 8131 8132 8136 8140
|
||||
8141 8144 8146 8152 8154 8157 8160 8162 8165 8166 8168 8170 8172 8173 8179
|
||||
8180 8184 8186 8188 8189 8486 8487 8490 8491 8492 8498 8499 8526 8531 8544
|
||||
8560 8576 8579 8580 8592 9398 9424 9450 11264 11312 11360 11361 11362 11363
|
||||
11364 11365 11366 11367 11368 11369 11370 11371 11372 11380 11381 11382
|
||||
11383 11392 11393 11394 11395 11396 11397 11398 11399 11400 11401 11402
|
||||
11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414
|
||||
11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426
|
||||
11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438
|
||||
11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450
|
||||
11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462
|
||||
11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474
|
||||
11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486
|
||||
11487 11488 11489 11490 11491 11492 11520 11568 65313 65339 65345 65371
|
||||
66560 66600 66640))
|
||||
(define char-upcase-adjustment-vector
|
||||
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -232 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 -300 195 0 0 -1 0
|
||||
-1 0 0 -1 0 0 -1 0 0 0 0 0 -1 0 0 97 0 0 0 -1 163 0 0 0 130 0 0 -1 0 -1 0
|
||||
-1 0 0 -1 0 0 0 -1 0 0 -1 0 0 -1 0 -1 0 0 -1 0 0 -1 0 56 0 0 -1 -2 0 -1 -2
|
||||
0 -1 -2 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -79 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 0 -1 -2 0 -1 0 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -210 -206 0 -205 0 -202 0 -203 0 -205 0 -207 0 -209
|
||||
-211 0 10743 0 -211 0 -213 0 -214 0 10727 0 -218 0 -218 0 -218 -69 -217 -71
|
||||
0 -219 0 84 0 130 0 0 0 0 0 0 0 0 -38 -37 0 -32 -31 -32 -64 -63 -62 -57 0
|
||||
-47 -54 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -86
|
||||
-80 7 0 0 -96 0 0 -1 0 0 -1 0 0 0 0 -32 -80 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 -15 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0
|
||||
8 0 8 0 8 0 8 0 8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205
|
||||
0 9 0 0 0 0 8 0 0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16
|
||||
0 0 -1 0 0 -26 0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
|
||||
(define char-downcase-adjustment-vector
|
||||
'#(0 32 0 0 0 0 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -199 0 1 0 1 0 1 0 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -121 1 0 1 0 1 0 0 0 210
|
||||
1 0 1 0 206 1 0 205 1 0 0 79 202 203 1 0 205 207 0 211 209 1 0 0 0 211 213
|
||||
0 214 1 0 1 0 1 0 218 1 0 218 0 1 0 218 1 0 217 1 0 1 0 219 1 0 0 1 0 0 0 0
|
||||
2 1 0 2 1 0 2 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 0 2 1 0 1 0 -97 -56 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -130 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 0 10795 1 0 -163 10792 0 1 0 -195 69 71 1 0 1 0 1 0 1 0 1 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 38
|
||||
0 37 64 63 0 32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 0 0 0 0 -60 0 0 1 0 -7 1 0 0 -130 80 32 0 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0 -8 0 0 0 0 0 0 0 -8 0 -8 0 -8
|
||||
0 0 0 0 -8 -74 -9 0 0 0 0 0 -86 -9 0 0 0 -8 -100 0 0 0 0 0 -8 -112 -7 0 0 0
|
||||
-128 -126 -9 0 -7517 0 -8383 -8262 0 28 0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0
|
||||
-10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))
|
||||
(define char-titlecase-adjustment-vector
|
||||
'#(0 0 0 -32 0 743 0 0 0 0 0 -32 0 -32 121 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -232 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 -300 195 0 0 -1 0
|
||||
-1 0 0 -1 0 0 -1 0 0 0 0 0 -1 0 0 97 0 0 0 -1 163 0 0 0 130 0 0 -1 0 -1 0
|
||||
-1 0 0 -1 0 0 0 -1 0 0 -1 0 0 -1 0 -1 0 0 -1 0 0 -1 0 56 0 1 0 -1 1 0 -1 1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -79 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 1 0 -1 0 -1 0 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -210 -206 0 -205 0 -202 0 -203 0 -205 0 -207 0 -209 -211 0 10743
|
||||
0 -211 0 -213 0 -214 0 10727 0 -218 0 -218 0 -218 -69 -217 -71 0 -219 0 84
|
||||
0 130 0 0 0 0 0 0 0 0 -38 -37 0 -32 -31 -32 -64 -63 -62 -57 0 -47 -54 0 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -86 -80 7 0 0 -96
|
||||
0 0 -1 0 0 -1 0 0 0 0 -32 -80 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1
|
||||
0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 -15 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 0 -48 0 0 0 3814 0 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -59 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 8 0 8 0 8 0 8 0 8 0 0 8 0 8 0 8 0 8 0
|
||||
8 0 74 86 100 128 112 126 8 0 8 0 8 0 8 0 9 0 0 0 0 0 -7205 0 9 0 0 0 0 8 0
|
||||
0 0 0 8 0 7 0 0 0 0 0 9 0 0 0 0 0 0 0 0 0 0 0 0 -28 0 0 -16 0 0 -1 0 0 -26
|
||||
0 0 -48 0 -1 0 0 0 -10795 -10792 0 -1 0 -1 0 -1 0 0 -1 0 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0 -1 0
|
||||
-1 0 -1 0 -7264 0 0 0 -32 0 0 -40 0))
|
||||
(define char-foldcase-adjustment-vector
|
||||
'#(0 32 0 0 0 775 0 32 0 32 0 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -199 -200 1 0 1 0 1
|
||||
0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -121 1 0 1 0 1 0 -268
|
||||
0 210 1 0 1 0 206 1 0 205 1 0 0 79 202 203 1 0 205 207 0 211 209 1 0 0 0
|
||||
211 213 0 214 1 0 1 0 1 0 218 1 0 218 0 1 0 218 1 0 217 1 0 1 0 219 1 0 0 1
|
||||
0 0 0 0 2 1 0 2 1 0 2 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 0 2 1 0 1 0 -97 -56 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -130 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 0 10795 1 0 -163 10792 0 1 0 -195 69 71 1 0 1 0 1 0 1 0 1 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 116
|
||||
0 0 0 38 0 37 64 63 0 32 0 0 0 0 1 0 0 0 -30 -25 0 -15 -22 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 -54 -48 0 0 -60 -64 0 1 0 -7 1 0 0 -130
|
||||
80 32 0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 15 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 48 0 0 0 7264 0 0 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
|
||||
0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -58
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 -8 0 -8 0 -8 0 -8 0 -8 0 0 0 0 0 0 0 0 -8 0
|
||||
-8 0 0 0 0 0 0 0 -8 0 -8 0 -8 0 0 0 0 -8 -74 -9 0 -7173 0 0 0 -86 -9 0 0 0
|
||||
-8 -100 0 0 0 0 0 -8 -112 -7 0 0 0 -128 -126 -9 0 -7517 0 -8383 -8262 0 28
|
||||
0 0 0 16 0 0 1 0 0 26 0 0 48 0 1 0 -10743 -3814 -10727 0 0 1 0 1 0 1 0 0 1
|
||||
0 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
|
||||
1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 32 0 0 0 40 0 0))
|
|
@ -0,0 +1,50 @@
|
|||
|
||||
|
||||
;;; Library (unicode-data)
;;;
;;; Exports `get-unicode-data`, which reads "UNIDATA/UnicodeData.txt" and
;;; returns its contents as a list of records, each record being the list
;;; of the line's semicolon-separated fields (as strings).
(library (unicode-data)
  (export get-unicode-data)
  (import (ikarus))

  ;; Reads one line from the current input port.  Returns the characters
  ;; up to (not including) the next #\newline as a string; empty lines are
  ;; skipped.  Returns the end-of-file object when input ends before any
  ;; character of a line has been read.
  (define (read-line)
    (let f ([ac '()])
      (let ([x (read-char)])
        (cond
          [(eof-object? x)
           (if (null? ac)
               (eof-object)
               (list->string (reverse ac)))]
          [(char=? x #\newline)
           ;; Blank line: restart with a fresh accumulator.  The loop takes
           ;; exactly one argument, so it must be passed explicitly.
           (if (null? ac)
               (f '())
               (list->string (reverse ac)))]
          [else (f (cons x ac))]))))

  ;; Index of the first #\; in `str` at or after `i`, or `n` if there is
  ;; none before index `n`.
  (define (find-semi str i n)
    (cond
      [(or (fx= i n)
           (char=? (string-ref str i) #\;)) i]
      [else (find-semi str (+ i 1) n)]))

  ;; Splits `str` at every #\; into a list of strings.  Reaching the end
  ;; of the string right after a semicolon (or an empty string) produces a
  ;; final "" field, so a trailing empty field is preserved.
  (define (split str)
    (let f ([i 0] [n (string-length str)])
      (cond
        [(= i n) '("")]
        [else
         (let ([j (find-semi str i n)])
           (cond
             [(= j n) (list (substring str i j))]
             [else
              (cons (substring str i j)
                    (f (+ j 1) n))]))])))

  ;; Reads all remaining lines from the current input port and returns
  ;; the list of their field lists, in input order.
  (define (extract-uni-data)
    (let f ([ls '()])
      (let ([line (read-line)])
        (cond
          [(eof-object? line)
           (reverse ls)]
          [else
           (let ([fields (split line)])
             (f (cons fields ls)))]))))

  ;; Parses "UNIDATA/UnicodeData.txt" (path relative to the current
  ;; directory) into a list of field lists.
  (define (get-unicode-data)
    (with-input-from-file
      "UNIDATA/UnicodeData.txt"
      extract-uni-data)))
|
Loading…
Reference in New Issue