Fixed a bug in parsing UnicodeData.txt (not accounting for the <First>
and <Last> special tokens).
This commit is contained in:
parent
2adc9cb85d
commit
735803a312
scheme
|
@ -1 +1 @@
|
||||||
1740
|
1741
|
||||||
|
|
|
@ -211,6 +211,12 @@
|
||||||
[(equal? (cdar ls) (cdr st)) (f (+ i 1) st (cdr ls) ac)]
|
[(equal? (cdar ls) (cdr st)) (f (+ i 1) st (cdr ls) ac)]
|
||||||
[else (f 1 (car ls) (cdr ls) (cons (cons i st) ac))])))
|
[else (f 1 (car ls) (cdr ls) (cons (cons i st) ac))])))
|
||||||
|
|
||||||
|
;;; (string-suffix? s1 s2): #t when string s1 ends with string s2, else #f.
;;; s1 is the string being examined; s2 is the candidate suffix.
(define (string-suffix? s1 s2)
|
||||||
|
(let ([n1 (string-length s1)] [n2 (string-length s2)])
|
||||||
|
;; the length check comes first so the substring start (- n1 n2) is never negative
(and (>= n1 n2) (string=? (substring s1 (- n1 n2) n1) s2))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
; create table, placing all in category Cn until proven otherwise
|
; create table, placing all in category Cn until proven otherwise
|
||||||
(let ([tbl (make-table (category/flags 'Cn))])
|
(let ([tbl (make-table (category/flags 'Cn))])
|
||||||
(define (setprop n prop) (table-set! tbl n prop))
|
(define (setprop n prop) (table-set! tbl n prop))
|
||||||
|
@ -222,28 +228,28 @@
|
||||||
;;; field8: if set, then the char has the numeric property
|
;;; field8: if set, then the char has the numeric property
|
||||||
;;; field12: if set, then the char has upper-case mapping and is thus cased
|
;;; field12: if set, then the char has upper-case mapping and is thus cased
|
||||||
;;; field13: if set, then the char has lower-case mapping and is thus cased
|
;;; field13: if set, then the char has lower-case mapping and is thus cased
|
||||||
(let f ([ls (map (lambda (x)
|
(let f ([ls (get-unicode-data "UNIDATA/UnicodeData.txt")])
|
||||||
|
(unless (null? ls)
|
||||||
|
(let ([x (car ls)] [ls (cdr ls)])
|
||||||
(let ([n (hex->num (list-ref x 0))]
|
(let ([n (hex->num (list-ref x 0))]
|
||||||
[cclass (string->number (list-ref x 3))]
|
[cclass (string->number (list-ref x 3))]
|
||||||
[cat/flags (category/flags (string->symbol (list-ref x 2)))]
|
[cat/flags (category/flags (string->symbol (list-ref x 2)))]
|
||||||
[num (if (string=? (list-ref x 8) "") 0 numeric-property)]
|
[num (if (string=? (list-ref x 8) "") 0 numeric-property)]
|
||||||
[cased (if (and (string=? (list-ref x 12) "") (string=? (list-ref x 13) ""))
|
[cased (if (and (string=? (list-ref x 12) "") (string=? (list-ref x 13) ""))
|
||||||
0 cased-property)])
|
0 cased-property)])
|
||||||
(cons n (fxior num cased
|
(let ([props (fxior num cased
|
||||||
(fxsll cclass combining-class-shift)
|
(fxsll cclass combining-class-shift)
|
||||||
cat/flags))))
|
cat/flags)])
|
||||||
(get-unicode-data "UNIDATA/UnicodeData.txt"))])
|
(if (string-suffix? (list-ref x 1) "First>")
|
||||||
(unless (null? ls)
|
(let ([y (car ls)] [ls (cdr ls)])
|
||||||
(cond
|
(unless (string-suffix? (list-ref y 1) "Last>")
|
||||||
[(null? (cdr ls)) (setprop (caar ls) (cdar ls))]
|
(error #f "expected entry marked Last following entry marked First for ~x" n))
|
||||||
[(or (= (+ 1 (caar ls)) (caadr ls))
|
(let ([m (hex->num (list-ref y 0))])
|
||||||
(not (= (cdar ls) (cdadr ls))))
|
(do ([n n (fx+ n 1)])
|
||||||
(setprop (caar ls) (cdar ls))
|
((fx> n m))
|
||||||
(f (cdr ls))]
|
(setprop n props)))
|
||||||
[else
|
(f ls))
|
||||||
(let f ([n (caar ls)] [j (caadr ls)] [p (cdar ls)])
|
(begin (setprop n props) (f ls))))))))
|
||||||
(unless (> n j) (setprop n p) (f (+ n 1) j p)))
|
|
||||||
(f (cddr ls))])))
|
|
||||||
;;; interesting parts of each element in WordBreakProperty.txt are:
|
;;; interesting parts of each element in WordBreakProperty.txt are:
|
||||||
;;; field0: the character index, numeric
|
;;; field0: the character index, numeric
|
||||||
;;; field1: the word-break property
|
;;; field1: the word-break property
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue