2007-06-17 19:49:40 -04:00
|
|
|
#!/usr/bin/env ikarus --r6rs-script
;;; Ikarus Scheme -- A compiler for R6RS Scheme.
;;; Copyright (C) 2006,2007,2008 Abdulaziz Ghuloum
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License version 3 as
;;; published by the Free Software Foundation.
;;;
;;; This program is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.

(import
|
|
|
|
(ikarus)
|
|
|
|
(unicode-data))
|
|
|
|
|
2007-10-25 16:27:34 -04:00
|
|
|
;;; License header written verbatim (via `display`) at the top of the
;;; generated file.  Every line inside the string is a Scheme comment so
;;; the generated file stays loadable; the text ends with a blank line
;;; that separates the header from what follows.
;;; A string literal is self-evaluating, so no quote is needed.
(define license
";;; Ikarus Scheme -- A compiler for R6RS Scheme.
;;; Copyright (C) 2006,2007,2008 Abdulaziz Ghuloum
;;;
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License version 3 as
;;; published by the Free Software Foundation.
;;;
;;; This program is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see <http://www.gnu.org/licenses/>.

")
|
|
|
|
|
|
|
|
|
2007-06-17 19:49:40 -04:00
|
|
|
;;; Convert X, a string of hexadecimal digits without any "#x" prefix,
;;; into an exact integer.
;;; The digits are parsed directly with radix 16 rather than by building
;;; "#x..." text and running the reader over it.  An error naming the
;;; offending string is raised when X is not a valid hexadecimal number.
(define (hex->num x)
  (or (string->number x 16)
      (error 'hex->num "invalid hexadecimal string" x)))
|
2007-06-17 19:49:40 -04:00
|
|
|
|
|
|
|
;;; Turn one record FIELDS (a list of strings whose first element is the
;;; code point in hex) into a table entry (code . vector).
;;; Each mapping field is stored as an offset from the code point, with an
;;; empty field giving offset 0.  Initial vector layout:
;;;   0 uc offset     1 lc offset     2 tc offset
;;;   3 #f            4 0
;;;   5 uc offset     6 lc offset     7 tc offset
;;; Slots 3 and 4 are placeholders that other parts of this script
;;; overwrite later; slots 5-7 start as copies of slots 0-2.
(define (data-case fields)
  (let* ([code (hex->num (car fields))]
         [delta (lambda (field)
                  (if (string=? field "")
                      0
                      (- (hex->num field) code)))]
         [up (delta (list-ref fields uc-index))]
         [down (delta (list-ref fields lc-index))]
         [title (delta (list-ref fields tc-index))])
    (cons code (vector up down title #f 0 up down title))))
|
2007-06-17 19:49:40 -04:00
|
|
|
|
|
|
|
;;; Collapse runs of adjacent entries that carry the same payload.
;;; LS is a list of pairs; an entry is kept only when its cdr is not
;;; `equal?` to the cdr of the most recently kept entry.  The comparison
;;; value starts out as #f.  Order of the surviving entries is preserved.
(define (remove-dups ls)
  (let loop ([rest ls] [prev #f] [kept '()])
    (if (null? rest)
        (reverse kept)
        (let* ([entry (car rest)]
               [payload (cdr entry)])
          (if (equal? payload prev)
              (loop (cdr rest) prev kept)
              (loop (cdr rest) payload (cons entry kept)))))))
|
|
|
|
|
|
|
|
;;; Fill in slot 3 (the char-foldcase offset) of every entry in LS and
;;; return LS.
;;; LS is an association list (code . vector) where vector slot 0 holds the
;;; uppercase offset and slot 1 the lowercase offset.  The fold target of a
;;; code point is lower(upper(code)), stored as an offset from the code.
;;; The vectors are mutated in place.  An error is raised if a code point
;;; reached through a mapping has no entry in LS.
(define (compute-foldcase ls)
  ;; Keys are integers, so look them up with assv (eqv?); eq? on numbers
  ;; is not guaranteed to be meaningful.
  (define (find-vec idx)
    (cond
      [(assv idx ls) => cdr]
      [else (error 'find-vec "code point is missing from the table" idx)]))
  (define (upper i)
    (+ i (vector-ref (find-vec i) 0)))
  (define (lower i)
    (+ i (vector-ref (find-vec i) 1)))
  (for-each
    (lambda (x)
      (let ([idx (car x)] [vec (cdr x)])
        (vector-set! vec 3
          (- (lower (upper idx)) idx))))
    ls)
  ;; The two turkic chars fold to themselves (offset 0).
  (for-each
    (lambda (idx)
      (vector-set! (find-vec idx) 3 0))
    '(#x130 #x131))
  ls)
|
|
|
|
|
|
|
|
;;; Position, within a record's field list, of the field that data-case
;;; reads as `uc` (field 12 of UnicodeData.txt is the simple uppercase
;;; mapping).
(define uc-index 12)
|
|
|
|
;;; Position, within a record's field list, of the field that data-case
;;; reads as `lc` (field 13 of UnicodeData.txt is the simple lowercase
;;; mapping).
(define lc-index 13)
|
|
|
|
;;; Position, within a record's field list, of the field that data-case
;;; reads as `tc` (field 14 of UnicodeData.txt is the simple titlecase
;;; mapping).
(define tc-index 14)
|
|
|
|
|
|
|
|
|
|
|
|
;;; Strip leading #\space characters from STR.
;;; Only the front of the string is trimmed; interior and trailing spaces
;;; are left alone.  When STR has no leading space it is returned as is.
(define (remove-spaces str)
  (let ([len (string-length str)])
    (let skip ([i 0])
      (cond
        [(and (< i len) (char=? (string-ref str i) #\space))
         (skip (+ i 1))]
        [(= i 0) str]
        [else (substring str i len)]))))
|
|
|
|
|
|
|
|
;;; Break STR into the list of substrings delimited by single #\space
;;; characters.  Every space is a separator, so adjacent, leading or
;;; trailing spaces produce empty strings in the result, and a string
;;; without spaces yields a one-element list.
(define (split str)
  (let ([len (string-length str)])
    (let scan ([start 0] [pos 0] [pieces '()])
      (cond
        [(= pos len)
         (reverse (cons (substring str start len) pieces))]
        [(char=? (string-ref str pos) #\space)
         (scan (+ pos 1)
               (+ pos 1)
               (cons (substring str start pos) pieces))]
        [else (scan start (+ pos 1) pieces)]))))
|
|
|
|
|
|
|
|
;;; Rebuild the non-empty proper list LS as an improper list whose final
;;; cdr is the last element: (a b c) => (a b . c).  A one-element list
;;; yields that element itself.
(define (improperize ls)
  (let ([rev (reverse ls)])
    ;; Start from the last element and push the earlier ones in front.
    (let loop ([rest (cdr rev)] [acc (car rev)])
      (if (null? rest)
          acc
          (loop (cdr rest) (cons (car rest) acc))))))
|
|
|
|
|
|
|
|
;;; Convert case-folding records into a list of (code . folding) pairs.
;;; LS is a list of records, each a list of strings: code point, status,
;;; mapping.  Only records whose status (after trimming leading spaces) is
;;; "C" or "F" are kept.  The folding is an integer offset from the code
;;; point when the mapping is a single code point, and otherwise an
;;; improper list of the mapped characters.
(define (convert-full-fold-fields ls)
  ;; Build the (code . folding) pair for one accepted record.
  (define (fold-entry fields)
    (let* ([code (hex->num (remove-spaces (car fields)))]
           [targets (map (lambda (tok) (hex->num (remove-spaces tok)))
                         (split (remove-spaces (caddr fields))))])
      (cons code
            (if (= (length targets) 1)
                (- (car targets) code)
                (improperize (map integer->char targets))))))
  (let loop ([rest ls] [acc '()])
    (if (null? rest)
        (reverse acc)
        (let ([fields (car rest)])
          (loop (cdr rest)
                (if (member (remove-spaces (cadr fields)) '("C" "F"))
                    (cons (fold-entry fields) acc)
                    acc))))))
|
|
|
|
|
2008-08-09 10:12:22 -04:00
|
|
|
;;; Special-casing data for one code point.  Fields, in constructor order:
;;; lc, tc, uc -- convert-special-casing fills them from record fields
;;; 1, 2 and 3 respectively.
(define-struct spcase (lc tc uc))
|
|
|
|
|
|
|
|
;;; Convert special-casing records into a list of (code . spcase) pairs.
;;; LS is a list of records, each a list of strings.  A record is used only
;;; when it has at most four fields or when its fifth field is empty after
;;; trimming leading spaces; all other records are skipped.  Fields 1, 2
;;; and 3 become the lc, tc and uc members of the spcase.  Each is an
;;; integer offset from the code point when it names a single code point,
;;; and otherwise an improper list of characters.
(define (convert-special-casing ls)
  ;; True when the record carries nothing in its fifth field.
  (define (unconditional? fields)
    (or (<= (length fields) 4)
        (= 0 (string-length (remove-spaces (list-ref fields 4))))))
  ;; Build the (code . spcase) pair for one accepted record.
  (define (special-entry fields)
    (let ([code (hex->num (remove-spaces (car fields)))])
      (define (mapping str)
        (let ([targets (map (lambda (tok) (hex->num (remove-spaces tok)))
                            (split (remove-spaces str)))])
          (if (= (length targets) 1)
              (- (car targets) code)
              (improperize (map integer->char targets)))))
      (cons code
            (make-spcase
              (mapping (list-ref fields 1))
              (mapping (list-ref fields 2))
              (mapping (list-ref fields 3))))))
  (let loop ([rest ls] [acc '()])
    (if (null? rest)
        (reverse acc)
        (let ([fields (car rest)])
          (loop (cdr rest)
                (if (unconditional? fields)
                    (cons (special-entry fields) acc)
                    acc))))))
|
|
|
|
|
|
|
|
;;; Like with-output-to-file, but first deletes FILE when it already
;;; exists, so THUNK's output always goes to a freshly created file.
(define with-output-to-file*
  (lambda (file thunk)
    (if (file-exists? file)
        (delete-file file))
    (with-output-to-file file thunk)))
|
|
|
|
|
2007-06-17 19:49:40 -04:00
|
|
|
;;; Driver: build the case table from the three Unicode data files and
;;; write it to "unicode-char-cases.ss" as a series of vector definitions.
;;; Table keys are integers, so entries are looked up with assv (eqv?).
(let ([ls
       ;;; get initial table
       (compute-foldcase
         (map data-case
              (get-unicode-data "UNIDATA/UnicodeData.txt")))])
  ;;; compute the string-foldcase data (slot 4 of each entry)
  (for-each
    (lambda (x)
      (let ([n (car x)] [chars (cdr x)])
        (cond
          [(assv n ls) =>
           (lambda (p)
             (vector-set! (cdr p) 4 chars))]
          [else (error #f "not there" n)])))
    (convert-full-fold-fields
      (get-unicode-data "UNIDATA/CaseFolding.txt")))
  ;;; overwrite the string upcase/downcase/titlecase data (slots 5-7)
  ;;; for code points that have a special-casing record
  (for-each
    (lambda (x)
      (let ([n (car x)] [cases (cdr x)])
        (cond
          [(assv n ls) =>
           (lambda (p)
             (let ([v (cdr p)])
               (vector-set! v 5 (spcase-uc cases))
               (vector-set! v 6 (spcase-lc cases))
               (vector-set! v 7 (spcase-tc cases))))]
          [else (error #f "not here" n)])))
    (convert-special-casing
      (get-unicode-data "UNIDATA/SpecialCasing.txt")))
  ;;; done
  (let ([ls (remove-dups ls)])
    ;; Print (define NAME '#(...)) holding slot IDX of every table entry.
    (define (p name idx)
      (pretty-print
        `(define ,name
           ',(list->vector (map (lambda (x) (vector-ref (cdr x) idx)) ls)))))
    (parameterize ([print-unicode #f] [pretty-width 80])
      ;; v0 holds the code points; the vectors printed by p are parallel
      ;; to it.
      (let ([v0 (list->vector (map car ls))])
        (with-output-to-file* "unicode-char-cases.ss"
          (lambda ()
            (display license)
            (printf ";;; DO NOT EDIT\n;;; automatically generated\n")
            (printf ";;; ~s entries in table\n" (vector-length v0))
            (pretty-print `(define charcase-search-vector ',v0))
            (p 'char-upcase-adjustment-vector 0)
            (p 'char-downcase-adjustment-vector 1)
            (p 'char-titlecase-adjustment-vector 2)
            (p 'char-foldcase-adjustment-vector 3)
            (p 'string-foldcase-adjustment-vector 4)
            (p 'string-upcase-adjustment-vector 5)
            (p 'string-downcase-adjustment-vector 6)
            (p 'string-titlecase-adjustment-vector 7)
            ))))))

;;; Printed to the current output port once the file has been written.
(printf "Happy Happy Joy Joy\n")
|