;;; Ikarus Scheme -- A compiler for R6RS Scheme. ;;; Copyright (C) 2006,2007 Abdulaziz Ghuloum ;;; ;;; This program is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License version 3 as ;;; published by the Free Software Foundation. ;;; ;;; This program is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see <http://www.gnu.org/licenses/>. (library (ikarus unicode-data) (export unicode-printable-char? char-downcase char-upcase char-titlecase char-foldcase char-ci=? char-ci<? char-ci<=? char-ci>? char-ci>=? string-ci=? string-ci<? string-ci<=? string-ci>? string-ci>=? string-foldcase char-general-category char-alphabetic? char-numeric? char-whitespace? char-upper-case? char-lower-case? char-title-case? ) (import (ikarus system $fx) (ikarus system $vectors) (ikarus system $chars) (ikarus system $pairs) (ikarus system $strings) (except (ikarus) char-downcase char-upcase char-titlecase char-foldcase char-ci=? char-ci<? char-ci<=? char-ci>? char-ci>=? string-ci=? string-ci<? string-ci<=? string-ci>? string-ci>=? string-foldcase char-general-category char-alphabetic? char-numeric? char-whitespace? char-upper-case? char-lower-case? char-title-case? )) (include "unicode/unicode-char-cases.ss") (include "unicode/unicode-charinfo.ss") (define (binary-search n v) (let ([k ($fx- ($vector-length v) 1)]) (let f ([i 0] [k k] [n n] [v v]) (cond [($fx= i k) i] [else (let ([j ($fxsra ($fx+ i ($fx+ k 1)) 1)]) (cond [($fx<= ($vector-ref v j) n) (f j k n v)] [else (f i ($fx- j 1) n v)]))])))) (define (lookup-char-info c) (let ([v unicode-categories-lookup-vector] [t unicode-categories-values-vector]) (define (f i k n) (cond [($fx= i k) (let ([idx ($vector-ref t i)]) (if (fixnum? idx) idx (let ([idx2 ($fx- n ($vector-ref v i))]) ($vector-ref idx idx2))))] [else (let ([j ($fxsra ($fx+ i ($fx+ k 1)) 1)]) (cond [($fx<= ($vector-ref v j) n) (f j k n)] [else (f i ($fx- j 1) n)]))])) (f 0 (fx- (vector-length v) 1) (char->integer c)))) (define (char-general-category c) (if (char? c) (vector-ref unicode-categories-name-vector (fxlogand 63 (lookup-char-info c))) (die 'char-general-category "not a char" c))) (define (char-has-property? c prop-val who) (if (char? c) (not (fxzero? (fxlogand (lookup-char-info c) prop-val))) (die who "not a char" c))) (define (unicode-printable-char? c) (char-has-property? c constituent-property 'unicode-printable-char?)) (define (char-alphabetic? c) (char-has-property? c alphabetic-property 'char-alphabetic?)) (define (char-numeric? c) (char-has-property? c numeric-property 'char-numeric?)) (define (char-whitespace? c) (char-has-property? c whitespace-property 'char-whitespace?)) (define (char-upper-case? c) (char-has-property? c uppercase-property 'char-upper-case?)) (define (char-lower-case? c) (char-has-property? c lowercase-property 'char-lower-case?)) (define (char-title-case? c) (char-has-property? c titlecase-property 'char-title-case?)) (define (convert-char x adjustment-vec) (let ([n ($char->fixnum x)]) (let ([idx (binary-search n charcase-search-vector)]) (let ([adj ($vector-ref adjustment-vec idx)]) ($fx+ adj n))))) (define (char-downcase x) (if (char? x) ($fixnum->char (convert-char x char-downcase-adjustment-vector)) (die 'char-downcase "not a character" x))) (define (char-upcase x) (if (char? x) ($fixnum->char (convert-char x char-upcase-adjustment-vector)) (die 'char-downcase "not a character" x))) (define (char-titlecase x) (if (char? x) ($fixnum->char (convert-char x char-titlecase-adjustment-vector)) (die 'char-downcase "not a character" x))) (define (char-foldcase x) (if (char? x) ($fixnum->char ($fold x)) (die 'char-downcase "not a character" x))) (define ($fold x) (convert-char x char-foldcase-adjustment-vector)) (define (char-ci-loop c0 ls p? who) (or (null? ls) (let ([c1 (car ls)]) (unless (char? c1) (die who "not a char" c1)) (let ([c1 ($fold c1)]) (if (p? c0 c1) (char-ci-loop c1 (cdr ls) p? who) (let f ([ls (cdr ls)] [who who]) (cond [(null? ls) #f] [(char? (car ls)) (f (cdr ls) who)] [else (die who "not a char" (car ls))]))))))) (define char-ci=? (case-lambda [(x y) (if (char? x) (or (eq? x y) (if (char? y) ($fx= ($fold x) ($fold y)) (die 'char-ci=? "not a char" y))) (die 'char-ci=? "not a char" x))] [(x) (or (char? x) (die 'char-ci=? "not a char" x))] [(x . x*) (if (char? x) (char-ci-loop x x* char=? 'char-ci=?) (die 'char-ci=? "not a char" x))])) (define char-ci<? (case-lambda [(x y) (if (char? x) (or (eq? x y) (if (char? y) ($fx< ($fold x) ($fold y)) (die 'char-ci<? "not a char" y))) (die 'char-ci<? "not a char" x))] [(x) (or (char? x) (die 'char-ci<? "not a char" x))] [(x . x*) (if (char? x) (char-ci-loop x x* char<? 'char-ci<?) (die 'char-ci<? "not a char" x))])) (define char-ci<=? (case-lambda [(x y) (if (char? x) (or (eq? x y) (if (char? y) ($fx<= ($fold x) ($fold y)) (die 'char-ci<=? "not a char" y))) (die 'char-ci<=? "not a char" x))] [(x) (or (char? x) (die 'char-ci<=? "not a char" x))] [(x . x*) (if (char? x) (char-ci-loop x x* char<=? 'char-ci<=?) (die 'char-ci<=? "not a char" x))])) (define char-ci>? (case-lambda [(x y) (if (char? x) (or (eq? x y) (if (char? y) ($fx> ($fold x) ($fold y)) (die 'char-ci>? "not a char" y))) (die 'char-ci>? "not a char" x))] [(x) (or (char? x) (die 'char-ci>? "not a char" x))] [(x . x*) (if (char? x) (char-ci-loop x x* char>? 'char-ci>?) (die 'char-ci>? "not a char" x))])) (define char-ci>=? (case-lambda [(x y) (if (char? x) (or (eq? x y) (if (char? y) ($fx>= ($fold x) ($fold y)) (die 'char-ci>=? "not a char" y))) (die 'char-ci>=? "not a char" x))] [(x) (or (char? x) (die 'char-ci>=? "not a char" x))] [(x . x*) (if (char? x) (char-ci-loop x x* char>=? 'char-ci>=?) (die 'char-ci>=? "not a char" x))])) (define ($string-foldcase str) (define (extend-length str ac) (define (chars ac n) (cond [(null? ac) n] [else (chars (cdr ac) (let f ([p (cdar ac)] [n n]) (cond [(pair? p) (f (cdr p) (+ n 1))] [else n])))])) (let ([dst-len (chars ac (string-length str))]) (let f ([str str] [dst (make-string dst-len)] [i 0] [j 0] [ac (reverse ac)]) (cond [(null? ac) (string-copy! str i dst j (fx- (string-length str) i)) dst] [else (let ([idx (caar ac)] [c* (cdar ac)] [ac (cdr ac)]) (let ([cnt (fx- idx i)]) (string-copy! str i dst j cnt) (let g ([str str] [dst dst] [i (fx+ i cnt)] [j (fx+ j cnt)] [ac ac] [c* c*]) (cond [(pair? c*) (string-set! dst j (car c*)) (g str dst i (fx+ j 1) ac (cdr c*))] [else (string-set! dst j c*) (f str dst (fx+ i 1) (fx+ j 1) ac)]))))])))) (let ([n (string-length str)]) (let f ([str str] [dst (make-string n)] [i 0] [n n] [ac '()]) (cond [($fx= i n) (if (null? ac) dst (extend-length dst ac))] [else (let* ([cn ($char->fixnum ($string-ref str i))]) (let ([n/ls (vector-ref string-foldcase-adjustment-vector (binary-search cn charcase-search-vector))]) (cond [(fixnum? n/ls) (string-set! dst i ($fixnum->char ($fx+ cn n/ls))) (f str dst ($fxadd1 i) n ac)] [else (f str dst (fxadd1 i) n (cons (cons i n/ls) ac))])))])))) (define (string-foldcase str) (if (string? str) ($string-foldcase str) (die 'string-foldcase "not a string" str))) ;;; FIXME: case-insensitive comparison procedures are slow. (define string-ci-cmp (lambda (who cmp) (case-lambda [(s1 s2) (if (string? s1) (if (string? s2) (cmp ($string-foldcase s1) ($string-foldcase s2)) (die who "not a string" s2)) (die who "not a string" s1))] [(s1 . s*) (if (string? s1) (let ([s1 ($string-foldcase s1)]) (let f ([s1 s1] [s* s*]) (cond [(null? s*) #t] [else (let ([s2 (car s*)]) (if (string? s2) (let ([s2 ($string-foldcase s2)]) (if (cmp s1 s2) (f s2 (cdr s*)) (let f ([s* (cdr s*)]) (cond [(null? s*) #f] [(string? (car s*)) (f (cdr s*))] [else (die who "not a string" (car s*))])))) (die who "not a string" s2)))]))) (die who "not a string" s1))]))) (define string-ci=? (string-ci-cmp 'string-ci=? string=?)) (define string-ci<? (string-ci-cmp 'string-ci<? string<?)) (define string-ci<=? (string-ci-cmp 'string-ci<=? string<=?)) (define string-ci>? (string-ci-cmp 'string-ci>? string>?)) (define string-ci>=? (string-ci-cmp 'string-ci>=? string>=?)) )