;;; Ikarus Scheme -- A compiler for R6RS Scheme.
;;; Copyright (C) 2006,2007,2008 Abdulaziz Ghuloum
;;; Permission is hereby granted, free of charge, to any person obtaining a
;;; copy of this software and associated documentation files (the "Software"),
;;; to deal in the Software without restriction, including without limitation
;;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
;;; and/or sell copies of the Software, and to permit persons to whom the
;;; Software is furnished to do so, subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included in
;;; all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
;;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
;;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
;;; DEALINGS IN THE SOFTWARE.
2007-06-17 08:04:01 -04:00
|
|
|
|
|
|
|
(library (unicode-data)
|
|
|
|
(export get-unicode-data)
|
2008-10-28 19:59:40 -04:00
|
|
|
(import (rnrs))
|
2007-06-17 08:04:01 -04:00
|
|
|
|
2007-06-17 19:49:40 -04:00
|
|
|
;; Scan STR forward from index I and return the index of the first
;; #\; or #\# character found.  If the scan reaches index N without
;; finding one, N itself is returned.
;; (Callers in this file pass (string-length str) as N.)
(define (find-semi/hash str i n)
  (let scan ([k i])
    (if (or (fx=? k n)
            (let ([c (string-ref str k)])
              (or (char=? c #\;) (char=? c #\#))))
        k
        (scan (+ k 1)))))
|
2007-06-17 08:04:01 -04:00
|
|
|
|
2007-10-10 05:28:07 -04:00
|
|
|
;; Return STR with leading and trailing #\space characters removed.
;; Only the space character is stripped; other whitespace (e.g. tabs)
;; is left in place.  A string that is empty or all spaces yields "".
(define (cleanup str)
  (define len (string-length str))
  (define (space-at? k)
    (char=? #\space (string-ref str k)))
  ;; Index of the first non-space character, or LEN if there is none.
  (define start
    (do ([k 0 (+ k 1)])
        ((or (= k len) (not (space-at? k))) k)))
  ;; One past the last non-space character, or -1 if there is none.
  (define end
    (do ([k (- len 1) (- k 1)])
        ((or (< k 0) (not (space-at? k)))
         (if (< k 0) k (+ k 1)))))
  (if (> end start)
      (substring str start end)
      ""))
|
|
|
|
|
2007-06-17 08:04:01 -04:00
|
|
|
;; Split one line of text into a list of fields.
;;
;; Fields are separated by #\; and each field is passed through
;; `cleanup` (leading/trailing spaces removed).  A #\# starts a comment:
;; it terminates the current field and everything after it is dropped.
;;
;; Results of note:
;;   - an empty line, or a line whose first character is #\#, gives ("");
;;   - a line ending in #\; gives a final "" field;
;;   - "a ; b # note" gives ("a" "b").
;;
;; The field-terminator test inspects the character at J (where the
;; scan stopped), not at I: at that point str[I] is already known not
;; to be #\#, so testing it could never detect a comment and the comment
;; text would be returned as extra fields.
(define (split str)
  (let f ([i 0] [n (string-length str)])
    (cond
      ;; End of line, or a comment begins exactly at a field start.
      [(or (= i n) (char=? (string-ref str i) #\#))
       '("")]
      [else
       (let* ([j (find-semi/hash str i n)]
              [field (cleanup (substring str i j))])
         (if (or (= j n) (char=? (string-ref str j) #\#))
             ;; Last field: ran off the end or hit a comment.
             (list field)
             ;; Stopped on #\; -- continue after the separator.
             (cons field (f (+ j 1) n))))])))
|
|
|
|
|
|
|
|
;; Read lines from the current input port until end of file and return
;; a list, in input order, of the field lists produced by `split`.
;; Lines whose split result is () or ("") are skipped.
(define (extract-uni-data)
  (define (blank-record? fields)
    (or (null? fields) (equal? fields '(""))))
  (let loop ([acc '()])
    (let ([line (get-line (current-input-port))])
      (if (eof-object? line)
          ;; Records were consed in reverse; restore input order.
          (reverse acc)
          (let ([fields (split line)])
            (loop (if (blank-record? fields)
                      acc
                      (cons fields acc))))))))
|
2007-06-17 08:04:01 -04:00
|
|
|
|
2007-06-17 19:49:40 -04:00
|
|
|
;; Open FILENAME as the current input port and return the list of
;; records read from it by `extract-uni-data`.
(define (get-unicode-data filename)
  (with-input-from-file filename
    (lambda ()
      (extract-uni-data))))
) ; closes (library (unicode-data) ...)
|