2007-06-17 08:04:01 -04:00
|
|
|
|
|
|
|
|
|
|
|
(library (unicode-data)
  (export get-unicode-data)
  (import (ikarus))

  ;;; Reader for line-oriented data files in which every record is a
  ;;; list of `;`-separated fields and `#` starts a comment that runs
  ;;; to the end of the line.  The only export is get-unicode-data.

  ;; Reads one non-empty line from the current input port.
  ;; Returns the line as a string without its trailing newline, or the
  ;; eof object once the input is exhausted.  Empty lines are skipped,
  ;; so the result is never "".  A final line that lacks a terminating
  ;; newline is still returned.
  (define (read-nonblank-line)
    (let loop ([chars '()])
      (let ([c (read-char)])
        (cond
          [(eof-object? c)
           (if (null? chars)
               (eof-object)
               (list->string (reverse chars)))]
          [(char=? c #\newline)
           ;; Nothing accumulated means the line was empty: keep reading.
           (if (null? chars)
               (loop '())
               (list->string (reverse chars)))]
          [else (loop (cons c chars))]))))

  ;; Returns the smallest index k with i <= k < n such that str[k] is
  ;; `;` or `#`, or n when there is no such character.
  (define (find-semi/hash str i n)
    (let scan ([k i])
      (if (or (= k n) (memv (string-ref str k) '(#\; #\#)))
          k
          (scan (+ k 1)))))

  ;; Returns str without leading and trailing #\space characters.
  ;; Only #\space is stripped; tabs and other whitespace are kept.
  ;; An empty or all-space string yields "".
  (define (cleanup str)
    (let* ([n (string-length str)]
           ;; lo = index of the first non-space char, or n if none.
           [lo (let skip ([i 0])
                 (if (and (< i n) (char=? #\space (string-ref str i)))
                     (skip (+ i 1))
                     i))]
           ;; hi = index of the last non-space char, or -1 if none.
           [hi (let skip ([i (- n 1)])
                 (if (and (>= i 0) (char=? #\space (string-ref str i)))
                     (skip (- i 1))
                     i))])
      ;; hi = lo means exactly one character survives trimming; it must
      ;; be kept, hence >= rather than >.
      (if (>= hi lo)
          (substring str lo (+ hi 1))
          "")))

  ;; Splits one line into its list of trimmed fields.
  ;; Fields are separated by `;`.  A `#` ends the record: the text
  ;; before it is the last field and everything after it is ignored.
  ;; A line that is empty, or that holds nothing but a comment
  ;; (optionally preceded by spaces), yields ("").  A trailing `;`
  ;; yields a final "" field.
  (define (split str)
    (let ([n (string-length str)])
      (let f ([i 0])
        (if (or (= i n) (char=? (string-ref str i) #\#))
            '("")
            (let* ([j (find-semi/hash str i n)]
                   [field (cleanup (substring str i j))])
              ;; j is either n or the position of the delimiter that
              ;; ended this field.  Look at str[j], not str[i], to tell
              ;; a comment start from a field separator.
              (if (or (= j n) (char=? (string-ref str j) #\#))
                  (list field)
                  (cons field (f (+ j 1)))))))))

  ;; Reads every remaining line from the current input port and returns
  ;; the list of records (each a list of field strings) in input order.
  ;; Lines whose split result is () or ("") are dropped.
  (define (extract-uni-data)
    (let f ([records '()])
      (let ([line (read-nonblank-line)])
        (if (eof-object? line)
            (reverse records)
            (let ([fields (split line)])
              (if (or (null? fields) (equal? fields '("")))
                  (f records)
                  (f (cons fields records))))))))

  ;; Opens filename as the current input port and returns the list of
  ;; records produced by extract-uni-data for its contents.
  (define (get-unicode-data filename)
    (with-input-from-file
      filename
      extract-uni-data)))
|