469 lines
16 KiB
Scheme
469 lines
16 KiB
Scheme
; Taken directly from the SRFI document.
|
|
|
|
(define-syntax let-values
|
|
(syntax-rules ()
|
|
((let-values (?binding ...) ?body0 ?body1 ...)
|
|
(let-values "bind" (?binding ...) () (begin ?body0 ?body1 ...)))
|
|
|
|
((let-values "bind" () ?tmps ?body)
|
|
(let ?tmps ?body))
|
|
|
|
((let-values "bind" ((?b0 ?e0) ?binding ...) ?tmps ?body)
|
|
(let-values "mktmp" ?b0 ?e0 () (?binding ...) ?tmps ?body))
|
|
|
|
((let-values "mktmp" () ?e0 ?args ?bindings ?tmps ?body)
|
|
(call-with-values
|
|
(lambda () ?e0)
|
|
(lambda ?args
|
|
(let-values "bind" ?bindings ?tmps ?body))))
|
|
|
|
((let-values "mktmp" (?a . ?b) ?e0 (?arg ...) ?bindings (?tmp ...) ?body)
|
|
(let-values "mktmp" ?b ?e0 (?arg ... x) ?bindings (?tmp ... (?a x)) ?body))
|
|
|
|
((let-values "mktmp" ?a ?e0 (?arg ...) ?bindings (?tmp ...) ?body)
|
|
(call-with-values
|
|
(lambda () ?e0)
|
|
(lambda (?arg ... . x)
|
|
(let-values "bind" ?bindings (?tmp ... (?a x)) ?body))))))
|
|
|
|
(define-syntax let*-values
|
|
(syntax-rules ()
|
|
((let*-values () ?body0 ?body1 ...)
|
|
(begin ?body0 ?body1 ...))
|
|
|
|
((let*-values (?binding0 ?binding1 ...) ?body0 ?body1 ...)
|
|
(let-values (?binding0)
|
|
(let*-values (?binding1 ...) ?body0 ?body1 ...)))))
|
|
|
|
(define (add1 x) (+ x 1))
|
|
(define-syntax when
|
|
(syntax-rules
|
|
()
|
|
((when test expr ...)
|
|
(if test (begin expr ...)))))
|
|
|
|
(define-syntax unless
|
|
(syntax-rules
|
|
()
|
|
((unless test expr ...)
|
|
(if (not test) (begin expr ...)))))
|
|
|
|
(define (sub1 x)
|
|
(- x 1))
|
|
|
|
(define (void . a)
|
|
(if #f #f))
|
|
|
|
(define-syntax begin0
|
|
(syntax-rules
|
|
()
|
|
((begin0 expr1 expr ...)
|
|
(let ((r expr1))
|
|
(begin expr ...)
|
|
r))))
|
|
|
|
(define andmap
|
|
(lambda (f list0 . lists)
|
|
(if (null? list0)
|
|
(and)
|
|
(let loop ((lists (cons list0 lists)))
|
|
(if (null? (cdr (car lists)))
|
|
(apply f (map car lists))
|
|
(and (apply f (map car lists))
|
|
(loop (map cdr lists))))))))
|
|
(define null '())
|
|
|
|
; stolen from mzlib/functior.ss
|
|
(define (quicksort l less-than)
|
|
(let* ((v (list->vector l))
|
|
(count (vector-length v)))
|
|
(let loop ((min 0)(max count))
|
|
(if (< min (sub1 max))
|
|
(let ((pval (vector-ref v min)))
|
|
(let pivot-loop ((pivot min)
|
|
(pos (add1 min)))
|
|
(if (< pos max)
|
|
(let ((cval (vector-ref v pos)))
|
|
(if (less-than cval pval)
|
|
(begin
|
|
(vector-set! v pos (vector-ref v pivot))
|
|
(vector-set! v pivot cval)
|
|
(pivot-loop (add1 pivot) (add1 pos)))
|
|
(pivot-loop pivot (add1 pos))))
|
|
(if (= min pivot)
|
|
(loop (add1 pivot) max)
|
|
(begin
|
|
(loop min pivot)
|
|
(loop pivot max))))))))
|
|
(vector->list v)))
|
|
|
|
;;; HACK!
|
|
(define call/ec call-with-current-continuation)
|
|
(define-syntax let/ec
|
|
(syntax-rules
|
|
()
|
|
((let/ec k expr ...)
|
|
(call-with-current-continuation (lambda (k) expr ...)))))
|
|
|
|
|
|
;;; HACK!
|
|
(define (make-parameter val)
|
|
(lambda ()
|
|
val))
|
|
|
|
;;;;;;;;;;;
|
|
|
|
;; Token ::= Contents | Start-tag | End-tag | Eof
|
|
|
|
(define read-comments (make-parameter #f))
|
|
(define collapse-whitespace (make-parameter #f))
|
|
|
|
;; read-xml : [Input-port] -> Document
|
|
(define (read-xml . maybe-port)
|
|
(read-from-port (if (null? maybe-port) (current-input-port) (car maybe-port))))
|
|
|
|
|
|
;; read-from-port : Input-port -> Document
|
|
(define (read-from-port in)
|
|
(let*-values (((in pos) (positionify in))
|
|
((misc0 start) (read-misc in pos)))
|
|
(make-document (make-prolog misc0 #f)
|
|
(cond
|
|
((start-tag? start) (read-element start in pos))
|
|
((element? start) start)
|
|
(else (error 'read-xml "expected root element - received ~a" start)))
|
|
(let-values (((misc1 end-of-file) (read-misc in pos)))
|
|
(unless (eof-object? end-of-file)
|
|
(error 'read-xml "extra stuff at end of document ~a" end-of-file))
|
|
misc1))))
|
|
|
|
;; read-misc : Input-port (-> Nat) -> (listof Misc) Token
|
|
(define (read-misc in pos)
|
|
(let read-more ()
|
|
(let ((x (lex in pos)))
|
|
(cond
|
|
((or (pi? x) (comment? x))
|
|
(let-values (((lst next) (read-more)))
|
|
(values (cons x lst) next)))
|
|
((and (pcdata? x) (andmap char-whitespace? (string->list (pcdata-string x))))
|
|
(read-more))
|
|
(else (values null x))))))
|
|
|
|
;; read-element : Start-tag Input-port (-> Nat) -> Element
|
|
(define (read-element start in pos)
|
|
(let ((name (start-tag-name start))
|
|
(a (source-start start))
|
|
(b (source-stop start)))
|
|
(make-element
|
|
a b name (start-tag-attrs start)
|
|
(let read-content ()
|
|
(let ((x (lex in pos)))
|
|
(cond
|
|
((eof-object? x)
|
|
(error 'read-xml "unclosed ~a tag at [~a ~a]" name a b))
|
|
((start-tag? x) (cons (read-element x in pos) (read-content)))
|
|
((end-tag? x)
|
|
(unless (eq? name (end-tag-name x))
|
|
(error 'read-xml "start tag ~a at [~a ~a] doesn't match end tag ~a at [~a ~a]"
|
|
name a b (end-tag-name x) (source-start x) (source-stop x)))
|
|
null)
|
|
((entity? x) (cons (expand-entity x) (read-content)))
|
|
((comment? x) (if (read-comments)
|
|
(cons x (read-content))
|
|
(read-content)))
|
|
(else (cons x (read-content)))))))))
|
|
|
|
;; expand-entity : Entity -> (U Entity Pcdata)
|
|
;; more here - allow expansion of user defined entities
|
|
(define (expand-entity x)
|
|
(let ((expanded (default-entity-table (entity-text x))))
|
|
(if expanded
|
|
(make-pcdata (source-start x) (source-stop x) expanded)
|
|
x)))
|
|
|
|
;; default-entity-table : Symbol -> (U #f String)
|
|
(define (default-entity-table name)
|
|
(case name
|
|
((amp) "&")
|
|
((lt) "<")
|
|
((gt) ">")
|
|
((quot) "\"")
|
|
((apos) "'")
|
|
(else #f)))
|
|
|
|
;; lex : Input-port (-> Nat) -> Token
|
|
(define (lex in pos)
|
|
(let ((c (peek-char in)))
|
|
(cond
|
|
((eof-object? c) c)
|
|
((eq? c #\&) (lex-entity in pos))
|
|
((eq? c #\<) (lex-tag-cdata-pi-comment in pos))
|
|
(else (lex-pcdata in pos)))))
|
|
|
|
;; lex-entity : Input-port (-> Nat) -> Entity
|
|
(define (lex-entity in pos)
|
|
(let ((start (pos)))
|
|
(read-char in)
|
|
(let ((data (case (peek-char in)
|
|
((#\#)
|
|
(read-char in)
|
|
(let ((n (case (peek-char in)
|
|
((#\x) (read-char in)
|
|
(string->number (read-until #\; in pos) 16))
|
|
(else (string->number (read-until #\; in pos))))))
|
|
(unless (number? n)
|
|
(lex-error in pos "malformed numeric entity"))
|
|
n))
|
|
(else
|
|
(begin0
|
|
(lex-name in pos)
|
|
(unless (eq? (read-char in) #\;)
|
|
(lex-error in pos "expected ; at the end of an entity")))))))
|
|
(make-entity start (pos) data))))
|
|
|
|
;; lex-tag-cdata-pi-comment : Input-port (-> Nat) -> Start-tag | Element | End-tag | Pcdata | Pi | Comment
|
|
(define (lex-tag-cdata-pi-comment in pos)
|
|
(let ((start (pos)))
|
|
(read-char in)
|
|
(case (non-eof peek-char in pos)
|
|
((#\!)
|
|
(read-char in)
|
|
(case (non-eof peek-char in pos)
|
|
((#\-) (read-char in)
|
|
(unless (eq? (read-char in) #\-)
|
|
(lex-error in pos "expected second - after <!-"))
|
|
(let ((data (lex-comment-contents in pos)))
|
|
(unless (eq? (read-char in) #\>)
|
|
(lex-error in pos "expected > to end comment (\"--\" can't appear in comments)"))
|
|
;(make-comment start (pos) data)
|
|
(make-comment data)))
|
|
((#\[) (read-char in)
|
|
(unless (string=? (read-string 6 in) "CDATA[")
|
|
(lex-error in pos "expected CDATA following <["))
|
|
(let ((data (lex-cdata-contents in pos)))
|
|
(make-pcdata start (pos) data)))
|
|
(else (skip-dtd in pos)
|
|
(skip-space in)
|
|
(unless (eq? (peek-char in) #\<)
|
|
(lex-error in pos "expected pi, comment, or element after doctype"))
|
|
(lex-tag-cdata-pi-comment in pos))))
|
|
((#\?) (read-char in)
|
|
(let ((name (lex-name in pos)))
|
|
(skip-space in)
|
|
(let ((data (lex-pi-data in pos)))
|
|
(make-pi start (pos) name data))))
|
|
((#\/) (read-char in)
|
|
(let ((name (lex-name in pos)))
|
|
(skip-space in)
|
|
(unless (eq? (read-char in) #\>)
|
|
(lex-error in pos "expected > to close ~a's end tag" name))
|
|
(make-end-tag start (pos) name)))
|
|
(else
|
|
(let ((name (lex-name in pos))
|
|
(attrs (lex-attributes in pos)))
|
|
(skip-space in)
|
|
(case (read-char in)
|
|
((#\/)
|
|
(unless (eq? (read-char in) #\>)
|
|
(lex-error in pos "expected > to close empty element ~a" name))
|
|
(make-element start (pos) name attrs null))
|
|
((#\>) (make-start-tag start (pos) name attrs))
|
|
(else (lex-error in pos "expected / or > to close tag ~a" name))))))))
|
|
|
|
;; lex-attributes : Input-port (-> Nat) -> (listof Attribute)
|
|
(define (lex-attributes in pos)
|
|
(quicksort (let loop ()
|
|
(skip-space in)
|
|
(cond
|
|
((name-start? (peek-char in))
|
|
(cons (lex-attribute in pos) (loop)))
|
|
(else null)))
|
|
(lambda (a b)
|
|
(let ((na (attribute-name a))
|
|
(nb (attribute-name b)))
|
|
(cond
|
|
((eq? na nb) (lex-error in pos "duplicated attribute name ~a" na))
|
|
(else (string<? (symbol->string na) (symbol->string nb))))))))
|
|
|
|
;; lex-attribute : Input-port (-> Nat) -> Attribute
|
|
(define (lex-attribute in pos)
|
|
(let ((start (pos))
|
|
(name (lex-name in pos)))
|
|
(skip-space in)
|
|
(unless (eq? (read-char in) #\=)
|
|
(lex-error in pos "expected = in attribute ~a" name))
|
|
(skip-space in)
|
|
;; more here - handle entites and disallow "<"
|
|
(let* ((delimiter (read-char in))
|
|
(value (case delimiter
|
|
((#\' #\")
|
|
(list->string
|
|
(let read-more ()
|
|
(let ((c (non-eof peek-char in pos)))
|
|
(cond
|
|
((eq? c delimiter) (read-char in) null)
|
|
((eq? c #\&)
|
|
(let ((entity (expand-entity (lex-entity in pos))))
|
|
(if (pcdata? entity)
|
|
(append (string->list (pcdata-string entity)) (read-more))
|
|
;; more here - do something with user defined entites
|
|
(read-more))))
|
|
(else (read-char in) (cons c (read-more))))))))
|
|
(else (lex-error in pos "attribute values must be in ''s or in \"\"s")))))
|
|
(make-attribute start (pos) name value))))
|
|
|
|
;; skip-space : Input-port -> Void
|
|
;; deviation - should sometimes insist on at least one space
|
|
(define (skip-space in)
|
|
(let loop ()
|
|
(let ((c (peek-char in)))
|
|
(when (and (not (eof-object? c)) (char-whitespace? c))
|
|
(read-char in)
|
|
(loop)))))
|
|
|
|
;; lex-pcdata : Input-port (-> Nat) -> Pcdata
|
|
;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec
|
|
(define (lex-pcdata in pos)
|
|
(let ((start (pos))
|
|
(data (let loop ()
|
|
(let ((next (peek-char in)))
|
|
(cond
|
|
((or (eof-object? next) (eq? next #\&) (eq? next #\<))
|
|
null)
|
|
((and (char-whitespace? next) (collapse-whitespace))
|
|
(skip-space in)
|
|
(cons #\space (loop)))
|
|
(else (cons (read-char in) (loop))))))))
|
|
(make-pcdata start
|
|
(pos)
|
|
(list->string data))))
|
|
|
|
;; lex-name : Input-port (-> Nat) -> Symbol
|
|
(define (lex-name in pos)
|
|
(let ((c (read-char in)))
|
|
(unless (name-start? c)
|
|
(lex-error in pos "expected name, received ~a" c))
|
|
(string->symbol
|
|
(list->string
|
|
(cons c (let lex-rest ()
|
|
(cond
|
|
((name-char? (peek-char in))
|
|
(cons (read-char in) (lex-rest)))
|
|
(else null))))))))
|
|
|
|
;; skip-dtd : Input-port (-> Nat) -> Void
|
|
(define (skip-dtd in pos)
|
|
(let skip ()
|
|
(case (non-eof read-char in pos)
|
|
((#\') (read-until #\' in pos) (skip))
|
|
((#\") (read-until #\" in pos) (skip))
|
|
((#\<)
|
|
(case (non-eof read-char in pos)
|
|
((#\!) (case (non-eof read-char in pos)
|
|
((#\-) (read-char in) (lex-comment-contents in pos) (read-char in) (skip))
|
|
(else (skip) (skip))))
|
|
((#\?) (lex-pi-data in pos) (skip))
|
|
(else (skip) (skip))))
|
|
((#\>) (void))
|
|
(else (skip)))))
|
|
|
|
;; name-start? : Char -> Bool
|
|
(define (name-start? ch)
|
|
(or (char-alphabetic? ch)
|
|
(eq? ch #\_)
|
|
(eq? ch #\:)))
|
|
|
|
;; name-char? : Char -> Bool
|
|
(define (name-char? ch)
|
|
(or (name-start? ch)
|
|
(char-numeric? ch)
|
|
(eq? ch #\.)
|
|
(eq? ch #\-)))
|
|
|
|
;; read-until : Char Input-port (-> Nat) -> String
|
|
;; discards the stop character, too
|
|
(define (read-until char in pos)
|
|
(list->string
|
|
(let read-more ()
|
|
(let ((c (non-eof read-char in pos)))
|
|
(cond
|
|
((eq? c char) null)
|
|
(else (cons c (read-more))))))))
|
|
|
|
;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Nat) -> Char
|
|
(define (non-eof f in pos)
|
|
(let ((c (f in)))
|
|
(cond
|
|
((eof-object? c) (lex-error in pos "unexpected eof"))
|
|
(else c))))
|
|
|
|
;; gen-read-until-string : String -> Input-port (-> Nat) -> String
|
|
;; uses Knuth-Morris-Pratt from
|
|
;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876
|
|
;; discards stop from input
|
|
(define (gen-read-until-string stop)
|
|
(let* ((len (string-length stop))
|
|
(prefix (make-vector len 0))
|
|
(fall-back
|
|
(lambda (k c)
|
|
(let ((k (let loop ((k k))
|
|
(cond
|
|
((and (> k 0) (not (eq? (string-ref stop k) c)))
|
|
(loop (vector-ref prefix (sub1 k))))
|
|
(else k)))))
|
|
(if (eq? (string-ref stop k) c)
|
|
(add1 k)
|
|
k)))))
|
|
(let init ((k 0) (q 1))
|
|
(when (< q len)
|
|
(let ((k (fall-back k (string-ref stop q))))
|
|
(vector-set! prefix q k)
|
|
(init k (add1 q)))))
|
|
;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop
|
|
(lambda (in pos)
|
|
(list->string
|
|
(let/ec out
|
|
(let loop ((matched 0) (out out))
|
|
(let* ((c (non-eof read-char in pos))
|
|
(matched (fall-back matched c)))
|
|
(cond
|
|
((= matched len) (out null))
|
|
((zero? matched) (cons c (let/ec out (loop matched out))))
|
|
(else (cons c (loop matched out)))))))))))
|
|
|
|
;; "-->" makes more sense, but "--" follows the spec.
|
|
(define lex-comment-contents (gen-read-until-string "--"))
|
|
(define lex-pi-data (gen-read-until-string "?>"))
|
|
(define lex-cdata-contents (gen-read-until-string "]]>"))
|
|
|
|
;; positionify : Input-port -> Input-port (-> Nat)
|
|
|
|
;; Well, this really depends on scsh-0.6
|
|
;; For S48 you probably need to do something completely different
|
|
|
|
(define (positionify in)
|
|
(let ((n 0); port-limit as absolute value
|
|
(old-handler (port-handler in)))
|
|
(let ((handler (make-buffered-input-port-handler
|
|
(port-handler-discloser old-handler)
|
|
(port-handler-close old-handler)
|
|
(lambda (data buffer start needed)
|
|
(let ((res
|
|
((port-handler-buffer-proc old-handler)
|
|
data buffer start needed)))
|
|
(if (number? res)
|
|
(set! n (+ n res)))
|
|
res))
|
|
(port-handler-ready? old-handler)
|
|
(port-handler-steal old-handler))))
|
|
(set-port-handler! in handler)
|
|
(values in
|
|
(lambda ()
|
|
(- n (- (port-limit in) (port-index in))))))))
|
|
|
|
;; lex-error : Input-port String (-> Nat) TST* -> alpha
|
|
(define (lex-error in pos str . rest)
|
|
(error 'lex-error " at positon:" (pos) str rest))
|
|
|
|
|