; Taken directly from the SRFI document. (define-syntax let-values (syntax-rules () ((let-values (?binding ...) ?body0 ?body1 ...) (let-values "bind" (?binding ...) () (begin ?body0 ?body1 ...))) ((let-values "bind" () ?tmps ?body) (let ?tmps ?body)) ((let-values "bind" ((?b0 ?e0) ?binding ...) ?tmps ?body) (let-values "mktmp" ?b0 ?e0 () (?binding ...) ?tmps ?body)) ((let-values "mktmp" () ?e0 ?args ?bindings ?tmps ?body) (call-with-values (lambda () ?e0) (lambda ?args (let-values "bind" ?bindings ?tmps ?body)))) ((let-values "mktmp" (?a . ?b) ?e0 (?arg ...) ?bindings (?tmp ...) ?body) (let-values "mktmp" ?b ?e0 (?arg ... x) ?bindings (?tmp ... (?a x)) ?body)) ((let-values "mktmp" ?a ?e0 (?arg ...) ?bindings (?tmp ...) ?body) (call-with-values (lambda () ?e0) (lambda (?arg ... . x) (let-values "bind" ?bindings (?tmp ... (?a x)) ?body)))))) (define-syntax let*-values (syntax-rules () ((let*-values () ?body0 ?body1 ...) (begin ?body0 ?body1 ...)) ((let*-values (?binding0 ?binding1 ...) ?body0 ?body1 ...) (let-values (?binding0) (let*-values (?binding1 ...) ?body0 ?body1 ...))))) (define (add1 x) (+ x 1)) (define-syntax when (syntax-rules () ((when test expr ...) (if test (begin expr ...))))) (define-syntax unless (syntax-rules () ((unless test expr ...) (if (not test) (begin expr ...))))) (define (sub1 x) (- x 1)) (define (void . a) (if #f #f)) (define-syntax begin0 (syntax-rules () ((begin0 expr1 expr ...) (let ((r expr1)) (begin expr ...) r)))) (define andmap (lambda (f list0 . lists) (if (null? list0) (and) (let loop ((lists (cons list0 lists))) (if (null? (cdr (car lists))) (apply f (map car lists)) (and (apply f (map car lists)) (loop (map cdr lists)))))))) (define null '()) ; stolen from mzlib/functior.ss (define (quicksort l less-than) (let* ((v (list->vector l)) (count (vector-length v))) (let loop ((min 0)(max count)) (if (< min (sub1 max)) (let ((pval (vector-ref v min))) (let pivot-loop ((pivot min) (pos (add1 min))) (if (< pos max) (let ((cval (vector-ref v pos))) (if (less-than cval pval) (begin (vector-set! v pos (vector-ref v pivot)) (vector-set! v pivot cval) (pivot-loop (add1 pivot) (add1 pos))) (pivot-loop pivot (add1 pos)))) (if (= min pivot) (loop (add1 pivot) max) (begin (loop min pivot) (loop pivot max)))))))) (vector->list v))) ;;; HACK! (define call/ec call-with-current-continuation) (define-syntax let/ec (syntax-rules () ((let/ec k expr ...) (call-with-current-continuation (lambda (k) expr ...))))) ;;; HACK! (define (make-parameter val) (lambda () val)) ;;;;;;;;;;; ;; Token ::= Contents | Start-tag | End-tag | Eof (define read-comments (make-parameter #f)) (define collapse-whitespace (make-parameter #f)) ;; read-xml : [Input-port] -> Document (define (read-xml . maybe-port) (read-from-port (if (null? maybe-port) (current-input-port) (car maybe-port)))) ;; read-from-port : Input-port -> Document (define (read-from-port in) (let*-values (((in pos) (positionify in)) ((misc0 start) (read-misc in pos))) (make-document (make-prolog misc0 #f) (cond ((start-tag? start) (read-element start in pos)) ((element? start) start) (else (error 'read-xml "expected root element - received ~a" start))) (let-values (((misc1 end-of-file) (read-misc in pos))) (unless (eof-object? end-of-file) (error 'read-xml "extra stuff at end of document ~a" end-of-file)) misc1)))) ;; read-misc : Input-port (-> Nat) -> (listof Misc) Token (define (read-misc in pos) (let read-more () (let ((x (lex in pos))) (cond ((or (pi? x) (comment? x)) (let-values (((lst next) (read-more))) (values (cons x lst) next))) ((and (pcdata? x) (andmap char-whitespace? (string->list (pcdata-string x)))) (read-more)) (else (values null x)))))) ;; read-element : Start-tag Input-port (-> Nat) -> Element (define (read-element start in pos) (let ((name (start-tag-name start)) (a (source-start start)) (b (source-stop start))) (make-element a b name (start-tag-attrs start) (let read-content () (let ((x (lex in pos))) (cond ((eof-object? x) (error 'read-xml "unclosed ~a tag at [~a ~a]" name a b)) ((start-tag? x) (cons (read-element x in pos) (read-content))) ((end-tag? x) (unless (eq? name (end-tag-name x)) (error 'read-xml "start tag ~a at [~a ~a] doesn't match end tag ~a at [~a ~a]" name a b (end-tag-name x) (source-start x) (source-stop x))) null) ((entity? x) (cons (expand-entity x) (read-content))) ((comment? x) (if (read-comments) (cons x (read-content)) (read-content))) (else (cons x (read-content))))))))) ;; expand-entity : Entity -> (U Entity Pcdata) ;; more here - allow expansion of user defined entities (define (expand-entity x) (let ((expanded (default-entity-table (entity-text x)))) (if expanded (make-pcdata (source-start x) (source-stop x) expanded) x))) ;; default-entity-table : Symbol -> (U #f String) (define (default-entity-table name) (case name ((amp) "&") ((lt) "<") ((gt) ">") ((quot) "\"") ((apos) "'") (else #f))) ;; lex : Input-port (-> Nat) -> Token (define (lex in pos) (let ((c (peek-char in))) (cond ((eof-object? c) c) ((eq? c #\&) (lex-entity in pos)) ((eq? c #\<) (lex-tag-cdata-pi-comment in pos)) (else (lex-pcdata in pos))))) ;; lex-entity : Input-port (-> Nat) -> Entity (define (lex-entity in pos) (let ((start (pos))) (read-char in) (let ((data (case (peek-char in) ((#\#) (read-char in) (let ((n (case (peek-char in) ((#\x) (read-char in) (string->number (read-until #\; in pos) 16)) (else (string->number (read-until #\; in pos)))))) (unless (number? n) (lex-error in pos "malformed numeric entity")) n)) (else (begin0 (lex-name in pos) (unless (eq? (read-char in) #\;) (lex-error in pos "expected ; at the end of an entity"))))))) (make-entity start (pos) data)))) ;; lex-tag-cdata-pi-comment : Input-port (-> Nat) -> Start-tag | Element | End-tag | Pcdata | Pi | Comment (define (lex-tag-cdata-pi-comment in pos) (let ((start (pos))) (read-char in) (case (non-eof peek-char in pos) ((#\!) (read-char in) (case (non-eof peek-char in pos) ((#\-) (read-char in) (unless (eq? (read-char in) #\-) (lex-error in pos "expected second - after ) (lex-error in pos "expected > to end comment (\"--\" can't appear in comments)")) ;(make-comment start (pos) data) (make-comment data))) ((#\[) (read-char in) (unless (string=? (read-string 6 in) "CDATA[") (lex-error in pos "expected CDATA following <[")) (let ((data (lex-cdata-contents in pos))) (make-pcdata start (pos) data))) (else (skip-dtd in pos) (skip-space in) (unless (eq? (peek-char in) #\<) (lex-error in pos "expected pi, comment, or element after doctype")) (lex-tag-cdata-pi-comment in pos)))) ((#\?) (read-char in) (let ((name (lex-name in pos))) (skip-space in) (let ((data (lex-pi-data in pos))) (make-pi start (pos) name data)))) ((#\/) (read-char in) (let ((name (lex-name in pos))) (skip-space in) (unless (eq? (read-char in) #\>) (lex-error in pos "expected > to close ~a's end tag" name)) (make-end-tag start (pos) name))) (else (let ((name (lex-name in pos)) (attrs (lex-attributes in pos))) (skip-space in) (case (read-char in) ((#\/) (unless (eq? (read-char in) #\>) (lex-error in pos "expected > to close empty element ~a" name)) (make-element start (pos) name attrs null)) ((#\>) (make-start-tag start (pos) name attrs)) (else (lex-error in pos "expected / or > to close tag ~a" name)))))))) ;; lex-attributes : Input-port (-> Nat) -> (listof Attribute) (define (lex-attributes in pos) (quicksort (let loop () (skip-space in) (cond ((name-start? (peek-char in)) (cons (lex-attribute in pos) (loop))) (else null))) (lambda (a b) (let ((na (attribute-name a)) (nb (attribute-name b))) (cond ((eq? na nb) (lex-error in pos "duplicated attribute name ~a" na)) (else (stringstring na) (symbol->string nb)))))))) ;; lex-attribute : Input-port (-> Nat) -> Attribute (define (lex-attribute in pos) (let ((start (pos)) (name (lex-name in pos))) (skip-space in) (unless (eq? (read-char in) #\=) (lex-error in pos "expected = in attribute ~a" name)) (skip-space in) ;; more here - handle entites and disallow "<" (let* ((delimiter (read-char in)) (value (case delimiter ((#\' #\") (list->string (let read-more () (let ((c (non-eof peek-char in pos))) (cond ((eq? c delimiter) (read-char in) null) ((eq? c #\&) (let ((entity (expand-entity (lex-entity in pos)))) (if (pcdata? entity) (append (string->list (pcdata-string entity)) (read-more)) ;; more here - do something with user defined entites (read-more)))) (else (read-char in) (cons c (read-more)))))))) (else (lex-error in pos "attribute values must be in ''s or in \"\"s"))))) (make-attribute start (pos) name value)))) ;; skip-space : Input-port -> Void ;; deviation - should sometimes insist on at least one space (define (skip-space in) (let loop () (let ((c (peek-char in))) (when (and (not (eof-object? c)) (char-whitespace? c)) (read-char in) (loop))))) ;; lex-pcdata : Input-port (-> Nat) -> Pcdata ;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec (define (lex-pcdata in pos) (let ((start (pos)) (data (let loop () (let ((next (peek-char in))) (cond ((or (eof-object? next) (eq? next #\&) (eq? next #\<)) null) ((and (char-whitespace? next) (collapse-whitespace)) (skip-space in) (cons #\space (loop))) (else (cons (read-char in) (loop)))))))) (make-pcdata start (pos) (list->string data)))) ;; lex-name : Input-port (-> Nat) -> Symbol (define (lex-name in pos) (let ((c (read-char in))) (unless (name-start? c) (lex-error in pos "expected name, received ~a" c)) (string->symbol (list->string (cons c (let lex-rest () (cond ((name-char? (peek-char in)) (cons (read-char in) (lex-rest))) (else null)))))))) ;; skip-dtd : Input-port (-> Nat) -> Void (define (skip-dtd in pos) (let skip () (case (non-eof read-char in pos) ((#\') (read-until #\' in pos) (skip)) ((#\") (read-until #\" in pos) (skip)) ((#\<) (case (non-eof read-char in pos) ((#\!) (case (non-eof read-char in pos) ((#\-) (read-char in) (lex-comment-contents in pos) (read-char in) (skip)) (else (skip) (skip)))) ((#\?) (lex-pi-data in pos) (skip)) (else (skip) (skip)))) ((#\>) (void)) (else (skip))))) ;; name-start? : Char -> Bool (define (name-start? ch) (or (char-alphabetic? ch) (eq? ch #\_) (eq? ch #\:))) ;; name-char? : Char -> Bool (define (name-char? ch) (or (name-start? ch) (char-numeric? ch) (eq? ch #\.) (eq? ch #\-))) ;; read-until : Char Input-port (-> Nat) -> String ;; discards the stop character, too (define (read-until char in pos) (list->string (let read-more () (let ((c (non-eof read-char in pos))) (cond ((eq? c char) null) (else (cons c (read-more)))))))) ;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Nat) -> Char (define (non-eof f in pos) (let ((c (f in))) (cond ((eof-object? c) (lex-error in pos "unexpected eof")) (else c)))) ;; gen-read-until-string : String -> Input-port (-> Nat) -> String ;; uses Knuth-Morris-Pratt from ;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876 ;; discards stop from input (define (gen-read-until-string stop) (let* ((len (string-length stop)) (prefix (make-vector len 0)) (fall-back (lambda (k c) (let ((k (let loop ((k k)) (cond ((and (> k 0) (not (eq? (string-ref stop k) c))) (loop (vector-ref prefix (sub1 k)))) (else k))))) (if (eq? (string-ref stop k) c) (add1 k) k))))) (let init ((k 0) (q 1)) (when (< q len) (let ((k (fall-back k (string-ref stop q)))) (vector-set! prefix q k) (init k (add1 q))))) ;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop (lambda (in pos) (list->string (let/ec out (let loop ((matched 0) (out out)) (let* ((c (non-eof read-char in pos)) (matched (fall-back matched c))) (cond ((= matched len) (out null)) ((zero? matched) (cons c (let/ec out (loop matched out)))) (else (cons c (loop matched out))))))))))) ;; "-->" makes more sense, but "--" follows the spec. (define lex-comment-contents (gen-read-until-string "--")) (define lex-pi-data (gen-read-until-string "?>")) (define lex-cdata-contents (gen-read-until-string "]]>")) ;; positionify : Input-port -> Input-port (-> Nat) ;; Well, this really depends on scsh-0.6 ;; For S48 you probably need to do something completely different (define (positionify in) (let ((n 0); port-limit as absolute value (old-handler (port-handler in))) (let ((handler (make-buffered-input-port-handler (port-handler-discloser old-handler) (port-handler-close old-handler) (lambda (data buffer start needed) (let ((res ((port-handler-buffer-proc old-handler) data buffer start needed))) (if (number? res) (set! n (+ n res))) res)) (port-handler-ready? old-handler) (port-handler-steal old-handler)))) (set-port-handler! in handler) (values in (lambda () (- n (- (port-limit in) (port-index in)))))))) ;; lex-error : Input-port String (-> Nat) TST* -> alpha (define (lex-error in pos str . rest) (error 'lex-error " at positon:" (pos) str rest))