diff --git a/scheme/xml/doc.txt b/scheme/xml/doc.txt
new file mode 100644
index 0000000..ed8fc66
--- /dev/null
+++ b/scheme/xml/doc.txt
@@ -0,0 +1,180 @@
+_XML_ Library
+=============
+
+Files: xml.ss xmlr.ss xmls.ss
+Signature: xml^
+
+Basic XML Data Types
+====================
+
+Document:
+ This structure represents an XML document. The only useful part is
+ the document-element, which contains all the content. The rest of
+ of the structure contains DTD information, which isn't supported,
+ and processing-instructions.
+
+Element:
+ Each pair of start/end tags and everything in between is an element.
+ It has the following pieces:
+ a name
+ attributes
+ contents including sub-elements
+Xexpr:
+ S-expression representations of XML data.
+
+The end of this document has more details.
+
+Functions
+=========
+
+> read-xml : [Input-port] -> Document
+ reads in an XML document from the given or current input port
+ XML documents contain exactly one element. It throws an xml-read:error
+ if there isn't any element or if there are more than one element.
+
+> write-xml : Document [Output-port] -> Void
+ writes a document to the given or current output port, currently
+ ignoring everything except the document's root element.
+
+> write-xml/content : Content [Output-port] -> Void
+ writes a document's contents to the given or current output port
+
+> display-xml : Document [Output-port] -> Void
+ just like write-xml, but newlines and indentation make the output more
+ readable, though less technically correct when white space is
+ significant.
+
+> display-xml/content : Content [Output-port] -> Void
+ just like write-xml/content, but with indentation and newlines
+
+> xml->xexpr : Content -> Xexpr
+ converts the interesting part of an XML document into an Xexpression
+
+> xexpr->xml : Xexpr -> Content
+ converts an Xexpression into the interesting part of an XML document
+
+> xexpr->string : Xexpression -> String
+ converts an Xexpression into a string representation
+
+> eliminate-whitespace : (listof Symbol) (Bool -> Bool) -> Element -> Element
+ Some elements should not contain any text, only other tags, except they
+ often contain whitespace for formating purposes. Given a list of tag names
+ and the identity function, eliminate-whitespace produces a function that
+ filters out pcdata consisting solely of whitespace from those elements and
+ raises and error if any non-whitespace text appears. Passing in the function
+ called "not" instead of the identity function filters all elements which are not
+ named in the list. Using void filters all elements regardless of the list.
+
+Parameters
+==========
+
+> empty-tag-shorthand : 'always | 'never | (listof Symbol)
+ Default: 'always
+ This determines if the output functions should use the tag
+ notation instead of writing . The first form is the
+ preferred XML notation. However, most browsers designed for HTML
+ will only properly render XHTML if the document uses a mixture of the
+ two formats. _html-empty-tags_ contains the W3 consortium's
+ recommended list of XHTML tags that should use the shorthand.
+
+> collapse-whitespace : Bool
+ Default: #f
+ All consecutive whitespace is replaced by a single space.
+ CDATA sections are not affected.
+
+> trim-whitespace : Bool
+ This parameter no longer exists. Consider using collapse-whitespace
+ and eliminate-whitespace instead.
+
+> read-comments : Bool
+ Default: #f
+ Comments, by definition, should be ignored by programs. However,
+ interoperating with ad hoc extentions to other languages sometimes
+ requires processing comments anyway.
+
+> xexpr-drop-empty-attributes : Bool
+ Default: #f
+ It's easier to write functions processing Xexpressions, if they always
+ have a list of attributes. On the other hand, it's less cumbersome to
+ write Xexpresssions by hand without empty lists of attributes
+ everywhere. Normally xml->xexpr leaves in empty attribute lists.
+ Setting this parameter to #t drops them, so further editing the
+ Xexpression by hand is less annoying.
+
+Examples
+========
+
+Reading an Xexpression:
+ (xml->xexpr (document-element (read-xml input-port)))
+
+Writing an Xexpression:
+ (empty-tag-shorthand html-empty-tags)
+ (write-xml/content (xexpr->xml `(html (head (title ,banner))
+ (body ((bgcolor "white"))
+ ,text)))
+ output-port)
+
+What this Library Doesn't Provide
+=================================
+
+ Document Type Declaration (DTD) processing
+ Validation
+ Expanding user-defined entites
+ Reading user-defined entites in attributes
+ Unicode support
+
+XML Datatype Details
+====================
+
+Note: Users of the XML collection don't need to know most of these definitions.
+
+Note: Xexpr is the only important one to understand. Even then,
+ Processing-instructions may be ignored.
+
+> Xexpr ::= String
+ | (list* Symbol (listof (list Symbol String)) (list Xexpr))
+ | (cons Symbol (listof Xexpr)) ;; an element with no attributes
+ | Symbol ;; symbolic entities such as
+ | Number ;; numeric entities like
+ | Misc
+
+> Document ::= (make-document Prolog Element (listof Processing-instruction))
+ (define-struct document (prolog element misc))
+
+> Prolog ::= (make-prolog (listof Misc) #f)
+ (define-struct prolog (misc dtd))
+
+> Element ::= (make-element Location Location
+ Symbol
+ (listof Attribute)
+ (listof Content))
+ (define-struct (element struct:source) (name attributes content))
+
+> Attribute ::= (make-attribute Location Location Symbol String)
+ (define-struct (attribute struct:source) (name value))
+
+> Content ::= Pcdata
+ | Element
+ | Entity
+ | Misc
+
+ Misc ::= Comment
+ | Processing-instruction
+
+> Pcdata ::= (make-pcdata Location Location String)
+ (define-struct (pcdata struct:source) (string))
+
+> Entity ::= (make-entity (U Nat Symbol))
+ (define-struct entity (text))
+
+> Processing-instruction ::= (make-pi Location Location String (list String))
+ (define-struct (pi struct:source) (target-name instruction))
+
+> Comment ::= (make-comment String)
+ (define-struct comment (text))
+
+ Source ::= (make-source Location Location)
+ (define-struct source (start stop))
+
+ Location ::= Nat
+ | Symbol
diff --git a/scheme/xml/plt.scm b/scheme/xml/plt.scm
new file mode 100644
index 0000000..084e74e
--- /dev/null
+++ b/scheme/xml/plt.scm
@@ -0,0 +1,150 @@
+; Taken directly from the SRFI document.
+
+(define-syntax let-values
+ (syntax-rules ()
+ ((let-values (?binding ...) ?body0 ?body1 ...)
+ (let-values "bind" (?binding ...) () (begin ?body0 ?body1 ...)))
+
+ ((let-values "bind" () ?tmps ?body)
+ (let ?tmps ?body))
+
+ ((let-values "bind" ((?b0 ?e0) ?binding ...) ?tmps ?body)
+ (let-values "mktmp" ?b0 ?e0 () (?binding ...) ?tmps ?body))
+
+ ((let-values "mktmp" () ?e0 ?args ?bindings ?tmps ?body)
+ (call-with-values
+ (lambda () ?e0)
+ (lambda ?args
+ (let-values "bind" ?bindings ?tmps ?body))))
+
+ ((let-values "mktmp" (?a . ?b) ?e0 (?arg ...) ?bindings (?tmp ...) ?body)
+ (let-values "mktmp" ?b ?e0 (?arg ... x) ?bindings (?tmp ... (?a x)) ?body))
+
+ ((let-values "mktmp" ?a ?e0 (?arg ...) ?bindings (?tmp ...) ?body)
+ (call-with-values
+ (lambda () ?e0)
+ (lambda (?arg ... . x)
+ (let-values "bind" ?bindings (?tmp ... (?a x)) ?body))))))
+
+(define-syntax let*-values
+ (syntax-rules ()
+ ((let*-values () ?body0 ?body1 ...)
+ (begin ?body0 ?body1 ...))
+
+ ((let*-values (?binding0 ?binding1 ...) ?body0 ?body1 ...)
+ (let-values (?binding0)
+ (let*-values (?binding1 ...) ?body0 ?body1 ...)))))
+
+(define (add1 x) (+ x 1))
+(define (sub1 x) (- x 1))
+
+(define-syntax when
+ (syntax-rules
+ ()
+ ((when test expr ...)
+ (if test (begin expr ...)))))
+
+(define-syntax unless
+ (syntax-rules
+ ()
+ ((unless test expr ...)
+ (if (not test) (begin expr ...)))))
+
+(define (void . a)
+ (if #f #f))
+
+(define-syntax begin0
+ (syntax-rules
+ ()
+ ((begin0 expr1 expr ...)
+ (let ((r expr1))
+ (begin expr ...)
+ r))))
+
+(define andmap
+ (lambda (f list0 . lists)
+ (if (null? list0)
+ (and)
+ (let loop ((lists (cons list0 lists)))
+ (if (null? (cdr (car lists)))
+ (apply f (map car lists))
+ (and (apply f (map car lists))
+ (loop (map cdr lists))))))))
+(define null '())
+
+; stolen from mzlib/functior.ss
+(define (quicksort l less-than)
+ (let* ((v (list->vector l))
+ (count (vector-length v)))
+ (let loop ((min 0)(max count))
+ (if (< min (sub1 max))
+ (let ((pval (vector-ref v min)))
+ (let pivot-loop ((pivot min)
+ (pos (add1 min)))
+ (if (< pos max)
+ (let ((cval (vector-ref v pos)))
+ (if (less-than cval pval)
+ (begin
+ (vector-set! v pos (vector-ref v pivot))
+ (vector-set! v pivot cval)
+ (pivot-loop (add1 pivot) (add1 pos)))
+ (pivot-loop pivot (add1 pos))))
+ (if (= min pivot)
+ (loop (add1 pivot) max)
+ (begin
+ (loop min pivot)
+ (loop pivot max))))))))
+ (vector->list v)))
+
+;;; HACK!
+(define call/ec call-with-current-continuation)
+(define-syntax let/ec
+ (syntax-rules
+ ()
+ ((let/ec k expr ...)
+ (call-with-current-continuation (lambda (k) expr ...)))))
+
+
+;;; HACK!
+(define (make-parameter val)
+ (lambda ()
+ val))
+
+(define (list* . args)
+ (if (null? (cdr args))
+ (car args)
+ (cons (car args) (apply list* (cdr args)))))
+
+(define (format str . args)
+ (apply (structure-ref big-scheme format) #f str args))
+
+(define fprintf (structure-ref big-scheme format))
+
+(define foldr (structure-ref list-lib fold-right))
+
+(define regexp posix-string->regexp)
+
+;;; convert "\\1y \\2" to '(1 "y " 2)
+(define (convert-string str)
+ (let ((e.s
+ (regexp-fold (rx (: "\\" numeric))
+ (lambda (s m nil)
+ (cons (match:end m)
+ (append (cdr nil)
+ (list (substring str (car nil) (match:start m))
+ (string->number
+ (string-drop (match:substring m) 2))))))
+ (cons 0 '()) str)))
+ (append (cdr e.s) (list (substring str (car e.s) (string-length str))))))
+
+;;; does not handle &
+(define (regexp-replace* pattern string insert-string)
+ (apply regexp-substitute/global #f pattern string
+ (append (cons 'pre (convert-string insert-string)) (list 'post))))
+
+(define (compose f g)
+ (lambda (x)
+ (call-with-values (lambda () (g x)) f)))
+
+(define open-output-string make-string-output-port)
+(define get-output-string string-output-port-output)
\ No newline at end of file
diff --git a/scheme/xml/reader.scm b/scheme/xml/reader.scm
new file mode 100644
index 0000000..6ae99da
--- /dev/null
+++ b/scheme/xml/reader.scm
@@ -0,0 +1,468 @@
+; Taken directly from the SRFI document.
+
+(define-syntax let-values
+ (syntax-rules ()
+ ((let-values (?binding ...) ?body0 ?body1 ...)
+ (let-values "bind" (?binding ...) () (begin ?body0 ?body1 ...)))
+
+ ((let-values "bind" () ?tmps ?body)
+ (let ?tmps ?body))
+
+ ((let-values "bind" ((?b0 ?e0) ?binding ...) ?tmps ?body)
+ (let-values "mktmp" ?b0 ?e0 () (?binding ...) ?tmps ?body))
+
+ ((let-values "mktmp" () ?e0 ?args ?bindings ?tmps ?body)
+ (call-with-values
+ (lambda () ?e0)
+ (lambda ?args
+ (let-values "bind" ?bindings ?tmps ?body))))
+
+ ((let-values "mktmp" (?a . ?b) ?e0 (?arg ...) ?bindings (?tmp ...) ?body)
+ (let-values "mktmp" ?b ?e0 (?arg ... x) ?bindings (?tmp ... (?a x)) ?body))
+
+ ((let-values "mktmp" ?a ?e0 (?arg ...) ?bindings (?tmp ...) ?body)
+ (call-with-values
+ (lambda () ?e0)
+ (lambda (?arg ... . x)
+ (let-values "bind" ?bindings (?tmp ... (?a x)) ?body))))))
+
+(define-syntax let*-values
+ (syntax-rules ()
+ ((let*-values () ?body0 ?body1 ...)
+ (begin ?body0 ?body1 ...))
+
+ ((let*-values (?binding0 ?binding1 ...) ?body0 ?body1 ...)
+ (let-values (?binding0)
+ (let*-values (?binding1 ...) ?body0 ?body1 ...)))))
+
+(define (add1 x) (+ x 1))
+(define-syntax when
+ (syntax-rules
+ ()
+ ((when test expr ...)
+ (if test (begin expr ...)))))
+
+(define-syntax unless
+ (syntax-rules
+ ()
+ ((unless test expr ...)
+ (if (not test) (begin expr ...)))))
+
+(define (sub1 x)
+ (- x 1))
+
+(define (void . a)
+ (if #f #f))
+
+(define-syntax begin0
+ (syntax-rules
+ ()
+ ((begin0 expr1 expr ...)
+ (let ((r expr1))
+ (begin expr ...)
+ r))))
+
+(define andmap
+ (lambda (f list0 . lists)
+ (if (null? list0)
+ (and)
+ (let loop ((lists (cons list0 lists)))
+ (if (null? (cdr (car lists)))
+ (apply f (map car lists))
+ (and (apply f (map car lists))
+ (loop (map cdr lists))))))))
+(define null '())
+
+; stolen from mzlib/functior.ss
+(define (quicksort l less-than)
+ (let* ((v (list->vector l))
+ (count (vector-length v)))
+ (let loop ((min 0)(max count))
+ (if (< min (sub1 max))
+ (let ((pval (vector-ref v min)))
+ (let pivot-loop ((pivot min)
+ (pos (add1 min)))
+ (if (< pos max)
+ (let ((cval (vector-ref v pos)))
+ (if (less-than cval pval)
+ (begin
+ (vector-set! v pos (vector-ref v pivot))
+ (vector-set! v pivot cval)
+ (pivot-loop (add1 pivot) (add1 pos)))
+ (pivot-loop pivot (add1 pos))))
+ (if (= min pivot)
+ (loop (add1 pivot) max)
+ (begin
+ (loop min pivot)
+ (loop pivot max))))))))
+ (vector->list v)))
+
+;;; HACK!
+(define call/ec call-with-current-continuation)
+(define-syntax let/ec
+ (syntax-rules
+ ()
+ ((let/ec k expr ...)
+ (call-with-current-continuation (lambda (k) expr ...)))))
+
+
+;;; HACK!
+(define (make-parameter val)
+ (lambda ()
+ val))
+
+;;;;;;;;;;;
+
+;; Token ::= Contents | Start-tag | End-tag | Eof
+
+(define read-comments (make-parameter #f))
+(define collapse-whitespace (make-parameter #f))
+
+;; read-xml : [Input-port] -> Document
+(define (read-xml . maybe-port)
+ (read-from-port (if (null? maybe-port) (current-input-port) (car maybe-port))))
+
+
+;; read-from-port : Input-port -> Document
+(define (read-from-port in)
+ (let*-values (((in pos) (positionify in))
+ ((misc0 start) (read-misc in pos)))
+ (make-document (make-prolog misc0 #f)
+ (cond
+ ((start-tag? start) (read-element start in pos))
+ ((element? start) start)
+ (else (error 'read-xml "expected root element - received ~a" start)))
+ (let-values (((misc1 end-of-file) (read-misc in pos)))
+ (unless (eof-object? end-of-file)
+ (error 'read-xml "extra stuff at end of document ~a" end-of-file))
+ misc1))))
+
+;; read-misc : Input-port (-> Nat) -> (listof Misc) Token
+(define (read-misc in pos)
+ (let read-more ()
+ (let ((x (lex in pos)))
+ (cond
+ ((or (pi? x) (comment? x))
+ (let-values (((lst next) (read-more)))
+ (values (cons x lst) next)))
+ ((and (pcdata? x) (andmap char-whitespace? (string->list (pcdata-string x))))
+ (read-more))
+ (else (values null x))))))
+
+;; read-element : Start-tag Input-port (-> Nat) -> Element
+(define (read-element start in pos)
+ (let ((name (start-tag-name start))
+ (a (source-start start))
+ (b (source-stop start)))
+ (make-element
+ a b name (start-tag-attrs start)
+ (let read-content ()
+ (let ((x (lex in pos)))
+ (cond
+ ((eof-object? x)
+ (error 'read-xml "unclosed ~a tag at [~a ~a]" name a b))
+ ((start-tag? x) (cons (read-element x in pos) (read-content)))
+ ((end-tag? x)
+ (unless (eq? name (end-tag-name x))
+ (error 'read-xml "start tag ~a at [~a ~a] doesn't match end tag ~a at [~a ~a]"
+ name a b (end-tag-name x) (source-start x) (source-stop x)))
+ null)
+ ((entity? x) (cons (expand-entity x) (read-content)))
+ ((comment? x) (if (read-comments)
+ (cons x (read-content))
+ (read-content)))
+ (else (cons x (read-content)))))))))
+
+;; expand-entity : Entity -> (U Entity Pcdata)
+;; more here - allow expansion of user defined entities
+(define (expand-entity x)
+ (let ((expanded (default-entity-table (entity-text x))))
+ (if expanded
+ (make-pcdata (source-start x) (source-stop x) expanded)
+ x)))
+
+;; default-entity-table : Symbol -> (U #f String)
+(define (default-entity-table name)
+ (case name
+ ((amp) "&")
+ ((lt) "<")
+ ((gt) ">")
+ ((quot) "\"")
+ ((apos) "'")
+ (else #f)))
+
+;; lex : Input-port (-> Nat) -> Token
+(define (lex in pos)
+ (let ((c (peek-char in)))
+ (cond
+ ((eof-object? c) c)
+ ((eq? c #\&) (lex-entity in pos))
+ ((eq? c #\<) (lex-tag-cdata-pi-comment in pos))
+ (else (lex-pcdata in pos)))))
+
+;; lex-entity : Input-port (-> Nat) -> Entity
+(define (lex-entity in pos)
+ (let ((start (pos)))
+ (read-char in)
+ (let ((data (case (peek-char in)
+ ((#\#)
+ (read-char in)
+ (let ((n (case (peek-char in)
+ ((#\x) (read-char in)
+ (string->number (read-until #\; in pos) 16))
+ (else (string->number (read-until #\; in pos))))))
+ (unless (number? n)
+ (lex-error in pos "malformed numeric entity"))
+ n))
+ (else
+ (begin0
+ (lex-name in pos)
+ (unless (eq? (read-char in) #\;)
+ (lex-error in pos "expected ; at the end of an entity")))))))
+ (make-entity start (pos) data))))
+
+;; lex-tag-cdata-pi-comment : Input-port (-> Nat) -> Start-tag | Element | End-tag | Pcdata | Pi | Comment
+(define (lex-tag-cdata-pi-comment in pos)
+ (let ((start (pos)))
+ (read-char in)
+ (case (non-eof peek-char in pos)
+ ((#\!)
+ (read-char in)
+ (case (non-eof peek-char in pos)
+ ((#\-) (read-char in)
+ (unless (eq? (read-char in) #\-)
+ (lex-error in pos "expected second - after )
+ (lex-error in pos "expected > to end comment (\"--\" can't appear in comments)"))
+ ;(make-comment start (pos) data)
+ (make-comment data)))
+ ((#\[) (read-char in)
+ (unless (string=? (read-string 6 in) "CDATA[")
+ (lex-error in pos "expected CDATA following <["))
+ (let ((data (lex-cdata-contents in pos)))
+ (make-pcdata start (pos) data)))
+ (else (skip-dtd in pos)
+ (skip-space in)
+ (unless (eq? (peek-char in) #\<)
+ (lex-error in pos "expected pi, comment, or element after doctype"))
+ (lex-tag-cdata-pi-comment in pos))))
+ ((#\?) (read-char in)
+ (let ((name (lex-name in pos)))
+ (skip-space in)
+ (let ((data (lex-pi-data in pos)))
+ (make-pi start (pos) name data))))
+ ((#\/) (read-char in)
+ (let ((name (lex-name in pos)))
+ (skip-space in)
+ (unless (eq? (read-char in) #\>)
+ (lex-error in pos "expected > to close ~a's end tag" name))
+ (make-end-tag start (pos) name)))
+ (else
+ (let ((name (lex-name in pos))
+ (attrs (lex-attributes in pos)))
+ (skip-space in)
+ (case (read-char in)
+ ((#\/)
+ (unless (eq? (read-char in) #\>)
+ (lex-error in pos "expected > to close empty element ~a" name))
+ (make-element start (pos) name attrs null))
+ ((#\>) (make-start-tag start (pos) name attrs))
+ (else (lex-error in pos "expected / or > to close tag ~a" name))))))))
+
+;; lex-attributes : Input-port (-> Nat) -> (listof Attribute)
+(define (lex-attributes in pos)
+ (quicksort (let loop ()
+ (skip-space in)
+ (cond
+ ((name-start? (peek-char in))
+ (cons (lex-attribute in pos) (loop)))
+ (else null)))
+ (lambda (a b)
+ (let ((na (attribute-name a))
+ (nb (attribute-name b)))
+ (cond
+ ((eq? na nb) (lex-error in pos "duplicated attribute name ~a" na))
+ (else (string (symbol->string na) (symbol->string nb))))))))
+
+;; lex-attribute : Input-port (-> Nat) -> Attribute
+(define (lex-attribute in pos)
+ (let ((start (pos))
+ (name (lex-name in pos)))
+ (skip-space in)
+ (unless (eq? (read-char in) #\=)
+ (lex-error in pos "expected = in attribute ~a" name))
+ (skip-space in)
+ ;; more here - handle entites and disallow "<"
+ (let* ((delimiter (read-char in))
+ (value (case delimiter
+ ((#\' #\")
+ (list->string
+ (let read-more ()
+ (let ((c (non-eof peek-char in pos)))
+ (cond
+ ((eq? c delimiter) (read-char in) null)
+ ((eq? c #\&)
+ (let ((entity (expand-entity (lex-entity in pos))))
+ (if (pcdata? entity)
+ (append (string->list (pcdata-string entity)) (read-more))
+ ;; more here - do something with user defined entites
+ (read-more))))
+ (else (read-char in) (cons c (read-more))))))))
+ (else (lex-error in pos "attribute values must be in ''s or in \"\"s")))))
+ (make-attribute start (pos) name value))))
+
+;; skip-space : Input-port -> Void
+;; deviation - should sometimes insist on at least one space
+(define (skip-space in)
+ (let loop ()
+ (let ((c (peek-char in)))
+ (when (and (not (eof-object? c)) (char-whitespace? c))
+ (read-char in)
+ (loop)))))
+
+;; lex-pcdata : Input-port (-> Nat) -> Pcdata
+;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec
+(define (lex-pcdata in pos)
+ (let ((start (pos))
+ (data (let loop ()
+ (let ((next (peek-char in)))
+ (cond
+ ((or (eof-object? next) (eq? next #\&) (eq? next #\<))
+ null)
+ ((and (char-whitespace? next) (collapse-whitespace))
+ (skip-space in)
+ (cons #\space (loop)))
+ (else (cons (read-char in) (loop))))))))
+ (make-pcdata start
+ (pos)
+ (list->string data))))
+
+;; lex-name : Input-port (-> Nat) -> Symbol
+(define (lex-name in pos)
+ (let ((c (read-char in)))
+ (unless (name-start? c)
+ (lex-error in pos "expected name, received ~a" c))
+ (string->symbol
+ (list->string
+ (cons c (let lex-rest ()
+ (cond
+ ((name-char? (peek-char in))
+ (cons (read-char in) (lex-rest)))
+ (else null))))))))
+
+;; skip-dtd : Input-port (-> Nat) -> Void
+(define (skip-dtd in pos)
+ (let skip ()
+ (case (non-eof read-char in pos)
+ ((#\') (read-until #\' in pos) (skip))
+ ((#\") (read-until #\" in pos) (skip))
+ ((#\<)
+ (case (non-eof read-char in pos)
+ ((#\!) (case (non-eof read-char in pos)
+ ((#\-) (read-char in) (lex-comment-contents in pos) (read-char in) (skip))
+ (else (skip) (skip))))
+ ((#\?) (lex-pi-data in pos) (skip))
+ (else (skip) (skip))))
+ ((#\>) (void))
+ (else (skip)))))
+
+;; name-start? : Char -> Bool
+(define (name-start? ch)
+ (or (char-alphabetic? ch)
+ (eq? ch #\_)
+ (eq? ch #\:)))
+
+;; name-char? : Char -> Bool
+(define (name-char? ch)
+ (or (name-start? ch)
+ (char-numeric? ch)
+ (eq? ch #\.)
+ (eq? ch #\-)))
+
+;; read-until : Char Input-port (-> Nat) -> String
+;; discards the stop character, too
+(define (read-until char in pos)
+ (list->string
+ (let read-more ()
+ (let ((c (non-eof read-char in pos)))
+ (cond
+ ((eq? c char) null)
+ (else (cons c (read-more))))))))
+
+;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Nat) -> Char
+(define (non-eof f in pos)
+ (let ((c (f in)))
+ (cond
+ ((eof-object? c) (lex-error in pos "unexpected eof"))
+ (else c))))
+
+;; gen-read-until-string : String -> Input-port (-> Nat) -> String
+;; uses Knuth-Morris-Pratt from
+;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876
+;; discards stop from input
+(define (gen-read-until-string stop)
+ (let* ((len (string-length stop))
+ (prefix (make-vector len 0))
+ (fall-back
+ (lambda (k c)
+ (let ((k (let loop ((k k))
+ (cond
+ ((and (> k 0) (not (eq? (string-ref stop k) c)))
+ (loop (vector-ref prefix (sub1 k))))
+ (else k)))))
+ (if (eq? (string-ref stop k) c)
+ (add1 k)
+ k)))))
+ (let init ((k 0) (q 1))
+ (when (< q len)
+ (let ((k (fall-back k (string-ref stop q))))
+ (vector-set! prefix q k)
+ (init k (add1 q)))))
+ ;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop
+ (lambda (in pos)
+ (list->string
+ (let/ec out
+ (let loop ((matched 0) (out out))
+ (let* ((c (non-eof read-char in pos))
+ (matched (fall-back matched c)))
+ (cond
+ ((= matched len) (out null))
+ ((zero? matched) (cons c (let/ec out (loop matched out))))
+ (else (cons c (loop matched out)))))))))))
+
+;; "-->" makes more sense, but "--" follows the spec.
+(define lex-comment-contents (gen-read-until-string "--"))
+(define lex-pi-data (gen-read-until-string "?>"))
+(define lex-cdata-contents (gen-read-until-string "]]>"))
+
+;; positionify : Input-port -> Input-port (-> Nat)
+
+;; Well, this really depends on scsh-0.6
+;; For S48 you probably need to do something completely different
+
+(define (positionify in)
+ (let ((n 0); port-limit as absolute value
+ (old-handler (port-handler in)))
+ (let ((handler (make-buffered-input-port-handler
+ (port-handler-discloser old-handler)
+ (port-handler-close old-handler)
+ (lambda (data buffer start needed)
+ (let ((res
+ ((port-handler-buffer-proc old-handler)
+ data buffer start needed)))
+ (if (number? res)
+ (set! n (+ n res)))
+ res))
+ (port-handler-ready? old-handler)
+ (port-handler-steal old-handler))))
+ (set-port-handler! in handler)
+ (values in
+ (lambda ()
+ (- n (- (port-limit in) (port-index in))))))))
+
+;; lex-error : Input-port String (-> Nat) TST* -> alpha
+(define (lex-error in pos str . rest)
+ (error 'lex-error " at positon:" (pos) str rest))
+
+
diff --git a/scheme/xml/space.scm b/scheme/xml/space.scm
new file mode 100644
index 0000000..1448611
--- /dev/null
+++ b/scheme/xml/space.scm
@@ -0,0 +1,26 @@
+;; eliminate-whitespace : (listof Symbol) (Bool -> Bool) -> Element -> Element
+(define (eliminate-whitespace special eliminate-special?)
+ (letrec ((blank-it
+ (lambda (el)
+ (let ((name (element-name el))
+ (content (map (lambda (x)
+ (if (element? x) (blank-it x) x))
+ (element-content el))))
+ (make-element
+ (source-start el)
+ (source-stop el)
+ name
+ (element-attributes el)
+ (cond
+ ((eliminate-special? (memq (element-name el) special))
+ (filter (lambda (s)
+ (not (and (pcdata? s)
+ (or (all-blank (pcdata-string s))
+ (error 'eliminate-blanks "Element <~a> is not allowed to contain text ~s" name (pcdata-string s))))))
+ content))
+ (else content)))))))
+ blank-it))
+
+;; all-blank : String -> Bool
+(define (all-blank s)
+ (andmap char-whitespace? (string->list s)))
diff --git a/scheme/xml/structures.scm b/scheme/xml/structures.scm
new file mode 100644
index 0000000..4ed32ba
--- /dev/null
+++ b/scheme/xml/structures.scm
@@ -0,0 +1,125 @@
+;; Location ::= Nat | Symbol
+;; Source ::= (make-source Location Location)
+(define-record-type source :source
+ (make-source start stop)
+ source?
+ (start really-source-start)
+ (stop really-source-stop))
+
+(define (source-start obj)
+ (cond ((element? obj) (element-start obj))
+ ((attribute? obj) (attribute-start obj))
+ ((pcdata? obj) (pcdata-start obj))
+ ((entity? obj) (entity-start obj))
+ ((pi? obj) (pi-start obj))
+ ((start-tag? obj) (start-tag-start obj))
+ ((end-tag? obj) (end-tag-start obj))
+ (else (really-source-start obj))))
+
+(define (source-stop obj)
+ (cond ((element? obj) (element-stop obj))
+ ((attribute? obj) (attribute-stop obj))
+ ((pcdata? obj) (pcdata-stop obj))
+ ((entity? obj) (entity-stop obj))
+ ((pi? obj) (pi-stop obj))
+ ((start-tag? obj) (start-tag-stop obj))
+ ((end-tag? obj) (end-tag-stop obj))
+ (else (really-source-stop obj))))
+
+;; Document ::= (make-document Prolog Element (listof Misc))
+(define-record-type document :document
+ (make-document prolog element misc)
+ document?
+ (prolog document-prolog)
+ (element document-element)
+ (misc document-misc))
+
+;; Prolog ::= (make-prolog (listof Misc) #f)
+(define-record-type prolog :prolog
+ (make-prolog misc dtd)
+ prolog?
+ (misc prolog-misc)
+ (dtd prolog-dtd))
+
+;; Element ::= (make-element Location Location Symbol (listof Attribute) (listof Content))
+(define-record-type element :element
+ (make-element start stop name attributes content)
+ element?
+ (start element-start)
+ (stop element-stop)
+ (name element-name)
+ (attributes element-attributes)
+ (content element-content))
+
+;; Attribute ::= (make-attribute Location Location Symbol String)
+(define-record-type attribute :attribute
+ (make-attribute start stop name value)
+ attribute?
+ (start attribute-start)
+ (stop attribute-stop)
+ (name attribute-name)
+ (value attribute-value))
+
+;; Pcdata ::= (make-pcdata Location Location String)
+(define-record-type pcdata :pcdata
+ (make-pcdata start stop string)
+ pcdata?
+ (start pcdata-start)
+ (stop pcdata-stop)
+ (string pcdata-string))
+
+;; Content ::= Pcdata
+;; | Element
+;; | Entity
+;; | Misc
+
+;; Misc ::= Comment
+;; | Processing-instruction
+
+;; Entity ::= (make-entity Location Location (U Nat Symbol))
+(define-record-type entity :entity
+ (make-entity start stop text)
+ entity?
+ (start entity-start)
+ (stop entity-stop)
+ (text entity-text))
+
+;; Processing-instruction ::= (make-pi Location Location String (list String))
+;; also represents XMLDecl
+(define-record-type pi :pi
+ (make-pi start stop target-name instruction)
+ pi?
+ (start pi-start)
+ (stop pi-stop)
+ (target-name pi-target-name)
+ (instruction pi-instruction))
+
+;; Comment ::= (make-comment String)
+(define-record-type comment :comment
+ (make-comment text)
+ comment?
+ (text comment-text))
+
+;; content? : TST -> Bool
+(define (content? x)
+ (or (pcdata? x) (element? x) (entity? x) (comment? x) (pi? x)))
+
+
+;;; moved here from reader as it inherits from source
+;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute))
+(define-record-type start-tag :start-tag
+ (make-start-tag start stop name attrs)
+ start-tag?
+ (start start-tag-start)
+ (stop start-tag-stop)
+ (name start-tag-name)
+ (attrs start-tag-attrs))
+
+
+;; End-tag ::= (make-end-tag Location Location Symbol)
+(define-record-type end-tag :end-tag
+ (make-end-tag start stop name)
+ end-tag?
+ (start end-tag-start)
+ (stop end-tag-stop)
+ (name end-tag-name))
diff --git a/scheme/xml/writer.scm b/scheme/xml/writer.scm
new file mode 100644
index 0000000..7ee4c74
--- /dev/null
+++ b/scheme/xml/writer.scm
@@ -0,0 +1,129 @@
+
+;;(define empty-tag-shorthand (make-parameter #t))
+;;(define empty-tag-shorthand (make-parameter void))
+
+;; (empty-tag-shorthand) : (U 'always 'never (listof Symbol))
+(define empty-tag-shorthand (make-parameter 'always))
+
+(define html-empty-tags '(param meta link isindex input img hr frame col br basefont base area))
+
+;; var-argify : (a Output-port -> b) -> (a [Output-port] -> b)
+(define (var-argify f)
+ (lambda (x . maybe-port)
+ (f x (if (null? maybe-port)
+ (current-output-port)
+ (car maybe-port)))))
+
+;; gen-write/display-xml/content : (Nat Output-port -> Void) -> Content [Output-Port]-> Void
+(define (gen-write/display-xml/content dent)
+ (var-argify (lambda (c out) (write-xml-content c 0 dent out))))
+
+;; indent : Nat Output-port -> Void
+(define (indent n out)
+ (newline out)
+ (let loop ((n n))
+ (unless (zero? n)
+ (display #\space out)
+ (loop (sub1 n)))))
+
+;; write-xml/content : Content [Output-port] -> Void
+(define write-xml/content (gen-write/display-xml/content void))
+
+;; display-xml/content : Content [Output-port] -> Void
+(define display-xml/content (gen-write/display-xml/content indent))
+
+;; gen-write/display-xml : (Content [Output-port] -> Void) -> Document [Output-port] -> Void
+(define (gen-write/display-xml output-content)
+ (var-argify (lambda (doc out)
+ (display-outside-misc (prolog-misc (document-prolog doc)) out)
+ (output-content (document-element doc) out)
+ (display-outside-misc (document-misc doc) out))))
+
+;; write-xml : Document [Output-port] -> Void
+(define write-xml (gen-write/display-xml write-xml/content))
+
+;; display-xml : Document [Output-port] -> Void
+(define display-xml (gen-write/display-xml display-xml/content))
+
+;; display-outside-misc : (listof Misc) Output-port -> Void
+(define (display-outside-misc misc out)
+ (for-each (lambda (x)
+ ((cond
+ ((comment? x) write-xml-comment)
+ ((pi? x) write-xml-pi)) x 0 void out)
+ (newline out))
+ misc))
+
+;; write-xml-content : Content Nat (Nat Output-Stream -> Void) Output-Stream -> Void
+(define (write-xml-content el over dent out)
+ ((cond
+ ((element? el) write-xml-element)
+ ((pcdata? el) write-xml-pcdata)
+ ((entity? el) write-xml-entity)
+ ((comment? el) write-xml-comment)
+ ((pi? el) write-xml-pi)
+ (else (error 'write-xml-content "received ~a" el)))
+ el over dent out))
+
+;; write-xml-element : Element Nat (Nat Output-Stream -> Void) Output-Stream -> Void
+(define (write-xml-element el over dent out)
+ (let* ((name (element-name el))
+ (start (lambda (f) (write-xml-base (format f name) over dent out)))
+ (content (element-content el)))
+ (start "<~a")
+ (for-each (lambda (att)
+ (fprintf out " ~s=~s" (attribute-name att)
+ (escape (attribute-value att) escape-attribute-table)))
+ (element-attributes el))
+ (if (and (null? content)
+ (let ((short (empty-tag-shorthand)))
+ (case short
+ ((always) #t)
+ ((never) #f)
+ (else (memq name short)))))
+ (fprintf out " />")
+ (begin
+ (fprintf out ">")
+ (for-each (lambda (c) (write-xml-content c (incr over) dent out)) content)
+ (start "~a")
+ (fprintf out ">")))))
+
+;; write-xml-base : (U String Char Symbol) Nat (Nat Output-Stream -> Void) Output-Stream -> Void
+(define (write-xml-base el over dent out)
+ (dent over out)
+ (display el out))
+
+;; write-xml-pcdata : Pcdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void
+(define (write-xml-pcdata str over dent out)
+ (write-xml-base (escape (pcdata-string str) escape-table) over dent out))
+
+;; write-xml-pi : Processing-instruction Nat (Nat Output-Stream -> Void) Output-Stream -> Void
+(define (write-xml-pi pi over dent out)
+ (write-xml-base (format "~a ~a?>" (pi-target-name pi) (pi-instruction pi)) over dent out))
+
+;; write-xml-comment : Comment Nat (Nat Output-Stream -> Void) Output-Stream -> Void
+(define (write-xml-comment comment over dent out)
+ (write-xml-base (format "" (comment-text comment)) over dent out))
+
+;; write-xml-entity : Entity Nat (Nat Output-stream -> Void) Output-stream -> Void
+(define (write-xml-entity entity over dent out)
+ (let ((n (entity-text entity)))
+ (fprintf out (if (number? n) "~a;" "&~a;") n)))
+
+(define escape-table
+ (map (lambda (x y) (cons (regexp (symbol->string x)) y))
+ '(< > &)
+ '("<" ">" "&")))
+
+(define escape-attribute-table
+ (list* (cons (regexp "'") "'") (cons (regexp "\"") """) escape-table))
+
+;; escape : String -> String
+;; more here - this could be much more efficient
+(define (escape x table)
+ (foldr (lambda (esc str) (regexp-replace* (car esc) str (cdr esc)))
+ x
+ table))
+
+;; incr : Nat -> Nat
+(define (incr n) (+ n 2))
diff --git a/scheme/xml/xexpr.scm b/scheme/xml/xexpr.scm
new file mode 100644
index 0000000..95ebafa
--- /dev/null
+++ b/scheme/xml/xexpr.scm
@@ -0,0 +1,81 @@
+; (import xml-structs^ writer^ mzlib:function^)
+;; Xexpr ::= String
+;; | (list* Symbol (listof Attribute-srep) (listof Xexpr))
+;; | (cons Symbol (listof Xexpr))
+;; | Symbol
+;; | Nat
+;; | Comment
+;; | Processing-instruction
+;; Attribute-srep ::= (list Symbol String)
+
+;; sorting is no longer necessary, since xt3d uses xml->zxexpr, which sorts.
+
+;; assoc-sort : (listof (list Symbol a)) -> (listof (list Symbol a))
+(define (assoc-sort to-sort)
+ (quicksort to-sort (bcompose string (compose symbol->string car))))
+
+(define xexpr-drop-empty-attributes (make-parameter #f))
+
+;; xml->xexpr : Content -> Xexpr
+;; The contract is loosely enforced.
+(define (xml->xexpr x)
+ (let* ((non-dropping-combine
+ (lambda (atts body)
+ (cons (assoc-sort (map attribute->srep atts))
+ body)))
+ (combine (if (xexpr-drop-empty-attributes)
+ (lambda (atts body)
+ (if (null? atts)
+ body
+ (non-dropping-combine atts body)))
+ non-dropping-combine)))
+ (let loop ((x x))
+ (cond
+ ((element? x)
+ (let ((body (map loop (element-content x)))
+ (atts (element-attributes x)))
+ (cons (element-name x) (combine atts body))))
+ ((pcdata? x) (pcdata-string x))
+ ((entity? x) (entity-text x))
+ ((or (comment? x) (pi? x)) x)
+ ((document? x) (error 'xml->xexpr "Expected content, given ~a~nUse document-element to extract the content." x))
+ (else (error 'xml->xexpr "Expected content, given ~a" x))))))
+
+;; attribute->srep : Attribute -> Attribute-srep
+(define (attribute->srep a)
+ (list (attribute-name a) (attribute-value a)))
+
+;; srep->attribute : Attribute-srep -> Attribute
+(define (srep->attribute a)
+ (unless (and (pair? a) (pair? (cdr a)) (null? (cddr a)) (symbol? (car a)) (string? (cadr a)))
+ (error 'srep->attribute "expected (cons Symbol String) given ~a" a))
+ (make-attribute 'scheme 'scheme (car a) (cadr a)))
+
+;; xexpr->xml : Xexpr -> Content
+;; The contract is enforced.
+(define (xexpr->xml x)
+ (cond
+ ((pair? x)
+ (let ((f (lambda (atts body)
+ (unless (list? body)
+ (error 'xexpr->xml "expected a list of xexprs a the body in ~a" x))
+ (make-element 'scheme 'scheme (car x)
+ atts
+ (map xexpr->xml body)))))
+ (if (and (pair? (cdr x)) (or (null? (cadr x)) (and (pair? (cadr x)) (pair? (caadr x)))))
+ (f (map srep->attribute (cadr x)) (cddr x))
+ (f null (cdr x)))))
+ ((string? x) (make-pcdata 'scheme 'scheme x))
+ ((or (symbol? x) (and (integer? x) (>= x 0))) (make-entity 'scheme 'scheme x))
+ ((or (comment? x) (pi? x)) x)
+ (else (error 'xexpr->xml "malformed xexpr ~s" x))))
+
+;; xexpr->string : Xexpression -> String
+(define (xexpr->string xexpr)
+ (let ((port (open-output-string)))
+ (write-xml/content (xexpr->xml xexpr) port)
+ (get-output-string port)))
+
+;; bcompose : (a a -> c) (b -> a) -> (b b -> c)
+(define (bcompose f g)
+ (lambda (x y) (f (g x) (g y))))
diff --git a/scheme/xml/xml-packages.scm b/scheme/xml/xml-packages.scm
new file mode 100644
index 0000000..5a5c19f
--- /dev/null
+++ b/scheme/xml/xml-packages.scm
@@ -0,0 +1,121 @@
+(define-interface xml-structures-interface
+ (export source-start
+ source-stop
+ make-document document? document-prolog document-element document-misc
+ make-prolog prolog? prolog-misc prolog-dtd
+ make-element element? element-name element-attributes element-content
+ make-attribute attribute? attribute-name attribute-value
+ make-pcdata pcdata? pcdata-string
+ make-entity entity? entity-text
+ make-pi pi? pi-target-name pi-instruction
+ make-comment comment? comment-text
+ content?
+ make-start-tag start-tag? start-tag-name start-tag-attrs
+ make-end-tag end-tag? end-tag-name))
+
+(define-structure xml-structures xml-structures-interface
+ (open scheme
+ extended-ports
+ define-record-types)
+ (files structures))
+
+(define-interface plt-compat-interface
+ (export let-values
+ let*-values
+ add1 sub1
+ when unless
+ begin0
+ void
+ andmap
+ quicksort
+ make-parameter
+ let/ec call/ec
+ list* null
+ format
+ fprintf
+ regexp regexp-replace*
+ foldr
+ compose
+ open-output-string get-output-string
+ ))
+
+(define-structure plt-compat plt-compat-interface
+ (open scsh
+ scheme
+ string-lib
+ structure-refs)
+ (access big-scheme ;; format
+ list-lib) ;; fold
+ (files plt))
+
+(define-interface reader-interface
+ (export read-xml
+ read-comments
+ collapse-whitespace))
+
+(define-structure reader reader-interface
+ (open scsh ;read-string
+ scheme
+ xml-structures
+ i/o
+ i/o-internal
+ ports
+ plt-compat
+ signals)
+ (files reader))
+
+(define-interface writer-interface
+ (export write-xml
+ display-xml
+ write-xml/content
+ display-xml/content
+ empty-tag-shorthand
+ html-empty-tags))
+
+(define-structure writer writer-interface
+ (open scheme
+ xml-structures
+ signals
+ plt-compat)
+ (files writer))
+
+
+(define-interface space-interface
+ (export eliminate-whitespace))
+
+(define-structure space space-interface
+ (open scheme
+ plt-compat
+ signals
+ list-lib
+ xml-structures)
+ (files space))
+
+(define-interface xexpr-interface
+ (export xml->xexpr
+ xexpr->xml
+ xexpr->string
+ xexpr-drop-empty-attributes))
+
+(define-structure xexpr xexpr-interface
+ (open scheme
+ plt-compat
+ writer
+ signals
+ xml-structures)
+ (files xexpr))
+
+(define-structure xml (compound-interface xml-structures-interface
+ reader-interface
+ writer-interface
+ xexpr-interface
+ space-interface)
+ (open scheme
+ plt-compat
+ xml-structures
+ reader
+ writer
+ xexpr
+ space))
+
+
\ No newline at end of file