diff --git a/scheme/xml/doc.txt b/scheme/xml/doc.txt index ed8fc66..126762e 100644 --- a/scheme/xml/doc.txt +++ b/scheme/xml/doc.txt @@ -31,6 +31,11 @@ Functions reads in an XML document from the given or current input port XML documents contain exactly one element. It throws an xml-read:error if there isn't any element or if there are more than one element. + + Malformed xml is reported with source locations in + the form `l.c/o', where l is the line number, c is + the column number and o is the number of characters + from the beginning of the file. > write-xml : Document [Output-port] -> Void writes a document to the given or current output port, currently @@ -131,50 +136,148 @@ Note: Users of the XML collection don't need to know most of these definitions. Note: Xexpr is the only important one to understand. Even then, Processing-instructions may be ignored. -> Xexpr ::= String - | (list* Symbol (listof (list Symbol String)) (list Xexpr)) - | (cons Symbol (listof Xexpr)) ;; an element with no attributes - | Symbol ;; symbolic entities such as   - | Number ;; numeric entities like  - | Misc +> Xexpr = String + | (list* Symbol (listof (list Symbol String)) (list Xexpr)) + | (cons Symbol (listof Xexpr)) ;; an element with no attributes + | Symbol ;; symbolic entities such as   + | Number ;; numeric entities like  + | Misc -> Document ::= (make-document Prolog Element (listof Processing-instruction)) +> Document = (make-document Prolog Element (listof Processing-instruction)) (define-struct document (prolog element misc)) -> Prolog ::= (make-prolog (listof Misc) #f) - (define-struct prolog (misc dtd)) +> Prolog = (make-prolog (listof Misc) Document-type [Misc ...]) + (define-struct prolog (misc dtd misc2)) + The last field is a (listof Misc), but the maker accepts optional + arguments instead for backwards compatibility. -> Element ::= (make-element Location Location +> Document-type = #f | (make-document-type Symbol External-dtd #f) + (define-struct document-type (name external inlined)) + +> External-dtd = (make-external-dtd/public str str) + | (make-external-dtd/system str) + | #f + (define-struct external-dtd (system)) + (define-struct (external-dtd/public external-dtd) (public)) + (define-struct (external-dtd/system external-dtd) ()) + +> Element = (make-element Location Location Symbol (listof Attribute) (listof Content)) (define-struct (element struct:source) (name attributes content)) -> Attribute ::= (make-attribute Location Location Symbol String) +> Attribute = (make-attribute Location Location Symbol String) (define-struct (attribute struct:source) (name value)) -> Content ::= Pcdata - | Element - | Entity - | Misc +> Content = Pcdata + | Element + | Entity + | Misc - Misc ::= Comment - | Processing-instruction + Misc = Comment + | Processing-instruction -> Pcdata ::= (make-pcdata Location Location String) +> Pcdata = (make-pcdata Location Location String) (define-struct (pcdata struct:source) (string)) -> Entity ::= (make-entity (U Nat Symbol)) +> Entity = (make-entity (U Nat Symbol)) (define-struct entity (text)) -> Processing-instruction ::= (make-pi Location Location String (list String)) +> Processing-instruction = (make-pi Location Location String (list String)) (define-struct (pi struct:source) (target-name instruction)) -> Comment ::= (make-comment String) +> Comment = (make-comment String) (define-struct comment (text)) - Source ::= (make-source Location Location) + Source = (make-source Location Location) (define-struct source (start stop)) - Location ::= Nat - | Symbol + Location = Nat + | Symbol + + +The PList Library +================= + +Files: plist.ss + +The PList library provides the ability to read and write xml documents which +conform to the "plist" DTD, used to store 'dictionaries' of string - value +associations. + +To Load +======= + +(require (lib "plist.ss" "xml")) + +Functions +========= + +> read-plist : Port -> PLDict + reads a plist from a port, and produces a 'dict' x-expression + +> write-plist : PLDict Port -> Void + writes a plist to the given port. May raise the exn:application:type + exception if the plist is badly formed. + +Datatypes +========= + +NB: all of these are subtypes of x-expression: + +> PLDict = (list 'dict Assoc-pair ...) + +> PLAssoc-pair = (list 'assoc-pair String PLValue) + +> PLValue = String + + | (list 'true) + | (list 'false) + | (list 'integer Integer) + | (list 'real Real) + | PLDict + | PLArray + +> PLArray = (list 'array PLValue ...) + +In fact, the PList DTD also defines Data and Date types, but we're ignoring +these for the moment. + +Examples +======== + +Here's a sample PLDict: + +(define my-dict + `(dict (assoc-pair "first-key" + "just a string + with some whitespace in it") + (assoc-pair "second-key" + (false)) + (assoc-pair "third-key" + (dict )) + (assoc-pair "fourth-key" + (dict (assoc-pair "inner-key" + (real 3.432)))) + (assoc-pair "fifth-key" + (array (integer 14) + "another string" + (true))) + (assoc-pair "sixth-key" + (array)))) + +Let's write it to disk: + + (call-with-output-file "/Users/clements/tmp.plist" + (lambda (port) + (write-plist my-dict port)) + 'truncate) + +Let's read it back from the disk: + + (define new-dict + (call-with-input-file "/Users/clements/tmp.plist" + (lambda (port) + (read-plist port)))) + diff --git a/scheme/xml/plt.scm b/scheme/xml/plt.scm index 084e74e..cf55698 100644 --- a/scheme/xml/plt.scm +++ b/scheme/xml/plt.scm @@ -106,9 +106,12 @@ ;;; HACK! -(define (make-parameter val) - (lambda () - val)) +(define (make-parameter val . maybe-guard) + (if (null? maybe-guard) + (lambda () + val) + (lambda () + ((car maybe-guard) val)))) (define (list* . args) (if (null? (cdr args)) diff --git a/scheme/xml/reader.scm b/scheme/xml/reader.scm index 6ae99da..b516450 100644 --- a/scheme/xml/reader.scm +++ b/scheme/xml/reader.scm @@ -1,118 +1,3 @@ -; Taken directly from the SRFI document. - -(define-syntax let-values - (syntax-rules () - ((let-values (?binding ...) ?body0 ?body1 ...) - (let-values "bind" (?binding ...) () (begin ?body0 ?body1 ...))) - - ((let-values "bind" () ?tmps ?body) - (let ?tmps ?body)) - - ((let-values "bind" ((?b0 ?e0) ?binding ...) ?tmps ?body) - (let-values "mktmp" ?b0 ?e0 () (?binding ...) ?tmps ?body)) - - ((let-values "mktmp" () ?e0 ?args ?bindings ?tmps ?body) - (call-with-values - (lambda () ?e0) - (lambda ?args - (let-values "bind" ?bindings ?tmps ?body)))) - - ((let-values "mktmp" (?a . ?b) ?e0 (?arg ...) ?bindings (?tmp ...) ?body) - (let-values "mktmp" ?b ?e0 (?arg ... x) ?bindings (?tmp ... (?a x)) ?body)) - - ((let-values "mktmp" ?a ?e0 (?arg ...) ?bindings (?tmp ...) ?body) - (call-with-values - (lambda () ?e0) - (lambda (?arg ... . x) - (let-values "bind" ?bindings (?tmp ... (?a x)) ?body)))))) - -(define-syntax let*-values - (syntax-rules () - ((let*-values () ?body0 ?body1 ...) - (begin ?body0 ?body1 ...)) - - ((let*-values (?binding0 ?binding1 ...) ?body0 ?body1 ...) - (let-values (?binding0) - (let*-values (?binding1 ...) ?body0 ?body1 ...))))) - -(define (add1 x) (+ x 1)) -(define-syntax when - (syntax-rules - () - ((when test expr ...) - (if test (begin expr ...))))) - -(define-syntax unless - (syntax-rules - () - ((unless test expr ...) - (if (not test) (begin expr ...))))) - -(define (sub1 x) - (- x 1)) - -(define (void . a) - (if #f #f)) - -(define-syntax begin0 - (syntax-rules - () - ((begin0 expr1 expr ...) - (let ((r expr1)) - (begin expr ...) - r)))) - -(define andmap - (lambda (f list0 . lists) - (if (null? list0) - (and) - (let loop ((lists (cons list0 lists))) - (if (null? (cdr (car lists))) - (apply f (map car lists)) - (and (apply f (map car lists)) - (loop (map cdr lists)))))))) -(define null '()) - -; stolen from mzlib/functior.ss -(define (quicksort l less-than) - (let* ((v (list->vector l)) - (count (vector-length v))) - (let loop ((min 0)(max count)) - (if (< min (sub1 max)) - (let ((pval (vector-ref v min))) - (let pivot-loop ((pivot min) - (pos (add1 min))) - (if (< pos max) - (let ((cval (vector-ref v pos))) - (if (less-than cval pval) - (begin - (vector-set! v pos (vector-ref v pivot)) - (vector-set! v pivot cval) - (pivot-loop (add1 pivot) (add1 pos))) - (pivot-loop pivot (add1 pos)))) - (if (= min pivot) - (loop (add1 pivot) max) - (begin - (loop min pivot) - (loop pivot max)))))))) - (vector->list v))) - -;;; HACK! -(define call/ec call-with-current-continuation) -(define-syntax let/ec - (syntax-rules - () - ((let/ec k expr ...) - (call-with-current-continuation (lambda (k) expr ...))))) - - -;;; HACK! -(define (make-parameter val) - (lambda () - val)) - -;;;;;;;;;;; - ;; Token ::= Contents | Start-tag | End-tag | Eof (define read-comments (make-parameter #f)) @@ -137,7 +22,7 @@ (error 'read-xml "extra stuff at end of document ~a" end-of-file)) misc1)))) -;; read-misc : Input-port (-> Nat) -> (listof Misc) Token +;; read-misc : Input-port (-> Location) -> (listof Misc) Token (define (read-misc in pos) (let read-more () (let ((x (lex in pos))) @@ -149,7 +34,7 @@ (read-more)) (else (values null x)))))) -;; read-element : Start-tag Input-port (-> Nat) -> Element +;; read-element : Start-tag Input-port (-> Location) -> Element (define (read-element start in pos) (let ((name (start-tag-name start)) (a (source-start start)) @@ -160,12 +45,19 @@ (let ((x (lex in pos))) (cond ((eof-object? x) - (error 'read-xml "unclosed ~a tag at [~a ~a]" name a b)) + (error 'read-xml "unclosed ~a tag at [~a ~a]" name + (format-source a) + (format-source b))) ((start-tag? x) (cons (read-element x in pos) (read-content))) ((end-tag? x) (unless (eq? name (end-tag-name x)) (error 'read-xml "start tag ~a at [~a ~a] doesn't match end tag ~a at [~a ~a]" - name a b (end-tag-name x) (source-start x) (source-stop x))) + name + (format-source a) + (format-source b) + (end-tag-name x) + (format-source (source-start x)) + (format-source (source-stop x)))) null) ((entity? x) (cons (expand-entity x) (read-content))) ((comment? x) (if (read-comments) @@ -191,7 +83,7 @@ ((apos) "'") (else #f))) -;; lex : Input-port (-> Nat) -> Token +;; lex : Input-port (-> Location) -> Token (define (lex in pos) (let ((c (peek-char in))) (cond @@ -200,7 +92,7 @@ ((eq? c #\<) (lex-tag-cdata-pi-comment in pos)) (else (lex-pcdata in pos))))) -;; lex-entity : Input-port (-> Nat) -> Entity +;; lex-entity : Input-port (-> Location) -> Entity (define (lex-entity in pos) (let ((start (pos))) (read-char in) @@ -221,7 +113,7 @@ (lex-error in pos "expected ; at the end of an entity"))))))) (make-entity start (pos) data)))) -;; lex-tag-cdata-pi-comment : Input-port (-> Nat) -> Start-tag | Element | End-tag | Pcdata | Pi | Comment +;; lex-tag-cdata-pi-comment : Input-port (-> Location) -> Start-tag | Element | End-tag | Pcdata | Pi | Comment (define (lex-tag-cdata-pi-comment in pos) (let ((start (pos))) (read-char in) @@ -268,9 +160,9 @@ (lex-error in pos "expected > to close empty element ~a" name)) (make-element start (pos) name attrs null)) ((#\>) (make-start-tag start (pos) name attrs)) - (else (lex-error in pos "expected / or > to close tag ~a" name)))))))) + (else (lex-error in pos "expected / or > to close tag `~a'" name)))))))) -;; lex-attributes : Input-port (-> Nat) -> (listof Attribute) +;; lex-attributes : Input-port (-> Location) -> (listof Attribute) (define (lex-attributes in pos) (quicksort (let loop () (skip-space in) @@ -285,7 +177,7 @@ ((eq? na nb) (lex-error in pos "duplicated attribute name ~a" na)) (else (stringstring na) (symbol->string nb)))))))) -;; lex-attribute : Input-port (-> Nat) -> Attribute +;; lex-attribute : Input-port (-> Location) -> Attribute (define (lex-attribute in pos) (let ((start (pos)) (name (lex-name in pos))) @@ -321,7 +213,7 @@ (read-char in) (loop))))) -;; lex-pcdata : Input-port (-> Nat) -> Pcdata +;; lex-pcdata : Input-port (-> Location) -> Pcdata ;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec (define (lex-pcdata in pos) (let ((start (pos)) @@ -338,7 +230,7 @@ (pos) (list->string data)))) -;; lex-name : Input-port (-> Nat) -> Symbol +;; lex-name : Input-port (-> Location) -> Symbol (define (lex-name in pos) (let ((c (read-char in))) (unless (name-start? c) @@ -351,7 +243,7 @@ (cons (read-char in) (lex-rest))) (else null)))))))) -;; skip-dtd : Input-port (-> Nat) -> Void +;; skip-dtd : Input-port (-> Location) -> Void (define (skip-dtd in pos) (let skip () (case (non-eof read-char in pos) @@ -380,7 +272,7 @@ (eq? ch #\.) (eq? ch #\-))) -;; read-until : Char Input-port (-> Nat) -> String +;; read-until : Char Input-port (-> Location) -> String ;; discards the stop character, too (define (read-until char in pos) (list->string @@ -390,14 +282,14 @@ ((eq? c char) null) (else (cons c (read-more)))))))) -;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Nat) -> Char +;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Location) -> Char (define (non-eof f in pos) (let ((c (f in))) (cond ((eof-object? c) (lex-error in pos "unexpected eof")) (else c)))) -;; gen-read-until-string : String -> Input-port (-> Nat) -> String +;; gen-read-until-string : String -> Input-port (-> Location) -> String ;; uses Knuth-Morris-Pratt from ;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876 ;; discards stop from input @@ -436,13 +328,15 @@ (define lex-pi-data (gen-read-until-string "?>")) (define lex-cdata-contents (gen-read-until-string "]]>")) -;; positionify : Input-port -> Input-port (-> Nat) +;; positionify : Input-port -> Input-port (-> Location) ;; Well, this really depends on scsh-0.6 ;; For S48 you probably need to do something completely different (define (positionify in) - (let ((n 0); port-limit as absolute value + (let ((line 1) + (char 0) + (offset 0) (old-handler (port-handler in))) (let ((handler (make-buffered-input-port-handler (port-handler-discloser old-handler) @@ -452,17 +346,32 @@ ((port-handler-buffer-proc old-handler) data buffer start needed))) (if (number? res) - (set! n (+ n res))) + (begin + (set! char (add1 char)) + (set! offset (add1 offset)) + (let ((c (peek-char in))) + (when (equal? c #\newline) + (set! line (+ line 1)) + (set! char 0)) + c))) res)) (port-handler-ready? old-handler) (port-handler-steal old-handler)))) (set-port-handler! in handler) (values in (lambda () - (- n (- (port-limit in) (port-index in)))))))) + (make-location line char offset)))))) +; (- n (- (port-limit in) (port-index in)))))))) -;; lex-error : Input-port String (-> Nat) TST* -> alpha +;; lex-error : Input-port String (-> Location) TST* -> alpha (define (lex-error in pos str . rest) - (error 'lex-error " at positon:" (pos) str rest)) + (error 'lex-error " at position:" (format-source (pos)) str rest)) + +;; format-source : Location -> string +;; to format the source location for an error message +(define (format-source loc) + (if (location? loc) + (format #f "~a.~a/~a" (location-line loc) (location-char loc) (location-offset loc)) + (format #f "~a" loc))) diff --git a/scheme/xml/structures.scm b/scheme/xml/structures.scm index 4ed32ba..5e986d9 100644 --- a/scheme/xml/structures.scm +++ b/scheme/xml/structures.scm @@ -1,8 +1,15 @@ -;; Location ::= Nat | Symbol +; Location = (make-location Nat Nat) | Symbol +(define-record-type location :location + (make-location line char offset) + location? + (line location-line) + (char location-char) + (offset location-offset)) + ;; Source ::= (make-source Location Location) (define-record-type source :source (make-source start stop) - source? + really-source? (start really-source-start) (stop really-source-stop)) @@ -26,6 +33,16 @@ ((end-tag? obj) (end-tag-stop obj)) (else (really-source-stop obj)))) +(define (does-any-satisfy? preds obj) + (if (null? preds) + #f + (or ((car preds) obj) (does-any-satisfy? (cdr preds) obj)))) + +(define (source? obj) + (does-any-satisfy? (list really-source? element? attribute? pcdata? + entity? pi? start-tag? end-tag?) + obj)) + ;; Document ::= (make-document Prolog Element (listof Misc)) (define-record-type document :document (make-document prolog element misc) @@ -34,12 +51,64 @@ (element document-element) (misc document-misc)) -;; Prolog ::= (make-prolog (listof Misc) #f) + ; Prolog = (make-prolog (listof Misc) Document-type [Misc ...]) + ; The Misc items after the Document-type are optional arguments to maintain + ; backward compatability with older versions of the XML library. + ;(define-struct prolog (misc dtd misc2)) + (define-record-type prolog :prolog - (make-prolog misc dtd) + (really-make-prolog misc dtd misc2) prolog? (misc prolog-misc) - (dtd prolog-dtd)) + (dtd prolog-dtd) + (misc2 prolog-misc2)) + +(define (make-prolog misc dtd misc2) + (really-make-prolog misc dtd misc2)) + +;;; Document-type = (make-document-type sym External-dtd #f) +;;; | #f + +(define-record-type document-type :document-type + (make-document-type name external inlined) + really-document-type? + (name document-type-name) + (external document-type-external) + (inlined document-type-inlined)) + +;;; External-dtd = (make-external-dtd/public str str) +;;; | (make-external-dtd/system str) +;;; | #f +(define-record-type external-dtd :external-dtd + (make-external-dtd system) + really-external-dtd? + (system really-external-dtd-system)) + +(define (external-dtd-system external-dtd) + (cond ((really-external-dtd? external-dtd) + (really-external-dtd-system external-dtd)) + ((external-dtd/public? external-dtd) + (external-dtd/public-system external-dtd)) + ((external-dtd/system? external-dtd) + (external-dtd/system-system external-dtd)) + (else (error "bottom of external-dtd-system" external-dtd)))) + +(define (external-dtd? obj) + (does-any-satisfy? (list really-external-dtd? external-dtd/public? + external-dtd/system?) + obj)) + +(define-record-type external-dtd/public :external-dtd/public + (make-external-dtd/public system public) + external-dtd/public? + (system external-dtd/public-system) + (public external-dtd/public-public)) + +(define-record-type external-dtd/system :external-dtd/system + (make-external-dtd/system system) + external-dtd/system? + (system external-dtd/system-system)) + ;; Element ::= (make-element Location Location Symbol (listof Attribute) (listof Content)) (define-record-type element :element diff --git a/scheme/xml/writer.scm b/scheme/xml/writer.scm index 7ee4c74..1f5dc7c 100644 --- a/scheme/xml/writer.scm +++ b/scheme/xml/writer.scm @@ -3,7 +3,12 @@ ;;(define empty-tag-shorthand (make-parameter void)) ;; (empty-tag-shorthand) : (U 'always 'never (listof Symbol)) -(define empty-tag-shorthand (make-parameter 'always)) +(define empty-tag-shorthand + (make-parameter 'always + (lambda (x) + (if (or (eq? x 'always) (eq? x 'never) (and (list? x) (andmap symbol? x))) + x + (error 'empty-tag-shorthand "expected 'always, 'never, or a list of symbols: received ~a" x))))) (define html-empty-tags '(param meta link isindex input img hr frame col br basefont base area)) @@ -16,7 +21,8 @@ ;; gen-write/display-xml/content : (Nat Output-port -> Void) -> Content [Output-Port]-> Void (define (gen-write/display-xml/content dent) - (var-argify (lambda (c out) (write-xml-content c 0 dent out)))) + (var-argify (lambda (c out) + (write-xml-content c 0 dent out)))) ;; indent : Nat Output-port -> Void (define (indent n out) @@ -35,9 +41,28 @@ ;; gen-write/display-xml : (Content [Output-port] -> Void) -> Document [Output-port] -> Void (define (gen-write/display-xml output-content) (var-argify (lambda (doc out) - (display-outside-misc (prolog-misc (document-prolog doc)) out) - (output-content (document-element doc) out) - (display-outside-misc (document-misc doc) out)))) + (let ((prolog (document-prolog doc))) + (display-outside-misc (prolog-misc prolog) out) + (display-dtd (prolog-dtd prolog) out) + (display-outside-misc (prolog-misc2 prolog) out)) + (output-content (document-element doc) out) + (display-outside-misc (document-misc doc) out)))) + +; display-dtd : document-type oport -> void +(define (display-dtd dtd out) + (when dtd + (fprintf out "" out) + (newline out))) ;; write-xml : Document [Output-port] -> Void (define write-xml (gen-write/display-xml write-xml/content)) @@ -50,7 +75,8 @@ (for-each (lambda (x) ((cond ((comment? x) write-xml-comment) - ((pi? x) write-xml-pi)) x 0 void out) + ((pi? x) write-xml-pi) + (else (error "bottom " x))) x 0 void out) (newline out)) misc)) diff --git a/scheme/xml/xml-packages.scm b/scheme/xml/xml-packages.scm index 5a5c19f..4df4261 100644 --- a/scheme/xml/xml-packages.scm +++ b/scheme/xml/xml-packages.scm @@ -1,8 +1,13 @@ (define-interface xml-structures-interface (export source-start source-stop + make-location location? location-line location-char location-offset make-document document? document-prolog document-element document-misc - make-prolog prolog? prolog-misc prolog-dtd + make-prolog prolog? prolog-misc prolog-dtd prolog-misc2 + make-document-type document-type-name document-type-external + external-dtd-system external-dtd/system? + make-external-dtd/public external-dtd/public? external-dtd/public-public + make-external-dtd/system make-element element? element-name element-attributes element-content make-attribute attribute? attribute-name attribute-value make-pcdata pcdata? pcdata-string @@ -15,6 +20,7 @@ (define-structure xml-structures xml-structures-interface (open scheme + signals extended-ports define-record-types) (files structures))