implementing chunked transfer coding:

*new proc  READ-MESSAGE-BODY: a high-level interface for reading in
message bodies (should be used by all handlers)
*new proc READ-ORDINARY-BODY: reads in message bodies with no transfer
coding applied
*new proc READ-CHUNKED-BODY: reads in message bodies in chunked
transfer coding
*new proc GET-CHUNK-SIZE: reads in and parses the size of the next
chunk in a chunked message body
*helper procs READ-AND-DISCARD-TRAILER and DISCARD-LINE-TERMINATOR
*minor changes to GET-NUMERIC-FIELD-VALUE (cosmetic)
This commit is contained in:
vibr 2005-04-16 20:45:33 +00:00
parent 555d52806d
commit 15049e1c58
1 changed files with 131 additions and 27 deletions

View File

@ -1,4 +1,4 @@
;;; A library of procs for handlers. ;;; A library of procs for request handlers.
;;; This file is part of the Scheme Untergrund Networking package. ;;; This file is part of the Scheme Untergrund Networking package.
@ -6,14 +6,52 @@
;;; the distribution. ;;; the distribution.
;; interpolate host info from our request's net connection. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; return string. Example: "134.2.12.72:7777" ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (get-socket-host-string req) ;;; Reading in the message body of a request.
(let ((addr (socket-local-address (request-socket req)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(call-with-values ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(lambda ()(socket-address->internet-address addr))
(lambda (ipaddr portnum) ;;; Read in the message body of an HTTP message and return it as a string.
(string-append (format-internet-host-address ipaddr) ":" (number->string portnum)))))) ;;;
;;; READ-MESSAGE-BODY handles ordinary message bodies as well as
;;; message bodies to which the transfer coding "chunked" has been
;;; applied.
;;;
;;; Note: all request handlers should use READ-MESSAGE-BODY, and should not
;;; read in message bodies by themselves: READ-MESSAGE-BODY implements
;;; the correct order in which a message body's length is determined.
;;; (See RFC 2616, 4.4 for precedence of Transfer-encoding header over Content-length header.)
(define (read-message-body req)
(let ((inport (socket:inport (request-socket req))))
(if (chunked-transfer-coding? req)
(read-chunked-body inport)
(read-ordinary-body inport req))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Reading in ordinary bodies (no transfer coding applied)
;;; Read in the message body, return it as a string.
(define (read-ordinary-body inport req)
(let* ((body-length (get-body-length-from-content-length req)) ;make sure we have a valid Content-length header in request
(maybe-body (read-string body-length inport)))
(or maybe-body
(fatal-syntax-error "EOF while reading in message body"))))
;;Get length of the request's message body from Content-length header or
;;throw fatal-syntax-error if no such header
(define (get-body-length-from-content-length req)
(let
;;try to get field value of first Content-length header (RFC 2616 allows only one Content-length: header)
((maybe-length (get-numeric-field-value req 'content-length)))
(or maybe-length
(fatal-syntax-error "No Content-Length header in request"))))
;; GET-NUMERIC-FIELD-VALUE ;; GET-NUMERIC-FIELD-VALUE
@ -33,16 +71,13 @@
(if field-content ;; request contained "field-name" header (if field-content ;; request contained "field-name" header
(let ;;see * below (let ;;see * below
((field-value (string->number (string-trim-both field-content char-set:blank)))) ;;char-set:blank = Space + Tab = LWS from RFC2616 after folding ((field-value (string->number (string-trim-both field-content char-set:blank)))) ;;char-set:blank = Space + Tab = LWS from RFC2616 after folding
(if (and (integer? field-value) (>= field-value 0)) ;;yes, field value contained only digits, and at least one digit. (if (and field-value (>= field-value 0)) ;;yes, field value contained only digits.
field-value field-value
(http-error (fatal-syntax-error
(status-code bad-request) req
(format #f (format #f
"~A header contained only whitespace, or characters other than digits, or whitespace between digits" "~A header contained only whitespace, or characters other than digits, or whitespace between digits"
field-name)))) field-name))))
#f))) #f)))
;;* RFC 2616, 4.2: The field-content does not include any leading or ;;* RFC 2616, 4.2: The field-content does not include any leading or
;;trailing LWS: linear white space occurring before the first ;;trailing LWS: linear white space occurring before the first
;;non-whitespace character of the field-value or after the last ;;non-whitespace character of the field-value or after the last
@ -51,22 +86,11 @@
;;field value. ;;field value.
;;(probably read-rfc822-headers in rfc822.scm should do the job of skipping leading and trailing whitespace?) ;;(probably read-rfc822-headers in rfc822.scm should do the job of skipping leading and trailing whitespace?)
;;get request's message-body length from Content-length: header or
;;throw http-error if no such header
(define (get-body-length-from-content-length req)
(let
;;try to get field value of first Content-length header (RFC 2616 allows only one Content-length: header)
((maybe-length (get-numeric-field-value req 'content-length)))
(or maybe-length
(http-error (status-code bad-request) req "No Content-Length header in request"))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Decode chunked entity bodies: "chunked transfer-coding" ;;; Decoding chunked entity bodies: "chunked transfer-coding"
;;; See RFC 2616, 3.6.1 and 19.4.6 for the composition of chunked entity bodies.
;;; See RFC 2616, 4.4 for precedence of Transfer-encoding header over Content-length header.
;;; Is the request's entity body sent in chunked transfer-encoding? ;;; Is the request's entity body sent in chunked transfer-encoding?
@ -89,3 +113,83 @@
; the first token of the (extracted last) transfer-coding must be "chunked" to indicate chunked transfer coding ; the first token of the (extracted last) transfer-coding must be "chunked" to indicate chunked transfer coding
(string-prefix? "chunked" last-transfer-coding))))) (string-prefix? "chunked" last-transfer-coding)))))
;;; Read in the chunked entity body, return it as a string.
;;; (See RFC 2616, 3.6.1 and 19.4.6 for the composition of chunked entity bodies.)
(define (read-chunked-body inport)
(let read-chunks ((chunk-size (get-chunk-size inport))
(res ""))
(if (= 0 chunk-size) ;last-chunk
(begin (read-and-discard-trailer inport); see comment *1
res)
(let ((maybe-chunk-data (read-string chunk-size inport)))
(if maybe-chunk-data
(begin (discard-line-terminator inport)
(read-chunks (get-chunk-size inport) (string-append res maybe-chunk-data)))
(fatal-syntax-error "EOF while reading in chunk-data in chunked entity body"))))))
;comment *1:
;
;This is were we don't achieve conditional compliance: we ought to read
;in the entity-headers in the trailer and incorporate them into the
;request record. Within our current scheme (where reading in the
;entity-body is the request-handlers job - while the request-handler
;is only called _after_ the request record has been built) this is not
;possible.
;
;Note that in their current state (04/2005) the handlers actually
;disregard most request headers anyway (even the request headers
;parsed into the request record).
;
;Alternatively we could check the request for a Trailer header and
;respond with 500 if we find one. Problem here: the clients "SHOULD
;include a Trailer header field in a message using chunked
;transfer-coding with a non-empty trailer" (14.40) - they are not
;obliged to! So even if we check for a Trailer header we may still
;silently disregard a trailer.
;;;Read in a chunk-size line within a chunked entity body; return the chunk-size as an integer.
;;; (See RFC 2616, 3.6.1 and 19.4.6 for the composition of chunked entity bodies.)
(define (get-chunk-size inport)
(let ((chunk-size-line (read-crlf-line inport)))
(if (eof-object? chunk-size-line)
(fatal-syntax-error "EOF while reading in chunk-size in chunked entity body")
(let* ((chunk-extensions-index (string-contains chunk-size-line "; "))
(hex-string (if chunk-extensions-index
(string-take chunk-size-line chunk-extensions-index)
chunk-size-line))
(chunk-size-int (string->number (string-trim-both hex-string char-set:blank) 16)))
(if (and chunk-size-int (>= chunk-size-int 0)) ; yes, chunk-size contained only hex chars
chunk-size-int
(fatal-syntax-error "Chunk-size within chunked entity body is incorrect or syntactically faulty"))))))
(define (read-and-discard-trailer inport)
(with-fatal-error-handler
(lambda (c decline)
(fatal-syntax-error "Illegal RFC 822 field syntax within trailer of entity message body"))
(read-rfc822-headers inport)))
(define (discard-line-terminator inport)
(read-char inport) (read-char inport)) ;;assuming the line terminator is CRLF as required by RFC 2616
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Misc
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; interpolate host info from our request's net connection.
;; return string. Example: "134.2.12.72:7777"
(define (get-socket-host-string req)
(let ((addr (socket-local-address (request-socket req))))
(call-with-values
(lambda ()(socket-address->internet-address addr))
(lambda (ipaddr portnum)
(string-append (format-internet-host-address ipaddr) ":" (number->string portnum))))))