*add comment on another mistake in RFC 2616 (query part of
Request-URIs only allowed for absoluteURIs) *add copy of Appendix A of RFC 2396 for convenience
This commit is contained in:
parent
69948e9561
commit
ba78eba433
|
@ -1,6 +1,6 @@
|
||||||
;;; HTTP 1.1 Request-URI parsing and unparsing -*- Scheme -*-
|
;;; HTTP 1.1 Request-URI parsing and unparsing -*- Scheme -*-
|
||||||
|
|
||||||
;;; Copyright (c) 1995 by Olin Shivers.
|
;;; Copyright (c) 2005 by Viola Brunner.
|
||||||
;;; For copyright information, see the file COPYING which comes with
|
;;; For copyright information, see the file COPYING which comes with
|
||||||
;;; the distribution.
|
;;; the distribution.
|
||||||
|
|
||||||
|
@ -8,9 +8,13 @@
|
||||||
;;; RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1
|
;;; RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1
|
||||||
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
|
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
|
||||||
;;;
|
;;;
|
||||||
;;; RFC 2616 adopts definitions of regexps from RFC 2396.
|
;;; RFC 2616 adopts definitions of regexps from RFC 2396
|
||||||
|
;;; (see copy of Appendix A of RFC 2396 below)
|
||||||
|
|
||||||
|
|
||||||
|
;;; Note: there are 2 problems in RFC 2616 concerning URIs:
|
||||||
|
|
||||||
|
;;; Problem 1:
|
||||||
;;; RFC 2616 is ambiguous in defining Request_URIS:
|
;;; RFC 2616 is ambiguous in defining Request_URIS:
|
||||||
;;;
|
;;;
|
||||||
;;; section 5.1.2 states:
|
;;; section 5.1.2 states:
|
||||||
|
@ -19,24 +23,39 @@
|
||||||
;;;
|
;;;
|
||||||
;;; whilst section 3.2.2 defines the 'http_URL'
|
;;; whilst section 3.2.2 defines the 'http_URL'
|
||||||
;;; http_URL = "http://" host [ ":" port ] [ abs_path [ "?" query ]]
|
;;; http_URL = "http://" host [ ":" port ] [ abs_path [ "?" query ]]
|
||||||
|
|
||||||
|
|
||||||
;;; Since allowing for general absoluteURIs doesn't make too much sense
|
|
||||||
;;; we implement only Request_URIs as follows:
|
|
||||||
;;; Request-URI = ( http_URL | abs_path) ["#" fragment]
|
|
||||||
;;;
|
;;;
|
||||||
;;; where http_URL is a subset of absoluteURI
|
;;; Solution to Problem 1:
|
||||||
|
;;; Since allowing for general absoluteURIs doesn't make too much sense
|
||||||
|
;;; we implement Request_URIs of the form
|
||||||
|
;;; Request-URI = ( http_URL | abs_path) ["#" fragment]
|
||||||
|
;;; where http_URL is only a subset of absoluteURI
|
||||||
|
|
||||||
;;; [ "#" fragment ] is allowed even though
|
|
||||||
;;; RFC 2616 disallows the #fragment part
|
|
||||||
;;; (while RFC 1945 for HTTP/1.0 allowed it).
|
|
||||||
;;; (This is for compatibility with buggy clients).
|
|
||||||
|
|
||||||
|
;;; Problem 2:
|
||||||
|
;;; according to RFC 2616, section 5.1.2, the Request-URI may only
|
||||||
|
;;; have a [? query] part if it's an absoluteURI; on the other hand
|
||||||
|
;;; only requests being made to proxies are supposed to use
|
||||||
|
;;; absoluteURIs; abs_path is the normal case. So this must be a mistake.
|
||||||
|
;;; See also http://skrb.org/ietf/http_errata.html#uriquery
|
||||||
|
;;;
|
||||||
|
;;; Solution to Problem 2:
|
||||||
|
;;; we implement Request_URIs of the form
|
||||||
|
;;; Request-URI = ( http_URL | abs_path ["?" query] ) ["#" fragment]
|
||||||
|
|
||||||
|
|
||||||
|
;;; Here we depart from the RFCs:
|
||||||
|
;;; RFC 2616 and 1945 disallow a #fragment-suffix of the Request-URI.
|
||||||
|
;;; For compatibility with buggy clients we _do_ allow for it.
|
||||||
|
;;; (Apache does so, too).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;;; RegExps for Request-URIs as scsh SREs
|
;;; RegExps for Request-URIs as scsh SREs
|
||||||
;;; stick to RFC terminology throughout
|
;;; stick to RFC terminology throughout
|
||||||
|
;;; (see copy of Appendix A of RFC 2396 below)
|
||||||
|
;;;
|
||||||
|
;;; we implement Request_URIs of the form
|
||||||
|
;;; Request-URI = ( http_URL | abs_path ["?" query] ) ["#" fragment]
|
||||||
|
|
||||||
(define digit (rx numeric))
|
(define digit (rx numeric))
|
||||||
|
|
||||||
|
@ -276,3 +295,60 @@
|
||||||
(define (escape-query query)
|
(define (escape-query query)
|
||||||
(escape query query-reserved-and-excluded))
|
(escape query query-reserved-and-excluded))
|
||||||
|
|
||||||
|
;; Appendix A of RFC 2396
|
||||||
|
;;
|
||||||
|
;A. Collected BNF for URI
|
||||||
|
|
||||||
|
; URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
||||||
|
; absoluteURI = scheme ":" ( hier_part | opaque_part )
|
||||||
|
; relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
|
||||||
|
; hier_part = ( net_path | abs_path ) [ "?" query ]
|
||||||
|
; opaque_part = uric_no_slash *uric
|
||||||
|
; uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
|
||||||
|
; "&" | "=" | "+" | "$" | ","
|
||||||
|
; net_path = "//" authority [ abs_path ]
|
||||||
|
; abs_path = "/" path_segments
|
||||||
|
; rel_path = rel_segment [ abs_path ]
|
||||||
|
; rel_segment = 1*( unreserved | escaped |
|
||||||
|
; ";" | "@" | "&" | "=" | "+" | "$" | "," )
|
||||||
|
; scheme = alpha *( alpha | digit | "+" | "-" | "." )
|
||||||
|
; authority = server | reg_name
|
||||||
|
; reg_name = 1*( unreserved | escaped | "$" | "," |
|
||||||
|
; ";" | ":" | "@" | "&" | "=" | "+" )
|
||||||
|
; server = [ [ userinfo "@" ] hostport ]
|
||||||
|
; userinfo = *( unreserved | escaped |
|
||||||
|
; ";" | ":" | "&" | "=" | "+" | "$" | "," )
|
||||||
|
; hostport = host [ ":" port ]
|
||||||
|
; host = hostname | IPv4address
|
||||||
|
; hostname = *( domainlabel "." ) toplabel [ "." ]
|
||||||
|
; domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
|
||||||
|
; toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
||||||
|
; IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
|
||||||
|
; port = *digit
|
||||||
|
; path = [ abs_path | opaque_part ]
|
||||||
|
; path_segments = segment *( "/" segment )
|
||||||
|
; segment = *pchar *( ";" param )
|
||||||
|
; param = *pchar
|
||||||
|
; pchar = unreserved | escaped |
|
||||||
|
; ":" | "@" | "&" | "=" | "+" | "$" | ","
|
||||||
|
; query = *uric
|
||||||
|
; fragment = *uric
|
||||||
|
; uric = reserved | unreserved | escaped
|
||||||
|
; reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
|
||||||
|
; "$" | ","
|
||||||
|
; unreserved = alphanum | mark
|
||||||
|
; mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
|
||||||
|
; "(" | ")"
|
||||||
|
; escaped = "%" hex hex
|
||||||
|
; hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
|
||||||
|
; "a" | "b" | "c" | "d" | "e" | "f"
|
||||||
|
; alphanum = alpha | digit
|
||||||
|
; alpha = lowalpha | upalpha
|
||||||
|
; lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
|
||||||
|
; "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
|
||||||
|
; "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
|
||||||
|
; upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
|
||||||
|
; "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
|
||||||
|
; "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
|
||||||
|
; digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
|
||||||
|
; "8" | "9"
|
||||||
|
|
Loading…
Reference in New Issue