change URL-Parser's interface: parser now preserves info whether

Request-URI's path ends with a slash.
(See http://httpd.apache.org/docs-2.0/misc/rewriteguide.html ->
"Trailing slash problem" for reasons).
This commit is contained in:
vibr 2005-04-13 10:32:29 +00:00
parent 9118345aaa
commit fe6b3fffac
2 changed files with 40 additions and 17 deletions

View File

@ -26,26 +26,35 @@ The \ex{url} module contains procedures to parse and unparse HTTP 1.1 Request-UR
The \var{port} slot is an integer or \sharpf.
The \var{path} slot is a list containing the Request-URI's path
split at slashes and \emph{unescaped}.
The \var{path} slot is a list of strings containing the
Request-URI's path split at slashes and \emph{unescaped}. If the
Request-URI's path ends with a slash, an empty string is inserted as
the last element of the list.
The \var{query} slot is a non-empty string, still in its
\emph{escaped} representation, or \sharpf.
\end{desc}
%
Examples for Request-URI strings and the slots of the corresponding http-url record:
Examples for Request-URI strings and the slots of the corresponding
http-url record: \nopagebreak
\begin{alltt}
"http://foo.bar.org:7777///foo%20foo//bar.htm?bulb%20bulb"
\(\Rightarrow\) "foo.bar.org" 7777 '("foo foo" "bar.htm") "bulb%20bulb"
"http://foo.bar.org/"
"http://foo.bar.org"
\(\Rightarrow\) "foo.bar.org" #f '() #f
"http://foo.bar.org//"
\(\Rightarrow\) "foo.bar.org" #f '("") #f
"/foo%20foo//bar.htm?bulb%20bulb"
\(\Rightarrow\) #f #f '("foo foo" "bar.htm") "bulb%20bulb"
"/foo%20foo//?bulb%20bulb"
\(\Rightarrow\) #f #f '("foo foo" "") "bulb%20bulb"
"/"
\(\Rightarrow\) #f #f '() #f
\(\Rightarrow\) #f #f '("") #f
\end{alltt}

View File

@ -194,23 +194,34 @@
;;; SPLIT-ABS-PATH assumes abs-path is either #f or matches the RegExp
;;; abs_path; no checks are done.
;;;
;;; remark: abs_path allows for strings containing several consecutive slashes;
;;; Remark: abs_path allows for strings containing several consecutive slashes;
;;; SPLIT-ABS-PATH treats them as one slash.
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
;;; (e.g., "/foo///bar//baz" => ("foo" "bar" "baz"))
;;;
;;; Note: we have to differentiate between paths with trailing
;;; slash(es) and paths without and hand that information over
;;; to the request handler. (See
;;; http://httpd.apache.org/docs-2.0/misc/rewriteguide.html ->
;;; "Trailing Slash problem" for the reasons.)
;;; If the path ends with one or more trailing slashes, the last element
;;; of the returned list will be an empty string.
;;; (e.g., "/foo///bar//baz//" => ("foo" "bar" "baz" ""))
(define (split-abs-path abs-path)
(if abs-path
(regexp-fold-right
(rx (+ (~ ("/"))))
(lambda (match i res)
(cons (match:substring match 0) res))
'()
abs-path)
(let* ((trailing-slash (char=? #\/ (string-ref abs-path (- (string-length abs-path) 1))))
(last-element (if trailing-slash '("") '())))
(regexp-fold-right
(rx (+ (~ ("/"))))
(lambda (match i res)
(cons (match:substring match 0) res))
last-element
abs-path))
'()))
'()))
;;; record type HTTP-URL for Request_URIs
@ -219,9 +230,12 @@
;;;
;;; The PORT slot is an integer or #f.
;;;
;;; The PATH slot is the Request_URI's path split at slashes
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
;;; and unescaped.
;;; The PATH slot is a list of strings containing the Request_URI's
;;; path split at slashes and unescaped. If the Request_URI's path
;;; ends with a slash, an empty string is inserted as the last element
;;; of the list.
;;; (e.g., "/foo///bar//baz" => ("foo" "bar" "baz"))
;;; (e.g., "/foo///bar//baz//" => ("foo" "bar" "baz" ""))
;;;
;;; The QUERY slot is a non-empty string, still in its escaped
;;; representation, or #f.