change URL-Parser's interface: parser now preserves info whether

Request-URI's path ends with a slash.
(See http://httpd.apache.org/docs-2.0/misc/rewriteguide.html ->
"Trailing slash problem" for reasons).
This commit is contained in:
vibr 2005-04-13 10:32:29 +00:00
parent 9118345aaa
commit fe6b3fffac
2 changed files with 40 additions and 17 deletions

View File

@ -26,26 +26,35 @@ The \ex{url} module contains procedures to parse and unparse HTTP 1.1 Request-UR
The \var{port} slot is an integer or \sharpf. The \var{port} slot is an integer or \sharpf.
The \var{path} slot is a list containing the Request-URI's path The \var{path} slot is a list of strings containing the
split at slashes and \emph{unescaped}. Request-URI's path split at slashes and \emph{unescaped}. If the
Request-URI's path ends with a slash, an empty string is inserted as
the last element of the list.
The \var{query} slot is a non-empty string, still in its The \var{query} slot is a non-empty string, still in its
\emph{escaped} representation, or \sharpf. \emph{escaped} representation, or \sharpf.
\end{desc} \end{desc}
% %
Examples for Request-URI strings and the slots of the corresponding http-url record: Examples for Request-URI strings and the slots of the corresponding
http-url record: \nopagebreak
\begin{alltt} \begin{alltt}
"http://foo.bar.org:7777///foo%20foo//bar.htm?bulb%20bulb" "http://foo.bar.org:7777///foo%20foo//bar.htm?bulb%20bulb"
\(\Rightarrow\) "foo.bar.org" 7777 '("foo foo" "bar.htm") "bulb%20bulb" \(\Rightarrow\) "foo.bar.org" 7777 '("foo foo" "bar.htm") "bulb%20bulb"
"http://foo.bar.org/" "http://foo.bar.org"
\(\Rightarrow\) "foo.bar.org" #f '() #f \(\Rightarrow\) "foo.bar.org" #f '() #f
"http://foo.bar.org//"
\(\Rightarrow\) "foo.bar.org" #f '("") #f
"/foo%20foo//bar.htm?bulb%20bulb" "/foo%20foo//bar.htm?bulb%20bulb"
\(\Rightarrow\) #f #f '("foo foo" "bar.htm") "bulb%20bulb" \(\Rightarrow\) #f #f '("foo foo" "bar.htm") "bulb%20bulb"
"/foo%20foo//?bulb%20bulb"
\(\Rightarrow\) #f #f '("foo foo" "") "bulb%20bulb"
"/" "/"
\(\Rightarrow\) #f #f '() #f \(\Rightarrow\) #f #f '("") #f
\end{alltt} \end{alltt}

View File

@ -194,23 +194,34 @@
;;; SPLIT-ABS-PATH assumes abs-path is either #f or matches the RegExp abs_path, ;;; SPLIT-ABS-PATH assumes abs-path is either #f or matches the RegExp abs_path,
;;; no checks are done. ;;; no checks are done.
;;; ;;;
;;; remark: abs_path allows for strings containing several consecutive slashes; ;;; Remark: abs_path allows for strings containing several consecutive slashes;
;;; SPLIT-ABS-PATH treats them as one slash. ;;; SPLIT-ABS-PATH treats them as one slash.
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz")) ;;; (e.g., "/foo///bar//baz" => ("foo" "bar" "baz"))
;;;
;;; Note: we have to differentiate between paths with trailing
;;; slash(es) and paths without and hand that information over
;;; to the request handler. (See
;;; http://httpd.apache.org/docs-2.0/misc/rewriteguide.html ->
;;; "Trailing Slash problem" for the reasons.)
;;; If there are one or more trailing slashes, the last element of the
;;; returned list will be an empty string.
;;; (e.g., "/foo///bar//baz//" => ("foo" "bar" "baz" ""))
(define (split-abs-path abs-path) (define (split-abs-path abs-path)
(if abs-path (if abs-path
(regexp-fold-right (let* ((trailing-slash (char=? #\/ (string-ref abs-path (- (string-length abs-path) 1))))
(rx (+ (~ ("/")))) (last-element (if trailing-slash '("") '())))
(lambda (match i res) (regexp-fold-right
(cons (match:substring match 0) res)) (rx (+ (~ ("/"))))
'() (lambda (match i res)
abs-path) (cons (match:substring match 0) res))
last-element
abs-path))
'())) '()))
;;; record type HTTP-URL for Request_URIs ;;; record type HTTP-URL for Request_URIs
@ -219,9 +230,12 @@
;;; ;;;
;;; The PORT slot is an integer or #f. ;;; The PORT slot is an integer or #f.
;;; ;;;
;;; The PATH slot is the Request_URI's path split at slashes ;;; The PATH slot is a list of strings containing the Request_URI's
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz")) ;;; path split at slashes and unescaped. If the Request_URI's path
;;; and unescaped. ;;; ends with a slash, an empty string is inserted as the last element
;;; of the list.
;;; (e.g., "/foo///bar//baz" => ("foo" "bar" "baz"))
;;; (e.g., "/foo///bar//baz//" => ("foo" "bar" "baz" ""))
;;; ;;;
;;; The QUERY slot is a non-empty string, still in its escaped ;;; The QUERY slot is a non-empty string, still in its escaped
;;; representation, or #f. ;;; representation, or #f.