From fe6b3fffac9677d9c414fca64b15111ae0fd3c4d Mon Sep 17 00:00:00 2001 From: vibr Date: Wed, 13 Apr 2005 10:32:29 +0000 Subject: [PATCH] change URL-Parser's interface: parser now preserves info whether Request-URI's path ends with a slash. (See http://httpd.apache.org/docs-2.0/misc/rewriteguide.html -> "Trailing slash problem" for reasons). --- doc/latex/url.tex | 19 ++++++++++++++----- scheme/lib/url.scm | 38 ++++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/doc/latex/url.tex b/doc/latex/url.tex index dce4654..9b3af67 100644 --- a/doc/latex/url.tex +++ b/doc/latex/url.tex @@ -26,26 +26,35 @@ The \ex{url} module contains procedures to parse and unparse HTTP 1.1 Request-UR The \var{port} slot is an integer or \sharpf. - The \var{path} slot is a list containing the Request-URI's path - split at slashes and \emph{unescaped}. + The \var{path} slot is a list of strings containing the + Request-URI's path split at slashes and \emph{unescaped}. If the + Request-URI's path ends with a slash, an empty string is inserted as + the last element of the list. The \var{query} slot is an non-empty-string, still in its \emph{escaped} representation, or \sharpf. 
\end{desc} % -Examples for Request-URI strings and the slots of the corresponding http-url record: +Examples for Request-URI strings and the slots of the corresponding +http-url record: \nopagebreak \begin{alltt} "http://foo.bar.org:7777///foo%20foo//bar.htm?bulb%20bulb" \(\Rightarrow\) "foo.bar.org" 7777 '("foo foo" "bar.htm") "bulb%20bulb" -"http://foo.bar.org/" +"http://foo.bar.org" \(\Rightarrow\) "foo.bar.org" #f '() #f +"http://foo.bar.org//" +\(\Rightarrow\) "foo.bar.org" #f '("") #f + "/foo%20foo//bar.htm?bulb%20bulb" \(\Rightarrow\) #f #f '("foo foo" "bar.htm") "bulb%20bulb" +"/foo%20foo//?bulb%20bulb" +\(\Rightarrow\) #f #f '("foo foo" "") "bulb%20bulb" + "/" -\(\Rightarrow\) #f #f '() #f +\(\Rightarrow\) #f #f '("") #f \end{alltt} diff --git a/scheme/lib/url.scm b/scheme/lib/url.scm index 1ad4a7e..0bd870a 100644 --- a/scheme/lib/url.scm +++ b/scheme/lib/url.scm @@ -194,23 +194,34 @@ ;;; SPLIT-PATH assumes abs-path if either #f or matches the RegExp abs_path, ;;; no checks are done. ;;; -;;; remark: abs_path allows for strings containing several consecutive slashes; +;;; Remark: abs_path allows for strings containing several consecutive slashes; ;;; SPLIT-ABS-PATH treats them as one slash. -;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz")) +;;; (e.g., "/foo///bar//baz" => ("foo" "bar" "baz")) +;;; +;;; Note: we have to differentiate between paths with trailing +;;; slash(es) and paths without and hand that information over +;;; to the request handler. (See +;;; http://httpd.apache.org/docs-2.0/misc/rewriteguide.html -> +;;; "Trailing Slash problem" for the reasons.) +;;; If there is one or more trailing slash(es) the last element of the +;;; returned list will be an empty string. +;;; (e.g., "/foo///bar//baz//" => ("foo" "bar" "baz" "")) (define (split-abs-path abs-path) (if abs-path - (regexp-fold-right - (rx (+ (~ ("/")))) - (lambda (match i res) - (cons (match:substring match 0) res)) - '() - abs-path) + (let* ((trailing-slash (char=? 
#\/ (string-ref abs-path (- (string-length abs-path) 1)))) + (last-element (if trailing-slash '("") '()))) + (regexp-fold-right + (rx (+ (~ ("/")))) + (lambda (match i res) + (cons (match:substring match 0) res)) + last-element + abs-path)) - '())) + '())) ;;; record type HTTP-URL for Request_URIs @@ -219,9 +230,12 @@ ;;; ;;; The PORT slot is an integer or #f. ;;; -;;; The PATH slot is the Request_URI's path split at slashes -;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz")) -;;; and unescaped. +;;; The PATH slot is a list of strings containing the Request_URI's +;;; path split at slashes and unescaped. If the Request_URI's path +;;; ends with a slash, an empty string is inserted as the last element +;;; of the list. +;;; (e.g., "/foo///bar//baz" => ("foo" "bar" "baz")) +;;; (e.g., "/foo///bar//baz//" => ("foo" "bar" "baz" "")) ;;; ;;; The QUERY slot is an non-empty-string, still in its escaped ;;; representation, or #f.