diff --git a/doc/latex/url.tex b/doc/latex/url.tex index bf5a289..1e876a3 100644 --- a/doc/latex/url.tex +++ b/doc/latex/url.tex @@ -1,64 +1,12 @@ \chapter{Parsing and Processing URLs}\label{cha:url} % -This modules contains procedures to parse and unparse URLs. Until -now, only the parsing of HTTP URLs is implemented. - -\section{Server Records} - -A \textit{server} value describes path prefixes of the form -\var{user}:\var{password}@\var{host}:\var{port}. These are -frequently used as the initial prefix of URLs describing Internet -resources. - -\defun{make-server}{user password host port}{server} -\defunx{server?}{thing}{boolean} -\defunx{server-user}{server}{string-or-\sharpf} -\defunx{server-password}{server}{string-or-\sharpf} -\defunx{server-host}{server}{string-or-\sharpf} -\defunx{server-port}{server}{string-or-\sharpf} -\begin{desc} - \ex{Make-server} creates a new server record. Each slot is a - decoded string or \sharpf. (\var{Port} is also a string.) - - \ex{server?} is the corresponding predicate, \ex{server-user}, - \ex{server-password}, \ex{server-host} and \ex{server-port} - are the correspondig selectors. -\end{desc} - -\defun{parse-server}{path default}{server} -\defunx{server->string}{server}{string} -\begin{desc} - \ex{Parse-server} parses a URI path \var{path} (a list representing - a path, not a string) into a server value. Default values are taken - from the server \var{default} except for the host. The values - are unescaped and stored into a server record that is returned. - \ex{Fatal-syntax-error} is called, if the specified path has no - initial to slashes (i.e., it starts with `//\ldots'). - - \ex{server->string} just does the inverse job: it unparses - \var{server} into a string. The elements of the record - are escaped before they are put together. 
- - Example: -\begin{alltt} -> (define default (make-server "andreas" "se ret" "www.sf.net" "80")) -> (server->string default) -"andreas:se\%20ret@www.sf.net:80" -> (parse-server '("" "" "foo\%20bar@www.scsh.net" "docu" "index.html") - default) -'#{server} -> (server->string ##) -"foo\%20bar:se\%20ret@www.scsh.net:80" -\end{alltt} -% -For details about escaping and unescaping see Chapter~\ref{cha:uri}. -\end{desc} +This module contains procedures to parse and unparse HTTP 1.1 Request-URIs. \section{HTTP URLs} \defun{parse-uri} {uri-string } {host port path query} \label{proc:parse-uri} \begin{desc} - Parses an HTTP 1.1 \var{uri\=string} into its four fields. + Parses an HTTP 1.1 Request-URI \var{uri\=string} into its four fields. The fields returned are \emph{not} decoded. If \var{uri\=string} is not an http URL but an abs\_path the \var{host}, \var{port} @@ -69,7 +17,7 @@ For details about escaping and unescaping see Chapter~\ref{cha:uri}. \end{desc} This parser does not absolutely conform to RFC 2616 in allowing a fragment-suffix. Furthermore only http URLs, not absolute URIs in general are -recognized. +recognized (see source for further explanation). \defun{make-http-url}{server path search frag-id}{http-url} \defunx{http-url?}{thing}{boolean} diff --git a/scheme/lib/url.scm b/scheme/lib/url.scm index e341272..8a53576 100644 --- a/scheme/lib/url.scm +++ b/scheme/lib/url.scm @@ -160,96 +160,6 @@ abs-path)) -;;; Unresolved issues: -;;; - The server parser shouldn't substitute default values -- -;;; that should happen in a separate step. - -;;; The steps in hacking a URL are: -;;; - Take the UID, parse it, and resolve it with the context UID, if any. -;;; - Consult the UID's . Pick the appropriate URL parser and parse. 
- - -;;; Server strings: //:@:/ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; A SERVER record describes path-prefixes of the form -;;; //:@:/ -;;; These are frequently used as the initial prefix of URL's describing -;;; Internet resources. - -;;Note: the server record-type and its associated procedures are -;;a relict of the parsing of general URIs. -;;Neither HTTP 1.0 nor HTTP 1.1 allow for the 'host'-part (see regexp above) -;;of the Request_URI to contain a :@ section -;;(as long as the ambiguity of the definition of Request_URIs is 'solved' as explained above). - -(define-record-type server :server ; Each slot is a decoded string or #f. - (make-server user password host port) - server? - (user server-user) - (password server-password) - (host server-host) - (port server-port)) - -;;; Parse a URI path (a list representing a path, not a string!) into -;;; a server record. Default values are taken from the server -;;; record DEFAULT except for the host. Returns a server record if -;;; it wins. CADDR drops the server portion of the path. In fact, -;;; fatal-syntax-error is called, if the path doesn't start with '//'. - - ; -(define (parse-server path default) - (if (and (pair? path) ; The thing better begin - (string=? (car path) "") ; with // (i.e., have two - (pair? (cdr path)) ; initial "" elements). - (string=? (cadr path) "")) - - (let* ((uhs (caddr path)) ; Server string. - (uhs-len (string-length uhs)) - (at (string-index uhs #\@)) ; Usr:passwd at-sign, if any. - - (colon1 (and at (string-index uhs #\:))) ; Usr:passwd colon, - (colon1 (and colon1 (< colon1 at) colon1)) ; if any. - - (colon2 (string-index uhs #\: (or at 0)))) ; Host:port colon, if any. 
- (make-server (if at - (unescape-uri uhs 0 (or colon1 at)) - (server-user default)) - (if colon1 - (unescape-uri uhs (+ colon1 1) at) - (server-password default)) - (unescape-uri uhs (if at (+ at 1) 0) - (or colon2 uhs-len)) - (if colon2 - (unescape-uri uhs (+ colon2 1) uhs-len) - (server-port default)))) - - (fatal-syntax-error "URL must begin with //..." path))) - -;;; Unparser - -(define server-escaped-chars - (char-set-union uri-escaped-chars ; @ and : are also special - (string->char-set "@:"))) ; in UH strings. - -(define (server->string uh) - (let* ((us (server-user uh)) - (pw (server-password uh)) - (ho (server-host uh)) - (po (server-port uh)) - - ;; Encode before assembly in case pieces contain colons or at-signs. - (e (lambda (s) (escape-uri s server-escaped-chars))) - - (user/passwd (if us - `(,(e us) . ,(if pw `(":" ,(e pw) "@") '("@"))) - '())) - (host/port (if ho - `(,(e ho) . ,(if po `(":" ,(e po)) '())) - '()))) - - (apply string-append (append user/passwd host/port)))) - - ;;; HTTP URL parsing ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;