2002-06-08 11:07:01 -04:00
|
|
|
;;; URL parsing and unparsing -*- Scheme -*-
|
2002-08-27 05:03:22 -04:00
|
|
|
|
|
|
|
;;; This file is part of the Scheme Untergrund Networking package.
|
|
|
|
|
2002-06-08 11:07:01 -04:00
|
|
|
;;; Copyright (c) 1995 by Olin Shivers.
|
2002-08-27 05:03:22 -04:00
|
|
|
;;; For copyright information, see the file COPYING which comes with
|
|
|
|
;;; the distribution.
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2003-01-15 05:32:35 -05:00
|
|
|
;;; I'm only implementing HTTP URL's right now.
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
;;; References:
|
2002-08-26 07:18:44 -04:00
|
|
|
;;; - http://www.w3.org/Addressing/rfc1738.txt
|
2002-06-08 11:07:01 -04:00
|
|
|
;;; Original RFC
|
|
|
|
;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/Overview.html
|
|
|
|
;;; General Web page of URI pointers.
|
|
|
|
|
|
|
|
|
|
|
|
;;; Unresolved issues:
|
|
|
|
;;; - The userhost parser shouldn't substitute default values --
|
|
|
|
;;; that should happen in a separate step.
|
|
|
|
|
|
|
|
;;; The steps in hacking a URL are:
|
|
|
|
;;; - Take the URI, parse it, and resolve it with the context URI, if any.
;;; - Consult the URI's <scheme>. Pick the appropriate URL parser and parse.
|
|
|
|
|
|
|
|
|
|
|
|
;;; Userhost strings: //<user>:<password>@<host>:<port>/
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
;;; A USERHOST record holds the components of a path prefix of the form
;;;     //<user>:<password>@<host>:<port>/
;;; Prefixes like this frequently begin URL's that describe Internet
;;; resources.
;;;
;;; Each of the four slots is a decoded string or #f.

(define-record-type userhost :userhost
  (make-userhost user password host port)
  userhost?
  (user     userhost-user)
  (password userhost-password)
  (host     userhost-host)
  (port     userhost-port))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
;;; Parse a URI path (a list of path components, not a string!) into a
;;; userhost record.
;;;
;;; PATH must have the shape ("" "" <userhost-string> ...): two initial
;;; empty components (the leading "//") followed by the component that
;;; holds <user>:<password>@<host>:<port>.  The user, password and port
;;; fields, when missing from that component, are taken from the
;;; userhost record DEFAULT; the host is always taken from the path.
;;; Every field read from the path is decoded with UNESCAPE-URI.
;;;
;;; Returns a userhost record if it wins.  (CDDDR PATH) is then the
;;; part of the path that follows the userhost portion.  If PATH does
;;; not have the required shape -- including the case where the two
;;; empty components are not followed by a third element --
;;; FATAL-SYNTAX-ERROR is called.

(define (parse-userhost path default)
  (if (and (pair? path)                  ; The thing better begin
           (string=? (car path) "")      ; with // (i.e., have two
           (pair? (cdr path))            ; initial "" elements) ...
           (string=? (cadr path) "")
           (pair? (cddr path)))          ; ... and a userhost element,
                                         ; so that CADDR below is safe.

      (let* ((uhs (caddr path))                  ; Userhost string.
             (uhs-len (string-length uhs))
             (at (string-index uhs #\@))         ; Usr:passwd at-sign, if any.

             ;; Usr:passwd colon, if any.  A colon only counts as the
             ;; user/password separator when it appears before the at-sign.
             (colon1 (and at
                          (let ((i (string-index uhs #\:)))
                            (and i (< i at) i))))

             ;; Host:port colon, if any.  The search starts at the
             ;; at-sign so a user:password colon is never picked up.
             (colon2 (string-index uhs #\: (or at 0))))

        (make-userhost
         ;; User: everything before the first colon or the at-sign.
         (if at
             (unescape-uri uhs 0 (or colon1 at))
             (userhost-user default))
         ;; Password: between the user colon and the at-sign.
         (if colon1
             (unescape-uri uhs (+ colon1 1) at)
             (userhost-password default))
         ;; Host: after the at-sign (or from the start), up to the
         ;; port colon (or the end).  No default is substituted.
         (unescape-uri uhs
                       (if at (+ at 1) 0)
                       (or colon2 uhs-len))
         ;; Port: everything after the host colon.
         (if colon2
             (unescape-uri uhs (+ colon2 1) uhs-len)
             (userhost-port default))))

      (fatal-syntax-error "URL must begin with //..." path)))
|
|
|
|
|
|
|
|
;;; Unparser
|
|
|
|
|
|
|
|
;;; Characters that get escaped when a userhost field is unparsed:
;;; the at-sign and the colon, which are special in UH strings, on top
;;; of everything in URI-ESCAPED-CHARS.
(define userhost-escaped-chars
  (char-set-union (string->char-set "@:")
                  uri-escaped-chars))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
;;; Unparse the userhost record UH into a string of the form
;;;     <user>:<password>@<host>:<port>
;;; A slot that is #f is left out together with its separator.  The
;;; password is only written when there is a user, and the port only
;;; when there is a host.  Each piece is escaped with
;;; USERHOST-ESCAPED-CHARS before assembly, in case it contains colons
;;; or at-signs.
(define (userhost->string uh)
  (let ((encode   (lambda (s) (escape-uri s userhost-escaped-chars)))
        (user     (userhost-user uh))
        (password (userhost-password uh))
        (host     (userhost-host uh))
        (port     (userhost-port uh)))
    (string-append
     ;; <user>[:<password>]@ -- present only when there is a user.
     (if user (encode user) "")
     (if (and user password) (string-append ":" (encode password)) "")
     (if user "@" "")
     ;; <host>[:<port>] -- present only when there is a host.
     (if host (encode host) "")
     (if (and host port) (string-append ":" (encode port)) ""))))
|
|
|
|
|
|
|
|
|
|
|
|
;;; HTTP URL parsing
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
;;; HTTP-URL records.
;;;
;;; The PATH slot holds the URL's path split at slashes, e.g.
;;;     "foo/bar//baz/" => ("foo" "bar" "" "baz" "")
;;; The elements are stored raw, i.e. unescaped.  To get a string back,
;;; use (uri-path->uri (map escape-uri pathlist)).

(define-record-type http-url :http-url
  (make-http-url userhost path search frag-id)
  http-url?
  (userhost http-url-userhost)  ; Initial //anonymous@clark.lcs.mit.edu:80/
  (path     http-url-path)      ; Rest of path, split at slashes & decoded.
  (search   http-url-search)
  (frag-id  http-url-frag-id))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
;;;     <scheme> : <path> ? <search> # <frag-id>
;;; <scheme>, <search>, and <frag-id> are strings; <path> is a non-empty
;;; string list -- the URI's path split at slashes.  Optional parts of
;;; the URI, when missing, are specified as #f.
;;;
;;; When <scheme> is "http", the other three parts can be handed to
;;; PARSE-HTTP-URL, which builds a HTTP-URL record from them.  All
;;; strings come back from the URI parser encoded.  SEARCH and FRAG-ID
;;; are left that way; this parser decodes the path elements.
;;;
;;; Returns a HTTP-URL record, if possible.  Otherwise
;;; FATAL-SYNTAX-ERROR is called.

(define (parse-http-url path search frag-id)
  (let* ((userhost (parse-userhost path default-http-userhost))
         (credentials? (or (userhost-user userhost)
                           (userhost-password userhost))))
    ;; The HTTP defaults carry no user or password, so either one being
    ;; set means the URL itself supplied it.
    (if credentials?
        (fatal-syntax-error
         "HTTP URL's may not specify a user or password field" path))
    ;; The first three path elements are the "//" prefix and the
    ;; userhost string; the rest is the path proper.
    (make-http-url userhost
                   (map unescape-uri (cdddr path))
                   search
                   frag-id)))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
|
|
|
|
;;; Userhost defaults for HTTP URL's: no user, no password, no host,
;;; and port "80".
(define default-http-userhost
  (make-userhost #f       ; user
                 #f       ; password
                 #f       ; host
                 "80"))   ; port
|
|
|
|
|
|
|
|
|
|
|
|
;;; Unparse: turn the HTTP-URL record URL back into a string,
;;;     http://<userhost>/<path>[?<search>][#<frag-id>]
;;; The path elements are escaped with ESCAPE-URI before being joined.
;;; The search and frag-id strings are appended as they are, and are
;;; omitted when their slot is #f.

(define (http-url->string url)
  (let ((search  (http-url-search url))
        (frag-id (http-url-frag-id url)))
    (string-append "http://"
                   (userhost->string (http-url-userhost url))
                   "/"
                   (uri-path->uri (map escape-uri (http-url-path url)))
                   (if search  (string-append "?" search)  "")
                   (if frag-id (string-append "#" frag-id) ""))))
|