2004-10-06 15:10:49 -04:00
|
|
|
;;; HTTP 1.1 Request-URI parsing and unparsing -*- Scheme -*-
|
2002-08-27 05:03:22 -04:00
|
|
|
|
2002-06-08 11:07:01 -04:00
|
|
|
;;; Copyright (c) 1995 by Olin Shivers.
|
2002-08-27 05:03:22 -04:00
|
|
|
;;; For copyright information, see the file COPYING which comes with
|
|
|
|
;;; the distribution.
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
;;; References:
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1
|
|
|
|
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
|
2004-10-11 04:54:41 -04:00
|
|
|
;;;
|
|
|
|
;;; RFC 2616 adopts definitions of regexps from RFC 2396.
|
|
|
|
|
2004-10-06 15:10:49 -04:00
|
|
|
|
2004-10-11 04:54:41 -04:00
|
|
|
;;; RFC 2616 is ambiguous in defining Request_URIS:
|
|
|
|
;;;
|
|
|
|
;;; section 5.1.2 states:
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; HTTP 1.1 Request-URIS are of the form
|
|
|
|
;;; Request-URI = "*" | absoluteURI | abs_path | authority
|
|
|
|
;;;
|
2004-10-11 04:54:41 -04:00
|
|
|
;;; whilst section 3.2.2 defines the 'http_URL'
|
|
|
|
;;; http_URL = "http://" host [ ":" port ] [ abs_path [ "?" query ]]
|
2004-10-06 15:10:49 -04:00
|
|
|
|
2004-10-11 04:54:41 -04:00
|
|
|
|
|
|
|
;;; Since allowing for general absoluteURIs doesn't make too much sense
|
|
|
|
;;; we implement only Request_URIs as follows:
|
|
|
|
;;; Request-URI = ( http_URL | abs_path) ["#" fragment]
|
|
|
|
;;;
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; where http_URL is a subset of absoluteURI
|
|
|
|
|
2004-10-11 04:54:41 -04:00
|
|
|
;;; [ "#" fragment ] is allowed even though
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; RFC 2616 disallowes the #fragment part
|
|
|
|
;;; (while RFC 1945 for HTTP/1.0 allowed it).
|
|
|
|
;;; (This is for compatibility with buggy clients).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; RexExps for Request-URIs as scsh SREs
|
|
|
|
;;; stick to RFC terminology throughout
|
|
|
|
|
|
|
|
(define digit (rx numeric))
|
|
|
|
|
|
|
|
(define alpha (rx alphanum))
|
|
|
|
|
|
|
|
(define alphanum (rx alphanumeric))
|
|
|
|
|
|
|
|
(define hex (rx hex-digit))
|
|
|
|
|
|
|
|
(define escaped (rx "%" ,hex ,hex))
|
|
|
|
|
|
|
|
(define mark (rx ( "-_.!~*'()")))
|
|
|
|
|
|
|
|
(define unreserved (rx (| ,alphanum ,mark)))
|
|
|
|
|
|
|
|
(define reserved (rx ( ";/?:@&=+$,")))
|
|
|
|
|
|
|
|
(define uric (rx (| ,reserved ,unreserved ,escaped)))
|
|
|
|
|
|
|
|
(define fragment (rx (* ,uric)))
|
|
|
|
|
|
|
|
(define query (rx (* ,uric)))
|
|
|
|
|
|
|
|
(define pchar-charset (rx ( ":@&=+$,")))
|
|
|
|
|
|
|
|
(define pchar (rx (| ,unreserved ,escaped ,pchar-charset)))
|
|
|
|
|
|
|
|
(define param (rx (* ,pchar)))
|
|
|
|
|
|
|
|
(define segment (rx (:
|
|
|
|
(* ,pchar)
|
|
|
|
(* (: ";" ,param)))))
|
|
|
|
|
|
|
|
(define path-segments (rx (:
|
|
|
|
,segment
|
|
|
|
(* (: "/" ,segment)))))
|
|
|
|
|
|
|
|
(define abs_path (rx (:
|
|
|
|
"/"
|
|
|
|
,path-segments)))
|
|
|
|
|
|
|
|
|
|
|
|
(define port (rx (* ,digit)))
|
|
|
|
|
|
|
|
(define IPv4address (rx (+ ,digit) "." (+ ,digit) "." (+ ,digit) "." (+ ,digit)))
|
|
|
|
|
|
|
|
(define toplabel (rx (:
|
|
|
|
(|
|
|
|
|
,alpha
|
|
|
|
(:
|
|
|
|
,alpha
|
|
|
|
(* (| ,alphanum "-"))
|
|
|
|
,alphanum)))))
|
|
|
|
|
|
|
|
(define domainlabel (rx (:
|
|
|
|
(|
|
|
|
|
,alphanum
|
|
|
|
(: ,alphanum
|
|
|
|
(* (| ,alphanum "-"))
|
|
|
|
,alphanum)))))
|
|
|
|
|
|
|
|
(define hostname (rx (:
|
|
|
|
(* (: ,domainlabel "."))
|
|
|
|
,toplabel
|
|
|
|
(? "."))))
|
|
|
|
|
|
|
|
(define host (rx (| ,hostname ,IPv4address)))
|
|
|
|
|
|
|
|
(define http_URL (rx (:
|
|
|
|
"http://"
|
|
|
|
(submatch
|
|
|
|
,host)
|
|
|
|
(?
|
|
|
|
(: ":" (submatch ,port)))
|
|
|
|
(?
|
|
|
|
(: (submatch ,abs_path)
|
|
|
|
(?
|
|
|
|
(: "?" (submatch ,query))))))))
|
|
|
|
|
|
|
|
(define http_URL_with_frag (rx (: bos ,@http_URL (? "#" ,fragment) eos)))
|
|
|
|
|
|
|
|
(define abs_path_with_frag (rx (: bos (submatch ,abs_path) (? "#" ,fragment) eos)))
|
|
|
|
|
|
|
|
(define Request-URI (rx (| ,@http_URL_with_frag ,@abs_path_with_frag)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;parse HTTP 1.1 Request-URI
|
|
|
|
|
|
|
|
(define (parse-uri request-uri)
|
|
|
|
(cond
|
|
|
|
|
|
|
|
((regexp-search abs_path_with_frag request-uri)
|
|
|
|
=> (lambda (match)
|
|
|
|
(values #f #f (split-abs-path (match:substring match 1)) #f)))
|
|
|
|
|
|
|
|
((regexp-search http_URL_with_frag request-uri)
|
|
|
|
=>(lambda (match)
|
|
|
|
(let ((host (match:substring match 1))
|
|
|
|
(port (match:substring match 2))
|
|
|
|
(path (split-abs-path (match:substring match 3)))
|
|
|
|
(query (match:substring match 4)))
|
|
|
|
(values host port path query))))
|
|
|
|
|
|
|
|
(else
|
|
|
|
(fatal-syntax-error "Request-URI syntactically faulty"))))
|
|
|
|
|
|
|
|
|
|
|
|
;; split the string abs-path at slashes, return list of segments.
|
|
|
|
;; SPLIT-PATH assumes abs-path matches the RegExp abs_path, no checks are done.
|
|
|
|
|
|
|
|
;; minor remark: abs_path allows for strings containing several consecutive slashes;
|
|
|
|
;; SPLIT-ABS-PATH treats them as one slash.
|
|
|
|
|
|
|
|
(define (split-abs-path abs-path)
|
|
|
|
(regexp-fold-right
|
|
|
|
(rx (+ (~ ("/"))))
|
|
|
|
(lambda (match i res)
|
|
|
|
(cons (match:substring match 0) res))
|
|
|
|
'()
|
|
|
|
abs-path))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
|
|
|
|
;;; HTTP URL parsing
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
;;; The PATH slot of this record is the URL's path split at slashes,
|
|
|
|
;;; e.g., "foo/bar//baz/" => ("foo" "bar" "" "baz" "")
|
|
|
|
;;; These elements are in raw, unescaped format. To convert back to
|
2003-01-14 10:01:21 -05:00
|
|
|
;;; a string, use (uri-path->uri (map escape-uri pathlist)).
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2002-11-29 09:56:58 -05:00
|
|
|
(define-record-type http-url :http-url
|
2003-01-15 07:19:45 -05:00
|
|
|
(make-http-url server path search fragment-identifier)
|
2002-11-29 09:56:58 -05:00
|
|
|
http-url?
|
2003-01-15 05:59:46 -05:00
|
|
|
(server http-url-server) ; Initial //anonymous@clark.lcs.mit.edu:80/
|
2002-11-29 09:56:58 -05:00
|
|
|
(path http-url-path) ; Rest of path, split at slashes & decoded.
|
|
|
|
(search http-url-search)
|
2003-01-15 07:19:45 -05:00
|
|
|
(fragment-identifier http-url-fragment-identifier))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2004-10-06 15:10:49 -04:00
|
|
|
;(define-new-record-type http-url :http-url
|
|
|
|
; (make-http-url hostname port path query)
|
|
|
|
; http-url?
|
|
|
|
; (hostname http-url-hostname)
|
|
|
|
; (port http-url-port)
|
|
|
|
; (path http-url-path)
|
|
|
|
; (query http-url-query))
|
|
|
|
|
2002-06-08 11:07:01 -04:00
|
|
|
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
|
|
|
|
;;; <scheme> : <path> ? <search> # <frag-id> <scheme>, <search>, and
|
|
|
|
;;; <frag-id> are strings; <path> is a non-empty string list -- the
|
|
|
|
;;; URI's path split at slashes. Optional parts of the URI, when
|
|
|
|
;;; missing, are specified as #f. If <scheme> is "http", then the
|
|
|
|
;;; other three parts can be passed to PARSE-HTTP-URL, which parses
|
|
|
|
;;; them into a HTTP-URL record. All strings come back from the URI
|
|
|
|
;;; parser encoded. SEARCH and FRAG-ID are left that way; this parser
|
|
|
|
;;; decodes the path elements.
|
|
|
|
;;;
|
|
|
|
;;; Returns a HTTP-URL record, if possible. Otherwise
|
|
|
|
;;; FATAL-SYNTAX-ERROR is called.
|
|
|
|
|
|
|
|
(define (parse-http-url path search frag-id)
|
2003-01-15 05:59:46 -05:00
|
|
|
(let ((uh (parse-server path default-http-server)))
|
|
|
|
(if (or (server-user uh) (server-password uh))
|
2002-06-08 11:07:01 -04:00
|
|
|
(fatal-syntax-error
|
2003-01-15 05:32:35 -05:00
|
|
|
"HTTP URL's may not specify a user or password field" path))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2003-01-15 05:32:35 -05:00
|
|
|
(make-http-url uh (map unescape-uri (cdddr path)) search frag-id)))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2003-01-15 05:36:16 -05:00
|
|
|
(define (parse-http-url-string string)
|
|
|
|
(call-with-values
|
|
|
|
(lambda () (parse-uri string))
|
|
|
|
(lambda (scheme path search frag-id)
|
|
|
|
(if (string=? scheme "http")
|
|
|
|
(parse-http-url path search frag-id)
|
|
|
|
(fatal-syntax-error "not an HTTP URL" path)))))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
;;; Default http port is 80.
|
2003-01-15 05:59:46 -05:00
|
|
|
(define default-http-server (make-server #f #f #f "80"))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
|
|
|
|
;;; Unparse.
|
|
|
|
|
|
|
|
(define (http-url->string url)
|
|
|
|
(string-append "http://"
|
2003-01-15 05:59:46 -05:00
|
|
|
(server->string (http-url-server url))
|
2002-06-08 11:07:01 -04:00
|
|
|
"/"
|
2003-01-14 10:01:21 -05:00
|
|
|
(uri-path->uri (map escape-uri (http-url-path url)))
|
2002-11-29 09:56:58 -05:00
|
|
|
(cond ((http-url-search url) =>
|
2002-06-08 11:07:01 -04:00
|
|
|
(lambda (s) (string-append "?" s)))
|
|
|
|
(else ""))
|
2003-01-15 07:19:45 -05:00
|
|
|
(cond ((http-url-fragment-identifier url) =>
|
2002-06-08 11:07:01 -04:00
|
|
|
(lambda (fi) (string-append "#" fi)))
|
|
|
|
(else ""))))
|