2004-10-06 15:10:49 -04:00
|
|
|
;;; HTTP 1.1 Request-URI parsing and unparsing -*- Scheme -*-
|
2002-08-27 05:03:22 -04:00
|
|
|
|
2002-06-08 11:07:01 -04:00
|
|
|
;;; Copyright (c) 1995 by Olin Shivers.
|
2002-08-27 05:03:22 -04:00
|
|
|
;;; For copyright information, see the file COPYING which comes with
|
|
|
|
;;; the distribution.
|
2002-06-08 11:07:01 -04:00
|
|
|
|
|
|
|
;;; References:
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1
|
|
|
|
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
|
2004-10-11 04:54:41 -04:00
|
|
|
;;;
|
|
|
|
;;; RFC 2616 adopts definitions of regexps from RFC 2396.
|
|
|
|
|
2004-10-06 15:10:49 -04:00
|
|
|
|
2004-10-11 04:54:41 -04:00
|
|
|
;;; RFC 2616 is ambiguous in defining Request_URIS:
|
|
|
|
;;;
|
|
|
|
;;; section 5.1.2 states:
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; HTTP 1.1 Request-URIS are of the form
|
|
|
|
;;; Request-URI = "*" | absoluteURI | abs_path | authority
|
|
|
|
;;;
|
2004-10-11 04:54:41 -04:00
|
|
|
;;; whilst section 3.2.2 defines the 'http_URL'
|
|
|
|
;;; http_URL = "http://" host [ ":" port ] [ abs_path [ "?" query ]]
|
2004-10-06 15:10:49 -04:00
|
|
|
|
2004-10-11 04:54:41 -04:00
|
|
|
|
|
|
|
;;; Since allowing for general absoluteURIs doesn't make too much sense
|
|
|
|
;;; we implement only Request_URIs as follows:
|
|
|
|
;;; Request-URI = ( http_URL | abs_path) ["#" fragment]
|
|
|
|
;;;
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; where http_URL is a subset of absoluteURI
|
|
|
|
|
2004-10-11 04:54:41 -04:00
|
|
|
;;; [ "#" fragment ] is allowed even though
|
2004-10-06 15:10:49 -04:00
|
|
|
;;; RFC 2616 disallowes the #fragment part
|
|
|
|
;;; (while RFC 1945 for HTTP/1.0 allowed it).
|
|
|
|
;;; (This is for compatibility with buggy clients).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; RexExps for Request-URIs as scsh SREs
|
|
|
|
;;; stick to RFC terminology throughout
|
|
|
|
|
|
|
|
(define digit (rx numeric))
|
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(define alpha (rx alphabetic))
|
2004-10-06 15:10:49 -04:00
|
|
|
|
|
|
|
(define alphanum (rx alphanumeric))
|
|
|
|
|
|
|
|
(define hex (rx hex-digit))
|
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(define escaped (rx (: "%" ,hex ,hex)))
|
2004-10-06 15:10:49 -04:00
|
|
|
|
|
|
|
(define mark (rx ( "-_.!~*'()")))
|
|
|
|
|
2004-10-18 13:35:40 -04:00
|
|
|
(define unreserved (rx (~ (~ (| ,alphanum ,mark)))))
|
2004-10-06 15:10:49 -04:00
|
|
|
|
|
|
|
(define reserved (rx ( ";/?:@&=+$,")))
|
|
|
|
|
|
|
|
(define uric (rx (| ,reserved ,unreserved ,escaped)))
|
|
|
|
|
|
|
|
(define fragment (rx (* ,uric)))
|
|
|
|
|
|
|
|
(define query (rx (* ,uric)))
|
|
|
|
|
|
|
|
(define pchar-charset (rx ( ":@&=+$,")))
|
|
|
|
|
|
|
|
(define pchar (rx (| ,unreserved ,escaped ,pchar-charset)))
|
|
|
|
|
|
|
|
(define param (rx (* ,pchar)))
|
|
|
|
|
|
|
|
(define segment (rx (:
|
|
|
|
(* ,pchar)
|
|
|
|
(* (: ";" ,param)))))
|
|
|
|
|
|
|
|
(define path-segments (rx (:
|
|
|
|
,segment
|
|
|
|
(* (: "/" ,segment)))))
|
|
|
|
|
|
|
|
(define abs_path (rx (:
|
|
|
|
"/"
|
|
|
|
,path-segments)))
|
|
|
|
|
|
|
|
|
|
|
|
(define port (rx (* ,digit)))
|
|
|
|
|
|
|
|
(define IPv4address (rx (+ ,digit) "." (+ ,digit) "." (+ ,digit) "." (+ ,digit)))
|
|
|
|
|
|
|
|
(define toplabel (rx (:
|
|
|
|
(|
|
|
|
|
,alpha
|
|
|
|
(:
|
|
|
|
,alpha
|
|
|
|
(* (| ,alphanum "-"))
|
|
|
|
,alphanum)))))
|
|
|
|
|
|
|
|
(define domainlabel (rx (:
|
|
|
|
(|
|
|
|
|
,alphanum
|
|
|
|
(: ,alphanum
|
|
|
|
(* (| ,alphanum "-"))
|
|
|
|
,alphanum)))))
|
|
|
|
|
|
|
|
(define hostname (rx (:
|
|
|
|
(* (: ,domainlabel "."))
|
|
|
|
,toplabel
|
|
|
|
(? "."))))
|
|
|
|
|
|
|
|
(define host (rx (| ,hostname ,IPv4address)))
|
|
|
|
|
|
|
|
(define http_URL (rx (:
|
|
|
|
"http://"
|
|
|
|
(submatch
|
|
|
|
,host)
|
|
|
|
(?
|
|
|
|
(: ":" (submatch ,port)))
|
|
|
|
(?
|
|
|
|
(: (submatch ,abs_path)
|
|
|
|
(?
|
|
|
|
(: "?" (submatch ,query))))))))
|
|
|
|
|
|
|
|
(define http_URL_with_frag (rx (: bos ,@http_URL (? "#" ,fragment) eos)))
|
|
|
|
|
|
|
|
(define abs_path_with_frag (rx (: bos (submatch ,abs_path) (? "#" ,fragment) eos)))
|
|
|
|
|
|
|
|
(define Request-URI (rx (| ,@http_URL_with_frag ,@abs_path_with_frag)))
|
|
|
|
|
|
|
|
|
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
;;; parse a HTTP 1.1 Request_URI
|
|
|
|
;;;
|
|
|
|
;;; return matches of regexps host, port, abs_path, query;
|
|
|
|
;;;
|
|
|
|
;;; If request-uri is a relative URI, host, port and query are #f;
|
|
|
|
;;; port and query are also #f if they are not given in an absolute URI.
|
|
|
|
;;; If there's no abs_path given, or abs_path is "/", path is the empty list;
|
|
|
|
;;; otherwise it is a list containing the path's segments.
|
|
|
|
;;;
|
|
|
|
;;; don't decode yet!
|
2004-10-06 15:10:49 -04:00
|
|
|
|
|
|
|
(define (parse-uri request-uri)
|
|
|
|
(cond
|
|
|
|
|
|
|
|
((regexp-search abs_path_with_frag request-uri)
|
|
|
|
=> (lambda (match)
|
|
|
|
(values #f #f (split-abs-path (match:substring match 1)) #f)))
|
|
|
|
|
|
|
|
((regexp-search http_URL_with_frag request-uri)
|
|
|
|
=>(lambda (match)
|
|
|
|
(let ((host (match:substring match 1))
|
|
|
|
(port (match:substring match 2))
|
|
|
|
(path (split-abs-path (match:substring match 3)))
|
|
|
|
(query (match:substring match 4)))
|
|
|
|
(values host port path query))))
|
|
|
|
|
|
|
|
(else
|
|
|
|
(fatal-syntax-error "Request-URI syntactically faulty"))))
|
|
|
|
|
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
;;; split the string abs-path at slashes, return list of 'segments' (see RegExp definition above).
|
|
|
|
;;;
|
|
|
|
;;; SPLIT-PATH assumes abs-path if either #f or matches the RegExp abs_path,
|
|
|
|
;;; no checks are done.
|
|
|
|
;;;
|
|
|
|
;;; remark: abs_path allows for strings containing several consecutive slashes;
|
|
|
|
;;; SPLIT-ABS-PATH treats them as one slash.
|
|
|
|
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
|
|
|
|
|
2004-10-06 15:10:49 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(define (split-abs-path abs-path)
|
2004-10-06 15:10:49 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(if abs-path
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(regexp-fold-right
|
|
|
|
(rx (+ (~ ("/"))))
|
|
|
|
(lambda (match i res)
|
|
|
|
(cons (match:substring match 0) res))
|
|
|
|
'()
|
|
|
|
abs-path)
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
'()))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
|
|
|
|
;;; record type HTTP-URL for Request_URIs
|
|
|
|
;;;
|
|
|
|
;;; The HOST slot is a non-empty-string or #f.
|
|
|
|
;;;
|
|
|
|
;;; The PORT slot is an integer or #f.
|
|
|
|
;;;
|
|
|
|
;;; The PATH slot is the Request_URI's path split at slashes
|
|
|
|
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
|
|
|
|
;;; and decoded.
|
|
|
|
;;;
|
|
|
|
;;; The QUERY slot is a decoded non-empty-string or #f.
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2002-11-29 09:56:58 -05:00
|
|
|
(define-record-type http-url :http-url
|
2004-10-11 13:01:32 -04:00
|
|
|
(make-http-url host port path query)
|
2002-11-29 09:56:58 -05:00
|
|
|
http-url?
|
2004-10-11 13:01:32 -04:00
|
|
|
(host http-url-host)
|
|
|
|
(port http-url-port)
|
|
|
|
(path http-url-path)
|
|
|
|
(query http-url-query))
|
|
|
|
|
|
|
|
;;; decode various parts of the Request_URI as returned by PARSE-URI;
|
|
|
|
;;; returns a HTTP-URL record.
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(define (parsed-uri->http-url host port path query)
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(let ((portnumber (and port (string->number port)))
|
2004-10-14 13:18:24 -04:00
|
|
|
(decoded-path (map unescape path))
|
|
|
|
(decoded-query (and query (unescape query))))
|
2002-06-08 11:07:01 -04:00
|
|
|
|
2004-10-11 13:01:32 -04:00
|
|
|
(make-http-url host portnumber decoded-path decoded-query)))
|
|
|
|
|
|
|
|
;;really decode query here??
|
|
|
|
|
|
|
|
;;; parse a HTTP 1.1. Request_URI into a http-url record
|
|
|
|
|
|
|
|
(define (parse-http-url uri-string)
|
2003-01-15 05:36:16 -05:00
|
|
|
(call-with-values
|
2004-10-11 13:01:32 -04:00
|
|
|
(lambda () (parse-uri uri-string))
|
|
|
|
parsed-uri->http-url))
|
|
|
|
|
|
|
|
;;; Unparse a http-url record into its corresponding Request_URI
|
|
|
|
|
|
|
|
(define (http-url->uri-string http-url)
|
|
|
|
|
|
|
|
(let* ((host (http-url-host http-url))
|
|
|
|
(scheme-and-host-string
|
|
|
|
(if host
|
|
|
|
(string-append "http://" host)
|
|
|
|
""))
|
|
|
|
|
|
|
|
(port (http-url-port http-url))
|
|
|
|
(port-string
|
|
|
|
(if port
|
|
|
|
(string-append ":" (number->string port))
|
|
|
|
""))
|
|
|
|
|
|
|
|
(path (http-url-path http-url))
|
|
|
|
(path-string
|
|
|
|
(fold-right
|
|
|
|
(lambda (segment res)
|
2004-10-18 13:35:40 -04:00
|
|
|
(string-append "/" (escape-segment segment) res))
|
2004-10-11 13:01:32 -04:00
|
|
|
""
|
|
|
|
path))
|
|
|
|
|
|
|
|
(query (http-url-query http-url))
|
|
|
|
(query-string (if query
|
2004-10-18 13:35:40 -04:00
|
|
|
(string-append "?" (escape-query query))
|
2004-10-11 13:01:32 -04:00
|
|
|
"")))
|
|
|
|
|
|
|
|
(string-append scheme-and-host-string port-string path-string query-string)))
|
|
|
|
|
2004-10-14 13:18:24 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;; decode a URI
|
|
|
|
;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see above).
|
|
|
|
|
|
|
|
;;; Remark:
|
|
|
|
;;; we assume no non-ASCII characters occur in the URI; therefore the
|
|
|
|
;;; ascii table is used for conversion of the octet the hexnumber
|
|
|
|
;;; represents to a char.
|
|
|
|
|
|
|
|
;;; Caution:
|
2004-10-18 13:35:40 -04:00
|
|
|
;;; a URI must be separated into its components (e.g. parsed by
|
2004-10-14 13:18:24 -04:00
|
|
|
;;; PARSE-URI) before the escaped characters within those components
|
|
|
|
;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI.
|
|
|
|
|
|
|
|
(define (unescape s)
|
|
|
|
(regexp-fold
|
|
|
|
escaped
|
|
|
|
(lambda (start-search match res)
|
|
|
|
(let* ((start-match (match:start match))
|
|
|
|
(hexchar-low (string-ref s (+ start-match 2)))
|
|
|
|
(hexchar-high (string-ref s (+ start-match 1)))
|
|
|
|
(hex-low (hexchar->int hexchar-low))
|
|
|
|
(hex-high (hexchar->int hexchar-high))
|
|
|
|
(ascii (+ (* 16 hex-high) hex-low)))
|
|
|
|
(string-append
|
|
|
|
res
|
|
|
|
(substring s start-search start-match)
|
|
|
|
(string (ascii->char ascii)))))
|
|
|
|
""
|
|
|
|
s
|
|
|
|
(lambda (start-search res)
|
|
|
|
(string-append res (substring s start-search (string-length s))))))
|
|
|
|
|
|
|
|
|
|
|
|
; make use of the fact that numbers and characters are in order in the ascii table
|
|
|
|
(define (hexchar->int c)
|
|
|
|
(- (char->ascii c)
|
|
|
|
(if (char-numeric? c)
|
|
|
|
(char->ascii #\0)
|
|
|
|
(- (if (char-upper-case? c)
|
|
|
|
(char->ascii #\A)
|
|
|
|
(char->ascii #\a))
|
|
|
|
10))))
|
|
|
|
|
|
|
|
|
2004-10-18 13:35:40 -04:00
|
|
|
;;; encode URIs:
|
|
|
|
;;; replace characters which are reserved or excluded by their escaped representation.
|
|
|
|
|
|
|
|
;;; Caution:
|
|
|
|
;;; Each component of a URI may have its own set of characters that are reserved,
|
|
|
|
;;; -> differentiate between components.
|
|
|
|
|
|
|
|
;;; Caution:
|
|
|
|
;;; don't encode an already encoded string; #\% chars would be escaped again.
|
|
|
|
|
|
|
|
;;; not allowed within component 'segment' in 'abs_path'
|
|
|
|
(define segment-reserved-and-excluded (rx (~ ,unreserved ,pchar-charset (";"))))
|
|
|
|
|
|
|
|
;;; not allowed within component 'query'
|
|
|
|
(define query-reserved-and-excluded (rx (~ ,unreserved ,reserved )))
|
|
|
|
|
|
|
|
;;; encode 'abs_path' portion of a URI:
|
|
|
|
;;; use SPLIT-PATH to split abs_path into its segments,
|
|
|
|
;;; then apply UNESCAPE-SEGMENT to the segments.
|
|
|
|
(define (escape-segment segment)
|
|
|
|
(escape segment segment-reserved-and-excluded))
|
|
|
|
|
|
|
|
;;; encode 'query' portion of a URI
|
|
|
|
(define (escape-query query)
|
|
|
|
(escape query query-reserved-and-excluded))
|
|
|
|
|
|
|
|
(define (escape s regexp)
|
|
|
|
(regexp-fold
|
|
|
|
regexp
|
|
|
|
(lambda (start-search match res)
|
|
|
|
(let* ((start-match (match:start match))
|
|
|
|
(forbidden-char (string-ref s start-match)))
|
|
|
|
(string-append
|
|
|
|
res
|
|
|
|
(substring s start-search start-match)
|
|
|
|
(ascii->escaped (char->ascii forbidden-char)))))
|
|
|
|
""
|
|
|
|
s
|
|
|
|
(lambda (start-search res)
|
|
|
|
(string-append res (substring s start-search (string-length s))))))
|
|
|
|
|
|
|
|
(define (ascii->escaped dec-int)
|
|
|
|
(let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF))
|
|
|
|
(hex-int-low (bitwise-and dec-int #xF)))
|
|
|
|
(string-append
|
|
|
|
"%" (int->hexstring hex-int-high) (int->hexstring hex-int-low))))
|
|
|
|
|
|
|
|
(define int->hexstring
|
|
|
|
(let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
|
|
|
|
"A" "B" "C" "D" "E" "F")))
|
|
|
|
(lambda (i) (vector-ref table i))))
|
|
|
|
|
|
|
|
|
|
|
|
|