*fix two typos in RegExps

*better comments for PARSE-URI, SPLIT-PATH
*make SPLIT-PATH really accept PARSE-URI's return values
*restructure record-type HTTP-URL
*new procedure PARSED-URI->HTTP-URL
*rewrite PARSE-HTTP-URL to use PARSE-URI and PARSED-URI->HTTP-URL
*remove out-dated comments
*remove out-dated procedure PARSE-HTTP-URL-STRING
*remove DEFAULT-HTTP-SERVER (relict of server record-type)
*rewrite HTTP-URL->STRING
*rename HTTP-URL->STRING to HTTP-URL->URI-STRING
This commit is contained in:
vibr 2004-10-11 17:01:32 +00:00
parent fe08e779f0
commit 932f03a638
1 changed files with 93 additions and 72 deletions

View File

@ -40,13 +40,13 @@
(define digit (rx numeric)) (define digit (rx numeric))
(define alpha (rx alphanum)) (define alpha (rx alphabetic))
(define alphanum (rx alphanumeric)) (define alphanum (rx alphanumeric))
(define hex (rx hex-digit)) (define hex (rx hex-digit))
(define escaped (rx "%" ,hex ,hex)) (define escaped (rx (: "%" ,hex ,hex)))
(define mark (rx ( "-_.!~*'()"))) (define mark (rx ( "-_.!~*'()")))
@ -124,7 +124,16 @@
;;parse HTTP 1.1 Request-URI ;;; parse a HTTP 1.1 Request_URI
;;;
;;; return matches of regexps host, port, abs_path, query;
;;;
;;; If request-uri is a relative URI, host, port and query are #f;
;;; port and query are also #f if they are not given in an absolute URI.
;;; If there's no abs_path given, or abs_path is "/", path is the empty list;
;;; otherwise it is a list containing the path's segments.
;;;
;;; don't decode yet!
(define (parse-uri request-uri) (define (parse-uri request-uri)
(cond (cond
@ -145,88 +154,100 @@
(fatal-syntax-error "Request-URI syntactically faulty")))) (fatal-syntax-error "Request-URI syntactically faulty"))))
;; split the string abs-path at slashes, return list of segments. ;;; split the string abs-path at slashes, return list of 'segments' (see RegExp definition above).
;; SPLIT-PATH assumes abs-path matches the RegExp abs_path, no checks are done. ;;;
;;; SPLIT-PATH assumes abs-path if either #f or matches the RegExp abs_path,
;;; no checks are done.
;;;
;;; remark: abs_path allows for strings containing several consecutive slashes;
;;; SPLIT-ABS-PATH treats them as one slash.
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
;; minor remark: abs_path allows for strings containing several consecutive slashes;
;; SPLIT-ABS-PATH treats them as one slash.
(define (split-abs-path abs-path) (define (split-abs-path abs-path)
(regexp-fold-right
(rx (+ (~ ("/")))) (if abs-path
(lambda (match i res)
(cons (match:substring match 0) res)) (regexp-fold-right
'() (rx (+ (~ ("/"))))
abs-path)) (lambda (match i res)
(cons (match:substring match 0) res))
'()
abs-path)
'()))
;;; HTTP URL parsing ;;; record type HTTP-URL for Request_URIs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;
;;; The HOST slot is a non-empty-string or #f.
;;; The PATH slot of this record is the URL's path split at slashes, ;;;
;;; e.g., "foo/bar//baz/" => ("foo" "bar" "" "baz" "") ;;; The PORT slot is an integer or #f.
;;; These elements are in raw, unescaped format. To convert back to ;;;
;;; a string, use (uri-path->uri (map escape-uri pathlist)). ;;; The PATH slot is the Request_URI's path split at slashes
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
;;; and decoded.
;;;
;;; The QUERY slot is a decoded non-empty-string or #f.
(define-record-type http-url :http-url (define-record-type http-url :http-url
(make-http-url server path search fragment-identifier) (make-http-url host port path query)
http-url? http-url?
(server http-url-server) ; Initial //anonymous@clark.lcs.mit.edu:80/ (host http-url-host)
(path http-url-path) ; Rest of path, split at slashes & decoded. (port http-url-port)
(search http-url-search) (path http-url-path)
(fragment-identifier http-url-fragment-identifier)) (query http-url-query))
;(define-new-record-type http-url :http-url ;;; decode various parts of the Request_URI as returned by PARSE-URI;
; (make-http-url hostname port path query) ;;; returns a HTTP-URL record.
; http-url?
; (hostname http-url-hostname)
; (port http-url-port)
; (path http-url-path)
; (query http-url-query))
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts: (define (parsed-uri->http-url host port path query)
;;; <scheme> : <path> ? <search> # <frag-id> <scheme>, <search>, and
;;; <frag-id> are strings; <path> is a non-empty string list -- the
;;; URI's path split at slashes. Optional parts of the URI, when
;;; missing, are specified as #f. If <scheme> is "http", then the
;;; other three parts can be passed to PARSE-HTTP-URL, which parses
;;; them into a HTTP-URL record. All strings come back from the URI
;;; parser encoded. SEARCH and FRAG-ID are left that way; this parser
;;; decodes the path elements.
;;;
;;; Returns a HTTP-URL record, if possible. Otherwise
;;; FATAL-SYNTAX-ERROR is called.
(define (parse-http-url path search frag-id) (let ((portnumber (and port (string->number port)))
(let ((uh (parse-server path default-http-server))) (decoded-path (map unescape-uri path))
(if (or (server-user uh) (server-password uh)) (decoded-query (and query (unescape-uri query))))
(fatal-syntax-error
"HTTP URL's may not specify a user or password field" path))
(make-http-url uh (map unescape-uri (cdddr path)) search frag-id))) (make-http-url host portnumber decoded-path decoded-query)))
(define (parse-http-url-string string) ;;TODO: separate procedures for decoding path and query
;;really decode query here??
;;; parse a HTTP 1.1. Request_URI into a http-url record
(define (parse-http-url uri-string)
(call-with-values (call-with-values
(lambda () (parse-uri string)) (lambda () (parse-uri uri-string))
(lambda (scheme path search frag-id) parsed-uri->http-url))
(if (string=? scheme "http")
(parse-http-url path search frag-id)
(fatal-syntax-error "not an HTTP URL" path)))))
;;; Default http port is 80. ;;; Unparse a http-url record into its corresponding Request_URI
(define default-http-server (make-server #f #f #f "80"))
(define (http-url->uri-string http-url)
;;; Unparse. (let* ((host (http-url-host http-url))
(scheme-and-host-string
(if host
(string-append "http://" host)
""))
(define (http-url->string url) (port (http-url-port http-url))
(string-append "http://" (port-string
(server->string (http-url-server url)) (if port
"/" (string-append ":" (number->string port))
(uri-path->uri (map escape-uri (http-url-path url))) ""))
(cond ((http-url-search url) =>
(lambda (s) (string-append "?" s))) (path (http-url-path http-url))
(else "")) (path-string
(cond ((http-url-fragment-identifier url) => (fold-right
(lambda (fi) (string-append "#" fi))) (lambda (segment res)
(else "")))) (string-append "/" (escape-uri segment) res))
""
path))
(query (http-url-query http-url))
(query-string (if query
(string-append "?" (escape-uri query))
"")))
(string-append scheme-and-host-string port-string path-string query-string)))
;;TODO: separate procedures for encoding path and query