*fix two typos in RegExps

*better comments for PARSE-URI, SPLIT-PATH
*make SPLIT-PATH really accept PARSE-URI's return values
*restructure record-type HTTP-URL
*new procedure PARSED-URI->HTTP-URL
*rewrite PARSE-HTTP-URL to use PARSE-URI and PARSED-URI->HTTP-URL
*remove out-dated comments
*remove out-dated procedure PARSE-HTTP-URL-STRING
*remove DEFAULT-HTTP-SERVER (relict of server record-type)
*rewrite HTTP-URL->STRING
*rename HTTP-URL->STRING to HTTP-URL->URI-STRING
This commit is contained in:
vibr 2004-10-11 17:01:32 +00:00
parent fe08e779f0
commit 932f03a638
1 changed files with 93 additions and 72 deletions

View File

@ -40,13 +40,13 @@
(define digit (rx numeric))
(define alpha (rx alphanum))
(define alpha (rx alphabetic))
(define alphanum (rx alphanumeric))
(define hex (rx hex-digit))
(define escaped (rx "%" ,hex ,hex))
(define escaped (rx (: "%" ,hex ,hex)))
(define mark (rx ( "-_.!~*'()")))
@ -124,7 +124,16 @@
;;parse HTTP 1.1 Request-URI
;;; parse a HTTP 1.1 Request_URI
;;;
;;; return matches of regexps host, port, abs_path, query;
;;;
;;; If request-uri is a relative URI, host, port and query are #f;
;;; port and query are also #f if they are not given in an absolute URI.
;;; If there's no abs_path given, or abs_path is "/", path is the empty list;
;;; otherwise it is a list containing the path's segments.
;;;
;;; don't decode yet!
(define (parse-uri request-uri)
(cond
@ -145,88 +154,100 @@
(fatal-syntax-error "Request-URI syntactically faulty"))))
;; split the string abs-path at slashes, return list of segments.
;; SPLIT-PATH assumes abs-path matches the RegExp abs_path, no checks are done.
;; minor remark: abs_path allows for strings containing several consecutive slashes;
;; SPLIT-ABS-PATH treats them as one slash.
(define (split-abs-path abs-path)
(regexp-fold-right
(rx (+ (~ ("/"))))
(lambda (match i res)
(cons (match:substring match 0) res))
'()
abs-path))
;;; split the string abs-path at slashes, return list of 'segments' (see RegExp definition above).
;;;
;;; SPLIT-PATH assumes abs-path if either #f or matches the RegExp abs_path,
;;; no checks are done.
;;;
;;; remark: abs_path allows for strings containing several consecutive slashes;
;;; SPLIT-ABS-PATH treats them as one slash.
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
;;; HTTP URL parsing
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (split-abs-path abs-path)
;;; The PATH slot of this record is the URL's path split at slashes,
;;; e.g., "foo/bar//baz/" => ("foo" "bar" "" "baz" "")
;;; These elements are in raw, unescaped format. To convert back to
;;; a string, use (uri-path->uri (map escape-uri pathlist)).
(if abs-path
(regexp-fold-right
(rx (+ (~ ("/"))))
(lambda (match i res)
(cons (match:substring match 0) res))
'()
abs-path)
'()))
;;; record type HTTP-URL for Request_URIs
;;;
;;; The HOST slot is a non-empty-string or #f.
;;;
;;; The PORT slot is an integer or #f.
;;;
;;; The PATH slot is the Request_URI's path split at slashes
;;; (e.g., "/foo///bar//baz/" => ("foo" "bar" "baz"))
;;; and decoded.
;;;
;;; The QUERY slot is a decoded non-empty-string or #f.
(define-record-type http-url :http-url
(make-http-url server path search fragment-identifier)
(make-http-url host port path query)
http-url?
(server http-url-server) ; Initial //anonymous@clark.lcs.mit.edu:80/
(path http-url-path) ; Rest of path, split at slashes & decoded.
(search http-url-search)
(fragment-identifier http-url-fragment-identifier))
(host http-url-host)
(port http-url-port)
(path http-url-path)
(query http-url-query))
;(define-new-record-type http-url :http-url
; (make-http-url hostname port path query)
; http-url?
; (hostname http-url-hostname)
; (port http-url-port)
; (path http-url-path)
; (query http-url-query))
;;; decode various parts of the Request_URI as returned by PARSE-URI;
;;; returns a HTTP-URL record.
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
;;; <scheme> : <path> ? <search> # <frag-id> <scheme>, <search>, and
;;; <frag-id> are strings; <path> is a non-empty string list -- the
;;; URI's path split at slashes. Optional parts of the URI, when
;;; missing, are specified as #f. If <scheme> is "http", then the
;;; other three parts can be passed to PARSE-HTTP-URL, which parses
;;; them into a HTTP-URL record. All strings come back from the URI
;;; parser encoded. SEARCH and FRAG-ID are left that way; this parser
;;; decodes the path elements.
;;;
;;; Returns a HTTP-URL record, if possible. Otherwise
;;; FATAL-SYNTAX-ERROR is called.
(define (parsed-uri->http-url host port path query)
(define (parse-http-url path search frag-id)
(let ((uh (parse-server path default-http-server)))
(if (or (server-user uh) (server-password uh))
(fatal-syntax-error
"HTTP URL's may not specify a user or password field" path))
(let ((portnumber (and port (string->number port)))
(decoded-path (map unescape-uri path))
(decoded-query (and query (unescape-uri query))))
(make-http-url uh (map unescape-uri (cdddr path)) search frag-id)))
(make-http-url host portnumber decoded-path decoded-query)))
(define (parse-http-url-string string)
;;TODO: separate procedures for decoding path and query
;;really decode query here??
;;; parse a HTTP 1.1. Request_URI into a http-url record
(define (parse-http-url uri-string)
(call-with-values
(lambda () (parse-uri string))
(lambda (scheme path search frag-id)
(if (string=? scheme "http")
(parse-http-url path search frag-id)
(fatal-syntax-error "not an HTTP URL" path)))))
(lambda () (parse-uri uri-string))
parsed-uri->http-url))
;;; Default http port is 80.
(define default-http-server (make-server #f #f #f "80"))
;;; Unparse a http-url record into its corresponding Request_URI
(define (http-url->uri-string http-url)
;;; Unparse.
(let* ((host (http-url-host http-url))
(scheme-and-host-string
(if host
(string-append "http://" host)
""))
(define (http-url->string url)
(string-append "http://"
(server->string (http-url-server url))
"/"
(uri-path->uri (map escape-uri (http-url-path url)))
(cond ((http-url-search url) =>
(lambda (s) (string-append "?" s)))
(else ""))
(cond ((http-url-fragment-identifier url) =>
(lambda (fi) (string-append "#" fi)))
(else ""))))
(port (http-url-port http-url))
(port-string
(if port
(string-append ":" (number->string port))
""))
(path (http-url-path http-url))
(path-string
(fold-right
(lambda (segment res)
(string-append "/" (escape-uri segment) res))
""
path))
(query (http-url-query http-url))
(query-string (if query
(string-append "?" (escape-uri query))
"")))
(string-append scheme-and-host-string port-string path-string query-string)))
;;TODO: separate procedures for encoding path and query