parse HTTP 1.1 URIs:
* add RegExps * add proc PARSE-URI * add proc SPLIT-ABS-PATH
This commit is contained in:
parent
d9950a9b0b
commit
a1e79c4fc7
|
@ -1,18 +1,152 @@
|
||||||
;;; URL parsing and unparsing -*- Scheme -*-
|
;;; HTTP 1.1 Request-URI parsing and unparsing -*- Scheme -*-
|
||||||
|
|
||||||
;;; This file is part of the Scheme Untergrund Networking package.
|
|
||||||
|
|
||||||
;;; Copyright (c) 1995 by Olin Shivers.
|
;;; Copyright (c) 1995 by Olin Shivers.
|
||||||
;;; For copyright information, see the file COPYING which comes with
|
;;; For copyright information, see the file COPYING which comes with
|
||||||
;;; the distribution.
|
;;; the distribution.
|
||||||
|
|
||||||
;;; I'm only implementing HTTP URL's right now.
|
|
||||||
|
|
||||||
;;; References:
|
;;; References:
|
||||||
;;; - http://www.w3.org/Addressing/rfc1738.txt
|
;;; RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1
|
||||||
;;; Original RFC
|
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
|
||||||
;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/Overview.html
|
|
||||||
;;; General Web page of URI pointers.
|
;;; HTTP 1.1 Request-URIS are of the form
|
||||||
|
;;; Request-URI = "*" | absoluteURI | abs_path | authority
|
||||||
|
;;;
|
||||||
|
;;; We implement only the subset
|
||||||
|
;;; Request-URI = ( http_URL | abs_path) ["#" fragment]
|
||||||
|
|
||||||
|
;;; where http_URL is a subset of absoluteURI
|
||||||
|
|
||||||
|
;;; and [ "#" fragment ] is allowed even though
|
||||||
|
;;; RFC 2616 disallowes the #fragment part
|
||||||
|
;;; (while RFC 1945 for HTTP/1.0 allowed it).
|
||||||
|
;;; (This is for compatibility with buggy clients).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;;; RexExps for Request-URIs as scsh SREs
|
||||||
|
;;; stick to RFC terminology throughout
|
||||||
|
|
||||||
|
(define digit (rx numeric))
|
||||||
|
|
||||||
|
(define alpha (rx alphanum))
|
||||||
|
|
||||||
|
(define alphanum (rx alphanumeric))
|
||||||
|
|
||||||
|
(define hex (rx hex-digit))
|
||||||
|
|
||||||
|
(define escaped (rx "%" ,hex ,hex))
|
||||||
|
|
||||||
|
(define mark (rx ( "-_.!~*'()")))
|
||||||
|
|
||||||
|
(define unreserved (rx (| ,alphanum ,mark)))
|
||||||
|
|
||||||
|
(define reserved (rx ( ";/?:@&=+$,")))
|
||||||
|
|
||||||
|
(define uric (rx (| ,reserved ,unreserved ,escaped)))
|
||||||
|
|
||||||
|
(define fragment (rx (* ,uric)))
|
||||||
|
|
||||||
|
(define query (rx (* ,uric)))
|
||||||
|
|
||||||
|
(define pchar-charset (rx ( ":@&=+$,")))
|
||||||
|
|
||||||
|
(define pchar (rx (| ,unreserved ,escaped ,pchar-charset)))
|
||||||
|
|
||||||
|
(define param (rx (* ,pchar)))
|
||||||
|
|
||||||
|
(define segment (rx (:
|
||||||
|
(* ,pchar)
|
||||||
|
(* (: ";" ,param)))))
|
||||||
|
|
||||||
|
(define path-segments (rx (:
|
||||||
|
,segment
|
||||||
|
(* (: "/" ,segment)))))
|
||||||
|
|
||||||
|
(define abs_path (rx (:
|
||||||
|
"/"
|
||||||
|
,path-segments)))
|
||||||
|
|
||||||
|
|
||||||
|
(define port (rx (* ,digit)))
|
||||||
|
|
||||||
|
(define IPv4address (rx (+ ,digit) "." (+ ,digit) "." (+ ,digit) "." (+ ,digit)))
|
||||||
|
|
||||||
|
(define toplabel (rx (:
|
||||||
|
(|
|
||||||
|
,alpha
|
||||||
|
(:
|
||||||
|
,alpha
|
||||||
|
(* (| ,alphanum "-"))
|
||||||
|
,alphanum)))))
|
||||||
|
|
||||||
|
(define domainlabel (rx (:
|
||||||
|
(|
|
||||||
|
,alphanum
|
||||||
|
(: ,alphanum
|
||||||
|
(* (| ,alphanum "-"))
|
||||||
|
,alphanum)))))
|
||||||
|
|
||||||
|
(define hostname (rx (:
|
||||||
|
(* (: ,domainlabel "."))
|
||||||
|
,toplabel
|
||||||
|
(? "."))))
|
||||||
|
|
||||||
|
(define host (rx (| ,hostname ,IPv4address)))
|
||||||
|
|
||||||
|
(define http_URL (rx (:
|
||||||
|
"http://"
|
||||||
|
(submatch
|
||||||
|
,host)
|
||||||
|
(?
|
||||||
|
(: ":" (submatch ,port)))
|
||||||
|
(?
|
||||||
|
(: (submatch ,abs_path)
|
||||||
|
(?
|
||||||
|
(: "?" (submatch ,query))))))))
|
||||||
|
|
||||||
|
(define http_URL_with_frag (rx (: bos ,@http_URL (? "#" ,fragment) eos)))
|
||||||
|
|
||||||
|
(define abs_path_with_frag (rx (: bos (submatch ,abs_path) (? "#" ,fragment) eos)))
|
||||||
|
|
||||||
|
(define Request-URI (rx (| ,@http_URL_with_frag ,@abs_path_with_frag)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;;parse HTTP 1.1 Request-URI
|
||||||
|
|
||||||
|
(define (parse-uri request-uri)
|
||||||
|
(cond
|
||||||
|
|
||||||
|
((regexp-search abs_path_with_frag request-uri)
|
||||||
|
=> (lambda (match)
|
||||||
|
(values #f #f (split-abs-path (match:substring match 1)) #f)))
|
||||||
|
|
||||||
|
((regexp-search http_URL_with_frag request-uri)
|
||||||
|
=>(lambda (match)
|
||||||
|
(let ((host (match:substring match 1))
|
||||||
|
(port (match:substring match 2))
|
||||||
|
(path (split-abs-path (match:substring match 3)))
|
||||||
|
(query (match:substring match 4)))
|
||||||
|
(values host port path query))))
|
||||||
|
|
||||||
|
(else
|
||||||
|
(fatal-syntax-error "Request-URI syntactically faulty"))))
|
||||||
|
|
||||||
|
|
||||||
|
;; split the string abs-path at slashes, return list of segments.
|
||||||
|
;; SPLIT-PATH assumes abs-path matches the RegExp abs_path, no checks are done.
|
||||||
|
|
||||||
|
;; minor remark: abs_path allows for strings containing several consecutive slashes;
|
||||||
|
;; SPLIT-ABS-PATH treats them as one slash.
|
||||||
|
|
||||||
|
(define (split-abs-path abs-path)
|
||||||
|
(regexp-fold-right
|
||||||
|
(rx (+ (~ ("/"))))
|
||||||
|
(lambda (match i res)
|
||||||
|
(cons (match:substring match 0) res))
|
||||||
|
'()
|
||||||
|
abs-path))
|
||||||
|
|
||||||
|
|
||||||
;;; Unresolved issues:
|
;;; Unresolved issues:
|
||||||
|
@ -115,6 +249,14 @@
|
||||||
(search http-url-search)
|
(search http-url-search)
|
||||||
(fragment-identifier http-url-fragment-identifier))
|
(fragment-identifier http-url-fragment-identifier))
|
||||||
|
|
||||||
|
;(define-new-record-type http-url :http-url
|
||||||
|
; (make-http-url hostname port path query)
|
||||||
|
; http-url?
|
||||||
|
; (hostname http-url-hostname)
|
||||||
|
; (port http-url-port)
|
||||||
|
; (path http-url-path)
|
||||||
|
; (query http-url-query))
|
||||||
|
|
||||||
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
|
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
|
||||||
;;; <scheme> : <path> ? <search> # <frag-id> <scheme>, <search>, and
|
;;; <scheme> : <path> ? <search> # <frag-id> <scheme>, <search>, and
|
||||||
;;; <frag-id> are strings; <path> is a non-empty string list -- the
|
;;; <frag-id> are strings; <path> is a non-empty string list -- the
|
||||||
|
|
Loading…
Reference in New Issue