parse HTTP 1.1 URIs:
* add RegExps * add proc PARSE-URI * add proc SPLIT-ABS-PATH
This commit is contained in:
parent
d9950a9b0b
commit
a1e79c4fc7
|
@ -1,18 +1,152 @@
|
|||
;;; URL parsing and unparsing -*- Scheme -*-
|
||||
|
||||
;;; This file is part of the Scheme Untergrund Networking package.
|
||||
;;; HTTP 1.1 Request-URI parsing and unparsing -*- Scheme -*-
|
||||
|
||||
;;; Copyright (c) 1995 by Olin Shivers.
|
||||
;;; For copyright information, see the file COPYING which comes with
|
||||
;;; the distribution.
|
||||
|
||||
;;; I'm only implementing HTTP URL's right now.
|
||||
|
||||
;;; References:
|
||||
;;; - http://www.w3.org/Addressing/rfc1738.txt
|
||||
;;; Original RFC
|
||||
;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/Overview.html
|
||||
;;; General Web page of URI pointers.
|
||||
;;; RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1
|
||||
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
|
||||
|
||||
;;; HTTP 1.1 Request-URIS are of the form
|
||||
;;; Request-URI = "*" | absoluteURI | abs_path | authority
|
||||
;;;
|
||||
;;; We implement only the subset
|
||||
;;; Request-URI = ( http_URL | abs_path) ["#" fragment]
|
||||
|
||||
;;; where http_URL is a subset of absoluteURI
|
||||
|
||||
;;; and [ "#" fragment ] is allowed even though
|
||||
;;; RFC 2616 disallowes the #fragment part
|
||||
;;; (while RFC 1945 for HTTP/1.0 allowed it).
|
||||
;;; (This is for compatibility with buggy clients).
|
||||
|
||||
|
||||
|
||||
|
||||
;;; RexExps for Request-URIs as scsh SREs
|
||||
;;; stick to RFC terminology throughout
|
||||
|
||||
(define digit (rx numeric))
|
||||
|
||||
(define alpha (rx alphanum))
|
||||
|
||||
(define alphanum (rx alphanumeric))
|
||||
|
||||
(define hex (rx hex-digit))
|
||||
|
||||
(define escaped (rx "%" ,hex ,hex))
|
||||
|
||||
(define mark (rx ( "-_.!~*'()")))
|
||||
|
||||
(define unreserved (rx (| ,alphanum ,mark)))
|
||||
|
||||
(define reserved (rx ( ";/?:@&=+$,")))
|
||||
|
||||
(define uric (rx (| ,reserved ,unreserved ,escaped)))
|
||||
|
||||
(define fragment (rx (* ,uric)))
|
||||
|
||||
(define query (rx (* ,uric)))
|
||||
|
||||
(define pchar-charset (rx ( ":@&=+$,")))
|
||||
|
||||
(define pchar (rx (| ,unreserved ,escaped ,pchar-charset)))
|
||||
|
||||
(define param (rx (* ,pchar)))
|
||||
|
||||
(define segment (rx (:
|
||||
(* ,pchar)
|
||||
(* (: ";" ,param)))))
|
||||
|
||||
(define path-segments (rx (:
|
||||
,segment
|
||||
(* (: "/" ,segment)))))
|
||||
|
||||
(define abs_path (rx (:
|
||||
"/"
|
||||
,path-segments)))
|
||||
|
||||
|
||||
(define port (rx (* ,digit)))
|
||||
|
||||
(define IPv4address (rx (+ ,digit) "." (+ ,digit) "." (+ ,digit) "." (+ ,digit)))
|
||||
|
||||
(define toplabel (rx (:
|
||||
(|
|
||||
,alpha
|
||||
(:
|
||||
,alpha
|
||||
(* (| ,alphanum "-"))
|
||||
,alphanum)))))
|
||||
|
||||
(define domainlabel (rx (:
|
||||
(|
|
||||
,alphanum
|
||||
(: ,alphanum
|
||||
(* (| ,alphanum "-"))
|
||||
,alphanum)))))
|
||||
|
||||
(define hostname (rx (:
|
||||
(* (: ,domainlabel "."))
|
||||
,toplabel
|
||||
(? "."))))
|
||||
|
||||
(define host (rx (| ,hostname ,IPv4address)))
|
||||
|
||||
(define http_URL (rx (:
|
||||
"http://"
|
||||
(submatch
|
||||
,host)
|
||||
(?
|
||||
(: ":" (submatch ,port)))
|
||||
(?
|
||||
(: (submatch ,abs_path)
|
||||
(?
|
||||
(: "?" (submatch ,query))))))))
|
||||
|
||||
(define http_URL_with_frag (rx (: bos ,@http_URL (? "#" ,fragment) eos)))
|
||||
|
||||
(define abs_path_with_frag (rx (: bos (submatch ,abs_path) (? "#" ,fragment) eos)))
|
||||
|
||||
(define Request-URI (rx (| ,@http_URL_with_frag ,@abs_path_with_frag)))
|
||||
|
||||
|
||||
|
||||
;;parse HTTP 1.1 Request-URI
|
||||
|
||||
(define (parse-uri request-uri)
|
||||
(cond
|
||||
|
||||
((regexp-search abs_path_with_frag request-uri)
|
||||
=> (lambda (match)
|
||||
(values #f #f (split-abs-path (match:substring match 1)) #f)))
|
||||
|
||||
((regexp-search http_URL_with_frag request-uri)
|
||||
=>(lambda (match)
|
||||
(let ((host (match:substring match 1))
|
||||
(port (match:substring match 2))
|
||||
(path (split-abs-path (match:substring match 3)))
|
||||
(query (match:substring match 4)))
|
||||
(values host port path query))))
|
||||
|
||||
(else
|
||||
(fatal-syntax-error "Request-URI syntactically faulty"))))
|
||||
|
||||
|
||||
;; split the string abs-path at slashes, return list of segments.
|
||||
;; SPLIT-PATH assumes abs-path matches the RegExp abs_path, no checks are done.
|
||||
|
||||
;; minor remark: abs_path allows for strings containing several consecutive slashes;
|
||||
;; SPLIT-ABS-PATH treats them as one slash.
|
||||
|
||||
(define (split-abs-path abs-path)
|
||||
(regexp-fold-right
|
||||
(rx (+ (~ ("/"))))
|
||||
(lambda (match i res)
|
||||
(cons (match:substring match 0) res))
|
||||
'()
|
||||
abs-path))
|
||||
|
||||
|
||||
;;; Unresolved issues:
|
||||
|
@ -115,6 +249,14 @@
|
|||
(search http-url-search)
|
||||
(fragment-identifier http-url-fragment-identifier))
|
||||
|
||||
;(define-new-record-type http-url :http-url
|
||||
; (make-http-url hostname port path query)
|
||||
; http-url?
|
||||
; (hostname http-url-hostname)
|
||||
; (port http-url-port)
|
||||
; (path http-url-path)
|
||||
; (query http-url-query))
|
||||
|
||||
;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
|
||||
;;; <scheme> : <path> ? <search> # <frag-id> <scheme>, <search>, and
|
||||
;;; <frag-id> are strings; <path> is a non-empty string list -- the
|
||||
|
|
Loading…
Reference in New Issue