PARSE-URI modified: the search part is now recognized correctly, whether or not a frag is given

This commit is contained in:
interp 2001-07-16 11:31:41 +00:00
parent a90dfae496
commit cd5e03ec9a
1 changed files with 10 additions and 7 deletions

17
uri.scm
View File

@ -44,18 +44,17 @@
;;; If it's a question-mark, then that's the <search> part -- remove it.
;;; - What's left is the path. Split at slashes. "" -> ("")
;;;
;;; This scheme is tolerant of the various ways people build broken URI's
;;; out there on the Net. It was given to me by Dan Connolly of the W3C.
;;; This scheme is tolerant of the various ways people build broken
;;; URIs out there on the Net, e.g. #\= is a reserved character, but
;;; is used unescaped in the search-part. It was given to me by Dan
;;; Connolly of the W3C and slightly modified.
;;; Returns four values: scheme, path, search, frag-id. Each value is
;;; either #f or a string, except for the path, which is a nonempty list
;;; of strings (as mentioned above).
;;; MG: I think including = here will break up things, since it may be
;;; part of the search string, preventing the ? to be found (+ and &
;;; are excluded anyway).
(define uri-reserved (string->char-set ";/#?: "))
(define uri-reserved (string->char-set ";/#?: ="))
(define (parse-uri s)
(let* ((slen (string-length s))
@ -69,7 +68,11 @@
(sharp (and rs-last (char=? (string-ref s rs-last) #\#) rs-last))
;; Search backwards for ? (or intervening reserved char).
(rs-penult (if sharp (char-set-rindex s uri-reserved sharp) rs-last))
;; (NB: #\= may be after #\? and before #\#)
(rs-penult (char-set-rindex
s
(char-set-delete uri-reserved #\=)
(or sharp slen)))
(ques (and rs-penult (char=? (string-ref s rs-penult) #\?) rs-penult))
(path-end (or ques sharp slen)))