*remove ESCAPE-URI (didn't reliably differentiate between different portions of

a URI)
*move INT->HEXCHAR to url.scm
This commit is contained in:
vibr 2004-10-18 16:37:32 +00:00
parent 44a8ef28be
commit c48446ba7f
1 changed files with 0 additions and 107 deletions

View File

@ -5,110 +5,3 @@
;;; Copyright (c) 1995 by Olin Shivers.
;;; For copyright information, see the file COPYING which comes with
;;; the distribution.
;;; URI syntax -- [scheme] : path [? search ] [# fragmentid]
;;; References:
;;; - http://www.w3.org/Addressing/rfc1630.txt
;;; Original RFC
;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/URI_Overview.html
;;; General Web page of URI pointers.
(define int->hexchar
(let ((table '#(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
#\A #\B #\C #\D #\E #\F)))
(lambda (i) (vector-ref table i))))
;;; Caution:
;;; All reserved chars (e.g., slash, sharp, colon) get escaped: "=;/#?: "
;;; So don't apply this proc to chunks of text with syntactically meaningful
;;; reserved chars (e.g., paths with URI slashes or colons) -- they'll be
;;; escaped, and lose their special meaning. E.g. it would be a mistake
;;; to apply ESCAPE-URI to "//lcs.mit.edu:8001/foo/bar.html" because the
;;; slashes and colons would be escaped.
(define uri-escaped-chars
(char-set-complement
;; RFC 2396 (URI Generic Syntax) specifies unreserved = alphanum | mark
(char-set-union char-set:letter+digit
(string->char-set "-_.!~*'()"))))
;;; Takes a set of chars to escape. This is because we sometimes need to
;;; escape larger sets of chars for different parts of a URI.
(define (escape-uri s . maybe-escaped-chars)
(let-optionals maybe-escaped-chars ((escaped-chars uri-escaped-chars))
(let ((nlen (string-fold
(lambda (c i)
(+ i
(if (char-set-contains? escaped-chars c)
3
1)))
0
s))) ; new length of escaped string
(if (= nlen (string-length s))
s
(let ((ns (make-string nlen)))
(string-fold
(lambda (c i) ; replace each occurance of an
; character to escape with %ff where ff
; is the ascii-code in hexadecimal
; notation
(+ i (cond
((char-set-contains? escaped-chars c)
(string-set! ns i #\%)
(let* ((d (char->ascii c))
(dhi (bitwise-and (arithmetic-shift d -4) #xF))
(dlo (bitwise-and d #xF)))
(string-set! ns (+ i 1)
(int->hexchar dhi))
(string-set! ns (+ i 2)
(int->hexchar dlo)))
3)
(else (string-set! ns i c)
1))))
0
s)
ns)))))
;;; Cribbed from scsh's fname.scm
(define (split-uri uri start end) ; Split at /'s (infix grammar).
(let split ((i start)) ; "" -> ("")
(cond
((>= i end) '(""))
((string-index uri #\/ i) =>
(lambda (slash)
(cons (substring uri i slash)
(split (+ slash 1)))))
(else (list (substring uri i end))))))
;;; The elements of PLIST must be escaped in case they contain slashes.
;;; This procedure doesn't escape them for you; you must do that yourself:
;;; (uri-path->uri (map escape-uri pathlist))
(define (uri-path->uri plist)
(string-join plist "/")) ; Insert slashes between elts of PLIST.
(define (simplify-uri-path p)
(if (null? p)
#f ; P must be non-null
(let lp ((path-list (cdr p))
(stack (list (car p))))
(if (null? path-list) ; we're done
(reverse stack)
(cond
((string=? (car path-list) "..") ; back up
; neither the empty path nor root
(if (not (or (null? stack) (string=? (car stack) "")))
(lp (cdr path-list) (cdr stack))
#f))
((string=? (car path-list) ".") ; leave this
(lp (cdr path-list) stack))
((string=? (car path-list) "") ; back to root
(lp (cdr path-list) '("")))
(else ; usual segment
(lp (cdr path-list) (cons (car path-list) stack))))))))