*work around scsh bug (?) in definition of charset UNRESERVED

*new procs for encoding URIs:
-general proc ESCAPE taking an RegExp representing forbidden chars as argument
-specialized procs ESCAPE-SEGMENT, ESCAPE-QUERY
*new helper proc ASCII->ESCAPED
*use ESCAPE-SEGMENT and ESCAPE-URI in HTTP-URL->URI-STRING
This commit is contained in:
vibr 2004-10-18 17:35:40 +00:00
parent c48446ba7f
commit 584bfa2cdb
1 changed files with 58 additions and 5 deletions

View File

@ -50,7 +50,7 @@
(define mark (rx ( "-_.!~*'()"))) (define mark (rx ( "-_.!~*'()")))
(define unreserved (rx (| ,alphanum ,mark))) (define unreserved (rx (~ (~ (| ,alphanum ,mark)))))
(define reserved (rx ( ";/?:@&=+$,"))) (define reserved (rx ( ";/?:@&=+$,")))
@ -238,18 +238,17 @@
(path-string (path-string
(fold-right (fold-right
(lambda (segment res) (lambda (segment res)
(string-append "/" (escape-uri segment) res)) (string-append "/" (escape-segment segment) res))
"" ""
path)) path))
(query (http-url-query http-url)) (query (http-url-query http-url))
(query-string (if query (query-string (if query
(string-append "?" (escape-uri query)) (string-append "?" (escape-query query))
""))) "")))
(string-append scheme-and-host-string port-string path-string query-string))) (string-append scheme-and-host-string port-string path-string query-string)))
;;TODO: separate procedures for encoding path and query
@ -262,7 +261,7 @@
;;; represents to a char. ;;; represents to a char.
;;; Caution: ;;; Caution:
;;; a URI must be separated into its components (i.e. parsed by ;;; a URI must be separated into its components (e.g. parsed by
;;; PARSE-URI) before the escaped characters within those components ;;; PARSE-URI) before the escaped characters within those components
;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI. ;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI.
@ -297,3 +296,57 @@
10)))) 10))))
;;; encode URIs:
;;; replace characters which are reserved or excluded by their escaped representation.
;;; Caution:
;;; Each component of a URI may have its own set of characters that are reserved,
;;; -> differentiate between components.
;;; Caution:
;;; don't encode an already encoded string; #\% chars would be escaped again.
;;; not allowed within component 'segment' in 'abs_path'
(define segment-reserved-and-excluded (rx (~ ,unreserved ,pchar-charset (";"))))
;;; not allowed within component 'query'
(define query-reserved-and-excluded (rx (~ ,unreserved ,reserved )))
;;; encode 'abs_path' portion of a URI:
;;; use SPLIT-PATH to split abs_path into its segments,
;;; then apply UNESCAPE-SEGMENT to the segments.
(define (escape-segment segment)
(escape segment segment-reserved-and-excluded))
;;; encode 'query' portion of a URI
(define (escape-query query)
(escape query query-reserved-and-excluded))
(define (escape s regexp)
(regexp-fold
regexp
(lambda (start-search match res)
(let* ((start-match (match:start match))
(forbidden-char (string-ref s start-match)))
(string-append
res
(substring s start-search start-match)
(ascii->escaped (char->ascii forbidden-char)))))
""
s
(lambda (start-search res)
(string-append res (substring s start-search (string-length s))))))
(define (ascii->escaped dec-int)
(let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF))
(hex-int-low (bitwise-and dec-int #xF)))
(string-append
"%" (int->hexstring hex-int-high) (int->hexstring hex-int-low))))
(define int->hexstring
(let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
"A" "B" "C" "D" "E" "F")))
(lambda (i) (vector-ref table i))))