From 584bfa2cdb2efb08e73985d00564703422c3dcc4 Mon Sep 17 00:00:00 2001 From: vibr Date: Mon, 18 Oct 2004 17:35:40 +0000 Subject: [PATCH] *work around scsh bug (?) in definition of charset UNRESERVED *new procs for encoding URIs: -general proc ESCAPE taking an RegExp representing forbidden chars as argument -specialized procs ESCAPE-SEGMENT, ESCAPE-QUERY *new helper proc ASCII->ESCAPED *use ESCAPE-SEGMENT and ESCAPE-URI in HTTP-URL->URI-STRING --- scheme/lib/url.scm | 63 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/scheme/lib/url.scm b/scheme/lib/url.scm index 08cc001..19b5833 100644 --- a/scheme/lib/url.scm +++ b/scheme/lib/url.scm @@ -50,7 +50,7 @@ (define mark (rx ( "-_.!~*'()"))) -(define unreserved (rx (| ,alphanum ,mark))) +(define unreserved (rx (~ (~ (| ,alphanum ,mark))))) (define reserved (rx ( ";/?:@&=+$,"))) @@ -238,18 +238,17 @@ (path-string (fold-right (lambda (segment res) - (string-append "/" (escape-uri segment) res)) + (string-append "/" (escape-segment segment) res)) "" path)) (query (http-url-query http-url)) (query-string (if query - (string-append "?" (escape-uri query)) + (string-append "?" (escape-query query)) ""))) (string-append scheme-and-host-string port-string path-string query-string))) -;;TODO: separate procedures for encoding path and query @@ -262,7 +261,7 @@ ;;; represents to a char. ;;; Caution: -;;; a URI must be separated into its components (i.e. parsed by +;;; a URI must be separated into its components (e.g. parsed by ;;; PARSE-URI) before the escaped characters within those components ;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI. @@ -297,3 +296,57 @@ 10)))) +;;; encode URIs: +;;; replace characters which are reserved or excluded by their escaped representation. + +;;; Caution: +;;; Each component of a URI may have its own set of characters that are reserved, +;;; -> differentiate between components. + +;;; Caution: +;;; don't encode an already encoded string; #\% chars would be escaped again. + +;;; not allowed within component 'segment' in 'abs_path' +(define segment-reserved-and-excluded (rx (~ ,unreserved ,pchar-charset (";")))) + +;;; not allowed within component 'query' +(define query-reserved-and-excluded (rx (~ ,unreserved ,reserved ))) + +;;; encode 'abs_path' portion of a URI: +;;; use SPLIT-PATH to split abs_path into its segments, +;;; then apply UNESCAPE-SEGMENT to the segments. +(define (escape-segment segment) + (escape segment segment-reserved-and-excluded)) + +;;; encode 'query' portion of a URI +(define (escape-query query) + (escape query query-reserved-and-excluded)) + +(define (escape s regexp) + (regexp-fold + regexp + (lambda (start-search match res) + (let* ((start-match (match:start match)) + (forbidden-char (string-ref s start-match))) + (string-append + res + (substring s start-search start-match) + (ascii->escaped (char->ascii forbidden-char))))) + "" + s + (lambda (start-search res) + (string-append res (substring s start-search (string-length s)))))) + +(define (ascii->escaped dec-int) + (let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF)) + (hex-int-low (bitwise-and dec-int #xF))) + (string-append + "%" (int->hexstring hex-int-high) (int->hexstring hex-int-low)))) + +(define int->hexstring + (let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9" + "A" "B" "C" "D" "E" "F"))) + (lambda (i) (vector-ref table i)))) + + +