From ed1e4428c5c6c1995395366b6506e57d7ecff488 Mon Sep 17 00:00:00 2001 From: vibr Date: Mon, 18 Oct 2004 18:23:03 +0000 Subject: [PATCH] *move general procedures ESCAPE, UNESCAPE and their helper procs from url.scm to uri.scm -->Parser/Unparser for HTTP 1.1 URIs is now complete and resides in url.scm; Encoder/Decoder applicable to URIs in general resides in uri.scm. (All has been rewritten from scratch, next to nothing of Olin's code is left). <-- --- scheme/lib/uri.scm | 93 ++++++++++++++++++++++++++++++++++++++++++++++ scheme/lib/url.scm | 80 ++------------------------------------- 2 files changed, 96 insertions(+), 77 deletions(-) diff --git a/scheme/lib/uri.scm b/scheme/lib/uri.scm index 4ec9834..12b24d3 100644 --- a/scheme/lib/uri.scm +++ b/scheme/lib/uri.scm @@ -5,3 +5,96 @@ ;;; Copyright (c) 1995 by Olin Shivers. ;;; For copyright information, see the file COPYING which comes with ;;; the distribution. + +;;; References: +;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; decode a URI +;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see url.scm). + +;;; Remark: +;;; we assume no non-ASCII characters occur in the URI; therefore the +;;; ascii table is used for conversion of the octet the hexnumber +;;; represents to a char. + +;;; Caution: +;;; a URI must be separated into its components (for a HTTP-URL e.g. parsed by +;;; PARSE-URI) before the escaped characters within those components +;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI. + +(define (unescape s) + (regexp-fold + escaped + (lambda (start-search match res) + (let* ((start-match (match:start match)) + (hexchar-low (string-ref s (+ start-match 2))) + (hexchar-high (string-ref s (+ start-match 1))) + (hex-low (hexchar->int hexchar-low)) + (hex-high (hexchar->int hexchar-high)) + (ascii (+ (* 16 hex-high) hex-low))) + (string-append + res + (substring s start-search start-match) + (string (ascii->char ascii))))) + "" + s + (lambda (start-search res) + (string-append res (substring s start-search (string-length s)))))) + + +; make use of the fact that numbers and characters are in order in the ascii table +(define (hexchar->int c) + (- (char->ascii c) + (if (char-numeric? c) + (char->ascii #\0) + (- (if (char-upper-case? c) + (char->ascii #\A) + (char->ascii #\a)) + 10)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; encode a URI: +;;; replace characters which are reserved or excluded by their escaped representation. + +;;; Caution: +;;; Each component of a URI may have its own set of characters that are reserved, +;;; -> differentiate between components by writing specialized procedures +;;; (see url.scm for examples) + +;;; Caution: +;;; don't encode an already encoded string; #\% chars would be escaped again. + + +;;; escape occurrences of RegExp regexp in string s +(define (escape s regexp) + (regexp-fold + regexp + (lambda (start-search match res) + (let* ((start-match (match:start match)) + (forbidden-char (string-ref s start-match))) + (string-append + res + (substring s start-search start-match) + (ascii->escaped (char->ascii forbidden-char))))) + "" + s + (lambda (start-search res) + (string-append res (substring s start-search (string-length s)))))) + +;;;generate string representing hex-ascii-code for the decimal-ascii-code DEC-INT +(define (ascii->escaped dec-int) + (let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF)) + (hex-int-low (bitwise-and dec-int #xF))) + (string-append + "%" (int->hexstring hex-int-high) (int->hexstring hex-int-low)))) + +(define int->hexstring + (let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9" + "A" "B" "C" "D" "E" "F"))) + (lambda (i) (vector-ref table i)))) + + + diff --git a/scheme/lib/url.scm b/scheme/lib/url.scm index 19b5833..cfd2b64 100644 --- a/scheme/lib/url.scm +++ b/scheme/lib/url.scm @@ -252,60 +252,14 @@ -;;; decode a URI -;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see above). +;;; decoding and encoding Request-URIs: -;;; Remark: -;;; we assume no non-ASCII characters occur in the URI; therefore the -;;; ascii table is used for conversion of the octet the hexnumber -;;; represents to a char. +;;; to decode Request-URIs use UNESCAPE from uri.scm -;;; Caution: -;;; a URI must be separated into its components (e.g. parsed by -;;; PARSE-URI) before the escaped characters within those components -;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI. - -(define (unescape s) - (regexp-fold - escaped - (lambda (start-search match res) - (let* ((start-match (match:start match)) - (hexchar-low (string-ref s (+ start-match 2))) - (hexchar-high (string-ref s (+ start-match 1))) - (hex-low (hexchar->int hexchar-low)) - (hex-high (hexchar->int hexchar-high)) - (ascii (+ (* 16 hex-high) hex-low))) - (string-append - res - (substring s start-search start-match) - (string (ascii->char ascii))))) - "" - s - (lambda (start-search res) - (string-append res (substring s start-search (string-length s)))))) - - -; make use of the fact that numbers and characters are in order in the ascii table -(define (hexchar->int c) - (- (char->ascii c) - (if (char-numeric? c) - (char->ascii #\0) - (- (if (char-upper-case? c) - (char->ascii #\A) - (char->ascii #\a)) - 10)))) - - -;;; encode URIs: -;;; replace characters which are reserved or excluded by their escaped representation. - -;;; Caution: +;;; encode Request-URIs: ;;; Each component of a URI may have its own set of characters that are reserved, ;;; -> differentiate between components. -;;; Caution: -;;; don't encode an already encoded string; #\% chars would be escaped again. - ;;; not allowed within component 'segment' in 'abs_path' (define segment-reserved-and-excluded (rx (~ ,unreserved ,pchar-charset (";")))) @@ -322,31 +276,3 @@ (define (escape-query query) (escape query query-reserved-and-excluded)) -(define (escape s regexp) - (regexp-fold - regexp - (lambda (start-search match res) - (let* ((start-match (match:start match)) - (forbidden-char (string-ref s start-match))) - (string-append - res - (substring s start-search start-match) - (ascii->escaped (char->ascii forbidden-char))))) - "" - s - (lambda (start-search res) - (string-append res (substring s start-search (string-length s)))))) - -(define (ascii->escaped dec-int) - (let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF)) - (hex-int-low (bitwise-and dec-int #xF))) - (string-append - "%" (int->hexstring hex-int-high) (int->hexstring hex-int-low)))) - -(define int->hexstring - (let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9" - "A" "B" "C" "D" "E" "F"))) - (lambda (i) (vector-ref table i)))) - - -