;;; -*- Scheme -*- ;;; This file is part of the Scheme Untergrund Networking package. ;;; Copyright (c) 1995 by Olin Shivers. ;;; For copyright information, see the file COPYING which comes with ;;; the distribution. ;;; References: ;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; decode a URI ;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see url.scm). ;;; Remark: ;;; we assume no non-ASCII characters occur in the URI; therefore the ;;; ascii table is used for conversion of the octet the hexnumber ;;; represents to a char. ;;; Caution: ;;; a URI must be separated into its components (for a HTTP-URL e.g. parsed by ;;; PARSE-URI) before the escaped characters within those components ;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI. (define (unescape s) (regexp-fold escaped (lambda (start-search match res) (let* ((start-match (match:start match)) (hexchar-low (string-ref s (+ start-match 2))) (hexchar-high (string-ref s (+ start-match 1))) (hex-low (hexchar->int hexchar-low)) (hex-high (hexchar->int hexchar-high)) (ascii (+ (* 16 hex-high) hex-low))) (string-append res (substring s start-search start-match) (string (ascii->char ascii))))) "" s (lambda (start-search res) (string-append res (substring s start-search (string-length s)))))) ; make use of the fact that numbers and characters are in order in the ascii table (define (hexchar->int c) (- (char->ascii c) (if (char-numeric? c) (char->ascii #\0) (- (if (char-upper-case? c) (char->ascii #\A) (char->ascii #\a)) 10)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; encode a URI: ;;; replace characters which are reserved or excluded by their escaped representation. ;;; Caution: ;;; Each component of a URI may have its own set of characters that are reserved, ;;; -> differentiate between components by writing specialized procedures ;;; (see url.scm for examples) ;;; Caution: ;;; don't encode an already encoded string; #\% chars would be escaped again. ;;; escape occurrences of RegExp regexp in string s (define (escape s regexp) (regexp-fold regexp (lambda (start-search match res) (let* ((start-match (match:start match)) (forbidden-char (string-ref s start-match))) (string-append res (substring s start-search start-match) (ascii->escaped (char->ascii forbidden-char))))) "" s (lambda (start-search res) (string-append res (substring s start-search (string-length s)))))) ;;;generate string representing hex-ascii-code for the decimal-ascii-code DEC-INT (define (ascii->escaped dec-int) (let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF)) (hex-int-low (bitwise-and dec-int #xF))) (string-append "%" (int->hexstring hex-int-high) (int->hexstring hex-int-low)))) (define int->hexstring (let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9" "A" "B" "C" "D" "E" "F"))) (lambda (i) (vector-ref table i))))