*new procedure UNESCAPE (unescape URI-components using RegExps)

*move HEXCHAR->INT from uri.scm to here
*use UNESCAPE in PARSED-URI->HTTP-URL
This commit is contained in:
vibr 2004-10-14 17:18:24 +00:00
parent 9e71b351d4
commit 44a8ef28be
1 changed files with 49 additions and 3 deletions

View File

@ -204,12 +204,11 @@
(define (parsed-uri->http-url host port path query)
(let ((portnumber (and port (string->number port)))
(decoded-path (map unescape-uri path))
(decoded-query (and query (unescape-uri query))))
(decoded-path (map unescape path))
(decoded-query (and query (unescape query))))
(make-http-url host portnumber decoded-path decoded-query)))
;;TODO: separate procedures for decoding path and query
;;really decode query here??
;;; parse a HTTP 1.1. Request_URI into a http-url record
@ -251,3 +250,50 @@
(string-append scheme-and-host-string port-string path-string query-string)))
;;TODO: separate procedures for encoding path and query
;;; decode a URI
;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see above).
;;; Remark:
;;; we assume no non-ASCII characters occur in the URI; therefore the
;;; ascii table is used for conversion of the octet the hexnumber
;;; represents to a char.
;;; Caution:
;;; a URI must be separated into its components (i.e. parsed by
;;; PARSE-URI) before the escaped characters within those components
;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI.
(define (unescape s)
(regexp-fold
escaped
(lambda (start-search match res)
(let* ((start-match (match:start match))
(hexchar-low (string-ref s (+ start-match 2)))
(hexchar-high (string-ref s (+ start-match 1)))
(hex-low (hexchar->int hexchar-low))
(hex-high (hexchar->int hexchar-high))
(ascii (+ (* 16 hex-high) hex-low)))
(string-append
res
(substring s start-search start-match)
(string (ascii->char ascii)))))
""
s
(lambda (start-search res)
(string-append res (substring s start-search (string-length s))))))
; make use of the fact that numbers and characters are in order in the ascii table
(define (hexchar->int c)
(- (char->ascii c)
(if (char-numeric? c)
(char->ascii #\0)
(- (if (char-upper-case? c)
(char->ascii #\A)
(char->ascii #\a))
10))))