*move general procedures ESCAPE, UNESCAPE and their helper procs from
url.scm to uri.scm -->Parser/Unparser for HTTP 1.1 URIs is now complete and resides in url.scm; Encoder/Decoder applicable to URIs in general resides in uri.scm. (All has been rewritten from scratch, next to nothing of Olin's code is left). <--
This commit is contained in:
parent
584bfa2cdb
commit
ed1e4428c5
|
@ -5,3 +5,96 @@
|
||||||
;;; Copyright (c) 1995 by Olin Shivers.
|
;;; Copyright (c) 1995 by Olin Shivers.
|
||||||
;;; For copyright information, see the file COPYING which comes with
|
;;; For copyright information, see the file COPYING which comes with
|
||||||
;;; the distribution.
|
;;; the distribution.
|
||||||
|
|
||||||
|
;;; References:
|
||||||
|
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
|
||||||
|
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; decode a URI
|
||||||
|
;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see url.scm).
|
||||||
|
|
||||||
|
;;; Remark:
|
||||||
|
;;; we assume no non-ASCII characters occur in the URI; therefore the
|
||||||
|
;;; ascii table is used for conversion of the octet the hexnumber
|
||||||
|
;;; represents to a char.
|
||||||
|
|
||||||
|
;;; Caution:
|
||||||
|
;;; a URI must be separated into its components (for a HTTP-URL e.g. parsed by
|
||||||
|
;;; PARSE-URI) before the escaped characters within those components
|
||||||
|
;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI.
|
||||||
|
|
||||||
|
(define (unescape s)
|
||||||
|
(regexp-fold
|
||||||
|
escaped
|
||||||
|
(lambda (start-search match res)
|
||||||
|
(let* ((start-match (match:start match))
|
||||||
|
(hexchar-low (string-ref s (+ start-match 2)))
|
||||||
|
(hexchar-high (string-ref s (+ start-match 1)))
|
||||||
|
(hex-low (hexchar->int hexchar-low))
|
||||||
|
(hex-high (hexchar->int hexchar-high))
|
||||||
|
(ascii (+ (* 16 hex-high) hex-low)))
|
||||||
|
(string-append
|
||||||
|
res
|
||||||
|
(substring s start-search start-match)
|
||||||
|
(string (ascii->char ascii)))))
|
||||||
|
""
|
||||||
|
s
|
||||||
|
(lambda (start-search res)
|
||||||
|
(string-append res (substring s start-search (string-length s))))))
|
||||||
|
|
||||||
|
|
||||||
|
; make use of the fact that numbers and characters are in order in the ascii table
|
||||||
|
(define (hexchar->int c)
|
||||||
|
(- (char->ascii c)
|
||||||
|
(if (char-numeric? c)
|
||||||
|
(char->ascii #\0)
|
||||||
|
(- (if (char-upper-case? c)
|
||||||
|
(char->ascii #\A)
|
||||||
|
(char->ascii #\a))
|
||||||
|
10))))
|
||||||
|
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;; encode a URI:
|
||||||
|
;;; replace characters which are reserved or excluded by their escaped representation.
|
||||||
|
|
||||||
|
;;; Caution:
|
||||||
|
;;; Each component of a URI may have its own set of characters that are reserved,
|
||||||
|
;;; -> differentiate between components by writing specialized procedures
|
||||||
|
;;; (see url.scm for examples)
|
||||||
|
|
||||||
|
;;; Caution:
|
||||||
|
;;; don't encode an already encoded string; #\% chars would be escaped again.
|
||||||
|
|
||||||
|
|
||||||
|
;;; escape occurrences of RegExp regexp in string s
|
||||||
|
(define (escape s regexp)
|
||||||
|
(regexp-fold
|
||||||
|
regexp
|
||||||
|
(lambda (start-search match res)
|
||||||
|
(let* ((start-match (match:start match))
|
||||||
|
(forbidden-char (string-ref s start-match)))
|
||||||
|
(string-append
|
||||||
|
res
|
||||||
|
(substring s start-search start-match)
|
||||||
|
(ascii->escaped (char->ascii forbidden-char)))))
|
||||||
|
""
|
||||||
|
s
|
||||||
|
(lambda (start-search res)
|
||||||
|
(string-append res (substring s start-search (string-length s))))))
|
||||||
|
|
||||||
|
;;;generate string representing hex-ascii-code for the decimal-ascii-code DEC-INT
|
||||||
|
(define (ascii->escaped dec-int)
|
||||||
|
(let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF))
|
||||||
|
(hex-int-low (bitwise-and dec-int #xF)))
|
||||||
|
(string-append
|
||||||
|
"%" (int->hexstring hex-int-high) (int->hexstring hex-int-low))))
|
||||||
|
|
||||||
|
(define int->hexstring
|
||||||
|
(let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
|
||||||
|
"A" "B" "C" "D" "E" "F")))
|
||||||
|
(lambda (i) (vector-ref table i))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -252,60 +252,14 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;;; decode a URI
|
;;; decoding and encoding Request-URIs:
|
||||||
;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see above).
|
|
||||||
|
|
||||||
;;; Remark:
|
;;; to decode Request-URIs use UNESCAPE from uri.scm
|
||||||
;;; we assume no non-ASCII characters occur in the URI; therefore the
|
|
||||||
;;; ascii table is used for conversion of the octet the hexnumber
|
|
||||||
;;; represents to a char.
|
|
||||||
|
|
||||||
;;; Caution:
|
;;; encode Request-URIs:
|
||||||
;;; a URI must be separated into its components (e.g. parsed by
|
|
||||||
;;; PARSE-URI) before the escaped characters within those components
|
|
||||||
;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI.
|
|
||||||
|
|
||||||
(define (unescape s)
|
|
||||||
(regexp-fold
|
|
||||||
escaped
|
|
||||||
(lambda (start-search match res)
|
|
||||||
(let* ((start-match (match:start match))
|
|
||||||
(hexchar-low (string-ref s (+ start-match 2)))
|
|
||||||
(hexchar-high (string-ref s (+ start-match 1)))
|
|
||||||
(hex-low (hexchar->int hexchar-low))
|
|
||||||
(hex-high (hexchar->int hexchar-high))
|
|
||||||
(ascii (+ (* 16 hex-high) hex-low)))
|
|
||||||
(string-append
|
|
||||||
res
|
|
||||||
(substring s start-search start-match)
|
|
||||||
(string (ascii->char ascii)))))
|
|
||||||
""
|
|
||||||
s
|
|
||||||
(lambda (start-search res)
|
|
||||||
(string-append res (substring s start-search (string-length s))))))
|
|
||||||
|
|
||||||
|
|
||||||
; make use of the fact that numbers and characters are in order in the ascii table
|
|
||||||
(define (hexchar->int c)
|
|
||||||
(- (char->ascii c)
|
|
||||||
(if (char-numeric? c)
|
|
||||||
(char->ascii #\0)
|
|
||||||
(- (if (char-upper-case? c)
|
|
||||||
(char->ascii #\A)
|
|
||||||
(char->ascii #\a))
|
|
||||||
10))))
|
|
||||||
|
|
||||||
|
|
||||||
;;; encode URIs:
|
|
||||||
;;; replace characters which are reserved or excluded by their escaped representation.
|
|
||||||
|
|
||||||
;;; Caution:
|
|
||||||
;;; Each component of a URI may have its own set of characters that are reserved,
|
;;; Each component of a URI may have its own set of characters that are reserved,
|
||||||
;;; -> differentiate between components.
|
;;; -> differentiate between components.
|
||||||
|
|
||||||
;;; Caution:
|
|
||||||
;;; don't encode an already encoded string; #\% chars would be escaped again.
|
|
||||||
|
|
||||||
;;; not allowed within component 'segment' in 'abs_path'
|
;;; not allowed within component 'segment' in 'abs_path'
|
||||||
(define segment-reserved-and-excluded (rx (~ ,unreserved ,pchar-charset (";"))))
|
(define segment-reserved-and-excluded (rx (~ ,unreserved ,pchar-charset (";"))))
|
||||||
|
|
||||||
|
@ -322,31 +276,3 @@
|
||||||
(define (escape-query query)
|
(define (escape-query query)
|
||||||
(escape query query-reserved-and-excluded))
|
(escape query query-reserved-and-excluded))
|
||||||
|
|
||||||
(define (escape s regexp)
|
|
||||||
(regexp-fold
|
|
||||||
regexp
|
|
||||||
(lambda (start-search match res)
|
|
||||||
(let* ((start-match (match:start match))
|
|
||||||
(forbidden-char (string-ref s start-match)))
|
|
||||||
(string-append
|
|
||||||
res
|
|
||||||
(substring s start-search start-match)
|
|
||||||
(ascii->escaped (char->ascii forbidden-char)))))
|
|
||||||
""
|
|
||||||
s
|
|
||||||
(lambda (start-search res)
|
|
||||||
(string-append res (substring s start-search (string-length s))))))
|
|
||||||
|
|
||||||
(define (ascii->escaped dec-int)
|
|
||||||
(let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF))
|
|
||||||
(hex-int-low (bitwise-and dec-int #xF)))
|
|
||||||
(string-append
|
|
||||||
"%" (int->hexstring hex-int-high) (int->hexstring hex-int-low))))
|
|
||||||
|
|
||||||
(define int->hexstring
|
|
||||||
(let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
|
|
||||||
"A" "B" "C" "D" "E" "F")))
|
|
||||||
(lambda (i) (vector-ref table i))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue