sunet/scheme/lib/uri.scm

101 lines
3.1 KiB
Scheme
Raw Normal View History

2002-06-08 11:07:01 -04:00
;;; -*- Scheme -*-
2002-08-27 05:03:22 -04:00
;;; This file is part of the Scheme Untergrund Networking package.
2002-06-08 11:07:01 -04:00
;;; Copyright (c) 1995 by Olin Shivers.
2002-08-27 05:03:22 -04:00
;;; For copyright information, see the file COPYING which comes with
;;; the distribution.
;;; References:
;;; RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; decode a URI
;;; walk over string s and unescape all occurrences of RegExp 'escaped' (see url.scm).
;;; Remark:
;;; we assume no non-ASCII characters occur in the URI; therefore the
;;; ascii table is used for conversion of the octet the hexnumber
;;; represents to a char.
;;; Caution:
;;; a URI must be separated into its components (for a HTTP-URL e.g. parsed by
;;; PARSE-URI) before the escaped characters within those components
;;; can be safely decoded. Don't use UNESCAPE on an unparsed URI.
(define (unescape s)
(regexp-fold
escaped
(lambda (start-search match res)
(let* ((start-match (match:start match))
(hexchar-low (string-ref s (+ start-match 2)))
(hexchar-high (string-ref s (+ start-match 1)))
(hex-low (hexchar->int hexchar-low))
(hex-high (hexchar->int hexchar-high))
(ascii (+ (* 16 hex-high) hex-low)))
(string-append
res
(substring s start-search start-match)
(string (ascii->char ascii)))))
""
s
(lambda (start-search res)
(string-append res (substring s start-search (string-length s))))))
; make use of the fact that numbers and characters are in order in the ascii table
(define (hexchar->int c)
(- (char->ascii c)
(if (char-numeric? c)
(char->ascii #\0)
(- (if (char-upper-case? c)
(char->ascii #\A)
(char->ascii #\a))
10))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; encode a URI:
;;; replace characters which are reserved or excluded by their escaped representation.
;;; Caution:
;;; Each component of a URI may have its own set of characters that are reserved,
;;; -> differentiate between components by writing specialized procedures
;;; (see url.scm for examples)
;;; Caution:
;;; don't encode an already encoded string; #\% chars would be escaped again.
;;; escape occurrences of RegExp regexp in string s
(define (escape s regexp)
(regexp-fold
regexp
(lambda (start-search match res)
(let* ((start-match (match:start match))
(forbidden-char (string-ref s start-match)))
(string-append
res
(substring s start-search start-match)
(ascii->escaped (char->ascii forbidden-char)))))
""
s
(lambda (start-search res)
(string-append res (substring s start-search (string-length s))))))
;;;generate string representing hex-ascii-code for the decimal-ascii-code DEC-INT
(define (ascii->escaped dec-int)
(let* ((hex-int-high (bitwise-and (arithmetic-shift dec-int -4) #xF))
(hex-int-low (bitwise-and dec-int #xF)))
(string-append
"%" (int->hexstring hex-int-high) (int->hexstring hex-int-low))))
(define int->hexstring
(let ((table '#("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"
"A" "B" "C" "D" "E" "F")))
(lambda (i) (vector-ref table i))))