documentation added

uri.scm fixed:
 - uri-escaped-chars contains the characters to escape
 - escape-uri really uses optional argument
 - some files changed so they can use new char-set-lib instead of char-set-package
This commit is contained in:
interp 2001-05-17 16:48:41 +00:00
parent 2ff53cca7d
commit 5a15d24738
5 changed files with 49 additions and 33 deletions

View File

@ -253,7 +253,7 @@
;;; but I hand-coded it because it's short, and I didn't want invoke the ;;; but I hand-coded it because it's short, and I didn't want invoke the
;;; regexp machinery for something so simple. ;;; regexp machinery for something so simple.
(define non-whitespace (char-set-invert char-set:whitespace)) (define non-whitespace (char-set-complement char-set:whitespace))
(define (string->words s) (define (string->words s)
(let recur ((start 0)) (let recur ((start 0))

View File

@ -87,7 +87,7 @@
string-prefix? string-prefix?
string-suffix? string-suffix?
trim-spaces) trim-spaces)
(open char-set-package let-opt scheme) (open char-set-lib let-opt scheme)
(files stringhax)) (files stringhax))
(define-structure uri-package (export parse-uri (define-structure uri-package (export parse-uri
@ -105,7 +105,7 @@
condhax condhax
ascii ascii
strings strings
char-set-package char-set-lib
bitwise bitwise
field-reader-package field-reader-package
scheme) scheme)
@ -147,7 +147,7 @@
receiving receiving
condhax condhax
string-lib string-lib
char-set-package char-set-lib
uri-package uri-package
scsh-utilities scsh-utilities
httpd-error httpd-error
@ -241,7 +241,7 @@
switch-syntax switch-syntax
condhax condhax
strings strings
char-set-package char-set-lib
defrec-package defrec-package
define-record-types define-record-types
handle handle

View File

@ -73,16 +73,20 @@
;;; printing these field names out, it looks best if you capitalise ;;; printing these field names out, it looks best if you capitalise
;;; them with (CAPITALIZE-STRING (SYMBOL->STRING FIELD-NAME)). ;;; them with (CAPITALIZE-STRING (SYMBOL->STRING FIELD-NAME)).
;;; - BODY List of strings which are the field's body, e.g. ;;; - BODY List of strings which are the field's body, e.g.
;;; ("shivers@lcs.mit.edu"). Each list element is one line from ;;; ("shivers@lcs.mit.edu"). Each list element is one line
;;; the field's body, so if the field spreads out over three lines, ;;; from the field's body, so if the field spreads out
;;; then the body is a list of three strings. The terminating ;;; over three lines, then the body is a list of three
;;; cr/lf's are trimmed from each string. ;;; strings. The terminating cr/lf's are trimmed from each
;;; string. A leading space or a leading horizontal tab
;;; is also trimmed, but one and only one.
;;; When there are no more fields -- EOF or a blank line has terminated the ;;; When there are no more fields -- EOF or a blank line has terminated the
;;; header section -- then the procedure returns [#f #f]. ;;; header section -- then the procedure returns [#f #f].
;;; ;;;
;;; The %READ-RFC822-FIELD variant allows you to specify your own read-line ;;; The %READ-RFC822-FIELD variant allows you to specify your own
;;; procedure. The one used by READ-RFC822-FIELD terminates lines with either ;;; read-line procedure. The one used by READ-RFC822-FIELD terminates
;;; cr/lf or just lf, and it trims the terminator from the line. ;;; lines with either cr/lf or just lf, and it trims the terminator
;;; from the line. Your read-line procedure should trim the terminator
;;; of a line so an empty line is returned just as an empty string.
(define htab (ascii->char 9)) (define htab (ascii->char 9))
@ -113,6 +117,7 @@
(+ colon 1) (+ colon 1)
(string-length line1))))) (string-length line1)))))
(let ((c (peek-char port))) ; Could return EOF. (let ((c (peek-char port))) ; Could return EOF.
;;; RFC822: continuous lines has to start with a space or a htab
(if (or (eqv? c #\space) (eqv? c htab)) (if (or (eqv? c #\space) (eqv? c htab))
(lp (cons (read-line port) lines)) (lp (cons (read-line port) lines))
(values name (reverse lines)))))))) (values name (reverse lines))))))))

41
uri.scm
View File

@ -47,8 +47,9 @@
;;; This scheme is tolerant of the various ways people build broken URI's ;;; This scheme is tolerant of the various ways people build broken URI's
;;; out there on the Net. It was given to me by Dan Connolly of the W3C. ;;; out there on the Net. It was given to me by Dan Connolly of the W3C.
;;; Returns four values: scheme, path, search, frag-id. ;;; Returns four values: scheme, path, search, frag-id. Each value is
;;; Each value is either #f or a string. ;;; either #f or a string except of the path, which is a nonempty list
;;; of string (as mentioned above).
;;; MG: I think including = here will break up things, since it may be ;;; MG: I think including = here will break up things, since it may be
@ -98,19 +99,23 @@
(if (and (zero? hits) (zero? start) (= end (string-length s))) s (if (and (zero? hits) (zero? start) (= end (string-length s))) s
(let* ((nlen (- (- end start) (* hits 2))) (let* ((nlen (- (- end start) (* hits 2))) ; the new
(ns (make-string nlen))) ; length of the
; unescaped
; string
(ns (make-string nlen))) ; the result
(let lp ((i start) (j 0)) (let lp ((i start) (j 0)) ; sweap over the string
(if (< j nlen) (if (< j nlen)
(lp (? ((esc-seq? i) (lp (? ((esc-seq? i) ; unescape
; escape-sequence
(string-set! ns j (string-set! ns j
(let ((d1 (string-ref s (+ i 1))) (let ((d1 (string-ref s (+ i 1)))
(d2 (string-ref s (+ i 2)))) (d2 (string-ref s (+ i 2))))
(ascii->char (+ (* 16 (hexchar->int d1)) (ascii->char (+ (* 16 (hexchar->int d1))
(hexchar->int d2))))) (hexchar->int d2)))))
(+ i 3)) (+ i 3))
(else (string-set! ns j (string-ref s i)) (else (string-set! ns j (string-ref s i))
(+ i 1))) (+ i 1)))
(+ j 1)))) (+ j 1))))
ns))))) ns)))))
@ -119,10 +124,11 @@
(let ((hex-digits (string->char-set "0123456789abcdefABCDEF"))) (let ((hex-digits (string->char-set "0123456789abcdefABCDEF")))
(lambda (c) (char-set-contains? hex-digits c)))) (lambda (c) (char-set-contains? hex-digits c))))
(define (hexchar->int c) ; make use of the fact that numbers and characters are in order in the ascii table
(define (hexchar->int c)
(- (char->ascii c) (- (char->ascii c)
(if (char-numeric? c) (if (char-numeric? c)
(char->ascii #\0) (char->ascii #\0)
(- (if (char-upper-case? c) (- (if (char-upper-case? c)
(char->ascii #\A) (char->ascii #\A)
(char->ascii #\a)) (char->ascii #\a))
@ -143,8 +149,8 @@
;;; slashes and colons would be escaped. ;;; slashes and colons would be escaped.
(define uri-escaped-chars (define uri-escaped-chars
(char-set-invert (char-set-union char-set:alphanumeric (char-set-union (char-set-complement char-set:letter+digit)
(string->char-set "$-_@.&!*\"'(),+")))) (string->char-set "$-_@.&!*\"'(),+")))
;;; Takes a set of chars to escape. This is because we sometimes need to ;;; Takes a set of chars to escape. This is because we sometimes need to
;;; escape larger sets of chars for different parts of a URI. ;;; escape larger sets of chars for different parts of a URI.
@ -154,15 +160,18 @@
(let ((nlen (string-reduce 0 (let ((nlen (string-reduce 0
(lambda (c i) (lambda (c i)
(+ i (+ i
(if (char-set-contains? uri-escaped-chars c) (if (char-set-contains? escaped-chars c)
3 1))) 3 1)))
s))) s))) ; new length of escaped string
(if (= nlen (string-length s)) s (if (= nlen (string-length s)) s
(let ((ns (make-string nlen))) (let ((ns (make-string nlen)))
(string-reduce (string-reduce
0 0
(lambda (c i) (lambda (c i) ; replace each occurance of an
(+ i (? ((char-set-contains? uri-escaped-chars c) ; character to escape with %ff where ff
; is the ascii-code in hexadecimal
; notation
(+ i (? ((char-set-contains? escaped-chars c)
(string-set! ns i #\%) (string-set! ns i #\%)
(let* ((d (char->ascii c)) (let* ((d (char->ascii c))
(dhi (bitwise-and (arithmetic-shift d -4) #xF)) (dhi (bitwise-and (arithmetic-shift d -4) #xF))

12
url.scm
View File

@ -38,10 +38,12 @@
host host
port) port)
;;; Parse a URI path into a userhost record. Default values are taken ;;; Parse a URI path (a list representing a path, not a string!) into
;;; from the userhost record DEFAULT. Returns a userhost record if it ;;; a userhost record. Default values are taken from the userhost
;;; wins, and #f if it cannot parse the path. CDDDR drops the userhost ;;; record DEFAULT except for the host. Returns a userhost record if
;;; portion of the path. ;;; it wins, and #f if it cannot parse the path. CADDR drops the
;;; userhost portion of the path. In fact, fatal-syntax-error is
;;; called, if the path doesn't start with '//'.
(define (parse-userhost path default) (define (parse-userhost path default)
(if (and (pair? path) ; The thing better begin (if (and (pair? path) ; The thing better begin
@ -110,7 +112,7 @@
search search
frag-id) frag-id)
;;; The URI parser maps a string to four parts: ;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
;;; <scheme> : <path> ? <search> # <frag-id> ;;; <scheme> : <path> ? <search> # <frag-id>
;;; <scheme>, <search>, and <frag-id> are strings; <path> is a non-empty ;;; <scheme>, <search>, and <frag-id> are strings; <path> is a non-empty
;;; string list -- the URI's path split at slashes. Optional parts of the ;;; string list -- the URI's path split at slashes. Optional parts of the