From 5a15d2473875d452266e60f72f6318af0256ba76 Mon Sep 17 00:00:00 2001 From: interp Date: Thu, 17 May 2001 16:48:41 +0000 Subject: [PATCH] documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package --- httpd-core.scm | 2 +- modules.scm | 8 ++++---- rfc822.scm | 19 ++++++++++++------- uri.scm | 41 +++++++++++++++++++++++++---------------- url.scm | 12 +++++++----- 5 files changed, 49 insertions(+), 33 deletions(-) diff --git a/httpd-core.scm b/httpd-core.scm index 613c549..a298a9e 100644 --- a/httpd-core.scm +++ b/httpd-core.scm @@ -253,7 +253,7 @@ ;;; but I hand-coded it because it's short, and I didn't want invoke the ;;; regexp machinery for something so simple. -(define non-whitespace (char-set-invert char-set:whitespace)) +(define non-whitespace (char-set-complement char-set:whitespace)) (define (string->words s) (let recur ((start 0)) diff --git a/modules.scm b/modules.scm index 287f29d..89a4f48 100644 --- a/modules.scm +++ b/modules.scm @@ -87,7 +87,7 @@ string-prefix? string-suffix? trim-spaces) - (open char-set-package let-opt scheme) + (open char-set-lib let-opt scheme) (files stringhax)) (define-structure uri-package (export parse-uri @@ -105,7 +105,7 @@ condhax ascii strings - char-set-package + char-set-lib bitwise field-reader-package scheme) @@ -147,7 +147,7 @@ receiving condhax string-lib - char-set-package + char-set-lib uri-package scsh-utilities httpd-error @@ -241,7 +241,7 @@ switch-syntax condhax strings - char-set-package + char-set-lib defrec-package define-record-types handle diff --git a/rfc822.scm b/rfc822.scm index d5c629e..7cc26ca 100644 --- a/rfc822.scm +++ b/rfc822.scm @@ -73,16 +73,20 @@ ;;; printing these field names out, it looks best if you capitalise ;;; them with (CAPITALIZE-STRING (SYMBOL->STRING FIELD-NAME)). ;;; - BODY List of strings which are the field's body, e.g. -;;; ("shivers@lcs.mit.edu"). Each list element is one line from -;;; the field's body, so if the field spreads out over three lines, -;;; then the body is a list of three strings. The terminating -;;; cr/lf's are trimmed from each string. +;;; ("shivers@lcs.mit.edu"). Each list element is one line +;;; from the field's body, so if the field spreads out +;;; over three lines, then the body is a list of three +;;; strings. The terminating cr/lf's are trimmed from each +;;; string. A leading space or a leading horizontal tab +;;; is also trimmed, but one and only one. ;;; When there are no more fields -- EOF or a blank line has terminated the ;;; header section -- then the procedure returns [#f #f]. ;;; -;;; The %READ-RFC822-FIELD variant allows you to specify your own read-line -;;; procedure. The one used by READ-RFC822-FIELD terminates lines with either -;;; cr/lf or just lf, and it trims the terminator from the line. +;;; The %READ-RFC822-FIELD variant allows you to specify your own +;;; read-line procedure. The one used by READ-RFC822-FIELD terminates +;;; lines with either cr/lf or just lf, and it trims the terminator +;;; from the line. Your read-line procedure should trim the terminator +;;; of a line so an empty line is returned just as an empty string. (define htab (ascii->char 9)) @@ -113,6 +117,7 @@ (+ colon 1) (string-length line1))))) (let ((c (peek-char port))) ; Could return EOF. +;;; RFC822: continuous lines has to start with a space or a htab (if (or (eqv? c #\space) (eqv? c htab)) (lp (cons (read-line port) lines)) (values name (reverse lines)))))))) diff --git a/uri.scm b/uri.scm index 136c1cb..57c9ef9 100644 --- a/uri.scm +++ b/uri.scm @@ -47,8 +47,9 @@ ;;; This scheme is tolerant of the various ways people build broken URI's ;;; out there on the Net. It was given to me by Dan Connolly of the W3C. -;;; Returns four values: scheme, path, search, frag-id. -;;; Each value is either #f or a string. +;;; Returns four values: scheme, path, search, frag-id. Each value is +;;; either #f or a string except of the path, which is a nonempty list +;;; of string (as mentioned above). ;;; MG: I think including = here will break up things, since it may be @@ -98,19 +99,23 @@ (if (and (zero? hits) (zero? start) (= end (string-length s))) s - (let* ((nlen (- (- end start) (* hits 2))) - (ns (make-string nlen))) + (let* ((nlen (- (- end start) (* hits 2))) ; the new + ; length of the + ; unescaped + ; string + (ns (make-string nlen))) ; the result - (let lp ((i start) (j 0)) + (let lp ((i start) (j 0)) ; sweap over the string (if (< j nlen) - (lp (? ((esc-seq? i) + (lp (? ((esc-seq? i) ; unescape + ; escape-sequence (string-set! ns j (let ((d1 (string-ref s (+ i 1))) (d2 (string-ref s (+ i 2)))) (ascii->char (+ (* 16 (hexchar->int d1)) (hexchar->int d2))))) (+ i 3)) - (else (string-set! ns j (string-ref s i)) + (else (string-set! ns j (string-ref s i)) (+ i 1))) (+ j 1)))) ns))))) @@ -119,10 +124,11 @@ (let ((hex-digits (string->char-set "0123456789abcdefABCDEF"))) (lambda (c) (char-set-contains? hex-digits c)))) -(define (hexchar->int c) +; make use of the fact that numbers and characters are in order in the ascii table +(define (hexchar->int c) (- (char->ascii c) (if (char-numeric? c) - (char->ascii #\0) + (char->ascii #\0) (- (if (char-upper-case? c) (char->ascii #\A) (char->ascii #\a)) @@ -143,8 +149,8 @@ ;;; slashes and colons would be escaped. (define uri-escaped-chars - (char-set-invert (char-set-union char-set:alphanumeric - (string->char-set "$-_@.&!*\"'(),+")))) + (char-set-union (char-set-complement char-set:letter+digit) + (string->char-set "$-_@.&!*\"'(),+"))) ;;; Takes a set of chars to escape. This is because we sometimes need to ;;; escape larger sets of chars for different parts of a URI. @@ -154,15 +160,18 @@ (let ((nlen (string-reduce 0 (lambda (c i) (+ i - (if (char-set-contains? uri-escaped-chars c) + (if (char-set-contains? escaped-chars c) 3 1))) - s))) + s))) ; new length of escaped string (if (= nlen (string-length s)) s - (let ((ns (make-string nlen))) + (let ((ns (make-string nlen))) (string-reduce 0 - (lambda (c i) - (+ i (? ((char-set-contains? uri-escaped-chars c) + (lambda (c i) ; replace each occurance of an + ; character to escape with %ff where ff + ; is the ascii-code in hexadecimal + ; notation + (+ i (? ((char-set-contains? escaped-chars c) (string-set! ns i #\%) (let* ((d (char->ascii c)) (dhi (bitwise-and (arithmetic-shift d -4) #xF)) diff --git a/url.scm b/url.scm index 85ae438..f94ab73 100644 --- a/url.scm +++ b/url.scm @@ -38,10 +38,12 @@ host port) -;;; Parse a URI path into a userhost record. Default values are taken -;;; from the userhost record DEFAULT. Returns a userhost record if it -;;; wins, and #f if it cannot parse the path. CDDDR drops the userhost -;;; portion of the path. +;;; Parse a URI path (a list representing a path, not a string!) into +;;; a userhost record. Default values are taken from the userhost +;;; record DEFAULT except for the host. Returns a userhost record if +;;; it wins, and #f if it cannot parse the path. CADDR drops the +;;; userhost portion of the path. In fact, fatal-syntax-error is +;;; called, if the path doesn't start with '//'. (define (parse-userhost path default) (if (and (pair? path) ; The thing better begin @@ -110,7 +112,7 @@ search frag-id) -;;; The URI parser maps a string to four parts: +;;; The URI parser (parse-uri in uri.scm) maps a string to four parts: ;;; : ? # ;;; , , and are strings; is a non-empty ;;; string list -- the URI's path split at slashes. Optional parts of the