From 5a15d2473875d452266e60f72f6318af0256ba76 Mon Sep 17 00:00:00 2001
From: interp <interp>
Date: Thu, 17 May 2001 16:48:41 +0000
Subject: [PATCH] documentation added uri.scm fixed:  - uri-escaped-chars
 contains the characters to escape  - escape-uri really uses optional argument
  - some files changed so they can use new char-set-lib instead of
 char-set-package

---
 httpd-core.scm |  2 +-
 modules.scm    |  8 ++++----
 rfc822.scm     | 19 ++++++++++++-------
 uri.scm        | 41 +++++++++++++++++++++++++----------------
 url.scm        | 12 +++++++-----
 5 files changed, 49 insertions(+), 33 deletions(-)
diff --git a/httpd-core.scm b/httpd-core.scm
index 613c549..a298a9e 100644
--- a/httpd-core.scm
+++ b/httpd-core.scm
@@ -253,7 +253,7 @@
 ;;; but I hand-coded it because it's short, and I didn't want invoke the
 ;;; regexp machinery for something so simple.
 
-(define non-whitespace (char-set-invert char-set:whitespace))
+(define non-whitespace (char-set-complement char-set:whitespace))
 
 (define (string->words s)
   (let recur ((start 0))
diff --git a/modules.scm b/modules.scm
index 287f29d..89a4f48 100644
--- a/modules.scm
+++ b/modules.scm
@@ -87,7 +87,7 @@
 				  string-prefix?
 				  string-suffix?
 				  trim-spaces)
-  (open char-set-package let-opt scheme) 
+  (open char-set-lib let-opt scheme) 
   (files stringhax))
 
 (define-structure uri-package (export parse-uri
@@ -105,7 +105,7 @@
 	condhax
 	ascii
 	strings
-	char-set-package
+	char-set-lib
 	bitwise
 	field-reader-package
 	scheme)
@@ -147,7 +147,7 @@
 	receiving
 	condhax
 	string-lib
-	char-set-package
+	char-set-lib
 	uri-package
 	scsh-utilities
 	httpd-error
@@ -241,7 +241,7 @@
 	switch-syntax
 	condhax
 	strings
-	char-set-package
+	char-set-lib
 	defrec-package
 	define-record-types
 	handle
diff --git a/rfc822.scm b/rfc822.scm
index d5c629e..7cc26ca 100644
--- a/rfc822.scm
+++ b/rfc822.scm
@@ -73,16 +73,20 @@
 ;;;             printing these field names out, it looks best if you capitalise
 ;;;             them with (CAPITALIZE-STRING (SYMBOL->STRING FIELD-NAME)).
 ;;; - BODY	List of strings which are the field's body, e.g. 
-;;;             ("shivers@lcs.mit.edu"). Each list element is one line from
-;;;             the field's body, so if the field spreads out over three lines,
-;;;             then the body is a list of three strings. The terminating
-;;;             cr/lf's are trimmed from each string.
+;;;             ("shivers@lcs.mit.edu"). Each list element is one line
+;;;             from the field's body, so if the field spreads out
+;;;             over three lines, then the body is a list of three
+;;;             strings. The terminating cr/lf's are trimmed from each
+;;;             string.  A leading space or a leading horizontal tab
+;;;             is also trimmed, but one and only one.
 ;;; When there are no more fields -- EOF or a blank line has terminated the
 ;;; header section -- then the procedure returns [#f #f].
 ;;; 
-;;; The %READ-RFC822-FIELD variant allows you to specify your own read-line
-;;; procedure. The one used by READ-RFC822-FIELD terminates lines with either
-;;; cr/lf or just lf, and it trims the terminator from the line.
+;;; The %READ-RFC822-FIELD variant allows you to specify your own
+;;; read-line procedure. The one used by READ-RFC822-FIELD terminates
+;;; lines with either cr/lf or just lf, and it trims the terminator
+;;; from the line. Your read-line procedure should trim the terminator
+;;; of a line so an empty line is returned just as an empty string.
 
 (define htab (ascii->char 9))
 
@@ -113,6 +117,7 @@
 						 (+ colon 1)
 						 (string-length line1)))))
 		  (let ((c (peek-char port))) ; Could return EOF.
+;;;  RFC822: continuation lines have to start with a space or an htab
 		    (if (or (eqv? c #\space) (eqv? c htab))
 			(lp (cons (read-line port) lines))
 			(values name (reverse lines))))))))
diff --git a/uri.scm b/uri.scm
index 136c1cb..57c9ef9 100644
--- a/uri.scm
+++ b/uri.scm
@@ -47,8 +47,9 @@
 ;;; This scheme is tolerant of the various ways people build broken URI's
 ;;; out there on the Net. It was given to me by Dan Connolly of the W3C.
 
-;;; Returns four values: scheme, path, search, frag-id.
-;;; Each value is either #f or a string.
+;;; Returns four values: scheme, path, search, frag-id.  Each value is
+;;; either #f or a string, except for the path, which is a nonempty
+;;; list of strings (as mentioned above).
 
 
 ;;; MG: I think including = here will break up things, since it may be
@@ -98,19 +99,23 @@
 	 
       (if (and (zero? hits) (zero? start) (= end (string-length s))) s
 
-	  (let* ((nlen (- (- end start) (* hits 2)))
-		 (ns (make-string nlen)))
+	  (let* ((nlen (- (- end start) (* hits 2)))   ; the new
+						       ; length of the
+						       ; unescaped
+						       ; string
+		 (ns (make-string nlen)))              ; the result
 
-	    (let lp ((i start) (j 0))
+	    (let lp ((i start) (j 0))                  ; sweep over the string
 	      (if (< j nlen)
-		  (lp (? ((esc-seq? i)
+		  (lp (? ((esc-seq? i)                 ; unescape
+						       ; escape-sequence
 			  (string-set! ns j
 				       (let ((d1 (string-ref s (+ i 1)))
 					     (d2 (string-ref s (+ i 2))))
 					 (ascii->char (+ (* 16 (hexchar->int d1))
 							 (hexchar->int d2)))))
 			  (+ i 3))
-			 (else (string-set! ns j (string-ref s i))
+			 (else (string-set! ns j (string-ref s i))  
 			       (+ i 1)))
 		      (+ j 1))))
 	    ns)))))
@@ -119,10 +124,11 @@
   (let ((hex-digits (string->char-set "0123456789abcdefABCDEF")))
     (lambda (c) (char-set-contains? hex-digits c))))
 
-(define (hexchar->int c)
+; Relies on the digits and the letters each being consecutive in the ASCII table.
+(define (hexchar->int c)      
   (- (char->ascii c) 
      (if (char-numeric? c)
-	 (char->ascii #\0)
+	 (char->ascii #\0)         
 	 (- (if (char-upper-case? c)
 		(char->ascii #\A)
 		(char->ascii #\a))
@@ -143,8 +149,8 @@
 ;;; slashes and colons would be escaped.
 
 (define uri-escaped-chars
-  (char-set-invert (char-set-union char-set:alphanumeric
-				   (string->char-set "$-_@.&!*\"'(),+"))))
+  (char-set-complement (char-set-union char-set:letter+digit
+				       (string->char-set "$-_@.&!*\"'(),+")))) ; escape all but letters, digits and these safe chars
 
 ;;; Takes a set of chars to escape. This is because we sometimes need to
 ;;; escape larger sets of chars for different parts of a URI.
@@ -154,15 +160,18 @@
     (let ((nlen (string-reduce 0
 			       (lambda (c i)
 				 (+ i
-				    (if (char-set-contains? uri-escaped-chars c)
+				    (if (char-set-contains? escaped-chars c)
 					3 1)))
-			       s)))
+			       s)))    ; new length of escaped string
       (if (= nlen (string-length s)) s
-	  (let ((ns (make-string nlen)))
+	  (let ((ns (make-string nlen))) 
 	    (string-reduce
 	     0
-	     (lambda (c i)
-	       (+ i (? ((char-set-contains? uri-escaped-chars c)
+	     (lambda (c i)     ; replace each occurrence of a
+			       ; character to escape with %xx where xx
+			       ; is its ASCII code in hexadecimal
+			       ; notation
+	       (+ i (? ((char-set-contains? escaped-chars c)
 			(string-set! ns i #\%)
 			(let* ((d (char->ascii c))
 			       (dhi (bitwise-and (arithmetic-shift d -4) #xF))
diff --git a/url.scm b/url.scm
index 85ae438..f94ab73 100644
--- a/url.scm
+++ b/url.scm
@@ -38,10 +38,12 @@
   host
   port)
 
-;;; Parse a URI path into a userhost record. Default values are taken
-;;; from the userhost record DEFAULT. Returns a userhost record if it
-;;; wins, and #f if it cannot parse the path. CDDDR drops the userhost
-;;; portion of the path.
+;;; Parse a URI path (a list representing a path, not a string!) into
+;;; a userhost record. Default values are taken from the userhost
+;;; record DEFAULT except for the host. Returns a userhost record if
+;;; it wins, and #f if it cannot parse the path. CADDR drops the
+;;; userhost portion of the path. In fact, fatal-syntax-error is
+;;; called, if the path doesn't start with '//'.
 
 (define (parse-userhost path default)
   (if (and (pair? path)				; The thing better begin
@@ -110,7 +112,7 @@
   search
   frag-id)
 
-;;; The URI parser maps a string to four parts:
+;;; The URI parser (parse-uri in uri.scm) maps a string to four parts:
 ;;;     <scheme> : <path> ? <search> # <frag-id>
 ;;; <scheme>, <search>, and <frag-id> are strings; <path> is a non-empty
 ;;; string list -- the URI's path split at slashes. Optional parts of the