sunet/uri.scm

;;; -*- Scheme -*-
;;; Copyright (c) 1995 by Olin Shivers.

;;; URI syntax -- [scheme] : path [? search ] [# fragmentid]

;;; Imports and non-R4RS'isms
;;;	let-optionals
;;;	receive values				(MV return)
;;;     ascii->char char->ascii			
;;;	index rindex
;;;	char-set-index char-set-rindex
;;;	string-reduce
;;;	char-set package
;;;	bitwise logical funs and arithmetic-shift
;;; 	join-strings				(scsh field-reader code.)


;;; References:
;;; - ftp://ftp.internic.net/rfc/rfc1630.txt 
;;;   Original RFC
;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/URI_Overview.html
;;;   General Web page of URI pointers.

;;; I wrote a URI parser that slavishly obeyed Tim Berners-Lee's
;;; spec (rfc 1630). This was a waste of time, as most URL's do not
;;; obey his spec, which is incomplete and inconsistent with the URL spec
;;; in any event. This parser is much simpler. It parses a URI into four
;;; fields:
;;;     [ <scheme> ] : <path> [ ? <search> ] [ # fragid ]
;;; The returned fields are *not* unescaped, as the rules for parsing the
;;; <path> component in particular need unescaped text, and are dependent
;;; on <scheme>. The URL parser is responsible for doing this.
;;; If the <scheme>, <search> or <fragid> portions are not specified,
;;; they are #f. Otherwise, <scheme>, <search>, and <fragid> are strings;
;;; <path> is a non-empty string list.

;;; The parsing technique is inwards from both ends.
;;; - First we search forwards for the first reserved char (= ; / # ? : space)
;;;   If it's a colon, then that's the <scheme> part, otw no <scheme> part.
;;;   Remove it.
;;; - Then we search backwards from the end for the last reserved char.
;;;   If it's a sharp, then that's the <fragment-id> part -- remove it.
;;; - Then we search backwards from the end for the last reserved char.
;;;   If it's a question-mark, then that's the <search> part -- remove it.
;;; - What's left is the path. Split at slashes. "" -> ("")
;;;
;;; This scheme is tolerant of the various ways people build broken
;;; URI's out there on the Net , p.e. \#= is a reserved character, but
;;; used unescaped in the search-part. It was given to me by Dan
;;; Connolly of the W3C and slightly modified.

;;; Returns four values: scheme, path, search, frag-id.  Each value is
;;; either #f or a string except of the path, which is a nonempty list
;;; of string (as mentioned above).


(define uri-reserved (string->char-set ";/#?: ="))

(define (parse-uri s)
  (let* ((slen (string-length s))
	 ;; Search forwards for colon (or intervening reserved char).
	 (rs1 (string-index s uri-reserved))	; 1st reserved char
	 (colon (and rs1 (char=? (string-ref s rs1) #\:) rs1))
	 (path-start (if colon (+ colon 1) 0))

	 ;; Search backwards for # (or intervening reserved char).
	 (rs-last (string-index-right s uri-reserved))
	 (sharp (and rs-last (char=? (string-ref s rs-last) #\#) rs-last))

	 ;; Search backwards for ? (or intervening reserved char).
	 ;; (NB: #\= may be after #\? and before #\#)
	 (rs-penult (string-index-right
		     s                      
		     (char-set-delete uri-reserved #\=)
		     (or sharp slen))) 
	 (ques (and rs-penult (char=? (string-ref s rs-penult) #\?) rs-penult))

	 (path-end (or ques sharp slen)))
    (values (and colon (substring s 0 colon))
	    (split-uri-path s path-start path-end)
	    (and ques (substring s (+ ques 1) (or sharp slen)))
	    (and sharp (substring s (+ sharp 1) slen)))))

;;; Caution:
;;; Don't use this proc until *after* you've parsed the URL -- unescaping
;;; might introduce reserved chars (like slashes and colons) that could
;;; blow your parse.

(define (unescape-uri s . maybe-start/end)
  (let-optionals maybe-start/end ((start 0)
				  (end (string-length s)))
    (let* ((esc-seq? (lambda (i) (and (< (+ i 2) end)
				      (char=? (string-ref s i) #\%)
				      (hex-digit? (string-ref s (+ i 1)))
				      (hex-digit? (string-ref s (+ i 2))))))
	   (hits (let lp ((i start) (hits 0)) ; count # of esc seqs.
		   (if (< i end)
		       (if (esc-seq? i)
			   (lp (+ i 3) (+ hits 1))
			   (lp (+ i 1) hits))
		       hits))))
	 
      (if (and (zero? hits) (zero? start) (= end (string-length s))) s

	  (let* ((nlen (- (- end start) (* hits 2)))   ; the new
						       ; length of the
						       ; unescaped
						       ; string
		 (ns (make-string nlen)))              ; stores the result

	    (let lp ((i start) (j 0))                  ; sweap over the string
	      (if (< j nlen)
		  (lp (cond 
		       ((esc-seq? i)                 ; unescape
						       ; escape-sequence
			(string-set! ns j
				     (let ((d1 (string-ref s (+ i 1)))
					   (d2 (string-ref s (+ i 2))))
				       (ascii->char (+ (* 16 (hexchar->int d1))
						       (hexchar->int d2)))))
			(+ i 3))
		       (else (string-set! ns j (string-ref s i))  
			     (+ i 1)))
		      (+ j 1))))
	    ns)))))

(define hex-digit?
  (let ((hex-digits (string->char-set "0123456789abcdefABCDEF")))
    (lambda (c) (char-set-contains? hex-digits c))))

; make use of the fact that numbers and characters are in order in the ascii table
(define (hexchar->int c)      
  (- (char->ascii c) 
     (if (char-numeric? c)
	 (char->ascii #\0)         
	 (- (if (char-upper-case? c)
		(char->ascii #\A)
		(char->ascii #\a))
	    10))))

(define int->hexchar
  (let ((table '#(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
		  #\A #\B #\C #\D #\E #\F)))
    (lambda (i) (vector-ref table i))))
  

;;; Caution:
;;; All reserved chars (e.g., slash, sharp, colon) get escaped: "=;/#?: "
;;; So don't apply this proc to chunks of text with syntactically meaningful
;;; reserved chars (e.g., paths with URI slashes or colons) -- they'll be 
;;; escaped, and lose their special meaning. E.g. it would be a mistake
;;; to apply ESCAPE-URI to "//lcs.mit.edu:8001/foo/bar.html" because the
;;; slashes and colons would be escaped.

(define uri-escaped-chars
  (char-set-complement (char-set-union char-set:letter+digit
				       (string->char-set "$-_@.&!*\"'(),+"))))

;;; Takes a set of chars to escape. This is because we sometimes need to
;;; escape larger sets of chars for different parts of a URI.

(define (escape-uri s . maybe-escaped-chars)
  (let-optionals maybe-escaped-chars ((escaped-chars uri-escaped-chars))
    (let ((nlen (string-fold
		 (lambda (c i)
		   (+ i
		      (if (char-set-contains? escaped-chars c)
			  3 1)))
		 0
		 s)))    ; new length of escaped string
      (if (= nlen (string-length s)) s
	  (let ((ns (make-string nlen))) 
	    (string-fold
	     (lambda (c i)     ; replace each occurance of an
			       ; character to escape with %ff where ff
			       ; is the ascii-code in hexadecimal
			       ; notation
	       (+ i (cond 
		     ((char-set-contains? escaped-chars c)
		      (string-set! ns i #\%)
		      (let* ((d (char->ascii c))
			     (dhi (bitwise-and (arithmetic-shift d -4) #xF))
			     (dlo (bitwise-and d #xF)))
			(string-set! ns (+ i 1)
				     (int->hexchar dhi))
			(string-set! ns (+ i 2)
				     (int->hexchar dlo)))
		      3)
		     (else (string-set! ns i c)
			   1))))
	     0
	     s)
	    ns)))))


;;; Four args: context URI's  <scheme> : <path> values, and
;;;            main URI's     <scheme> : <path> values.
;;; If the path cannot be resolved, return #f #f (this occurs if <path>
;;; begins with n sequential slashes, and <context-path> doesn't
;;; have that many sequential slashes anywhere). All paths are
;;; represented as non-empty lists.

(define (resolve-uri cscheme cp  scheme p)
  (if scheme (values scheme p)	; If URI has own <scheme>, it is absolute.

      (if (and (pair? p) (string=? (car p) ""))	  ; Path P begins with a slash.

	  (receive (numsl p)			; Count and strip off initial 
	      (do ((i 1       (+ i 1))  	; slashes (i.e., initial ""'s)
		   (q (cdr p) (cdr q)))
		  ((or (null? q) (not (string=? (car q) "")))
		   (values i q)))

	    ;; Skip through CP until we find that many sequential /'s.
	    (let lp ((cp-tail cp)
		     (rhead '())			; CP prefix, reversed.
		     (j 0))				; J counts sequential /
	      
	      (cond
	       ((and (pair? cp-tail) (string=? (car cp-tail) "")) ; More ""'s
		(lp (cdr cp-tail)
		    (cons (car cp-tail) rhead)
		    (+ j 0)))

	       ((= j numsl)				; Win
		(values cscheme (simplify-uri-path (rev-append rhead p))))

	       ((pair? cp-tail)				; Keep looking.
		(lp (cdr cp-tail)
		    (cons (car cp-tail) rhead)
		    1))

	       (else (values #f #f)))))		; Lose.


	  ;; P doesn't begin with a slash.
	  (values cscheme (simplify-uri-path
			    (rev-append (cdr (reverse cp)) ; Drop non-dir part
					p))))))		   ; and append P.


(define (rev-append a b)		; (append (reverse a) b)
  (let rev-app ((a a) (b b))		; Should be defined in a list-proc
    (if (pair? a)			; package, not here.
	(rev-app (cdr a) (cons (car a) b))
	b)))

;;; Cribbed from scsh's fname.scm

(define (split-uri-path uri start end)		; Split at /'s (infix grammar).
  (let split ((i start))			; "" -> ("")
    (cond
     ((>= i end) '(""))
     ((string-index uri #\/ i) =>
      (lambda (slash)
	(cons (substring uri i slash)
	      (split (+ slash 1)))))
     (else (list (substring uri i end))))))


;;; The elements of PLIST must be escaped in case they contain slashes.
;;; This procedure doesn't escape them for you; you must do that yourself:
;;;     (uri-path-list->path (map escape-uri pathlist))

(define (uri-path-list->path plist)
  (string-join plist "/"))		; Insert slashes between elts of PLIST.


;;; Remove . and <segment>/.. elements from path.  The result is a
;;; (maybe empty) list representing a path that does not contain "." 
;;; and ".." elements neither at the beginning nor somewhere else. I
;;; tried to follow RFC2396 here. The procedure returns #f if the path
;;; tries to back up past root (like "//.." or "/foo/../.."). "//" may
;;; occur somewhere in the path but not being backed up. Usually,
;;; relative paths are intended to be used with a base
;;; url. Accordingly to RFC2396 (as I hope) relative paths are
;;; considered not to start with "/". They are appended to a base
;;; URL-path and then simplified. So before you start to simplify a
;;; URL try to find out if it is a relative path (i.e. it does not
;;; start with a "/").

(define (simplify-uri-path p)    
  (if (null? p) #f                               ; P must be non-null
      (let lp ((path-list (cdr p))
	       (stack (list (car p))))
	(if (null? path-list)                   ; we're done
	  (reverse stack)
	  (cond
	   ((string=? (car path-list) "..")     ; back up
					     ; neither the empty path nor root
	    (if (not (or (null? stack) (string=? (car stack) "")))  
		(lp (cdr path-list) (cdr stack))
		#f))
	   ((string=? (car path-list) ".")      ; leave this
	    (lp (cdr path-list) stack))
	   ((string=? (car path-list) "")       ; back to root
	    (lp (cdr path-list) '("")))
	   (else                                ; usual segment
	    (lp (cdr path-list) (cons (car path-list) stack))))))))
* empty log message * 2000-09-26 10:35:26 -04:00			`;;; -- Scheme --`
			`;;; Copyright (c) 1995 by Olin Shivers.`

			`;;; URI syntax -- [scheme] : path [? search ] [# fragmentid]`

			`;;; Imports and non-R4RS'isms`
			`;;; let-optionals`
			`;;; receive values (MV return)`
			`;;; ascii->char char->ascii`
			`;;; index rindex`
			`;;; char-set-index char-set-rindex`
			`;;; string-reduce`
			`;;; char-set package`
			`;;; bitwise logical funs and arithmetic-shift`
			`;;; join-strings (scsh field-reader code.)`


			`;;; References:`
			`;;; - ftp://ftp.internic.net/rfc/rfc1630.txt`
			`;;; Original RFC`
			`;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/URI_Overview.html`
			`;;; General Web page of URI pointers.`

			`;;; I wrote a URI parser that slavishly obeyed Tim Berners-Lee's`
			`;;; spec (rfc 1630). This was a waste of time, as most URL's do not`
			`;;; obey his spec, which is incomplete and inconsistent with the URL spec`
			`;;; in any event. This parser is much simpler. It parses a URI into four`
			`;;; fields:`
			`;;; [ <scheme> ] : <path> [ ? <search> ] [ # fragid ]`
			`;;; The returned fields are not unescaped, as the rules for parsing the`
			`;;; <path> component in particular need unescaped text, and are dependent`
			`;;; on <scheme>. The URL parser is responsible for doing this.`
			`;;; If the <scheme>, <search> or <fragid> portions are not specified,`
			`;;; they are #f. Otherwise, <scheme>, <search>, and <fragid> are strings;`
			`;;; <path> is a non-empty string list.`

			`;;; The parsing technique is inwards from both ends.`
			`;;; - First we search forwards for the first reserved char (= ; / # ? : space)`
			`;;; If it's a colon, then that's the <scheme> part, otw no <scheme> part.`
			`;;; Remove it.`
			`;;; - Then we search backwards from the end for the last reserved char.`
			`;;; If it's a sharp, then that's the <fragment-id> part -- remove it.`
			`;;; - Then we search backwards from the end for the last reserved char.`
			`;;; If it's a question-mark, then that's the <search> part -- remove it.`
			`;;; - What's left is the path. Split at slashes. "" -> ("")`
			`;;;`
PARSE-URI modified: search-part is now correctly recognized, no matter frag is given or not 2001-07-16 07:31:41 -04:00			`;;; This scheme is tolerant of the various ways people build broken`
			`;;; URI's out there on the Net , p.e. \#= is a reserved character, but`
			`;;; used unescaped in the search-part. It was given to me by Dan`
			`;;; Connolly of the W3C and slightly modified.`
* empty log message * 2000-09-26 10:35:26 -04:00
documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`;;; Returns four values: scheme, path, search, frag-id. Each value is`
			`;;; either #f or a string except of the path, which is a nonempty list`
			`;;; of string (as mentioned above).`
* empty log message * 2000-09-26 10:35:26 -04:00
thread-support merged 2001-04-27 12:19:34 -04:00
PARSE-URI modified: search-part is now correctly recognized, no matter frag is given or not 2001-07-16 07:31:41 -04:00			`(define uri-reserved (string->char-set ";/#?: ="))`
* empty log message * 2000-09-26 10:35:26 -04:00
			`(define (parse-uri s)`
			`(let* ((slen (string-length s))`
			`;; Search forwards for colon (or intervening reserved char).`
* removed STRING-HAX library from sunet. (now using SRFI 13 & 14 from scsh) 2002-04-21 14:55:18 -04:00			`(rs1 (string-index s uri-reserved)) ; 1st reserved char`
* empty log message * 2000-09-26 10:35:26 -04:00			`(colon (and rs1 (char=? (string-ref s rs1) #\:) rs1))`
			`(path-start (if colon (+ colon 1) 0))`

			`;; Search backwards for # (or intervening reserved char).`
* removed STRING-HAX library from sunet. (now using SRFI 13 & 14 from scsh) 2002-04-21 14:55:18 -04:00			`(rs-last (string-index-right s uri-reserved))`
* empty log message * 2000-09-26 10:35:26 -04:00			`(sharp (and rs-last (char=? (string-ref s rs-last) #\#) rs-last))`

			`;; Search backwards for ? (or intervening reserved char).`
PARSE-URI modified: search-part is now correctly recognized, no matter frag is given or not 2001-07-16 07:31:41 -04:00			`;; (NB: #\= may be after #\? and before #\#)`
* removed STRING-HAX library from sunet. (now using SRFI 13 & 14 from scsh) 2002-04-21 14:55:18 -04:00			`(rs-penult (string-index-right`
PARSE-URI modified: search-part is now correctly recognized, no matter frag is given or not 2001-07-16 07:31:41 -04:00			`s`
			`(char-set-delete uri-reserved #\=)`
			`(or sharp slen)))`
* empty log message * 2000-09-26 10:35:26 -04:00			`(ques (and rs-penult (char=? (string-ref s rs-penult) #\?) rs-penult))`

			`(path-end (or ques sharp slen)))`
			`(values (and colon (substring s 0 colon))`
			`(split-uri-path s path-start path-end)`
			`(and ques (substring s (+ ques 1) (or sharp slen)))`
			`(and sharp (substring s (+ sharp 1) slen)))))`

			`;;; Caution:`
			`;;; Don't use this proc until after you've parsed the URL -- unescaping`
			`;;; might introduce reserved chars (like slashes and colons) that could`
			`;;; blow your parse.`

			`(define (unescape-uri s . maybe-start/end)`
			`(let-optionals maybe-start/end ((start 0)`
			`(end (string-length s)))`
			`(let* ((esc-seq? (lambda (i) (and (< (+ i 2) end)`
			`(char=? (string-ref s i) #\%)`
			`(hex-digit? (string-ref s (+ i 1)))`
			`(hex-digit? (string-ref s (+ i 2))))))`
			`(hits (let lp ((i start) (hits 0)) ; count # of esc seqs.`
			`(if (< i end)`
			`(if (esc-seq? i)`
			`(lp (+ i 3) (+ hits 1))`
			`(lp (+ i 1) hits))`
			`hits))))`

			`(if (and (zero? hits) (zero? start) (= end (string-length s))) s`

documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`(let* ((nlen (- (- end start) (* hits 2))) ; the new`
			`; length of the`
			`; unescaped`
			`; string`
* Documentation for uri.scm finished (except for resolve-uri-procedure as I can't figure out what this procedure is intended to do; someone out there who want to let me know?) * new simplify-uri-path written (as the old one did not seem to work correctly) 2001-05-20 14:57:31 -04:00			`(ns (make-string nlen))) ; stores the result`
* empty log message * 2000-09-26 10:35:26 -04:00
documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`(let lp ((i start) (j 0)) ; sweap over the string`
* empty log message * 2000-09-26 10:35:26 -04:00			`(if (< j nlen)`
sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`(lp (cond`
			`((esc-seq? i) ; unescape`
documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`; escape-sequence`
sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`(string-set! ns j`
			`(let ((d1 (string-ref s (+ i 1)))`
			`(d2 (string-ref s (+ i 2))))`
			`(ascii->char (+ (* 16 (hexchar->int d1))`
			`(hexchar->int d2)))))`
			`(+ i 3))`
			`(else (string-set! ns j (string-ref s i))`
			`(+ i 1)))`
* empty log message * 2000-09-26 10:35:26 -04:00			`(+ j 1))))`
			`ns)))))`

			`(define hex-digit?`
			`(let ((hex-digits (string->char-set "0123456789abcdefABCDEF")))`
			`(lambda (c) (char-set-contains? hex-digits c))))`

documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`; make use of the fact that numbers and characters are in order in the ascii table`
			`(define (hexchar->int c)`
* empty log message * 2000-09-26 10:35:26 -04:00			`(- (char->ascii c)`
			`(if (char-numeric? c)`
documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`(char->ascii #\0)`
* empty log message * 2000-09-26 10:35:26 -04:00			`(- (if (char-upper-case? c)`
			`(char->ascii #\A)`
			`(char->ascii #\a))`
			`10))))`

			`(define int->hexchar`
			`(let ((table '#(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9`
			`#\A #\B #\C #\D #\E #\F)))`
			`(lambda (i) (vector-ref table i))))`


			`;;; Caution:`
			`;;; All reserved chars (e.g., slash, sharp, colon) get escaped: "=;/#?: "`
			`;;; So don't apply this proc to chunks of text with syntactically meaningful`
			`;;; reserved chars (e.g., paths with URI slashes or colons) -- they'll be`
			`;;; escaped, and lose their special meaning. E.g. it would be a mistake`
			`;;; to apply ESCAPE-URI to "//lcs.mit.edu:8001/foo/bar.html" because the`
			`;;; slashes and colons would be escaped.`

			`(define uri-escaped-chars`
Fix completely borked URI-ESCAPED-CHARS. 2002-02-23 12:14:48 -05:00			`(char-set-complement (char-set-union char-set:letter+digit`
			`(string->char-set "$-_@.&!*\"'(),+"))))`
* empty log message * 2000-09-26 10:35:26 -04:00
			`;;; Takes a set of chars to escape. This is because we sometimes need to`
			`;;; escape larger sets of chars for different parts of a URI.`

			`(define (escape-uri s . maybe-escaped-chars)`
			`(let-optionals maybe-escaped-chars ((escaped-chars uri-escaped-chars))`
* removed introduced bug in ESCAPE-URI, happened by removing stringhax.scm (string-reduce was not fold-right but fold-left) 2002-04-26 06:55:28 -04:00			`(let ((nlen (string-fold`
* removed STRING-HAX library from sunet. (now using SRFI 13 & 14 from scsh) 2002-04-21 14:55:18 -04:00			`(lambda (c i)`
			`(+ i`
			`(if (char-set-contains? escaped-chars c)`
			`3 1)))`
			`0`
			`s))) ; new length of escaped string`
* empty log message * 2000-09-26 10:35:26 -04:00			`(if (= nlen (string-length s)) s`
documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`(let ((ns (make-string nlen)))`
* removed introduced bug in ESCAPE-URI, happened by removing stringhax.scm (string-reduce was not fold-right but fold-left) 2002-04-26 06:55:28 -04:00			`(string-fold`
documentation added uri.scm fixed: - uri-escaped-chars contains the characters to escape - escape-uri really uses optional argument - some files changed so they can use new char-set-lib instead of char-set-package 2001-05-17 12:48:41 -04:00			`(lambda (c i) ; replace each occurance of an`
			`; character to escape with %ff where ff`
			`; is the ascii-code in hexadecimal`
			`; notation`
sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`(+ i (cond`
			`((char-set-contains? escaped-chars c)`
			`(string-set! ns i #\%)`
			`(let* ((d (char->ascii c))`
			`(dhi (bitwise-and (arithmetic-shift d -4) #xF))`
			`(dlo (bitwise-and d #xF)))`
			`(string-set! ns (+ i 1)`
			`(int->hexchar dhi))`
			`(string-set! ns (+ i 2)`
			`(int->hexchar dlo)))`
			`3)`
			`(else (string-set! ns i c)`
			`1))))`
* removed STRING-HAX library from sunet. (now using SRFI 13 & 14 from scsh) 2002-04-21 14:55:18 -04:00			`0`
* empty log message * 2000-09-26 10:35:26 -04:00			`s)`
			`ns)))))`


			`;;; Four args: context URI's <scheme> : <path> values, and`
			`;;; main URI's <scheme> : <path> values.`
			`;;; If the path cannot be resolved, return #f #f (this occurs if <path>`
			`;;; begins with n sequential slashes, and <context-path> doesn't`
			`;;; have that many sequential slashes anywhere). All paths are`
			`;;; represented as non-empty lists.`

			`(define (resolve-uri cscheme cp scheme p)`
			`(if scheme (values scheme p) ; If URI has own <scheme>, it is absolute.`

			`(if (and (pair? p) (string=? (car p) "")) ; Path P begins with a slash.`

			`(receive (numsl p) ; Count and strip off initial`
			`(do ((i 1 (+ i 1)) ; slashes (i.e., initial ""'s)`
			`(q (cdr p) (cdr q)))`
			`((or (null? q) (not (string=? (car q) "")))`
			`(values i q)))`

			`;; Skip through CP until we find that many sequential /'s.`
			`(let lp ((cp-tail cp)`
			`(rhead '()) ; CP prefix, reversed.`
			`(j 0)) ; J counts sequential /`

sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`(cond`
			`((and (pair? cp-tail) (string=? (car cp-tail) "")) ; More ""'s`
			`(lp (cdr cp-tail)`
			`(cons (car cp-tail) rhead)`
			`(+ j 0)))`
* empty log message * 2000-09-26 10:35:26 -04:00
sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`((= j numsl) ; Win`
			`(values cscheme (simplify-uri-path (rev-append rhead p))))`
* empty log message * 2000-09-26 10:35:26 -04:00
sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`((pair? cp-tail) ; Keep looking.`
			`(lp (cdr cp-tail)`
			`(cons (car cp-tail) rhead)`
			`1))`
* empty log message * 2000-09-26 10:35:26 -04:00
sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`(else (values #f #f))))) ; Lose.`
* empty log message * 2000-09-26 10:35:26 -04:00

			`;; P doesn't begin with a slash.`
			`(values cscheme (simplify-uri-path`
			`(rev-append (cdr (reverse cp)) ; Drop non-dir part`
			`p)))))) ; and append P.`


			`(define (rev-append a b) ; (append (reverse a) b)`
			`(let rev-app ((a a) (b b)) ; Should be defined in a list-proc`
			`(if (pair? a) ; package, not here.`
			`(rev-app (cdr a) (cons (car a) b))`
			`b)))`

			`;;; Cribbed from scsh's fname.scm`

			`(define (split-uri-path uri start end) ; Split at /'s (infix grammar).`
			`(let split ((i start)) ; "" -> ("")`
sunet can be used with scsh 0.6 now (hopefully). explicitly named every interface moved TOOTHLESS.SCM to MODULES.SCM removed macros from CONDITIONALS.SCM (UNLESS, WHEN, ?, SWITCH, SWITCHQ) thereby removed conditionals.scm and the modules SWITCH-SYNTAX and CONDHAX. 2001-08-20 07:31:03 -04:00			`(cond`
			`((>= i end) '(""))`
			`((string-index uri #\/ i) =>`
			`(lambda (slash)`
			`(cons (substring uri i slash)`
			`(split (+ slash 1)))))`
			`(else (list (substring uri i end))))))`
* empty log message * 2000-09-26 10:35:26 -04:00

			`;;; The elements of PLIST must be escaped in case they contain slashes.`
			`;;; This procedure doesn't escape them for you; you must do that yourself:`
			`;;; (uri-path-list->path (map escape-uri pathlist))`

			`(define (uri-path-list->path plist)`
Fixed broken usage of string-join. 2001-10-08 13:33:13 -04:00			`(string-join plist "/")) ; Insert slashes between elts of PLIST.`
* empty log message * 2000-09-26 10:35:26 -04:00

* Documentation for uri.scm finished (except for resolve-uri-procedure as I can't figure out what this procedure is intended to do; someone out there who want to let me know?) * new simplify-uri-path written (as the old one did not seem to work correctly) 2001-05-20 14:57:31 -04:00			`;;; Remove . and <segment>/.. elements from path. The result is a`
			`;;; (maybe empty) list representing a path that does not contain "."`
			`;;; and ".." elements neither at the beginning nor somewhere else. I`
			`;;; tried to follow RFC2396 here. The procedure returns #f if the path`
			`;;; tries to back up past root (like "//.." or "/foo/../.."). "//" may`
			`;;; occur somewhere in the path but not being backed up. Usually,`
			`;;; relative paths are intended to be used with a base`
			`;;; url. Accordingly to RFC2396 (as I hope) relative paths are`
			`;;; considered not to start with "/". They are appended to a base`
			`;;; URL-path and then simplified. So before you start to simplify a`
			`;;; URL try to find out if it is a relative path (i.e. it does not`
			`;;; start with a "/").`

* finished adding syslogs to ftpd * trim-spaces doesn't throw exception on emtpy strings any more * spawn-thunk in FTPD moved to new procedure * always catching SOCKET->STRING errors * log-calls don't produce extra calculations anymore * consequently logging reply codes : ---------------------------------------------------------------------- 2001-07-13 13:21:39 -04:00			`(define (simplify-uri-path p)`
			`(if (null? p) #f ; P must be non-null`
* Documentation for uri.scm finished (except for resolve-uri-procedure as I can't figure out what this procedure is intended to do; someone out there who want to let me know?) * new simplify-uri-path written (as the old one did not seem to work correctly) 2001-05-20 14:57:31 -04:00			`(let lp ((path-list (cdr p))`
			`(stack (list (car p))))`
			`(if (null? path-list) ; we're done`
			`(reverse stack)`
			`(cond`
			`((string=? (car path-list) "..") ; back up`
			`; neither the empty path nor root`
			`(if (not (or (null? stack) (string=? (car stack) "")))`
			`(lp (cdr path-list) (cdr stack))`
			`#f))`
			`((string=? (car path-list) ".") ; leave this`
			`(lp (cdr path-list) stack))`
			`((string=? (car path-list) "") ; back to root`
			`(lp (cdr path-list) '("")))`
			`(else ; usual segment`
			`(lp (cdr path-list) (cons (car path-list) stack))))))))`

* finished adding syslogs to ftpd * trim-spaces doesn't throw exception on emtpy strings any more * spawn-thunk in FTPD moved to new procedure * always catching SOCKET->STRING errors * log-calls don't produce extra calculations anymore * consequently logging reply codes : ---------------------------------------------------------------------- 2001-07-13 13:21:39 -04:00