;;; -*- Scheme -*-

;;; This file is part of the Scheme Untergrund Networking package.

;;; Copyright (c) 1995 by Olin Shivers.
;;; For copyright information, see the file COPYING which comes with
;;; the distribution.

;;; URI syntax -- [scheme] : path [? search ] [# fragmentid]

;;; References:
;;; - http://www.w3.org/Addressing/rfc1630.txt
;;;   Original RFC
;;; - http://www.w3.org/hypertext/WWW/Addressing/URL/URI_Overview.html
;;;   General Web page of URI pointers.

(define uri-reserved (string->char-set ";/#?: ="))

(define uri-reserved-sans-= (char-set-delete uri-reserved #\=))

(define (parse-uri s)
  (let* ((slen (string-length s))
	 ;; Search forwards for colon (or intervening reserved char).
	 (rs1 (string-index s uri-reserved))	; 1st reserved char
	 (colon (and rs1 (char=? (string-ref s rs1) #\:) rs1))
	 (path-start (if colon (+ colon 1) 0))

	 ;; Search backwards for # (or intervening reserved char).
	 (rs-last (string-index-right s uri-reserved))
	 (sharp (and rs-last (char=? (string-ref s rs-last) #\#) rs-last))

	 ;; Search backwards for ? (or intervening reserved char).
	 ;; (NB: #\= may be after #\? and before #\#)
	 (rs-penult (string-index-right s                      
					uri-reserved-sans-=
					path-start
					(or sharp slen)))
	 (ques (and rs-penult (char=? (string-ref s rs-penult) #\?) rs-penult))

	 (path-end (or ques sharp slen)))
    (values (and colon (substring s 0 colon))
	    (split-uri s path-start path-end)
	    (and ques (substring s (+ ques 1) (or sharp slen)))
	    (and sharp (substring s (+ sharp 1) slen)))))

;;; Caution:
;;; Don't use this proc until *after* you've parsed the URL -- unescaping
;;; might introduce reserved chars (like slashes and colons) that could
;;; blow your parse.

(define (unescape-uri s . maybe-start/end)
  (let-optionals maybe-start/end ((start 0)
				  (end (string-length s)))
    (let* ((esc-seq? (lambda (i) (and (< (+ i 2) end)
				      (char=? (string-ref s i) #\%)
				      (hex-digit? (string-ref s (+ i 1)))
				      (hex-digit? (string-ref s (+ i 2))))))
	   (hits (let lp ((i start) (hits 0)) ; count # of esc seqs.
		   (if (< i end)
		       (if (esc-seq? i)
			   (lp (+ i 3) (+ hits 1))
			   (lp (+ i 1) hits))
		       hits))))
	 
      (if (and (zero? hits) (zero? start) (= end (string-length s)))
	  s
	  (let* ((nlen (- (- end start) (* hits 2))) ; the new length
						     ; of the
						     ; unescaped
						     ; string stores
						     ; the result
		 (ns (make-string nlen)))

	    (let lp ((i start) (j 0))	; sweep over the string
	      (if (< j nlen)
		  (lp (cond 
		       ((esc-seq? i)	; unescape
					; escape-sequence
			(string-set! ns j
				     (let ((d1 (string-ref s (+ i 1)))
					   (d2 (string-ref s (+ i 2))))
				       (ascii->char (+ (* 16 (hexchar->int d1))
						       (hexchar->int d2)))))
			(+ i 3))
		       (else (string-set! ns j (string-ref s i))  
			     (+ i 1)))
		      (+ j 1))))
	    ns)))))

(define hex-digit?
  (let ((hex-digits (string->char-set "0123456789abcdefABCDEF")))
    (lambda (c) (char-set-contains? hex-digits c))))

; make use of the fact that numbers and characters are in order in the ascii table
(define (hexchar->int c)      
  (- (char->ascii c) 
     (if (char-numeric? c)
	 (char->ascii #\0)         
	 (- (if (char-upper-case? c)
		(char->ascii #\A)
		(char->ascii #\a))
	    10))))

(define int->hexchar
  (let ((table '#(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9
		  #\A #\B #\C #\D #\E #\F)))
    (lambda (i) (vector-ref table i))))
  

;;; Caution:
;;; All reserved chars (e.g., slash, sharp, colon) get escaped: "=;/#?: "
;;; So don't apply this proc to chunks of text with syntactically meaningful
;;; reserved chars (e.g., paths with URI slashes or colons) -- they'll be 
;;; escaped, and lose their special meaning. E.g. it would be a mistake
;;; to apply ESCAPE-URI to "//lcs.mit.edu:8001/foo/bar.html" because the
;;; slashes and colons would be escaped.

(define uri-escaped-chars
  (char-set-complement (char-set-union char-set:letter+digit
				       (string->char-set "$-_@.&!*\"'(),+"))))

;;; Takes a set of chars to escape. This is because we sometimes need to
;;; escape larger sets of chars for different parts of a URI.

(define (escape-uri s . maybe-escaped-chars)
  (let-optionals maybe-escaped-chars ((escaped-chars uri-escaped-chars))
    (let ((nlen (string-fold
		 (lambda (c i)
		   (+ i
		      (if (char-set-contains? escaped-chars c)
			  3
			  1)))
		 0
		 s)))    ; new length of escaped string
      (if (= nlen (string-length s))
	  s
	  (let ((ns (make-string nlen))) 
	    (string-fold
	     (lambda (c i)     ; replace each occurance of an
			       ; character to escape with %ff where ff
			       ; is the ascii-code in hexadecimal
			       ; notation
	       (+ i (cond 
		     ((char-set-contains? escaped-chars c)
		      (string-set! ns i #\%)
		      (let* ((d (char->ascii c))
			     (dhi (bitwise-and (arithmetic-shift d -4) #xF))
			     (dlo (bitwise-and d #xF)))
			(string-set! ns (+ i 1)
				     (int->hexchar dhi))
			(string-set! ns (+ i 2)
				     (int->hexchar dlo)))
		      3)
		     (else (string-set! ns i c)
			   1))))
	     0
	     s)
	    ns)))))

;;; Cribbed from scsh's fname.scm

(define (split-uri uri start end)		; Split at /'s (infix grammar).
  (let split ((i start))			; "" -> ("")
    (cond
     ((>= i end) '(""))
     ((string-index uri #\/ i) =>
      (lambda (slash)
	(cons (substring uri i slash)
	      (split (+ slash 1)))))
     (else (list (substring uri i end))))))


;;; The elements of PLIST must be escaped in case they contain slashes.
;;; This procedure doesn't escape them for you; you must do that yourself:
;;;     (uri-path->uri (map escape-uri pathlist))

(define (uri-path->uri plist)
  (string-join plist "/"))		; Insert slashes between elts of PLIST.

(define (simplify-uri-path p)    
  (if (null? p)
      #f				; P must be non-null
      (let lp ((path-list (cdr p))
	       (stack (list (car p))))
	(if (null? path-list)		; we're done
	    (reverse stack)
	    (cond
	     ((string=? (car path-list) "..") ; back up
					; neither the empty path nor root
	      (if (not (or (null? stack) (string=? (car stack) "")))  
		  (lp (cdr path-list) (cdr stack))
		  #f))
	     ((string=? (car path-list) ".") ; leave this
	      (lp (cdr path-list) stack))
	     ((string=? (car path-list) "") ; back to root
	      (lp (cdr path-list) '("")))
	     (else			; usual segment
	      (lp (cdr path-list) (cons (car path-list) stack))))))))