;;; Delimited readers
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; These procedures ran their inner I/O loop in a C primitive in
;;; earlier versions of scsh. In a multi-threaded environment this
;;; causes lots of trouble in case the operation would
;;; block. Therefore the current implementation runs in Scheme but
;;; operates directly on the buffer of the port for speed. This also
;;; allows us to implement the push-back behaviour without a peek/read
;;; pair.
;;;

;;; (read-delimited delims [port delim-action])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Returns a string or the EOF object. DELIM-ACTION determines what to do
;;; with the terminating delimiter:
;;; - PEEK
;;;   Leave it in the input stream for later reading.
;;; - TRIM (the default)
;;;   Drop it on the floor.
;;; - CONCAT
;;;   Append it to the returned string.
;;; - SPLIT
;;;   Return it as a second return value.
;;;
;;; We repeatedly allocate a buffer and fill it with READ-DELIMITED!
;;; until we hit a delimiter or EOF. Each time through the loop, we
;;; double the total buffer space, so the loop terminates with a log
;;; number of reads, but uses at most double the optimal buffer space.

(define (read-delimited delims . args)
  (let-optionals args ((port         (current-input-port))
		       (delim-action 'trim))
	(let ((substr (lambda (s end)		; Smart substring.
			(if (= end (string-length s)) s
			    (substring s 0 end))))
	      (delims (x->char-set delims))
	      (gobble? (not (eq? delim-action 'peek))))
      ;; BUFLEN is total amount of buffer space allocated to date.
      (let lp ((strs '()) (buflen 80) (buf (make-string 80)))
	(receive (terminator num-read)
	         (%read-delimited! delims buf gobble? port)
	  (if terminator

	      ;; We are done. NUM-READ is either a read count or EOF.
	      (let ((retval (if (and (zero? num-read)
				     (eof-object? terminator)
				     (null? strs))
				terminator		; EOF -- got nothing.

				;; Got something. Stick all the strings
				;; together, plus the terminator if the
				;; client said 'CONCAT.
				(let ((s (substr buf num-read)))
				  (cond ((and (eq? delim-action 'concat)
					      (char? terminator))
					 (apply string-append
						(reverse `(,(string terminator)
							   ,s . ,strs))))

					((null? strs) s)    ; Gratuitous opt.
					(else (apply string-append
						     (reverse (cons s strs)))))))))
		(if (eq? delim-action 'split)
		    (values retval terminator)
		    retval))

	      ;; We are not done. Loop and read in some more.
	      (lp (cons buf strs)
		  (+ buflen buflen)
		  (make-string buflen))))))))


;;; (read-delimited! delims buf [port delim-action start end])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Returns:
;;; - EOF if at end of file, and a non-zero read was requested.
;;; - Integer j if that many chars read into BUF.
;;; - #f if the buffer was filled w/o finding a delimiter.
;;;
;;; DELIM-ACTION determines what to do with the terminating delimiter;
;;; it is as in READ-DELIMITED.
;;;
;;; In determining the return value, there is an ambiguous case: when the 
;;; buffer is full, *and* the following char is a delimiter char or EOF.
;;; Ties are broken favoring termination over #f -- after filling the buffer,
;;; READ-DELIMITED! won't return #f until it has peeked one past the end
;;; of the buffer to ensure the next char doesn't terminate input (or is EOF).
;;; However, this rule is relaxed with delim-action = CONCAT -- if the buffer
;;; is full, READ-DELIMITED! won't wait around trying to peek at the following
;;; char to determine whether or not it is a delimiter char, since it doesn't
;;; have space to store the character anyway. It simply immediately returns #f;
;;; a following read can pick up the delimiter char.

(define (read-delimited! delims buf . args) ; [port delim-action start end]
  (let-optionals args ((port         (current-input-port))
		       (delim-action 'trim)
		       (start        0)
		       (end          (string-length buf)))
    (receive (terminator num-read)
	     (%read-delimited! delims buf
			       (not (eq? delim-action 'peek)) ;Gobble delim?
			       port
			       start
			       (if (eq? delim-action 'concat)
				   (- end 1) ; Room for terminator.
				   end))

      (if terminator	; Check for buffer overflow.
	  (let ((retval (if (and (zero? num-read)
				 (eof-object? terminator))
			    terminator	; EOF -- got nothing.
			    num-read))) ; Got something.

	    (case delim-action
	      ((peek trim)	retval)
	      ((split)	(values retval terminator))
	      ((concat)	(cond ((char? terminator)
			       (string-set! buf (+ start num-read) terminator)
			       (+ num-read 1))
			      (else retval)))))

	  ;; Buffer overflow.
	  (case delim-action
	    ((peek trim) #f)
	    ((split)     (values #f #f))
	    ((concat)    (let ((last (read-char port)))
			   (if (char? last)
			       (string-set! buf (+ start num-read) last))
			   (and (or (eof-object? last)
				    (char-set-contains? (x->char-set delims)
							last))
				(+ num-read 1)))))))))
		  

;;; (%read-delimited! delims buf gobble? [port start end])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This low-level routine uses a different interface. It returns two values:
;;; - TERMINATOR: A value describing why the read was terminated:
;;;   + character or eof-object => read terminated by this value; 
;;;   + #f                      => filled buffer w/o terminating read.
;;; - NUM-READ: Number of chars read into buf.
;;; 
;;; Note:
;;; - Invariant: TERMINATOR = #f  =>  NUM-READ = END - START.
;;; - Invariant: TERMINATOR = eof-object and NUM-READ = 0 => at EOF.
;;; - When determining the TERMINATOR return value, ties are broken
;;;   favoring character or the eof-object over #f. That is, if the buffer
;;;   fills up, %READ-DELIMITED! will peek at one more character from the
;;;   input stream to determine if it terminates the input. If so, that
;;;   is returned, not #f.
;;;
;;; If GOBBLE? is true, then a terminator character is removed from
;;; the input stream. Otherwise, it is left in place for a following input
;;; operation.


(define (port-buffer-read-delimited delims buf gobble? port start end)
  (obtain-port-lock port)
  (let ((the-port-limit (port-limit port)))
    (let lp ((i start) (lp-port-index (port-index port))) 
      (cond ((port-pending-eof? port)
	     (set-port-index! port lp-port-index)
	     (release-port-lock port)
	     (values (eof-object) (- i start)))
	    ((>= i end)
	     (set-port-index! port lp-port-index)
	     (release-port-lock port)
	     (values #f (- i start)))
	    ((< lp-port-index the-port-limit)
	     (let ((the-read-char 
		    (ascii->char (byte-vector-ref 
				  (port-buffer port) lp-port-index))))
	       (if (char-set-contains? delims the-read-char)
		   (begin 
		     (if gobble? 
			 (set-port-index! port (+ lp-port-index 1))
			 (set-port-index! port lp-port-index))
		     (release-port-lock port)
		     (values the-read-char (- i start)))
		   (begin
		     (string-set! buf i the-read-char)
		     (lp (+ i 1) (+ lp-port-index 1))))))
	    (else  (set-port-index! port 0)
		   (set-port-limit! port 0)
		   (release-port-lock port)
		   (values 'port-buffer-exhausted (- i start)))))))
		 


(define (%read-delimited! delims buf gobble? . args)
  (let-optionals args ((port  (current-input-port))
		       (start 0)
		       (end   (string-length buf)))

    (let ((delims (x->char-set delims)))
      (let lp ((start start) (total 0))
	(receive (terminator num-read) 
	    (port-buffer-read-delimited delims buf gobble? port start end)
	  (if (not (eq? terminator 'port-buffer-exhausted))
	      (values terminator (+ num-read total))
	      (begin (peek-char port)	; kludge to fill the buffer
		     (lp (+ start num-read) (+ total num-read)))))))))
			  


; overwrites port-index :-(
(define (push-back port char)
  (byte-vector-set! (port-buffer port) (port-index port) (char->ascii char))
  (set-port-limit! port (+ (port-limit port) 1)))


(define (skip-char-set skip-chars . maybe-port)
  (let ((port (:optional maybe-port (current-input-port)))
	(cset (x->char-set skip-chars)))
    
    (let lp ((total 0))
      (receive (succ num-read) (buffer-skip-char-set cset port)
	(if (not succ)
	    (+ total num-read)		; eof
	    (begin (peek-char port)	; kludge to fill the buffer
		   (lp  (+ total num-read))))))))

(define (buffer-skip-char-set cset port)
  (let ((the-port-limit (port-limit port)))
    (let lp ((lp-port-index (port-index port)) (i 0))
      (cond ((port-pending-eof? port)
	     (set-port-index! port lp-port-index)
	     (values #f i))
	    ((< lp-port-index the-port-limit)
	     (let ((the-read-char 
		    (ascii->char (byte-vector-ref 
				  (port-buffer port) lp-port-index))))
	       (cond ((char-set-contains? cset the-read-char)
		      (lp (+ lp-port-index 1) (+ i 1)))
		     (else 
		      (set-port-index! port lp-port-index)
		      (values #f i)))))
	    (else (set-port-index! port 0)
		  (set-port-limit! port 0)
		  (values 'port-buffer-exhausted i))))))

;;; (read-line [port delim-action])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Read in a line of data. Input is terminated by either a newline or EOF.
;;; The newline is trimmed from the string by default.

(define charset:newline (char-set #\newline))

(define (read-line . rest) (apply read-delimited charset:newline rest))


;;; (read-paragraph [port handle-delim])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define blank-line-regexp (rx bos (* white) #\newline eos))

(define (read-paragraph . args)
  (let-optionals args ((port         (current-input-port))
		       (handle-delim 'trim))
    ;; First, skip all blank lines.
    (let lp ()
      (let ((line (read-line port 'concat)))
	(cond ((eof-object? line)
	       (if (eq? handle-delim 'split) (values line line) line))

	      ((regexp-search? blank-line-regexp line) (lp))

	      ;; Then, read in non-blank lines.
	      (else
	       (let lp ((lines (list line)))
		 (let ((line (read-line port 'concat)))
		   (if (and (string? line)
			    (not (regexp-search? blank-line-regexp line)))

		       (lp (cons line lines))

		       ;; Return the paragraph
		       (let ((->str (lambda (lns) (apply string-append (reverse lns)))))
			 (case handle-delim
			   ((trim) (->str lines))

			   ((concat)
			    (->str (if (eof-object? line) lines (cons line lines))))

			   ((split)
			    (values (->str lines) line))

			   (else (error "Illegal HANDLE-DELIM parameter to READ-PARAGRAPH")))))))))))))