289 lines
10 KiB
Scheme
289 lines
10 KiB
Scheme
|
;;; Delimited readers
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; These procedures ran their inner I/O loop in a C primitive in
|
||
|
;;; earlier versions of scsh. In a multi-threaded environment this
|
||
|
;;; causes lots of trouble in case the operation would
|
||
|
;;; block. Therefore the current implementation runs in Scheme but
|
||
|
;;; operates directly on the buffer of the port for speed. This also
|
||
|
;;; allows us to implement the push-back behaviour without a peek/read
|
||
|
;;; pair.
|
||
|
;;;
|
||
|
|
||
|
;;; (read-delimited delims [port delim-action])
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; Returns a string or the EOF object. DELIM-ACTION determines what to do
|
||
|
;;; with the terminating delimiter:
|
||
|
;;; - PEEK
|
||
|
;;; Leave it in the input stream for later reading.
|
||
|
;;; - TRIM (the default)
|
||
|
;;; Drop it on the floor.
|
||
|
;;; - CONCAT
|
||
|
;;; Append it to the returned string.
|
||
|
;;; - SPLIT
|
||
|
;;; Return it as a second return value.
|
||
|
;;;
|
||
|
;;; We repeatedly allocate a buffer and fill it with READ-DELIMITED!
|
||
|
;;; until we hit a delimiter or EOF. Each time through the loop, we
|
||
|
;;; double the total buffer space, so the loop terminates with a log
|
||
|
;;; number of reads, but uses at most double the optimal buffer space.
|
||
|
|
||
|
(define (read-delimited delims . args)
|
||
|
(let-optionals args ((port (current-input-port))
|
||
|
(delim-action 'trim))
|
||
|
(let ((substr (lambda (s end) ; Smart substring.
|
||
|
(if (= end (string-length s)) s
|
||
|
(substring s 0 end))))
|
||
|
(delims (x->char-set delims))
|
||
|
(gobble? (not (eq? delim-action 'peek))))
|
||
|
;; BUFLEN is total amount of buffer space allocated to date.
|
||
|
(let lp ((strs '()) (buflen 80) (buf (make-string 80)))
|
||
|
(receive (terminator num-read)
|
||
|
(%read-delimited! delims buf gobble? port)
|
||
|
(if terminator
|
||
|
|
||
|
;; We are done. NUM-READ is either a read count or EOF.
|
||
|
(let ((retval (if (and (zero? num-read)
|
||
|
(eof-object? terminator)
|
||
|
(null? strs))
|
||
|
terminator ; EOF -- got nothing.
|
||
|
|
||
|
;; Got something. Stick all the strings
|
||
|
;; together, plus the terminator if the
|
||
|
;; client said 'CONCAT.
|
||
|
(let ((s (substr buf num-read)))
|
||
|
(cond ((and (eq? delim-action 'concat)
|
||
|
(char? terminator))
|
||
|
(apply string-append
|
||
|
(reverse `(,(string terminator)
|
||
|
,s . ,strs))))
|
||
|
|
||
|
((null? strs) s) ; Gratuitous opt.
|
||
|
(else (apply string-append
|
||
|
(reverse (cons s strs)))))))))
|
||
|
(if (eq? delim-action 'split)
|
||
|
(values retval terminator)
|
||
|
retval))
|
||
|
|
||
|
;; We are not done. Loop and read in some more.
|
||
|
(lp (cons buf strs)
|
||
|
(+ buflen buflen)
|
||
|
(make-string buflen))))))))
|
||
|
|
||
|
|
||
|
;;; (read-delimited! delims buf [port delim-action start end])
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; Returns:
|
||
|
;;; - EOF if at end of file, and a non-zero read was requested.
|
||
|
;;; - Integer j if that many chars read into BUF.
|
||
|
;;; - #f if the buffer was filled w/o finding a delimiter.
|
||
|
;;;
|
||
|
;;; DELIM-ACTION determines what to do with the terminating delimiter;
|
||
|
;;; it is as in READ-DELIMITED.
|
||
|
;;;
|
||
|
;;; In determining the return value, there is an ambiguous case: when the
|
||
|
;;; buffer is full, *and* the following char is a delimiter char or EOF.
|
||
|
;;; Ties are broken favoring termination over #f -- after filling the buffer,
|
||
|
;;; READ-DELIMITED! won't return #f until it has peeked one past the end
|
||
|
;;; of the buffer to ensure the next char doesn't terminate input (or is EOF).
|
||
|
;;; However, this rule is relaxed with delim-action = CONCAT -- if the buffer
|
||
|
;;; is full, READ-DELIMITED! won't wait around trying to peek at the following
|
||
|
;;; char to determine whether or not it is a delimiter char, since it doesn't
|
||
|
;;; have space to store the character anyway. It simply immediately returns #f;
|
||
|
;;; a following read can pick up the delimiter char.
|
||
|
|
||
|
(define (read-delimited! delims buf . args) ; [port delim-action start end]
|
||
|
(let-optionals args ((port (current-input-port))
|
||
|
(delim-action 'trim)
|
||
|
(start 0)
|
||
|
(end (string-length buf)))
|
||
|
(receive (terminator num-read)
|
||
|
(%read-delimited! delims buf
|
||
|
(not (eq? delim-action 'peek)) ;Gobble delim?
|
||
|
port
|
||
|
start
|
||
|
(if (eq? delim-action 'concat)
|
||
|
(- end 1) ; Room for terminator.
|
||
|
end))
|
||
|
|
||
|
(if terminator ; Check for buffer overflow.
|
||
|
(let ((retval (if (and (zero? num-read)
|
||
|
(eof-object? terminator))
|
||
|
terminator ; EOF -- got nothing.
|
||
|
num-read))) ; Got something.
|
||
|
|
||
|
(case delim-action
|
||
|
((peek trim) retval)
|
||
|
((split) (values retval terminator))
|
||
|
((concat) (cond ((char? terminator)
|
||
|
(string-set! buf (+ start num-read) terminator)
|
||
|
(+ num-read 1))
|
||
|
(else retval)))))
|
||
|
|
||
|
;; Buffer overflow.
|
||
|
(case delim-action
|
||
|
((peek trim) #f)
|
||
|
((split) (values #f #f))
|
||
|
((concat) (let ((last (read-char port)))
|
||
|
(if (char? last)
|
||
|
(string-set! buf (+ start num-read) last))
|
||
|
(and (or (eof-object? last)
|
||
|
(char-set-contains? (x->char-set delims)
|
||
|
last))
|
||
|
(+ num-read 1)))))))))
|
||
|
|
||
|
|
||
|
;;; (%read-delimited! delims buf gobble? [port start end])
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; This low-level routine uses a different interface. It returns two values:
|
||
|
;;; - TERMINATOR: A value describing why the read was terminated:
|
||
|
;;; + character or eof-object => read terminated by this value;
|
||
|
;;; + #f => filled buffer w/o terminating read.
|
||
|
;;; - NUM-READ: Number of chars read into buf.
|
||
|
;;;
|
||
|
;;; Note:
|
||
|
;;; - Invariant: TERMINATOR = #f => NUM-READ = END - START.
|
||
|
;;; - Invariant: TERMINATOR = eof-object and NUM-READ = 0 => at EOF.
|
||
|
;;; - When determining the TERMINATOR return value, ties are broken
|
||
|
;;; favoring character or the eof-object over #f. That is, if the buffer
|
||
|
;;; fills up, %READ-DELIMITED! will peek at one more character from the
|
||
|
;;; input stream to determine if it terminates the input. If so, that
|
||
|
;;; is returned, not #f.
|
||
|
;;;
|
||
|
;;; If GOBBLE? is true, then a terminator character is removed from
|
||
|
;;; the input stream. Otherwise, it is left in place for a following input
|
||
|
;;; operation.
|
||
|
|
||
|
|
||
|
(define (port-buffer-read-delimited delims buf gobble? port start end)
|
||
|
(obtain-port-lock port)
|
||
|
(let ((the-port-limit (port-limit port)))
|
||
|
(let lp ((i start) (lp-port-index (port-index port)))
|
||
|
(cond ((port-pending-eof? port)
|
||
|
(set-port-index! port lp-port-index)
|
||
|
(release-port-lock port)
|
||
|
(values (eof-object) (- i start)))
|
||
|
((>= i end)
|
||
|
(set-port-index! port lp-port-index)
|
||
|
(release-port-lock port)
|
||
|
(values #f (- i start)))
|
||
|
((< lp-port-index the-port-limit)
|
||
|
(let ((the-read-char
|
||
|
(ascii->char (byte-vector-ref
|
||
|
(port-buffer port) lp-port-index))))
|
||
|
(if (char-set-contains? delims the-read-char)
|
||
|
(begin
|
||
|
(if gobble?
|
||
|
(set-port-index! port (+ lp-port-index 1))
|
||
|
(set-port-index! port lp-port-index))
|
||
|
(release-port-lock port)
|
||
|
(values the-read-char (- i start)))
|
||
|
(begin
|
||
|
(string-set! buf i the-read-char)
|
||
|
(lp (+ i 1) (+ lp-port-index 1))))))
|
||
|
(else (set-port-index! port 0)
|
||
|
(set-port-limit! port 0)
|
||
|
(release-port-lock port)
|
||
|
(values 'port-buffer-exhausted (- i start)))))))
|
||
|
|
||
|
|
||
|
|
||
|
(define (%read-delimited! delims buf gobble? . args)
|
||
|
(let-optionals args ((port (current-input-port))
|
||
|
(start 0)
|
||
|
(end (string-length buf)))
|
||
|
|
||
|
(let ((delims (x->char-set delims)))
|
||
|
(let lp ((start start) (total 0))
|
||
|
(receive (terminator num-read)
|
||
|
(port-buffer-read-delimited delims buf gobble? port start end)
|
||
|
(if (not (eq? terminator 'port-buffer-exhausted))
|
||
|
(values terminator (+ num-read total))
|
||
|
(begin (peek-char port) ; kludge to fill the buffer
|
||
|
(lp (+ start num-read) (+ total num-read)))))))))
|
||
|
|
||
|
|
||
|
|
||
|
; overwrites port-index :-(
|
||
|
(define (push-back port char)
|
||
|
(byte-vector-set! (port-buffer port) (port-index port) (char->ascii char))
|
||
|
(set-port-limit! port (+ (port-limit port) 1)))
|
||
|
|
||
|
|
||
|
(define (skip-char-set skip-chars . maybe-port)
|
||
|
(let ((port (:optional maybe-port (current-input-port)))
|
||
|
(cset (x->char-set skip-chars)))
|
||
|
|
||
|
(let lp ((total 0))
|
||
|
(receive (succ num-read) (buffer-skip-char-set cset port)
|
||
|
(if (not succ)
|
||
|
(+ total num-read) ; eof
|
||
|
(begin (peek-char port) ; kludge to fill the buffer
|
||
|
(lp (+ total num-read))))))))
|
||
|
|
||
|
(define (buffer-skip-char-set cset port)
|
||
|
(let ((the-port-limit (port-limit port)))
|
||
|
(let lp ((lp-port-index (port-index port)) (i 0))
|
||
|
(cond ((port-pending-eof? port)
|
||
|
(set-port-index! port lp-port-index)
|
||
|
(values #f i))
|
||
|
((< lp-port-index the-port-limit)
|
||
|
(let ((the-read-char
|
||
|
(ascii->char (byte-vector-ref
|
||
|
(port-buffer port) lp-port-index))))
|
||
|
(cond ((char-set-contains? cset the-read-char)
|
||
|
(lp (+ lp-port-index 1) (+ i 1)))
|
||
|
(else
|
||
|
(set-port-index! port lp-port-index)
|
||
|
(values #f i)))))
|
||
|
(else (set-port-index! port 0)
|
||
|
(set-port-limit! port 0)
|
||
|
(values 'port-buffer-exhausted i))))))
|
||
|
|
||
|
;;; (read-line [port delim-action])
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
;;; Read in a line of data. Input is terminated by either a newline or EOF.
|
||
|
;;; The newline is trimmed from the string by default.
|
||
|
|
||
|
(define charset:newline (char-set #\newline))
|
||
|
|
||
|
(define (read-line . rest) (apply read-delimited charset:newline rest))
|
||
|
|
||
|
|
||
|
;;; (read-paragraph [port handle-delim])
|
||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
|
||
|
(define blank-line-regexp (rx bos (* white) #\newline eos))
|
||
|
|
||
|
(define (read-paragraph . args)
|
||
|
(let-optionals args ((port (current-input-port))
|
||
|
(handle-delim 'trim))
|
||
|
;; First, skip all blank lines.
|
||
|
(let lp ()
|
||
|
(let ((line (read-line port 'concat)))
|
||
|
(cond ((eof-object? line)
|
||
|
(if (eq? handle-delim 'split) (values line line) line))
|
||
|
|
||
|
((regexp-search? blank-line-regexp line) (lp))
|
||
|
|
||
|
;; Then, read in non-blank lines.
|
||
|
(else
|
||
|
(let lp ((lines (list line)))
|
||
|
(let ((line (read-line port 'concat)))
|
||
|
(if (and (string? line)
|
||
|
(not (regexp-search? blank-line-regexp line)))
|
||
|
|
||
|
(lp (cons line lines))
|
||
|
|
||
|
;; Return the paragraph
|
||
|
(let ((->str (lambda (lns) (apply string-append (reverse lns)))))
|
||
|
(case handle-delim
|
||
|
((trim) (->str lines))
|
||
|
|
||
|
((concat)
|
||
|
(->str (if (eof-object? line) lines (cons line lines))))
|
||
|
|
||
|
((split)
|
||
|
(values (->str lines) line))
|
||
|
|
||
|
(else (error "Illegal HANDLE-DELIM parameter to READ-PARAGRAPH")))))))))))))
|