167 lines
6.4 KiB
Scheme
167 lines
6.4 KiB
Scheme
;;; Copyright (c) 1994 by Olin Shivers
|
|
|
|
;;; String collectors
|
|
;;; ===========================================================================
|
|
;;; string-colllector
|
|
;;; (make-string-collector)
|
|
;;; (collect-string! SC S)
|
|
;;; (clear-string-collector! SC)
|
|
;;; (string-collector->string SC)
|
|
;;;
|
|
;;; A string collector is a data structure that reduces the overhead of
|
|
;;; accumulating a large string in bits and pieces. It is basically a
|
|
;;; "chunk list," where a chunk is a string of at least 128 chars. In this
|
|
;;; way, the list overhead is kept under 2% of the whole data structure.
|
|
;;; When a new string is added to the collection, it is added to the current
|
|
;;; chunk. When the chunk reaches 128 chars, it is added to the chunk list,
|
|
;;; and a new chunk is started. If a large string is added to the collection,
|
|
;;; it is added as a chunk itself, so large strings are not split into small
|
|
;;; pieces. (Actually, a *copy* of the original large string is saved as a
|
|
;;; single chunk, so the collector's chunks are not shared with client data.)
|
|
;;;
|
|
;;; MAKE-STRING-COLLECTOR allocates a new string collector data structure.
|
|
;;; COLLECT-STRING! appends a string to the current collection.
|
|
;;; CLEAR-STRING-COLLECTOR! clears out accumulated strings from a collector.
|
|
;;; STRING-COLLECTOR->STRING converts a collector into a contiguous string.
|
|
;;;
|
|
;;; This facility makes it reasonably efficient to accumulate strings
|
|
;;; of any size in increments of any size.
|
|
|
|
(define-record string-collector
|
|
(len 0) ; How many chars have we accumulated?
|
|
(chunks '()) ; The chunk list.
|
|
(chunk #f) ; The current chunk being filled, if any.
|
|
(chunk-left 0)) ; How many chars left to fill in the current chunk.
|
|
|
|
(define (clear-string-collector! sc)
|
|
(set-string-collector:len sc 0)
|
|
(set-string-collector:chunks sc '())
|
|
(set-string-collector:chunk sc #f)
|
|
sc)
|
|
|
|
;;; (COLLECT-STRING! sc s)
|
|
;;; ----------------------
|
|
;;; S is a string. Append it to the string being collected in the
|
|
;;; string-collector SC.
|
|
;;;
|
|
;;; The algorithm:
|
|
;;; First, do nothing if S is the empty string. Otherwise:
|
|
;;; If there is a current chunk:
|
|
;;; Copy characters from S into it.
|
|
;;; If we filled up the chunk
|
|
;;; Put the chunk on the chunk list.
|
|
;;; Look at the remaining chars from S we haven't copied yet.
|
|
;;; If there a lot of characters left (>= 128)
|
|
;;; Save them as a single chunk on the chunk list.
|
|
;;; No current chunk.
|
|
;;; Else if there a just a few characters left (> 0, < 128)
|
|
;;; Start a new current chunk, copy the chars left into it.
|
|
;;; Else if there aren't any characters left
|
|
;;; No current chunk.
|
|
;;;
|
|
;;; If there is no current chunk:
|
|
;;; If there are a lot of characters in S (>= 128)
|
|
;;; Save a copy of S as a single chunk on the chunk list.
|
|
;;; Still no current chunk.
|
|
;;; Else if there are a few characters in S (> 0, < 128)
|
|
;;; Start a new current chunk, copy the S into it.
|
|
|
|
(define (collect-string! sc s)
|
|
(let ((slen (string-length s))
|
|
(chunk (string-collector:chunk sc))
|
|
(chunk-left (string-collector:chunk-left sc))
|
|
|
|
;; Add the chunk C to the collector's chunk list.
|
|
(push-chunk! (lambda (c)
|
|
(set-string-collector:chunks sc
|
|
(cons c (string-collector:chunks sc)))))
|
|
|
|
;; Copy nchars characters from src[j] to dest[i]
|
|
;; *Way* too much bounds checking going on in this loop.
|
|
(copy-substring! (lambda (dest i src j nchars)
|
|
(do ((i i (+ i 1))
|
|
(j j (+ j 1))
|
|
(nchars nchars (- nchars 1)))
|
|
((zero? nchars))
|
|
(string-set! dest i (string-ref src j))))))
|
|
|
|
(cond ((zero? slen)) ; Empty string, do nothing.
|
|
(chunk
|
|
(let ((ncopy (min slen chunk-left)))
|
|
(copy-substring! chunk (- 128 chunk-left) s 0 ncopy)
|
|
(if (> chunk-left slen)
|
|
(set-string-collector:chunk-left sc (- chunk-left slen))
|
|
;; Current chunk is full.
|
|
(let ((s-left (- slen chunk-left)))
|
|
(push-chunk! chunk) ; Push the current chunk.
|
|
;; Handle remaining chars from S that weren't copied into
|
|
;; the current chunk we just pushed:
|
|
(cond ((>= s-left 128)
|
|
;; A lot more chars left. Push them as one chunk.
|
|
(push-chunk! (substring s chunk-left slen))
|
|
(set-string-collector:chunk sc #f))
|
|
((> s-left 0)
|
|
;; A few more chars left. Start a new chunk.
|
|
(let ((new-chunk (make-string 128)))
|
|
(copy-substring! new-chunk 0 s chunk-left s-left)
|
|
(set-string-collector:chunk sc new-chunk)
|
|
(set-string-collector:chunk-left sc
|
|
(- 128 s-left))))
|
|
;; No more chars left. No current chunk.
|
|
(else (set-string-collector:chunk sc #f)))))))
|
|
|
|
(else ; No current chunk.
|
|
(if (>= slen 128) ; How many chars is S?
|
|
(push-chunk! (string-copy s)) ; A lot. Push as one chunk.
|
|
(let ((chunk (make-string 128))) ; Not many. Start a new chunk.
|
|
(set-string-collector:chunk sc chunk)
|
|
(copy-substring! chunk 0 s 0 slen)
|
|
(set-string-collector:chunk-left sc (- 128 slen))))))
|
|
|
|
;; We don't actually do anything with this, but we keep it updated anyway.
|
|
(set-string-collector:len sc (+ (string-collector:len sc) slen))
|
|
sc))
|
|
|
|
|
|
;;; A bummed version for collecting a single character.
|
|
|
|
(define (collect-char! sc c)
|
|
(let ((chunk (string-collector:chunk sc))
|
|
(chunk-left (string-collector:chunk-left sc)))
|
|
|
|
(cond (chunk
|
|
(string-set! chunk (- 128 chunk-left) c)
|
|
(cond ((> chunk-left 1)
|
|
(set-string-collector:chunk-left (- chunk-left 1)))
|
|
(else
|
|
(set-string-collector:chunks sc
|
|
(cons chunk (string-collector:chunks sc)))
|
|
(set-string-collector:chunk sc #f))))
|
|
(else
|
|
(let ((new-chunk (make-string 128 c)))
|
|
(set-string-collector:chunk-left 127)
|
|
(set-string-collector:chunk sc new-chunk)))))
|
|
|
|
;; We don't actually do anything with this, but we keep it updated anyway.
|
|
(set-string-collector:len sc (+ (string-collector:len sc) 1))
|
|
sc)
|
|
|
|
|
|
;;; Convert the data in the string-collector SC to a single contiguous
|
|
;;; string and return it.
|
|
|
|
(define (string-collector->string sc)
|
|
(let ((chunk (string-collector:chunk sc))
|
|
(chunks (string-collector:chunks sc)))
|
|
(apply string-append
|
|
(reverse (if chunk
|
|
(cons (substring chunk 0
|
|
(- 128
|
|
(string-collector:chunk-left sc)))
|
|
chunks)
|
|
chunks)))))
|
|
|
|
;;; It's too bad we can't side-effect the string-collector's chunk list
|
|
;;; to be a single chunk after this coalescing operation, but we don't
|
|
;;; want to share the string we return -- the user might side-effect it.
|