scsh-0.6/scheme/sort/vhsort.scm

118 lines
5.0 KiB
Scheme

;;; The SRFI-32 sort package -- vector heap sort -*- Scheme -*-
;;; Copyright (c) 2002 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers 10/98.
;;; Exports:
;;; (heap-sort! elt< v [start end]) -> unspecified
;;; (heap-sort elt< v [start end]) -> vector
;;; Two key facts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; If a heap structure is embedded into a vector at indices [start,end), then:
;;; 1. The two children of index k are start + 2*(k-start) + 1 = k*2-start+1
;;; and start + 2*(k-start) + 2 = k*2-start+2.
;;;
;;; 2. The first index of a leaf node in the range [start,end) is
;;; first-leaf = floor[(start+end)/2]
;;; (You can deduce this from fact #1 above.)
;;; Any index before FIRST-LEAF is an internal node.
(define (really-heap-sort! elt< v start end)
;; Vector V contains a heap at indices [START,END). The heap is in heap
;; order in the range (I,END) -- i.e., every element in this range is >=
;; its children. Bubble HEAP[I] down into the heap to impose heap order on
;; the range [I,END).
(define (restore-heap! end i)
(let* ((vi (vector-ref v i))
(first-leaf (quotient (+ start end) 2)) ; Can fixnum overflow.
(final-k (let lp ((k i))
(if (>= k first-leaf)
k ; Leaf, so done.
(let* ((k*2-start (+ k (- k start))) ; Don't overflow.
(child1 (+ 1 k*2-start))
(child2 (+ 2 k*2-start))
(child1-val (vector-ref v child1)))
(receive (max-child max-child-val)
(if (< child2 end)
(let ((child2-val (vector-ref v child2)))
(if (elt< child2-val child1-val)
(values child1 child1-val)
(values child2 child2-val)))
(values child1 child1-val))
(cond ((elt< vi max-child-val)
(vector-set! v k max-child-val)
(lp max-child))
(else k)))))))) ; Done.
(vector-set! v final-k vi)))
;; Put the unsorted subvector V[start,end) into heap order.
(let ((first-leaf (quotient (+ start end) 2))) ; Can fixnum overflow.
(do ((i (- first-leaf 1) (- i 1)))
((< i start))
(restore-heap! end i)))
(do ((i (- end 1) (- i 1)))
((<= i start))
(let ((top (vector-ref v start)))
(vector-set! v start (vector-ref v i))
(vector-set! v i top)
(restore-heap! i start))))
;;; Here are the two exported interfaces.
(define (heap-sort! elt< v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(really-heap-sort! elt< v start end))))
(define (heap-sort elt< v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(let ((ans (vector-portion-copy v start end)))
(really-heap-sort! elt< ans 0 (- end start))
ans))))
;;; Notes on porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Bumming the code for speed
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; If you can use a module system to lock up the internal function
;;; REALLY-HEAP-SORT! so that it can only be called from HEAP-SORT and
;;; HEAP-SORT!, then you can hack the internal functions to run with no safety
;;; checks. The safety checks performed by the exported functions HEAP-SORT &
;;; HEAP-SORT! guarantee that there will be no type errors or array-indexing
;;; errors. In addition, with the exception of the two computations of
;;; FIRST-LEAF, all arithmetic will be fixnum arithmetic that never overflows
;;; into bignums, assuming your Scheme provides that you can't allocate an
;;; array so large you might need a bignum to index an element, which is
;;; definitely the case for every implementation with which I am familiar.
;;;
;;; If you want to code up the first-leaf = (quotient (+ s e) 2) computation
;;; so that it will never fixnum overflow when S & E are fixnums, you can do
;;; it this way:
;;; - compute floor(e/2), which throws away e's low-order bit.
;;; - add e's low-order bit to s, and divide that by two:
;;; floor[(s + e mod 2) / 2]
;;; - add these two parts together.
;;; giving you
;;; (+ (quotient e 2)
;;; (quotient (+ s (modulo e 2)) 2))
;;; If we know that e & s are fixnums, and that 0 <= s <= e, then this
;;; can only fixnum-overflow when s = e = max-fixnum. Note that the
;;; two divides and one modulo op can be done very quickly with two
;;; right-shifts and a bitwise and.
;;;
;;; I suspect there has never been a heapsort written in the history of
;;; the world in C that got this detail right.
;;;
;;; If your Scheme has a faster mechanism for handling optional arguments
;;; (e.g., Chez), you should definitely port over to it. Note that argument
;;; defaulting and error-checking are interleaved -- you don't have to
;;; error-check defaulted START/END args to see if they are fixnums that are
;;; legal vector indices for the corresponding vector, etc.