118 lines
5.0 KiB
Scheme
118 lines
5.0 KiB
Scheme
;;; The SRFI-32 sort package -- vector heap sort -*- Scheme -*-
|
|
;;; Copyright (c) 2002 by Olin Shivers.
|
|
;;; This code is open-source; see the end of the file for porting and
|
|
;;; more copyright information.
|
|
;;; Olin Shivers 10/98.
|
|
|
|
;;; Exports:
|
|
;;; (heap-sort! elt< v [start end]) -> unspecified
|
|
;;; (heap-sort elt< v [start end]) -> vector
|
|
|
|
;;; Two key facts
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; If a heap structure is embedded into a vector at indices [start,end), then:
|
|
;;; 1. The two children of index k are start + 2*(k-start) + 1 = k*2-start+1
|
|
;;; and start + 2*(k-start) + 2 = k*2-start+2.
|
|
;;;
|
|
;;; 2. The first index of a leaf node in the range [start,end) is
|
|
;;; first-leaf = floor[(start+end)/2]
|
|
;;; (You can deduce this from fact #1 above.)
|
|
;;; Any index before FIRST-LEAF is an internal node.
|
|
|
|
(define (really-heap-sort! elt< v start end)
|
|
;; Vector V contains a heap at indices [START,END). The heap is in heap
|
|
;; order in the range (I,END) -- i.e., every element in this range is >=
|
|
;; its children. Bubble HEAP[I] down into the heap to impose heap order on
|
|
;; the range [I,END).
|
|
(define (restore-heap! end i)
|
|
(let* ((vi (vector-ref v i))
|
|
(first-leaf (quotient (+ start end) 2)) ; Can fixnum overflow.
|
|
(final-k (let lp ((k i))
|
|
(if (>= k first-leaf)
|
|
k ; Leaf, so done.
|
|
(let* ((k*2-start (+ k (- k start))) ; Don't overflow.
|
|
(child1 (+ 1 k*2-start))
|
|
(child2 (+ 2 k*2-start))
|
|
(child1-val (vector-ref v child1)))
|
|
(receive (max-child max-child-val)
|
|
(if (< child2 end)
|
|
(let ((child2-val (vector-ref v child2)))
|
|
(if (elt< child2-val child1-val)
|
|
(values child1 child1-val)
|
|
(values child2 child2-val)))
|
|
(values child1 child1-val))
|
|
(cond ((elt< vi max-child-val)
|
|
(vector-set! v k max-child-val)
|
|
(lp max-child))
|
|
(else k)))))))) ; Done.
|
|
(vector-set! v final-k vi)))
|
|
|
|
;; Put the unsorted subvector V[start,end) into heap order.
|
|
(let ((first-leaf (quotient (+ start end) 2))) ; Can fixnum overflow.
|
|
(do ((i (- first-leaf 1) (- i 1)))
|
|
((< i start))
|
|
(restore-heap! end i)))
|
|
|
|
(do ((i (- end 1) (- i 1)))
|
|
((<= i start))
|
|
(let ((top (vector-ref v start)))
|
|
(vector-set! v start (vector-ref v i))
|
|
(vector-set! v i top)
|
|
(restore-heap! i start))))
|
|
|
|
;;; Here are the two exported interfaces.
|
|
|
|
(define (heap-sort! elt< v . maybe-start+end)
|
|
(call-with-values
|
|
(lambda () (vector-start+end v maybe-start+end))
|
|
(lambda (start end)
|
|
(really-heap-sort! elt< v start end))))
|
|
|
|
(define (heap-sort elt< v . maybe-start+end)
|
|
(call-with-values
|
|
(lambda () (vector-start+end v maybe-start+end))
|
|
(lambda (start end)
|
|
(let ((ans (vector-portion-copy v start end)))
|
|
(really-heap-sort! elt< ans 0 (- end start))
|
|
ans))))
|
|
|
|
;;; Notes on porting
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;;
|
|
;;; Bumming the code for speed
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;; If you can use a module system to lock up the internal function
|
|
;;; REALLY-HEAP-SORT! so that it can only be called from HEAP-SORT and
|
|
;;; HEAP-SORT!, then you can hack the internal functions to run with no safety
|
|
;;; checks. The safety checks performed by the exported functions HEAP-SORT &
|
|
;;; HEAP-SORT! guarantee that there will be no type errors or array-indexing
|
|
;;; errors. In addition, with the exception of the two computations of
|
|
;;; FIRST-LEAF, all arithmetic will be fixnum arithmetic that never overflows
|
|
;;; into bignums, assuming your Scheme provides that you can't allocate an
|
|
;;; array so large you might need a bignum to index an element, which is
|
|
;;; definitely the case for every implementation with which I am familiar.
|
|
;;;
|
|
;;; If you want to code up the first-leaf = (quotient (+ s e) 2) computation
|
|
;;; so that it will never fixnum overflow when S & E are fixnums, you can do
|
|
;;; it this way:
|
|
;;; - compute floor(e/2), which throws away e's low-order bit.
|
|
;;; - add e's low-order bit to s, and divide that by two:
|
|
;;; floor[(s + e mod 2) / 2]
|
|
;;; - add these two parts together.
|
|
;;; giving you
|
|
;;; (+ (quotient e 2)
|
|
;;; (quotient (+ s (modulo e 2)) 2))
|
|
;;; If we know that e & s are fixnums, and that 0 <= s <= e, then this
|
|
;;; can only fixnum-overflow when s = e = max-fixnum. Note that the
|
|
;;; two divides and one modulo op can be done very quickly with two
|
|
;;; right-shifts and a bitwise and.
|
|
;;;
|
|
;;; I suspect there has never been a heapsort written in the history of
|
|
;;; the world in C that got this detail right.
|
|
;;;
|
|
;;; If your Scheme has a faster mechanism for handling optional arguments
|
|
;;; (e.g., Chez), you should definitely port over to it. Note that argument
|
|
;;; defaulting and error-checking are interleaved -- you don't have to
|
|
;;; error-check defaulted START/END args to see if they are fixnums that are
|
|
;;; legal vector indices for the corresponding vector, etc.
|