Import sort code from s48-tuebingen/trunk, rev 573.

This commit is contained in:
sperber 2004-01-22 19:52:15 +00:00
parent 33fe47abca
commit cb9f440657
13 changed files with 2598 additions and 0 deletions

239
scheme/sort/delndups.scm Normal file
View File

@ -0,0 +1,239 @@
;;; The SRFI-32 sort package -- delete neighboring duplicate elts
;;; Copyright (c) 1998 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers 11/98.
;;; Problem:
;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number
;;; of elements in the answer vector. This is arguably a very efficient thing
;;; to do, but it might blow out on a system with a limited stack but a big
;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we
;;; push more than a certain number of frames, then allocate a final answer,
;;; copying all the chunks into the answer. But it's much more complex code.
;;; Exports:
;;; (list-delete-neighbor-dups = lis) -> list
;;; (list-delete-neighbor-dups! = lis) -> list
;;; (vector-delete-neighbor-dups = v [start end]) -> vector
;;; (vector-delete-neighbor-dups! = v [start end]) -> end'
;;; These procedures delete adjacent duplicate elements from a list or
;;; a vector, using a given element equality procedure. The first or leftmost
;;; element of a run of equal elements is the one that survives. The list
;;; or vector is not otherwise disordered.
;;;
;;; These procedures are linear time -- much faster than the O(n^2) general
;;; duplicate-elt deletors that do not assume any "bunching" of elements.
;;; If you want to delete duplicate elements from a large list or vector,
;;; sort the elements to bring equal items together, then use one of these
;;; procedures -- for a total time of O(n lg n).
;;; LIST-DELETE-NEIGHBOR-DUPS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure,
;;; from simple to complex. RECUR's contract: Strip off any leading X's from
;;; LIS, and return that list neighbor-dup-deleted.
;;;
;;; The final version
;;; - shares a common subtail between the input & output list, up to 1024
;;; elements;
;;; - Needs no more than 1024 stack frames.
;;; Simplest version.
;;; - Always allocates a fresh list / never shares storage.
;;; - Needs N stack frames, if answer is length N.
(define (list-delete-neighbor-dups = lis)
(if (pair? lis)
(let ((x0 (car lis)))
(cons x0 (let recur ((x0 x0) (xs (cdr lis)))
(if (pair? xs)
(let ((x1 (car xs))
(x2+ (cdr xs)))
(if (= x0 x1)
(recur x0 x2+) ; Loop, actually.
(cons x1 (recur x1 x2+))))
xs))))
lis))
;;; This version tries to use cons cells from input by sharing longest
;;; common tail between input & output. Still needs N stack frames, for ans
;;; of length N.
(define (list-delete-neighbor-dups = lis)
(if (pair? lis)
(let* ((x0 (car lis))
(xs (cdr lis))
(ans (let recur ((x0 x0) (xs xs))
(if (pair? xs)
(let ((x1 (car xs))
(x2+ (cdr xs)))
(if (= x0 x1)
(recur x0 x2+)
(let ((ans-tail (recur x1 x2+)))
(if (eq? ans-tail x2+) xs
(cons x1 ans-tail)))))
xs))))
(if (eq? ans xs) lis (cons x0 ans)))
lis))
;;; This version tries to share the longest common tail between input & output,
;;; but it also refuses to push more than 1024 stack frames (which bounds the
;;; length of the shared suffix, as well). Useful for enormous lists that
;;; might otherwise blow out your stack. It basically computes 1024-element
;;; chunks of the answer, and then strings these results together using
;;; SET-CDR! to point the last cons cell of each chunk to the first cons cell
;;; of the following chunk.
(define (list-delete-neighbor-dups = lis)
(letrec ((lp (lambda (last-pair xs)
(format #t "lp(last-pair=~a, xs=~a)~%" last-pair xs)
(if (pair? xs)
(let ((x0 (car xs))
(x1+ (cdr xs)))
(receive (chunk last-pair2 xs) (recur xs 2)
(format #t "<-recur: chunk=~a lp=~a xs=~a~%"
chunk last-pair2 xs)
(set-cdr! last-pair chunk)
(lp last-pair2 xs))))))
(kill (lambda (xs)
(let ((y (car xs)))
(values y (let lp ((xs (cdr xs)))
(if (pair? xs)
(let ((x0 (car xs))
(x1+ (cdr xs)))
(if (= y x0) (lp x1+) x1+))
'()))))))
(recur (lambda (xs n)
(format #t "->recur(xs=~a, n=~a)~%" xs n)
(if (pair? xs)
(receive (x0 rest) (kill xs)
(if (pair? rest)
(if (< 1 n)
(receive (c lpr r) (recur rest (- n 1))
(values (cons x0 c) lpr r))
(receive (xn rest) (kill rest)
(let ((lpr (list xn)))
(values (cons x0 lpr) lpr rest))))
(values (list x0) '() '())))
(values '() '() '())))))
(if (pair? lis)
(let ((x0 (car lis))
(x1+ (cdr lis)))
(receive (chunk last-pair rest) (recur lis 2)
(lp last-pair rest)
chunk))
lis)))
;;; LIST-DELETE-NEIGHBOR-DUPS!
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code runs in constant list space, constant stack, and also
;;; does only the minimum SET-CDR!'s necessary.
(define (list-delete-neighbor-dups! = lis)
(if (pair? lis)
(let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis)))
(if (pair? lis)
(let ((lis-elt (car lis))
(next (cdr lis)))
(if (= prev-elt lis-elt)
;; We found the first elts of a run of dups, so we know
;; we're going to have to do a SET-CDR!. Scan to the end of
;; the run, do the SET-CDR!, and loop on LP1.
(let lp2 ((lis next))
(if (pair? lis)
(let ((lis-elt (car lis))
(next (cdr lis)))
(if (= prev-elt lis-elt)
(lp2 next)
(begin (set-cdr! prev lis)
(lp1 lis lis-elt next))))
(set-cdr! prev lis))) ; Ran off end => quit.
(lp1 lis lis-elt next))))))
lis)
(define (vector-delete-neighbor-dups elt= v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(if (< start end)
(let* ((x (vector-ref v start))
(ans (let recur ((x x) (i start) (j 1))
(if (< i end)
(let ((y (vector-ref v i))
(nexti (+ i 1)))
(if (elt= x y)
(recur x nexti j)
(let ((ansvec (recur y nexti (+ j 1))))
(vector-set! ansvec j y)
ansvec)))
(make-vector j)))))
(vector-set! ans 0 x)
ans)
'#()))))
;;; Packs the surviving elements to the left, in range [start,end'),
;;; and returns END'.
(define (vector-delete-neighbor-dups! elt= v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(if (>= start end)
end
;; To eliminate unnecessary copying (read elt i then write the value
;; back at index i), we scan until we find the first dup.
(let skip ((j start) (vj (vector-ref v start)))
(let ((j+1 (+ j 1)))
(if (>= j+1 end)
end
(let ((vj+1 (vector-ref v j+1)))
(if (not (elt= vj vj+1))
(skip j+1 vj+1)
;; OK -- j & j+1 are dups, so we're committed to moving
;; data around. In lp2, v[start,j] is what we've done;
;; v[k,end) is what we have yet to handle.
(let lp2 ((j j) (vj vj) (k (+ j 2)))
(let lp3 ((k k))
(if (>= k end)
(+ j 1) ; Done.
(let ((vk (vector-ref v k))
(k+1 (+ k 1)))
(if (elt= vj vk)
(lp3 k+1)
(let ((j+1 (+ j 1)))
(vector-set! v j+1 vk)
(lp2 j+1 vk k+1))))))))))))))))
;;; Copyright
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code is
;;; Copyright (c) 1998 by Olin Shivers.
;;; The terms are: You may do as you please with this code, as long as
;;; you do not delete this notice or hold me responsible for any outcome
;;; related to its use.
;;;
;;; Blah blah blah. Don't you think source files should contain more lines
;;; of code than copyright notice?
;;;
;;; Code porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; If your Scheme has a faster mechanism for handling optional arguments
;;; (e.g., Chez), you should definitely port over to it. Note that argument
;;; defaulting and error-checking are interleaved -- you don't have to
;;; error-check defaulted START/END args to see if they are fixnums that are
;;; legal vector indices for the corresponding vector, etc.

199
scheme/sort/interfaces.scm Normal file
View File

@ -0,0 +1,199 @@
;;; Interface defs for the Scheme Underground sorting package,
;;; in the Scheme 48 module language.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; list-delete-neighbor-dups = l -> list
;;; vector-delete-neighbor-dups = v [start end] -> vector
;;; vector-delete-neighbor-dups! = v [start end] -> vector
;;;
(define-interface delete-neighbor-duplicates-interface
(export (list-delete-neighbor-dups
(proc ((proc (:value :value) :boolean)
:value)
:value))
(vector-delete-neighbor-dups
(proc ((proc (:value :value) :boolean)
:vector
&opt
:exact-integer :exact-integer)
:vector))
(vector-delete-neighbor-dups!
(proc ((proc (:value :value) :boolean)
:vector
&opt
:exact-integer :exact-integer)
:vector))))
;;; vector-binary-search elt< elt->key key v [start end] -> integer-or-false
;;; vector-binary-search3 c v [start end] -> integer-or-false
(define-interface binary-searches-interface
(export vector-binary-search
vector-binary-search3))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; list-sorted? l < -> boolean
;;; vector-sorted? v < [start end] -> boolean
(define-interface sorted-interface
(export (list-sorted? (proc (:value (proc (:value :value) :boolean)) :boolean))
(vector-sorted? (proc ((proc (:value :value) :boolean)
:vector
&opt :exact-integer :exact-integer)
:boolean))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; list-merge-sort < l -> list
;;; list-merge-sort! < l -> list
;;; list-merge < lis lis -> list
;;; list-merge! < lis lis -> list
(define-interface list-merge-sort-interface
(export ((list-merge-sort list-merge-sort!)
(proc ((proc (:value :value) :boolean) :value) :value))
((list-merge list-merge!)
(proc ((proc (:value :value) :boolean) :value :value) :value))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; vector-merge-sort < v [start end temp] -> vector
;;; vector-merge-sort! < v [start end temp] -> unspecific
;;; vector-merge < v1 v2 [start1 end1 start2 end2] -> vector
;;; vector-merge! < v v1 v2 [start0 start1 end1 start2 end2] -> unspecific
(define-interface vector-merge-sort-interface
(export
(vector-merge-sort (proc ((proc (:value :value) :boolean)
:vector
&opt
:exact-integer :exact-integer
:vector)
:vector))
(vector-merge-sort! (proc ((proc (:value :value) :boolean)
:vector
&opt
:exact-integer :exact-integer
:vector)
:unspecific))
(vector-merge (proc ((proc (:value :value) :boolean)
:vector :vector
&opt
:exact-integer :exact-integer
:exact-integer :exact-integer)
:vector))
(vector-merge! (proc ((proc (:value :value) :boolean)
:vector :vector :vector
&opt
:exact-integer :exact-integer :exact-integer
:exact-integer :exact-integer)
:unspecific))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; heap-sort < v [start end] -> vector
;;; heap-sort! < v -> unspecific
(define-interface vector-heap-sort-interface
(export (heap-sort (proc ((proc (:value :value) :boolean)
:vector
&opt :exact-integer :exact-integer)
:vector))
(heap-sort! (proc ((proc (:value :value) :boolean) :vector) :unspecific))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; insert-sort < v [start end] -> vector
;;; insert-sort! < v [start end] -> unspecific
;;;
;;; internal:
;;; %insert-sort! < v start end -> unspecific
(define-interface vector-insertion-sort-interface
(export (insert-sort (proc ((proc (:value :value) :boolean)
:vector
&opt :exact-integer :exact-integer)
:vector))
(insert-sort! (proc ((proc (:value :value) :boolean)
:vector
&opt :exact-integer :exact-integer)
:unspecific))))
(define-interface vector-insertion-sort-internal-interface
(export (%insert-sort! (proc ((proc (:value :value) :boolean)
:vector
:exact-integer :exact-integer)
:unspecific))))
;;; The general sort interface:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; list-sorted? < l -> boolean
;;;
;;; list-merge < l1 l2 -> list
;;; list-merge! < l1 l2 -> list
;;;
;;; list-sort < l -> list
;;; list-sort! < l -> list
;;; list-stable-sort < l -> list
;;; list-stable-sort! < l -> list
;;;
;;; list-delete-neighbor-dups l = -> list
;;;
;;; vector-sorted? < v [start end] -> boolean
;;;
;;; vector-merge < v1 v2 [start1 end1 start2 end2] -> vector
;;; vector-merge! < v v1 v2 [start start1 end1 start2 end2] -> unspecific
;;;
;;; vector-sort < v [start end] -> vector
;;; vector-sort! < v -> unspecific
;;;
;;; vector-stable-sort < v [start end] -> vector
;;; vector-stable-sort! < v -> unspecific
;;;
;;; vector-delete-neighbor-dups v = [start end] -> vector
(define-interface sort-interface
(compound-interface
sorted-interface
(export
((list-merge list-merge!)
(proc ((proc (:value :value) :boolean) :value :value) :value))
((list-sort list-sort! list-stable-sort list-stable-sort!)
(proc ((proc (:value :value) :boolean) :value) :value))
(vector-merge (proc ((proc (:value :value) :boolean)
:vector :vector
&opt
:exact-integer :exact-integer
:exact-integer :exact-integer)
:vector))
(vector-merge! (proc ((proc (:value :value) :boolean)
:vector :vector :vector
&opt
:exact-integer :exact-integer :exact-integer
:exact-integer :exact-integer)
:unspecific))
((vector-sort vector-stable-sort)
(proc ((proc (:value :value) :boolean)
:vector
&opt
:exact-integer :exact-integer)
:vector))
((vector-sort! vector-stable-sort!)
(proc ((proc (:value :value) :boolean) :vector) :unspecific)))
(list-delete-neighbor-dups
(proc ((proc (:value :value) :boolean)
:value)
:value))
(vector-delete-neighbor-dups
(proc ((proc (:value :value) :boolean)
:vector
&opt
:exact-integer :exact-integer)
:vector))))

386
scheme/sort/lmsort.scm Normal file
View File

@ -0,0 +1,386 @@
;;; list merge & list merge-sort -*- Scheme -*-
;;; Copyright (c) 1998 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers
;;; Exports:
;;; (list-merge < lis lis) -> list
;;; (list-merge! < lis lis) -> list
;;; (list-merge-sort < lis) -> list
;;; (list-merge-sort! < lis) -> list
;;; A stable list merge sort of my own device
;;; Two variants: pure & destructive
;;;
;;; This list merge sort is opportunistic (a "natural" sort) -- it exploits
;;; existing order in the input set. Instead of recursing all the way down to
;;; individual elements, the leaves of the merge tree are maximal contiguous
;;; runs of elements from the input list. So the algorithm does very well on
;;; data that is mostly ordered, with a best-case time of O(n) when the input
;;; list is already completely sorted. In any event, worst-case time is
;;; O(n lg n).
;;;
;;; The destructive variant is "in place," meaning that it allocates no new
;;; cons cells at all; it just rearranges the pairs of the input list with
;;; SET-CDR! to order it.
;;;
;;; The interesting control structure is the combination recursion/iteration
;;; of the core GROW function that does an "opportunistic" DFS walk of the
;;; merge tree, adaptively subdividing in response to the length of the
;;; merges, without requiring any auxiliary data structures beyond the
;;; recursion stack. It's actually quite simple -- ten lines of code.
;;; -Olin Shivers 10/20/98
;;; (mlet ((var-list mv-exp) ...) body ...)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; A LET* form that handles multiple values. Move this into the two clients
;;; if you don't have a module system handy to restrict its visibility...
(define-syntax mlet ; Multiple-value LET*
(syntax-rules ()
((mlet ((() exp) rest ...) body ...)
(begin exp (mlet (rest ...) body ...)))
((mlet (((var) exp) rest ...) body ...)
(let ((var exp)) (mlet (rest ...) body ...)))
((mlet ((vars exp) rest ...) body ...)
(call-with-values (lambda () exp)
(lambda vars (mlet (rest ...) body ...))))
((mlet () body ...) (begin body ...))))
;;; (list-merge-sort < lis)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; A natural, stable list merge sort.
;;; - natural: picks off maximal contiguous runs of pre-ordered data.
;;; - stable: won't invert the order of equal elements in the input list.
(define (list-merge-sort elt< lis)
;; (getrun lis) -> run runlen rest
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pick a run of non-decreasing data off of non-empty list LIS.
;; Return the length of this run, and the following list.
(define (getrun lis)
(let lp ((ans '()) (i 1) (prev (car lis)) (xs (cdr lis)))
(if (pair? xs)
(let ((x (car xs)))
(if (elt< x prev)
(values (append-reverse ans (cons prev '())) i xs)
(lp (cons prev ans) (+ i 1) x (cdr xs))))
(values (append-reverse ans (cons prev '())) i xs))))
(define (append-reverse rev-head tail)
(let lp ((rev-head rev-head) (tail tail))
(if (null-list? rev-head) tail
(lp (cdr rev-head) (cons (car rev-head) tail)))))
(define (null-list? l)
(cond ((pair? l) #f)
((null? l) #t)
(else (error "null-list?: argument out of domain" l))))
;; (merge a b) -> list
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; List merge -- stably merge lists A (length > 0) & B (length > 0).
;; This version requires up to |a|+|b| stack frames.
(define (merge a b)
(let recur ((x (car a)) (a a)
(y (car b)) (b b))
(if (elt< y x)
(cons y (let ((b (cdr b)))
(if (pair? b)
(recur x a (car b) b)
a)))
(cons x (let ((a (cdr a)))
(if (pair? a)
(recur (car a) a y b)
b))))))
;; (grow s ls ls2 u lw) -> [a la unused]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The core routine. Read the next 20 lines of comments & all is obvious.
;; - S is a sorted list of length LS > 1.
;; - LS2 is some power of two <= LS.
;; - U is an unsorted list.
;; - LW is a positive integer.
;; Starting with S, and taking data from U as needed, produce
;; a sorted list of *at least* length LW, if there's enough data
;; (LW <= LS + length(U)), or use all of U if not.
;;
;; GROW takes maximal contiguous runs of data from U at a time;
;; it is allowed to return a list *longer* than LW if it gets lucky
;; with a long run.
;;
;; The key idea: If you want a merge operation to "pay for itself," the two
;; lists being merged should be about the same length. Remember that.
;;
;; Returns:
;; - A: The result list
;; - LA: The length of the result list
;; - UNUSED: The unused tail of U.
(define (grow s ls ls2 u lw) ; The core of the sort algorithm.
(if (or (<= lw ls) (not (pair? u))) ; Met quota or out of data?
(values s ls u) ; If so, we're done.
(mlet (((ls2) (let lp ((ls2 ls2))
(let ((ls2*2 (+ ls2 ls2)))
(if (<= ls2*2 ls) (lp ls2*2) ls2))))
;; LS2 is now the largest power of two <= LS.
;; (Just think of it as being roughly LS.)
((r lr u2) (getrun u)) ; Get a run, then
((t lt u3) (grow r lr 1 u2 ls2))) ; grow it up to be T.
(grow (merge s t) (+ ls lt) ; Merge S & T,
(+ ls2 ls2) u3 lw)))) ; and loop.
;; Note: (LENGTH LIS) or any constant guaranteed
;; to be greater can be used in place of INFINITY.
(if (pair? lis) ; Don't sort an empty list.
(mlet (((r lr tail) (getrun lis)) ; Pick off an initial run,
((infinity) #o100000000) ; then grow it up maximally.
((a la v) (grow r lr 1 tail infinity)))
a)
'()))
;;; (list-merge-sort! < lis)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; A natural, stable, destructive, in-place list merge sort.
;;; - natural: picks off maximal contiguous runs of pre-ordered data.
;;; - stable: won't invert the order of equal elements in the input list.
;;; - destructive, in-place: this routine allocates no extra working memory;
;;; it simply rearranges the list with SET-CDR! operations.
(define (list-merge-sort! elt< lis)
;; (getrun lis) -> runlen last rest
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pick a run of non-decreasing data off of non-empty list LIS.
;; Return the length of this run, the last cons cell of the run,
;; and the following list.
(define (getrun lis)
(let lp ((lis lis) (x (car lis)) (i 1) (next (cdr lis)))
(if (pair? next)
(let ((y (car next)))
(if (elt< y x)
(values i lis next)
(lp next y (+ i 1) (cdr next))))
(values i lis next))))
;; (merge! a enda b endb) -> [m endm]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destructively and stably merge non-empty lists A & B.
;; The last cons of A is ENDA. (The cdr of ENDA can be non-nil.)
;; the last cons of B is ENDB. (The cdr of ENDB can be non-nil.)
;;
;; Return the first and last cons cells of the merged list.
;; This routine is iterative & in-place: it runs in constant stack and
;; doesn't allocate any cons cells. It is also tedious but simple; don't
;; bother reading it unless necessary.
(define (merge! a enda b endb)
;; The logic of these two loops is completely driven by these invariants:
;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B).
;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B).
(letrec ((scan-a (lambda (prev x a y b) ; Zip down A until we
(cond ((elt< y x) ; find an elt > (CAR B).
(set-cdr! prev b)
(let ((next-b (cdr b)))
(if (eq? b endb)
(begin (set-cdr! b a) enda) ; Done.
(scan-b b x a (car next-b) next-b))))
((eq? a enda) (maybe-set-cdr! a b) endb) ; Done.
(else (let ((next-a (cdr a))) ; Continue scan.
(scan-a a (car next-a) next-a y b))))))
(scan-b (lambda (prev x a y b) ; Zip down B while its
(cond ((elt< y x) ; elts are < (CAR A).
(if (eq? b endb)
(begin (set-cdr! b a) enda) ; Done.
(let ((next-b (cdr b))) ; Continue scan.
(scan-b b x a (car next-b) next-b))))
(else (set-cdr! prev a)
(if (eq? a enda)
(begin (maybe-set-cdr! a b) endb) ; Done.
(let ((next-a (cdr a)))
(scan-a a (car next-a) next-a y b)))))))
;; This guy only writes if he has to. Called at most once.
;; Pointer equality rules; pure languages are for momma's boys.
(maybe-set-cdr! (lambda (pair val) (if (not (eq? (cdr pair) val))
(set-cdr! pair val)))))
(let ((x (car a)) (y (car b)))
(if (elt< y x)
;; B starts the answer list.
(values b (if (eq? b endb)
(begin (set-cdr! b a) enda)
(let ((next-b (cdr b)))
(scan-b b x a (car next-b) next-b))))
;; A starts the answer list.
(values a (if (eq? a enda)
(begin (maybe-set-cdr! a b) endb)
(let ((next-a (cdr a)))
(scan-a a (car next-a) next-a y b))))))))
;; (grow s ends ls ls2 u lw) -> [a enda la unused]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The core routine.
;; - S is a sorted list of length LS > 1, with final cons cell ENDS.
;; (CDR ENDS) doesn't have to be nil.
;; - LS2 is some power of two <= LS.
;; - U is an unsorted list.
;; - LW is a positive integer.
;; Starting with S, and taking data from U as needed, produce
;; a sorted list of *at least* length LW, if there's enough data
;; (LW <= LS + length(U)), or use all of U if not.
;;
;; GROW takes maximal contiguous runs of data from U at a time;
;; it is allowed to return a list *longer* than LW if it gets lucky
;; with a long run.
;;
;; The key idea: If you want a merge operation to "pay for itself," the two
;; lists being merged should be about the same length. Remember that.
;;
;; Returns:
;; - A: The result list (not properly terminated)
;; - ENDA: The last cons cell of the result list.
;; - LA: The length of the result list
;; - UNUSED: The unused tail of U.
(define (grow s ends ls ls2 u lw)
(if (and (pair? u) (< ls lw))
;; We haven't met the LW quota but there's still some U data to use.
(mlet (((ls2) (let lp ((ls2 ls2))
(let ((ls2*2 (+ ls2 ls2)))
(if (<= ls2*2 ls) (lp ls2*2) ls2))))
;; LS2 is now the largest power of two <= LS.
;; (Just think of it as being roughly LS.)
((lr endr u2) (getrun u)) ; Get a run from U;
((t endt lt u3) (grow u endr lr 1 u2 ls2)) ; grow it up to be T.
((st end-st) (merge! s ends t endt))) ; Merge S & T,
(grow st end-st (+ ls lt) (+ ls2 ls2) u3 lw)) ; then loop.
(values s ends ls u))) ; Done -- met LW quota or ran out of data.
;; Note: (LENGTH LIS) or any constant guaranteed
;; to be greater can be used in place of INFINITY.
(if (pair? lis)
(mlet (((lr endr rest) (getrun lis)) ; Pick off an initial run.
((infinity) #o100000000) ; Then grow it up maximally.
((a enda la v) (grow lis endr lr 1 rest infinity)))
(set-cdr! enda '()) ; Nil-terminate answer.
a) ; We're done.
'())) ; Don't sort an empty list.
;;; Merge
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; These two merge procedures are stable -- ties favor list A.
(define (list-merge < a b)
(cond ((not (pair? a)) b)
((not (pair? b)) a)
(else (let recur ((x (car a)) (a a) ; A is a pair; X = (CAR A).
(y (car b)) (b b)) ; B is a pair; Y = (CAR B).
(if (< y x)
(let ((b (cdr b)))
(if (pair? b)
(cons y (recur x a (car b) b))
(cons y a)))
(let ((a (cdr a)))
(if (pair? a)
(cons x (recur (car a) a y b))
(cons x b))))))))
;;; This destructive merge does as few SET-CDR!s as it can -- for example, if
;;; the list is already sorted, it does no SET-CDR!s at all. It is also
;;; iterative, running in constant stack.
(define (list-merge! < a b)
;; The logic of these two loops is completely driven by these invariants:
;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B).
;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B).
(letrec ((scan-a (lambda (prev a x b y) ; Zip down A doing
(if (< y x) ; no SET-CDR!s until
(let ((next-b (cdr b))) ; we hit a B elt that
(set-cdr! prev b) ; has to be inserted.
(if (pair? next-b)
(scan-b b a x next-b (car next-b))
(set-cdr! b a)))
(let ((next-a (cdr a)))
(if (pair? next-a)
(scan-a a next-a (car next-a) b y)
(set-cdr! a b))))))
(scan-b (lambda (prev a x b y) ; Zip down B doing
(if (< y x) ; no SET-CDR!s until
(let ((next-b (cdr b))) ; we hit an A elt that
(if (pair? next-b) ; has to be
(scan-b b a x next-b (car next-b)) ; inserted.
(set-cdr! b a)))
(let ((next-a (cdr a)))
(set-cdr! prev a)
(if (pair? next-a)
(scan-a a next-a (car next-a) b y)
(set-cdr! a b)))))))
(cond ((not (pair? a)) b)
((not (pair? b)) a)
;; B starts the answer list.
((< (car b) (car a))
(let ((next-b (cdr b)))
(if (null? next-b)
(set-cdr! b a)
(scan-b b a (car a) next-b (car next-b))))
b)
;; A starts the answer list.
(else (let ((next-a (cdr a)))
(if (null? next-a)
(set-cdr! a b)
(scan-a a next-a (car next-a) b (car b))))
a))))
;;; Copyright
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code is
;;; Copyright (c) 1998 by Olin Shivers.
;;; The terms are: You may do as you please with this code, as long as
;;; you do not delete this notice or hold me responsible for any outcome
;;; related to its use.
;;;
;;; Blah blah blah.
;;; Code tuning & porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This is very portable code. It's R4RS with the following exceptions:
;;; - The R5RS multiple-value VALUES & CALL-WITH-VALUES procedures for
;;; handling multiple-value return.
;;;
;;; This code is *tightly* bummed as far as I can go in portable Scheme.
;;;
;;; - The fixnum arithmetic in LIST-MERGE-SORT! and COUNTED-LIST-MERGE!
;;; that could be safely switched over to unsafe, fixnum-specific ops,
;;; if you're sure that 2*maxlen is a fixnum, where maxlen is the length
;;; of the longest list you could ever have.
;;;
;;; - I typically write my code in a style such that every CAR and CDR
;;; application is protected by an upstream PAIR?. This is the case in this
;;; code, so all the CAR's and CDR's could safely switched over to unsafe
;;; versions. But check over the code before you do it, in case the source
;;; has been altered since I wrote this.

71
scheme/sort/packages.scm Normal file
View File

@ -0,0 +1,71 @@
;;; Package defs for the Scheme Underground sorting package,
;;; in the Scheme 48 module language.
;;; The general sort package:
(define-structure sort sort-interface
(open scheme
list-merge-sort
vector-heap-sort
vector-merge-sort
sorted
delete-neighbor-duplicates)
(files sort)
(optimize auto-integrate))
(define-structure sorted sorted-interface
(open scheme
vector-utils)
(files sortp)
(optimize auto-integrate))
(define-structure delete-neighbor-duplicates delete-neighbor-duplicates-interface
(open scheme
receiving
formats
vector-utils)
(files delndups)
(optimize auto-integrate))
(define-structure binary-searches binary-searches-interface
(open scheme
vector-utils)
(files vbinsearch))
(define-structure list-merge-sort list-merge-sort-interface
(open scheme
receiving
(subset signals (error)))
(files lmsort)
(optimize auto-integrate))
(define-structure vector-merge-sort vector-merge-sort-interface
(open scheme
receiving
vector-utils
vector-insertion-sort-internal)
(files vmsort)
(optimize auto-integrate))
(define-structure vector-heap-sort vector-heap-sort-interface
(open scheme
receiving
vector-utils)
(files vhsort)
(optimize auto-integrate))
(define-structures ((vector-insertion-sort vector-insertion-sort-interface)
(vector-insertion-sort-internal
vector-insertion-sort-internal-interface))
(open scheme
vector-utils)
(files visort)
(optimize auto-integrate))
(define-structure vector-utils (export vector-copy
vector-portion-copy
vector-portion-copy!
vector-start+end
vectors-start+end-2)
(open scheme)
(files vector-util))

26
scheme/sort/sort.scm Normal file
View File

@ -0,0 +1,26 @@
;;; The SRFI-32 sort package -- general sort & merge procedures
;;;
;;; Copyright (c) 1998 by Olin Shivers.
;;; You may do as you please with this code, as long as you do not delete this
;;; notice or hold me responsible for any outcome related to its use.
;;; Olin Shivers 10/98.
;;; This file just defines the general sort API in terms of some
;;; algorithm-specific calls.
(define (list-sort < l) ; Sort lists by converting to
(let ((v (list->vector l))) ; a vector and sorting that.
(heap-sort! < v)
(vector->list v)))
(define list-sort! list-merge-sort!)
(define list-stable-sort list-merge-sort)
(define list-stable-sort! list-merge-sort!)
(define vector-sort heap-sort)
(define vector-sort! heap-sort!)
(define vector-stable-sort vector-merge-sort)
(define vector-stable-sort! vector-merge-sort!)

1056
scheme/sort/sort.txt Normal file

File diff suppressed because it is too large Load Diff

35
scheme/sort/sortp.scm Normal file
View File

@ -0,0 +1,35 @@
;;; The SRFI-?? sort package -- sorted predicates
;;; Olin Shivers 10/98.
;;;
;;; (list-sorted? < lis) -> boolean
;;; (vector-sorted? < v [start end]) -> boolean
(define (list-sorted? < list)
(or (not (pair? list))
(let lp ((prev (car list)) (tail (cdr list)))
(or (not (pair? tail))
(let ((next (car tail)))
(and (not (< next prev))
(lp next (cdr tail))))))))
(define (vector-sorted? elt< v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(or (>= start end) ; Empty range
(let lp ((i (+ start 1)) (vi-1 (vector-ref v start)))
(or (>= i end)
(let ((vi (vector-ref v i)))
(and (not (elt< vi vi-1))
(lp (+ i 1) vi)))))))))
;;; Copyright and porting non-notices
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Give me a break. It's fifteen lines of code. I place this code in the
;;; public domain; help yourself.
;;;
;;; If your Scheme has a faster mechanism for handling optional arguments
;;; (e.g., Chez), you should definitely port over to it. Note that argument
;;; defaulting and error-checking are interleaved -- you don't have to
;;; error-check defaulted START/END args to see if they are fixnums that are
;;; legal vector indices for the corresponding vector, etc.

65
scheme/sort/test.scm Normal file
View File

@ -0,0 +1,65 @@
;;; Little test harness, 'cause I'm paraoid about tricky code.
;;; It's scsh specific -- Scheme 48 random-number stuff & the mail-notification
;;; stuff.
(define r (make-random 42))
(define (rand n) (modulo (r) n))
;;; For testing stable sort -- 3 & -3 compare the same.
(define (my< x y) (< (abs x) (abs y)))
(define (unstable-sort-test v) ; quick & heap vs simple insert
(let ((v1 (vector-copy v))
(v2 (vector-copy v))
(v3 (vector-copy v))
(v4 (vector-copy v)))
(quick-sort! < v1)
(quick-sort3! - v1)
(heap-sort! < v2)
(insert-sort! < v3)
(and (or (not (equal? v1 v2))
(not (equal? v1 v3))
(not (equal? v1 v4))
(not (vector-sorted? < v1)))
(list v v1 v2 v3 v4))))
(define (stable-sort-test v) ; insert, list & vector merge sorts
(let ((v1 (vector-copy v))
(v2 (vector-copy v))
(v3 (list->vector (list-merge-sort! my< (vector->list v))))
(v4 (list->vector (list-merge-sort my< (vector->list v)))))
(vector-merge-sort! my< v1)
(insert-sort! my< v2)
(and (or (not (equal? v1 v2))
(not (equal? v1 v3))
(not (equal? v1 v4))
(not (vector-sorted? my< v1)))
(list v v1 v2 v3 v4))))
(define (do-test max-size)
(let lp ((i 0))
(let ((i (cond ((= i 1000)
(write-char #\.)
(force-output)
0)
(else (+ i 1))))
(v (random-vector (rand max-size))))
(cond ((unstable-sort-test v) => (lambda (x) (cons 'u x)))
((stable-sort-test v) => (lambda (x) (cons 's x)))
(else (lp i))))))
(define (test-n-mail max-size)
(let ((losers (do-test max-size))
(email-address "shivers@cc.gatech.edu"))
(run (mail -s "sort lost" ,email-address) (<< ,losers))))
(define (random-vector size)
(let ((v (make-vector size)))
(fill-vector-randomly! v (* 10 size))
v))
(define (fill-vector-randomly! v range)
(let ((half (quotient range 2)))
(do ((i (- (vector-length v) 1) (- i 1)))
((< i 0))
(vector-set! v i (- (rand range) half)))))

View File

@ -0,0 +1,34 @@
;;; The SRFI-32 sort package -- binary search -*- Scheme -*-
;;; Copyright (c) 1998 by Olin Shivers.
;;; This code is in the public domain.
;;; Olin Shivers 98/11
;;; Returns the index of the matching element.
;;; (vector-binary-search < car 4 '#((1 . one) (3 . three)
;;; (4 . four) (25 . twenty-five)))
;;; => 2
(define (vector-binary-search key< elt->key key v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(let lp ((left start) (right end)) ; Search V[left,right).
(and (< left right)
(let* ((m (quotient (+ left right) 2))
(elt (vector-ref v m))
(elt-key (elt->key elt)))
(cond ((key< key elt-key) (lp left m))
((key< elt-key key) (lp (+ m 1) right))
(else m))))))))
(define (vector-binary-search3 compare v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(let lp ((left start) (right end)) ; Search V[left,right).
(and (< left right)
(let* ((m (quotient (+ left right) 2))
(sign (compare (vector-ref v m))))
(cond ((> sign 0) (lp left m))
((< sign 0) (lp (+ m 1) right))
(else m))))))))

View File

@ -0,0 +1,56 @@
(define (vector-portion-copy vec start end)
(let* ((len (vector-length vec))
(new-len (- end start))
(new (make-vector new-len)))
(do ((i start (+ i 1))
(j 0 (+ j 1)))
((= i end) new)
(vector-set! new j (vector-ref vec i)))))
(define (vector-copy vec)
(vector-portion-copy vec 0 (vector-length vec)))
(define (vector-portion-copy! target src start end)
(let ((len (- end start)))
(do ((i (- len 1) (- i 1))
(j (- end 1) (- j 1)))
((< i 0))
(vector-set! target i (vector-ref src j)))))
(define (has-element list index)
(cond
((zero? index)
(if (pair? list)
(values #t (car list))
(values #f #f)))
((null? list)
(values #f #f))
(else
(has-element (cdr list) (- index 1)))))
(define (list-ref-or-default list index default)
(call-with-values
(lambda () (has-element list index))
(lambda (has? maybe)
(if has?
maybe
default))))
(define (vector-start+end vector maybe-start+end)
(let ((start (list-ref-or-default maybe-start+end
0 0))
(end (list-ref-or-default maybe-start+end
1 (vector-length vector))))
(values start end)))
(define (vectors-start+end-2 vector-1 vector-2 maybe-start+end)
(let ((start-1 (list-ref-or-default maybe-start+end
0 0))
(end-1 (list-ref-or-default maybe-start+end
1 (vector-length vector-1)))
(start-2 (list-ref-or-default maybe-start+end
2 0))
(end-2 (list-ref-or-default maybe-start+end
3 (vector-length vector-2))))
(values start-1 end-1
start-2 end-2)))

117
scheme/sort/vhsort.scm Normal file
View File

@ -0,0 +1,117 @@
;;; The SRFI-32 sort package -- vector heap sort -*- Scheme -*-
;;; Copyright (c) 2002 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers 10/98.
;;; Exports:
;;; (heap-sort! elt< v [start end]) -> unspecified
;;; (heap-sort elt< v [start end]) -> vector
;;; Two key facts
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; If a heap structure is embedded into a vector at indices [start,end), then:
;;; 1. The two children of index k are start + 2*(k-start) + 1 = k*2-start+1
;;; and start + 2*(k-start) + 2 = k*2-start+2.
;;;
;;; 2. The first index of a leaf node in the range [start,end) is
;;; first-leaf = floor[(start+end)/2]
;;; (You can deduce this from fact #1 above.)
;;; Any index before FIRST-LEAF is an internal node.
(define (really-heap-sort! elt< v start end)
;; Vector V contains a heap at indices [START,END). The heap is in heap
;; order in the range (I,END) -- i.e., every element in this range is >=
;; its children. Bubble HEAP[I] down into the heap to impose heap order on
;; the range [I,END).
(define (restore-heap! end i)
(let* ((vi (vector-ref v i))
(first-leaf (quotient (+ start end) 2)) ; Can fixnum overflow.
(final-k (let lp ((k i))
(if (>= k first-leaf)
k ; Leaf, so done.
(let* ((k*2-start (+ k (- k start))) ; Don't overflow.
(child1 (+ 1 k*2-start))
(child2 (+ 2 k*2-start))
(child1-val (vector-ref v child1)))
(receive (max-child max-child-val)
(if (< child2 end)
(let ((child2-val (vector-ref v child2)))
(if (elt< child2-val child1-val)
(values child1 child1-val)
(values child2 child2-val)))
(values child1 child1-val))
(cond ((elt< vi max-child-val)
(vector-set! v k max-child-val)
(lp max-child))
(else k)))))))) ; Done.
(vector-set! v final-k vi)))
;; Put the unsorted subvector V[start,end) into heap order.
(let ((first-leaf (quotient (+ start end) 2))) ; Can fixnum overflow.
(do ((i (- first-leaf 1) (- i 1)))
((< i start))
(restore-heap! end i)))
(do ((i (- end 1) (- i 1)))
((<= i start))
(let ((top (vector-ref v start)))
(vector-set! v start (vector-ref v i))
(vector-set! v i top)
(restore-heap! i start))))
;;; Here are the two exported interfaces.
(define (heap-sort! elt< v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(really-heap-sort! elt< v start end))))
(define (heap-sort elt< v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(let ((ans (vector-portion-copy v start end)))
(really-heap-sort! elt< ans 0 (- end start))
ans))))
;;; Notes on porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Bumming the code for speed
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; If you can use a module system to lock up the internal function
;;; REALLY-HEAP-SORT! so that it can only be called from HEAP-SORT and
;;; HEAP-SORT!, then you can hack the internal functions to run with no safety
;;; checks. The safety checks performed by the exported functions HEAP-SORT &
;;; HEAP-SORT! guarantee that there will be no type errors or array-indexing
;;; errors. In addition, with the exception of the two computations of
;;; FIRST-LEAF, all arithmetic will be fixnum arithmetic that never overflows
;;; into bignums, assuming your Scheme provides that you can't allocate an
;;; array so large you might need a bignum to index an element, which is
;;; definitely the case for every implementation with which I am familiar.
;;;
;;; If you want to code up the first-leaf = (quotient (+ s e) 2) computation
;;; so that it will never fixnum overflow when S & E are fixnums, you can do
;;; it this way:
;;; - compute floor(e/2), which throws away e's low-order bit.
;;; - add e's low-order bit to s, and divide that by two:
;;; floor[(s + e mod 2) / 2]
;;; - add these two parts together.
;;; giving you
;;; (+ (quotient e 2)
;;; (quotient (+ s (modulo e 2)) 2))
;;; If we know that e & s are fixnums, and that 0 <= s <= e, then this
;;; can only fixnum-overflow when s = e = max-fixnum. Note that the
;;; two divides and one modulo op can be done very quickly with two
;;; right-shifts and a bitwise and.
;;;
;;; I suspect there has never been a heapsort written in the history of
;;; the world in C that got this detail right.
;;;
;;; If your Scheme has a faster mechanism for handling optional arguments
;;; (e.g., Chez), you should definitely port over to it. Note that argument
;;; defaulting and error-checking are interleaved -- you don't have to
;;; error-check defaulted START/END args to see if they are fixnums that are
;;; legal vector indices for the corresponding vector, etc.

76
scheme/sort/visort.scm Normal file
View File

@ -0,0 +1,76 @@
;;; The SRFI-?? sort package -- stable vector insertion sort -*- Scheme -*-
;;; Copyright (c) 1998 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers 10/98.
;;; Exports:
;;; insert-sort < v [start end] -> vector
;;; insert-sort! < v [start end] -> unspecific
;;;
;;; %insert-sort! is also called from vqsort.scm's quick-sort function.
(define (insert-sort elt< v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(let ((ans (vector-portion-copy v start end)))
(%insert-sort! elt< ans 0 (- end start))
ans))))
(define (insert-sort! < v . maybe-start+end)
(call-with-values
(lambda () (vector-start+end v maybe-start+end))
(lambda (start end)
(%insert-sort! < v start end))))
(define (%insert-sort! elt< v start end)
(do ((i (+ 1 start) (+ i 1))) ; Invariant: [start,i) is sorted.
((>= i end))
(let ((val (vector-ref v i)))
(vector-set! v (let lp ((j i)) ; J is the location of the
(if (<= j start)
start ; "hole" as it bubbles down.
(let* ((j-1 (- j 1))
(vj-1 (vector-ref v j-1)))
(cond ((elt< val vj-1)
(vector-set! v j vj-1)
(lp j-1))
(else j)))))
val))))
;;; Copyright
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code is
;;; Copyright (c) 1998 by Olin Shivers.
;;; The terms are: You may do as you please with this code, as long as
;;; you do not delete this notice or hold me responsible for any outcome
;;; related to its use.
;;;
;;; Blah blah blah. Don't you think source files should contain more lines
;;; of code than copyright notice?
;;; Code tuning & porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; This code is tightly bummed as far as I can go in portable Scheme.
;;;
;;; The code can be converted to use unsafe vector-indexing and
;;; fixnum-specific arithmetic ops -- the safety checks done on entry to
;;; INSERT-SORT and INSERT-SORT! are sufficient to guarantee nothing bad will
;;; happen. However, note that if you alter %INSERT-SORT! to use dangerous
;;; primitives, you must ensure it is only called from clients that guarantee
;;; to observe its preconditions. In the SRFI-?? reference implementation,
;;; %INSERT-SORT! is only called from INSERT-SORT! and the quick-sort code in
;;; vqsort.scm, and the preconditions are guaranteed for these two clients.
;;; This should provide *big* speedups. In fact, all the code bumming I've
;;; done pretty much disappears in the noise unless you have a good compiler
;;; and also can dump the vector-index checks and generic arithmetic -- so
;;; I've really just set things up for you to exploit.
;;;
;;; If your Scheme has a faster mechanism for handling optional arguments
;;; (e.g., Chez), you should definitely port over to it. Note that argument
;;; defaulting and error-checking are interleaved -- you don't have to
;;; error-check defaulted START/END args to see if they are fixnums that are
;;; legal vector indices for the corresponding vector, etc.

238
scheme/sort/vmsort.scm Normal file
View File

@ -0,0 +1,238 @@
;;; The SRFI-32 sort package -- stable vector merge & merge sort -*- Scheme -*-
;;; Copyright (c) 1998 by Olin Shivers.
;;; This code is open-source; see the end of the file for porting and
;;; more copyright information.
;;; Olin Shivers 10/98.
;;; Exports:
;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector
;;; (vector-merge! < v v1 v2 [start0 start1 end1 start2 end2]) -> unspecific
;;;
;;; (vector-merge-sort < v [start end temp]) -> vector
;;; (vector-merge-sort! < v [start end temp]) -> unspecific
;;; Merge
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; (vector-merge < v1 v2 [start1 end1 start2 end2]) -> vector
;;; (vector-merge! < v v1 v2 [start start1 end1 start2 end2]) -> unspecific
;;;
;;; Stable vector merge -- V1's elements come out ahead of equal V2 elements.
(define (vector-merge < v1 v2 . maybe-starts+ends)
(call-with-values
(lambda () (vectors-start+end-2 v1 v2 maybe-starts+ends))
(lambda (start1 end1 start2 end2)
(let ((ans (make-vector (+ (- end1 start1) (- end2 start2)))))
(%vector-merge! < ans v1 v2 0 start1 end1 start2 end2)
ans))))
(define (vector-merge! < v v1 v2 . maybe-starts+ends)
(call-with-values
(lambda ()
(if (pair? maybe-starts+ends)
(values (car maybe-starts+ends)
(cdr maybe-starts+ends))
(values 0
'())))
(lambda (start rest)
(call-with-values
(lambda () (vectors-start+end-2 v1 v2 rest))
(lambda (start1 end1 start2 end2)
(%vector-merge! < v v1 v2 start start1 end1 start2 end2))))))
;;; This routine is not exported. The code is tightly bummed.
;;;
;;; If these preconditions hold, the routine can be bummed to run with
;;; unsafe vector-indexing and fixnum arithmetic ops:
;;; - V V1 V2 are vectors.
;;; - START0 START1 END1 START2 END2 are fixnums.
;;; - (<= 0 START0 END0 (vector-length V),
;;; where end0 = start0 + (end1 - start1) + (end2 - start2)
;;; - (<= 0 START1 END1 (vector-length V1))
;;; - (<= 0 START2 END2 (vector-length V2))
;;; If you put these error checks in the two client procedures above, you can
;;; safely convert this procedure to use unsafe ops -- which is why it isn't
;;; exported. This will provide *huge* speedup.
(define (%vector-merge! elt< v v1 v2 start start1 end1 start2 end2)
(letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to V[I,?].
(let lp ((j j) (i i))
(vector-set! v i (vector-ref fromv j))
(let ((j (+ j 1)))
(if (< j end) (lp j (+ i 1))))))))
(cond ((<= end1 start1) (if (< start2 end2) (vblit v2 start2 start)))
((<= end2 start2) (vblit v1 start1 start))
;; Invariants: I is next index of V to write; X = V1[J]; Y = V2[K].
(else (let lp ((i start)
(j start1) (x (vector-ref v1 start1))
(k start2) (y (vector-ref v2 start2)))
(let ((i1 (+ i 1))) ; "i+1" is a complex number in R4RS!
(if (elt< y x)
(let ((k (+ k 1)))
(vector-set! v i y)
(if (< k end2)
(lp i1 j x k (vector-ref v2 k))
(vblit v1 j i1 end1)))
(let ((j (+ j 1)))
(vector-set! v i x)
(if (< j end1)
(vblit v2 k i1 end2)
(lp i1 j (vector-ref v1 j) k y))))))))))
;;; (vector-merge-sort < v [start end temp]) -> vector
;;; (vector-merge-sort! < v [start end temp]) -> unspecific
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Stable natural vector merge sort
(define (vector-merge-sort! < v . maybe-args)
(call-with-values
(lambda () (vector-start+end v maybe-args))
(lambda (start end)
(let ((temp (if (and (pair? maybe-args) ; kludge
(pair? (cdr maybe-args))
(pair? (cddr maybe-args)))
(caddr maybe-args)
(vector-copy v))))
(%vector-merge-sort! < v start end temp)))))
(define (vector-merge-sort < v . maybe-args)
(let ((ans (vector-copy v)))
(apply vector-merge-sort! < ans maybe-args)
ans))
;;; %VECTOR-MERGE-SORT! is not exported.
;;; Preconditions:
;;; V TEMP vectors
;;; START END fixnums
;;; START END legal indices for V and TEMP
;;; If these preconditions are ensured by the cover functions, you
;;; can safely change this code to use unsafe fixnum arithmetic and vector
;;; indexing ops, for *huge* speedup.
;;; This merge sort is "opportunistic" -- the leaves of the merge tree are
;;; contiguous runs of already sorted elements in the vector. In the best
;;; case -- an already sorted vector -- it runs in linear time. Worst case
;;; is still O(n lg n) time.
(define (%vector-merge-sort! elt< v0 l r temp0)
(define (xor a b) (not (eq? a b)))
;; Merge v1[l,l+len1) and v2[l+len1,l+len1+len2) into target[l,l+len1+len2)
;; Merge left-to-right, so that TEMP may be either V1 or V2
;; (that this is OK takes a little bit of thought).
;; V2=TARGET? is true if V2 and TARGET are the same, which allows
;; merge to punt the final blit half of the time.
(define (merge target v1 v2 l len1 len2 v2=target?)
(letrec ((vblit (lambda (fromv j i end) ; Blit FROMV[J,END) to TARGET[I,?]
(let lp ((j j) (i i)) ; J < END. The final copy.
(vector-set! target i (vector-ref fromv j))
(let ((j (+ j 1)))
(if (< j end) (lp j (+ i 1))))))))
(let* ((r1 (+ l len1))
(r2 (+ r1 len2)))
; Invariants:
(let lp ((n l) ; N is next index of
(j l) (x (vector-ref v1 l)) ; TARGET to write.
(k r1) (y (vector-ref v2 r1))) ; X = V1[J]
(let ((n+1 (+ n 1))) ; Y = V2[K]
(if (elt< y x)
(let ((k (+ k 1)))
(vector-set! target n y)
(if (< k r2)
(lp n+1 j x k (vector-ref v2 k))
(vblit v1 j n+1 r1)))
(let ((j (+ j 1)))
(vector-set! target n x)
(if (< j r1)
(lp n+1 j (vector-ref v1 j) k y)
(if (not v2=target?) (vblit v2 k n+1 r2))))))))))
;; Might hack GETRUN so that if the run is short it pads it out to length
;; 10 with insert sort...
;; Precondition: l < r.
(define (getrun v l r)
(let lp ((i (+ l 1)) (x (vector-ref v l)))
(if (>= i r)
(- i l)
(let ((y (vector-ref v i)))
(if (elt< y x)
(- i l)
(lp (+ i 1) y))))))
;; RECUR: Sort V0[L,L+LEN) for some LEN where 0 < WANT <= LEN <= (R-L).
;; That is, sort *at least* WANT elements in V0 starting at index L.
;; May put the result into either V0[L,L+LEN) or TEMP0[L,L+LEN).
;; Must not alter either vector outside this range.
;; Return:
;; - LEN -- the number of values we sorted
;; - ANSVEC -- the vector holding the value
;; - ANS=V0? -- tells if ANSVEC is V0 or TEMP
;;
;; LP: V[L,L+PFXLEN) holds a sorted prefix of V0.
;; TEMP = if V = V0 then TEMP0 else V0. (I.e., TEMP is the other vec.)
;; PFXLEN2 is a power of 2 <= PFXLEN.
;; Solve RECUR's problem.
(if (< l r) ; Don't try to sort an empty range.
(receive (ignored-len ignored-ansvec ansvec=v0?)
(let recur ((l l) (want (- r l)))
(let ((len (- r l)))
(let lp ((pfxlen (getrun v0 l r)) (pfxlen2 1)
(v v0) (temp temp0)
(v=v0? #t))
(if (or (>= pfxlen want) (= pfxlen len))
(values pfxlen v v=v0?)
(let ((pfxlen2 (let lp ((j pfxlen2))
(let ((j*2 (+ j j)))
(if (<= j pfxlen) (lp j*2) j))))
(tail-len (- len pfxlen)))
;; PFXLEN2 is now the largest power of 2 <= PFXLEN.
;; (Just think of it as being roughly PFXLEN.)
(receive (nr-len nr-vec nrvec=v0?)
(recur (+ pfxlen l) pfxlen2)
(merge temp v nr-vec l pfxlen nr-len
(xor nrvec=v0? v=v0?))
(lp (+ pfxlen nr-len) (+ pfxlen2 pfxlen2)
temp v (not v=v0?))))))))
(if (not ansvec=v0?) (vector-portion-copy! v0 temp0 l r)))))
;;; Copyright
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code is
;;; Copyright (c) 1998 by Olin Shivers.
;;; The terms are: You may do as you please with this code, as long as
;;; you do not delete this notice or hold me responsible for any outcome
;;; related to its use.
;;;
;;; Blah blah blah. Don't you think source files should contain more lines
;;; of code than copyright notice?
;;; Code tuning & porting
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; This code is *tightly* bummed as far as I can go in portable Scheme.
;;;
;;; The two internal primitives that do the real work can be converted to
;;; use unsafe vector-indexing and fixnum-specific arithmetic ops *if* you
;;; alter the four small cover functions to enforce the invariants. This should
;;; provide *big* speedups. In fact, all the code bumming I've done pretty
;;; much disappears in the noise unless you have a good compiler and also
;;; can dump the vector-index checks and generic arithmetic -- so I've really
;;; just set things up for you to exploit.
;;;
;;; The optional-arg parsing, defaulting, and error checking is done with a
;;; portable R4RS macro. But if your Scheme has a faster mechanism (e.g.,
;;; Chez), you should definitely port over to it. Note that argument defaulting
;;; and error-checking are interleaved -- you don't have to error-check
;;; defaulted START/END args to see if they are fixnums that are legal vector
;;; indices for the corresponding vector, etc.