186 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Scheme
		
	
	
	
			
		
		
	
	
			186 lines
		
	
	
		
			6.5 KiB
		
	
	
	
		
			Scheme
		
	
	
	
| ;;; The SRFI-32 sort package -- delete neighboring duplicate elts
 | |
| ;;; Copyright (c) 1998 by Olin Shivers.
 | |
| ;;; This code is open-source; see the end of the file for porting and
 | |
| ;;; more copyright information.
 | |
| ;;; Olin Shivers 11/98.
 | |
| 
 | |
| ;;; Problem:
 | |
| ;;; vector-delete-neighbor-dups pushes N stack frames, where N is the number
 | |
| ;;; of elements in the answer vector. This is arguably a very efficient thing
 | |
| ;;; to do, but it might blow out on a system with a limited stack but a big
 | |
| ;;; heap. We could rewrite this to "chunk" up answers in temp vectors if we
 | |
| ;;; push more than a certain number of frames, then allocate a final answer,
 | |
| ;;; copying all the chunks into the answer. But it's much more complex code.
 | |
| 
 | |
| ;;; Exports:
 | |
| ;;; (list-delete-neighbor-dups  = lis) -> list
 | |
| ;;; (list-delete-neighbor-dups! = lis) -> list
 | |
| ;;; (vector-delete-neighbor-dups  = v [start end]) -> vector
 | |
| ;;; (vector-delete-neighbor-dups! = v [start end]) -> end'
 | |
| 
 | |
| ;;; These procedures delete adjacent duplicate elements from a list or
 | |
| ;;; a vector, using a given element equality procedure. The first or leftmost
 | |
| ;;; element of a run of equal elements is the one that survives. The list
 | |
| ;;; or vector is not otherwise disordered.
 | |
| ;;;
 | |
| ;;; These procedures are linear time -- much faster than the O(n^2) general 
 | |
| ;;; duplicate-elt deletors that do not assume any "bunching" of elements.
 | |
| ;;; If you want to delete duplicate elements from a large list or vector,
 | |
| ;;; sort the elements to bring equal items together, then use one of these
 | |
| ;;; procedures -- for a total time of O(n lg n). 
 | |
| 
 | |
| ;;; LIST-DELETE-NEIGHBOR-DUPS
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| ;;; Below are multiple versions of the LIST-DELETE-NEIGHBOR-DUPS procedure,
 | |
| ;;; from simple to complex. RECUR's contract: Strip off any leading X's from 
 | |
| ;;; LIS, and return that list neighbor-dup-deleted.
 | |
| ;;;
 | |
| ;;; The final version
 | |
| ;;; - shares a common subtail between the input & output list, up to 1024 
 | |
| ;;;   elements;
 | |
| ;;; - Needs no more than 1024 stack frames.
 | |
| 
 | |
| ;;; Simplest version. 
 | |
| ;;; - Always allocates a fresh list / never shares storage.
 | |
| ;;; - Needs N stack frames, if answer is length N.
 | |
| (define (list-delete-neighbor-dups = lis)
 | |
|   (if (pair? lis)
 | |
|       (let ((x0 (car lis)))
 | |
| 	(cons x0 (let recur ((x0 x0) (xs (cdr lis)))
 | |
| 		   (if (pair? xs)
 | |
| 		       (let ((x1  (car xs))
 | |
| 			     (x2+ (cdr xs)))
 | |
| 			  (if (= x0 x1)
 | |
| 			      (recur x0 x2+) ; Loop, actually.
 | |
| 			      (cons x1 (recur x1 x2+))))
 | |
| 		       xs))))
 | |
|       lis))
 | |
| 
 | |
| ;;; This version tries to use cons cells from input by sharing longest
 | |
| ;;; common tail between input & output. Still needs N stack frames, for ans
 | |
| ;;; of length N.
 | |
| (define (list-delete-neighbor-dups = lis)
 | |
|   (if (pair? lis)
 | |
|       (let* ((x0 (car lis))
 | |
| 	     (xs (cdr lis))
 | |
| 	     (ans (let recur ((x0 x0) (xs xs))
 | |
| 		    (if (pair? xs)
 | |
| 			(let ((x1  (car xs))
 | |
| 			      (x2+ (cdr xs)))
 | |
| 			  (if (= x0 x1)
 | |
| 			      (recur x0 x2+)
 | |
| 			      (let ((ans-tail (recur x1 x2+)))
 | |
| 				(if (eq? ans-tail x2+) xs
 | |
| 				    (cons x1 ans-tail)))))
 | |
| 			xs))))
 | |
| 	(if (eq? ans xs) lis (cons x0 ans)))
 | |
| 
 | |
|       lis))
 | |
| 
 | |
| ;;; LIST-DELETE-NEIGHBOR-DUPS!
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| ;;; This code runs in constant list space, constant stack, and also
 | |
| ;;; does only the minimum SET-CDR!'s necessary.
 | |
| 
 | |
| (define (list-delete-neighbor-dups! = lis)
 | |
|   (if (pair? lis)
 | |
|       (let lp1 ((prev lis) (prev-elt (car lis)) (lis (cdr lis)))
 | |
| 	(if (pair? lis)
 | |
| 	    (let ((lis-elt (car lis))
 | |
| 		  (next (cdr lis)))
 | |
| 	      (if (= prev-elt lis-elt)
 | |
| 
 | |
| 		  ;; We found the first elts of a run of dups, so we know
 | |
| 		  ;; we're going to have to do a SET-CDR!. Scan to the end of
 | |
| 		  ;; the run, do the SET-CDR!, and loop on LP1.
 | |
| 		  (let lp2 ((lis next))
 | |
| 		    (if (pair? lis)
 | |
| 			(let ((lis-elt (car lis))
 | |
| 			      (next (cdr lis)))
 | |
| 			  (if (= prev-elt lis-elt)
 | |
| 			      (lp2 next)
 | |
| 			      (begin (set-cdr! prev lis)
 | |
| 				     (lp1 lis lis-elt next))))
 | |
| 			(set-cdr! prev lis)))	; Ran off end => quit.
 | |
| 
 | |
| 		  (lp1 lis lis-elt next))))))
 | |
|   lis)
 | |
| 
 | |
| 
 | |
| (define (vector-delete-neighbor-dups elt= v . maybe-start+end)
 | |
|   (call-with-values
 | |
|    (lambda () (vector-start+end v maybe-start+end))
 | |
|    (lambda (start end)
 | |
|      (if (< start end)
 | |
| 	 (let* ((x (vector-ref v start))
 | |
| 		(ans (let recur ((x x) (i start) (j 1))
 | |
| 		       (if (< i end)
 | |
| 			   (let ((y (vector-ref v i))
 | |
| 				 (nexti (+ i 1)))
 | |
| 			     (if (elt= x y)
 | |
| 				 (recur x nexti j)
 | |
| 				 (let ((ansvec (recur y nexti (+ j 1))))
 | |
| 				   (vector-set! ansvec j y)
 | |
| 				   ansvec)))
 | |
| 			   (make-vector j)))))
 | |
| 	   (vector-set! ans 0 x)
 | |
| 	   ans)
 | |
| 	 '#()))))
 | |
| 
 | |
| 
 | |
| ;;; Packs the surviving elements to the left, in range [start,end'),
 | |
| ;;; and returns END'.
 | |
| (define (vector-delete-neighbor-dups! elt= v . maybe-start+end)
 | |
|   (call-with-values
 | |
|    (lambda () (vector-start+end v maybe-start+end))
 | |
|    (lambda (start end)
 | |
| 
 | |
|      (if (>= start end)
 | |
| 	 end
 | |
| 	 ;; To eliminate unnecessary copying (read elt i then write the value 
 | |
| 	 ;; back at index i), we scan until we find the first dup.
 | |
| 	 (let skip ((j start) (vj (vector-ref v start)))
 | |
| 	   (let ((j+1 (+ j 1)))
 | |
| 	     (if (>= j+1 end)
 | |
| 		 end
 | |
| 		 (let ((vj+1 (vector-ref v j+1)))
 | |
| 		   (if (not (elt= vj vj+1))
 | |
| 		       (skip j+1 vj+1)
 | |
| 
 | |
| 		       ;; OK -- j & j+1 are dups, so we're committed to moving
 | |
| 		       ;; data around. In lp2, v[start,j] is what we've done;
 | |
| 		       ;; v[k,end) is what we have yet to handle.
 | |
| 		       (let lp2 ((j j) (vj vj) (k (+ j 2)))
 | |
| 			 (let lp3 ((k k))
 | |
| 			   (if (>= k end)
 | |
| 			       (+ j 1) ; Done.
 | |
| 			       (let ((vk (vector-ref v k))
 | |
| 				     (k+1 (+ k 1)))
 | |
| 				 (if (elt= vj vk)
 | |
| 				     (lp3 k+1)
 | |
| 				     (let ((j+1 (+ j 1)))
 | |
| 				       (vector-set! v j+1 vk)
 | |
| 				       (lp2 j+1 vk k+1))))))))))))))))
 | |
| 		    
 | |
| ;;; Copyright
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| ;;; This code is
 | |
| ;;;     Copyright (c) 1998 by Olin Shivers.
 | |
| ;;; The terms are: You may do as you please with this code, as long as
 | |
| ;;; you do not delete this notice or hold me responsible for any outcome
 | |
| ;;; related to its use.
 | |
| ;;;
 | |
| ;;; Blah blah blah. Don't you think source files should contain more lines
 | |
| ;;; of code than copyright notice?
 | |
| ;;;
 | |
| ;;; Code porting
 | |
| ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 | |
| ;;;
 | |
| ;;; If your Scheme has a faster mechanism for handling optional arguments
 | |
| ;;; (e.g., Chez), you should definitely port over to it. Note that argument
 | |
| ;;; defaulting and error-checking are interleaved -- you don't have to
 | |
| ;;; error-check defaulted START/END args to see if they are fixnums that are
 | |
| ;;; legal vector indices for the corresponding vector, etc.
 | |
| 
 | |
| 
 |