...
Run Format

Source file src/sort/sort.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package sort provides primitives for sorting slices and user-defined
     6	// collections.
     7	package sort
     8	
     9	// A type, typically a collection, that satisfies sort.Interface can be
    10	// sorted by the routines in this package.  The methods require that the
    11	// elements of the collection be enumerated by an integer index.
    12	type Interface interface {
    13		// Len is the number of elements in the collection.
    14		Len() int
    15		// Less reports whether the element with
    16		// index i should sort before the element with index j.
    17		Less(i, j int) bool
    18		// Swap swaps the elements with indexes i and j.
    19		Swap(i, j int)
    20	}
    21	
    22	func min(a, b int) int {
    23		if a < b {
    24			return a
    25		}
    26		return b
    27	}
    28	
    29	// Insertion sort
    30	func insertionSort(data Interface, a, b int) {
    31		for i := a + 1; i < b; i++ {
    32			for j := i; j > a && data.Less(j, j-1); j-- {
    33				data.Swap(j, j-1)
    34			}
    35		}
    36	}
    37	
    38	// siftDown implements the heap property on data[lo, hi).
    39	// first is an offset into the array where the root of the heap lies.
    40	func siftDown(data Interface, lo, hi, first int) {
    41		root := lo
    42		for {
    43			child := 2*root + 1
    44			if child >= hi {
    45				break
    46			}
    47			if child+1 < hi && data.Less(first+child, first+child+1) {
    48				child++
    49			}
    50			if !data.Less(first+root, first+child) {
    51				return
    52			}
    53			data.Swap(first+root, first+child)
    54			root = child
    55		}
    56	}
    57	
    58	func heapSort(data Interface, a, b int) {
    59		first := a
    60		lo := 0
    61		hi := b - a
    62	
    63		// Build heap with greatest element at top.
    64		for i := (hi - 1) / 2; i >= 0; i-- {
    65			siftDown(data, i, hi, first)
    66		}
    67	
    68		// Pop elements, largest first, into end of data.
    69		for i := hi - 1; i >= 0; i-- {
    70			data.Swap(first, first+i)
    71			siftDown(data, lo, i, first)
    72		}
    73	}
    74	
    75	// Quicksort, following Bentley and McIlroy,
    76	// ``Engineering a Sort Function,'' SP&E November 1993.
    77	
    78	// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    79	func medianOfThree(data Interface, m1, m0, m2 int) {
    80		// sort 3 elements
    81		if data.Less(m1, m0) {
    82			data.Swap(m1, m0)
    83		}
    84		// data[m0] <= data[m1]
    85		if data.Less(m2, m1) {
    86			data.Swap(m2, m1)
    87			// data[m0] <= data[m2] && data[m1] < data[m2]
    88			if data.Less(m1, m0) {
    89				data.Swap(m1, m0)
    90			}
    91		}
    92		// now data[m0] <= data[m1] <= data[m2]
    93	}
    94	
    95	func swapRange(data Interface, a, b, n int) {
    96		for i := 0; i < n; i++ {
    97			data.Swap(a+i, b+i)
    98		}
    99	}
   100	
   101	func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
   102		m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
   103		if hi-lo > 40 {
   104			// Tukey's ``Ninther,'' median of three medians of three.
   105			s := (hi - lo) / 8
   106			medianOfThree(data, lo, lo+s, lo+2*s)
   107			medianOfThree(data, m, m-s, m+s)
   108			medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
   109		}
   110		medianOfThree(data, lo, m, hi-1)
   111	
   112		// Invariants are:
   113		//	data[lo] = pivot (set up by ChoosePivot)
   114		//	data[lo <= i < a] = pivot
   115		//	data[a <= i < b] < pivot
   116		//	data[b <= i < c] is unexamined
   117		//	data[c <= i < d] > pivot
   118		//	data[d <= i < hi] = pivot
   119		//
   120		// Once b meets c, can swap the "= pivot" sections
   121		// into the middle of the slice.
   122		pivot := lo
   123		a, b, c, d := lo+1, lo+1, hi, hi
   124		for {
   125			for b < c {
   126				if data.Less(b, pivot) { // data[b] < pivot
   127					b++
   128				} else if !data.Less(pivot, b) { // data[b] = pivot
   129					data.Swap(a, b)
   130					a++
   131					b++
   132				} else {
   133					break
   134				}
   135			}
   136			for b < c {
   137				if data.Less(pivot, c-1) { // data[c-1] > pivot
   138					c--
   139				} else if !data.Less(c-1, pivot) { // data[c-1] = pivot
   140					data.Swap(c-1, d-1)
   141					c--
   142					d--
   143				} else {
   144					break
   145				}
   146			}
   147			if b >= c {
   148				break
   149			}
   150			// data[b] > pivot; data[c-1] < pivot
   151			data.Swap(b, c-1)
   152			b++
   153			c--
   154		}
   155	
   156		n := min(b-a, a-lo)
   157		swapRange(data, lo, b-n, n)
   158	
   159		n = min(hi-d, d-c)
   160		swapRange(data, c, hi-n, n)
   161	
   162		return lo + b - a, hi - (d - c)
   163	}
   164	
   165	func quickSort(data Interface, a, b, maxDepth int) {
   166		for b-a > 7 {
   167			if maxDepth == 0 {
   168				heapSort(data, a, b)
   169				return
   170			}
   171			maxDepth--
   172			mlo, mhi := doPivot(data, a, b)
   173			// Avoiding recursion on the larger subproblem guarantees
   174			// a stack depth of at most lg(b-a).
   175			if mlo-a < b-mhi {
   176				quickSort(data, a, mlo, maxDepth)
   177				a = mhi // i.e., quickSort(data, mhi, b)
   178			} else {
   179				quickSort(data, mhi, b, maxDepth)
   180				b = mlo // i.e., quickSort(data, a, mlo)
   181			}
   182		}
   183		if b-a > 1 {
   184			insertionSort(data, a, b)
   185		}
   186	}
   187	
   188	// Sort sorts data.
   189	// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   190	// data.Less and data.Swap. The sort is not guaranteed to be stable.
   191	func Sort(data Interface) {
   192		// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
   193		n := data.Len()
   194		maxDepth := 0
   195		for i := n; i > 0; i >>= 1 {
   196			maxDepth++
   197		}
   198		maxDepth *= 2
   199		quickSort(data, 0, n, maxDepth)
   200	}
   201	
   202	type reverse struct {
   203		// This embedded Interface permits Reverse to use the methods of
   204		// another Interface implementation.
   205		Interface
   206	}
   207	
   208	// Less returns the opposite of the embedded implementation's Less method.
   209	func (r reverse) Less(i, j int) bool {
   210		return r.Interface.Less(j, i)
   211	}
   212	
   213	// Reverse returns the reverse order for data.
   214	func Reverse(data Interface) Interface {
   215		return &reverse{data}
   216	}
   217	
   218	// IsSorted reports whether data is sorted.
   219	func IsSorted(data Interface) bool {
   220		n := data.Len()
   221		for i := n - 1; i > 0; i-- {
   222			if data.Less(i, i-1) {
   223				return false
   224			}
   225		}
   226		return true
   227	}
   228	
   229	// Convenience types for common cases
   230	
   231	// IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   232	type IntSlice []int
   233	
   234	func (p IntSlice) Len() int           { return len(p) }
   235	func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   236	func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   237	
   238	// Sort is a convenience method.
   239	func (p IntSlice) Sort() { Sort(p) }
   240	
   241	// Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   242	type Float64Slice []float64
   243	
   244	func (p Float64Slice) Len() int           { return len(p) }
   245	func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   246	func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   247	
   248	// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   249	func isNaN(f float64) bool {
   250		return f != f
   251	}
   252	
   253	// Sort is a convenience method.
   254	func (p Float64Slice) Sort() { Sort(p) }
   255	
   256	// StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   257	type StringSlice []string
   258	
   259	func (p StringSlice) Len() int           { return len(p) }
   260	func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   261	func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   262	
   263	// Sort is a convenience method.
   264	func (p StringSlice) Sort() { Sort(p) }
   265	
   266	// Convenience wrappers for common cases
   267	
   268	// Ints sorts a slice of ints in increasing order.
   269	func Ints(a []int) { Sort(IntSlice(a)) }
   270	
   271	// Float64s sorts a slice of float64s in increasing order.
   272	func Float64s(a []float64) { Sort(Float64Slice(a)) }
   273	
   274	// Strings sorts a slice of strings in increasing order.
   275	func Strings(a []string) { Sort(StringSlice(a)) }
   276	
   277	// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   278	func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   279	
   280	// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
   281	func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   282	
   283	// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   284	func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   285	
   286	// Notes on stable sorting:
   287	// The used algorithms are simple and provable correct on all input and use
   288	// only logarithmic additional stack space.  They perform well if compared
   289	// experimentally to other stable in-place sorting algorithms.
   290	//
   291	// Remarks on other algorithms evaluated:
   292	//  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   293	//    Not faster.
   294	//  - GCC's __rotate for block rotations: Not faster.
   295	//  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   296	//    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   297	//    The given algorithms are in-place, number of Swap and Assignments
   298	//    grow as n log n but the algorithm is not stable.
   299	//  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   300	//    V. Raman in Algorithmica (1996) 16, 115-160:
   301	//    This algorithm either needs additional 2n bits or works only if there
   302	//    are enough different elements available to encode some permutations
   303	//    which have to be undone later (so not stable on any input).
   304	//  - All the optimal in-place sorting/merging algorithms I found are either
   305	//    unstable or rely on enough different elements in each step to encode the
   306	//    performed block rearrangements. See also "In-Place Merging Algorithms",
   307	//    Denham Coates-Evely, Department of Computer Science, Kings College,
   308	//    January 2004 and the reverences in there.
   309	//  - Often "optimal" algorithms are optimal in the number of assignments
   310	//    but Interface has only Swap as operation.
   311	
   312	// Stable sorts data while keeping the original order of equal elements.
   313	//
   314	// It makes one call to data.Len to determine n, O(n*log(n)) calls to
   315	// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   316	func Stable(data Interface) {
   317		n := data.Len()
   318		blockSize := 20 // must be > 0
   319		a, b := 0, blockSize
   320		for b <= n {
   321			insertionSort(data, a, b)
   322			a = b
   323			b += blockSize
   324		}
   325		insertionSort(data, a, n)
   326	
   327		for blockSize < n {
   328			a, b = 0, 2*blockSize
   329			for b <= n {
   330				symMerge(data, a, a+blockSize, b)
   331				a = b
   332				b += 2 * blockSize
   333			}
   334			if m := a + blockSize; m < n {
   335				symMerge(data, a, m, n)
   336			}
   337			blockSize *= 2
   338		}
   339	}
   340	
   341	// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   342	// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   343	// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   344	// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   345	// Computer Science, pages 714-723. Springer, 2004.
   346	//
   347	// Let M = m-a and N = b-n. Wolog M < N.
   348	// The recursion depth is bound by ceil(log(N+M)).
   349	// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   350	// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   351	//
   352	// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   353	// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
   354	// in the paper carries through for Swap operations, especially as the block
   355	// swapping rotate uses only O(M+N) Swaps.
   356	//
   357	// symMerge assumes non-degenerate arguments: a < m && m < b.
   358	// Having the caller check this condition eliminates many leaf recursion calls,
   359	// which improves performance.
   360	func symMerge(data Interface, a, m, b int) {
   361		// Avoid unnecessary recursions of symMerge
   362		// by direct insertion of data[a] into data[m:b]
   363		// if data[a:m] only contains one element.
   364		if m-a == 1 {
   365			// Use binary search to find the lowest index i
   366			// such that data[i] >= data[a] for m <= i < b.
   367			// Exit the search loop with i == b in case no such index exists.
   368			i := m
   369			j := b
   370			for i < j {
   371				h := i + (j-i)/2
   372				if data.Less(h, a) {
   373					i = h + 1
   374				} else {
   375					j = h
   376				}
   377			}
   378			// Swap values until data[a] reaches the position before i.
   379			for k := a; k < i-1; k++ {
   380				data.Swap(k, k+1)
   381			}
   382			return
   383		}
   384	
   385		// Avoid unnecessary recursions of symMerge
   386		// by direct insertion of data[m] into data[a:m]
   387		// if data[m:b] only contains one element.
   388		if b-m == 1 {
   389			// Use binary search to find the lowest index i
   390			// such that data[i] > data[m] for a <= i < m.
   391			// Exit the search loop with i == m in case no such index exists.
   392			i := a
   393			j := m
   394			for i < j {
   395				h := i + (j-i)/2
   396				if !data.Less(m, h) {
   397					i = h + 1
   398				} else {
   399					j = h
   400				}
   401			}
   402			// Swap values until data[m] reaches the position i.
   403			for k := m; k > i; k-- {
   404				data.Swap(k, k-1)
   405			}
   406			return
   407		}
   408	
   409		mid := a + (b-a)/2
   410		n := mid + m
   411		var start, r int
   412		if m > mid {
   413			start = n - b
   414			r = mid
   415		} else {
   416			start = a
   417			r = m
   418		}
   419		p := n - 1
   420	
   421		for start < r {
   422			c := start + (r-start)/2
   423			if !data.Less(p-c, c) {
   424				start = c + 1
   425			} else {
   426				r = c
   427			}
   428		}
   429	
   430		end := n - start
   431		if start < m && m < end {
   432			rotate(data, start, m, end)
   433		}
   434		if a < start && start < mid {
   435			symMerge(data, a, start, mid)
   436		}
   437		if mid < end && end < b {
   438			symMerge(data, mid, end, b)
   439		}
   440	}
   441	
   442	// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   443	// Data of the form 'x u v y' is changed to 'x v u y'.
   444	// Rotate performs at most b-a many calls to data.Swap.
   445	// Rotate assumes non-degenerate arguments: a < m && m < b.
   446	func rotate(data Interface, a, m, b int) {
   447		i := m - a
   448		j := b - m
   449	
   450		for i != j {
   451			if i > j {
   452				swapRange(data, m-i, m, j)
   453				i -= j
   454			} else {
   455				swapRange(data, m-i, m+j-i, i)
   456				j -= i
   457			}
   458		}
   459		// i == j
   460		swapRange(data, m-i, m, i)
   461	}
   462	
   463	/*
   464	Complexity of Stable Sorting
   465	
   466	
   467	Complexity of block swapping rotation
   468	
   469	Each Swap puts one new element into its correct, final position.
   470	Elements which reach their final position are no longer moved.
   471	Thus block swapping rotation needs |u|+|v| calls to Swaps.
   472	This is best possible as each element might need a move.
   473	
   474	Pay attention when comparing to other optimal algorithms which
   475	typically count the number of assignments instead of swaps:
   476	E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   477	rotations uses O(u + v + gcd(u,v)) assignments which is
   478	better than our O(3 * (u+v)) as gcd(u,v) <= u.
   479	
   480	
   481	Stable sorting by SymMerge and BlockSwap rotations
   482	
   483	SymMerg complexity for same size input M = N:
   484	Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   485	Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   486	
   487	(The following argument does not fuzz over a missing -1 or
   488	other stuff which does not impact the final result).
   489	
   490	Let n = data.Len(). Assume n = 2^k.
   491	
   492	Plain merge sort performs log(n) = k iterations.
   493	On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   494	
   495	Thus iteration i of merge sort performs:
   496	Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   497	Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   498	
   499	In total k = log(n) iterations are performed; so in total:
   500	Calls to Less O(log(n) * n)
   501	Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   502	   = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   503	
   504	
   505	Above results should generalize to arbitrary n = 2^k + p
   506	and should not be influenced by the initial insertion sort phase:
   507	Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   508	size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   509	Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   510	Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   511	   = O(n * log(n))
   512	Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   513	
   514	*/
   515	

View as plain text