...
Run Format

Source file src/sort/sort.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package sort provides primitives for sorting slices and user-defined
     6	// collections.
     7	package sort
     8	
     9	// A type, typically a collection, that satisfies sort.Interface can be
    10	// sorted by the routines in this package. The methods require that the
    11	// elements of the collection be enumerated by an integer index.
    12	type Interface interface {
    13		// Len is the number of elements in the collection.
    14		Len() int
    15		// Less reports whether the element with
    16		// index i should sort before the element with index j.
    17		Less(i, j int) bool
    18		// Swap swaps the elements with indexes i and j.
    19		Swap(i, j int)
    20	}
    21	
    22	// Insertion sort
    23	func insertionSort(data Interface, a, b int) {
    24		for i := a + 1; i < b; i++ {
    25			for j := i; j > a && data.Less(j, j-1); j-- {
    26				data.Swap(j, j-1)
    27			}
    28		}
    29	}
    30	
    31	// siftDown implements the heap property on data[lo, hi).
    32	// first is an offset into the array where the root of the heap lies.
    33	func siftDown(data Interface, lo, hi, first int) {
    34		root := lo
    35		for {
    36			child := 2*root + 1
    37			if child >= hi {
    38				break
    39			}
    40			if child+1 < hi && data.Less(first+child, first+child+1) {
    41				child++
    42			}
    43			if !data.Less(first+root, first+child) {
    44				return
    45			}
    46			data.Swap(first+root, first+child)
    47			root = child
    48		}
    49	}
    50	
    51	func heapSort(data Interface, a, b int) {
    52		first := a
    53		lo := 0
    54		hi := b - a
    55	
    56		// Build heap with greatest element at top.
    57		for i := (hi - 1) / 2; i >= 0; i-- {
    58			siftDown(data, i, hi, first)
    59		}
    60	
    61		// Pop elements, largest first, into end of data.
    62		for i := hi - 1; i >= 0; i-- {
    63			data.Swap(first, first+i)
    64			siftDown(data, lo, i, first)
    65		}
    66	}
    67	
    68	// Quicksort, loosely following Bentley and McIlroy,
    69	// ``Engineering a Sort Function,'' SP&E November 1993.
    70	
    71	// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    72	func medianOfThree(data Interface, m1, m0, m2 int) {
    73		// sort 3 elements
    74		if data.Less(m1, m0) {
    75			data.Swap(m1, m0)
    76		}
    77		// data[m0] <= data[m1]
    78		if data.Less(m2, m1) {
    79			data.Swap(m2, m1)
    80			// data[m0] <= data[m2] && data[m1] < data[m2]
    81			if data.Less(m1, m0) {
    82				data.Swap(m1, m0)
    83			}
    84		}
    85		// now data[m0] <= data[m1] <= data[m2]
    86	}
    87	
    88	func swapRange(data Interface, a, b, n int) {
    89		for i := 0; i < n; i++ {
    90			data.Swap(a+i, b+i)
    91		}
    92	}
    93	
    94	func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
    95		m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
    96		if hi-lo > 40 {
    97			// Tukey's ``Ninther,'' median of three medians of three.
    98			s := (hi - lo) / 8
    99			medianOfThree(data, lo, lo+s, lo+2*s)
   100			medianOfThree(data, m, m-s, m+s)
   101			medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
   102		}
   103		medianOfThree(data, lo, m, hi-1)
   104	
   105		// Invariants are:
   106		//	data[lo] = pivot (set up by ChoosePivot)
   107		//	data[lo < i < a] < pivot
   108		//	data[a <= i < b] <= pivot
   109		//	data[b <= i < c] unexamined
   110		//	data[c <= i < hi-1] > pivot
   111		//	data[hi-1] >= pivot
   112		pivot := lo
   113		a, c := lo+1, hi-1
   114	
   115		for ; a < c && data.Less(a, pivot); a++ {
   116		}
   117		b := a
   118		for {
   119			for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
   120			}
   121			for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
   122			}
   123			if b >= c {
   124				break
   125			}
   126			// data[b] > pivot; data[c-1] <= pivot
   127			data.Swap(b, c-1)
   128			b++
   129			c--
   130		}
   131		// If hi-c<3 then there are duplicates (by property of median of nine).
   132		// Let be a bit more conservative, and set border to 5.
   133		protect := hi-c < 5
   134		if !protect && hi-c < (hi-lo)/4 {
   135			// Lets test some points for equality to pivot
   136			dups := 0
   137			if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
   138				data.Swap(c, hi-1)
   139				c++
   140				dups++
   141			}
   142			if !data.Less(b-1, pivot) { // data[b-1] = pivot
   143				b--
   144				dups++
   145			}
   146			// m-lo = (hi-lo)/2 > 6
   147			// b-lo > (hi-lo)*3/4-1 > 8
   148			// ==> m < b ==> data[m] <= pivot
   149			if !data.Less(m, pivot) { // data[m] = pivot
   150				data.Swap(m, b-1)
   151				b--
   152				dups++
   153			}
   154			// if at least 2 points are equal to pivot, assume skewed distribution
   155			protect = dups > 1
   156		}
   157		if protect {
   158			// Protect against a lot of duplicates
   159			// Add invariant:
   160			//	data[a <= i < b] unexamined
   161			//	data[b <= i < c] = pivot
   162			for {
   163				for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
   164				}
   165				for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
   166				}
   167				if a >= b {
   168					break
   169				}
   170				// data[a] == pivot; data[b-1] < pivot
   171				data.Swap(a, b-1)
   172				a++
   173				b--
   174			}
   175		}
   176		// Swap pivot into middle
   177		data.Swap(pivot, b-1)
   178		return b - 1, c
   179	}
   180	
   181	func quickSort(data Interface, a, b, maxDepth int) {
   182		for b-a > 12 { // Use ShellSort for slices <= 12 elements
   183			if maxDepth == 0 {
   184				heapSort(data, a, b)
   185				return
   186			}
   187			maxDepth--
   188			mlo, mhi := doPivot(data, a, b)
   189			// Avoiding recursion on the larger subproblem guarantees
   190			// a stack depth of at most lg(b-a).
   191			if mlo-a < b-mhi {
   192				quickSort(data, a, mlo, maxDepth)
   193				a = mhi // i.e., quickSort(data, mhi, b)
   194			} else {
   195				quickSort(data, mhi, b, maxDepth)
   196				b = mlo // i.e., quickSort(data, a, mlo)
   197			}
   198		}
   199		if b-a > 1 {
   200			// Do ShellSort pass with gap 6
   201			// It could be written in this simplified form cause b-a <= 12
   202			for i := a + 6; i < b; i++ {
   203				if data.Less(i, i-6) {
   204					data.Swap(i, i-6)
   205				}
   206			}
   207			insertionSort(data, a, b)
   208		}
   209	}
   210	
   211	// Sort sorts data.
   212	// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   213	// data.Less and data.Swap. The sort is not guaranteed to be stable.
   214	func Sort(data Interface) {
   215		// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
   216		n := data.Len()
   217		maxDepth := 0
   218		for i := n; i > 0; i >>= 1 {
   219			maxDepth++
   220		}
   221		maxDepth *= 2
   222		quickSort(data, 0, n, maxDepth)
   223	}
   224	
   225	type reverse struct {
   226		// This embedded Interface permits Reverse to use the methods of
   227		// another Interface implementation.
   228		Interface
   229	}
   230	
   231	// Less returns the opposite of the embedded implementation's Less method.
   232	func (r reverse) Less(i, j int) bool {
   233		return r.Interface.Less(j, i)
   234	}
   235	
   236	// Reverse returns the reverse order for data.
   237	func Reverse(data Interface) Interface {
   238		return &reverse{data}
   239	}
   240	
   241	// IsSorted reports whether data is sorted.
   242	func IsSorted(data Interface) bool {
   243		n := data.Len()
   244		for i := n - 1; i > 0; i-- {
   245			if data.Less(i, i-1) {
   246				return false
   247			}
   248		}
   249		return true
   250	}
   251	
   252	// Convenience types for common cases
   253	
   254	// IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   255	type IntSlice []int
   256	
   257	func (p IntSlice) Len() int           { return len(p) }
   258	func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   259	func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   260	
   261	// Sort is a convenience method.
   262	func (p IntSlice) Sort() { Sort(p) }
   263	
   264	// Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   265	type Float64Slice []float64
   266	
   267	func (p Float64Slice) Len() int           { return len(p) }
   268	func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   269	func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   270	
   271	// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   272	func isNaN(f float64) bool {
   273		return f != f
   274	}
   275	
   276	// Sort is a convenience method.
   277	func (p Float64Slice) Sort() { Sort(p) }
   278	
   279	// StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   280	type StringSlice []string
   281	
   282	func (p StringSlice) Len() int           { return len(p) }
   283	func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   284	func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   285	
   286	// Sort is a convenience method.
   287	func (p StringSlice) Sort() { Sort(p) }
   288	
   289	// Convenience wrappers for common cases
   290	
   291	// Ints sorts a slice of ints in increasing order.
   292	func Ints(a []int) { Sort(IntSlice(a)) }
   293	
   294	// Float64s sorts a slice of float64s in increasing order.
   295	func Float64s(a []float64) { Sort(Float64Slice(a)) }
   296	
   297	// Strings sorts a slice of strings in increasing order.
   298	func Strings(a []string) { Sort(StringSlice(a)) }
   299	
   300	// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   301	func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   302	
   303	// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
   304	func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   305	
   306	// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   307	func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   308	
   309	// Notes on stable sorting:
   310	// The used algorithms are simple and provable correct on all input and use
   311	// only logarithmic additional stack space. They perform well if compared
   312	// experimentally to other stable in-place sorting algorithms.
   313	//
   314	// Remarks on other algorithms evaluated:
   315	//  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   316	//    Not faster.
   317	//  - GCC's __rotate for block rotations: Not faster.
   318	//  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   319	//    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   320	//    The given algorithms are in-place, number of Swap and Assignments
   321	//    grow as n log n but the algorithm is not stable.
   322	//  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   323	//    V. Raman in Algorithmica (1996) 16, 115-160:
   324	//    This algorithm either needs additional 2n bits or works only if there
   325	//    are enough different elements available to encode some permutations
   326	//    which have to be undone later (so not stable on any input).
   327	//  - All the optimal in-place sorting/merging algorithms I found are either
   328	//    unstable or rely on enough different elements in each step to encode the
   329	//    performed block rearrangements. See also "In-Place Merging Algorithms",
   330	//    Denham Coates-Evely, Department of Computer Science, Kings College,
   331	//    January 2004 and the references in there.
   332	//  - Often "optimal" algorithms are optimal in the number of assignments
   333	//    but Interface has only Swap as operation.
   334	
   335	// Stable sorts data while keeping the original order of equal elements.
   336	//
   337	// It makes one call to data.Len to determine n, O(n*log(n)) calls to
   338	// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   339	func Stable(data Interface) {
   340		n := data.Len()
   341		blockSize := 20 // must be > 0
   342		a, b := 0, blockSize
   343		for b <= n {
   344			insertionSort(data, a, b)
   345			a = b
   346			b += blockSize
   347		}
   348		insertionSort(data, a, n)
   349	
   350		for blockSize < n {
   351			a, b = 0, 2*blockSize
   352			for b <= n {
   353				symMerge(data, a, a+blockSize, b)
   354				a = b
   355				b += 2 * blockSize
   356			}
   357			if m := a + blockSize; m < n {
   358				symMerge(data, a, m, n)
   359			}
   360			blockSize *= 2
   361		}
   362	}
   363	
   364	// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   365	// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   366	// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   367	// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   368	// Computer Science, pages 714-723. Springer, 2004.
   369	//
   370	// Let M = m-a and N = b-n. Wolog M < N.
   371	// The recursion depth is bound by ceil(log(N+M)).
   372	// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   373	// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   374	//
   375	// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   376	// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
   377	// in the paper carries through for Swap operations, especially as the block
   378	// swapping rotate uses only O(M+N) Swaps.
   379	//
   380	// symMerge assumes non-degenerate arguments: a < m && m < b.
   381	// Having the caller check this condition eliminates many leaf recursion calls,
   382	// which improves performance.
   383	func symMerge(data Interface, a, m, b int) {
   384		// Avoid unnecessary recursions of symMerge
   385		// by direct insertion of data[a] into data[m:b]
   386		// if data[a:m] only contains one element.
   387		if m-a == 1 {
   388			// Use binary search to find the lowest index i
   389			// such that data[i] >= data[a] for m <= i < b.
   390			// Exit the search loop with i == b in case no such index exists.
   391			i := m
   392			j := b
   393			for i < j {
   394				h := i + (j-i)/2
   395				if data.Less(h, a) {
   396					i = h + 1
   397				} else {
   398					j = h
   399				}
   400			}
   401			// Swap values until data[a] reaches the position before i.
   402			for k := a; k < i-1; k++ {
   403				data.Swap(k, k+1)
   404			}
   405			return
   406		}
   407	
   408		// Avoid unnecessary recursions of symMerge
   409		// by direct insertion of data[m] into data[a:m]
   410		// if data[m:b] only contains one element.
   411		if b-m == 1 {
   412			// Use binary search to find the lowest index i
   413			// such that data[i] > data[m] for a <= i < m.
   414			// Exit the search loop with i == m in case no such index exists.
   415			i := a
   416			j := m
   417			for i < j {
   418				h := i + (j-i)/2
   419				if !data.Less(m, h) {
   420					i = h + 1
   421				} else {
   422					j = h
   423				}
   424			}
   425			// Swap values until data[m] reaches the position i.
   426			for k := m; k > i; k-- {
   427				data.Swap(k, k-1)
   428			}
   429			return
   430		}
   431	
   432		mid := a + (b-a)/2
   433		n := mid + m
   434		var start, r int
   435		if m > mid {
   436			start = n - b
   437			r = mid
   438		} else {
   439			start = a
   440			r = m
   441		}
   442		p := n - 1
   443	
   444		for start < r {
   445			c := start + (r-start)/2
   446			if !data.Less(p-c, c) {
   447				start = c + 1
   448			} else {
   449				r = c
   450			}
   451		}
   452	
   453		end := n - start
   454		if start < m && m < end {
   455			rotate(data, start, m, end)
   456		}
   457		if a < start && start < mid {
   458			symMerge(data, a, start, mid)
   459		}
   460		if mid < end && end < b {
   461			symMerge(data, mid, end, b)
   462		}
   463	}
   464	
   465	// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   466	// Data of the form 'x u v y' is changed to 'x v u y'.
   467	// Rotate performs at most b-a many calls to data.Swap.
   468	// Rotate assumes non-degenerate arguments: a < m && m < b.
   469	func rotate(data Interface, a, m, b int) {
   470		i := m - a
   471		j := b - m
   472	
   473		for i != j {
   474			if i > j {
   475				swapRange(data, m-i, m, j)
   476				i -= j
   477			} else {
   478				swapRange(data, m-i, m+j-i, i)
   479				j -= i
   480			}
   481		}
   482		// i == j
   483		swapRange(data, m-i, m, i)
   484	}
   485	
   486	/*
   487	Complexity of Stable Sorting
   488	
   489	
   490	Complexity of block swapping rotation
   491	
   492	Each Swap puts one new element into its correct, final position.
   493	Elements which reach their final position are no longer moved.
   494	Thus block swapping rotation needs |u|+|v| calls to Swaps.
   495	This is best possible as each element might need a move.
   496	
   497	Pay attention when comparing to other optimal algorithms which
   498	typically count the number of assignments instead of swaps:
   499	E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   500	rotations uses O(u + v + gcd(u,v)) assignments which is
   501	better than our O(3 * (u+v)) as gcd(u,v) <= u.
   502	
   503	
   504	Stable sorting by SymMerge and BlockSwap rotations
   505	
   506	SymMerg complexity for same size input M = N:
   507	Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   508	Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   509	
   510	(The following argument does not fuzz over a missing -1 or
   511	other stuff which does not impact the final result).
   512	
   513	Let n = data.Len(). Assume n = 2^k.
   514	
   515	Plain merge sort performs log(n) = k iterations.
   516	On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   517	
   518	Thus iteration i of merge sort performs:
   519	Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   520	Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   521	
   522	In total k = log(n) iterations are performed; so in total:
   523	Calls to Less O(log(n) * n)
   524	Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   525	   = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   526	
   527	
   528	Above results should generalize to arbitrary n = 2^k + p
   529	and should not be influenced by the initial insertion sort phase:
   530	Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   531	size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   532	Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   533	Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   534	   = O(n * log(n))
   535	Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   536	
   537	*/
   538	

View as plain text