...
Run Format

Source file src/sort/sort.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package sort provides primitives for sorting slices and user-defined
     6	// collections.
     7	package sort
     8	
     9	// A type, typically a collection, that satisfies sort.Interface can be
    10	// sorted by the routines in this package.  The methods require that the
    11	// elements of the collection be enumerated by an integer index.
    12	type Interface interface {
    13		// Len is the number of elements in the collection.
    14		Len() int
    15		// Less reports whether the element with
    16		// index i should sort before the element with index j.
    17		Less(i, j int) bool
    18		// Swap swaps the elements with indexes i and j.
    19		Swap(i, j int)
    20	}
    21	
    22	func min(a, b int) int {
    23		if a < b {
    24			return a
    25		}
    26		return b
    27	}
    28	
    29	// Insertion sort
    30	func insertionSort(data Interface, a, b int) {
    31		for i := a + 1; i < b; i++ {
    32			for j := i; j > a && data.Less(j, j-1); j-- {
    33				data.Swap(j, j-1)
    34			}
    35		}
    36	}
    37	
    38	// siftDown implements the heap property on data[lo, hi).
    39	// first is an offset into the array where the root of the heap lies.
    40	func siftDown(data Interface, lo, hi, first int) {
    41		root := lo
    42		for {
    43			child := 2*root + 1
    44			if child >= hi {
    45				break
    46			}
    47			if child+1 < hi && data.Less(first+child, first+child+1) {
    48				child++
    49			}
    50			if !data.Less(first+root, first+child) {
    51				return
    52			}
    53			data.Swap(first+root, first+child)
    54			root = child
    55		}
    56	}
    57	
    58	func heapSort(data Interface, a, b int) {
    59		first := a
    60		lo := 0
    61		hi := b - a
    62	
    63		// Build heap with greatest element at top.
    64		for i := (hi - 1) / 2; i >= 0; i-- {
    65			siftDown(data, i, hi, first)
    66		}
    67	
    68		// Pop elements, largest first, into end of data.
    69		for i := hi - 1; i >= 0; i-- {
    70			data.Swap(first, first+i)
    71			siftDown(data, lo, i, first)
    72		}
    73	}
    74	
    75	// Quicksort, loosely following Bentley and McIlroy,
    76	// ``Engineering a Sort Function,'' SP&E November 1993.
    77	
    78	// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    79	func medianOfThree(data Interface, m1, m0, m2 int) {
    80		// sort 3 elements
    81		if data.Less(m1, m0) {
    82			data.Swap(m1, m0)
    83		}
    84		// data[m0] <= data[m1]
    85		if data.Less(m2, m1) {
    86			data.Swap(m2, m1)
    87			// data[m0] <= data[m2] && data[m1] < data[m2]
    88			if data.Less(m1, m0) {
    89				data.Swap(m1, m0)
    90			}
    91		}
    92		// now data[m0] <= data[m1] <= data[m2]
    93	}
    94	
    95	func swapRange(data Interface, a, b, n int) {
    96		for i := 0; i < n; i++ {
    97			data.Swap(a+i, b+i)
    98		}
    99	}
   100	
   101	func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
   102		m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
   103		if hi-lo > 40 {
   104			// Tukey's ``Ninther,'' median of three medians of three.
   105			s := (hi - lo) / 8
   106			medianOfThree(data, lo, lo+s, lo+2*s)
   107			medianOfThree(data, m, m-s, m+s)
   108			medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
   109		}
   110		medianOfThree(data, lo, m, hi-1)
   111	
   112		// Invariants are:
   113		//	data[lo] = pivot (set up by ChoosePivot)
   114		//	data[lo < i < a] < pivot
   115		//	data[a <= i < b] <= pivot
   116		//	data[b <= i < c] unexamined
   117		//	data[c <= i < hi-1] > pivot
   118		//	data[hi-1] >= pivot
   119		pivot := lo
   120		a, c := lo+1, hi-1
   121	
   122		for ; a < c && data.Less(a, pivot); a++ {
   123		}
   124		b := a
   125		for {
   126			for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
   127			}
   128			for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
   129			}
   130			if b >= c {
   131				break
   132			}
   133			// data[b] > pivot; data[c-1] <= pivot
   134			data.Swap(b, c-1)
   135			b++
   136			c--
   137		}
   138		// If hi-c<3 then there are duplicates (by property of median of nine).
   139		// Let be a bit more conservative, and set border to 5.
   140		protect := hi-c < 5
   141		if !protect && hi-c < (hi-lo)/4 {
   142			// Lets test some points for equality to pivot
   143			dups := 0
   144			if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
   145				data.Swap(c, hi-1)
   146				c++
   147				dups++
   148			}
   149			if !data.Less(b-1, pivot) { // data[b-1] = pivot
   150				b--
   151				dups++
   152			}
   153			// m-lo = (hi-lo)/2 > 6
   154			// b-lo > (hi-lo)*3/4-1 > 8
   155			// ==> m < b ==> data[m] <= pivot
   156			if !data.Less(m, pivot) { // data[m] = pivot
   157				data.Swap(m, b-1)
   158				b--
   159				dups++
   160			}
   161			// if at least 2 points are equal to pivot, assume skewed distribution
   162			protect = dups > 1
   163		}
   164		if protect {
   165			// Protect against a lot of duplicates
   166			// Add invariant:
   167			//	data[a <= i < b] unexamined
   168			//	data[b <= i < c] = pivot
   169			for {
   170				for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
   171				}
   172				for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
   173				}
   174				if a >= b {
   175					break
   176				}
   177				// data[a] == pivot; data[b-1] < pivot
   178				data.Swap(a, b-1)
   179				a++
   180				b--
   181			}
   182		}
   183		// Swap pivot into middle
   184		data.Swap(pivot, b-1)
   185		return b - 1, c
   186	}
   187	
   188	func quickSort(data Interface, a, b, maxDepth int) {
   189		for b-a > 12 { // Use ShellSort for slices <= 12 elements
   190			if maxDepth == 0 {
   191				heapSort(data, a, b)
   192				return
   193			}
   194			maxDepth--
   195			mlo, mhi := doPivot(data, a, b)
   196			// Avoiding recursion on the larger subproblem guarantees
   197			// a stack depth of at most lg(b-a).
   198			if mlo-a < b-mhi {
   199				quickSort(data, a, mlo, maxDepth)
   200				a = mhi // i.e., quickSort(data, mhi, b)
   201			} else {
   202				quickSort(data, mhi, b, maxDepth)
   203				b = mlo // i.e., quickSort(data, a, mlo)
   204			}
   205		}
   206		if b-a > 1 {
   207			// Do ShellSort pass with gap 6
   208			// It could be written in this simplified form cause b-a <= 12
   209			for i := a + 6; i < b; i++ {
   210				if data.Less(i, i-6) {
   211					data.Swap(i, i-6)
   212				}
   213			}
   214			insertionSort(data, a, b)
   215		}
   216	}
   217	
   218	// Sort sorts data.
   219	// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   220	// data.Less and data.Swap. The sort is not guaranteed to be stable.
   221	func Sort(data Interface) {
   222		// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
   223		n := data.Len()
   224		maxDepth := 0
   225		for i := n; i > 0; i >>= 1 {
   226			maxDepth++
   227		}
   228		maxDepth *= 2
   229		quickSort(data, 0, n, maxDepth)
   230	}
   231	
   232	type reverse struct {
   233		// This embedded Interface permits Reverse to use the methods of
   234		// another Interface implementation.
   235		Interface
   236	}
   237	
   238	// Less returns the opposite of the embedded implementation's Less method.
   239	func (r reverse) Less(i, j int) bool {
   240		return r.Interface.Less(j, i)
   241	}
   242	
   243	// Reverse returns the reverse order for data.
   244	func Reverse(data Interface) Interface {
   245		return &reverse{data}
   246	}
   247	
   248	// IsSorted reports whether data is sorted.
   249	func IsSorted(data Interface) bool {
   250		n := data.Len()
   251		for i := n - 1; i > 0; i-- {
   252			if data.Less(i, i-1) {
   253				return false
   254			}
   255		}
   256		return true
   257	}
   258	
   259	// Convenience types for common cases
   260	
   261	// IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   262	type IntSlice []int
   263	
   264	func (p IntSlice) Len() int           { return len(p) }
   265	func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   266	func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   267	
   268	// Sort is a convenience method.
   269	func (p IntSlice) Sort() { Sort(p) }
   270	
   271	// Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   272	type Float64Slice []float64
   273	
   274	func (p Float64Slice) Len() int           { return len(p) }
   275	func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   276	func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   277	
   278	// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   279	func isNaN(f float64) bool {
   280		return f != f
   281	}
   282	
   283	// Sort is a convenience method.
   284	func (p Float64Slice) Sort() { Sort(p) }
   285	
   286	// StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   287	type StringSlice []string
   288	
   289	func (p StringSlice) Len() int           { return len(p) }
   290	func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   291	func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   292	
   293	// Sort is a convenience method.
   294	func (p StringSlice) Sort() { Sort(p) }
   295	
   296	// Convenience wrappers for common cases
   297	
   298	// Ints sorts a slice of ints in increasing order.
   299	func Ints(a []int) { Sort(IntSlice(a)) }
   300	
   301	// Float64s sorts a slice of float64s in increasing order.
   302	func Float64s(a []float64) { Sort(Float64Slice(a)) }
   303	
   304	// Strings sorts a slice of strings in increasing order.
   305	func Strings(a []string) { Sort(StringSlice(a)) }
   306	
   307	// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   308	func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   309	
   310	// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
   311	func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   312	
   313	// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   314	func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   315	
   316	// Notes on stable sorting:
   317	// The used algorithms are simple and provable correct on all input and use
   318	// only logarithmic additional stack space.  They perform well if compared
   319	// experimentally to other stable in-place sorting algorithms.
   320	//
   321	// Remarks on other algorithms evaluated:
   322	//  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   323	//    Not faster.
   324	//  - GCC's __rotate for block rotations: Not faster.
   325	//  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   326	//    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   327	//    The given algorithms are in-place, number of Swap and Assignments
   328	//    grow as n log n but the algorithm is not stable.
   329	//  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   330	//    V. Raman in Algorithmica (1996) 16, 115-160:
   331	//    This algorithm either needs additional 2n bits or works only if there
   332	//    are enough different elements available to encode some permutations
   333	//    which have to be undone later (so not stable on any input).
   334	//  - All the optimal in-place sorting/merging algorithms I found are either
   335	//    unstable or rely on enough different elements in each step to encode the
   336	//    performed block rearrangements. See also "In-Place Merging Algorithms",
   337	//    Denham Coates-Evely, Department of Computer Science, Kings College,
   338	//    January 2004 and the reverences in there.
   339	//  - Often "optimal" algorithms are optimal in the number of assignments
   340	//    but Interface has only Swap as operation.
   341	
   342	// Stable sorts data while keeping the original order of equal elements.
   343	//
   344	// It makes one call to data.Len to determine n, O(n*log(n)) calls to
   345	// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   346	func Stable(data Interface) {
   347		n := data.Len()
   348		blockSize := 20 // must be > 0
   349		a, b := 0, blockSize
   350		for b <= n {
   351			insertionSort(data, a, b)
   352			a = b
   353			b += blockSize
   354		}
   355		insertionSort(data, a, n)
   356	
   357		for blockSize < n {
   358			a, b = 0, 2*blockSize
   359			for b <= n {
   360				symMerge(data, a, a+blockSize, b)
   361				a = b
   362				b += 2 * blockSize
   363			}
   364			if m := a + blockSize; m < n {
   365				symMerge(data, a, m, n)
   366			}
   367			blockSize *= 2
   368		}
   369	}
   370	
   371	// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   372	// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   373	// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   374	// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   375	// Computer Science, pages 714-723. Springer, 2004.
   376	//
   377	// Let M = m-a and N = b-n. Wolog M < N.
   378	// The recursion depth is bound by ceil(log(N+M)).
   379	// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   380	// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   381	//
   382	// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   383	// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
   384	// in the paper carries through for Swap operations, especially as the block
   385	// swapping rotate uses only O(M+N) Swaps.
   386	//
   387	// symMerge assumes non-degenerate arguments: a < m && m < b.
   388	// Having the caller check this condition eliminates many leaf recursion calls,
   389	// which improves performance.
   390	func symMerge(data Interface, a, m, b int) {
   391		// Avoid unnecessary recursions of symMerge
   392		// by direct insertion of data[a] into data[m:b]
   393		// if data[a:m] only contains one element.
   394		if m-a == 1 {
   395			// Use binary search to find the lowest index i
   396			// such that data[i] >= data[a] for m <= i < b.
   397			// Exit the search loop with i == b in case no such index exists.
   398			i := m
   399			j := b
   400			for i < j {
   401				h := i + (j-i)/2
   402				if data.Less(h, a) {
   403					i = h + 1
   404				} else {
   405					j = h
   406				}
   407			}
   408			// Swap values until data[a] reaches the position before i.
   409			for k := a; k < i-1; k++ {
   410				data.Swap(k, k+1)
   411			}
   412			return
   413		}
   414	
   415		// Avoid unnecessary recursions of symMerge
   416		// by direct insertion of data[m] into data[a:m]
   417		// if data[m:b] only contains one element.
   418		if b-m == 1 {
   419			// Use binary search to find the lowest index i
   420			// such that data[i] > data[m] for a <= i < m.
   421			// Exit the search loop with i == m in case no such index exists.
   422			i := a
   423			j := m
   424			for i < j {
   425				h := i + (j-i)/2
   426				if !data.Less(m, h) {
   427					i = h + 1
   428				} else {
   429					j = h
   430				}
   431			}
   432			// Swap values until data[m] reaches the position i.
   433			for k := m; k > i; k-- {
   434				data.Swap(k, k-1)
   435			}
   436			return
   437		}
   438	
   439		mid := a + (b-a)/2
   440		n := mid + m
   441		var start, r int
   442		if m > mid {
   443			start = n - b
   444			r = mid
   445		} else {
   446			start = a
   447			r = m
   448		}
   449		p := n - 1
   450	
   451		for start < r {
   452			c := start + (r-start)/2
   453			if !data.Less(p-c, c) {
   454				start = c + 1
   455			} else {
   456				r = c
   457			}
   458		}
   459	
   460		end := n - start
   461		if start < m && m < end {
   462			rotate(data, start, m, end)
   463		}
   464		if a < start && start < mid {
   465			symMerge(data, a, start, mid)
   466		}
   467		if mid < end && end < b {
   468			symMerge(data, mid, end, b)
   469		}
   470	}
   471	
   472	// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   473	// Data of the form 'x u v y' is changed to 'x v u y'.
   474	// Rotate performs at most b-a many calls to data.Swap.
   475	// Rotate assumes non-degenerate arguments: a < m && m < b.
   476	func rotate(data Interface, a, m, b int) {
   477		i := m - a
   478		j := b - m
   479	
   480		for i != j {
   481			if i > j {
   482				swapRange(data, m-i, m, j)
   483				i -= j
   484			} else {
   485				swapRange(data, m-i, m+j-i, i)
   486				j -= i
   487			}
   488		}
   489		// i == j
   490		swapRange(data, m-i, m, i)
   491	}
   492	
   493	/*
   494	Complexity of Stable Sorting
   495	
   496	
   497	Complexity of block swapping rotation
   498	
   499	Each Swap puts one new element into its correct, final position.
   500	Elements which reach their final position are no longer moved.
   501	Thus block swapping rotation needs |u|+|v| calls to Swaps.
   502	This is best possible as each element might need a move.
   503	
   504	Pay attention when comparing to other optimal algorithms which
   505	typically count the number of assignments instead of swaps:
   506	E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   507	rotations uses O(u + v + gcd(u,v)) assignments which is
   508	better than our O(3 * (u+v)) as gcd(u,v) <= u.
   509	
   510	
   511	Stable sorting by SymMerge and BlockSwap rotations
   512	
   513	SymMerg complexity for same size input M = N:
   514	Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   515	Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   516	
   517	(The following argument does not fuzz over a missing -1 or
   518	other stuff which does not impact the final result).
   519	
   520	Let n = data.Len(). Assume n = 2^k.
   521	
   522	Plain merge sort performs log(n) = k iterations.
   523	On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   524	
   525	Thus iteration i of merge sort performs:
   526	Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   527	Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   528	
   529	In total k = log(n) iterations are performed; so in total:
   530	Calls to Less O(log(n) * n)
   531	Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   532	   = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   533	
   534	
   535	Above results should generalize to arbitrary n = 2^k + p
   536	and should not be influenced by the initial insertion sort phase:
   537	Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   538	size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   539	Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   540	Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   541	   = O(n * log(n))
   542	Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   543	
   544	*/
   545	

View as plain text