...
Run Format

Source file src/sort/sort.go

Documentation: sort

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run genzfunc.go
     6  
     7  // Package sort provides primitives for sorting slices and user-defined
     8  // collections.
     9  package sort
    10  
    11  // A type, typically a collection, that satisfies sort.Interface can be
    12  // sorted by the routines in this package. The methods require that the
    13  // elements of the collection be enumerated by an integer index.
    14  type Interface interface {
    15  	// Len is the number of elements in the collection.
    16  	Len() int
    17  	// Less reports whether the element with
    18  	// index i should sort before the element with index j.
    19  	Less(i, j int) bool
    20  	// Swap swaps the elements with indexes i and j.
    21  	Swap(i, j int)
    22  }
    23  
    24  // Insertion sort
    25  func insertionSort(data Interface, a, b int) {
    26  	for i := a + 1; i < b; i++ {
    27  		for j := i; j > a && data.Less(j, j-1); j-- {
    28  			data.Swap(j, j-1)
    29  		}
    30  	}
    31  }
    32  
    33  // siftDown implements the heap property on data[lo, hi).
    34  // first is an offset into the array where the root of the heap lies.
    35  func siftDown(data Interface, lo, hi, first int) {
    36  	root := lo
    37  	for {
    38  		child := 2*root + 1
    39  		if child >= hi {
    40  			break
    41  		}
    42  		if child+1 < hi && data.Less(first+child, first+child+1) {
    43  			child++
    44  		}
    45  		if !data.Less(first+root, first+child) {
    46  			return
    47  		}
    48  		data.Swap(first+root, first+child)
    49  		root = child
    50  	}
    51  }
    52  
    53  func heapSort(data Interface, a, b int) {
    54  	first := a
    55  	lo := 0
    56  	hi := b - a
    57  
    58  	// Build heap with greatest element at top.
    59  	for i := (hi - 1) / 2; i >= 0; i-- {
    60  		siftDown(data, i, hi, first)
    61  	}
    62  
    63  	// Pop elements, largest first, into end of data.
    64  	for i := hi - 1; i >= 0; i-- {
    65  		data.Swap(first, first+i)
    66  		siftDown(data, lo, i, first)
    67  	}
    68  }
    69  
    70  // Quicksort, loosely following Bentley and McIlroy,
    71  // ``Engineering a Sort Function,'' SP&E November 1993.
    72  
    73  // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    74  func medianOfThree(data Interface, m1, m0, m2 int) {
    75  	// sort 3 elements
    76  	if data.Less(m1, m0) {
    77  		data.Swap(m1, m0)
    78  	}
    79  	// data[m0] <= data[m1]
    80  	if data.Less(m2, m1) {
    81  		data.Swap(m2, m1)
    82  		// data[m0] <= data[m2] && data[m1] < data[m2]
    83  		if data.Less(m1, m0) {
    84  			data.Swap(m1, m0)
    85  		}
    86  	}
    87  	// now data[m0] <= data[m1] <= data[m2]
    88  }
    89  
    90  func swapRange(data Interface, a, b, n int) {
    91  	for i := 0; i < n; i++ {
    92  		data.Swap(a+i, b+i)
    93  	}
    94  }
    95  
    96  func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
    97  	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
    98  	if hi-lo > 40 {
    99  		// Tukey's ``Ninther,'' median of three medians of three.
   100  		s := (hi - lo) / 8
   101  		medianOfThree(data, lo, lo+s, lo+2*s)
   102  		medianOfThree(data, m, m-s, m+s)
   103  		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
   104  	}
   105  	medianOfThree(data, lo, m, hi-1)
   106  
   107  	// Invariants are:
   108  	//	data[lo] = pivot (set up by ChoosePivot)
   109  	//	data[lo < i < a] < pivot
   110  	//	data[a <= i < b] <= pivot
   111  	//	data[b <= i < c] unexamined
   112  	//	data[c <= i < hi-1] > pivot
   113  	//	data[hi-1] >= pivot
   114  	pivot := lo
   115  	a, c := lo+1, hi-1
   116  
   117  	for ; a < c && data.Less(a, pivot); a++ {
   118  	}
   119  	b := a
   120  	for {
   121  		for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
   122  		}
   123  		for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
   124  		}
   125  		if b >= c {
   126  			break
   127  		}
   128  		// data[b] > pivot; data[c-1] <= pivot
   129  		data.Swap(b, c-1)
   130  		b++
   131  		c--
   132  	}
   133  	// If hi-c<3 then there are duplicates (by property of median of nine).
   134  	// Let be a bit more conservative, and set border to 5.
   135  	protect := hi-c < 5
   136  	if !protect && hi-c < (hi-lo)/4 {
   137  		// Lets test some points for equality to pivot
   138  		dups := 0
   139  		if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
   140  			data.Swap(c, hi-1)
   141  			c++
   142  			dups++
   143  		}
   144  		if !data.Less(b-1, pivot) { // data[b-1] = pivot
   145  			b--
   146  			dups++
   147  		}
   148  		// m-lo = (hi-lo)/2 > 6
   149  		// b-lo > (hi-lo)*3/4-1 > 8
   150  		// ==> m < b ==> data[m] <= pivot
   151  		if !data.Less(m, pivot) { // data[m] = pivot
   152  			data.Swap(m, b-1)
   153  			b--
   154  			dups++
   155  		}
   156  		// if at least 2 points are equal to pivot, assume skewed distribution
   157  		protect = dups > 1
   158  	}
   159  	if protect {
   160  		// Protect against a lot of duplicates
   161  		// Add invariant:
   162  		//	data[a <= i < b] unexamined
   163  		//	data[b <= i < c] = pivot
   164  		for {
   165  			for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
   166  			}
   167  			for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
   168  			}
   169  			if a >= b {
   170  				break
   171  			}
   172  			// data[a] == pivot; data[b-1] < pivot
   173  			data.Swap(a, b-1)
   174  			a++
   175  			b--
   176  		}
   177  	}
   178  	// Swap pivot into middle
   179  	data.Swap(pivot, b-1)
   180  	return b - 1, c
   181  }
   182  
   183  func quickSort(data Interface, a, b, maxDepth int) {
   184  	for b-a > 12 { // Use ShellSort for slices <= 12 elements
   185  		if maxDepth == 0 {
   186  			heapSort(data, a, b)
   187  			return
   188  		}
   189  		maxDepth--
   190  		mlo, mhi := doPivot(data, a, b)
   191  		// Avoiding recursion on the larger subproblem guarantees
   192  		// a stack depth of at most lg(b-a).
   193  		if mlo-a < b-mhi {
   194  			quickSort(data, a, mlo, maxDepth)
   195  			a = mhi // i.e., quickSort(data, mhi, b)
   196  		} else {
   197  			quickSort(data, mhi, b, maxDepth)
   198  			b = mlo // i.e., quickSort(data, a, mlo)
   199  		}
   200  	}
   201  	if b-a > 1 {
   202  		// Do ShellSort pass with gap 6
   203  		// It could be written in this simplified form cause b-a <= 12
   204  		for i := a + 6; i < b; i++ {
   205  			if data.Less(i, i-6) {
   206  				data.Swap(i, i-6)
   207  			}
   208  		}
   209  		insertionSort(data, a, b)
   210  	}
   211  }
   212  
   213  // Sort sorts data.
   214  // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   215  // data.Less and data.Swap. The sort is not guaranteed to be stable.
   216  func Sort(data Interface) {
   217  	n := data.Len()
   218  	quickSort(data, 0, n, maxDepth(n))
   219  }
   220  
   221  // maxDepth returns a threshold at which quicksort should switch
   222  // to heapsort. It returns 2*ceil(lg(n+1)).
   223  func maxDepth(n int) int {
   224  	var depth int
   225  	for i := n; i > 0; i >>= 1 {
   226  		depth++
   227  	}
   228  	return depth * 2
   229  }
   230  
   231  // lessSwap is a pair of Less and Swap function for use with the
   232  // auto-generated func-optimized variant of sort.go in
   233  // zfuncversion.go.
   234  type lessSwap struct {
   235  	Less func(i, j int) bool
   236  	Swap func(i, j int)
   237  }
   238  
   239  type reverse struct {
   240  	// This embedded Interface permits Reverse to use the methods of
   241  	// another Interface implementation.
   242  	Interface
   243  }
   244  
   245  // Less returns the opposite of the embedded implementation's Less method.
   246  func (r reverse) Less(i, j int) bool {
   247  	return r.Interface.Less(j, i)
   248  }
   249  
   250  // Reverse returns the reverse order for data.
   251  func Reverse(data Interface) Interface {
   252  	return &reverse{data}
   253  }
   254  
   255  // IsSorted reports whether data is sorted.
   256  func IsSorted(data Interface) bool {
   257  	n := data.Len()
   258  	for i := n - 1; i > 0; i-- {
   259  		if data.Less(i, i-1) {
   260  			return false
   261  		}
   262  	}
   263  	return true
   264  }
   265  
   266  // Convenience types for common cases
   267  
   268  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   269  type IntSlice []int
   270  
   271  func (p IntSlice) Len() int           { return len(p) }
   272  func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   273  func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   274  
   275  // Sort is a convenience method.
   276  func (p IntSlice) Sort() { Sort(p) }
   277  
   278  // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order
   279  // (not-a-number values are treated as less than other values).
   280  type Float64Slice []float64
   281  
   282  func (p Float64Slice) Len() int           { return len(p) }
   283  func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   284  func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   285  
   286  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   287  func isNaN(f float64) bool {
   288  	return f != f
   289  }
   290  
   291  // Sort is a convenience method.
   292  func (p Float64Slice) Sort() { Sort(p) }
   293  
   294  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   295  type StringSlice []string
   296  
   297  func (p StringSlice) Len() int           { return len(p) }
   298  func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   299  func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   300  
   301  // Sort is a convenience method.
   302  func (p StringSlice) Sort() { Sort(p) }
   303  
   304  // Convenience wrappers for common cases
   305  
   306  // Ints sorts a slice of ints in increasing order.
   307  func Ints(a []int) { Sort(IntSlice(a)) }
   308  
   309  // Float64s sorts a slice of float64s in increasing order
   310  // (not-a-number values are treated as less than other values).
   311  func Float64s(a []float64) { Sort(Float64Slice(a)) }
   312  
   313  // Strings sorts a slice of strings in increasing order.
   314  func Strings(a []string) { Sort(StringSlice(a)) }
   315  
   316  // IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   317  func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   318  
   319  // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order
   320  // (not-a-number values are treated as less than other values).
   321  func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   322  
   323  // StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   324  func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   325  
   326  // Notes on stable sorting:
   327  // The used algorithms are simple and provable correct on all input and use
   328  // only logarithmic additional stack space. They perform well if compared
   329  // experimentally to other stable in-place sorting algorithms.
   330  //
   331  // Remarks on other algorithms evaluated:
   332  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   333  //    Not faster.
   334  //  - GCC's __rotate for block rotations: Not faster.
   335  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   336  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   337  //    The given algorithms are in-place, number of Swap and Assignments
   338  //    grow as n log n but the algorithm is not stable.
   339  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   340  //    V. Raman in Algorithmica (1996) 16, 115-160:
   341  //    This algorithm either needs additional 2n bits or works only if there
   342  //    are enough different elements available to encode some permutations
   343  //    which have to be undone later (so not stable on any input).
   344  //  - All the optimal in-place sorting/merging algorithms I found are either
   345  //    unstable or rely on enough different elements in each step to encode the
   346  //    performed block rearrangements. See also "In-Place Merging Algorithms",
   347  //    Denham Coates-Evely, Department of Computer Science, Kings College,
   348  //    January 2004 and the references in there.
   349  //  - Often "optimal" algorithms are optimal in the number of assignments
   350  //    but Interface has only Swap as operation.
   351  
   352  // Stable sorts data while keeping the original order of equal elements.
   353  //
   354  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
   355  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   356  func Stable(data Interface) {
   357  	stable(data, data.Len())
   358  }
   359  
   360  func stable(data Interface, n int) {
   361  	blockSize := 20 // must be > 0
   362  	a, b := 0, blockSize
   363  	for b <= n {
   364  		insertionSort(data, a, b)
   365  		a = b
   366  		b += blockSize
   367  	}
   368  	insertionSort(data, a, n)
   369  
   370  	for blockSize < n {
   371  		a, b = 0, 2*blockSize
   372  		for b <= n {
   373  			symMerge(data, a, a+blockSize, b)
   374  			a = b
   375  			b += 2 * blockSize
   376  		}
   377  		if m := a + blockSize; m < n {
   378  			symMerge(data, a, m, n)
   379  		}
   380  		blockSize *= 2
   381  	}
   382  }
   383  
   384  // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   385  // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   386  // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   387  // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   388  // Computer Science, pages 714-723. Springer, 2004.
   389  //
   390  // Let M = m-a and N = b-n. Wolog M < N.
   391  // The recursion depth is bound by ceil(log(N+M)).
   392  // The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   393  // The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   394  //
   395  // The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   396  // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
   397  // in the paper carries through for Swap operations, especially as the block
   398  // swapping rotate uses only O(M+N) Swaps.
   399  //
   400  // symMerge assumes non-degenerate arguments: a < m && m < b.
   401  // Having the caller check this condition eliminates many leaf recursion calls,
   402  // which improves performance.
   403  func symMerge(data Interface, a, m, b int) {
   404  	// Avoid unnecessary recursions of symMerge
   405  	// by direct insertion of data[a] into data[m:b]
   406  	// if data[a:m] only contains one element.
   407  	if m-a == 1 {
   408  		// Use binary search to find the lowest index i
   409  		// such that data[i] >= data[a] for m <= i < b.
   410  		// Exit the search loop with i == b in case no such index exists.
   411  		i := m
   412  		j := b
   413  		for i < j {
   414  			h := int(uint(i+j) >> 1)
   415  			if data.Less(h, a) {
   416  				i = h + 1
   417  			} else {
   418  				j = h
   419  			}
   420  		}
   421  		// Swap values until data[a] reaches the position before i.
   422  		for k := a; k < i-1; k++ {
   423  			data.Swap(k, k+1)
   424  		}
   425  		return
   426  	}
   427  
   428  	// Avoid unnecessary recursions of symMerge
   429  	// by direct insertion of data[m] into data[a:m]
   430  	// if data[m:b] only contains one element.
   431  	if b-m == 1 {
   432  		// Use binary search to find the lowest index i
   433  		// such that data[i] > data[m] for a <= i < m.
   434  		// Exit the search loop with i == m in case no such index exists.
   435  		i := a
   436  		j := m
   437  		for i < j {
   438  			h := int(uint(i+j) >> 1)
   439  			if !data.Less(m, h) {
   440  				i = h + 1
   441  			} else {
   442  				j = h
   443  			}
   444  		}
   445  		// Swap values until data[m] reaches the position i.
   446  		for k := m; k > i; k-- {
   447  			data.Swap(k, k-1)
   448  		}
   449  		return
   450  	}
   451  
   452  	mid := int(uint(a+b) >> 1)
   453  	n := mid + m
   454  	var start, r int
   455  	if m > mid {
   456  		start = n - b
   457  		r = mid
   458  	} else {
   459  		start = a
   460  		r = m
   461  	}
   462  	p := n - 1
   463  
   464  	for start < r {
   465  		c := int(uint(start+r) >> 1)
   466  		if !data.Less(p-c, c) {
   467  			start = c + 1
   468  		} else {
   469  			r = c
   470  		}
   471  	}
   472  
   473  	end := n - start
   474  	if start < m && m < end {
   475  		rotate(data, start, m, end)
   476  	}
   477  	if a < start && start < mid {
   478  		symMerge(data, a, start, mid)
   479  	}
   480  	if mid < end && end < b {
   481  		symMerge(data, mid, end, b)
   482  	}
   483  }
   484  
   485  // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   486  // Data of the form 'x u v y' is changed to 'x v u y'.
   487  // Rotate performs at most b-a many calls to data.Swap.
   488  // Rotate assumes non-degenerate arguments: a < m && m < b.
   489  func rotate(data Interface, a, m, b int) {
   490  	i := m - a
   491  	j := b - m
   492  
   493  	for i != j {
   494  		if i > j {
   495  			swapRange(data, m-i, m, j)
   496  			i -= j
   497  		} else {
   498  			swapRange(data, m-i, m+j-i, i)
   499  			j -= i
   500  		}
   501  	}
   502  	// i == j
   503  	swapRange(data, m-i, m, i)
   504  }
   505  
   506  /*
   507  Complexity of Stable Sorting
   508  
   509  
   510  Complexity of block swapping rotation
   511  
   512  Each Swap puts one new element into its correct, final position.
   513  Elements which reach their final position are no longer moved.
   514  Thus block swapping rotation needs |u|+|v| calls to Swaps.
   515  This is best possible as each element might need a move.
   516  
   517  Pay attention when comparing to other optimal algorithms which
   518  typically count the number of assignments instead of swaps:
   519  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   520  rotations uses O(u + v + gcd(u,v)) assignments which is
   521  better than our O(3 * (u+v)) as gcd(u,v) <= u.
   522  
   523  
   524  Stable sorting by SymMerge and BlockSwap rotations
   525  
   526  SymMerg complexity for same size input M = N:
   527  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   528  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   529  
   530  (The following argument does not fuzz over a missing -1 or
   531  other stuff which does not impact the final result).
   532  
   533  Let n = data.Len(). Assume n = 2^k.
   534  
   535  Plain merge sort performs log(n) = k iterations.
   536  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   537  
   538  Thus iteration i of merge sort performs:
   539  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   540  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   541  
   542  In total k = log(n) iterations are performed; so in total:
   543  Calls to Less O(log(n) * n)
   544  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   545     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   546  
   547  
   548  Above results should generalize to arbitrary n = 2^k + p
   549  and should not be influenced by the initial insertion sort phase:
   550  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   551  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   552  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   553  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   554     = O(n * log(n))
   555  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   556  
   557  */
   558  

View as plain text