Black Lives Matter. Support the Equal Justice Initiative.

Source file src/sort/sort.go

Documentation: sort

```     1  // Copyright 2009 The Go Authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style
3  // license that can be found in the LICENSE file.
4
5  //go:generate go run genzfunc.go
6
7  // Package sort provides primitives for sorting slices and user-defined collections.
8  package sort
9
10  // An implementation of Interface can be sorted by the routines in this package.
11  // The methods refer to elements of the underlying collection by integer index.
12  type Interface interface {
13  	// Len is the number of elements in the collection.
14  	Len() int
15
16  	// Less reports whether the element with index i
17  	// must sort before the element with index j.
18  	//
19  	// If both Less(i, j) and Less(j, i) are false,
20  	// then the elements at index i and j are considered equal.
21  	// Sort may place equal elements in any order in the final result,
22  	// while Stable preserves the original input order of equal elements.
23  	//
24  	// Less must describe a transitive ordering:
25  	//  - if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
26  	//  - if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
27  	//
28  	// Note that floating-point comparison (the < operator on float32 or float64 values)
29  	// is not a transitive ordering when not-a-number (NaN) values are involved.
30  	// See Float64Slice.Less for a correct implementation for floating-point values.
31  	Less(i, j int) bool
32
33  	// Swap swaps the elements with indexes i and j.
34  	Swap(i, j int)
35  }
36
37  // insertionSort sorts data[a:b] using insertion sort.
38  func insertionSort(data Interface, a, b int) {
39  	for i := a + 1; i < b; i++ {
40  		for j := i; j > a && data.Less(j, j-1); j-- {
41  			data.Swap(j, j-1)
42  		}
43  	}
44  }
45
46  // siftDown implements the heap property on data[lo:hi].
47  // first is an offset into the array where the root of the heap lies.
48  func siftDown(data Interface, lo, hi, first int) {
49  	root := lo
50  	for {
51  		child := 2*root + 1
52  		if child >= hi {
53  			break
54  		}
55  		if child+1 < hi && data.Less(first+child, first+child+1) {
56  			child++
57  		}
58  		if !data.Less(first+root, first+child) {
59  			return
60  		}
61  		data.Swap(first+root, first+child)
62  		root = child
63  	}
64  }
65
66  func heapSort(data Interface, a, b int) {
67  	first := a
68  	lo := 0
69  	hi := b - a
70
71  	// Build heap with greatest element at top.
72  	for i := (hi - 1) / 2; i >= 0; i-- {
73  		siftDown(data, i, hi, first)
74  	}
75
76  	// Pop elements, largest first, into end of data.
77  	for i := hi - 1; i >= 0; i-- {
78  		data.Swap(first, first+i)
79  		siftDown(data, lo, i, first)
80  	}
81  }
82
83  // Quicksort, loosely following Bentley and McIlroy,
84  // ``Engineering a Sort Function,'' SP&E November 1993.
85
86  // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
87  func medianOfThree(data Interface, m1, m0, m2 int) {
88  	// sort 3 elements
89  	if data.Less(m1, m0) {
90  		data.Swap(m1, m0)
91  	}
92  	// data[m0] <= data[m1]
93  	if data.Less(m2, m1) {
94  		data.Swap(m2, m1)
95  		// data[m0] <= data[m2] && data[m1] < data[m2]
96  		if data.Less(m1, m0) {
97  			data.Swap(m1, m0)
98  		}
99  	}
100  	// now data[m0] <= data[m1] <= data[m2]
101  }
102
103  func swapRange(data Interface, a, b, n int) {
104  	for i := 0; i < n; i++ {
105  		data.Swap(a+i, b+i)
106  	}
107  }
108
109  func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
110  	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
111  	if hi-lo > 40 {
112  		// Tukey's ``Ninther,'' median of three medians of three.
113  		s := (hi - lo) / 8
114  		medianOfThree(data, lo, lo+s, lo+2*s)
115  		medianOfThree(data, m, m-s, m+s)
116  		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
117  	}
118  	medianOfThree(data, lo, m, hi-1)
119
120  	// Invariants are:
121  	//	data[lo] = pivot (set up by ChoosePivot)
122  	//	data[lo < i < a] < pivot
123  	//	data[a <= i < b] <= pivot
124  	//	data[b <= i < c] unexamined
125  	//	data[c <= i < hi-1] > pivot
126  	//	data[hi-1] >= pivot
127  	pivot := lo
128  	a, c := lo+1, hi-1
129
130  	for ; a < c && data.Less(a, pivot); a++ {
131  	}
132  	b := a
133  	for {
134  		for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
135  		}
136  		for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
137  		}
138  		if b >= c {
139  			break
140  		}
141  		// data[b] > pivot; data[c-1] <= pivot
142  		data.Swap(b, c-1)
143  		b++
144  		c--
145  	}
146  	// If hi-c<3 then there are duplicates (by property of median of nine).
147  	// Let's be a bit more conservative, and set border to 5.
148  	protect := hi-c < 5
149  	if !protect && hi-c < (hi-lo)/4 {
150  		// Lets test some points for equality to pivot
151  		dups := 0
152  		if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
153  			data.Swap(c, hi-1)
154  			c++
155  			dups++
156  		}
157  		if !data.Less(b-1, pivot) { // data[b-1] = pivot
158  			b--
159  			dups++
160  		}
161  		// m-lo = (hi-lo)/2 > 6
162  		// b-lo > (hi-lo)*3/4-1 > 8
163  		// ==> m < b ==> data[m] <= pivot
164  		if !data.Less(m, pivot) { // data[m] = pivot
165  			data.Swap(m, b-1)
166  			b--
167  			dups++
168  		}
169  		// if at least 2 points are equal to pivot, assume skewed distribution
170  		protect = dups > 1
171  	}
172  	if protect {
173  		// Protect against a lot of duplicates
174  		// Add invariant:
175  		//	data[a <= i < b] unexamined
176  		//	data[b <= i < c] = pivot
177  		for {
178  			for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
179  			}
180  			for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
181  			}
182  			if a >= b {
183  				break
184  			}
185  			// data[a] == pivot; data[b-1] < pivot
186  			data.Swap(a, b-1)
187  			a++
188  			b--
189  		}
190  	}
191  	// Swap pivot into middle
192  	data.Swap(pivot, b-1)
193  	return b - 1, c
194  }
195
196  func quickSort(data Interface, a, b, maxDepth int) {
197  	for b-a > 12 { // Use ShellSort for slices <= 12 elements
198  		if maxDepth == 0 {
199  			heapSort(data, a, b)
200  			return
201  		}
202  		maxDepth--
203  		mlo, mhi := doPivot(data, a, b)
204  		// Avoiding recursion on the larger subproblem guarantees
205  		// a stack depth of at most lg(b-a).
206  		if mlo-a < b-mhi {
207  			quickSort(data, a, mlo, maxDepth)
208  			a = mhi // i.e., quickSort(data, mhi, b)
209  		} else {
210  			quickSort(data, mhi, b, maxDepth)
211  			b = mlo // i.e., quickSort(data, a, mlo)
212  		}
213  	}
214  	if b-a > 1 {
215  		// Do ShellSort pass with gap 6
216  		// It could be written in this simplified form cause b-a <= 12
217  		for i := a + 6; i < b; i++ {
218  			if data.Less(i, i-6) {
219  				data.Swap(i, i-6)
220  			}
221  		}
222  		insertionSort(data, a, b)
223  	}
224  }
225
226  // Sort sorts data.
227  // It makes one call to data.Len to determine n and O(n*log(n)) calls to
228  // data.Less and data.Swap. The sort is not guaranteed to be stable.
229  func Sort(data Interface) {
230  	n := data.Len()
231  	quickSort(data, 0, n, maxDepth(n))
232  }
233
234  // maxDepth returns a threshold at which quicksort should switch
235  // to heapsort. It returns 2*ceil(lg(n+1)).
236  func maxDepth(n int) int {
237  	var depth int
238  	for i := n; i > 0; i >>= 1 {
239  		depth++
240  	}
241  	return depth * 2
242  }
243
244  // lessSwap is a pair of Less and Swap function for use with the
245  // auto-generated func-optimized variant of sort.go in
246  // zfuncversion.go.
247  type lessSwap struct {
248  	Less func(i, j int) bool
249  	Swap func(i, j int)
250  }
251
252  type reverse struct {
253  	// This embedded Interface permits Reverse to use the methods of
254  	// another Interface implementation.
255  	Interface
256  }
257
258  // Less returns the opposite of the embedded implementation's Less method.
259  func (r reverse) Less(i, j int) bool {
260  	return r.Interface.Less(j, i)
261  }
262
263  // Reverse returns the reverse order for data.
264  func Reverse(data Interface) Interface {
265  	return &reverse{data}
266  }
267
268  // IsSorted reports whether data is sorted.
269  func IsSorted(data Interface) bool {
270  	n := data.Len()
271  	for i := n - 1; i > 0; i-- {
272  		if data.Less(i, i-1) {
273  			return false
274  		}
275  	}
276  	return true
277  }
278
279  // Convenience types for common cases
280
281  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
282  type IntSlice []int
283
284  func (x IntSlice) Len() int           { return len(x) }
285  func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] }
286  func (x IntSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
287
288  // Sort is a convenience method: x.Sort() calls Sort(x).
289  func (x IntSlice) Sort() { Sort(x) }
290
291  // Float64Slice implements Interface for a []float64, sorting in increasing order,
292  // with not-a-number (NaN) values ordered before other values.
293  type Float64Slice []float64
294
295  func (x Float64Slice) Len() int { return len(x) }
296
297  // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface.
298  // Note that floating-point comparison by itself is not a transitive relation: it does not
299  // report a consistent ordering for not-a-number (NaN) values.
300  // This implementation of Less places NaN values before any others, by using:
301  //
302  //	x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j]))
303  //
304  func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) }
305  func (x Float64Slice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
306
307  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
308  func isNaN(f float64) bool {
309  	return f != f
310  }
311
312  // Sort is a convenience method: x.Sort() calls Sort(x).
313  func (x Float64Slice) Sort() { Sort(x) }
314
315  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
316  type StringSlice []string
317
318  func (x StringSlice) Len() int           { return len(x) }
319  func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
320  func (x StringSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
321
322  // Sort is a convenience method: x.Sort() calls Sort(x).
323  func (x StringSlice) Sort() { Sort(x) }
324
325  // Convenience wrappers for common cases
326
327  // Ints sorts a slice of ints in increasing order.
328  func Ints(x []int) { Sort(IntSlice(x)) }
329
330  // Float64s sorts a slice of float64s in increasing order.
331  // Not-a-number (NaN) values are ordered before other values.
332  func Float64s(x []float64) { Sort(Float64Slice(x)) }
333
334  // Strings sorts a slice of strings in increasing order.
335  func Strings(x []string) { Sort(StringSlice(x)) }
336
337  // IntsAreSorted reports whether the slice x is sorted in increasing order.
338  func IntsAreSorted(x []int) bool { return IsSorted(IntSlice(x)) }
339
340  // Float64sAreSorted reports whether the slice x is sorted in increasing order,
341  // with not-a-number (NaN) values before any other values.
342  func Float64sAreSorted(x []float64) bool { return IsSorted(Float64Slice(x)) }
343
344  // StringsAreSorted reports whether the slice x is sorted in increasing order.
345  func StringsAreSorted(x []string) bool { return IsSorted(StringSlice(x)) }
346
347  // Notes on stable sorting:
348  // The used algorithms are simple and provable correct on all input and use
349  // only logarithmic additional stack space. They perform well if compared
350  // experimentally to other stable in-place sorting algorithms.
351  //
352  // Remarks on other algorithms evaluated:
353  //  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
354  //    Not faster.
355  //  - GCC's __rotate for block rotations: Not faster.
356  //  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
357  //    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
358  //    The given algorithms are in-place, number of Swap and Assignments
359  //    grow as n log n but the algorithm is not stable.
360  //  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
361  //    V. Raman in Algorithmica (1996) 16, 115-160:
362  //    This algorithm either needs additional 2n bits or works only if there
363  //    are enough different elements available to encode some permutations
364  //    which have to be undone later (so not stable on any input).
365  //  - All the optimal in-place sorting/merging algorithms I found are either
366  //    unstable or rely on enough different elements in each step to encode the
367  //    performed block rearrangements. See also "In-Place Merging Algorithms",
368  //    Denham Coates-Evely, Department of Computer Science, Kings College,
369  //    January 2004 and the references in there.
370  //  - Often "optimal" algorithms are optimal in the number of assignments
371  //    but Interface has only Swap as operation.
372
373  // Stable sorts data while keeping the original order of equal elements.
374  //
375  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
376  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
377  func Stable(data Interface) {
378  	stable(data, data.Len())
379  }
380
381  func stable(data Interface, n int) {
382  	blockSize := 20 // must be > 0
383  	a, b := 0, blockSize
384  	for b <= n {
385  		insertionSort(data, a, b)
386  		a = b
387  		b += blockSize
388  	}
389  	insertionSort(data, a, n)
390
391  	for blockSize < n {
392  		a, b = 0, 2*blockSize
393  		for b <= n {
394  			symMerge(data, a, a+blockSize, b)
395  			a = b
396  			b += 2 * blockSize
397  		}
398  		if m := a + blockSize; m < n {
399  			symMerge(data, a, m, n)
400  		}
401  		blockSize *= 2
402  	}
403  }
404
405  // symMerge merges the two sorted subsequences data[a:m] and data[m:b] using
406  // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
407  // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
408  // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
409  // Computer Science, pages 714-723. Springer, 2004.
410  //
411  // Let M = m-a and N = b-n. Wolog M < N.
412  // The recursion depth is bound by ceil(log(N+M)).
413  // The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
414  // The algorithm needs O((M+N)*log(M)) calls to data.Swap.
415  //
416  // The paper gives O((M+N)*log(M)) as the number of assignments assuming a
417  // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
418  // in the paper carries through for Swap operations, especially as the block
419  // swapping rotate uses only O(M+N) Swaps.
420  //
421  // symMerge assumes non-degenerate arguments: a < m && m < b.
422  // Having the caller check this condition eliminates many leaf recursion calls,
423  // which improves performance.
424  func symMerge(data Interface, a, m, b int) {
425  	// Avoid unnecessary recursions of symMerge
426  	// by direct insertion of data[a] into data[m:b]
427  	// if data[a:m] only contains one element.
428  	if m-a == 1 {
429  		// Use binary search to find the lowest index i
430  		// such that data[i] >= data[a] for m <= i < b.
431  		// Exit the search loop with i == b in case no such index exists.
432  		i := m
433  		j := b
434  		for i < j {
435  			h := int(uint(i+j) >> 1)
436  			if data.Less(h, a) {
437  				i = h + 1
438  			} else {
439  				j = h
440  			}
441  		}
442  		// Swap values until data[a] reaches the position before i.
443  		for k := a; k < i-1; k++ {
444  			data.Swap(k, k+1)
445  		}
446  		return
447  	}
448
449  	// Avoid unnecessary recursions of symMerge
450  	// by direct insertion of data[m] into data[a:m]
451  	// if data[m:b] only contains one element.
452  	if b-m == 1 {
453  		// Use binary search to find the lowest index i
454  		// such that data[i] > data[m] for a <= i < m.
455  		// Exit the search loop with i == m in case no such index exists.
456  		i := a
457  		j := m
458  		for i < j {
459  			h := int(uint(i+j) >> 1)
460  			if !data.Less(m, h) {
461  				i = h + 1
462  			} else {
463  				j = h
464  			}
465  		}
466  		// Swap values until data[m] reaches the position i.
467  		for k := m; k > i; k-- {
468  			data.Swap(k, k-1)
469  		}
470  		return
471  	}
472
473  	mid := int(uint(a+b) >> 1)
474  	n := mid + m
475  	var start, r int
476  	if m > mid {
477  		start = n - b
478  		r = mid
479  	} else {
480  		start = a
481  		r = m
482  	}
483  	p := n - 1
484
485  	for start < r {
486  		c := int(uint(start+r) >> 1)
487  		if !data.Less(p-c, c) {
488  			start = c + 1
489  		} else {
490  			r = c
491  		}
492  	}
493
494  	end := n - start
495  	if start < m && m < end {
496  		rotate(data, start, m, end)
497  	}
498  	if a < start && start < mid {
499  		symMerge(data, a, start, mid)
500  	}
501  	if mid < end && end < b {
502  		symMerge(data, mid, end, b)
503  	}
504  }
505
506  // rotate rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
507  // Data of the form 'x u v y' is changed to 'x v u y'.
508  // rotate performs at most b-a many calls to data.Swap,
509  // and it assumes non-degenerate arguments: a < m && m < b.
510  func rotate(data Interface, a, m, b int) {
511  	i := m - a
512  	j := b - m
513
514  	for i != j {
515  		if i > j {
516  			swapRange(data, m-i, m, j)
517  			i -= j
518  		} else {
519  			swapRange(data, m-i, m+j-i, i)
520  			j -= i
521  		}
522  	}
523  	// i == j
524  	swapRange(data, m-i, m, i)
525  }
526
527  /*
528  Complexity of Stable Sorting
529
530
531  Complexity of block swapping rotation
532
533  Each Swap puts one new element into its correct, final position.
534  Elements which reach their final position are no longer moved.
535  Thus block swapping rotation needs |u|+|v| calls to Swaps.
536  This is best possible as each element might need a move.
537
538  Pay attention when comparing to other optimal algorithms which
539  typically count the number of assignments instead of swaps:
540  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
541  rotations uses O(u + v + gcd(u,v)) assignments which is
542  better than our O(3 * (u+v)) as gcd(u,v) <= u.
543
544
545  Stable sorting by SymMerge and BlockSwap rotations
546
547  SymMerg complexity for same size input M = N:
548  Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
549  Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
550
551  (The following argument does not fuzz over a missing -1 or
552  other stuff which does not impact the final result).
553
554  Let n = data.Len(). Assume n = 2^k.
555
556  Plain merge sort performs log(n) = k iterations.
557  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
558
559  Thus iteration i of merge sort performs:
560  Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
561  Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
562
563  In total k = log(n) iterations are performed; so in total:
564  Calls to Less O(log(n) * n)
565  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
566     = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
567
568
569  Above results should generalize to arbitrary n = 2^k + p
570  and should not be influenced by the initial insertion sort phase:
571  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
572  size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
573  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
574  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
575     = O(n * log(n))
576  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
577
578  */
579
```

View as plain text