...
Run Format

Source file src/sort/sort.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	//go:generate go run genzfunc.go
     6	
     7	// Package sort provides primitives for sorting slices and user-defined
     8	// collections.
     9	package sort
    10	
    11	import "reflect"
    12	
    13	// A type, typically a collection, that satisfies sort.Interface can be
    14	// sorted by the routines in this package. The methods require that the
    15	// elements of the collection be enumerated by an integer index.
    16	type Interface interface {
    17		// Len is the number of elements in the collection.
    18		Len() int
    19		// Less reports whether the element with
    20		// index i should sort before the element with index j.
    21		Less(i, j int) bool
    22		// Swap swaps the elements with indexes i and j.
    23		Swap(i, j int)
    24	}
    25	
    26	// Insertion sort
    27	func insertionSort(data Interface, a, b int) {
    28		for i := a + 1; i < b; i++ {
    29			for j := i; j > a && data.Less(j, j-1); j-- {
    30				data.Swap(j, j-1)
    31			}
    32		}
    33	}
    34	
    35	// siftDown implements the heap property on data[lo, hi).
    36	// first is an offset into the array where the root of the heap lies.
    37	func siftDown(data Interface, lo, hi, first int) {
    38		root := lo
    39		for {
    40			child := 2*root + 1
    41			if child >= hi {
    42				break
    43			}
    44			if child+1 < hi && data.Less(first+child, first+child+1) {
    45				child++
    46			}
    47			if !data.Less(first+root, first+child) {
    48				return
    49			}
    50			data.Swap(first+root, first+child)
    51			root = child
    52		}
    53	}
    54	
    55	func heapSort(data Interface, a, b int) {
    56		first := a
    57		lo := 0
    58		hi := b - a
    59	
    60		// Build heap with greatest element at top.
    61		for i := (hi - 1) / 2; i >= 0; i-- {
    62			siftDown(data, i, hi, first)
    63		}
    64	
    65		// Pop elements, largest first, into end of data.
    66		for i := hi - 1; i >= 0; i-- {
    67			data.Swap(first, first+i)
    68			siftDown(data, lo, i, first)
    69		}
    70	}
    71	
    72	// Quicksort, loosely following Bentley and McIlroy,
    73	// ``Engineering a Sort Function,'' SP&E November 1993.
    74	
    75	// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
    76	func medianOfThree(data Interface, m1, m0, m2 int) {
    77		// sort 3 elements
    78		if data.Less(m1, m0) {
    79			data.Swap(m1, m0)
    80		}
    81		// data[m0] <= data[m1]
    82		if data.Less(m2, m1) {
    83			data.Swap(m2, m1)
    84			// data[m0] <= data[m2] && data[m1] < data[m2]
    85			if data.Less(m1, m0) {
    86				data.Swap(m1, m0)
    87			}
    88		}
    89		// now data[m0] <= data[m1] <= data[m2]
    90	}
    91	
    92	func swapRange(data Interface, a, b, n int) {
    93		for i := 0; i < n; i++ {
    94			data.Swap(a+i, b+i)
    95		}
    96	}
    97	
    98	func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
    99		m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
   100		if hi-lo > 40 {
   101			// Tukey's ``Ninther,'' median of three medians of three.
   102			s := (hi - lo) / 8
   103			medianOfThree(data, lo, lo+s, lo+2*s)
   104			medianOfThree(data, m, m-s, m+s)
   105			medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
   106		}
   107		medianOfThree(data, lo, m, hi-1)
   108	
   109		// Invariants are:
   110		//	data[lo] = pivot (set up by ChoosePivot)
   111		//	data[lo < i < a] < pivot
   112		//	data[a <= i < b] <= pivot
   113		//	data[b <= i < c] unexamined
   114		//	data[c <= i < hi-1] > pivot
   115		//	data[hi-1] >= pivot
   116		pivot := lo
   117		a, c := lo+1, hi-1
   118	
   119		for ; a < c && data.Less(a, pivot); a++ {
   120		}
   121		b := a
   122		for {
   123			for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
   124			}
   125			for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
   126			}
   127			if b >= c {
   128				break
   129			}
   130			// data[b] > pivot; data[c-1] <= pivot
   131			data.Swap(b, c-1)
   132			b++
   133			c--
   134		}
   135		// If hi-c<3 then there are duplicates (by property of median of nine).
   136		// Let be a bit more conservative, and set border to 5.
   137		protect := hi-c < 5
   138		if !protect && hi-c < (hi-lo)/4 {
   139			// Lets test some points for equality to pivot
   140			dups := 0
   141			if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
   142				data.Swap(c, hi-1)
   143				c++
   144				dups++
   145			}
   146			if !data.Less(b-1, pivot) { // data[b-1] = pivot
   147				b--
   148				dups++
   149			}
   150			// m-lo = (hi-lo)/2 > 6
   151			// b-lo > (hi-lo)*3/4-1 > 8
   152			// ==> m < b ==> data[m] <= pivot
   153			if !data.Less(m, pivot) { // data[m] = pivot
   154				data.Swap(m, b-1)
   155				b--
   156				dups++
   157			}
   158			// if at least 2 points are equal to pivot, assume skewed distribution
   159			protect = dups > 1
   160		}
   161		if protect {
   162			// Protect against a lot of duplicates
   163			// Add invariant:
   164			//	data[a <= i < b] unexamined
   165			//	data[b <= i < c] = pivot
   166			for {
   167				for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
   168				}
   169				for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
   170				}
   171				if a >= b {
   172					break
   173				}
   174				// data[a] == pivot; data[b-1] < pivot
   175				data.Swap(a, b-1)
   176				a++
   177				b--
   178			}
   179		}
   180		// Swap pivot into middle
   181		data.Swap(pivot, b-1)
   182		return b - 1, c
   183	}
   184	
   185	func quickSort(data Interface, a, b, maxDepth int) {
   186		for b-a > 12 { // Use ShellSort for slices <= 12 elements
   187			if maxDepth == 0 {
   188				heapSort(data, a, b)
   189				return
   190			}
   191			maxDepth--
   192			mlo, mhi := doPivot(data, a, b)
   193			// Avoiding recursion on the larger subproblem guarantees
   194			// a stack depth of at most lg(b-a).
   195			if mlo-a < b-mhi {
   196				quickSort(data, a, mlo, maxDepth)
   197				a = mhi // i.e., quickSort(data, mhi, b)
   198			} else {
   199				quickSort(data, mhi, b, maxDepth)
   200				b = mlo // i.e., quickSort(data, a, mlo)
   201			}
   202		}
   203		if b-a > 1 {
   204			// Do ShellSort pass with gap 6
   205			// It could be written in this simplified form cause b-a <= 12
   206			for i := a + 6; i < b; i++ {
   207				if data.Less(i, i-6) {
   208					data.Swap(i, i-6)
   209				}
   210			}
   211			insertionSort(data, a, b)
   212		}
   213	}
   214	
   215	// Sort sorts data.
   216	// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   217	// data.Less and data.Swap. The sort is not guaranteed to be stable.
   218	func Sort(data Interface) {
   219		n := data.Len()
   220		quickSort(data, 0, n, maxDepth(n))
   221	}
   222	
   223	// maxDepth returns a threshold at which quicksort should switch
   224	// to heapsort. It returns 2*ceil(lg(n+1)).
   225	func maxDepth(n int) int {
   226		var depth int
   227		for i := n; i > 0; i >>= 1 {
   228			depth++
   229		}
   230		return depth * 2
   231	}
   232	
   233	// lessSwap is a pair of Less and Swap function for use with the
   234	// auto-generated func-optimized variant of sort.go in
   235	// zfuncversion.go.
   236	type lessSwap struct {
   237		Less func(i, j int) bool
   238		Swap func(i, j int)
   239	}
   240	
   241	// Slice sorts the provided slice given the provided less function.
   242	//
   243	// The sort is not guaranteed to be stable. For a stable sort, use
   244	// SliceStable.
   245	//
   246	// The function panics if the provided interface is not a slice.
   247	func Slice(slice interface{}, less func(i, j int) bool) {
   248		rv := reflect.ValueOf(slice)
   249		swap := reflect.Swapper(slice)
   250		length := rv.Len()
   251		quickSort_func(lessSwap{less, swap}, 0, length, maxDepth(length))
   252	}
   253	
   254	// SliceStable sorts the provided slice given the provided less
   255	// function while keeping the original order of equal elements.
   256	//
   257	// The function panics if the provided interface is not a slice.
   258	func SliceStable(slice interface{}, less func(i, j int) bool) {
   259		rv := reflect.ValueOf(slice)
   260		swap := reflect.Swapper(slice)
   261		stable_func(lessSwap{less, swap}, rv.Len())
   262	}
   263	
   264	// SliceIsSorted tests whether a slice is sorted.
   265	//
   266	// The function panics if the provided interface is not a slice.
   267	func SliceIsSorted(slice interface{}, less func(i, j int) bool) bool {
   268		rv := reflect.ValueOf(slice)
   269		n := rv.Len()
   270		for i := n - 1; i > 0; i-- {
   271			if less(i, i-1) {
   272				return false
   273			}
   274		}
   275		return true
   276	}
   277	
   278	type reverse struct {
   279		// This embedded Interface permits Reverse to use the methods of
   280		// another Interface implementation.
   281		Interface
   282	}
   283	
   284	// Less returns the opposite of the embedded implementation's Less method.
   285	func (r reverse) Less(i, j int) bool {
   286		return r.Interface.Less(j, i)
   287	}
   288	
   289	// Reverse returns the reverse order for data.
   290	func Reverse(data Interface) Interface {
   291		return &reverse{data}
   292	}
   293	
   294	// IsSorted reports whether data is sorted.
   295	func IsSorted(data Interface) bool {
   296		n := data.Len()
   297		for i := n - 1; i > 0; i-- {
   298			if data.Less(i, i-1) {
   299				return false
   300			}
   301		}
   302		return true
   303	}
   304	
   305	// Convenience types for common cases
   306	
   307	// IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   308	type IntSlice []int
   309	
   310	func (p IntSlice) Len() int           { return len(p) }
   311	func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   312	func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   313	
   314	// Sort is a convenience method.
   315	func (p IntSlice) Sort() { Sort(p) }
   316	
   317	// Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   318	type Float64Slice []float64
   319	
   320	func (p Float64Slice) Len() int           { return len(p) }
   321	func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   322	func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   323	
   324	// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   325	func isNaN(f float64) bool {
   326		return f != f
   327	}
   328	
   329	// Sort is a convenience method.
   330	func (p Float64Slice) Sort() { Sort(p) }
   331	
   332	// StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   333	type StringSlice []string
   334	
   335	func (p StringSlice) Len() int           { return len(p) }
   336	func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   337	func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   338	
   339	// Sort is a convenience method.
   340	func (p StringSlice) Sort() { Sort(p) }
   341	
   342	// Convenience wrappers for common cases
   343	
   344	// Ints sorts a slice of ints in increasing order.
   345	func Ints(a []int) { Sort(IntSlice(a)) }
   346	
   347	// Float64s sorts a slice of float64s in increasing order.
   348	func Float64s(a []float64) { Sort(Float64Slice(a)) }
   349	
   350	// Strings sorts a slice of strings in increasing order.
   351	func Strings(a []string) { Sort(StringSlice(a)) }
   352	
   353	// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   354	func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   355	
   356	// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
   357	func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   358	
   359	// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   360	func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   361	
   362	// Notes on stable sorting:
   363	// The used algorithms are simple and provable correct on all input and use
   364	// only logarithmic additional stack space. They perform well if compared
   365	// experimentally to other stable in-place sorting algorithms.
   366	//
   367	// Remarks on other algorithms evaluated:
   368	//  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   369	//    Not faster.
   370	//  - GCC's __rotate for block rotations: Not faster.
   371	//  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   372	//    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   373	//    The given algorithms are in-place, number of Swap and Assignments
   374	//    grow as n log n but the algorithm is not stable.
   375	//  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
   376	//    V. Raman in Algorithmica (1996) 16, 115-160:
   377	//    This algorithm either needs additional 2n bits or works only if there
   378	//    are enough different elements available to encode some permutations
   379	//    which have to be undone later (so not stable on any input).
   380	//  - All the optimal in-place sorting/merging algorithms I found are either
   381	//    unstable or rely on enough different elements in each step to encode the
   382	//    performed block rearrangements. See also "In-Place Merging Algorithms",
   383	//    Denham Coates-Evely, Department of Computer Science, Kings College,
   384	//    January 2004 and the references in there.
   385	//  - Often "optimal" algorithms are optimal in the number of assignments
   386	//    but Interface has only Swap as operation.
   387	
   388	// Stable sorts data while keeping the original order of equal elements.
   389	//
   390	// It makes one call to data.Len to determine n, O(n*log(n)) calls to
   391	// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   392	func Stable(data Interface) {
   393		stable(data, data.Len())
   394	}
   395	
   396	func stable(data Interface, n int) {
   397		blockSize := 20 // must be > 0
   398		a, b := 0, blockSize
   399		for b <= n {
   400			insertionSort(data, a, b)
   401			a = b
   402			b += blockSize
   403		}
   404		insertionSort(data, a, n)
   405	
   406		for blockSize < n {
   407			a, b = 0, 2*blockSize
   408			for b <= n {
   409				symMerge(data, a, a+blockSize, b)
   410				a = b
   411				b += 2 * blockSize
   412			}
   413			if m := a + blockSize; m < n {
   414				symMerge(data, a, m, n)
   415			}
   416			blockSize *= 2
   417		}
   418	}
   419	
   420	// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   421	// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   422	// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   423	// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   424	// Computer Science, pages 714-723. Springer, 2004.
   425	//
   426	// Let M = m-a and N = b-n. Wolog M < N.
   427	// The recursion depth is bound by ceil(log(N+M)).
   428	// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   429	// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   430	//
   431	// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   432	// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
   433	// in the paper carries through for Swap operations, especially as the block
   434	// swapping rotate uses only O(M+N) Swaps.
   435	//
   436	// symMerge assumes non-degenerate arguments: a < m && m < b.
   437	// Having the caller check this condition eliminates many leaf recursion calls,
   438	// which improves performance.
   439	func symMerge(data Interface, a, m, b int) {
   440		// Avoid unnecessary recursions of symMerge
   441		// by direct insertion of data[a] into data[m:b]
   442		// if data[a:m] only contains one element.
   443		if m-a == 1 {
   444			// Use binary search to find the lowest index i
   445			// such that data[i] >= data[a] for m <= i < b.
   446			// Exit the search loop with i == b in case no such index exists.
   447			i := m
   448			j := b
   449			for i < j {
   450				h := i + (j-i)/2
   451				if data.Less(h, a) {
   452					i = h + 1
   453				} else {
   454					j = h
   455				}
   456			}
   457			// Swap values until data[a] reaches the position before i.
   458			for k := a; k < i-1; k++ {
   459				data.Swap(k, k+1)
   460			}
   461			return
   462		}
   463	
   464		// Avoid unnecessary recursions of symMerge
   465		// by direct insertion of data[m] into data[a:m]
   466		// if data[m:b] only contains one element.
   467		if b-m == 1 {
   468			// Use binary search to find the lowest index i
   469			// such that data[i] > data[m] for a <= i < m.
   470			// Exit the search loop with i == m in case no such index exists.
   471			i := a
   472			j := m
   473			for i < j {
   474				h := i + (j-i)/2
   475				if !data.Less(m, h) {
   476					i = h + 1
   477				} else {
   478					j = h
   479				}
   480			}
   481			// Swap values until data[m] reaches the position i.
   482			for k := m; k > i; k-- {
   483				data.Swap(k, k-1)
   484			}
   485			return
   486		}
   487	
   488		mid := a + (b-a)/2
   489		n := mid + m
   490		var start, r int
   491		if m > mid {
   492			start = n - b
   493			r = mid
   494		} else {
   495			start = a
   496			r = m
   497		}
   498		p := n - 1
   499	
   500		for start < r {
   501			c := start + (r-start)/2
   502			if !data.Less(p-c, c) {
   503				start = c + 1
   504			} else {
   505				r = c
   506			}
   507		}
   508	
   509		end := n - start
   510		if start < m && m < end {
   511			rotate(data, start, m, end)
   512		}
   513		if a < start && start < mid {
   514			symMerge(data, a, start, mid)
   515		}
   516		if mid < end && end < b {
   517			symMerge(data, mid, end, b)
   518		}
   519	}
   520	
   521	// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   522	// Data of the form 'x u v y' is changed to 'x v u y'.
   523	// Rotate performs at most b-a many calls to data.Swap.
   524	// Rotate assumes non-degenerate arguments: a < m && m < b.
   525	func rotate(data Interface, a, m, b int) {
   526		i := m - a
   527		j := b - m
   528	
   529		for i != j {
   530			if i > j {
   531				swapRange(data, m-i, m, j)
   532				i -= j
   533			} else {
   534				swapRange(data, m-i, m+j-i, i)
   535				j -= i
   536			}
   537		}
   538		// i == j
   539		swapRange(data, m-i, m, i)
   540	}
   541	
   542	/*
   543	Complexity of Stable Sorting
   544	
   545	
   546	Complexity of block swapping rotation
   547	
   548	Each Swap puts one new element into its correct, final position.
   549	Elements which reach their final position are no longer moved.
   550	Thus block swapping rotation needs |u|+|v| calls to Swaps.
   551	This is best possible as each element might need a move.
   552	
   553	Pay attention when comparing to other optimal algorithms which
   554	typically count the number of assignments instead of swaps:
   555	E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   556	rotations uses O(u + v + gcd(u,v)) assignments which is
   557	better than our O(3 * (u+v)) as gcd(u,v) <= u.
   558	
   559	
   560	Stable sorting by SymMerge and BlockSwap rotations
   561	
   562	SymMerg complexity for same size input M = N:
   563	Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   564	Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   565	
   566	(The following argument does not fuzz over a missing -1 or
   567	other stuff which does not impact the final result).
   568	
   569	Let n = data.Len(). Assume n = 2^k.
   570	
   571	Plain merge sort performs log(n) = k iterations.
   572	On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   573	
   574	Thus iteration i of merge sort performs:
   575	Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   576	Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   577	
   578	In total k = log(n) iterations are performed; so in total:
   579	Calls to Less O(log(n) * n)
   580	Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   581	   = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   582	
   583	
   584	Above results should generalize to arbitrary n = 2^k + p
   585	and should not be influenced by the initial insertion sort phase:
   586	Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   587	size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   588	Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   589	Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   590	   = O(n * log(n))
   591	Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   592	
   593	*/
   594	

View as plain text