...
Run Format

Source file src/pkg/sort/sort.go

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// Package sort provides primitives for sorting slices and user-defined
     6	// collections.
     7	package sort
     8	
     9	// A type, typically a collection, that satisfies sort.Interface can be
    10	// sorted by the routines in this package.  The methods require that the
    11	// elements of the collection be enumerated by an integer index.
    12	type Interface interface {
    13		// Len is the number of elements in the collection.
    14		Len() int
    15		// Less reports whether the element with
    16		// index i should sort before the element with index j.
    17		Less(i, j int) bool
    18		// Swap swaps the elements with indexes i and j.
    19		Swap(i, j int)
    20	}
    21	
    22	func min(a, b int) int {
    23		if a < b {
    24			return a
    25		}
    26		return b
    27	}
    28	
    29	// Insertion sort
    30	func insertionSort(data Interface, a, b int) {
    31		for i := a + 1; i < b; i++ {
    32			for j := i; j > a && data.Less(j, j-1); j-- {
    33				data.Swap(j, j-1)
    34			}
    35		}
    36	}
    37	
    38	// siftDown implements the heap property on data[lo, hi).
    39	// first is an offset into the array where the root of the heap lies.
    40	func siftDown(data Interface, lo, hi, first int) {
    41		root := lo
    42		for {
    43			child := 2*root + 1
    44			if child >= hi {
    45				break
    46			}
    47			if child+1 < hi && data.Less(first+child, first+child+1) {
    48				child++
    49			}
    50			if !data.Less(first+root, first+child) {
    51				return
    52			}
    53			data.Swap(first+root, first+child)
    54			root = child
    55		}
    56	}
    57	
    58	func heapSort(data Interface, a, b int) {
    59		first := a
    60		lo := 0
    61		hi := b - a
    62	
    63		// Build heap with greatest element at top.
    64		for i := (hi - 1) / 2; i >= 0; i-- {
    65			siftDown(data, i, hi, first)
    66		}
    67	
    68		// Pop elements, largest first, into end of data.
    69		for i := hi - 1; i >= 0; i-- {
    70			data.Swap(first, first+i)
    71			siftDown(data, lo, i, first)
    72		}
    73	}
    74	
    75	// Quicksort, following Bentley and McIlroy,
    76	// ``Engineering a Sort Function,'' SP&E November 1993.
    77	
    78	// medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a].
    79	func medianOfThree(data Interface, a, b, c int) {
    80		m0 := b
    81		m1 := a
    82		m2 := c
    83		// bubble sort on 3 elements
    84		if data.Less(m1, m0) {
    85			data.Swap(m1, m0)
    86		}
    87		if data.Less(m2, m1) {
    88			data.Swap(m2, m1)
    89		}
    90		if data.Less(m1, m0) {
    91			data.Swap(m1, m0)
    92		}
    93		// now data[m0] <= data[m1] <= data[m2]
    94	}
    95	
    96	func swapRange(data Interface, a, b, n int) {
    97		for i := 0; i < n; i++ {
    98			data.Swap(a+i, b+i)
    99		}
   100	}
   101	
   102	func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
   103		m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
   104		if hi-lo > 40 {
   105			// Tukey's ``Ninther,'' median of three medians of three.
   106			s := (hi - lo) / 8
   107			medianOfThree(data, lo, lo+s, lo+2*s)
   108			medianOfThree(data, m, m-s, m+s)
   109			medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
   110		}
   111		medianOfThree(data, lo, m, hi-1)
   112	
   113		// Invariants are:
   114		//	data[lo] = pivot (set up by ChoosePivot)
   115		//	data[lo <= i < a] = pivot
   116		//	data[a <= i < b] < pivot
   117		//	data[b <= i < c] is unexamined
   118		//	data[c <= i < d] > pivot
   119		//	data[d <= i < hi] = pivot
   120		//
   121		// Once b meets c, can swap the "= pivot" sections
   122		// into the middle of the slice.
   123		pivot := lo
   124		a, b, c, d := lo+1, lo+1, hi, hi
   125		for {
   126			for b < c {
   127				if data.Less(b, pivot) { // data[b] < pivot
   128					b++
   129				} else if !data.Less(pivot, b) { // data[b] = pivot
   130					data.Swap(a, b)
   131					a++
   132					b++
   133				} else {
   134					break
   135				}
   136			}
   137			for b < c {
   138				if data.Less(pivot, c-1) { // data[c-1] > pivot
   139					c--
   140				} else if !data.Less(c-1, pivot) { // data[c-1] = pivot
   141					data.Swap(c-1, d-1)
   142					c--
   143					d--
   144				} else {
   145					break
   146				}
   147			}
   148			if b >= c {
   149				break
   150			}
   151			// data[b] > pivot; data[c-1] < pivot
   152			data.Swap(b, c-1)
   153			b++
   154			c--
   155		}
   156	
   157		n := min(b-a, a-lo)
   158		swapRange(data, lo, b-n, n)
   159	
   160		n = min(hi-d, d-c)
   161		swapRange(data, c, hi-n, n)
   162	
   163		return lo + b - a, hi - (d - c)
   164	}
   165	
   166	func quickSort(data Interface, a, b, maxDepth int) {
   167		for b-a > 7 {
   168			if maxDepth == 0 {
   169				heapSort(data, a, b)
   170				return
   171			}
   172			maxDepth--
   173			mlo, mhi := doPivot(data, a, b)
   174			// Avoiding recursion on the larger subproblem guarantees
   175			// a stack depth of at most lg(b-a).
   176			if mlo-a < b-mhi {
   177				quickSort(data, a, mlo, maxDepth)
   178				a = mhi // i.e., quickSort(data, mhi, b)
   179			} else {
   180				quickSort(data, mhi, b, maxDepth)
   181				b = mlo // i.e., quickSort(data, a, mlo)
   182			}
   183		}
   184		if b-a > 1 {
   185			insertionSort(data, a, b)
   186		}
   187	}
   188	
   189	// Sort sorts data.
   190	// It makes one call to data.Len to determine n, and O(n*log(n)) calls to
   191	// data.Less and data.Swap. The sort is not guaranteed to be stable.
   192	func Sort(data Interface) {
   193		// Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
   194		n := data.Len()
   195		maxDepth := 0
   196		for i := n; i > 0; i >>= 1 {
   197			maxDepth++
   198		}
   199		maxDepth *= 2
   200		quickSort(data, 0, n, maxDepth)
   201	}
   202	
   203	type reverse struct {
   204		// This embedded Interface permits Reverse to use the methods of
   205		// another Interface implementation.
   206		Interface
   207	}
   208	
   209	// Less returns the opposite of the embedded implementation's Less method.
   210	func (r reverse) Less(i, j int) bool {
   211		return r.Interface.Less(j, i)
   212	}
   213	
   214	// Reverse returns the reverse order for data.
   215	func Reverse(data Interface) Interface {
   216		return &reverse{data}
   217	}
   218	
   219	// IsSorted reports whether data is sorted.
   220	func IsSorted(data Interface) bool {
   221		n := data.Len()
   222		for i := n - 1; i > 0; i-- {
   223			if data.Less(i, i-1) {
   224				return false
   225			}
   226		}
   227		return true
   228	}
   229	
   230	// Convenience types for common cases
   231	
   232	// IntSlice attaches the methods of Interface to []int, sorting in increasing order.
   233	type IntSlice []int
   234	
   235	func (p IntSlice) Len() int           { return len(p) }
   236	func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
   237	func (p IntSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   238	
   239	// Sort is a convenience method.
   240	func (p IntSlice) Sort() { Sort(p) }
   241	
   242	// Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
   243	type Float64Slice []float64
   244	
   245	func (p Float64Slice) Len() int           { return len(p) }
   246	func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
   247	func (p Float64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   248	
   249	// isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
   250	func isNaN(f float64) bool {
   251		return f != f
   252	}
   253	
   254	// Sort is a convenience method.
   255	func (p Float64Slice) Sort() { Sort(p) }
   256	
   257	// StringSlice attaches the methods of Interface to []string, sorting in increasing order.
   258	type StringSlice []string
   259	
   260	func (p StringSlice) Len() int           { return len(p) }
   261	func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
   262	func (p StringSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
   263	
   264	// Sort is a convenience method.
   265	func (p StringSlice) Sort() { Sort(p) }
   266	
   267	// Convenience wrappers for common cases
   268	
   269	// Ints sorts a slice of ints in increasing order.
   270	func Ints(a []int) { Sort(IntSlice(a)) }
   271	
   272	// Float64s sorts a slice of float64s in increasing order.
   273	func Float64s(a []float64) { Sort(Float64Slice(a)) }
   274	
   275	// Strings sorts a slice of strings in increasing order.
   276	func Strings(a []string) { Sort(StringSlice(a)) }
   277	
   278	// IntsAreSorted tests whether a slice of ints is sorted in increasing order.
   279	func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
   280	
   281	// Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
   282	func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
   283	
   284	// StringsAreSorted tests whether a slice of strings is sorted in increasing order.
   285	func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
   286	
   287	// Notes on stable sorting:
   288	// The used algorithms are simple and provable correct on all input and use
   289	// only logarithmic additional stack space.  They perform well if compared
   290	// experimentally to other stable in-place sorting algorithms.
   291	//
   292	// Remarks on other algorithms evaluated:
   293	//  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
   294	//    Not faster.
   295	//  - GCC's __rotate for block rotations: Not faster.
   296	//  - "Practical in-place mergesort" from  Jyrki Katajainen, Tomi A. Pasanen
   297	//    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
   298	//    The given algorithms are in-place, number of Swap and Assignments
   299	//    grow as n log n but the algorithm is not stable.
   300	//  - "Fast Stable In-Plcae Sorting with O(n) Data Moves" J.I. Munro and
   301	//    V. Raman in Algorithmica (1996) 16, 115-160:
   302	//    This algorithm either needs additional 2n bits or works only if there
   303	//    are enough different elements available to encode some permutations
   304	//    which have to be undone later (so not stable an any input).
   305	//  - All the optimal in-place sorting/merging algorithms I found are either
   306	//    unstable or rely on enough different elements in each step to encode the
   307	//    performed block rearrangements. See also "In-Place Merging Algorithms",
   308	//    Denham Coates-Evely, Department of Computer Science, Kings College,
   309	//    January 2004 and the reverences in there.
   310	//  - Often "optimal" algorithms are optimal in the number of assignments
   311	//    but Interface has only Swap as operation.
   312	
   313	// Stable sorts data while keeping the original order of equal elements.
   314	//
   315	// It makes one call to data.Len to determine n, O(n*log(n)) calls to
   316	// data.Less and O(n*log(n)*log(n)) calls to data.Swap.
   317	func Stable(data Interface) {
   318		n := data.Len()
   319		blockSize := 20
   320		a, b := 0, blockSize
   321		for b <= n {
   322			insertionSort(data, a, b)
   323			a = b
   324			b += blockSize
   325		}
   326		insertionSort(data, a, n)
   327	
   328		for blockSize < n {
   329			a, b = 0, 2*blockSize
   330			for b <= n {
   331				symMerge(data, a, a+blockSize, b)
   332				a = b
   333				b += 2 * blockSize
   334			}
   335			symMerge(data, a, a+blockSize, n)
   336			blockSize *= 2
   337		}
   338	}
   339	
   340	// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
   341	// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
   342	// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
   343	// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
   344	// Computer Science, pages 714-723. Springer, 2004.
   345	//
   346	// Let M = m-a and N = b-n. Wolog M < N.
   347	// The recursion depth is bound by ceil(log(N+M)).
   348	// The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
   349	// The algorithm needs O((M+N)*log(M)) calls to data.Swap.
   350	//
   351	// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
   352	// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
   353	// in the paper carries through for Swap operations, especially as the block
   354	// swapping rotate uses only O(M+N) Swaps.
   355	func symMerge(data Interface, a, m, b int) {
   356		if a >= m || m >= b {
   357			return
   358		}
   359	
   360		mid := a + (b-a)/2
   361		n := mid + m
   362		start := 0
   363		if m > mid {
   364			start = n - b
   365			r, p := mid, n-1
   366			for start < r {
   367				c := start + (r-start)/2
   368				if !data.Less(p-c, c) {
   369					start = c + 1
   370				} else {
   371					r = c
   372				}
   373			}
   374		} else {
   375			start = a
   376			r, p := m, n-1
   377			for start < r {
   378				c := start + (r-start)/2
   379				if !data.Less(p-c, c) {
   380					start = c + 1
   381				} else {
   382					r = c
   383				}
   384			}
   385		}
   386		end := n - start
   387		rotate(data, start, m, end)
   388		symMerge(data, a, start, mid)
   389		symMerge(data, mid, end, b)
   390	}
   391	
   392	// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
   393	// Data of the form 'x u v y' is changed to 'x v u y'.
   394	// Rotate performs at most b-a many calls to data.Swap.
   395	func rotate(data Interface, a, m, b int) {
   396		i := m - a
   397		if i == 0 {
   398			return
   399		}
   400		j := b - m
   401		if j == 0 {
   402			return
   403		}
   404	
   405		if i == j {
   406			swapRange(data, a, m, i)
   407			return
   408		}
   409	
   410		p := a + i
   411		for i != j {
   412			if i > j {
   413				swapRange(data, p-i, p, j)
   414				i -= j
   415			} else {
   416				swapRange(data, p-i, p+j-i, i)
   417				j -= i
   418			}
   419		}
   420		swapRange(data, p-i, p, i)
   421	}
   422	
   423	/*
   424	Complexity of Stable Sorting
   425	
   426	
   427	Complexity of block swapping rotation
   428	
   429	Each Swap puts one new element into its correct, final position.
   430	Elements which reach their final position are no longer moved.
   431	Thus block swapping rotation needs |u|+|v| calls to Swaps.
   432	This is best possible as each element might need a move.
   433	
   434	Pay attention when comparing to other optimal algorithms which
   435	typically count the number of assignments instead of swaps:
   436	E.g. the optimal algorithm of Dudzinski and Dydek for in-place
   437	rotations uses O(u + v + gcd(u,v)) assignments which is
   438	better than our O(3 * (u+v)) as gcd(u,v) <= u.
   439	
   440	
   441	Stable sorting by SymMerge and BlockSwap rotations
   442	
   443	SymMerg complexity for same size input M = N:
   444	Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
   445	Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
   446	
   447	(The following argument does not fuzz over a missing -1 or
   448	other stuff which does not impact the final result).
   449	
   450	Let n = data.Len(). Assume n = 2^k.
   451	
   452	Plain merge sort performs log(n) = k iterations.
   453	On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
   454	
   455	Thus iteration i of merge sort performs:
   456	Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
   457	Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
   458	
   459	In total k = log(n) iterations are performed; so in total:
   460	Calls to Less O(log(n) * n)
   461	Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   462	   = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
   463	
   464	
   465	Above results should generalize to arbitrary n = 2^k + p
   466	and should not be influenced by the initial insertion sort phase:
   467	Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
   468	size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
   469	Merge sort iterations start at i = log(bs). With t = log(bs) constant:
   470	Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   471	   = O(n * log(n))
   472	Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
   473	
   474	*/
   475	

View as plain text