Source file src/cmd/compile/internal/ssa/cse.go

Documentation: cmd/compile/internal/ssa

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"fmt"
    11  	"sort"
    12  )
    13  
    14  // cse does common-subexpression elimination on the Function.
    15  // Values are just relinked, nothing is deleted. A subsequent deadcode
    16  // pass is required to actually remove duplicate expressions.
    17  func cse(f *Func) {
    18  	// Two values are equivalent if they satisfy the following definition:
    19  	// equivalent(v, w):
    20  	//   v.op == w.op
    21  	//   v.type == w.type
    22  	//   v.aux == w.aux
    23  	//   v.auxint == w.auxint
    24  	//   len(v.args) == len(w.args)
    25  	//   v.block == w.block if v.op == OpPhi
    26  	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
    27  
    28  	// The algorithm searches for a partition of f's values into
    29  	// equivalence classes using the above definition.
    30  	// It starts with a coarse partition and iteratively refines it
    31  	// until it reaches a fixed point.
    32  
    33  	// Make initial coarse partitions by using a subset of the conditions above.
    34  	a := make([]*Value, 0, f.NumValues())
    35  	if f.auxmap == nil {
    36  		f.auxmap = auxmap{}
    37  	}
    38  	for _, b := range f.Blocks {
    39  		for _, v := range b.Values {
    40  			if v.Type.IsMemory() {
    41  				continue // memory values can never cse
    42  			}
    43  			if f.auxmap[v.Aux] == 0 {
    44  				f.auxmap[v.Aux] = int32(len(f.auxmap)) + 1
    45  			}
    46  			a = append(a, v)
    47  		}
    48  	}
    49  	partition := partitionValues(a, f.auxmap)
    50  
    51  	// map from value id back to eqclass id
    52  	valueEqClass := make([]ID, f.NumValues())
    53  	for _, b := range f.Blocks {
    54  		for _, v := range b.Values {
    55  			// Use negative equivalence class #s for unique values.
    56  			valueEqClass[v.ID] = -v.ID
    57  		}
    58  	}
    59  	var pNum ID = 1
    60  	for _, e := range partition {
    61  		if f.pass.debug > 1 && len(e) > 500 {
    62  			fmt.Printf("CSE.large partition (%d): ", len(e))
    63  			for j := 0; j < 3; j++ {
    64  				fmt.Printf("%s ", e[j].LongString())
    65  			}
    66  			fmt.Println()
    67  		}
    68  
    69  		for _, v := range e {
    70  			valueEqClass[v.ID] = pNum
    71  		}
    72  		if f.pass.debug > 2 && len(e) > 1 {
    73  			fmt.Printf("CSE.partition #%d:", pNum)
    74  			for _, v := range e {
    75  				fmt.Printf(" %s", v.String())
    76  			}
    77  			fmt.Printf("\n")
    78  		}
    79  		pNum++
    80  	}
    81  
    82  	// Split equivalence classes at points where they have
    83  	// non-equivalent arguments.  Repeat until we can't find any
    84  	// more splits.
    85  	var splitPoints []int
    86  	byArgClass := new(partitionByArgClass) // reuseable partitionByArgClass to reduce allocations
    87  	for {
    88  		changed := false
    89  
    90  		// partition can grow in the loop. By not using a range loop here,
    91  		// we process new additions as they arrive, avoiding O(n^2) behavior.
    92  		for i := 0; i < len(partition); i++ {
    93  			e := partition[i]
    94  
    95  			if opcodeTable[e[0].Op].commutative {
    96  				// Order the first two args before comparison.
    97  				for _, v := range e {
    98  					if valueEqClass[v.Args[0].ID] > valueEqClass[v.Args[1].ID] {
    99  						v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   100  					}
   101  				}
   102  			}
   103  
   104  			// Sort by eq class of arguments.
   105  			byArgClass.a = e
   106  			byArgClass.eqClass = valueEqClass
   107  			sort.Sort(byArgClass)
   108  
   109  			// Find split points.
   110  			splitPoints = append(splitPoints[:0], 0)
   111  			for j := 1; j < len(e); j++ {
   112  				v, w := e[j-1], e[j]
   113  				// Note: commutative args already correctly ordered by byArgClass.
   114  				eqArgs := true
   115  				for k, a := range v.Args {
   116  					b := w.Args[k]
   117  					if valueEqClass[a.ID] != valueEqClass[b.ID] {
   118  						eqArgs = false
   119  						break
   120  					}
   121  				}
   122  				if !eqArgs {
   123  					splitPoints = append(splitPoints, j)
   124  				}
   125  			}
   126  			if len(splitPoints) == 1 {
   127  				continue // no splits, leave equivalence class alone.
   128  			}
   129  
   130  			// Move another equivalence class down in place of e.
   131  			partition[i] = partition[len(partition)-1]
   132  			partition = partition[:len(partition)-1]
   133  			i--
   134  
   135  			// Add new equivalence classes for the parts of e we found.
   136  			splitPoints = append(splitPoints, len(e))
   137  			for j := 0; j < len(splitPoints)-1; j++ {
   138  				f := e[splitPoints[j]:splitPoints[j+1]]
   139  				if len(f) == 1 {
   140  					// Don't add singletons.
   141  					valueEqClass[f[0].ID] = -f[0].ID
   142  					continue
   143  				}
   144  				for _, v := range f {
   145  					valueEqClass[v.ID] = pNum
   146  				}
   147  				pNum++
   148  				partition = append(partition, f)
   149  			}
   150  			changed = true
   151  		}
   152  
   153  		if !changed {
   154  			break
   155  		}
   156  	}
   157  
   158  	sdom := f.sdom()
   159  
   160  	// Compute substitutions we would like to do. We substitute v for w
   161  	// if v and w are in the same equivalence class and v dominates w.
   162  	rewrite := make([]*Value, f.NumValues())
   163  	byDom := new(partitionByDom) // reusable partitionByDom to reduce allocs
   164  	for _, e := range partition {
   165  		byDom.a = e
   166  		byDom.sdom = sdom
   167  		sort.Sort(byDom)
   168  		for i := 0; i < len(e)-1; i++ {
   169  			// e is sorted by domorder, so a maximal dominant element is first in the slice
   170  			v := e[i]
   171  			if v == nil {
   172  				continue
   173  			}
   174  
   175  			e[i] = nil
   176  			// Replace all elements of e which v dominates
   177  			for j := i + 1; j < len(e); j++ {
   178  				w := e[j]
   179  				if w == nil {
   180  					continue
   181  				}
   182  				if sdom.isAncestorEq(v.Block, w.Block) {
   183  					rewrite[w.ID] = v
   184  					e[j] = nil
   185  				} else {
   186  					// e is sorted by domorder, so v.Block doesn't dominate any subsequent blocks in e
   187  					break
   188  				}
   189  			}
   190  		}
   191  	}
   192  
   193  	// if we rewrite a tuple generator to a new one in a different block,
   194  	// copy its selectors to the new generator's block, so tuple generator
   195  	// and selectors stay together.
   196  	// be careful not to copy same selectors more than once (issue 16741).
   197  	copiedSelects := make(map[ID][]*Value)
   198  	for _, b := range f.Blocks {
   199  	out:
   200  		for _, v := range b.Values {
   201  			// New values are created when selectors are copied to
   202  			// a new block. We can safely ignore those new values,
   203  			// since they have already been copied (issue 17918).
   204  			if int(v.ID) >= len(rewrite) || rewrite[v.ID] != nil {
   205  				continue
   206  			}
   207  			if v.Op != OpSelect0 && v.Op != OpSelect1 {
   208  				continue
   209  			}
   210  			if !v.Args[0].Type.IsTuple() {
   211  				f.Fatalf("arg of tuple selector %s is not a tuple: %s", v.String(), v.Args[0].LongString())
   212  			}
   213  			t := rewrite[v.Args[0].ID]
   214  			if t != nil && t.Block != b {
   215  				// v.Args[0] is tuple generator, CSE'd into a different block as t, v is left behind
   216  				for _, c := range copiedSelects[t.ID] {
   217  					if v.Op == c.Op {
   218  						// an equivalent selector is already copied
   219  						rewrite[v.ID] = c
   220  						continue out
   221  					}
   222  				}
   223  				c := v.copyInto(t.Block)
   224  				rewrite[v.ID] = c
   225  				copiedSelects[t.ID] = append(copiedSelects[t.ID], c)
   226  			}
   227  		}
   228  	}
   229  
   230  	rewrites := int64(0)
   231  
   232  	// Apply substitutions
   233  	for _, b := range f.Blocks {
   234  		for _, v := range b.Values {
   235  			for i, w := range v.Args {
   236  				if x := rewrite[w.ID]; x != nil {
   237  					if w.Pos.IsStmt() == src.PosIsStmt {
   238  						// about to lose a statement marker, w
   239  						// w is an input to v; if they're in the same block
   240  						// and the same line, v is a good-enough new statement boundary.
   241  						if w.Block == v.Block && w.Pos.Line() == v.Pos.Line() {
   242  							v.Pos = v.Pos.WithIsStmt()
   243  							w.Pos = w.Pos.WithNotStmt()
   244  						} // TODO and if this fails?
   245  					}
   246  					v.SetArg(i, x)
   247  					rewrites++
   248  				}
   249  			}
   250  		}
   251  		if v := b.Control; v != nil {
   252  			if x := rewrite[v.ID]; x != nil {
   253  				if v.Op == OpNilCheck {
   254  					// nilcheck pass will remove the nil checks and log
   255  					// them appropriately, so don't mess with them here.
   256  					continue
   257  				}
   258  				b.SetControl(x)
   259  			}
   260  		}
   261  	}
   262  	if f.pass.stats > 0 {
   263  		f.LogStat("CSE REWRITES", rewrites)
   264  	}
   265  }
   266  
   267  // An eqclass approximates an equivalence class. During the
   268  // algorithm it may represent the union of several of the
   269  // final equivalence classes.
   270  type eqclass []*Value
   271  
   272  // partitionValues partitions the values into equivalence classes
   273  // based on having all the following features match:
   274  //  - opcode
   275  //  - type
   276  //  - auxint
   277  //  - aux
   278  //  - nargs
   279  //  - block # if a phi op
   280  //  - first two arg's opcodes and auxint
   281  //  - NOT first two arg's aux; that can break CSE.
   282  // partitionValues returns a list of equivalence classes, each
   283  // being a sorted by ID list of *Values. The eqclass slices are
   284  // backed by the same storage as the input slice.
   285  // Equivalence classes of size 1 are ignored.
   286  func partitionValues(a []*Value, auxIDs auxmap) []eqclass {
   287  	sort.Sort(sortvalues{a, auxIDs})
   288  
   289  	var partition []eqclass
   290  	for len(a) > 0 {
   291  		v := a[0]
   292  		j := 1
   293  		for ; j < len(a); j++ {
   294  			w := a[j]
   295  			if cmpVal(v, w, auxIDs) != types.CMPeq {
   296  				break
   297  			}
   298  		}
   299  		if j > 1 {
   300  			partition = append(partition, a[:j])
   301  		}
   302  		a = a[j:]
   303  	}
   304  
   305  	return partition
   306  }
   307  func lt2Cmp(isLt bool) types.Cmp {
   308  	if isLt {
   309  		return types.CMPlt
   310  	}
   311  	return types.CMPgt
   312  }
   313  
   314  type auxmap map[interface{}]int32
   315  
   316  func cmpVal(v, w *Value, auxIDs auxmap) types.Cmp {
   317  	// Try to order these comparison by cost (cheaper first)
   318  	if v.Op != w.Op {
   319  		return lt2Cmp(v.Op < w.Op)
   320  	}
   321  	if v.AuxInt != w.AuxInt {
   322  		return lt2Cmp(v.AuxInt < w.AuxInt)
   323  	}
   324  	if len(v.Args) != len(w.Args) {
   325  		return lt2Cmp(len(v.Args) < len(w.Args))
   326  	}
   327  	if v.Op == OpPhi && v.Block != w.Block {
   328  		return lt2Cmp(v.Block.ID < w.Block.ID)
   329  	}
   330  	if v.Type.IsMemory() {
   331  		// We will never be able to CSE two values
   332  		// that generate memory.
   333  		return lt2Cmp(v.ID < w.ID)
   334  	}
   335  	// OpSelect is a pseudo-op. We need to be more aggressive
   336  	// regarding CSE to keep multiple OpSelect's of the same
   337  	// argument from existing.
   338  	if v.Op != OpSelect0 && v.Op != OpSelect1 {
   339  		if tc := v.Type.Compare(w.Type); tc != types.CMPeq {
   340  			return tc
   341  		}
   342  	}
   343  
   344  	if v.Aux != w.Aux {
   345  		if v.Aux == nil {
   346  			return types.CMPlt
   347  		}
   348  		if w.Aux == nil {
   349  			return types.CMPgt
   350  		}
   351  		return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux])
   352  	}
   353  
   354  	return types.CMPeq
   355  }
   356  
   357  // Sort values to make the initial partition.
   358  type sortvalues struct {
   359  	a      []*Value // array of values
   360  	auxIDs auxmap   // aux -> aux ID map
   361  }
   362  
   363  func (sv sortvalues) Len() int      { return len(sv.a) }
   364  func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
   365  func (sv sortvalues) Less(i, j int) bool {
   366  	v := sv.a[i]
   367  	w := sv.a[j]
   368  	if cmp := cmpVal(v, w, sv.auxIDs); cmp != types.CMPeq {
   369  		return cmp == types.CMPlt
   370  	}
   371  
   372  	// Sort by value ID last to keep the sort result deterministic.
   373  	return v.ID < w.ID
   374  }
   375  
   376  type partitionByDom struct {
   377  	a    []*Value // array of values
   378  	sdom SparseTree
   379  }
   380  
   381  func (sv partitionByDom) Len() int      { return len(sv.a) }
   382  func (sv partitionByDom) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
   383  func (sv partitionByDom) Less(i, j int) bool {
   384  	v := sv.a[i]
   385  	w := sv.a[j]
   386  	return sv.sdom.domorder(v.Block) < sv.sdom.domorder(w.Block)
   387  }
   388  
   389  type partitionByArgClass struct {
   390  	a       []*Value // array of values
   391  	eqClass []ID     // equivalence class IDs of values
   392  }
   393  
   394  func (sv partitionByArgClass) Len() int      { return len(sv.a) }
   395  func (sv partitionByArgClass) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] }
   396  func (sv partitionByArgClass) Less(i, j int) bool {
   397  	v := sv.a[i]
   398  	w := sv.a[j]
   399  	for i, a := range v.Args {
   400  		b := w.Args[i]
   401  		if sv.eqClass[a.ID] < sv.eqClass[b.ID] {
   402  			return true
   403  		}
   404  		if sv.eqClass[a.ID] > sv.eqClass[b.ID] {
   405  			return false
   406  		}
   407  	}
   408  	return false
   409  }
   410  

View as plain text