...
Run Format

Source file src/cmd/compile/internal/ssa/branchelim.go

Documentation: cmd/compile/internal/ssa

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  // branchelim tries to eliminate branches by
     8  // generating CondSelect instructions.
     9  //
    10  // Search for basic blocks that look like
    11  //
    12  // bb0            bb0
    13  //  | \          /   \
    14  //  | bb1  or  bb1   bb2    <- trivial if/else blocks
    15  //  | /          \   /
    16  // bb2            bb3
    17  //
    18  // where the intermediate blocks are mostly empty (with no side-effects);
    19  // rewrite Phis in the postdominator as CondSelects.
    20  func branchelim(f *Func) {
    21  	// FIXME: add support for lowering CondSelects on more architectures
    22  	switch f.Config.arch {
    23  	case "arm64", "amd64":
    24  		// implemented
    25  	default:
    26  		return
    27  	}
    28  
    29  	// Find all the values used in computing the address of any load.
    30  	// Typically these values have operations like AddPtr, Lsh64x64, etc.
    31  	loadAddr := f.newSparseSet(f.NumValues())
    32  	defer f.retSparseSet(loadAddr)
    33  	for _, b := range f.Blocks {
    34  		for _, v := range b.Values {
    35  			switch v.Op {
    36  			case OpLoad, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32:
    37  				loadAddr.add(v.Args[0].ID)
    38  			case OpMove:
    39  				loadAddr.add(v.Args[1].ID)
    40  			}
    41  		}
    42  	}
    43  	po := f.postorder()
    44  	for {
    45  		n := loadAddr.size()
    46  		for _, b := range po {
    47  			for i := len(b.Values) - 1; i >= 0; i-- {
    48  				v := b.Values[i]
    49  				if !loadAddr.contains(v.ID) {
    50  					continue
    51  				}
    52  				for _, a := range v.Args {
    53  					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
    54  						loadAddr.add(a.ID)
    55  					}
    56  				}
    57  			}
    58  		}
    59  		if loadAddr.size() == n {
    60  			break
    61  		}
    62  	}
    63  
    64  	change := true
    65  	for change {
    66  		change = false
    67  		for _, b := range f.Blocks {
    68  			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
    69  		}
    70  	}
    71  }
    72  
    73  func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
    74  	if loadAddr.contains(v.ID) {
    75  		// The result of the soon-to-be conditional move is used to compute a load address.
    76  		// We want to avoid generating a conditional move in this case
    77  		// because the load address would now be data-dependent on the condition.
    78  		// Previously it would only be control-dependent on the condition, which is faster
    79  		// if the branch predicts well (or possibly even if it doesn't, if the load will
    80  		// be an expensive cache miss).
    81  		// See issue #26306.
    82  		return false
    83  	}
    84  	// For now, stick to simple scalars that fit in registers
    85  	switch {
    86  	case v.Type.Size() > v.Block.Func.Config.RegSize:
    87  		return false
    88  	case v.Type.IsPtrShaped():
    89  		return true
    90  	case v.Type.IsInteger():
    91  		if arch == "amd64" && v.Type.Size() < 2 {
    92  			// amd64 doesn't support CMOV with byte registers
    93  			return false
    94  		}
    95  		return true
    96  	default:
    97  		return false
    98  	}
    99  }
   100  
   101  // elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
   102  // loadAddr is a set of values which are used to compute the address of a load.
   103  // Those values are exempt from CMOV generation.
   104  func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
   105  	// See if dom is an If with one arm that
   106  	// is trivial and succeeded by the other
   107  	// successor of dom.
   108  	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
   109  		return false
   110  	}
   111  	var simple, post *Block
   112  	for i := range dom.Succs {
   113  		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
   114  		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
   115  			simple = bb
   116  			post = other
   117  			break
   118  		}
   119  	}
   120  	if simple == nil || len(post.Preds) != 2 || post == dom {
   121  		return false
   122  	}
   123  
   124  	// We've found our diamond CFG of blocks.
   125  	// Now decide if fusing 'simple' into dom+post
   126  	// looks profitable.
   127  
   128  	// Check that there are Phis, and that all of them
   129  	// can be safely rewritten to CondSelect.
   130  	hasphis := false
   131  	for _, v := range post.Values {
   132  		if v.Op == OpPhi {
   133  			hasphis = true
   134  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   135  				return false
   136  			}
   137  		}
   138  	}
   139  	if !hasphis {
   140  		return false
   141  	}
   142  
   143  	// Pick some upper bound for the number of instructions
   144  	// we'd be willing to execute just to generate a dead
   145  	// argument to CondSelect. In the worst case, this is
   146  	// the number of useless instructions executed.
   147  	const maxfuseinsts = 2
   148  
   149  	if len(simple.Values) > maxfuseinsts || !allTrivial(simple) {
   150  		return false
   151  	}
   152  
   153  	// Replace Phi instructions in b with CondSelect instructions
   154  	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
   155  	for _, v := range post.Values {
   156  		if v.Op != OpPhi {
   157  			continue
   158  		}
   159  		v.Op = OpCondSelect
   160  		if swap {
   161  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   162  		}
   163  		v.AddArg(dom.Control)
   164  	}
   165  
   166  	// Put all of the instructions into 'dom'
   167  	// and update the CFG appropriately.
   168  	dom.Kind = post.Kind
   169  	dom.SetControl(post.Control)
   170  	dom.Aux = post.Aux
   171  	dom.Succs = append(dom.Succs[:0], post.Succs...)
   172  	for i := range dom.Succs {
   173  		e := dom.Succs[i]
   174  		e.b.Preds[e.i].b = dom
   175  	}
   176  
   177  	for i := range simple.Values {
   178  		simple.Values[i].Block = dom
   179  	}
   180  	for i := range post.Values {
   181  		post.Values[i].Block = dom
   182  	}
   183  	dom.Values = append(dom.Values, simple.Values...)
   184  	dom.Values = append(dom.Values, post.Values...)
   185  
   186  	// Trash 'post' and 'simple'
   187  	clobberBlock(post)
   188  	clobberBlock(simple)
   189  
   190  	f.invalidateCFG()
   191  	return true
   192  }
   193  
   194  // is this a BlockPlain with one predecessor?
   195  func isLeafPlain(b *Block) bool {
   196  	return b.Kind == BlockPlain && len(b.Preds) == 1
   197  }
   198  
   199  func clobberBlock(b *Block) {
   200  	b.Values = nil
   201  	b.Preds = nil
   202  	b.Succs = nil
   203  	b.Aux = nil
   204  	b.SetControl(nil)
   205  	b.Likely = BranchUnknown
   206  	b.Kind = BlockInvalid
   207  }
   208  
   209  // elimIfElse converts the two-way branch starting at dom in f to a conditional move if possible.
   210  // loadAddr is a set of values which are used to compute the address of a load.
   211  // Those values are exempt from CMOV generation.
   212  func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
   213  	// See if 'b' ends in an if/else: it should
   214  	// have two successors, both of which are BlockPlain
   215  	// and succeeded by the same block.
   216  	if b.Kind != BlockIf || b.Likely != BranchUnknown {
   217  		return false
   218  	}
   219  	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
   220  	if !isLeafPlain(yes) || len(yes.Values) > 1 || !allTrivial(yes) {
   221  		return false
   222  	}
   223  	if !isLeafPlain(no) || len(no.Values) > 1 || !allTrivial(no) {
   224  		return false
   225  	}
   226  	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
   227  		return false
   228  	}
   229  	// block that postdominates the if/else
   230  	post := b.Succs[0].Block().Succs[0].Block()
   231  	if len(post.Preds) != 2 || post == b {
   232  		return false
   233  	}
   234  	hasphis := false
   235  	for _, v := range post.Values {
   236  		if v.Op == OpPhi {
   237  			hasphis = true
   238  			if !canCondSelect(v, f.Config.arch, loadAddr) {
   239  				return false
   240  			}
   241  		}
   242  	}
   243  	if !hasphis {
   244  		return false
   245  	}
   246  
   247  	// Don't generate CondSelects if branch is cheaper.
   248  	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
   249  		return false
   250  	}
   251  
   252  	// now we're committed: rewrite each Phi as a CondSelect
   253  	swap := post.Preds[0].Block() != b.Succs[0].Block()
   254  	for _, v := range post.Values {
   255  		if v.Op != OpPhi {
   256  			continue
   257  		}
   258  		v.Op = OpCondSelect
   259  		if swap {
   260  			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   261  		}
   262  		v.AddArg(b.Control)
   263  	}
   264  
   265  	// Move the contents of all of these
   266  	// blocks into 'b' and update CFG edges accordingly
   267  	b.Kind = post.Kind
   268  	b.SetControl(post.Control)
   269  	b.Aux = post.Aux
   270  	b.Succs = append(b.Succs[:0], post.Succs...)
   271  	for i := range b.Succs {
   272  		e := b.Succs[i]
   273  		e.b.Preds[e.i].b = b
   274  	}
   275  	for i := range post.Values {
   276  		post.Values[i].Block = b
   277  	}
   278  	for i := range yes.Values {
   279  		yes.Values[i].Block = b
   280  	}
   281  	for i := range no.Values {
   282  		no.Values[i].Block = b
   283  	}
   284  	b.Values = append(b.Values, yes.Values...)
   285  	b.Values = append(b.Values, no.Values...)
   286  	b.Values = append(b.Values, post.Values...)
   287  
   288  	// trash post, yes, and no
   289  	clobberBlock(yes)
   290  	clobberBlock(no)
   291  	clobberBlock(post)
   292  
   293  	f.invalidateCFG()
   294  	return true
   295  }
   296  
   297  // shouldElimIfElse reports whether estimated cost of eliminating branch
   298  // is lower than threshold.
   299  func shouldElimIfElse(no, yes, post *Block, arch string) bool {
   300  	switch arch {
   301  	default:
   302  		return true
   303  	case "amd64":
   304  		const maxcost = 2
   305  		phi := 0
   306  		other := 0
   307  		for _, v := range post.Values {
   308  			if v.Op == OpPhi {
   309  				// Each phi results in CondSelect, which lowers into CMOV,
   310  				// CMOV has latency >1 on most CPUs.
   311  				phi++
   312  			}
   313  			for _, x := range v.Args {
   314  				if x.Block == no || x.Block == yes {
   315  					other++
   316  				}
   317  			}
   318  		}
   319  		cost := phi * 1
   320  		if phi > 1 {
   321  			// If we have more than 1 phi and some values in post have args
   322  			// in yes or no blocks, we may have to recalucalte condition, because
   323  			// those args may clobber flags. For now assume that all operations clobber flags.
   324  			cost += other * 1
   325  		}
   326  		return cost < maxcost
   327  	}
   328  }
   329  
   330  func allTrivial(b *Block) bool {
   331  	// don't fuse memory ops, Phi ops, divides (can panic),
   332  	// or anything else with side-effects
   333  	for _, v := range b.Values {
   334  		if v.Op == OpPhi || isDivMod(v.Op) || v.Type.IsMemory() ||
   335  			v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
   336  			return false
   337  		}
   338  	}
   339  	return true
   340  }
   341  
   342  func isDivMod(op Op) bool {
   343  	switch op {
   344  	case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
   345  		OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
   346  		OpDiv32F, OpDiv64F,
   347  		OpMod8, OpMod8u, OpMod16, OpMod16u,
   348  		OpMod32, OpMod32u, OpMod64, OpMod64u:
   349  		return true
   350  	default:
   351  		return false
   352  	}
   353  }
   354  

View as plain text