// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "cmd/internal/src"

// branchelim tries to eliminate branches by
// generating CondSelect instructions.
//
// Search for basic blocks that look like
//
//	bb0            bb0
//	 | \          /   \
//	 | bb1   or  bb1   bb2    <- trivial if/else blocks
//	 | /          \   /
//	bb2            bb3
//
// where the intermediate blocks are mostly empty (with no side-effects);
// rewrite Phis in the postdominator as CondSelects.
func branchelim(f *Func) {
	// FIXME: add support for lowering CondSelects on more architectures
	switch f.Config.arch {
	case "arm64", "ppc64le", "ppc64", "amd64", "wasm", "loong64":
		// implemented
	default:
		return
	}

	// Find all the values used in computing the address of any load.
	// Typically these values have operations like AddPtr, Lsh64x64, etc.
	loadAddr := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(loadAddr)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			switch v.Op {
			case OpLoad, OpAtomicLoad8, OpAtomicLoad32, OpAtomicLoad64, OpAtomicLoadPtr, OpAtomicLoadAcq32, OpAtomicLoadAcq64:
				loadAddr.add(v.Args[0].ID)
			case OpMove:
				loadAddr.add(v.Args[1].ID)
			}
		}
	}
	po := f.postorder()
	for {
		n := loadAddr.size()
		for _, b := range po {
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if !loadAddr.contains(v.ID) {
					continue
				}
				for _, a := range v.Args {
					if a.Type.IsInteger() || a.Type.IsPtr() || a.Type.IsUnsafePtr() {
						loadAddr.add(a.ID)
					}
				}
			}
		}
		if loadAddr.size() == n {
			break
		}
	}

	change := true
	for change {
		change = false
		for _, b := range f.Blocks {
			change = elimIf(f, loadAddr, b) || elimIfElse(f, loadAddr, b) || change
		}
	}
}

func canCondSelect(v *Value, arch string, loadAddr *sparseSet) bool {
	if loadAddr.contains(v.ID) {
		// The result of the soon-to-be conditional move is used to compute a load address.
		// We want to avoid generating a conditional move in this case
		// because the load address would now be data-dependent on the condition.
		// Previously it would only be control-dependent on the condition, which is faster
		// if the branch predicts well (or possibly even if it doesn't, if the load will
		// be an expensive cache miss).
		// See issue #26306.
		return false
	}
	if arch == "loong64" {
		// We should not generate conditional moves if neither of the arguments is constant zero,
		// because it requires three instructions (OR, MASKEQZ, MASKNEZ) and will increase the
		// register pressure.
		if !(v.Args[0].isGenericIntConst() && v.Args[0].AuxInt == 0) && !(v.Args[1].isGenericIntConst() && v.Args[1].AuxInt == 0) {
			return false
		}
	}
	// For now, stick to simple scalars that fit in registers
	switch {
	case v.Type.Size() > v.Block.Func.Config.RegSize:
		return false
	case v.Type.IsPtrShaped():
		return true
	case v.Type.IsInteger():
		if arch == "amd64" && v.Type.Size() < 2 {
			// amd64 doesn't support CMOV with byte registers
			return false
		}
		return true
	default:
		return false
	}
}

// elimIf converts the one-way branch starting at dom in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
func elimIf(f *Func, loadAddr *sparseSet, dom *Block) bool {
	// See if dom is an If with one arm that
	// is trivial and succeeded by the other
	// successor of dom.
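	//
	// For illustration only (a sketch, not exact SSA), source of the form
	//
	//	x := a
	//	if cond {
	//		x = b
	//	}
	//	use(x)
	//
	// produces this shape, and the Phi merging x in the join block is the kind
	// of value rewritten to a CondSelect below (operand order depends on which
	// predecessor of post is which; see 'swap').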
	if dom.Kind != BlockIf || dom.Likely != BranchUnknown {
		return false
	}
	var simple, post *Block
	for i := range dom.Succs {
		bb, other := dom.Succs[i].Block(), dom.Succs[i^1].Block()
		if isLeafPlain(bb) && bb.Succs[0].Block() == other {
			simple = bb
			post = other
			break
		}
	}
	if simple == nil || len(post.Preds) != 2 || post == dom {
		return false
	}

	// We've found our diamond CFG of blocks.
	// Now decide if fusing 'simple' into dom+post
	// looks profitable.

	// Check that there are Phis, and that all of them
	// can be safely rewritten to CondSelect.
	hasphis := false
	for _, v := range post.Values {
		if v.Op == OpPhi {
			hasphis = true
			if !canCondSelect(v, f.Config.arch, loadAddr) {
				return false
			}
		}
	}
	if !hasphis {
		return false
	}

	// Pick some upper bound for the number of instructions
	// we'd be willing to execute just to generate a dead
	// argument to CondSelect. In the worst case, this is
	// the number of useless instructions executed.
	const maxfuseinsts = 2

	if len(simple.Values) > maxfuseinsts || !canSpeculativelyExecute(simple) {
		return false
	}

	// Replace Phi instructions in post with CondSelect instructions.
	swap := (post.Preds[0].Block() == dom) != (dom.Succs[0].Block() == post)
	for _, v := range post.Values {
		if v.Op != OpPhi {
			continue
		}
		v.Op = OpCondSelect
		if swap {
			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
		}
		v.AddArg(dom.Controls[0])
	}

	// Put all of the instructions into 'dom'
	// and update the CFG appropriately.
	dom.Kind = post.Kind
	dom.CopyControls(post)
	dom.Aux = post.Aux
	dom.Succs = append(dom.Succs[:0], post.Succs...)
	for i := range dom.Succs {
		e := dom.Succs[i]
		e.b.Preds[e.i].b = dom
	}

	// Try really hard to preserve statement marks attached to blocks.
	simplePos := simple.Pos
	postPos := post.Pos
	simpleStmt := simplePos.IsStmt() == src.PosIsStmt
	postStmt := postPos.IsStmt() == src.PosIsStmt

	for _, v := range simple.Values {
		v.Block = dom
	}
	for _, v := range post.Values {
		v.Block = dom
	}

	// findBlockPos determines if b contains a stmt-marked value
	// that has the same line number as the Pos for b itself.
	// (i.e. is the position on b actually redundant?)
	findBlockPos := func(b *Block) bool {
		pos := b.Pos
		for _, v := range b.Values {
			// See if there is a stmt-marked value already that matches simple.Pos (and perhaps post.Pos)
			if pos.SameFileAndLine(v.Pos) && v.Pos.IsStmt() == src.PosIsStmt {
				return true
			}
		}
		return false
	}
	if simpleStmt {
		simpleStmt = !findBlockPos(simple)
		if !simpleStmt && simplePos.SameFileAndLine(postPos) {
			postStmt = false
		}
	}
	if postStmt {
		postStmt = !findBlockPos(post)
	}

	// If simpleStmt and/or postStmt are still true, then try harder
	// to find new homes for the corresponding statement marks.

	// setBlockPos determines if b contains a can-be-statement value
	// that has the same line number as the Pos for b itself, and
	// puts a statement mark on it, and returns whether it succeeded
	// in this operation.
	setBlockPos := func(b *Block) bool {
		pos := b.Pos
		for _, v := range b.Values {
			if pos.SameFileAndLine(v.Pos) && !isPoorStatementOp(v.Op) {
				v.Pos = v.Pos.WithIsStmt()
				return true
			}
		}
		return false
	}
	// If necessary and possible, add a mark to a value in simple
	if simpleStmt {
		if setBlockPos(simple) && simplePos.SameFileAndLine(postPos) {
			postStmt = false
		}
	}
	// If necessary and possible, add a mark to a value in post
	if postStmt {
		postStmt = !setBlockPos(post)
	}

	// Before giving up (this was added because it helps), try the end of "dom", and if that is not available,
	// try the values in the successor block if it is uncomplicated.
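	// The fallbacks below, in order: mark the end of dom itself (if it is not
	// already a statement boundary), otherwise mark a suitable value in dom's
	// single successor, otherwise mark that successor block itself.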
	if postStmt {
		if dom.Pos.IsStmt() != src.PosIsStmt {
			dom.Pos = postPos
		} else {
			// Try the successor block
			if len(dom.Succs) == 1 && len(dom.Succs[0].Block().Preds) == 1 {
				succ := dom.Succs[0].Block()
				for _, v := range succ.Values {
					if isPoorStatementOp(v.Op) {
						continue
					}
					if postPos.SameFileAndLine(v.Pos) {
						v.Pos = v.Pos.WithIsStmt()
					}
					postStmt = false
					break
				}
				// If postStmt still true, tag the block itself if possible
				if postStmt && succ.Pos.IsStmt() != src.PosIsStmt {
					succ.Pos = postPos
				}
			}
		}
	}

	dom.Values = append(dom.Values, simple.Values...)
	dom.Values = append(dom.Values, post.Values...)

	// Trash 'post' and 'simple'
	clobberBlock(post)
	clobberBlock(simple)
	f.invalidateCFG()
	return true
}

// is this a BlockPlain with one predecessor?
func isLeafPlain(b *Block) bool {
	return b.Kind == BlockPlain && len(b.Preds) == 1
}

func clobberBlock(b *Block) {
	b.Values = nil
	b.Preds = nil
	b.Succs = nil
	b.Aux = nil
	b.ResetControls()
	b.Likely = BranchUnknown
	b.Kind = BlockInvalid
}

// elimIfElse converts the two-way branch starting at b in f to a conditional move if possible.
// loadAddr is a set of values which are used to compute the address of a load.
// Those values are exempt from CMOV generation.
func elimIfElse(f *Func, loadAddr *sparseSet, b *Block) bool {
	// See if 'b' ends in an if/else: it should
	// have two successors, both of which are BlockPlain
	// and succeeded by the same block.
	if b.Kind != BlockIf || b.Likely != BranchUnknown {
		return false
	}
	yes, no := b.Succs[0].Block(), b.Succs[1].Block()
	if !isLeafPlain(yes) || len(yes.Values) > 1 || !canSpeculativelyExecute(yes) {
		return false
	}
	if !isLeafPlain(no) || len(no.Values) > 1 || !canSpeculativelyExecute(no) {
		return false
	}
	if b.Succs[0].Block().Succs[0].Block() != b.Succs[1].Block().Succs[0].Block() {
		return false
	}
	// block that postdominates the if/else
	post := b.Succs[0].Block().Succs[0].Block()
	if len(post.Preds) != 2 || post == b {
		return false
	}

	hasphis := false
	for _, v := range post.Values {
		if v.Op == OpPhi {
			hasphis = true
			if !canCondSelect(v, f.Config.arch, loadAddr) {
				return false
			}
		}
	}
	if !hasphis {
		return false
	}

	// Don't generate CondSelects if branch is cheaper.
	if !shouldElimIfElse(no, yes, post, f.Config.arch) {
		return false
	}

	// now we're committed: rewrite each Phi as a CondSelect
	swap := post.Preds[0].Block() != b.Succs[0].Block()
	for _, v := range post.Values {
		if v.Op != OpPhi {
			continue
		}
		v.Op = OpCondSelect
		if swap {
			v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
		}
		v.AddArg(b.Controls[0])
	}

	// Move the contents of all of these
	// blocks into 'b' and update CFG edges accordingly
	b.Kind = post.Kind
	b.CopyControls(post)
	b.Aux = post.Aux
	b.Succs = append(b.Succs[:0], post.Succs...)
	for i := range b.Succs {
		e := b.Succs[i]
		e.b.Preds[e.i].b = b
	}

	for i := range post.Values {
		post.Values[i].Block = b
	}
	for i := range yes.Values {
		yes.Values[i].Block = b
	}
	for i := range no.Values {
		no.Values[i].Block = b
	}

	b.Values = append(b.Values, yes.Values...)
	b.Values = append(b.Values, no.Values...)
	b.Values = append(b.Values, post.Values...)

	// trash post, yes, and no
	clobberBlock(yes)
	clobberBlock(no)
	clobberBlock(post)
	f.invalidateCFG()
	return true
}

// shouldElimIfElse reports whether the estimated cost of eliminating the branch
// is lower than the threshold.
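//
// For all architectures except amd64 it reports true. On amd64 the estimate
// below charges one unit per Phi (each becomes a CondSelect, which lowers to
// CMOV with latency > 1) and, when there is more than one Phi, one extra unit
// for every argument of post's values that is computed in the yes or no arm
// (such values may clobber flags, forcing the condition to be recomputed);
// the rewrite is considered profitable only if the total is below maxcost.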
func shouldElimIfElse(no, yes, post *Block, arch string) bool {
	switch arch {
	default:
		return true
	case "amd64":
		const maxcost = 2
		phi := 0
		other := 0
		for _, v := range post.Values {
			if v.Op == OpPhi {
				// Each phi results in CondSelect, which lowers into CMOV,
				// CMOV has latency >1 on most CPUs.
				phi++
			}
			for _, x := range v.Args {
				if x.Block == no || x.Block == yes {
					other++
				}
			}
		}
		cost := phi * 1
		if phi > 1 {
			// If we have more than 1 phi and some values in post have args
			// in yes or no blocks, we may have to recalculate condition, because
			// those args may clobber flags. For now assume that all operations clobber flags.
			cost += other * 1
		}
		return cost < maxcost
	}
}

// canSpeculativelyExecute reports whether every value in the block can
// be evaluated without causing any observable side effects (memory
// accesses, panics and so on) except for execution time changes. It
// also ensures that the block does not contain any phis which we can't
// speculatively execute.
// Warning: this function cannot currently detect values that represent
// instructions whose execution needs to be guarded with CPU hardware
// feature checks. See issue #34950.
func canSpeculativelyExecute(b *Block) bool {
	// don't fuse memory ops, Phi ops, divides (can panic),
	// or anything else with side-effects
	for _, v := range b.Values {
		if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) ||
			v.Type.IsMemory() || v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
			return false
		}
	}
	return true
}

func isDivMod(op Op) bool {
	switch op {
	case OpDiv8, OpDiv8u, OpDiv16, OpDiv16u,
		OpDiv32, OpDiv32u, OpDiv64, OpDiv64u, OpDiv128u,
		OpDiv32F, OpDiv64F,
		OpMod8, OpMod8u, OpMod16, OpMod16u,
		OpMod32, OpMod32u, OpMod64, OpMod64u:
		return true
	default:
		return false
	}
}

func isPtrArithmetic(op Op) bool {
	// Pointer arithmetic can't be speculatively executed because the result
	// may be an invalid pointer (if, for example, the condition is that the
	// base pointer is not nil). See issue 56990.
	switch op {
	case OpOffPtr, OpAddPtr, OpSubPtr:
		return true
	default:
		return false
	}
}