// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import "cmd/compile/internal/base"

// tighten moves Values closer to the Blocks in which they are used.
// This can reduce the amount of register spilling required,
// if it doesn't also create more live values.
// A Value can be moved to any block that
// dominates all blocks in which it is used.
func tighten(f *Func) {
	if base.Flag.N != 0 && len(f.Blocks) < 10000 {
		// Skip the optimization in -N mode, except for huge functions.
		// Too many values live across blocks can cause pathological
		// behavior in the register allocator (see issue 52180).
		return
	}

	// canMove[id] reports whether value id is a candidate for sinking.
	canMove := f.Cache.allocBoolSlice(f.NumValues())
	defer f.Cache.freeBoolSlice(canMove)

	// Compute the memory states of each block.
	startMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(startMem)
	endMem := f.Cache.allocValueSlice(f.NumBlocks())
	defer f.Cache.freeValueSlice(endMem)
	memState(f, startMem, endMem)

	// Phase 1: decide which values are even eligible to move.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op.isLoweredGetClosurePtr() {
				// Must stay in the entry block.
				continue
			}
			switch v.Op {
			case OpPhi, OpArg, OpArgIntReg, OpArgFloatReg, OpSelect0, OpSelect1, OpSelectN:
				// Phis need to stay in their block.
				// Arg must stay in the entry block.
				// Tuple selectors must stay with the tuple generator.
				// SelectN is typically, ultimately, a register.
				continue
			}
			// Count arguments which will need a register.
			narg := 0
			for _, a := range v.Args {
				// SP and SB are special registers and have no effect on
				// the allocation of general-purpose registers.
				if a.needRegister() && a.Op != OpSB && a.Op != OpSP {
					narg++
				}
			}
			if narg >= 2 && !v.Type.IsFlags() {
				// Don't move values with more than one input, as that may
				// increase register pressure.
				// We make an exception for flags, as we want flag generators
				// moved next to uses (because we only have 1 flag register).
				continue
			}
			canMove[v.ID] = true
		}
	}

	// Build data structure for fast least-common-ancestor queries.
	lca := makeLCArange(f)

	// For each moveable value, record the block that dominates all uses found so far.
	target := f.Cache.allocBlockSlice(f.NumValues())
	defer f.Cache.freeBlockSlice(target)

	// Grab loop information.
	// We use this to make sure we don't tighten a value into a (deeper) loop.
	idom := f.Idom()
	loops := f.loopnest()
	loops.calculateDepths()

	// Phase 2: iterate to a fixed point. Moving one value can expose
	// further moves (its arguments' use sets shrink), hence the loop.
	changed := true
	for changed {
		changed = false

		// Reset target
		for i := range target {
			target[i] = nil
		}

		// Compute target locations (for moveable values only).
		// target location = the least common ancestor of all uses in the dominator tree.
		for _, b := range f.Blocks {
			for _, v := range b.Values {
				for i, a := range v.Args {
					if !canMove[a.ID] {
						continue
					}
					use := b
					if v.Op == OpPhi {
						// A phi use happens at the end of the
						// corresponding predecessor edge, not in b itself.
						use = b.Preds[i].b
					}
					if target[a.ID] == nil {
						target[a.ID] = use
					} else {
						target[a.ID] = lca.find(target[a.ID], use)
					}
				}
			}
			// Control values are used by the block itself.
			for _, c := range b.ControlValues() {
				if !canMove[c.ID] {
					continue
				}
				if target[c.ID] == nil {
					target[c.ID] = b
				} else {
					target[c.ID] = lca.find(target[c.ID], b)
				}
			}
		}

		// If the target location is inside a loop,
		// move the target location up to just before the loop head.
		for _, b := range f.Blocks {
			origloop := loops.b2l[b.ID]
			for _, v := range b.Values {
				t := target[v.ID]
				if t == nil {
					continue
				}
				targetloop := loops.b2l[t.ID]
				for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
					// Hoist the target out of the loop via the
					// immediate dominator of the loop header.
					t = idom[targetloop.header.ID]
					target[v.ID] = t
					targetloop = loops.b2l[t.ID]
				}
			}
		}

		// Move values to target locations.
		for _, b := range f.Blocks {
			for i := 0; i < len(b.Values); i++ {
				v := b.Values[i]
				t := target[v.ID]
				if t == nil || t == b {
					// v is not moveable, or is already in correct place.
					continue
				}
				if mem := v.MemoryArg(); mem != nil {
					if startMem[t.ID] != mem {
						// We can't move a value with a memory arg unless the target block
						// has that memory arg as its starting memory.
						continue
					}
				}
				if f.pass.debug > 0 {
					b.Func.Warnl(v.Pos, "%v is moved", v.Op)
				}
				// Move v to the block which dominates its uses.
				t.Values = append(t.Values, v)
				v.Block = t
				// Swap-delete v from b.Values; nil out the vacated slot
				// and revisit index i, which now holds a different value.
				last := len(b.Values) - 1
				b.Values[i] = b.Values[last]
				b.Values[last] = nil
				b.Values = b.Values[:last]
				changed = true
				i--
			}
		}
	}
}

// phiTighten moves constants closer to phi users.
// This pass avoids having lots of constants live for lots of the program.
// See issue 16407.
func phiTighten(f *Func) {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpPhi {
				continue
			}
			for i, a := range v.Args {
				if !a.rematerializeable() {
					continue // not a constant we can move around
				}
				if a.Block == b.Preds[i].b {
					continue // already in the right place
				}
				// Make a copy of a, put in predecessor block.
				v.SetArg(i, a.copyInto(b.Preds[i].b))
			}
		}
	}
}

// memState computes the memory state at the beginning and end of each block of
// the function. The memory state is represented by a value of mem type.
// The returned result is stored in startMem and endMem, and endMem is nil for
// blocks with no successors (Exit,Ret,RetJmp blocks). This algorithm is not
// suitable for infinite loop blocks that do not contain any mem operations.
// For example:
// b1:
//
//	(some values)
//
// plain -> b2
// b2: <- b1 b2
// Plain -> b2
//
// Algorithm introduction:
//  1. The start memory state of a block is InitMem, a Phi node of type mem or
//     an incoming memory value.
//  2. The start memory state of a block is consistent with the end memory state
//     of its parent nodes. If the start memory state of a block is a Phi value,
//     then the end memory state of its parent nodes is consistent with the
//     corresponding argument value of the Phi node.
//  3. The algorithm first obtains the memory state of some blocks in the tree
//     in the first step. Then floods the known memory state to other nodes in
//     the second step.
func memState(f *Func, startMem, endMem []*Value) { // This slice contains the set of blocks that have had their startMem set but this // startMem value has not yet been propagated to the endMem of its predecessors changed := make([]*Block, 0) // First step, init the memory state of some blocks. for _, b := range f.Blocks { for _, v := range b.Values { var mem *Value if v.Op == OpPhi { if v.Type.IsMemory() { mem = v } } else if v.Op == OpInitMem { mem = v // This is actually not needed. } else if a := v.MemoryArg(); a != nil && a.Block != b { // The only incoming memory value doesn't belong to this block. mem = a } if mem != nil { if old := startMem[b.ID]; old != nil { if old == mem { continue } f.Fatalf("func %s, startMem[%v] has different values, old %v, new %v", f.Name, b, old, mem) } startMem[b.ID] = mem changed = append(changed, b) } } } // Second step, floods the known memory state of some blocks to others. for len(changed) != 0 { top := changed[0] changed = changed[1:] mem := startMem[top.ID] for i, p := range top.Preds { pb := p.b if endMem[pb.ID] != nil { continue } if mem.Op == OpPhi && mem.Block == top { endMem[pb.ID] = mem.Args[i] } else { endMem[pb.ID] = mem } if startMem[pb.ID] == nil { startMem[pb.ID] = endMem[pb.ID] changed = append(changed, pb) } } } }