Black Lives Matter. Support the Equal Justice Initiative.

Source file src/cmd/compile/internal/amd64/ggen.go

Documentation: cmd/compile/internal/amd64

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"cmd/compile/internal/gc"
     9  	"cmd/internal/obj"
    10  	"cmd/internal/obj/x86"
    11  	"cmd/internal/objabi"
    12  )
    13  
// no floating point in note handlers on Plan 9
var isPlan9 = objabi.GOOS == "plan9"

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ.
// See runtime/mkduff.go.
const (
	dzBlocks    = 16 // number of MOV/ADD blocks
	dzBlockLen  = 4  // number of clears per block
	dzBlockSize = 19 // size of instructions in a single block
	dzMovSize   = 4  // size of single MOV instruction w/ offset
	dzLeaqSize  = 4  // size of single LEAQ instruction
	dzClearStep = 16 // number of bytes cleared by each MOV instruction

	dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
	dzSize     = dzBlocks * dzBlockSize   // total byte size of all DUFFZERO blocks
)
    30  
    31  // dzOff returns the offset for a jump into DUFFZERO.
    32  // b is the number of bytes to zero.
    33  func dzOff(b int64) int64 {
    34  	off := int64(dzSize)
    35  	off -= b / dzClearLen * dzBlockSize
    36  	tailLen := b % dzClearLen
    37  	if tailLen >= dzClearStep {
    38  		off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep)
    39  	}
    40  	return off
    41  }
    42  
    43  // duffzeroDI returns the pre-adjustment to DI for a call to DUFFZERO.
    44  // b is the number of bytes to zero.
    45  func dzDI(b int64) int64 {
    46  	tailLen := b % dzClearLen
    47  	if tailLen < dzClearStep {
    48  		return 0
    49  	}
    50  	tailSteps := tailLen / dzClearStep
    51  	return -dzClearStep * (dzBlockLen - tailSteps)
    52  }
    53  
    54  func zerorange(pp *gc.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
    55  	const (
    56  		ax = 1 << iota
    57  		x0
    58  	)
    59  
    60  	if cnt == 0 {
    61  		return p
    62  	}
    63  
    64  	if cnt%int64(gc.Widthreg) != 0 {
    65  		// should only happen with nacl
    66  		if cnt%int64(gc.Widthptr) != 0 {
    67  			gc.Fatalf("zerorange count not a multiple of widthptr %d", cnt)
    68  		}
    69  		if *state&ax == 0 {
    70  			p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
    71  			*state |= ax
    72  		}
    73  		p = pp.Appendpp(p, x86.AMOVL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off)
    74  		off += int64(gc.Widthptr)
    75  		cnt -= int64(gc.Widthptr)
    76  	}
    77  
    78  	if cnt == 8 {
    79  		if *state&ax == 0 {
    80  			p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
    81  			*state |= ax
    82  		}
    83  		p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_MEM, x86.REG_SP, off)
    84  	} else if !isPlan9 && cnt <= int64(8*gc.Widthreg) {
    85  		if *state&x0 == 0 {
    86  			p = pp.Appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
    87  			*state |= x0
    88  		}
    89  
    90  		for i := int64(0); i < cnt/16; i++ {
    91  			p = pp.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
    92  		}
    93  
    94  		if cnt%16 != 0 {
    95  			p = pp.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
    96  		}
    97  	} else if !isPlan9 && (cnt <= int64(128*gc.Widthreg)) {
    98  		if *state&x0 == 0 {
    99  			p = pp.Appendpp(p, x86.AXORPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_REG, x86.REG_X0, 0)
   100  			*state |= x0
   101  		}
   102  		p = pp.Appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
   103  		p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
   104  		p.To.Sym = gc.Duffzero
   105  
   106  		if cnt%16 != 0 {
   107  			p = pp.Appendpp(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X0, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
   108  		}
   109  	} else {
   110  		if *state&ax == 0 {
   111  			p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
   112  			*state |= ax
   113  		}
   114  
   115  		p = pp.Appendpp(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(gc.Widthreg), obj.TYPE_REG, x86.REG_CX, 0)
   116  		p = pp.Appendpp(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
   117  		p = pp.Appendpp(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
   118  		p = pp.Appendpp(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
   119  	}
   120  
   121  	return p
   122  }
   123  
   124  func ginsnop(pp *gc.Progs) *obj.Prog {
   125  	// This is a hardware nop (1-byte 0x90) instruction,
   126  	// even though we describe it as an explicit XCHGL here.
   127  	// Particularly, this does not zero the high 32 bits
   128  	// like typical *L opcodes.
   129  	// (gas assembles "xchg %eax,%eax" to 0x87 0xc0, which
   130  	// does zero the high 32 bits.)
   131  	p := pp.Prog(x86.AXCHGL)
   132  	p.From.Type = obj.TYPE_REG
   133  	p.From.Reg = x86.REG_AX
   134  	p.To.Type = obj.TYPE_REG
   135  	p.To.Reg = x86.REG_AX
   136  	return p
   137  }
   138  

View as plain text