Source file src/cmd/internal/obj/riscv/obj.go

     1  // Copyright © 2015 The Go Authors.  All rights reserved.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package riscv
    22  
    23  import (
    24  	"cmd/internal/obj"
    25  	"cmd/internal/objabi"
    26  	"cmd/internal/sys"
    27  	"fmt"
    28  	"internal/abi"
    29  	"log"
    30  	"math/bits"
    31  )
    32  
// buildop performs no work for this architecture: it accepts the link
// context and returns immediately.
// NOTE(review): presumably retained to satisfy a per-architecture hook that
// expects a buildop function — confirm against the callers.
func buildop(ctxt *obj.Link) {}
    34  
    35  func jalToSym(ctxt *obj.Link, p *obj.Prog, lr int16) {
    36  	switch p.As {
    37  	case obj.ACALL, obj.AJMP, obj.ARET, obj.ADUFFZERO, obj.ADUFFCOPY:
    38  	default:
    39  		ctxt.Diag("unexpected Prog in jalToSym: %v", p)
    40  		return
    41  	}
    42  
    43  	p.As = AJAL
    44  	p.Mark |= NEED_JAL_RELOC
    45  	p.From.Type = obj.TYPE_REG
    46  	p.From.Reg = lr
    47  	p.Reg = obj.REG_NONE
    48  }
    49  
    50  // progedit is called individually for each *obj.Prog. It normalizes instruction
    51  // formats and eliminates as many pseudo-instructions as possible.
    52  func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
    53  
    54  	// Expand binary instructions to ternary ones.
    55  	if p.Reg == obj.REG_NONE {
    56  		switch p.As {
    57  		case AADDI, ASLTI, ASLTIU, AANDI, AORI, AXORI, ASLLI, ASRLI, ASRAI,
    58  			AADDIW, ASLLIW, ASRLIW, ASRAIW, AADDW, ASUBW, ASLLW, ASRLW, ASRAW,
    59  			AADD, AAND, AOR, AXOR, ASLL, ASRL, ASUB, ASRA,
    60  			AMUL, AMULH, AMULHU, AMULHSU, AMULW, ADIV, ADIVU, ADIVW, ADIVUW,
    61  			AREM, AREMU, AREMW, AREMUW:
    62  			p.Reg = p.To.Reg
    63  		}
    64  	}
    65  
    66  	// Rewrite instructions with constant operands to refer to the immediate
    67  	// form of the instruction.
    68  	if p.From.Type == obj.TYPE_CONST {
    69  		switch p.As {
    70  		case AADD:
    71  			p.As = AADDI
    72  		case ASUB:
    73  			p.As, p.From.Offset = AADDI, -p.From.Offset
    74  		case ASLT:
    75  			p.As = ASLTI
    76  		case ASLTU:
    77  			p.As = ASLTIU
    78  		case AAND:
    79  			p.As = AANDI
    80  		case AOR:
    81  			p.As = AORI
    82  		case AXOR:
    83  			p.As = AXORI
    84  		case ASLL:
    85  			p.As = ASLLI
    86  		case ASRL:
    87  			p.As = ASRLI
    88  		case ASRA:
    89  			p.As = ASRAI
    90  		case AADDW:
    91  			p.As = AADDIW
    92  		case ASUBW:
    93  			p.As, p.From.Offset = AADDIW, -p.From.Offset
    94  		case ASLLW:
    95  			p.As = ASLLIW
    96  		case ASRLW:
    97  			p.As = ASRLIW
    98  		case ASRAW:
    99  			p.As = ASRAIW
   100  		}
   101  	}
   102  
   103  	switch p.As {
   104  	case obj.AJMP:
   105  		// Turn JMP into JAL ZERO or JALR ZERO.
   106  		p.From.Type = obj.TYPE_REG
   107  		p.From.Reg = REG_ZERO
   108  
   109  		switch p.To.Type {
   110  		case obj.TYPE_BRANCH:
   111  			p.As = AJAL
   112  		case obj.TYPE_MEM:
   113  			switch p.To.Name {
   114  			case obj.NAME_NONE:
   115  				p.As = AJALR
   116  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   117  				// Handled in preprocess.
   118  			default:
   119  				ctxt.Diag("unsupported name %d for %v", p.To.Name, p)
   120  			}
   121  		default:
   122  			panic(fmt.Sprintf("unhandled type %+v", p.To.Type))
   123  		}
   124  
   125  	case obj.ACALL:
   126  		switch p.To.Type {
   127  		case obj.TYPE_MEM:
   128  			// Handled in preprocess.
   129  		case obj.TYPE_REG:
   130  			p.As = AJALR
   131  			p.From.Type = obj.TYPE_REG
   132  			p.From.Reg = REG_LR
   133  		default:
   134  			ctxt.Diag("unknown destination type %+v in CALL: %v", p.To.Type, p)
   135  		}
   136  
   137  	case obj.AUNDEF:
   138  		p.As = AEBREAK
   139  
   140  	case ASCALL:
   141  		// SCALL is the old name for ECALL.
   142  		p.As = AECALL
   143  
   144  	case ASBREAK:
   145  		// SBREAK is the old name for EBREAK.
   146  		p.As = AEBREAK
   147  
   148  	case AMOV:
   149  		if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == obj.REG_NONE && int64(int32(p.From.Offset)) != p.From.Offset {
   150  			ctz := bits.TrailingZeros64(uint64(p.From.Offset))
   151  			val := p.From.Offset >> ctz
   152  			if int64(int32(val)) == val {
   153  				// It's ok. We can handle constants with many trailing zeros.
   154  				break
   155  			}
   156  			// Put >32-bit constants in memory and load them.
   157  			p.From.Type = obj.TYPE_MEM
   158  			p.From.Sym = ctxt.Int64Sym(p.From.Offset)
   159  			p.From.Name = obj.NAME_EXTERN
   160  			p.From.Offset = 0
   161  		}
   162  	}
   163  }
   164  
   165  // addrToReg extracts the register from an Addr, handling special Addr.Names.
   166  func addrToReg(a obj.Addr) int16 {
   167  	switch a.Name {
   168  	case obj.NAME_PARAM, obj.NAME_AUTO:
   169  		return REG_SP
   170  	}
   171  	return a.Reg
   172  }
   173  
   174  // movToLoad converts a MOV mnemonic into the corresponding load instruction.
   175  func movToLoad(mnemonic obj.As) obj.As {
   176  	switch mnemonic {
   177  	case AMOV:
   178  		return ALD
   179  	case AMOVB:
   180  		return ALB
   181  	case AMOVH:
   182  		return ALH
   183  	case AMOVW:
   184  		return ALW
   185  	case AMOVBU:
   186  		return ALBU
   187  	case AMOVHU:
   188  		return ALHU
   189  	case AMOVWU:
   190  		return ALWU
   191  	case AMOVF:
   192  		return AFLW
   193  	case AMOVD:
   194  		return AFLD
   195  	default:
   196  		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
   197  	}
   198  }
   199  
   200  // movToStore converts a MOV mnemonic into the corresponding store instruction.
   201  func movToStore(mnemonic obj.As) obj.As {
   202  	switch mnemonic {
   203  	case AMOV:
   204  		return ASD
   205  	case AMOVB:
   206  		return ASB
   207  	case AMOVH:
   208  		return ASH
   209  	case AMOVW:
   210  		return ASW
   211  	case AMOVF:
   212  		return AFSW
   213  	case AMOVD:
   214  		return AFSD
   215  	default:
   216  		panic(fmt.Sprintf("%+v is not a MOV", mnemonic))
   217  	}
   218  }
   219  
   220  // markRelocs marks an obj.Prog that specifies a MOV pseudo-instruction and
   221  // requires relocation.
   222  func markRelocs(p *obj.Prog) {
   223  	switch p.As {
   224  	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
   225  		switch {
   226  		case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG:
   227  			switch p.From.Name {
   228  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   229  				p.Mark |= NEED_PCREL_ITYPE_RELOC
   230  			}
   231  		case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG:
   232  			switch p.From.Name {
   233  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   234  				p.Mark |= NEED_PCREL_ITYPE_RELOC
   235  			}
   236  		case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM:
   237  			switch p.To.Name {
   238  			case obj.NAME_EXTERN, obj.NAME_STATIC:
   239  				p.Mark |= NEED_PCREL_STYPE_RELOC
   240  			}
   241  		}
   242  	}
   243  }
   244  
   245  // InvertBranch inverts the condition of a conditional branch.
   246  func InvertBranch(as obj.As) obj.As {
   247  	switch as {
   248  	case ABEQ:
   249  		return ABNE
   250  	case ABEQZ:
   251  		return ABNEZ
   252  	case ABGE:
   253  		return ABLT
   254  	case ABGEU:
   255  		return ABLTU
   256  	case ABGEZ:
   257  		return ABLTZ
   258  	case ABGT:
   259  		return ABLE
   260  	case ABGTU:
   261  		return ABLEU
   262  	case ABGTZ:
   263  		return ABLEZ
   264  	case ABLE:
   265  		return ABGT
   266  	case ABLEU:
   267  		return ABGTU
   268  	case ABLEZ:
   269  		return ABGTZ
   270  	case ABLT:
   271  		return ABGE
   272  	case ABLTU:
   273  		return ABGEU
   274  	case ABLTZ:
   275  		return ABGEZ
   276  	case ABNE:
   277  		return ABEQ
   278  	case ABNEZ:
   279  		return ABEQZ
   280  	default:
   281  		panic("InvertBranch: not a branch")
   282  	}
   283  }
   284  
   285  // containsCall reports whether the symbol contains a CALL (or equivalent)
   286  // instruction. Must be called after progedit.
   287  func containsCall(sym *obj.LSym) bool {
   288  	// CALLs are CALL or JAL(R) with link register LR.
   289  	for p := sym.Func().Text; p != nil; p = p.Link {
   290  		switch p.As {
   291  		case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
   292  			return true
   293  		case AJAL, AJALR:
   294  			if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
   295  				return true
   296  			}
   297  		}
   298  	}
   299  
   300  	return false
   301  }
   302  
   303  // setPCs sets the Pc field in all instructions reachable from p.
   304  // It uses pc as the initial value and returns the next available pc.
   305  func setPCs(p *obj.Prog, pc int64) int64 {
   306  	for ; p != nil; p = p.Link {
   307  		p.Pc = pc
   308  		for _, ins := range instructionsForProg(p) {
   309  			pc += int64(ins.length())
   310  		}
   311  
   312  		if p.As == obj.APCALIGN {
   313  			alignedValue := p.From.Offset
   314  			v := pcAlignPadLength(pc, alignedValue)
   315  			pc += int64(v)
   316  		}
   317  	}
   318  	return pc
   319  }
   320  
   321  // stackOffset updates Addr offsets based on the current stack size.
   322  //
   323  // The stack looks like:
   324  // -------------------
   325  // |                 |
   326  // |      PARAMs     |
   327  // |                 |
   328  // |                 |
   329  // -------------------
   330  // |    Parent RA    |   SP on function entry
   331  // -------------------
   332  // |                 |
   333  // |                 |
   334  // |       AUTOs     |
   335  // |                 |
   336  // |                 |
   337  // -------------------
   338  // |        RA       |   SP during function execution
   339  // -------------------
   340  //
   341  // FixedFrameSize makes other packages aware of the space allocated for RA.
   342  //
   343  // A nicer version of this diagram can be found on slide 21 of the presentation
   344  // attached to https://golang.org/issue/16922#issuecomment-243748180.
   345  func stackOffset(a *obj.Addr, stacksize int64) {
   346  	switch a.Name {
   347  	case obj.NAME_AUTO:
   348  		// Adjust to the top of AUTOs.
   349  		a.Offset += stacksize
   350  	case obj.NAME_PARAM:
   351  		// Adjust to the bottom of PARAMs.
   352  		a.Offset += stacksize + 8
   353  	}
   354  }
   355  
   356  // preprocess generates prologue and epilogue code, computes PC-relative branch
   357  // and jump offsets, and resolves pseudo-registers.
   358  //
   359  // preprocess is called once per linker symbol.
   360  //
   361  // When preprocess finishes, all instructions in the symbol are either
   362  // concrete, real RISC-V instructions or directive pseudo-ops like TEXT,
   363  // PCDATA, and FUNCDATA.
   364  func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   365  	if cursym.Func().Text == nil || cursym.Func().Text.Link == nil {
   366  		return
   367  	}
   368  
   369  	// Generate the prologue.
   370  	text := cursym.Func().Text
   371  	if text.As != obj.ATEXT {
   372  		ctxt.Diag("preprocess: found symbol that does not start with TEXT directive")
   373  		return
   374  	}
   375  
   376  	stacksize := text.To.Offset
   377  	if stacksize == -8 {
   378  		// Historical way to mark NOFRAME.
   379  		text.From.Sym.Set(obj.AttrNoFrame, true)
   380  		stacksize = 0
   381  	}
   382  	if stacksize < 0 {
   383  		ctxt.Diag("negative frame size %d - did you mean NOFRAME?", stacksize)
   384  	}
   385  	if text.From.Sym.NoFrame() {
   386  		if stacksize != 0 {
   387  			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", stacksize)
   388  		}
   389  	}
   390  
   391  	if !containsCall(cursym) {
   392  		text.From.Sym.Set(obj.AttrLeaf, true)
   393  		if stacksize == 0 {
   394  			// A leaf function with no locals has no frame.
   395  			text.From.Sym.Set(obj.AttrNoFrame, true)
   396  		}
   397  	}
   398  
   399  	// Save LR unless there is no frame.
   400  	if !text.From.Sym.NoFrame() {
   401  		stacksize += ctxt.Arch.FixedFrameSize
   402  	}
   403  
   404  	cursym.Func().Args = text.To.Val.(int32)
   405  	cursym.Func().Locals = int32(stacksize)
   406  
   407  	prologue := text
   408  
   409  	if !cursym.Func().Text.From.Sym.NoSplit() {
   410  		prologue = stacksplit(ctxt, prologue, cursym, newprog, stacksize) // emit split check
   411  	}
   412  
   413  	if stacksize != 0 {
   414  		prologue = ctxt.StartUnsafePoint(prologue, newprog)
   415  
   416  		// Actually save LR.
   417  		prologue = obj.Appendp(prologue, newprog)
   418  		prologue.As = AMOV
   419  		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   420  		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -stacksize}
   421  
   422  		// Insert stack adjustment.
   423  		prologue = obj.Appendp(prologue, newprog)
   424  		prologue.As = AADDI
   425  		prologue.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -stacksize}
   426  		prologue.Reg = REG_SP
   427  		prologue.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   428  		prologue.Spadj = int32(stacksize)
   429  
   430  		prologue = ctxt.EndUnsafePoint(prologue, newprog, -1)
   431  
   432  		// On Linux, in a cgo binary we may get a SIGSETXID signal early on
   433  		// before the signal stack is set, as glibc doesn't allow us to block
   434  		// SIGSETXID. So a signal may land on the current stack and clobber
   435  		// the content below the SP. We store the LR again after the SP is
   436  		// decremented.
   437  		prologue = obj.Appendp(prologue, newprog)
   438  		prologue.As = AMOV
   439  		prologue.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   440  		prologue.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
   441  	}
   442  
   443  	if cursym.Func().Text.From.Sym.Wrapper() {
   444  		// if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
   445  		//
   446  		//   MOV g_panic(g), X5
   447  		//   BNE X5, ZERO, adjust
   448  		// end:
   449  		//   NOP
   450  		// ...rest of function..
   451  		// adjust:
   452  		//   MOV panic_argp(X5), X6
   453  		//   ADD $(autosize+FIXED_FRAME), SP, X7
   454  		//   BNE X6, X7, end
   455  		//   ADD $FIXED_FRAME, SP, X6
   456  		//   MOV X6, panic_argp(X5)
   457  		//   JMP end
   458  		//
   459  		// The NOP is needed to give the jumps somewhere to land.
   460  
   461  		ldpanic := obj.Appendp(prologue, newprog)
   462  
   463  		ldpanic.As = AMOV
   464  		ldpanic.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REGG, Offset: 4 * int64(ctxt.Arch.PtrSize)} // G.panic
   465  		ldpanic.Reg = obj.REG_NONE
   466  		ldpanic.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
   467  
   468  		bneadj := obj.Appendp(ldpanic, newprog)
   469  		bneadj.As = ABNE
   470  		bneadj.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X5}
   471  		bneadj.Reg = REG_ZERO
   472  		bneadj.To.Type = obj.TYPE_BRANCH
   473  
   474  		endadj := obj.Appendp(bneadj, newprog)
   475  		endadj.As = obj.ANOP
   476  
   477  		last := endadj
   478  		for last.Link != nil {
   479  			last = last.Link
   480  		}
   481  
   482  		getargp := obj.Appendp(last, newprog)
   483  		getargp.As = AMOV
   484  		getargp.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
   485  		getargp.Reg = obj.REG_NONE
   486  		getargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   487  
   488  		bneadj.To.SetTarget(getargp)
   489  
   490  		calcargp := obj.Appendp(getargp, newprog)
   491  		calcargp.As = AADDI
   492  		calcargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize + ctxt.Arch.FixedFrameSize}
   493  		calcargp.Reg = REG_SP
   494  		calcargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X7}
   495  
   496  		testargp := obj.Appendp(calcargp, newprog)
   497  		testargp.As = ABNE
   498  		testargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   499  		testargp.Reg = REG_X7
   500  		testargp.To.Type = obj.TYPE_BRANCH
   501  		testargp.To.SetTarget(endadj)
   502  
   503  		adjargp := obj.Appendp(testargp, newprog)
   504  		adjargp.As = AADDI
   505  		adjargp.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(ctxt.Arch.PtrSize)}
   506  		adjargp.Reg = REG_SP
   507  		adjargp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   508  
   509  		setargp := obj.Appendp(adjargp, newprog)
   510  		setargp.As = AMOV
   511  		setargp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_X6}
   512  		setargp.Reg = obj.REG_NONE
   513  		setargp.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_X5, Offset: 0} // Panic.argp
   514  
   515  		godone := obj.Appendp(setargp, newprog)
   516  		godone.As = AJAL
   517  		godone.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   518  		godone.To.Type = obj.TYPE_BRANCH
   519  		godone.To.SetTarget(endadj)
   520  	}
   521  
   522  	// Update stack-based offsets.
   523  	for p := cursym.Func().Text; p != nil; p = p.Link {
   524  		stackOffset(&p.From, stacksize)
   525  		stackOffset(&p.To, stacksize)
   526  	}
   527  
   528  	// Additional instruction rewriting.
   529  	for p := cursym.Func().Text; p != nil; p = p.Link {
   530  		switch p.As {
   531  		case obj.AGETCALLERPC:
   532  			if cursym.Leaf() {
   533  				// MOV LR, Rd
   534  				p.As = AMOV
   535  				p.From.Type = obj.TYPE_REG
   536  				p.From.Reg = REG_LR
   537  			} else {
   538  				// MOV (RSP), Rd
   539  				p.As = AMOV
   540  				p.From.Type = obj.TYPE_MEM
   541  				p.From.Reg = REG_SP
   542  			}
   543  
   544  		case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
   545  			switch p.To.Type {
   546  			case obj.TYPE_MEM:
   547  				jalToSym(ctxt, p, REG_LR)
   548  			}
   549  
   550  		case obj.AJMP:
   551  			switch p.To.Type {
   552  			case obj.TYPE_MEM:
   553  				switch p.To.Name {
   554  				case obj.NAME_EXTERN, obj.NAME_STATIC:
   555  					jalToSym(ctxt, p, REG_ZERO)
   556  				}
   557  			}
   558  
   559  		case obj.ARET:
   560  			// Replace RET with epilogue.
   561  			retJMP := p.To.Sym
   562  
   563  			if stacksize != 0 {
   564  				// Restore LR.
   565  				p.As = AMOV
   566  				p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
   567  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   568  				p = obj.Appendp(p, newprog)
   569  
   570  				p.As = AADDI
   571  				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: stacksize}
   572  				p.Reg = REG_SP
   573  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   574  				p.Spadj = int32(-stacksize)
   575  				p = obj.Appendp(p, newprog)
   576  			}
   577  
   578  			if retJMP != nil {
   579  				p.As = obj.ARET
   580  				p.To.Sym = retJMP
   581  				jalToSym(ctxt, p, REG_ZERO)
   582  			} else {
   583  				p.As = AJALR
   584  				p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   585  				p.Reg = obj.REG_NONE
   586  				p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   587  			}
   588  
   589  			// "Add back" the stack removed in the previous instruction.
   590  			//
   591  			// This is to avoid confusing pctospadj, which sums
   592  			// Spadj from function entry to each PC, and shouldn't
   593  			// count adjustments from earlier epilogues, since they
   594  			// won't affect later PCs.
   595  			p.Spadj = int32(stacksize)
   596  
   597  		case AADDI:
   598  			// Refine Spadjs account for adjustment via ADDI instruction.
   599  			if p.To.Type == obj.TYPE_REG && p.To.Reg == REG_SP && p.From.Type == obj.TYPE_CONST {
   600  				p.Spadj = int32(-p.From.Offset)
   601  			}
   602  		}
   603  
   604  		if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 {
   605  			f := cursym.Func()
   606  			if f.FuncFlag&abi.FuncFlagSPWrite == 0 {
   607  				f.FuncFlag |= abi.FuncFlagSPWrite
   608  				if ctxt.Debugvlog || !ctxt.IsAsm {
   609  					ctxt.Logf("auto-SPWRITE: %s %v\n", cursym.Name, p)
   610  					if !ctxt.IsAsm {
   611  						ctxt.Diag("invalid auto-SPWRITE in non-assembly")
   612  						ctxt.DiagFlush()
   613  						log.Fatalf("bad SPWRITE")
   614  					}
   615  				}
   616  			}
   617  		}
   618  	}
   619  
   620  	var callCount int
   621  	for p := cursym.Func().Text; p != nil; p = p.Link {
   622  		markRelocs(p)
   623  		if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
   624  			callCount++
   625  		}
   626  	}
   627  	const callTrampSize = 8 // 2 machine instructions.
   628  	maxTrampSize := int64(callCount * callTrampSize)
   629  
   630  	// Compute instruction addresses.  Once we do that, we need to check for
   631  	// overextended jumps and branches.  Within each iteration, Pc differences
   632  	// are always lower bounds (since the program gets monotonically longer,
   633  	// a fixed point will be reached).  No attempt to handle functions > 2GiB.
   634  	for {
   635  		big, rescan := false, false
   636  		maxPC := setPCs(cursym.Func().Text, 0)
   637  		if maxPC+maxTrampSize > (1 << 20) {
   638  			big = true
   639  		}
   640  
   641  		for p := cursym.Func().Text; p != nil; p = p.Link {
   642  			switch p.As {
   643  			case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
   644  				if p.To.Type != obj.TYPE_BRANCH {
   645  					panic("assemble: instruction with branch-like opcode lacks destination")
   646  				}
   647  				offset := p.To.Target().Pc - p.Pc
   648  				if offset < -4096 || 4096 <= offset {
   649  					// Branch is long.  Replace it with a jump.
   650  					jmp := obj.Appendp(p, newprog)
   651  					jmp.As = AJAL
   652  					jmp.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   653  					jmp.To = obj.Addr{Type: obj.TYPE_BRANCH}
   654  					jmp.To.SetTarget(p.To.Target())
   655  
   656  					p.As = InvertBranch(p.As)
   657  					p.To.SetTarget(jmp.Link)
   658  
   659  					// We may have made previous branches too long,
   660  					// so recheck them.
   661  					rescan = true
   662  				}
   663  			case AJAL:
   664  				// Linker will handle the intersymbol case and trampolines.
   665  				if p.To.Target() == nil {
   666  					if !big {
   667  						break
   668  					}
   669  					// This function is going to be too large for JALs
   670  					// to reach trampolines. Replace with AUIPC+JALR.
   671  					jmp := obj.Appendp(p, newprog)
   672  					jmp.As = AJALR
   673  					jmp.From = p.From
   674  					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   675  
   676  					p.As = AAUIPC
   677  					p.Mark = (p.Mark &^ NEED_JAL_RELOC) | NEED_CALL_RELOC
   678  					p.AddRestSource(obj.Addr{Type: obj.TYPE_CONST, Offset: p.To.Offset, Sym: p.To.Sym})
   679  					p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: 0}
   680  					p.Reg = obj.REG_NONE
   681  					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   682  
   683  					rescan = true
   684  					break
   685  				}
   686  				offset := p.To.Target().Pc - p.Pc
   687  				if offset < -(1<<20) || (1<<20) <= offset {
   688  					// Replace with 2-instruction sequence. This assumes
   689  					// that TMP is not live across J instructions, since
   690  					// it is reserved by SSA.
   691  					jmp := obj.Appendp(p, newprog)
   692  					jmp.As = AJALR
   693  					jmp.From = p.From
   694  					jmp.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   695  
   696  					// p.From is not generally valid, however will be
   697  					// fixed up in the next loop.
   698  					p.As = AAUIPC
   699  					p.From = obj.Addr{Type: obj.TYPE_BRANCH, Sym: p.From.Sym}
   700  					p.From.SetTarget(p.To.Target())
   701  					p.Reg = obj.REG_NONE
   702  					p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_TMP}
   703  
   704  					rescan = true
   705  				}
   706  			}
   707  		}
   708  
   709  		if !rescan {
   710  			break
   711  		}
   712  	}
   713  
   714  	// Now that there are no long branches, resolve branch and jump targets.
   715  	// At this point, instruction rewriting which changes the number of
   716  	// instructions will break everything--don't do it!
   717  	for p := cursym.Func().Text; p != nil; p = p.Link {
   718  		switch p.As {
   719  		case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
   720  			switch p.To.Type {
   721  			case obj.TYPE_BRANCH:
   722  				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
   723  			case obj.TYPE_MEM:
   724  				panic("unhandled type")
   725  			}
   726  
   727  		case AJAL:
   728  			// Linker will handle the intersymbol case and trampolines.
   729  			if p.To.Target() != nil {
   730  				p.To.Type, p.To.Offset = obj.TYPE_CONST, p.To.Target().Pc-p.Pc
   731  			}
   732  
   733  		case AAUIPC:
   734  			if p.From.Type == obj.TYPE_BRANCH {
   735  				low, high, err := Split32BitImmediate(p.From.Target().Pc - p.Pc)
   736  				if err != nil {
   737  					ctxt.Diag("%v: jump displacement %d too large", p, p.To.Target().Pc-p.Pc)
   738  				}
   739  				p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: high, Sym: cursym}
   740  				p.Link.To.Offset = low
   741  			}
   742  
   743  		case obj.APCALIGN:
   744  			alignedValue := p.From.Offset
   745  			if (alignedValue&(alignedValue-1) != 0) || 4 > alignedValue || alignedValue > 2048 {
   746  				ctxt.Diag("alignment value of an instruction must be a power of two and in the range [4, 2048], got %d\n", alignedValue)
   747  			}
   748  			// Update the current text symbol alignment value.
   749  			if int32(alignedValue) > cursym.Func().Align {
   750  				cursym.Func().Align = int32(alignedValue)
   751  			}
   752  		}
   753  	}
   754  
   755  	// Validate all instructions - this provides nice error messages.
   756  	for p := cursym.Func().Text; p != nil; p = p.Link {
   757  		for _, ins := range instructionsForProg(p) {
   758  			ins.validate(ctxt)
   759  		}
   760  	}
   761  }
   762  
   763  func pcAlignPadLength(pc int64, alignedValue int64) int {
   764  	return int(-pc & (alignedValue - 1))
   765  }
   766  
   767  func stacksplit(ctxt *obj.Link, p *obj.Prog, cursym *obj.LSym, newprog obj.ProgAlloc, framesize int64) *obj.Prog {
   768  	// Leaf function with no frame is effectively NOSPLIT.
   769  	if framesize == 0 {
   770  		return p
   771  	}
   772  
   773  	if ctxt.Flag_maymorestack != "" {
   774  		// Save LR and REGCTXT
   775  		const frameSize = 16
   776  		p = ctxt.StartUnsafePoint(p, newprog)
   777  
   778  		// Spill Arguments. This has to happen before we open
   779  		// any more frame space.
   780  		p = cursym.Func().SpillRegisterArgs(p, newprog)
   781  
   782  		// MOV LR, -16(SP)
   783  		p = obj.Appendp(p, newprog)
   784  		p.As = AMOV
   785  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   786  		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: -frameSize}
   787  		// ADDI $-16, SP
   788  		p = obj.Appendp(p, newprog)
   789  		p.As = AADDI
   790  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: -frameSize}
   791  		p.Reg = REG_SP
   792  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   793  		p.Spadj = frameSize
   794  		// MOV REGCTXT, 8(SP)
   795  		p = obj.Appendp(p, newprog)
   796  		p.As = AMOV
   797  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
   798  		p.To = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}
   799  
   800  		// CALL maymorestack
   801  		p = obj.Appendp(p, newprog)
   802  		p.As = obj.ACALL
   803  		p.To.Type = obj.TYPE_BRANCH
   804  		// See ../x86/obj6.go
   805  		p.To.Sym = ctxt.LookupABI(ctxt.Flag_maymorestack, cursym.ABI())
   806  		jalToSym(ctxt, p, REG_X5)
   807  
   808  		// Restore LR and REGCTXT
   809  
   810  		// MOV 8(SP), REGCTXT
   811  		p = obj.Appendp(p, newprog)
   812  		p.As = AMOV
   813  		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 8}
   814  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_CTXT}
   815  		// MOV (SP), LR
   816  		p = obj.Appendp(p, newprog)
   817  		p.As = AMOV
   818  		p.From = obj.Addr{Type: obj.TYPE_MEM, Reg: REG_SP, Offset: 0}
   819  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_LR}
   820  		// ADDI $16, SP
   821  		p = obj.Appendp(p, newprog)
   822  		p.As = AADDI
   823  		p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: frameSize}
   824  		p.Reg = REG_SP
   825  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: REG_SP}
   826  		p.Spadj = -frameSize
   827  
   828  		// Unspill arguments
   829  		p = cursym.Func().UnspillRegisterArgs(p, newprog)
   830  		p = ctxt.EndUnsafePoint(p, newprog, -1)
   831  	}
   832  
   833  	// Jump back to here after morestack returns.
   834  	startPred := p
   835  
   836  	// MOV	g_stackguard(g), X6
   837  	p = obj.Appendp(p, newprog)
   838  	p.As = AMOV
   839  	p.From.Type = obj.TYPE_MEM
   840  	p.From.Reg = REGG
   841  	p.From.Offset = 2 * int64(ctxt.Arch.PtrSize) // G.stackguard0
   842  	if cursym.CFunc() {
   843  		p.From.Offset = 3 * int64(ctxt.Arch.PtrSize) // G.stackguard1
   844  	}
   845  	p.To.Type = obj.TYPE_REG
   846  	p.To.Reg = REG_X6
   847  
   848  	// Mark the stack bound check and morestack call async nonpreemptible.
   849  	// If we get preempted here, when resumed the preemption request is
   850  	// cleared, but we'll still call morestack, which will double the stack
   851  	// unnecessarily. See issue #35470.
   852  	p = ctxt.StartUnsafePoint(p, newprog)
   853  
   854  	var to_done, to_more *obj.Prog
   855  
   856  	if framesize <= abi.StackSmall {
   857  		// small stack
   858  		//	// if SP > stackguard { goto done }
   859  		//	BLTU	stackguard, SP, done
   860  		p = obj.Appendp(p, newprog)
   861  		p.As = ABLTU
   862  		p.From.Type = obj.TYPE_REG
   863  		p.From.Reg = REG_X6
   864  		p.Reg = REG_SP
   865  		p.To.Type = obj.TYPE_BRANCH
   866  		to_done = p
   867  	} else {
   868  		// large stack: SP-framesize < stackguard-StackSmall
   869  		offset := int64(framesize) - abi.StackSmall
   870  		if framesize > abi.StackBig {
   871  			// Such a large stack we need to protect against underflow.
   872  			// The runtime guarantees SP > objabi.StackBig, but
   873  			// framesize is large enough that SP-framesize may
   874  			// underflow, causing a direct comparison with the
   875  			// stack guard to incorrectly succeed. We explicitly
   876  			// guard against underflow.
   877  			//
   878  			//	MOV	$(framesize-StackSmall), X7
   879  			//	BLTU	SP, X7, label-of-call-to-morestack
   880  
   881  			p = obj.Appendp(p, newprog)
   882  			p.As = AMOV
   883  			p.From.Type = obj.TYPE_CONST
   884  			p.From.Offset = offset
   885  			p.To.Type = obj.TYPE_REG
   886  			p.To.Reg = REG_X7
   887  
   888  			p = obj.Appendp(p, newprog)
   889  			p.As = ABLTU
   890  			p.From.Type = obj.TYPE_REG
   891  			p.From.Reg = REG_SP
   892  			p.Reg = REG_X7
   893  			p.To.Type = obj.TYPE_BRANCH
   894  			to_more = p
   895  		}
   896  
   897  		// Check against the stack guard. We've ensured this won't underflow.
   898  		//	ADD	$-(framesize-StackSmall), SP, X7
   899  		//	// if X7 > stackguard { goto done }
   900  		//	BLTU	stackguard, X7, done
   901  		p = obj.Appendp(p, newprog)
   902  		p.As = AADDI
   903  		p.From.Type = obj.TYPE_CONST
   904  		p.From.Offset = -offset
   905  		p.Reg = REG_SP
   906  		p.To.Type = obj.TYPE_REG
   907  		p.To.Reg = REG_X7
   908  
   909  		p = obj.Appendp(p, newprog)
   910  		p.As = ABLTU
   911  		p.From.Type = obj.TYPE_REG
   912  		p.From.Reg = REG_X6
   913  		p.Reg = REG_X7
   914  		p.To.Type = obj.TYPE_BRANCH
   915  		to_done = p
   916  	}
   917  
   918  	// Spill the register args that could be clobbered by the
   919  	// morestack code
   920  	p = ctxt.EmitEntryStackMap(cursym, p, newprog)
   921  	p = cursym.Func().SpillRegisterArgs(p, newprog)
   922  
   923  	// CALL runtime.morestack(SB)
   924  	p = obj.Appendp(p, newprog)
   925  	p.As = obj.ACALL
   926  	p.To.Type = obj.TYPE_BRANCH
   927  
   928  	if cursym.CFunc() {
   929  		p.To.Sym = ctxt.Lookup("runtime.morestackc")
   930  	} else if !cursym.Func().Text.From.Sym.NeedCtxt() {
   931  		p.To.Sym = ctxt.Lookup("runtime.morestack_noctxt")
   932  	} else {
   933  		p.To.Sym = ctxt.Lookup("runtime.morestack")
   934  	}
   935  	if to_more != nil {
   936  		to_more.To.SetTarget(p)
   937  	}
   938  	jalToSym(ctxt, p, REG_X5)
   939  
   940  	// The instructions which unspill regs should be preemptible.
   941  	p = ctxt.EndUnsafePoint(p, newprog, -1)
   942  	p = cursym.Func().UnspillRegisterArgs(p, newprog)
   943  
   944  	// JMP start
   945  	p = obj.Appendp(p, newprog)
   946  	p.As = AJAL
   947  	p.To = obj.Addr{Type: obj.TYPE_BRANCH}
   948  	p.From = obj.Addr{Type: obj.TYPE_REG, Reg: REG_ZERO}
   949  	p.To.SetTarget(startPred.Link)
   950  
   951  	// placeholder for to_done's jump target
   952  	p = obj.Appendp(p, newprog)
   953  	p.As = obj.ANOP // zero-width place holder
   954  	to_done.To.SetTarget(p)
   955  
   956  	return p
   957  }
   958  
   959  // signExtend sign extends val starting at bit bit.
   960  func signExtend(val int64, bit uint) int64 {
   961  	return val << (64 - bit) >> (64 - bit)
   962  }
   963  
   964  // Split32BitImmediate splits a signed 32-bit immediate into a signed 20-bit
   965  // upper immediate and a signed 12-bit lower immediate to be added to the upper
   966  // result. For example, high may be used in LUI and low in a following ADDI to
   967  // generate a full 32-bit constant.
   968  func Split32BitImmediate(imm int64) (low, high int64, err error) {
   969  	if err := immIFits(imm, 32); err != nil {
   970  		return 0, 0, err
   971  	}
   972  
   973  	// Nothing special needs to be done if the immediate fits in 12 bits.
   974  	if err := immIFits(imm, 12); err == nil {
   975  		return imm, 0, nil
   976  	}
   977  
   978  	high = imm >> 12
   979  
   980  	// The bottom 12 bits will be treated as signed.
   981  	//
   982  	// If that will result in a negative 12 bit number, add 1 to
   983  	// our upper bits to adjust for the borrow.
   984  	//
   985  	// It is not possible for this increment to overflow. To
   986  	// overflow, the 20 top bits would be 1, and the sign bit for
   987  	// the low 12 bits would be set, in which case the entire 32
   988  	// bit pattern fits in a 12 bit signed value.
   989  	if imm&(1<<11) != 0 {
   990  		high++
   991  	}
   992  
   993  	low = signExtend(imm, 12)
   994  	high = signExtend(high, 20)
   995  
   996  	return low, high, nil
   997  }
   998  
   999  func regVal(r, min, max uint32) uint32 {
  1000  	if r < min || r > max {
  1001  		panic(fmt.Sprintf("register out of range, want %d <= %d <= %d", min, r, max))
  1002  	}
  1003  	return r - min
  1004  }
  1005  
  1006  // regI returns an integer register.
  1007  func regI(r uint32) uint32 {
  1008  	return regVal(r, REG_X0, REG_X31)
  1009  }
  1010  
  1011  // regF returns a float register.
  1012  func regF(r uint32) uint32 {
  1013  	return regVal(r, REG_F0, REG_F31)
  1014  }
  1015  
  1016  // regAddr extracts a register from an Addr.
  1017  func regAddr(a obj.Addr, min, max uint32) uint32 {
  1018  	if a.Type != obj.TYPE_REG {
  1019  		panic(fmt.Sprintf("ill typed: %+v", a))
  1020  	}
  1021  	return regVal(uint32(a.Reg), min, max)
  1022  }
  1023  
  1024  // regIAddr extracts the integer register from an Addr.
  1025  func regIAddr(a obj.Addr) uint32 {
  1026  	return regAddr(a, REG_X0, REG_X31)
  1027  }
  1028  
  1029  // regFAddr extracts the float register from an Addr.
  1030  func regFAddr(a obj.Addr) uint32 {
  1031  	return regAddr(a, REG_F0, REG_F31)
  1032  }
  1033  
  1034  // immEven checks that the immediate is a multiple of two. If it
  1035  // is not, an error is returned.
  1036  func immEven(x int64) error {
  1037  	if x&1 != 0 {
  1038  		return fmt.Errorf("immediate %#x is not a multiple of two", x)
  1039  	}
  1040  	return nil
  1041  }
  1042  
  1043  // immIFits checks whether the immediate value x fits in nbits bits
  1044  // as a signed integer. If it does not, an error is returned.
  1045  func immIFits(x int64, nbits uint) error {
  1046  	nbits--
  1047  	min := int64(-1) << nbits
  1048  	max := int64(1)<<nbits - 1
  1049  	if x < min || x > max {
  1050  		if nbits <= 16 {
  1051  			return fmt.Errorf("signed immediate %d must be in range [%d, %d] (%d bits)", x, min, max, nbits)
  1052  		}
  1053  		return fmt.Errorf("signed immediate %#x must be in range [%#x, %#x] (%d bits)", x, min, max, nbits)
  1054  	}
  1055  	return nil
  1056  }
  1057  
  1058  // immI extracts the signed integer of the specified size from an immediate.
  1059  func immI(as obj.As, imm int64, nbits uint) uint32 {
  1060  	if err := immIFits(imm, nbits); err != nil {
  1061  		panic(fmt.Sprintf("%v: %v", as, err))
  1062  	}
  1063  	return uint32(imm)
  1064  }
  1065  
  1066  func wantImmI(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
  1067  	if err := immIFits(imm, nbits); err != nil {
  1068  		ctxt.Diag("%v: %v", ins, err)
  1069  	}
  1070  }
  1071  
  1072  func wantReg(ctxt *obj.Link, ins *instruction, pos string, descr string, r, min, max uint32) {
  1073  	if r < min || r > max {
  1074  		var suffix string
  1075  		if r != obj.REG_NONE {
  1076  			suffix = fmt.Sprintf(" but got non-%s register %s", descr, RegName(int(r)))
  1077  		}
  1078  		ctxt.Diag("%v: expected %s register in %s position%s", ins, descr, pos, suffix)
  1079  	}
  1080  }
  1081  
  1082  func wantNoneReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
  1083  	if r != obj.REG_NONE {
  1084  		ctxt.Diag("%v: expected no register in %s but got register %s", ins, pos, RegName(int(r)))
  1085  	}
  1086  }
  1087  
  1088  // wantIntReg checks that r is an integer register.
  1089  func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
  1090  	wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
  1091  }
  1092  
  1093  // wantFloatReg checks that r is a floating-point register.
  1094  func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
  1095  	wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
  1096  }
  1097  
  1098  // wantEvenOffset checks that the offset is a multiple of two.
  1099  func wantEvenOffset(ctxt *obj.Link, ins *instruction, offset int64) {
  1100  	if err := immEven(offset); err != nil {
  1101  		ctxt.Diag("%v: %v", ins, err)
  1102  	}
  1103  }
  1104  
// validateRIII checks the operands of an R-type instruction with integer
// registers in rd, rs1 and rs2 and no register in rs3. Each violation is
// reported as a diagnostic on ctxt.
func validateRIII(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1111  
// validateRFFF checks the operands of an R-type instruction with float
// registers in rd, rs1 and rs2 and no register in rs3. Each violation is
// reported as a diagnostic on ctxt.
func validateRFFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1118  
// validateRFFFF checks the operands of a four-register instruction with
// float registers in rd, rs1, rs2 and rs3. Each violation is reported as a
// diagnostic on ctxt.
func validateRFFFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantFloatReg(ctxt, ins, "rs3", ins.rs3)
}
  1125  
// validateRFFI checks the operands of an R-type instruction with an integer
// register in rd, float registers in rs1 and rs2, and no register in rs3.
// Each violation is reported as a diagnostic on ctxt.
func validateRFFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1132  
// validateRFI checks the operands of an R-type instruction with an integer
// register in rd, a float register in rs2, and no register in rs1 or rs3.
// Each violation is reported as a diagnostic on ctxt.
func validateRFI(ctxt *obj.Link, ins *instruction) {
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1139  
// validateRIF checks the operands of an R-type instruction with a float
// register in rd, an integer register in rs2, and no register in rs1 or
// rs3. Each violation is reported as a diagnostic on ctxt.
func validateRIF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1146  
// validateRFF checks the operands of an R-type instruction with float
// registers in rd and rs2 and no register in rs1 or rs3. Each violation is
// reported as a diagnostic on ctxt.
func validateRFF(ctxt *obj.Link, ins *instruction) {
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantFloatReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1153  
// validateII checks the operands of an I-type instruction: the immediate
// must fit in 12 signed bits, rd and rs1 must be integer registers, and rs2
// and rs3 must hold no register. Each violation is reported as a diagnostic
// on ctxt.
func validateII(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1161  
// validateIF checks the operands of an I-type instruction with a float
// destination: the immediate must fit in 12 signed bits, rd must be a float
// register, rs1 must be an integer register, and rs2 and rs3 must hold no
// register. Each violation is reported as a diagnostic on ctxt.
func validateIF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantFloatReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1169  
// validateSI checks the operands of an S-type instruction whose rd and rs1
// slots both hold integer registers: the immediate must fit in 12 signed
// bits, and rs2 and rs3 must hold no register. Each violation is reported
// as a diagnostic on ctxt.
func validateSI(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1177  
// validateSF checks the operands of an S-type instruction with an integer
// register in the rd slot and a float register in the rs1 slot: the
// immediate must fit in 12 signed bits, and rs2 and rs3 must hold no
// register. Each violation is reported as a diagnostic on ctxt.
func validateSF(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 12)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantFloatReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1185  
// validateB checks the operands of a B-type instruction: the offset must be
// even and fit in 13 signed bits, rs1 and rs2 must be integer registers, and
// rd and rs3 must hold no register. Each violation is reported as a
// diagnostic on ctxt.
func validateB(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 13 bit immediates for the
	// 12 bit slot. We implicitly drop the least significant bit in encodeB.
	wantEvenOffset(ctxt, ins, ins.imm)
	wantImmI(ctxt, ins, ins.imm, 13)
	wantNoneReg(ctxt, ins, "rd", ins.rd)
	wantIntReg(ctxt, ins, "rs1", ins.rs1)
	wantIntReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1196  
// validateU checks the operands of a U-type instruction: the immediate must
// fit in 20 signed bits, rd must be an integer register, and rs1, rs2 and
// rs3 must hold no register. Each violation is reported as a diagnostic on
// ctxt.
func validateU(ctxt *obj.Link, ins *instruction) {
	wantImmI(ctxt, ins, ins.imm, 20)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1204  
// validateJ checks the operands of a J-type instruction: the offset must be
// even and fit in 21 signed bits, rd must be an integer register, and rs1,
// rs2 and rs3 must hold no register. Each violation is reported as a
// diagnostic on ctxt.
func validateJ(ctxt *obj.Link, ins *instruction) {
	// Offsets are multiples of two, so accept 21 bit immediates for the
	// 20 bit slot. We implicitly drop the least significant bit in encodeJ.
	wantEvenOffset(ctxt, ins, ins.imm)
	wantImmI(ctxt, ins, ins.imm, 21)
	wantIntReg(ctxt, ins, "rd", ins.rd)
	wantNoneReg(ctxt, ins, "rs1", ins.rs1)
	wantNoneReg(ctxt, ins, "rs2", ins.rs2)
	wantNoneReg(ctxt, ins, "rs3", ins.rs3)
}
  1215  
  1216  func validateRaw(ctxt *obj.Link, ins *instruction) {
  1217  	// Treat the raw value specially as a 32-bit unsigned integer.
  1218  	// Nobody wants to enter negative machine code.
  1219  	if ins.imm < 0 || 1<<32 <= ins.imm {
  1220  		ctxt.Diag("%v: immediate %d in raw position cannot be larger than 32 bits", ins.as, ins.imm)
  1221  	}
  1222  }
  1223  
  1224  // extractBitAndShift extracts the specified bit from the given immediate,
  1225  // before shifting it to the requested position and returning it.
  1226  func extractBitAndShift(imm uint32, bit, pos int) uint32 {
  1227  	return ((imm >> bit) & 1) << pos
  1228  }
  1229  
  1230  // encodeR encodes an R-type RISC-V instruction.
  1231  func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 {
  1232  	enc := encode(as)
  1233  	if enc == nil {
  1234  		panic("encodeR: could not encode instruction")
  1235  	}
  1236  	if enc.rs2 != 0 && rs2 != 0 {
  1237  		panic("encodeR: instruction uses rs2, but rs2 was nonzero")
  1238  	}
  1239  	return funct7<<25 | enc.funct7<<25 | enc.rs2<<20 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode
  1240  }
  1241  
  1242  // encodeR4 encodes an R4-type RISC-V instruction.
  1243  func encodeR4(as obj.As, rs1, rs2, rs3, rd, funct3, funct2 uint32) uint32 {
  1244  	enc := encode(as)
  1245  	if enc == nil {
  1246  		panic("encodeR4: could not encode instruction")
  1247  	}
  1248  	if enc.rs2 != 0 {
  1249  		panic("encodeR4: instruction uses rs2")
  1250  	}
  1251  	funct2 |= enc.funct7
  1252  	if funct2&^3 != 0 {
  1253  		panic("encodeR4: funct2 requires more than 2 bits")
  1254  	}
  1255  	return rs3<<27 | funct2<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode
  1256  }
  1257  
  1258  func encodeRIII(ins *instruction) uint32 {
  1259  	return encodeR(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
  1260  }
  1261  
  1262  func encodeRFFF(ins *instruction) uint32 {
  1263  	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7)
  1264  }
  1265  
  1266  func encodeRFFFF(ins *instruction) uint32 {
  1267  	return encodeR4(ins.as, regF(ins.rs1), regF(ins.rs2), regF(ins.rs3), regF(ins.rd), ins.funct3, ins.funct7)
  1268  }
  1269  
  1270  func encodeRFFI(ins *instruction) uint32 {
  1271  	return encodeR(ins.as, regF(ins.rs1), regF(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7)
  1272  }
  1273  
  1274  func encodeRFI(ins *instruction) uint32 {
  1275  	return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7)
  1276  }
  1277  
  1278  func encodeRIF(ins *instruction) uint32 {
  1279  	return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
  1280  }
  1281  
  1282  func encodeRFF(ins *instruction) uint32 {
  1283  	return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7)
  1284  }
  1285  
  1286  // encodeI encodes an I-type RISC-V instruction.
  1287  func encodeI(as obj.As, rs1, rd, imm uint32) uint32 {
  1288  	enc := encode(as)
  1289  	if enc == nil {
  1290  		panic("encodeI: could not encode instruction")
  1291  	}
  1292  	imm |= uint32(enc.csr)
  1293  	return imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode
  1294  }
  1295  
  1296  func encodeII(ins *instruction) uint32 {
  1297  	return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm))
  1298  }
  1299  
  1300  func encodeIF(ins *instruction) uint32 {
  1301  	return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm))
  1302  }
  1303  
  1304  // encodeS encodes an S-type RISC-V instruction.
  1305  func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 {
  1306  	enc := encode(as)
  1307  	if enc == nil {
  1308  		panic("encodeS: could not encode instruction")
  1309  	}
  1310  	return (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode
  1311  }
  1312  
  1313  func encodeSI(ins *instruction) uint32 {
  1314  	return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm))
  1315  }
  1316  
  1317  func encodeSF(ins *instruction) uint32 {
  1318  	return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm))
  1319  }
  1320  
  1321  // encodeBImmediate encodes an immediate for a B-type RISC-V instruction.
  1322  func encodeBImmediate(imm uint32) uint32 {
  1323  	return (imm>>12)<<31 | ((imm>>5)&0x3f)<<25 | ((imm>>1)&0xf)<<8 | ((imm>>11)&0x1)<<7
  1324  }
  1325  
  1326  // encodeB encodes a B-type RISC-V instruction.
  1327  func encodeB(ins *instruction) uint32 {
  1328  	imm := immI(ins.as, ins.imm, 13)
  1329  	rs2 := regI(ins.rs1)
  1330  	rs1 := regI(ins.rs2)
  1331  	enc := encode(ins.as)
  1332  	if enc == nil {
  1333  		panic("encodeB: could not encode instruction")
  1334  	}
  1335  	return encodeBImmediate(imm) | rs2<<20 | rs1<<15 | enc.funct3<<12 | enc.opcode
  1336  }
  1337  
  1338  // encodeU encodes a U-type RISC-V instruction.
  1339  func encodeU(ins *instruction) uint32 {
  1340  	// The immediates for encodeU are the upper 20 bits of a 32 bit value.
  1341  	// Rather than have the user/compiler generate a 32 bit constant, the
  1342  	// bottommost bits of which must all be zero, instead accept just the
  1343  	// top bits.
  1344  	imm := immI(ins.as, ins.imm, 20)
  1345  	rd := regI(ins.rd)
  1346  	enc := encode(ins.as)
  1347  	if enc == nil {
  1348  		panic("encodeU: could not encode instruction")
  1349  	}
  1350  	return imm<<12 | rd<<7 | enc.opcode
  1351  }
  1352  
  1353  // encodeJImmediate encodes an immediate for a J-type RISC-V instruction.
  1354  func encodeJImmediate(imm uint32) uint32 {
  1355  	return (imm>>20)<<31 | ((imm>>1)&0x3ff)<<21 | ((imm>>11)&0x1)<<20 | ((imm>>12)&0xff)<<12
  1356  }
  1357  
  1358  // encodeJ encodes a J-type RISC-V instruction.
  1359  func encodeJ(ins *instruction) uint32 {
  1360  	imm := immI(ins.as, ins.imm, 21)
  1361  	rd := regI(ins.rd)
  1362  	enc := encode(ins.as)
  1363  	if enc == nil {
  1364  		panic("encodeJ: could not encode instruction")
  1365  	}
  1366  	return encodeJImmediate(imm) | rd<<7 | enc.opcode
  1367  }
  1368  
  1369  // encodeCBImmediate encodes an immediate for a CB-type RISC-V instruction.
  1370  func encodeCBImmediate(imm uint32) uint32 {
  1371  	// Bit order - [8|4:3|7:6|2:1|5]
  1372  	bits := extractBitAndShift(imm, 8, 7)
  1373  	bits |= extractBitAndShift(imm, 4, 6)
  1374  	bits |= extractBitAndShift(imm, 3, 5)
  1375  	bits |= extractBitAndShift(imm, 7, 4)
  1376  	bits |= extractBitAndShift(imm, 6, 3)
  1377  	bits |= extractBitAndShift(imm, 2, 2)
  1378  	bits |= extractBitAndShift(imm, 1, 1)
  1379  	bits |= extractBitAndShift(imm, 5, 0)
  1380  	return (bits>>5)<<10 | (bits&0x1f)<<2
  1381  }
  1382  
  1383  // encodeCJImmediate encodes an immediate for a CJ-type RISC-V instruction.
  1384  func encodeCJImmediate(imm uint32) uint32 {
  1385  	// Bit order - [11|4|9:8|10|6|7|3:1|5]
  1386  	bits := extractBitAndShift(imm, 11, 10)
  1387  	bits |= extractBitAndShift(imm, 4, 9)
  1388  	bits |= extractBitAndShift(imm, 9, 8)
  1389  	bits |= extractBitAndShift(imm, 8, 7)
  1390  	bits |= extractBitAndShift(imm, 10, 6)
  1391  	bits |= extractBitAndShift(imm, 6, 5)
  1392  	bits |= extractBitAndShift(imm, 7, 4)
  1393  	bits |= extractBitAndShift(imm, 3, 3)
  1394  	bits |= extractBitAndShift(imm, 2, 2)
  1395  	bits |= extractBitAndShift(imm, 1, 1)
  1396  	bits |= extractBitAndShift(imm, 5, 0)
  1397  	return bits << 2
  1398  }
  1399  
  1400  func encodeRawIns(ins *instruction) uint32 {
  1401  	// Treat the raw value specially as a 32-bit unsigned integer.
  1402  	// Nobody wants to enter negative machine code.
  1403  	if ins.imm < 0 || 1<<32 <= ins.imm {
  1404  		panic(fmt.Sprintf("immediate %d cannot fit in 32 bits", ins.imm))
  1405  	}
  1406  	return uint32(ins.imm)
  1407  }
  1408  
  1409  func EncodeBImmediate(imm int64) (int64, error) {
  1410  	if err := immIFits(imm, 13); err != nil {
  1411  		return 0, err
  1412  	}
  1413  	if err := immEven(imm); err != nil {
  1414  		return 0, err
  1415  	}
  1416  	return int64(encodeBImmediate(uint32(imm))), nil
  1417  }
  1418  
  1419  func EncodeCBImmediate(imm int64) (int64, error) {
  1420  	if err := immIFits(imm, 9); err != nil {
  1421  		return 0, err
  1422  	}
  1423  	if err := immEven(imm); err != nil {
  1424  		return 0, err
  1425  	}
  1426  	return int64(encodeCBImmediate(uint32(imm))), nil
  1427  }
  1428  
  1429  func EncodeCJImmediate(imm int64) (int64, error) {
  1430  	if err := immIFits(imm, 12); err != nil {
  1431  		return 0, err
  1432  	}
  1433  	if err := immEven(imm); err != nil {
  1434  		return 0, err
  1435  	}
  1436  	return int64(encodeCJImmediate(uint32(imm))), nil
  1437  }
  1438  
  1439  func EncodeIImmediate(imm int64) (int64, error) {
  1440  	if err := immIFits(imm, 12); err != nil {
  1441  		return 0, err
  1442  	}
  1443  	return imm << 20, nil
  1444  }
  1445  
  1446  func EncodeJImmediate(imm int64) (int64, error) {
  1447  	if err := immIFits(imm, 21); err != nil {
  1448  		return 0, err
  1449  	}
  1450  	if err := immEven(imm); err != nil {
  1451  		return 0, err
  1452  	}
  1453  	return int64(encodeJImmediate(uint32(imm))), nil
  1454  }
  1455  
  1456  func EncodeSImmediate(imm int64) (int64, error) {
  1457  	if err := immIFits(imm, 12); err != nil {
  1458  		return 0, err
  1459  	}
  1460  	return ((imm >> 5) << 25) | ((imm & 0x1f) << 7), nil
  1461  }
  1462  
  1463  func EncodeUImmediate(imm int64) (int64, error) {
  1464  	if err := immIFits(imm, 20); err != nil {
  1465  		return 0, err
  1466  	}
  1467  	return imm << 12, nil
  1468  }
  1469  
// encoding bundles, for one instruction format, the function that turns an
// instruction into machine code, the function that validates its operands,
// and the size of the result.
type encoding struct {
	encode   func(*instruction) uint32     // encode returns the machine code for an instruction
	validate func(*obj.Link, *instruction) // validate validates an instruction
	length   int                           // length of encoded instruction; 0 for pseudo-ops, 4 otherwise
}
  1475  
var (
	// Encodings have the following naming convention:
	//
	//  1. the instruction encoding (R/I/S/B/U/J), in lowercase
	//  2. zero or more register operand identifiers (I = integer
	//     register, F = float register), in uppercase
	//  3. the word "Encoding"
	//
	// For example, rIIIEncoding indicates an R-type instruction with two
	// integer register inputs and an integer register output; sFEncoding
	// indicates an S-type instruction with rs2 being a float register.

	// R-type encodings (rFFFFEncoding uses the four-register R4 form).
	rIIIEncoding  = encoding{encode: encodeRIII, validate: validateRIII, length: 4}
	rFFFEncoding  = encoding{encode: encodeRFFF, validate: validateRFFF, length: 4}
	rFFFFEncoding = encoding{encode: encodeRFFFF, validate: validateRFFFF, length: 4}
	rFFIEncoding  = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4}
	rFIEncoding   = encoding{encode: encodeRFI, validate: validateRFI, length: 4}
	rIFEncoding   = encoding{encode: encodeRIF, validate: validateRIF, length: 4}
	rFFEncoding   = encoding{encode: encodeRFF, validate: validateRFF, length: 4}

	// I-type encodings.
	iIEncoding = encoding{encode: encodeII, validate: validateII, length: 4}
	iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4}

	// S-type encodings.
	sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4}
	sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4}

	// B-, U- and J-type encodings.
	bEncoding = encoding{encode: encodeB, validate: validateB, length: 4}
	uEncoding = encoding{encode: encodeU, validate: validateU, length: 4}
	jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4}

	// rawEncoding encodes a raw instruction byte sequence.
	rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4}

	// pseudoOpEncoding does no validation. Its encode function is nil, so
	// any attempt to encode a pseudo-op panics.
	pseudoOpEncoding = encoding{encode: nil, validate: func(*obj.Link, *instruction) {}, length: 0}

	// badEncoding is used when an invalid op is encountered.
	// An error has already been generated, so let anything else through:
	// validation accepts everything and encoding yields a zero word.
	badEncoding = encoding{encode: func(*instruction) uint32 { return 0 }, validate: func(*obj.Link, *instruction) {}, length: 0}
)
  1516  
// encodings contains the encodings for RISC-V instructions.
// Instructions are masked with obj.AMask to keep indices small.
//
// Opcodes without an entry here are left as the zero encoding value (nil
// encode and validate functions, length 0). The pseudo-operations at the
// end of the table are indexed by their obj.A* values directly, without
// masking.
var encodings = [ALAST & obj.AMask]encoding{

	// Unprivileged ISA

	// 2.4: Integer Computational Instructions
	AADDI & obj.AMask:  iIEncoding,
	ASLTI & obj.AMask:  iIEncoding,
	ASLTIU & obj.AMask: iIEncoding,
	AANDI & obj.AMask:  iIEncoding,
	AORI & obj.AMask:   iIEncoding,
	AXORI & obj.AMask:  iIEncoding,
	ASLLI & obj.AMask:  iIEncoding,
	ASRLI & obj.AMask:  iIEncoding,
	ASRAI & obj.AMask:  iIEncoding,
	ALUI & obj.AMask:   uEncoding,
	AAUIPC & obj.AMask: uEncoding,
	AADD & obj.AMask:   rIIIEncoding,
	ASLT & obj.AMask:   rIIIEncoding,
	ASLTU & obj.AMask:  rIIIEncoding,
	AAND & obj.AMask:   rIIIEncoding,
	AOR & obj.AMask:    rIIIEncoding,
	AXOR & obj.AMask:   rIIIEncoding,
	ASLL & obj.AMask:   rIIIEncoding,
	ASRL & obj.AMask:   rIIIEncoding,
	ASUB & obj.AMask:   rIIIEncoding,
	ASRA & obj.AMask:   rIIIEncoding,

	// 2.5: Control Transfer Instructions
	AJAL & obj.AMask:  jEncoding,
	AJALR & obj.AMask: iIEncoding,
	ABEQ & obj.AMask:  bEncoding,
	ABNE & obj.AMask:  bEncoding,
	ABLT & obj.AMask:  bEncoding,
	ABLTU & obj.AMask: bEncoding,
	ABGE & obj.AMask:  bEncoding,
	ABGEU & obj.AMask: bEncoding,

	// 2.6: Load and Store Instructions
	ALW & obj.AMask:  iIEncoding,
	ALWU & obj.AMask: iIEncoding,
	ALH & obj.AMask:  iIEncoding,
	ALHU & obj.AMask: iIEncoding,
	ALB & obj.AMask:  iIEncoding,
	ALBU & obj.AMask: iIEncoding,
	ASW & obj.AMask:  sIEncoding,
	ASH & obj.AMask:  sIEncoding,
	ASB & obj.AMask:  sIEncoding,

	// 2.7: Memory Ordering
	AFENCE & obj.AMask: iIEncoding,

	// 5.2: Integer Computational Instructions (RV64I)
	AADDIW & obj.AMask: iIEncoding,
	ASLLIW & obj.AMask: iIEncoding,
	ASRLIW & obj.AMask: iIEncoding,
	ASRAIW & obj.AMask: iIEncoding,
	AADDW & obj.AMask:  rIIIEncoding,
	ASLLW & obj.AMask:  rIIIEncoding,
	ASRLW & obj.AMask:  rIIIEncoding,
	ASUBW & obj.AMask:  rIIIEncoding,
	ASRAW & obj.AMask:  rIIIEncoding,

	// 5.3: Load and Store Instructions (RV64I)
	ALD & obj.AMask: iIEncoding,
	ASD & obj.AMask: sIEncoding,

	// 7.1: Multiplication Operations
	AMUL & obj.AMask:    rIIIEncoding,
	AMULH & obj.AMask:   rIIIEncoding,
	AMULHU & obj.AMask:  rIIIEncoding,
	AMULHSU & obj.AMask: rIIIEncoding,
	AMULW & obj.AMask:   rIIIEncoding,
	ADIV & obj.AMask:    rIIIEncoding,
	ADIVU & obj.AMask:   rIIIEncoding,
	AREM & obj.AMask:    rIIIEncoding,
	AREMU & obj.AMask:   rIIIEncoding,
	ADIVW & obj.AMask:   rIIIEncoding,
	ADIVUW & obj.AMask:  rIIIEncoding,
	AREMW & obj.AMask:   rIIIEncoding,
	AREMUW & obj.AMask:  rIIIEncoding,

	// 8.2: Load-Reserved/Store-Conditional
	ALRW & obj.AMask: rIIIEncoding,
	ALRD & obj.AMask: rIIIEncoding,
	ASCW & obj.AMask: rIIIEncoding,
	ASCD & obj.AMask: rIIIEncoding,

	// 8.3: Atomic Memory Operations
	AAMOSWAPW & obj.AMask: rIIIEncoding,
	AAMOSWAPD & obj.AMask: rIIIEncoding,
	AAMOADDW & obj.AMask:  rIIIEncoding,
	AAMOADDD & obj.AMask:  rIIIEncoding,
	AAMOANDW & obj.AMask:  rIIIEncoding,
	AAMOANDD & obj.AMask:  rIIIEncoding,
	AAMOORW & obj.AMask:   rIIIEncoding,
	AAMOORD & obj.AMask:   rIIIEncoding,
	AAMOXORW & obj.AMask:  rIIIEncoding,
	AAMOXORD & obj.AMask:  rIIIEncoding,
	AAMOMAXW & obj.AMask:  rIIIEncoding,
	AAMOMAXD & obj.AMask:  rIIIEncoding,
	AAMOMAXUW & obj.AMask: rIIIEncoding,
	AAMOMAXUD & obj.AMask: rIIIEncoding,
	AAMOMINW & obj.AMask:  rIIIEncoding,
	AAMOMIND & obj.AMask:  rIIIEncoding,
	AAMOMINUW & obj.AMask: rIIIEncoding,
	AAMOMINUD & obj.AMask: rIIIEncoding,

	// 10.1: Base Counters and Timers
	ARDCYCLE & obj.AMask:   iIEncoding,
	ARDTIME & obj.AMask:    iIEncoding,
	ARDINSTRET & obj.AMask: iIEncoding,

	// 11.5: Single-Precision Load and Store Instructions
	AFLW & obj.AMask: iFEncoding,
	AFSW & obj.AMask: sFEncoding,

	// 11.6: Single-Precision Floating-Point Computational Instructions
	AFADDS & obj.AMask:   rFFFEncoding,
	AFSUBS & obj.AMask:   rFFFEncoding,
	AFMULS & obj.AMask:   rFFFEncoding,
	AFDIVS & obj.AMask:   rFFFEncoding,
	AFMINS & obj.AMask:   rFFFEncoding,
	AFMAXS & obj.AMask:   rFFFEncoding,
	AFSQRTS & obj.AMask:  rFFFEncoding,
	AFMADDS & obj.AMask:  rFFFFEncoding,
	AFMSUBS & obj.AMask:  rFFFFEncoding,
	AFNMSUBS & obj.AMask: rFFFFEncoding,
	AFNMADDS & obj.AMask: rFFFFEncoding,

	// 11.7: Single-Precision Floating-Point Conversion and Move Instructions
	AFCVTWS & obj.AMask:  rFIEncoding,
	AFCVTLS & obj.AMask:  rFIEncoding,
	AFCVTSW & obj.AMask:  rIFEncoding,
	AFCVTSL & obj.AMask:  rIFEncoding,
	AFCVTWUS & obj.AMask: rFIEncoding,
	AFCVTLUS & obj.AMask: rFIEncoding,
	AFCVTSWU & obj.AMask: rIFEncoding,
	AFCVTSLU & obj.AMask: rIFEncoding,
	AFSGNJS & obj.AMask:  rFFFEncoding,
	AFSGNJNS & obj.AMask: rFFFEncoding,
	AFSGNJXS & obj.AMask: rFFFEncoding,
	AFMVXS & obj.AMask:   rFIEncoding,
	AFMVSX & obj.AMask:   rIFEncoding,
	AFMVXW & obj.AMask:   rFIEncoding,
	AFMVWX & obj.AMask:   rIFEncoding,

	// 11.8: Single-Precision Floating-Point Compare Instructions
	AFEQS & obj.AMask: rFFIEncoding,
	AFLTS & obj.AMask: rFFIEncoding,
	AFLES & obj.AMask: rFFIEncoding,

	// 11.9: Single-Precision Floating-Point Classify Instruction
	AFCLASSS & obj.AMask: rFIEncoding,

	// 12.3: Double-Precision Load and Store Instructions
	AFLD & obj.AMask: iFEncoding,
	AFSD & obj.AMask: sFEncoding,

	// 12.4: Double-Precision Floating-Point Computational Instructions
	AFADDD & obj.AMask:   rFFFEncoding,
	AFSUBD & obj.AMask:   rFFFEncoding,
	AFMULD & obj.AMask:   rFFFEncoding,
	AFDIVD & obj.AMask:   rFFFEncoding,
	AFMIND & obj.AMask:   rFFFEncoding,
	AFMAXD & obj.AMask:   rFFFEncoding,
	AFSQRTD & obj.AMask:  rFFFEncoding,
	AFMADDD & obj.AMask:  rFFFFEncoding,
	AFMSUBD & obj.AMask:  rFFFFEncoding,
	AFNMSUBD & obj.AMask: rFFFFEncoding,
	AFNMADDD & obj.AMask: rFFFFEncoding,

	// 12.5: Double-Precision Floating-Point Conversion and Move Instructions
	AFCVTWD & obj.AMask:  rFIEncoding,
	AFCVTLD & obj.AMask:  rFIEncoding,
	AFCVTDW & obj.AMask:  rIFEncoding,
	AFCVTDL & obj.AMask:  rIFEncoding,
	AFCVTWUD & obj.AMask: rFIEncoding,
	AFCVTLUD & obj.AMask: rFIEncoding,
	AFCVTDWU & obj.AMask: rIFEncoding,
	AFCVTDLU & obj.AMask: rIFEncoding,
	AFCVTSD & obj.AMask:  rFFEncoding,
	AFCVTDS & obj.AMask:  rFFEncoding,
	AFSGNJD & obj.AMask:  rFFFEncoding,
	AFSGNJND & obj.AMask: rFFFEncoding,
	AFSGNJXD & obj.AMask: rFFFEncoding,
	AFMVXD & obj.AMask:   rFIEncoding,
	AFMVDX & obj.AMask:   rIFEncoding,

	// 12.6: Double-Precision Floating-Point Compare Instructions
	AFEQD & obj.AMask: rFFIEncoding,
	AFLTD & obj.AMask: rFFIEncoding,
	AFLED & obj.AMask: rFFIEncoding,

	// 12.7: Double-Precision Floating-Point Classify Instruction
	AFCLASSD & obj.AMask: rFIEncoding,

	// Privileged ISA

	// 3.2.1: Environment Call and Breakpoint
	AECALL & obj.AMask:  iIEncoding,
	AEBREAK & obj.AMask: iIEncoding,

	// Escape hatch
	AWORD & obj.AMask: rawEncoding,

	// Pseudo-operations
	obj.AFUNCDATA: pseudoOpEncoding,
	obj.APCDATA:   pseudoOpEncoding,
	obj.ATEXT:     pseudoOpEncoding,
	obj.ANOP:      pseudoOpEncoding,
	obj.ADUFFZERO: pseudoOpEncoding,
	obj.ADUFFCOPY: pseudoOpEncoding,
	obj.APCALIGN:  pseudoOpEncoding,
}
  1733  
  1734  // encodingForAs returns the encoding for an obj.As.
  1735  func encodingForAs(as obj.As) (encoding, error) {
  1736  	if base := as &^ obj.AMask; base != obj.ABaseRISCV && base != 0 {
  1737  		return badEncoding, fmt.Errorf("encodingForAs: not a RISC-V instruction %s", as)
  1738  	}
  1739  	asi := as & obj.AMask
  1740  	if int(asi) >= len(encodings) {
  1741  		return badEncoding, fmt.Errorf("encodingForAs: bad RISC-V instruction %s", as)
  1742  	}
  1743  	enc := encodings[asi]
  1744  	if enc.validate == nil {
  1745  		return badEncoding, fmt.Errorf("encodingForAs: no encoding for instruction %s", as)
  1746  	}
  1747  	return enc, nil
  1748  }
  1749  
// instruction holds the operands of a single machine instruction that is
// produced for an obj.Prog. One obj.Prog may expand into several
// instructions; each of them refers back to the Prog through p.
type instruction struct {
	p      *obj.Prog // Prog that instruction is for
	as     obj.As    // Assembler opcode
	rd     uint32    // Destination register
	rs1    uint32    // Source register 1
	rs2    uint32    // Source register 2
	rs3    uint32    // Source register 3
	imm    int64     // Immediate
	funct3 uint32    // Function 3
	funct7 uint32    // Function 7 (or Function 2)
}
  1761  
  1762  func (ins *instruction) String() string {
  1763  	if ins.p == nil {
  1764  		return ins.as.String()
  1765  	}
  1766  	var suffix string
  1767  	if ins.p.As != ins.as {
  1768  		suffix = fmt.Sprintf(" (%v)", ins.as)
  1769  	}
  1770  	return fmt.Sprintf("%v%v", ins.p, suffix)
  1771  }
  1772  
  1773  func (ins *instruction) encode() (uint32, error) {
  1774  	enc, err := encodingForAs(ins.as)
  1775  	if err != nil {
  1776  		return 0, err
  1777  	}
  1778  	if enc.length <= 0 {
  1779  		return 0, fmt.Errorf("%v: encoding called for a pseudo instruction", ins.as)
  1780  	}
  1781  	return enc.encode(ins), nil
  1782  }
  1783  
  1784  func (ins *instruction) length() int {
  1785  	enc, err := encodingForAs(ins.as)
  1786  	if err != nil {
  1787  		return 0
  1788  	}
  1789  	return enc.length
  1790  }
  1791  
  1792  func (ins *instruction) validate(ctxt *obj.Link) {
  1793  	enc, err := encodingForAs(ins.as)
  1794  	if err != nil {
  1795  		ctxt.Diag(err.Error())
  1796  		return
  1797  	}
  1798  	enc.validate(ctxt, ins)
  1799  }
  1800  
  1801  func (ins *instruction) usesRegTmp() bool {
  1802  	return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP
  1803  }
  1804  
  1805  // instructionForProg returns the default *obj.Prog to instruction mapping.
  1806  func instructionForProg(p *obj.Prog) *instruction {
  1807  	ins := &instruction{
  1808  		as:  p.As,
  1809  		rd:  uint32(p.To.Reg),
  1810  		rs1: uint32(p.Reg),
  1811  		rs2: uint32(p.From.Reg),
  1812  		imm: p.From.Offset,
  1813  	}
  1814  	if len(p.RestArgs) == 1 {
  1815  		ins.rs3 = uint32(p.RestArgs[0].Reg)
  1816  	}
  1817  	return ins
  1818  }
  1819  
  1820  // instructionsForOpImmediate returns the machine instructions for an immediate
  1821  // operand. The instruction is specified by as and the source register is
  1822  // specified by rs, instead of the obj.Prog.
  1823  func instructionsForOpImmediate(p *obj.Prog, as obj.As, rs int16) []*instruction {
  1824  	// <opi> $imm, REG, TO
  1825  	ins := instructionForProg(p)
  1826  	ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE
  1827  
  1828  	low, high, err := Split32BitImmediate(ins.imm)
  1829  	if err != nil {
  1830  		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm, err)
  1831  		return nil
  1832  	}
  1833  	if high == 0 {
  1834  		return []*instruction{ins}
  1835  	}
  1836  
  1837  	// Split into two additions, if possible.
  1838  	// Do not split SP-writing instructions, as otherwise the recorded SP delta may be wrong.
  1839  	if p.Spadj == 0 && ins.as == AADDI && ins.imm >= -(1<<12) && ins.imm < 1<<12-1 {
  1840  		imm0 := ins.imm / 2
  1841  		imm1 := ins.imm - imm0
  1842  
  1843  		// ADDI $(imm/2), REG, TO
  1844  		// ADDI $(imm-imm/2), TO, TO
  1845  		ins.imm = imm0
  1846  		insADDI := &instruction{as: AADDI, rd: ins.rd, rs1: ins.rd, imm: imm1}
  1847  		return []*instruction{ins, insADDI}
  1848  	}
  1849  
  1850  	// LUI $high, TMP
  1851  	// ADDIW $low, TMP, TMP
  1852  	// <op> TMP, REG, TO
  1853  	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
  1854  	insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP, imm: low}
  1855  	switch ins.as {
  1856  	case AADDI:
  1857  		ins.as = AADD
  1858  	case AANDI:
  1859  		ins.as = AAND
  1860  	case AORI:
  1861  		ins.as = AOR
  1862  	case AXORI:
  1863  		ins.as = AXOR
  1864  	default:
  1865  		p.Ctxt.Diag("unsupported immediate instruction %v for splitting", p)
  1866  		return nil
  1867  	}
  1868  	ins.rs2 = REG_TMP
  1869  	if low == 0 {
  1870  		return []*instruction{insLUI, ins}
  1871  	}
  1872  	return []*instruction{insLUI, insADDIW, ins}
  1873  }
  1874  
  1875  // instructionsForLoad returns the machine instructions for a load. The load
  1876  // instruction is specified by as and the base/source register is specified
  1877  // by rs, instead of the obj.Prog.
  1878  func instructionsForLoad(p *obj.Prog, as obj.As, rs int16) []*instruction {
  1879  	if p.From.Type != obj.TYPE_MEM {
  1880  		p.Ctxt.Diag("%v requires memory for source", p)
  1881  		return nil
  1882  	}
  1883  
  1884  	switch as {
  1885  	case ALD, ALB, ALH, ALW, ALBU, ALHU, ALWU, AFLW, AFLD:
  1886  	default:
  1887  		p.Ctxt.Diag("%v: unknown load instruction %v", p, as)
  1888  		return nil
  1889  	}
  1890  
  1891  	// <load> $imm, REG, TO (load $imm+(REG), TO)
  1892  	ins := instructionForProg(p)
  1893  	ins.as, ins.rs1, ins.rs2 = as, uint32(rs), obj.REG_NONE
  1894  	ins.imm = p.From.Offset
  1895  
  1896  	low, high, err := Split32BitImmediate(ins.imm)
  1897  	if err != nil {
  1898  		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm)
  1899  		return nil
  1900  	}
  1901  	if high == 0 {
  1902  		return []*instruction{ins}
  1903  	}
  1904  
  1905  	// LUI $high, TMP
  1906  	// ADD TMP, REG, TMP
  1907  	// <load> $low, TMP, TO
  1908  	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
  1909  	insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rs1}
  1910  	ins.rs1, ins.imm = REG_TMP, low
  1911  
  1912  	return []*instruction{insLUI, insADD, ins}
  1913  }
  1914  
  1915  // instructionsForStore returns the machine instructions for a store. The store
  1916  // instruction is specified by as and the target/source register is specified
  1917  // by rd, instead of the obj.Prog.
  1918  func instructionsForStore(p *obj.Prog, as obj.As, rd int16) []*instruction {
  1919  	if p.To.Type != obj.TYPE_MEM {
  1920  		p.Ctxt.Diag("%v requires memory for destination", p)
  1921  		return nil
  1922  	}
  1923  
  1924  	switch as {
  1925  	case ASW, ASH, ASB, ASD, AFSW, AFSD:
  1926  	default:
  1927  		p.Ctxt.Diag("%v: unknown store instruction %v", p, as)
  1928  		return nil
  1929  	}
  1930  
  1931  	// <store> $imm, REG, TO (store $imm+(TO), REG)
  1932  	ins := instructionForProg(p)
  1933  	ins.as, ins.rd, ins.rs1, ins.rs2 = as, uint32(rd), uint32(p.From.Reg), obj.REG_NONE
  1934  	ins.imm = p.To.Offset
  1935  
  1936  	low, high, err := Split32BitImmediate(ins.imm)
  1937  	if err != nil {
  1938  		p.Ctxt.Diag("%v: constant %d too large", p, ins.imm)
  1939  		return nil
  1940  	}
  1941  	if high == 0 {
  1942  		return []*instruction{ins}
  1943  	}
  1944  
  1945  	// LUI $high, TMP
  1946  	// ADD TMP, TO, TMP
  1947  	// <store> $low, REG, TMP
  1948  	insLUI := &instruction{as: ALUI, rd: REG_TMP, imm: high}
  1949  	insADD := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: ins.rd}
  1950  	ins.rd, ins.imm = REG_TMP, low
  1951  
  1952  	return []*instruction{insLUI, insADD, ins}
  1953  }
  1954  
  1955  func instructionsForTLS(p *obj.Prog, ins *instruction) []*instruction {
  1956  	insAddTP := &instruction{as: AADD, rd: REG_TMP, rs1: REG_TMP, rs2: REG_TP}
  1957  
  1958  	var inss []*instruction
  1959  	if p.Ctxt.Flag_shared {
  1960  		// TLS initial-exec mode - load TLS offset from GOT, add the thread pointer
  1961  		// register, then load from or store to the resulting memory location.
  1962  		insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
  1963  		insLoadTLSOffset := &instruction{as: ALD, rd: REG_TMP, rs1: REG_TMP}
  1964  		inss = []*instruction{insAUIPC, insLoadTLSOffset, insAddTP, ins}
  1965  	} else {
  1966  		// TLS local-exec mode - load upper TLS offset, add the lower TLS offset,
  1967  		// add the thread pointer register, then load from or store to the resulting
  1968  		// memory location. Note that this differs from the suggested three
  1969  		// instruction sequence, as the Go linker does not currently have an
  1970  		// easy way to handle relocation across 12 bytes of machine code.
  1971  		insLUI := &instruction{as: ALUI, rd: REG_TMP}
  1972  		insADDIW := &instruction{as: AADDIW, rd: REG_TMP, rs1: REG_TMP}
  1973  		inss = []*instruction{insLUI, insADDIW, insAddTP, ins}
  1974  	}
  1975  	return inss
  1976  }
  1977  
  1978  func instructionsForTLSLoad(p *obj.Prog) []*instruction {
  1979  	if p.From.Sym.Type != objabi.STLSBSS {
  1980  		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.From.Sym)
  1981  		return nil
  1982  	}
  1983  
  1984  	ins := instructionForProg(p)
  1985  	ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), REG_TMP, obj.REG_NONE, 0
  1986  
  1987  	return instructionsForTLS(p, ins)
  1988  }
  1989  
  1990  func instructionsForTLSStore(p *obj.Prog) []*instruction {
  1991  	if p.To.Sym.Type != objabi.STLSBSS {
  1992  		p.Ctxt.Diag("%v: %v is not a TLS symbol", p, p.To.Sym)
  1993  		return nil
  1994  	}
  1995  
  1996  	ins := instructionForProg(p)
  1997  	ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
  1998  
  1999  	return instructionsForTLS(p, ins)
  2000  }
  2001  
  2002  // instructionsForMOV returns the machine instructions for an *obj.Prog that
  2003  // uses a MOV pseudo-instruction.
  2004  func instructionsForMOV(p *obj.Prog) []*instruction {
  2005  	ins := instructionForProg(p)
  2006  	inss := []*instruction{ins}
  2007  
  2008  	if p.Reg != 0 {
  2009  		p.Ctxt.Diag("%v: illegal MOV instruction", p)
  2010  		return nil
  2011  	}
  2012  
  2013  	switch {
  2014  	case p.From.Type == obj.TYPE_CONST && p.To.Type == obj.TYPE_REG:
  2015  		// Handle constant to register moves.
  2016  		if p.As != AMOV {
  2017  			p.Ctxt.Diag("%v: unsupported constant load", p)
  2018  			return nil
  2019  		}
  2020  
  2021  		// For constants larger than 32 bits in size that have trailing zeros,
  2022  		// use the value with the trailing zeros removed and then use a SLLI
  2023  		// instruction to restore the original constant.
  2024  		// For example:
  2025  		// 	MOV $0x8000000000000000, X10
  2026  		// becomes
  2027  		// 	MOV $1, X10
  2028  		// 	SLLI $63, X10, X10
  2029  		var insSLLI *instruction
  2030  		if err := immIFits(ins.imm, 32); err != nil {
  2031  			ctz := bits.TrailingZeros64(uint64(ins.imm))
  2032  			if err := immIFits(ins.imm>>ctz, 32); err == nil {
  2033  				ins.imm = ins.imm >> ctz
  2034  				insSLLI = &instruction{as: ASLLI, rd: ins.rd, rs1: ins.rd, imm: int64(ctz)}
  2035  			}
  2036  		}
  2037  
  2038  		low, high, err := Split32BitImmediate(ins.imm)
  2039  		if err != nil {
  2040  			p.Ctxt.Diag("%v: constant %d too large: %v", p, ins.imm, err)
  2041  			return nil
  2042  		}
  2043  
  2044  		// MOV $c, R -> ADD $c, ZERO, R
  2045  		ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, REG_ZERO, obj.REG_NONE, low
  2046  
  2047  		// LUI is only necessary if the constant does not fit in 12 bits.
  2048  		if high != 0 {
  2049  			// LUI top20bits(c), R
  2050  			// ADD bottom12bits(c), R, R
  2051  			insLUI := &instruction{as: ALUI, rd: ins.rd, imm: high}
  2052  			inss = []*instruction{insLUI}
  2053  			if low != 0 {
  2054  				ins.as, ins.rs1 = AADDIW, ins.rd
  2055  				inss = append(inss, ins)
  2056  			}
  2057  		}
  2058  		if insSLLI != nil {
  2059  			inss = append(inss, insSLLI)
  2060  		}
  2061  
  2062  	case p.From.Type == obj.TYPE_CONST && p.To.Type != obj.TYPE_REG:
  2063  		p.Ctxt.Diag("%v: constant load must target register", p)
  2064  		return nil
  2065  
  2066  	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_REG:
  2067  		// Handle register to register moves.
  2068  		switch p.As {
  2069  		case AMOV: // MOV Ra, Rb -> ADDI $0, Ra, Rb
  2070  			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, uint32(p.From.Reg), obj.REG_NONE, 0
  2071  		case AMOVW: // MOVW Ra, Rb -> ADDIW $0, Ra, Rb
  2072  			ins.as, ins.rs1, ins.rs2, ins.imm = AADDIW, uint32(p.From.Reg), obj.REG_NONE, 0
  2073  		case AMOVBU: // MOVBU Ra, Rb -> ANDI $255, Ra, Rb
  2074  			ins.as, ins.rs1, ins.rs2, ins.imm = AANDI, uint32(p.From.Reg), obj.REG_NONE, 255
  2075  		case AMOVF: // MOVF Ra, Rb -> FSGNJS Ra, Ra, Rb
  2076  			ins.as, ins.rs1 = AFSGNJS, uint32(p.From.Reg)
  2077  		case AMOVD: // MOVD Ra, Rb -> FSGNJD Ra, Ra, Rb
  2078  			ins.as, ins.rs1 = AFSGNJD, uint32(p.From.Reg)
  2079  		case AMOVB, AMOVH:
  2080  			// Use SLLI/SRAI to extend.
  2081  			ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
  2082  			if p.As == AMOVB {
  2083  				ins.imm = 56
  2084  			} else if p.As == AMOVH {
  2085  				ins.imm = 48
  2086  			}
  2087  			ins2 := &instruction{as: ASRAI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
  2088  			inss = append(inss, ins2)
  2089  		case AMOVHU, AMOVWU:
  2090  			// Use SLLI/SRLI to extend.
  2091  			ins.as, ins.rs1, ins.rs2 = ASLLI, uint32(p.From.Reg), obj.REG_NONE
  2092  			if p.As == AMOVHU {
  2093  				ins.imm = 48
  2094  			} else if p.As == AMOVWU {
  2095  				ins.imm = 32
  2096  			}
  2097  			ins2 := &instruction{as: ASRLI, rd: ins.rd, rs1: ins.rd, imm: ins.imm}
  2098  			inss = append(inss, ins2)
  2099  		}
  2100  
  2101  	case p.From.Type == obj.TYPE_MEM && p.To.Type == obj.TYPE_REG:
  2102  		// Memory to register loads.
  2103  		switch p.From.Name {
  2104  		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
  2105  			// MOV c(Rs), Rd -> L $c, Rs, Rd
  2106  			inss = instructionsForLoad(p, movToLoad(p.As), addrToReg(p.From))
  2107  
  2108  		case obj.NAME_EXTERN, obj.NAME_STATIC:
  2109  			if p.From.Sym.Type == objabi.STLSBSS {
  2110  				return instructionsForTLSLoad(p)
  2111  			}
  2112  
  2113  			// Note that the values for $off_hi and $off_lo are currently
  2114  			// zero and will be assigned during relocation.
  2115  			//
  2116  			// AUIPC $off_hi, Rd
  2117  			// L $off_lo, Rd, Rd
  2118  			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
  2119  			ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), ins.rd, obj.REG_NONE, 0
  2120  			inss = []*instruction{insAUIPC, ins}
  2121  
  2122  		default:
  2123  			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
  2124  			return nil
  2125  		}
  2126  
  2127  	case p.From.Type == obj.TYPE_REG && p.To.Type == obj.TYPE_MEM:
  2128  		// Register to memory stores.
  2129  		switch p.As {
  2130  		case AMOVBU, AMOVHU, AMOVWU:
  2131  			p.Ctxt.Diag("%v: unsupported unsigned store", p)
  2132  			return nil
  2133  		}
  2134  		switch p.To.Name {
  2135  		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
  2136  			// MOV Rs, c(Rd) -> S $c, Rs, Rd
  2137  			inss = instructionsForStore(p, movToStore(p.As), addrToReg(p.To))
  2138  
  2139  		case obj.NAME_EXTERN, obj.NAME_STATIC:
  2140  			if p.To.Sym.Type == objabi.STLSBSS {
  2141  				return instructionsForTLSStore(p)
  2142  			}
  2143  
  2144  			// Note that the values for $off_hi and $off_lo are currently
  2145  			// zero and will be assigned during relocation.
  2146  			//
  2147  			// AUIPC $off_hi, Rtmp
  2148  			// S $off_lo, Rtmp, Rd
  2149  			insAUIPC := &instruction{as: AAUIPC, rd: REG_TMP}
  2150  			ins.as, ins.rd, ins.rs1, ins.rs2, ins.imm = movToStore(p.As), REG_TMP, uint32(p.From.Reg), obj.REG_NONE, 0
  2151  			inss = []*instruction{insAUIPC, ins}
  2152  
  2153  		default:
  2154  			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
  2155  			return nil
  2156  		}
  2157  
  2158  	case p.From.Type == obj.TYPE_ADDR && p.To.Type == obj.TYPE_REG:
  2159  		// MOV $sym+off(SP/SB), R
  2160  		if p.As != AMOV {
  2161  			p.Ctxt.Diag("%v: unsupported address load", p)
  2162  			return nil
  2163  		}
  2164  		switch p.From.Name {
  2165  		case obj.NAME_AUTO, obj.NAME_PARAM, obj.NAME_NONE:
  2166  			inss = instructionsForOpImmediate(p, AADDI, addrToReg(p.From))
  2167  
  2168  		case obj.NAME_EXTERN, obj.NAME_STATIC:
  2169  			// Note that the values for $off_hi and $off_lo are currently
  2170  			// zero and will be assigned during relocation.
  2171  			//
  2172  			// AUIPC $off_hi, R
  2173  			// ADDI $off_lo, R
  2174  			insAUIPC := &instruction{as: AAUIPC, rd: ins.rd}
  2175  			ins.as, ins.rs1, ins.rs2, ins.imm = AADDI, ins.rd, obj.REG_NONE, 0
  2176  			inss = []*instruction{insAUIPC, ins}
  2177  
  2178  		default:
  2179  			p.Ctxt.Diag("unsupported name %d for %v", p.From.Name, p)
  2180  			return nil
  2181  		}
  2182  
  2183  	case p.From.Type == obj.TYPE_ADDR && p.To.Type != obj.TYPE_REG:
  2184  		p.Ctxt.Diag("%v: address load must target register", p)
  2185  		return nil
  2186  
  2187  	default:
  2188  		p.Ctxt.Diag("%v: unsupported MOV", p)
  2189  		return nil
  2190  	}
  2191  
  2192  	return inss
  2193  }
  2194  
// instructionsForProg returns the machine instructions for an *obj.Prog.
//
// It starts from the default mapping produced by instructionForProg and then
// adjusts opcode, registers, immediate and function fields according to the
// opcode. Pseudo-instructions are rewritten into real instructions, and MOV,
// load, store and immediate operations are delegated to their dedicated
// helpers, which may return several instructions. Every returned instruction
// has its p field set to p. A nil slice is returned when a diagnostic makes
// the Prog impossible to translate.
func instructionsForProg(p *obj.Prog) []*instruction {
	ins := instructionForProg(p)
	inss := []*instruction{ins}

	// instructionForProg only picks up a third source register when there is
	// exactly one extra argument; more than one is not supported.
	if len(p.RestArgs) > 1 {
		p.Ctxt.Diag("too many source registers")
		return nil
	}

	switch ins.as {
	case AJAL, AJALR:
		// The link register comes from From and the target from To.
		ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.To.Reg), obj.REG_NONE
		ins.imm = p.To.Offset

	case ABEQ, ABEQZ, ABGE, ABGEU, ABGEZ, ABGT, ABGTU, ABGTZ, ABLE, ABLEU, ABLEZ, ABLT, ABLTU, ABLTZ, ABNE, ABNEZ:
		// Rewrite the pseudo branches into the branch opcodes handled by the
		// encoder, swapping operands or substituting REG_ZERO as required.
		// The remaining branch opcodes keep the default operand mapping.
		switch ins.as {
		case ABEQZ:
			ins.as, ins.rs1, ins.rs2 = ABEQ, REG_ZERO, uint32(p.From.Reg)
		case ABGEZ:
			ins.as, ins.rs1, ins.rs2 = ABGE, REG_ZERO, uint32(p.From.Reg)
		case ABGT:
			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), uint32(p.Reg)
		case ABGTU:
			ins.as, ins.rs1, ins.rs2 = ABLTU, uint32(p.From.Reg), uint32(p.Reg)
		case ABGTZ:
			ins.as, ins.rs1, ins.rs2 = ABLT, uint32(p.From.Reg), REG_ZERO
		case ABLE:
			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), uint32(p.Reg)
		case ABLEU:
			ins.as, ins.rs1, ins.rs2 = ABGEU, uint32(p.From.Reg), uint32(p.Reg)
		case ABLEZ:
			ins.as, ins.rs1, ins.rs2 = ABGE, uint32(p.From.Reg), REG_ZERO
		case ABLTZ:
			ins.as, ins.rs1, ins.rs2 = ABLT, REG_ZERO, uint32(p.From.Reg)
		case ABNEZ:
			ins.as, ins.rs1, ins.rs2 = ABNE, REG_ZERO, uint32(p.From.Reg)
		}
		// The branch offset is carried in the To operand.
		ins.imm = p.To.Offset

	case AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
		inss = instructionsForMOV(p)

	case ALW, ALWU, ALH, ALHU, ALB, ALBU, ALD, AFLW, AFLD:
		inss = instructionsForLoad(p, ins.as, p.From.Reg)

	case ASW, ASH, ASB, ASD, AFSW, AFSD:
		inss = instructionsForStore(p, ins.as, p.To.Reg)

	case ALRW, ALRD:
		// Set aq to use acquire access ordering
		ins.funct7 = 2
		ins.rs1, ins.rs2 = uint32(p.From.Reg), REG_ZERO

	case AADDI, AANDI, AORI, AXORI:
		inss = instructionsForOpImmediate(p, ins.as, p.Reg)

	case ASCW, ASCD:
		// Set release access ordering
		ins.funct7 = 1
		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)

	case AAMOSWAPW, AAMOSWAPD, AAMOADDW, AAMOADDD, AAMOANDW, AAMOANDD, AAMOORW, AAMOORD,
		AAMOXORW, AAMOXORD, AAMOMINW, AAMOMIND, AAMOMINUW, AAMOMINUD, AAMOMAXW, AAMOMAXD, AAMOMAXUW, AAMOMAXUD:
		// Set aqrl to use acquire & release access ordering
		ins.funct7 = 3
		ins.rd, ins.rs1, ins.rs2 = uint32(p.RegTo2), uint32(p.To.Reg), uint32(p.From.Reg)

	case AECALL, AEBREAK, ARDCYCLE, ARDTIME, ARDINSTRET:
		// The immediate is taken from the csr field of the opcode's encoding
		// data; the destination defaults to REG_ZERO when no To operand is
		// given.
		insEnc := encode(p.As)
		if p.To.Type == obj.TYPE_NONE {
			ins.rd = REG_ZERO
		}
		ins.rs1 = REG_ZERO
		ins.imm = insEnc.csr

	case AFENCE:
		ins.rd, ins.rs1, ins.rs2 = REG_ZERO, REG_ZERO, obj.REG_NONE
		ins.imm = 0x0ff

	case AFCVTWS, AFCVTLS, AFCVTWUS, AFCVTLUS, AFCVTWD, AFCVTLD, AFCVTWUD, AFCVTLUD:
		// Set the rounding mode in funct3 to round to zero.
		ins.funct3 = 1

	case AFNES, AFNED:
		// Replace FNE[SD] with FEQ[SD] and NOT.
		if p.To.Type != obj.TYPE_REG {
			p.Ctxt.Diag("%v needs an integer register output", p)
			return nil
		}
		if ins.as == AFNES {
			ins.as = AFEQS
		} else {
			ins.as = AFEQD
		}
		ins2 := &instruction{
			as:  AXORI, // [bit] xor 1 = not [bit]
			rd:  ins.rd,
			rs1: ins.rd,
			imm: 1,
		}
		inss = append(inss, ins2)

	case AFSQRTS, AFSQRTD:
		// These instructions expect a zero (i.e. float register 0)
		// to be the second input operand.
		ins.rs1 = uint32(p.From.Reg)
		ins.rs2 = REG_F0

	case AFMADDS, AFMSUBS, AFNMADDS, AFNMSUBS,
		AFMADDD, AFMSUBD, AFNMADDD, AFNMSUBD:
		// Swap the first two operands so that the operands are in the same
		// order as they are in the specification: RS1, RS2, RS3, RD.
		ins.rs1, ins.rs2 = ins.rs2, ins.rs1

	case ANEG, ANEGW:
		// NEG rs, rd -> SUB rs, X0, rd
		ins.as = ASUB
		if p.As == ANEGW {
			ins.as = ASUBW
		}
		ins.rs1 = REG_ZERO
		// With no explicit destination, negate the source register in place.
		if ins.rd == obj.REG_NONE {
			ins.rd = ins.rs2
		}

	case ANOT:
		// NOT rs, rd -> XORI $-1, rs, rd
		ins.as = AXORI
		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
		// With no explicit destination, invert the source register in place.
		if ins.rd == obj.REG_NONE {
			ins.rd = ins.rs1
		}
		ins.imm = -1

	case ASEQZ:
		// SEQZ rs, rd -> SLTIU $1, rs, rd
		ins.as = ASLTIU
		ins.rs1, ins.rs2 = uint32(p.From.Reg), obj.REG_NONE
		ins.imm = 1

	case ASNEZ:
		// SNEZ rs, rd -> SLTU rs, x0, rd
		ins.as = ASLTU
		ins.rs1 = REG_ZERO

	case AFABSS:
		// FABSS rs, rd -> FSGNJXS rs, rs, rd
		ins.as = AFSGNJXS
		ins.rs1 = uint32(p.From.Reg)

	case AFABSD:
		// FABSD rs, rd -> FSGNJXD rs, rs, rd
		ins.as = AFSGNJXD
		ins.rs1 = uint32(p.From.Reg)

	case AFNEGS:
		// FNEGS rs, rd -> FSGNJNS rs, rs, rd
		ins.as = AFSGNJNS
		ins.rs1 = uint32(p.From.Reg)

	case AFNEGD:
		// FNEGD rs, rd -> FSGNJND rs, rs, rd
		ins.as = AFSGNJND
		ins.rs1 = uint32(p.From.Reg)

	case ASLLI, ASRLI, ASRAI:
		// An out-of-range shift amount is reported, but the instruction is
		// still returned.
		if ins.imm < 0 || ins.imm > 63 {
			p.Ctxt.Diag("%v: shift amount out of range 0 to 63", p)
		}

	case ASLLIW, ASRLIW, ASRAIW:
		// An out-of-range shift amount is reported, but the instruction is
		// still returned.
		if ins.imm < 0 || ins.imm > 31 {
			p.Ctxt.Diag("%v: shift amount out of range 0 to 31", p)
		}
	}

	// Record the originating Prog on every instruction, including those
	// created by the helper functions.
	for _, ins := range inss {
		ins.p = p
	}

	return inss
}
  2378  
// assemble emits machine code.
// It is called at the very end of the assembly process.
//
// For every Prog of cursym it first records any relocation the Prog needs
// (or writes NOP padding for PCALIGN), then expands the Prog into machine
// instructions and writes their encodings into cursym starting at p.Pc.
// Progs whose expansion uses REG_TMP are marked with USES_REG_TMP, and
// finally the unsafe points of the function are marked.
func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline {
		ctxt.Diag("-spectre=ret not supported on riscv")
		ctxt.Retpoline = false // don't keep printing
	}

	// If errors were encountered during preprocess/validation, proceeding
	// and attempting to encode said instructions will only lead to panics.
	if ctxt.Errors > 0 {
		return
	}

	for p := cursym.Func().Text; p != nil; p = p.Link {
		switch p.As {
		case AJAL:
			// A JAL marked NEED_JAL_RELOC gets a 4 byte relocation against
			// its target symbol.
			if p.Mark&NEED_JAL_RELOC == NEED_JAL_RELOC {
				rel := obj.Addrel(cursym)
				rel.Off = int32(p.Pc)
				rel.Siz = 4
				rel.Sym = p.To.Sym
				rel.Add = p.To.Offset
				rel.Type = objabi.R_RISCV_JAL
			}
		case AJALR:
			if p.To.Sym != nil {
				ctxt.Diag("%v: unexpected AJALR with to symbol", p)
			}

		case AAUIPC, AMOV, AMOVB, AMOVH, AMOVW, AMOVBU, AMOVHU, AMOVWU, AMOVF, AMOVD:
			// Select the relocation type and the operand holding the symbol
			// from the Prog's mark bits. Progs without any of these marks
			// need no relocation; break only leaves the switch, so they are
			// still encoded below.
			var addr *obj.Addr
			var rt objabi.RelocType
			if p.Mark&NEED_CALL_RELOC == NEED_CALL_RELOC {
				rt = objabi.R_RISCV_CALL
				addr = &p.From
			} else if p.Mark&NEED_PCREL_ITYPE_RELOC == NEED_PCREL_ITYPE_RELOC {
				rt = objabi.R_RISCV_PCREL_ITYPE
				addr = &p.From
			} else if p.Mark&NEED_PCREL_STYPE_RELOC == NEED_PCREL_STYPE_RELOC {
				rt = objabi.R_RISCV_PCREL_STYPE
				addr = &p.To
			} else {
				break
			}
			if p.As == AAUIPC {
				if p.Link == nil {
					ctxt.Diag("AUIPC needing PC-relative reloc missing following instruction")
					break
				}
				// For AUIPC the symbol is carried in the first extra argument.
				// NOTE(review): this indexes p.RestArgs without a length
				// check; presumably an earlier pass always populates it for
				// marked AUIPC Progs - confirm.
				addr = &p.RestArgs[0].Addr
			}
			if addr.Sym == nil {
				ctxt.Diag("PC-relative relocation missing symbol")
				break
			}
			// TLS symbols override the relocation type chosen above.
			if addr.Sym.Type == objabi.STLSBSS {
				if ctxt.Flag_shared {
					rt = objabi.R_RISCV_TLS_IE
				} else {
					rt = objabi.R_RISCV_TLS_LE
				}
			}

			// The relocation spans 8 bytes starting at p.Pc.
			rel := obj.Addrel(cursym)
			rel.Off = int32(p.Pc)
			rel.Siz = 8
			rel.Sym = addr.Sym
			rel.Add = addr.Offset
			rel.Type = rt

		case obj.APCALIGN:
			// Fill the padding needed to reach the requested alignment with
			// 4 byte NOPs. Nothing else is emitted for this Prog, hence the
			// continue.
			alignedValue := p.From.Offset
			v := pcAlignPadLength(p.Pc, alignedValue)
			offset := p.Pc
			for ; v >= 4; v -= 4 {
				// NOP
				cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0})
				offset += 4
			}
			continue
		}

		// Encode each machine instruction for the Prog and write it out
		// consecutively. Instructions that fail to encode are skipped
		// without advancing the offset.
		offset := p.Pc
		for _, ins := range instructionsForProg(p) {
			if ic, err := ins.encode(); err == nil {
				cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
				offset += int64(ins.length())
			}
			if ins.usesRegTmp() {
				p.Mark |= USES_REG_TMP
			}
		}
	}

	obj.MarkUnsafePoints(ctxt, cursym.Func().Text, newprog, isUnsafePoint, nil)
}
  2476  
  2477  func isUnsafePoint(p *obj.Prog) bool {
  2478  	return p.Mark&USES_REG_TMP == USES_REG_TMP || p.From.Reg == REG_TMP || p.To.Reg == REG_TMP || p.Reg == REG_TMP
  2479  }
  2480  
// LinkRISCV64 is the obj.LinkArch for sys.ArchRISCV64. It registers this
// package's initialisation, preprocessing, assembly and Prog editing
// functions together with the unary destination table and the DWARF
// register mapping.
var LinkRISCV64 = obj.LinkArch{
	Arch:           sys.ArchRISCV64,
	Init:           buildop,
	Preprocess:     preprocess,
	Assemble:       assemble,
	Progedit:       progedit,
	UnaryDst:       unaryDst,
	DWARFRegisters: RISCV64DWARFRegisters,
}
  2490  

View as plain text