Black Lives Matter. Support the Equal Justice Initiative.

Text file src/runtime/asm_amd64.s

Documentation: runtime

     1// Copyright 2009 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "go_tls.h"
     7#include "funcdata.h"
     8#include "textflag.h"
     9
    10// _rt0_amd64 is common startup code for most amd64 systems when using
    11// internal linking. This is the entry point for the program from the
    12// kernel for an ordinary -buildmode=exe program. The stack holds the
    13// number of arguments and the C-style argv.
    14TEXT _rt0_amd64(SB),NOSPLIT,$-8
    15	MOVQ	0(SP), DI	// argc
    16	LEAQ	8(SP), SI	// argv
    17	JMP	runtime·rt0_go(SB)	// tail call; DI/SI carry argc/argv into rt0_go
    18
    19// main is common startup code for most amd64 systems when using
    20// external linking. The C startup code will call the symbol "main"
    21// passing argc and argv in the usual C ABI registers DI and SI.
    22TEXT main(SB),NOSPLIT,$-8
    23	JMP	runtime·rt0_go(SB)	// DI=argc, SI=argv already set by the C caller
    24
    25// _rt0_amd64_lib is common startup code for most amd64 systems when
    26// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    27// arrange to invoke this function as a global constructor (for
    28// c-archive) or when the shared library is loaded (for c-shared).
    29// We expect argc and argv to be passed in the usual C ABI registers
    30// DI and SI.
    31TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
    32	// Align stack per ELF ABI requirements.
    33	MOVQ	SP, AX
    34	ANDQ	$~15, SP	// round SP down to a 16-byte boundary
    35	// Save C ABI callee-saved registers, as caller may need them.
    36	MOVQ	BX, 0x10(SP)
    37	MOVQ	BP, 0x18(SP)
    38	MOVQ	R12, 0x20(SP)
    39	MOVQ	R13, 0x28(SP)
    40	MOVQ	R14, 0x30(SP)
    41	MOVQ	R15, 0x38(SP)
    42	MOVQ	AX, 0x40(SP)	// save original (pre-alignment) SP
    43
    44	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    45	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    46
    47	// Synchronous initialization.
    48	CALL	runtime·libpreinit(SB)
    49
    50	// Create a new thread to finish Go runtime initialization.
    51	MOVQ	_cgo_sys_thread_create(SB), AX
    52	TESTQ	AX, AX
    53	JZ	nocgo	// no cgo thread-create helper; fall back to newosproc0
    54	MOVQ	$_rt0_amd64_lib_go(SB), DI
    55	MOVQ	$0, SI
    56	CALL	AX
    57	JMP	restore
    58
    59nocgo:
    60	MOVQ	$0x800000, 0(SP)		// stacksize
    61	MOVQ	$_rt0_amd64_lib_go(SB), AX
    62	MOVQ	AX, 8(SP)			// fn
    63	CALL	runtime·newosproc0(SB)
    64
    65restore:
    66	MOVQ	0x10(SP), BX
    67	MOVQ	0x18(SP), BP
    68	MOVQ	0x20(SP), R12
    69	MOVQ	0x28(SP), R13
    70	MOVQ	0x30(SP), R14
    71	MOVQ	0x38(SP), R15
    72	MOVQ	0x40(SP), SP	// restore caller's original SP
    73	RET
    74
    75// _rt0_amd64_lib_go initializes the Go runtime.
    76// This is started in a separate thread by _rt0_amd64_lib.
    77TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    78	MOVQ	_rt0_amd64_lib_argc<>(SB), DI	// reload argc saved by _rt0_amd64_lib
    79	MOVQ	_rt0_amd64_lib_argv<>(SB), SI	// reload argv saved by _rt0_amd64_lib
    80	JMP	runtime·rt0_go(SB)
    81
    82DATA _rt0_amd64_lib_argc<>(SB)/8, $0	// argc stashed for the init thread
    83GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    84DATA _rt0_amd64_lib_argv<>(SB)/8, $0	// argv stashed for the init thread
    85GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86
    87// Defined as ABIInternal since it does not use the stack-based Go ABI (and
    88// in addition there are no calls to this entry point from Go code).
    89TEXT runtime·rt0_go<ABIInternal>(SB),NOSPLIT,$0
    90	// copy arguments forward on an even stack
    91	MOVQ	DI, AX		// argc
    92	MOVQ	SI, BX		// argv
    93	SUBQ	$(4*8+7), SP		// 2args 2auto; +7 so the ANDQ below can round down
    94	ANDQ	$~15, SP	// align stack to 16 bytes
    95	MOVQ	AX, 16(SP)
    96	MOVQ	BX, 24(SP)
    97
    98	// create istack out of the given (operating system) stack.
    99	// _cgo_init may update stackguard.
   100	MOVQ	$runtime·g0(SB), DI
   101	LEAQ	(-64*1024+104)(SP), BX	// stack_lo = SP - 64K + 104
   102	MOVQ	BX, g_stackguard0(DI)
   103	MOVQ	BX, g_stackguard1(DI)
   104	MOVQ	BX, (g_stack+stack_lo)(DI)
   105	MOVQ	SP, (g_stack+stack_hi)(DI)
   106
   107	// find out information about the processor we're on
   108	MOVL	$0, AX
   109	CPUID
   110	MOVL	AX, SI	// SI = max basic CPUID leaf
   111	CMPL	AX, $0
   112	JE	nocpuinfo	// CPUID gives no useful leaves
   113
   114	// Figure out how to serialize RDTSC.
   115	// On Intel processors LFENCE is enough. AMD requires MFENCE.
   116	// Don't know about the rest, so let's do MFENCE.
   117	CMPL	BX, $0x756E6547  // "Genu"
   118	JNE	notintel
   119	CMPL	DX, $0x49656E69  // "ineI"
   120	JNE	notintel
   121	CMPL	CX, $0x6C65746E  // "ntel"
   122	JNE	notintel
   123	MOVB	$1, runtime·isIntel(SB)
   124	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
   125notintel:
   126
   127	// Load EAX=1 cpuid flags
   128	MOVL	$1, AX
   129	CPUID
   130	MOVL	AX, runtime·processorVersionInfo(SB)
   131
   132nocpuinfo:
   133	// if there is an _cgo_init, call it.
   134	MOVQ	_cgo_init(SB), AX
   135	TESTQ	AX, AX
   136	JZ	needtls	// not linked with cgo; set up TLS ourselves
   137	// arg 1: g0, already in DI
   138	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
   139#ifdef GOOS_android
   140	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   141	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   142	// Compensate for tls_g (+16).
   143	MOVQ	-16(TLS), CX
   144#else
   145	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
   146	MOVQ	$0, CX
   147#endif
   148#ifdef GOOS_windows
   149	// Adjust for the Win64 calling convention.
   150	MOVQ	CX, R9 // arg 4
   151	MOVQ	DX, R8 // arg 3
   152	MOVQ	SI, DX // arg 2
   153	MOVQ	DI, CX // arg 1
   154#endif
   155	CALL	AX
   156
   157	// update stackguard after _cgo_init
   158	MOVQ	$runtime·g0(SB), CX
   159	MOVQ	(g_stack+stack_lo)(CX), AX
   160	ADDQ	$const__StackGuard, AX
   161	MOVQ	AX, g_stackguard0(CX)
   162	MOVQ	AX, g_stackguard1(CX)
   163
   164#ifndef GOOS_windows
   165	JMP ok
   166#endif
   167needtls:
   168#ifdef GOOS_plan9
   169	// skip TLS setup on Plan 9
   170	JMP ok
   171#endif
   172#ifdef GOOS_solaris
   173	// skip TLS setup on Solaris
   174	JMP ok
   175#endif
   176#ifdef GOOS_illumos
   177	// skip TLS setup on illumos
   178	JMP ok
   179#endif
   180#ifdef GOOS_darwin
   181	// skip TLS setup on Darwin
   182	JMP ok
   183#endif
   184#ifdef GOOS_openbsd
   185	// skip TLS setup on OpenBSD
   186	JMP ok
   187#endif
   188
   189	LEAQ	runtime·m0+m_tls(SB), DI
   190	CALL	runtime·settls(SB)
   191
   192	// store through it, to make sure it works
   193	get_tls(BX)
   194	MOVQ	$0x123, g(BX)	// write a canary through the TLS slot
   195	MOVQ	runtime·m0+m_tls(SB), AX
   196	CMPQ	AX, $0x123
   197	JEQ 2(PC)	// TLS round-trips; skip the abort
   198	CALL	runtime·abort(SB)
   199ok:
   200	// set the per-goroutine and per-mach "registers"
   201	get_tls(BX)
   202	LEAQ	runtime·g0(SB), CX
   203	MOVQ	CX, g(BX)
   204	LEAQ	runtime·m0(SB), AX
   205
   206	// save m->g0 = g0
   207	MOVQ	CX, m_g0(AX)
   208	// save m0 to g0->m
   209	MOVQ	AX, g_m(CX)
   210
   211	CLD				// convention is D is always left cleared
   212	CALL	runtime·check(SB)
   213
   214	MOVL	16(SP), AX		// copy argc
   215	MOVL	AX, 0(SP)
   216	MOVQ	24(SP), AX		// copy argv
   217	MOVQ	AX, 8(SP)
   218	CALL	runtime·args(SB)
   219	CALL	runtime·osinit(SB)
   220	CALL	runtime·schedinit(SB)
   221
   222	// create a new goroutine to start program
   223	MOVQ	$runtime·mainPC(SB), AX		// entry
   224	PUSHQ	AX
   225	PUSHQ	$0			// arg size
   226	CALL	runtime·newproc(SB)
   227	POPQ	AX
   228	POPQ	AX
   229
   230	// start this M
   231	CALL	runtime·mstart(SB)
   232
   233	CALL	runtime·abort(SB)	// mstart should never return
   234	RET
   235
   236	// Prevent dead-code elimination of debugCallV1, which is
   237	// intended to be called by debuggers.
   238	MOVQ	$runtime·debugCallV1<ABIInternal>(SB), AX
   239	RET
   240
   241// mainPC is a function value for runtime.main, to be passed to newproc.
   242// The reference to runtime.main is made via ABIInternal, since the
   243// actual function (not the ABI0 wrapper) is needed by newproc.
   244DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)	// single-word funcval
   245GLOBL	runtime·mainPC(SB),RODATA,$8
   246
   247TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   248	BYTE	$0xcc	// single-byte INT3 breakpoint trap
   249	RET
   250
   251TEXT runtime·asminit(SB),NOSPLIT,$0-0
   252	// No per-thread init.
   253	RET	// nothing to do on amd64
   254
   255/*
   256 *  go-routine
   257 */
   258
   259// func gosave(buf *gobuf)
   260// save state in Gobuf; setjmp
   261TEXT runtime·gosave(SB), NOSPLIT, $0-8
   262	MOVQ	buf+0(FP), AX		// gobuf
   263	LEAQ	buf+0(FP), BX		// caller's SP
   264	MOVQ	BX, gobuf_sp(AX)
   265	MOVQ	0(SP), BX		// caller's PC
   266	MOVQ	BX, gobuf_pc(AX)
   267	MOVQ	$0, gobuf_ret(AX)
   268	MOVQ	BP, gobuf_bp(AX)
   269	// Assert ctxt is zero. See func save.
   270	MOVQ	gobuf_ctxt(AX), BX
   271	TESTQ	BX, BX
   272	JZ	2(PC)	// ctxt == 0: ok, skip the badctxt call
   273	CALL	runtime·badctxt(SB)
   274	get_tls(CX)
   275	MOVQ	g(CX), BX	// record current g in the gobuf
   276	MOVQ	BX, gobuf_g(AX)
   277	RET
   278
   279// func gogo(buf *gobuf)
   280// restore state from Gobuf; longjmp
   281TEXT runtime·gogo(SB), NOSPLIT, $16-8
   282	MOVQ	buf+0(FP), BX		// gobuf
   283	MOVQ	gobuf_g(BX), DX
   284	MOVQ	0(DX), CX		// make sure g != nil
   285	get_tls(CX)
   286	MOVQ	DX, g(CX)	// switch current g to the saved one
   287	MOVQ	gobuf_sp(BX), SP	// restore SP
   288	MOVQ	gobuf_ret(BX), AX
   289	MOVQ	gobuf_ctxt(BX), DX
   290	MOVQ	gobuf_bp(BX), BP	// restore frame pointer
   291	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   292	MOVQ	$0, gobuf_ret(BX)
   293	MOVQ	$0, gobuf_ctxt(BX)
   294	MOVQ	$0, gobuf_bp(BX)
   295	MOVQ	gobuf_pc(BX), BX
   296	JMP	BX	// resume at the saved PC
   297
   298// func mcall(fn func(*g))
   299// Switch to m->g0's stack, call fn(g).
   300// Fn must never return. It should gogo(&g->sched)
   301// to keep running g.
   302TEXT runtime·mcall(SB), NOSPLIT, $0-8
   303	MOVQ	fn+0(FP), DI
   304
   305	get_tls(CX)
   306	MOVQ	g(CX), AX	// save state in g->sched
   307	MOVQ	0(SP), BX	// caller's PC
   308	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   309	LEAQ	fn+0(FP), BX	// caller's SP
   310	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   311	MOVQ	AX, (g_sched+gobuf_g)(AX)
   312	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   313
   314	// switch to m->g0 & its stack, call fn
   315	MOVQ	g(CX), BX
   316	MOVQ	g_m(BX), BX
   317	MOVQ	m_g0(BX), SI
   318	CMPQ	SI, AX	// if g == m->g0 call badmcall
   319	JNE	3(PC)	// ok: not already on g0, skip badmcall
   320	MOVQ	$runtime·badmcall(SB), AX
   321	JMP	AX
   322	MOVQ	SI, g(CX)	// g = m->g0
   323	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   324	PUSHQ	AX	// push saved g as fn's sole argument
   325	MOVQ	DI, DX
   326	MOVQ	0(DI), DI	// load code pointer out of funcval
   327	CALL	DI
   328	POPQ	AX
   329	MOVQ	$runtime·badmcall2(SB), AX	// fn must not return
   330	JMP	AX
   331	RET
   332
   333// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   334// of the G stack. We need to distinguish the routine that
   335// lives at the bottom of the G stack from the one that lives
   336// at the top of the system stack because the one at the top of
   337// the system stack terminates the stack walk (see topofstack()).
   338TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   339	RET	// never actually executed; only its PC is used as a traceback marker
   340
   341// func systemstack(fn func())
   342TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   343	MOVQ	fn+0(FP), DI	// DI = fn
   344	get_tls(CX)
   345	MOVQ	g(CX), AX	// AX = g
   346	MOVQ	g_m(AX), BX	// BX = m
   347
   348	CMPQ	AX, m_gsignal(BX)	// already on the signal stack?
   349	JEQ	noswitch
   350
   351	MOVQ	m_g0(BX), DX	// DX = g0
   352	CMPQ	AX, DX
   353	JEQ	noswitch	// already on g0
   354
   355	CMPQ	AX, m_curg(BX)
   356	JNE	bad
   357
   358	// switch stacks
   359	// save our state in g->sched. Pretend to
   360	// be systemstack_switch if the G stack is scanned.
   361	MOVQ	$runtime·systemstack_switch(SB), SI
   362	MOVQ	SI, (g_sched+gobuf_pc)(AX)
   363	MOVQ	SP, (g_sched+gobuf_sp)(AX)
   364	MOVQ	AX, (g_sched+gobuf_g)(AX)
   365	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   366
   367	// switch to g0
   368	MOVQ	DX, g(CX)
   369	MOVQ	(g_sched+gobuf_sp)(DX), BX
   370	// make it look like mstart called systemstack on g0, to stop traceback
   371	SUBQ	$8, BX
   372	MOVQ	$runtime·mstart(SB), DX
   373	MOVQ	DX, 0(BX)	// fake return PC: mstart
   374	MOVQ	BX, SP
   375
   376	// call target function
   377	MOVQ	DI, DX
   378	MOVQ	0(DI), DI	// load code pointer out of funcval
   379	CALL	DI
   380
   381	// switch back to g
   382	get_tls(CX)
   383	MOVQ	g(CX), AX
   384	MOVQ	g_m(AX), BX
   385	MOVQ	m_curg(BX), AX
   386	MOVQ	AX, g(CX)
   387	MOVQ	(g_sched+gobuf_sp)(AX), SP
   388	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear saved SP; no longer valid
   389	RET
   390
   391noswitch:
   392	// already on m stack; tail call the function
   393	// Using a tail call here cleans up tracebacks since we won't stop
   394	// at an intermediate systemstack.
   395	MOVQ	DI, DX
   396	MOVQ	0(DI), DI
   397	JMP	DI
   398
   399bad:
   400	// Bad: g is not gsignal, not g0, not curg. What is it?
   401	MOVQ	$runtime·badsystemstack(SB), AX
   402	CALL	AX
   403	INT	$3	// crash if badsystemstack returns
   404
   405
   406/*
   407 * support for morestack
   408 */
   409
   410// Called during function prolog when more stack is needed.
   411//
   412// The traceback routines see morestack on a g0 as being
   413// the top of a stack (for example, morestack calling newstack
   414// calling the scheduler calling newm calling gc), so we must
   415// record an argument size. For that purpose, it has no arguments.
   416TEXT runtime·morestack(SB),NOSPLIT,$0-0
   417	// Cannot grow scheduler stack (m->g0).
   418	get_tls(CX)
   419	MOVQ	g(CX), BX
   420	MOVQ	g_m(BX), BX
   421	MOVQ	m_g0(BX), SI
   422	CMPQ	g(CX), SI
   423	JNE	3(PC)	// ok: not on g0, skip the fatal path
   424	CALL	runtime·badmorestackg0(SB)
   425	CALL	runtime·abort(SB)
   426
   427	// Cannot grow signal stack (m->gsignal).
   428	MOVQ	m_gsignal(BX), SI
   429	CMPQ	g(CX), SI
   430	JNE	3(PC)	// ok: not on gsignal, skip the fatal path
   431	CALL	runtime·badmorestackgsignal(SB)
   432	CALL	runtime·abort(SB)
   433
   434	// Called from f.
   435	// Set m->morebuf to f's caller.
   436	NOP	SP	// tell vet SP changed - stop checking offsets
   437	MOVQ	8(SP), AX	// f's caller's PC
   438	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   439	LEAQ	16(SP), AX	// f's caller's SP
   440	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   441	get_tls(CX)
   442	MOVQ	g(CX), SI
   443	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   444
   445	// Set g->sched to context in f.
   446	MOVQ	0(SP), AX // f's PC
   447	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   448	MOVQ	SI, (g_sched+gobuf_g)(SI)
   449	LEAQ	8(SP), AX // f's SP
   450	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   451	MOVQ	BP, (g_sched+gobuf_bp)(SI)
   452	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX holds f's closure context
   453
   454	// Call newstack on m->g0's stack.
   455	MOVQ	m_g0(BX), BX
   456	MOVQ	BX, g(CX)
   457	MOVQ	(g_sched+gobuf_sp)(BX), SP
   458	CALL	runtime·newstack(SB)
   459	CALL	runtime·abort(SB)	// crash if newstack returns
   460	RET
   461
   462// morestack but not preserving ctxt.
   463TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   464	MOVL	$0, DX	// ctxt = nil
   465	JMP	runtime·morestack(SB)
   466
   467// reflectcall: call a function with the given argument list
   468// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   469// we don't have variable-sized frames, so we use a small number
   470// of constant-sized-frame functions to encode a few bits of size in the pc.
   471// Caution: ugly multiline assembly macros in your future!
   472
// DISPATCH jumps to NAME if the requested argsize (CX) fits in MAXSIZE.
   473#define DISPATCH(NAME,MAXSIZE)		\
   474	CMPQ	CX, $MAXSIZE;		\
   475	JA	3(PC);			\
   476	MOVQ	$NAME(SB), AX;		\
   477	JMP	AX
   478// Note: can't just "JMP NAME(SB)" - bad inlining results.
   479
   480TEXT ·reflectcall<ABIInternal>(SB), NOSPLIT, $0-32
   481	MOVLQZX argsize+24(FP), CX	// zero-extend 32-bit argsize into CX
   482	DISPATCH(runtime·call16, 16)
   483	DISPATCH(runtime·call32, 32)
   484	DISPATCH(runtime·call64, 64)
   485	DISPATCH(runtime·call128, 128)
   486	DISPATCH(runtime·call256, 256)
   487	DISPATCH(runtime·call512, 512)
   488	DISPATCH(runtime·call1024, 1024)
   489	DISPATCH(runtime·call2048, 2048)
   490	DISPATCH(runtime·call4096, 4096)
   491	DISPATCH(runtime·call8192, 8192)
   492	DISPATCH(runtime·call16384, 16384)
   493	DISPATCH(runtime·call32768, 32768)
   494	DISPATCH(runtime·call65536, 65536)
   495	DISPATCH(runtime·call131072, 131072)
   496	DISPATCH(runtime·call262144, 262144)
   497	DISPATCH(runtime·call524288, 524288)
   498	DISPATCH(runtime·call1048576, 1048576)
   499	DISPATCH(runtime·call2097152, 2097152)
   500	DISPATCH(runtime·call4194304, 4194304)
   501	DISPATCH(runtime·call8388608, 8388608)
   502	DISPATCH(runtime·call16777216, 16777216)
   503	DISPATCH(runtime·call33554432, 33554432)
   504	DISPATCH(runtime·call67108864, 67108864)
   505	DISPATCH(runtime·call134217728, 134217728)
   506	DISPATCH(runtime·call268435456, 268435456)
   507	DISPATCH(runtime·call536870912, 536870912)
   508	DISPATCH(runtime·call1073741824, 1073741824)
   509	MOVQ	$runtime·badreflectcall(SB), AX	// argsize > 1GB: fatal
   510	JMP	AX
   511
// CALLFN defines one fixed-frame call·N function: copy the caller's
// argument block onto our frame, call f, then hand the result region
// to callRet<> for write-barrier-aware copy-back via reflectcallmove.
   512#define CALLFN(NAME,MAXSIZE)			\
   513TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
   514	NO_LOCAL_POINTERS;			\
   515	/* copy arguments to stack */		\
   516	MOVQ	argptr+16(FP), SI;		\
   517	MOVLQZX argsize+24(FP), CX;		\
   518	MOVQ	SP, DI;				\
   519	REP;MOVSB;				\
   520	/* call function */			\
   521	MOVQ	f+8(FP), DX;			\
   522	PCDATA  $PCDATA_StackMapIndex, $0;	\
   523	MOVQ	(DX), AX;			\
   524	CALL	AX;				\
   525	/* copy return values back */		\
   526	MOVQ	argtype+0(FP), DX;		\
   527	MOVQ	argptr+16(FP), DI;		\
   528	MOVLQZX	argsize+24(FP), CX;		\
   529	MOVLQZX	retoffset+28(FP), BX;		\
   530	MOVQ	SP, SI;				\
   531	ADDQ	BX, DI;				\
   532	ADDQ	BX, SI;				\
   533	SUBQ	BX, CX;				\
   534	CALL	callRet<>(SB);			\
   535	RET
   536
   537// callRet copies return values back at the end of call*. This is a
   538// separate function so it can allocate stack space for the arguments
   539// to reflectcallmove. It does not follow the Go ABI; it expects its
   540// arguments in registers.
   541TEXT callRet<>(SB), NOSPLIT, $32-0
   542	NO_LOCAL_POINTERS
   543	MOVQ	DX, 0(SP)	// argtype
   544	MOVQ	DI, 8(SP)	// destination (caller's result area)
   545	MOVQ	SI, 16(SP)	// source (our frame's result area)
   546	MOVQ	CX, 24(SP)	// size in bytes
   547	CALL	runtime·reflectcallmove(SB)
   548	RET
   549
   550CALLFN(·call16, 16)	// one call·N per power-of-two frame size, up to 1GB
   551CALLFN(·call32, 32)
   552CALLFN(·call64, 64)
   553CALLFN(·call128, 128)
   554CALLFN(·call256, 256)
   555CALLFN(·call512, 512)
   556CALLFN(·call1024, 1024)
   557CALLFN(·call2048, 2048)
   558CALLFN(·call4096, 4096)
   559CALLFN(·call8192, 8192)
   560CALLFN(·call16384, 16384)
   561CALLFN(·call32768, 32768)
   562CALLFN(·call65536, 65536)
   563CALLFN(·call131072, 131072)
   564CALLFN(·call262144, 262144)
   565CALLFN(·call524288, 524288)
   566CALLFN(·call1048576, 1048576)
   567CALLFN(·call2097152, 2097152)
   568CALLFN(·call4194304, 4194304)
   569CALLFN(·call8388608, 8388608)
   570CALLFN(·call16777216, 16777216)
   571CALLFN(·call33554432, 33554432)
   572CALLFN(·call67108864, 67108864)
   573CALLFN(·call134217728, 134217728)
   574CALLFN(·call268435456, 268435456)
   575CALLFN(·call536870912, 536870912)
   576CALLFN(·call1073741824, 1073741824)
   577
   578TEXT runtime·procyield(SB),NOSPLIT,$0-0
   579	MOVL	cycles+0(FP), AX
   580again:
   581	PAUSE
   582	SUBL	$1, AX
   583	JNZ	again
   584	RET
   585
   586
   587TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   588	// Stores are already ordered on x86, so this is just a
   589	// compile barrier.
   590	RET	// no fence instruction needed on amd64
   591
   592// func jmpdefer(fv *funcval, argp uintptr)
   593// argp is a caller SP.
   594// called from deferreturn.
   595// 1. pop the caller
   596// 2. sub 5 bytes from the callers return
   597// 3. jmp to the argument
   598TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
   599	MOVQ	fv+0(FP), DX	// fn
   600	MOVQ	argp+8(FP), BX	// caller sp
   601	LEAQ	-8(BX), SP	// caller sp after CALL
   602	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
   603	SUBQ	$5, (SP)	// return to CALL again (5 = length of the CALL instruction)
   604	MOVQ	0(DX), BX	// load code pointer out of funcval
   605	JMP	BX	// but first run the deferred function
   606
   607// Save state of caller into g->sched. Smashes R8, R9.
   608TEXT gosave<>(SB),NOSPLIT,$0
   609	get_tls(R8)
   610	MOVQ	g(R8), R8	// R8 = current g
   611	MOVQ	0(SP), R9	// caller's PC
   612	MOVQ	R9, (g_sched+gobuf_pc)(R8)
   613	LEAQ	8(SP), R9	// caller's SP
   614	MOVQ	R9, (g_sched+gobuf_sp)(R8)
   615	MOVQ	$0, (g_sched+gobuf_ret)(R8)
   616	MOVQ	BP, (g_sched+gobuf_bp)(R8)
   617	// Assert ctxt is zero. See func save.
   618	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
   619	TESTQ	R9, R9
   620	JZ	2(PC)	// ctxt == 0: ok, skip the badctxt call
   621	CALL	runtime·badctxt(SB)
   622	RET
   623
   624// func asmcgocall(fn, arg unsafe.Pointer) int32
   625// Call fn(arg) on the scheduler stack,
   626// aligned appropriately for the gcc ABI.
   627// See cgocall.go for more details.
   628TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   629	MOVQ	fn+0(FP), AX
   630	MOVQ	arg+8(FP), BX
   631
   632	MOVQ	SP, DX	// remember original SP for the depth computation below
   633
   634	// Figure out if we need to switch to m->g0 stack.
   635	// We get called to create new OS threads too, and those
   636	// come in on the m->g0 stack already.
   637	get_tls(CX)
   638	MOVQ	g(CX), R8
   639	CMPQ	R8, $0
   640	JEQ	nosave	// no g at all: already on a system stack
   641	MOVQ	g_m(R8), R8
   642	MOVQ	m_g0(R8), SI
   643	MOVQ	g(CX), DI
   644	CMPQ	SI, DI
   645	JEQ	nosave	// already on g0
   646	MOVQ	m_gsignal(R8), SI
   647	CMPQ	SI, DI
   648	JEQ	nosave	// already on the signal stack
   649
   650	// Switch to system stack.
   651	MOVQ	m_g0(R8), SI
   652	CALL	gosave<>(SB)
   653	MOVQ	SI, g(CX)
   654	MOVQ	(g_sched+gobuf_sp)(SI), SP
   655
   656	// Now on a scheduling stack (a pthread-created stack).
   657	// Make sure we have enough room for 4 stack-backed fast-call
   658	// registers as per windows amd64 calling convention.
   659	SUBQ	$64, SP
   660	ANDQ	$~15, SP	// alignment for gcc ABI
   661	MOVQ	DI, 48(SP)	// save g
   662	MOVQ	(g_stack+stack_hi)(DI), DI
   663	SUBQ	DX, DI
   664	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   665	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   666	MOVQ	BX, CX		// CX = first argument in Win64
   667	CALL	AX
   668
   669	// Restore registers, g, stack pointer.
   670	get_tls(CX)
   671	MOVQ	48(SP), DI	// reload saved g
   672	MOVQ	(g_stack+stack_hi)(DI), SI
   673	SUBQ	40(SP), SI	// recompute SP from depth (stack may have moved)
   674	MOVQ	DI, g(CX)
   675	MOVQ	SI, SP
   676
   677	MOVL	AX, ret+16(FP)
   678	RET
   679
   680nosave:
   681	// Running on a system stack, perhaps even without a g.
   682	// Having no g can happen during thread creation or thread teardown
   683	// (see needm/dropm on Solaris, for example).
   684	// This code is like the above sequence but without saving/restoring g
   685	// and without worrying about the stack moving out from under us
   686	// (because we're on a system stack, not a goroutine stack).
   687	// The above code could be used directly if already on a system stack,
   688	// but then the only path through this code would be a rare case on Solaris.
   689	// Using this code for all "already on system stack" calls exercises it more,
   690	// which should help keep it correct.
   691	SUBQ	$64, SP
   692	ANDQ	$~15, SP
   693	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
   694	MOVQ	DX, 40(SP)	// save original stack pointer
   695	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   696	MOVQ	BX, CX		// CX = first argument in Win64
   697	CALL	AX
   698	MOVQ	40(SP), SI	// restore original stack pointer
   699	MOVQ	SI, SP
   700	MOVL	AX, ret+16(FP)
   701	RET
   702
   703// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
   704// See cgocall.go for more details.
   705TEXT ·cgocallback(SB),NOSPLIT,$24-24
   706	NO_LOCAL_POINTERS
   707
   708	// If g is nil, Go did not create the current thread.
   709	// Call needm to obtain one m for temporary use.
   710	// In this case, we're running on the thread stack, so there's
   711	// lots of space, but the linker doesn't know. Hide the call from
   712	// the linker analysis by using an indirect call through AX.
   713	get_tls(CX)
   714#ifdef GOOS_windows
   715	MOVL	$0, BX	// BX = nil g if TLS is not yet set up
   716	CMPQ	CX, $0
   717	JEQ	2(PC)
   718#endif
   719	MOVQ	g(CX), BX
   720	CMPQ	BX, $0
   721	JEQ	needm
   722	MOVQ	g_m(BX), BX
   723	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
   724	JMP	havem
   725needm:
   726	MOVQ    $runtime·needm(SB), AX
   727	CALL	AX
   728	MOVQ	$0, savedm-8(SP) // dropm on return
   729	get_tls(CX)
   730	MOVQ	g(CX), BX
   731	MOVQ	g_m(BX), BX
   732
   733	// Set m->sched.sp = SP, so that if a panic happens
   734	// during the function we are about to execute, it will
   735	// have a valid SP to run on the g0 stack.
   736	// The next few lines (after the havem label)
   737	// will save this SP onto the stack and then write
   738	// the same SP back to m->sched.sp. That seems redundant,
   739	// but if an unrecovered panic happens, unwindm will
   740	// restore the g->sched.sp from the stack location
   741	// and then systemstack will try to use it. If we don't set it here,
   742	// that restored SP will be uninitialized (typically 0) and
   743	// will not be usable.
   744	MOVQ	m_g0(BX), SI
   745	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   746
   747havem:
   748	// Now there's a valid m, and we're running on its m->g0.
   749	// Save current m->g0->sched.sp on stack and then set it to SP.
   750	// Save current sp in m->g0->sched.sp in preparation for
   751	// switch back to m->curg stack.
   752	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   753	MOVQ	m_g0(BX), SI
   754	MOVQ	(g_sched+gobuf_sp)(SI), AX
   755	MOVQ	AX, 0(SP)
   756	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   757
   758	// Switch to m->curg stack and call runtime.cgocallbackg.
   759	// Because we are taking over the execution of m->curg
   760	// but *not* resuming what had been running, we need to
   761	// save that information (m->curg->sched) so we can restore it.
   762	// We can restore m->curg->sched.sp easily, because calling
   763	// runtime.cgocallbackg leaves SP unchanged upon return.
   764	// To save m->curg->sched.pc, we push it onto the curg stack and
   765	// open a frame the same size as cgocallback's g0 frame.
   766	// Once we switch to the curg stack, the pushed PC will appear
   767	// to be the return PC of cgocallback, so that the traceback
   768	// will seamlessly trace back into the earlier calls.
   769	MOVQ	m_curg(BX), SI
   770	MOVQ	SI, g(CX)
   771	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   772	MOVQ	(g_sched+gobuf_pc)(SI), BX
   773	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
   774	// Gather our arguments into registers.
   775	MOVQ	fn+0(FP), BX
   776	MOVQ	frame+8(FP), CX
   777	MOVQ	ctxt+16(FP), DX
   778	// Compute the size of the frame, including return PC and, if
   779	// GOEXPERIMENT=framepointer, the saved base pointer
   780	LEAQ	fn+0(FP), AX
   781	SUBQ	SP, AX   // AX is our actual frame size
   782	SUBQ	AX, DI   // Allocate the same frame size on the g stack
   783	MOVQ	DI, SP
   784
   785	MOVQ	BX, 0(SP)	// arg: fn
   786	MOVQ	CX, 8(SP)	// arg: frame
   787	MOVQ	DX, 16(SP)	// arg: ctxt
   788	CALL	runtime·cgocallbackg(SB)
   789
   790	// Compute the size of the frame again. FP and SP have
   791	// completely different values here than they did above,
   792	// but only their difference matters.
   793	LEAQ	fn+0(FP), AX
   794	SUBQ	SP, AX
   795
   796	// Restore g->sched (== m->curg->sched) from saved values.
   797	get_tls(CX)
   798	MOVQ	g(CX), SI
   799	MOVQ	SP, DI
   800	ADDQ	AX, DI
   801	MOVQ	-8(DI), BX	// pop the return PC pushed above
   802	MOVQ	BX, (g_sched+gobuf_pc)(SI)
   803	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   804
   805	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   806	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   807	// so we do not have to restore it.)
   808	MOVQ	g(CX), BX
   809	MOVQ	g_m(BX), BX
   810	MOVQ	m_g0(BX), SI
   811	MOVQ	SI, g(CX)
   812	MOVQ	(g_sched+gobuf_sp)(SI), SP
   813	MOVQ	0(SP), AX
   814	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   815
   816	// If the m on entry was nil, we called needm above to borrow an m
   817	// for the duration of the call. Since the call is over, return it with dropm.
   818	MOVQ	savedm-8(SP), BX
   819	CMPQ	BX, $0
   820	JNE 3(PC)	// had an m on entry; skip dropm
   821	MOVQ	$runtime·dropm(SB), AX
   822	CALL	AX
   823
   824	// Done!
   825	RET
   826
   827// func setg(gg *g)
   828// set g. for use by needm.
   829TEXT runtime·setg(SB), NOSPLIT, $0-8
   830	MOVQ	gg+0(FP), BX
   831#ifdef GOOS_windows
   832	CMPQ	BX, $0
   833	JNE	settls
   834	MOVQ	$0, 0x28(GS)	// clear the g slot at 0x28(GS)
   835	RET
   836settls:
   837	MOVQ	g_m(BX), AX
   838	LEAQ	m_tls(AX), AX
   839	MOVQ	AX, 0x28(GS)	// point 0x28(GS) at this m's TLS area
   840#endif
   841	get_tls(CX)
   842	MOVQ	BX, g(CX)
   843	RET
   844
   845// void setg_gcc(G*); set g called from gcc.
   846TEXT setg_gcc<>(SB),NOSPLIT,$0
   847	get_tls(AX)
   848	MOVQ	DI, g(AX)	// DI = g, per the C calling convention
   849	RET
   850
   851TEXT runtime·abort(SB),NOSPLIT,$0-0
   852	INT	$3	// breakpoint trap: crash deliberately
   853loop:
   854	JMP	loop	// spin forever if the trap somehow returns
   855
   856// check that SP is in range [g->stack.lo, g->stack.hi)
   857TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   858	get_tls(CX)
   859	MOVQ	g(CX), AX
   860	CMPQ	(g_stack+stack_hi)(AX), SP
   861	JHI	2(PC)	// ok if stack.hi > SP; skip abort
   862	CALL	runtime·abort(SB)
   863	CMPQ	SP, (g_stack+stack_lo)(AX)
   864	JHI	2(PC)	// ok if SP > stack.lo; skip abort
   865	CALL	runtime·abort(SB)
   866	RET
   867
   868// func cputicks() int64
   869TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   870	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   871	JNE	mfence	// non-Intel (or unknown): serialize with MFENCE
   872	LFENCE
   873	JMP	done
   874mfence:
   875	MFENCE
   876done:
   877	RDTSC
   878	SHLQ	$32, DX
   879	ADDQ	DX, AX	// combine EDX:EAX into one 64-bit tick count
   880	MOVQ	AX, ret+0(FP)
   881	RET
   882
   883// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
   884// hash function using AES hardware instructions
   885TEXT runtime·memhash(SB),NOSPLIT,$0-32
   886	CMPB	runtime·useAeshash(SB), $0
   887	JEQ	noaes	// AES hashing disabled: use the portable fallback
   888	MOVQ	p+0(FP), AX	// ptr to data
   889	MOVQ	s+16(FP), CX	// size
   890	LEAQ	ret+24(FP), DX	// DX = where aeshashbody writes the result
   891	JMP	aeshashbody<>(SB)
   892noaes:
   893	JMP	runtime·memhashFallback(SB)
   894
   895// func strhash(p unsafe.Pointer, h uintptr) uintptr
   896TEXT runtime·strhash(SB),NOSPLIT,$0-24
   897	CMPB	runtime·useAeshash(SB), $0
   898	JEQ	noaes	// AES hashing disabled: use the portable fallback
   899	MOVQ	p+0(FP), AX	// ptr to string struct
   900	MOVQ	8(AX), CX	// length of string
   901	MOVQ	(AX), AX	// string data
   902	LEAQ	ret+16(FP), DX	// DX = where aeshashbody writes the result
   903	JMP	aeshashbody<>(SB)
   904noaes:
   905	JMP	runtime·strhashFallback(SB)
   906
   907// AX: data
   908// CX: length
   909// DX: address to put return value
   910TEXT aeshashbody<>(SB),NOSPLIT,$0-0
   911	// Fill an SSE register with our seeds.
   912	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
   913	PINSRW	$4, CX, X0			// 16 bits of length
   914	PSHUFHW $0, X0, X0			// repeat length 4 times total
   915	MOVO	X0, X1				// save unscrambled seed
   916	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
   917	AESENC	X0, X0				// scramble seed
   918
   919	CMPQ	CX, $16
   920	JB	aes0to15
   921	JE	aes16
   922	CMPQ	CX, $32
   923	JBE	aes17to32
   924	CMPQ	CX, $64
   925	JBE	aes33to64
   926	CMPQ	CX, $128
   927	JBE	aes65to128
   928	JMP	aes129plus
   929
   930aes0to15:
   931	TESTQ	CX, CX
   932	JE	aes0
   933
   934	ADDQ	$16, AX
   935	TESTW	$0xff0, AX
   936	JE	endofpage
   937
   938	// 16 bytes loaded at this address won't cross
   939	// a page boundary, so we can load it directly.
   940	MOVOU	-16(AX), X1
   941	ADDQ	CX, CX
   942	MOVQ	$masks<>(SB), AX
   943	PAND	(AX)(CX*8), X1
   944final1:
   945	PXOR	X0, X1	// xor data with seed
   946	AESENC	X1, X1	// scramble combo 3 times
   947	AESENC	X1, X1
   948	AESENC	X1, X1
   949	MOVQ	X1, (DX)
   950	RET
   951
   952endofpage:
   953	// address ends in 1111xxxx. Might be up against
   954	// a page boundary, so load ending at last byte.
   955	// Then shift bytes down using pshufb.
   956	MOVOU	-32(AX)(CX*1), X1
   957	ADDQ	CX, CX
   958	MOVQ	$shifts<>(SB), AX
   959	PSHUFB	(AX)(CX*8), X1
   960	JMP	final1
   961
   962aes0:
   963	// Return scrambled input seed
   964	AESENC	X0, X0
   965	MOVQ	X0, (DX)
   966	RET
   967
   968aes16:
   969	MOVOU	(AX), X1
   970	JMP	final1
   971
   972aes17to32:
   973	// make second starting seed
   974	PXOR	runtime·aeskeysched+16(SB), X1
   975	AESENC	X1, X1
   976
   977	// load data to be hashed
   978	MOVOU	(AX), X2
   979	MOVOU	-16(AX)(CX*1), X3
   980
   981	// xor with seed
   982	PXOR	X0, X2
   983	PXOR	X1, X3
   984
   985	// scramble 3 times
   986	AESENC	X2, X2
   987	AESENC	X3, X3
   988	AESENC	X2, X2
   989	AESENC	X3, X3
   990	AESENC	X2, X2
   991	AESENC	X3, X3
   992
   993	// combine results
   994	PXOR	X3, X2
   995	MOVQ	X2, (DX)
   996	RET
   997
   998aes33to64:
   999	// make 3 more starting seeds
  1000	MOVO	X1, X2
  1001	MOVO	X1, X3
  1002	PXOR	runtime·aeskeysched+16(SB), X1
  1003	PXOR	runtime·aeskeysched+32(SB), X2
  1004	PXOR	runtime·aeskeysched+48(SB), X3
  1005	AESENC	X1, X1
  1006	AESENC	X2, X2
  1007	AESENC	X3, X3
  1008
  1009	MOVOU	(AX), X4
  1010	MOVOU	16(AX), X5
  1011	MOVOU	-32(AX)(CX*1), X6
  1012	MOVOU	-16(AX)(CX*1), X7
  1013
  1014	PXOR	X0, X4
  1015	PXOR	X1, X5
  1016	PXOR	X2, X6
  1017	PXOR	X3, X7
  1018
  1019	AESENC	X4, X4
  1020	AESENC	X5, X5
  1021	AESENC	X6, X6
  1022	AESENC	X7, X7
  1023
  1024	AESENC	X4, X4
  1025	AESENC	X5, X5
  1026	AESENC	X6, X6
  1027	AESENC	X7, X7
  1028
  1029	AESENC	X4, X4
  1030	AESENC	X5, X5
  1031	AESENC	X6, X6
  1032	AESENC	X7, X7
  1033
  1034	PXOR	X6, X4
  1035	PXOR	X7, X5
  1036	PXOR	X5, X4
  1037	MOVQ	X4, (DX)
  1038	RET
  1039
  1040aes65to128:
  1041	// make 7 more starting seeds
  1042	MOVO	X1, X2
  1043	MOVO	X1, X3
  1044	MOVO	X1, X4
  1045	MOVO	X1, X5
  1046	MOVO	X1, X6
  1047	MOVO	X1, X7
  1048	PXOR	runtime·aeskeysched+16(SB), X1
  1049	PXOR	runtime·aeskeysched+32(SB), X2
  1050	PXOR	runtime·aeskeysched+48(SB), X3
  1051	PXOR	runtime·aeskeysched+64(SB), X4
  1052	PXOR	runtime·aeskeysched+80(SB), X5
  1053	PXOR	runtime·aeskeysched+96(SB), X6
  1054	PXOR	runtime·aeskeysched+112(SB), X7
  1055	AESENC	X1, X1
  1056	AESENC	X2, X2
  1057	AESENC	X3, X3
  1058	AESENC	X4, X4
  1059	AESENC	X5, X5
  1060	AESENC	X6, X6
  1061	AESENC	X7, X7
  1062
  1063	// load data
  1064	MOVOU	(AX), X8
  1065	MOVOU	16(AX), X9
  1066	MOVOU	32(AX), X10
  1067	MOVOU	48(AX), X11
  1068	MOVOU	-64(AX)(CX*1), X12
  1069	MOVOU	-48(AX)(CX*1), X13
  1070	MOVOU	-32(AX)(CX*1), X14
  1071	MOVOU	-16(AX)(CX*1), X15
  1072
  1073	// xor with seed
  1074	PXOR	X0, X8
  1075	PXOR	X1, X9
  1076	PXOR	X2, X10
  1077	PXOR	X3, X11
  1078	PXOR	X4, X12
  1079	PXOR	X5, X13
  1080	PXOR	X6, X14
  1081	PXOR	X7, X15
  1082
  1083	// scramble 3 times
  1084	AESENC	X8, X8
  1085	AESENC	X9, X9
  1086	AESENC	X10, X10
  1087	AESENC	X11, X11
  1088	AESENC	X12, X12
  1089	AESENC	X13, X13
  1090	AESENC	X14, X14
  1091	AESENC	X15, X15
  1092
  1093	AESENC	X8, X8
  1094	AESENC	X9, X9
  1095	AESENC	X10, X10
  1096	AESENC	X11, X11
  1097	AESENC	X12, X12
  1098	AESENC	X13, X13
  1099	AESENC	X14, X14
  1100	AESENC	X15, X15
  1101
  1102	AESENC	X8, X8
  1103	AESENC	X9, X9
  1104	AESENC	X10, X10
  1105	AESENC	X11, X11
  1106	AESENC	X12, X12
  1107	AESENC	X13, X13
  1108	AESENC	X14, X14
  1109	AESENC	X15, X15
  1110
  1111	// combine results
  1112	PXOR	X12, X8
  1113	PXOR	X13, X9
  1114	PXOR	X14, X10
  1115	PXOR	X15, X11
  1116	PXOR	X10, X8
  1117	PXOR	X11, X9
  1118	PXOR	X9, X8
  1119	MOVQ	X8, (DX)
  1120	RET
  1121
  1122aes129plus:
  1123	// make 7 more starting seeds
  1124	MOVO	X1, X2
  1125	MOVO	X1, X3
  1126	MOVO	X1, X4
  1127	MOVO	X1, X5
  1128	MOVO	X1, X6
  1129	MOVO	X1, X7
  1130	PXOR	runtime·aeskeysched+16(SB), X1
  1131	PXOR	runtime·aeskeysched+32(SB), X2
  1132	PXOR	runtime·aeskeysched+48(SB), X3
  1133	PXOR	runtime·aeskeysched+64(SB), X4
  1134	PXOR	runtime·aeskeysched+80(SB), X5
  1135	PXOR	runtime·aeskeysched+96(SB), X6
  1136	PXOR	runtime·aeskeysched+112(SB), X7
  1137	AESENC	X1, X1
  1138	AESENC	X2, X2
  1139	AESENC	X3, X3
  1140	AESENC	X4, X4
  1141	AESENC	X5, X5
  1142	AESENC	X6, X6
  1143	AESENC	X7, X7
  1144
  1145	// start with last (possibly overlapping) block
  1146	MOVOU	-128(AX)(CX*1), X8
  1147	MOVOU	-112(AX)(CX*1), X9
  1148	MOVOU	-96(AX)(CX*1), X10
  1149	MOVOU	-80(AX)(CX*1), X11
  1150	MOVOU	-64(AX)(CX*1), X12
  1151	MOVOU	-48(AX)(CX*1), X13
  1152	MOVOU	-32(AX)(CX*1), X14
  1153	MOVOU	-16(AX)(CX*1), X15
  1154
  1155	// xor in seed
  1156	PXOR	X0, X8
  1157	PXOR	X1, X9
  1158	PXOR	X2, X10
  1159	PXOR	X3, X11
  1160	PXOR	X4, X12
  1161	PXOR	X5, X13
  1162	PXOR	X6, X14
  1163	PXOR	X7, X15
  1164
  1165	// compute number of remaining 128-byte blocks
  1166	DECQ	CX
  1167	SHRQ	$7, CX
  1168
  1169aesloop:
  1170	// scramble state
  1171	AESENC	X8, X8
  1172	AESENC	X9, X9
  1173	AESENC	X10, X10
  1174	AESENC	X11, X11
  1175	AESENC	X12, X12
  1176	AESENC	X13, X13
  1177	AESENC	X14, X14
  1178	AESENC	X15, X15
  1179
  1180	// scramble state, xor in a block
  1181	MOVOU	(AX), X0
  1182	MOVOU	16(AX), X1
  1183	MOVOU	32(AX), X2
  1184	MOVOU	48(AX), X3
  1185	AESENC	X0, X8
  1186	AESENC	X1, X9
  1187	AESENC	X2, X10
  1188	AESENC	X3, X11
  1189	MOVOU	64(AX), X4
  1190	MOVOU	80(AX), X5
  1191	MOVOU	96(AX), X6
  1192	MOVOU	112(AX), X7
  1193	AESENC	X4, X12
  1194	AESENC	X5, X13
  1195	AESENC	X6, X14
  1196	AESENC	X7, X15
  1197
  1198	ADDQ	$128, AX
  1199	DECQ	CX
  1200	JNE	aesloop
  1201
  1202	// 3 more scrambles to finish
  1203	AESENC	X8, X8
  1204	AESENC	X9, X9
  1205	AESENC	X10, X10
  1206	AESENC	X11, X11
  1207	AESENC	X12, X12
  1208	AESENC	X13, X13
  1209	AESENC	X14, X14
  1210	AESENC	X15, X15
  1211	AESENC	X8, X8
  1212	AESENC	X9, X9
  1213	AESENC	X10, X10
  1214	AESENC	X11, X11
  1215	AESENC	X12, X12
  1216	AESENC	X13, X13
  1217	AESENC	X14, X14
  1218	AESENC	X15, X15
  1219	AESENC	X8, X8
  1220	AESENC	X9, X9
  1221	AESENC	X10, X10
  1222	AESENC	X11, X11
  1223	AESENC	X12, X12
  1224	AESENC	X13, X13
  1225	AESENC	X14, X14
  1226	AESENC	X15, X15
  1227
  1228	PXOR	X12, X8
  1229	PXOR	X13, X9
  1230	PXOR	X14, X10
  1231	PXOR	X15, X11
  1232	PXOR	X10, X8
  1233	PXOR	X11, X9
  1234	PXOR	X9, X8
  1235	MOVQ	X8, (DX)
  1236	RET
  1237
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
// AES-based hash of the 4 bytes at p with seed h. If AES hashing is
// disabled (useAeshash == 0), tail-jumps to the pure-Go fallback.
TEXT runtime·memhash32(SB),NOSPLIT,$0-24
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRD	$2, (AX), X0	// data (inserted into dword lane 2, above the seed)
	// Three AES rounds against the per-process key schedule mix the
	// seed and data into the final hash.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// low 64 bits of X0 are the result
	RET
noaes:
	JMP	runtime·memhash32Fallback(SB)
  1252
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
// AES-based hash of the 8 bytes at p with seed h. If AES hashing is
// disabled (useAeshash == 0), tail-jumps to the pure-Go fallback.
TEXT runtime·memhash64(SB),NOSPLIT,$0-24
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRQ	$1, (AX), X0	// data (inserted into qword lane 1, above the seed)
	// Three AES rounds against the per-process key schedule mix the
	// seed and data into the final hash.
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// low 64 bits of X0 are the result
	RET
noaes:
	JMP	runtime·memhash64Fallback(SB)
  1267
// simple mask to get rid of data in the high part of the register.
// masks<>+16*i is a 128-bit mask whose low i bytes are 0xff and whose
// remaining bytes are zero, for i in [0,15]. ANDing a 16-byte load with
// it keeps only the low i bytes. The AES hash indexes this table as
// (AX)(CX*8) after doubling CX, i.e. by 16*length. Must stay 16-byte
// aligned (verified by checkASM below).
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
  1302
// func checkASM() bool
// Reports whether assembly-time invariants hold; here, that the
// masks<> and shifts<> tables are 16-byte aligned, as required by the
// 16-byte PAND/PSHUFB loads in the AES hash.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX		// OR the two addresses: any misaligned low bit survives
	TESTQ	$15, AX
	SETEQ	ret+0(FP)	// true iff both addresses have their low 4 bits clear
	RET
  1312
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// shifts<>+16*i shuffles the top i bytes of a 16-byte register down to
// the bottom; 0xff control bytes (high bit set) make PSHUFB zero the
// corresponding output byte. Indexed like masks<>: by 16*length.
// Must stay 16-byte aligned (verified by checkASM above).
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
  1349
// return0 returns 0 in AX. (Its callers are elsewhere in the runtime,
// not visible in this chunk.)
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX	// 32-bit write zero-extends, clearing all of AX; does not touch flags
	RET
  1353
  1354
// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX			// AX = g
	MOVQ	g_m(AX), AX			// AX = g.m
	MOVQ	m_curg(AX), AX			// AX = g.m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi (C return value in AX)
	RET
  1364
  1365// The top-most function running on a goroutine
  1366// returns to goexit+PCQuantum. Defined as ABIInternal
  1367// so as to make it identifiable to traceback (this
// function is used as a sentinel; traceback wants to
  1369// see the func PC, not a wrapper PC).
TEXT runtime·goexit<ABIInternal>(SB),NOSPLIT,$0-0
	// Leading NOP so that goexit+PCQuantum (the saved return PC, per
	// the comment above) lands inside this function's code range.
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
  1375
// This is called from .init_array and follows the platform, not Go, ABI.
// DI holds a pointer to the new moduledata (first argument in the C
// ABI); it is appended to the runtime's linked list of module data.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX		// AX = current list tail
	MOVQ	DI, moduledata_next(AX)			// tail.next = new moduledata
	MOVQ	DI, runtime·lastmoduledatap(SB)		// new moduledata becomes the tail
	POPQ	R15
	RET
  1384
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
// Defined as ABIInternal since it does not use the stack-based Go ABI.
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$120
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_p(R13), R13			// R13 = g.m.p
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14			// reserve two 8-byte slots: value and *slot
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)	// flags must survive until JEQ flush below
	// Record the write.
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	// Restore everything saved above and resume the fast-path exit.
	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret
  1475
// The following thunks adapt gcWriteBarrier's calling convention for
// callers whose value-to-write is in a register other than AX: each
// XCHGs the value into AX, calls gcWriteBarrier, then XCHGs back so no
// general-purpose register is clobbered.

// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierCX<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ CX, AX
	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ CX, AX
	RET

// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierDX<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ DX, AX
	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ DX, AX
	RET

// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierBX<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ BX, AX
	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ BX, AX
	RET

// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierBP<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ BP, AX
	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ BP, AX
	RET

// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierSI<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ SI, AX
	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ SI, AX
	RET

// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierR8<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ R8, AX
	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ R8, AX
	RET

// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrierR9<ABIInternal>(SB),NOSPLIT,$0
	XCHGQ R9, AX
	CALL runtime·gcWriteBarrier<ABIInternal>(SB)
	XCHGQ R9, AX
	RET
  1531
// Error string reported (with AX=8) when a debugger requests a call
// frame larger than the biggest debugCallNNNN helper (65536 bytes).
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1534
  1535// debugCallV1 is the entry point for debugger-injected function
  1536// calls on running goroutines. It informs the runtime that a
  1537// debug call has been injected and creates a call frame for the
  1538// debugger to fill in.
  1539//
  1540// To inject a function call, a debugger should:
  1541// 1. Check that the goroutine is in state _Grunning and that
  1542//    there are at least 256 bytes free on the stack.
  1543// 2. Push the current PC on the stack (updating SP).
  1544// 3. Write the desired argument frame size at SP-16 (using the SP
  1545//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
  1547//    so they can be restored later by the debugger.
  1548// 5. Set the PC to debugCallV1 and resume execution.
  1549//
  1550// If the goroutine is in state _Grunnable, then it's not generally
  1551// safe to inject a call because it may return out via other runtime
  1552// operations. Instead, the debugger should unwind the stack to find
  1553// the return to non-runtime code, add a temporary breakpoint there,
  1554// and inject the call once that breakpoint is hit.
  1555//
  1556// If the goroutine is in any other state, it's not safe to inject a call.
  1557//
  1558// This function communicates back to the debugger by setting RAX and
  1559// invoking INT3 to raise a breakpoint signal. See the comments in the
  1560// implementation for the protocol the debugger is expected to
  1561// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
  1562//
  1563// The debugger must ensure that any pointers passed to the function
  1564// obey escape analysis requirements. Specifically, it must not pass
  1565// a stack pointer to an escaping argument. debugCallV1 cannot check
  1566// this invariant.
  1567//
  1568// This is ABIInternal because Go code injects its PC directly into new
  1569// goroutine stacks.
TEXT runtime·debugCallV1<ABIInternal>(SB),NOSPLIT,$152-0
	// Frame layout: 15 saved GP registers at the top of the frame
	// (offsets -(0*8+8)(SP) through -(14*8+8)(SP), i.e. 120 bytes),
	// the saved argument frame size at frameSize-128(SP), and
	// outgoing call arguments at 0(SP).
	//
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX	// result: 0 if safe, else reason string pointer
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX	// reason string length
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
	//
	// Dispatch to the smallest debugCallNNNN helper whose frame can
	// hold the requested argument frame size (in AX).
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
  1691
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each helper reserves a MAXSIZE-byte frame for the injected call's
// arguments, then signals the debugger twice via INT3: AX=0 means the
// frame is ready to fill in, AX=1 means the injected call returned
// (see the protocol comments in debugCallV1 above).
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, AX;				\
	BYTE	$0xcc;				\
	MOVQ	$1, AX;				\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
  1714
// func debugCallPanicked(val interface{})
// Reports a panic in a debugger-injected call: places the panic's
// interface{} value at the top of the stack and raises INT3 with AX=2
// (see the protocol comments in debugCallV1 above).
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX	// interface type word
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX	// interface data word
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX
	BYTE	$0xcc	// INT3
	RET
  1725
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
// Defined as ABIInternal since they do not use the stack-based Go ABI.
//
// NOTE(review): which two registers feed x and y differs per stub; they
// must match the registers the compiler's bounds-check codegen leaves
// the operands in for each check kind — confirm against the compiler
// before changing any of them.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3CU(SB)
  1796
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// $16 = byte offset of slot #2 in the TLS array (2 slots * 8 bytes).
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
  1803
// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// How it works: the CALL pushes a return address pointing at the
// PAUSE/JMP speculation trap; setup then overwrites that return
// address on the stack with the target held in the register, so RET
// jumps to the real target while speculative execution is confined to
// the trap loop. The hand-encoded MOVQ uses REX bit (reg&8)>>1 and
// puts reg&7 in the ModRM reg field, addressing [rsp] via SIB $0x24.
#define RETPOLINE(reg) \
	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
	/* nospec: */									\
	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
	/* setup: */									\
	/*   MOVQ AX, 0(SP) */  BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
	/*   RET */             BYTE $0xC3

TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)

View as plain text