Black Lives Matter. Support the Equal Justice Initiative.

Text file src/runtime/asm_amd64.s

Documentation: runtime

     1// Copyright 2009 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "go_tls.h"
     7#include "funcdata.h"
     8#include "textflag.h"
     9
     10// _rt0_amd64 is common startup code for most amd64 systems when using
     11// internal linking. This is the entry point for the program from the
     12// kernel for an ordinary -buildmode=exe program. The stack holds the
     13// number of arguments and the C-style argv.
     14TEXT _rt0_amd64(SB),NOSPLIT,$-8
     15	MOVQ	0(SP), DI	// argc
     16	LEAQ	8(SP), SI	// argv
     17	JMP	runtime·rt0_go(SB)	// common entry expects argc in DI, argv in SI
    18
     19// main is common startup code for most amd64 systems when using
     20// external linking. The C startup code will call the symbol "main"
     21// passing argc and argv in the usual C ABI registers DI and SI.
     22TEXT main(SB),NOSPLIT,$-8
     23	JMP	runtime·rt0_go(SB)	// DI/SI already hold argc/argv, as rt0_go expects
    24
     25// _rt0_amd64_lib is common startup code for most amd64 systems when
     26// using -buildmode=c-archive or -buildmode=c-shared. The linker will
     27// arrange to invoke this function as a global constructor (for
     28// c-archive) or when the shared library is loaded (for c-shared).
     29// We expect argc and argv to be passed in the usual C ABI registers
     30// DI and SI.
     31TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
     32	// Align stack per ELF ABI requirements.
     33	MOVQ	SP, AX
     34	ANDQ	$~15, SP
     35	// Save C ABI callee-saved registers, as caller may need them.
     36	MOVQ	BX, 0x10(SP)
     37	MOVQ	BP, 0x18(SP)
     38	MOVQ	R12, 0x20(SP)
     39	MOVQ	R13, 0x28(SP)
     40	MOVQ	R14, 0x30(SP)
     41	MOVQ	R15, 0x38(SP)
     42	MOVQ	AX, 0x40(SP)	// original (pre-alignment) SP, restored on exit
     43
     44	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
     45	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
     46
     47	// Synchronous initialization.
     48	CALL	runtime·libpreinit(SB)
     49
     50	// Create a new thread to finish Go runtime initialization.
     51	MOVQ	_cgo_sys_thread_create(SB), AX
     52	TESTQ	AX, AX
     53	JZ	nocgo	// no cgo thread-create helper: fall back to newosproc0
     54	MOVQ	$_rt0_amd64_lib_go(SB), DI
     55	MOVQ	$0, SI
     56	CALL	AX
     57	JMP	restore
     58
     59nocgo:
     60	MOVQ	$0x800000, 0(SP)		// stacksize
     61	MOVQ	$_rt0_amd64_lib_go(SB), AX
     62	MOVQ	AX, 8(SP)			// fn
     63	CALL	runtime·newosproc0(SB)
     64
     65restore:
     66	// Restore C callee-saved registers and the caller's original SP.
     67	MOVQ	0x10(SP), BX
     68	MOVQ	0x18(SP), BP
     69	MOVQ	0x20(SP), R12
     70	MOVQ	0x28(SP), R13
     71	MOVQ	0x30(SP), R14
     72	MOVQ	0x38(SP), R15
     73	MOVQ	0x40(SP), SP
     74	RET
    74
     75// _rt0_amd64_lib_go initializes the Go runtime.
     76// This is started in a separate thread by _rt0_amd64_lib.
     77TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
     78	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
     79	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
     80	JMP	runtime·rt0_go(SB)	// enter the normal startup path with the saved argc/argv
    81
// argc/argv stashed by _rt0_amd64_lib and consumed later (possibly on a
// different thread) by _rt0_amd64_lib_go. NOPTR: they hold no Go pointers.
     82DATA _rt0_amd64_lib_argc<>(SB)/8, $0
     83GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
     84DATA _rt0_amd64_lib_argv<>(SB)/8, $0
     85GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86
// rt0_go is the common runtime bootstrap: it sets up g0's stack bounds,
// probes the CPU, initializes TLS (or lets _cgo_init do it), wires up
// m0 <-> g0, runs schedinit, queues runtime.main as the first goroutine,
// and starts this thread as an M. It never returns.
     87TEXT runtime·rt0_go(SB),NOSPLIT,$0
     88	// copy arguments forward on an even stack
     89	MOVQ	DI, AX		// argc
     90	MOVQ	SI, BX		// argv
     91	SUBQ	$(4*8+7), SP		// 2args 2auto
     92	ANDQ	$~15, SP
     93	MOVQ	AX, 16(SP)
     94	MOVQ	BX, 24(SP)
     95
     96	// create istack out of the given (operating system) stack.
     97	// _cgo_init may update stackguard.
     98	MOVQ	$runtime·g0(SB), DI
     99	LEAQ	(-64*1024+104)(SP), BX	// assume >=64 kB of OS stack below SP
    100	MOVQ	BX, g_stackguard0(DI)
    101	MOVQ	BX, g_stackguard1(DI)
    102	MOVQ	BX, (g_stack+stack_lo)(DI)
    103	MOVQ	SP, (g_stack+stack_hi)(DI)
    104
    105	// find out information about the processor we're on
    106	MOVL	$0, AX
    107	CPUID
    108	MOVL	AX, SI	// save max CPUID leaf (AX result of leaf 0)
    109	CMPL	AX, $0
    110	JE	nocpuinfo
    111
    112	// Figure out how to serialize RDTSC.
    113	// On Intel processors LFENCE is enough. AMD requires MFENCE.
    114	// Don't know about the rest, so let's do MFENCE.
    115	CMPL	BX, $0x756E6547  // "Genu"
    116	JNE	notintel
    117	CMPL	DX, $0x49656E69  // "ineI"
    118	JNE	notintel
    119	CMPL	CX, $0x6C65746E  // "ntel"
    120	JNE	notintel
    121	MOVB	$1, runtime·isIntel(SB)
    122	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
    123notintel:
    124
    125	// Load EAX=1 cpuid flags
    126	MOVL	$1, AX
    127	CPUID
    128	MOVL	AX, runtime·processorVersionInfo(SB)
    129
    130nocpuinfo:
    131	// if there is an _cgo_init, call it.
    132	MOVQ	_cgo_init(SB), AX
    133	TESTQ	AX, AX
    134	JZ	needtls
    135	// arg 1: g0, already in DI
    136	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
    137#ifdef GOOS_android
    138	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
    139	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
    140	// Compensate for tls_g (+16).
    141	MOVQ	-16(TLS), CX
    142#else
    143	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
    144	MOVQ	$0, CX
    145#endif
    146#ifdef GOOS_windows
    147	// Adjust for the Win64 calling convention.
    148	MOVQ	CX, R9 // arg 4
    149	MOVQ	DX, R8 // arg 3
    150	MOVQ	SI, DX // arg 2
    151	MOVQ	DI, CX // arg 1
    152#endif
    153	CALL	AX
    154
    155	// update stackguard after _cgo_init
    156	MOVQ	$runtime·g0(SB), CX
    157	MOVQ	(g_stack+stack_lo)(CX), AX
    158	ADDQ	$const__StackGuard, AX
    159	MOVQ	AX, g_stackguard0(CX)
    160	MOVQ	AX, g_stackguard1(CX)
    161
    162#ifndef GOOS_windows
    163	JMP ok
    164#endif
    165needtls:
    166#ifdef GOOS_plan9
    167	// skip TLS setup on Plan 9
    168	JMP ok
    169#endif
    170#ifdef GOOS_solaris
    171	// skip TLS setup on Solaris
    172	JMP ok
    173#endif
    174#ifdef GOOS_illumos
    175	// skip TLS setup on illumos
    176	JMP ok
    177#endif
    178#ifdef GOOS_darwin
    179	// skip TLS setup on Darwin
    180	JMP ok
    181#endif
    182
    183	LEAQ	runtime·m0+m_tls(SB), DI
    184	CALL	runtime·settls(SB)
    185
    186	// store through it, to make sure it works
    187	get_tls(BX)
    188	MOVQ	$0x123, g(BX)
    189	MOVQ	runtime·m0+m_tls(SB), AX
    190	CMPQ	AX, $0x123
    191	JEQ 2(PC)	// TLS round-trips: skip the abort
    192	CALL	runtime·abort(SB)
    193ok:
    194	// set the per-goroutine and per-mach "registers"
    195	get_tls(BX)
    196	LEAQ	runtime·g0(SB), CX
    197	MOVQ	CX, g(BX)
    198	LEAQ	runtime·m0(SB), AX
    199
    200	// save m->g0 = g0
    201	MOVQ	CX, m_g0(AX)
    202	// save m0 to g0->m
    203	MOVQ	AX, g_m(CX)
    204
    205	CLD				// convention is D is always left cleared
    206	CALL	runtime·check(SB)
    207
    208	MOVL	16(SP), AX		// copy argc
    209	MOVL	AX, 0(SP)
    210	MOVQ	24(SP), AX		// copy argv
    211	MOVQ	AX, 8(SP)
    212	CALL	runtime·args(SB)
    213	CALL	runtime·osinit(SB)
    214	CALL	runtime·schedinit(SB)
    215
    216	// create a new goroutine to start program
    217	MOVQ	$runtime·mainPC(SB), AX		// entry
    218	PUSHQ	AX
    219	PUSHQ	$0			// arg size
    220	CALL	runtime·newproc(SB)
    221	POPQ	AX
    222	POPQ	AX
    223
    224	// start this M
    225	CALL	runtime·mstart(SB)
    226
    227	CALL	runtime·abort(SB)	// mstart should never return
    228	RET
    229
    230	// Prevent dead-code elimination of debugCallV1, which is
    231	// intended to be called by debuggers.
    232	MOVQ	$runtime·debugCallV1(SB), AX
    233	RET
   234
// mainPC is the address of runtime.main, in read-only data; rt0_go pushes
// it as the entry PC for the first goroutine via newproc.
    235DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
    236GLOBL	runtime·mainPC(SB),RODATA,$8
   237
// func breakpoint()
    238TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
    239	BYTE	$0xcc	// INT 3: trap into an attached debugger
    240	RET
   241
// func asminit()
// Architecture-specific assembly initialization; a no-op on amd64.
    242TEXT runtime·asminit(SB),NOSPLIT,$0-0
    243	// No per-thread init.
    244	RET
   245
    246/*
    247 *  go-routine
    248 */
    249
    250// func gosave(buf *gobuf)
    251// save state in Gobuf; setjmp
    252TEXT runtime·gosave(SB), NOSPLIT, $0-8
    253	MOVQ	buf+0(FP), AX		// gobuf
    254	LEAQ	buf+0(FP), BX		// caller's SP
    255	MOVQ	BX, gobuf_sp(AX)
    256	MOVQ	0(SP), BX		// caller's PC
    257	MOVQ	BX, gobuf_pc(AX)
    258	MOVQ	$0, gobuf_ret(AX)
    259	MOVQ	BP, gobuf_bp(AX)
    260	// Assert ctxt is zero. See func save.
    261	MOVQ	gobuf_ctxt(AX), BX
    262	TESTQ	BX, BX
    263	JZ	2(PC)	// ctxt == 0: skip the failure path
    264	CALL	runtime·badctxt(SB)
    265	get_tls(CX)
    266	MOVQ	g(CX), BX
    267	MOVQ	BX, gobuf_g(AX)	// record the current g in the buf
    268	RET
   269
    270// func gogo(buf *gobuf)
    271// restore state from Gobuf; longjmp
    272TEXT runtime·gogo(SB), NOSPLIT, $16-8
    273	MOVQ	buf+0(FP), BX		// gobuf
    274	MOVQ	gobuf_g(BX), DX
    275	MOVQ	0(DX), CX		// make sure g != nil
    276	get_tls(CX)
    277	MOVQ	DX, g(CX)	// switch the current g
    278	MOVQ	gobuf_sp(BX), SP	// restore SP
    279	MOVQ	gobuf_ret(BX), AX
    280	MOVQ	gobuf_ctxt(BX), DX
    281	MOVQ	gobuf_bp(BX), BP
    282	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
    283	MOVQ	$0, gobuf_ret(BX)
    284	MOVQ	$0, gobuf_ctxt(BX)
    285	MOVQ	$0, gobuf_bp(BX)
    286	MOVQ	gobuf_pc(BX), BX
    287	JMP	BX	// resume at the saved PC
   288
    289// func mcall(fn func(*g))
    290// Switch to m->g0's stack, call fn(g).
    291// Fn must never return. It should gogo(&g->sched)
    292// to keep running g.
    293TEXT runtime·mcall(SB), NOSPLIT, $0-8
    294	MOVQ	fn+0(FP), DI
    295
    296	get_tls(CX)
    297	MOVQ	g(CX), AX	// save state in g->sched
    298	MOVQ	0(SP), BX	// caller's PC
    299	MOVQ	BX, (g_sched+gobuf_pc)(AX)
    300	LEAQ	fn+0(FP), BX	// caller's SP
    301	MOVQ	BX, (g_sched+gobuf_sp)(AX)
    302	MOVQ	AX, (g_sched+gobuf_g)(AX)
    303	MOVQ	BP, (g_sched+gobuf_bp)(AX)
    304
    305	// switch to m->g0 & its stack, call fn
    306	MOVQ	g(CX), BX
    307	MOVQ	g_m(BX), BX
    308	MOVQ	m_g0(BX), SI
    309	CMPQ	SI, AX	// if g == m->g0 call badmcall
    310	JNE	3(PC)
    311	MOVQ	$runtime·badmcall(SB), AX
    312	JMP	AX
    313	MOVQ	SI, g(CX)	// g = m->g0
    314	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
    315	PUSHQ	AX	// argument to fn: the g we switched away from
    316	MOVQ	DI, DX	// DX = funcval (closure context register)
    317	MOVQ	0(DI), DI	// code pointer from the funcval
    318	CALL	DI
    319	POPQ	AX
    320	MOVQ	$runtime·badmcall2(SB), AX	// fn must not return
    321	JMP	AX
    322	RET
   323
    324// systemstack_switch is a dummy routine that systemstack leaves at the bottom
    325// of the G stack. We need to distinguish the routine that
    326// lives at the bottom of the G stack from the one that lives
    327// at the top of the system stack because the one at the top of
    328// the system stack terminates the stack walk (see topofstack()).
    329TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
    330	RET	// deliberately empty: only this function's PC value matters
   331
    332// func systemstack(fn func())
    333TEXT runtime·systemstack(SB), NOSPLIT, $0-8
    334	MOVQ	fn+0(FP), DI	// DI = fn
    335	get_tls(CX)
    336	MOVQ	g(CX), AX	// AX = g
    337	MOVQ	g_m(AX), BX	// BX = m
    338
    339	CMPQ	AX, m_gsignal(BX)
    340	JEQ	noswitch	// already on the signal stack: call fn in place
    341
    342	MOVQ	m_g0(BX), DX	// DX = g0
    343	CMPQ	AX, DX
    344	JEQ	noswitch	// already on g0: call fn in place
    345
    346	CMPQ	AX, m_curg(BX)
    347	JNE	bad
    348
    349	// switch stacks
    350	// save our state in g->sched. Pretend to
    351	// be systemstack_switch if the G stack is scanned.
    352	MOVQ	$runtime·systemstack_switch(SB), SI
    353	MOVQ	SI, (g_sched+gobuf_pc)(AX)
    354	MOVQ	SP, (g_sched+gobuf_sp)(AX)
    355	MOVQ	AX, (g_sched+gobuf_g)(AX)
    356	MOVQ	BP, (g_sched+gobuf_bp)(AX)
    357
    358	// switch to g0
    359	MOVQ	DX, g(CX)
    360	MOVQ	(g_sched+gobuf_sp)(DX), BX
    361	// make it look like mstart called systemstack on g0, to stop traceback
    362	SUBQ	$8, BX
    363	MOVQ	$runtime·mstart(SB), DX
    364	MOVQ	DX, 0(BX)	// fake return PC: runtime·mstart
    365	MOVQ	BX, SP
    366
    367	// call target function
    368	MOVQ	DI, DX	// DX = funcval (closure context register)
    369	MOVQ	0(DI), DI	// code pointer from the funcval
    370	CALL	DI
    371
    372	// switch back to g
    373	get_tls(CX)
    374	MOVQ	g(CX), AX
    375	MOVQ	g_m(AX), BX
    376	MOVQ	m_curg(BX), AX
    377	MOVQ	AX, g(CX)
    378	MOVQ	(g_sched+gobuf_sp)(AX), SP
    379	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear saved SP to help the GC
    380	RET
    381
    382noswitch:
    383	// already on m stack; tail call the function
    384	// Using a tail call here cleans up tracebacks since we won't stop
    385	// at an intermediate systemstack.
    386	MOVQ	DI, DX
    387	MOVQ	0(DI), DI
    388	JMP	DI
    389
    390bad:
    391	// Bad: g is not gsignal, not g0, not curg. What is it?
    392	MOVQ	$runtime·badsystemstack(SB), AX
    393	CALL	AX
    394	INT	$3	// unreachable in a healthy runtime; hard stop
   395
   396
    397/*
    398 * support for morestack
    399 */
    400
    401// Called during function prolog when more stack is needed.
    402//
    403// The traceback routines see morestack on a g0 as being
    404// the top of a stack (for example, morestack calling newstack
    405// calling the scheduler calling newm calling gc), so we must
    406// record an argument size. For that purpose, it has no arguments.
    407TEXT runtime·morestack(SB),NOSPLIT,$0-0
    408	// Cannot grow scheduler stack (m->g0).
    409	get_tls(CX)
    410	MOVQ	g(CX), BX
    411	MOVQ	g_m(BX), BX
    412	MOVQ	m_g0(BX), SI
    413	CMPQ	g(CX), SI
    414	JNE	3(PC)	// not on g0: skip the fatal path
    415	CALL	runtime·badmorestackg0(SB)
    416	CALL	runtime·abort(SB)
    417
    418	// Cannot grow signal stack (m->gsignal).
    419	MOVQ	m_gsignal(BX), SI
    420	CMPQ	g(CX), SI
    421	JNE	3(PC)	// not on gsignal: skip the fatal path
    422	CALL	runtime·badmorestackgsignal(SB)
    423	CALL	runtime·abort(SB)
    424
    425	// Called from f.
    426	// Set m->morebuf to f's caller.
    427	NOP	SP	// tell vet SP changed - stop checking offsets
    428	MOVQ	8(SP), AX	// f's caller's PC
    429	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
    430	LEAQ	16(SP), AX	// f's caller's SP
    431	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
    432	get_tls(CX)
    433	MOVQ	g(CX), SI
    434	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
    435
    436	// Set g->sched to context in f.
    437	MOVQ	0(SP), AX // f's PC
    438	MOVQ	AX, (g_sched+gobuf_pc)(SI)
    439	MOVQ	SI, (g_sched+gobuf_g)(SI)
    440	LEAQ	8(SP), AX // f's SP
    441	MOVQ	AX, (g_sched+gobuf_sp)(SI)
    442	MOVQ	BP, (g_sched+gobuf_bp)(SI)
    443	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX carries f's closure context
    444
    445	// Call newstack on m->g0's stack.
    446	MOVQ	m_g0(BX), BX
    447	MOVQ	BX, g(CX)
    448	MOVQ	(g_sched+gobuf_sp)(BX), SP
    449	CALL	runtime·newstack(SB)
    450	CALL	runtime·abort(SB)	// crash if newstack returns
    451	RET
   452
    453// morestack but not preserving ctxt.
    454TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
    455	MOVL	$0, DX	// clear the ctxt register before falling into morestack
    456	JMP	runtime·morestack(SB)
   457
    458// reflectcall: call a function with the given argument list
    459// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
    460// we don't have variable-sized frames, so we use a small number
    461// of constant-sized-frame functions to encode a few bits of size in the pc.
    462// Caution: ugly multiline assembly macros in your future!
    463
// DISPATCH jumps to NAME when the argsize in CX is <= MAXSIZE;
// otherwise it falls through to the next DISPATCH line.
    464#define DISPATCH(NAME,MAXSIZE)		\
    465	CMPQ	CX, $MAXSIZE;		\
    466	JA	3(PC);			\
    467	MOVQ	$NAME(SB), AX;		\
    468	JMP	AX
    469// Note: can't just "JMP NAME(SB)" - bad inlining results.
   470
// reflectcall dispatches to the smallest call<N> whose fixed frame fits
// the argument block; each power-of-two size has its own CALLFN instance.
    471TEXT ·reflectcall(SB), NOSPLIT, $0-32
    472	MOVLQZX argsize+24(FP), CX	// CX = argsize, zero-extended; drives DISPATCH
    473	DISPATCH(runtime·call32, 32)
    474	DISPATCH(runtime·call64, 64)
    475	DISPATCH(runtime·call128, 128)
    476	DISPATCH(runtime·call256, 256)
    477	DISPATCH(runtime·call512, 512)
    478	DISPATCH(runtime·call1024, 1024)
    479	DISPATCH(runtime·call2048, 2048)
    480	DISPATCH(runtime·call4096, 4096)
    481	DISPATCH(runtime·call8192, 8192)
    482	DISPATCH(runtime·call16384, 16384)
    483	DISPATCH(runtime·call32768, 32768)
    484	DISPATCH(runtime·call65536, 65536)
    485	DISPATCH(runtime·call131072, 131072)
    486	DISPATCH(runtime·call262144, 262144)
    487	DISPATCH(runtime·call524288, 524288)
    488	DISPATCH(runtime·call1048576, 1048576)
    489	DISPATCH(runtime·call2097152, 2097152)
    490	DISPATCH(runtime·call4194304, 4194304)
    491	DISPATCH(runtime·call8388608, 8388608)
    492	DISPATCH(runtime·call16777216, 16777216)
    493	DISPATCH(runtime·call33554432, 33554432)
    494	DISPATCH(runtime·call67108864, 67108864)
    495	DISPATCH(runtime·call134217728, 134217728)
    496	DISPATCH(runtime·call268435456, 268435456)
    497	DISPATCH(runtime·call536870912, 536870912)
    498	DISPATCH(runtime·call1073741824, 1073741824)
    499	MOVQ	$runtime·badreflectcall(SB), AX	// argsize > 1 GB: fatal
    500	JMP	AX
   501
// CALLFN defines one call<N> function: copy the caller's argument block
// onto this frame, call f (funcval in DX per the closure convention),
// then copy the results back through callRet/reflectcallmove so write
// barriers are applied for pointer results.
    502#define CALLFN(NAME,MAXSIZE)			\
    503TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
    504	NO_LOCAL_POINTERS;			\
    505	/* copy arguments to stack */		\
    506	MOVQ	argptr+16(FP), SI;		\
    507	MOVLQZX argsize+24(FP), CX;		\
    508	MOVQ	SP, DI;				\
    509	REP;MOVSB;				\
    510	/* call function */			\
    511	MOVQ	f+8(FP), DX;			\
    512	PCDATA  $PCDATA_StackMapIndex, $0;	\
    513	MOVQ	(DX), AX;			\
    514	CALL	AX;				\
    515	/* copy return values back */		\
    516	MOVQ	argtype+0(FP), DX;		\
    517	MOVQ	argptr+16(FP), DI;		\
    518	MOVLQZX	argsize+24(FP), CX;		\
    519	MOVLQZX	retoffset+28(FP), BX;		\
    520	MOVQ	SP, SI;				\
    521	ADDQ	BX, DI;				\
    522	ADDQ	BX, SI;				\
    523	SUBQ	BX, CX;				\
    524	CALL	callRet<>(SB);			\
    525	RET
   526
    527// callRet copies return values back at the end of call*. This is a
    528// separate function so it can allocate stack space for the arguments
    529// to reflectcallmove. It does not follow the Go ABI; it expects its
    530// arguments in registers.
    531TEXT callRet<>(SB), NOSPLIT, $32-0
    532	NO_LOCAL_POINTERS
    533	MOVQ	DX, 0(SP)	// argtype
    534	MOVQ	DI, 8(SP)	// destination (caller's result area)
    535	MOVQ	SI, 16(SP)	// source (results on call*'s frame)
    536	MOVQ	CX, 24(SP)	// size in bytes
    537	CALL	runtime·reflectcallmove(SB)
    538	RET
   539
// Instantiate one call<N> function per power-of-two frame size,
// matching the DISPATCH table in reflectcall above.
    540CALLFN(·call32, 32)
    541CALLFN(·call64, 64)
    542CALLFN(·call128, 128)
    543CALLFN(·call256, 256)
    544CALLFN(·call512, 512)
    545CALLFN(·call1024, 1024)
    546CALLFN(·call2048, 2048)
    547CALLFN(·call4096, 4096)
    548CALLFN(·call8192, 8192)
    549CALLFN(·call16384, 16384)
    550CALLFN(·call32768, 32768)
    551CALLFN(·call65536, 65536)
    552CALLFN(·call131072, 131072)
    553CALLFN(·call262144, 262144)
    554CALLFN(·call524288, 524288)
    555CALLFN(·call1048576, 1048576)
    556CALLFN(·call2097152, 2097152)
    557CALLFN(·call4194304, 4194304)
    558CALLFN(·call8388608, 8388608)
    559CALLFN(·call16777216, 16777216)
    560CALLFN(·call33554432, 33554432)
    561CALLFN(·call67108864, 67108864)
    562CALLFN(·call134217728, 134217728)
    563CALLFN(·call268435456, 268435456)
    564CALLFN(·call536870912, 536870912)
    565CALLFN(·call1073741824, 1073741824)
   566
// func procyield(cycles uint32)
// Busy-wait for the given number of PAUSE iterations.
    567TEXT runtime·procyield(SB),NOSPLIT,$0-0
    568	MOVL	cycles+0(FP), AX
    569again:
    570	PAUSE	// CPU hint: spin-wait loop
    571	SUBL	$1, AX
    572	JNZ	again
    573	RET
   574
   575
// func publicationBarrier()
    576TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
    577	// Stores are already ordered on x86, so this is just a
    578	// compile barrier.
    579	RET
   580
    581// func jmpdefer(fv *funcval, argp uintptr)
    582// argp is a caller SP.
    583// called from deferreturn.
    584// 1. pop the caller
    585// 2. sub 5 bytes from the callers return
    586// 3. jmp to the argument
    587TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
    588	MOVQ	fv+0(FP), DX	// fn
    589	MOVQ	argp+8(FP), BX	// caller sp
    590	LEAQ	-8(BX), SP	// caller sp after CALL
    591	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
    592	SUBQ	$5, (SP)	// return to CALL again (5 = size of the CALL instruction)
    593	MOVQ	0(DX), BX	// code pointer from the funcval (ctxt stays in DX)
    594	JMP	BX	// but first run the deferred function
   595
    596// Save state of caller into g->sched. Smashes R8, R9.
    597TEXT gosave<>(SB),NOSPLIT,$0
    598	get_tls(R8)
    599	MOVQ	g(R8), R8	// R8 = current g
    600	MOVQ	0(SP), R9	// caller's PC
    601	MOVQ	R9, (g_sched+gobuf_pc)(R8)
    602	LEAQ	8(SP), R9	// caller's SP
    603	MOVQ	R9, (g_sched+gobuf_sp)(R8)
    604	MOVQ	$0, (g_sched+gobuf_ret)(R8)
    605	MOVQ	BP, (g_sched+gobuf_bp)(R8)
    606	// Assert ctxt is zero. See func save.
    607	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
    608	TESTQ	R9, R9
    609	JZ	2(PC)	// ctxt == 0: skip the failure path
    610	CALL	runtime·badctxt(SB)
    611	RET
   612
    613// func asmcgocall(fn, arg unsafe.Pointer) int32
    614// Call fn(arg) on the scheduler stack,
    615// aligned appropriately for the gcc ABI.
    616// See cgocall.go for more details.
    617TEXT ·asmcgocall(SB),NOSPLIT,$0-20
    618	MOVQ	fn+0(FP), AX
    619	MOVQ	arg+8(FP), BX
    620
    621	MOVQ	SP, DX	// remember the pre-switch SP
    622
    623	// Figure out if we need to switch to m->g0 stack.
    624	// We get called to create new OS threads too, and those
    625	// come in on the m->g0 stack already.
    626	get_tls(CX)
    627	MOVQ	g(CX), R8
    628	CMPQ	R8, $0
    629	JEQ	nosave	// no g at all (thread setup/teardown)
    630	MOVQ	g_m(R8), R8
    631	MOVQ	m_g0(R8), SI
    632	MOVQ	g(CX), DI
    633	CMPQ	SI, DI
    634	JEQ	nosave	// already on g0
    635	MOVQ	m_gsignal(R8), SI
    636	CMPQ	SI, DI
    637	JEQ	nosave	// already on the signal stack
    638
    639	// Switch to system stack.
    640	MOVQ	m_g0(R8), SI
    641	CALL	gosave<>(SB)
    642	MOVQ	SI, g(CX)
    643	MOVQ	(g_sched+gobuf_sp)(SI), SP
    644
    645	// Now on a scheduling stack (a pthread-created stack).
    646	// Make sure we have enough room for 4 stack-backed fast-call
    647	// registers as per windows amd64 calling convention.
    648	SUBQ	$64, SP
    649	ANDQ	$~15, SP	// alignment for gcc ABI
    650	MOVQ	DI, 48(SP)	// save g
    651	MOVQ	(g_stack+stack_hi)(DI), DI
    652	SUBQ	DX, DI
    653	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
    654	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
    655	MOVQ	BX, CX		// CX = first argument in Win64
    656	CALL	AX
    657
    658	// Restore registers, g, stack pointer.
    659	get_tls(CX)
    660	MOVQ	48(SP), DI	// reload g (stack may have been copied)
    661	MOVQ	(g_stack+stack_hi)(DI), SI
    662	SUBQ	40(SP), SI	// recompute SP from the saved depth
    663	MOVQ	DI, g(CX)
    664	MOVQ	SI, SP
    665
    666	MOVL	AX, ret+16(FP)	// C return value (EAX)
    667	RET
    668
    669nosave:
    670	// Running on a system stack, perhaps even without a g.
    671	// Having no g can happen during thread creation or thread teardown
    672	// (see needm/dropm on Solaris, for example).
    673	// This code is like the above sequence but without saving/restoring g
    674	// and without worrying about the stack moving out from under us
    675	// (because we're on a system stack, not a goroutine stack).
    676	// The above code could be used directly if already on a system stack,
    677	// but then the only path through this code would be a rare case on Solaris.
    678	// Using this code for all "already on system stack" calls exercises it more,
    679	// which should help keep it correct.
    680	SUBQ	$64, SP
    681	ANDQ	$~15, SP
    682	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
    683	MOVQ	DX, 40(SP)	// save original stack pointer
    684	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
    685	MOVQ	BX, CX		// CX = first argument in Win64
    686	CALL	AX
    687	MOVQ	40(SP), SI	// restore original stack pointer
    688	MOVQ	SI, SP
    689	MOVL	AX, ret+16(FP)
    690	RET
   691
    692// func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
    693// Turn the fn into a Go func (by taking its address) and call
    694// cgocallback_gofunc.
    695TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
    696	LEAQ	fn+0(FP), AX	// &fn acts as a funcval for the Go call below
    697	MOVQ	AX, 0(SP)
    698	MOVQ	frame+8(FP), AX
    699	MOVQ	AX, 8(SP)
    700	MOVQ	framesize+16(FP), AX
    701	MOVQ	AX, 16(SP)
    702	MOVQ	ctxt+24(FP), AX
    703	MOVQ	AX, 24(SP)
    704	MOVQ	$runtime·cgocallback_gofunc(SB), AX	// indirect call hides it from linker analysis
    705	CALL	AX
    706	RET
   707
    708// func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
    709// See cgocall.go for more details.
    710TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
    711	NO_LOCAL_POINTERS
    712
    713	// If g is nil, Go did not create the current thread.
    714	// Call needm to obtain one m for temporary use.
    715	// In this case, we're running on the thread stack, so there's
    716	// lots of space, but the linker doesn't know. Hide the call from
    717	// the linker analysis by using an indirect call through AX.
    718	get_tls(CX)
    719#ifdef GOOS_windows
    720	MOVL	$0, BX
    721	CMPQ	CX, $0
    722	JEQ	2(PC)	// TLS not set up yet: treat as g == nil
    723#endif
    724	MOVQ	g(CX), BX
    725	CMPQ	BX, $0
    726	JEQ	needm
    727	MOVQ	g_m(BX), BX
    728	MOVQ	BX, R8 // holds oldm until end of function
    729	JMP	havem
    730needm:
    731	MOVQ	$0, 0(SP)	// R8 = nil marks "m was borrowed via needm"
    732	MOVQ	$runtime·needm(SB), AX
    733	CALL	AX
    734	MOVQ	0(SP), R8
    735	get_tls(CX)
    736	MOVQ	g(CX), BX
    737	MOVQ	g_m(BX), BX
    738
    739	// Set m->sched.sp = SP, so that if a panic happens
    740	// during the function we are about to execute, it will
    741	// have a valid SP to run on the g0 stack.
    742	// The next few lines (after the havem label)
    743	// will save this SP onto the stack and then write
    744	// the same SP back to m->sched.sp. That seems redundant,
    745	// but if an unrecovered panic happens, unwindm will
    746	// restore the g->sched.sp from the stack location
    747	// and then systemstack will try to use it. If we don't set it here,
    748	// that restored SP will be uninitialized (typically 0) and
    749	// will not be usable.
    750	MOVQ	m_g0(BX), SI
    751	MOVQ	SP, (g_sched+gobuf_sp)(SI)
    752
    753havem:
    754	// Now there's a valid m, and we're running on its m->g0.
    755	// Save current m->g0->sched.sp on stack and then set it to SP.
    756	// Save current sp in m->g0->sched.sp in preparation for
    757	// switch back to m->curg stack.
    758	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
    759	MOVQ	m_g0(BX), SI
    760	MOVQ	(g_sched+gobuf_sp)(SI), AX
    761	MOVQ	AX, 0(SP)
    762	MOVQ	SP, (g_sched+gobuf_sp)(SI)
    763
    764	// Switch to m->curg stack and call runtime.cgocallbackg.
    765	// Because we are taking over the execution of m->curg
    766	// but *not* resuming what had been running, we need to
    767	// save that information (m->curg->sched) so we can restore it.
    768	// We can restore m->curg->sched.sp easily, because calling
    769	// runtime.cgocallbackg leaves SP unchanged upon return.
    770	// To save m->curg->sched.pc, we push it onto the stack.
    771	// This has the added benefit that it looks to the traceback
    772	// routine like cgocallbackg is going to return to that
    773	// PC (because the frame we allocate below has the same
    774	// size as cgocallback_gofunc's frame declared above)
    775	// so that the traceback will seamlessly trace back into
    776	// the earlier calls.
    777	//
    778	// In the new goroutine, 8(SP) holds the saved R8.
    779	MOVQ	m_curg(BX), SI
    780	MOVQ	SI, g(CX)
    781	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
    782	MOVQ	(g_sched+gobuf_pc)(SI), BX
    783	MOVQ	BX, -8(DI)	// push curg's saved PC onto its stack
    784	// Compute the size of the frame, including return PC and, if
    785	// GOEXPERIMENT=framepointer, the saved base pointer
    786	MOVQ	ctxt+24(FP), BX
    787	LEAQ	fv+0(FP), AX	// AX = our frame size (distance from SP to first arg)
    788	SUBQ	SP, AX
    789	SUBQ	AX, DI	// carve an equally sized frame on curg's stack
    790	MOVQ	DI, SP
    791
    792	MOVQ	R8, 8(SP)	// preserve oldm across cgocallbackg
    793	MOVQ	BX, 0(SP)	// ctxt argument for cgocallbackg
    794	CALL	runtime·cgocallbackg(SB)
    795	MOVQ	8(SP), R8
    796
    797	// Compute the size of the frame again. FP and SP have
    798	// completely different values here than they did above,
    799	// but only their difference matters.
    800	LEAQ	fv+0(FP), AX
    801	SUBQ	SP, AX
    802
    803	// Restore g->sched (== m->curg->sched) from saved values.
    804	get_tls(CX)
    805	MOVQ	g(CX), SI
    806	MOVQ	SP, DI
    807	ADDQ	AX, DI
    808	MOVQ	-8(DI), BX	// recover the PC pushed at -8(DI) above
    809	MOVQ	BX, (g_sched+gobuf_pc)(SI)
    810	MOVQ	DI, (g_sched+gobuf_sp)(SI)
    811
    812	// Switch back to m->g0's stack and restore m->g0->sched.sp.
    813	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
    814	// so we do not have to restore it.)
    815	MOVQ	g(CX), BX
    816	MOVQ	g_m(BX), BX
    817	MOVQ	m_g0(BX), SI
    818	MOVQ	SI, g(CX)
    819	MOVQ	(g_sched+gobuf_sp)(SI), SP
    820	MOVQ	0(SP), AX
    821	MOVQ	AX, (g_sched+gobuf_sp)(SI)
    822
    823	// If the m on entry was nil, we called needm above to borrow an m
    824	// for the duration of the call. Since the call is over, return it with dropm.
    825	CMPQ	R8, $0
    826	JNE 3(PC)	// oldm was non-nil: this thread owns its m, skip dropm
    827	MOVQ	$runtime·dropm(SB), AX
    828	CALL	AX
    829
    830	// Done!
    831	RET
   832
    833// func setg(gg *g)
    834// set g. for use by needm.
    835TEXT runtime·setg(SB), NOSPLIT, $0-8
    836	MOVQ	gg+0(FP), BX
    837#ifdef GOOS_windows
    838	CMPQ	BX, $0
    839	JNE	settls
    840	MOVQ	$0, 0x28(GS)	// clear the TEB slot the runtime uses for g's TLS
    841	RET
    842settls:
    843	MOVQ	g_m(BX), AX
    844	LEAQ	m_tls(AX), AX
    845	MOVQ	AX, 0x28(GS)	// point the TEB slot at this m's TLS array
    846#endif
    847	get_tls(CX)
    848	MOVQ	BX, g(CX)
    849	RET
   850
    851// void setg_gcc(G*); set g called from gcc.
    852TEXT setg_gcc<>(SB),NOSPLIT,$0
    853	get_tls(AX)
    854	MOVQ	DI, g(AX)	// DI = G* per the C calling convention
    855	RET
   856
// func abort()
// Crash hard via a breakpoint trap; never returns.
    857TEXT runtime·abort(SB),NOSPLIT,$0-0
    858	INT	$3
    859loop:
    860	JMP	loop	// spin forever in case the trap is somehow resumed
   861
    862// check that SP is in range [g->stack.lo, g->stack.hi)
    863TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
    864	get_tls(CX)
    865	MOVQ	g(CX), AX
    866	CMPQ	(g_stack+stack_hi)(AX), SP
    867	JHI	2(PC)	// stack.hi > SP: upper bound ok
    868	CALL	runtime·abort(SB)
    869	CMPQ	SP, (g_stack+stack_lo)(AX)
    870	JHI	2(PC)	// SP > stack.lo: lower bound ok
    871	CALL	runtime·abort(SB)
    872	RET
   873
    874// func cputicks() int64
// Read the CPU timestamp counter, serialized with LFENCE on Intel
// (see the vendor check in rt0_go) or MFENCE elsewhere.
    875TEXT runtime·cputicks(SB),NOSPLIT,$0-0
    876	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
    877	JNE	mfence
    878	LFENCE
    879	JMP	done
    880mfence:
    881	MFENCE
    882done:
    883	RDTSC	// result in EDX:EAX
    884	SHLQ	$32, DX
    885	ADDQ	DX, AX	// combine into a 64-bit tick count
    886	MOVQ	AX, ret+0(FP)
    887	RET
   888
    889// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
    890// hash function using AES hardware instructions
    891TEXT runtime·memhash(SB),NOSPLIT,$0-32
    892	CMPB	runtime·useAeshash(SB), $0
    893	JEQ	noaes
    894	MOVQ	p+0(FP), AX	// ptr to data
    895	MOVQ	s+16(FP), CX	// size
    896	LEAQ	ret+24(FP), DX	// aeshashbody writes the result through DX
    897	JMP	aeshashbody<>(SB)
    898noaes:
    899	JMP	runtime·memhashFallback(SB)	// software hash when AES-NI is unavailable
   900
    901// func strhash(p unsafe.Pointer, h uintptr) uintptr
    902TEXT runtime·strhash(SB),NOSPLIT,$0-24
    903	CMPB	runtime·useAeshash(SB), $0
    904	JEQ	noaes
    905	MOVQ	p+0(FP), AX	// ptr to string struct
    906	MOVQ	8(AX), CX	// length of string
    907	MOVQ	(AX), AX	// string data
    908	LEAQ	ret+16(FP), DX	// aeshashbody writes the result through DX
    909	JMP	aeshashbody<>(SB)
    910noaes:
    911	JMP	runtime·strhashFallback(SB)	// software hash when AES-NI is unavailable
   912
   913// AX: data
   914// CX: length
   915// DX: address to put return value
   916TEXT aeshashbody<>(SB),NOSPLIT,$0-0
   917	// Fill an SSE register with our seeds.
   918	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
   919	PINSRW	$4, CX, X0			// 16 bits of length
   920	PSHUFHW $0, X0, X0			// repeat length 4 times total
   921	MOVO	X0, X1				// save unscrambled seed
   922	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
   923	AESENC	X0, X0				// scramble seed
   924
   925	CMPQ	CX, $16
   926	JB	aes0to15
   927	JE	aes16
   928	CMPQ	CX, $32
   929	JBE	aes17to32
   930	CMPQ	CX, $64
   931	JBE	aes33to64
   932	CMPQ	CX, $128
   933	JBE	aes65to128
   934	JMP	aes129plus
   935
   936aes0to15:
   937	TESTQ	CX, CX
   938	JE	aes0
   939
   940	ADDQ	$16, AX
   941	TESTW	$0xff0, AX
   942	JE	endofpage
   943
   944	// 16 bytes loaded at this address won't cross
   945	// a page boundary, so we can load it directly.
   946	MOVOU	-16(AX), X1
   947	ADDQ	CX, CX
   948	MOVQ	$masks<>(SB), AX
   949	PAND	(AX)(CX*8), X1
   950final1:
   951	PXOR	X0, X1	// xor data with seed
   952	AESENC	X1, X1	// scramble combo 3 times
   953	AESENC	X1, X1
   954	AESENC	X1, X1
   955	MOVQ	X1, (DX)
   956	RET
   957
   958endofpage:
   959	// address ends in 1111xxxx. Might be up against
   960	// a page boundary, so load ending at last byte.
   961	// Then shift bytes down using pshufb.
   962	MOVOU	-32(AX)(CX*1), X1
   963	ADDQ	CX, CX
   964	MOVQ	$shifts<>(SB), AX
   965	PSHUFB	(AX)(CX*8), X1
   966	JMP	final1
   967
   968aes0:
   969	// Return scrambled input seed
   970	AESENC	X0, X0
   971	MOVQ	X0, (DX)
   972	RET
   973
   974aes16:
   975	MOVOU	(AX), X1
   976	JMP	final1
   977
   978aes17to32:
   979	// make second starting seed
   980	PXOR	runtime·aeskeysched+16(SB), X1
   981	AESENC	X1, X1
   982
   983	// load data to be hashed
   984	MOVOU	(AX), X2
   985	MOVOU	-16(AX)(CX*1), X3
   986
   987	// xor with seed
   988	PXOR	X0, X2
   989	PXOR	X1, X3
   990
   991	// scramble 3 times
   992	AESENC	X2, X2
   993	AESENC	X3, X3
   994	AESENC	X2, X2
   995	AESENC	X3, X3
   996	AESENC	X2, X2
   997	AESENC	X3, X3
   998
   999	// combine results
  1000	PXOR	X3, X2
  1001	MOVQ	X2, (DX)
  1002	RET
  1003
  1004aes33to64:
  1005	// make 3 more starting seeds
  1006	MOVO	X1, X2
  1007	MOVO	X1, X3
  1008	PXOR	runtime·aeskeysched+16(SB), X1
  1009	PXOR	runtime·aeskeysched+32(SB), X2
  1010	PXOR	runtime·aeskeysched+48(SB), X3
  1011	AESENC	X1, X1
  1012	AESENC	X2, X2
  1013	AESENC	X3, X3
  1014
  1015	MOVOU	(AX), X4
  1016	MOVOU	16(AX), X5
  1017	MOVOU	-32(AX)(CX*1), X6
  1018	MOVOU	-16(AX)(CX*1), X7
  1019
  1020	PXOR	X0, X4
  1021	PXOR	X1, X5
  1022	PXOR	X2, X6
  1023	PXOR	X3, X7
  1024
  1025	AESENC	X4, X4
  1026	AESENC	X5, X5
  1027	AESENC	X6, X6
  1028	AESENC	X7, X7
  1029
  1030	AESENC	X4, X4
  1031	AESENC	X5, X5
  1032	AESENC	X6, X6
  1033	AESENC	X7, X7
  1034
  1035	AESENC	X4, X4
  1036	AESENC	X5, X5
  1037	AESENC	X6, X6
  1038	AESENC	X7, X7
  1039
  1040	PXOR	X6, X4
  1041	PXOR	X7, X5
  1042	PXOR	X5, X4
  1043	MOVQ	X4, (DX)
  1044	RET
  1045
  1046aes65to128:
  1047	// make 7 more starting seeds
  1048	MOVO	X1, X2
  1049	MOVO	X1, X3
  1050	MOVO	X1, X4
  1051	MOVO	X1, X5
  1052	MOVO	X1, X6
  1053	MOVO	X1, X7
  1054	PXOR	runtime·aeskeysched+16(SB), X1
  1055	PXOR	runtime·aeskeysched+32(SB), X2
  1056	PXOR	runtime·aeskeysched+48(SB), X3
  1057	PXOR	runtime·aeskeysched+64(SB), X4
  1058	PXOR	runtime·aeskeysched+80(SB), X5
  1059	PXOR	runtime·aeskeysched+96(SB), X6
  1060	PXOR	runtime·aeskeysched+112(SB), X7
  1061	AESENC	X1, X1
  1062	AESENC	X2, X2
  1063	AESENC	X3, X3
  1064	AESENC	X4, X4
  1065	AESENC	X5, X5
  1066	AESENC	X6, X6
  1067	AESENC	X7, X7
  1068
  1069	// load data
  1070	MOVOU	(AX), X8
  1071	MOVOU	16(AX), X9
  1072	MOVOU	32(AX), X10
  1073	MOVOU	48(AX), X11
  1074	MOVOU	-64(AX)(CX*1), X12
  1075	MOVOU	-48(AX)(CX*1), X13
  1076	MOVOU	-32(AX)(CX*1), X14
  1077	MOVOU	-16(AX)(CX*1), X15
  1078
  1079	// xor with seed
  1080	PXOR	X0, X8
  1081	PXOR	X1, X9
  1082	PXOR	X2, X10
  1083	PXOR	X3, X11
  1084	PXOR	X4, X12
  1085	PXOR	X5, X13
  1086	PXOR	X6, X14
  1087	PXOR	X7, X15
  1088
  1089	// scramble 3 times
  1090	AESENC	X8, X8
  1091	AESENC	X9, X9
  1092	AESENC	X10, X10
  1093	AESENC	X11, X11
  1094	AESENC	X12, X12
  1095	AESENC	X13, X13
  1096	AESENC	X14, X14
  1097	AESENC	X15, X15
  1098
  1099	AESENC	X8, X8
  1100	AESENC	X9, X9
  1101	AESENC	X10, X10
  1102	AESENC	X11, X11
  1103	AESENC	X12, X12
  1104	AESENC	X13, X13
  1105	AESENC	X14, X14
  1106	AESENC	X15, X15
  1107
  1108	AESENC	X8, X8
  1109	AESENC	X9, X9
  1110	AESENC	X10, X10
  1111	AESENC	X11, X11
  1112	AESENC	X12, X12
  1113	AESENC	X13, X13
  1114	AESENC	X14, X14
  1115	AESENC	X15, X15
  1116
  1117	// combine results
  1118	PXOR	X12, X8
  1119	PXOR	X13, X9
  1120	PXOR	X14, X10
  1121	PXOR	X15, X11
  1122	PXOR	X10, X8
  1123	PXOR	X11, X9
  1124	PXOR	X9, X8
  1125	MOVQ	X8, (DX)
  1126	RET
  1127
  1128aes129plus:
  1129	// make 7 more starting seeds
  1130	MOVO	X1, X2
  1131	MOVO	X1, X3
  1132	MOVO	X1, X4
  1133	MOVO	X1, X5
  1134	MOVO	X1, X6
  1135	MOVO	X1, X7
  1136	PXOR	runtime·aeskeysched+16(SB), X1
  1137	PXOR	runtime·aeskeysched+32(SB), X2
  1138	PXOR	runtime·aeskeysched+48(SB), X3
  1139	PXOR	runtime·aeskeysched+64(SB), X4
  1140	PXOR	runtime·aeskeysched+80(SB), X5
  1141	PXOR	runtime·aeskeysched+96(SB), X6
  1142	PXOR	runtime·aeskeysched+112(SB), X7
  1143	AESENC	X1, X1
  1144	AESENC	X2, X2
  1145	AESENC	X3, X3
  1146	AESENC	X4, X4
  1147	AESENC	X5, X5
  1148	AESENC	X6, X6
  1149	AESENC	X7, X7
  1150
  1151	// start with last (possibly overlapping) block
  1152	MOVOU	-128(AX)(CX*1), X8
  1153	MOVOU	-112(AX)(CX*1), X9
  1154	MOVOU	-96(AX)(CX*1), X10
  1155	MOVOU	-80(AX)(CX*1), X11
  1156	MOVOU	-64(AX)(CX*1), X12
  1157	MOVOU	-48(AX)(CX*1), X13
  1158	MOVOU	-32(AX)(CX*1), X14
  1159	MOVOU	-16(AX)(CX*1), X15
  1160
  1161	// xor in seed
  1162	PXOR	X0, X8
  1163	PXOR	X1, X9
  1164	PXOR	X2, X10
  1165	PXOR	X3, X11
  1166	PXOR	X4, X12
  1167	PXOR	X5, X13
  1168	PXOR	X6, X14
  1169	PXOR	X7, X15
  1170
  1171	// compute number of remaining 128-byte blocks
  1172	DECQ	CX
  1173	SHRQ	$7, CX
  1174
  1175aesloop:
  1176	// scramble state
  1177	AESENC	X8, X8
  1178	AESENC	X9, X9
  1179	AESENC	X10, X10
  1180	AESENC	X11, X11
  1181	AESENC	X12, X12
  1182	AESENC	X13, X13
  1183	AESENC	X14, X14
  1184	AESENC	X15, X15
  1185
  1186	// scramble state, xor in a block
  1187	MOVOU	(AX), X0
  1188	MOVOU	16(AX), X1
  1189	MOVOU	32(AX), X2
  1190	MOVOU	48(AX), X3
  1191	AESENC	X0, X8
  1192	AESENC	X1, X9
  1193	AESENC	X2, X10
  1194	AESENC	X3, X11
  1195	MOVOU	64(AX), X4
  1196	MOVOU	80(AX), X5
  1197	MOVOU	96(AX), X6
  1198	MOVOU	112(AX), X7
  1199	AESENC	X4, X12
  1200	AESENC	X5, X13
  1201	AESENC	X6, X14
  1202	AESENC	X7, X15
  1203
  1204	ADDQ	$128, AX
  1205	DECQ	CX
  1206	JNE	aesloop
  1207
  1208	// 3 more scrambles to finish
  1209	AESENC	X8, X8
  1210	AESENC	X9, X9
  1211	AESENC	X10, X10
  1212	AESENC	X11, X11
  1213	AESENC	X12, X12
  1214	AESENC	X13, X13
  1215	AESENC	X14, X14
  1216	AESENC	X15, X15
  1217	AESENC	X8, X8
  1218	AESENC	X9, X9
  1219	AESENC	X10, X10
  1220	AESENC	X11, X11
  1221	AESENC	X12, X12
  1222	AESENC	X13, X13
  1223	AESENC	X14, X14
  1224	AESENC	X15, X15
  1225	AESENC	X8, X8
  1226	AESENC	X9, X9
  1227	AESENC	X10, X10
  1228	AESENC	X11, X11
  1229	AESENC	X12, X12
  1230	AESENC	X13, X13
  1231	AESENC	X14, X14
  1232	AESENC	X15, X15
  1233
  1234	PXOR	X12, X8
  1235	PXOR	X13, X9
  1236	PXOR	X14, X10
  1237	PXOR	X15, X11
  1238	PXOR	X10, X8
  1239	PXOR	X11, X9
  1240	PXOR	X9, X8
  1241	MOVQ	X8, (DX)
  1242	RET
  1243
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
//
// Hash of a 4-byte value. When AES-NI is available, the seed h and the
// loaded word are mixed by three AESENC rounds against the per-process
// key schedule; otherwise control tail-jumps to the portable fallback.
TEXT runtime·memhash32(SB),NOSPLIT,$0-24
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed (low 64 bits of X0)
	PINSRD	$2, (AX), X0	// data: insert the 4 bytes into dword lane 2, above the seed
	AESENC	runtime·aeskeysched+0(SB), X0	// 3 scramble rounds with the random key schedule
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// result = low 64 bits of the scrambled state
	RET
noaes:
	JMP	runtime·memhash32Fallback(SB)
  1258
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
//
// Hash of an 8-byte value; same scheme as memhash32 but the data is
// inserted as a full quadword. Falls back to the portable
// implementation when AES-NI is unavailable.
TEXT runtime·memhash64(SB),NOSPLIT,$0-24
	CMPB	runtime·useAeshash(SB), $0
	JEQ	noaes
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed (low 64 bits of X0)
	PINSRQ	$1, (AX), X0	// data: insert the 8 bytes into qword lane 1, above the seed
	AESENC	runtime·aeskeysched+0(SB), X0	// 3 scramble rounds with the random key schedule
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)	// result = low 64 bits of the scrambled state
	RET
noaes:
	JMP	runtime·memhash64Fallback(SB)
  1273
// simple mask to get rid of data in the high part of the register.
//
// masks<> is a table of 16 byte-masks with a 16-byte stride; entry i
// (i = 0..15) has its low i bytes set to 0xff and the rest zero, so
// PANDing it against a 16-byte load keeps exactly the i bytes being
// hashed (used by aeshash for lengths 1..15). Must stay 16-byte
// aligned for PAND; checkASM verifies this at startup.
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256
  1308
// func checkASM() bool
//
// Verifies assembly-level invariants at startup. Currently checks that
// the masks<> and shifts<> tables are 16-byte aligned, as required by
// the PAND/PSHUFB memory operands in aeshash.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX		// any misaligned bit in either address survives the OR
	TESTQ	$15, AX
	SETEQ	ret+0(FP)	// true iff both addresses have zero low 4 bits
	RET
  1318
// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
//
// shifts<> has a 16-byte stride like masks<>: entry i selects the top
// i bytes of the source register into the low i bytes of the result;
// 0xff control bytes make PSHUFB write zero into the remaining lanes.
// Used by aeshash when a direct 16-byte load would cross a page
// boundary. Must stay 16-byte aligned (verified by checkASM).
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
  1355
// return0 sets AX to 0 and returns.
// NOTE(review): callers appear to use this to materialize a zero
// result outside the normal Go ABI — confirm against call sites.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET
  1359
  1360
  1361// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1362// Must obey the gcc calling convention.
  1363TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1364	get_tls(CX)
  1365	MOVQ	g(CX), AX
  1366	MOVQ	g_m(AX), AX
  1367	MOVQ	m_curg(AX), AX
  1368	MOVQ	(g_stack+stack_hi)(AX), AX
  1369	RET
  1370
// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
//
// The leading NOP exists so that the return address (goexit+PCQuantum)
// still falls inside goexit's code range for traceback purposes.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP
  1378
// This is called from .init_array and follows the platform, not Go, ABI.
// DI (the first C argument) holds the new moduledata pointer, which is
// appended to the runtime's linked list of loaded modules.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)			// lastmoduledatap.next = DI
	MOVQ	DI, runtime·lastmoduledatap(SB)		// lastmoduledatap = DI
	POPQ	R15
	RET
  1387
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
	// Frame layout: 0..96(SP) are GP spill slots used only on the
	// flush path; 104(SP)/112(SP) hold R14/R13 for the fast path.
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_p(R13), R13		// R13 = g.m.p
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14		// reserve one two-word entry
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
	// Record the write.
	// NOTE: nothing between the CMPQ above and the JEQ below may
	// touch FLAGS (MOVQ does not).
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	// Restore everything and rejoin the fast-path exit.
	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret
  1477
// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
//
// Each variant below swaps its value register with AX, invokes the
// canonical barrier (which expects the value in AX), then swaps back,
// restoring both registers to their caller-visible values.
TEXT runtime·gcWriteBarrierCX(SB),NOSPLIT,$0
	XCHGQ CX, AX
	CALL runtime·gcWriteBarrier(SB)
	XCHGQ CX, AX
	RET

// gcWriteBarrierDX is gcWriteBarrier, but with args in DI and DX.
TEXT runtime·gcWriteBarrierDX(SB),NOSPLIT,$0
	XCHGQ DX, AX
	CALL runtime·gcWriteBarrier(SB)
	XCHGQ DX, AX
	RET

// gcWriteBarrierBX is gcWriteBarrier, but with args in DI and BX.
TEXT runtime·gcWriteBarrierBX(SB),NOSPLIT,$0
	XCHGQ BX, AX
	CALL runtime·gcWriteBarrier(SB)
	XCHGQ BX, AX
	RET

// gcWriteBarrierBP is gcWriteBarrier, but with args in DI and BP.
TEXT runtime·gcWriteBarrierBP(SB),NOSPLIT,$0
	XCHGQ BP, AX
	CALL runtime·gcWriteBarrier(SB)
	XCHGQ BP, AX
	RET

// gcWriteBarrierSI is gcWriteBarrier, but with args in DI and SI.
TEXT runtime·gcWriteBarrierSI(SB),NOSPLIT,$0
	XCHGQ SI, AX
	CALL runtime·gcWriteBarrier(SB)
	XCHGQ SI, AX
	RET

// gcWriteBarrierR8 is gcWriteBarrier, but with args in DI and R8.
TEXT runtime·gcWriteBarrierR8(SB),NOSPLIT,$0
	XCHGQ R8, AX
	CALL runtime·gcWriteBarrier(SB)
	XCHGQ R8, AX
	RET

// gcWriteBarrierR9 is gcWriteBarrier, but with args in DI and R9.
TEXT runtime·gcWriteBarrierR9(SB),NOSPLIT,$0
	XCHGQ R9, AX
	CALL runtime·gcWriteBarrier(SB)
	XCHGQ R9, AX
	RET
  1526
// Error string reported to the debugger (with length 20) when a
// requested argument frame exceeds the largest debugCall stub (65536).
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1529
// debugCallV1 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV1 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting RAX and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV1 cannot check
// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// Dispatch to the smallest stub whose frame fits the request.
	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET
  1683
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
//
// Each stub reserves a MAXSIZE-byte frame, then raises AX=0/INT3
// (frame ready for the debugger to fill and redirect) followed by
// AX=1/INT3 (the injected call returned) — the protocol described
// on debugCallV1.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVQ	$0, AX;				\
	BYTE	$0xcc;				\
	MOVQ	$1, AX;				\
	BYTE	$0xcc;				\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
  1706
// func debugCallPanicked(val interface{})
//
// Reports a panic in a debugger-injected call: places the panic's
// interface value at the top of the stack and raises AX=2/INT3, per
// the protocol described on debugCallV1.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX
	BYTE	$0xcc	// INT3
	RET
  1717
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// NOTE(review): the register pair each stub spills (AX/CX, CX/DX, or
// DX/BX) presumably matches where the compiler leaves the failing
// index/length for that bounds-check shape — confirm against the
// compiler's bounds-check lowering.
TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3CU(SB)
  1787
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// $16 = slot index 2 * 8 bytes per slot.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
  1794
// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// Mechanism: the CALL pushes the address of the nospec loop and jumps
// to setup; setup overwrites that return slot on the stack with the
// branch target held in reg; the final RET then transfers to the real
// target, while any mis-speculated return is trapped harmlessly in
// the PAUSE/JMP loop. The MOV encoding is built by hand so the one
// macro serves all 16 GP registers (REX bit and ModRM reg field are
// derived from reg).
#define RETPOLINE(reg) \
	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
	/* nospec: */									\
	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
	/* setup: */									\
	/*   MOVQ AX, 0(SP) */  BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
	/*   RET */             BYTE $0xC3
  1808
// One retpoline thunk per general-purpose register, numbered by its
// hardware encoding. SP (encoding 4) is skipped: 4 is the SIB escape
// in the ModRM byte, and an indirect branch through SP can't happen.
TEXT runtime·retpolineAX(SB),NOSPLIT,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT,$0; RETPOLINE(15)

View as plain text