Text file src/runtime/asm_amd64.s

     1// Copyright 2009 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "go_tls.h"
     7#include "funcdata.h"
     8#include "textflag.h"
     9
    10// _rt0_amd64 is common startup code for most amd64 systems when using
    11// internal linking. This is the entry point for the program from the
    12// kernel for an ordinary -buildmode=exe program. The stack holds the
    13// number of arguments and the C-style argv.
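      // As a rough sketch, on a typical ELF amd64 system the kernel leaves the
      // stack laid out as
      //
      //	0(SP)	argc
      //	8(SP)	argv[0]
      //	...	argv[argc-1], NULL, envp...
      //
      // which is why argc is read from 0(SP) and SI is pointed at 8(SP).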
    14TEXT _rt0_amd64(SB),NOSPLIT,$-8
    15	MOVQ	0(SP), DI	// argc
    16	LEAQ	8(SP), SI	// argv
    17	JMP	runtime·rt0_go(SB)
    18
    19// main is common startup code for most amd64 systems when using
    20// external linking. The C startup code will call the symbol "main"
    21// passing argc and argv in the usual C ABI registers DI and SI.
    22TEXT main(SB),NOSPLIT,$-8
    23	JMP	runtime·rt0_go(SB)
    24
    25// _rt0_amd64_lib is common startup code for most amd64 systems when
    26// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    27// arrange to invoke this function as a global constructor (for
    28// c-archive) or when the shared library is loaded (for c-shared).
    29// We expect argc and argv to be passed in the usual C ABI registers
    30// DI and SI.
    31TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
    32	// Align stack per ELF ABI requirements.
    33	MOVQ	SP, AX
    34	ANDQ	$~15, SP
    35	// Save C ABI callee-saved registers, as caller may need them.
    36	MOVQ	BX, 0x10(SP)
    37	MOVQ	BP, 0x18(SP)
    38	MOVQ	R12, 0x20(SP)
    39	MOVQ	R13, 0x28(SP)
    40	MOVQ	R14, 0x30(SP)
    41	MOVQ	R15, 0x38(SP)
    42	MOVQ	AX, 0x40(SP)
    43
    44	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    45	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    46
    47	// Synchronous initialization.
    48	CALL	runtime·libpreinit(SB)
    49
    50	// Create a new thread to finish Go runtime initialization.
    51	MOVQ	_cgo_sys_thread_create(SB), AX
    52	TESTQ	AX, AX
    53	JZ	nocgo
    54	MOVQ	$_rt0_amd64_lib_go(SB), DI
    55	MOVQ	$0, SI
    56	CALL	AX
    57	JMP	restore
    58
    59nocgo:
    60	MOVQ	$0x800000, 0(SP)		// stacksize
    61	MOVQ	$_rt0_amd64_lib_go(SB), AX
    62	MOVQ	AX, 8(SP)			// fn
    63	CALL	runtime·newosproc0(SB)
    64
    65restore:
    66	MOVQ	0x10(SP), BX
    67	MOVQ	0x18(SP), BP
    68	MOVQ	0x20(SP), R12
    69	MOVQ	0x28(SP), R13
    70	MOVQ	0x30(SP), R14
    71	MOVQ	0x38(SP), R15
    72	MOVQ	0x40(SP), SP
    73	RET
    74
    75// _rt0_amd64_lib_go initializes the Go runtime.
    76// This is started in a separate thread by _rt0_amd64_lib.
    77TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    78	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    79	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    80	JMP	runtime·rt0_go(SB)
    81
    82DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    83GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    84DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    85GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    86
    87TEXT runtime·rt0_go(SB),NOSPLIT,$0
    88	// copy arguments forward on an even stack
    89	MOVQ	DI, AX		// argc
    90	MOVQ	SI, BX		// argv
    91	SUBQ	$(4*8+7), SP		// 2args 2auto
    92	ANDQ	$~15, SP
    93	MOVQ	AX, 16(SP)
    94	MOVQ	BX, 24(SP)
    95
    96	// create istack out of the given (operating system) stack.
    97	// _cgo_init may update stackguard.
    98	MOVQ	$runtime·g0(SB), DI
    99	LEAQ	(-64*1024+104)(SP), BX
   100	MOVQ	BX, g_stackguard0(DI)
   101	MOVQ	BX, g_stackguard1(DI)
   102	MOVQ	BX, (g_stack+stack_lo)(DI)
   103	MOVQ	SP, (g_stack+stack_hi)(DI)
   104
   105	// find out information about the processor we're on
   106	MOVL	$0, AX
   107	CPUID
   108	MOVL	AX, SI
   109	CMPL	AX, $0
   110	JE	nocpuinfo
   111
   112	// Figure out how to serialize RDTSC.
   113	// On Intel processors LFENCE is enough. AMD requires MFENCE.
   114	// Don't know about the rest, so let's do MFENCE.
   115	CMPL	BX, $0x756E6547  // "Genu"
   116	JNE	notintel
   117	CMPL	DX, $0x49656E69  // "ineI"
   118	JNE	notintel
   119	CMPL	CX, $0x6C65746E  // "ntel"
   120	JNE	notintel
   121	MOVB	$1, runtime·isIntel(SB)
   122	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
   123notintel:
   124
   125	// Load EAX=1 cpuid flags
   126	MOVL	$1, AX
   127	CPUID
   128	MOVL	AX, runtime·processorVersionInfo(SB)
   129
   130nocpuinfo:
   131	// if there is an _cgo_init, call it.
   132	MOVQ	_cgo_init(SB), AX
   133	TESTQ	AX, AX
   134	JZ	needtls
   135	// arg 1: g0, already in DI
   136	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
   137#ifdef GOOS_android
   138	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   139	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   140	// Compensate for tls_g (+16).
   141	MOVQ	-16(TLS), CX
   142#else
   143	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
   144	MOVQ	$0, CX
   145#endif
   146#ifdef GOOS_windows
   147	// Adjust for the Win64 calling convention.
   148	MOVQ	CX, R9 // arg 4
   149	MOVQ	DX, R8 // arg 3
   150	MOVQ	SI, DX // arg 2
   151	MOVQ	DI, CX // arg 1
   152#endif
   153	CALL	AX
   154
   155	// update stackguard after _cgo_init
   156	MOVQ	$runtime·g0(SB), CX
   157	MOVQ	(g_stack+stack_lo)(CX), AX
   158	ADDQ	$const__StackGuard, AX
   159	MOVQ	AX, g_stackguard0(CX)
   160	MOVQ	AX, g_stackguard1(CX)
   161
   162#ifndef GOOS_windows
   163	JMP ok
   164#endif
   165needtls:
   166#ifdef GOOS_plan9
   167	// skip TLS setup on Plan 9
   168	JMP ok
   169#endif
   170#ifdef GOOS_solaris
   171	// skip TLS setup on Solaris
   172	JMP ok
   173#endif
   174#ifdef GOOS_illumos
   175	// skip TLS setup on illumos
   176	JMP ok
   177#endif
   178#ifdef GOOS_darwin
   179	// skip TLS setup on Darwin
   180	JMP ok
   181#endif
   182
   183	LEAQ	runtime·m0+m_tls(SB), DI
   184	CALL	runtime·settls(SB)
   185
   186	// store through it, to make sure it works
   187	get_tls(BX)
   188	MOVQ	$0x123, g(BX)
   189	MOVQ	runtime·m0+m_tls(SB), AX
   190	CMPQ	AX, $0x123
   191	JEQ 2(PC)
   192	CALL	runtime·abort(SB)
   193ok:
   194	// set the per-goroutine and per-mach "registers"
   195	get_tls(BX)
   196	LEAQ	runtime·g0(SB), CX
   197	MOVQ	CX, g(BX)
   198	LEAQ	runtime·m0(SB), AX
   199
   200	// save m->g0 = g0
   201	MOVQ	CX, m_g0(AX)
   202	// save m0 to g0->m
   203	MOVQ	AX, g_m(CX)
   204
   205	CLD				// convention is D is always left cleared
   206	CALL	runtime·check(SB)
   207
   208	MOVL	16(SP), AX		// copy argc
   209	MOVL	AX, 0(SP)
   210	MOVQ	24(SP), AX		// copy argv
   211	MOVQ	AX, 8(SP)
   212	CALL	runtime·args(SB)
   213	CALL	runtime·osinit(SB)
   214	CALL	runtime·schedinit(SB)
   215
   216	// create a new goroutine to start program
   217	MOVQ	$runtime·mainPC(SB), AX		// entry
   218	PUSHQ	AX
   219	PUSHQ	$0			// arg size
   220	CALL	runtime·newproc(SB)
   221	POPQ	AX
   222	POPQ	AX
   223
   224	// start this M
   225	CALL	runtime·mstart(SB)
   226
   227	CALL	runtime·abort(SB)	// mstart should never return
   228	RET
   229
   230	// Prevent dead-code elimination of debugCallV1, which is
   231	// intended to be called by debuggers.
   232	MOVQ	$runtime·debugCallV1(SB), AX
   233	RET
   234
   235DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
   236GLOBL	runtime·mainPC(SB),RODATA,$8
   237
   238TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   239	BYTE	$0xcc
   240	RET
   241
   242TEXT runtime·asminit(SB),NOSPLIT,$0-0
   243	// No per-thread init.
   244	RET
   245
   246/*
   247 *  go-routine
   248 */
   249
   250// func gosave(buf *gobuf)
   251// save state in Gobuf; setjmp
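      // For orientation, the gobuf fields stored below correspond roughly to
      // (field types simplified; runtime2.go has the authoritative definition):
      //
      //	type gobuf struct {
      //		sp, pc uintptr        // stack pointer and program counter
      //		g      guintptr       // goroutine this buffer belongs to
      //		ctxt   unsafe.Pointer // closure context; must be nil here
      //		ret    uintptr        // return register (AX)
      //		bp     uintptr        // frame pointer, if enabled
      //	}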
   252TEXT runtime·gosave(SB), NOSPLIT, $0-8
   253	MOVQ	buf+0(FP), AX		// gobuf
   254	LEAQ	buf+0(FP), BX		// caller's SP
   255	MOVQ	BX, gobuf_sp(AX)
   256	MOVQ	0(SP), BX		// caller's PC
   257	MOVQ	BX, gobuf_pc(AX)
   258	MOVQ	$0, gobuf_ret(AX)
   259	MOVQ	BP, gobuf_bp(AX)
   260	// Assert ctxt is zero. See func save.
   261	MOVQ	gobuf_ctxt(AX), BX
   262	TESTQ	BX, BX
   263	JZ	2(PC)
   264	CALL	runtime·badctxt(SB)
   265	get_tls(CX)
   266	MOVQ	g(CX), BX
   267	MOVQ	BX, gobuf_g(AX)
   268	RET
   269
   270// func gogo(buf *gobuf)
   271// restore state from Gobuf; longjmp
   272TEXT runtime·gogo(SB), NOSPLIT, $16-8
   273	MOVQ	buf+0(FP), BX		// gobuf
   274	MOVQ	gobuf_g(BX), DX
   275	MOVQ	0(DX), CX		// make sure g != nil
   276	get_tls(CX)
   277	MOVQ	DX, g(CX)
   278	MOVQ	gobuf_sp(BX), SP	// restore SP
   279	MOVQ	gobuf_ret(BX), AX
   280	MOVQ	gobuf_ctxt(BX), DX
   281	MOVQ	gobuf_bp(BX), BP
   282	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   283	MOVQ	$0, gobuf_ret(BX)
   284	MOVQ	$0, gobuf_ctxt(BX)
   285	MOVQ	$0, gobuf_bp(BX)
   286	MOVQ	gobuf_pc(BX), BX
   287	JMP	BX
   288
   289// func mcall(fn func(*g))
   290// Switch to m->g0's stack, call fn(g).
   291// Fn must never return. It should gogo(&g->sched)
   292// to keep running g.
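      // Typical use from Go code in this package looks roughly like
      // (illustrative only):
      //
      //	mcall(func(gp *g) {
      //		// now on g0's stack; gp is the goroutine that called mcall
      //		// ... park or reschedule gp ...
      //		schedule() // never returns here; gp resumes via gogo(&gp.sched)
      //	})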
   293TEXT runtime·mcall(SB), NOSPLIT, $0-8
   294	MOVQ	fn+0(FP), DI
   295
   296	get_tls(CX)
   297	MOVQ	g(CX), AX	// save state in g->sched
   298	MOVQ	0(SP), BX	// caller's PC
   299	MOVQ	BX, (g_sched+gobuf_pc)(AX)
   300	LEAQ	fn+0(FP), BX	// caller's SP
   301	MOVQ	BX, (g_sched+gobuf_sp)(AX)
   302	MOVQ	AX, (g_sched+gobuf_g)(AX)
   303	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   304
   305	// switch to m->g0 & its stack, call fn
   306	MOVQ	g(CX), BX
   307	MOVQ	g_m(BX), BX
   308	MOVQ	m_g0(BX), SI
   309	CMPQ	SI, AX	// if g == m->g0 call badmcall
   310	JNE	3(PC)
   311	MOVQ	$runtime·badmcall(SB), AX
   312	JMP	AX
   313	MOVQ	SI, g(CX)	// g = m->g0
   314	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   315	PUSHQ	AX
   316	MOVQ	DI, DX
   317	MOVQ	0(DI), DI
   318	CALL	DI
   319	POPQ	AX
   320	MOVQ	$runtime·badmcall2(SB), AX
   321	JMP	AX
   322	RET
   323
   324// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   325// of the G stack. We need to distinguish the routine that
   326// lives at the bottom of the G stack from the one that lives
   327// at the top of the system stack because the one at the top of
   328// the system stack terminates the stack walk (see topofstack()).
   329TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   330	RET
   331
   332// func systemstack(fn func())
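      // Typical use, as a sketch:
      //
      //	systemstack(func() {
      //		// runs on the system (g0) stack, where stack growth and
      //		// preemption are not a concern; execution switches back to
      //		// the calling g when the function returns
      //	})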
   333TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   334	MOVQ	fn+0(FP), DI	// DI = fn
   335	get_tls(CX)
   336	MOVQ	g(CX), AX	// AX = g
   337	MOVQ	g_m(AX), BX	// BX = m
   338
   339	CMPQ	AX, m_gsignal(BX)
   340	JEQ	noswitch
   341
   342	MOVQ	m_g0(BX), DX	// DX = g0
   343	CMPQ	AX, DX
   344	JEQ	noswitch
   345
   346	CMPQ	AX, m_curg(BX)
   347	JNE	bad
   348
   349	// switch stacks
   350	// save our state in g->sched. Pretend to
   351	// be systemstack_switch if the G stack is scanned.
   352	MOVQ	$runtime·systemstack_switch(SB), SI
   353	MOVQ	SI, (g_sched+gobuf_pc)(AX)
   354	MOVQ	SP, (g_sched+gobuf_sp)(AX)
   355	MOVQ	AX, (g_sched+gobuf_g)(AX)
   356	MOVQ	BP, (g_sched+gobuf_bp)(AX)
   357
   358	// switch to g0
   359	MOVQ	DX, g(CX)
   360	MOVQ	(g_sched+gobuf_sp)(DX), BX
   361	// make it look like mstart called systemstack on g0, to stop traceback
   362	SUBQ	$8, BX
   363	MOVQ	$runtime·mstart(SB), DX
   364	MOVQ	DX, 0(BX)
   365	MOVQ	BX, SP
   366
   367	// call target function
   368	MOVQ	DI, DX
   369	MOVQ	0(DI), DI
   370	CALL	DI
   371
   372	// switch back to g
   373	get_tls(CX)
   374	MOVQ	g(CX), AX
   375	MOVQ	g_m(AX), BX
   376	MOVQ	m_curg(BX), AX
   377	MOVQ	AX, g(CX)
   378	MOVQ	(g_sched+gobuf_sp)(AX), SP
   379	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   380	RET
   381
   382noswitch:
   383	// already on m stack; tail call the function
   384	// Using a tail call here cleans up tracebacks since we won't stop
   385	// at an intermediate systemstack.
   386	MOVQ	DI, DX
   387	MOVQ	0(DI), DI
   388	JMP	DI
   389
   390bad:
   391	// Bad: g is not gsignal, not g0, not curg. What is it?
   392	MOVQ	$runtime·badsystemstack(SB), AX
   393	CALL	AX
   394	INT	$3
   395
   396
   397/*
   398 * support for morestack
   399 */
   400
   401// Called during function prolog when more stack is needed.
   402//
   403// The traceback routines see morestack on a g0 as being
   404// the top of a stack (for example, morestack calling newstack
   405// calling the scheduler calling newm calling gc), so we must
   406// record an argument size. For that purpose, it has no arguments.
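      // For context, the stack-split prologue the compiler emits in each
      // non-NOSPLIT function looks roughly like this (a sketch; exact code
      // generation varies by function):
      //
      //	MOVQ	(TLS), CX		// CX = g
      //	CMPQ	SP, g_stackguard0(CX)
      //	JLS	callmorestack		// CALL runtime·morestack_noctxt, then restart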
   407TEXT runtime·morestack(SB),NOSPLIT,$0-0
   408	// Cannot grow scheduler stack (m->g0).
   409	get_tls(CX)
   410	MOVQ	g(CX), BX
   411	MOVQ	g_m(BX), BX
   412	MOVQ	m_g0(BX), SI
   413	CMPQ	g(CX), SI
   414	JNE	3(PC)
   415	CALL	runtime·badmorestackg0(SB)
   416	CALL	runtime·abort(SB)
   417
   418	// Cannot grow signal stack (m->gsignal).
   419	MOVQ	m_gsignal(BX), SI
   420	CMPQ	g(CX), SI
   421	JNE	3(PC)
   422	CALL	runtime·badmorestackgsignal(SB)
   423	CALL	runtime·abort(SB)
   424
   425	// Called from f.
   426	// Set m->morebuf to f's caller.
   427	NOP	SP	// tell vet SP changed - stop checking offsets
   428	MOVQ	8(SP), AX	// f's caller's PC
   429	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   430	LEAQ	16(SP), AX	// f's caller's SP
   431	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   432	get_tls(CX)
   433	MOVQ	g(CX), SI
   434	MOVQ	SI, (m_morebuf+gobuf_g)(BX)
   435
   436	// Set g->sched to context in f.
   437	MOVQ	0(SP), AX // f's PC
   438	MOVQ	AX, (g_sched+gobuf_pc)(SI)
   439	MOVQ	SI, (g_sched+gobuf_g)(SI)
   440	LEAQ	8(SP), AX // f's SP
   441	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   442	MOVQ	BP, (g_sched+gobuf_bp)(SI)
   443	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)
   444
   445	// Call newstack on m->g0's stack.
   446	MOVQ	m_g0(BX), BX
   447	MOVQ	BX, g(CX)
   448	MOVQ	(g_sched+gobuf_sp)(BX), SP
   449	CALL	runtime·newstack(SB)
   450	CALL	runtime·abort(SB)	// crash if newstack returns
   451	RET
   452
   453// morestack but not preserving ctxt.
   454TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   455	MOVL	$0, DX
   456	JMP	runtime·morestack(SB)
   457
   458// reflectcall: call a function with the given argument list
   459// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   460// We don't have variable-sized frames, so we use a small number
   461// of constant-sized-frame functions to encode a few bits of size in the pc.
   462// Caution: ugly multiline assembly macros in your future!
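      // A Go-level sketch of the dispatch below:
      //
      //	switch {
      //	case argsize <= 32:
      //		call32(argtype, f, arg, argsize, retoffset)
      //	case argsize <= 64:
      //		call64(argtype, f, arg, argsize, retoffset)
      //	// ... sizes double up to 1 << 30 ...
      //	default:
      //		badreflectcall()
      //	}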
   463
   464#define DISPATCH(NAME,MAXSIZE)		\
   465	CMPQ	CX, $MAXSIZE;		\
   466	JA	3(PC);			\
   467	MOVQ	$NAME(SB), AX;		\
   468	JMP	AX
   469// Note: can't just "JMP NAME(SB)" - bad inlining results.
   470
   471TEXT ·reflectcall(SB), NOSPLIT, $0-32
   472	MOVLQZX argsize+24(FP), CX
   473	DISPATCH(runtime·call32, 32)
   474	DISPATCH(runtime·call64, 64)
   475	DISPATCH(runtime·call128, 128)
   476	DISPATCH(runtime·call256, 256)
   477	DISPATCH(runtime·call512, 512)
   478	DISPATCH(runtime·call1024, 1024)
   479	DISPATCH(runtime·call2048, 2048)
   480	DISPATCH(runtime·call4096, 4096)
   481	DISPATCH(runtime·call8192, 8192)
   482	DISPATCH(runtime·call16384, 16384)
   483	DISPATCH(runtime·call32768, 32768)
   484	DISPATCH(runtime·call65536, 65536)
   485	DISPATCH(runtime·call131072, 131072)
   486	DISPATCH(runtime·call262144, 262144)
   487	DISPATCH(runtime·call524288, 524288)
   488	DISPATCH(runtime·call1048576, 1048576)
   489	DISPATCH(runtime·call2097152, 2097152)
   490	DISPATCH(runtime·call4194304, 4194304)
   491	DISPATCH(runtime·call8388608, 8388608)
   492	DISPATCH(runtime·call16777216, 16777216)
   493	DISPATCH(runtime·call33554432, 33554432)
   494	DISPATCH(runtime·call67108864, 67108864)
   495	DISPATCH(runtime·call134217728, 134217728)
   496	DISPATCH(runtime·call268435456, 268435456)
   497	DISPATCH(runtime·call536870912, 536870912)
   498	DISPATCH(runtime·call1073741824, 1073741824)
   499	MOVQ	$runtime·badreflectcall(SB), AX
   500	JMP	AX
   501
   502#define CALLFN(NAME,MAXSIZE)			\
   503TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
   504	NO_LOCAL_POINTERS;			\
   505	/* copy arguments to stack */		\
   506	MOVQ	argptr+16(FP), SI;		\
   507	MOVLQZX argsize+24(FP), CX;		\
   508	MOVQ	SP, DI;				\
   509	REP;MOVSB;				\
   510	/* call function */			\
   511	MOVQ	f+8(FP), DX;			\
   512	PCDATA  $PCDATA_StackMapIndex, $0;	\
   513	CALL	(DX);				\
   514	/* copy return values back */		\
   515	MOVQ	argtype+0(FP), DX;		\
   516	MOVQ	argptr+16(FP), DI;		\
   517	MOVLQZX	argsize+24(FP), CX;		\
   518	MOVLQZX	retoffset+28(FP), BX;		\
   519	MOVQ	SP, SI;				\
   520	ADDQ	BX, DI;				\
   521	ADDQ	BX, SI;				\
   522	SUBQ	BX, CX;				\
   523	CALL	callRet<>(SB);			\
   524	RET
   525
   526// callRet copies return values back at the end of call*. This is a
   527// separate function so it can allocate stack space for the arguments
   528// to reflectcallmove. It does not follow the Go ABI; it expects its
   529// arguments in registers.
   530TEXT callRet<>(SB), NOSPLIT, $32-0
   531	NO_LOCAL_POINTERS
   532	MOVQ	DX, 0(SP)
   533	MOVQ	DI, 8(SP)
   534	MOVQ	SI, 16(SP)
   535	MOVQ	CX, 24(SP)
   536	CALL	runtime·reflectcallmove(SB)
   537	RET
   538
   539CALLFN(·call32, 32)
   540CALLFN(·call64, 64)
   541CALLFN(·call128, 128)
   542CALLFN(·call256, 256)
   543CALLFN(·call512, 512)
   544CALLFN(·call1024, 1024)
   545CALLFN(·call2048, 2048)
   546CALLFN(·call4096, 4096)
   547CALLFN(·call8192, 8192)
   548CALLFN(·call16384, 16384)
   549CALLFN(·call32768, 32768)
   550CALLFN(·call65536, 65536)
   551CALLFN(·call131072, 131072)
   552CALLFN(·call262144, 262144)
   553CALLFN(·call524288, 524288)
   554CALLFN(·call1048576, 1048576)
   555CALLFN(·call2097152, 2097152)
   556CALLFN(·call4194304, 4194304)
   557CALLFN(·call8388608, 8388608)
   558CALLFN(·call16777216, 16777216)
   559CALLFN(·call33554432, 33554432)
   560CALLFN(·call67108864, 67108864)
   561CALLFN(·call134217728, 134217728)
   562CALLFN(·call268435456, 268435456)
   563CALLFN(·call536870912, 536870912)
   564CALLFN(·call1073741824, 1073741824)
   565
   566TEXT runtime·procyield(SB),NOSPLIT,$0-0
   567	MOVL	cycles+0(FP), AX
   568again:
   569	PAUSE
   570	SUBL	$1, AX
   571	JNZ	again
   572	RET
   573
   574
   575TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   576	// Stores are already ordered on x86, so this is just a
   577	// compile barrier.
   578	RET
   579
   580// func jmpdefer(fv *funcval, argp uintptr)
   581// argp is a caller SP.
   582// called from deferreturn.
   583// 1. pop the caller
   584// 2. sub 5 bytes from the caller's return address
   585// 3. jmp to the argument
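      // The constant 5 below is the length of the direct CALL instruction that
      // invoked deferreturn (1 opcode byte + 4-byte relative offset), so
      // rewinding the saved return address by 5 makes the deferred function's
      // RET re-execute that CALL and run deferreturn again for the next
      // deferred call, if any.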
   586TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
   587	MOVQ	fv+0(FP), DX	// fn
   588	MOVQ	argp+8(FP), BX	// caller sp
   589	LEAQ	-8(BX), SP	// caller sp after CALL
   590	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
   591	SUBQ	$5, (SP)	// return to CALL again
   592	MOVQ	0(DX), BX
   593	JMP	BX	// but first run the deferred function
   594
   595// Save state of caller into g->sched. Smashes R8, R9.
   596TEXT gosave<>(SB),NOSPLIT,$0
   597	get_tls(R8)
   598	MOVQ	g(R8), R8
   599	MOVQ	0(SP), R9
   600	MOVQ	R9, (g_sched+gobuf_pc)(R8)
   601	LEAQ	8(SP), R9
   602	MOVQ	R9, (g_sched+gobuf_sp)(R8)
   603	MOVQ	$0, (g_sched+gobuf_ret)(R8)
   604	MOVQ	BP, (g_sched+gobuf_bp)(R8)
   605	// Assert ctxt is zero. See func save.
   606	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
   607	TESTQ	R9, R9
   608	JZ	2(PC)
   609	CALL	runtime·badctxt(SB)
   610	RET
   611
   612// func asmcgocall(fn, arg unsafe.Pointer) int32
   613// Call fn(arg) on the scheduler stack,
   614// aligned appropriately for the gcc ABI.
   615// See cgocall.go for more details.
   616TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   617	MOVQ	fn+0(FP), AX
   618	MOVQ	arg+8(FP), BX
   619
   620	MOVQ	SP, DX
   621
   622	// Figure out if we need to switch to m->g0 stack.
   623	// We get called to create new OS threads too, and those
   624	// come in on the m->g0 stack already.
   625	get_tls(CX)
   626	MOVQ	g(CX), R8
   627	CMPQ	R8, $0
   628	JEQ	nosave
   629	MOVQ	g_m(R8), R8
   630	MOVQ	m_g0(R8), SI
   631	MOVQ	g(CX), DI
   632	CMPQ	SI, DI
   633	JEQ	nosave
   634	MOVQ	m_gsignal(R8), SI
   635	CMPQ	SI, DI
   636	JEQ	nosave
   637
   638	// Switch to system stack.
   639	MOVQ	m_g0(R8), SI
   640	CALL	gosave<>(SB)
   641	MOVQ	SI, g(CX)
   642	MOVQ	(g_sched+gobuf_sp)(SI), SP
   643
   644	// Now on a scheduling stack (a pthread-created stack).
   645	// Make sure we have enough room for 4 stack-backed fast-call
   646	// registers as per windows amd64 calling convention.
   647	SUBQ	$64, SP
   648	ANDQ	$~15, SP	// alignment for gcc ABI
   649	MOVQ	DI, 48(SP)	// save g
   650	MOVQ	(g_stack+stack_hi)(DI), DI
   651	SUBQ	DX, DI
   652	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   653	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   654	MOVQ	BX, CX		// CX = first argument in Win64
   655	CALL	AX
   656
   657	// Restore registers, g, stack pointer.
   658	get_tls(CX)
   659	MOVQ	48(SP), DI
   660	MOVQ	(g_stack+stack_hi)(DI), SI
   661	SUBQ	40(SP), SI
   662	MOVQ	DI, g(CX)
   663	MOVQ	SI, SP
   664
   665	MOVL	AX, ret+16(FP)
   666	RET
   667
   668nosave:
   669	// Running on a system stack, perhaps even without a g.
   670	// Having no g can happen during thread creation or thread teardown
   671	// (see needm/dropm on Solaris, for example).
   672	// This code is like the above sequence but without saving/restoring g
   673	// and without worrying about the stack moving out from under us
   674	// (because we're on a system stack, not a goroutine stack).
   675	// The above code could be used directly if already on a system stack,
   676	// but then the only path through this code would be a rare case on Solaris.
   677	// Using this code for all "already on system stack" calls exercises it more,
   678	// which should help keep it correct.
   679	SUBQ	$64, SP
   680	ANDQ	$~15, SP
   681	MOVQ	$0, 48(SP)		// where above code stores g, in case someone looks during debugging
   682	MOVQ	DX, 40(SP)	// save original stack pointer
   683	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   684	MOVQ	BX, CX		// CX = first argument in Win64
   685	CALL	AX
   686	MOVQ	40(SP), SI	// restore original stack pointer
   687	MOVQ	SI, SP
   688	MOVL	AX, ret+16(FP)
   689	RET
   690
   691// func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
   692// Turn the fn into a Go func (by taking its address) and call
   693// cgocallback_gofunc.
   694TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   695	LEAQ	fn+0(FP), AX
   696	MOVQ	AX, 0(SP)
   697	MOVQ	frame+8(FP), AX
   698	MOVQ	AX, 8(SP)
   699	MOVQ	framesize+16(FP), AX
   700	MOVQ	AX, 16(SP)
   701	MOVQ	ctxt+24(FP), AX
   702	MOVQ	AX, 24(SP)
   703	MOVQ	$runtime·cgocallback_gofunc(SB), AX
   704	CALL	AX
   705	RET
   706
   707// func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
   708// See cgocall.go for more details.
   709TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   710	NO_LOCAL_POINTERS
   711
   712	// If g is nil, Go did not create the current thread.
   713	// Call needm to obtain one m for temporary use.
   714	// In this case, we're running on the thread stack, so there's
   715	// lots of space, but the linker doesn't know. Hide the call from
   716	// the linker analysis by using an indirect call through AX.
   717	get_tls(CX)
   718#ifdef GOOS_windows
   719	MOVL	$0, BX
   720	CMPQ	CX, $0
   721	JEQ	2(PC)
   722#endif
   723	MOVQ	g(CX), BX
   724	CMPQ	BX, $0
   725	JEQ	needm
   726	MOVQ	g_m(BX), BX
   727	MOVQ	BX, R8 // holds oldm until end of function
   728	JMP	havem
   729needm:
   730	MOVQ	$0, 0(SP)
   731	MOVQ	$runtime·needm(SB), AX
   732	CALL	AX
   733	MOVQ	0(SP), R8
   734	get_tls(CX)
   735	MOVQ	g(CX), BX
   736	MOVQ	g_m(BX), BX
   737
   738	// Set m->sched.sp = SP, so that if a panic happens
   739	// during the function we are about to execute, it will
   740	// have a valid SP to run on the g0 stack.
   741	// The next few lines (after the havem label)
   742	// will save this SP onto the stack and then write
   743	// the same SP back to m->sched.sp. That seems redundant,
   744	// but if an unrecovered panic happens, unwindm will
   745	// restore the g->sched.sp from the stack location
   746	// and then systemstack will try to use it. If we don't set it here,
   747	// that restored SP will be uninitialized (typically 0) and
   748	// will not be usable.
   749	MOVQ	m_g0(BX), SI
   750	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   751
   752havem:
   753	// Now there's a valid m, and we're running on its m->g0.
   754	// Save current m->g0->sched.sp on stack and then set it to SP.
   755	// Save current sp in m->g0->sched.sp in preparation for
   756	// switch back to m->curg stack.
   757	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   758	MOVQ	m_g0(BX), SI
   759	MOVQ	(g_sched+gobuf_sp)(SI), AX
   760	MOVQ	AX, 0(SP)
   761	MOVQ	SP, (g_sched+gobuf_sp)(SI)
   762
   763	// Switch to m->curg stack and call runtime.cgocallbackg.
   764	// Because we are taking over the execution of m->curg
   765	// but *not* resuming what had been running, we need to
   766	// save that information (m->curg->sched) so we can restore it.
   767	// We can restore m->curg->sched.sp easily, because calling
   768	// runtime.cgocallbackg leaves SP unchanged upon return.
   769	// To save m->curg->sched.pc, we push it onto the stack.
   770	// This has the added benefit that it looks to the traceback
   771	// routine like cgocallbackg is going to return to that
   772	// PC (because the frame we allocate below has the same
   773	// size as cgocallback_gofunc's frame declared above)
   774	// so that the traceback will seamlessly trace back into
   775	// the earlier calls.
   776	//
   777	// In the new goroutine, 8(SP) holds the saved R8.
   778	MOVQ	m_curg(BX), SI
   779	MOVQ	SI, g(CX)
   780	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
   781	MOVQ	(g_sched+gobuf_pc)(SI), BX
   782	MOVQ	BX, -8(DI)
   783	// Compute the size of the frame, including return PC and, if
   784	// GOEXPERIMENT=framepointer, the saved base pointer
   785	MOVQ	ctxt+24(FP), BX
   786	LEAQ	fv+0(FP), AX
   787	SUBQ	SP, AX
   788	SUBQ	AX, DI
   789	MOVQ	DI, SP
   790
   791	MOVQ	R8, 8(SP)
   792	MOVQ	BX, 0(SP)
   793	CALL	runtime·cgocallbackg(SB)
   794	MOVQ	8(SP), R8
   795
   796	// Compute the size of the frame again. FP and SP have
   797	// completely different values here than they did above,
   798	// but only their difference matters.
   799	LEAQ	fv+0(FP), AX
   800	SUBQ	SP, AX
   801
   802	// Restore g->sched (== m->curg->sched) from saved values.
   803	get_tls(CX)
   804	MOVQ	g(CX), SI
   805	MOVQ	SP, DI
   806	ADDQ	AX, DI
   807	MOVQ	-8(DI), BX
   808	MOVQ	BX, (g_sched+gobuf_pc)(SI)
   809	MOVQ	DI, (g_sched+gobuf_sp)(SI)
   810
   811	// Switch back to m->g0's stack and restore m->g0->sched.sp.
   812	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   813	// so we do not have to restore it.)
   814	MOVQ	g(CX), BX
   815	MOVQ	g_m(BX), BX
   816	MOVQ	m_g0(BX), SI
   817	MOVQ	SI, g(CX)
   818	MOVQ	(g_sched+gobuf_sp)(SI), SP
   819	MOVQ	0(SP), AX
   820	MOVQ	AX, (g_sched+gobuf_sp)(SI)
   821
   822	// If the m on entry was nil, we called needm above to borrow an m
   823	// for the duration of the call. Since the call is over, return it with dropm.
   824	CMPQ	R8, $0
   825	JNE 3(PC)
   826	MOVQ	$runtime·dropm(SB), AX
   827	CALL	AX
   828
   829	// Done!
   830	RET
   831
   832// func setg(gg *g)
   833// set g. for use by needm.
   834TEXT runtime·setg(SB), NOSPLIT, $0-8
   835	MOVQ	gg+0(FP), BX
   836#ifdef GOOS_windows
   837	CMPQ	BX, $0
   838	JNE	settls
   839	MOVQ	$0, 0x28(GS)
   840	RET
   841settls:
   842	MOVQ	g_m(BX), AX
   843	LEAQ	m_tls(AX), AX
   844	MOVQ	AX, 0x28(GS)
   845#endif
   846	get_tls(CX)
   847	MOVQ	BX, g(CX)
   848	RET
   849
   850// void setg_gcc(G*); set g, called from gcc.
   851TEXT setg_gcc<>(SB),NOSPLIT,$0
   852	get_tls(AX)
   853	MOVQ	DI, g(AX)
   854	RET
   855
   856TEXT runtime·abort(SB),NOSPLIT,$0-0
   857	INT	$3
   858loop:
   859	JMP	loop
   860
   861// check that SP is in range [g->stack.lo, g->stack.hi)
   862TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   863	get_tls(CX)
   864	MOVQ	g(CX), AX
   865	CMPQ	(g_stack+stack_hi)(AX), SP
   866	JHI	2(PC)
   867	CALL	runtime·abort(SB)
   868	CMPQ	SP, (g_stack+stack_lo)(AX)
   869	JHI	2(PC)
   870	CALL	runtime·abort(SB)
   871	RET
   872
   873// func cputicks() int64
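      // RDTSC returns the counter split across DX:AX (high:low 32 bits);
      // the code below reassembles it as ticks = hi<<32 + lo.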
   874TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   875	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   876	JNE	mfence
   877	LFENCE
   878	JMP	done
   879mfence:
   880	MFENCE
   881done:
   882	RDTSC
   883	SHLQ	$32, DX
   884	ADDQ	DX, AX
   885	MOVQ	AX, ret+0(FP)
   886	RET
   887
   888// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
   889// hash function using AES hardware instructions
   890TEXT runtime·memhash(SB),NOSPLIT,$0-32
   891	CMPB	runtime·useAeshash(SB), $0
   892	JEQ	noaes
   893	MOVQ	p+0(FP), AX	// ptr to data
   894	MOVQ	s+16(FP), CX	// size
   895	LEAQ	ret+24(FP), DX
   896	JMP	aeshashbody<>(SB)
   897noaes:
   898	JMP	runtime·memhashFallback(SB)
   899
   900// func strhash(p unsafe.Pointer, h uintptr) uintptr
   901TEXT runtime·strhash(SB),NOSPLIT,$0-24
   902	CMPB	runtime·useAeshash(SB), $0
   903	JEQ	noaes
   904	MOVQ	p+0(FP), AX	// ptr to string struct
   905	MOVQ	8(AX), CX	// length of string
   906	MOVQ	(AX), AX	// string data
   907	LEAQ	ret+16(FP), DX
   908	JMP	aeshashbody<>(SB)
   909noaes:
   910	JMP	runtime·strhashFallback(SB)
   911
   912// AX: data
   913// CX: length
   914// DX: address to put return value
   915TEXT aeshashbody<>(SB),NOSPLIT,$0-0
   916	// Fill an SSE register with our seeds.
   917	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
   918	PINSRW	$4, CX, X0			// 16 bits of length
   919	PSHUFHW $0, X0, X0			// repeat length 4 times total
   920	MOVO	X0, X1				// save unscrambled seed
   921	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
   922	AESENC	X0, X0				// scramble seed
   923
   924	CMPQ	CX, $16
   925	JB	aes0to15
   926	JE	aes16
   927	CMPQ	CX, $32
   928	JBE	aes17to32
   929	CMPQ	CX, $64
   930	JBE	aes33to64
   931	CMPQ	CX, $128
   932	JBE	aes65to128
   933	JMP	aes129plus
   934
   935aes0to15:
   936	TESTQ	CX, CX
   937	JE	aes0
   938
   939	ADDQ	$16, AX
   940	TESTW	$0xff0, AX
   941	JE	endofpage
   942
   943	// 16 bytes loaded at this address won't cross
   944	// a page boundary, so we can load it directly.
   945	MOVOU	-16(AX), X1
   946	ADDQ	CX, CX
   947	MOVQ	$masks<>(SB), AX
   948	PAND	(AX)(CX*8), X1
   949final1:
   950	PXOR	X0, X1	// xor data with seed
   951	AESENC	X1, X1	// scramble combo 3 times
   952	AESENC	X1, X1
   953	AESENC	X1, X1
   954	MOVQ	X1, (DX)
   955	RET
   956
   957endofpage:
   958	// address ends in 1111xxxx. Might be up against
   959	// a page boundary, so load ending at last byte.
   960	// Then shift bytes down using pshufb.
   961	MOVOU	-32(AX)(CX*1), X1
   962	ADDQ	CX, CX
   963	MOVQ	$shifts<>(SB), AX
   964	PSHUFB	(AX)(CX*8), X1
   965	JMP	final1
   966
   967aes0:
   968	// Return scrambled input seed
   969	AESENC	X0, X0
   970	MOVQ	X0, (DX)
   971	RET
   972
   973aes16:
   974	MOVOU	(AX), X1
   975	JMP	final1
   976
   977aes17to32:
   978	// make second starting seed
   979	PXOR	runtime·aeskeysched+16(SB), X1
   980	AESENC	X1, X1
   981
   982	// load data to be hashed
   983	MOVOU	(AX), X2
   984	MOVOU	-16(AX)(CX*1), X3
   985
   986	// xor with seed
   987	PXOR	X0, X2
   988	PXOR	X1, X3
   989
   990	// scramble 3 times
   991	AESENC	X2, X2
   992	AESENC	X3, X3
   993	AESENC	X2, X2
   994	AESENC	X3, X3
   995	AESENC	X2, X2
   996	AESENC	X3, X3
   997
   998	// combine results
   999	PXOR	X3, X2
  1000	MOVQ	X2, (DX)
  1001	RET
  1002
  1003aes33to64:
  1004	// make 3 more starting seeds
  1005	MOVO	X1, X2
  1006	MOVO	X1, X3
  1007	PXOR	runtime·aeskeysched+16(SB), X1
  1008	PXOR	runtime·aeskeysched+32(SB), X2
  1009	PXOR	runtime·aeskeysched+48(SB), X3
  1010	AESENC	X1, X1
  1011	AESENC	X2, X2
  1012	AESENC	X3, X3
  1013
  1014	MOVOU	(AX), X4
  1015	MOVOU	16(AX), X5
  1016	MOVOU	-32(AX)(CX*1), X6
  1017	MOVOU	-16(AX)(CX*1), X7
  1018
  1019	PXOR	X0, X4
  1020	PXOR	X1, X5
  1021	PXOR	X2, X6
  1022	PXOR	X3, X7
  1023
  1024	AESENC	X4, X4
  1025	AESENC	X5, X5
  1026	AESENC	X6, X6
  1027	AESENC	X7, X7
  1028
  1029	AESENC	X4, X4
  1030	AESENC	X5, X5
  1031	AESENC	X6, X6
  1032	AESENC	X7, X7
  1033
  1034	AESENC	X4, X4
  1035	AESENC	X5, X5
  1036	AESENC	X6, X6
  1037	AESENC	X7, X7
  1038
  1039	PXOR	X6, X4
  1040	PXOR	X7, X5
  1041	PXOR	X5, X4
  1042	MOVQ	X4, (DX)
  1043	RET
  1044
  1045aes65to128:
  1046	// make 7 more starting seeds
  1047	MOVO	X1, X2
  1048	MOVO	X1, X3
  1049	MOVO	X1, X4
  1050	MOVO	X1, X5
  1051	MOVO	X1, X6
  1052	MOVO	X1, X7
  1053	PXOR	runtime·aeskeysched+16(SB), X1
  1054	PXOR	runtime·aeskeysched+32(SB), X2
  1055	PXOR	runtime·aeskeysched+48(SB), X3
  1056	PXOR	runtime·aeskeysched+64(SB), X4
  1057	PXOR	runtime·aeskeysched+80(SB), X5
  1058	PXOR	runtime·aeskeysched+96(SB), X6
  1059	PXOR	runtime·aeskeysched+112(SB), X7
  1060	AESENC	X1, X1
  1061	AESENC	X2, X2
  1062	AESENC	X3, X3
  1063	AESENC	X4, X4
  1064	AESENC	X5, X5
  1065	AESENC	X6, X6
  1066	AESENC	X7, X7
  1067
  1068	// load data
  1069	MOVOU	(AX), X8
  1070	MOVOU	16(AX), X9
  1071	MOVOU	32(AX), X10
  1072	MOVOU	48(AX), X11
  1073	MOVOU	-64(AX)(CX*1), X12
  1074	MOVOU	-48(AX)(CX*1), X13
  1075	MOVOU	-32(AX)(CX*1), X14
  1076	MOVOU	-16(AX)(CX*1), X15
  1077
  1078	// xor with seed
  1079	PXOR	X0, X8
  1080	PXOR	X1, X9
  1081	PXOR	X2, X10
  1082	PXOR	X3, X11
  1083	PXOR	X4, X12
  1084	PXOR	X5, X13
  1085	PXOR	X6, X14
  1086	PXOR	X7, X15
  1087
  1088	// scramble 3 times
  1089	AESENC	X8, X8
  1090	AESENC	X9, X9
  1091	AESENC	X10, X10
  1092	AESENC	X11, X11
  1093	AESENC	X12, X12
  1094	AESENC	X13, X13
  1095	AESENC	X14, X14
  1096	AESENC	X15, X15
  1097
  1098	AESENC	X8, X8
  1099	AESENC	X9, X9
  1100	AESENC	X10, X10
  1101	AESENC	X11, X11
  1102	AESENC	X12, X12
  1103	AESENC	X13, X13
  1104	AESENC	X14, X14
  1105	AESENC	X15, X15
  1106
  1107	AESENC	X8, X8
  1108	AESENC	X9, X9
  1109	AESENC	X10, X10
  1110	AESENC	X11, X11
  1111	AESENC	X12, X12
  1112	AESENC	X13, X13
  1113	AESENC	X14, X14
  1114	AESENC	X15, X15
  1115
  1116	// combine results
  1117	PXOR	X12, X8
  1118	PXOR	X13, X9
  1119	PXOR	X14, X10
  1120	PXOR	X15, X11
  1121	PXOR	X10, X8
  1122	PXOR	X11, X9
  1123	PXOR	X9, X8
  1124	MOVQ	X8, (DX)
  1125	RET
  1126
  1127aes129plus:
  1128	// make 7 more starting seeds
  1129	MOVO	X1, X2
  1130	MOVO	X1, X3
  1131	MOVO	X1, X4
  1132	MOVO	X1, X5
  1133	MOVO	X1, X6
  1134	MOVO	X1, X7
  1135	PXOR	runtime·aeskeysched+16(SB), X1
  1136	PXOR	runtime·aeskeysched+32(SB), X2
  1137	PXOR	runtime·aeskeysched+48(SB), X3
  1138	PXOR	runtime·aeskeysched+64(SB), X4
  1139	PXOR	runtime·aeskeysched+80(SB), X5
  1140	PXOR	runtime·aeskeysched+96(SB), X6
  1141	PXOR	runtime·aeskeysched+112(SB), X7
  1142	AESENC	X1, X1
  1143	AESENC	X2, X2
  1144	AESENC	X3, X3
  1145	AESENC	X4, X4
  1146	AESENC	X5, X5
  1147	AESENC	X6, X6
  1148	AESENC	X7, X7
  1149
  1150	// start with last (possibly overlapping) block
  1151	MOVOU	-128(AX)(CX*1), X8
  1152	MOVOU	-112(AX)(CX*1), X9
  1153	MOVOU	-96(AX)(CX*1), X10
  1154	MOVOU	-80(AX)(CX*1), X11
  1155	MOVOU	-64(AX)(CX*1), X12
  1156	MOVOU	-48(AX)(CX*1), X13
  1157	MOVOU	-32(AX)(CX*1), X14
  1158	MOVOU	-16(AX)(CX*1), X15
  1159
  1160	// xor in seed
  1161	PXOR	X0, X8
  1162	PXOR	X1, X9
  1163	PXOR	X2, X10
  1164	PXOR	X3, X11
  1165	PXOR	X4, X12
  1166	PXOR	X5, X13
  1167	PXOR	X6, X14
  1168	PXOR	X7, X15
  1169
  1170	// compute number of remaining 128-byte blocks
  1171	DECQ	CX
  1172	SHRQ	$7, CX
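      	// e.g. 129..256 input bytes leave 1 iteration of the loop below,
      	// 257..384 bytes leave 2, and so on; the last 128 (possibly
      	// overlapping) bytes were already xored in above.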
  1173
  1174aesloop:
  1175	// scramble state
  1176	AESENC	X8, X8
  1177	AESENC	X9, X9
  1178	AESENC	X10, X10
  1179	AESENC	X11, X11
  1180	AESENC	X12, X12
  1181	AESENC	X13, X13
  1182	AESENC	X14, X14
  1183	AESENC	X15, X15
  1184
  1185	// scramble state, xor in a block
  1186	MOVOU	(AX), X0
  1187	MOVOU	16(AX), X1
  1188	MOVOU	32(AX), X2
  1189	MOVOU	48(AX), X3
  1190	AESENC	X0, X8
  1191	AESENC	X1, X9
  1192	AESENC	X2, X10
  1193	AESENC	X3, X11
  1194	MOVOU	64(AX), X4
  1195	MOVOU	80(AX), X5
  1196	MOVOU	96(AX), X6
  1197	MOVOU	112(AX), X7
  1198	AESENC	X4, X12
  1199	AESENC	X5, X13
  1200	AESENC	X6, X14
  1201	AESENC	X7, X15
  1202
  1203	ADDQ	$128, AX
  1204	DECQ	CX
  1205	JNE	aesloop
  1206
  1207	// 3 more scrambles to finish
  1208	AESENC	X8, X8
  1209	AESENC	X9, X9
  1210	AESENC	X10, X10
  1211	AESENC	X11, X11
  1212	AESENC	X12, X12
  1213	AESENC	X13, X13
  1214	AESENC	X14, X14
  1215	AESENC	X15, X15
  1216	AESENC	X8, X8
  1217	AESENC	X9, X9
  1218	AESENC	X10, X10
  1219	AESENC	X11, X11
  1220	AESENC	X12, X12
  1221	AESENC	X13, X13
  1222	AESENC	X14, X14
  1223	AESENC	X15, X15
  1224	AESENC	X8, X8
  1225	AESENC	X9, X9
  1226	AESENC	X10, X10
  1227	AESENC	X11, X11
  1228	AESENC	X12, X12
  1229	AESENC	X13, X13
  1230	AESENC	X14, X14
  1231	AESENC	X15, X15
  1232
  1233	PXOR	X12, X8
  1234	PXOR	X13, X9
  1235	PXOR	X14, X10
  1236	PXOR	X15, X11
  1237	PXOR	X10, X8
  1238	PXOR	X11, X9
  1239	PXOR	X9, X8
  1240	MOVQ	X8, (DX)
  1241	RET
  1242
  1243// func memhash32(p unsafe.Pointer, h uintptr) uintptr
  1244TEXT runtime·memhash32(SB),NOSPLIT,$0-24
  1245	CMPB	runtime·useAeshash(SB), $0
  1246	JEQ	noaes
  1247	MOVQ	p+0(FP), AX	// ptr to data
  1248	MOVQ	h+8(FP), X0	// seed
  1249	PINSRD	$2, (AX), X0	// data
  1250	AESENC	runtime·aeskeysched+0(SB), X0
  1251	AESENC	runtime·aeskeysched+16(SB), X0
  1252	AESENC	runtime·aeskeysched+32(SB), X0
  1253	MOVQ	X0, ret+16(FP)
  1254	RET
  1255noaes:
  1256	JMP	runtime·memhash32Fallback(SB)
  1257
  1258// func memhash64(p unsafe.Pointer, h uintptr) uintptr
  1259TEXT runtime·memhash64(SB),NOSPLIT,$0-24
  1260	CMPB	runtime·useAeshash(SB), $0
  1261	JEQ	noaes
  1262	MOVQ	p+0(FP), AX	// ptr to data
  1263	MOVQ	h+8(FP), X0	// seed
  1264	PINSRQ	$1, (AX), X0	// data
  1265	AESENC	runtime·aeskeysched+0(SB), X0
  1266	AESENC	runtime·aeskeysched+16(SB), X0
  1267	AESENC	runtime·aeskeysched+32(SB), X0
  1268	MOVQ	X0, ret+16(FP)
  1269	RET
  1270noaes:
  1271	JMP	runtime·memhash64Fallback(SB)
  1272
  1273// simple mask to get rid of data in the high part of the register.
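      // Each 16-byte entry k (at offset k*16) keeps the low k bytes and zeroes
      // the rest, e.g. entry 3 is ff ff ff 00 ... 00. The code above doubles
      // CX and indexes with (CX*8), i.e. offset = length*16.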
  1274DATA masks<>+0x00(SB)/8, $0x0000000000000000
  1275DATA masks<>+0x08(SB)/8, $0x0000000000000000
  1276DATA masks<>+0x10(SB)/8, $0x00000000000000ff
  1277DATA masks<>+0x18(SB)/8, $0x0000000000000000
  1278DATA masks<>+0x20(SB)/8, $0x000000000000ffff
  1279DATA masks<>+0x28(SB)/8, $0x0000000000000000
  1280DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
  1281DATA masks<>+0x38(SB)/8, $0x0000000000000000
  1282DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
  1283DATA masks<>+0x48(SB)/8, $0x0000000000000000
  1284DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
  1285DATA masks<>+0x58(SB)/8, $0x0000000000000000
  1286DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
  1287DATA masks<>+0x68(SB)/8, $0x0000000000000000
  1288DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
  1289DATA masks<>+0x78(SB)/8, $0x0000000000000000
  1290DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
  1291DATA masks<>+0x88(SB)/8, $0x0000000000000000
  1292DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
  1293DATA masks<>+0x98(SB)/8, $0x00000000000000ff
  1294DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
  1295DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
  1296DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
  1297DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
  1298DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
  1299DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
  1300DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
  1301DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
  1302DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
  1303DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
  1304DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
  1305DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
  1306GLOBL masks<>(SB),RODATA,$256
  1307
  1308// func checkASM() bool
  1309TEXT ·checkASM(SB),NOSPLIT,$0-1
  1310	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
  1311	MOVQ	$masks<>(SB), AX
  1312	MOVQ	$shifts<>(SB), BX
  1313	ORQ	BX, AX
  1314	TESTQ	$15, AX
  1315	SETEQ	ret+0(FP)
  1316	RET
  1317
  1318// These are arguments to pshufb. They move data down from
  1319// the high bytes of the register to the low bytes of the register.
  1320// The index is how many bytes to move.
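      // Each 16-byte entry k moves the top k source bytes down to the low k
      // result bytes and fills the remaining index bytes with 0xff, which
      // PSHUFB turns into zero bytes (an index with its high bit set clears
      // that destination byte). E.g. entry 3 begins 0d 0e 0f ff ff ...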
  1321DATA shifts<>+0x00(SB)/8, $0x0000000000000000
  1322DATA shifts<>+0x08(SB)/8, $0x0000000000000000
  1323DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
  1324DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
  1325DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
  1326DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
  1327DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
  1328DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
  1329DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
  1330DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
  1331DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
  1332DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
  1333DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
  1334DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
  1335DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
  1336DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
  1337DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
  1338DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
  1339DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
  1340DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
  1341DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
  1342DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
  1343DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
  1344DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
  1345DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
  1346DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
  1347DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
  1348DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
  1349DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
  1350DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
  1351DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
  1352DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
  1353GLOBL shifts<>(SB),RODATA,$256
  1354
  1355TEXT runtime·return0(SB), NOSPLIT, $0
  1356	MOVL	$0, AX
  1357	RET
  1358
  1359
  1360// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1361// Must obey the gcc calling convention.
  1362TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1363	get_tls(CX)
  1364	MOVQ	g(CX), AX
  1365	MOVQ	g_m(AX), AX
  1366	MOVQ	m_curg(AX), AX
  1367	MOVQ	(g_stack+stack_hi)(AX), AX
  1368	RET
  1369
  1370// The top-most function running on a goroutine
  1371// returns to goexit+PCQuantum.
  1372TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1373	BYTE	$0x90	// NOP
  1374	CALL	runtime·goexit1(SB)	// does not return
  1375	// traceback from goexit1 must hit code range of goexit
  1376	BYTE	$0x90	// NOP
  1377
  1378// This is called from .init_array and follows the platform, not Go, ABI.
  1379TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1380	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
  1381	MOVQ	runtime·lastmoduledatap(SB), AX
  1382	MOVQ	DI, moduledata_next(AX)
  1383	MOVQ	DI, runtime·lastmoduledatap(SB)
  1384	POPQ	R15
  1385	RET
  1386
  1387// gcWriteBarrier performs a heap pointer write and informs the GC.
  1388//
  1389// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1390// - DI is the destination of the write
  1391// - AX is the value being written at DI
  1392// It clobbers FLAGS. It does not clobber any general-purpose registers,
  1393// but may clobber others (e.g., SSE registers).
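      // In Go-like pseudocode the fast path below is roughly (a sketch):
      //
      //	buf := &getg().m.p.ptr().wbBuf
      //	buf.next += 16
      //	record AX (the new value) and *DI (the overwritten value) at buf.next-16
      //	if buf.next == buf.end { wbBufFlush(DI, AX) }
      //	*DI = AX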
  1394TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
  1395	// Save the registers clobbered by the fast path. This is slightly
  1396	// faster than having the caller spill these.
  1397	MOVQ	R14, 104(SP)
  1398	MOVQ	R13, 112(SP)
  1399	// TODO: Consider passing g.m.p in as an argument so they can be shared
  1400	// across a sequence of write barriers.
  1401	get_tls(R13)
  1402	MOVQ	g(R13), R13
  1403	MOVQ	g_m(R13), R13
  1404	MOVQ	m_p(R13), R13
  1405	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
  1406	// Increment wbBuf.next position.
  1407	LEAQ	16(R14), R14
  1408	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
  1409	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)
  1410	// Record the write.
  1411	MOVQ	AX, -16(R14)	// Record value
  1412	// Note: This turns bad pointer writes into bad
  1413	// pointer reads, which could be confusing. We could avoid
  1414	// reading from obviously bad pointers, which would
  1415	// take care of the vast majority of these. We could
  1416	// patch this up in the signal handler, or use XCHG to
  1417	// combine the read and the write.
  1418	MOVQ	(DI), R13
  1419	MOVQ	R13, -8(R14)	// Record *slot
  1420	// Is the buffer full? (flags set in CMPQ above)
  1421	JEQ	flush
  1422ret:
  1423	MOVQ	104(SP), R14
  1424	MOVQ	112(SP), R13
  1425	// Do the write.
  1426	MOVQ	AX, (DI)
  1427	RET
  1428
  1429flush:
  1430	// Save all general purpose registers since these could be
  1431	// clobbered by wbBufFlush and were not saved by the caller.
  1432	// It is possible for wbBufFlush to clobber other registers
  1433	// (e.g., SSE registers), but the compiler takes care of saving
  1434	// those in the caller if necessary. This strikes a balance
  1435	// with registers that are likely to be used.
  1436	//
  1437	// We don't have type information for these, but all code under
  1438	// here is NOSPLIT, so nothing will observe these.
  1439	//
  1440	// TODO: We could strike a different balance; e.g., saving X0
  1441	// and not saving GP registers that are less likely to be used.
  1442	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
  1443	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
  1444	MOVQ	BX, 16(SP)
  1445	MOVQ	CX, 24(SP)
  1446	MOVQ	DX, 32(SP)
  1447	// DI already saved
  1448	MOVQ	SI, 40(SP)
  1449	MOVQ	BP, 48(SP)
  1450	MOVQ	R8, 56(SP)
  1451	MOVQ	R9, 64(SP)
  1452	MOVQ	R10, 72(SP)
  1453	MOVQ	R11, 80(SP)
  1454	MOVQ	R12, 88(SP)
  1455	// R13 already saved
  1456	// R14 already saved
  1457	MOVQ	R15, 96(SP)
  1458
  1459	// This takes arguments DI and AX
  1460	CALL	runtime·wbBufFlush(SB)
  1461
  1462	MOVQ	0(SP), DI
  1463	MOVQ	8(SP), AX
  1464	MOVQ	16(SP), BX
  1465	MOVQ	24(SP), CX
  1466	MOVQ	32(SP), DX
  1467	MOVQ	40(SP), SI
  1468	MOVQ	48(SP), BP
  1469	MOVQ	56(SP), R8
  1470	MOVQ	64(SP), R9
  1471	MOVQ	72(SP), R10
  1472	MOVQ	80(SP), R11
  1473	MOVQ	88(SP), R12
  1474	MOVQ	96(SP), R15
  1475	JMP	ret
  1476
  1477DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
  1478GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1479
  1480// debugCallV1 is the entry point for debugger-injected function
  1481// calls on running goroutines. It informs the runtime that a
  1482// debug call has been injected and creates a call frame for the
  1483// debugger to fill in.
  1484//
  1485// To inject a function call, a debugger should:
  1486// 1. Check that the goroutine is in state _Grunning and that
  1487//    there are at least 256 bytes free on the stack.
  1488// 2. Push the current PC on the stack (updating SP).
  1489// 3. Write the desired argument frame size at SP-16 (using the SP
  1490//    after step 2).
  1491// 4. Save all machine registers (including flags and XMM registers)
  1492//    so they can be restored later by the debugger.
  1493// 5. Set the PC to debugCallV1 and resume execution.
  1494//
  1495// If the goroutine is in state _Grunnable, then it's not generally
  1496// safe to inject a call because it may return out via other runtime
  1497// operations. Instead, the debugger should unwind the stack to find
  1498// the return to non-runtime code, add a temporary breakpoint there,
  1499// and inject the call once that breakpoint is hit.
  1500//
  1501// If the goroutine is in any other state, it's not safe to inject a call.
  1502//
  1503// This function communicates back to the debugger by setting RAX and
  1504// invoking INT3 to raise a breakpoint signal. See the comments in the
  1505// implementation for the protocol the debugger is expected to
  1506// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
  1507//
  1508// The debugger must ensure that any pointers passed to the function
  1509// obey escape analysis requirements. Specifically, it must not pass
  1510// a stack pointer to an escaping argument. debugCallV1 cannot check
  1511// this invariant.
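      // Summary of the AX values this function reports via INT3 (see the code
      // below for details):
      //
      //	8  - the call cannot be injected; a reason string {ptr, len} is at 0(SP)
      //	0  - an argument frame is ready at SP; debugger writes the args, pushes
      //	     the trapping PC, sets the PC to the target, and resumes
      //	1  - the injected call returned; results are at SP
      //	2  - the injected call panicked; the panic value (interface{}) is at SP
      //	16 - done; restore all registers except RIP and RSP and resume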
  1512TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
  1513	// Save all registers that may contain pointers in GC register
  1514	// map order (see ssa.registersAMD64). This makes it possible
  1515	// to copy the stack while updating pointers currently held in
  1516	// registers, and for the GC to find roots in registers.
  1517	//
  1518	// We can't do anything that might clobber any of these
  1519	// registers before this.
  1520	MOVQ	R15, r15-(14*8+8)(SP)
  1521	MOVQ	R14, r14-(13*8+8)(SP)
  1522	MOVQ	R13, r13-(12*8+8)(SP)
  1523	MOVQ	R12, r12-(11*8+8)(SP)
  1524	MOVQ	R11, r11-(10*8+8)(SP)
  1525	MOVQ	R10, r10-(9*8+8)(SP)
  1526	MOVQ	R9, r9-(8*8+8)(SP)
  1527	MOVQ	R8, r8-(7*8+8)(SP)
  1528	MOVQ	DI, di-(6*8+8)(SP)
  1529	MOVQ	SI, si-(5*8+8)(SP)
  1530	MOVQ	BP, bp-(4*8+8)(SP)
  1531	MOVQ	BX, bx-(3*8+8)(SP)
  1532	MOVQ	DX, dx-(2*8+8)(SP)
  1533	// Save the frame size before we clobber it. Either of the last
  1534	// saves could clobber this depending on whether there's a saved BP.
  1535	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
  1536	MOVQ	CX, cx-(1*8+8)(SP)
  1537	MOVQ	AX, ax-(0*8+8)(SP)
  1538
  1539	// Save the argument frame size.
  1540	MOVQ	DX, frameSize-128(SP)
  1541
  1542	// Perform a safe-point check.
  1543	MOVQ	retpc-8(FP), AX	// Caller's PC
  1544	MOVQ	AX, 0(SP)
  1545	CALL	runtime·debugCallCheck(SB)
  1546	MOVQ	8(SP), AX
  1547	TESTQ	AX, AX
  1548	JZ	good
  1549	// The safety check failed. Put the reason string at the top
  1550	// of the stack.
  1551	MOVQ	AX, 0(SP)
  1552	MOVQ	16(SP), AX
  1553	MOVQ	AX, 8(SP)
  1554	// Set AX to 8 and invoke INT3. The debugger should get the
  1555	// reason a call can't be injected from the top of the stack
  1556	// and resume execution.
  1557	MOVQ	$8, AX
  1558	BYTE	$0xcc
  1559	JMP	restore
  1560
  1561good:
  1562	// Registers are saved and it's safe to make a call.
  1563	// Open up a call frame, moving the stack if necessary.
  1564	//
  1565	// Once the frame is allocated, this will set AX to 0 and
  1566	// invoke INT3. The debugger should write the argument
  1567	// frame for the call at SP, push the trapping PC on the
  1568	// stack, set the PC to the function to call, set RCX to point
  1569	// to the closure (if a closure call), and resume execution.
  1570	//
  1571	// If the function returns, this will set AX to 1 and invoke
  1572	// INT3. The debugger can then inspect any return value saved
  1573	// on the stack at SP and resume execution again.
  1574	//
  1575	// If the function panics, this will set AX to 2 and invoke INT3.
  1576	// The interface{} value of the panic will be at SP. The debugger
  1577	// can inspect the panic value and resume execution again.
  1578#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1579	CMPQ	AX, $MAXSIZE;			\
  1580	JA	5(PC);				\
  1581	MOVQ	$NAME(SB), AX;			\
  1582	MOVQ	AX, 0(SP);			\
  1583	CALL	runtime·debugCallWrap(SB);	\
  1584	JMP	restore
  1585
  1586	MOVQ	frameSize-128(SP), AX
  1587	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1588	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1589	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1590	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1591	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1592	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1593	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1594	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1595	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1596	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1597	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1598	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1599	// The frame size is too large. Report the error.
  1600	MOVQ	$debugCallFrameTooLarge<>(SB), AX
  1601	MOVQ	AX, 0(SP)
  1602	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
  1603	MOVQ	$8, AX
  1604	BYTE	$0xcc
  1605	JMP	restore
  1606
  1607restore:
  1608	// Calls and failures resume here.
  1609	//
  1610	// Set AX to 16 and invoke INT3. The debugger should restore
  1611	// all registers except RIP and RSP and resume execution.
  1612	MOVQ	$16, AX
  1613	BYTE	$0xcc
  1614	// We must not modify flags after this point.
  1615
  1616	// Restore pointer-containing registers, which may have been
  1617	// modified from the debugger's copy by stack copying.
  1618	MOVQ	ax-(0*8+8)(SP), AX
  1619	MOVQ	cx-(1*8+8)(SP), CX
  1620	MOVQ	dx-(2*8+8)(SP), DX
  1621	MOVQ	bx-(3*8+8)(SP), BX
  1622	MOVQ	bp-(4*8+8)(SP), BP
  1623	MOVQ	si-(5*8+8)(SP), SI
  1624	MOVQ	di-(6*8+8)(SP), DI
  1625	MOVQ	r8-(7*8+8)(SP), R8
  1626	MOVQ	r9-(8*8+8)(SP), R9
  1627	MOVQ	r10-(9*8+8)(SP), R10
  1628	MOVQ	r11-(10*8+8)(SP), R11
  1629	MOVQ	r12-(11*8+8)(SP), R12
  1630	MOVQ	r13-(12*8+8)(SP), R13
  1631	MOVQ	r14-(13*8+8)(SP), R14
  1632	MOVQ	r15-(14*8+8)(SP), R15
  1633
  1634	RET
  1635
  1636// runtime.debugCallCheck assumes that functions defined with the
  1637// DEBUG_CALL_FN macro are safe points to inject calls.
  1638#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  1639TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  1640	NO_LOCAL_POINTERS;			\
  1641	MOVQ	$0, AX;				\
  1642	BYTE	$0xcc;				\
  1643	MOVQ	$1, AX;				\
  1644	BYTE	$0xcc;				\
  1645	RET
  1646DEBUG_CALL_FN(debugCall32<>, 32)
  1647DEBUG_CALL_FN(debugCall64<>, 64)
  1648DEBUG_CALL_FN(debugCall128<>, 128)
  1649DEBUG_CALL_FN(debugCall256<>, 256)
  1650DEBUG_CALL_FN(debugCall512<>, 512)
  1651DEBUG_CALL_FN(debugCall1024<>, 1024)
  1652DEBUG_CALL_FN(debugCall2048<>, 2048)
  1653DEBUG_CALL_FN(debugCall4096<>, 4096)
  1654DEBUG_CALL_FN(debugCall8192<>, 8192)
  1655DEBUG_CALL_FN(debugCall16384<>, 16384)
  1656DEBUG_CALL_FN(debugCall32768<>, 32768)
  1657DEBUG_CALL_FN(debugCall65536<>, 65536)
  1658
  1659// func debugCallPanicked(val interface{})
  1660TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  1661	// Copy the panic value to the top of stack.
  1662	MOVQ	val_type+0(FP), AX
  1663	MOVQ	AX, 0(SP)
  1664	MOVQ	val_data+8(FP), AX
  1665	MOVQ	AX, 8(SP)
  1666	MOVQ	$2, AX
  1667	BYTE	$0xcc
  1668	RET
  1669
  1670// Note: these functions use a special calling convention to save generated code space.
  1671// Arguments are passed in registers, but the space for those arguments is allocated
  1672// in the caller's stack frame. These stubs write the args into that stack space and
  1673// then tail call to the corresponding runtime handler.
  1674// The tail call makes these stubs disappear in backtraces.
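      // As a sketch, for runtime·panicIndex the compiler emits something like
      //
      //	MOVQ	idx, AX
      //	MOVQ	len, CX
      //	CALL	runtime·panicIndex(SB)
      //
      // with 16 bytes of outgoing-argument space reserved in its own frame;
      // the stub stores AX/CX there and tail-calls goPanicIndex(x, y int),
      // which raises the bounds-check panic. The register pairs differ per
      // stub, as the stores below show.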
  1675TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
  1676	MOVQ	AX, x+0(FP)
  1677	MOVQ	CX, y+8(FP)
  1678	JMP	runtime·goPanicIndex(SB)
  1679TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
  1680	MOVQ	AX, x+0(FP)
  1681	MOVQ	CX, y+8(FP)
  1682	JMP	runtime·goPanicIndexU(SB)
  1683TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
  1684	MOVQ	CX, x+0(FP)
  1685	MOVQ	DX, y+8(FP)
  1686	JMP	runtime·goPanicSliceAlen(SB)
  1687TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
  1688	MOVQ	CX, x+0(FP)
  1689	MOVQ	DX, y+8(FP)
  1690	JMP	runtime·goPanicSliceAlenU(SB)
  1691TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
  1692	MOVQ	CX, x+0(FP)
  1693	MOVQ	DX, y+8(FP)
  1694	JMP	runtime·goPanicSliceAcap(SB)
  1695TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
  1696	MOVQ	CX, x+0(FP)
  1697	MOVQ	DX, y+8(FP)
  1698	JMP	runtime·goPanicSliceAcapU(SB)
  1699TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
  1700	MOVQ	AX, x+0(FP)
  1701	MOVQ	CX, y+8(FP)
  1702	JMP	runtime·goPanicSliceB(SB)
  1703TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
  1704	MOVQ	AX, x+0(FP)
  1705	MOVQ	CX, y+8(FP)
  1706	JMP	runtime·goPanicSliceBU(SB)
  1707TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
  1708	MOVQ	DX, x+0(FP)
  1709	MOVQ	BX, y+8(FP)
  1710	JMP	runtime·goPanicSlice3Alen(SB)
  1711TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
  1712	MOVQ	DX, x+0(FP)
  1713	MOVQ	BX, y+8(FP)
  1714	JMP	runtime·goPanicSlice3AlenU(SB)
  1715TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
  1716	MOVQ	DX, x+0(FP)
  1717	MOVQ	BX, y+8(FP)
  1718	JMP	runtime·goPanicSlice3Acap(SB)
  1719TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
  1720	MOVQ	DX, x+0(FP)
  1721	MOVQ	BX, y+8(FP)
  1722	JMP	runtime·goPanicSlice3AcapU(SB)
  1723TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
  1724	MOVQ	CX, x+0(FP)
  1725	MOVQ	DX, y+8(FP)
  1726	JMP	runtime·goPanicSlice3B(SB)
  1727TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
  1728	MOVQ	CX, x+0(FP)
  1729	MOVQ	DX, y+8(FP)
  1730	JMP	runtime·goPanicSlice3BU(SB)
  1731TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
  1732	MOVQ	AX, x+0(FP)
  1733	MOVQ	CX, y+8(FP)
  1734	JMP	runtime·goPanicSlice3C(SB)
  1735TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
  1736	MOVQ	AX, x+0(FP)
  1737	MOVQ	CX, y+8(FP)
  1738	JMP	runtime·goPanicSlice3CU(SB)
  1739
  1740#ifdef GOOS_android
  1741// Use the free TLS_SLOT_APP slot #2 on Android Q.
  1742// Earlier Android versions are set up in gcc_android.c.
  1743DATA runtime·tls_g+0(SB)/8, $16
  1744GLOBL runtime·tls_g+0(SB), NOPTR, $8
  1745#endif
