Text file src/runtime/asm_arm64.s

Documentation: runtime

     1	// Copyright 2015 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "tls_arm64.h"
     8	#include "funcdata.h"
     9	#include "textflag.h"
    10	
    11	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    12		// SP = stack; R0 = argc; R1 = argv
    13	
    14		SUB	$32, RSP
    15		MOVW	R0, 8(RSP) // argc
    16		MOVD	R1, 16(RSP) // argv
    17	
    18		// create istack out of the given (operating system) stack.
    19		// _cgo_init may update stackguard.
    20		MOVD	$runtime·g0(SB), g
    21		MOVD RSP, R7
    22		MOVD	$(-64*1024)(R7), R0
    23		MOVD	R0, g_stackguard0(g)
    24		MOVD	R0, g_stackguard1(g)
    25		MOVD	R0, (g_stack+stack_lo)(g)
    26		MOVD	R7, (g_stack+stack_hi)(g)
    27	
    28		// if there is a _cgo_init, call it using the gcc ABI.
    29		MOVD	_cgo_init(SB), R12
    30		CMP	$0, R12
    31		BEQ	nocgo
    32	
    33		MRS_TPIDR_R0			// load TLS base pointer
    34		MOVD	R0, R3			// arg 3: TLS base pointer
    35	#ifdef TLSG_IS_VARIABLE
    36		MOVD	$runtime·tls_g(SB), R2 	// arg 2: &tls_g
    37	#else
    38		MOVD	$0, R2		        // arg 2: not used when using platform's TLS
    39	#endif
    40		MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
    41		MOVD	g, R0			// arg 0: G
    42		BL	(R12)
    46	
    47	nocgo:
    48		// update stackguard after _cgo_init
    49		MOVD	(g_stack+stack_lo)(g), R0
    50		ADD	$const__StackGuard, R0
    51		MOVD	R0, g_stackguard0(g)
    52		MOVD	R0, g_stackguard1(g)
    53	
    54		// set the per-goroutine and per-mach "registers"
    55		MOVD	$runtime·m0(SB), R0
    56	
    57		// save m->g0 = g0
    58		MOVD	g, m_g0(R0)
    59		// save m0 to g0->m
    60		MOVD	R0, g_m(g)
    61	
    62		BL	runtime·check(SB)
    63	
    64		MOVW	8(RSP), R0	// copy argc
    65		MOVW	R0, -8(RSP)
    66		MOVD	16(RSP), R0		// copy argv
    67		MOVD	R0, 0(RSP)
    68		BL	runtime·args(SB)
    69		BL	runtime·osinit(SB)
    70		BL	runtime·schedinit(SB)
    71	
    72		// create a new goroutine to start program
    73		MOVD	$runtime·mainPC(SB), R0		// entry
    74		MOVD	RSP, R7
    75		MOVD.W	$0, -8(R7)
    76		MOVD.W	R0, -8(R7)
    77		MOVD.W	$0, -8(R7)
    78		MOVD.W	$0, -8(R7)
    79		MOVD	R7, RSP
    80		BL	runtime·newproc(SB)
    81		ADD	$32, RSP
    82	
    83		// start this M
    84		BL	runtime·mstart(SB)
    85	
    86		MOVD	$0, R0
    87		MOVD	R0, (R0)	// boom
    88		UNDEF
    89	
    90	DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
    91	GLOBL	runtime·mainPC(SB),RODATA,$8
    92	
    93	TEXT runtime·breakpoint(SB),NOSPLIT,$-8-0
    94		BRK
    95		RET
    96	
    97	TEXT runtime·asminit(SB),NOSPLIT,$-8-0
    98		RET
    99	
   100	/*
   101	 *  go-routine
   102	 */
   103	
   104	// void gosave(Gobuf*)
   105	// save state in Gobuf; setjmp
   106	TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   107		MOVD	buf+0(FP), R3
   108		MOVD	RSP, R0
   109		MOVD	R0, gobuf_sp(R3)
   110		MOVD	LR, gobuf_pc(R3)
   111		MOVD	g, gobuf_g(R3)
   112		MOVD	ZR, gobuf_lr(R3)
   113		MOVD	ZR, gobuf_ret(R3)
   114		// Assert ctxt is zero. See func save.
   115		MOVD	gobuf_ctxt(R3), R0
   116		CMP	$0, R0
   117		BEQ	2(PC)
   118		CALL	runtime·badctxt(SB)
   119		RET
   120	
   121	// void gogo(Gobuf*)
   122	// restore state from Gobuf; longjmp
   123	TEXT runtime·gogo(SB), NOSPLIT, $24-8
   124		MOVD	buf+0(FP), R5
   125		MOVD	gobuf_g(R5), g
   126		BL	runtime·save_g(SB)
   127	
   128		MOVD	0(g), R4	// make sure g is not nil
   129		MOVD	gobuf_sp(R5), R0
   130		MOVD	R0, RSP
   131		MOVD	gobuf_lr(R5), LR
   132		MOVD	gobuf_ret(R5), R0
   133		MOVD	gobuf_ctxt(R5), R26
   134		MOVD	$0, gobuf_sp(R5)
   135		MOVD	$0, gobuf_ret(R5)
   136		MOVD	$0, gobuf_lr(R5)
   137		MOVD	$0, gobuf_ctxt(R5)
   138		CMP	ZR, ZR // set condition codes for == test, needed by stack split
   139		MOVD	gobuf_pc(R5), R6
   140		B	(R6)
   141	
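// The fields saved by gosave and restored by gogo mirror the runtime's gobuf
// type. Illustrative sketch only (field names follow type gobuf in
// runtime2.go; the gobuf_* offsets used above are generated into go_asm.h):
//
//	type gobuf struct {
//		sp   uintptr        // stack pointer (gobuf_sp)
//		pc   uintptr        // program counter (gobuf_pc)
//		g    guintptr       // goroutine the buffer belongs to (gobuf_g)
//		ctxt unsafe.Pointer // closure context; must be nil here, see badctxt
//		ret  sys.Uintreg    // return value slot (gobuf_ret)
//		lr   uintptr        // saved link register (gobuf_lr)
//	}
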
   142	// void mcall(fn func(*g))
   143	// Switch to m->g0's stack, call fn(g).
   144	// Fn must never return. It should gogo(&g->sched)
   145	// to keep running g.
   146	TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   147		// Save caller state in g->sched
   148		MOVD	RSP, R0
   149		MOVD	R0, (g_sched+gobuf_sp)(g)
   150		MOVD	LR, (g_sched+gobuf_pc)(g)
   151		MOVD	$0, (g_sched+gobuf_lr)(g)
   152		MOVD	g, (g_sched+gobuf_g)(g)
   153	
   154		// Switch to m->g0 & its stack, call fn.
   155		MOVD	g, R3
   156		MOVD	g_m(g), R8
   157		MOVD	m_g0(R8), g
   158		BL	runtime·save_g(SB)
   159		CMP	g, R3
   160		BNE	2(PC)
   161		B	runtime·badmcall(SB)
   162		MOVD	fn+0(FP), R26			// context
   163		MOVD	0(R26), R4			// code pointer
   164		MOVD	(g_sched+gobuf_sp)(g), R0
   165		MOVD	R0, RSP	// sp = m->g0->sched.sp
   166		MOVD	R3, -8(RSP)
   167		MOVD	$0, -16(RSP)
   168		SUB	$16, RSP
   169		BL	(R4)
   170		B	runtime·badmcall2(SB)
   171	
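// mcall is only callable from within the runtime. A typical use, sketched as
// runtime-internal pseudocode (gopark and park_m are the names used in
// proc.go; the elided arguments are unchanged):
//
//	func gopark(...) {
//		...
//		// Switch to g0 and run park_m(gp). park_m never returns here;
//		// it eventually calls schedule() to run another goroutine.
//		mcall(park_m)
//	}
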
   172	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   173	// of the G stack. We need to distinguish the routine that
   174	// lives at the bottom of the G stack from the one that lives
   175	// at the top of the system stack because the one at the top of
   176	// the system stack terminates the stack walk (see topofstack()).
   177	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   178		UNDEF
   179		BL	(LR)	// make sure this function is not leaf
   180		RET
   181	
   182	// func systemstack(fn func())
   183	TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   184		MOVD	fn+0(FP), R3	// R3 = fn
   185		MOVD	R3, R26		// context
   186		MOVD	g_m(g), R4	// R4 = m
   187	
   188		MOVD	m_gsignal(R4), R5	// R5 = gsignal
   189		CMP	g, R5
   190		BEQ	noswitch
   191	
   192		MOVD	m_g0(R4), R5	// R5 = g0
   193		CMP	g, R5
   194		BEQ	noswitch
   195	
   196		MOVD	m_curg(R4), R6
   197		CMP	g, R6
   198		BEQ	switch
   199	
   200		// Bad: g is not gsignal, not g0, not curg. What is it?
   201		// Hide call from linker nosplit analysis.
   202		MOVD	$runtime·badsystemstack(SB), R3
   203		BL	(R3)
   204	
   205	switch:
   206		// save our state in g->sched. Pretend to
   207		// be systemstack_switch if the G stack is scanned.
   208		MOVD	$runtime·systemstack_switch(SB), R6
   209		ADD	$8, R6	// get past prologue
   210		MOVD	R6, (g_sched+gobuf_pc)(g)
   211		MOVD	RSP, R0
   212		MOVD	R0, (g_sched+gobuf_sp)(g)
   213		MOVD	$0, (g_sched+gobuf_lr)(g)
   214		MOVD	g, (g_sched+gobuf_g)(g)
   215	
   216		// switch to g0
   217		MOVD	R5, g
   218		BL	runtime·save_g(SB)
   219		MOVD	(g_sched+gobuf_sp)(g), R3
   220		// make it look like mstart called systemstack on g0, to stop traceback
   221		SUB	$16, R3
   222		AND	$~15, R3
   223		MOVD	$runtime·mstart(SB), R4
   224		MOVD	R4, 0(R3)
   225		MOVD	R3, RSP
   226	
   227		// call target function
   228		MOVD	0(R26), R3	// code pointer
   229		BL	(R3)
   230	
   231		// switch back to g
   232		MOVD	g_m(g), R3
   233		MOVD	m_curg(R3), g
   234		BL	runtime·save_g(SB)
   235		MOVD	(g_sched+gobuf_sp)(g), R0
   236		MOVD	R0, RSP
   237		MOVD	$0, (g_sched+gobuf_sp)(g)
   238		RET
   239	
   240	noswitch:
   241		// already on m stack, just call directly
   242		// Using a tail call here cleans up tracebacks since we won't stop
   243		// at an intermediate systemstack.
   244		MOVD	0(R26), R3	// code pointer
   245		MOVD.P	16(RSP), R30	// restore LR
   246		B	(R3)
   247	
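// systemstack is the Go-visible entry point for this switch. An illustrative
// runtime-internal usage sketch:
//
//	systemstack(func() {
//		// Runs on m->g0 (or stays put if already on g0/gsignal), so it
//		// may use more stack than the calling goroutine has available
//		// and must not grow the goroutine stack.
//	})
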
   248	/*
   249	 * support for morestack
   250	 */
   251	
   252	// Called during function prolog when more stack is needed.
   253	// Caller has already loaded:
   254	// R3 prolog's LR (R30)
   255	//
   256	// The traceback routines see morestack on a g0 as being
   257	// the top of a stack (for example, morestack calling newstack
   258	// calling the scheduler calling newm calling gc), so we must
   259	// record an argument size. For that purpose, it has no arguments.
   260	TEXT runtime·morestack(SB),NOSPLIT,$-8-0
   261		// Cannot grow scheduler stack (m->g0).
   262		MOVD	g_m(g), R8
   263		MOVD	m_g0(R8), R4
   264		CMP	g, R4
   265		BNE	3(PC)
   266		BL	runtime·badmorestackg0(SB)
   267		B	runtime·abort(SB)
   268	
   269		// Cannot grow signal stack (m->gsignal).
   270		MOVD	m_gsignal(R8), R4
   271		CMP	g, R4
   272		BNE	3(PC)
   273		BL	runtime·badmorestackgsignal(SB)
   274		B	runtime·abort(SB)
   275	
   276		// Called from f.
   277		// Set g->sched to context in f
   278		MOVD	RSP, R0
   279		MOVD	R0, (g_sched+gobuf_sp)(g)
   280		MOVD	LR, (g_sched+gobuf_pc)(g)
   281		MOVD	R3, (g_sched+gobuf_lr)(g)
   282		MOVD	R26, (g_sched+gobuf_ctxt)(g)
   283	
   284		// Called from f.
   285		// Set m->morebuf to f's callers.
   286		MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
   287		MOVD	RSP, R0
   288		MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
   289		MOVD	g, (m_morebuf+gobuf_g)(R8)
   290	
   291		// Call newstack on m->g0's stack.
   292		MOVD	m_g0(R8), g
   293		BL	runtime·save_g(SB)
   294		MOVD	(g_sched+gobuf_sp)(g), R0
   295		MOVD	R0, RSP
   296		MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
   297		BL	runtime·newstack(SB)
   298	
   299		// Not reached, but make sure the return PC from the call to newstack
   300		// is still in this function, and not the beginning of the next.
   301		UNDEF
   302	
   303	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-4-0
   304		MOVW	$0, R26
   305		B runtime·morestack(SB)
   306	
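// morestack is reached from the stack-split check the compiler emits in
// function prologues. A simplified model of that check in plain Go (the name
// needsSplit is hypothetical; the real prologue also relies on the
// _StackSmall margin folded into stackguard0):
//
//	func needsSplit(sp, frameSize, stackguard0 uintptr) bool {
//		return sp-frameSize < stackguard0
//	}
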
   307	// reflectcall: call a function with the given argument list
   308	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   309	// we don't have variable-sized frames, so we use a small number
   310	// of constant-sized-frame functions to encode a few bits of size in the pc.
   311	// Caution: ugly multiline assembly macros in your future!
   312	
   313	#define DISPATCH(NAME,MAXSIZE)		\
   314		MOVD	$MAXSIZE, R27;		\
   315		CMP	R27, R16;		\
   316		BGT	3(PC);			\
   317		MOVD	$NAME(SB), R27;	\
   318		B	(R27)
   319	// Note: can't just "B NAME(SB)" - bad inlining results.
   320	
   321	TEXT reflect·call(SB), NOSPLIT, $0-0
   322		B	·reflectcall(SB)
   323	
   324	TEXT ·reflectcall(SB), NOSPLIT, $-8-32
   325		MOVWU argsize+24(FP), R16
   326		DISPATCH(runtime·call32, 32)
   327		DISPATCH(runtime·call64, 64)
   328		DISPATCH(runtime·call128, 128)
   329		DISPATCH(runtime·call256, 256)
   330		DISPATCH(runtime·call512, 512)
   331		DISPATCH(runtime·call1024, 1024)
   332		DISPATCH(runtime·call2048, 2048)
   333		DISPATCH(runtime·call4096, 4096)
   334		DISPATCH(runtime·call8192, 8192)
   335		DISPATCH(runtime·call16384, 16384)
   336		DISPATCH(runtime·call32768, 32768)
   337		DISPATCH(runtime·call65536, 65536)
   338		DISPATCH(runtime·call131072, 131072)
   339		DISPATCH(runtime·call262144, 262144)
   340		DISPATCH(runtime·call524288, 524288)
   341		DISPATCH(runtime·call1048576, 1048576)
   342		DISPATCH(runtime·call2097152, 2097152)
   343		DISPATCH(runtime·call4194304, 4194304)
   344		DISPATCH(runtime·call8388608, 8388608)
   345		DISPATCH(runtime·call16777216, 16777216)
   346		DISPATCH(runtime·call33554432, 33554432)
   347		DISPATCH(runtime·call67108864, 67108864)
   348		DISPATCH(runtime·call134217728, 134217728)
   349		DISPATCH(runtime·call268435456, 268435456)
   350		DISPATCH(runtime·call536870912, 536870912)
   351		DISPATCH(runtime·call1073741824, 1073741824)
   352		MOVD	$runtime·badreflectcall(SB), R0
   353		B	(R0)
   354	
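// The DISPATCH chain above selects the first call stub whose MAXSIZE is at
// least the argument size, i.e. it rounds argsize up to the next power of
// two starting at 32. Illustrative Go equivalent (stubSize is a hypothetical
// helper; sizes above 1<<30 fall through to badreflectcall):
//
//	func stubSize(argsize uint32) (uint32, bool) {
//		for size := uint32(32); size <= 1<<30; size <<= 1 {
//			if argsize <= size {
//				return size, true // use runtime·call<size>
//			}
//		}
//		return 0, false // too big: badreflectcall
//	}
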
   355	#define CALLFN(NAME,MAXSIZE)			\
   356	TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   357		NO_LOCAL_POINTERS;			\
   358		/* copy arguments to stack */		\
   359		MOVD	arg+16(FP), R3;			\
   360		MOVWU	argsize+24(FP), R4;		\
   361		ADD	$8, RSP, R5;			\
   362		BIC	$0xf, R4, R6;			\
   363		CBZ	R6, 6(PC);			\
   364		/* if R6=(argsize&~15) != 0 */		\
   365		ADD	R6, R5, R6;			\
   366		/* copy 16 bytes a time */		\
   367		LDP.P	16(R3), (R7, R8);		\
   368		STP.P	(R7, R8), 16(R5);		\
   369		CMP	R5, R6;				\
   370		BNE	-3(PC);				\
   371		AND	$0xf, R4, R6;			\
   372		CBZ	R6, 6(PC);			\
   373		/* if R6=(argsize&15) != 0 */		\
   374		ADD	R6, R5, R6;			\
   375		/* copy 1 byte a time for the rest */	\
   376		MOVBU.P	1(R3), R7;			\
   377		MOVBU.P	R7, 1(R5);			\
   378		CMP	R5, R6;				\
   379		BNE	-3(PC);				\
   380		/* call function */			\
   381		MOVD	f+8(FP), R26;			\
   382		MOVD	(R26), R0;			\
   383		PCDATA  $PCDATA_StackMapIndex, $0;	\
   384		BL	(R0);				\
   385		/* copy return values back */		\
   386		MOVD	argtype+0(FP), R7;		\
   387		MOVD	arg+16(FP), R3;			\
   388		MOVWU	n+24(FP), R4;			\
   389		MOVWU	retoffset+28(FP), R6;		\
   390		ADD	$8, RSP, R5;			\
   391		ADD	R6, R5; 			\
   392		ADD	R6, R3;				\
   393		SUB	R6, R4;				\
   394		BL	callRet<>(SB);			\
   395		RET
   396	
   397	// callRet copies return values back at the end of call*. This is a
   398	// separate function so it can allocate stack space for the arguments
   399	// to reflectcallmove. It does not follow the Go ABI; it expects its
   400	// arguments in registers.
   401	TEXT callRet<>(SB), NOSPLIT, $40-0
   402		MOVD	R7, 8(RSP)
   403		MOVD	R3, 16(RSP)
   404		MOVD	R5, 24(RSP)
   405		MOVD	R4, 32(RSP)
   406		BL	runtime·reflectcallmove(SB)
   407		RET
   408	
   409	// These have 8 added to make the overall frame size a multiple of 16,
   410	// as required by the ABI. (There is another +8 for the saved LR.)
   411	CALLFN(·call32, 40 )
   412	CALLFN(·call64, 72 )
   413	CALLFN(·call128, 136 )
   414	CALLFN(·call256, 264 )
   415	CALLFN(·call512, 520 )
   416	CALLFN(·call1024, 1032 )
   417	CALLFN(·call2048, 2056 )
   418	CALLFN(·call4096, 4104 )
   419	CALLFN(·call8192, 8200 )
   420	CALLFN(·call16384, 16392 )
   421	CALLFN(·call32768, 32776 )
   422	CALLFN(·call65536, 65544 )
   423	CALLFN(·call131072, 131080 )
   424	CALLFN(·call262144, 262152 )
   425	CALLFN(·call524288, 524296 )
   426	CALLFN(·call1048576, 1048584 )
   427	CALLFN(·call2097152, 2097160 )
   428	CALLFN(·call4194304, 4194312 )
   429	CALLFN(·call8388608, 8388616 )
   430	CALLFN(·call16777216, 16777224 )
   431	CALLFN(·call33554432, 33554440 )
   432	CALLFN(·call67108864, 67108872 )
   433	CALLFN(·call134217728, 134217736 )
   434	CALLFN(·call268435456, 268435464 )
   435	CALLFN(·call536870912, 536870920 )
   436	CALLFN(·call1073741824, 1073741832 )
   437	
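// A quick way to check the alignment property stated above: each frame is
// MAXSIZE+8 bytes, and adding the 8-byte saved LR must give a multiple of 16.
// Illustrative standalone Go check:
//
//	for shift := uint(5); shift <= 30; shift++ {
//		frame := (1 << shift) + 8 // e.g. ·call32 uses a 40-byte frame
//		if (frame+8)%16 != 0 {    // +8 for the saved LR
//			panic("misaligned call* frame")
//		}
//	}
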
   438	// AES hashing not implemented for ARM64, issue #10109.
   439	TEXT runtime·aeshash(SB),NOSPLIT,$-8-0
   440		MOVW	$0, R0
   441		MOVW	(R0), R1
   442	TEXT runtime·aeshash32(SB),NOSPLIT,$-8-0
   443		MOVW	$0, R0
   444		MOVW	(R0), R1
   445	TEXT runtime·aeshash64(SB),NOSPLIT,$-8-0
   446		MOVW	$0, R0
   447		MOVW	(R0), R1
   448	TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
   449		MOVW	$0, R0
   450		MOVW	(R0), R1
   451		
   452	TEXT runtime·procyield(SB),NOSPLIT,$0-0
   453		MOVWU	cycles+0(FP), R0
   454	again:
   455		YIELD
   456		SUBW	$1, R0
   457		CBNZ	R0, again
   458		RET
   459	
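// procyield is the runtime's spin-wait hint: one YIELD per requested cycle.
// Scalar Go sketch of the same loop (cpuRelax is a hypothetical stand-in for
// the YIELD instruction, which has no direct Go equivalent):
//
//	func procyield(cycles uint32) {
//		for ; cycles > 0; cycles-- {
//			cpuRelax() // YIELD: hint to the core that we are busy-waiting
//		}
//	}
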
   460	// void jmpdefer(fv, sp);
   461	// called from deferreturn.
   462	// 1. grab stored LR for caller
   463	// 2. sub 4 bytes to get back to BL deferreturn
   464	// 3. BR to fn
   465	TEXT runtime·jmpdefer(SB), NOSPLIT, $-8-16
   466		MOVD	0(RSP), R0
   467		SUB	$4, R0
   468		MOVD	R0, LR
   469	
   470		MOVD	fv+0(FP), R26
   471		MOVD	argp+8(FP), R0
   472		MOVD	R0, RSP
   473		SUB	$8, RSP
   474		MOVD	0(R26), R3
   475		B	(R3)
   476	
   477	// Save state of caller into g->sched. Smashes R0.
   478	TEXT gosave<>(SB),NOSPLIT,$-8
   479		MOVD	LR, (g_sched+gobuf_pc)(g)
   480		MOVD RSP, R0
   481		MOVD	R0, (g_sched+gobuf_sp)(g)
   482		MOVD	$0, (g_sched+gobuf_lr)(g)
   483		MOVD	$0, (g_sched+gobuf_ret)(g)
   484		// Assert ctxt is zero. See func save.
   485		MOVD	(g_sched+gobuf_ctxt)(g), R0
   486		CMP	$0, R0
   487		BEQ	2(PC)
   488		CALL	runtime·badctxt(SB)
   489		RET
   490	
   491	// func asmcgocall(fn, arg unsafe.Pointer) int32
   492	// Call fn(arg) on the scheduler stack,
   493	// aligned appropriately for the gcc ABI.
   494	// See cgocall.go for more details.
   495	TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   496		MOVD	fn+0(FP), R1
   497		MOVD	arg+8(FP), R0
   498	
   499		MOVD	RSP, R2		// save original stack pointer
   500		MOVD	g, R4
   501	
   502		// Figure out if we need to switch to m->g0 stack.
   503		// We get called to create new OS threads too, and those
   504		// come in on the m->g0 stack already.
   505		MOVD	g_m(g), R8
   506		MOVD	m_g0(R8), R3
   507		CMP	R3, g
   508		BEQ	g0
   509		MOVD	R0, R9	// gosave<> and save_g might clobber R0
   510		BL	gosave<>(SB)
   511		MOVD	R3, g
   512		BL	runtime·save_g(SB)
   513		MOVD	(g_sched+gobuf_sp)(g), R0
   514		MOVD	R0, RSP
   515		MOVD	R9, R0
   516	
   517		// Now on a scheduling stack (a pthread-created stack).
   518	g0:
   519		// Save room for two of our pointers /*, plus 32 bytes of callee
   520		// save area that lives on the caller stack. */
   521		MOVD	RSP, R13
   522		SUB	$16, R13
   523		MOVD	R13, RSP
   524		MOVD	R4, 0(RSP)	// save old g on stack
   525		MOVD	(g_stack+stack_hi)(R4), R4
   526		SUB	R2, R4
   527		MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   528		BL	(R1)
   529		MOVD	R0, R9
   530	
   531		// Restore g, stack pointer. R0 is errno, so don't touch it
   532		MOVD	0(RSP), g
   533		BL	runtime·save_g(SB)
   534		MOVD	(g_stack+stack_hi)(g), R5
   535		MOVD	8(RSP), R6
   536		SUB	R6, R5
   537		MOVD	R9, R0
   538		MOVD	R5, RSP
   539	
   540		MOVW	R0, ret+16(FP)
   541		RET
   542	
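// The value stored at 8(RSP) above is a depth, not a raw SP, because the
// goroutine stack may be copied while a cgo callback runs. Sketch of the
// save/restore arithmetic in Go terms (names are illustrative):
//
//	depth := oldg.stack.hi - sp // saved before calling fn
//	// ... fn may trigger a callback that moves the goroutine stack ...
//	sp = oldg.stack.hi - depth  // stack.hi is reloaded afterwards, so this
//	                            // is valid even if the stack was copied
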
   543	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   544	// Turn the fn into a Go func (by taking its address) and call
   545	// cgocallback_gofunc.
   546	TEXT runtime·cgocallback(SB),NOSPLIT,$40-32
   547		MOVD	$fn+0(FP), R0
   548		MOVD	R0, 8(RSP)
   549		MOVD	frame+8(FP), R0
   550		MOVD	R0, 16(RSP)
   551		MOVD	framesize+16(FP), R0
   552		MOVD	R0, 24(RSP)
   553		MOVD	ctxt+24(FP), R0
   554		MOVD	R0, 32(RSP)
   555		MOVD	$runtime·cgocallback_gofunc(SB), R0
   556		BL	(R0)
   557		RET
   558	
   559	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   560	// See cgocall.go for more details.
   561	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$24-32
   562		NO_LOCAL_POINTERS
   563	
   564		// Load g from thread-local storage.
   565		MOVB	runtime·iscgo(SB), R3
   566		CMP	$0, R3
   567		BEQ	nocgo
   568		BL	runtime·load_g(SB)
   569	nocgo:
   570	
   571		// If g is nil, Go did not create the current thread.
   572		// Call needm to obtain one for temporary use.
   573		// In this case, we're running on the thread stack, so there's
   574		// lots of space, but the linker doesn't know. Hide the call from
   575		// the linker analysis by using an indirect call.
   576		CMP	$0, g
   577		BEQ	needm
   578	
   579		MOVD	g_m(g), R8
   580		MOVD	R8, savedm-8(SP)
   581		B	havem
   582	
   583	needm:
   584		MOVD	g, savedm-8(SP) // g is zero, so is m.
   585		MOVD	$runtime·needm(SB), R0
   586		BL	(R0)
   587	
   588		// Set m->sched.sp = SP, so that if a panic happens
   589		// during the function we are about to execute, it will
   590		// have a valid SP to run on the g0 stack.
   591		// The next few lines (after the havem label)
   592		// will save this SP onto the stack and then write
   593		// the same SP back to m->sched.sp. That seems redundant,
   594		// but if an unrecovered panic happens, unwindm will
   595		// restore the g->sched.sp from the stack location
   596		// and then systemstack will try to use it. If we don't set it here,
   597		// that restored SP will be uninitialized (typically 0) and
   598		// will not be usable.
   599		MOVD	g_m(g), R8
   600		MOVD	m_g0(R8), R3
   601		MOVD	RSP, R0
   602		MOVD	R0, (g_sched+gobuf_sp)(R3)
   603	
   604	havem:
   605		// Now there's a valid m, and we're running on its m->g0.
   606		// Save current m->g0->sched.sp on stack and then set it to SP.
   607		// Save current sp in m->g0->sched.sp in preparation for
   608		// switch back to m->curg stack.
   609		// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
   610		// Beware that the frame size is actually 32.
   611		MOVD	m_g0(R8), R3
   612		MOVD	(g_sched+gobuf_sp)(R3), R4
   613		MOVD	R4, savedsp-16(SP)
   614		MOVD	RSP, R0
   615		MOVD	R0, (g_sched+gobuf_sp)(R3)
   616	
   617		// Switch to m->curg stack and call runtime.cgocallbackg.
   618		// Because we are taking over the execution of m->curg
   619		// but *not* resuming what had been running, we need to
   620		// save that information (m->curg->sched) so we can restore it.
   621		// We can restore m->curg->sched.sp easily, because calling
   622		// runtime.cgocallbackg leaves SP unchanged upon return.
   623		// To save m->curg->sched.pc, we push it onto the stack.
   624		// This has the added benefit that it looks to the traceback
   625		// routine like cgocallbackg is going to return to that
   626		// PC (because the frame we allocate below has the same
   627		// size as cgocallback_gofunc's frame declared above)
   628		// so that the traceback will seamlessly trace back into
   629		// the earlier calls.
   630		//
   631		// In the new goroutine, -8(SP) is unused (where SP refers to
   632		// m->curg's SP while we're setting it up, before we've adjusted it).
   633		MOVD	m_curg(R8), g
   634		BL	runtime·save_g(SB)
   635		MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   636		MOVD	(g_sched+gobuf_pc)(g), R5
   637		MOVD	R5, -(24+8)(R4)
   638		MOVD	ctxt+24(FP), R0
   639		MOVD	R0, -(16+8)(R4)
   640		MOVD	$-(24+8)(R4), R0 // maintain 16-byte SP alignment
   641		MOVD	R0, RSP
   642		BL	runtime·cgocallbackg(SB)
   643	
   644		// Restore g->sched (== m->curg->sched) from saved values.
   645		MOVD	0(RSP), R5
   646		MOVD	R5, (g_sched+gobuf_pc)(g)
   647		MOVD	RSP, R4
   648		ADD	$(24+8), R4, R4
   649		MOVD	R4, (g_sched+gobuf_sp)(g)
   650	
   651		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   652		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   653		// so we do not have to restore it.)
   654		MOVD	g_m(g), R8
   655		MOVD	m_g0(R8), g
   656		BL	runtime·save_g(SB)
   657		MOVD	(g_sched+gobuf_sp)(g), R0
   658		MOVD	R0, RSP
   659		MOVD	savedsp-16(SP), R4
   660		MOVD	R4, (g_sched+gobuf_sp)(g)
   661	
   662		// If the m on entry was nil, we called needm above to borrow an m
   663		// for the duration of the call. Since the call is over, return it with dropm.
   664		MOVD	savedm-8(SP), R6
   665		CMP	$0, R6
   666		BNE	droppedm
   667		MOVD	$runtime·dropm(SB), R0
   668		BL	(R0)
   669	droppedm:
   670	
   671		// Done!
   672		RET
   673	
   674	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   675	// Must obey the gcc calling convention.
   676	TEXT _cgo_topofstack(SB),NOSPLIT,$24
   677		// g (R28) and REGTMP (R27)  might be clobbered by load_g. They
   678		// are callee-save in the gcc calling convention, so save them.
   679		MOVD	R27, savedR27-8(SP)
   680		MOVD	g, saveG-16(SP)
   681	
   682		BL	runtime·load_g(SB)
   683		MOVD	g_m(g), R0
   684		MOVD	m_curg(R0), R0
   685		MOVD	(g_stack+stack_hi)(R0), R0
   686	
   687		MOVD	saveG-16(SP), g
   688	MOVD	savedR27-8(SP), R27
   689		RET
   690	
   691	// void setg(G*); set g. for use by needm.
   692	TEXT runtime·setg(SB), NOSPLIT, $0-8
   693		MOVD	gg+0(FP), g
   694		// This only happens if iscgo, so jump straight to save_g
   695		BL	runtime·save_g(SB)
   696		RET
   697	
   698	// void setg_gcc(G*); set g called from gcc
   699	TEXT setg_gcc<>(SB),NOSPLIT,$8
   700		MOVD	R0, g
   701		MOVD	R27, savedR27-8(SP)
   702		BL	runtime·save_g(SB)
   703		MOVD	savedR27-8(SP), R27
   704		RET
   705	
   706	TEXT runtime·getcallerpc(SB),NOSPLIT,$-8-8
   707		MOVD	0(RSP), R0		// LR saved by caller
   708		MOVD	R0, ret+0(FP)
   709		RET
   710	
   711	TEXT runtime·abort(SB),NOSPLIT,$-8-0
   712		B	(ZR)
   713		UNDEF
   714	
   715	// memequal(a, b unsafe.Pointer, size uintptr) bool
   716	TEXT runtime·memequal(SB),NOSPLIT,$-8-25
   717		MOVD	size+16(FP), R1
   718		// short path to handle 0-byte case
   719		CBZ	R1, equal
   720		MOVD	a+0(FP), R0
   721		MOVD	b+8(FP), R2
   722		MOVD	$ret+24(FP), R8
   723		B	runtime·memeqbody<>(SB)
   724	equal:
   725		MOVD	$1, R0
   726		MOVB	R0, ret+24(FP)
   727		RET
   728	
   729	// memequal_varlen(a, b unsafe.Pointer) bool
   730	TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   731		MOVD	a+0(FP), R3
   732		MOVD	b+8(FP), R4
   733		CMP	R3, R4
   734		BEQ	eq
   735		MOVD	8(R26), R5    // compiler stores size at offset 8 in the closure
   736		MOVD	R3, 8(RSP)
   737		MOVD	R4, 16(RSP)
   738		MOVD	R5, 24(RSP)
   739		BL	runtime·memequal(SB)
   740		MOVBU	32(RSP), R3
   741		MOVB	R3, ret+16(FP)
   742		RET
   743	eq:
   744		MOVD	$1, R3
   745		MOVB	R3, ret+16(FP)
   746		RET
   747	
   748	TEXT runtime·cmpstring(SB),NOSPLIT,$-4-40
   749		MOVD	s1_base+0(FP), R2
   750		MOVD	s1_len+8(FP), R0
   751		MOVD	s2_base+16(FP), R3
   752		MOVD	s2_len+24(FP), R1
   753		ADD	$40, RSP, R7
   754		B	runtime·cmpbody<>(SB)
   755	
   756	TEXT bytes·Compare(SB),NOSPLIT,$-4-56
   757		MOVD	s1+0(FP), R2
   758		MOVD	s1+8(FP), R0
   759		MOVD	s2+24(FP), R3
   760		MOVD	s2+32(FP), R1
   761		ADD	$56, RSP, R7
   762		B	runtime·cmpbody<>(SB)
   763	
   764	// On entry:
   765	// R0 is the length of s1
   766	// R1 is the length of s2
   767	// R2 points to the start of s1
   768	// R3 points to the start of s2
   769	// R7 points to return value (-1/0/1 will be written here)
   770	//
   771	// On exit:
   772	// R4, R5, and R6 are clobbered
   773	TEXT runtime·cmpbody<>(SB),NOSPLIT,$-4-0
   774		CMP	R2, R3
   775		BEQ	samebytes // same starting pointers; compare lengths
   776		CMP	R0, R1
   777		CSEL    LT, R1, R0, R6 // R6 is min(R0, R1)
   778	
   779		ADD	R2, R6	// R2 is current byte in s1, R6 is last byte in s1 to compare
   780	loop:
   781		CMP	R2, R6
   782		BEQ	samebytes // all compared bytes were the same; compare lengths
   783		MOVBU.P	1(R2), R4
   784		MOVBU.P	1(R3), R5
   785		CMP	R4, R5
   786		BEQ	loop
   787		// bytes differed
   788		MOVD	$1, R4
   789		CSNEG	LT, R4, R4, R4
   790		MOVD	R4, (R7)
   791		RET
   792	samebytes:
   793		MOVD	$1, R4
   794		CMP	R0, R1
   795		CSNEG	LT, R4, R4, R4
   796		CSEL	EQ, ZR, R4, R4
   797		MOVD	R4, (R7)
   798		RET
   799	
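// Scalar Go reference for the contract cmpbody implements (-1, 0 or +1, the
// same result bytes.Compare and runtime·cmpstring report; illustrative only):
//
//	func cmp(s1, s2 []byte) int {
//		n := len(s1)
//		if len(s2) < n {
//			n = len(s2)
//		}
//		for i := 0; i < n; i++ {
//			switch {
//			case s1[i] < s2[i]:
//				return -1
//			case s1[i] > s2[i]:
//				return +1
//			}
//		}
//		switch {
//		case len(s1) < len(s2):
//			return -1
//		case len(s1) > len(s2):
//			return +1
//		}
//		return 0
//	}
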
   800	//
   801	// functions for other packages
   802	//
   803	TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
   804		MOVD	b+0(FP), R0
   805		MOVD	b_len+8(FP), R2
   806		MOVBU	c+24(FP), R1
   807		MOVD	$ret+32(FP), R8
   808		B	runtime·indexbytebody<>(SB)
   809	
   810	TEXT strings·IndexByte(SB),NOSPLIT,$0-32
   811		MOVD	s+0(FP), R0
   812		MOVD	s_len+8(FP), R2
   813		MOVBU	c+16(FP), R1
   814		MOVD	$ret+24(FP), R8
   815		B	runtime·indexbytebody<>(SB)
   816	
   817	// input:
   818	//   R0: data
   819	//   R1: byte to search
   820	//   R2: data len
   821	//   R8: address to put result
   822	TEXT runtime·indexbytebody<>(SB),NOSPLIT,$0
   823		// Core algorithm:
   824		// For each 32-byte chunk we calculate a 64-bit syndrome value,
   825		// with two bits per byte. For each tuple, bit 0 is set if the
   826		// relevant byte matched the requested character and bit 1 is
   827		// not used (faster than using a 32bit syndrome). Since the bits
   828		// in the syndrome reflect exactly the order in which things occur
   829	// in the original string, counting trailing zeros allows us to
   830		// identify exactly which byte has matched.
   831	
   832		CBZ	R2, fail
   833		MOVD	R0, R11
   834		// Magic constant 0x40100401 allows us to identify
   835		// which lane matches the requested byte.
   836		// 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24))
   837		// Different bytes have different bit masks (i.e: 1, 4, 16, 64)
   838		MOVD	$0x40100401, R5
   839		VMOV	R1, V0.B16
   840		// Work with aligned 32-byte chunks
   841		BIC	$0x1f, R0, R3
   842		VMOV	R5, V5.S4
   843		ANDS	$0x1f, R0, R9
   844		AND	$0x1f, R2, R10
   845		BEQ	loop
   846	
   847		// Input string is not 32-byte aligned. We calculate the
   848		// syndrome value for the aligned 32 bytes block containing
   849		// the first bytes and mask off the irrelevant part.
   850		VLD1.P	(R3), [V1.B16, V2.B16]
   851		SUB	$0x20, R9, R4
   852		ADDS	R4, R2, R2
   853		VCMEQ	V0.B16, V1.B16, V3.B16
   854		VCMEQ	V0.B16, V2.B16, V4.B16
   855		VAND	V5.B16, V3.B16, V3.B16
   856		VAND	V5.B16, V4.B16, V4.B16
   857		VADDP	V4.B16, V3.B16, V6.B16 // 256->128
   858		VADDP	V6.B16, V6.B16, V6.B16 // 128->64
   859		VMOV	V6.D[0], R6
   860		// Clear the irrelevant lower bits
   861		LSL	$1, R9, R4
   862		LSR	R4, R6, R6
   863		LSL	R4, R6, R6
   864		// The first block can also be the last
   865		BLS	masklast
   866		// Have we found something already?
   867		CBNZ	R6, tail
   868	
   869	loop:
   870		VLD1.P	(R3), [V1.B16, V2.B16]
   871		SUBS	$0x20, R2, R2
   872		VCMEQ	V0.B16, V1.B16, V3.B16
   873		VCMEQ	V0.B16, V2.B16, V4.B16
   874		// If we're out of data we finish regardless of the result
   875		BLS	end
   876		// Use a fast check for the termination condition
   877		VORR	V4.B16, V3.B16, V6.B16
   878		VADDP	V6.D2, V6.D2, V6.D2
   879		VMOV	V6.D[0], R6
   880		// We're not out of data, loop if we haven't found the character
   881		CBZ	R6, loop
   882	
   883	end:
   884		// Termination condition found, let's calculate the syndrome value
   885		VAND	V5.B16, V3.B16, V3.B16
   886		VAND	V5.B16, V4.B16, V4.B16
   887		VADDP	V4.B16, V3.B16, V6.B16
   888		VADDP	V6.B16, V6.B16, V6.B16
   889		VMOV	V6.D[0], R6
   890		// Only do the clear for the last possible block with less than 32 bytes
   891		// Condition flags come from SUBS in the loop
   892		BHS	tail
   893	
   894	masklast:
   895		// Clear the irrelevant upper bits
   896		ADD	R9, R10, R4
   897		AND	$0x1f, R4, R4
   898		SUB	$0x20, R4, R4
   899		NEG	R4<<1, R4
   900		LSL	R4, R6, R6
   901		LSR	R4, R6, R6
   902	
   903	tail:
   904		// Check that we have found a character
   905		CBZ	R6, fail
   906		// Count the trailing zeros using bit reversing
   907		RBIT	R6, R6
   908		// Compensate the last post-increment
   909		SUB	$0x20, R3, R3
   910		// And count the leading zeros
   911		CLZ	R6, R6
   912		// R6 is twice the offset into the fragment
   913		ADD	R6>>1, R3, R0
   914		// Compute the offset result
   915		SUB	R11, R0, R0
   916		MOVD	R0, (R8)
   917		RET
   918	
   919	fail:
   920		MOVD	$-1, R0
   921		MOVD	R0, (R8)
   922		RET
   923	
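// Scalar Go reference for the result of the SIMD search above (illustrative
// only; the vector code returns the same index, or -1 on failure):
//
//	func indexByte(b []byte, c byte) int {
//		for i, x := range b {
//			if x == c {
//				return i
//			}
//		}
//		return -1
//	}
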
   924	// Equal(a, b []byte) bool
   925	TEXT bytes·Equal(SB),NOSPLIT,$0-49
   926		MOVD	a_len+8(FP), R1
   927		MOVD	b_len+32(FP), R3
   928		CMP	R1, R3
   929		// unequal lengths are not equal
   930		BNE	not_equal
   931		// short path to handle 0-byte case
   932		CBZ	R1, equal
   933		MOVD	a+0(FP), R0
   934		MOVD	b+24(FP), R2
   935		MOVD	$ret+48(FP), R8
   936		B	runtime·memeqbody<>(SB)
   937	equal:
   938		MOVD	$1, R0
   939		MOVB	R0, ret+48(FP)
   940		RET
   941	not_equal:
   942		MOVB	ZR, ret+48(FP)
   943		RET
   944	
   945	// input:
   946	// R0: pointer a
   947	// R1: data len
   948	// R2: pointer b
   949	// R8: address to put result
   950	TEXT runtime·memeqbody<>(SB),NOSPLIT,$0
   951		CMP	$1, R1
   952		// handle 1-byte special case for better performance
   953		BEQ	one
   954		CMP	$16, R1
   955		// handle specially if length < 16
   956		BLO	tail
   957		BIC	$0x3f, R1, R3
   958		CBZ	R3, chunk16
   959		// work with 64-byte chunks
   960		ADD	R3, R0, R6	// end of chunks
   961	chunk64_loop:
   962		VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
   963		VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
   964		VCMEQ	V0.D2, V4.D2, V8.D2
   965		VCMEQ	V1.D2, V5.D2, V9.D2
   966		VCMEQ	V2.D2, V6.D2, V10.D2
   967		VCMEQ	V3.D2, V7.D2, V11.D2
   968		VAND	V8.B16, V9.B16, V8.B16
   969		VAND	V8.B16, V10.B16, V8.B16
   970		VAND	V8.B16, V11.B16, V8.B16
   971		CMP	R0, R6
   972		VMOV	V8.D[0], R4
   973		VMOV	V8.D[1], R5
   974		CBZ	R4, not_equal
   975		CBZ	R5, not_equal
   976		BNE	chunk64_loop
   977		AND	$0x3f, R1, R1
   978		CBZ	R1, equal
   979	chunk16:
   980		// work with 16-byte chunks
   981		BIC	$0xf, R1, R3
   982		CBZ	R3, tail
   983		ADD	R3, R0, R6	// end of chunks
   984	chunk16_loop:
   985		VLD1.P	(R0), [V0.D2]
   986		VLD1.P	(R2), [V1.D2]
   987		VCMEQ	V0.D2, V1.D2, V2.D2
   988		CMP	R0, R6
   989		VMOV	V2.D[0], R4
   990		VMOV	V2.D[1], R5
   991		CBZ	R4, not_equal
   992		CBZ	R5, not_equal
   993		BNE	chunk16_loop
   994		AND	$0xf, R1, R1
   995		CBZ	R1, equal
   996	tail:
   997		// special compare of tail with length < 16
   998		TBZ	$3, R1, lt_8
   999		MOVD.P	8(R0), R4
  1000		MOVD.P	8(R2), R5
  1001		CMP	R4, R5
  1002		BNE	not_equal
  1003	lt_8:
  1004		TBZ	$2, R1, lt_4
  1005		MOVWU.P	4(R0), R4
  1006		MOVWU.P	4(R2), R5
  1007		CMP	R4, R5
  1008		BNE	not_equal
  1009	lt_4:
  1010		TBZ	$1, R1, lt_2
  1011		MOVHU.P	2(R0), R4
  1012		MOVHU.P	2(R2), R5
  1013		CMP	R4, R5
  1014		BNE	not_equal
  1015	lt_2:
  1016		TBZ     $0, R1, equal
  1017	one:
  1018		MOVBU	(R0), R4
  1019		MOVBU	(R2), R5
  1020		CMP	R4, R5
  1021		BNE	not_equal
  1022	equal:
  1023		MOVD	$1, R0
  1024		MOVB	R0, (R8)
  1025		RET
  1026	not_equal:
  1027		MOVB	ZR, (R8)
  1028		RET
  1029	
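// Scalar Go reference for memeqbody's contract (callers guarantee both
// pointers address at least R1 readable bytes; illustrative only):
//
//	func memeq(a, b []byte) bool { // len(a) == len(b) is assumed
//		for i := range a {
//			if a[i] != b[i] {
//				return false
//			}
//		}
//		return true
//	}
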
  1030	TEXT runtime·return0(SB), NOSPLIT, $0
  1031		MOVW	$0, R0
  1032		RET
  1033	
  1034	// The top-most function running on a goroutine
  1035	// returns to goexit+PCQuantum.
  1036	TEXT runtime·goexit(SB),NOSPLIT,$-8-0
  1037		MOVD	R0, R0	// NOP
  1038		BL	runtime·goexit1(SB)	// does not return
  1039	
  1040	TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
  1041		RET
  1042	
  1043	// This is called from .init_array and follows the platform, not Go, ABI.
  1044	TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1045		SUB	$0x10, RSP
  1046		MOVD	R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
  1047		MOVD	runtime·lastmoduledatap(SB), R1
  1048		MOVD	R0, moduledata_next(R1)
  1049		MOVD	R0, runtime·lastmoduledatap(SB)
  1050		MOVD	8(RSP), R27
  1051		ADD	$0x10, RSP
  1052		RET
  1053	
  1054	TEXT ·checkASM(SB),NOSPLIT,$0-1
  1055		MOVW	$1, R3
  1056		MOVB	R3, ret+0(FP)
  1057		RET
