...
Run Format

Text file src/runtime/asm_ppc64x.s

Documentation: runtime

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ppc64 ppc64le
     6	
     7	#include "go_asm.h"
     8	#include "go_tls.h"
     9	#include "funcdata.h"
    10	#include "textflag.h"
    11	#include "asm_ppc64x.h"
    12	
    13	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    14		// Bootstrap: sets up g0 and m0, runs check/args/osinit/schedinit, queues runtime·main via newproc and enters mstart. On entry: R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer
    15	
    16		// initialize essential registers
    17		BL	runtime·reginit(SB)
    18	
    19		SUB	$(FIXED_FRAME+16), R1	// new frame: fixed area plus 16 bytes for argc/argv
    20		MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
    21		MOVW	R3, FIXED_FRAME+0(R1)	// argc
    22		MOVD	R4, FIXED_FRAME+8(R1)	// argv
    23	
    24		// create istack out of the given (operating system) stack.
    25		// _cgo_init may update stackguard.
    26		MOVD	$runtime·g0(SB), g
    27		MOVD	$(-64*1024), R31
    28		ADD	R31, R1, R3		// R3 = R1 - 64KB: provisional low bound of the g0 stack
    29		MOVD	R3, g_stackguard0(g)
    30		MOVD	R3, g_stackguard1(g)
    31		MOVD	R3, (g_stack+stack_lo)(g)
    32		MOVD	R1, (g_stack+stack_hi)(g)
    33	
    34		// if there is a _cgo_init, call it using the gcc ABI.
    35		MOVD	_cgo_init(SB), R12
    36		CMP	R0, R12			// R0 is zero (set by reginit above); nil _cgo_init means no cgo
    37		BEQ	nocgo
    38		MOVD	R12, CTR		// r12 = "global function entry point"
    39		MOVD	R13, R5			// arg 2: TLS base pointer
    40		MOVD	$setg_gcc<>(SB), R4 	// arg 1: setg
    41		MOVD	g, R3			// arg 0: G
    42		// C functions expect 32 bytes of space on caller stack frame
    43		// and a 16-byte aligned R1
    44		MOVD	R1, R14			// save current stack
    45		SUB	$32, R1			// reserve 32 bytes
    46		RLDCR	$0, R1, $~15, R1	// 16-byte align
    47		BL	(CTR)			// may clobber R0, R3-R12
    48		MOVD	R14, R1			// restore stack
    49		MOVD	24(R1), R2		// reload the TOC pointer stashed at 24(R1) above
    50		XOR	R0, R0			// fix R0
    51	
    52	nocgo:
    53		// update stackguard after _cgo_init
    54		MOVD	(g_stack+stack_lo)(g), R3
    55		ADD	$const__StackGuard, R3	// guard = stack.lo + _StackGuard
    56		MOVD	R3, g_stackguard0(g)
    57		MOVD	R3, g_stackguard1(g)
    58	
    59		// set the per-goroutine and per-mach "registers"
    60		MOVD	$runtime·m0(SB), R3
    61	
    62		// save m->g0 = g0
    63		MOVD	g, m_g0(R3)
    64		// save m0 to g0->m
    65		MOVD	R3, g_m(g)
    66	
    67		BL	runtime·check(SB)
    68	
    69		// args are already prepared
    70		BL	runtime·args(SB)
    71		BL	runtime·osinit(SB)
    72		BL	runtime·schedinit(SB)
    73	
    74		// create a new goroutine to start program
    75		MOVD	$runtime·mainPC(SB), R3		// entry
    76		MOVDU	R3, -8(R1)		// push entry; each MOVDU stores at R1-8 and then updates R1
    77		MOVDU	R0, -8(R1)		// five zero doublewords follow (presumably newproc's size argument plus the fixed frame area - confirm against FIXED_FRAME)
    78		MOVDU	R0, -8(R1)
    79		MOVDU	R0, -8(R1)
    80		MOVDU	R0, -8(R1)
    81		MOVDU	R0, -8(R1)
    82		BL	runtime·newproc(SB)
    83		ADD	$(16+FIXED_FRAME), R1	// release the call frame pushed above
    84	
    85		// start this M
    86		BL	runtime·mstart(SB)
    87	
    88		MOVD	R0, 0(R0)		// reached only if mstart returns: store through R0 (zero) to fault
    89		RET
    90	
    91	DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte word holding the address of runtime·main
    92	GLOBL	runtime·mainPC(SB),RODATA,$8	// read-only; rt0_go passes its address to newproc as the entry
    93	
    94	TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
    95		MOVD	R0, 0(R0) // TODO: TD. For now: store through R0 (zero by toolchain convention), i.e. a write to address 0
    96		RET
    97	
    98	TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
    99		RET	// no-op on this architecture: returns immediately
   100	
   101	TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
   102		// crosscall_ppc64 and crosscall2 need to reginit, but can't
   103		// get at the 'runtime.reginit' symbol.
   104		BR	runtime·reginit(SB)	// tail jump: reginit's RET returns directly to our caller
   105	
   106	TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
   107		// set R0 to zero, it's expected by the toolchain
   108		XOR R0, R0	// R0 ^= R0, result is always 0 regardless of the previous value
   109		RET
   110	
   111	/*
   112	 *  go-routine
   113	 */
   114	
   115	// void gosave(Gobuf*)
   116	// save state in Gobuf; setjmp
   117	TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
   118		MOVD	buf+0(FP), R3		// R3 = buf
   119		MOVD	R1, gobuf_sp(R3)	// buf.sp = current stack pointer
   120		MOVD	LR, R31			// copy LR (our return address) through R31
   121		MOVD	R31, gobuf_pc(R3)	// buf.pc = return address of this call
   122		MOVD	g, gobuf_g(R3)
   123		MOVD	R0, gobuf_lr(R3)	// buf.lr = 0
   124		MOVD	R0, gobuf_ret(R3)	// buf.ret = 0
   125		// Assert ctxt is zero. See func save.
   126		MOVD	gobuf_ctxt(R3), R3
   127		CMP	R0, R3
   128		BEQ	2(PC)			// ctxt == 0: skip the badctxt call
   129		BL	runtime·badctxt(SB)
   130		RET
   131	
   132	// void gogo(Gobuf*)
   133	// restore state from Gobuf; longjmp
   134	TEXT runtime·gogo(SB), NOSPLIT, $16-8
   135		MOVD	buf+0(FP), R5		// R5 = buf
   136	
   137		// If ctxt is not nil, invoke deletion barrier before overwriting.
   138		MOVD	gobuf_ctxt(R5), R3
   139		CMP	R0, R3
   140		BEQ	nilctxt
   141		MOVD	$gobuf_ctxt(R5), R3	// R3 = &buf.ctxt
   142		MOVD	R3, FIXED_FRAME+0(R1)	// arg 0: address of the slot about to be overwritten
   143		MOVD	R0, FIXED_FRAME+8(R1)	// arg 1: 0
   144		BL	runtime·writebarrierptr_prewrite(SB)
   145		MOVD	buf+0(FP), R5		// reload buf: the call above may have clobbered R5
   146	
   147	nilctxt:
   148		MOVD	gobuf_g(R5), g	// make sure g is not nil
   149		BL	runtime·save_g(SB)
   150	
   151		MOVD	0(g), R4		// load through g; R4 is not used afterwards, so this appears to be the nil check mentioned above
   152		MOVD	gobuf_sp(R5), R1	// switch to the saved stack pointer
   153		MOVD	gobuf_lr(R5), R31
   154		MOVD	R31, LR			// LR = buf.lr
   155		MOVD	gobuf_ret(R5), R3	// R3 = buf.ret
   156		MOVD	gobuf_ctxt(R5), R11	// R11 = buf.ctxt
   157		MOVD	R0, gobuf_sp(R5)	// clear the consumed fields
   158		MOVD	R0, gobuf_ret(R5)
   159		MOVD	R0, gobuf_lr(R5)
   160		MOVD	R0, gobuf_ctxt(R5)
   161		CMP	R0, R0 // set condition codes for == test, needed by stack split
   162		MOVD	gobuf_pc(R5), R12
   163		MOVD	R12, CTR
   164		BR	(CTR)			// jump to buf.pc; does not return here
   165	
   166	// void mcall(fn func(*g))
   167	// Switch to m->g0's stack, call fn(g).
   168	// Fn must never return. It should gogo(&g->sched)
   169	// to keep running g.
   170	TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
   171		// Save caller state in g->sched
   172		MOVD	R1, (g_sched+gobuf_sp)(g)
   173		MOVD	LR, R31
   174		MOVD	R31, (g_sched+gobuf_pc)(g)	// sched.pc = our return address
   175		MOVD	R0, (g_sched+gobuf_lr)(g)
   176		MOVD	g, (g_sched+gobuf_g)(g)
   177	
   178		// Switch to m->g0 & its stack, call fn.
   179		MOVD	g, R3				// R3 = calling g, passed to fn below
   180		MOVD	g_m(g), R8
   181		MOVD	m_g0(R8), g
   182		BL	runtime·save_g(SB)
   183		CMP	g, R3
   184		BNE	2(PC)				// caller was not g0: continue
   185		BR	runtime·badmcall(SB)		// caller was already g0
   186		MOVD	fn+0(FP), R11			// context
   187		MOVD	0(R11), R12			// code pointer
   188		MOVD	R12, CTR
   189		MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
   190		MOVDU	R3, -8(R1)			// push the calling g as fn's argument
   191		MOVDU	R0, -8(R1)			// followed by four zero doublewords below it
   192		MOVDU	R0, -8(R1)
   193		MOVDU	R0, -8(R1)
   194		MOVDU	R0, -8(R1)
   195		BL	(CTR)
   196		MOVD	24(R1), R2			// reload R2 from 24(R1); reached only if fn returns
   197		BR	runtime·badmcall2(SB)		// fn returned, which the contract above forbids
   198	
   199	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   200	// of the G stack. We need to distinguish the routine that
   201	// lives at the bottom of the G stack from the one that lives
   202	// at the top of the system stack because the one at the top of
   203	// the system stack terminates the stack walk (see topofstack()).
   204	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   205		// We have several undefs here so that 16 bytes past
   206		// $runtime·systemstack_switch lies within them whether or not the
   207	        // instructions that derive r2 from r12 are there.
   208		UNDEF	// systemstack stores $runtime·systemstack_switch+16 as the saved PC; do not change the instruction count here
   209		UNDEF
   210		UNDEF
   211		BL	(LR)	// make sure this function is not leaf
   212		RET
   213	
   214	// func systemstack(fn func())
   215	TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   216		MOVD	fn+0(FP), R3	// R3 = fn
   217		MOVD	R3, R11		// context
   218		MOVD	g_m(g), R4	// R4 = m
   219	
   220		MOVD	m_gsignal(R4), R5	// R5 = gsignal
   221		CMP	g, R5
   222		BEQ	noswitch		// already running on gsignal
   223	
   224		MOVD	m_g0(R4), R5	// R5 = g0
   225		CMP	g, R5
   226		BEQ	noswitch		// already running on g0
   227	
   228		MOVD	m_curg(R4), R6
   229		CMP	g, R6
   230		BEQ	switch			// running on curg: switch to g0
   231	
   232		// Bad: g is not gsignal, not g0, not curg. What is it?
   233		// Hide call from linker nosplit analysis.
   234		MOVD	$runtime·badsystemstack(SB), R12
   235		MOVD	R12, CTR
   236		BL	(CTR)			// NOTE(review): execution falls into switch if this ever returns
   237	
   238	switch:
   239		// save our state in g->sched. Pretend to
   240		// be systemstack_switch if the G stack is scanned.
   241		MOVD	$runtime·systemstack_switch(SB), R6
   242		ADD     $16, R6 // get past prologue (including r2-setting instructions when they're there)
   243		MOVD	R6, (g_sched+gobuf_pc)(g)
   244		MOVD	R1, (g_sched+gobuf_sp)(g)
   245		MOVD	R0, (g_sched+gobuf_lr)(g)
   246		MOVD	g, (g_sched+gobuf_g)(g)
   247	
   248		// switch to g0
   249		MOVD	R5, g
   250		BL	runtime·save_g(SB)
   251		MOVD	(g_sched+gobuf_sp)(g), R3
   252		// make it look like mstart called systemstack on g0, to stop traceback
   253		SUB	$FIXED_FRAME, R3
   254		MOVD	$runtime·mstart(SB), R4
   255		MOVD	R4, 0(R3)		// store mstart's address at 0(new SP), the slot read as the return address
   256		MOVD	R3, R1			// now running on the g0 stack
   257	
   258		// call target function
   259		MOVD	0(R11), R12	// code pointer
   260		MOVD	R12, CTR
   261		BL	(CTR)
   262	
   263		// restore TOC pointer. It seems unlikely that we will use systemstack
   264		// to call a function defined in another module, but the results of
   265		// doing so would be so confusing that it's worth doing this.
   266		MOVD	g_m(g), R3
   267		MOVD	m_curg(R3), g
   268		MOVD	(g_sched+gobuf_sp)(g), R3	// R3 = curg's saved SP (stored in switch above)
   269		MOVD	24(R3), R2			// reload R2 from offset 24 of that frame
   270		// switch back to g
   271		MOVD	g_m(g), R3
   272		MOVD	m_curg(R3), g
   273		BL	runtime·save_g(SB)
   274		MOVD	(g_sched+gobuf_sp)(g), R1	// back on curg's stack
   275		MOVD	R0, (g_sched+gobuf_sp)(g)	// clear the saved SP
   276		RET
   277	
   278	noswitch:
   279		// already on m stack, just call directly
   280		MOVD	0(R11), R12	// code pointer
   281		MOVD	R12, CTR
   282		BL	(CTR)
   283		MOVD	24(R1), R2	// reload the TOC pointer after the indirect call
   284		RET
   285	
   286	/*
   287	 * support for morestack
   288	 */
   289	
   290	// Called during function prolog when more stack is needed.
   291	// Caller has already loaded:
   292	// R3: framesize, R4: argsize, R5: LR
   293	//
   294	// The traceback routines see morestack on a g0 as being
   295	// the top of a stack (for example, morestack calling newstack
   296	// calling the scheduler calling newm calling gc), so we must
   297	// record an argument size. For that purpose, it has no arguments.
   298	TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   299		// Cannot grow scheduler stack (m->g0).
   300		MOVD	g_m(g), R7
   301		MOVD	m_g0(R7), R8
   302		CMP	g, R8
   303		BNE	3(PC)			// not g0: skip the two calls below
   304		BL	runtime·badmorestackg0(SB)
   305		BL	runtime·abort(SB)
   306	
   307		// Cannot grow signal stack (m->gsignal).
   308		MOVD	m_gsignal(R7), R8
   309		CMP	g, R8
   310		BNE	3(PC)			// not gsignal: skip the two calls below
   311		BL	runtime·badmorestackgsignal(SB)
   312		BL	runtime·abort(SB)
   313	
   314		// Called from f.
   315		// Set g->sched to context in f.
   316		MOVD	R1, (g_sched+gobuf_sp)(g)
   317		MOVD	LR, R8
   318		MOVD	R8, (g_sched+gobuf_pc)(g)	// sched.pc = LR at entry, i.e. the point in f that called morestack
   319		MOVD	R5, (g_sched+gobuf_lr)(g)	// sched.lr = R5, the LR value loaded by the caller (see header)
   320		// newstack will fill gobuf.ctxt.
   321	
   322		// Called from f.
   323		// Set m->morebuf to f's caller.
   324		MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   325		MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   326		MOVD	g, (m_morebuf+gobuf_g)(R7)
   327	
   328		// Call newstack on m->g0's stack.
   329		MOVD	m_g0(R7), g
   330		BL	runtime·save_g(SB)
   331		MOVD	(g_sched+gobuf_sp)(g), R1
   332		MOVDU   R0, -(FIXED_FRAME+8)(R1)	// create a call frame on g0
   333		MOVD	R11, FIXED_FRAME+0(R1)	// ctxt argument
   334		BL	runtime·newstack(SB)
   335	
   336		// Not reached, but make sure the return PC from the call to newstack
   337		// is still in this function, and not the beginning of the next.
   338		UNDEF
   339	
   340	TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   341		MOVD	R0, R11			// R11 = 0; morestack passes R11 to newstack as the ctxt argument
   342		BR	runtime·morestack(SB)	// tail jump, LR is left untouched
   343	
   344	// reflectcall: call a function with the given argument list
   345	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   346	// we don't have variable-sized frames, so we use a small number
   347	// of constant-sized-frame functions to encode a few bits of size in the pc.
   348	// Caution: ugly multiline assembly macros in your future!
   349	
   350	#define DISPATCH(NAME,MAXSIZE)		\
   351		MOVD	$MAXSIZE, R31;		\
   352		CMP	R3, R31;		\
   353		BGT	4(PC);			\
   354		MOVD	$NAME(SB), R12;		\
   355		MOVD	R12, CTR;		\
   356		BR	(CTR)
   357	// Note: can't just "BR NAME(SB)" - bad inlining results. DISPATCH expects the size in R3: if R3 <= MAXSIZE it jumps to NAME through CTR, otherwise BGT 4(PC) skips the three-instruction jump and falls through to whatever follows the macro. Clobbers R31, and R12/CTR on the taken path.
   358	
   359	TEXT reflect·call(SB), NOSPLIT, $0-0
   360		BR	·reflectcall(SB)	// alias: jump straight to reflectcall in this package
   361	
   362	TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
   363		MOVWZ argsize+24(FP), R3	// R3 = argsize, zero-extended from 32 bits; each DISPATCH below compares against it
   364		DISPATCH(runtime·call32, 32)	// jumps to the first callN with N >= argsize
   365		DISPATCH(runtime·call64, 64)
   366		DISPATCH(runtime·call128, 128)
   367		DISPATCH(runtime·call256, 256)
   368		DISPATCH(runtime·call512, 512)
   369		DISPATCH(runtime·call1024, 1024)
   370		DISPATCH(runtime·call2048, 2048)
   371		DISPATCH(runtime·call4096, 4096)
   372		DISPATCH(runtime·call8192, 8192)
   373		DISPATCH(runtime·call16384, 16384)
   374		DISPATCH(runtime·call32768, 32768)
   375		DISPATCH(runtime·call65536, 65536)
   376		DISPATCH(runtime·call131072, 131072)
   377		DISPATCH(runtime·call262144, 262144)
   378		DISPATCH(runtime·call524288, 524288)
   379		DISPATCH(runtime·call1048576, 1048576)
   380		DISPATCH(runtime·call2097152, 2097152)
   381		DISPATCH(runtime·call4194304, 4194304)
   382		DISPATCH(runtime·call8388608, 8388608)
   383		DISPATCH(runtime·call16777216, 16777216)
   384		DISPATCH(runtime·call33554432, 33554432)
   385		DISPATCH(runtime·call67108864, 67108864)
   386		DISPATCH(runtime·call134217728, 134217728)
   387		DISPATCH(runtime·call268435456, 268435456)
   388		DISPATCH(runtime·call536870912, 536870912)
   389		DISPATCH(runtime·call1073741824, 1073741824)
   390		MOVD	$runtime·badreflectcall(SB), R12	// argsize is larger than 1073741824: no trampoline fits
   391		MOVD	R12, CTR
   392		BR	(CTR)
   393	
   394	#define CALLFN(NAME,MAXSIZE)			\
   395	TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   396		NO_LOCAL_POINTERS;			\
   397		/* copy argsize bytes from arg to FIXED_FRAME(R1), one byte per iteration; R3/R5 are pre-decremented by 1 for the update-form loads/stores and R4 marks the end of the destination */		\
   398		MOVD	arg+16(FP), R3;			\
   399		MOVWZ	argsize+24(FP), R4;			\
   400		MOVD	R1, R5;				\
   401		ADD	$(FIXED_FRAME-1), R5;			\
   402		SUB	$1, R3;				\
   403		ADD	R5, R4;				\
   404		CMP	R5, R4;				\
   405		BEQ	4(PC);				\
   406		MOVBZU	1(R3), R6;			\
   407		MOVBZU	R6, 1(R5);			\
   408		BR	-4(PC);				\
   409		/* call function: R11 = f (closure context), R12/CTR = code pointer loaded from 0(f) */			\
   410		MOVD	f+8(FP), R11;			\
   411		MOVD	(R11), R12;			\
   412		MOVD	R12, CTR;			\
   413		PCDATA  $PCDATA_StackMapIndex, $0;	\
   414		BL	(CTR);				\
   415		MOVD	24(R1), R2;			\
   416		/* copy return values back: hand callRet R7 = argtype, R3 = arg+retoffset, R5 = frame copy+retoffset, R4 = argsize-retoffset */		\
   417		MOVD	argtype+0(FP), R7;		\
   418		MOVD	arg+16(FP), R3;			\
   419		MOVWZ	n+24(FP), R4;			\
   420		MOVWZ	retoffset+28(FP), R6;		\
   421		ADD	$FIXED_FRAME, R1, R5;		\
   422		ADD	R6, R5; 			\
   423		ADD	R6, R3;				\
   424		SUB	R6, R4;				\
   425		BL	callRet<>(SB);			\
   426		RET
   427	
   428	// callRet copies return values back at the end of call*. This is a
   429	// separate function so it can allocate stack space for the arguments
   430	// to reflectcallmove. It does not follow the Go ABI; it expects its
   431	// arguments in registers: R7, R3, R5 and R4, as set up by CALLFN.
   432	TEXT callRet<>(SB), NOSPLIT, $32-0
   433		MOVD	R7, FIXED_FRAME+0(R1)	// arg 0: argtype
   434		MOVD	R3, FIXED_FRAME+8(R1)	// arg 1: arg + retoffset
   435		MOVD	R5, FIXED_FRAME+16(R1)	// arg 2: call frame copy + retoffset
   436		MOVD	R4, FIXED_FRAME+24(R1)	// arg 3: argsize - retoffset
   437		BL	runtime·reflectcallmove(SB)
   438		RET
   439	
   440	CALLFN(·call32, 32)	// one fixed-frame trampoline per power of two from 32 bytes to 1073741824 bytes; the set must match the DISPATCH list in reflectcall
   441	CALLFN(·call64, 64)
   442	CALLFN(·call128, 128)
   443	CALLFN(·call256, 256)
   444	CALLFN(·call512, 512)
   445	CALLFN(·call1024, 1024)
   446	CALLFN(·call2048, 2048)
   447	CALLFN(·call4096, 4096)
   448	CALLFN(·call8192, 8192)
   449	CALLFN(·call16384, 16384)
   450	CALLFN(·call32768, 32768)
   451	CALLFN(·call65536, 65536)
   452	CALLFN(·call131072, 131072)
   453	CALLFN(·call262144, 262144)
   454	CALLFN(·call524288, 524288)
   455	CALLFN(·call1048576, 1048576)
   456	CALLFN(·call2097152, 2097152)
   457	CALLFN(·call4194304, 4194304)
   458	CALLFN(·call8388608, 8388608)
   459	CALLFN(·call16777216, 16777216)
   460	CALLFN(·call33554432, 33554432)
   461	CALLFN(·call67108864, 67108864)
   462	CALLFN(·call134217728, 134217728)
   463	CALLFN(·call268435456, 268435456)
   464	CALLFN(·call536870912, 536870912)
   465	CALLFN(·call1073741824, 1073741824)
   466	
   467	TEXT runtime·procyield(SB),NOSPLIT,$0-0
   468		RET	// no-op here: returns immediately without issuing any yield/pause instruction
   469	
   470	// void jmpdefer(fv, sp);
   471	// called from deferreturn.
   472	// 1. grab stored LR for caller
   473	// 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
   474	// 3. BR to fn
   475	// When dynamically linking Go, it is not sufficient to rewind to the BL
   476	// deferreturn -- we might be jumping between modules and so we need to reset
   477	// the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
   478	// the BL deferreturn and jmpdefer rewinds to that.
   479	TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
   480		MOVD	0(R1), R31		// step 1: stored LR of the caller, read from 0(R1)
   481		SUB     $8, R31			// step 2: rewind 8 bytes (two instructions)
   482		MOVD	R31, LR			// fn will return to the rewound address
   483	
   484		MOVD	fv+0(FP), R11		// R11 = fv (closure context)
   485		MOVD	argp+8(FP), R1		// R1 = argp ...
   486		SUB	$FIXED_FRAME, R1	// ... minus the fixed frame area
   487		MOVD	0(R11), R12		// code pointer
   488		MOVD	R12, CTR
   489		BR	(CTR)			// step 3: jump (not call) to fn
   490	
   491	// Save state of caller into g->sched. Smashes R31.
   492	TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   493		MOVD	LR, R31
   494		MOVD	R31, (g_sched+gobuf_pc)(g)	// sched.pc = our return address
   495		MOVD	R1, (g_sched+gobuf_sp)(g)	// sched.sp = caller's stack pointer (NOFRAME: R1 is unchanged)
   496		MOVD	R0, (g_sched+gobuf_lr)(g)	// sched.lr = 0
   497		MOVD	R0, (g_sched+gobuf_ret)(g)	// sched.ret = 0
   498		// Assert ctxt is zero. See func save.
   499		MOVD	(g_sched+gobuf_ctxt)(g), R31
   500		CMP	R0, R31
   501		BEQ	2(PC)				// ctxt == 0: skip the badctxt call
   502		BL	runtime·badctxt(SB)
   503		RET
   504	
   505	// func asmcgocall(fn, arg unsafe.Pointer) int32
   506	// Call fn(arg) on the scheduler stack,
   507	// aligned appropriately for the gcc ABI.
   508	// See cgocall.go for more details.
   509	TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   510		MOVD	fn+0(FP), R3
   511		MOVD	arg+8(FP), R4
   512	
   513		MOVD	R1, R7		// save original stack pointer
   514		MOVD	g, R5		// R5 = calling g, saved on the new stack below
   515	
   516		// Figure out if we need to switch to m->g0 stack.
   517		// We get called to create new OS threads too, and those
   518		// come in on the m->g0 stack already.
   519		MOVD	g_m(g), R6
   520		MOVD	m_g0(R6), R6	// R6 = m->g0
   521		CMP	R6, g
   522		BEQ	g0		// already on g0: no switch needed
   523		BL	gosave<>(SB)	// record this g's PC/SP in g->sched before leaving its stack
   524		MOVD	R6, g
   525		BL	runtime·save_g(SB)
   526		MOVD	(g_sched+gobuf_sp)(g), R1
   527	
   528		// Now on a scheduling stack (a pthread-created stack).
   529	g0:
   530		// Save room for two of our pointers, plus 32 bytes of callee
   531		// save area that lives on the caller stack.
   532		SUB	$48, R1
   533		RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
   534		MOVD	R5, 40(R1)	// save old g on stack
   535		MOVD	(g_stack+stack_hi)(R5), R5
   536		SUB	R7, R5		// R5 = old g's stack.hi - original SP
   537		MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   538		MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
   539		// This is a "global call", so put the global entry point in r12
   540		MOVD	R3, R12
   541		MOVD	R12, CTR
   542		MOVD	R4, R3		// arg in r3
   543		BL	(CTR)
   544	
   545		// C code can clobber R0, so set it back to 0.  F27-F31 are
   546		// callee save, so we don't need to recover those.
   547		XOR	R0, R0
   548		// Restore g, stack pointer, toc pointer.
   549		// R3 is errno, so don't touch it
   550		MOVD	40(R1), g
   551		MOVD    (g_stack+stack_hi)(g), R5
   552		MOVD    32(R1), R6
   553		SUB     R6, R5		// R5 = stack.hi - saved depth = the original SP, recomputed
   554		MOVD    24(R5), R2	// reload the TOC pointer from offset 24 of the original frame
   555		BL	runtime·save_g(SB)
   556		MOVD	(g_stack+stack_hi)(g), R5	// recompute the original SP again after the save_g call
   557		MOVD	32(R1), R6
   558		SUB	R6, R5
   559		MOVD	R5, R1		// back on the original stack
   560	
   561		MOVW	R3, ret+16(FP)	// 32-bit result of the C call
   562		RET
   563	
   564	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   565	// Turn the fn into a Go func (by taking its address) and call
   566	// cgocallback_gofunc.
   567	TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   568		MOVD	$fn+0(FP), R3		// address of the fn argument slot, not its value
   569		MOVD	R3, FIXED_FRAME+0(R1)	// arg 0: &fn
   570		MOVD	frame+8(FP), R3
   571		MOVD	R3, FIXED_FRAME+8(R1)	// arg 1: frame
   572		MOVD	framesize+16(FP), R3
   573		MOVD	R3, FIXED_FRAME+16(R1)	// arg 2: framesize
   574		MOVD	ctxt+24(FP), R3
   575		MOVD	R3, FIXED_FRAME+24(R1)	// arg 3: ctxt
   576		MOVD	$runtime·cgocallback_gofunc(SB), R12
   577		MOVD	R12, CTR
   578		BL	(CTR)			// indirect call through CTR with the target in R12
   579		RET
   580	
   581	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   582	// See cgocall.go for more details.
   583	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   584		NO_LOCAL_POINTERS
   585	
   586		// Load m and g from thread-local storage.
   587		MOVB	runtime·iscgo(SB), R3
   588		CMP	R3, $0
   589		BEQ	nocgo			// iscgo is false: skip the TLS load
   590		BL	runtime·load_g(SB)
   591	nocgo:
   592	
   593		// If g is nil, Go did not create the current thread.
   594		// Call needm to obtain one for temporary use.
   595		// In this case, we're running on the thread stack, so there's
   596		// lots of space, but the linker doesn't know. Hide the call from
   597		// the linker analysis by using an indirect call.
   598		CMP	g, $0
   599		BEQ	needm
   600	
   601		MOVD	g_m(g), R8
   602		MOVD	R8, savedm-8(SP)	// remember the m we entered with (non-nil on this path)
   603		BR	havem
   604	
   605	needm:
   606		MOVD	g, savedm-8(SP) // g is zero, so is m.
   607		MOVD	$runtime·needm(SB), R12
   608		MOVD	R12, CTR
   609		BL	(CTR)
   610	
   611		// Set m->sched.sp = SP, so that if a panic happens
   612		// during the function we are about to execute, it will
   613		// have a valid SP to run on the g0 stack.
   614		// The next few lines (after the havem label)
   615		// will save this SP onto the stack and then write
   616		// the same SP back to m->sched.sp. That seems redundant,
   617		// but if an unrecovered panic happens, unwindm will
   618		// restore the g->sched.sp from the stack location
   619		// and then systemstack will try to use it. If we don't set it here,
   620		// that restored SP will be uninitialized (typically 0) and
   621		// will not be usable.
   622		MOVD	g_m(g), R8
   623		MOVD	m_g0(R8), R3
   624		MOVD	R1, (g_sched+gobuf_sp)(R3)
   625	
   626	havem:
   627		// Now there's a valid m, and we're running on its m->g0.
   628		// Save current m->g0->sched.sp on stack and then set it to SP.
   629		// Save current sp in m->g0->sched.sp in preparation for
   630		// switch back to m->curg stack.
   631		// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
   632		MOVD	m_g0(R8), R3
   633		MOVD	(g_sched+gobuf_sp)(R3), R4
   634		MOVD	R4, savedsp-16(SP)		// stash the old g0 sched.sp in our frame
   635		MOVD	R1, (g_sched+gobuf_sp)(R3)	// g0 sched.sp = current SP
   636	
   637		// Switch to m->curg stack and call runtime.cgocallbackg.
   638		// Because we are taking over the execution of m->curg
   639		// but *not* resuming what had been running, we need to
   640		// save that information (m->curg->sched) so we can restore it.
   641		// We can restore m->curg->sched.sp easily, because calling
   642		// runtime.cgocallbackg leaves SP unchanged upon return.
   643		// To save m->curg->sched.pc, we push it onto the stack.
   644		// This has the added benefit that it looks to the traceback
   645		// routine like cgocallbackg is going to return to that
   646		// PC (because the frame we allocate below has the same
   647		// size as cgocallback_gofunc's frame declared above)
   648		// so that the traceback will seamlessly trace back into
   649		// the earlier calls.
   650		//
   651		// In the new goroutine, -8(SP) is unused (where SP refers to
   652		// m->curg's SP while we're setting it up, before we've adjusted it).
   653		MOVD	m_curg(R8), g
   654		BL	runtime·save_g(SB)
   655		MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   656		MOVD	(g_sched+gobuf_pc)(g), R5
   657		MOVD	R5, -(FIXED_FRAME+16)(R4)	// saved curg sched.pc lands at 0(new SP), where a return address is expected
   658		MOVD	ctxt+24(FP), R3
   659		MOVD	R3, -16(R4)			// ctxt goes at FIXED_FRAME+0 of the new frame
   660		MOVD	$-(FIXED_FRAME+16)(R4), R1	// switch to the new frame on curg's stack
   661		BL	runtime·cgocallbackg(SB)
   662	
   663		// Restore g->sched (== m->curg->sched) from saved values.
   664		MOVD	0(R1), R5			// the sched.pc pushed above
   665		MOVD	R5, (g_sched+gobuf_pc)(g)
   666		MOVD	$(FIXED_FRAME+16)(R1), R4	// undo the frame allocation to recover the original sched.sp
   667		MOVD	R4, (g_sched+gobuf_sp)(g)
   668	
   669		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   670		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   671		// so we do not have to restore it.)
   672		MOVD	g_m(g), R8
   673		MOVD	m_g0(R8), g
   674		BL	runtime·save_g(SB)
   675		MOVD	(g_sched+gobuf_sp)(g), R1
   676		MOVD	savedsp-16(SP), R4
   677		MOVD	R4, (g_sched+gobuf_sp)(g)
   678	
   679		// If the m on entry was nil, we called needm above to borrow an m
   680		// for the duration of the call. Since the call is over, return it with dropm.
   681		MOVD	savedm-8(SP), R6
   682		CMP	R6, $0
   683		BNE	droppedm			// m was non-nil on entry: nothing was borrowed
   684		MOVD	$runtime·dropm(SB), R12
   685		MOVD	R12, CTR
   686		BL	(CTR)
   687	droppedm:
   688	
   689		// Done!
   690		RET
   691	
   692	// void setg(G*); set g. for use by needm.
   693	TEXT runtime·setg(SB), NOSPLIT, $0-8
   694		MOVD	gg+0(FP), g	// load the new g into the g register
   695		// This only happens if iscgo, so jump straight to save_g
   696		BL	runtime·save_g(SB)
   697		RET
   698	
   699	// void setg_gcc(G*); set g in C TLS.
   700	// Must obey the gcc calling convention.
   701	TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   702		// The standard prologue clobbers R31, which is callee-save in
   703		// the C ABI, so this function is NOFRAME and saves LR itself (in R4).
   704		MOVD	LR, R4
   705		// Also save g and R31, since they're callee-save in C ABI
   706		MOVD	R31, R5
   707		MOVD	g, R6
   708	
   709		MOVD	R3, g		// R3 = the G* argument
   710		BL	runtime·save_g(SB)	// NOTE(review): relies on save_g leaving R4-R6 intact - confirm
   711	
   712		MOVD	R6, g		// restore the caller's g, R31 and LR
   713		MOVD	R5, R31
   714		MOVD	R4, LR
   715		RET
   716	
   717	TEXT runtime·getcallerpc(SB),NOSPLIT,$8-16
   718		MOVD	FIXED_FRAME+8(R1), R3		// LR saved by caller
   719		MOVD	R3, ret+8(FP)			// return it in the result slot
   720		RET
   721	
   722	TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
   723		MOVW	(R0), R0	// load from the address in R0 (zero by toolchain convention), intended to fault
   724		UNDEF			// no RET: trap here if the load above did not stop execution
   725	
   726	#define	TBRL	268
   727	#define	TBRU	269		/* Time base Upper/Lower */
   728	
   729	// int64 runtime·cputicks(void)
   730	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   731		MOVW	SPR(TBRU), R4	// upper half, first read
   732		MOVW	SPR(TBRL), R3	// lower half
   733		MOVW	SPR(TBRU), R5	// upper half, second read
   734		CMPW	R4, R5
   735		BNE	-4(PC)		// upper half changed between the reads: retry from the first read
   736		SLD	$32, R5		// R5 = upper << 32
   737		OR	R5, R3		// R3 = (upper << 32) | lower
   738		MOVD	R3, ret+0(FP)
   739		RET
   740	
   741	// memhash_varlen(p unsafe.Pointer, h seed) uintptr
   742	// redirects to memhash(p, h, size) using the size
   743	// stored in the closure.
   744	TEXT runtime·memhash_varlen(SB),NOSPLIT,$40-24
   745		GO_ARGS
   746		NO_LOCAL_POINTERS
   747		MOVD	p+0(FP), R3
   748		MOVD	h+8(FP), R4
   749		MOVD	8(R11), R5		// size, read from offset 8 of the closure in R11
   750		MOVD	R3, FIXED_FRAME+0(R1)	// arg 0: p
   751		MOVD	R4, FIXED_FRAME+8(R1)	// arg 1: h
   752		MOVD	R5, FIXED_FRAME+16(R1)	// arg 2: size
   753		BL	runtime·memhash(SB)
   754		MOVD	FIXED_FRAME+24(R1), R3	// memhash's result slot, right after its three arguments
   755		MOVD	R3, ret+16(FP)
   756		RET
   757	
   758	// AES hashing not implemented for ppc64. Each stub below is a single load from the address in R0 (zero by toolchain convention) with no RET, so it is expected to fault if ever called.
   759	TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   760		MOVW	(R0), R1
   761	TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   762		MOVW	(R0), R1
   763	TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   764		MOVW	(R0), R1
   765	TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   766		MOVW	(R0), R1
   767	
   768	TEXT runtime·memequal(SB),NOSPLIT,$0-25
   769		MOVD    a+0(FP), R3	// memeqbody is called with a, b, size in R3, R4, R5
   770		MOVD    b+8(FP), R4
   771		MOVD    size+16(FP), R5
   772	
   773		BL	runtime·memeqbody(SB)
   774		MOVB    R9, ret+24(FP)	// result byte is taken from R9 after memeqbody returns
   775		RET
   776	
   777	// memequal_varlen(a, b unsafe.Pointer) bool
   778	TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   779		MOVD	a+0(FP), R3
   780		MOVD	b+8(FP), R4
   781		CMP	R3, R4
   782		BEQ	eq		// same pointer: equal without reading memory
   783		MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
   784		BL	runtime·memeqbody(SB)
   785		MOVB	R9, ret+16(FP)	// result byte is taken from R9 after memeqbody returns
   786		RET
   787	eq:
   788		MOVD	$1, R3
   789		MOVB	R3, ret+16(FP)	// return true
   790		RET
   791	
   792	// Do an efficient memcmp for ppc64le: compares min(len1, len2) bytes, then breaks ties on the lengths (held in CR2); stores -1, 0 or 1 through R7.
   793	// R3 = s1 len
   794	// R4 = s2 len
   795	// R5 = s1 addr
   796	// R6 = s2 addr
   797	// R7 = addr of return value
   798	TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
   799		MOVD	R3,R8		// set up length
   800		CMP	R3,R4,CR2	// unequal? CR2 keeps the length comparison for the tie-breaks below
   801		BC	12,8,setuplen	// BLT CR2
   802		MOVD	R4,R8		// use R4 for comparison len
   803	setuplen:
   804		MOVD	R8,CTR		// set up loop counter
   805		CMP	R8,$8		// only optimize >=8
   806		BLT	simplecheck
   807		DCBT	(R5)		// cache hint
   808		DCBT	(R6)
   809		CMP	R8,$32		// optimize >= 32
   810		MOVD	R8,R9
   811		BLT	setup8a		// 8 byte moves only
   812	setup32a:
   813		SRADCC	$5,R8,R9	// number of 32 byte chunks
   814		MOVD	R9,CTR
   815	
   816	        // Special processing for 32 bytes or longer.
   817	        // Loading this way is faster and correct as long as the
   818		// doublewords being compared are equal. Once they
   819		// are found unequal, reload them in proper byte order
   820		// to determine greater or less than.
   821	loop32a:
   822		MOVD	0(R5),R9	// doublewords to compare
   823		MOVD	0(R6),R10	// get 4 doublewords
   824		MOVD	8(R5),R14
   825		MOVD	8(R6),R15
   826		CMPU	R9,R10		// bytes equal?
   827		MOVD	$0,R16		// set up for cmpne: R16 = offset of the mismatching doubleword from R5/R6
   828		BNE	cmpne		// further compare for LT or GT
   829		MOVD	16(R5),R9	// get next pair of doublewords
   830		MOVD	16(R6),R10
   831		CMPU	R14,R15		// bytes match?
   832		MOVD	$8,R16		// set up for cmpne
   833		BNE	cmpne		// further compare for LT or GT
   834		MOVD	24(R5),R14	// get next pair of doublewords
   835		MOVD    24(R6),R15
   836		CMPU	R9,R10		// bytes match?
   837		MOVD	$16,R16		// set up for cmpne
   838		BNE	cmpne		// further compare for LT or GT
   839		MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   840		ADD	$32,R5		// bump up to next 32
   841		ADD	$32,R6
   842		CMPU    R14,R15		// bytes match?
   843		BC	8,2,loop32a	// br ctr and cr: decrement CTR, loop while CTR != 0 and the doublewords were equal
   844		BNE	cmpne
   845		ANDCC	$24,R8,R9	// Any 8 byte chunks?
   846		BEQ	leftover	// and result is 0
   847	setup8a:
   848		SRADCC	$3,R9,R9	// get the 8 byte count
   849		BEQ	leftover	// shifted value is 0
   850		MOVD	R9,CTR		// loop count for doublewords
   851	loop8:
   852		MOVDBR	(R5+R0),R9	// doublewords to compare
   853		MOVDBR	(R6+R0),R10	// LE compare order
   854		ADD	$8,R5
   855		ADD	$8,R6
   856		CMPU	R9,R10		// match?
   857		BC	8,2,loop8	// bt ctr <> 0 && cr
   858		BGT	greater
   859		BLT	less
   860	leftover:
   861		ANDCC	$7,R8,R9	// check for leftover bytes
   862		MOVD	R9,CTR		// save the ctr
   863		BNE	simple		// leftover bytes
   864		BC	12,10,equal	// test CR2 for length comparison
   865		BC	12,8,less
   866		BR	greater
   867	simplecheck:
   868		CMP	R8,$0		// remaining compare length 0
   869		BNE	simple		// do simple compare
   870		BC	12,10,equal	// test CR2 for length comparison
   871		BC	12,8,less	// 1st len < 2nd len, result less
   872		BR	greater		// 1st len > 2nd len must be greater
   873	simple:
   874		MOVBZ	0(R5), R9	// get byte from 1st operand
   875		ADD	$1,R5
   876		MOVBZ	0(R6), R10	// get byte from 2nd operand
   877		ADD	$1,R6
   878		CMPU	R9, R10
   879		BC	8,2,simple	// bc ctr <> 0 && cr
   880		BGT	greater		// 1st > 2nd
   881		BLT	less		// 1st < 2nd
   882		BC	12,10,equal	// test CR2 for length comparison
   883		BC	12,9,greater	// CR2 GT: 1st len > 2nd len
   884		BR	less		// must be less
   885	cmpne:				// only here is not equal
   886		MOVDBR	(R5+R16),R8	// reload in reverse order
   887		MOVDBR	(R6+R16),R9
   888		CMPU	R8,R9		// compare correct endianness
   889		BGT	greater		// here only if NE
   890	less:
   891		MOVD	$-1,R3
   892		MOVD	R3,(R7)		// return value if A < B
   893		RET
   894	equal:
   895		MOVD	$0,(R7)		// return value if A == B
   896		RET
   897	greater:
   898		MOVD	$1,R3
   899		MOVD	R3,(R7)		// return value if A > B
   900		RET
   901	
   902	// cmpbodyBE<> is the three-way byte comparison (memcmp) body for ppc64 (BE).
   903	// R3 = s1 len
   904	// R4 = s2 len
   905	// R5 = s1 addr
   906	// R6 = s2 addr
   907	// R7 = addr of return value; receives -1 (s1 < s2), 0 (equal) or 1 (s1 > s2)
   908	TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   909		MOVD	R3,R8		// R8 = s1 len, provisional compare length
   910		CMP	R3,R4,CR2	// CR2 = s1 len vs s2 len; kept for the final tie-break
   911		BC	12,8,setuplen	// BLT CR2: s1 is shorter, keep R8 = s1 len
   912		MOVD	R4,R8		// otherwise compare only s2 len bytes
   913	setuplen:
   914		MOVD	R8,CTR		// loop counter for the byte loop (short inputs)
   915		CMP	R8,$8		// only optimize >=8
   916		BLT	simplecheck
   917		DCBT	(R5)		// cache hint
   918		DCBT	(R6)		// cache hint for the 2nd operand
   919		CMP	R8,$32		// optimize >= 32
   920		MOVD	R8,R9		// R9 = byte count, consumed by setup8a when < 32
   921		BLT	setup8a		// 8 byte moves only
   922	
   923	setup32a:
   924		SRADCC	$5,R8,R9	// number of 32 byte chunks
   925		MOVD	R9,CTR		// loop32a runs once per 32 byte chunk
   926	loop32a:
   927		MOVD	0(R5),R9	// doublewords to compare
   928		MOVD	0(R6),R10	// get 4 doublewords
   929		MOVD	8(R5),R14	// second pair is loaded before the first is tested
   930		MOVD	8(R6),R15
   931		CMPU	R9,R10		// unsigned compare of doubleword 0
   932		BLT	less		// found to be less
   933		BGT	greater		// found to be greater
   934		MOVD	16(R5),R9	// get next pair of doublewords
   935		MOVD	16(R6),R10
   936		CMPU	R14,R15		// unsigned compare of doubleword 1
   937		BLT	less		// found less
   938		BGT	greater		// found greater
   939		MOVD	24(R5),R14	// get next pair of doublewords
   940		MOVD	24(R6),R15
   941		CMPU	R9,R10		// unsigned compare of doubleword 2
   942		BLT	less		// found to be less
   943		BGT	greater		// found to be greater
   944		ADD	$32,R5		// bump up to next 32
   945		ADD	$32,R6
   946		CMPU	R14,R15		// unsigned compare of doubleword 3
   947		BC	8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 EQ
   948		BLT	less		// with BE, byte ordering is
   949		BGT	greater		// good for compare
   950		ANDCC	$24,R8,R9	// Any 8 byte chunks?
   951		BEQ	leftover	// and result is 0
   952	setup8a:
   953		SRADCC	$3,R9,R9	// get the 8 byte count
   954		BEQ	leftover	// shifted value is 0
   955		MOVD	R9,CTR		// loop count for doublewords
   956	loop8:
   957		MOVD	(R5),R9		// one doubleword from each operand
   958		MOVD	(R6),R10
   959		ADD	$8,R5
   960		ADD	$8,R6
   961		CMPU	R9,R10		// match?
   962		BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 EQ
   963		BGT	greater
   964		BLT	less
   965	leftover:
   966		ANDCC	$7,R8,R9	// check for leftover bytes
   967		MOVD	R9,CTR		// save the ctr
   968		BNE	simple		// leftover bytes
   969		BC	12,10,equal	// test CR2 for length comparison
   970		BC	12,8,less	// 1st len < 2nd len, result less
   971		BR	greater		// 1st len > 2nd len must be greater
   972	simplecheck:
   973		CMP	R8,$0		// remaining compare length 0
   974		BNE	simple		// do simple compare
   975		BC	12,10,equal	// test CR2 for length comparison
   976		BC 	12,8,less	// 1st len < 2nd len, result less
   977		BR	greater		// 1st len > 2nd len must be greater
   978	simple:
   979		MOVBZ	0(R5),R9	// get byte from 1st operand
   980		ADD	$1,R5
   981		MOVBZ	0(R6),R10	// get byte from 2nd operand
   982		ADD	$1,R6
   983		CMPU	R9,R10
   984		BC	8,2,simple	// decrement CTR; loop while CTR != 0 and CR0 EQ
   985		BGT	greater		// 1st > 2nd
   986		BLT	less		// 1st < 2nd
   987		BC	12,10,equal	// test CR2 for length comparison
   988		BC	12,9,greater	// 1st len > 2nd len; otherwise fall through to less
   989	less:
   990		MOVD	$-1,R3
   991		MOVD    R3,(R7)		// return value if A < B
   992		RET
   993	equal:
   994		MOVD    $0,(R7)		// return value if A == B
   995		RET
   996	greater:
   997		MOVD	$1,R3
   998		MOVD	R3,(R7)		// return value if A > B
   999		RET
  1000	
  1001	// memeqbody is the memequal body for ppc64: compares R5 bytes at R3 and R4.
  1002	// R3 = s1
  1003	// R4 = s2
  1004	// R5 = len
  1005	// R9 = return value (1 if all bytes match, 0 otherwise)
  1006	TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
  1007		MOVD    R5,CTR		// byte-loop counter for the short (< 8) case
  1008		CMP     R5,$8		// only optimize >=8
  1009		BLT     simplecheck
  1010		DCBT	(R3)		// cache hint
  1011		DCBT	(R4)
  1012		CMP	R5,$32		// optimize >= 32
  1013		MOVD	R5,R6		// needed if setup8a branch
  1014		BLT	setup8a		// 8 byte moves only
  1015	setup32a:                       // >= 32 bytes (no alignment check is performed here)
  1016		SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
  1017		MOVD	R6,CTR
  1018	loop32a:
  1019		MOVD    0(R3),R6        // doublewords to compare
  1020		MOVD    0(R4),R7
  1021		MOVD	8(R3),R8	// second pair is loaded before the first is tested
  1022		MOVD	8(R4),R9
  1023		CMP     R6,R7           // bytes match?
  1024		BNE     noteq
  1025		MOVD	16(R3),R6
  1026		MOVD	16(R4),R7
  1027		CMP     R8,R9		// bytes match? (CR0 is tested after the next two loads)
  1028		MOVD	24(R3),R8
  1029		MOVD	24(R4),R9
  1030		BNE     noteq		// result of the R8,R9 compare above
  1031		CMP     R6,R7           // bytes match?
  1032		BNE	noteq
  1033		ADD     $32,R3		// bump up to next 32
  1034		ADD     $32,R4
  1035		CMP     R8,R9           // bytes match?
  1036		BC      8,2,loop32a	// decrement CTR; loop while CTR != 0 and CR0 EQ
  1037		BNE	noteq
  1038		ANDCC	$24,R5,R6       // Any 8 byte chunks?
  1039		BEQ	leftover	// and result is 0
  1040	setup8a:
  1041		SRADCC  $3,R6,R6        // get the 8 byte count
  1042		BEQ	leftover	// shifted value is 0
  1043		MOVD    R6,CTR
  1044	loop8:
  1045		MOVD    0(R3),R6        // doublewords to compare
  1046		ADD	$8,R3
  1047		MOVD    0(R4),R7
  1048		ADD     $8,R4
  1049		CMP     R6,R7           // match?
  1050		BC	8,2,loop8	// decrement CTR; loop while CTR != 0 and CR0 EQ
  1051		BNE     noteq
  1052	leftover:
  1053		ANDCC   $7,R5,R6        // check for leftover bytes
  1054		BEQ     equal
  1055		MOVD    R6,CTR
  1056		BR	simple
  1057	simplecheck:
  1058		CMP	R5,$0		// zero length compares equal
  1059		BEQ	equal
  1060	simple:
  1061		MOVBZ   0(R3), R6	// one byte from each operand
  1062		ADD	$1,R3
  1063		MOVBZ   0(R4), R7
  1064		ADD     $1,R4
  1065		CMP     R6, R7
  1066		BNE     noteq
  1067		BC      8,2,simple	// decrement CTR; loop while CTR != 0 and CR0 EQ
  1068		BNE	noteq		// not taken: CR0 is EQ here, mismatches left at the BNE above
  1069		BR	equal
  1070	noteq:
  1071		MOVD    $0, R9		// result: not equal
  1072		RET
  1073	equal:
  1074		MOVD    $1, R9		// result: equal
  1075		RET
  1076	
  1077	// eqstring tests whether two strings are equal.
  1078	// The compiler guarantees that strings passed
  1079	// to eqstring have equal length.
  1080	// See runtime_test.go:eqstring_generic for
  1081	// equivalent Go code.
  1082	TEXT runtime·eqstring(SB),NOSPLIT,$0-33
  1083		MOVD    s1_base+0(FP), R3
  1084		MOVD    s2_base+16(FP), R4
  1085		MOVD    $1, R5
  1086		MOVB    R5, ret+32(FP)		// preset result to true for the fast path below
  1087		CMP     R3, R4			// same base pointer (and equal lengths) => equal
  1088		BNE     2(PC)			// pointers differ: skip the early RET
  1089		RET
  1090		MOVD    s1_len+8(FP), R5	// R5 = byte count for memeqbody
  1091		BL      runtime·memeqbody(SB)	// leaves 1 (equal) or 0 (not equal) in R9
  1092		MOVB    R9, ret+32(FP)
  1093		RET
  1094	
  1095	TEXT bytes·Equal(SB),NOSPLIT,$0-49	// reports whether byte slices a and b have equal length and contents
  1096		MOVD	a_len+8(FP), R4
  1097		MOVD	b_len+32(FP), R5	// R5 doubles as the length argument of memeqbody
  1098		CMP	R5, R4		// unequal lengths are not equal
  1099		BNE	noteq
  1100		MOVD	a+0(FP), R3	// R3 = a's data pointer
  1101		MOVD	b+24(FP), R4	// R4 = b's data pointer (a_len is no longer needed)
  1102		BL	runtime·memeqbody(SB)	// leaves 1 (equal) or 0 (not equal) in R9
  1103	
  1104		MOVBZ	R9,ret+48(FP)
  1105		RET
  1106	
  1107	noteq:
  1108		MOVBZ	$0,ret+48(FP)	// lengths differ: result is false
  1109		RET
  1115	
  1116	TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40	// loads the slice arguments, then tail-branches to indexbytebody
  1117		MOVD	s+0(FP), R3		// R3 = byte array pointer
  1118		MOVD	s_len+8(FP), R4		// R4 = length
  1119		MOVBZ	c+24(FP), R5		// R5 = byte
  1120		MOVD	$ret+32(FP), R14	// R14 = &ret
  1121		BR	runtime·indexbytebody<>(SB)	// the body stores the result through R14 and returns
  1122	
  1123	TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32	// loads the string arguments, then tail-branches to indexbytebody
  1124		MOVD	s+0(FP), R3	  // R3 = string
  1125		MOVD	s_len+8(FP), R4	  // R4 = length
  1126		MOVBZ	c+16(FP), R5	  // R5 = byte
  1127		MOVD	$ret+24(FP), R14  // R14 = &ret
  1128		BR	runtime·indexbytebody<>(SB)	// the body stores the result through R14 and returns
  1129	
  1130	TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0	// in: R3 = base, R4 = length, R5 = byte, R14 = &ret; stores index or -1 to (R14)
  1131		DCBT	(R3)		// Prepare cache line.
  1132		MOVD	R3,R10		// Save base address for calculating the index later.
  1133		RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
  1134		RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
  1135	
  1136		// Calculate last acceptable address and check for possible overflow
  1137		// using a saturated add.
  1138		// Overflows set last acceptable address to 0xffffffffffffffff.
  1139		ADD	R4,R3,R7
  1140		SUBC	R3,R7,R6
  1141		SUBE	R0,R0,R9
  1142		MOVW	R9,R6
  1143		OR	R6,R7,R7
  1144	
  1145		RLDIMI	$16,R5,$32,R5
  1146		CMPU	R4,$32		// Check if it's a small string (<=32 bytes). Those will be processed differently.
  1147		MOVD	$-1,R9		// All-ones; shifted below into the first-doubleword mask.
  1148		WORD $0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28).
  1149		RLDIMI	$32,R5,$0,R5
  1150		ADD	$-1,R7,R7	// R7 = address of the last byte to examine.
  1151	#ifdef GOARCH_ppc64le
  1152		SLD	R6,R9,R9	// Prepare mask for Little Endian
  1153	#else
  1154		SRD	R6,R9,R9	// Same for Big Endian
  1155	#endif
  1156		BLE	small_string	// Jump to the small string case if it's <=32 bytes (CR0 from the CMPU above).
  1157	
  1158		// Case for length >32 bytes
  1159		MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1160		CMPB	R12,R5,R3	// Check for a match.
  1161		AND	R9,R3,R3	// Mask bytes below s_base
  1162		RLDICL	$0,R7,$61,R4	// R4 = low 3 bits of R7: offset of the last valid byte in its doubleword
  1163		RLDICR	$0,R7,$60,R7	// Last doubleword in R7
  1164		CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
  1165		BNE	CR7,done
  1166	
  1167		// Check the address alignment in R8 and jump to the loop setup if the
  1168		// second doubleword does not need to be handled separately.
  1169		MOVFL	R8,CR7
  1170		BC	12,28,loop_setup
  1171	
  1172		// Not aligned, so handle the second doubleword
  1173		MOVDU	8(R8),R12
  1174		CMPB	R12,R5,R3
  1175		CMPU	R3,$0,CR7
  1176		BNE	CR7,done
  1177	
  1178	loop_setup:
  1179		// We are now aligned to a 16-byte boundary. We will load two doublewords
  1180		// per loop iteration. The last doubleword is in R7, so our loop counter
  1181		// starts at (R7-R8)/16.
  1182		SUB	R8,R7,R6
  1183		SRD	$4,R6,R6
  1184		MOVD	R6,CTR
  1185	
  1186		// Note: when we have an align directive, align this loop to 32 bytes so
  1187		// it fits in a single icache sector.
  1188	loop:
  1189		// Load two doublewords, then compare and merge in a single register. We
  1190		// will check two doublewords per iteration, then find out which of them
  1191		// contains the byte later. This speeds up the search.
  1192		MOVD	8(R8),R12
  1193		MOVDU	16(R8),R11	// Load with update: R8 advances by 16.
  1194		CMPB	R12,R5,R3
  1195		CMPB	R11,R5,R9
  1196		OR	R3,R9,R6
  1197		CMPU	R6,$0,CR7
  1198		BNE	CR7,found
  1199		BC	16,0,loop	// Decrement CTR and loop while it is non-zero.
  1200	
  1201		// Counter zeroed, but we may have another doubleword to read
  1202		CMPU	R8,R7
  1203		BEQ	notfound
  1204	
  1205		MOVDU	8(R8),R12
  1206		CMPB	R12,R5,R3
  1207		CMPU	R3,$0,CR6
  1208		BNE	CR6,done
  1209	
  1210	notfound:
  1211		MOVD	$-1,R3
  1212		MOVD	R3,(R14)	// Store -1 as the result.
  1213		RET
  1214	
  1215	found:
  1216		// One of the doublewords from the loop contains the byte we are looking
  1217		// for. Check the first doubleword and adjust the address if found.
  1218		CMPU	R3,$0,CR6
  1219		ADD	$-8,R8,R8
  1220		BNE	CR6,done
  1221	
  1222		// Not found, so it must be in the second doubleword of the merged pair.
  1223		MOVD	R9,R3
  1224		ADD	$8,R8,R8
  1225	
  1226	done:
  1227		// At this point, R3 has 0xFF in the same position as the byte we are
  1228		// looking for in the doubleword. Use that to calculate the exact index
  1229		// of the byte.
  1230	#ifdef GOARCH_ppc64le
  1231		ADD	$-1,R3,R11
  1232		ANDN	R3,R11,R11
  1233		POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
  1234	#else
  1235		CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1236	#endif
  1237		CMPU	R8,R7		// Check if we are at the last doubleword.
  1238		SRD	$3,R11		// Convert the zero-bit count to a byte offset.
  1239		ADD	R11,R8,R3	// R3 = address of the matching byte.
  1240		CMPU	R11,R4,CR7	// If at the last doubleword, check the byte offset.
  1241		BNE	return		// Not the last doubleword: the match is in range.
  1242		BLE	CR7,return	// Last doubleword: in range if offset <= R4.
  1243		MOVD	$-1,R3		// Match lies past the end of the input.
  1244		MOVD	R3,(R14)
  1245		RET
  1246	
  1247	return:
  1248		SUB	R10,R3		// Calculate index.
  1249		MOVD	R3,(R14)
  1250		RET
  1251	
  1252	small_string:
  1253		// We unroll this loop for better performance.
  1254		CMPU	R4,$0		// Check for length=0
  1255		BEQ	notfound
  1256	
  1257		MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1258		CMPB	R12,R5,R3	// Check for a match.
  1259		AND	R9,R3,R3	// Mask bytes below s_base.
  1260		CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
  1261		RLDICL	$0,R7,$61,R4	// R4 = low 3 bits of R7: offset of the last valid byte in its doubleword
  1262		RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
  1263	        CMPU	R8,R7
  1264		BNE	CR7,done
  1265		BEQ	notfound	// Hit length.
  1266	
  1267		MOVDU	8(R8),R12	// Second doubleword (R8 advances by 8).
  1268		CMPB	R12,R5,R3
  1269		CMPU	R3,$0,CR6
  1270		CMPU	R8,R7
  1271		BNE	CR6,done
  1272		BEQ	notfound
  1273	
  1274		MOVDU	8(R8),R12	// Third doubleword.
  1275		CMPB	R12,R5,R3
  1276		CMPU	R3,$0,CR6
  1277		CMPU	R8,R7
  1278		BNE	CR6,done
  1279		BEQ	notfound
  1280	
  1281		MOVDU	8(R8),R12	// Fourth doubleword.
  1282		CMPB	R12,R5,R3
  1283		CMPU	R3,$0,CR6
  1284		CMPU	R8,R7
  1285		BNE	CR6,done
  1286		BEQ	notfound
  1287	
  1288		MOVDU	8(R8),R12	// Fifth and final doubleword of the unrolled sequence.
  1289		CMPB	R12,R5,R3
  1290		CMPU	R3,$0,CR6
  1291		CMPU	R8,R7
  1292		BNE	CR6,done
  1293		BR	notfound
  1293	
  1294	TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40	// loads both strings, then tail-branches to the endian-specific compare body
  1295		MOVD	s1_base+0(FP), R5	// R5 = s1 addr
  1296		MOVD	s1_len+8(FP), R3	// R3 = s1 len
  1297		MOVD	s2_base+16(FP), R6	// R6 = s2 addr
  1298		MOVD	s2_len+24(FP), R4	// R4 = s2 len
  1299		MOVD	$ret+32(FP), R7		// R7 = &ret, written by the compare body
  1300	#ifdef	GOARCH_ppc64le
  1301		BR	cmpbodyLE<>(SB)
  1302	#else
  1303		BR      cmpbodyBE<>(SB)
  1304	#endif
  1305	
  1306	TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56	// loads both slices, then tail-branches to the endian-specific compare body
  1307		MOVD	s1+0(FP), R5	// R5 = first slice data pointer
  1308		MOVD	s1+8(FP), R3	// R3 = first slice length (offset 8 of the slice header)
  1309		MOVD	s2+24(FP), R6	// R6 = second slice data pointer
  1310		MOVD	s2+32(FP), R4	// R4 = second slice length
  1311		MOVD	$ret+48(FP), R7	// R7 = &ret, written by the compare body
  1312	#ifdef	GOARCH_ppc64le
  1313		BR	cmpbodyLE<>(SB)
  1314	#else
  1315		BR      cmpbodyBE<>(SB)
  1316	#endif
  1317	
  1318	TEXT runtime·return0(SB), NOSPLIT, $0	// returns with R3 set to 0
  1319		MOVW	$0, R3
  1320		RET
  1321	
  1322	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1323	// Must obey the gcc calling convention; the result is left in R3.
  1324	TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
  1325		// g (R30) and R31 are callee-save in the C ABI, so save them
  1326		MOVD	g, R4
  1327		MOVD	R31, R5
  1328		MOVD	LR, R6		// the BL below overwrites LR
  1329	
  1330		BL	runtime·load_g(SB)	// clobbers g (R30), R31
  1331		MOVD	g_m(g), R3		// R3 = g->m
  1332		MOVD	m_curg(R3), R3		// R3 = g->m->curg
  1333		MOVD	(g_stack+stack_hi)(R3), R3	// R3 = g->m->curg.stack.hi
  1334	
  1335		MOVD	R4, g		// restore the caller's g, R31 and LR
  1336		MOVD	R5, R31
  1337		MOVD	R6, LR
  1338		RET
  1339	
  1340	// The top-most function running on a goroutine
  1341	// returns to goexit+PCQuantum.
  1342	//
  1343	// When dynamically linking Go, it can be returned to from a function
  1344	// implemented in a different module and so needs to reload the TOC pointer
  1345	// from the stack (although this function declares that it does not set up a
  1346	// frame, newproc1 does in fact allocate one for goexit and saves the TOC
  1347	// pointer in the correct place).
  1348	// goexit+_PCQuantum is halfway through the usual global entry point prologue
  1349	// that derives r2 from r12 which is a bit silly, but not harmful.
  1350	TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
  1351		MOVD	24(R1), R2		// reload the TOC pointer saved in the frame
  1352		BL	runtime·goexit1(SB)	// does not return
  1353		// traceback from goexit1 must hit code range of goexit
  1354		MOVD	R0, R0	// NOP
  1355	
  1356	TEXT runtime·prefetcht0(SB),NOSPLIT,$0-8	// no-op here: returns without reading its 8-byte argument
  1357		RET
  1358	
  1359	TEXT runtime·prefetcht1(SB),NOSPLIT,$0-8	// no-op here: returns without reading its 8-byte argument
  1360		RET
  1361	
  1362	TEXT runtime·prefetcht2(SB),NOSPLIT,$0-8	// no-op here: returns without reading its 8-byte argument
  1363		RET
  1364	
  1365	TEXT runtime·prefetchnta(SB),NOSPLIT,$0-8	// no-op here: returns without reading its 8-byte argument
  1366		RET
  1367	
  1368	TEXT runtime·sigreturn(SB),NOSPLIT,$0-0	// empty body: returns immediately
  1369		RET
  1370	
  1371	// prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
  1372	// module containing runtime) to the frame that goexit will execute in when
  1373	// the goroutine exits. It's implemented in assembly mainly because that's the
  1374	// easiest way to get access to R2.
  1375	TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
  1376	      MOVD    sp+0(FP), R3	// R3 = the sp argument (base of the goexit frame)
  1377	      MOVD    R2, 24(R3)	// store the TOC pointer at offset 24, where goexit reloads it from
  1378	      RET
  1379	
  1380	TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0	// appends the value in R3 to the list ending at runtime·lastmoduledatap
  1381		ADD	$-8, R1			// make room on the stack for one doubleword
  1382		MOVD	R31, 0(R1)		// preserve R31 across the body (presumably used as assembler scratch — confirm)
  1383		MOVD	runtime·lastmoduledatap(SB), R4	// R4 = current tail of the list
  1384		MOVD	R3, moduledata_next(R4)	// tail.next = R3
  1385		MOVD	R3, runtime·lastmoduledatap(SB)	// R3 becomes the new tail
  1386		MOVD	0(R1), R31		// restore R31
  1387		ADD	$8, R1			// release the stack slot
  1388		RET
  1389	
  1390	TEXT ·checkASM(SB),NOSPLIT,$0-1	// unconditionally stores 1 into the one-byte result
  1391		MOVW	$1, R3
  1392		MOVB	R3, ret+0(FP)
  1393		RET

View as plain text