...
Run Format

Text file src/runtime/asm_ppc64x.s

Documentation: runtime

     1	// Copyright 2014 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// +build ppc64 ppc64le
     6	
     7	#include "go_asm.h"
     8	#include "go_tls.h"
     9	#include "funcdata.h"
    10	#include "textflag.h"
    11	#include "asm_ppc64x.h"
    12	
    13	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    14		// Entry state: R1 = stack; R3 = argc; R4 = argv; R13 = C TLS base pointer
    15		// Bootstraps g0/m0, runs the runtime init calls, queues runtime·main via newproc, then enters mstart.
    16		// initialize essential registers
    17		BL	runtime·reginit(SB)
    18	
    19		SUB	$(FIXED_FRAME+16), R1	// new frame: FIXED_FRAME plus 16 bytes for argc/argv
    20		MOVD	R2, 24(R1)		// stash the TOC pointer away again now we've created a new frame
    21		MOVW	R3, FIXED_FRAME+0(R1)	// argc
    22		MOVD	R4, FIXED_FRAME+8(R1)	// argv
    23	
    24		// create istack out of the given (operating system) stack.
    25		// _cgo_init may update stackguard.
    26		MOVD	$runtime·g0(SB), g
    27		MOVD	$(-64*1024), R31
    28		ADD	R31, R1, R3		// R3 = SP - 64KB: provisional lower bound of g0's stack
    29		MOVD	R3, g_stackguard0(g)
    30		MOVD	R3, g_stackguard1(g)
    31		MOVD	R3, (g_stack+stack_lo)(g)
    32		MOVD	R1, (g_stack+stack_hi)(g)
    33	
    34		// if there is a _cgo_init, call it using the gcc ABI.
    35		MOVD	_cgo_init(SB), R12
    36		CMP	R0, R12			// R0 is zero (see reginit): is _cgo_init nil?
    37		BEQ	nocgo
    38		MOVD	R12, CTR		// r12 = "global function entry point"
    39		MOVD	R13, R5			// arg 2: TLS base pointer
    40		MOVD	$setg_gcc<>(SB), R4 	// arg 1: setg
    41		MOVD	g, R3			// arg 0: G
    42		// C functions expect 32 bytes of space on caller stack frame
    43		// and a 16-byte aligned R1
    44		MOVD	R1, R14			// save current stack
    45		SUB	$32, R1			// reserve 32 bytes
    46		RLDCR	$0, R1, $~15, R1	// 16-byte align
    47		BL	(CTR)			// may clobber R0, R3-R12
    48		MOVD	R14, R1			// restore stack
    49		MOVD	24(R1), R2		// reload the TOC pointer stashed above
    50		XOR	R0, R0			// fix R0
    51	
    52	nocgo:
    53		// update stackguard after _cgo_init
    54		MOVD	(g_stack+stack_lo)(g), R3
    55		ADD	$const__StackGuard, R3
    56		MOVD	R3, g_stackguard0(g)
    57		MOVD	R3, g_stackguard1(g)
    58	
    59		// set the per-goroutine and per-mach "registers"
    60		MOVD	$runtime·m0(SB), R3
    61	
    62		// save m->g0 = g0
    63		MOVD	g, m_g0(R3)
    64		// save m0 to g0->m
    65		MOVD	R3, g_m(g)
    66	
    67		BL	runtime·check(SB)
    68	
    69		// args are already prepared (argc/argv were stored in the frame above)
    70		BL	runtime·args(SB)
    71		BL	runtime·osinit(SB)
    72		BL	runtime·schedinit(SB)
    73	
    74		// create a new goroutine to start program
    75		MOVD	$runtime·mainPC(SB), R3		// entry
    76		MOVDU	R3, -8(R1)			// push &mainPC, then five zero words below it
    77		MOVDU	R0, -8(R1)
    78		MOVDU	R0, -8(R1)
    79		MOVDU	R0, -8(R1)
    80		MOVDU	R0, -8(R1)
    81		MOVDU	R0, -8(R1)
    82		BL	runtime·newproc(SB)
    83		ADD	$(16+FIXED_FRAME), R1		// pop the pushed words (assumes 16+FIXED_FRAME == 6*8; FIXED_FRAME comes from asm_ppc64x.h - TODO confirm)
    84	
    85		// start this M
    86		BL	runtime·mstart(SB)
    87	
    88		MOVD	R0, 0(R0)	// only reached if mstart returns: store through address 0, presumably to crash
    89		RET
    90	
    91	DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)	// 8-byte word holding the address of runtime·main
    92	GLOBL	runtime·mainPC(SB),RODATA,$8			// read-only; rt0_go passes its address to newproc
    93	
    94	TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
    95		MOVD	R0, 0(R0) // TODO: TD (presumably the trap instruction); for now, store through R0, i.e. to address 0
    96		RET
    97	
    98	TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
    99		RET	// nothing to initialize here: returns immediately
   100	
   101	TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
   102		// crosscall_ppc64 and crosscall2 need to reginit, but can't
   103		// get at the 'runtime.reginit' symbol.
   104		BR	runtime·reginit(SB)	// plain branch (no link): reginit's RET returns to our caller
   105	
   106	TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
   107		// set R0 to zero, it's expected by the toolchain
   108		XOR R0, R0	// R0 = R0 XOR R0 = 0, whatever it held before
   109		RET
   110	
   111	/*
   112	 *  go-routine
   113	 */
   114	
   115	// void gosave(Gobuf *buf)
   116	// setjmp-style: record the caller's PC, SP and g in *buf and zero lr/ret.
   117	TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
   118		MOVD	buf+0(FP), R3
   119		MOVD	LR, R31			// stage the return address in R31
   120		MOVD	R31, gobuf_pc(R3)
   121		MOVD	R1, gobuf_sp(R3)
   122		MOVD	g, gobuf_g(R3)
   123		MOVD	R0, gobuf_ret(R3)
   124		MOVD	R0, gobuf_lr(R3)
   125		// buf.ctxt is expected to be zero already; anything else goes to badctxt. See func save.
   126		MOVD	gobuf_ctxt(R3), R3
   127		CMP	R0, R3
   128		BEQ	ctxtok
   129		BL	runtime·badctxt(SB)
   130	ctxtok:	RET
   131	
   132	// void gogo(Gobuf*)
   133	// restore state from Gobuf; longjmp
   134	TEXT runtime·gogo(SB), NOSPLIT, $16-8
   135		MOVD	buf+0(FP), R5
   136		MOVD	gobuf_g(R5), g	// g = buf.g
   137		BL	runtime·save_g(SB)
   138	
   139		MOVD	0(g), R4	// make sure g is not nil: R4 is never read below, so the load is only a probe
   140		MOVD	gobuf_sp(R5), R1
   141		MOVD	gobuf_lr(R5), R31
   142		MOVD	R31, LR
   143		MOVD	gobuf_ret(R5), R3
   144		MOVD	gobuf_ctxt(R5), R11
   145		MOVD	R0, gobuf_sp(R5)	// clear the consumed fields of the Gobuf
   146		MOVD	R0, gobuf_ret(R5)
   147		MOVD	R0, gobuf_lr(R5)
   148		MOVD	R0, gobuf_ctxt(R5)
   149		CMP	R0, R0 // set condition codes for == test, needed by stack split
   150		MOVD	gobuf_pc(R5), R12
   151		MOVD	R12, CTR
   152		BR	(CTR)	// jump (not call) to the saved PC
   153	
   154	// void mcall(fn func(*g))
   155	// Switch to m->g0's stack, call fn(g).
   156	// Fn must never return. It should gogo(&g->sched)
   157	// to keep running g.
   158	TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
   159		// Save caller state in g->sched
   160		MOVD	R1, (g_sched+gobuf_sp)(g)
   161		MOVD	LR, R31
   162		MOVD	R31, (g_sched+gobuf_pc)(g)
   163		MOVD	R0, (g_sched+gobuf_lr)(g)
   164		MOVD	g, (g_sched+gobuf_g)(g)
   165	
   166		// Switch to m->g0 & its stack, call fn.
   167		MOVD	g, R3				// R3 = the calling g, passed to fn below
   168		MOVD	g_m(g), R8
   169		MOVD	m_g0(R8), g
   170		BL	runtime·save_g(SB)
   171		CMP	g, R3				// caller was already g0?
   172		BNE	2(PC)
   173		BR	runtime·badmcall(SB)
   174		MOVD	fn+0(FP), R11			// context
   175		MOVD	0(R11), R12			// code pointer
   176		MOVD	R12, CTR
   177		MOVD	(g_sched+gobuf_sp)(g), R1	// sp = m->g0->sched.sp
   178		MOVDU	R3, -8(R1)			// push the old g (fn's argument), then four zero words
   179		MOVDU	R0, -8(R1)
   180		MOVDU	R0, -8(R1)
   181		MOVDU	R0, -8(R1)
   182		MOVDU	R0, -8(R1)
   183		BL	(CTR)
   184		MOVD	24(R1), R2			// fn returned (it must not): reload TOC pointer before reporting
   185		BR	runtime·badmcall2(SB)
   186	
   187	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   188	// of the G stack. We need to distinguish the routine that
   189	// lives at the bottom of the G stack from the one that lives
   190	// at the top of the system stack because the one at the top of
   191	// the system stack terminates the stack walk (see topofstack()).
   192	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   193		// We have several undefs here so that 16 bytes past
   194		// $runtime·systemstack_switch lies within them whether or not the
   195		// instructions that derive r2 from r12 are there.
   196		UNDEF	// systemstack records this symbol's address + 16 as the saved PC
   197		UNDEF
   198		UNDEF
   199		BL	(LR)	// make sure this function is not leaf
   200		RET
   201	
   202	// func systemstack(fn func())
   203	TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   204		MOVD	fn+0(FP), R3	// R3 = fn
   205		MOVD	R3, R11		// context
   206		MOVD	g_m(g), R4	// R4 = m
   207	
   208		MOVD	m_gsignal(R4), R5	// R5 = gsignal
   209		CMP	g, R5
   210		BEQ	noswitch	// on gsignal: call fn in place
   211	
   212		MOVD	m_g0(R4), R5	// R5 = g0
   213		CMP	g, R5
   214		BEQ	noswitch	// on g0: call fn in place
   215	
   216		MOVD	m_curg(R4), R6
   217		CMP	g, R6
   218		BEQ	switch
   219	
   220		// Bad: g is not gsignal, not g0, not curg. What is it?
   221		// Hide call from linker nosplit analysis.
   222		MOVD	$runtime·badsystemstack(SB), R12
   223		MOVD	R12, CTR
   224		BL	(CTR)
   225	
   226	switch:
   227		// save our state in g->sched. Pretend to
   228		// be systemstack_switch if the G stack is scanned.
   229		MOVD	$runtime·systemstack_switch(SB), R6
   230		ADD     $16, R6 // get past prologue (including r2-setting instructions when they're there)
   231		MOVD	R6, (g_sched+gobuf_pc)(g)
   232		MOVD	R1, (g_sched+gobuf_sp)(g)
   233		MOVD	R0, (g_sched+gobuf_lr)(g)
   234		MOVD	g, (g_sched+gobuf_g)(g)
   235	
   236		// switch to g0 (R5 still holds m->g0 from the check above)
   237		MOVD	R5, g
   238		BL	runtime·save_g(SB)
   239		MOVD	(g_sched+gobuf_sp)(g), R3
   240		// make it look like mstart called systemstack on g0, to stop traceback
   241		SUB	$FIXED_FRAME, R3
   242		MOVD	$runtime·mstart(SB), R4
   243		MOVD	R4, 0(R3)
   244		MOVD	R3, R1
   245	
   246		// call target function
   247		MOVD	0(R11), R12	// code pointer
   248		MOVD	R12, CTR
   249		BL	(CTR)
   250	
   251		// restore TOC pointer. It seems unlikely that we will use systemstack
   252		// to call a function defined in another module, but the results of
   253		// doing so would be so confusing that it's worth doing this.
   254		MOVD	g_m(g), R3
   255		MOVD	m_curg(R3), g
   256		MOVD	(g_sched+gobuf_sp)(g), R3
   257		MOVD	24(R3), R2	// TOC slot at 24(saved SP) of the original goroutine's frame
   258		// switch back to g
   259		MOVD	g_m(g), R3
   260		MOVD	m_curg(R3), g
   261		BL	runtime·save_g(SB)
   262		MOVD	(g_sched+gobuf_sp)(g), R1
   263		MOVD	R0, (g_sched+gobuf_sp)(g)	// clear the saved SP now that we are back on it
   264		RET
   265	
   266	noswitch:
   267		// already on m stack, just call directly
   268		// On other arches we do a tail call here, but it appears to be
   269		// impossible to tail call a function pointer in shared mode on
   270		// ppc64 because the caller is responsible for restoring the TOC.
   271		MOVD	0(R11), R12	// code pointer
   272		MOVD	R12, CTR
   273		BL	(CTR)
   274		MOVD	24(R1), R2	// restore TOC pointer after the indirect call
   275		RET
   276	
   277	/*
   278	 * support for morestack
   279	 */
   280	
   281	// Called during function prolog when more stack is needed.
   282	// Caller has already loaded:
   283	// R3: framesize, R4: argsize, R5: LR
   284	//
   285	// The traceback routines see morestack on a g0 as being
   286	// the top of a stack (for example, morestack calling newstack
   287	// calling the scheduler calling newm calling gc), so we must
   288	// record an argument size. For that purpose, it has no arguments.
   289	TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   290		// Cannot grow scheduler stack (m->g0).
   291		MOVD	g_m(g), R7
   292		MOVD	m_g0(R7), R8
   293		CMP	g, R8
   294		BNE	3(PC)				// not g0: skip the two calls below
   295		BL	runtime·badmorestackg0(SB)
   296		BL	runtime·abort(SB)
   297	
   298		// Cannot grow signal stack (m->gsignal).
   299		MOVD	m_gsignal(R7), R8
   300		CMP	g, R8
   301		BNE	3(PC)				// not gsignal: skip the two calls below
   302		BL	runtime·badmorestackgsignal(SB)
   303		BL	runtime·abort(SB)
   304	
   305		// Called from f.
   306		// Set g->sched to context in f.
   307		MOVD	R1, (g_sched+gobuf_sp)(g)
   308		MOVD	LR, R8
   309		MOVD	R8, (g_sched+gobuf_pc)(g)
   310		MOVD	R5, (g_sched+gobuf_lr)(g)
   311		MOVD	R11, (g_sched+gobuf_ctxt)(g)
   312	
   313		// Called from f.
   314		// Set m->morebuf to f's caller.
   315		MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   316		MOVD	R1, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   317		MOVD	g, (m_morebuf+gobuf_g)(R7)
   318	
   319		// Call newstack on m->g0's stack.
   320		MOVD	m_g0(R7), g
   321		BL	runtime·save_g(SB)
   322		MOVD	(g_sched+gobuf_sp)(g), R1
   323		MOVDU   R0, -(FIXED_FRAME+0)(R1)	// create a call frame on g0
   324		BL	runtime·newstack(SB)
   325	
   326		// Not reached, but make sure the return PC from the call to newstack
   327		// is still in this function, and not the beginning of the next.
   328		UNDEF
   329	
   330	TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   331		MOVD	R0, R11			// zero R11, which morestack stores as g->sched.ctxt
   332		BR	runtime·morestack(SB)	// tail-jump: morestack sees our caller's LR
   333	
   334	// reflectcall: call a function with the given argument list
   335	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   336	// we don't have variable-sized frames, so we use a small number
   337	// of constant-sized-frame functions to encode a few bits of size in the pc.
   338	// Caution: ugly multiline assembly macros in your future!
   339	
   340	#define DISPATCH(NAME,MAXSIZE)		\
   341		MOVD	$MAXSIZE, R31;		\
   342		CMP	R3, R31;		/* R3 = argsize, loaded by reflectcall */ \
   343		BGT	4(PC);			/* too big for NAME: skip the three-instruction jump */ \
   344		MOVD	$NAME(SB), R12;		\
   345		MOVD	R12, CTR;		\
   346		BR	(CTR)
   347	// Note: can't just "BR NAME(SB)" - bad inlining results. Hence the indirect jump through CTR.
   348	
   349	TEXT reflect·call(SB), NOSPLIT, $0-0
   350		BR	·reflectcall(SB)	// reflect.call is a plain jump to reflectcall in this package
   351	
   352	TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
   353		MOVWZ argsize+24(FP), R3	// R3 = argsize, zero-extended; each DISPATCH compares it with its MAXSIZE
   354		DISPATCH(runtime·call32, 32)
   355		DISPATCH(runtime·call64, 64)
   356		DISPATCH(runtime·call128, 128)
   357		DISPATCH(runtime·call256, 256)
   358		DISPATCH(runtime·call512, 512)
   359		DISPATCH(runtime·call1024, 1024)
   360		DISPATCH(runtime·call2048, 2048)
   361		DISPATCH(runtime·call4096, 4096)
   362		DISPATCH(runtime·call8192, 8192)
   363		DISPATCH(runtime·call16384, 16384)
   364		DISPATCH(runtime·call32768, 32768)
   365		DISPATCH(runtime·call65536, 65536)
   366		DISPATCH(runtime·call131072, 131072)
   367		DISPATCH(runtime·call262144, 262144)
   368		DISPATCH(runtime·call524288, 524288)
   369		DISPATCH(runtime·call1048576, 1048576)
   370		DISPATCH(runtime·call2097152, 2097152)
   371		DISPATCH(runtime·call4194304, 4194304)
   372		DISPATCH(runtime·call8388608, 8388608)
   373		DISPATCH(runtime·call16777216, 16777216)
   374		DISPATCH(runtime·call33554432, 33554432)
   375		DISPATCH(runtime·call67108864, 67108864)
   376		DISPATCH(runtime·call134217728, 134217728)
   377		DISPATCH(runtime·call268435456, 268435456)
   378		DISPATCH(runtime·call536870912, 536870912)
   379		DISPATCH(runtime·call1073741824, 1073741824)
   380		MOVD	$runtime·badreflectcall(SB), R12	// argsize exceeds the largest frame size handled above
   381		MOVD	R12, CTR
   382		BR	(CTR)
   383	
   384	#define CALLFN(NAME,MAXSIZE)			\
   385	TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   386		NO_LOCAL_POINTERS;			\
   387		/* copy arguments to stack */		\
   388		MOVD	arg+16(FP), R3;			\
   389		MOVWZ	argsize+24(FP), R4;			\
   390		MOVD	R1, R5;				\
   391		ADD	$(FIXED_FRAME-1), R5;			\
   392		SUB	$1, R3;				/* both pointers start one byte early: MOVBZU steps before each access */ \
   393		ADD	R5, R4;				/* R4 = end value of the destination pointer */ \
   394		CMP	R5, R4;				\
   395		BEQ	4(PC);				/* copied everything: leave the byte loop */ \
   396		MOVBZU	1(R3), R6;			\
   397		MOVBZU	R6, 1(R5);			\
   398		BR	-4(PC);				/* back to the CMP */ \
   399		/* call function */			\
   400		MOVD	f+8(FP), R11;			\
   401		MOVD	(R11), R12;			\
   402		MOVD	R12, CTR;			\
   403		PCDATA  $PCDATA_StackMapIndex, $0;	\
   404		BL	(CTR);				\
   405		MOVD	24(R1), R2;			/* reload TOC pointer after the indirect call */ \
   406		/* copy return values back */		\
   407		MOVD	argtype+0(FP), R7;		\
   408		MOVD	arg+16(FP), R3;			\
   409		MOVWZ	n+24(FP), R4;			/* same +24(FP) slot that is read as argsize above */ \
   410		MOVWZ	retoffset+28(FP), R6;		\
   411		ADD	$FIXED_FRAME, R1, R5;		\
   412		ADD	R6, R5; 			/* R5 = results in our frame's copy */ \
   413		ADD	R6, R3;				/* R3 = results in the caller's arg block */ \
   414		SUB	R6, R4;				/* R4 = byte count from retoffset to the end */ \
   415		BL	callRet<>(SB);			\
   416		RET
   417	
   418	// callRet copies return values back at the end of call*. This is a
   419	// separate function so it can allocate stack space for the arguments
   420	// to reflectcallmove. It does not follow the Go ABI; it expects its
   421	// arguments in registers (as set up by CALLFN: R7, R3, R5, R4).
   422	TEXT callRet<>(SB), NOSPLIT, $32-0
   423		MOVD	R7, FIXED_FRAME+0(R1)	// 1st arg: argtype
   424		MOVD	R3, FIXED_FRAME+8(R1)	// 2nd arg: arg + retoffset
   425		MOVD	R5, FIXED_FRAME+16(R1)	// 3rd arg: CALLFN's stack copy + retoffset
   426		MOVD	R4, FIXED_FRAME+24(R1)	// 4th arg: size - retoffset
   427		BL	runtime·reflectcallmove(SB)
   428		RET
   429	// Instantiate call32 ... call1073741824, the targets reflectcall dispatches to; one per power-of-two frame size.
   430	CALLFN(·call32, 32)
   431	CALLFN(·call64, 64)
   432	CALLFN(·call128, 128)
   433	CALLFN(·call256, 256)
   434	CALLFN(·call512, 512)
   435	CALLFN(·call1024, 1024)
   436	CALLFN(·call2048, 2048)
   437	CALLFN(·call4096, 4096)
   438	CALLFN(·call8192, 8192)
   439	CALLFN(·call16384, 16384)
   440	CALLFN(·call32768, 32768)
   441	CALLFN(·call65536, 65536)
   442	CALLFN(·call131072, 131072)
   443	CALLFN(·call262144, 262144)
   444	CALLFN(·call524288, 524288)
   445	CALLFN(·call1048576, 1048576)
   446	CALLFN(·call2097152, 2097152)
   447	CALLFN(·call4194304, 4194304)
   448	CALLFN(·call8388608, 8388608)
   449	CALLFN(·call16777216, 16777216)
   450	CALLFN(·call33554432, 33554432)
   451	CALLFN(·call67108864, 67108864)
   452	CALLFN(·call134217728, 134217728)
   453	CALLFN(·call268435456, 268435456)
   454	CALLFN(·call536870912, 536870912)
   455	CALLFN(·call1073741824, 1073741824)
   456	
   457	TEXT runtime·procyield(SB),NOSPLIT,$0-0
   458		RET	// no yield/pause instruction is issued here: returns immediately
   459	
   460	// void jmpdefer(fv, sp);
   461	// called from deferreturn.
   462	// 1. grab stored LR for caller
   463	// 2. sub 8 bytes to get back to either nop or toc reload before deferreturn
   464	// 3. BR to fn
   465	// When dynamically linking Go, it is not sufficient to rewind to the BL
   466	// deferreturn -- we might be jumping between modules and so we need to reset
   467	// the TOC pointer in r2. To do this, codegen inserts MOVD 24(R1), R2 *before*
   468	// the BL deferreturn and jmpdefer rewinds to that.
   469	TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
   470		MOVD	0(R1), R31	// step 1: saved LR at 0(R1)
   471		SUB     $8, R31		// step 2: rewind by 8 bytes
   472		MOVD	R31, LR		// fn will "return" to the rewound address
   473	
   474		MOVD	fv+0(FP), R11	// closure context
   475		MOVD	argp+8(FP), R1	// second argument (called sp above) becomes the new stack pointer...
   476		SUB	$FIXED_FRAME, R1	// ...less the fixed frame area
   477		MOVD	0(R11), R12	// code pointer
   478		MOVD	R12, CTR
   479		BR	(CTR)		// step 3
   480	
   481	// Record the caller's SP and PC in g->sched and zero its lr/ret. Clobbers R31.
   482	TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   483		MOVD	R1, (g_sched+gobuf_sp)(g)
   484		MOVD	LR, R31				// stage the return address in R31
   485		MOVD	R31, (g_sched+gobuf_pc)(g)
   486		MOVD	R0, (g_sched+gobuf_ret)(g)
   487		MOVD	R0, (g_sched+gobuf_lr)(g)
   488		// g->sched.ctxt is expected to be zero already; anything else goes to badctxt. See func save.
   489		MOVD	(g_sched+gobuf_ctxt)(g), R31
   490		CMP	R0, R31
   491		BEQ	ctxtok
   492		BL	runtime·badctxt(SB)
   493	ctxtok:	RET
   494	
   495	// func asmcgocall(fn, arg unsafe.Pointer) int32
   496	// Call fn(arg) on the scheduler stack,
   497	// aligned appropriately for the gcc ABI.
   498	// See cgocall.go for more details.
   499	TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   500		MOVD	fn+0(FP), R3
   501		MOVD	arg+8(FP), R4
   502	
   503		MOVD	R1, R7		// save original stack pointer
   504		MOVD	g, R5		// R5 = calling g, saved on the new stack below
   505	
   506		// Figure out if we need to switch to m->g0 stack.
   507		// We get called to create new OS threads too, and those
   508		// come in on the m->g0 stack already.
   509		MOVD	g_m(g), R6
   510		MOVD	m_g0(R6), R6
   511		CMP	R6, g
   512		BEQ	g0
   513		BL	gosave<>(SB)
   514		MOVD	R6, g
   515		BL	runtime·save_g(SB)
   516		MOVD	(g_sched+gobuf_sp)(g), R1
   517	
   518		// Now on a scheduling stack (a pthread-created stack).
   519	g0:
   520		// Save room for two of our pointers, plus 32 bytes of callee
   521		// save area that lives on the caller stack.
   522		SUB	$48, R1
   523		RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
   524		MOVD	R5, 40(R1)	// save old g on stack
   525		MOVD	(g_stack+stack_hi)(R5), R5
   526		SUB	R7, R5		// R5 = old g's stack_hi - original SP
   527		MOVD	R5, 32(R1)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   528		MOVD	R0, 0(R1)	// clear back chain pointer (TODO can we give it real back trace information?)
   529		// This is a "global call", so put the global entry point in r12
   530		MOVD	R3, R12
   531		MOVD	R12, CTR
   532		MOVD	R4, R3		// arg in r3
   533		BL	(CTR)
   534	
   535		// C code can clobber R0, so set it back to 0.  F27-F31 are
   536		// callee save, so we don't need to recover those.
   537		XOR	R0, R0
   538		// Restore g, stack pointer, toc pointer.
   539		// R3 is errno, so don't touch it
   540		MOVD	40(R1), g
   541		MOVD    (g_stack+stack_hi)(g), R5
   542		MOVD    32(R1), R6
   543		SUB     R6, R5		// R5 = stack_hi - saved depth = original SP (valid even if the stack moved)
   544		MOVD    24(R5), R2	// TOC pointer slot of the original frame
   545		BL	runtime·save_g(SB)
   546		MOVD	(g_stack+stack_hi)(g), R5	// the SP is recomputed here after save_g rather than reusing R5
   547		MOVD	32(R1), R6
   548		SUB	R6, R5
   549		MOVD	R5, R1
   550	
   551		MOVW	R3, ret+16(FP)
   552		RET
   553	
   554	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   555	// Turn the fn into a Go func (by taking its address) and call
   556	// cgocallback_gofunc.
   557	TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   558		MOVD	$fn+0(FP), R3		// address of the fn argument slot, not its value
   559		MOVD	R3, FIXED_FRAME+0(R1)
   560		MOVD	frame+8(FP), R3
   561		MOVD	R3, FIXED_FRAME+8(R1)
   562		MOVD	framesize+16(FP), R3
   563		MOVD	R3, FIXED_FRAME+16(R1)
   564		MOVD	ctxt+24(FP), R3
   565		MOVD	R3, FIXED_FRAME+24(R1)
   566		MOVD	$runtime·cgocallback_gofunc(SB), R12	// called indirectly, through CTR
   567		MOVD	R12, CTR
   568		BL	(CTR)
   569		RET
   570	
   571	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   572	// See cgocall.go for more details.
   573	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   574		NO_LOCAL_POINTERS
   575	
   576		// Load m and g from thread-local storage.
   577		MOVB	runtime·iscgo(SB), R3
   578		CMP	R3, $0
   579		BEQ	nocgo
   580		BL	runtime·load_g(SB)
   581	nocgo:
   582	
   583		// If g is nil, Go did not create the current thread.
   584		// Call needm to obtain one for temporary use.
   585		// In this case, we're running on the thread stack, so there's
   586		// lots of space, but the linker doesn't know. Hide the call from
   587		// the linker analysis by using an indirect call.
   588		CMP	g, $0
   589		BEQ	needm
   590	
   591		MOVD	g_m(g), R8
   592		MOVD	R8, savedm-8(SP)	// non-zero savedm: dropm is skipped at the end
   593		BR	havem
   594	
   595	needm:
   596		MOVD	g, savedm-8(SP) // g is zero, so is m.
   597		MOVD	$runtime·needm(SB), R12
   598		MOVD	R12, CTR
   599		BL	(CTR)
   600	
   601		// Set m->sched.sp = SP, so that if a panic happens
   602		// during the function we are about to execute, it will
   603		// have a valid SP to run on the g0 stack.
   604		// The next few lines (after the havem label)
   605		// will save this SP onto the stack and then write
   606		// the same SP back to m->sched.sp. That seems redundant,
   607		// but if an unrecovered panic happens, unwindm will
   608		// restore the g->sched.sp from the stack location
   609		// and then systemstack will try to use it. If we don't set it here,
   610		// that restored SP will be uninitialized (typically 0) and
   611		// will not be usable.
   612		MOVD	g_m(g), R8
   613		MOVD	m_g0(R8), R3
   614		MOVD	R1, (g_sched+gobuf_sp)(R3)
   615	
   616	havem:
   617		// Now there's a valid m (in R8), and we're running on its m->g0.
   618		// Save current m->g0->sched.sp on stack and then set it to SP.
   619		// Save current sp in m->g0->sched.sp in preparation for
   620		// switch back to m->curg stack.
   621		// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
   622		MOVD	m_g0(R8), R3
   623		MOVD	(g_sched+gobuf_sp)(R3), R4
   624		MOVD	R4, savedsp-16(SP)
   625		MOVD	R1, (g_sched+gobuf_sp)(R3)
   626	
   627		// Switch to m->curg stack and call runtime.cgocallbackg.
   628		// Because we are taking over the execution of m->curg
   629		// but *not* resuming what had been running, we need to
   630		// save that information (m->curg->sched) so we can restore it.
   631		// We can restore m->curg->sched.sp easily, because calling
   632		// runtime.cgocallbackg leaves SP unchanged upon return.
   633		// To save m->curg->sched.pc, we push it onto the stack.
   634		// This has the added benefit that it looks to the traceback
   635		// routine like cgocallbackg is going to return to that
   636		// PC (because the frame we allocate below has the same
   637		// size as cgocallback_gofunc's frame declared above)
   638		// so that the traceback will seamlessly trace back into
   639		// the earlier calls.
   640		//
   641		// In the new goroutine, -8(SP) is unused (where SP refers to
   642		// m->curg's SP while we're setting it up, before we've adjusted it).
   643		MOVD	m_curg(R8), g
   644		BL	runtime·save_g(SB)
   645		MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   646		MOVD	(g_sched+gobuf_pc)(g), R5
   647		MOVD	R5, -(FIXED_FRAME+16)(R4)	// saved curg PC lands at 0(R1) of the new frame
   648		MOVD	ctxt+24(FP), R3
   649		MOVD	R3, -16(R4)
   650		MOVD	$-(FIXED_FRAME+16)(R4), R1
   651		BL	runtime·cgocallbackg(SB)
   652	
   653		// Restore g->sched (== m->curg->sched) from saved values.
   654		MOVD	0(R1), R5
   655		MOVD	R5, (g_sched+gobuf_pc)(g)
   656		MOVD	$(FIXED_FRAME+16)(R1), R4
   657		MOVD	R4, (g_sched+gobuf_sp)(g)
   658	
   659		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   660		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   661		// so we do not have to restore it.)
   662		MOVD	g_m(g), R8
   663		MOVD	m_g0(R8), g
   664		BL	runtime·save_g(SB)
   665		MOVD	(g_sched+gobuf_sp)(g), R1
   666		MOVD	savedsp-16(SP), R4
   667		MOVD	R4, (g_sched+gobuf_sp)(g)
   668	
   669		// If the m on entry was nil, we called needm above to borrow an m
   670		// for the duration of the call. Since the call is over, return it with dropm.
   671		MOVD	savedm-8(SP), R6
   672		CMP	R6, $0
   673		BNE	droppedm		// savedm != 0: the thread already had an m, nothing to drop
   674		MOVD	$runtime·dropm(SB), R12
   675		MOVD	R12, CTR
   676		BL	(CTR)
   677	droppedm:
   678	
   679		// Done!
   680		RET
   681	
   682	// void setg(G*); set g. for use by needm.
   683	TEXT runtime·setg(SB), NOSPLIT, $0-8
   684		MOVD	gg+0(FP), g	// load the argument into the g register
   685		// This only happens if iscgo, so jump straight to save_g
   686		BL	runtime·save_g(SB)
   687		RET
   688	
   689	// void setg_gcc(G*); set g in C TLS.
   690	// Must obey the gcc calling convention.
   691	TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   692		// The standard prologue clobbers R31, which is callee-save in
   693		// the C ABI, so this function is NOFRAME and saves LR itself.
   694		MOVD	LR, R4
   695		// Also save g and R31, since they're callee-save in C ABI
   696		MOVD	R31, R5
   697		MOVD	g, R6
   698	
   699		MOVD	R3, g		// R3 = the G* argument
   700		BL	runtime·save_g(SB)
   701	
   702		MOVD	R6, g		// put back the caller's g, R31 and LR
   703		MOVD	R5, R31
   704		MOVD	R4, LR
   705		RET
   706	
   707	TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
   708		MOVD	0(R1), R3		// LR saved by caller
   709		MOVD	R3, ret+0(FP)		// return that saved LR as the result
   710		RET
   711	
   712	TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
   713		MOVW	(R0), R0	// load through R0, i.e. from address 0 - presumably meant to fault
   714		UNDEF			// no RET in this function
   715	
   716	#define	TBRL	268
   717	#define	TBRU	269		/* Time base Upper/Lower */
   718	
   719	// int64 runtime·cputicks(void)
   720	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   721	again:	MOVW	SPR(TBRU), R4		// upper half, first read
   722		MOVW	SPR(TBRL), R3		// lower half
   723		MOVW	SPR(TBRU), R5		// upper half, second read
   724		CMPW	R4, R5
   725		BNE	again			// upper half changed between the two reads: resample
   726		SLD	$32, R5
   727		OR	R5, R3			// R3 = upper<<32 | lower
   728		MOVD	R3, ret+0(FP)
   729		RET
   730	
   731	// AES hashing not implemented for ppc64
   732	TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   733		MOVW	(R0), R1	// each stub is one load through R0 (address 0), presumably to fault if ever called
   734	TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   735		MOVW	(R0), R1
   736	TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   737		MOVW	(R0), R1
   738	TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   739		MOVW	(R0), R1	// none of the four stubs has a RET
   740	
   741	TEXT runtime·memequal(SB),NOSPLIT,$0-25
   742		MOVD    a+0(FP), R3		// memeqbody inputs: R3 = s1, R4 = s2, R5 = len
   743		MOVD    b+8(FP), R4
   744		MOVD    size+16(FP), R5
   745	
   746		BL	runtime·memeqbody(SB)	// result comes back in R9
   747		MOVB    R9, ret+24(FP)
   748		RET
   749	
   750	// memequal_varlen(a, b unsafe.Pointer) bool
   751	TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
   752		MOVD	a+0(FP), R3
   753		MOVD	b+8(FP), R4
   754		CMP	R3, R4
   755		BEQ	eq		// same pointer: equal without looking at the bytes
   756		MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
   757		BL	runtime·memeqbody(SB)	// result comes back in R9
   758		MOVB	R9, ret+16(FP)
   759		RET
   760	eq:
   761		MOVD	$1, R3
   762		MOVB	R3, ret+16(FP)	// return true
   763		RET
   764	
   765	// Do an efficient memcmp for ppc64le
   766	// R3 = s1 len
   767	// R4 = s2 len
   768	// R5 = s1 addr
   769	// R6 = s2 addr
   770	// R7 = addr of return value (-1, 0 or 1 is stored there)
   771	TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
   772		MOVD	R3,R8		// set up length
   773		CMP	R3,R4,CR2	// compare the lengths into CR2; consulted at the end to break ties
   774		BC	12,8,setuplen	// BLT CR2
   775		MOVD	R4,R8		// use R4 for comparison len
   776	setuplen:
   777		MOVD	R8,CTR		// set up loop counter
   778		CMP	R8,$8		// only optimize >=8
   779		BLT	simplecheck
   780		DCBT	(R5)		// cache hint
   781		DCBT	(R6)
   782		CMP	R8,$32		// optimize >= 32
   783		MOVD	R8,R9
   784		BLT	setup8a		// 8 byte moves only
   785	setup32a:
   786		SRADCC	$5,R8,R9	// number of 32 byte chunks
   787		MOVD	R9,CTR
   788	
   789		// Special processing for 32 bytes or longer.
   790		// Loading this way is faster and correct as long as the
   791		// doublewords being compared are equal. Once they
   792		// are found unequal, reload them in proper byte order
   793		// to determine greater or less than.
   794	loop32a:
   795		MOVD	0(R5),R9	// doublewords to compare
   796		MOVD	0(R6),R10	// get 4 doublewords
   797		MOVD	8(R5),R14
   798		MOVD	8(R6),R15
   799		CMPU	R9,R10		// bytes equal?
   800		MOVD	$0,R16		// set up for cmpne: R16 = offset of the mismatching doubleword
   801		BNE	cmpne		// further compare for LT or GT
   802		MOVD	16(R5),R9	// get next pair of doublewords
   803		MOVD	16(R6),R10
   804		CMPU	R14,R15		// bytes match?
   805		MOVD	$8,R16		// set up for cmpne
   806		BNE	cmpne		// further compare for LT or GT
   807		MOVD	24(R5),R14	// get next pair of doublewords
   808		MOVD    24(R6),R15
   809		CMPU	R9,R10		// bytes match?
   810		MOVD	$16,R16		// set up for cmpne
   811		BNE	cmpne		// further compare for LT or GT
   812		MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   813		ADD	$32,R5		// bump up to next 32
   814		ADD	$32,R6
   815		CMPU    R14,R15		// bytes match?
   816		BC	8,2,loop32a	// br ctr and cr
   817		BNE	cmpne
   818		ANDCC	$24,R8,R9	// Any 8 byte chunks?
   819		BEQ	leftover	// and result is 0
   820	setup8a:
   821		SRADCC	$3,R9,R9	// get the 8 byte count
   822		BEQ	leftover	// shifted value is 0
   823		MOVD	R9,CTR		// loop count for doublewords
   824	loop8:
   825		MOVDBR	(R5+R0),R9	// doublewords to compare
   826		MOVDBR	(R6+R0),R10	// LE compare order
   827		ADD	$8,R5
   828		ADD	$8,R6
   829		CMPU	R9,R10		// match?
   830		BC	8,2,loop8	// bt ctr <> 0 && cr
   831		BGT	greater
   832		BLT	less
   833	leftover:
   834		ANDCC	$7,R8,R9	// check for leftover bytes
   835		MOVD	R9,CTR		// save the ctr
   836		BNE	simple		// leftover bytes
   837		BC	12,10,equal	// test CR2 for length comparison
   838		BC	12,8,less	// 1st len < 2nd len, result less
   839		BR	greater		// 1st len > 2nd len must be greater
   840	simplecheck:
   841		CMP	R8,$0		// remaining compare length 0
   842		BNE	simple		// do simple compare
   843		BC	12,10,equal	// test CR2 for length comparison
   844		BC	12,8,less	// 1st len < 2nd len, result less
   845		BR	greater		// 1st len > 2nd len must be greater
   846	simple:
   847		MOVBZ	0(R5), R9	// get byte from 1st operand
   848		ADD	$1,R5
   849		MOVBZ	0(R6), R10	// get byte from 2nd operand
   850		ADD	$1,R6
   851		CMPU	R9, R10
   852		BC	8,2,simple	// bc ctr <> 0 && cr
   853		BGT	greater		// 1st > 2nd
   854		BLT	less		// 1st < 2nd
   855		BC	12,10,equal	// test CR2 for length comparison
   856		BC	12,9,greater	// 1st len > 2nd len
   857		BR	less		// must be less
   858	cmpne:				// only here is not equal
   859		MOVDBR	(R5+R16),R8	// reload in reverse order
   860		MOVDBR	(R6+R16),R9
   861		CMPU	R8,R9		// compare correct endianness
   862		BGT	greater		// here only if NE
   863	less:
   864		MOVD	$-1,R3
   865		MOVD	R3,(R7)		// return value if A < B
   866		RET
   867	equal:
   868		MOVD	$0,(R7)		// return value if A == B
   869		RET
   870	greater:
   871		MOVD	$1,R3
   872		MOVD	R3,(R7)		// return value if A > B
   873		RET
   874	
   875	// Do an efficient memcmp for ppc64 (BE)
   876	// R3 = s1 len
   877	// R4 = s2 len
   878	// R5 = s1 addr
   879	// R6 = s2 addr
   880	// R7 = addr of return value (-1, 0 or 1 is stored there)
   881	TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   882		MOVD	R3,R8		// set up length
   883		CMP	R3,R4,CR2	// compare the lengths into CR2; consulted at the end to break ties
   884		BC	12,8,setuplen	// BLT CR2
   885		MOVD	R4,R8		// use R4 for comparison len
   886	setuplen:
   887		MOVD	R8,CTR		// set up loop counter
   888		CMP	R8,$8		// only optimize >=8
   889		BLT	simplecheck
   890		DCBT	(R5)		// cache hint
   891		DCBT	(R6)
   892		CMP	R8,$32		// optimize >= 32
   893		MOVD	R8,R9
   894		BLT	setup8a		// 8 byte moves only
   895	
   896	setup32a:
   897		SRADCC	$5,R8,R9	// number of 32 byte chunks
   898		MOVD	R9,CTR
   899	loop32a:
   900		MOVD	0(R5),R9	// doublewords to compare
   901		MOVD	0(R6),R10	// get 4 doublewords
   902		MOVD	8(R5),R14
   903		MOVD	8(R6),R15
   904		CMPU	R9,R10		// bytes equal?
   905		BLT	less		// found to be less
   906		BGT	greater		// found to be greater
   907		MOVD	16(R5),R9	// get next pair of doublewords
   908		MOVD	16(R6),R10
   909		CMPU	R14,R15		// bytes match?
   910		BLT	less		// found less
   911		BGT	greater		// found greater
   912		MOVD	24(R5),R14	// get next pair of doublewords
   913		MOVD	24(R6),R15
   914		CMPU	R9,R10		// bytes match?
   915		BLT	less		// found to be less
   916		BGT	greater		// found to be greater
   917		ADD	$32,R5		// bump up to next 32
   918		ADD	$32,R6
   919		CMPU	R14,R15		// bytes match?
   920		BC	8,2,loop32a	// br ctr and cr
   921		BLT	less		// with BE, byte ordering is
   922		BGT	greater		// good for compare
   923		ANDCC	$24,R8,R9	// Any 8 byte chunks?
   924		BEQ	leftover	// and result is 0
   925	setup8a:
   926		SRADCC	$3,R9,R9	// get the 8 byte count
   927		BEQ	leftover	// shifted value is 0
   928		MOVD	R9,CTR		// loop count for doublewords
   929	loop8:
   930		MOVD	(R5),R9		// plain loads: no byte reversal, unlike the LE variant
   931		MOVD	(R6),R10
   932		ADD	$8,R5
   933		ADD	$8,R6
   934		CMPU	R9,R10		// match?
   935		BC	8,2,loop8	// bt ctr <> 0 && cr
   936		BGT	greater
   937		BLT	less
   938	leftover:
   939		ANDCC	$7,R8,R9	// check for leftover bytes
   940		MOVD	R9,CTR		// save the ctr
   941		BNE	simple		// leftover bytes
   942		BC	12,10,equal	// test CR2 for length comparison
   943		BC	12,8,less	// 1st len < 2nd len, result less
   944		BR	greater		// 1st len > 2nd len must be greater
   945	simplecheck:
   946		CMP	R8,$0		// remaining compare length 0
   947		BNE	simple		// do simple compare
   948		BC	12,10,equal	// test CR2 for length comparison
   949		BC 	12,8,less	// 1st len < 2nd len, result less
   950		BR	greater		// 1st len > 2nd len must be greater
   951	simple:
   952		MOVBZ	0(R5),R9	// get byte from 1st operand
   953		ADD	$1,R5
   954		MOVBZ	0(R6),R10	// get byte from 2nd operand
   955		ADD	$1,R6
   956		CMPU	R9,R10
   957		BC	8,2,simple	// bc ctr <> 0 && cr
   958		BGT	greater		// 1st > 2nd
   959		BLT	less		// 1st < 2nd
   960		BC	12,10,equal	// test CR2 for length comparison
   961		BC	12,9,greater	// 1st len > 2nd len; otherwise fall through to less
   962	less:
   963		MOVD	$-1,R3
   964		MOVD    R3,(R7)		// return value if A < B
   965		RET
   966	equal:
   967		MOVD    $0,(R7)		// return value if A == B
   968		RET
   969	greater:
   970		MOVD	$1,R3
   971		MOVD	R3,(R7)		// return value if A > B
   972		RET
   973	
   974	// Do an efficient memequal for ppc64
   975	// R3 = s1
   976	// R4 = s2
   977	// R5 = len
   978	// R9 = return value (1 if the bytes are equal, 0 if not)
   979	TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   980		MOVD    R5,CTR
   981		CMP     R5,$8		// only optimize >=8
   982		BLT     simplecheck
   983		DCBT	(R3)		// cache hint
   984		DCBT	(R4)
   985		CMP	R5,$32		// optimize >= 32
   986		MOVD	R5,R6		// needed if setup8a branch
   987		BLT	setup8a		// 8 byte moves only
   988	setup32a:                       // >= 32 bytes
   989		SRADCC  $5,R5,R6        // number of 32 byte chunks to compare
   990		MOVD	R6,CTR
   991	loop32a:
   992		MOVD    0(R3),R6        // doublewords to compare
   993		MOVD    0(R4),R7
   994		MOVD	8(R3),R8	// second pair, loaded before the first compare
   995		MOVD	8(R4),R9
   996		CMP     R6,R7           // bytes match?
   997		BNE     noteq
   998		MOVD	16(R3),R6
   999		MOVD	16(R4),R7
  1000		CMP     R8,R9		// bytes match?
  1001		MOVD	24(R3),R8
  1002		MOVD	24(R4),R9
  1003		BNE     noteq
  1004		CMP     R6,R7           // bytes match?
  1005		BNE	noteq
  1006		ADD     $32,R3		// bump up to next 32
  1007		ADD     $32,R4
  1008		CMP     R8,R9           // bytes match?
  1009		BC      8,2,loop32a	// decrement CTR; loop while CTR <> 0 and CR0 EQ
  1010		BNE	noteq
  1011		ANDCC	$24,R5,R6       // Any 8 byte chunks?
  1012		BEQ	leftover	// and result is 0
  1013	setup8a:
  1014		SRADCC  $3,R6,R6        // get the 8 byte count
  1015		BEQ	leftover	// shifted value is 0
  1016		MOVD    R6,CTR
  1017	loop8:
  1018		MOVD    0(R3),R6        // doublewords to compare
  1019		ADD	$8,R3
  1020		MOVD    0(R4),R7
  1021		ADD     $8,R4
  1022		CMP     R6,R7           // match?
  1023		BC	8,2,loop8	// bt ctr <> 0 && cr
  1024		BNE     noteq
  1025	leftover:
  1026		ANDCC   $7,R5,R6        // check for leftover bytes
  1027		BEQ     equal
  1028		MOVD    R6,CTR		// byte loop count = len & 7
  1029		BR	simple
  1030	simplecheck:
  1031		CMP	R5,$0		// zero length compares equal
  1032		BEQ	equal
  1033	simple:
  1034		MOVBZ   0(R3), R6	// compare one byte at a time
  1035		ADD	$1,R3
  1036		MOVBZ   0(R4), R7
  1037		ADD     $1,R4
  1038		CMP     R6, R7
  1039		BNE     noteq
  1040		BC      8,2,simple	// decrement CTR; loop while CTR <> 0 (CR0 is EQ here)
  1041		BNE	noteq		// never taken: the BNE above already left on a mismatch
  1042		BR	equal
  1043	noteq:
  1044		MOVD    $0, R9
  1045		RET
  1046	equal:
  1047		MOVD    $1, R9
  1048		RET
  1049	
  1050	TEXT bytes·Equal(SB),NOSPLIT,$0-49	// ret = 1 if a and b have the same length and bytes, else 0
  1051		MOVD	a_len+8(FP), R4
  1052		MOVD	b_len+32(FP), R5
  1053		CMP	R5, R4		// unequal lengths are not equal
  1054		BNE	noteq
  1055		MOVD	a+0(FP), R3	// memeqbody takes R3 = a, R4 = b, R5 = len
  1056		MOVD	b+24(FP), R4	// R4 is free again: the length is also in R5
  1057		BL	runtime·memeqbody(SB)
  1058	
  1059		MOVBZ	R9,ret+48(FP)	// R9 = 1 if equal, 0 if not
  1060		RET
  1061	
  1062	noteq:
  1063		MOVBZ	$0,ret+48(FP)
  1064		RET
  1070	
  1071	TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40	// ret = index of c in s, or -1
  1072		MOVD	$ret+32(FP), R14	// address where the body stores the result
  1073		MOVBZ	c+24(FP), R5		// byte to search for
  1074		MOVD	s_len+8(FP), R4		// number of bytes in s
  1075		MOVD	s+0(FP), R3		// start of the slice data
  1076		BR	runtime·indexbytebody<>(SB)	// jump, not call: the body's RET returns to our caller
  1077	
  1078	TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32	// ret = index of c in s, or -1
  1079		MOVD	$ret+24(FP), R14  // address where the body stores the result
  1080		MOVBZ	c+16(FP), R5	  // byte to search for
  1081		MOVD	s_len+8(FP), R4	  // number of bytes in s
  1082		MOVD	s+0(FP), R3	  // start of the string data
  1083		BR	runtime·indexbytebody<>(SB)	// jump, not call: the body's RET returns to our caller
  1084	
  1085	TEXT runtime·indexbytebody<>(SB),NOSPLIT|NOFRAME,$0-0	// In: R3 = base, R4 = len, R5 = byte, R14 = &ret; stores the index or -1 at (R14)
  1086		DCBT	(R3)		// Prepare cache line.
  1087		MOVD	R3,R17		// Save base address for calculating the index later.
  1088		RLDICR	$0,R3,$60,R8	// Align address to doubleword boundary in R8.
  1089		RLDIMI	$8,R5,$48,R5	// Replicating the byte across the register.
  1090		ADD	R4,R3,R7	// End address (base+len) in R7.
  1091	
  1092		RLDIMI	$16,R5,$32,R5
  1093		CMPU	R4,$32		// Check if it's a small string (<=32 bytes). Those will be processed differently.
  1094		MOVD	$-1,R9
  1095		WORD	$0x54661EB8	// Calculate padding in R6 (rlwinm r6,r3,3,26,28).
  1096		RLDIMI	$32,R5,$0,R5
  1097		MOVD	R7,R10		// Save end address (base+len) in R10 for later.
  1098		ADD	$-1,R7,R7	// R7 = address of the last byte.
  1099	#ifdef GOARCH_ppc64le
  1100		SLD	R6,R9,R9	// Prepare mask for Little Endian
  1101	#else
  1102		SRD	R6,R9,R9	// Same for Big Endian
  1103	#endif
  1104		BLE	small_string	// Jump to the small string case if it's <=32 bytes.
  1105	
  1106		// If we are 64-byte aligned, branch to qw_align just to get the auxiliary values
  1107		// in V0, V1 and V10, then branch to the preloop.
  1108		ANDCC	$63,R3,R11
  1109		BEQ	CR0,qw_align
  1110		RLDICL	$0,R3,$61,R11
  1111	
  1112		MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1113		CMPB	R12,R5,R3	// Check for a match.
  1114		AND	R9,R3,R3	// Mask bytes below s_base
  1115		RLDICL	$0,R7,$61,R6	// length-1
  1116		RLDICR	$0,R7,$60,R7	// Last doubleword in R7
  1117		CMPU	R3,$0,CR7	// If we have a match, jump to the final computation
  1118		BNE	CR7,done
  1119		ADD	$8,R8,R8
  1120		ADD	$-8,R4,R4
  1121		ADD	R4,R11,R4	// Add back the bytes below s_base (R11 = base & 7).
  1122	
  1123		// Check for quadword alignment
  1124		ANDCC	$15,R8,R11
  1125		BEQ	CR0,qw_align
  1126	
  1127		// Not aligned, so handle the next doubleword
  1128		MOVD	0(R8),R12
  1129		CMPB	R12,R5,R3
  1130		CMPU	R3,$0,CR7
  1131		BNE	CR7,done
  1132		ADD	$8,R8,R8
  1133		ADD	$-8,R4,R4
  1134	
  1135		// Either quadword aligned or 64-byte at this point. We can use LVX.
  1136	qw_align:
  1137	
  1138		// Set up auxiliary data for the vectorized algorithm.
  1139		VSPLTISB  $0,V0		// Replicate 0 across V0
  1140		VSPLTISB  $3,V10	// Use V10 as control for VBPERMQ
  1141		MTVRD	  R5,V1
  1142		LVSL	  (R0+R0),V11
  1143		VSLB	  V11,V10,V10
  1144		VSPLTB	  $7,V1,V1	// Replicate byte across V1
  1145		CMPU	  R4, $64	// If len <= 64, don't use the vectorized loop
  1146		BLE	  tail
  1147	
  1148		// We will load 4 quadwords per iteration in the loop, so check for
  1149		// 64-byte alignment. If 64-byte aligned, then branch to the preloop.
  1150		ANDCC	  $63,R8,R11
  1151		BEQ	  CR0,preloop
  1152	
  1153		// Not 64-byte aligned. Load one quadword at a time until aligned.
  1154		LVX	    (R8+R0),V4
  1155		VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1156		BNE	    CR6,found_qw_align
  1157		ADD	    $16,R8,R8
  1158		ADD	    $-16,R4,R4
  1159	
  1160		ANDCC	    $63,R8,R11
  1161		BEQ	    CR0,preloop
  1162		LVX	    (R8+R0),V4
  1163		VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1164		BNE	    CR6,found_qw_align
  1165		ADD	    $16,R8,R8
  1166		ADD	    $-16,R4,R4
  1167	
  1168		ANDCC	    $63,R8,R11
  1169		BEQ	    CR0,preloop
  1170		LVX	    (R8+R0),V4
  1171		VCMPEQUBCC  V1,V4,V6		// Check for byte in V4
  1172		BNE	    CR6,found_qw_align
  1173		ADD	    $-16,R4,R4
  1174		ADD	    $16,R8,R8
  1175	
  1176		// 64-byte aligned. Prepare for the main loop.
  1177	preloop:
  1178		CMPU	R4,$64
  1179		BLE	tail	      // If len <= 64, don't use the vectorized loop
  1180	
  1181		// We are now aligned to a 64-byte boundary. We will load 4 quadwords
  1182		// per loop iteration. The end address is in R10, so our loop counter
  1183		// starts at (R10-R8)/64.
  1184		SUB	R8,R10,R6
  1185		SRD	$6,R6,R9      // Loop counter in R9
  1186		MOVD	R9,CTR
  1187	
  1188		MOVD	$16,R11      // Load offsets for the vector loads
  1189		MOVD	$32,R9
  1190		MOVD	$48,R7
  1191	
  1192		// Main loop we will load 64 bytes per iteration
  1193	loop:
  1194		LVX	    (R8+R0),V2	      // Load 4 16-byte vectors
  1195		LVX	    (R11+R8),V3
  1196		LVX	    (R9+R8),V4
  1197		LVX	    (R7+R8),V5
  1198		VCMPEQUB    V1,V2,V6	      // Look for byte in each vector
  1199		VCMPEQUB    V1,V3,V7
  1200		VCMPEQUB    V1,V4,V8
  1201		VCMPEQUB    V1,V5,V9
  1202		VOR	    V6,V7,V11	      // Compress the result in a single vector
  1203		VOR	    V8,V9,V12
  1204		VOR	    V11,V12,V11
  1205		VCMPEQUBCC  V0,V11,V11	      // Check for byte
  1206		BGE	    CR6,found
  1207		ADD	    $64,R8,R8
  1208		BC	    16,0,loop	      // bdnz loop
  1209	
  1210		// Handle the trailing bytes or R4 <= 64
  1211		RLDICL	$0,R6,$58,R4	      // R4 = (R10-R8 at loop entry) & 63: bytes left after the loop
  1212	tail:
  1213		CMPU	    R4,$0
  1214		BEQ	    notfound
  1215		LVX	    (R8+R0),V4
  1216		VCMPEQUBCC  V1,V4,V6
  1217		BNE	    CR6,found_qw_align
  1218		ADD	    $16,R8,R8
  1219		CMPU	    R4,$16,CR6
  1220		BLE	    CR6,notfound
  1221		ADD	    $-16,R4,R4
  1222	
  1223		LVX	    (R8+R0),V4
  1224		VCMPEQUBCC  V1,V4,V6
  1225		BNE	    CR6,found_qw_align
  1226		ADD	    $16,R8,R8
  1227		CMPU	    R4,$16,CR6
  1228		BLE	    CR6,notfound
  1229		ADD	    $-16,R4,R4
  1230	
  1231		LVX	    (R8+R0),V4
  1232		VCMPEQUBCC  V1,V4,V6
  1233		BNE	    CR6,found_qw_align
  1234		ADD	    $16,R8,R8
  1235		CMPU	    R4,$16,CR6
  1236		BLE	    CR6,notfound
  1237		ADD	    $-16,R4,R4
  1238	
  1239		LVX	    (R8+R0),V4
  1240		VCMPEQUBCC  V1,V4,V6
  1241		BNE	    CR6,found_qw_align
  1242	
  1243	notfound:
  1244		MOVD	$-1,R3
  1245		MOVD	R3,(R14)	// Store -1 as the result.
  1246		RET
  1247	
  1248	found:
  1249		// We will now compress the results into a single doubleword,
  1250		// so it can be moved to a GPR for the final index calculation.
  1251	
  1252		// The bytes in V6-V9 are either 0x00 or 0xFF. So, permute the
  1253		// first bit of each byte into bits 48-63.
  1254		VBPERMQ	  V6,V10,V6
  1255		VBPERMQ	  V7,V10,V7
  1256		VBPERMQ	  V8,V10,V8
  1257		VBPERMQ	  V9,V10,V9
  1258	
  1259		// Shift each 16-bit component into its correct position for
  1260		// merging into a single doubleword.
  1261	#ifdef GOARCH_ppc64le
  1262		VSLDOI	  $2,V7,V7,V7
  1263		VSLDOI	  $4,V8,V8,V8
  1264		VSLDOI	  $6,V9,V9,V9
  1265	#else
  1266		VSLDOI	  $6,V6,V6,V6
  1267		VSLDOI	  $4,V7,V7,V7
  1268		VSLDOI	  $2,V8,V8,V8
  1269	#endif
  1270	
  1271		// Merge V6-V9 into a single doubleword and move to a GPR.
  1272		VOR	V6,V7,V11
  1273		VOR	V8,V9,V4
  1274		VOR	V4,V11,V4
  1275		MFVRD	V4,R3
  1276	
  1277	#ifdef GOARCH_ppc64le
  1278		ADD	  $-1,R3,R11
  1279		ANDN	  R3,R11,R11
  1280		POPCNTD	  R11,R11	// Count trailing zeros (Little Endian).
  1281	#else
  1282		CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1283	#endif
  1284		ADD	R8,R11,R3	// Calculate byte address
  1285	
  1286	return:
  1287		SUB	R17,R3		// Index = match address - saved base address.
  1288		MOVD	R3,(R14)	// Store the index as the result.
  1289		RET
  1290	
  1291	found_qw_align:
  1292		// Use the same algorithm as above. Compress the result into
  1293		// a single doubleword and move it to a GPR for the final
  1294		// calculation.
  1295		VBPERMQ	  V6,V10,V6
  1296	
  1297	#ifdef GOARCH_ppc64le
  1298		MFVRD	  V6,R3
  1299		ADD	  $-1,R3,R11
  1300		ANDN	  R3,R11,R11
  1301		POPCNTD	  R11,R11
  1302	#else
  1303		VSLDOI	  $6,V6,V6,V6
  1304		MFVRD	  V6,R3
  1305		CNTLZD	  R3,R11
  1306	#endif
  1307		ADD	  R8,R11,R3
  1308		CMPU	  R11,R4	// A match at or beyond the remaining length does not count.
  1309		BLT	  return
  1310		BR	  notfound
  1311	
  1312	done:
  1313		// At this point, R3 has 0xFF in the same position as the byte we are
  1314		// looking for in the doubleword. Use that to calculate the exact index
  1315		// of the byte.
  1316	#ifdef GOARCH_ppc64le
  1317		ADD	$-1,R3,R11
  1318		ANDN	R3,R11,R11
  1319		POPCNTD	R11,R11		// Count trailing zeros (Little Endian).
  1320	#else
  1321		CNTLZD	R3,R11		// Count leading zeros (Big Endian).
  1322	#endif
  1323		CMPU	R8,R7		// Check if we are at the last doubleword.
  1324		SRD	$3,R11		// Convert the zero-bit count to a byte offset.
  1325		ADD	R11,R8,R3
  1326		CMPU	R11,R6,CR7	// If at the last doubleword, check the byte offset.
  1327		BNE	return
  1328		BLE	CR7,return
  1329		BR	notfound
  1330	
  1331	small_string:
  1332		// We unroll this loop for better performance.
  1333		CMPU	R4,$0		// Check for length=0
  1334		BEQ	notfound
  1335	
  1336		MOVD	0(R8),R12	// Load one doubleword from the aligned address in R8.
  1337		CMPB	R12,R5,R3	// Check for a match.
  1338		AND	R9,R3,R3	// Mask bytes below s_base.
  1339		CMPU	R3,$0,CR7	// If we have a match, jump to the final computation.
  1340		RLDICL	$0,R7,$61,R6	// length-1
  1341		RLDICR	$0,R7,$60,R7	// Last doubleword in R7.
  1342		CMPU	R8,R7
  1343		BNE	CR7,done
  1344		BEQ	notfound	// Hit length.
  1345	
  1346		MOVDU	8(R8),R12
  1347		CMPB	R12,R5,R3
  1348		CMPU	R3,$0,CR6
  1349		CMPU	R8,R7
  1350		BNE	CR6,done
  1351		BEQ	notfound
  1352	
  1353		MOVDU	8(R8),R12
  1354		CMPB	R12,R5,R3
  1355		CMPU	R3,$0,CR6
  1356		CMPU	R8,R7
  1357		BNE	CR6,done
  1358		BEQ	notfound
  1359	
  1360		MOVDU	8(R8),R12
  1361		CMPB	R12,R5,R3
  1362		CMPU	R3,$0,CR6
  1363		CMPU	R8,R7
  1364		BNE	CR6,done
  1365		BEQ	notfound
  1366	
  1367		MOVDU	8(R8),R12
  1368		CMPB	R12,R5,R3
  1369		CMPU	R3,$0,CR6
  1370		BNE	CR6,done
  1371		BR	notfound
  1372	
  1373	TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40	// three-way compare of strings s1 and s2; result stored in ret
  1374		MOVD	s1_base+0(FP), R5
  1375		MOVD	s2_base+16(FP), R6
  1376		MOVD	s1_len+8(FP), R3
  1377		CMP	R5,R6,CR7	// CR7: same base pointer?
  1378		MOVD	s2_len+24(FP), R4
  1379		MOVD	$ret+32(FP), R7	// R7 = &ret, where the result is stored
  1380		CMP	R3,R4,CR6	// CR6: s1 len vs s2 len
  1381		BEQ	CR7,equal	// same base: only the lengths can differ
  1382	
  1383	notequal:
  1384	#ifdef	GOARCH_ppc64le
  1385		BR	cmpbodyLE<>(SB)
  1386	#else
  1387		BR      cmpbodyBE<>(SB)
  1388	#endif
  1389	
  1390	equal:
  1391		BEQ	CR6,done	// same base and same length: result 0
  1392		MOVD	$1, R8
  1393		BGT	CR6,greater	// s1 is longer: result 1
  1394		NEG	R8		// s1 is shorter: result -1
  1395	
  1396	greater:
  1397		MOVD	R8, (R7)
  1398		RET
  1399	
  1400	done:
  1401		MOVD	$0, (R7)
  1402		RET
  1403	
  1404	TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56	// three-way compare of two byte slices; result stored in ret
  1405		MOVD	s1+0(FP), R5	// R5 = first slice data pointer
  1406		MOVD	s2+24(FP), R6	// R6 = second slice data pointer
  1407		MOVD	s1+8(FP), R3	// R3 = first slice length (offset 8)
  1408		CMP	R5,R6,CR7	// CR7: same data pointer?
  1409		MOVD	s2+32(FP), R4	// R4 = second slice length (offset 32)
  1410		MOVD	$ret+48(FP), R7	// R7 = &ret, where the result is stored
  1411		CMP	R3,R4,CR6	// CR6: first length vs second length
  1412		BEQ	CR7,equal	// same pointer: only the lengths can differ
  1413	
  1414	#ifdef	GOARCH_ppc64le
  1415		BR	cmpbodyLE<>(SB)
  1416	#else
  1417		BR      cmpbodyBE<>(SB)
  1418	#endif
  1419	
  1420	equal:
  1421		BEQ	CR6,done	// same pointer and same length: result 0
  1422		MOVD	$1, R8
  1423		BGT	CR6,greater	// first slice is longer: result 1
  1424		NEG	R8		// first slice is shorter: result -1
  1425	
  1426	greater:
  1427		MOVD	R8, (R7)
  1428		RET
  1429	
  1430	done:
  1431		MOVD	$0, (R7)
  1432		RET
  1433	
  1434	TEXT runtime·return0(SB), NOSPLIT, $0	// sets R3 to 0 and returns
  1435		MOVW	$0, R3
  1436		RET
  1437	
  1438	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1439	// Must obey the gcc calling convention.
  1440	TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
  1441		// g (R30) and R31 are callee-save in the C ABI, so save them
  1442		MOVD	g, R4
  1443		MOVD	R31, R5
  1444		MOVD	LR, R6		// the BL below overwrites LR, so keep a copy
  1445	
  1446		BL	runtime·load_g(SB)	// clobbers g (R30), R31
  1447		MOVD	g_m(g), R3
  1448		MOVD	m_curg(R3), R3
  1449		MOVD	(g_stack+stack_hi)(R3), R3	// R3 = g->m->curg.stack.hi, the value returned
  1450	
  1451		MOVD	R4, g		// restore the caller's g, R31 and LR
  1452		MOVD	R5, R31
  1453		MOVD	R6, LR
  1454		RET
  1455	
  1456	// The top-most function running on a goroutine
  1457	// returns to goexit+PCQuantum.
  1458	//
  1459	// When dynamically linking Go, it can be returned to from a function
  1460	// implemented in a different module and so needs to reload the TOC pointer
  1461	// from the stack (although this function declares that it does not set up a
  1462	// frame, newproc1 does in fact allocate one for goexit and saves the TOC
  1463	// pointer in the correct place).
  1464	// goexit+_PCQuantum is halfway through the usual global entry point prologue
  1465	// that derives r2 from r12 which is a bit silly, but not harmful.
  1466	TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
  1467		MOVD	24(R1), R2	// reload the TOC pointer saved at 24(R1)
  1468		BL	runtime·goexit1(SB)	// does not return
  1469		// traceback from goexit1 must hit code range of goexit
  1470		MOVD	R0, R0	// NOP
  1471	
  1472	TEXT runtime·sigreturn(SB),NOSPLIT,$0-0	// does nothing: returns immediately
  1473		RET
  1474	
  1475	// prepGoExitFrame saves the current TOC pointer (i.e. the TOC pointer for the
  1476	// module containing runtime) to the frame that goexit will execute in when
  1477	// the goroutine exits. It's implemented in assembly mainly because that's the
  1478	// easiest way to get access to R2.
  1479	TEXT runtime·prepGoExitFrame(SB),NOSPLIT,$0-8
  1480	      MOVD    sp+0(FP), R3	// R3 = the sp argument
  1481	      MOVD    R2, 24(R3)	// store R2 (TOC pointer) at offset 24 from sp
  1482	      RET
  1483	
  1484	TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0	// links the moduledata in R3 after runtime·lastmoduledatap and makes it the new last
  1485		ADD	$-8, R1		// make room on the stack for one doubleword
  1486		MOVD	R31, 0(R1)	// save R31; NOTE(review): presumably the global accesses below may use it as a temporary - confirm
  1487		MOVD	runtime·lastmoduledatap(SB), R4
  1488		MOVD	R3, moduledata_next(R4)	// old last->next = R3
  1489		MOVD	R3, runtime·lastmoduledatap(SB)	// lastmoduledatap = R3
  1490		MOVD	0(R1), R31	// restore R31
  1491		ADD	$8, R1		// release the stack slot
  1492		RET
  1493	
  1494	TEXT ·checkASM(SB),NOSPLIT,$0-1	// stores 1 in the one-byte result
  1495		MOVW	$1, R3
  1496		MOVB	R3, ret+0(FP)
  1497		RET

View as plain text