...
Run Format

Text file src/runtime/asm_s390x.s

Documentation: runtime

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	// _rt0_s390x_lib is common startup code for s390x systems when
    11	// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    12	// arrange to invoke this function as a global constructor (for
    13	// c-archive) or when the shared library is loaded (for c-shared).
    14	// We expect argc and argv to be passed in the usual C ABI registers
    15	// R2 and R3.
      	//
      	// R6-R15 and F8-F15 are saved on entry and restored before returning,
      	// so the caller sees them unchanged.
    16	TEXT _rt0_s390x_lib(SB), NOSPLIT|NOFRAME, $0
      		// Save R6-R15 in the register save area of the calling function.
      		// This is done exactly once, before any other instruction runs, so
      		// the save area holds the values the caller passed in when LMG
      		// reloads them at the end of this function.
    17		STMG	R6, R15, 48(R15)
      		// Stash argc and argv for _rt0_s390x_lib_go.
    18		MOVD	R2, _rt0_s390x_lib_argc<>(SB)
    19		MOVD	R3, _rt0_s390x_lib_argv<>(SB)
    20	
    24		// Allocate 80 bytes on the stack: 16 bytes of outgoing arguments
      		// (0(R15) and 8(R15), used on the nocgo path) plus 64 bytes for F8-F15.
    25		MOVD	$-80(R15), R15
    26	
    27		// Save F8-F15 in our stack frame.
    28		FMOVD	F8, 16(R15)
    29		FMOVD	F9, 24(R15)
    30		FMOVD	F10, 32(R15)
    31		FMOVD	F11, 40(R15)
    32		FMOVD	F12, 48(R15)
    33		FMOVD	F13, 56(R15)
    34		FMOVD	F14, 64(R15)
    35		FMOVD	F15, 72(R15)
    36	
    37		// Synchronous initialization.
    38		MOVD	$runtime·libpreinit(SB), R1
    39		BL	R1
    40	
    41		// Create a new thread to finish Go runtime initialization.
      		// If _cgo_sys_thread_create is set, call it with
      		// R2 = _rt0_s390x_lib_go and R3 = 0; otherwise fall back to newosproc.
    42		MOVD	_cgo_sys_thread_create(SB), R1
    43		CMP	R1, $0
    44		BEQ	nocgo
    45		MOVD	$_rt0_s390x_lib_go(SB), R2
    46		MOVD	$0, R3
    47		BL	R1
    48		BR	restore
    49	
    50	nocgo:
    51		MOVD	$0x800000, R1              // stacksize
    52		MOVD	R1, 0(R15)
    53		MOVD	$_rt0_s390x_lib_go(SB), R1
    54		MOVD	R1, 8(R15)                 // fn
    55		MOVD	$runtime·newosproc(SB), R1
    56		BL	R1
    57	
    58	restore:
    59		// Restore F8-F15 from our stack frame.
    60		FMOVD	16(R15), F8
    61		FMOVD	24(R15), F9
    62		FMOVD	32(R15), F10
    63		FMOVD	40(R15), F11
    64		FMOVD	48(R15), F12
    65		FMOVD	56(R15), F13
    66		FMOVD	64(R15), F14
    67		FMOVD	72(R15), F15
      		// Release the 80-byte frame.
    68		MOVD	$80(R15), R15
    69	
    70		// Restore R6-R15.
    71		LMG	48(R15), R6, R15
    72		RET
    73	
    74	// _rt0_s390x_lib_go initializes the Go runtime.
    75	// This is started in a separate thread by _rt0_s390x_lib.
      	// It reloads the argc/argv values that _rt0_s390x_lib stored in
      	// static storage into R2/R3 and branches (without linking) to
      	// runtime·rt0_go, which expects them in those registers.
    76	TEXT _rt0_s390x_lib_go(SB), NOSPLIT|NOFRAME, $0
    77		MOVD	_rt0_s390x_lib_argc<>(SB), R2
    78		MOVD	_rt0_s390x_lib_argv<>(SB), R3
    79		MOVD	$runtime·rt0_go(SB), R1
    80		BR	R1
    81	
    82	DATA _rt0_s390x_lib_argc<>(SB)/8, $0
    83	GLOBL _rt0_s390x_lib_argc<>(SB), NOPTR, $8
    84	DATA _rt0_s90x_lib_argv<>(SB)/8, $0
    85	GLOBL _rt0_s390x_lib_argv<>(SB), NOPTR, $8
    86	
      	// rt0_go is the runtime bootstrap. It builds g0's stack bounds from the
      	// current stack, optionally calls _cgo_init, links g0 and m0 together,
      	// runs check/args/osinit/schedinit, queues runtime·main via newproc and
      	// finally calls mstart.
    87	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    88		// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    89		// C TLS base pointer in AR0:AR1
    90	
    91		// initialize essential registers
    92		XOR	R0, R0
    93	
    94		SUB	$24, R15
    95		MOVW	R2, 8(R15) // argc
    96		MOVD	R3, 16(R15) // argv
    97	
    98		// create istack out of the given (operating system) stack.
    99		// _cgo_init may update stackguard.
      		// The provisional lower bound is 64 KiB below the current stack pointer.
   100		MOVD	$runtime·g0(SB), g
   101		MOVD	R15, R11
   102		SUB	$(64*1024), R11
   103		MOVD	R11, g_stackguard0(g)
   104		MOVD	R11, g_stackguard1(g)
   105		MOVD	R11, (g_stack+stack_lo)(g)
   106		MOVD	R15, (g_stack+stack_hi)(g)
   107	
   108		// if there is a _cgo_init, call it using the gcc ABI.
   109		MOVD	_cgo_init(SB), R11
   110		CMPBEQ	R11, $0, nocgo
   111		MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
   112		SLD	$32, R4, R4
   113		MOVW	AR1, R4			// arg 2: TLS base pointer
   114		MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
   115		MOVD	g, R2			// arg 0: G
   116		// C functions expect 160 bytes of space on caller stack frame
   117		// and an 8-byte aligned stack pointer
   118		MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
   119		SUB	$160, R15		// reserve 160 bytes
   120		MOVD    $~7, R6
   121		AND 	R6, R15			// 8-byte align
   122		BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
   123		MOVD	R9, R15			// restore stack
   124		XOR	R0, R0			// zero R0
   125	
   126	nocgo:
   127		// update stackguard after _cgo_init
      		// (both guards become stack.lo + _StackGuard)
   128		MOVD	(g_stack+stack_lo)(g), R2
   129		ADD	$const__StackGuard, R2
   130		MOVD	R2, g_stackguard0(g)
   131		MOVD	R2, g_stackguard1(g)
   132	
   133		// set the per-goroutine and per-mach "registers"
   134		MOVD	$runtime·m0(SB), R2
   135	
   136		// save m->g0 = g0
   137		MOVD	g, m_g0(R2)
   138		// save m0 to g0->m
   139		MOVD	R2, g_m(g)
   140	
   141		BL	runtime·check(SB)
   142	
   143		// argc/argv are already prepared on stack
      		// (stored at 8(R15) and 16(R15) at the top of this function)
   144		BL	runtime·args(SB)
   145		BL	runtime·osinit(SB)
   146		BL	runtime·schedinit(SB)
   147	
   148		// create a new goroutine to start program
      		// A temporary 24-byte frame carries the newproc arguments:
      		// 8(R15) = 0 and 16(R15) = address of runtime·mainPC; 0(R15) is zeroed.
   149		MOVD	$runtime·mainPC(SB), R2		// entry
   150		SUB     $24, R15
   151		MOVD 	R2, 16(R15)
   152		MOVD 	$0, 8(R15)
   153		MOVD 	$0, 0(R15)
   154		BL	runtime·newproc(SB)
   155		ADD	$24, R15
   156	
   157		// start this M
   158		BL	runtime·mstart(SB)
   159	
      		// Only reached if mstart returns: store to address 1 (R0 is zero),
      		// presumably to force a fault.
   160		MOVD	$0, 1(R0)
   161		RET
   162	
      	// mainPC is an 8-byte read-only word holding the address of runtime·main.
      	// rt0_go passes the address of this word to newproc.
   163	DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
   164	GLOBL	runtime·mainPC(SB),RODATA,$8
   165	
      	// breakpoint stores zero to the address 2(R0).
      	// NOTE(review): R0 is kept at zero elsewhere in this file, so this is
      	// presumably a deliberate fault at address 2 - confirm.
   166	TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
   167		MOVD	$0, 2(R0)
   168		RET
   169	
      	// asminit does nothing on s390x; it returns immediately.
   170	TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
   171		RET
   172	
   173	/*
   174	 *  go-routine
   175	 */
   176	
   177	// void gosave(Gobuf*)
   178	// save state in Gobuf; setjmp
      	// Records the current SP (R15), return address (LR) and g in the Gobuf,
      	// clears its lr and ret fields, and calls badctxt if ctxt is non-zero.
   179	TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   180		MOVD	buf+0(FP), R3
   181		MOVD	R15, gobuf_sp(R3)
   182		MOVD	LR, gobuf_pc(R3)
   183		MOVD	g, gobuf_g(R3)
   184		MOVD	$0, gobuf_lr(R3)
   185		MOVD	$0, gobuf_ret(R3)
   186		// Assert ctxt is zero. See func save.
   187		MOVD	gobuf_ctxt(R3), R3
   188		CMPBEQ	R3, $0, 2(PC)	// skip the badctxt call when ctxt == 0
   189		BL	runtime·badctxt(SB)
   190		RET
   191	
   192	// void gogo(Gobuf*)
   193	// restore state from Gobuf; longjmp
      	// Loads g, SP, LR, the return value (R3) and the closure context (R12)
      	// from the Gobuf, zeroes the sp/ret/lr/ctxt fields and branches to
      	// the saved pc.
   194	TEXT runtime·gogo(SB), NOSPLIT, $16-8
   195		MOVD	buf+0(FP), R5
   196		MOVD	gobuf_g(R5), g
   197		BL	runtime·save_g(SB)
   198	
   199		MOVD	0(g), R4	// make sure g is not nil (dereference it)
   200		MOVD	gobuf_sp(R5), R15
   201		MOVD	gobuf_lr(R5), LR
   202		MOVD	gobuf_ret(R5), R3
   203		MOVD	gobuf_ctxt(R5), R12
   204		MOVD	$0, gobuf_sp(R5)
   205		MOVD	$0, gobuf_ret(R5)
   206		MOVD	$0, gobuf_lr(R5)
   207		MOVD	$0, gobuf_ctxt(R5)
   208		CMP	R0, R0 // set condition codes for == test, needed by stack split
   209		MOVD	gobuf_pc(R5), R6
   210		BR	(R6)
   211	
   212	// void mcall(fn func(*g))
   213	// Switch to m->g0's stack, call fn(g).
   214	// Fn must never return.  It should gogo(&g->sched)
   215	// to keep running g.
   216	TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   217		// Save caller state in g->sched
   218		MOVD	R15, (g_sched+gobuf_sp)(g)
   219		MOVD	LR, (g_sched+gobuf_pc)(g)
   220		MOVD	$0, (g_sched+gobuf_lr)(g)
   221		MOVD	g, (g_sched+gobuf_g)(g)
   222	
   223		// Switch to m->g0 & its stack, call fn.
   224		MOVD	g, R3				// R3 = calling g, passed to fn below
   225		MOVD	g_m(g), R8
   226		MOVD	m_g0(R8), g
   227		BL	runtime·save_g(SB)
   228		CMP	g, R3
   229		BNE	2(PC)				// caller was not g0: skip badmcall
   230		BR	runtime·badmcall(SB)
   231		MOVD	fn+0(FP), R12			// context
   232		MOVD	0(R12), R4			// code pointer
   233		MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
   234		SUB	$16, R15
   235		MOVD	R3, 8(R15)			// argument: the g we switched away from
   236		MOVD	$0, 0(R15)
   237		BL	(R4)
   238		BR	runtime·badmcall2(SB)		// reached only if fn returned
   239	
   240	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   241	// of the G stack.  We need to distinguish the routine that
   242	// lives at the bottom of the G stack from the one that lives
   243	// at the top of the system stack because the one at the top of
   244	// the system stack terminates the stack walk (see topofstack()).
      	// systemstack stores this function's address plus 16 as the saved pc;
      	// the body itself begins with UNDEF.
   245	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   246		UNDEF
   247		BL	(LR)	// make sure this function is not leaf
   248		RET
   249	
   250	// func systemstack(fn func())
      	// Runs fn on the g0 stack. If the current g is already gsignal or g0, fn
      	// is tail-called directly. If it is m->curg, the state is saved in
      	// g->sched, execution switches to g0, fn is called, and then execution
      	// switches back. Any other g is reported via badsystemstack.
   251	TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   252		MOVD	fn+0(FP), R3	// R3 = fn
   253		MOVD	R3, R12		// context
   254		MOVD	g_m(g), R4	// R4 = m
   255	
   256		MOVD	m_gsignal(R4), R5	// R5 = gsignal
   257		CMPBEQ	g, R5, noswitch
   258	
   259		MOVD	m_g0(R4), R5	// R5 = g0
   260		CMPBEQ	g, R5, noswitch
   261	
   262		MOVD	m_curg(R4), R6
   263		CMPBEQ	g, R6, switch
   264	
   265		// Bad: g is not gsignal, not g0, not curg. What is it?
   266		// Hide call from linker nosplit analysis.
   267		MOVD	$runtime·badsystemstack(SB), R3
   268		BL	(R3)
   269	
   270	switch:
   271		// save our state in g->sched.  Pretend to
   272		// be systemstack_switch if the G stack is scanned.
   273		MOVD	$runtime·systemstack_switch(SB), R6
   274		ADD	$16, R6	// get past prologue
   275		MOVD	R6, (g_sched+gobuf_pc)(g)
   276		MOVD	R15, (g_sched+gobuf_sp)(g)
   277		MOVD	$0, (g_sched+gobuf_lr)(g)
   278		MOVD	g, (g_sched+gobuf_g)(g)
   279	
   280		// switch to g0
   281		MOVD	R5, g
   282		BL	runtime·save_g(SB)
   283		MOVD	(g_sched+gobuf_sp)(g), R3
   284		// make it look like mstart called systemstack on g0, to stop traceback
   285		SUB	$8, R3
   286		MOVD	$runtime·mstart(SB), R4
   287		MOVD	R4, 0(R3)
   288		MOVD	R3, R15
   289	
   290		// call target function
   291		MOVD	0(R12), R3	// code pointer
   292		BL	(R3)
   293	
   294		// switch back to g
   295		MOVD	g_m(g), R3
   296		MOVD	m_curg(R3), g
   297		BL	runtime·save_g(SB)
   298		MOVD	(g_sched+gobuf_sp)(g), R15
   299		MOVD	$0, (g_sched+gobuf_sp)(g)
   300		RET
   301	
   302	noswitch:
   303		// already on m stack, just call directly
   304		// Using a tail call here cleans up tracebacks since we won't stop
   305		// at an intermediate systemstack.
   306		MOVD	0(R12), R3	// code pointer
   307		MOVD	0(R15), LR	// restore LR
   308		ADD	$8, R15		// pop the saved-LR slot so fn returns straight to our caller
   309		BR	(R3)
   310	
   311	/*
   312	 * support for morestack
   313	 */
   314	
   315	// Called during function prolog when more stack is needed.
   316	// Caller has already loaded:
   317	// R3: framesize, R4: argsize, R5: LR
   318	//
   319	// The traceback routines see morestack on a g0 as being
   320	// the top of a stack (for example, morestack calling newstack
   321	// calling the scheduler calling newm calling gc), so we must
   322	// record an argument size. For that purpose, it has no arguments.
      	//
      	// R12 is stored as the closure context (g->sched.ctxt); R7 and R8 are
      	// used as temporaries.
   323	TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   324		// Cannot grow scheduler stack (m->g0).
   325		MOVD	g_m(g), R7
   326		MOVD	m_g0(R7), R8
   327		CMPBNE	g, R8, 3(PC)	// not g0: skip the two error calls
   328		BL	runtime·badmorestackg0(SB)
   329		BL	runtime·abort(SB)
   330	
   331		// Cannot grow signal stack (m->gsignal).
   332		MOVD	m_gsignal(R7), R8
   333		CMP	g, R8
   334		BNE	3(PC)		// not gsignal: skip the two error calls
   335		BL	runtime·badmorestackgsignal(SB)
   336		BL	runtime·abort(SB)
   337	
   338		// Called from f.
   339		// Set g->sched to context in f.
   340		MOVD	R15, (g_sched+gobuf_sp)(g)
   341		MOVD	LR, R8
   342		MOVD	R8, (g_sched+gobuf_pc)(g)
   343		MOVD	R5, (g_sched+gobuf_lr)(g)
   344		MOVD	R12, (g_sched+gobuf_ctxt)(g)
   345	
   346		// Called from f.
   347		// Set m->morebuf to f's caller.
   348		MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   349		MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   350		MOVD	g, (m_morebuf+gobuf_g)(R7)
   351	
   352		// Call newstack on m->g0's stack.
   353		MOVD	m_g0(R7), g
   354		BL	runtime·save_g(SB)
   355		MOVD	(g_sched+gobuf_sp)(g), R15
   356		// Create a stack frame on g0 to call newstack.
   357		MOVD	$0, -8(R15)	// Zero saved LR in frame
   358		SUB	$8, R15
   359		BL	runtime·newstack(SB)
   360	
   361		// Not reached, but make sure the return PC from the call to newstack
   362		// is still in this function, and not the beginning of the next.
   363		UNDEF
   364	
      	// morestack_noctxt zeroes R12 (the closure context register that
      	// morestack saves) and then branches to morestack.
   365	TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   366		MOVD	$0, R12
   367		BR	runtime·morestack(SB)
   368	
   369	// reflectcall: call a function with the given argument list
   370	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   371	// we don't have variable-sized frames, so we use a small number
   372	// of constant-sized-frame functions to encode a few bits of size in the pc.
   373	// Caution: ugly multiline assembly macros in your future!
   374	
      	// DISPATCH(NAME, MAXSIZE) branches to NAME when R3 (the argument size)
      	// is <= MAXSIZE; otherwise it falls through to the next instruction.
      	// It clobbers R4 and R5.
   375	#define DISPATCH(NAME,MAXSIZE)		\
   376		MOVD	$MAXSIZE, R4;		\
   377		CMP	R3, R4;		\
   378		BGT	3(PC);			\
   379		MOVD	$NAME(SB), R5;	\
   380		BR	(R5)
   381	// Note: can't just "BR NAME(SB)" - bad inlining results.
   382	
      	// reflect·call is an alias that branches straight to ·reflectcall.
   383	TEXT reflect·call(SB), NOSPLIT, $0-0
   384		BR	·reflectcall(SB)
   385	
      	// reflectcall loads the 32-bit argsize into R3 and dispatches to the
      	// smallest runtime·callNNN whose frame size is >= argsize. Sizes above
      	// 1073741824 fall through to badreflectcall.
   386	TEXT ·reflectcall(SB), NOSPLIT, $-8-32
   387		MOVWZ argsize+24(FP), R3
   388		DISPATCH(runtime·call32, 32)
   389		DISPATCH(runtime·call64, 64)
   390		DISPATCH(runtime·call128, 128)
   391		DISPATCH(runtime·call256, 256)
   392		DISPATCH(runtime·call512, 512)
   393		DISPATCH(runtime·call1024, 1024)
   394		DISPATCH(runtime·call2048, 2048)
   395		DISPATCH(runtime·call4096, 4096)
   396		DISPATCH(runtime·call8192, 8192)
   397		DISPATCH(runtime·call16384, 16384)
   398		DISPATCH(runtime·call32768, 32768)
   399		DISPATCH(runtime·call65536, 65536)
   400		DISPATCH(runtime·call131072, 131072)
   401		DISPATCH(runtime·call262144, 262144)
   402		DISPATCH(runtime·call524288, 524288)
   403		DISPATCH(runtime·call1048576, 1048576)
   404		DISPATCH(runtime·call2097152, 2097152)
   405		DISPATCH(runtime·call4194304, 4194304)
   406		DISPATCH(runtime·call8388608, 8388608)
   407		DISPATCH(runtime·call16777216, 16777216)
   408		DISPATCH(runtime·call33554432, 33554432)
   409		DISPATCH(runtime·call67108864, 67108864)
   410		DISPATCH(runtime·call134217728, 134217728)
   411		DISPATCH(runtime·call268435456, 268435456)
   412		DISPATCH(runtime·call536870912, 536870912)
   413		DISPATCH(runtime·call1073741824, 1073741824)
   414		MOVD	$runtime·badreflectcall(SB), R5
   415		BR	(R5)
   416	
      	// CALLFN(NAME, MAXSIZE) defines a WRAPPER function NAME with a
      	// MAXSIZE-byte frame that:
      	//   1. copies argsize bytes from arg into its frame, 256 bytes at a time
      	//      with MVC, then the remainder through EXRL of callfnMVC<> (the
      	//      length register holds count-1);
      	//   2. calls the function whose closure is f (context in R12, code
      	//      pointer loaded from 0(R12));
      	//   3. passes argtype (R7), the arg and frame addresses advanced by
      	//      retoffset (R6, R4) and the size reduced by retoffset (R5) to
      	//      callRet<> to copy the results back.
   417	#define CALLFN(NAME,MAXSIZE)			\
   418	TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   419		NO_LOCAL_POINTERS;			\
   420		/* copy arguments to stack */		\
   421		MOVD	arg+16(FP), R4;			\
   422		MOVWZ	argsize+24(FP), R5;		\
   423		MOVD	$stack-MAXSIZE(SP), R6;		\
   424	loopArgs: /* copy 256 bytes at a time */	\
   425		CMP	R5, $256;			\
   426		BLT	tailArgs;			\
   427		SUB	$256, R5;			\
   428		MVC	$256, 0(R4), 0(R6);		\
   429		MOVD	$256(R4), R4;			\
   430		MOVD	$256(R6), R6;			\
   431		BR	loopArgs;			\
   432	tailArgs: /* copy remaining bytes */		\
   433		CMP	R5, $0;				\
   434		BEQ	callFunction;			\
   435		SUB	$1, R5;				\
   436		EXRL	$callfnMVC<>(SB), R5;		\
   437	callFunction:					\
   438		MOVD	f+8(FP), R12;			\
   439		MOVD	(R12), R8;			\
   440		PCDATA  $PCDATA_StackMapIndex, $0;	\
   441		BL	(R8);				\
   442		/* copy return values back */		\
   443		MOVD	argtype+0(FP), R7;		\
   444		MOVD	arg+16(FP), R6;			\
   445		MOVWZ	n+24(FP), R5;			\
   446		MOVD	$stack-MAXSIZE(SP), R4;		\
   447		MOVWZ	retoffset+28(FP), R1;		\
   448		ADD	R1, R4;				\
   449		ADD	R1, R6;				\
   450		SUB	R1, R5;				\
   451		BL	callRet<>(SB);			\
   452		RET
   453	
   454	// callRet copies return values back at the end of call*. This is a
   455	// separate function so it can allocate stack space for the arguments
   456	// to reflectcallmove. It does not follow the Go ABI; it expects its
   457	// arguments in registers.
      	// Register inputs, as set up by CALLFN: R7 = argtype, R6 = arg+retoffset,
      	// R4 = frame copy+retoffset, R5 = size-retoffset. They are stored at
      	// 8(R15), 16(R15), 24(R15) and 32(R15) respectively for reflectcallmove.
   458	TEXT callRet<>(SB), NOSPLIT, $32-0
   459		MOVD	R7, 8(R15)
   460		MOVD	R6, 16(R15)
   461		MOVD	R4, 24(R15)
   462		MOVD	R5, 32(R15)
   463		BL	runtime·reflectcallmove(SB)
   464		RET
   465	
      	// Instantiate one fixed-frame call function per power-of-two size from
      	// 32 bytes to 1073741824 bytes; these are the targets ·reflectcall
      	// dispatches to.
   466	CALLFN(·call32, 32)
   467	CALLFN(·call64, 64)
   468	CALLFN(·call128, 128)
   469	CALLFN(·call256, 256)
   470	CALLFN(·call512, 512)
   471	CALLFN(·call1024, 1024)
   472	CALLFN(·call2048, 2048)
   473	CALLFN(·call4096, 4096)
   474	CALLFN(·call8192, 8192)
   475	CALLFN(·call16384, 16384)
   476	CALLFN(·call32768, 32768)
   477	CALLFN(·call65536, 65536)
   478	CALLFN(·call131072, 131072)
   479	CALLFN(·call262144, 262144)
   480	CALLFN(·call524288, 524288)
   481	CALLFN(·call1048576, 1048576)
   482	CALLFN(·call2097152, 2097152)
   483	CALLFN(·call4194304, 4194304)
   484	CALLFN(·call8388608, 8388608)
   485	CALLFN(·call16777216, 16777216)
   486	CALLFN(·call33554432, 33554432)
   487	CALLFN(·call67108864, 67108864)
   488	CALLFN(·call134217728, 134217728)
   489	CALLFN(·call268435456, 268435456)
   490	CALLFN(·call536870912, 536870912)
   491	CALLFN(·call1073741824, 1073741824)
   492	
   493	// Not a function: target for EXRL (execute relative long) instruction.
      	// CALLFN executes this MVC through EXRL with R5 = remaining bytes - 1,
      	// copying from 0(R4) to 0(R6). There is deliberately no RET.
   494	TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   495		MVC	$1, 0(R4), 0(R6)
   496	
      	// procyield is a no-op in this implementation; it returns immediately.
   497	TEXT runtime·procyield(SB),NOSPLIT,$0-0
   498		RET
   499	
   500	// void jmpdefer(fv, sp);
   501	// called from deferreturn.
   502	// 1. grab stored LR for caller
   503	// 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   504	// 3. BR to fn
   505	TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   506		MOVD	0(R15), R1	// R1 = LR stored at the top of the stack
   507		SUB	$6, R1, LR	// LR = R1 - 6
   508	
   509		MOVD	fv+0(FP), R12	// closure context
   510		MOVD	argp+8(FP), R15
   511		SUB	$8, R15		// new SP = argp - 8
   512		MOVD	0(R12), R3	// code pointer
   513		BR	(R3)
   514	
   515	// Save state of caller into g->sched. Smashes R1.
      	// Stores LR as the pc and R15 as the sp, zeroes lr and ret, and calls
      	// badctxt if g->sched.ctxt is non-zero. Unlike runtime·gosave it works
      	// on the current g rather than on an explicit Gobuf argument.
   516	TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   517		MOVD	LR, (g_sched+gobuf_pc)(g)
   518		MOVD	R15, (g_sched+gobuf_sp)(g)
   519		MOVD	$0, (g_sched+gobuf_lr)(g)
   520		MOVD	$0, (g_sched+gobuf_ret)(g)
   521		// Assert ctxt is zero. See func save.
   522		MOVD	(g_sched+gobuf_ctxt)(g), R1
   523		CMPBEQ	R1, $0, 2(PC)	// skip the badctxt call when ctxt == 0
   524		BL	runtime·badctxt(SB)
   525		RET
   526	
   527	// func asmcgocall(fn, arg unsafe.Pointer) int32
   528	// Call fn(arg) on the scheduler stack,
   529	// aligned appropriately for the gcc ABI.
   530	// See cgocall.go for more details.
   531	TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   532		// R3 = fn; R4 = arg; R2 = original stack pointer; R5 = original g;
   533		// R6 = temp; R15 = stack pointer
   534		MOVD	fn+0(FP), R3
   535		MOVD	arg+8(FP), R4
   536	
   537		MOVD	R15, R2		// save original stack pointer
   538		MOVD	g, R5
   539	
   540		// Figure out if we need to switch to m->g0 stack.
   541		// We get called to create new OS threads too, and those
   542		// come in on the m->g0 stack already.
   543		MOVD	g_m(g), R6
   544		MOVD	m_g0(R6), R6
   545		CMPBEQ	R6, g, g0
   546		BL	gosave<>(SB)
   547		MOVD	R6, g
   548		BL	runtime·save_g(SB)
   549		MOVD	(g_sched+gobuf_sp)(g), R15
   550	
   551		// Now on a scheduling stack (a pthread-created stack).
   552	g0:
   553		// Save room for two of our pointers, plus 160 bytes of callee
   554		// save area that lives on the caller stack.
   555		SUB	$176, R15
   556		MOVD	$~7, R6
   557		AND	R6, R15                 // 8-byte alignment for gcc ABI
   558		MOVD	R5, 168(R15)             // save old g on stack
   559		MOVD	(g_stack+stack_hi)(R5), R5
   560		SUB	R2, R5
   561		MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   562		MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   563		MOVD	R4, R2                  // arg in R2
   564		BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   565	
   566		XOR	R0, R0                  // set R0 back to 0.
   567		// Restore g, stack pointer.
      		// SP is recomputed as old g's stack.hi minus the saved depth.
   568		MOVD	168(R15), g
   569		BL	runtime·save_g(SB)
   570		MOVD	(g_stack+stack_hi)(g), R5
   571		MOVD	160(R15), R6
   572		SUB	R6, R5
   573		MOVD	R5, R15
   574	
   575		MOVW	R2, ret+16(FP)		// fn's 32-bit result, taken from R2
   576		RET
   577	
   578	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   579	// Turn the fn into a Go func (by taking its address) and call
   580	// cgocallback_gofunc.
      	// The four outgoing arguments are placed at 8(R15)..32(R15); the callee
      	// is reached through an indirect call via R3.
   581	TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   582		MOVD	$fn+0(FP), R3		// address of the fn argument slot, not its value
   583		MOVD	R3, 8(R15)
   584		MOVD	frame+8(FP), R3
   585		MOVD	R3, 16(R15)
   586		MOVD	framesize+16(FP), R3
   587		MOVD	R3, 24(R15)
   588		MOVD	ctxt+24(FP), R3
   589		MOVD	R3, 32(R15)
   590		MOVD	$runtime·cgocallback_gofunc(SB), R3
   591		BL	(R3)
   592		RET
   593	
   594	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   595	// See cgocall.go for more details.
      	// Local slots: savedm-8(SP) holds the m found on entry (zero if needm
      	// had to be called), savedsp-16(SP) holds the previous m->g0->sched.sp.
   596	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   597		NO_LOCAL_POINTERS
   598	
   599		// Load m and g from thread-local storage.
   600		MOVB	runtime·iscgo(SB), R3
   601		CMPBEQ	R3, $0, nocgo
   602		BL	runtime·load_g(SB)
   603	
   604	nocgo:
   605		// If g is nil, Go did not create the current thread.
   606		// Call needm to obtain one for temporary use.
   607		// In this case, we're running on the thread stack, so there's
   608		// lots of space, but the linker doesn't know. Hide the call from
   609		// the linker analysis by using an indirect call.
   610		CMPBEQ	g, $0, needm
   611	
   612		MOVD	g_m(g), R8
   613		MOVD	R8, savedm-8(SP)
   614		BR	havem
   615	
   616	needm:
   617		MOVD	g, savedm-8(SP) // g is zero, so is m.
   618		MOVD	$runtime·needm(SB), R3
   619		BL	(R3)
   620	
   621		// Set m->sched.sp = SP, so that if a panic happens
   622		// during the function we are about to execute, it will
   623		// have a valid SP to run on the g0 stack.
   624		// The next few lines (after the havem label)
   625		// will save this SP onto the stack and then write
   626		// the same SP back to m->sched.sp. That seems redundant,
   627		// but if an unrecovered panic happens, unwindm will
   628		// restore the g->sched.sp from the stack location
   629		// and then systemstack will try to use it. If we don't set it here,
   630		// that restored SP will be uninitialized (typically 0) and
   631		// will not be usable.
   632		MOVD	g_m(g), R8
   633		MOVD	m_g0(R8), R3
   634		MOVD	R15, (g_sched+gobuf_sp)(R3)
   635	
   636	havem:
   637		// Now there's a valid m, and we're running on its m->g0.
   638		// Save current m->g0->sched.sp on stack and then set it to SP.
   639		// Save current sp in m->g0->sched.sp in preparation for
   640		// switch back to m->curg stack.
   641		// NOTE: unwindm knows that the saved g->sched.sp is at 8(R15) aka savedsp-16(SP).
   642		MOVD	m_g0(R8), R3
   643		MOVD	(g_sched+gobuf_sp)(R3), R4
   644		MOVD	R4, savedsp-16(SP)
   645		MOVD	R15, (g_sched+gobuf_sp)(R3)
   646	
   647		// Switch to m->curg stack and call runtime.cgocallbackg.
   648		// Because we are taking over the execution of m->curg
   649		// but *not* resuming what had been running, we need to
   650		// save that information (m->curg->sched) so we can restore it.
   651		// We can restore m->curg->sched.sp easily, because calling
   652		// runtime.cgocallbackg leaves SP unchanged upon return.
   653		// To save m->curg->sched.pc, we push it onto the stack.
   654		// This has the added benefit that it looks to the traceback
   655		// routine like cgocallbackg is going to return to that
   656		// PC (because the frame we allocate below has the same
   657		// size as cgocallback_gofunc's frame declared above)
   658		// so that the traceback will seamlessly trace back into
   659		// the earlier calls.
   660		//
   661		// In the new goroutine, -8(SP) is unused (where SP refers to
   662		// m->curg's SP while we're setting it up, before we've adjusted it).
   663		MOVD	m_curg(R8), g
   664		BL	runtime·save_g(SB)
   665		MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   666		MOVD	(g_sched+gobuf_pc)(g), R5
   667		MOVD	R5, -24(R4)		// saved curg pc goes at the base of the new 24-byte frame
   668		MOVD	ctxt+24(FP), R5
   669		MOVD	R5, -16(R4)		// ctxt is the argument passed to cgocallbackg
   670		MOVD	$-24(R4), R15
   671		BL	runtime·cgocallbackg(SB)
   672	
   673		// Restore g->sched (== m->curg->sched) from saved values.
   674		MOVD	0(R15), R5
   675		MOVD	R5, (g_sched+gobuf_pc)(g)
   676		MOVD	$24(R15), R4
   677		MOVD	R4, (g_sched+gobuf_sp)(g)
   678	
   679		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   680		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   681		// so we do not have to restore it.)
   682		MOVD	g_m(g), R8
   683		MOVD	m_g0(R8), g
   684		BL	runtime·save_g(SB)
   685		MOVD	(g_sched+gobuf_sp)(g), R15
   686		MOVD	savedsp-16(SP), R4
   687		MOVD	R4, (g_sched+gobuf_sp)(g)
   688	
   689		// If the m on entry was nil, we called needm above to borrow an m
   690		// for the duration of the call. Since the call is over, return it with dropm.
   691		MOVD	savedm-8(SP), R6
   692		CMPBNE	R6, $0, droppedm
   693		MOVD	$runtime·dropm(SB), R3
   694		BL	(R3)
   695	droppedm:
   696	
   697		// Done!
   698		RET
   699	
   700	// void setg(G*); set g. for use by needm.
      	// Loads the argument into the g register and calls save_g.
   701	TEXT runtime·setg(SB), NOSPLIT, $0-8
   702		MOVD	gg+0(FP), g
   703		// This only happens if iscgo, so jump straight to save_g
   704		BL	runtime·save_g(SB)
   705		RET
   706	
   707	// void setg_gcc(G*); set g in C TLS.
   708	// Must obey the gcc calling convention.
      	// The new g arrives in R2. LR, R10, g and R11 are parked in R1, R3, R4
      	// and R5 around the save_g call and restored afterwards.
   709	TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   710		// The standard prologue clobbers LR (R14), which is callee-save in
   711		// the C ABI, so we have to use NOFRAME and save LR ourselves.
   712		MOVD	LR, R1
   713		// Also save g, R10, and R11 since they're callee-save in C ABI
   714		MOVD	R10, R3
   715		MOVD	g, R4
   716		MOVD	R11, R5
   717	
   718		MOVD	R2, g
   719		BL	runtime·save_g(SB)
   720	
   721		MOVD	R5, R11
   722		MOVD	R4, g
   723		MOVD	R3, R10
   724		MOVD	R1, LR
   725		RET
   726	
      	// getcallerpc returns the word at 0(R15), i.e. the link register value
      	// the calling function saved at the top of its frame.
   727	TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
   728		MOVD	0(R15), R3		// LR saved by caller
   729		MOVD	R3, ret+0(FP)
   730		RET
   731	
      	// abort loads from the address held in R0 and then executes UNDEF.
      	// NOTE(review): R0 is kept at zero elsewhere in this file, so the load is
      	// presumably a deliberate fault at address 0 - confirm.
   732	TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
   733		MOVW	(R0), R0
   734		UNDEF
   735	
   736	// int64 runtime·cputicks(void)
      	// Returns the value stored by STCK with its most significant bit cleared.
   737	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   738		// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   739		// This means that since about 1972 the msb has been set, making the
   740		// result of a call to STORE CLOCK (stck) a negative number.
   741		// We clear the msb to make it positive.
   742		STCK	ret+0(FP)      // serialises before and after call
   743		MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   744		SLD	$1, R3         // shift the msb out ...
   745		SRD	$1, R3         // ... and shift back, leaving the top bit zero
   746		MOVD	R3, ret+0(FP)
   747		RET
   748	
   749	// AES hashing not implemented for s390x
      	// Each stub consists of a single load from the address in R0 into R15
      	// and has no RET.
      	// NOTE(review): with R0 == 0 this is presumably meant to crash if any of
      	// these entry points is ever called - confirm.
   750	TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   751		MOVW	(R0), R15
   752	TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   753		MOVW	(R0), R15
   754	TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   755		MOVW	(R0), R15
   756	TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   757		MOVW	(R0), R15
   758	
   759	// memequal(a, b unsafe.Pointer, size uintptr) bool
      	// Loads the arguments into the registers memeqbody expects
      	// (R3 = a, R5 = b, R6 = size, R7 = &ret) and branches to it.
   760	TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   761		MOVD	a+0(FP), R3
   762		MOVD	b+8(FP), R5
   763		MOVD	size+16(FP), R6
   764		LA	ret+24(FP), R7
   765		BR	runtime·memeqbody(SB)
   766	
   767	// memequal_varlen(a, b unsafe.Pointer) bool
      	// Same as memequal, but the size is read from the closure in R12
      	// instead of being passed as an argument.
   768	TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   769		MOVD	a+0(FP), R3
   770		MOVD	b+8(FP), R5
   771		MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   772		LA	ret+16(FP), R7
   773		BR	runtime·memeqbody(SB)
   774	
      	// bytes·Equal compares two byte slices. If the lengths differ it stores
      	// 0 in ret directly; otherwise it branches to memeqbody with
      	// R3 = a, R5 = b, R6 = length and R7 = &ret.
   775	TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
   776		MOVD	a_len+8(FP), R2
   777		MOVD	b_len+32(FP), R6
   778		MOVD	a+0(FP), R3
   779		MOVD	b+24(FP), R5
   780		LA	ret+48(FP), R7
   781		CMPBNE	R2, R6, notequal
   782		BR	runtime·memeqbody(SB)
   783	notequal:
   784		MOVB	$0, ret+48(FP)
   785		RET
   786	
   787	// input:
   788	//   R3 = a
   789	//   R5 = b
   790	//   R6 = len
   791	//   R7 = address of output byte (stores 0 or 1 here)
   792	//   a and b have the same length
      	//
      	// Strategy: identical pointers or zero length are equal immediately;
      	// lengths >= 256 are compared 256 bytes at a time with CLC; 32..255
      	// bytes use a single CLC executed via EXRL with length-1 in R8; shorter
      	// inputs are compared with 8-, 4- and 1-byte loads.
      	// Uses R2, R8 and R9 as scratch.
   793	TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   794		CMPBEQ	R3, R5, equal
   795	loop:
   796		CMPBEQ	R6, $0, equal
   797		CMPBLT	R6, $32, tiny
   798		CMP	R6, $256
   799		BLT	tail
   800		CLC	$256, 0(R3), 0(R5)
   801		BNE	notequal
   802		SUB	$256, R6
   803		LA	256(R3), R3
   804		LA	256(R5), R5
   805		BR	loop
   806	tail:
   807		SUB	$1, R6, R8
   808		EXRL	$runtime·memeqbodyclc(SB), R8
   809		BEQ	equal
   810	notequal:
   811		MOVB	$0, 0(R7)
   812		RET
   813	equal:
   814		MOVB	$1, 0(R7)
   815		RET
   816	tiny:
      		// Fewer than 32 bytes left. R2 is the running offset used by the
      		// indexed loads below.
   817		MOVD	$0, R2
   818		CMPBLT	R6, $16, lt16
   819		MOVD	0(R3), R8
   820		MOVD	0(R5), R9
   821		CMPBNE	R8, R9, notequal
   822		MOVD	8(R3), R8
   823		MOVD	8(R5), R9
   824		CMPBNE	R8, R9, notequal
   825		LA	16(R2), R2
   826		SUB	$16, R6
   827	lt16:
   828		CMPBLT	R6, $8, lt8
   829		MOVD	0(R3)(R2*1), R8
   830		MOVD	0(R5)(R2*1), R9
   831		CMPBNE	R8, R9, notequal
   832		LA	8(R2), R2
   833		SUB	$8, R6
   834	lt8:
   835		CMPBLT	R6, $4, lt4
   836		MOVWZ	0(R3)(R2*1), R8
   837		MOVWZ	0(R5)(R2*1), R9
   838		CMPBNE	R8, R9, notequal
   839		LA	4(R2), R2
   840		SUB	$4, R6
   841	lt4:
      	// CHECK(n): if exactly n bytes remain, the buffers are equal; otherwise
      	// compare byte n (relative to offset R2) of both buffers.
   842	#define CHECK(n) \
   843		CMPBEQ	R6, $n, equal \
   844		MOVB	n(R3)(R2*1), R8 \
   845		MOVB	n(R5)(R2*1), R9 \
   846		CMPBNE	R8, R9, notequal
   847		CHECK(0)
   848		CHECK(1)
   849		CHECK(2)
   850		CHECK(3)
   851		BR	equal
   852	
      	// memeqbodyclc holds the CLC instruction that memeqbody executes via
      	// EXRL with the length-1 supplied in R8; it compares 0(R3) with 0(R5).
   853	TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
   854		CLC	$1, 0(R3), 0(R5)
   855		RET
   856	
      	// bytes·IndexByte loads the slice pointer, its length, the byte to find
      	// and the address of the result slot, then branches to indexbytebody.
   857	TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
   858		MOVD	s+0(FP), R3     // s => R3
   859		MOVD	s_len+8(FP), R4 // s_len => R4
   860		MOVBZ	c+24(FP), R5    // c => R5
   861		MOVD	$ret+32(FP), R2 // &ret => R2
   862		BR	runtime·indexbytebody(SB)
   863	
      	// strings·IndexByte loads the string pointer, its length, the byte to
      	// find and the address of the result slot, then branches to indexbytebody.
   864	TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
   865		MOVD	s+0(FP), R3     // s => R3
   866		MOVD	s_len+8(FP), R4 // s_len => R4
   867		MOVBZ	c+16(FP), R5    // c => R5
   868		MOVD	$ret+24(FP), R2 // &ret => R2
   869		BR	runtime·indexbytebody(SB)
   870	
   871	// input:
   872	// R3: s
   873	// R4: s_len
   874	// R5: c -- byte sought
   875	// R2: &ret -- address to put index into
      	//
      	// Writes the index of the first occurrence of c to 0(R2), or -1 if c is
      	// not present. Inputs shorter than 16 bytes use a byte loop; longer ones
      	// use the vector facility when ·cpu reports it, otherwise SRST.
      	// R6 keeps the base address and R8 the end address; R1 and R7 are scratch.
   876	TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
   877		CMPBEQ	R4, $0, notfound
   878		MOVD	R3, R6          // store base for later
   879		ADD	R3, R4, R8      // the address after the end of the string
   880		//if the length is small, use loop; otherwise, use vector or srst search
   881		CMPBGE	R4, $16, large
   882	
   883	residual:
   884		CMPBEQ	R3, R8, notfound
   885		MOVBZ	0(R3), R7
   886		LA	1(R3), R3
   887		CMPBNE	R7, R5, residual
   888	
   889	found:
      		// R3 points one past the matching byte: index = R3 - base - 1.
   890		SUB	R6, R3
   891		SUB	$1, R3
   892		MOVD	R3, 0(R2)
   893		RET
   894	
   895	notfound:
   896		MOVD	$-1, 0(R2)
   897		RET
   898	
   899	large:
   900		MOVBZ	·cpu+facilities_hasVX(SB), R1
   901		CMPBNE	R1, $0, vectorimpl
   902	
   903	srstimpl:                       // no vector facility
   904		MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   905	srstloop:
   906		WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   907		BVS	srstloop        // interrupted - continue
   908		BGT	notfoundr0
   909	foundr0:
   910		XOR	R0, R0          // reset R0
   911		SUB	R6, R8          // remove base
   912		MOVD	R8, 0(R2)
   913		RET
   914	notfoundr0:
   915		XOR	R0, R0          // reset R0
   916		MOVD	$-1, 0(R2)
   917		RET
   918	
   919	vectorimpl:
   920		//if the address is not 16byte aligned, use loop for the header
   921		MOVD	R3, R8
   922		AND	$15, R8
   923		CMPBGT	R8, $0, notaligned
   924	
   925	aligned:
      		// R8 = end address again (base + length); R7 = end rounded down to
      		// a multiple of 16, the limit for whole-vector loads.
   926		ADD	R6, R4, R8
   927		MOVD	R8, R7
   928		AND	$-16, R7
   929		// replicate c across V17
   930		VLVGB	$0, R5, V19
   931		VREPB	$0, V19, V17
   932	
   933	vectorloop:
   934		CMPBGE	R3, R7, residual
   935		VL	0(R3), V16    // load string to be searched into V16
   936		ADD	$16, R3
   937		VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   938		BVS	vectorloop
   939	
   940		// when vector search found c in the string
   941		VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   942		SUB	$16, R3
   943		SUB	R6, R3
   944		ADD	R3, R7
   945		MOVD	R7, 0(R2)
   946		RET
   947	
   948	notaligned:
      		// R8 = next 16-byte boundary above R3; scan byte by byte up to it.
   949		MOVD	R3, R8
   950		AND	$-16, R8
   951		ADD     $16, R8
   952	notalignedloop:
   953		CMPBEQ	R3, R8, aligned
   954		MOVBZ	0(R3), R7
   955		LA	1(R3), R3
   956		CMPBNE	R7, R5, notalignedloop
   957		BR	found
   958	
   959	TEXT runtime·return0(SB), NOSPLIT, $0
      		// Sets R3 to 0 (32-bit move) and returns; takes no arguments.
   960		MOVW	$0, R3
   961		RET
   962	
   963	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   964	// Must obey the gcc calling convention.
      	// The result is left in R2. R1, R3, R4 and R5 are used as temporaries to
      	// hold the registers that must be restored before returning.
   965	TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   966		// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   967		MOVD	g, R1
   968		MOVD	R10, R3
   969		MOVD	LR, R4
   970		MOVD	R11, R5
   971	
   972		BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
   973		MOVD	g_m(g), R2                  // R2 = g->m
   974		MOVD	m_curg(R2), R2              // R2 = g->m->curg
   975		MOVD	(g_stack+stack_hi)(R2), R2  // R2 = g->m->curg.stack.hi
   976	
      		// Restore the registers saved on entry.
   977		MOVD	R1, g
   978		MOVD	R3, R10
   979		MOVD	R4, LR
   980		MOVD	R5, R11
   981		RET
   982	
   983	// The top-most function running on a goroutine
   984	// returns to goexit+PCQuantum.
      	// The leading 2-byte nop provides the padding that such a return address
      	// skips over (presumably PCQuantum is 2 on s390x; TODO confirm).
   985	TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
   986		BYTE $0x07; BYTE $0x00; // 2-byte nop
   987		BL	runtime·goexit1(SB)	// does not return
   988		// traceback from goexit1 must hit code range of goexit
   989		BYTE $0x07; BYTE $0x00; // 2-byte nop
   990	
   991	TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
      		// Empty body: returns immediately without touching any state.
   992		RET
   993	
   994	TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
   995	        // Stores are already ordered on s390x, so this is just a
   996	        // compiler barrier: no fence instruction is emitted and the
      	        // function returns immediately.
   997		RET
   998	
   999	TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
      		// Compares two strings: loads the (base, len) pair of each argument
      		// into the registers runtime·cmpbody expects and tail-branches to it.
      		// cmpbody stores the -1/0/1 result through R7, i.e. into ret.
  1000		MOVD	s1_base+0(FP), R3  // R3 = a
  1001		MOVD	s1_len+8(FP), R4   // R4 = alen
  1002		MOVD	s2_base+16(FP), R5 // R5 = b
  1003		MOVD	s2_len+24(FP), R6  // R6 = blen
  1004		LA	ret+32(FP), R7     // R7 = address of the result slot
  1005		BR	runtime·cmpbody(SB)
  1006	
  1007	TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
      		// Compares two byte slices: loads the pointer and length words of
      		// each argument (arguments are 24 bytes apart in the frame, so any
      		// word between length and the next argument is not read) and
      		// tail-branches to runtime·cmpbody, which stores -1/0/1 through R7.
  1008		MOVD	s1+0(FP), R3   // R3 = a
  1009		MOVD	s1+8(FP), R4   // R4 = alen
  1010		MOVD	s2+24(FP), R5  // R5 = b
  1011		MOVD	s2+32(FP), R6  // R6 = blen
  1012		LA	res+48(FP), R7 // R7 = address of the result slot
  1013		BR	runtime·cmpbody(SB)
  1014	
  1015	// input:
  1016	//   R3 = a
  1017	//   R4 = alen
  1018	//   R5 = b
  1019	//   R6 = blen
  1020	//   R7 = address of output word (stores -1/0/1 here)
      	//
      	// Compares the first min(alen, blen) bytes of a and b; if they are all
      	// equal the result is decided by comparing alen with blen.
      	// R3, R5 and R8 are clobbered.
  1021	TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
  1022		CMPBEQ	R3, R5, cmplengths // same address: contents are equal, only lengths matter
  1023		MOVD	R4, R8
  1024		CMPBLE	R4, R6, amin
  1025		MOVD	R6, R8
  1026	amin:
      		// R8 = min(alen, blen) = number of bytes still to compare.
  1027		CMPBEQ	R8, $0, cmplengths
  1028		CMP	R8, $256
  1029		BLE	tail
      	// Compare 256 bytes per iteration while more than 256 bytes remain.
  1030	loop:
  1031		CLC	$256, 0(R3), 0(R5)
  1032		BGT	gt
  1033		BLT	lt
  1034		SUB	$256, R8
      		// Advance both pointers past the chunk just compared. Without this
      		// every iteration (and the tail) would re-compare the first bytes
      		// and inputs differing after byte 255 would compare incorrectly.
      		MOVD	$256(R3), R3
      		MOVD	$256(R5), R5
  1035		CMP	R8, $256
  1036		BGT	loop
      	// 1..256 bytes remain: execute the CLC template in cmpbodyclc with its
      	// length taken from R8 (hence the subtraction of 1 beforehand).
  1037	tail:
  1038		SUB	$1, R8
  1039		EXRL	$runtime·cmpbodyclc(SB), R8
  1040		BGT	gt
  1041		BLT	lt
      	// Common prefix is equal: the shorter input sorts first.
  1042	cmplengths:
  1043		CMP	R4, R6
  1044		BEQ	eq
  1045		BLT	lt
  1046	gt:
  1047		MOVD	$1, 0(R7)
  1048		RET
  1049	lt:
  1050		MOVD	$-1, 0(R7)
  1051		RET
  1052	eq:
  1053		MOVD	$0, 0(R7)
  1054		RET
  1055	
  1056	TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
      		// Instruction template for the EXRL in runtime·cmpbody: a CLC of
      		// 0(R3) against 0(R5). The EXRL there supplies R8, so the length
      		// written here is presumably only a placeholder; TODO confirm
      		// against the EXRL definition.
  1057		CLC	$1, 0(R3), 0(R5)
  1058		RET
  1059	
  1060	// func supportsVX() bool
      	// Returns the runtime's facilities_hasVX flag (vector facility available).
      	// NOTE(review): R0 is used as scratch and left holding the flag; the srst
      	// path of runtime·indexbytebody states that R0 is expected to be 0 and
      	// resets it. Confirm that leaving R0 non-zero is acceptable here.
  1061	TEXT strings·supportsVX(SB),NOSPLIT,$0-1
  1062		MOVBZ	runtime·cpu+facilities_hasVX(SB), R0 // zero-extended flag byte
  1063		MOVB	R0, ret+0(FP)
  1064		RET
  1065	
  1066	// func supportsVX() bool
      	// Returns the runtime's facilities_hasVX flag (vector facility available).
      	// NOTE(review): R0 is used as scratch and left holding the flag; the srst
      	// path of runtime·indexbytebody states that R0 is expected to be 0 and
      	// resets it. Confirm that leaving R0 non-zero is acceptable here.
  1067	TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
  1068		MOVBZ	runtime·cpu+facilities_hasVX(SB), R0 // zero-extended flag byte
  1069		MOVB	R0, ret+0(FP)
  1070		RET
  1071	
  1072	// func indexShortStr(s, sep string) int
  1073	// Caller must confirm availability of vx facility before calling.
      	// Thin wrapper: loads the string headers into R1-R4, puts the address of
      	// the result slot in R5 and tail-branches to runtime·indexShortStr.
  1074	TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
  1075		LMG	s+0(FP), R1, R2   // R1=&s[0],   R2=len(s)
  1076		LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1077		MOVD	$ret+32(FP), R5   // R5=&ret
  1078		BR	runtime·indexShortStr(SB)
  1079	
  1080	// func indexShortStr(s, sep []byte) int
  1081	// Caller must confirm availability of vx facility before calling.
      	// Thin wrapper: loads the pointer and length words of both slices into
      	// R1-R4, puts the address of the result slot in R5 and tail-branches to
      	// runtime·indexShortStr.
  1082	TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1083		LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1084		LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1085		MOVD	$ret+48(FP), R5    // R5=&ret
  1086		BR	runtime·indexShortStr(SB)
  1087	
  1088	// s: string we are searching
  1089	// sep: string to search for
  1090	// R1=&s[0], R2=len(s)
  1091	// R3=&sep[0], R4=len(sep)
  1092	// R5=&ret (int)
  1093	// Caller must confirm availability of vx facility before calling.
      	//
      	// Stores the index of the first occurrence of sep in s, or -1, through R5.
      	// An empty sep, or a sep longer than s, yields -1. A sep longer than 64
      	// bytes is not supported (see index65plus).
      	//
      	// After the prologue: R2 = address of the last candidate start position,
      	// R4 = len(sep)-1, R7 = current candidate position, V0 = up to the first
      	// 16 bytes of sep. R0, R8, R9 and vector registers are used as scratch.
  1094	TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1095		CMPBGT	R4, R2, notfound // sep longer than s
  1096		ADD	R1, R2
  1097		SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1098		CMPBEQ	R4, $0, notfound
  1099		SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1100		VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1101		MOVD	R1, R7
      	// len(sep) == 2: test 16 candidate positions per iteration.
  1102	index2plus:
  1103		CMPBNE	R4, $1, index3plus
  1104		MOVD	$15(R7), R9
  1105		CMPBGE	R9, R2, index2to16 // not enough input for a 16-position step: use the generic loop
  1106		VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1107		VONE	V16
  1108		VREPH	$0, V0, V1
  1109		CMPBGE	R9, R2, index2to16 // repeats the check made above (R9 and R2 are unchanged)
  1110	index2loop:
  1111		VL	0(R7), V2          // 16 bytes, even indices
  1112		VL	1(R7), V4          // 16 bytes, odd indices
  1113		VCEQH	V1, V2, V5         // compare even indices
  1114		VCEQH	V1, V4, V6         // compare odd indices
  1115		VSEL	V5, V6, V31, V7    // merge even and odd indices
  1116		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1117		BLT	foundV17
  1118		MOVD	$16(R7), R7        // R7+=16
  1119		ADD	$15, R7, R9
  1120		CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1121		CMPBLE	R7, R2, index2to16 // remaining positions are handled by the generic loop
  1122		BR	notfound
  1123	
      	// len(sep) == 3: as above, with an extra comparison of the third byte.
  1124	index3plus:
  1125		CMPBNE	R4, $2, index4plus
  1126		ADD	$15, R7, R9
  1127		CMPBGE	R9, R2, index2to16
  1128		MOVD	$1, R0             // VLL index: load bytes 0..1
  1129		VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1130		VONE	V16
  1131		VREPH	$0, V0, V1
  1132		VREPB	$2, V0, V8
  1133	index3loop:
  1134		VL	(R7), V2           // load 16-bytes into V2
  1135		VLL	R0, 16(R7), V3     // load 2-bytes into V3
  1136		VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1137		VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1138		VCEQH	V1, V2, V5         // compare 2-byte even indices
  1139		VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1140		VCEQB	V8, V9, V10        // compare last bytes
  1141		VSEL	V5, V6, V31, V7    // merge even and odd indices
  1142		VN	V7, V10, V7        // AND indices with last byte
  1143		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1144		BLT	foundV17
  1145		MOVD	$16(R7), R7        // R7+=16
  1146		ADD	$15, R7, R9
  1147		CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1148		CMPBLE	R7, R2, index2to16
  1149		BR	notfound
  1150	
      	// len(sep) == 4: compare sep as a 4-byte word at each of the four byte
      	// offsets and merge the results.
  1151	index4plus:
  1152		CMPBNE	R4, $3, index5plus
  1153		ADD	$15, R7, R9
  1154		CMPBGE	R9, R2, index2to16
  1155		MOVD	$2, R0             // VLL index: load bytes 0..2
  1156		VGBM	$0x8888, V29       // 0xff000000ff000000...
  1157		VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1158		VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1159		VONE	V16
  1160		VREPF	$0, V0, V1
  1161	index4loop:
  1162		VL	(R7), V2           // load 16-bytes into V2
  1163		VLL	R0, 16(R7), V3     // load 3-bytes into V3
  1164		VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1165		VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1166		VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1167		VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1168		VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1169		VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1170		VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1171		VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1172		VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1173		VSEL	V13, V14, V31, V7  // final merge
  1174		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1175		BLT	foundV17
  1176		MOVD	$16(R7), R7        // R7+=16
  1177		ADD	$15, R7, R9
  1178		CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1179		CMPBLE	R7, R2, index2to16
  1180		BR	notfound
  1181	
      	// len(sep) in 5..16, and the fallback for the 2/3/4-byte cases when fewer
      	// than 16 candidate positions remain: compare one candidate position at
      	// a time, loading len(sep) bytes of s and comparing the whole vector
      	// against V0.
  1182	index5plus:
  1183		CMPBGT	R4, $15, index17plus
  1184	index2to16:
  1185		CMPBGT	R7, R2, notfound
  1186		MOVD	$1(R7), R8
  1187		CMPBGT	R8, R2, index2to16tail // only one candidate position left
  1188	index2to16loop:
  1189		// unrolled 2x
  1190		VLL	R4, (R7), V1
  1191		VLL	R4, 1(R7), V2
  1192		VCEQGS	V0, V1, V3
  1193		BEQ	found
  1194		MOVD	$1(R7), R7
  1195		VCEQGS	V0, V2, V4
  1196		BEQ	found
  1197		MOVD	$1(R7), R7
  1198		CMPBLT	R7, R2, index2to16loop
  1199		CMPBGT	R7, R2, notfound
      	// Exactly one candidate position (R7 == R2) is left.
  1200	index2to16tail:
  1201		VLL	R4, (R7), V1
  1202		VCEQGS	V0, V1, V2
  1203		BEQ	found
  1204		BR	notfound
  1205	
      	// len(sep) in 17..32: V0 holds the first 16 bytes of sep, V1 the rest.
  1206	index17plus:
  1207		CMPBGT	R4, $31, index33plus
  1208		SUB	$16, R4, R0        // VLL index for the bytes beyond the first 16
  1209		VLL	R0, 16(R3), V1
  1210		VONE	V7
  1211	index17to32loop:
  1212		VL	(R7), V2
  1213		VLL	R0, 16(R7), V3
  1214		VCEQG	V0, V2, V4
  1215		VCEQG	V1, V3, V5
  1216		VN	V4, V5, V6         // AND of the per-chunk comparison results
  1217		VCEQGS	V6, V7, V8         // all ones <=> every chunk matched
  1218		BEQ	found
  1219		MOVD	$1(R7), R7
  1220		CMPBLE  R7, R2, index17to32loop
  1221		BR	notfound
  1222	
      	// len(sep) in 33..48: sep is held in V0, V1 and V2.
  1223	index33plus:
  1224		CMPBGT	R4, $47, index49plus
  1225		SUB	$32, R4, R0        // VLL index for the bytes beyond the first 32
  1226		VL	16(R3), V1
  1227		VLL	R0, 32(R3), V2
  1228		VONE	V11
  1229	index33to48loop:
  1230		VL	(R7), V3
  1231		VL	16(R7), V4
  1232		VLL	R0, 32(R7), V5
  1233		VCEQG	V0, V3, V6
  1234		VCEQG	V1, V4, V7
  1235		VCEQG	V2, V5, V8
  1236		VN	V6, V7, V9
  1237		VN	V8, V9, V10
  1238		VCEQGS	V10, V11, V12      // all ones <=> every chunk matched
  1239		BEQ	found
  1240		MOVD	$1(R7), R7
  1241		CMPBLE  R7, R2, index33to48loop
  1242		BR	notfound
  1243	
      	// len(sep) in 49..64: sep is held in V0, V1, V2 and V3.
  1244	index49plus:
  1245		CMPBGT	R4, $63, index65plus
  1246		SUB	$48, R4, R0        // VLL index for the bytes beyond the first 48
  1247		VL	16(R3), V1
  1248		VL	32(R3), V2
  1249		VLL	R0, 48(R3), V3
  1250		VONE	V15
  1251	index49to64loop:
  1252		VL	(R7), V4
  1253		VL	16(R7), V5
  1254		VL	32(R7), V6
  1255		VLL	R0, 48(R7), V7
  1256		VCEQG	V0, V4, V8
  1257		VCEQG	V1, V5, V9
  1258		VCEQG	V2, V6, V10
  1259		VCEQG	V3, V7, V11
  1260		VN	V8, V9, V12
  1261		VN	V10, V11, V13
  1262		VN	V12, V13, V14
  1263		VCEQGS	V14, V15, V16      // all ones <=> every chunk matched
  1264		BEQ	found
  1265		MOVD	$1(R7), R7
  1266		CMPBLE  R7, R2, index49to64loop
      	// Falls through to notfound once every candidate position has been tried.
  1267	notfound:
  1268		MOVD	$-1, (R5)
  1269		RET
  1270	
  1271	index65plus:
  1272		// not implemented
      		// Stores through the address held in R0; presumably a deliberate
      		// fault for unsupported lengths (TODO confirm R0 is 0 on this path).
  1273		MOVD	$0, (R0)
  1274		RET
  1275	
  1276	foundV17: // index is in doubleword V17[0]
  1277		VLGVG	$0, V17, R8
  1278		ADD	R8, R7             // R7 = address of the match
  1279	found:
  1280		SUB	R1, R7             // convert the address to an index into s
  1281		MOVD	R7, (R5)
  1282		RET
  1283	
  1284	// This is called from .init_array and follows the platform, not Go, ABI.
  1285	// We are overly conservative. We could only save the registers we use.
  1286	// However, since this function is only called once per loaded module
  1287	// performance is unimportant.
      	// Appends the module data pointer passed in R2 to the list whose tail is
      	// runtime·lastmoduledatap. R1 is used as scratch.
  1288	TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1289		// Save R6-R15 in the register save area of the calling function.
  1290		// Don't bother saving F8-F15 as we aren't doing any calls.
  1291		STMG	R6, R15, 48(R15)
  1292	
  1293		// append the argument (passed in R2, as per the ELF ABI) to the
  1294		// moduledata linked list.
  1295		MOVD	runtime·lastmoduledatap(SB), R1 // R1 = current tail
  1296		MOVD	R2, moduledata_next(R1)         // tail.next = new module
  1297		MOVD	R2, runtime·lastmoduledatap(SB) // the new module becomes the tail
  1298	
  1299		// Restore R6-R15.
  1300		LMG	48(R15), R6, R15
  1301		RET
  1302	
  1303	TEXT ·checkASM(SB),NOSPLIT,$0-1
      		// Unconditionally stores 1 into the one-byte result slot.
  1304		MOVB	$1, ret+0(FP)
  1305		RET

View as plain text