...
Run Format

Text file src/runtime/asm_s390x.s

Documentation: runtime

     1	// Copyright 2016 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	// _rt0_s390x_lib is common startup code for s390x systems when
    11	// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    12	// arrange to invoke this function as a global constructor (for
    13	// c-archive) or when the shared library is loaded (for c-shared).
    14	// We expect argc and argv to be passed in the usual C ABI registers
    15	// R2 and R3.
      	//
      	// Layout of the 80-byte frame allocated below the caller's R15:
      	//   0(R15)        stacksize argument for runtime·newosproc (no-cgo path)
      	//   8(R15)        fn argument for runtime·newosproc (no-cgo path)
      	//   16..79(R15)   saved F8-F15
    16	TEXT _rt0_s390x_lib(SB), NOSPLIT|NOFRAME, $0
      		// Stash argc/argv for _rt0_s390x_lib_go, which is started in a
      		// separate thread and reloads them from these globals.
    18		MOVD	R2, _rt0_s390x_lib_argc<>(SB)
    19		MOVD	R3, _rt0_s390x_lib_argv<>(SB)
    20	
    21		// Save R6-R15 in the register save area of the calling function.
    22		STMG	R6, R15, 48(R15)
    23	
    24		// Allocate 80 bytes on the stack.
    25		MOVD	$-80(R15), R15
    26	
    27		// Save F8-F15 in our stack frame.
    28		FMOVD	F8, 16(R15)
    29		FMOVD	F9, 24(R15)
    30		FMOVD	F10, 32(R15)
    31		FMOVD	F11, 40(R15)
    32		FMOVD	F12, 48(R15)
    33		FMOVD	F13, 56(R15)
    34		FMOVD	F14, 64(R15)
    35		FMOVD	F15, 72(R15)
    36	
    37		// Synchronous initialization.
    38		MOVD	$runtime·libpreinit(SB), R1
    39		BL	R1
    40	
    41		// Create a new thread to finish Go runtime initialization.
      		// If _cgo_sys_thread_create is set, call it with R2 = entry point
      		// and R3 = 0; otherwise fall back to runtime·newosproc.
    42		MOVD	_cgo_sys_thread_create(SB), R1
    43		CMP	R1, $0
    44		BEQ	nocgo
    45		MOVD	$_rt0_s390x_lib_go(SB), R2
    46		MOVD	$0, R3
    47		BL	R1
    48		BR	restore
    49	
    50	nocgo:
      		// Arguments for runtime·newosproc are passed in the frame slots
      		// at 0(R15) and 8(R15).
    51		MOVD	$0x800000, R1              // stacksize
    52		MOVD	R1, 0(R15)
    53		MOVD	$_rt0_s390x_lib_go(SB), R1
    54		MOVD	R1, 8(R15)                 // fn
    55		MOVD	$runtime·newosproc(SB), R1
    56		BL	R1
    57	
    58	restore:
    59		// Restore F8-F15 from our stack frame.
    60		FMOVD	16(R15), F8
    61		FMOVD	24(R15), F9
    62		FMOVD	32(R15), F10
    63		FMOVD	40(R15), F11
    64		FMOVD	48(R15), F12
    65		FMOVD	56(R15), F13
    66		FMOVD	64(R15), F14
    67		FMOVD	72(R15), F15
      		// Release the 80-byte frame.
    68		MOVD	$80(R15), R15
    69	
    70		// Restore R6-R15.
    71		LMG	48(R15), R6, R15
    72		RET
    73	
    74	// _rt0_s390x_lib_go initializes the Go runtime.
    75	// This is started in a separate thread by _rt0_s390x_lib.
      	// It reloads the argc/argv values saved by _rt0_s390x_lib into R2/R3
      	// (the registers runtime·rt0_go reads them from) and branches to
      	// runtime·rt0_go without linking, so it never returns here.
    76	TEXT _rt0_s390x_lib_go(SB), NOSPLIT|NOFRAME, $0
    77		MOVD	_rt0_s390x_lib_argc<>(SB), R2
    78		MOVD	_rt0_s390x_lib_argv<>(SB), R3
    79		MOVD	$runtime·rt0_go(SB), R1
    80		BR	R1
    81	
    82	DATA _rt0_s390x_lib_argc<>(SB)/8, $0
    83	GLOBL _rt0_s390x_lib_argc<>(SB), NOPTR, $8
    84	DATA _rt0_s90x_lib_argv<>(SB)/8, $0
    85	GLOBL _rt0_s390x_lib_argv<>(SB), NOPTR, $8
    86	
      	// rt0_go is the entry point that bootstraps the runtime on the initial
      	// thread. In order it: stores argc/argv on the stack, gives g0 a
      	// provisional 64KB stack range below the current SP, calls _cgo_init
      	// (if present) using the C ABI, recomputes the stack guards, links m0
      	// and g0 together, runs check/args/osinit/schedinit, queues a goroutine
      	// whose entry is runtime·mainPC via newproc, and finally calls mstart.
    87	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    88		// R2 = argc; R3 = argv; R11 = temp; R13 = g; R15 = stack pointer
    89		// C TLS base pointer in AR0:AR1
    90	
    91		// initialize essential registers
    92		XOR	R0, R0
    93	
      		// Reserve 24 bytes and store argc/argv where runtime·args
      		// (called below) picks them up.
    94		SUB	$24, R15
    95		MOVW	R2, 8(R15) // argc
    96		MOVD	R3, 16(R15) // argv
    97	
    98		// create istack out of the given (operating system) stack.
    99		// _cgo_init may update stackguard.
   100		MOVD	$runtime·g0(SB), g
   101		MOVD	R15, R11
   102		SUB	$(64*1024), R11
   103		MOVD	R11, g_stackguard0(g)
   104		MOVD	R11, g_stackguard1(g)
   105		MOVD	R11, (g_stack+stack_lo)(g)
   106		MOVD	R15, (g_stack+stack_hi)(g)
   107	
   108		// if there is a _cgo_init, call it using the gcc ABI.
   109		MOVD	_cgo_init(SB), R11
   110		CMPBEQ	R11, $0, nocgo
   111		MOVW	AR0, R4			// (AR0 << 32 | AR1) is the TLS base pointer; MOVD is translated to EAR
   112		SLD	$32, R4, R4
   113		MOVW	AR1, R4			// arg 2: TLS base pointer
   114		MOVD	$setg_gcc<>(SB), R3 	// arg 1: setg
   115		MOVD	g, R2			// arg 0: G
   116		// C functions expect 160 bytes of space on caller stack frame
   117		// and an 8-byte aligned stack pointer
   118		MOVD	R15, R9			// save current stack (R9 is preserved in the Linux ABI)
   119		SUB	$160, R15		// reserve 160 bytes
   120		MOVD    $~7, R6
   121		AND 	R6, R15			// 8-byte align
   122		BL	R11			// this call clobbers volatile registers according to Linux ABI (R0-R5, R14)
   123		MOVD	R9, R15			// restore stack
   124		XOR	R0, R0			// zero R0
   125	
   126	nocgo:
   127		// update stackguard after _cgo_init
      		// Both guards become stack.lo + _StackGuard.
   128		MOVD	(g_stack+stack_lo)(g), R2
   129		ADD	$const__StackGuard, R2
   130		MOVD	R2, g_stackguard0(g)
   131		MOVD	R2, g_stackguard1(g)
   132	
   133		// set the per-goroutine and per-mach "registers"
   134		MOVD	$runtime·m0(SB), R2
   135	
   136		// save m->g0 = g0
   137		MOVD	g, m_g0(R2)
   138		// save m0 to g0->m
   139		MOVD	R2, g_m(g)
   140	
   141		BL	runtime·check(SB)
   142	
   143		// argc/argv are already prepared on stack
   144		BL	runtime·args(SB)
   145		BL	runtime·osinit(SB)
   146		BL	runtime·schedinit(SB)
   147	
   148		// create a new goroutine to start program
      		// Temporary 24-byte frame for newproc: 0(R15) and 8(R15) are
      		// zeroed, 16(R15) holds the address of runtime·mainPC.
   149		MOVD	$runtime·mainPC(SB), R2		// entry
   150		SUB     $24, R15
   151		MOVD 	R2, 16(R15)
   152		MOVD 	$0, 8(R15)
   153		MOVD 	$0, 0(R15)
   154		BL	runtime·newproc(SB)
   155		ADD	$24, R15
   156	
   157		// start this M
   158		BL	runtime·mstart(SB)
   159	
      		// NOTE(review): presumably only reached if mstart returns; the
      		// store to address 1 looks like a deliberate fault — confirm.
   160		MOVD	$0, 1(R0)
   161		RET
   162	
      	// runtime·mainPC is a read-only 8-byte word holding the address of
      	// runtime·main. rt0_go passes the address of this word to newproc as
      	// the entry of the first goroutine.
   163	DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
   164	GLOBL	runtime·mainPC(SB),RODATA,$8
   165	
      	// breakpoint stores zero to the address 2(R0).
      	// NOTE(review): with R0 held at 0 this presumably faults on purpose
      	// to stop in a debugger — confirm.
   166	TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
   167		MOVD	$0, 2(R0)
   168		RET
   169	
      	// asminit does nothing on s390x; it returns immediately.
   170	TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
   171		RET
   172	
   173	/*
   174	 *  go-routine
   175	 */
   176	
   177	// void gosave(Gobuf*)
   178	// save state in Gobuf; setjmp
      	// Records the caller's SP (R15), return PC (LR) and g in the Gobuf,
      	// and clears its lr and ret fields. Calls badctxt if the Gobuf's ctxt
      	// field is non-zero.
   179	TEXT runtime·gosave(SB), NOSPLIT, $-8-8
   180		MOVD	buf+0(FP), R3
   181		MOVD	R15, gobuf_sp(R3)
   182		MOVD	LR, gobuf_pc(R3)
   183		MOVD	g, gobuf_g(R3)
   184		MOVD	$0, gobuf_lr(R3)
   185		MOVD	$0, gobuf_ret(R3)
   186		// Assert ctxt is zero. See func save.
   187		MOVD	gobuf_ctxt(R3), R3
   188		CMPBEQ	R3, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   189		BL	runtime·badctxt(SB)
   190		RET
   191	
   192	// void gogo(Gobuf*)
   193	// restore state from Gobuf; longjmp
      	// Switches g to buf.g, loads SP, LR, the return value (R3) and the
      	// closure context (R12) from the Gobuf, zeroes those Gobuf fields,
      	// and branches to buf.pc.
   194	TEXT runtime·gogo(SB), NOSPLIT, $16-8
   195		MOVD	buf+0(FP), R5
   196		MOVD	gobuf_g(R5), g	// make sure g is not nil
   197		BL	runtime·save_g(SB)
   198	
      		// The value loaded into R4 is not used afterwards; per the
      		// comment above, the load is the nil check on g.
   199		MOVD	0(g), R4
   200		MOVD	gobuf_sp(R5), R15
   201		MOVD	gobuf_lr(R5), LR
   202		MOVD	gobuf_ret(R5), R3
   203		MOVD	gobuf_ctxt(R5), R12
      		// Clear the fields that were just consumed.
   204		MOVD	$0, gobuf_sp(R5)
   205		MOVD	$0, gobuf_ret(R5)
   206		MOVD	$0, gobuf_lr(R5)
   207		MOVD	$0, gobuf_ctxt(R5)
   208		CMP	R0, R0 // set condition codes for == test, needed by stack split
   209		MOVD	gobuf_pc(R5), R6
   210		BR	(R6)
   211	
   212	// void mcall(fn func(*g))
   213	// Switch to m->g0's stack, call fn(g).
   214	// Fn must never return.  It should gogo(&g->sched)
   215	// to keep running g.
      	// Calls badmcall if the caller is already running on m->g0, and
      	// badmcall2 if fn returns.
   216	TEXT runtime·mcall(SB), NOSPLIT, $-8-8
   217		// Save caller state in g->sched
   218		MOVD	R15, (g_sched+gobuf_sp)(g)
   219		MOVD	LR, (g_sched+gobuf_pc)(g)
   220		MOVD	$0, (g_sched+gobuf_lr)(g)
   221		MOVD	g, (g_sched+gobuf_g)(g)
   222	
   223		// Switch to m->g0 & its stack, call fn.
   224		MOVD	g, R3				// R3 = calling g, passed to fn below
   225		MOVD	g_m(g), R8
   226		MOVD	m_g0(R8), g
   227		BL	runtime·save_g(SB)
   228		CMP	g, R3
   229		BNE	2(PC)				// g0 != calling g: skip the badmcall branch
   230		BR	runtime·badmcall(SB)
   231		MOVD	fn+0(FP), R12			// context
   232		MOVD	0(R12), R4			// code pointer
   233		MOVD	(g_sched+gobuf_sp)(g), R15	// sp = m->g0->sched.sp
      		// Build fn's frame on g0: 0(R15) is zeroed, 8(R15) holds the
      		// calling g as fn's argument.
   234		SUB	$16, R15
   235		MOVD	R3, 8(R15)
   236		MOVD	$0, 0(R15)
   237		BL	(R4)
   238		BR	runtime·badmcall2(SB)
   239	
   240	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   241	// of the G stack.  We need to distinguish the routine that
   242	// lives at the bottom of the G stack from the one that lives
   243	// at the top of the system stack because the one at the top of
   244	// the system stack terminates the stack walk (see topofstack()).
      	// systemstack records an address inside this function (entry + 16) as
      	// the saved PC of the goroutine it switches away from. The body begins
      	// with UNDEF and is not meant to be executed.
   245	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   246		UNDEF
   247		BL	(LR)	// make sure this function is not leaf
   248		RET
   249	
   250	// func systemstack(fn func())
      	// Runs fn on the m->g0 stack. If the caller is already on g0 or on
      	// gsignal, fn is tail-called on the current stack. If the caller is
      	// m->curg, its state is saved in g->sched, execution switches to g0,
      	// fn is called, and then execution switches back. Any other g results
      	// in a call to badsystemstack.
   251	TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   252		MOVD	fn+0(FP), R3	// R3 = fn
   253		MOVD	R3, R12		// context
   254		MOVD	g_m(g), R4	// R4 = m
   255	
   256		MOVD	m_gsignal(R4), R5	// R5 = gsignal
   257		CMPBEQ	g, R5, noswitch
   258	
   259		MOVD	m_g0(R4), R5	// R5 = g0
   260		CMPBEQ	g, R5, noswitch
   261	
   262		MOVD	m_curg(R4), R6
   263		CMPBEQ	g, R6, switch
   264	
   265		// Bad: g is not gsignal, not g0, not curg. What is it?
   266		// Hide call from linker nosplit analysis.
   267		MOVD	$runtime·badsystemstack(SB), R3
   268		BL	(R3)
   269	
   270	switch:
   271		// save our state in g->sched.  Pretend to
   272		// be systemstack_switch if the G stack is scanned.
   273		MOVD	$runtime·systemstack_switch(SB), R6
   274		ADD	$16, R6	// get past prologue
   275		MOVD	R6, (g_sched+gobuf_pc)(g)
   276		MOVD	R15, (g_sched+gobuf_sp)(g)
   277		MOVD	$0, (g_sched+gobuf_lr)(g)
   278		MOVD	g, (g_sched+gobuf_g)(g)
   279	
   280		// switch to g0
   281		MOVD	R5, g
   282		BL	runtime·save_g(SB)
   283		MOVD	(g_sched+gobuf_sp)(g), R3
   284		// make it look like mstart called systemstack on g0, to stop traceback
   285		SUB	$8, R3
   286		MOVD	$runtime·mstart(SB), R4
   287		MOVD	R4, 0(R3)
   288		MOVD	R3, R15
   289	
   290		// call target function
   291		MOVD	0(R12), R3	// code pointer
   292		BL	(R3)
   293	
   294		// switch back to g
      		// SP is reloaded from g->sched.sp, and that field is then cleared.
   295		MOVD	g_m(g), R3
   296		MOVD	m_curg(R3), g
   297		BL	runtime·save_g(SB)
   298		MOVD	(g_sched+gobuf_sp)(g), R15
   299		MOVD	$0, (g_sched+gobuf_sp)(g)
   300		RET
   301	
   302	noswitch:
   303		// already on m stack, just call directly
   304		// Using a tail call here cleans up tracebacks since we won't stop
   305		// at an intermediate systemstack.
   306		MOVD	0(R12), R3	// code pointer
   307		MOVD	0(R15), LR	// restore LR
   308		ADD	$8, R15
   309		BR	(R3)
   310	
   311	/*
   312	 * support for morestack
   313	 */
   314	
   315	// Called during function prolog when more stack is needed.
   316	// Caller has already loaded:
   317	// R3: framesize, R4: argsize, R5: LR
   318	//
   319	// The traceback routines see morestack on a g0 as being
   320	// the top of a stack (for example, morestack calling newstack
   321	// calling the scheduler calling newm calling gc), so we must
   322	// record an argument size. For that purpose, it has no arguments.
      	//
      	// R12 is saved as the context (g->sched.ctxt). The function refuses to
      	// grow the g0 or gsignal stacks, records the interrupted function's
      	// state in g->sched and its caller's state in m->morebuf, then switches
      	// to g0 and calls newstack.
   323	TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   324		// Cannot grow scheduler stack (m->g0).
   325		MOVD	g_m(g), R7
   326		MOVD	m_g0(R7), R8
   327		CMPBNE	g, R8, 3(PC)	// not on g0: skip the two calls below
   328		BL	runtime·badmorestackg0(SB)
   329		BL	runtime·abort(SB)
   330	
   331		// Cannot grow signal stack (m->gsignal).
   332		MOVD	m_gsignal(R7), R8
   333		CMP	g, R8
   334		BNE	3(PC)		// not on gsignal: skip the two calls below
   335		BL	runtime·badmorestackgsignal(SB)
   336		BL	runtime·abort(SB)
   337	
   338		// Called from f.
   339		// Set g->sched to context in f.
   340		MOVD	R15, (g_sched+gobuf_sp)(g)
   341		MOVD	LR, R8
   342		MOVD	R8, (g_sched+gobuf_pc)(g)
   343		MOVD	R5, (g_sched+gobuf_lr)(g)
   344		MOVD	R12, (g_sched+gobuf_ctxt)(g)
   345	
   346		// Called from f.
   347		// Set m->morebuf to f's caller.
   348		MOVD	R5, (m_morebuf+gobuf_pc)(R7)	// f's caller's PC
   349		MOVD	R15, (m_morebuf+gobuf_sp)(R7)	// f's caller's SP
   350		MOVD	g, (m_morebuf+gobuf_g)(R7)
   351	
   352		// Call newstack on m->g0's stack.
   353		MOVD	m_g0(R7), g
   354		BL	runtime·save_g(SB)
   355		MOVD	(g_sched+gobuf_sp)(g), R15
   356		// Create a stack frame on g0 to call newstack.
   357		MOVD	$0, -8(R15)	// Zero saved LR in frame
   358		SUB	$8, R15
   359		BL	runtime·newstack(SB)
   360	
   361		// Not reached, but make sure the return PC from the call to newstack
   362		// is still in this function, and not the beginning of the next.
   363		UNDEF
   364	
      	// morestack_noctxt zeroes the context register (R12) and then
      	// branches to morestack.
   365	TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
   366		MOVD	$0, R12
   367		BR	runtime·morestack(SB)
   368	
   369	// reflectcall: call a function with the given argument list
   370	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   371	// we don't have variable-sized frames, so we use a small number
   372	// of constant-sized-frame functions to encode a few bits of size in the pc.
   373	// Caution: ugly multiline assembly macros in your future!
   374	
      	// DISPATCH(NAME, MAXSIZE) expects the argument size in R3. If R3 is
      	// not greater than MAXSIZE it branches to NAME; otherwise the BGT
      	// skips the two-instruction branch sequence and falls through to
      	// whatever follows the macro. Clobbers R4 and R5.
   375	#define DISPATCH(NAME,MAXSIZE)		\
   376		MOVD	$MAXSIZE, R4;		\
   377		CMP	R3, R4;		\
   378		BGT	3(PC);			\
   379		MOVD	$NAME(SB), R5;	\
   380		BR	(R5)
   381	// Note: can't just "BR NAME(SB)" - bad inlining results.
   382	
      	// reflect·call is a trampoline that branches straight to ·reflectcall.
   383	TEXT reflect·call(SB), NOSPLIT, $0-0
   384		BR	·reflectcall(SB)
   385	
      	// reflectcall loads the 32-bit argsize into R3 and branches to the
      	// first call<N> stub, tried in increasing order of N, for which
      	// argsize <= N. If argsize exceeds the largest size class it
      	// branches to runtime·badreflectcall.
   386	TEXT ·reflectcall(SB), NOSPLIT, $-8-32
   387		MOVWZ argsize+24(FP), R3
   388		DISPATCH(runtime·call32, 32)
   389		DISPATCH(runtime·call64, 64)
   390		DISPATCH(runtime·call128, 128)
   391		DISPATCH(runtime·call256, 256)
   392		DISPATCH(runtime·call512, 512)
   393		DISPATCH(runtime·call1024, 1024)
   394		DISPATCH(runtime·call2048, 2048)
   395		DISPATCH(runtime·call4096, 4096)
   396		DISPATCH(runtime·call8192, 8192)
   397		DISPATCH(runtime·call16384, 16384)
   398		DISPATCH(runtime·call32768, 32768)
   399		DISPATCH(runtime·call65536, 65536)
   400		DISPATCH(runtime·call131072, 131072)
   401		DISPATCH(runtime·call262144, 262144)
   402		DISPATCH(runtime·call524288, 524288)
   403		DISPATCH(runtime·call1048576, 1048576)
   404		DISPATCH(runtime·call2097152, 2097152)
   405		DISPATCH(runtime·call4194304, 4194304)
   406		DISPATCH(runtime·call8388608, 8388608)
   407		DISPATCH(runtime·call16777216, 16777216)
   408		DISPATCH(runtime·call33554432, 33554432)
   409		DISPATCH(runtime·call67108864, 67108864)
   410		DISPATCH(runtime·call134217728, 134217728)
   411		DISPATCH(runtime·call268435456, 268435456)
   412		DISPATCH(runtime·call536870912, 536870912)
   413		DISPATCH(runtime·call1073741824, 1073741824)
      		// argsize is larger than every size class.
   414		MOVD	$runtime·badreflectcall(SB), R5
   415		BR	(R5)
   416	
      	// CALLFN(NAME, MAXSIZE) defines the function NAME with a MAXSIZE-byte
      	// frame. The generated function:
      	//   1. copies argsize bytes from arg into its frame, 256 bytes at a
      	//      time with MVC, then the remainder through EXRL on callfnMVC
      	//      with R5 = remaining-1;
      	//   2. loads the closure f into R12 and calls its code pointer;
      	//   3. hands callRet<> the argtype (R7), the destination arg+retoffset
      	//      (R6), the source frame+retoffset (R4) and the byte count
      	//      argsize-retoffset (R5) so the results are copied back.
   417	#define CALLFN(NAME,MAXSIZE)			\
   418	TEXT NAME(SB), WRAPPER, $MAXSIZE-24;		\
   419		NO_LOCAL_POINTERS;			\
   420		/* copy arguments to stack */		\
   421		MOVD	arg+16(FP), R4;			\
   422		MOVWZ	argsize+24(FP), R5;		\
   423		MOVD	$stack-MAXSIZE(SP), R6;		\
   424	loopArgs: /* copy 256 bytes at a time */	\
   425		CMP	R5, $256;			\
   426		BLT	tailArgs;			\
   427		SUB	$256, R5;			\
   428		MVC	$256, 0(R4), 0(R6);		\
   429		MOVD	$256(R4), R4;			\
   430		MOVD	$256(R6), R6;			\
   431		BR	loopArgs;			\
   432	tailArgs: /* copy remaining bytes */		\
   433		CMP	R5, $0;				\
   434		BEQ	callFunction;			\
   435		SUB	$1, R5;				\
   436		EXRL	$callfnMVC<>(SB), R5;		\
   437	callFunction:					\
   438		MOVD	f+8(FP), R12;			\
   439		MOVD	(R12), R8;			\
   440		PCDATA  $PCDATA_StackMapIndex, $0;	\
   441		BL	(R8);				\
   442		/* copy return values back */		\
   443		MOVD	argtype+0(FP), R7;		\
   444		MOVD	arg+16(FP), R6;			\
   445		MOVWZ	n+24(FP), R5;			\
   446		MOVD	$stack-MAXSIZE(SP), R4;		\
   447		MOVWZ	retoffset+28(FP), R1;		\
   448		ADD	R1, R4;				\
   449		ADD	R1, R6;				\
   450		SUB	R1, R5;				\
   451		BL	callRet<>(SB);			\
   452		RET
   453	
   454	// callRet copies return values back at the end of call*. This is a
   455	// separate function so it can allocate stack space for the arguments
   456	// to reflectcallmove. It does not follow the Go ABI; it expects its
   457	// arguments in registers.
      	// Register inputs, as set up by CALLFN:
      	//   R7 = argtype, R6 = arg + retoffset, R4 = frame copy + retoffset,
      	//   R5 = argsize - retoffset.
      	// They are stored to 8(R15)..32(R15), in that order, as the stack
      	// arguments of runtime·reflectcallmove.
   458	TEXT callRet<>(SB), NOSPLIT, $32-0
   459		MOVD	R7, 8(R15)
   460		MOVD	R6, 16(R15)
   461		MOVD	R4, 24(R15)
   462		MOVD	R5, 32(R15)
   463		BL	runtime·reflectcallmove(SB)
   464		RET
   465	
      	// Instantiate one fixed-frame call stub per size class; these are the
      	// targets that ·reflectcall dispatches to.
   466	CALLFN(·call32, 32)
   467	CALLFN(·call64, 64)
   468	CALLFN(·call128, 128)
   469	CALLFN(·call256, 256)
   470	CALLFN(·call512, 512)
   471	CALLFN(·call1024, 1024)
   472	CALLFN(·call2048, 2048)
   473	CALLFN(·call4096, 4096)
   474	CALLFN(·call8192, 8192)
   475	CALLFN(·call16384, 16384)
   476	CALLFN(·call32768, 32768)
   477	CALLFN(·call65536, 65536)
   478	CALLFN(·call131072, 131072)
   479	CALLFN(·call262144, 262144)
   480	CALLFN(·call524288, 524288)
   481	CALLFN(·call1048576, 1048576)
   482	CALLFN(·call2097152, 2097152)
   483	CALLFN(·call4194304, 4194304)
   484	CALLFN(·call8388608, 8388608)
   485	CALLFN(·call16777216, 16777216)
   486	CALLFN(·call33554432, 33554432)
   487	CALLFN(·call67108864, 67108864)
   488	CALLFN(·call134217728, 134217728)
   489	CALLFN(·call268435456, 268435456)
   490	CALLFN(·call536870912, 536870912)
   491	CALLFN(·call1073741824, 1073741824)
   492	
   493	// Not a function: target for EXRL (execute relative long) instruction.
      	// CALLFN executes this MVC via EXRL with R5 = (bytes remaining - 1)
      	// to copy the final partial chunk from 0(R4) to 0(R6). There is
      	// deliberately no RET: the instruction is never reached by a call.
   494	TEXT callfnMVC<>(SB),NOSPLIT|NOFRAME,$0-0
   495		MVC	$1, 0(R4), 0(R6)
   496	
      	// procyield does nothing on s390x; it returns immediately.
   497	TEXT runtime·procyield(SB),NOSPLIT,$0-0
   498		RET
   499	
   500	// void jmpdefer(fv, sp);
   501	// called from deferreturn.
   502	// 1. grab stored LR for caller
   503	// 2. sub 6 bytes to get back to BL deferreturn (size of BRASL instruction)
   504	// 3. BR to fn
      	// The deferred function therefore returns to the BL deferreturn
      	// instruction in the caller, which runs deferreturn again.
   505	TEXT runtime·jmpdefer(SB),NOSPLIT|NOFRAME,$0-16
   506		MOVD	0(R15), R1	// R1 = saved LR
   507		SUB	$6, R1, LR	// LR = address of the BL deferreturn instruction
   508	
   509		MOVD	fv+0(FP), R12	// closure context
   510		MOVD	argp+8(FP), R15
   511		SUB	$8, R15		// SP = argp - 8
   512		MOVD	0(R12), R3	// code pointer
   513		BR	(R3)
   514	
   515	// Save state of caller into g->sched. Smashes R1.
      	// Stores LR as sched.pc and R15 as sched.sp, clears sched.lr and
      	// sched.ret, and calls badctxt if sched.ctxt is non-zero.
   516	TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
   517		MOVD	LR, (g_sched+gobuf_pc)(g)
   518		MOVD	R15, (g_sched+gobuf_sp)(g)
   519		MOVD	$0, (g_sched+gobuf_lr)(g)
   520		MOVD	$0, (g_sched+gobuf_ret)(g)
   521		// Assert ctxt is zero. See func save.
   522		MOVD	(g_sched+gobuf_ctxt)(g), R1
   523		CMPBEQ	R1, $0, 2(PC)	// ctxt == 0: skip the badctxt call
   524		BL	runtime·badctxt(SB)
   525		RET
   526	
   527	// func asmcgocall(fn, arg unsafe.Pointer) int32
   528	// Call fn(arg) on the scheduler stack,
   529	// aligned appropriately for the gcc ABI.
   530	// See cgocall.go for more details.
      	// The low 32 bits of R2 after the call are stored as the result.
   531	TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   532		// R3 = fn; R4 = arg; R2 = original SP; R5 = original g; R6 = temp
   533		// C TLS base pointer in AR0:AR1
   534		MOVD	fn+0(FP), R3
   535		MOVD	arg+8(FP), R4
   536	
   537		MOVD	R15, R2		// save original stack pointer
   538		MOVD	g, R5
   539	
   540		// Figure out if we need to switch to m->g0 stack.
   541		// We get called to create new OS threads too, and those
   542		// come in on the m->g0 stack already.
   543		MOVD	g_m(g), R6
   544		MOVD	m_g0(R6), R6
   545		CMPBEQ	R6, g, g0
   546		BL	gosave<>(SB)
   547		MOVD	R6, g
   548		BL	runtime·save_g(SB)
   549		MOVD	(g_sched+gobuf_sp)(g), R15
   550	
   551		// Now on a scheduling stack (a pthread-created stack).
   552	g0:
   553		// Save room for two of our pointers, plus 160 bytes of callee
   554		// save area that lives on the caller stack.
      		// After the adjustment: 160(R15) = stack depth, 168(R15) = old g.
   555		SUB	$176, R15
   556		MOVD	$~7, R6
   557		AND	R6, R15                 // 8-byte alignment for gcc ABI
   558		MOVD	R5, 168(R15)             // save old g on stack
   559		MOVD	(g_stack+stack_hi)(R5), R5
   560		SUB	R2, R5
   561		MOVD	R5, 160(R15)             // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
   562		MOVD	$0, 0(R15)              // clear back chain pointer (TODO can we give it real back trace information?)
   563		MOVD	R4, R2                  // arg in R2
   564		BL	R3                      // can clobber: R0-R5, R14, F0-F3, F5, F7-F15
   565	
   566		XOR	R0, R0                  // set R0 back to 0.
   567		// Restore g, stack pointer.
      		// SP is recomputed as stack.hi - saved depth rather than
      		// reloaded, per the note on the depth save above.
   568		MOVD	168(R15), g
   569		BL	runtime·save_g(SB)
   570		MOVD	(g_stack+stack_hi)(g), R5
   571		MOVD	160(R15), R6
   572		SUB	R6, R5
   573		MOVD	R5, R15
   574	
   575		MOVW	R2, ret+16(FP)
   576		RET
   577	
   578	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   579	// Turn the fn into a Go func (by taking its address) and call
   580	// cgocallback_gofunc.
      	// The four outgoing arguments are placed at 8(R15)..32(R15); the call
      	// itself is made indirectly through R3.
   581	TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
   582		MOVD	$fn+0(FP), R3	// address of the fn argument slot, not its value
   583		MOVD	R3, 8(R15)
   584		MOVD	frame+8(FP), R3
   585		MOVD	R3, 16(R15)
   586		MOVD	framesize+16(FP), R3
   587		MOVD	R3, 24(R15)
   588		MOVD	ctxt+24(FP), R3
   589		MOVD	R3, 32(R15)
   590		MOVD	$runtime·cgocallback_gofunc(SB), R3
   591		BL	(R3)
   592		RET
   593	
   594	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   595	// See cgocall.go for more details.
      	// Frame locals: savedm-8(SP) holds the m found on entry (zero if an m
      	// had to be borrowed via needm); savedsp-16(SP) holds the previous
      	// m->g0->sched.sp so it can be restored on the way out.
   596	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
   597		NO_LOCAL_POINTERS
   598	
   599		// Load m and g from thread-local storage.
   600		MOVB	runtime·iscgo(SB), R3
   601		CMPBEQ	R3, $0, nocgo
   602		BL	runtime·load_g(SB)
   603	
   604	nocgo:
   605		// If g is nil, Go did not create the current thread.
   606		// Call needm to obtain one for temporary use.
   607		// In this case, we're running on the thread stack, so there's
   608		// lots of space, but the linker doesn't know. Hide the call from
   609		// the linker analysis by using an indirect call.
   610		CMPBEQ	g, $0, needm
   611	
   612		MOVD	g_m(g), R8
   613		MOVD	R8, savedm-8(SP)
   614		BR	havem
   615	
   616	needm:
   617		MOVD	g, savedm-8(SP) // g is zero, so is m.
   618		MOVD	$runtime·needm(SB), R3
   619		BL	(R3)
   620	
   621		// Set m->sched.sp = SP, so that if a panic happens
   622		// during the function we are about to execute, it will
   623		// have a valid SP to run on the g0 stack.
   624		// The next few lines (after the havem label)
   625		// will save this SP onto the stack and then write
   626		// the same SP back to m->sched.sp. That seems redundant,
   627		// but if an unrecovered panic happens, unwindm will
   628		// restore the g->sched.sp from the stack location
   629		// and then systemstack will try to use it. If we don't set it here,
   630		// that restored SP will be uninitialized (typically 0) and
   631		// will not be usable.
   632		MOVD	g_m(g), R8
   633		MOVD	m_g0(R8), R3
   634		MOVD	R15, (g_sched+gobuf_sp)(R3)
   635	
   636	havem:
   637		// Now there's a valid m, and we're running on its m->g0.
   638		// Save current m->g0->sched.sp on stack and then set it to SP.
   639		// Save current sp in m->g0->sched.sp in preparation for
   640		// switch back to m->curg stack.
   641		// NOTE: unwindm knows that the saved g->sched.sp is at 8(R1) aka savedsp-16(SP).
      		// NOTE(review): this file uses R15 as the stack pointer, so the
      		// "8(R1)" above looks inherited from another port — confirm.
   642		MOVD	m_g0(R8), R3
   643		MOVD	(g_sched+gobuf_sp)(R3), R4
   644		MOVD	R4, savedsp-16(SP)
   645		MOVD	R15, (g_sched+gobuf_sp)(R3)
   646	
   647		// Switch to m->curg stack and call runtime.cgocallbackg.
   648		// Because we are taking over the execution of m->curg
   649		// but *not* resuming what had been running, we need to
   650		// save that information (m->curg->sched) so we can restore it.
   651		// We can restore m->curg->sched.sp easily, because calling
   652		// runtime.cgocallbackg leaves SP unchanged upon return.
   653		// To save m->curg->sched.pc, we push it onto the stack.
   654		// This has the added benefit that it looks to the traceback
   655		// routine like cgocallbackg is going to return to that
   656		// PC (because the frame we allocate below has the same
   657		// size as cgocallback_gofunc's frame declared above)
   658		// so that the traceback will seamlessly trace back into
   659		// the earlier calls.
   660		//
   661		// In the new goroutine, -8(SP) is unused (where SP refers to
   662		// m->curg's SP while we're setting it up, before we've adjusted it).
   663		MOVD	m_curg(R8), g
   664		BL	runtime·save_g(SB)
   665		MOVD	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
   666		MOVD	(g_sched+gobuf_pc)(g), R5
   667		MOVD	R5, -24(R4)	// saved curg sched.pc at the new 0(R15)
   668		MOVD	ctxt+24(FP), R5
   669		MOVD	R5, -16(R4)	// ctxt at the new 8(R15)
   670		MOVD	$-24(R4), R15
   671		BL	runtime·cgocallbackg(SB)
   672	
   673		// Restore g->sched (== m->curg->sched) from saved values.
   674		MOVD	0(R15), R5
   675		MOVD	R5, (g_sched+gobuf_pc)(g)
   676		MOVD	$24(R15), R4
   677		MOVD	R4, (g_sched+gobuf_sp)(g)
   678	
   679		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   680		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   681		// so we do not have to restore it.)
   682		MOVD	g_m(g), R8
   683		MOVD	m_g0(R8), g
   684		BL	runtime·save_g(SB)
   685		MOVD	(g_sched+gobuf_sp)(g), R15
   686		MOVD	savedsp-16(SP), R4
   687		MOVD	R4, (g_sched+gobuf_sp)(g)
   688	
   689		// If the m on entry was nil, we called needm above to borrow an m
   690		// for the duration of the call. Since the call is over, return it with dropm.
   691		MOVD	savedm-8(SP), R6
   692		CMPBNE	R6, $0, droppedm
   693		MOVD	$runtime·dropm(SB), R3
   694		BL	(R3)
   695	droppedm:
   696	
   697		// Done!
   698		RET
   699	
   700	// void setg(G*); set g. for use by needm.
      	// Loads the argument into the g register and calls save_g.
   701	TEXT runtime·setg(SB), NOSPLIT, $0-8
   702		MOVD	gg+0(FP), g
   703		// This only happens if iscgo, so jump straight to save_g
   704		BL	runtime·save_g(SB)
   705		RET
   706	
   707	// void setg_gcc(G*); set g in C TLS.
   708	// Must obey the gcc calling convention.
      	// The new g arrives in R2. LR, R10, g and R11 are parked in R1, R3,
      	// R4 and R5 around the save_g call and restored before returning, so
      	// the caller's g register is unchanged on return.
   709	TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
   710		// The standard prologue clobbers LR (R14), which is callee-save in
   711		// the C ABI, so we have to use NOFRAME and save LR ourselves.
   712		MOVD	LR, R1
   713		// Also save g, R10, and R11 since they're callee-save in C ABI
   714		MOVD	R10, R3
   715		MOVD	g, R4
   716		MOVD	R11, R5
   717	
   718		MOVD	R2, g
   719		BL	runtime·save_g(SB)
   720	
   721		MOVD	R5, R11
   722		MOVD	R4, g
   723		MOVD	R3, R10
   724		MOVD	R1, LR
   725		RET
   726	
      	// getcallerpc returns the word stored at 0(R15), which is the LR
      	// saved by the calling function.
   727	TEXT runtime·getcallerpc(SB),NOSPLIT|NOFRAME,$0-8
   728		MOVD	0(R15), R3		// LR saved by caller
   729		MOVD	R3, ret+0(FP)
   730		RET
   731	
      	// abort performs a load from the address in R0, followed by UNDEF.
      	// NOTE(review): with R0 held at 0 this presumably faults on purpose;
      	// the UNDEF stops execution if the load does not — confirm.
   732	TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
   733		MOVW	(R0), R0
   734		UNDEF
   735	
   736	// int64 runtime·cputicks(void)
      	// Returns the TOD clock value from STCK with its most significant bit
      	// cleared, so the result is never negative.
   737	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   738		// The TOD clock on s390 counts from the year 1900 in ~250ps intervals.
   739		// This means that since about 1972 the msb has been set, making the
   740		// result of a call to STORE CLOCK (stck) a negative number.
   741		// We clear the msb to make it positive.
   742		STCK	ret+0(FP)      // serialises before and after call
   743		MOVD	ret+0(FP), R3  // R3 will wrap to 0 in the year 2043
   744		SLD	$1, R3         // shift the msb out ...
   745		SRD	$1, R3         // ... and shift a zero back in
   746		MOVD	R3, ret+0(FP)
   747		RET
   748	
   749	// AES hashing not implemented for s390x
      	// Each stub consists solely of a load from the address in R0 into R15
      	// and has no RET.
      	// NOTE(review): presumably an intentional fault if one of these is
      	// ever called — confirm.
   750	TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
   751		MOVW	(R0), R15
   752	TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
   753		MOVW	(R0), R15
   754	TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
   755		MOVW	(R0), R15
   756	TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
   757		MOVW	(R0), R15
   758	
   759	// memequal(a, b unsafe.Pointer, size uintptr) bool
      	// Loads the memeqbody inputs (R3 = a, R5 = b, R6 = size, R7 = &ret)
      	// and tail-branches to memeqbody, which writes the result byte.
   760	TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
   761		MOVD	a+0(FP), R3
   762		MOVD	b+8(FP), R5
   763		MOVD	size+16(FP), R6
   764		LA	ret+24(FP), R7
   765		BR	runtime·memeqbody(SB)
   766	
   767	// memequal_varlen(a, b unsafe.Pointer) bool
      	// Same as memequal, except that the size is read from the closure
      	// pointed to by R12 instead of being passed as an argument.
   768	TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
   769		MOVD	a+0(FP), R3
   770		MOVD	b+8(FP), R5
   771		MOVD	8(R12), R6    // compiler stores size at offset 8 in the closure
   772		LA	ret+16(FP), R7
   773		BR	runtime·memeqbody(SB)
   774	
      	// bytes·Equal stores 0 as the result when the two lengths differ;
      	// otherwise it tail-branches to memeqbody with R3 = a, R5 = b,
      	// R6 = the common length and R7 = &ret.
   775	TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
   776		MOVD	a_len+8(FP), R2
   777		MOVD	b_len+32(FP), R6
   778		MOVD	a+0(FP), R3
   779		MOVD	b+24(FP), R5
   780		LA	ret+48(FP), R7
   781		CMPBNE	R2, R6, notequal
   782		BR	runtime·memeqbody(SB)
   783	notequal:
   784		MOVB	$0, ret+48(FP)
   785		RET
   786	
   787	// input:
   788	//   R3 = a
   789	//   R5 = b
   790	//   R6 = len
   791	//   R7 = address of output byte (stores 0 or 1 here)
   792	//   a and b have the same length
      	//
      	// Strategy by remaining length:
      	//   >= 256   compare 256-byte chunks with CLC in a loop
      	//   32..255  compare the rest with one CLC executed through EXRL
      	//   < 32     compare 16, 8, 4 and then up to 3 single bytes, using R2
      	//            as a running offset into both buffers
      	// Equal pointers and a zero length both yield 1.
      	// Clobbers R2, R8 and R9; R3, R5 and R6 are advanced.
   793	TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
   794		CMPBEQ	R3, R5, equal
   795	loop:
   796		CMPBEQ	R6, $0, equal
   797		CMPBLT	R6, $32, tiny
   798		CMP	R6, $256
   799		BLT	tail
   800		CLC	$256, 0(R3), 0(R5)
   801		BNE	notequal
   802		SUB	$256, R6
   803		LA	256(R3), R3
   804		LA	256(R5), R5
   805		BR	loop
   806	tail:
      		// R8 = len-1 is the operand EXRL applies to the CLC target.
   807		SUB	$1, R6, R8
   808		EXRL	$runtime·memeqbodyclc(SB), R8
   809		BEQ	equal
   810	notequal:
   811		MOVB	$0, 0(R7)
   812		RET
   813	equal:
   814		MOVB	$1, 0(R7)
   815		RET
   816	tiny:
      		// R2 = byte offset already compared (0 on entry to this path).
   817		MOVD	$0, R2
   818		CMPBLT	R6, $16, lt16
   819		MOVD	0(R3), R8
   820		MOVD	0(R5), R9
   821		CMPBNE	R8, R9, notequal
   822		MOVD	8(R3), R8
   823		MOVD	8(R5), R9
   824		CMPBNE	R8, R9, notequal
   825		LA	16(R2), R2
   826		SUB	$16, R6
   827	lt16:
   828		CMPBLT	R6, $8, lt8
   829		MOVD	0(R3)(R2*1), R8
   830		MOVD	0(R5)(R2*1), R9
   831		CMPBNE	R8, R9, notequal
   832		LA	8(R2), R2
   833		SUB	$8, R6
   834	lt8:
   835		CMPBLT	R6, $4, lt4
   836		MOVWZ	0(R3)(R2*1), R8
   837		MOVWZ	0(R5)(R2*1), R9
   838		CMPBNE	R8, R9, notequal
   839		LA	4(R2), R2
   840		SUB	$4, R6
   841	lt4:
      	// CHECK(n): finish with "equal" if exactly n bytes remain, otherwise
      	// compare the byte at offset R2+n of each buffer.
   842	#define CHECK(n) \
   843		CMPBEQ	R6, $n, equal \
   844		MOVB	n(R3)(R2*1), R8 \
   845		MOVB	n(R5)(R2*1), R9 \
   846		CMPBNE	R8, R9, notequal
   847		CHECK(0)
   848		CHECK(1)
   849		CHECK(2)
   850		CHECK(3)
   851		BR	equal
   852	
      	// memeqbodyclc holds the CLC instruction that memeqbody runs through
      	// EXRL with R8 = len-1; it compares the bytes at 0(R3) with those at
      	// 0(R5).
   853	TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
   854		CLC	$1, 0(R3), 0(R5)
   855		RET
   856	
      	// bytes·IndexByte loads the slice pointer, its length, the byte sought
      	// and the address of the result slot, then tail-branches to
      	// indexbytebody.
   857	TEXT bytes·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
   858		MOVD	s+0(FP), R3     // s => R3
   859		MOVD	s_len+8(FP), R4 // s_len => R4
   860		MOVBZ	c+24(FP), R5    // c => R5
   861		MOVD	$ret+32(FP), R2 // &ret => R2
   862		BR	runtime·indexbytebody(SB)
   863	
      	// strings·IndexByte loads the string pointer, its length, the byte
      	// sought and the address of the result slot, then tail-branches to
      	// indexbytebody.
   864	TEXT strings·IndexByte(SB),NOSPLIT|NOFRAME,$0-32
   865		MOVD	s+0(FP), R3     // s => R3
   866		MOVD	s_len+8(FP), R4 // s_len => R4
   867		MOVBZ	c+16(FP), R5    // c => R5
   868		MOVD	$ret+24(FP), R2 // &ret => R2
   869		BR	runtime·indexbytebody(SB)
   870	
   871	// input:
   872	// R3: s
   873	// R4: s_len
   874	// R5: c -- byte sought
   875	// R2: &ret -- address to put index into
      	//
      	// Stores the index of the first occurrence of c to 0(R2), or -1 if c
      	// is not found (including when s_len is 0).
      	// Internal register use: R6 = base of s, R8 = one past the end of s,
      	// R7 = scratch. Three search strategies:
      	//   s_len < 16            byte-at-a-time loop ("residual")
      	//   no vector facility    SRST instruction; R0 temporarily holds c
      	//   vector facility       byte loop up to 16-byte alignment, then
      	//                         VFEEBS over 16-byte blocks, then the byte
      	//                         loop for the remaining bytes
   876	TEXT runtime·indexbytebody(SB),NOSPLIT|NOFRAME,$0
   877		CMPBEQ	R4, $0, notfound
   878		MOVD	R3, R6          // store base for later
   879		ADD	R3, R4, R8      // the address after the end of the string
   880		//if the length is small, use loop; otherwise, use vector or srst search
   881		CMPBGE	R4, $16, large
   882	
   883	residual:
      		// Scan one byte at a time until R3 reaches the end address in R8.
   884		CMPBEQ	R3, R8, notfound
   885		MOVBZ	0(R3), R7
   886		LA	1(R3), R3
   887		CMPBNE	R7, R5, residual
   888	
   889	found:
      		// R3 is one past the matching byte: index = R3 - base - 1.
   890		SUB	R6, R3
   891		SUB	$1, R3
   892		MOVD	R3, 0(R2)
   893		RET
   894	
   895	notfound:
   896		MOVD	$-1, 0(R2)
   897		RET
   898	
   899	large:
   900		MOVBZ	·cpu+facilities_hasVX(SB), R1
   901		CMPBNE	R1, $0, vectorimpl
   902	
   903	srstimpl:                       // no vector facility
   904		MOVBZ	R5, R0          // c needs to be in R0, leave until last minute as currently R0 is expected to be 0
   905	srstloop:
   906		WORD	$0xB25E0083     // srst %r8, %r3 (search the range [R3, R8))
   907		BVS	srstloop        // interrupted - continue
   908		BGT	notfoundr0
   909	foundr0:
      		// R8 is used as the address of the match here.
   910		XOR	R0, R0          // reset R0
   911		SUB	R6, R8          // remove base
   912		MOVD	R8, 0(R2)
   913		RET
   914	notfoundr0:
   915		XOR	R0, R0          // reset R0
   916		MOVD	$-1, 0(R2)
   917		RET
   918	
   919	vectorimpl:
   920		//if the address is not 16byte aligned, use loop for the header
   921		MOVD	R3, R8
   922		AND	$15, R8
   923		CMPBGT	R8, $0, notaligned
   924	
   925	aligned:
      		// R8 = end address (base + len); R7 = end rounded down to a
      		// multiple of 16, the limit for whole-block vector loads.
   926		ADD	R6, R4, R8
   927		MOVD	R8, R7
   928		AND	$-16, R7
   929		// replicate c across V17
   930		VLVGB	$0, R5, V19
   931		VREPB	$0, V19, V17
   932	
   933	vectorloop:
      		// Once R3 reaches the rounded-down end, the remaining bytes are
      		// handled by the byte loop.
   934		CMPBGE	R3, R7, residual
   935		VL	0(R3), V16    // load string to be searched into V16
   936		ADD	$16, R3
   937		VFEEBS	V16, V17, V18 // search V17 in V16 and set conditional code accordingly
   938		BVS	vectorloop
   939	
   940		// when vector search found c in the string
      		// result = (block start - base) + index within the block
   941		VLGVB	$7, V18, R7   // load 7th element of V18 containing index into R7
   942		SUB	$16, R3
   943		SUB	R6, R3
   944		ADD	R3, R7
   945		MOVD	R7, 0(R2)
   946		RET
   947	
   948	notaligned:
      		// R8 = next 16-byte boundary above R3.
   949		MOVD	R3, R8
   950		AND	$-16, R8
   951		ADD     $16, R8
   952	notalignedloop:
   953		CMPBEQ	R3, R8, aligned
   954		MOVBZ	0(R3), R7
   955		LA	1(R3), R3
   956		CMPBNE	R7, R5, notalignedloop
   957		BR	found
   958	
   959	TEXT runtime·return0(SB), NOSPLIT, $0
		// Sets R3 to 0 and returns.
		// NOTE(review): presumably R3 is where the caller expects the result;
		// the callers are not visible here -- confirm.
   960		MOVW	$0, R3
   961		RET
   962	
   963	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
   964	// Must obey the gcc calling convention.
	// The result is left in R2. R1, R3, R4 and R5 are used as scratch to hold
	// the incoming values of g, R10, LR and R11 across the call to load_g.
   965	TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
   966		// g (R13), R10, R11 and LR (R14) are callee-save in the C ABI, so save them
   967		MOVD	g, R1
   968		MOVD	R10, R3
   969		MOVD	LR, R4
   970		MOVD	R11, R5
   971	
   972		BL	runtime·load_g(SB)	// clobbers g (R13), R10, R11
		// Follow g -> m -> curg -> stack.hi, reusing R2 for each step.
   973		MOVD	g_m(g), R2
   974		MOVD	m_curg(R2), R2
   975		MOVD	(g_stack+stack_hi)(R2), R2
   976	
		// Put back the registers saved on entry before returning to C.
   977		MOVD	R1, g
   978		MOVD	R3, R10
   979		MOVD	R4, LR
   980		MOVD	R5, R11
   981		RET
   982	
   983	// The top-most function running on a goroutine
   984	// returns to goexit+PCQuantum.
	// The instruction bytes are laid out by hand: a 2-byte nop, the call to
	// goexit1, and a trailing 2-byte nop.
   985	TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
		// Leading padding, so that goexit+PCQuantum is not the function's
		// first byte (presumably PCQuantum is 2 on s390x -- confirm).
   986		BYTE $0x07; BYTE $0x00; // 2-byte nop
   987		BL	runtime·goexit1(SB)	// does not return
   988		// traceback from goexit1 must hit code range of goexit
   989		BYTE $0x07; BYTE $0x00; // 2-byte nop
   990	
   991	TEXT runtime·sigreturn(SB),NOSPLIT,$0-0
		// No arguments, no results ($0-0); returns immediately.
		// NOTE(review): presumably exists only so the symbol is defined on
		// this platform -- confirm against its users.
   992		RET
   993	
   994	TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
	        // No arguments, no results; the body is a bare RET.
   995	        // Stores are already ordered on s390x, so this is just a
   996	        // compile barrier.
   997		RET
   998	
   999	TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
		// Entry point taking two strings (base pointer and length each) and
		// an 8-byte result slot at offset 32. It only marshals registers for
		// cmpbody: R3/R4 = s1 base/len, R5/R6 = s2 base/len, R7 = address of
		// the result slot. cmpbody stores the result and returns to our
		// caller (this is a branch, not a call).
  1000		MOVD	s1_base+0(FP), R3
  1001		MOVD	s1_len+8(FP), R4
  1002		MOVD	s2_base+16(FP), R5
  1003		MOVD	s2_len+24(FP), R6
  1004		LA	ret+32(FP), R7
  1005		BR	runtime·cmpbody(SB)
  1006	
  1007	TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
		// Same marshalling as runtime·cmpstring, but for arguments laid out
		// 24 bytes apart: the words at offsets 0/8 and 24/32 are loaded as
		// base/len (offsets 16 and 40 are not read), and the result slot is
		// at offset 48. R3/R4 = first base/len, R5/R6 = second base/len,
		// R7 = address of the result slot; cmpbody stores the result.
  1008		MOVD	s1+0(FP), R3
  1009		MOVD	s1+8(FP), R4
  1010		MOVD	s2+24(FP), R5
  1011		MOVD	s2+32(FP), R6
  1012		LA	res+48(FP), R7
  1013		BR	runtime·cmpbody(SB)
  1014	
  1015	// input:
  1016	//   R3 = a
  1017	//   R4 = alen
  1018	//   R5 = b
  1019	//   R6 = blen
  1020	//   R7 = address of output word (stores -1/0/1 here)
	// Compares the first min(alen, blen) bytes of a and b; if they are all
	// equal, the lengths decide the result. R8 is used as scratch for the
	// number of bytes still to compare; R3 and R5 are advanced in the loop.
  1021	TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
		// Identical pointers: the common prefix is trivially equal.
  1022		CMPBEQ	R3, R5, cmplengths
		// R8 = min(alen, blen)
  1023		MOVD	R4, R8
  1024		CMPBLE	R4, R6, amin
  1025		MOVD	R6, R8
  1026	amin:
  1027		CMPBEQ	R8, $0, cmplengths
  1028		CMP	R8, $256
  1029		BLE	tail
  1030	loop:
		// Compare 256 bytes per iteration while more than 256 remain, so
		// that tail always sees between 1 and 256 bytes.
  1031		CLC	$256, 0(R3), 0(R5)
  1032		BGT	gt
  1033		BLT	lt
  1034		SUB	$256, R8
  1035		MOVD	$256(R3), R3
  1036		MOVD	$256(R5), R5
  1037		CMP	R8, $256
  1038		BGT	loop
  1039	tail:
		// Execute the CLC at cmpbodyclc with its length taken from R8.
		// R8 is decremented first, presumably because the instruction's
		// length field encodes (bytes - 1) -- confirm against the
		// architecture manual.
  1040		SUB	$1, R8
  1041		EXRL	$runtime·cmpbodyclc(SB), R8
  1042		BGT	gt
  1043		BLT	lt
  1044	cmplengths:
		// Common prefix is equal: the shorter operand sorts first.
  1045		CMP	R4, R6
  1046		BEQ	eq
  1047		BLT	lt
  1048	gt:
  1049		MOVD	$1, 0(R7)
  1050		RET
  1051	lt:
  1052		MOVD	$-1, 0(R7)
  1053		RET
  1054	eq:
  1055		MOVD	$0, 0(R7)
  1056		RET
  1057	
  1058	TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
		// Instruction template: cmpbody runs this CLC through EXRL with the
		// length supplied from a register. It compares memory at 0(R3) with
		// memory at 0(R5). Presumably never called as a function -- confirm.
  1059		CLC	$1, 0(R3), 0(R5)
  1060		RET
  1061	
  1062	// func supportsVX() bool
	// Returns the hasVX facility flag stored in runtime·cpu.
	// R1 is used as the temporary rather than R0 so that R0 is not left
	// holding a non-zero value on return: indexbytebody in this file notes
	// that R0 is expected to be 0 and resets it after use.
  1063	TEXT strings·supportsVX(SB),NOSPLIT,$0-1
  1064		MOVBZ	runtime·cpu+facilities_hasVX(SB), R1
  1065		MOVB	R1, ret+0(FP)
  1066		RET
  1067	
  1068	// func supportsVX() bool
	// Returns the hasVX facility flag stored in runtime·cpu.
	// R1 is used as the temporary rather than R0 so that R0 is not left
	// holding a non-zero value on return: indexbytebody in this file notes
	// that R0 is expected to be 0 and resets it after use.
  1069	TEXT bytes·supportsVX(SB),NOSPLIT,$0-1
  1070		MOVBZ	runtime·cpu+facilities_hasVX(SB), R1
  1071		MOVB	R1, ret+0(FP)
  1072		RET
  1073	
  1074	// func indexShortStr(s, sep string) int
  1075	// Caller must confirm availability of vx facility before calling.
	// Thin wrapper: loads the string headers into the registers that
	// runtime·indexShortStr expects, passes the address of the result slot
	// in R5, and branches there (the result is stored by that routine).
  1076	TEXT strings·indexShortStr(SB),NOSPLIT|NOFRAME,$0-40
  1077		LMG	s+0(FP), R1, R2   // R1=&s[0],   R2=len(s)
  1078		LMG	sep+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1079		MOVD	$ret+32(FP), R5    // R5=&ret
  1080		BR	runtime·indexShortStr(SB)
  1081	
  1082	// func indexShortStr(s, sep []byte) int
  1083	// Caller must confirm availability of vx facility before calling.
	// Thin wrapper: loads pointer and length of each slice (the words at
	// offsets 16 and 40 are not read) into the registers that
	// runtime·indexShortStr expects, passes the address of the result slot
	// in R5, and branches there (the result is stored by that routine).
  1084	TEXT bytes·indexShortStr(SB),NOSPLIT|NOFRAME,$0-56
  1085		LMG	s+0(FP), R1, R2    // R1=&s[0],   R2=len(s)
  1086		LMG	sep+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
  1087		MOVD	$ret+48(FP), R5    // R5=&ret
  1088		BR	runtime·indexShortStr(SB)
  1089	
  1090	// s: string we are searching
  1091	// sep: string to search for
  1092	// R1=&s[0], R2=len(s)
  1093	// R3=&sep[0], R4=len(sep)
  1094	// R5=&ret (int)
  1095	// Caller must confirm availability of vx facility before calling.
	//
	// Stores the index of the first occurrence of sep in s through R5, or -1
	// if there is none (also -1 when len(sep) is 0 or len(sep) > len(s)).
	// Dispatch is on len(sep):
	//   2, 3, 4   dedicated loops that test 16 start positions per iteration,
	//             falling back to index2to16 when fewer than 16 remain
	//   1, 5..16  index2to16, one start position per compare
	//   17..64    one start position per iteration, sep held in 2-4 vectors
	//   > 64      not implemented
	// After setup: R2 = address of the last valid start position,
	// R4 = len(sep)-1, R7 = current start position, V0 = first 16 bytes of sep.
	// Scratch: R6, R8, R9 and vector registers. R0 is deliberately not
	// written: indexbytebody in this file notes that R0 is expected to be 0.
  1096	TEXT runtime·indexShortStr(SB),NOSPLIT|NOFRAME,$0
  1097		CMPBGT	R4, R2, notfound
  1098		ADD	R1, R2
  1099		SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
  1100		CMPBEQ	R4, $0, notfound
  1101		SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
  1102		VLL	R4, (R3), V0 // contains first 16 bytes of sep
  1103		MOVD	R1, R7
  1104	index2plus:
  1105		CMPBNE	R4, $1, index3plus
  1106		MOVD	$15(R7), R9
  1107		CMPBGE	R9, R2, index2to16 // fewer than 16 positions past R7: use generic loop
  1108		VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1109		VONE	V16
  1110		VREPH	$0, V0, V1
  1112	index2loop:
  1113		VL	0(R7), V2          // 16 bytes, even indices
  1114		VL	1(R7), V4          // 16 bytes, odd indices
  1115		VCEQH	V1, V2, V5         // compare even indices
  1116		VCEQH	V1, V4, V6         // compare odd indices
  1117		VSEL	V5, V6, V31, V7    // merge even and odd indices
  1118		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1119		BLT	foundV17
  1120		MOVD	$16(R7), R7        // R7+=16
  1121		ADD	$15, R7, R9
  1122		CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
  1123		CMPBLE	R7, R2, index2to16
  1124		BR	notfound
  1125	
  1126	index3plus:
  1127		CMPBNE	R4, $2, index4plus
  1128		ADD	$15, R7, R9
  1129		CMPBGE	R9, R2, index2to16
  1130		MOVD	$1, R6             // VLL index: load 2 bytes
  1131		VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
  1132		VONE	V16
  1133		VREPH	$0, V0, V1
  1134		VREPB	$2, V0, V8
  1135	index3loop:
  1136		VL	(R7), V2           // load 16-bytes into V2
  1137		VLL	R6, 16(R7), V3     // load 2-bytes into V3
  1138		VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1139		VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1140		VCEQH	V1, V2, V5         // compare 2-byte even indices
  1141		VCEQH	V1, V4, V6         // compare 2-byte odd indices
  1142		VCEQB	V8, V9, V10        // compare last bytes
  1143		VSEL	V5, V6, V31, V7    // merge even and odd indices
  1144		VN	V7, V10, V7        // AND indices with last byte
  1145		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1146		BLT	foundV17
  1147		MOVD	$16(R7), R7        // R7+=16
  1148		ADD	$15, R7, R9
  1149		CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
  1150		CMPBLE	R7, R2, index2to16
  1151		BR	notfound
  1152	
  1153	index4plus:
  1154		CMPBNE	R4, $3, index5plus
  1155		ADD	$15, R7, R9
  1156		CMPBGE	R9, R2, index2to16
  1157		MOVD	$2, R6             // VLL index: load 3 bytes
  1158		VGBM	$0x8888, V29       // 0xff000000ff000000...
  1159		VGBM	$0x2222, V30       // 0x0000ff000000ff00...
  1160		VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
  1161		VONE	V16
  1162		VREPF	$0, V0, V1
  1163	index4loop:
  1164		VL	(R7), V2           // load 16-bytes into V2
  1165		VLL	R6, 16(R7), V3     // load 3-bytes into V3
  1166		VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
  1167		VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
  1168		VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
  1169		VCEQF	V1, V2, V5         // compare index 0, 4, ...
  1170		VCEQF	V1, V4, V6         // compare index 1, 5, ...
  1171		VCEQF	V1, V9, V11        // compare index 2, 6, ...
  1172		VCEQF	V1, V10, V12       // compare index 3, 7, ...
  1173		VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
  1174		VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
  1175		VSEL	V13, V14, V31, V7  // final merge
  1176		VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
  1177		BLT	foundV17
  1178		MOVD	$16(R7), R7        // R7+=16
  1179		ADD	$15, R7, R9
  1180		CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
  1181		CMPBLE	R7, R2, index2to16
  1182		BR	notfound
  1183	
  1184	index5plus:
  1185		CMPBGT	R4, $15, index17plus
  1186	index2to16:
		// Generic path for sep of up to 16 bytes: load len(sep) bytes at each
		// start position and compare the whole vector against V0.
  1187		CMPBGT	R7, R2, notfound
  1188		MOVD	$1(R7), R8
  1189		CMPBGT	R8, R2, index2to16tail
  1190	index2to16loop:
  1191		// unrolled 2x
  1192		VLL	R4, (R7), V1
  1193		VLL	R4, 1(R7), V2
  1194		VCEQGS	V0, V1, V3
  1195		BEQ	found
  1196		MOVD	$1(R7), R7
  1197		VCEQGS	V0, V2, V4
  1198		BEQ	found
  1199		MOVD	$1(R7), R7
  1200		CMPBLT	R7, R2, index2to16loop
  1201		CMPBGT	R7, R2, notfound
  1202	index2to16tail:
		// Exactly one start position (R7 == R2) is left to test.
  1203		VLL	R4, (R7), V1
  1204		VCEQGS	V0, V1, V2
  1205		BEQ	found
  1206		BR	notfound
  1207	
  1208	index17plus:
  1209		CMPBGT	R4, $31, index33plus
  1210		SUB	$16, R4, R6        // R6=VLL index for the bytes of sep past the first 16
  1211		VLL	R6, 16(R3), V1
  1212		VONE	V7
  1213	index17to32loop:
  1214		VL	(R7), V2
  1215		VLL	R6, 16(R7), V3
  1216		VCEQG	V0, V2, V4
  1217		VCEQG	V1, V3, V5
  1218		VN	V4, V5, V6
  1219		VCEQGS	V6, V7, V8
  1220		BEQ	found
  1221		MOVD	$1(R7), R7
  1222		CMPBLE	R7, R2, index17to32loop
  1223		BR	notfound
  1224	
  1225	index33plus:
  1226		CMPBGT	R4, $47, index49plus
  1227		SUB	$32, R4, R6        // R6=VLL index for the bytes of sep past the first 32
  1228		VL	16(R3), V1
  1229		VLL	R6, 32(R3), V2
  1230		VONE	V11
  1231	index33to48loop:
  1232		VL	(R7), V3
  1233		VL	16(R7), V4
  1234		VLL	R6, 32(R7), V5
  1235		VCEQG	V0, V3, V6
  1236		VCEQG	V1, V4, V7
  1237		VCEQG	V2, V5, V8
  1238		VN	V6, V7, V9
  1239		VN	V8, V9, V10
  1240		VCEQGS	V10, V11, V12
  1241		BEQ	found
  1242		MOVD	$1(R7), R7
  1243		CMPBLE	R7, R2, index33to48loop
  1244		BR	notfound
  1245	
  1246	index49plus:
  1247		CMPBGT	R4, $63, index65plus
  1248		SUB	$48, R4, R6        // R6=VLL index for the bytes of sep past the first 48
  1249		VL	16(R3), V1
  1250		VL	32(R3), V2
  1251		VLL	R6, 48(R3), V3
  1252		VONE	V15
  1253	index49to64loop:
  1254		VL	(R7), V4
  1255		VL	16(R7), V5
  1256		VL	32(R7), V6
  1257		VLL	R6, 48(R7), V7
  1258		VCEQG	V0, V4, V8
  1259		VCEQG	V1, V5, V9
  1260		VCEQG	V2, V6, V10
  1261		VCEQG	V3, V7, V11
  1262		VN	V8, V9, V12
  1263		VN	V10, V11, V13
  1264		VN	V12, V13, V14
  1265		VCEQGS	V14, V15, V16
  1266		BEQ	found
  1267		MOVD	$1(R7), R7
  1268		CMPBLE	R7, R2, index49to64loop
		// falls through to notfound when the last start position has been tested
  1269	notfound:
  1270		MOVD	$-1, (R5)
  1271		RET
  1272	
  1273	index65plus:
  1274		// not implemented
		// NOTE(review): this store through (R0) looks like a deliberate fault
		// for unsupported lengths (callers are expected to keep len(sep) <= 64)
		// -- confirm.
  1275		MOVD	$0, (R0)
  1276		RET
  1277	
  1278	foundV17: // index is in doubleword V17[0]
  1279		VLGVG	$0, V17, R8
  1280		ADD	R8, R7
  1281	found:
		// R7 = address of the match; convert it to an index relative to &s[0].
  1282		SUB	R1, R7
  1283		MOVD	R7, (R5)
  1284		RET
  1285	
  1286	// This is called from .init_array and follows the platform, not Go, ABI.
  1287	// We are overly conservative. We could only save the registers we use.
  1288	// However, since this function is only called once per loaded module
  1289	// performance is unimportant.
	// Appends the moduledata passed in R2 to the end of the runtime's list:
	// the current last entry's next field is pointed at it and
	// runtime·lastmoduledatap is updated to it. R1 is the only register
	// written between the save and the restore.
  1290	TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
  1291		// Save R6-R15 in the register save area of the calling function.
  1292		// Don't bother saving F8-F15 as we aren't doing any calls.
  1293		STMG	R6, R15, 48(R15)
  1294	
  1295		// append the argument (passed in R2, as per the ELF ABI) to the
  1296		// moduledata linked list.
		// moduledata_next is an offset constant defined outside this chunk.
  1297		MOVD	runtime·lastmoduledatap(SB), R1
  1298		MOVD	R2, moduledata_next(R1)
  1299		MOVD	R2, runtime·lastmoduledatap(SB)
  1300	
  1301		// Restore R6-R15.
  1302		LMG	48(R15), R6, R15
  1303		RET
  1304	
  1305	TEXT ·checkASM(SB),NOSPLIT,$0-1
		// Stores 1 into the single-byte result slot (frame $0-1) and returns.
		// NOTE(review): the name suggests a sanity check of the assembly
		// environment, but nothing is actually tested here -- confirm intent.
  1306		MOVB	$1, ret+0(FP)
  1307		RET

View as plain text