...
Run Format

Text file src/runtime/asm_amd64p32.s

Documentation: runtime

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	TEXT runtime·rt0_go(SB),NOSPLIT,$0	// process entry: align the stack, init g0, probe CPU features, set up TLS, then start the scheduler
    11		// copy arguments forward on an even stack
    12		MOVL	argc+0(FP), AX
    13		MOVL	argv+4(FP), BX
    14		MOVL	SP, CX
    15		SUBL	$128, CX		// plenty of scratch
    16		ANDL	$~15, CX		// round down to a 16-byte boundary
    17		MOVL	CX, SP
    18	
    19		MOVL	AX, 16(SP)		// stash argc; reloaded before the call to args
    20		MOVL	BX, 24(SP)		// stash argv; reloaded before the call to args
    21		
    22		// create istack out of the given (operating system) stack.
    23		MOVL	$runtime·g0(SB), DI
    24		LEAL	(-64*1024+104)(SP), BX	// BX = low bound / guard for g0's stack
    25		MOVL	BX, g_stackguard0(DI)
    26		MOVL	BX, g_stackguard1(DI)
    27		MOVL	BX, (g_stack+stack_lo)(DI)
    28		MOVL	SP, (g_stack+stack_hi)(DI)
    29	
    30		// find out information about the processor we're on
    31		MOVL	$0, AX
    32		CPUID
    33		CMPL	AX, $0
    34		JE	nocpuinfo
    35		MOVL	AX, SI			// SI = highest basic CPUID leaf, tested at eax7 (CPUID never writes SI; MOVL leaves flags alone)
    36		CMPL	BX, $0x756E6547  // "Genu"
    37		JNE	notintel
    38		CMPL	DX, $0x49656E69  // "ineI"
    39		JNE	notintel
    40		CMPL	CX, $0x6C65746E  // "ntel"
    41		JNE	notintel
    42		MOVB	$1, runtime·isIntel(SB)
    43	notintel:
    44	
    45		// Load EAX=1 cpuid flags
    46		MOVL	$1, AX
    47		CPUID
    48		MOVL	AX, runtime·processorVersionInfo(SB)
    49	
    50		TESTL	$(1<<26), DX // SSE2
    51		SETNE	runtime·support_sse2(SB)
    52	
    53		TESTL	$(1<<9), CX // SSSE3
    54		SETNE	runtime·support_ssse3(SB)
    55	
    56		TESTL	$(1<<19), CX // SSE4.1
    57		SETNE	runtime·support_sse41(SB)
    58	
    59		TESTL	$(1<<20), CX // SSE4.2
    60		SETNE	runtime·support_sse42(SB)
    61	
    62		TESTL	$(1<<23), CX // POPCNT
    63		SETNE	runtime·support_popcnt(SB)
    64	
    65		TESTL	$(1<<25), CX // AES
    66		SETNE	runtime·support_aes(SB)
    67	
    68		TESTL	$(1<<27), CX // OSXSAVE
    69		SETNE	runtime·support_osxsave(SB)
    70	
    71		// If OS support for XMM and YMM is not present
    72		// support_avx will be set back to false later.
    73		TESTL	$(1<<28), CX // AVX
    74		SETNE	runtime·support_avx(SB)
    75	
    76	eax7:
    77		// Load EAX=7/ECX=0 cpuid flags
    78		CMPL	SI, $7			// SI was saved from CPUID leaf 0 above
    79		JLT	osavx			// leaf 7 not available: skip the extended feature probes
    80		MOVL	$7, AX
    81		MOVL	$0, CX
    82		CPUID
    83	
    84		TESTL	$(1<<3), BX // BMI1
    85		SETNE	runtime·support_bmi1(SB)
    86	
    87		// If OS support for XMM and YMM is not present
    88		// support_avx2 will be set back to false later.
    89		TESTL	$(1<<5), BX // AVX2
    90		SETNE	runtime·support_avx2(SB)
    91	
    92		TESTL	$(1<<8), BX // BMI2
    93		SETNE	runtime·support_bmi2(SB)
    94	
    95		TESTL	$(1<<9), BX // ERMS
    96		SETNE	runtime·support_erms(SB)
    97	
    98	osavx:
    99		// nacl does not support XGETBV to test
   100		// for XMM and YMM OS support.
   101	#ifndef GOOS_nacl
   102		CMPB	runtime·support_osxsave(SB), $1
   103		JNE	noavx
   104		MOVL	$0, CX
   105		// For XGETBV, OSXSAVE bit is required and sufficient
   106		XGETBV
   107		ANDL	$6, AX
   108		CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
   109		JE nocpuinfo	// both enabled: keep the AVX/AVX2 flags as probed
   110	#endif
   111	noavx:
   112		MOVB $0, runtime·support_avx(SB)
   113		MOVB $0, runtime·support_avx2(SB)
   114	
   115	nocpuinfo:
   116	
   117	needtls:
   118		LEAL	runtime·m0+m_tls(SB), DI
   119		CALL	runtime·settls(SB)
   120	
   121		// store through it, to make sure it works
   122		get_tls(BX)
   123		MOVQ	$0x123, g(BX)
   124		MOVQ	runtime·m0+m_tls(SB), AX
   125		CMPQ	AX, $0x123
   126		JEQ 2(PC)		// value read back through m0.tls matches: skip the abort
   127		MOVL	AX, 0	// abort
   128	ok:
   129		// set the per-goroutine and per-mach "registers"
   130		get_tls(BX)
   131		LEAL	runtime·g0(SB), CX
   132		MOVL	CX, g(BX)
   133		LEAL	runtime·m0(SB), AX
   134	
   135		// save m->g0 = g0
   136		MOVL	CX, m_g0(AX)
   137		// save m0 to g0->m
   138		MOVL	AX, g_m(CX)
   139	
   140		CLD				// convention is D is always left cleared
   141		CALL	runtime·check(SB)
   142	
   143		MOVL	16(SP), AX		// copy argc
   144		MOVL	AX, 0(SP)
   145		MOVL	24(SP), AX		// copy argv
   146		MOVL	AX, 4(SP)
   147		CALL	runtime·args(SB)
   148		CALL	runtime·osinit(SB)
   149		CALL	runtime·schedinit(SB)
   150	
   151		// create a new goroutine to start program
   152		MOVL	$runtime·mainPC(SB), AX	// entry
   153		MOVL	$0, 0(SP)		// first newproc argument is 0
   154		MOVL	AX, 4(SP)		// second newproc argument is the address of mainPC
   155		CALL	runtime·newproc(SB)
   156	
   157		// start this M
   158		CALL	runtime·mstart(SB)
   159	
   160		MOVL	$0xf1, 0xf1  // crash
   161		RET
   162	
   163	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte word initialized with the address of runtime·main
   164	GLOBL	runtime·mainPC(SB),RODATA,$4	// read-only, 4 bytes
   165	
   166	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// no arguments, no results: executes INT $3 and returns
   167		INT $3	// software interrupt 3
   168		RET
   169	
   170	TEXT runtime·asminit(SB),NOSPLIT,$0-0	// intentionally empty on this architecture
   171		// No per-thread init.
   172		RET
   173	
   174	/*
   175	 *  go-routine
   176	 */
   177	
   178	// void gosave(Gobuf*)
   179	// save state in Gobuf; setjmp
   180	TEXT runtime·gosave(SB), NOSPLIT, $0-4	// record the caller's SP, PC and the current g in *buf; ret is zeroed and ctxt must already be zero
   181		MOVL	buf+0(FP), AX	// gobuf
   182		LEAL	buf+0(FP), BX	// caller's SP
   183		MOVL	BX, gobuf_sp(AX)
   184		MOVL	0(SP), BX		// caller's PC
   185		MOVL	BX, gobuf_pc(AX)
   186		MOVQ	$0, gobuf_ret(AX)
   187		// Assert ctxt is zero. See func save.
   188		MOVL	gobuf_ctxt(AX), BX
   189		TESTL	BX, BX
   190		JZ	2(PC)	// ctxt == 0: skip the badctxt call
   191		CALL	runtime·badctxt(SB)
   192		get_tls(CX)
   193		MOVL	g(CX), BX	// BX = current g
   194		MOVL	BX, gobuf_g(AX)
   195		RET
   196	
   197	// void gogo(Gobuf*)
   198	// restore state from Gobuf; longjmp
   199	TEXT runtime·gogo(SB), NOSPLIT, $8-4	// make buf.g the current g, load SP/ctxt/ret from buf, clear those fields, and jump to buf.pc
   200		MOVL	buf+0(FP), BX		// gobuf
   201		MOVL	gobuf_g(BX), DX
   202		MOVL	0(DX), CX		// make sure g != nil
   203		get_tls(CX)
   204		MOVL	DX, g(CX)	// current g = buf.g
   205		MOVL	gobuf_sp(BX), SP	// restore SP
   206		MOVL	gobuf_ctxt(BX), DX	// DX = saved ctxt
   207		MOVQ	gobuf_ret(BX), AX	// AX = saved ret
   208		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   209		MOVQ	$0, gobuf_ret(BX)
   210		MOVL	$0, gobuf_ctxt(BX)
   211		MOVL	gobuf_pc(BX), BX
   212		JMP	BX	// resume at the saved PC; does not return to the caller
   213	
   214	// func mcall(fn func(*g))
   215	// Switch to m->g0's stack, call fn(g).
   216	// Fn must never return. It should gogo(&g->sched)
   217	// to keep running g.
   218	TEXT runtime·mcall(SB), NOSPLIT, $0-4	// save the caller's PC/SP in g->sched, switch to m->g0's stack, and call fn with the old g pushed as its argument
   219		MOVL	fn+0(FP), DI
   220		
   221		get_tls(CX)
   222		MOVL	g(CX), AX	// save state in g->sched
   223		MOVL	0(SP), BX	// caller's PC
   224		MOVL	BX, (g_sched+gobuf_pc)(AX)
   225		LEAL	fn+0(FP), BX	// caller's SP
   226		MOVL	BX, (g_sched+gobuf_sp)(AX)
   227		MOVL	AX, (g_sched+gobuf_g)(AX)
   228	
   229		// switch to m->g0 & its stack, call fn
   230		MOVL	g(CX), BX
   231		MOVL	g_m(BX), BX
   232		MOVL	m_g0(BX), SI
   233		CMPL	SI, AX	// if g == m->g0 call badmcall
   234		JNE	3(PC)	// g != m->g0: skip the two-instruction badmcall jump
   235		MOVL	$runtime·badmcall(SB), AX
   236		JMP	AX
   237		MOVL	SI, g(CX)	// g = m->g0
   238		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   239		PUSHQ	AX	// old g, pushed on the g0 stack for fn
   240		MOVL	DI, DX	// DX = fn (the func value)
   241		MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   242		CALL	DI
   243		POPQ	AX	// only reached if fn returned
   244		MOVL	$runtime·badmcall2(SB), AX
   245		JMP	AX
   246		RET
   247	
   248	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   249	// of the G stack. We need to distinguish the routine that
   250	// lives at the bottom of the G stack from the one that lives
   251	// at the top of the system stack because the one at the top of
   252	// the system stack terminates the stack walk (see topofstack()).
   253	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// marker only: systemstack stores this address as g->sched.pc
   254		RET
   255	
   256	// func systemstack(fn func())
   257	TEXT runtime·systemstack(SB), NOSPLIT, $0-4	// run fn on m->g0's stack; tail-calls fn directly when already on g0 or gsignal
   258		MOVL	fn+0(FP), DI	// DI = fn
   259		get_tls(CX)
   260		MOVL	g(CX), AX	// AX = g
   261		MOVL	g_m(AX), BX	// BX = m
   262	
   263		MOVL	m_gsignal(BX), DX	// DX = gsignal
   264		CMPL	AX, DX
   265		JEQ	noswitch
   266	
   267		MOVL	m_g0(BX), DX	// DX = g0
   268		CMPL	AX, DX
   269		JEQ	noswitch
   270	
   271		MOVL	m_curg(BX), R8	// R8 = m->curg
   272		CMPL	AX, R8
   273		JEQ	switch
   274		
   275		// Not g0, not curg. Must be gsignal, but that's not allowed.
   276		// Hide call from linker nosplit analysis.
   277		MOVL	$runtime·badsystemstack(SB), AX
   278		CALL	AX
   279	
   280	switch:
   281		// save our state in g->sched. Pretend to
   282		// be systemstack_switch if the G stack is scanned.
   283		MOVL	$runtime·systemstack_switch(SB), SI
   284		MOVL	SI, (g_sched+gobuf_pc)(AX)
   285		MOVL	SP, (g_sched+gobuf_sp)(AX)
   286		MOVL	AX, (g_sched+gobuf_g)(AX)
   287	
   288		// switch to g0
   289		MOVL	DX, g(CX)	// DX still holds m->g0 from the check above
   290		MOVL	(g_sched+gobuf_sp)(DX), SP
   291	
   292		// call target function
   293		MOVL	DI, DX	// DX = fn (the func value)
   294		MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   295		CALL	DI
   296	
   297		// switch back to g
   298		get_tls(CX)
   299		MOVL	g(CX), AX
   300		MOVL	g_m(AX), BX
   301		MOVL	m_curg(BX), AX	// AX = m->curg, the g we switched away from
   302		MOVL	AX, g(CX)
   303		MOVL	(g_sched+gobuf_sp)(AX), SP	// restore the SP saved at switch
   304		MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP
   305		RET
   306	
   307	noswitch:
   308		// already on m stack, just call directly
   309		// Using a tail call here cleans up tracebacks since we won't stop
   310		// at an intermediate systemstack.
   311		MOVL	DI, DX
   312		MOVL	0(DI), DI
   313		JMP	DI
   314	
   315	/*
   316	 * support for morestack
   317	 */
   318	
   319	// Called during function prolog when more stack is needed.
   320	//
   321	// The traceback routines see morestack on a g0 as being
   322	// the top of a stack (for example, morestack calling newstack
   323	// calling the scheduler calling newm calling gc), so we must
   324	// record an argument size. For that purpose, it has no arguments.
   325	TEXT runtime·morestack(SB),NOSPLIT,$0-0	// record f's caller in m->morebuf and f's context in g->sched, then call newstack on m->g0's stack; DX carries ctxt on entry
   326		get_tls(CX)
   327		MOVL	g(CX), BX
   328		MOVL	g_m(BX), BX	// BX = m
   329	
   330		// Cannot grow scheduler stack (m->g0).
   331		MOVL	m_g0(BX), SI
   332		CMPL	g(CX), SI
   333		JNE	3(PC)	// g != m->g0: skip the call and the load from address 0
   334		CALL	runtime·badmorestackg0(SB)
   335		MOVL	0, AX	// load from address 0; presumably a deliberate crash if the call returns
   336	
   337		// Cannot grow signal stack (m->gsignal).
   338		MOVL	m_gsignal(BX), SI
   339		CMPL	g(CX), SI
   340		JNE	3(PC)	// g != m->gsignal: skip the call and the load from address 0
   341		CALL	runtime·badmorestackgsignal(SB)
   342		MOVL	0, AX	// load from address 0; presumably a deliberate crash if the call returns
   343	
   344		// Called from f.
   345		// Set m->morebuf to f's caller.
   346		MOVL	8(SP), AX	// f's caller's PC
   347		MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   348		LEAL	16(SP), AX	// f's caller's SP
   349		MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   350		get_tls(CX)
   351		MOVL	g(CX), SI	// SI = current g
   352		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   353	
   354		// Set g->sched to context in f.
   355		MOVL	0(SP), AX // f's PC
   356		MOVL	AX, (g_sched+gobuf_pc)(SI)
   357		MOVL	SI, (g_sched+gobuf_g)(SI)
   358		LEAL	8(SP), AX // f's SP
   359		MOVL	AX, (g_sched+gobuf_sp)(SI)
   360		MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX is whatever the caller left there; morestack_noctxt zeroes it first
   361	
   362		// Call newstack on m->g0's stack.
   363		MOVL	m_g0(BX), BX
   364		MOVL	BX, g(CX)	// g = m->g0
   365		MOVL	(g_sched+gobuf_sp)(BX), SP
   366		CALL	runtime·newstack(SB)
   367		MOVL	$0, 0x1003	// crash if newstack returns
   368		RET
   369	
   370	// morestack trampolines
   371	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0	// morestack entry that first clears DX, so the saved ctxt is zero
   372		MOVL	$0, DX
   373		JMP	runtime·morestack(SB)
   374	
   375	// reflectcall: call a function with the given argument list
   376	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   377	// we don't have variable-sized frames, so we use a small number
   378	// of constant-sized-frame functions to encode a few bits of size in the pc.
   379	// Caution: ugly multiline assembly macros in your future!
   380	
   381	#define DISPATCH(NAME,MAXSIZE)		\
   382		CMPL	CX, $MAXSIZE;	/* CX = argsize, loaded by the invoking function */	\
   383		JA	3(PC);		/* argsize > MAXSIZE (unsigned): skip the two-instruction jump */	\
   384		MOVL	$NAME(SB), AX;		\
   385		JMP	AX
   386	// Note: can't just "JMP NAME(SB)" - bad inlining results.
   387	
   388	TEXT reflect·call(SB), NOSPLIT, $0-0	// alias: jumps straight to ·reflectcall
   389		JMP	·reflectcall(SB)
   390	
   391	TEXT ·reflectcall(SB), NOSPLIT, $0-20	// jump to the first callN whose fixed frame size N is >= argsize; badreflectcall if none fits
   392		MOVLQZX argsize+12(FP), CX	// CX = argsize, zero-extended
   393		DISPATCH(runtime·call16, 16)
   394		DISPATCH(runtime·call32, 32)
   395		DISPATCH(runtime·call64, 64)
   396		DISPATCH(runtime·call128, 128)
   397		DISPATCH(runtime·call256, 256)
   398		DISPATCH(runtime·call512, 512)
   399		DISPATCH(runtime·call1024, 1024)
   400		DISPATCH(runtime·call2048, 2048)
   401		DISPATCH(runtime·call4096, 4096)
   402		DISPATCH(runtime·call8192, 8192)
   403		DISPATCH(runtime·call16384, 16384)
   404		DISPATCH(runtime·call32768, 32768)
   405		DISPATCH(runtime·call65536, 65536)
   406		DISPATCH(runtime·call131072, 131072)
   407		DISPATCH(runtime·call262144, 262144)
   408		DISPATCH(runtime·call524288, 524288)
   409		DISPATCH(runtime·call1048576, 1048576)
   410		DISPATCH(runtime·call2097152, 2097152)
   411		DISPATCH(runtime·call4194304, 4194304)
   412		DISPATCH(runtime·call8388608, 8388608)
   413		DISPATCH(runtime·call16777216, 16777216)
   414		DISPATCH(runtime·call33554432, 33554432)
   415		DISPATCH(runtime·call67108864, 67108864)
   416		DISPATCH(runtime·call134217728, 134217728)
   417		DISPATCH(runtime·call268435456, 268435456)
   418		DISPATCH(runtime·call536870912, 536870912)
   419		DISPATCH(runtime·call1073741824, 1073741824)
   420		MOVL	$runtime·badreflectcall(SB), AX	// argsize exceeds every frame size above
   421		JMP	AX
   422	
   423	#define CALLFN(NAME,MAXSIZE)			\
   424	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   425		NO_LOCAL_POINTERS;			\
   426		/* copy arguments to stack */		\
   427		MOVL	argptr+8(FP), SI;		\
   428		MOVL	argsize+12(FP), CX;		\
   429		MOVL	SP, DI;				\
   430		REP;MOVSB;	/* copy CX bytes from (SI) to (DI) */	\
   431		/* call function */			\
   432		MOVL	f+4(FP), DX;	/* DX = f (the func value) */	\
   433		MOVL	(DX), AX;	/* AX = code pointer stored in the first word of f */	\
   434		CALL	AX;				\
   435		/* copy return values back */		\
   436		MOVL	argtype+0(FP), DX;		\
   437		MOVL	argptr+8(FP), DI;		\
   438		MOVL	argsize+12(FP), CX;		\
   439		MOVL	retoffset+16(FP), BX;		\
   440		MOVL	SP, SI;				\
   441		ADDL	BX, DI;	/* DI = argptr + retoffset */	\
   442		ADDL	BX, SI;	/* SI = SP + retoffset */	\
   443		SUBL	BX, CX;	/* CX = argsize - retoffset */	\
   444		CALL	callRet<>(SB);	/* takes DX, DI, SI, CX in registers */	\
   445		RET
   446	
   447	// callRet copies return values back at the end of call*. This is a
   448	// separate function so it can allocate stack space for the arguments
   449	// to reflectcallmove. It does not follow the Go ABI; it expects its
   450	// arguments in registers.
   451	TEXT callRet<>(SB), NOSPLIT, $16-0	// spill DX, DI, SI, CX into a 16-byte frame and call reflectcallmove
   452		MOVL	DX, 0(SP)	// CALLFN loads argtype into DX
   453		MOVL	DI, 4(SP)	// CALLFN sets DI = argptr + retoffset
   454		MOVL	SI, 8(SP)	// CALLFN sets SI = its SP + retoffset
   455		MOVL	CX, 12(SP)	// CALLFN sets CX = argsize - retoffset
   456		CALL	runtime·reflectcallmove(SB)
   457		RET
   458	
   459	CALLFN(·call16, 16)	// one fixed-frame wrapper per power of two from 16 to 1<<30, matching the DISPATCH list in ·reflectcall
   460	CALLFN(·call32, 32)
   461	CALLFN(·call64, 64)
   462	CALLFN(·call128, 128)
   463	CALLFN(·call256, 256)
   464	CALLFN(·call512, 512)
   465	CALLFN(·call1024, 1024)
   466	CALLFN(·call2048, 2048)
   467	CALLFN(·call4096, 4096)
   468	CALLFN(·call8192, 8192)
   469	CALLFN(·call16384, 16384)
   470	CALLFN(·call32768, 32768)
   471	CALLFN(·call65536, 65536)
   472	CALLFN(·call131072, 131072)
   473	CALLFN(·call262144, 262144)
   474	CALLFN(·call524288, 524288)
   475	CALLFN(·call1048576, 1048576)
   476	CALLFN(·call2097152, 2097152)
   477	CALLFN(·call4194304, 4194304)
   478	CALLFN(·call8388608, 8388608)
   479	CALLFN(·call16777216, 16777216)
   480	CALLFN(·call33554432, 33554432)
   481	CALLFN(·call67108864, 67108864)
   482	CALLFN(·call134217728, 134217728)
   483	CALLFN(·call268435456, 268435456)
   484	CALLFN(·call536870912, 536870912)
   485	CALLFN(·call1073741824, 1073741824)
   486	
   487	TEXT runtime·procyield(SB),NOSPLIT,$0-0	// execute PAUSE `cycles` times; NOTE(review): frame declares 0 argument bytes but a 4-byte argument is read — confirm against the Go declaration
   488		MOVL	cycles+0(FP), AX
   489	again:
   490		PAUSE
   491		SUBL	$1, AX
   492		JNZ	again	// loop until the counter reaches zero (cycles == 0 would wrap around first)
   493		RET
   494	
   495	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0	// emits no fence instruction; the body is a bare RET
   496		// Stores are already ordered on x86, so this is just a
   497		// compile barrier.
   498		RET
   499	
   500	// void jmpdefer(fn, sp);
   501	// called from deferreturn.
   502	// 1. pop the caller
   503	// 2. sub 5 bytes from the callers return
   504	// 3. jmp to the argument
   505	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8	// rewind the caller's return address by 5 bytes and jump to the deferred function fv
   506		MOVL	fv+0(FP), DX	// DX = fv (the func value)
   507		MOVL	argp+4(FP), BX
   508		LEAL	-8(BX), SP	// caller sp after CALL
   509		SUBL	$5, (SP)	// return to CALL again
   510		MOVL	0(DX), BX	// BX = code pointer stored in the first word of fv
   511		JMP	BX	// but first run the deferred function
   512	
   513	// func asmcgocall(fn, arg unsafe.Pointer) int32
   514	// Not implemented.
   515	TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12	// stub: the body only loads from address 0
   516		MOVL	0, AX	// load from address 0; presumably a deliberate crash, since this is not implemented
   517		RET
   518	
   519	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   520	// Not implemented.
   521	TEXT runtime·cgocallback(SB),NOSPLIT,$0-16	// stub: the body only loads from address 0
   522		MOVL	0, AX	// load from address 0; presumably a deliberate crash, since this is not implemented
   523		RET
   524	
   525	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   526	// Not implemented.
   527	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16	// stub: the body only loads from address 0
   528		MOVL	0, AX	// load from address 0; presumably a deliberate crash, since this is not implemented
   529		RET
   530	
   531	// void setg(G*); set g. for use by needm.
   532	// Not implemented.
   533	TEXT runtime·setg(SB), NOSPLIT, $0-4	// stub: the body only loads from address 0 and never stores g
   534		MOVL	0, AX	// load from address 0; presumably a deliberate crash, since this is not implemented
   535		RET
   536	
   537	// check that SP is in range [g->stack.lo, g->stack.hi)
   538	TEXT runtime·stackcheck(SB), NOSPLIT, $0-0	// load from address 0 unless stack.lo < SP < stack.hi (unsigned) for the current g
   539		get_tls(CX)
   540		MOVL	g(CX), AX	// AX = current g
   541		CMPL	(g_stack+stack_hi)(AX), SP
   542		JHI	2(PC)	// stack.hi > SP: upper bound holds, skip the load from 0
   543		MOVL	0, AX	// load from address 0; presumably a deliberate crash
   544		CMPL	SP, (g_stack+stack_lo)(AX)
   545		JHI	2(PC)	// SP > stack.lo: lower bound holds, skip the load from 0
   546		MOVL	0, AX	// load from address 0; presumably a deliberate crash
   547		RET
   548	
   549	// int64 runtime·cputicks(void)
   550	TEXT runtime·cputicks(SB),NOSPLIT,$0-0	// return the 64-bit timestamp counter read by RDTSC
   551		RDTSC	// DX:AX = timestamp counter (high:low 32 bits)
   552		SHLQ	$32, DX	// move the high half into bits 32-63
   553		ADDQ	DX, AX	// AX = combined 64-bit count
   554		MOVQ	AX, ret+0(FP)
   555		RET
   556	
   557	// hash function using AES hardware instructions
   558	// For now, our one amd64p32 system (NaCl) does not
   559	// support using AES instructions, so we have not bothered to
   560	// write the implementations. Can copy and adjust the ones
   561	// in asm_amd64.s when the time comes.
   562	
   563	TEXT runtime·aeshash(SB),NOSPLIT,$0-20	// placeholder: no hashing is performed
   564		MOVL	AX, ret+16(FP)	// stores whatever AX holds on entry; AX is never set here
   565		RET
   566	
   567	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12	// placeholder: no hashing is performed
   568		MOVL	AX, ret+8(FP)	// stores whatever AX holds on entry; AX is never set here
   569		RET
   570	
   571	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12	// placeholder: no hashing is performed
   572		MOVL	AX, ret+8(FP)	// stores whatever AX holds on entry; AX is never set here
   573		RET
   574	
   575	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12	// placeholder: no hashing is performed
   576		MOVL	AX, ret+8(FP)	// stores whatever AX holds on entry; AX is never set here
   577		RET
   578	
   579	// memequal(p, q unsafe.Pointer, size uintptr) bool
   580	TEXT runtime·memequal(SB),NOSPLIT,$0-17	// report whether the size bytes at a and b are equal; identical pointers short-circuit to true
   581		MOVL	a+0(FP), SI
   582		MOVL	b+4(FP), DI
   583		CMPL	SI, DI
   584		JEQ	eq	// same address: equal without reading memory
   585		MOVL	size+8(FP), BX
   586		CALL	runtime·memeqbody(SB)	// takes SI, DI, BX; result in AX
   587		MOVB	AX, ret+16(FP)
   588		RET
   589	eq:
   590		MOVB    $1, ret+16(FP)
   591		RET
   592	
   593	// memequal_varlen(a, b unsafe.Pointer) bool
   594	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9	// like memequal, but the byte count is read from the closure in DX
   595		MOVL    a+0(FP), SI
   596		MOVL    b+4(FP), DI
   597		CMPL    SI, DI
   598		JEQ     eq	// same address: equal without reading memory
   599		MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   600		CALL    runtime·memeqbody(SB)	// takes SI, DI, BX; result in AX
   601		MOVB    AX, ret+8(FP)
   602		RET
   603	eq:
   604		MOVB    $1, ret+8(FP)
   605		RET
   606	
   607	// a in SI
   608	// b in DI
   609	// count in BX
   610	TEXT runtime·memeqbody(SB),NOSPLIT,$0-0	// register-ABI helper: AX = 1 if the BX bytes at SI and DI are equal, else 0
   611		XORQ	AX, AX	// default result: not equal
   612	
   613		CMPQ	BX, $8
   614		JB	small
   615		
   616		// 64 bytes at a time using xmm registers
   617	hugeloop:
   618		CMPQ	BX, $64
   619		JB	bigloop
   620		MOVOU	(SI), X0
   621		MOVOU	(DI), X1
   622		MOVOU	16(SI), X2
   623		MOVOU	16(DI), X3
   624		MOVOU	32(SI), X4
   625		MOVOU	32(DI), X5
   626		MOVOU	48(SI), X6
   627		MOVOU	48(DI), X7
   628		PCMPEQB	X1, X0
   629		PCMPEQB	X3, X2
   630		PCMPEQB	X5, X4
   631		PCMPEQB	X7, X6
   632		PAND	X2, X0
   633		PAND	X6, X4
   634		PAND	X4, X0	// X0 = AND of all four per-byte equality masks
   635		PMOVMSKB X0, DX
   636		ADDQ	$64, SI
   637		ADDQ	$64, DI
   638		SUBQ	$64, BX
   639		CMPL	DX, $0xffff	// all 16 mask bits set means every byte in the 64-byte chunk matched
   640		JEQ	hugeloop
   641		RET	// mismatch: AX is still 0
   642	
   643		// 8 bytes at a time using 64-bit register
   644	bigloop:
   645		CMPQ	BX, $8
   646		JBE	leftover
   647		MOVQ	(SI), CX
   648		MOVQ	(DI), DX
   649		ADDQ	$8, SI
   650		ADDQ	$8, DI
   651		SUBQ	$8, BX
   652		CMPQ	CX, DX
   653		JEQ	bigloop
   654		RET	// mismatch: AX is still 0
   655	
   656		// remaining 0-8 bytes
   657	leftover:
   658		ADDQ	BX, SI
   659		ADDQ	BX, DI
   660		MOVQ	-8(SI), CX	// compare the 8 bytes ending at the end of each buffer
   661		MOVQ	-8(DI), DX
   662		CMPQ	CX, DX
   663		SETEQ	AX
   664		RET
   665	
   666	small:
   667		CMPQ	BX, $0
   668		JEQ	equal	// zero length: ZF is set, so SETEQ at equal yields 1
   669	
   670		LEAQ	0(BX*8), CX
   671		NEGQ	CX	// CX = -(8*BX), used below as the shift count
   672	
   673		CMPB	SI, $0xf8
   674		JA	si_high
   675	
   676		// load at SI won't cross a page boundary.
   677		MOVQ	(SI), SI
   678		JMP	si_finish
   679	si_high:
   680		// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   681		MOVQ	BX, DX
   682		ADDQ	SI, DX
   683		MOVQ	-8(DX), SI
   684		SHRQ	CX, SI
   685	si_finish:
   686	
   687		// same for DI.
   688		CMPB	DI, $0xf8
   689		JA	di_high
   690		MOVQ	(DI), DI
   691		JMP	di_finish
   692	di_high:
   693		MOVQ	BX, DX
   694		ADDQ	DI, DX
   695		MOVQ	-8(DX), DI
   696		SHRQ	CX, DI
   697	di_finish:
   698	
   699		SUBQ	SI, DI
   700		SHLQ	CX, DI	// shift out the bytes beyond the requested length; ZF set iff the wanted bytes match
   701	equal:
   702		SETEQ	AX
   703		RET
   704	
   705	TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// load both strings' base/len into the cmpbody registers and return its AX result
   706		MOVL	s1_base+0(FP), SI
   707		MOVL	s1_len+4(FP), BX
   708		MOVL	s2_base+8(FP), DI
   709		MOVL	s2_len+12(FP), DX
   710		CALL	runtime·cmpbody(SB)
   711		MOVL	AX, ret+16(FP)
   712		RET
   713	
   714	TEXT bytes·Compare(SB),NOSPLIT,$0-28	// load the words at offsets 0/4 of each slice argument into the cmpbody registers and return its AX result
   715		MOVL	s1+0(FP), SI
   716		MOVL	s1+4(FP), BX
   717		MOVL	s2+12(FP), DI
   718		MOVL	s2+16(FP), DX
   719		CALL	runtime·cmpbody(SB)
   720		MOVL	AX, res+24(FP)
   721		RET
   722	
   723	// input:
   724	//   SI = a
   725	//   DI = b
   726	//   BX = alen
   727	//   DX = blen
   728	// output:
   729	//   AX = 1/0/-1
   730	TEXT runtime·cmpbody(SB),NOSPLIT,$0-0	// register-ABI helper: compare the common prefix bytewise, then break ties by length
   731		CMPQ	SI, DI
   732		JEQ	allsame	// same base pointer: only the lengths can differ
   733		CMPQ	BX, DX
   734		MOVQ	DX, R8
   735		CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
   736		CMPQ	R8, $8
   737		JB	small
   738	
   739	loop:
   740		CMPQ	R8, $16
   741		JBE	_0through16
   742		MOVOU	(SI), X0
   743		MOVOU	(DI), X1
   744		PCMPEQB X0, X1
   745		PMOVMSKB X1, AX
   746		XORQ	$0xffff, AX	// convert EQ to NE
   747		JNE	diff16	// branch if at least one byte is not equal
   748		ADDQ	$16, SI
   749		ADDQ	$16, DI
   750		SUBQ	$16, R8
   751		JMP	loop
   752		
   753		// AX = bit mask of differences
   754	diff16:
   755		BSFQ	AX, BX	// index of first byte that differs
   756		XORQ	AX, AX
   757		ADDQ	BX, SI
   758		MOVB	(SI), CX
   759		ADDQ	BX, DI
   760		CMPB	CX, (DI)
   761		SETHI	AX	// AX = 1 if a's byte is greater (unsigned), else 0
   762		LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
   763		RET
   764	
   765		// 0 through 16 bytes left, alen>=8, blen>=8
   766	_0through16:
   767		CMPQ	R8, $8
   768		JBE	_0through8
   769		MOVQ	(SI), AX
   770		MOVQ	(DI), CX
   771		CMPQ	AX, CX
   772		JNE	diff8
   773	_0through8:
   774		ADDQ	R8, SI
   775		ADDQ	R8, DI
   776		MOVQ	-8(SI), AX	// compare the last 8 bytes of the common prefix
   777		MOVQ	-8(DI), CX
   778		CMPQ	AX, CX
   779		JEQ	allsame
   780	
   781		// AX and CX contain parts of a and b that differ.
   782	diff8:
   783		BSWAPQ	AX	// reverse order of bytes
   784		BSWAPQ	CX
   785		XORQ	AX, CX
   786		BSRQ	CX, CX	// index of highest bit difference
   787		SHRQ	CX, AX	// move a's bit to bottom
   788		ANDQ	$1, AX	// mask bit
   789		LEAQ	-1(AX*2), AX // 1/0 => +1/-1
   790		RET
   791	
   792		// 0-7 bytes in common
   793	small:
   794		LEAQ	(R8*8), CX	// bytes left -> bits left
   795		NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
   796		JEQ	allsame
   797	
   798		// load bytes of a into high bytes of SI
   799		CMPB	SI, $0xf8
   800		JA	si_high
   801		MOVQ	(SI), SI
   802		JMP	si_finish
   803	si_high:
   804		ADDQ	R8, SI
   805		MOVQ	-8(SI), SI
   806		SHRQ	CX, SI
   807	si_finish:
   808		SHLQ	CX, SI
   809	
   810		// load bytes of b into high bytes of DI
   811		CMPB	DI, $0xf8
   812		JA	di_high
   813		MOVQ	(DI), DI
   814		JMP	di_finish
   815	di_high:
   816		ADDQ	R8, DI
   817		MOVQ	-8(DI), DI
   818		SHRQ	CX, DI
   819	di_finish:
   820		SHLQ	CX, DI
   821	
   822		BSWAPQ	SI	// reverse order of bytes
   823		BSWAPQ	DI
   824		XORQ	SI, DI	// find bit differences
   825		JEQ	allsame
   826		BSRQ	DI, CX	// index of highest bit difference
   827		SHRQ	CX, SI	// move a's bit to bottom
   828		ANDQ	$1, SI	// mask bit
   829		LEAQ	-1(SI*2), AX // 1/0 => +1/-1
   830		RET
   831	
   832	allsame:
   833		XORQ	AX, AX
   834		XORQ	CX, CX
   835		CMPQ	BX, DX
   836		SETGT	AX	// 1 if alen > blen
   837		SETEQ	CX	// 1 if alen == blen
   838		LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
   839		RET
   840	
   841	TEXT bytes·IndexByte(SB),NOSPLIT,$0-20	// load pointer, length and byte c into the indexbytebody registers and return its AX result
   842		MOVL s+0(FP), SI
   843		MOVL s_len+4(FP), BX
   844		MOVB c+12(FP), AL	// c is read at offset 12 here, versus offset 8 in strings·IndexByte
   845		CALL runtime·indexbytebody(SB)
   846		MOVL AX, ret+16(FP)
   847		RET
   848	
   849	TEXT strings·IndexByte(SB),NOSPLIT,$0-20	// load pointer, length and byte c into the indexbytebody registers and return its AX result
   850		MOVL s+0(FP), SI
   851		MOVL s_len+4(FP), BX
   852		MOVB c+8(FP), AL	// c is read at offset 8 here, versus offset 12 in bytes·IndexByte
   853		CALL runtime·indexbytebody(SB)
   854		MOVL AX, ret+16(FP)
   855		RET
   856	
   857	// input:
   858	//   SI: data
   859	//   BX: data len
   860	//   AL: byte sought
   861	// output:
   862	//   AX
   863	TEXT runtime·indexbytebody(SB),NOSPLIT,$0	// register-ABI helper: AX = index of the first byte equal to AL in the BX bytes at SI, or -1
   864		MOVL SI, DI	// DI = scan cursor; SI keeps the start for the final index computation
   865	
   866		CMPL BX, $16
   867		JLT small
   868	
   869		// round up to first 16-byte boundary
   870		TESTL $15, SI
   871		JZ aligned
   872		MOVL SI, CX
   873		ANDL $~15, CX
   874		ADDL $16, CX
   875	
   876		// search the beginning
   877		SUBL SI, CX	// CX = number of bytes before the boundary
   878		REPN; SCASB
   879		JZ success
   880	
   881	// DI is 16-byte aligned; get ready to search using SSE instructions
   882	aligned:
   883		// round down to last 16-byte boundary
   884		MOVL BX, R11
   885		ADDL SI, R11
   886		ANDL $~15, R11
   887	
   888		// shuffle X0 around so that each byte contains c
   889		MOVD AX, X0
   890		PUNPCKLBW X0, X0
   891		PUNPCKLBW X0, X0
   892		PSHUFL $0, X0, X0
   893		JMP condition
   894	
   895	sse:
   896		// move the next 16-byte chunk of the buffer into X1
   897		MOVO (DI), X1
   898		// compare bytes in X0 to X1
   899		PCMPEQB X0, X1
   900		// take the top bit of each byte in X1 and put the result in DX
   901		PMOVMSKB X1, DX
   902		TESTL DX, DX
   903		JNZ ssesuccess
   904		ADDL $16, DI
   905	
   906	condition:
   907		CMPL DI, R11
   908		JB sse	// DI and R11 are addresses, so compare unsigned; signed JLT misorders them when the buffer straddles 0x80000000
   909	
   910		// search the end
   911		MOVL SI, CX
   912		ADDL BX, CX
   913		SUBL R11, CX
   914		// if CX == 0, the zero flag will be set and we'll end up
   915		// returning a false success
   916		JZ failure
   917		REPN; SCASB
   918		JZ success
   919	
   920	failure:
   921		MOVL $-1, AX
   922		RET
   923	
   924	// handle for lengths < 16
   925	small:
   926		MOVL BX, CX
   927		REPN; SCASB
   928		JZ success
   929		MOVL $-1, AX
   930		RET
   931	
   932	// we've found the chunk containing the byte
   933	// now just figure out which specific byte it is
   934	ssesuccess:
   935		// get the index of the least significant set bit
   936		BSFW DX, DX
   937		SUBL SI, DI	// DI = offset of the matching chunk from the start
   938		ADDL DI, DX
   939		MOVL DX, AX
   940		RET
   941	
   942	success:
   943		SUBL SI, DI
   944		SUBL $1, DI	// SCASB leaves DI one past the matching byte
   945		MOVL DI, AX
   946		RET
   947	
   948	TEXT bytes·Equal(SB),NOSPLIT,$0-25	// false if the lengths differ; otherwise the memeqbody result over a_len bytes
   949		MOVL	a_len+4(FP), BX
   950		MOVL	b_len+16(FP), CX
   951		XORL	AX, AX	// default result: not equal
   952		CMPL	BX, CX
   953		JNE	eqret	// lengths differ: return the 0 already in AX
   954		MOVL	a+0(FP), SI
   955		MOVL	b+12(FP), DI
   956		CALL	runtime·memeqbody(SB)	// takes SI, DI, BX; result in AX
   957	eqret:
   958		MOVB	AX, ret+24(FP)
   959		RET
   960	
   961	TEXT runtime·return0(SB), NOSPLIT, $0	// set AX to 0 and return
   962		MOVL	$0, AX
   963		RET
   964	
   965	// The top-most function running on a goroutine
   966	// returns to goexit+PCQuantum.
   967	TEXT runtime·goexit(SB),NOSPLIT,$0-0	// calls goexit1, padded with one NOP byte on each side of the CALL
   968		BYTE	$0x90	// NOP
   969		CALL	runtime·goexit1(SB)	// does not return
   970		// traceback from goexit1 must hit code range of goexit
   971		BYTE	$0x90	// NOP
   972	
   973	TEXT ·checkASM(SB),NOSPLIT,$0-1	// always stores 1 (true) in the one-byte result
   974		MOVB	$1, ret+0(FP)
   975		RET

View as plain text