...
Run Format

Text file src/runtime/asm_amd64p32.s

Documentation: runtime

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    11		// copy arguments forward on an even stack
    12		MOVL	argc+0(FP), AX
    13		MOVL	argv+4(FP), BX
    14		MOVL	SP, CX
    15		SUBL	$128, CX		// plenty of scratch
    16		ANDL	$~15, CX
    17		MOVL	CX, SP
    18	
    19		MOVL	AX, 16(SP)
    20		MOVL	BX, 24(SP)
    21		
    22		// create istack out of the given (operating system) stack.
    23		MOVL	$runtime·g0(SB), DI
    24		LEAL	(-64*1024+104)(SP), BX
    25		MOVL	BX, g_stackguard0(DI)
    26		MOVL	BX, g_stackguard1(DI)
    27		MOVL	BX, (g_stack+stack_lo)(DI)
    28		MOVL	SP, (g_stack+stack_hi)(DI)
    29	
    30		// find out information about the processor we're on
    31		MOVL	$0, AX
    32		CPUID
    33		CMPL	AX, $0
    34		JE	nocpuinfo
    35	
    36		CMPL	BX, $0x756E6547  // "Genu"
    37		JNE	notintel
    38		CMPL	DX, $0x49656E69  // "ineI"
    39		JNE	notintel
    40		CMPL	CX, $0x6C65746E  // "ntel"
    41		JNE	notintel
    42		MOVB	$1, runtime·isIntel(SB)
    43	notintel:
    44	
    45		// Load EAX=1 cpuid flags
    46		MOVL	$1, AX
    47		CPUID
    48		MOVL	AX, runtime·processorVersionInfo(SB)
    49	
    50		TESTL	$(1<<26), DX // SSE2
    51		SETNE	runtime·support_sse2(SB)
    52	
    53		TESTL	$(1<<9), CX // SSSE3
    54		SETNE	runtime·support_ssse3(SB)
    55	
    56		TESTL	$(1<<19), CX // SSE4.1
    57		SETNE	runtime·support_sse41(SB)
    58	
    59		TESTL	$(1<<20), CX // SSE4.2
    60		SETNE	runtime·support_sse42(SB)
    61	
    62		TESTL	$(1<<23), CX // POPCNT
    63		SETNE	runtime·support_popcnt(SB)
    64	
    65		TESTL	$(1<<25), CX // AES
    66		SETNE	runtime·support_aes(SB)
    67	
    68		TESTL	$(1<<27), CX // OSXSAVE
    69		SETNE	runtime·support_osxsave(SB)
    70	
    71		// If OS support for XMM and YMM is not present
    72		// support_avx will be set back to false later.
    73		TESTL	$(1<<28), CX // AVX
    74		SETNE	runtime·support_avx(SB)
    75	
    76	eax7:
    77		// Load EAX=7/ECX=0 cpuid flags
    78		CMPL	SI, $7
    79		JLT	osavx
    80		MOVL	$7, AX
    81		MOVL	$0, CX
    82		CPUID
    83	
    84		TESTL	$(1<<3), BX // BMI1
    85		SETNE	runtime·support_bmi1(SB)
    86	
    87		// If OS support for XMM and YMM is not present
    88		// support_avx2 will be set back to false later.
    89		TESTL	$(1<<5), BX
    90		SETNE	runtime·support_avx2(SB)
    91	
    92		TESTL	$(1<<8), BX // BMI2
    93		SETNE	runtime·support_bmi2(SB)
    94	
    95		TESTL	$(1<<9), BX // ERMS
    96		SETNE	runtime·support_erms(SB)
    97	
    98	osavx:
    99		// nacl does not support XGETBV to test
   100		// for XMM and YMM OS support.
   101	#ifndef GOOS_nacl
   102		CMPB	runtime·support_osxsave(SB), $1
   103		JNE	noavx
   104		MOVL	$0, CX
   105		// For XGETBV, OSXSAVE bit is required and sufficient
   106		XGETBV
   107		ANDL	$6, AX
   108		CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
   109		JE nocpuinfo
   110	#endif
   111	noavx:
   112		MOVB $0, runtime·support_avx(SB)
   113		MOVB $0, runtime·support_avx2(SB)
   114	
   115	nocpuinfo:
   116	
   117	needtls:
   118		LEAL	runtime·m0+m_tls(SB), DI
   119		CALL	runtime·settls(SB)
   120	
   121		// store through it, to make sure it works
   122		get_tls(BX)
   123		MOVQ	$0x123, g(BX)
   124		MOVQ	runtime·m0+m_tls(SB), AX
   125		CMPQ	AX, $0x123
   126		JEQ 2(PC)
   127		MOVL	AX, 0	// abort
   128	ok:
   129		// set the per-goroutine and per-mach "registers"
   130		get_tls(BX)
   131		LEAL	runtime·g0(SB), CX
   132		MOVL	CX, g(BX)
   133		LEAL	runtime·m0(SB), AX
   134	
   135		// save m->g0 = g0
   136		MOVL	CX, m_g0(AX)
   137		// save m0 to g0->m
   138		MOVL	AX, g_m(CX)
   139	
   140		CLD				// convention is D is always left cleared
   141		CALL	runtime·check(SB)
   142	
   143		MOVL	16(SP), AX		// copy argc
   144		MOVL	AX, 0(SP)
   145		MOVL	24(SP), AX		// copy argv
   146		MOVL	AX, 4(SP)
   147		CALL	runtime·args(SB)
   148		CALL	runtime·osinit(SB)
   149		CALL	runtime·schedinit(SB)
   150	
   151		// create a new goroutine to start program
   152		MOVL	$runtime·mainPC(SB), AX	// entry
   153		MOVL	$0, 0(SP)
   154		MOVL	AX, 4(SP)
   155		CALL	runtime·newproc(SB)
   156	
   157		// start this M
   158		CALL	runtime·mstart(SB)
   159	
   160		MOVL	$0xf1, 0xf1  // crash
   161		RET
   162	
   163	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
   164	GLOBL	runtime·mainPC(SB),RODATA,$4
   165	
   166	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   167		INT $3
   168		RET
   169	
   170	TEXT runtime·asminit(SB),NOSPLIT,$0-0
   171		// No per-thread init.
   172		RET
   173	
   174	/*
   175	 *  go-routine
   176	 */
   177	
   178	// void gosave(Gobuf*)
   179	// save state in Gobuf; setjmp
   180	TEXT runtime·gosave(SB), NOSPLIT, $0-4
   181		MOVL	buf+0(FP), AX	// gobuf
   182		LEAL	buf+0(FP), BX	// caller's SP
   183		MOVL	BX, gobuf_sp(AX)
   184		MOVL	0(SP), BX		// caller's PC
   185		MOVL	BX, gobuf_pc(AX)
   186		MOVQ	$0, gobuf_ret(AX)
   187		// Assert ctxt is zero. See func save.
   188		MOVL	gobuf_ctxt(AX), BX
   189		TESTL	BX, BX
   190		JZ	2(PC)
   191		CALL	runtime·badctxt(SB)
   192		get_tls(CX)
   193		MOVL	g(CX), BX
   194		MOVL	BX, gobuf_g(AX)
   195		RET
   196	
   197	// void gogo(Gobuf*)
   198	// restore state from Gobuf; longjmp
   199	TEXT runtime·gogo(SB), NOSPLIT, $8-4
   200		MOVL	buf+0(FP), BX		// gobuf
   201	
   202		// If ctxt is not nil, invoke deletion barrier before overwriting.
   203		MOVL	gobuf_ctxt(BX), DX
   204		TESTL	DX, DX
   205		JZ	nilctxt
   206		LEAL	gobuf_ctxt(BX), AX
   207		MOVL	AX, 0(SP)
   208		MOVL	$0, 4(SP)
   209		CALL	runtime·writebarrierptr_prewrite(SB)
   210		MOVL	buf+0(FP), BX
   211	
   212	nilctxt:
   213		MOVL	gobuf_g(BX), DX
   214		MOVL	0(DX), CX		// make sure g != nil
   215		get_tls(CX)
   216		MOVL	DX, g(CX)
   217		MOVL	gobuf_sp(BX), SP	// restore SP
   218		MOVL	gobuf_ctxt(BX), DX
   219		MOVQ	gobuf_ret(BX), AX
   220		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   221		MOVQ	$0, gobuf_ret(BX)
   222		MOVL	$0, gobuf_ctxt(BX)
   223		MOVL	gobuf_pc(BX), BX
   224		JMP	BX
   225	
   226	// func mcall(fn func(*g))
   227	// Switch to m->g0's stack, call fn(g).
   228	// Fn must never return. It should gogo(&g->sched)
   229	// to keep running g.
   230	TEXT runtime·mcall(SB), NOSPLIT, $0-4
   231		MOVL	fn+0(FP), DI
   232		
   233		get_tls(CX)
   234		MOVL	g(CX), AX	// save state in g->sched
   235		MOVL	0(SP), BX	// caller's PC
   236		MOVL	BX, (g_sched+gobuf_pc)(AX)
   237		LEAL	fn+0(FP), BX	// caller's SP
   238		MOVL	BX, (g_sched+gobuf_sp)(AX)
   239		MOVL	AX, (g_sched+gobuf_g)(AX)
   240	
   241		// switch to m->g0 & its stack, call fn
   242		MOVL	g(CX), BX
   243		MOVL	g_m(BX), BX
   244		MOVL	m_g0(BX), SI
   245		CMPL	SI, AX	// if g == m->g0 call badmcall
   246		JNE	3(PC)
   247		MOVL	$runtime·badmcall(SB), AX
   248		JMP	AX
   249		MOVL	SI, g(CX)	// g = m->g0
   250		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   251		PUSHQ	AX
   252		MOVL	DI, DX
   253		MOVL	0(DI), DI
   254		CALL	DI
   255		POPQ	AX
   256		MOVL	$runtime·badmcall2(SB), AX
   257		JMP	AX
   258		RET
   259	
   260	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   261	// of the G stack. We need to distinguish the routine that
   262	// lives at the bottom of the G stack from the one that lives
   263	// at the top of the system stack because the one at the top of
   264	// the system stack terminates the stack walk (see topofstack()).
   265	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   266		RET
   267	
   268	// func systemstack(fn func())
   269	TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   270		MOVL	fn+0(FP), DI	// DI = fn
   271		get_tls(CX)
   272		MOVL	g(CX), AX	// AX = g
   273		MOVL	g_m(AX), BX	// BX = m
   274	
   275		MOVL	m_gsignal(BX), DX	// DX = gsignal
   276		CMPL	AX, DX
   277		JEQ	noswitch
   278	
   279		MOVL	m_g0(BX), DX	// DX = g0
   280		CMPL	AX, DX
   281		JEQ	noswitch
   282	
   283		MOVL	m_curg(BX), R8
   284		CMPL	AX, R8
   285		JEQ	switch
   286		
   287		// Not g0, not curg. Must be gsignal, but that's not allowed.
   288		// Hide call from linker nosplit analysis.
   289		MOVL	$runtime·badsystemstack(SB), AX
   290		CALL	AX
   291	
   292	switch:
   293		// save our state in g->sched. Pretend to
   294		// be systemstack_switch if the G stack is scanned.
   295		MOVL	$runtime·systemstack_switch(SB), SI
   296		MOVL	SI, (g_sched+gobuf_pc)(AX)
   297		MOVL	SP, (g_sched+gobuf_sp)(AX)
   298		MOVL	AX, (g_sched+gobuf_g)(AX)
   299	
   300		// switch to g0
   301		MOVL	DX, g(CX)
   302		MOVL	(g_sched+gobuf_sp)(DX), SP
   303	
   304		// call target function
   305		MOVL	DI, DX
   306		MOVL	0(DI), DI
   307		CALL	DI
   308	
   309		// switch back to g
   310		get_tls(CX)
   311		MOVL	g(CX), AX
   312		MOVL	g_m(AX), BX
   313		MOVL	m_curg(BX), AX
   314		MOVL	AX, g(CX)
   315		MOVL	(g_sched+gobuf_sp)(AX), SP
   316		MOVL	$0, (g_sched+gobuf_sp)(AX)
   317		RET
   318	
   319	noswitch:
   320		// already on m stack, just call directly
   321		MOVL	DI, DX
   322		MOVL	0(DI), DI
   323		CALL	DI
   324		RET
   325	
   326	/*
   327	 * support for morestack
   328	 */
   329	
   330	// Called during function prolog when more stack is needed.
   331	//
   332	// The traceback routines see morestack on a g0 as being
   333	// the top of a stack (for example, morestack calling newstack
   334	// calling the scheduler calling newm calling gc), so we must
   335	// record an argument size. For that purpose, it has no arguments.
   336	TEXT runtime·morestack(SB),NOSPLIT,$0-0
   337		get_tls(CX)
   338		MOVL	g(CX), BX
   339		MOVL	g_m(BX), BX
   340	
   341		// Cannot grow scheduler stack (m->g0).
   342		MOVL	m_g0(BX), SI
   343		CMPL	g(CX), SI
   344		JNE	3(PC)
   345		CALL	runtime·badmorestackg0(SB)
   346		MOVL	0, AX
   347	
   348		// Cannot grow signal stack (m->gsignal).
   349		MOVL	m_gsignal(BX), SI
   350		CMPL	g(CX), SI
   351		JNE	3(PC)
   352		CALL	runtime·badmorestackgsignal(SB)
   353		MOVL	0, AX
   354	
   355		// Called from f.
   356		// Set m->morebuf to f's caller.
   357		MOVL	8(SP), AX	// f's caller's PC
   358		MOVL	AX, (m_morebuf+gobuf_pc)(BX)
   359		LEAL	16(SP), AX	// f's caller's SP
   360		MOVL	AX, (m_morebuf+gobuf_sp)(BX)
   361		get_tls(CX)
   362		MOVL	g(CX), SI
   363		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   364	
   365		// Set g->sched to context in f.
   366		MOVL	0(SP), AX // f's PC
   367		MOVL	AX, (g_sched+gobuf_pc)(SI)
   368		MOVL	SI, (g_sched+gobuf_g)(SI)
   369		LEAL	8(SP), AX // f's SP
   370		MOVL	AX, (g_sched+gobuf_sp)(SI)
   371		// newstack will fill gobuf.ctxt.
   372	
   373		// Call newstack on m->g0's stack.
   374		MOVL	m_g0(BX), BX
   375		MOVL	BX, g(CX)
   376		MOVL	(g_sched+gobuf_sp)(BX), SP
   377		PUSHQ	DX	// ctxt argument
   378		CALL	runtime·newstack(SB)
   379		MOVL	$0, 0x1003	// crash if newstack returns
   380		POPQ	DX	// keep balance check happy
   381		RET
   382	
   383	// morestack trampolines
   384	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   385		MOVL	$0, DX
   386		JMP	runtime·morestack(SB)
   387	
   388	// reflectcall: call a function with the given argument list
   389	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   390	// we don't have variable-sized frames, so we use a small number
   391	// of constant-sized-frame functions to encode a few bits of size in the pc.
   392	// Caution: ugly multiline assembly macros in your future!
   393	
   394	#define DISPATCH(NAME,MAXSIZE)		\
   395		CMPL	CX, $MAXSIZE;		\
   396		JA	3(PC);			\
   397		MOVL	$NAME(SB), AX;		\
   398		JMP	AX
   399	// Note: can't just "JMP NAME(SB)" - bad inlining results.
   400	
   401	TEXT reflect·call(SB), NOSPLIT, $0-0
   402		JMP	·reflectcall(SB)
   403	
   404	TEXT ·reflectcall(SB), NOSPLIT, $0-20
   405		MOVLQZX argsize+12(FP), CX
   406		DISPATCH(runtime·call16, 16)
   407		DISPATCH(runtime·call32, 32)
   408		DISPATCH(runtime·call64, 64)
   409		DISPATCH(runtime·call128, 128)
   410		DISPATCH(runtime·call256, 256)
   411		DISPATCH(runtime·call512, 512)
   412		DISPATCH(runtime·call1024, 1024)
   413		DISPATCH(runtime·call2048, 2048)
   414		DISPATCH(runtime·call4096, 4096)
   415		DISPATCH(runtime·call8192, 8192)
   416		DISPATCH(runtime·call16384, 16384)
   417		DISPATCH(runtime·call32768, 32768)
   418		DISPATCH(runtime·call65536, 65536)
   419		DISPATCH(runtime·call131072, 131072)
   420		DISPATCH(runtime·call262144, 262144)
   421		DISPATCH(runtime·call524288, 524288)
   422		DISPATCH(runtime·call1048576, 1048576)
   423		DISPATCH(runtime·call2097152, 2097152)
   424		DISPATCH(runtime·call4194304, 4194304)
   425		DISPATCH(runtime·call8388608, 8388608)
   426		DISPATCH(runtime·call16777216, 16777216)
   427		DISPATCH(runtime·call33554432, 33554432)
   428		DISPATCH(runtime·call67108864, 67108864)
   429		DISPATCH(runtime·call134217728, 134217728)
   430		DISPATCH(runtime·call268435456, 268435456)
   431		DISPATCH(runtime·call536870912, 536870912)
   432		DISPATCH(runtime·call1073741824, 1073741824)
   433		MOVL	$runtime·badreflectcall(SB), AX
   434		JMP	AX
   435	
   436	#define CALLFN(NAME,MAXSIZE)			\
   437	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   438		NO_LOCAL_POINTERS;			\
   439		/* copy arguments to stack */		\
   440		MOVL	argptr+8(FP), SI;		\
   441		MOVL	argsize+12(FP), CX;		\
   442		MOVL	SP, DI;				\
   443		REP;MOVSB;				\
   444		/* call function */			\
   445		MOVL	f+4(FP), DX;			\
   446		MOVL	(DX), AX;			\
   447		CALL	AX;				\
   448		/* copy return values back */		\
   449		MOVL	argtype+0(FP), DX;		\
   450		MOVL	argptr+8(FP), DI;		\
   451		MOVL	argsize+12(FP), CX;		\
   452		MOVL	retoffset+16(FP), BX;		\
   453		MOVL	SP, SI;				\
   454		ADDL	BX, DI;				\
   455		ADDL	BX, SI;				\
   456		SUBL	BX, CX;				\
   457		CALL	callRet<>(SB);			\
   458		RET
   459	
   460	// callRet copies return values back at the end of call*. This is a
   461	// separate function so it can allocate stack space for the arguments
   462	// to reflectcallmove. It does not follow the Go ABI; it expects its
   463	// arguments in registers.
   464	TEXT callRet<>(SB), NOSPLIT, $16-0
   465		MOVL	DX, 0(SP)
   466		MOVL	DI, 4(SP)
   467		MOVL	SI, 8(SP)
   468		MOVL	CX, 12(SP)
   469		CALL	runtime·reflectcallmove(SB)
   470		RET
   471	
   472	CALLFN(·call16, 16)
   473	CALLFN(·call32, 32)
   474	CALLFN(·call64, 64)
   475	CALLFN(·call128, 128)
   476	CALLFN(·call256, 256)
   477	CALLFN(·call512, 512)
   478	CALLFN(·call1024, 1024)
   479	CALLFN(·call2048, 2048)
   480	CALLFN(·call4096, 4096)
   481	CALLFN(·call8192, 8192)
   482	CALLFN(·call16384, 16384)
   483	CALLFN(·call32768, 32768)
   484	CALLFN(·call65536, 65536)
   485	CALLFN(·call131072, 131072)
   486	CALLFN(·call262144, 262144)
   487	CALLFN(·call524288, 524288)
   488	CALLFN(·call1048576, 1048576)
   489	CALLFN(·call2097152, 2097152)
   490	CALLFN(·call4194304, 4194304)
   491	CALLFN(·call8388608, 8388608)
   492	CALLFN(·call16777216, 16777216)
   493	CALLFN(·call33554432, 33554432)
   494	CALLFN(·call67108864, 67108864)
   495	CALLFN(·call134217728, 134217728)
   496	CALLFN(·call268435456, 268435456)
   497	CALLFN(·call536870912, 536870912)
   498	CALLFN(·call1073741824, 1073741824)
   499	
   500	TEXT runtime·procyield(SB),NOSPLIT,$0-0
   501		MOVL	cycles+0(FP), AX
   502	again:
   503		PAUSE
   504		SUBL	$1, AX
   505		JNZ	again
   506		RET
   507	
   508	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   509		// Stores are already ordered on x86, so this is just a
   510		// compile barrier.
   511		RET
   512	
   513	// void jmpdefer(fn, sp);
   514	// called from deferreturn.
   515	// 1. pop the caller
   516	// 2. sub 5 bytes from the callers return
   517	// 3. jmp to the argument
   518	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   519		MOVL	fv+0(FP), DX
   520		MOVL	argp+4(FP), BX
   521		LEAL	-8(BX), SP	// caller sp after CALL
   522		SUBL	$5, (SP)	// return to CALL again
   523		MOVL	0(DX), BX
   524		JMP	BX	// but first run the deferred function
   525	
   526	// func asmcgocall(fn, arg unsafe.Pointer) int32
   527	// Not implemented.
   528	TEXT runtime·asmcgocall(SB),NOSPLIT,$0-12
   529		MOVL	0, AX
   530		RET
   531	
   532	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
   533	// Not implemented.
   534	TEXT runtime·cgocallback(SB),NOSPLIT,$0-16
   535		MOVL	0, AX
   536		RET
   537	
   538	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize)
   539	// Not implemented.
   540	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$0-16
   541		MOVL	0, AX
   542		RET
   543	
   544	// void setg(G*); set g. for use by needm.
   545	// Not implemented.
   546	TEXT runtime·setg(SB), NOSPLIT, $0-4
   547		MOVL	0, AX
   548		RET
   549	
// stackcheck verifies that SP lies inside the current g's stack bounds
// and faults (by loading from address 0) when it does not.
// NOTE(review): SP == g->stack.lo also faults because the lower-bound
// test uses JHI, so the accepted range is (lo, hi) rather than
// [g->stack.lo, g->stack.hi) — confirm which bound is intended.
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVL	g(CX), AX
	CMPL	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)	// stack.hi > SP: upper bound holds, skip the fault
	MOVL	0, AX	// load from address 0: SP is at or above stack.hi
	CMPL	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)	// SP > stack.lo: lower bound holds, skip the fault
	MOVL	0, AX	// load from address 0: SP is at or below stack.lo
	RET
   561	
// memclrNoHeapPointers zeroes n bytes starting at ptr.
// The bulk is cleared four bytes at a time with REP STOSL; the remaining
// n%4 bytes are cleared with REP STOSB.
TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-8
	MOVL	ptr+0(FP), DI
	MOVL	n+4(FP), CX
	MOVQ	CX, BX
	ANDQ	$3, BX	// BX = n % 4, the byte tail
	SHRQ	$2, CX	// CX = n / 4, the dword count
	MOVQ	$0, AX	// value stored by STOSL/STOSB
	CLD
	REP
	STOSL
	MOVQ	BX, CX
	REP
	STOSB
	// Note: we zero only 4 bytes at a time so that the tail is at most
	// 3 bytes. That guarantees that we aren't zeroing pointers with STOSB.
	// See issue 13160.
	RET
   579	
   580	TEXT runtime·getcallerpc(SB),NOSPLIT,$8-12
   581		MOVL	argp+0(FP),AX		// addr of first arg
   582		MOVL	-8(AX),AX		// get calling pc
   583		MOVL	AX, ret+8(FP)
   584		RET
   585	
   586	// int64 runtime·cputicks(void)
   587	TEXT runtime·cputicks(SB),NOSPLIT,$0-0
   588		RDTSC
   589		SHLQ	$32, DX
   590		ADDQ	DX, AX
   591		MOVQ	AX, ret+0(FP)
   592		RET
   593	
   594	// memhash_varlen(p unsafe.Pointer, h seed) uintptr
   595	// redirects to memhash(p, h, size) using the size
   596	// stored in the closure.
   597	TEXT runtime·memhash_varlen(SB),NOSPLIT,$24-12
   598		GO_ARGS
   599		NO_LOCAL_POINTERS
   600		MOVL	p+0(FP), AX
   601		MOVL	h+4(FP), BX
   602		MOVL	4(DX), CX
   603		MOVL	AX, 0(SP)
   604		MOVL	BX, 4(SP)
   605		MOVL	CX, 8(SP)
   606		CALL	runtime·memhash(SB)
   607		MOVL	16(SP), AX
   608		MOVL	AX, ret+8(FP)
   609		RET
   610	
   611	// hash function using AES hardware instructions
   612	// For now, our one amd64p32 system (NaCl) does not
   613	// support using AES instructions, so have not bothered to
   614	// write the implementations. Can copy and adjust the ones
   615	// in asm_amd64.s when the time comes.
   616	
   617	TEXT runtime·aeshash(SB),NOSPLIT,$0-20
   618		MOVL	AX, ret+16(FP)
   619		RET
   620	
   621	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
   622		MOVL	AX, ret+8(FP)
   623		RET
   624	
   625	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
   626		MOVL	AX, ret+8(FP)
   627		RET
   628	
   629	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
   630		MOVL	AX, ret+8(FP)
   631		RET
   632	
   633	// memequal(p, q unsafe.Pointer, size uintptr) bool
   634	TEXT runtime·memequal(SB),NOSPLIT,$0-17
   635		MOVL	a+0(FP), SI
   636		MOVL	b+4(FP), DI
   637		CMPL	SI, DI
   638		JEQ	eq
   639		MOVL	size+8(FP), BX
   640		CALL	runtime·memeqbody(SB)
   641		MOVB	AX, ret+16(FP)
   642		RET
   643	eq:
   644		MOVB    $1, ret+16(FP)
   645		RET
   646	
   647	// memequal_varlen(a, b unsafe.Pointer) bool
   648	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
   649		MOVL    a+0(FP), SI
   650		MOVL    b+4(FP), DI
   651		CMPL    SI, DI
   652		JEQ     eq
   653		MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
   654		CALL    runtime·memeqbody(SB)
   655		MOVB    AX, ret+8(FP)
   656		RET
   657	eq:
   658		MOVB    $1, ret+8(FP)
   659		RET
   660	
   661	// eqstring tests whether two strings are equal.
   662	// The compiler guarantees that strings passed
   663	// to eqstring have equal length.
   664	// See runtime_test.go:eqstring_generic for
   665	// equivalent Go code.
   666	TEXT runtime·eqstring(SB),NOSPLIT,$0-17
   667		MOVL	s1_base+0(FP), SI
   668		MOVL	s2_base+8(FP), DI
   669		CMPL	SI, DI
   670		JEQ	same
   671		MOVL	s1_len+4(FP), BX
   672		CALL	runtime·memeqbody(SB)
   673		MOVB	AX, ret+16(FP)
   674		RET
   675	same:
   676		MOVB	$1, ret+16(FP)
   677		RET
   678	
   679	// a in SI
   680	// b in DI
   681	// count in BX
   682	TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
   683		XORQ	AX, AX
   684	
   685		CMPQ	BX, $8
   686		JB	small
   687		
   688		// 64 bytes at a time using xmm registers
   689	hugeloop:
   690		CMPQ	BX, $64
   691		JB	bigloop
   692		MOVOU	(SI), X0
   693		MOVOU	(DI), X1
   694		MOVOU	16(SI), X2
   695		MOVOU	16(DI), X3
   696		MOVOU	32(SI), X4
   697		MOVOU	32(DI), X5
   698		MOVOU	48(SI), X6
   699		MOVOU	48(DI), X7
   700		PCMPEQB	X1, X0
   701		PCMPEQB	X3, X2
   702		PCMPEQB	X5, X4
   703		PCMPEQB	X7, X6
   704		PAND	X2, X0
   705		PAND	X6, X4
   706		PAND	X4, X0
   707		PMOVMSKB X0, DX
   708		ADDQ	$64, SI
   709		ADDQ	$64, DI
   710		SUBQ	$64, BX
   711		CMPL	DX, $0xffff
   712		JEQ	hugeloop
   713		RET
   714	
   715		// 8 bytes at a time using 64-bit register
   716	bigloop:
   717		CMPQ	BX, $8
   718		JBE	leftover
   719		MOVQ	(SI), CX
   720		MOVQ	(DI), DX
   721		ADDQ	$8, SI
   722		ADDQ	$8, DI
   723		SUBQ	$8, BX
   724		CMPQ	CX, DX
   725		JEQ	bigloop
   726		RET
   727	
   728		// remaining 0-8 bytes
   729	leftover:
   730		ADDQ	BX, SI
   731		ADDQ	BX, DI
   732		MOVQ	-8(SI), CX
   733		MOVQ	-8(DI), DX
   734		CMPQ	CX, DX
   735		SETEQ	AX
   736		RET
   737	
   738	small:
   739		CMPQ	BX, $0
   740		JEQ	equal
   741	
   742		LEAQ	0(BX*8), CX
   743		NEGQ	CX
   744	
   745		CMPB	SI, $0xf8
   746		JA	si_high
   747	
   748		// load at SI won't cross a page boundary.
   749		MOVQ	(SI), SI
   750		JMP	si_finish
   751	si_high:
   752		// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   753		MOVQ	BX, DX
   754		ADDQ	SI, DX
   755		MOVQ	-8(DX), SI
   756		SHRQ	CX, SI
   757	si_finish:
   758	
   759		// same for DI.
   760		CMPB	DI, $0xf8
   761		JA	di_high
   762		MOVQ	(DI), DI
   763		JMP	di_finish
   764	di_high:
   765		MOVQ	BX, DX
   766		ADDQ	DI, DX
   767		MOVQ	-8(DX), DI
   768		SHRQ	CX, DI
   769	di_finish:
   770	
   771		SUBQ	SI, DI
   772		SHLQ	CX, DI
   773	equal:
   774		SETEQ	AX
   775		RET
   776	
   777	TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
   778		MOVL	s1_base+0(FP), SI
   779		MOVL	s1_len+4(FP), BX
   780		MOVL	s2_base+8(FP), DI
   781		MOVL	s2_len+12(FP), DX
   782		CALL	runtime·cmpbody(SB)
   783		MOVL	AX, ret+16(FP)
   784		RET
   785	
   786	TEXT bytes·Compare(SB),NOSPLIT,$0-28
   787		MOVL	s1+0(FP), SI
   788		MOVL	s1+4(FP), BX
   789		MOVL	s2+12(FP), DI
   790		MOVL	s2+16(FP), DX
   791		CALL	runtime·cmpbody(SB)
   792		MOVL	AX, res+24(FP)
   793		RET
   794	
// cmpbody three-way compares two byte sequences. It does not follow the
// Go ABI: operands and result travel in registers.
// input:
//   SI = a
//   DI = b
//   BX = alen
//   DX = blen
// output:
//   AX = 1/0/-1
//        (+1 when a sorts after b, -1 when it sorts before; when the
//        first min(alen, blen) bytes match, the lengths decide)
// R8, CX, X0 and X1 are used as scratch; SI, DI and BX are modified.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
	CMPQ	SI, DI
	JEQ	allsame
	CMPQ	BX, DX
	MOVQ	DX, R8
	CMOVQLT	BX, R8 // R8 = min(alen, blen) = # of bytes to compare
	CMPQ	R8, $8
	JB	small

	// 16 bytes at a time while more than 16 bytes remain
loop:
	CMPQ	R8, $16
	JBE	_0through16
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	PCMPEQB X0, X1
	PMOVMSKB X1, AX
	XORQ	$0xffff, AX	// convert EQ to NE
	JNE	diff16	// branch if at least one byte is not equal
	ADDQ	$16, SI
	ADDQ	$16, DI
	SUBQ	$16, R8
	JMP	loop
	
	// AX = bit mask of differences
diff16:
	BSFQ	AX, BX	// index of first byte that differs
	XORQ	AX, AX
	ADDQ	BX, SI
	MOVB	(SI), CX
	ADDQ	BX, DI
	CMPB	CX, (DI)
	SETHI	AX
	LEAQ	-1(AX*2), AX	// convert 1/0 to +1/-1
	RET

	// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
	CMPQ	R8, $8
	JBE	_0through8
	MOVQ	(SI), AX
	MOVQ	(DI), CX
	CMPQ	AX, CX
	JNE	diff8
_0through8:
	// compare the final 8 bytes of the common prefix
	ADDQ	R8, SI
	ADDQ	R8, DI
	MOVQ	-8(SI), AX
	MOVQ	-8(DI), CX
	CMPQ	AX, CX
	JEQ	allsame

	// AX and CX contain parts of a and b that differ.
diff8:
	BSWAPQ	AX	// reverse order of bytes
	BSWAPQ	CX
	XORQ	AX, CX
	BSRQ	CX, CX	// index of highest bit difference
	SHRQ	CX, AX	// move a's bit to bottom
	ANDQ	$1, AX	// mask bit
	LEAQ	-1(AX*2), AX // 1/0 => +1/-1
	RET

	// 0-7 bytes in common
small:
	LEAQ	(R8*8), CX	// bytes left -> bits left
	NEGQ	CX		//  - bits left (== 64 - bits left mod 64)
	JEQ	allsame

	// load bytes of a into high bytes of SI
	CMPB	SI, $0xf8
	JA	si_high
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// low address byte is above 0xf8: load the 8 bytes that end where
	// a's data ends instead of the 8 bytes that start at SI
	ADDQ	R8, SI
	MOVQ	-8(SI), SI
	SHRQ	CX, SI
si_finish:
	SHLQ	CX, SI

	// load bytes of b into high bytes of DI
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	ADDQ	R8, DI
	MOVQ	-8(DI), DI
	SHRQ	CX, DI
di_finish:
	SHLQ	CX, DI

	BSWAPQ	SI	// reverse order of bytes
	BSWAPQ	DI
	XORQ	SI, DI	// find bit differences
	JEQ	allsame
	BSRQ	DI, CX	// index of highest bit difference
	SHRQ	CX, SI	// move a's bit to bottom
	ANDQ	$1, SI	// mask bit
	LEAQ	-1(SI*2), AX // 1/0 => +1/-1
	RET

	// common prefix is identical: the lengths decide the result
allsame:
	XORQ	AX, AX
	XORQ	CX, CX
	CMPQ	BX, DX
	SETGT	AX	// 1 if alen > blen
	SETEQ	CX	// 1 if alen == blen
	LEAQ	-1(CX)(AX*2), AX	// 1,0,-1 result
	RET
   912	
   913	TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
   914		MOVL s+0(FP), SI
   915		MOVL s_len+4(FP), BX
   916		MOVB c+12(FP), AL
   917		CALL runtime·indexbytebody(SB)
   918		MOVL AX, ret+16(FP)
   919		RET
   920	
   921	TEXT strings·IndexByte(SB),NOSPLIT,$0-20
   922		MOVL s+0(FP), SI
   923		MOVL s_len+4(FP), BX
   924		MOVB c+8(FP), AL
   925		CALL runtime·indexbytebody(SB)
   926		MOVL AX, ret+16(FP)
   927		RET
   928	
   929	// input:
   930	//   SI: data
   931	//   BX: data len
   932	//   AL: byte sought
   933	// output:
   934	//   AX
   935	TEXT runtime·indexbytebody(SB),NOSPLIT,$0
   936		MOVL SI, DI
   937	
   938		CMPL BX, $16
   939		JLT small
   940	
   941		// round up to first 16-byte boundary
   942		TESTL $15, SI
   943		JZ aligned
   944		MOVL SI, CX
   945		ANDL $~15, CX
   946		ADDL $16, CX
   947	
   948		// search the beginning
   949		SUBL SI, CX
   950		REPN; SCASB
   951		JZ success
   952	
   953	// DI is 16-byte aligned; get ready to search using SSE instructions
   954	aligned:
   955		// round down to last 16-byte boundary
   956		MOVL BX, R11
   957		ADDL SI, R11
   958		ANDL $~15, R11
   959	
   960		// shuffle X0 around so that each byte contains c
   961		MOVD AX, X0
   962		PUNPCKLBW X0, X0
   963		PUNPCKLBW X0, X0
   964		PSHUFL $0, X0, X0
   965		JMP condition
   966	
   967	sse:
   968		// move the next 16-byte chunk of the buffer into X1
   969		MOVO (DI), X1
   970		// compare bytes in X0 to X1
   971		PCMPEQB X0, X1
   972		// take the top bit of each byte in X1 and put the result in DX
   973		PMOVMSKB X1, DX
   974		TESTL DX, DX
   975		JNZ ssesuccess
   976		ADDL $16, DI
   977	
   978	condition:
   979		CMPL DI, R11
   980		JLT sse
   981	
   982		// search the end
   983		MOVL SI, CX
   984		ADDL BX, CX
   985		SUBL R11, CX
   986		// if CX == 0, the zero flag will be set and we'll end up
   987		// returning a false success
   988		JZ failure
   989		REPN; SCASB
   990		JZ success
   991	
   992	failure:
   993		MOVL $-1, AX
   994		RET
   995	
   996	// handle for lengths < 16
   997	small:
   998		MOVL BX, CX
   999		REPN; SCASB
  1000		JZ success
  1001		MOVL $-1, AX
  1002		RET
  1003	
  1004	// we've found the chunk containing the byte
  1005	// now just figure out which specific byte it is
  1006	ssesuccess:
  1007		// get the index of the least significant set bit
  1008		BSFW DX, DX
  1009		SUBL SI, DI
  1010		ADDL DI, DX
  1011		MOVL DX, AX
  1012		RET
  1013	
  1014	success:
  1015		SUBL SI, DI
  1016		SUBL $1, DI
  1017		MOVL DI, AX
  1018		RET
  1019	
  1020	TEXT bytes·Equal(SB),NOSPLIT,$0-25
  1021		MOVL	a_len+4(FP), BX
  1022		MOVL	b_len+16(FP), CX
  1023		XORL	AX, AX
  1024		CMPL	BX, CX
  1025		JNE	eqret
  1026		MOVL	a+0(FP), SI
  1027		MOVL	b+12(FP), DI
  1028		CALL	runtime·memeqbody(SB)
  1029	eqret:
  1030		MOVB	AX, ret+24(FP)
  1031		RET
  1032	
  1033	TEXT runtime·return0(SB), NOSPLIT, $0
  1034		MOVL	$0, AX
  1035		RET
  1036	
  1037	// The top-most function running on a goroutine
  1038	// returns to goexit+PCQuantum.
  1039	TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1040		BYTE	$0x90	// NOP
  1041		CALL	runtime·goexit1(SB)	// does not return
  1042		// traceback from goexit1 must hit code range of goexit
  1043		BYTE	$0x90	// NOP
  1044	
  1045	TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
  1046		MOVL	addr+0(FP), AX
  1047		PREFETCHT0	(AX)
  1048		RET
  1049	
  1050	TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
  1051		MOVL	addr+0(FP), AX
  1052		PREFETCHT1	(AX)
  1053		RET
  1054	
  1055	
  1056	TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
  1057		MOVL	addr+0(FP), AX
  1058		PREFETCHT2	(AX)
  1059		RET
  1060	
  1061	TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
  1062		MOVL	addr+0(FP), AX
  1063		PREFETCHNTA	(AX)
  1064		RET
  1065	
  1066	TEXT ·checkASM(SB),NOSPLIT,$0-1
  1067		MOVB	$1, ret+0(FP)
  1068		RET

View as plain text