...
Run Format

Text file src/runtime/asm_386.s

Documentation: runtime

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    11		// copy arguments forward on an even stack
    12		MOVL	argc+0(FP), AX
    13		MOVL	argv+4(FP), BX
    14		SUBL	$128, SP		// plenty of scratch
    15		ANDL	$~15, SP		// round SP down to a multiple of 16
    16		MOVL	AX, 120(SP)		// save argc, argv away
    17		MOVL	BX, 124(SP)
    18	
    19		// set default stack bounds.
    20		// _cgo_init may update stackguard.
    21		MOVL	$runtime·g0(SB), BP	// BP = &g0
    22		LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104: provisional lower bound
    23		MOVL	BX, g_stackguard0(BP)
    24		MOVL	BX, g_stackguard1(BP)
    25		MOVL	BX, (g_stack+stack_lo)(BP)
    26		MOVL	SP, (g_stack+stack_hi)(BP)
    27		
    28		// find out information about the processor we're on
    29	#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
    30		JMP 	has_cpuid
    31	#else
    32		// first see if CPUID instruction is supported.
    33		PUSHFL			// keep the original EFLAGS on the stack
    34		PUSHFL			// second copy, to be modified
    35		XORL	$(1<<21), 0(SP) // flip ID bit
    36		POPFL			// try to load EFLAGS with the ID bit flipped
    37		PUSHFL
    38		POPL	AX		// AX = EFLAGS as the CPU actually kept it
    39		XORL	0(SP), AX	// AX = bits that differ from the original EFLAGS
    40		POPFL	// restore EFLAGS
    41		TESTL	$(1<<21), AX
    42		JNE 	has_cpuid	// the ID bit could be toggled, so CPUID is available
    43	#endif
    44	
    45	bad_proc: // show that the program requires MMX.
    46		MOVL	$2, 0(SP)	// write(2, bad_proc_msg, 0x3d)
    47		MOVL	$bad_proc_msg<>(SB), 4(SP)
    48		MOVL	$0x3d, 8(SP)	// length; matches the GLOBL size of bad_proc_msg
    49		CALL	runtime·write(SB)
    50		MOVL	$1, 0(SP)	// exit(1)
    51		CALL	runtime·exit(SB)
    52		INT	$3		// trap if exit returns
    53	
    54	has_cpuid:
    55		MOVL	$0, AX
    56		CPUID
    57		MOVL	AX, SI		// SI = EAX from CPUID leaf 0; compared against 7 at eax7
    58		CMPL	AX, $0
    59		JE	nocpuinfo	// no leaves beyond 0: skip all feature detection
    60	
    61		// Figure out how to serialize RDTSC.
    62		// On Intel processors LFENCE is enough. AMD requires MFENCE.
    63		// Don't know about the rest, so let's do MFENCE.
    64		CMPL	BX, $0x756E6547  // "Genu"
    65		JNE	notintel
    66		CMPL	DX, $0x49656E69  // "ineI"
    67		JNE	notintel
    68		CMPL	CX, $0x6C65746E  // "ntel"
    69		JNE	notintel
    70		MOVB	$1, runtime·isIntel(SB)
    71		MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
    72	notintel:
    73	
    74		// Load EAX=1 cpuid flags
    75		MOVL	$1, AX
    76		CPUID
    77		MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
    78		MOVL	AX, runtime·processorVersionInfo(SB)
    79	
    80		// Check for MMX support
    81		TESTL	$(1<<23), DX // MMX
    82		JZ	bad_proc	// MMX is mandatory: print the message and exit
    83	
    84		TESTL	$(1<<26), DX // SSE2
    85		SETNE	runtime·support_sse2(SB)
    86	
    87		TESTL	$(1<<9), DI // SSSE3
    88		SETNE	runtime·support_ssse3(SB)
    89	
    90		TESTL	$(1<<19), DI // SSE4.1
    91		SETNE	runtime·support_sse41(SB)
    92	
    93		TESTL	$(1<<20), DI // SSE4.2
    94		SETNE	runtime·support_sse42(SB)
    95	
    96		TESTL	$(1<<23), DI // POPCNT
    97		SETNE	runtime·support_popcnt(SB)
    98	
    99		TESTL	$(1<<25), DI // AES
   100		SETNE	runtime·support_aes(SB)
   101	
   102		TESTL	$(1<<27), DI // OSXSAVE
   103		SETNE	runtime·support_osxsave(SB)
   104	
   105		// If OS support for XMM and YMM is not present
   106		// support_avx will be set back to false later.
   107		TESTL	$(1<<28), DI // AVX
   108		SETNE	runtime·support_avx(SB)
   109	
   110	eax7:
   111		// Load EAX=7/ECX=0 cpuid flags
   112		CMPL	SI, $7
   113		JLT	osavx		// leaf 7 not available: skip the BMI/AVX2/ERMS checks
   114		MOVL	$7, AX
   115		MOVL	$0, CX
   116		CPUID
   117	
   118		TESTL	$(1<<3), BX // BMI1
   119		SETNE	runtime·support_bmi1(SB)
   120	
   121		// If OS support for XMM and YMM is not present
   122		// support_avx2 will be set back to false later.
   123		TESTL	$(1<<5), BX // AVX2
   124		SETNE	runtime·support_avx2(SB)
   125	
   126		TESTL	$(1<<8), BX // BMI2
   127		SETNE	runtime·support_bmi2(SB)
   128	
   129		TESTL	$(1<<9), BX // ERMS
   130		SETNE	runtime·support_erms(SB)
   131	
   132	osavx:
   133		// nacl does not support XGETBV to test
   134		// for XMM and YMM OS support.
   135	#ifndef GOOS_nacl
   136		CMPB	runtime·support_osxsave(SB), $1
   137		JNE	noavx
   138		MOVL	$0, CX
   139		// For XGETBV, OSXSAVE bit is required and sufficient
   140		XGETBV
   141		ANDL	$6, AX
   142		CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
   143		JE nocpuinfo	// both bits set: keep support_avx/support_avx2 as detected
   144	#endif
   145	noavx:
   146		MOVB $0, runtime·support_avx(SB)
   147		MOVB $0, runtime·support_avx2(SB)
   148	
   149	nocpuinfo:
   150		// if there is an _cgo_init, call it to let it
   151		// initialize and to set up GS.  if not,
   152		// we set up GS ourselves.
   153		MOVL	_cgo_init(SB), AX
   154		TESTL	AX, AX
   155		JZ	needtls
   156		MOVL	$setg_gcc<>(SB), BX
   157		MOVL	BX, 4(SP)	// second argument: address of setg_gcc
   158		MOVL	BP, 0(SP)	// first argument: &g0 (BP was loaded above)
   159		CALL	AX
   160	
   161		// update stackguard after _cgo_init
   162		MOVL	$runtime·g0(SB), CX
   163		MOVL	(g_stack+stack_lo)(CX), AX
   164		ADDL	$const__StackGuard, AX	// stackguard = stack.lo + _StackGuard
   165		MOVL	AX, g_stackguard0(CX)
   166		MOVL	AX, g_stackguard1(CX)
   167	
   168	#ifndef GOOS_windows
   169		// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   170		JMP ok
   171	#endif
   172	needtls:
   173	#ifdef GOOS_plan9
   174		// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   175		JMP	ok
   176	#endif
   177	
   178		// set up %gs
   179		CALL	runtime·ldt0setup(SB)
   180	
   181		// store through it, to make sure it works
   182		get_tls(BX)
   183		MOVL	$0x123, g(BX)
   184		MOVL	runtime·m0+m_tls(SB), AX	// read the same slot back directly from m0.tls
   185		CMPL	AX, $0x123
   186		JEQ	ok
   187		MOVL	AX, 0	// abort
   188	ok:
   189		// set up m and g "registers"
   190		get_tls(BX)
   191		LEAL	runtime·g0(SB), DX
   192		MOVL	DX, g(BX)	// current g = &g0
   193		LEAL	runtime·m0(SB), AX
   194	
   195		// save m->g0 = g0
   196		MOVL	DX, m_g0(AX)
   197		// save g0->m = m0
   198		MOVL	AX, g_m(DX)
   199	
   200		CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   201	
   202		// convention is D is always cleared
   203		CLD
   204	
   205		CALL	runtime·check(SB)
   206	
   207		// saved argc, argv
   208		MOVL	120(SP), AX
   209		MOVL	AX, 0(SP)	// argc, first argument of runtime·args
   210		MOVL	124(SP), AX
   211		MOVL	AX, 4(SP)	// argv, second argument of runtime·args
   212		CALL	runtime·args(SB)
   213		CALL	runtime·osinit(SB)
   214		CALL	runtime·schedinit(SB)
   215	
   216		// create a new goroutine to start program
   217		PUSHL	$runtime·mainPC(SB)	// entry
   218		PUSHL	$0	// arg size
   219		CALL	runtime·newproc(SB)
   220		POPL	AX	// discard the two words pushed for newproc
   221		POPL	AX
   222	
   223		// start this M
   224		CALL	runtime·mstart(SB)
   225	
   226		INT $3	// trap if mstart ever returns
   227		RET
   228	
   229	DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"	// message printed by rt0_go at bad_proc
   230	DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
   231	DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
   232	DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
   233	DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
   234	DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
   235	DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
   236	DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
   237	DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa	// trailing newline
   238	GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d	// 0x3d = 61 bytes: 60 characters of text plus the newline
   239	
   240	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main
   241	GLOBL	runtime·mainPC(SB),RODATA,$4	// rt0_go pushes &mainPC as the entry argument to newproc
   242	
   243	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// executes INT 3 (breakpoint trap), then returns
   244		INT $3
   245		RET
   246	
   247	TEXT runtime·asminit(SB),NOSPLIT,$0-0
   248		// Linux and MinGW start the FPU in extended double precision.
   249		// Other operating systems use double precision.
   250		// Change to double precision to match them,
   251		// and to match other hardware that only has double.
   252		FLDCW	runtime·controlWord64(SB)	// load the x87 control word from controlWord64
   253		RET
   254	
   255	/*
   256	 *  go-routine
   257	 */
   258	
   259	// void gosave(Gobuf*)
   260	// save the caller's SP, PC and the current g in Gobuf, and zero its ret field; setjmp
   261	TEXT runtime·gosave(SB), NOSPLIT, $0-4
   262		MOVL	buf+0(FP), AX		// gobuf
   263		LEAL	buf+0(FP), BX		// caller's SP
   264		MOVL	BX, gobuf_sp(AX)
   265		MOVL	0(SP), BX		// caller's PC
   266		MOVL	BX, gobuf_pc(AX)
   267		MOVL	$0, gobuf_ret(AX)
   268		// Assert ctxt is zero. See func save.
   269		MOVL	gobuf_ctxt(AX), BX
   270		TESTL	BX, BX
   271		JZ	2(PC)			// ctxt == 0: skip the badctxt call
   272		CALL	runtime·badctxt(SB)
   273		get_tls(CX)
   274		MOVL	g(CX), BX		// BX = current g
   275		MOVL	BX, gobuf_g(AX)
   276		RET
   277	
   278	// void gogo(Gobuf*)
   279	// restore g, SP, ret (in AX) and ctxt (in DX) from Gobuf, then jump to its pc; longjmp
   280	TEXT runtime·gogo(SB), NOSPLIT, $8-4
   281		MOVL	buf+0(FP), BX		// gobuf
   282	
   283		// If ctxt is not nil, invoke deletion barrier before overwriting.
   284		MOVL	gobuf_ctxt(BX), DX
   285		TESTL	DX, DX
   286		JZ	nilctxt
   287		LEAL	gobuf_ctxt(BX), AX
   288		MOVL	AX, 0(SP)		// first argument: &gobuf.ctxt
   289		MOVL	$0, 4(SP)		// second argument: 0, the value stored below
   290		CALL	runtime·writebarrierptr_prewrite(SB)
   291		MOVL	buf+0(FP), BX		// reload gobuf after the call
   292	
   293	nilctxt:
   294		MOVL	gobuf_g(BX), DX
   295		MOVL	0(DX), CX		// make sure g != nil
   296		get_tls(CX)
   297		MOVL	DX, g(CX)		// current g = gobuf.g
   298		MOVL	gobuf_sp(BX), SP	// restore SP
   299		MOVL	gobuf_ret(BX), AX
   300		MOVL	gobuf_ctxt(BX), DX
   301		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   302		MOVL	$0, gobuf_ret(BX)
   303		MOVL	$0, gobuf_ctxt(BX)
   304		MOVL	gobuf_pc(BX), BX
   305		JMP	BX			// resume at the saved PC
   306	
   307	// func mcall(fn func(*g))
   308	// Switch to m->g0's stack, call fn(g).
   309	// Fn must never return. It should gogo(&g->sched)
   310	// to keep running g.
   311	TEXT runtime·mcall(SB), NOSPLIT, $0-4
   312		MOVL	fn+0(FP), DI
   313	
   314		get_tls(DX)
   315		MOVL	g(DX), AX	// save state in g->sched
   316		MOVL	0(SP), BX	// caller's PC
   317		MOVL	BX, (g_sched+gobuf_pc)(AX)
   318		LEAL	fn+0(FP), BX	// caller's SP
   319		MOVL	BX, (g_sched+gobuf_sp)(AX)
   320		MOVL	AX, (g_sched+gobuf_g)(AX)
   321	
   322		// switch to m->g0 & its stack, call fn
   323		MOVL	g(DX), BX
   324		MOVL	g_m(BX), BX
   325		MOVL	m_g0(BX), SI
   326		CMPL	SI, AX	// if g == m->g0 call badmcall
   327		JNE	3(PC)	// g != m->g0: skip the badmcall jump
   328		MOVL	$runtime·badmcall(SB), AX
   329		JMP	AX
   330		MOVL	SI, g(DX)	// g = m->g0
   331		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   332		PUSHL	AX	// argument to fn: the g we just switched away from
   333		MOVL	DI, DX	// DX = fn (the func value)
   334		MOVL	0(DI), DI	// DI = code pointer stored at fn
   335		CALL	DI
   336		POPL	AX
   337		MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
   338		JMP	AX
   339		RET
   340	
   341	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   342	// of the G stack. We need to distinguish the routine that
   343	// lives at the bottom of the G stack from the one that lives
   344	// at the top of the system stack because the one at the top of
   345	// the system stack terminates the stack walk (see topofstack()).
   346	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// systemstack stores this address as g->sched.pc
   347		RET
   348	
   349	// func systemstack(fn func())
   350	TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   351		MOVL	fn+0(FP), DI	// DI = fn
   352		get_tls(CX)
   353		MOVL	g(CX), AX	// AX = g
   354		MOVL	g_m(AX), BX	// BX = m
   355	
   356		MOVL	m_gsignal(BX), DX	// DX = gsignal
   357		CMPL	AX, DX
   358		JEQ	noswitch	// running on gsignal: call fn in place
   359	
   360		MOVL	m_g0(BX), DX	// DX = g0
   361		CMPL	AX, DX
   362		JEQ	noswitch	// running on g0: call fn in place
   363	
   364		MOVL	m_curg(BX), BP
   365		CMPL	AX, BP
   366		JEQ	switch	// running on curg: switch to g0 first
   367		
   368		// Bad: g is not gsignal, not g0, not curg. What is it?
   369		// Hide call from linker nosplit analysis.
   370		MOVL	$runtime·badsystemstack(SB), AX
   371		CALL	AX
   372	
   373	switch:
   374		// save our state in g->sched. Pretend to
   375		// be systemstack_switch if the G stack is scanned.
   376		MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   377		MOVL	SP, (g_sched+gobuf_sp)(AX)
   378		MOVL	AX, (g_sched+gobuf_g)(AX)
   379	
   380		// switch to g0
   381		get_tls(CX)
   382		MOVL	DX, g(CX)	// DX still holds g0 from the check above
   383		MOVL	(g_sched+gobuf_sp)(DX), BX
   384		// make it look like mstart called systemstack on g0, to stop traceback
   385		SUBL	$4, BX
   386		MOVL	$runtime·mstart(SB), DX
   387		MOVL	DX, 0(BX)	// fake return address slot: mstart
   388		MOVL	BX, SP
   389	
   390		// call target function
   391		MOVL	DI, DX	// DX = fn (the func value)
   392		MOVL	0(DI), DI	// DI = code pointer stored at fn
   393		CALL	DI
   394	
   395		// switch back to g
   396		get_tls(CX)
   397		MOVL	g(CX), AX
   398		MOVL	g_m(AX), BX
   399		MOVL	m_curg(BX), AX
   400		MOVL	AX, g(CX)	// g = m->curg
   401		MOVL	(g_sched+gobuf_sp)(AX), SP	// back on the SP saved at switch
   402		MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it is consumed
   403		RET
   404	
   405	noswitch:
   406		// already on system stack, just call directly
   407		MOVL	DI, DX	// DX = fn (the func value)
   408		MOVL	0(DI), DI	// DI = code pointer stored at fn
   409		CALL	DI
   410		RET
   411	
   412	/*
   413	 * support for morestack
   414	 */
   415	
   416	// Called during function prolog when more stack is needed.
   417	//
   418	// The traceback routines see morestack on a g0 as being
   419	// the top of a stack (for example, morestack calling newstack
   420	// calling the scheduler calling newm calling gc), so we must
   421	// record an argument size. For that purpose, it has no arguments.
   422	TEXT runtime·morestack(SB),NOSPLIT,$0-0
   423		// Cannot grow scheduler stack (m->g0).
   424		get_tls(CX)
   425		MOVL	g(CX), BX
   426		MOVL	g_m(BX), BX	// BX = m
   427		MOVL	m_g0(BX), SI
   428		CMPL	g(CX), SI
   429		JNE	3(PC)	// g != m->g0: skip the fatal path
   430		CALL	runtime·badmorestackg0(SB)
   431		INT	$3
   432	
   433		// Cannot grow signal stack.
   434		MOVL	m_gsignal(BX), SI
   435		CMPL	g(CX), SI
   436		JNE	3(PC)	// g != m->gsignal: skip the fatal path
   437		CALL	runtime·badmorestackgsignal(SB)
   438		INT	$3
   439	
   440		// Called from f.
   441		// Set m->morebuf to f's caller.
   442		MOVL	4(SP), DI	// f's caller's PC
   443		MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   444		LEAL	8(SP), CX	// f's caller's SP
   445		MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   446		get_tls(CX)	// CX was reused as scratch above; reload the TLS base
   447		MOVL	g(CX), SI
   448		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   449	
   450		// Set g->sched to context in f.
   451		MOVL	0(SP), AX	// f's PC
   452		MOVL	AX, (g_sched+gobuf_pc)(SI)
   453		MOVL	SI, (g_sched+gobuf_g)(SI)
   454		LEAL	4(SP), AX	// f's SP
   455		MOVL	AX, (g_sched+gobuf_sp)(SI)
   456		// newstack will fill gobuf.ctxt.
   457	
   458		// Call newstack on m->g0's stack.
   459		MOVL	m_g0(BX), BP
   460		MOVL	BP, g(CX)	// g = m->g0
   461		MOVL	(g_sched+gobuf_sp)(BP), AX
   462		MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   463		MOVL	AX, SP
   464		PUSHL	DX	// ctxt argument
   465		CALL	runtime·newstack(SB)
   466		MOVL	$0, 0x1003	// crash if newstack returns
   467		POPL	DX	// keep balance check happy
   468		RET
   469	
   470	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0	// morestack entry that passes a zero ctxt
   471		MOVL	$0, DX	// DX is what morestack pushes as the ctxt argument
   472		JMP runtime·morestack(SB)
   473	
   474	// reflectcall: call a function with the given argument list
   475	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   476	// we don't have variable-sized frames, so we use a small number
   477	// of constant-sized-frame functions to encode a few bits of size in the pc.
   478	// Caution: ugly multiline assembly macros in your future!
   479	// DISPATCH: with argsize in CX, jump to NAME when CX <= MAXSIZE (unsigned); otherwise fall through.
   480	#define DISPATCH(NAME,MAXSIZE)		\
   481		CMPL	CX, $MAXSIZE;		\
   482		JA	3(PC);			\
   483		MOVL	$NAME(SB), AX;		\
   484		JMP	AX
   485	// Note: can't just "JMP NAME(SB)" - bad inlining results.
   486	
   487	TEXT reflect·call(SB), NOSPLIT, $0-0	// reflect.call: tail-jumps to runtime's reflectcall
   488		JMP	·reflectcall(SB)
   489	
   490	TEXT ·reflectcall(SB), NOSPLIT, $0-20
   491		MOVL	argsize+12(FP), CX	// CX = argsize, tested by each DISPATCH in turn
   492		DISPATCH(runtime·call16, 16)
   493		DISPATCH(runtime·call32, 32)
   494		DISPATCH(runtime·call64, 64)
   495		DISPATCH(runtime·call128, 128)
   496		DISPATCH(runtime·call256, 256)
   497		DISPATCH(runtime·call512, 512)
   498		DISPATCH(runtime·call1024, 1024)
   499		DISPATCH(runtime·call2048, 2048)
   500		DISPATCH(runtime·call4096, 4096)
   501		DISPATCH(runtime·call8192, 8192)
   502		DISPATCH(runtime·call16384, 16384)
   503		DISPATCH(runtime·call32768, 32768)
   504		DISPATCH(runtime·call65536, 65536)
   505		DISPATCH(runtime·call131072, 131072)
   506		DISPATCH(runtime·call262144, 262144)
   507		DISPATCH(runtime·call524288, 524288)
   508		DISPATCH(runtime·call1048576, 1048576)
   509		DISPATCH(runtime·call2097152, 2097152)
   510		DISPATCH(runtime·call4194304, 4194304)
   511		DISPATCH(runtime·call8388608, 8388608)
   512		DISPATCH(runtime·call16777216, 16777216)
   513		DISPATCH(runtime·call33554432, 33554432)
   514		DISPATCH(runtime·call67108864, 67108864)
   515		DISPATCH(runtime·call134217728, 134217728)
   516		DISPATCH(runtime·call268435456, 268435456)
   517		DISPATCH(runtime·call536870912, 536870912)
   518		DISPATCH(runtime·call1073741824, 1073741824)
   519		MOVL	$runtime·badreflectcall(SB), AX	// argsize exceeds the largest frame (1<<30)
   520		JMP	AX
   521	
   522	#define CALLFN(NAME,MAXSIZE)			\
   523	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   524		NO_LOCAL_POINTERS;			\
   525		/* copy argsize bytes from argptr to the bottom of our frame */		\
   526		MOVL	argptr+8(FP), SI;		\
   527		MOVL	argsize+12(FP), CX;		\
   528		MOVL	SP, DI;				\
   529		REP;MOVSB;				\
   530		/* call the code pointer stored at f, with f itself in DX */			\
   531		MOVL	f+4(FP), DX;			\
   532		MOVL	(DX), AX; 			\
   533		PCDATA  $PCDATA_StackMapIndex, $0;	\
   534		CALL	AX;				\
   535		/* copy results back via callRet, skipping the first retoffset bytes on both sides */		\
   536		MOVL	argtype+0(FP), DX;		\
   537		MOVL	argptr+8(FP), DI;		\
   538		MOVL	argsize+12(FP), CX;		\
   539		MOVL	retoffset+16(FP), BX;		\
   540		MOVL	SP, SI;				\
   541		ADDL	BX, DI;				\
   542		ADDL	BX, SI;				\
   543		SUBL	BX, CX;				\
   544		CALL	callRet<>(SB);			\
   545		RET
   546	
   547	// callRet copies return values back at the end of call*. This is a
   548	// separate function so it can allocate stack space for the arguments
   549	// to reflectcallmove. It does not follow the Go ABI; it expects its
   550	// arguments in registers (DX, DI, SI, CX, as loaded by CALLFN).
   551	TEXT callRet<>(SB), NOSPLIT, $16-0
   552		MOVL	DX, 0(SP)	// argtype
   553		MOVL	DI, 4(SP)	// argptr + retoffset
   554		MOVL	SI, 8(SP)	// caller's SP + retoffset
   555		MOVL	CX, 12(SP)	// argsize - retoffset
   556		CALL	runtime·reflectcallmove(SB)
   557		RET
   558	
   559	CALLFN(·call16, 16)	// one fixed-frame function per DISPATCH entry in reflectcall
   560	CALLFN(·call32, 32)
   561	CALLFN(·call64, 64)
   562	CALLFN(·call128, 128)
   563	CALLFN(·call256, 256)
   564	CALLFN(·call512, 512)
   565	CALLFN(·call1024, 1024)
   566	CALLFN(·call2048, 2048)
   567	CALLFN(·call4096, 4096)
   568	CALLFN(·call8192, 8192)
   569	CALLFN(·call16384, 16384)
   570	CALLFN(·call32768, 32768)
   571	CALLFN(·call65536, 65536)
   572	CALLFN(·call131072, 131072)
   573	CALLFN(·call262144, 262144)
   574	CALLFN(·call524288, 524288)
   575	CALLFN(·call1048576, 1048576)
   576	CALLFN(·call2097152, 2097152)
   577	CALLFN(·call4194304, 4194304)
   578	CALLFN(·call8388608, 8388608)
   579	CALLFN(·call16777216, 16777216)
   580	CALLFN(·call33554432, 33554432)
   581	CALLFN(·call67108864, 67108864)
   582	CALLFN(·call134217728, 134217728)
   583	CALLFN(·call268435456, 268435456)
   584	CALLFN(·call536870912, 536870912)
   585	CALLFN(·call1073741824, 1073741824)
   586	
   587	TEXT runtime·procyield(SB),NOSPLIT,$0-0	// NOTE(review): declared $0-0 although cycles+0(FP) is read; confirm whether $0-4 is intended
   588		MOVL	cycles+0(FP), AX	// AX = number of PAUSE iterations
   589	again:
   590		PAUSE
   591		SUBL	$1, AX
   592		JNZ	again	// loop until the counter reaches zero
   593		RET
   594	
   595	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   596		// Stores are already ordered on x86, so this is just a
   597		// compile barrier: the body is a bare RET.
   598		RET
   599	
   600	// void jmpdefer(fn, sp);
   601	// called from deferreturn.
   602	// 1. pop the caller
   603	// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   604	//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   605	//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   606	//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   607	// 3. jmp to the argument
   608	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   609		MOVL	fv+0(FP), DX	// fn
   610		MOVL	argp+4(FP), BX	// caller sp
   611		LEAL	-4(BX), SP	// caller sp after CALL
   612	#ifdef GOBUILDMODE_shared
   613		SUBL	$16, (SP)	// return to CALL again
   614	#else
   615		SUBL	$5, (SP)	// return to CALL again
   616	#endif
   617		MOVL	0(DX), BX	// BX = code pointer stored at fn; DX keeps fn itself
   618		JMP	BX	// but first run the deferred function
   619	
   620	// Save state of caller into g->sched. AX and BX are preserved.
   621	TEXT gosave<>(SB),NOSPLIT,$0
   622		PUSHL	AX
   623		PUSHL	BX
   624		get_tls(BX)
   625		MOVL	g(BX), BX	// BX = current g
   626		LEAL	arg+0(FP), AX	// AX = address just above our return PC, i.e. the caller's SP
   627		MOVL	AX, (g_sched+gobuf_sp)(BX)
   628		MOVL	-4(AX), AX	// AX = our return PC, a PC inside the caller
   629		MOVL	AX, (g_sched+gobuf_pc)(BX)
   630		MOVL	$0, (g_sched+gobuf_ret)(BX)
   631		// Assert ctxt is zero. See func save.
   632		MOVL	(g_sched+gobuf_ctxt)(BX), AX
   633		TESTL	AX, AX
   634		JZ	2(PC)	// ctxt == 0: skip the badctxt call
   635		CALL	runtime·badctxt(SB)
   636		POPL	BX
   637		POPL	AX
   638		RET
   639	
   640	// func asmcgocall(fn, arg unsafe.Pointer) int32
   641	// Call fn(arg) on the scheduler stack,
   642	// aligned appropriately for the gcc ABI.
   643	// See cgocall.go for more details.
   644	TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   645		MOVL	fn+0(FP), AX
   646		MOVL	arg+4(FP), BX
   647	
   648		MOVL	SP, DX	// DX = SP on entry, used below to compute the stack depth
   649	
   650		// Figure out if we need to switch to m->g0 stack.
   651		// We get called to create new OS threads too, and those
   652		// come in on the m->g0 stack already.
   653		get_tls(CX)
   654		MOVL	g(CX), BP
   655		MOVL	g_m(BP), BP
   656		MOVL	m_g0(BP), SI	// SI = m->g0
   657		MOVL	g(CX), DI	// DI = current g
   658		CMPL	SI, DI
   659		JEQ	noswitch
   660		CALL	gosave<>(SB)	// record the caller's SP and PC in g->sched
   661		get_tls(CX)
   662		MOVL	SI, g(CX)	// g = m->g0
   663		MOVL	(g_sched+gobuf_sp)(SI), SP
   664	
   665	noswitch:
   666		// Now on a scheduling stack (a pthread-created stack).
   667		SUBL	$32, SP
   668		ANDL	$~15, SP	// alignment, perhaps unnecessary
   669		MOVL	DI, 8(SP)	// save g
   670		MOVL	(g_stack+stack_hi)(DI), DI
   671		SUBL	DX, DI	// DI = g->stack.hi - entry SP
   672		MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   673		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   674		CALL	AX
   675	
   676		// Restore registers, g, stack pointer.
   677		get_tls(CX)
   678		MOVL	8(SP), DI	// DI = saved g
   679		MOVL	(g_stack+stack_hi)(DI), SI
   680		SUBL	4(SP), SI	// SI = g->stack.hi - saved depth = entry SP, relative to the current stack.hi
   681		MOVL	DI, g(CX)
   682		MOVL	SI, SP
   683	
   684		MOVL	AX, ret+8(FP)	// result of fn, left in AX by the call
   685		RET
   686	
   687	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   688	// Turn the fn into a Go func (by taking its address) and call
   689	// cgocallback_gofunc with the remaining arguments copied unchanged.
   690	TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
   691		LEAL	fn+0(FP), AX	// &fn: the address of the argument slot serves as the FuncVal*
   692		MOVL	AX, 0(SP)
   693		MOVL	frame+4(FP), AX
   694		MOVL	AX, 4(SP)
   695		MOVL	framesize+8(FP), AX
   696		MOVL	AX, 8(SP)
   697		MOVL	ctxt+12(FP), AX
   698		MOVL	AX, 12(SP)
   699		MOVL	$runtime·cgocallback_gofunc(SB), AX
   700		CALL	AX	// indirect call through AX
   701		RET
   702	
   703	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   704	// See cgocall.go for more details.
   705	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   706		NO_LOCAL_POINTERS
   707	
   708		// If g is nil, Go did not create the current thread.
   709		// Call needm to obtain one for temporary use.
   710		// In this case, we're running on the thread stack, so there's
   711		// lots of space, but the linker doesn't know. Hide the call from
   712		// the linker analysis by using an indirect call through AX.
   713		get_tls(CX)
   714	#ifdef GOOS_windows
   715		MOVL	$0, BP
   716		CMPL	CX, $0
   717		JEQ	2(PC) // TODO
   718	#endif
   719		MOVL	g(CX), BP	// on windows this load is skipped when CX == 0, leaving BP = 0
   720		CMPL	BP, $0
   721		JEQ	needm
   722		MOVL	g_m(BP), BP
   723		MOVL	BP, DX // saved copy of oldm
   724		JMP	havem
   725	needm:
   726		MOVL	$0, 0(SP)
   727		MOVL	$runtime·needm(SB), AX
   728		CALL	AX
   729		MOVL	0(SP), DX	// DX = the slot zeroed above (presumably still 0 = no old m; confirm needm leaves it untouched)
   730		get_tls(CX)
   731		MOVL	g(CX), BP
   732		MOVL	g_m(BP), BP
   733	
   734		// Set m->g0->sched.sp = SP, so that if a panic happens
   735		// during the function we are about to execute, it will
   736		// have a valid SP to run on the g0 stack.
   737		// The next few lines (after the havem label)
   738		// will save this SP onto the stack and then write
   739		// the same SP back to m->g0->sched.sp. That seems redundant,
   740		// but if an unrecovered panic happens, unwindm will
   741		// restore the g->sched.sp from the stack location
   742		// and then systemstack will try to use it. If we don't set it here,
   743		// that restored SP will be uninitialized (typically 0) and
   744		// will not be usable.
   745		MOVL	m_g0(BP), SI
   746		MOVL	SP, (g_sched+gobuf_sp)(SI)
   747	
   748	havem:
   749		// Now there's a valid m, and we're running on its m->g0.
   750		// Save current m->g0->sched.sp on stack and then set it to SP.
   751		// Save current sp in m->g0->sched.sp in preparation for
   752		// switch back to m->curg stack.
   753		// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   754		MOVL	m_g0(BP), SI
   755		MOVL	(g_sched+gobuf_sp)(SI), AX
   756		MOVL	AX, 0(SP)
   757		MOVL	SP, (g_sched+gobuf_sp)(SI)
   758	
   759		// Switch to m->curg stack and call runtime.cgocallbackg.
   760		// Because we are taking over the execution of m->curg
   761		// but *not* resuming what had been running, we need to
   762		// save that information (m->curg->sched) so we can restore it.
   763		// We can restore m->curg->sched.sp easily, because calling
   764		// runtime.cgocallbackg leaves SP unchanged upon return.
   765		// To save m->curg->sched.pc, we push it onto the stack.
   766		// This has the added benefit that it looks to the traceback
   767		// routine like cgocallbackg is going to return to that
   768		// PC (because the frame we allocate below has the same
   769		// size as cgocallback_gofunc's frame declared above)
   770		// so that the traceback will seamlessly trace back into
   771		// the earlier calls.
   772		//
   773		// In the new goroutine, 0(SP) holds ctxt and 4(SP) holds the saved oldm (DX) register.
   774		// 8(SP) is unused.
   775		MOVL	m_curg(BP), SI
   776		MOVL	SI, g(CX)	// g = m->curg
   777		MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   778		MOVL	(g_sched+gobuf_pc)(SI), BP
   779		MOVL	BP, -4(DI)	// saved sched.pc sits where a return address would
   780		MOVL	ctxt+12(FP), CX
   781		LEAL	-(4+12)(DI), SP	// 4 bytes for the saved pc plus a 12-byte frame
   782		MOVL	DX, 4(SP)
   783		MOVL	CX, 0(SP)
   784		CALL	runtime·cgocallbackg(SB)
   785		MOVL	4(SP), DX	// reload oldm saved before the call
   786	
   787		// Restore g->sched (== m->curg->sched) from saved values.
   788		get_tls(CX)
   789		MOVL	g(CX), SI
   790		MOVL	12(SP), BP	// the pc stored at -4(DI) above
   791		MOVL	BP, (g_sched+gobuf_pc)(SI)
   792		LEAL	(12+4)(SP), DI	// the original sched.sp
   793		MOVL	DI, (g_sched+gobuf_sp)(SI)
   794	
   795		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   796		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   797		// so we do not have to restore it.)
   798		MOVL	g(CX), BP
   799		MOVL	g_m(BP), BP
   800		MOVL	m_g0(BP), SI
   801		MOVL	SI, g(CX)	// g = m->g0
   802		MOVL	(g_sched+gobuf_sp)(SI), SP
   803		MOVL	0(SP), AX	// the previous m->g0->sched.sp saved at havem
   804		MOVL	AX, (g_sched+gobuf_sp)(SI)
   805		
   806		// If the m on entry was nil, we called needm above to borrow an m
   807		// for the duration of the call. Since the call is over, return it with dropm.
   808		CMPL	DX, $0
   809		JNE 3(PC)	// there was an m on entry: skip dropm
   810		MOVL	$runtime·dropm(SB), AX
   811		CALL	AX
   812	
   813		// Done!
   814		RET
   815	
   816	// void setg(G*); set g. for use by needm.
   817	TEXT runtime·setg(SB), NOSPLIT, $0-4
   818		MOVL	gg+0(FP), BX
   819	#ifdef GOOS_windows
   820		CMPL	BX, $0
   821		JNE	settls
   822		MOVL	$0, 0x14(FS)	// gg == nil: clear the slot at 0x14(FS) and return
   823		RET
   824	settls:
   825		MOVL	g_m(BX), AX
   826		LEAL	m_tls(AX), AX	// AX = &gg->m->tls
   827		MOVL	AX, 0x14(FS)
   828	#endif
   829		get_tls(CX)
   830		MOVL	BX, g(CX)	// current g = gg
   831		RET
   832	
   833	// void setg_gcc(G*); set g. for use by gcc (rt0_go passes its address to _cgo_init)
   834	TEXT setg_gcc<>(SB), NOSPLIT, $0
   835		get_tls(AX)
   836		MOVL	gg+0(FP), DX
   837		MOVL	DX, g(AX)	// current g = gg
   838		RET
   839	
   840	// check that g->stack.lo < SP < g->stack.hi (both compares are strict); trap with INT 3 otherwise
   841	TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   842		get_tls(CX)
   843		MOVL	g(CX), AX
   844		CMPL	(g_stack+stack_hi)(AX), SP
   845		JHI	2(PC)	// stack.hi > SP (unsigned): skip the trap
   846		INT	$3
   847		CMPL	SP, (g_stack+stack_lo)(AX)
   848		JHI	2(PC)	// SP > stack.lo (unsigned): skip the trap
   849		INT	$3
   850		RET
   851	
   852	TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8	// returns the word stored just below the address passed in argp
   853		MOVL	argp+0(FP),AX		// addr of first arg
   854		MOVL	-4(AX),AX		// get calling pc
   855		MOVL	AX, ret+4(FP)
   856		RET
   857	
   858	// func cputicks() int64
   859	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   860		CMPB	runtime·support_sse2(SB), $1
   861		JNE	done	// no SSE2: issue no fence before RDTSC
   862		CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   863		JNE	mfence	// flag is set by rt0_go only for GenuineIntel
   864		BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
   865		JMP	done
   866	mfence:
   867		BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
   868	done:
   869		RDTSC
   870		MOVL	AX, ret_lo+0(FP)	// low 32 bits of the counter
   871		MOVL	DX, ret_hi+4(FP)	// high 32 bits of the counter
   872		RET
   873	
   874	TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
   875		// set up ldt 7 to point at m0.tls
   876		// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   877		// the entry number is just a hint.  setldt will set up GS with what it used.
   878		MOVL	$7, 0(SP)	// first argument: requested entry number
   879		LEAL	runtime·m0+m_tls(SB), AX
   880		MOVL	AX, 4(SP)	// second argument: &m0.tls
   881		MOVL	$32, 8(SP)	// sizeof(tls array)
   882		CALL	runtime·setldt(SB)
   883		RET
   884	
   885	TEXT runtime·emptyfunc(SB),0,$0-0	// flags are 0, not NOSPLIT; rt0_go calls this to "fault if stack check is wrong"
   886		RET
   887	
   888	// memhash_varlen(p unsafe.Pointer, h seed) uintptr
   889	// redirects to memhash(p, h, size) using the size
   890	// stored in the closure (read from 4(DX)).
   891	TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
   892		GO_ARGS
   893		NO_LOCAL_POINTERS
   894		MOVL	p+0(FP), AX
   895		MOVL	h+4(FP), BX
   896		MOVL	4(DX), CX	// CX = size, the word at offset 4 of the closure in DX
   897		MOVL	AX, 0(SP)	// memhash argument 1: p
   898		MOVL	BX, 4(SP)	// memhash argument 2: h
   899		MOVL	CX, 8(SP)	// memhash argument 3: size
   900		CALL	runtime·memhash(SB)
   901		MOVL	12(SP), AX	// memhash result, in the slot after its three arguments
   902		MOVL	AX, ret+8(FP)
   903		RET
   904	
   905	// hash function using AES hardware instructions; loads AX/BX/DX and tail-jumps to aeshashbody
   906	TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   907		MOVL	p+0(FP), AX	// ptr to data
   908		MOVL	s+8(FP), BX	// size
   909		LEAL	ret+12(FP), DX	// address of the result slot
   910		JMP	runtime·aeshashbody(SB)
   911	
   912	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12	// string variant: unpacks data and length, then tail-jumps to aeshashbody
   913		MOVL	p+0(FP), AX	// ptr to string object
   914		MOVL	4(AX), BX	// length of string
   915		MOVL	(AX), AX	// string data
   916		LEAL	ret+8(FP), DX	// address of the result slot
   917		JMP	runtime·aeshashbody(SB)
   918	
   919	// AX: data
   920	// BX: length
   921	// DX: address to put return value
   922	TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
   923		MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   924		PINSRW	$4, BX, X0	            // 16 bits of length
   925		PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   926		MOVO	X0, X1                      // save unscrambled seed
   927		PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   928		AESENC	X0, X0                      // scramble seed
   929	
   930		CMPL	BX, $16
   931		JB	aes0to15
   932		JE	aes16
   933		CMPL	BX, $32
   934		JBE	aes17to32
   935		CMPL	BX, $64
   936		JBE	aes33to64
   937		JMP	aes65plus
   938		
   939	aes0to15:
   940		TESTL	BX, BX
   941		JE	aes0
   942	
   943		ADDL	$16, AX
   944		TESTW	$0xff0, AX
   945		JE	endofpage
   946	
   947		// 16 bytes loaded at this address won't cross
   948		// a page boundary, so we can load it directly.
   949		MOVOU	-16(AX), X1
   950		ADDL	BX, BX
   951		PAND	masks<>(SB)(BX*8), X1
   952	
   953	final1:	
   954		AESENC	X0, X1  // scramble input, xor in seed
   955		AESENC	X1, X1  // scramble combo 2 times
   956		AESENC	X1, X1
   957		MOVL	X1, (DX)
   958		RET
   959	
   960	endofpage:
   961		// address ends in 1111xxxx. Might be up against
   962		// a page boundary, so load ending at last byte.
   963		// Then shift bytes down using pshufb.
   964		MOVOU	-32(AX)(BX*1), X1
   965		ADDL	BX, BX
   966		PSHUFB	shifts<>(SB)(BX*8), X1
   967		JMP	final1
   968	
   969	aes0:
   970		// Return scrambled input seed
   971		AESENC	X0, X0
   972		MOVL	X0, (DX)
   973		RET
   974	
   975	aes16:
   976		MOVOU	(AX), X1
   977		JMP	final1
   978	
   979	aes17to32:
   980		// make second starting seed
   981		PXOR	runtime·aeskeysched+16(SB), X1
   982		AESENC	X1, X1
   983		
   984		// load data to be hashed
   985		MOVOU	(AX), X2
   986		MOVOU	-16(AX)(BX*1), X3
   987	
   988		// scramble 3 times
   989		AESENC	X0, X2
   990		AESENC	X1, X3
   991		AESENC	X2, X2
   992		AESENC	X3, X3
   993		AESENC	X2, X2
   994		AESENC	X3, X3
   995	
   996		// combine results
   997		PXOR	X3, X2
   998		MOVL	X2, (DX)
   999		RET
  1000	
  1001	aes33to64:
  1002		// make 3 more starting seeds
  1003		MOVO	X1, X2
  1004		MOVO	X1, X3
  1005		PXOR	runtime·aeskeysched+16(SB), X1
  1006		PXOR	runtime·aeskeysched+32(SB), X2
  1007		PXOR	runtime·aeskeysched+48(SB), X3
  1008		AESENC	X1, X1
  1009		AESENC	X2, X2
  1010		AESENC	X3, X3
  1011		
  1012		MOVOU	(AX), X4
  1013		MOVOU	16(AX), X5
  1014		MOVOU	-32(AX)(BX*1), X6
  1015		MOVOU	-16(AX)(BX*1), X7
  1016		
  1017		AESENC	X0, X4
  1018		AESENC	X1, X5
  1019		AESENC	X2, X6
  1020		AESENC	X3, X7
  1021		
  1022		AESENC	X4, X4
  1023		AESENC	X5, X5
  1024		AESENC	X6, X6
  1025		AESENC	X7, X7
  1026		
  1027		AESENC	X4, X4
  1028		AESENC	X5, X5
  1029		AESENC	X6, X6
  1030		AESENC	X7, X7
  1031	
  1032		PXOR	X6, X4
  1033		PXOR	X7, X5
  1034		PXOR	X5, X4
  1035		MOVL	X4, (DX)
  1036		RET
  1037	
  1038	aes65plus:
  1039		// make 3 more starting seeds
  1040		MOVO	X1, X2
  1041		MOVO	X1, X3
  1042		PXOR	runtime·aeskeysched+16(SB), X1
  1043		PXOR	runtime·aeskeysched+32(SB), X2
  1044		PXOR	runtime·aeskeysched+48(SB), X3
  1045		AESENC	X1, X1
  1046		AESENC	X2, X2
  1047		AESENC	X3, X3
  1048		
  1049		// start with last (possibly overlapping) block
  1050		MOVOU	-64(AX)(BX*1), X4
  1051		MOVOU	-48(AX)(BX*1), X5
  1052		MOVOU	-32(AX)(BX*1), X6
  1053		MOVOU	-16(AX)(BX*1), X7
  1054	
  1055		// scramble state once
  1056		AESENC	X0, X4
  1057		AESENC	X1, X5
  1058		AESENC	X2, X6
  1059		AESENC	X3, X7
  1060	
  1061		// compute number of remaining 64-byte blocks
  1062		DECL	BX
  1063		SHRL	$6, BX
  1064		
  1065	aesloop:
  1066		// scramble state, xor in a block
  1067		MOVOU	(AX), X0
  1068		MOVOU	16(AX), X1
  1069		MOVOU	32(AX), X2
  1070		MOVOU	48(AX), X3
  1071		AESENC	X0, X4
  1072		AESENC	X1, X5
  1073		AESENC	X2, X6
  1074		AESENC	X3, X7
  1075	
  1076		// scramble state
  1077		AESENC	X4, X4
  1078		AESENC	X5, X5
  1079		AESENC	X6, X6
  1080		AESENC	X7, X7
  1081	
  1082		ADDL	$64, AX
  1083		DECL	BX
  1084		JNE	aesloop
  1085	
  1086		// 2 more scrambles to finish
  1087		AESENC	X4, X4
  1088		AESENC	X5, X5
  1089		AESENC	X6, X6
  1090		AESENC	X7, X7
  1091		
  1092		AESENC	X4, X4
  1093		AESENC	X5, X5
  1094		AESENC	X6, X6
  1095		AESENC	X7, X7
  1096	
  1097		PXOR	X6, X4
  1098		PXOR	X7, X5
  1099		PXOR	X5, X4
  1100		MOVL	X4, (DX)
  1101		RET
  1102	
  1103	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
      		// Hashes the 4 bytes at p together with the seed h: seed and data are
      		// packed into X0, run through three AESENC rounds keyed by the first
      		// three 16-byte slots of runtime·aeskeysched, and the low 32 bits of
      		// X0 are stored to ret.
  1104		MOVL	p+0(FP), AX	// ptr to data
  1105		MOVL	h+4(FP), X0	// seed -> dword 0 of X0
  1106		PINSRD	$1, (AX), X0	// data -> dword 1 of X0
  1107		AESENC	runtime·aeskeysched+0(SB), X0	// round 1
  1108		AESENC	runtime·aeskeysched+16(SB), X0	// round 2
  1109		AESENC	runtime·aeskeysched+32(SB), X0	// round 3
  1110		MOVL	X0, ret+8(FP)	// result = low 32 bits of X0
  1111		RET
  1112	
  1113	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
      		// Hashes the 8 bytes at p together with the seed h: data and seed are
      		// packed into X0, run through three AESENC rounds keyed by the first
      		// three 16-byte slots of runtime·aeskeysched, and the low 32 bits of
      		// X0 are stored to ret.
  1114		MOVL	p+0(FP), AX	// ptr to data
  1115		MOVQ	(AX), X0	// data -> dwords 0 and 1 of X0
  1116		PINSRD	$2, h+4(FP), X0	// seed -> dword 2 of X0
  1117		AESENC	runtime·aeskeysched+0(SB), X0	// round 1
  1118		AESENC	runtime·aeskeysched+16(SB), X0	// round 2
  1119		AESENC	runtime·aeskeysched+32(SB), X0	// round 3
  1120		MOVL	X0, ret+8(FP)	// result = low 32 bits of X0
  1121		RET
  1122	
  1123	// simple mask to get rid of data in the high part of the register.
      	// masks<> holds 16 entries of 16 bytes each. Entry i (at byte offset i*16)
      	// has its low i bytes set to 0xff and the remaining bytes zero, so a PAND
      	// with it keeps only the first i bytes of a 16-byte load. aeshashbody
      	// indexes the table with the input length (1-15) scaled by 16; length 0
      	// takes the aes0 path and never reads entry 0.
  1124	DATA masks<>+0x00(SB)/4, $0x00000000	// entry 0: keep nothing
  1125	DATA masks<>+0x04(SB)/4, $0x00000000
  1126	DATA masks<>+0x08(SB)/4, $0x00000000
  1127	DATA masks<>+0x0c(SB)/4, $0x00000000
  1128		
  1129	DATA masks<>+0x10(SB)/4, $0x000000ff	// entry 1: keep 1 byte
  1130	DATA masks<>+0x14(SB)/4, $0x00000000
  1131	DATA masks<>+0x18(SB)/4, $0x00000000
  1132	DATA masks<>+0x1c(SB)/4, $0x00000000
  1133		
  1134	DATA masks<>+0x20(SB)/4, $0x0000ffff	// entry 2: keep 2 bytes
  1135	DATA masks<>+0x24(SB)/4, $0x00000000
  1136	DATA masks<>+0x28(SB)/4, $0x00000000
  1137	DATA masks<>+0x2c(SB)/4, $0x00000000
  1138		
  1139	DATA masks<>+0x30(SB)/4, $0x00ffffff	// entry 3: keep 3 bytes
  1140	DATA masks<>+0x34(SB)/4, $0x00000000
  1141	DATA masks<>+0x38(SB)/4, $0x00000000
  1142	DATA masks<>+0x3c(SB)/4, $0x00000000
  1143		
  1144	DATA masks<>+0x40(SB)/4, $0xffffffff	// entry 4: keep 4 bytes
  1145	DATA masks<>+0x44(SB)/4, $0x00000000
  1146	DATA masks<>+0x48(SB)/4, $0x00000000
  1147	DATA masks<>+0x4c(SB)/4, $0x00000000
  1148		
  1149	DATA masks<>+0x50(SB)/4, $0xffffffff	// entry 5: keep 5 bytes
  1150	DATA masks<>+0x54(SB)/4, $0x000000ff
  1151	DATA masks<>+0x58(SB)/4, $0x00000000
  1152	DATA masks<>+0x5c(SB)/4, $0x00000000
  1153		
  1154	DATA masks<>+0x60(SB)/4, $0xffffffff	// entry 6: keep 6 bytes
  1155	DATA masks<>+0x64(SB)/4, $0x0000ffff
  1156	DATA masks<>+0x68(SB)/4, $0x00000000
  1157	DATA masks<>+0x6c(SB)/4, $0x00000000
  1158		
  1159	DATA masks<>+0x70(SB)/4, $0xffffffff	// entry 7: keep 7 bytes
  1160	DATA masks<>+0x74(SB)/4, $0x00ffffff
  1161	DATA masks<>+0x78(SB)/4, $0x00000000
  1162	DATA masks<>+0x7c(SB)/4, $0x00000000
  1163		
  1164	DATA masks<>+0x80(SB)/4, $0xffffffff	// entry 8: keep 8 bytes
  1165	DATA masks<>+0x84(SB)/4, $0xffffffff
  1166	DATA masks<>+0x88(SB)/4, $0x00000000
  1167	DATA masks<>+0x8c(SB)/4, $0x00000000
  1168		
  1169	DATA masks<>+0x90(SB)/4, $0xffffffff	// entry 9: keep 9 bytes
  1170	DATA masks<>+0x94(SB)/4, $0xffffffff
  1171	DATA masks<>+0x98(SB)/4, $0x000000ff
  1172	DATA masks<>+0x9c(SB)/4, $0x00000000
  1173		
  1174	DATA masks<>+0xa0(SB)/4, $0xffffffff	// entry 10: keep 10 bytes
  1175	DATA masks<>+0xa4(SB)/4, $0xffffffff
  1176	DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1177	DATA masks<>+0xac(SB)/4, $0x00000000
  1178		
  1179	DATA masks<>+0xb0(SB)/4, $0xffffffff	// entry 11: keep 11 bytes
  1180	DATA masks<>+0xb4(SB)/4, $0xffffffff
  1181	DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1182	DATA masks<>+0xbc(SB)/4, $0x00000000
  1183		
  1184	DATA masks<>+0xc0(SB)/4, $0xffffffff	// entry 12: keep 12 bytes
  1185	DATA masks<>+0xc4(SB)/4, $0xffffffff
  1186	DATA masks<>+0xc8(SB)/4, $0xffffffff
  1187	DATA masks<>+0xcc(SB)/4, $0x00000000
  1188		
  1189	DATA masks<>+0xd0(SB)/4, $0xffffffff	// entry 13: keep 13 bytes
  1190	DATA masks<>+0xd4(SB)/4, $0xffffffff
  1191	DATA masks<>+0xd8(SB)/4, $0xffffffff
  1192	DATA masks<>+0xdc(SB)/4, $0x000000ff
  1193		
  1194	DATA masks<>+0xe0(SB)/4, $0xffffffff	// entry 14: keep 14 bytes
  1195	DATA masks<>+0xe4(SB)/4, $0xffffffff
  1196	DATA masks<>+0xe8(SB)/4, $0xffffffff
  1197	DATA masks<>+0xec(SB)/4, $0x0000ffff
  1198		
  1199	DATA masks<>+0xf0(SB)/4, $0xffffffff	// entry 15: keep 15 bytes
  1200	DATA masks<>+0xf4(SB)/4, $0xffffffff
  1201	DATA masks<>+0xf8(SB)/4, $0xffffffff
  1202	DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1203	
  1204	GLOBL masks<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only
  1205	
  1206	// these are arguments to pshufb. They move data down from
  1207	// the high bytes of the register to the low bytes of the register.
  1208	// index is how many bytes to move.
      	// shifts<> holds 16 entries of 16 bytes each. In entry i (at byte offset
      	// i*16) control byte k, for k < i, selects source byte 16-i+k; every other
      	// control byte is 0xff, whose set high bit makes PSHUFB write zero. The net
      	// effect is "take the top i bytes, place them at the bottom, zero the rest".
      	// aeshashbody's endofpage path indexes it with the input length (1-15)
      	// scaled by 16; length 0 takes the aes0 path and never reads entry 0.
  1209	DATA shifts<>+0x00(SB)/4, $0x00000000	// entry 0
  1210	DATA shifts<>+0x04(SB)/4, $0x00000000
  1211	DATA shifts<>+0x08(SB)/4, $0x00000000
  1212	DATA shifts<>+0x0c(SB)/4, $0x00000000
  1213		
  1214	DATA shifts<>+0x10(SB)/4, $0xffffff0f	// entry 1: byte 15 -> byte 0
  1215	DATA shifts<>+0x14(SB)/4, $0xffffffff
  1216	DATA shifts<>+0x18(SB)/4, $0xffffffff
  1217	DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1218		
  1219	DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// entry 2: bytes 14-15 -> 0-1
  1220	DATA shifts<>+0x24(SB)/4, $0xffffffff
  1221	DATA shifts<>+0x28(SB)/4, $0xffffffff
  1222	DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1223		
  1224	DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// entry 3: bytes 13-15 -> 0-2
  1225	DATA shifts<>+0x34(SB)/4, $0xffffffff
  1226	DATA shifts<>+0x38(SB)/4, $0xffffffff
  1227	DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1228		
  1229	DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// entry 4: bytes 12-15 -> 0-3
  1230	DATA shifts<>+0x44(SB)/4, $0xffffffff
  1231	DATA shifts<>+0x48(SB)/4, $0xffffffff
  1232	DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1233		
  1234	DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// entry 5: bytes 11-15 -> 0-4
  1235	DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1236	DATA shifts<>+0x58(SB)/4, $0xffffffff
  1237	DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1238		
  1239	DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// entry 6: bytes 10-15 -> 0-5
  1240	DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1241	DATA shifts<>+0x68(SB)/4, $0xffffffff
  1242	DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1243		
  1244	DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// entry 7: bytes 9-15 -> 0-6
  1245	DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1246	DATA shifts<>+0x78(SB)/4, $0xffffffff
  1247	DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1248		
  1249	DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// entry 8: bytes 8-15 -> 0-7
  1250	DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1251	DATA shifts<>+0x88(SB)/4, $0xffffffff
  1252	DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1253		
  1254	DATA shifts<>+0x90(SB)/4, $0x0a090807	// entry 9: bytes 7-15 -> 0-8
  1255	DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1256	DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1257	DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1258		
  1259	DATA shifts<>+0xa0(SB)/4, $0x09080706	// entry 10: bytes 6-15 -> 0-9
  1260	DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1261	DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1262	DATA shifts<>+0xac(SB)/4, $0xffffffff
  1263		
  1264	DATA shifts<>+0xb0(SB)/4, $0x08070605	// entry 11: bytes 5-15 -> 0-10
  1265	DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1266	DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1267	DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1268		
  1269	DATA shifts<>+0xc0(SB)/4, $0x07060504	// entry 12: bytes 4-15 -> 0-11
  1270	DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1271	DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1272	DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1273		
  1274	DATA shifts<>+0xd0(SB)/4, $0x06050403	// entry 13: bytes 3-15 -> 0-12
  1275	DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1276	DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1277	DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1278		
  1279	DATA shifts<>+0xe0(SB)/4, $0x05040302	// entry 14: bytes 2-15 -> 0-13
  1280	DATA shifts<>+0xe4(SB)/4, $0x09080706
  1281	DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1282	DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1283		
  1284	DATA shifts<>+0xf0(SB)/4, $0x04030201	// entry 15: bytes 1-15 -> 0-14
  1285	DATA shifts<>+0xf4(SB)/4, $0x08070605
  1286	DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1287	DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1288	
  1289	GLOBL shifts<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only
  1290	
  1291	TEXT ·checkASM(SB),NOSPLIT,$0-1
      		// Reports (as a 1-byte bool in ret) whether both lookup tables are
      		// 16-byte aligned. The tables are used directly as memory operands of
      		// PAND and PSHUFB in aeshashbody, which is why alignment matters.
  1292		// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
  1293		MOVL	$masks<>(SB), AX	// AX = address of masks
  1294		MOVL	$shifts<>(SB), BX	// BX = address of shifts
  1295		ORL	BX, AX	// a low bit set in either address survives the OR
  1296		TESTL	$15, AX	// ZF = 1 iff the low 4 bits of both addresses are zero
  1297		SETEQ	ret+0(FP)	// ret = true when both are aligned
  1298		RET
  1299	
  1300	// memequal(p, q unsafe.Pointer, size uintptr) bool
      	// Reports whether the size bytes at p and q are identical. Identical
      	// pointers are answered immediately; otherwise the registers expected by
      	// memeqbody are loaded and control tail-jumps there, so memeqbody's RET
      	// returns straight to memequal's caller.
  1301	TEXT runtime·memequal(SB),NOSPLIT,$0-13
  1302		MOVL	a+0(FP), SI	// SI = first pointer
  1303		MOVL	b+4(FP), DI	// DI = second pointer
  1304		CMPL	SI, DI
  1305		JEQ	eq	// same address: trivially equal
  1306		MOVL	size+8(FP), BX	// BX = byte count
  1307		LEAL	ret+12(FP), AX	// AX = address of the result byte
  1308		JMP	runtime·memeqbody(SB)
  1309	eq:
  1310		MOVB    $1, ret+12(FP)	// ret = true
  1311		RET
  1312	
  1313	// memequal_varlen(a, b unsafe.Pointer) bool
      	// Like memequal, but the byte count is not an argument: it is read from
      	// offset 4 of the closure that DX points to. Identical pointers are
      	// answered immediately; otherwise control tail-jumps to memeqbody.
  1314	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
  1315		MOVL    a+0(FP), SI	// SI = first pointer
  1316		MOVL    b+4(FP), DI	// DI = second pointer
  1317		CMPL    SI, DI
  1318		JEQ     eq	// same address: trivially equal
  1319		MOVL    4(DX), BX    // compiler stores size at offset 4 in the closure
  1320		LEAL	ret+8(FP), AX	// AX = address of the result byte
  1321		JMP	runtime·memeqbody(SB)
  1322	eq:
  1323		MOVB    $1, ret+8(FP)	// ret = true
  1324		RET
  1325	
  1326	// eqstring tests whether two strings are equal.
  1327	// The compiler guarantees that strings passed
  1328	// to eqstring have equal length.
  1329	// See runtime_test.go:eqstring_generic for
  1330	// equivalent Go code.
      	// Because of that guarantee only s1's length is read; s2's length word
      	// (at offset 12) is never loaded. Identical base pointers are answered
      	// immediately; otherwise control tail-jumps to memeqbody.
  1331	TEXT runtime·eqstring(SB),NOSPLIT,$0-17
  1332		MOVL	s1_base+0(FP), SI	// SI = s1 data pointer
  1333		MOVL	s2_base+8(FP), DI	// DI = s2 data pointer
  1334		CMPL	SI, DI
  1335		JEQ	same	// same backing bytes and (by contract) same length
  1336		MOVL	s1_len+4(FP), BX	// BX = byte count
  1337		LEAL	ret+16(FP), AX	// AX = address of the result byte
  1338		JMP	runtime·memeqbody(SB)
  1339	same:
  1340		MOVB	$1, ret+16(FP)	// ret = true
  1341		RET
  1342	
  1343	TEXT bytes·Equal(SB),NOSPLIT,$0-25
      		// Reports whether two byte slices have the same length and contents.
      		// Arguments are two 12-byte slice headers (a at 0, b at 12) followed by
      		// the 1-byte result at 24. Different lengths return false at once;
      		// otherwise control tail-jumps to memeqbody with the shared length.
  1344		MOVL	a_len+4(FP), BX	// BX = len(a); doubles as the count for memeqbody
  1345		MOVL	b_len+16(FP), CX	// CX = len(b)
  1346		CMPL	BX, CX
  1347		JNE	eqret	// lengths differ: not equal
  1348		MOVL	a+0(FP), SI	// SI = a's data pointer
  1349		MOVL	b+12(FP), DI	// DI = b's data pointer
  1350		LEAL	ret+24(FP), AX	// AX = address of the result byte
  1351		JMP	runtime·memeqbody(SB)
  1352	eqret:
  1353		MOVB	$0, ret+24(FP)	// ret = false
  1354		RET
  1355	
  1356	// a in SI
  1357	// b in DI
  1358	// count in BX
  1359	// address of result byte in AX
      	// memeqbody stores 1 to (AX) if the BX bytes at SI and DI are identical and
      	// 0 otherwise. It is reached by JMP from memequal, memequal_varlen, eqstring
      	// and bytes·Equal, so its RET returns to their caller. It works in three
      	// tiers: 64 bytes per step with SSE2 (when runtime·support_sse2 is 1),
      	// 4 bytes per step with a 32-bit register, and a final, possibly
      	// overlapping, 4-byte compare. Counts below 4 take the separate "small" path.
      	// Clobbers BX, CX, DX, SI, DI and X0-X7.
  1360	TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1361		CMPL	BX, $4
  1362		JB	small	// fewer than 4 bytes: no full 4-byte load is safe
  1363	
  1364		// 64 bytes at a time using xmm registers
  1365	hugeloop:
  1366		CMPL	BX, $64
  1367		JB	bigloop
  1368		CMPB	runtime·support_sse2(SB), $1
  1369		JNE	bigloop	// no SSE2: fall back to the 4-byte loop
  1370		MOVOU	(SI), X0
  1371		MOVOU	(DI), X1
  1372		MOVOU	16(SI), X2
  1373		MOVOU	16(DI), X3
  1374		MOVOU	32(SI), X4
  1375		MOVOU	32(DI), X5
  1376		MOVOU	48(SI), X6
  1377		MOVOU	48(DI), X7
  1378		PCMPEQB	X1, X0	// each result byte is 0xff where the inputs match
  1379		PCMPEQB	X3, X2
  1380		PCMPEQB	X5, X4
  1381		PCMPEQB	X7, X6
  1382		PAND	X2, X0	// AND the four results: 0xff only where all four match
  1383		PAND	X6, X4
  1384		PAND	X4, X0
  1385		PMOVMSKB X0, DX	// DX = one bit per byte lane (16 bits)
  1386		ADDL	$64, SI
  1387		ADDL	$64, DI
  1388		SUBL	$64, BX
  1389		CMPL	DX, $0xffff	// all 16 lanes set means all 64 bytes matched
  1390		JEQ	hugeloop
  1391		MOVB	$0, (AX)	// mismatch: result = false
  1392		RET
  1393	
  1394		// 4 bytes at a time using 32-bit register
  1395	bigloop:
  1396		CMPL	BX, $4
  1397		JBE	leftover	// 4 or fewer left: finish with one last load
  1398		MOVL	(SI), CX
  1399		MOVL	(DI), DX
  1400		ADDL	$4, SI
  1401		ADDL	$4, DI
  1402		SUBL	$4, BX
  1403		CMPL	CX, DX
  1404		JEQ	bigloop
  1405		MOVB	$0, (AX)	// mismatch: result = false
  1406		RET
  1407	
  1408		// remaining 0-4 bytes
      		// Compare the 4 bytes that END at SI+BX / DI+BX. When BX < 4 this
      		// re-reads bytes that were already compared equal, which is harmless
      		// and valid because the original count was at least 4.
  1409	leftover:
  1410		MOVL	-4(SI)(BX*1), CX
  1411		MOVL	-4(DI)(BX*1), DX
  1412		CMPL	CX, DX
  1413		SETEQ	(AX)	// result = (last words equal)
  1414		RET
  1415	
      		// BX is 0..3 here.
  1416	small:
  1417		CMPL	BX, $0
  1418		JEQ	equal	// zero bytes: ZF is still set, so SETEQ at equal stores 1
  1419	
  1420		LEAL	0(BX*8), CX
  1421		NEGL	CX	// CX = -8*BX; as a shift count (mod 32) that is 32-8*BX
  1422	
  1423		MOVL	SI, DX
  1424		CMPB	DX, $0xfc	// low address byte above 0xfc?
  1425		JA	si_high
  1426	
  1427		// load at SI won't cross a page boundary.
  1428		MOVL	(SI), SI	// wanted bytes in the low BX bytes, junk above
  1429		JMP	si_finish
  1430	si_high:
  1431		// address ends in 111111xx. Load up to bytes we want, move to correct position.
  1432		MOVL	-4(SI)(BX*1), SI	// 4-byte load ending at the last wanted byte
  1433		SHRL	CX, SI	// shift the wanted bytes down to the low BX bytes
  1434	si_finish:
  1435	
  1436		// same for DI.
  1437		MOVL	DI, DX
  1438		CMPB	DX, $0xfc
  1439		JA	di_high
  1440		MOVL	(DI), DI
  1441		JMP	di_finish
  1442	di_high:
  1443		MOVL	-4(DI)(BX*1), DI
  1444		SHRL	CX, DI
  1445	di_finish:
  1446	
  1447		SUBL	SI, DI	// difference of the two words
  1448		SHLL	CX, DI	// drop the junk high bytes; ZF = 1 iff the low BX bytes matched
  1449	equal:
  1450		SETEQ	(AX)	// result = ZF
  1451		RET
  1452	
  1453	TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
      		// Three-way comparison of two strings. Loads the registers cmpbody
      		// expects and tail-jumps to it; cmpbody writes the result word
      		// (+1/0/-1) through AX and its RET returns to cmpstring's caller.
  1454		MOVL	s1_base+0(FP), SI	// SI = s1 data pointer
  1455		MOVL	s1_len+4(FP), BX	// BX = len(s1)
  1456		MOVL	s2_base+8(FP), DI	// DI = s2 data pointer
  1457		MOVL	s2_len+12(FP), DX	// DX = len(s2)
  1458		LEAL	ret+16(FP), AX	// AX = address of the result word
  1459		JMP	runtime·cmpbody(SB)
  1460	
  1461	TEXT bytes·Compare(SB),NOSPLIT,$0-28
      		// Three-way comparison of two byte slices. Arguments are two 12-byte
      		// slice headers (at 0 and 12) followed by the result word at 24; only
      		// the pointer and length words of each header are read. Loads the
      		// registers cmpbody expects and tail-jumps to it.
  1462		MOVL	s1+0(FP), SI	// SI = first slice's data pointer
  1463		MOVL	s1+4(FP), BX	// BX = first slice's length
  1464		MOVL	s2+12(FP), DI	// DI = second slice's data pointer
  1465		MOVL	s2+16(FP), DX	// DX = second slice's length
  1466		LEAL	ret+24(FP), AX	// AX = address of the result word
  1467		JMP	runtime·cmpbody(SB)
  1468	
  1469	TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
      		// Stores in ret the index of the first byte equal to c in the slice s,
      		// or -1 if there is none. The search is a forward REPN SCASB.
      		// Note: with a zero length SCASB runs no iterations and leaves ZF as it
      		// was on entry; if the "found" path is taken anyway, DI still equals SI
      		// and the computed index is -1, so the result is -1 either way.
  1470		MOVL	s+0(FP), SI	// SI = start of data (kept to compute the index)
  1471		MOVL	s_len+4(FP), CX	// CX = number of bytes to scan
  1472		MOVB	c+12(FP), AL	// AL = byte to look for
  1473		MOVL	SI, DI	// SCASB scans through DI
  1474		CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
  1475		JZ 3(PC)	// ZF set: match found, skip the two "not found" instructions
  1476		MOVL	$-1, ret+16(FP)	// not found
  1477		RET
  1478		SUBL	SI, DI	// DI is one past the match ...
  1479		SUBL	$1, DI	// ... so index = DI - SI - 1
  1480		MOVL	DI, ret+16(FP)
  1481		RET
  1482	
  1483	TEXT strings·IndexByte(SB),NOSPLIT,$0-16
      		// Stores in ret the index of the first byte equal to c in the string s,
      		// or -1 if there is none. Same algorithm as bytes·IndexByte; only the
      		// argument offsets differ because a string header is 8 bytes.
      		// Note: with a zero length SCASB runs no iterations and leaves ZF as it
      		// was on entry; if the "found" path is taken anyway, DI still equals SI
      		// and the computed index is -1, so the result is -1 either way.
  1484		MOVL	s+0(FP), SI	// SI = start of data (kept to compute the index)
  1485		MOVL	s_len+4(FP), CX	// CX = number of bytes to scan
  1486		MOVB	c+8(FP), AL	// AL = byte to look for
  1487		MOVL	SI, DI	// SCASB scans through DI
  1488		CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
  1489		JZ 3(PC)	// ZF set: match found, skip the two "not found" instructions
  1490		MOVL	$-1, ret+12(FP)	// not found
  1491		RET
  1492		SUBL	SI, DI	// DI is one past the match ...
  1493		SUBL	$1, DI	// ... so index = DI - SI - 1
  1494		MOVL	DI, ret+12(FP)
  1495		RET
  1496	
  1497	// input:
  1498	//   SI = a
  1499	//   DI = b
  1500	//   BX = alen
  1501	//   DX = blen
  1502	//   AX = address of return word (set to 1/0/-1)
      	// cmpbody is the shared three-way byte comparison behind cmpstring and
      	// bytes·Compare, which reach it by JMP (so its RET returns to their caller).
      	// It compares the first min(alen, blen) bytes as unsigned values; the first
      	// differing byte decides the result, and if there is none the lengths do.
      	// Throughout, BP counts the common bytes still to compare and DX keeps
      	// blen-alen for the final length comparison at allsame.
      	// Clobbers BX, CX, DX, BP, SI, DI, X0 and X1.
  1503	TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1504		MOVL	DX, BP
  1505		SUBL	BX, DX // DX = blen-alen
  1506		JLE	2(PC)	// blen <= alen: BP already holds the minimum (blen)
  1507		MOVL	BX, BP // BP = min(alen, blen)
  1508		CMPL	SI, DI
  1509		JEQ	allsame	// same starting address: only the lengths can differ
  1510		CMPL	BP, $4
  1511		JB	small
  1512		CMPB	runtime·support_sse2(SB), $1
  1513		JNE	mediumloop	// no SSE2: use the 4-byte loop only
  1514	largeloop:
  1515		CMPL	BP, $16
  1516		JB	mediumloop
  1517		MOVOU	(SI), X0
  1518		MOVOU	(DI), X1
  1519		PCMPEQB X0, X1	// X1 bytes = 0xff where a and b match
  1520		PMOVMSKB X1, BX	// BX = one bit per byte lane
  1521		XORL	$0xffff, BX	// convert EQ to NE
  1522		JNE	diff16	// branch if at least one byte is not equal
  1523		ADDL	$16, SI
  1524		ADDL	$16, DI
  1525		SUBL	$16, BP
  1526		JMP	largeloop
  1527	
  1528	diff16:
  1529		BSFL	BX, BX	// index of first byte that differs
  1530		XORL	DX, DX	// clear DX so SETHI leaves a clean 0/1
  1531		MOVB	(SI)(BX*1), CX
  1532		CMPB	CX, (DI)(BX*1)	// unsigned compare of a's byte with b's byte
  1533		SETHI	DX	// DX = 1 if a's byte is larger
  1534		LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
  1535		MOVL	DX, (AX)
  1536		RET
  1537	
  1538	mediumloop:
  1539		CMPL	BP, $4
  1540		JBE	_0through4	// 4 or fewer left: finish with one last load
  1541		MOVL	(SI), BX
  1542		MOVL	(DI), CX
  1543		CMPL	BX, CX
  1544		JNE	diff4
  1545		ADDL	$4, SI
  1546		ADDL	$4, DI
  1547		SUBL	$4, BP
  1548		JMP	mediumloop
  1549	
      		// Compare the 4 bytes that END at SI+BP / DI+BP. Any bytes re-read
      		// here were already found equal, so they cannot affect the outcome.
  1550	_0through4:
  1551		MOVL	-4(SI)(BP*1), BX
  1552		MOVL	-4(DI)(BP*1), CX
  1553		CMPL	BX, CX
  1554		JEQ	allsame
  1555	
      		// BX and CX hold differing 4-byte words from a and b.
  1556	diff4:
  1557		BSWAPL	BX	// reverse order of bytes
  1558		BSWAPL	CX
  1559		XORL	BX, CX	// find bit differences
  1560		BSRL	CX, CX	// index of highest bit difference
  1561		SHRL	CX, BX	// move a's bit to bottom
  1562		ANDL	$1, BX	// mask bit
  1563		LEAL	-1(BX*2), BX // 1/0 => +1/-1
  1564		MOVL	BX, (AX)
  1565		RET
  1566	
  1567		// 0-3 bytes in common
  1568	small:
  1569		LEAL	(BP*8), CX
  1570		NEGL	CX	// CX = -8*BP (shift count 32-8*BP mod 32); ZF = 1 iff BP == 0
  1571		JEQ	allsame	// no common bytes: the lengths decide
  1572	
  1573		// load si
  1574		CMPB	SI, $0xfc	// low address byte above 0xfc?
  1575		JA	si_high
  1576		MOVL	(SI), SI	// wanted bytes in the low BP bytes, junk above
  1577		JMP	si_finish
  1578	si_high:
  1579		MOVL	-4(SI)(BP*1), SI	// 4-byte load ending at the last wanted byte
  1580		SHRL	CX, SI	// bring the wanted bytes down to the low BP bytes
  1581	si_finish:
  1582		SHLL	CX, SI	// move them to the top, shifting the junk out and zeros in
  1583	
  1584		// same for di
  1585		CMPB	DI, $0xfc
  1586		JA	di_high
  1587		MOVL	(DI), DI
  1588		JMP	di_finish
  1589	di_high:
  1590		MOVL	-4(DI)(BP*1), DI
  1591		SHRL	CX, DI
  1592	di_finish:
  1593		SHLL	CX, DI
  1594	
  1595		BSWAPL	SI	// reverse order of bytes
  1596		BSWAPL	DI
  1597		XORL	SI, DI	// find bit differences
  1598		JEQ	allsame
  1599		BSRL	DI, CX	// index of highest bit difference
  1600		SHRL	CX, SI	// move a's bit to bottom
  1601		ANDL	$1, SI	// mask bit
  1602		LEAL	-1(SI*2), BX // 1/0 => +1/-1
  1603		MOVL	BX, (AX)
  1604		RET
  1605	
  1606		// all the bytes in common are the same, so we just need
  1607		// to compare the lengths.
  1608	allsame:
  1609		XORL	BX, BX	// clear first: SETcc writes only the low byte
  1610		XORL	CX, CX
  1611		TESTL	DX, DX	// flags from DX = blen-alen
  1612		SETLT	BX	// 1 if alen > blen
  1613		SETEQ	CX	// 1 if alen == blen
  1614		LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
  1615		MOVL	BX, (AX)
  1616		RET
  1617	
  1618	TEXT runtime·return0(SB), NOSPLIT, $0
      		// Sets AX to 0 and returns. MOVL (unlike XORL) leaves the flags untouched.
  1619		MOVL	$0, AX
  1620		RET
  1621	
  1622	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1623	// Must obey the gcc calling convention.
      	// The result is left in AX; only AX and CX are written.
  1624	TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1625		get_tls(CX)	// CX = thread-local storage base (macro, presumably from go_tls.h)
  1626		MOVL	g(CX), AX	// AX = current g
  1627		MOVL	g_m(AX), AX	// AX = g->m
  1628		MOVL	m_curg(AX), AX	// AX = m->curg
  1629		MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg->stack.hi
  1630		RET
  1631	
  1632	// The top-most function running on a goroutine
  1633	// returns to goexit+PCQuantum.
      	// The leading one-byte NOP is what that return address skips over, so
      	// execution resumes at the CALL. There is no RET: goexit1 does not return.
  1634	TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1635		BYTE	$0x90	// NOP
  1636		CALL	runtime·goexit1(SB)	// does not return
  1637		// traceback from goexit1 must hit code range of goexit
  1638		BYTE	$0x90	// NOP
  1639	
  1640	// Prefetching doesn't seem to help.
      	// The four prefetch entry points below are therefore deliberate no-ops on
      	// 386: each declares a single 4-byte argument, ignores it, and returns.
  1641	TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
  1642		RET	// no-op
  1643	
  1644	TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
  1645		RET	// no-op
  1646	
  1647	TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
  1648		RET	// no-op
  1649	
  1650	TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
  1651		RET	// no-op
  1652	
  1653	// Add a module's moduledata to the linked list of moduledata objects. This
  1654	// is called from .init_array by a function generated in the linker and so
  1655	// follows the platform ABI wrt register preservation -- it only touches AX,
  1656	// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1657	// instead the pointer to the moduledata is passed in AX.
      	// The new entry is appended after the current tail (runtime·lastmoduledatap)
      	// and then becomes the tail itself.
  1658	TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1659	       MOVL    runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1660	       MOVL    AX, moduledata_next(DX)	// tail->next = new moduledata
  1661	       MOVL    AX, runtime·lastmoduledatap(SB)	// new moduledata becomes the tail
  1662	       RET
  1663	
  1664	TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
      		// Converts the uint32 argument a to a float64 stored in ret. The value
      		// is zero-extended to a 64-bit integer in the 8-byte local frame and
      		// loaded onto the x87 stack as an integer, so inputs with the top bit
      		// set are not treated as negative.
  1665		MOVL	a+0(FP), AX
  1666		MOVL	AX, 0(SP)	// low 32 bits = a
  1667		MOVL	$0, 4(SP)	// high 32 bits = 0
  1668		FMOVV	0(SP), F0	// load the 64-bit integer onto the x87 stack
  1669		FMOVDP	F0, ret+4(FP)	// store as float64 and pop
  1670		RET
  1671	
  1672	TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
      		// Converts the float64 argument a to a uint32 stored in ret. The value
      		// is stored as a 64-bit integer and only its low 32 bits are returned.
      		// The x87 control word is swapped for runtime·controlWord64trunc during
      		// the store (presumably selecting truncation, judging by the name) and
      		// restored afterwards.
      		// Frame layout: 0(SP) saved control word, 4(SP) 8-byte integer result.
  1673		FMOVD	a+0(FP), F0	// push the float64 argument
  1674		FSTCW	0(SP)	// save the caller's control word
  1675		FLDCW	runtime·controlWord64trunc(SB)
  1676		FMOVVP	F0, 4(SP)	// store as 64-bit integer and pop
  1677		FLDCW	0(SP)	// restore the caller's control word
  1678		MOVL	4(SP), AX	// low 32 bits of the integer
  1679		MOVL	AX, ret+8(FP)
  1680		RET

View as plain text