...
Run Format

Text file src/runtime/asm_386.s

Documentation: runtime

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
	// rt0_go is the bootstrap routine. It aligns the stack and stashes argc/argv,
	// gives g0 provisional stack bounds, probes the CPU with CPUID (MMX is
	// mandatory), sets up TLS (through _cgo_init when present, else ldt0setup),
	// links g0 and m0 together, runs check/args/osinit/schedinit, queues
	// runtime·main with newproc and finally calls mstart.
    10	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    11		// copy arguments forward on an even stack
    12		MOVL	argc+0(FP), AX
    13		MOVL	argv+4(FP), BX
    14		SUBL	$128, SP		// plenty of scratch
    15		ANDL	$~15, SP		// round SP down to a 16-byte boundary
    16		MOVL	AX, 120(SP)		// save argc, argv away
    17		MOVL	BX, 124(SP)
    18	
    19		// set default stack bounds.
    20		// _cgo_init may update stackguard.
    21		MOVL	$runtime·g0(SB), BP
    22		LEAL	(-64*1024+104)(SP), BX	// provisional low bound: 64 KB below SP, plus 104 bytes
    23		MOVL	BX, g_stackguard0(BP)
    24		MOVL	BX, g_stackguard1(BP)
    25		MOVL	BX, (g_stack+stack_lo)(BP)
    26		MOVL	SP, (g_stack+stack_hi)(BP)
    27		
    28		// find out information about the processor we're on
    29	#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
    30		JMP 	has_cpuid
    31	#else
    32		// first see if CPUID instruction is supported.
    33		PUSHFL	// saved copy of EFLAGS, restored by the final POPFL
    34		PUSHFL	// working copy that gets modified
    35		XORL	$(1<<21), 0(SP) // flip ID bit
    36		POPFL	// try to load EFLAGS with the ID bit flipped
    37		PUSHFL
    38		POPL	AX	// AX = EFLAGS as the CPU actually kept it
    39		XORL	0(SP), AX	// AX = bits that differ from the saved original
    40		POPFL	// restore EFLAGS
    41		TESTL	$(1<<21), AX
    42		JNE 	has_cpuid	// ID bit could be toggled, so CPUID exists
    43	#endif
    44	
    45	bad_proc: // show that the program requires MMX.
    46		MOVL	$2, 0(SP)	// first argument to write: 2
    47		MOVL	$bad_proc_msg<>(SB), 4(SP)
    48		MOVL	$0x3d, 8(SP)	// length, equal to the declared size of bad_proc_msg
    49		CALL	runtime·write(SB)
    50		MOVL	$1, 0(SP)	// exit with status 1
    51		CALL	runtime·exit(SB)
    52		INT	$3	// trap if exit returns
    53	
    54	has_cpuid:
    55		MOVL	$0, AX	// CPUID leaf 0: AX = highest leaf, BX/DX/CX = vendor string
    56		CPUID
    57		MOVL	AX, SI	// keep the highest leaf number for the leaf-7 check below
    58		CMPL	AX, $0
    59		JE	nocpuinfo	// only leaf 0 available: skip feature detection
    60	
    61		// Figure out how to serialize RDTSC.
    62		// On Intel processors LFENCE is enough. AMD requires MFENCE.
    63		// Don't know about the rest, so let's do MFENCE.
    64		CMPL	BX, $0x756E6547  // "Genu"
    65		JNE	notintel
    66		CMPL	DX, $0x49656E69  // "ineI"
    67		JNE	notintel
    68		CMPL	CX, $0x6C65746E  // "ntel"
    69		JNE	notintel
    70		MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
    71	notintel:
    72	
    73		// Load EAX=1 cpuid flags
    74		MOVL	$1, AX
    75		CPUID
    76		MOVL	CX, AX // Move to global variable clobbers CX when generating PIC
    77		MOVL	AX, runtime·cpuid_ecx(SB)
    78		MOVL	DX, runtime·cpuid_edx(SB)
    79	
    80		// Check for MMX support
    81		TESTL	$(1<<23), DX	// MMX
    82		JZ 	bad_proc
    83	
    84		// Load EAX=7/ECX=0 cpuid flags
    85		CMPL	SI, $7
    86		JLT	nocpuinfo	// leaf 7 not supported by this CPU
    87		MOVL	$7, AX
    88		MOVL	$0, CX
    89		CPUID
    90		MOVL	BX, runtime·cpuid_ebx7(SB)
    91	
    92	nocpuinfo:	
    93	
    94		// if there is an _cgo_init, call it to let it
    95		// initialize and to set up GS.  if not,
    96		// we set up GS ourselves.
    97		MOVL	_cgo_init(SB), AX
    98		TESTL	AX, AX
    99		JZ	needtls
   100		MOVL	$setg_gcc<>(SB), BX
   101		MOVL	BX, 4(SP)	// second argument: address of setg_gcc
   102		MOVL	BP, 0(SP)	// first argument: g0 (BP was loaded with $runtime·g0 above)
   103		CALL	AX
   104	
   105		// update stackguard after _cgo_init
   106		MOVL	$runtime·g0(SB), CX
   107		MOVL	(g_stack+stack_lo)(CX), AX
   108		ADDL	$const__StackGuard, AX
   109		MOVL	AX, g_stackguard0(CX)
   110		MOVL	AX, g_stackguard1(CX)
   111	
   112	#ifndef GOOS_windows
   113		// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   114		JMP ok
   115	#endif
   116	needtls:
   117	#ifdef GOOS_plan9
   118		// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   119		JMP	ok
   120	#endif
   121	
   122		// set up %gs
   123		CALL	runtime·ldt0setup(SB)
   124	
   125		// store through it, to make sure it works
   126		get_tls(BX)
   127		MOVL	$0x123, g(BX)
   128		MOVL	runtime·m0+m_tls(SB), AX	// read the same slot back directly from m0.tls
   129		CMPL	AX, $0x123
   130		JEQ	ok
   131		MOVL	AX, 0	// abort
   132	ok:
   133		// set up m and g "registers"
   134		get_tls(BX)
   135		LEAL	runtime·g0(SB), DX
   136		MOVL	DX, g(BX)
   137		LEAL	runtime·m0(SB), AX
   138	
   139		// save m->g0 = g0
   140		MOVL	DX, m_g0(AX)
   141		// save g0->m = m0
   142		MOVL	AX, g_m(DX)
   143	
   144		CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   145	
   146		// convention is D is always cleared
   147		CLD
   148	
   149		CALL	runtime·check(SB)
   150	
   151		// saved argc, argv
   152		MOVL	120(SP), AX
   153		MOVL	AX, 0(SP)
   154		MOVL	124(SP), AX
   155		MOVL	AX, 4(SP)
   156		CALL	runtime·args(SB)
   157		CALL	runtime·osinit(SB)
   158		CALL	runtime·schedinit(SB)
   159	
   160		// create a new goroutine to start program
   161		PUSHL	$runtime·mainPC(SB)	// entry
   162		PUSHL	$0	// arg size
   163		CALL	runtime·newproc(SB)
   164		POPL	AX	// discard the two words pushed for newproc
   165		POPL	AX
   166	
   167		// start this M
   168		CALL	runtime·mstart(SB)
   169	
   170		INT $3	// trap if mstart ever returns
   171		RET
   172	
	// bad_proc_msg is the read-only, 0x3d-byte text (including the trailing
	// newline) that rt0_go hands to runtime·write on its bad_proc path.
   173	DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"
   174	DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
   175	DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
   176	DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
   177	DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
   178	DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
   179	DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
   180	DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
   181	DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa
   182	GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d
   183	
	// mainPC is a read-only 4-byte word holding the address of runtime·main.
	// rt0_go pushes $runtime·mainPC as the entry argument to newproc.
   184	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)
   185	GLOBL	runtime·mainPC(SB),RODATA,$4
   186	
	// breakpoint raises a breakpoint trap (INT 3) and returns if execution resumes.
   187	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   188		INT $3
   189		RET
   190	
	// asminit loads the x87 FPU control word from runtime·controlWord64.
   191	TEXT runtime·asminit(SB),NOSPLIT,$0-0
   192		// Linux and MinGW start the FPU in extended double precision.
   193		// Other operating systems use double precision.
   194		// Change to double precision to match them,
   195		// and to match other hardware that only has double.
   196		FLDCW	runtime·controlWord64(SB)
   197		RET
   198	
   199	/*
   200	 *  go-routine
   201	 */
   202	
   203	// void gosave(Gobuf*)
   204	// save state in Gobuf; setjmp
	// Records the caller's SP and PC and the current g in buf, and zeroes buf.ret.
	// buf.ctxt must already be zero; otherwise badctxt is called.
   205	TEXT runtime·gosave(SB), NOSPLIT, $0-4
   206		MOVL	buf+0(FP), AX		// gobuf
   207		LEAL	buf+0(FP), BX		// caller's SP
   208		MOVL	BX, gobuf_sp(AX)
   209		MOVL	0(SP), BX		// caller's PC
   210		MOVL	BX, gobuf_pc(AX)
   211		MOVL	$0, gobuf_ret(AX)
   212		// Assert ctxt is zero. See func save.
   213		MOVL	gobuf_ctxt(AX), BX
   214		TESTL	BX, BX
   215		JZ	2(PC)			// ctxt == 0: skip the badctxt call
   216		CALL	runtime·badctxt(SB)
   217		get_tls(CX)
   218		MOVL	g(CX), BX
   219		MOVL	BX, gobuf_g(AX)		// buf.g = current g
   220		RET
   221	
   222	// void gogo(Gobuf*)
   223	// restore state from Gobuf; longjmp
	// Makes buf.g the current g, loads SP from buf.sp, AX from buf.ret and DX from
	// buf.ctxt, zeroes those three fields, and jumps to buf.pc. Does not return
	// to its caller.
   224	TEXT runtime·gogo(SB), NOSPLIT, $8-4
   225		MOVL	buf+0(FP), BX		// gobuf
   226	
   227		// If ctxt is not nil, invoke deletion barrier before overwriting.
   228		MOVL	gobuf_ctxt(BX), DX
   229		TESTL	DX, DX
   230		JZ	nilctxt
   231		LEAL	gobuf_ctxt(BX), AX
   232		MOVL	AX, 0(SP)		// first argument: address of buf.ctxt
   233		MOVL	$0, 4(SP)		// second argument: 0, the value stored below
   234		CALL	runtime·writebarrierptr_prewrite(SB)
   235		MOVL	buf+0(FP), BX		// reload gobuf after the call
   236	
   237	nilctxt:
   238		MOVL	gobuf_g(BX), DX
   239		MOVL	0(DX), CX		// make sure g != nil
   240		get_tls(CX)
   241		MOVL	DX, g(CX)
   242		MOVL	gobuf_sp(BX), SP	// restore SP
   243		MOVL	gobuf_ret(BX), AX
   244		MOVL	gobuf_ctxt(BX), DX
   245		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   246		MOVL	$0, gobuf_ret(BX)
   247		MOVL	$0, gobuf_ctxt(BX)
   248		MOVL	gobuf_pc(BX), BX
   249		JMP	BX
   250	
   251	// func mcall(fn func(*g))
   252	// Switch to m->g0's stack, call fn(g).
   253	// Fn must never return. It should gogo(&g->sched)
   254	// to keep running g.
	// If the current g is already m->g0, control goes to badmcall instead.
	// If fn does return, control goes to badmcall2.
   255	TEXT runtime·mcall(SB), NOSPLIT, $0-4
   256		MOVL	fn+0(FP), DI
   257	
   258		get_tls(DX)
   259		MOVL	g(DX), AX	// save state in g->sched
   260		MOVL	0(SP), BX	// caller's PC
   261		MOVL	BX, (g_sched+gobuf_pc)(AX)
   262		LEAL	fn+0(FP), BX	// caller's SP
   263		MOVL	BX, (g_sched+gobuf_sp)(AX)
   264		MOVL	AX, (g_sched+gobuf_g)(AX)
   265	
   266		// switch to m->g0 & its stack, call fn
   267		MOVL	g(DX), BX
   268		MOVL	g_m(BX), BX
   269		MOVL	m_g0(BX), SI
   270		CMPL	SI, AX	// if g == m->g0 call badmcall
   271		JNE	3(PC)	// different g: skip the two-instruction jump to badmcall
   272		MOVL	$runtime·badmcall(SB), AX
   273		JMP	AX
   274		MOVL	SI, g(DX)	// g = m->g0
   275		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   276		PUSHL	AX	// argument for fn: the g that called mcall
   277		MOVL	DI, DX	// DX = fn, the func value itself
   278		MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   279		CALL	DI
   280		POPL	AX
   281		MOVL	$runtime·badmcall2(SB), AX	// reached only if fn returned
   282		JMP	AX
   283		RET
   284	
   285	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   286	// of the G stack. We need to distinguish the routine that
   287	// lives at the bottom of the G stack from the one that lives
   288	// at the top of the system stack because the one at the top of
   289	// the system stack terminates the stack walk (see topofstack()).
   290	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   291		RET	// no work to do: systemstack only stores this function's address in g->sched.pc
   292	
   293	// func systemstack(fn func())
	// Run fn on the system stack.
	//   - Current g is m->gsignal or m->g0: fn is called in place (noswitch).
	//   - Current g is m->curg: state is saved in g->sched, g and SP are switched
	//     to m->g0, fn is called, then g and SP are switched back to m->curg.
	//   - Any other g: badsystemstack is called, followed by a trap.
	// fn is a func value: DX is set to fn and the code pointer is loaded from its
	// first word before the call.
   294	TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   295		MOVL	fn+0(FP), DI	// DI = fn
   296		get_tls(CX)
   297		MOVL	g(CX), AX	// AX = g
   298		MOVL	g_m(AX), BX	// BX = m
   299	
   300		MOVL	m_gsignal(BX), DX	// DX = gsignal
   301		CMPL	AX, DX
   302		JEQ	noswitch
   303	
   304		MOVL	m_g0(BX), DX	// DX = g0
   305		CMPL	AX, DX
   306		JEQ	noswitch
   307	
   308		MOVL	m_curg(BX), BP
   309		CMPL	AX, BP
   310		JEQ	switch
   311		
   312		// Bad: g is not gsignal, not g0, not curg. What is it?
   313		// Hide call from linker nosplit analysis.
   314		MOVL	$runtime·badsystemstack(SB), AX
   315		CALL	AX
		// If badsystemstack returns, trap here rather than falling into the
		// switch path with an unrecognised g (same pattern as morestack).
		INT	$3
   316	
   317	switch:
   318		// save our state in g->sched. Pretend to
   319		// be systemstack_switch if the G stack is scanned.
   320		MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   321		MOVL	SP, (g_sched+gobuf_sp)(AX)
   322		MOVL	AX, (g_sched+gobuf_g)(AX)
   323	
   324		// switch to g0
   325		get_tls(CX)
   326		MOVL	DX, g(CX)	// DX still holds m->g0 from the comparison above
   327		MOVL	(g_sched+gobuf_sp)(DX), BX
   328		// make it look like mstart called systemstack on g0, to stop traceback
   329		SUBL	$4, BX
   330		MOVL	$runtime·mstart(SB), DX
   331		MOVL	DX, 0(BX)	// fake return address slot holding mstart's address
   332		MOVL	BX, SP
   333	
   334		// call target function
   335		MOVL	DI, DX	// DX = fn, the func value itself
   336		MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   337		CALL	DI
   338	
   339		// switch back to g
   340		get_tls(CX)
   341		MOVL	g(CX), AX
   342		MOVL	g_m(AX), BX
   343		MOVL	m_curg(BX), AX
   344		MOVL	AX, g(CX)
   345		MOVL	(g_sched+gobuf_sp)(AX), SP	// SP saved at switch above
   346		MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP once it has been consumed
   347		RET
   348	
   349	noswitch:
   350		// already on system stack, just call directly
   351		MOVL	DI, DX
   352		MOVL	0(DI), DI
   353		CALL	DI
   354		RET
   355	
   356	/*
   357	 * support for morestack
   358	 */
   359	
   360	// Called during function prolog when more stack is needed.
   361	//
   362	// The traceback routines see morestack on a g0 as being
   363	// the top of a stack (for example, morestack calling newstack
   364	// calling the scheduler calling newm calling gc), so we must
   365	// record an argument size. For that purpose, it has no arguments.
	//
	// On entry DX holds the ctxt value that is pushed as newstack's argument.
	// The function records f's caller in m->morebuf and f itself in g->sched,
	// then switches to m->g0's stack and calls newstack.
   366	TEXT runtime·morestack(SB),NOSPLIT,$0-0
   367		// Cannot grow scheduler stack (m->g0).
   368		get_tls(CX)
   369		MOVL	g(CX), BX
   370		MOVL	g_m(BX), BX	// BX = m for the rest of the function
   371		MOVL	m_g0(BX), SI
   372		CMPL	g(CX), SI
   373		JNE	3(PC)
   374		CALL	runtime·badmorestackg0(SB)
   375		INT	$3
   376	
   377		// Cannot grow signal stack.
   378		MOVL	m_gsignal(BX), SI
   379		CMPL	g(CX), SI
   380		JNE	3(PC)
   381		CALL	runtime·badmorestackgsignal(SB)
   382		INT	$3
   383	
   384		// Called from f.
   385		// Set m->morebuf to f's caller.
   386		MOVL	4(SP), DI	// f's caller's PC
   387		MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   388		LEAL	8(SP), CX	// f's caller's SP
   389		MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   390		get_tls(CX)	// CX was reused as scratch above; reload the TLS base
   391		MOVL	g(CX), SI
   392		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   393	
   394		// Set g->sched to context in f.
   395		MOVL	0(SP), AX	// f's PC
   396		MOVL	AX, (g_sched+gobuf_pc)(SI)
   397		MOVL	SI, (g_sched+gobuf_g)(SI)
   398		LEAL	4(SP), AX	// f's SP
   399		MOVL	AX, (g_sched+gobuf_sp)(SI)
   400		// newstack will fill gobuf.ctxt.
   401	
   402		// Call newstack on m->g0's stack.
   403		MOVL	m_g0(BX), BP
   404		MOVL	BP, g(CX)	// g = m->g0
   405		MOVL	(g_sched+gobuf_sp)(BP), AX
   406		MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   407		MOVL	AX, SP
   408		PUSHL	DX	// ctxt argument
   409		CALL	runtime·newstack(SB)
   410		MOVL	$0, 0x1003	// crash if newstack returns
   411		POPL	DX	// keep balance check happy
   412		RET
   413	
	// morestack_noctxt zeroes DX, the register morestack pushes as newstack's
	// ctxt argument, and then jumps to morestack.
   414	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
   415		MOVL	$0, DX
   416		JMP runtime·morestack(SB)
   417	
	// stackBarrier looks up the saved return PC for the current barrier in
	// g.stkbar[g.stkbarPos], increments g.stkbarPos, and jumps to that PC.
	// AX is left untouched; CX, DX and BX are used as scratch.
   418	TEXT runtime·stackBarrier(SB),NOSPLIT,$0
   419		// We came here via a RET to an overwritten return PC.
   420		// AX may be live. Other registers are available.
   421	
   422		// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   423		get_tls(CX)
   424		MOVL	g(CX), CX	// CX = g
   425		MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of the stkbar slice
   426		MOVL	g_stkbarPos(CX), BX
   427		IMULL	$stkbar__size, BX	// Too big for SIB.
   428		MOVL	stkbar_savedLRVal(DX)(BX*1), BX
   429		// Record that this stack barrier was hit.
   430		ADDL	$1, g_stkbarPos(CX)
   431		// Jump to the original return PC.
   432		JMP	BX
   433	
   434	// reflectcall: call a function with the given argument list
   435	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   436	// we don't have variable-sized frames, so we use a small number
   437	// of constant-sized-frame functions to encode a few bits of size in the pc.
   438	// Caution: ugly multiline assembly macros in your future!
   439	
	// DISPATCH(NAME, MAXSIZE): if CX <= MAXSIZE (unsigned compare) jump to NAME
	// through AX. Otherwise skip both instructions and continue with whatever
	// follows the macro. AX is clobbered only when the jump is taken.
   440	#define DISPATCH(NAME,MAXSIZE)		\
   441		CMPL	CX, $MAXSIZE;		\
   442		JA	3(PC);			\
   443		MOVL	$NAME(SB), AX;		\
   444		JMP	AX
   445	// Note: can't just "JMP NAME(SB)" - bad inlining results.
   446	
	// reflect·call is an alias entry point: it jumps straight to ·reflectcall,
	// which then reads the arguments from the same frame.
   447	TEXT reflect·call(SB), NOSPLIT, $0-0
   448		JMP	·reflectcall(SB)
   449	
	// reflectcall loads argsize into CX and jumps to the first call* routine whose
	// frame size is >= argsize, trying sizes from 16 bytes up to 1 GB in powers of
	// two. If argsize exceeds the largest size it jumps to badreflectcall.
   450	TEXT ·reflectcall(SB), NOSPLIT, $0-20
   451		MOVL	argsize+12(FP), CX
   452		DISPATCH(runtime·call16, 16)
   453		DISPATCH(runtime·call32, 32)
   454		DISPATCH(runtime·call64, 64)
   455		DISPATCH(runtime·call128, 128)
   456		DISPATCH(runtime·call256, 256)
   457		DISPATCH(runtime·call512, 512)
   458		DISPATCH(runtime·call1024, 1024)
   459		DISPATCH(runtime·call2048, 2048)
   460		DISPATCH(runtime·call4096, 4096)
   461		DISPATCH(runtime·call8192, 8192)
   462		DISPATCH(runtime·call16384, 16384)
   463		DISPATCH(runtime·call32768, 32768)
   464		DISPATCH(runtime·call65536, 65536)
   465		DISPATCH(runtime·call131072, 131072)
   466		DISPATCH(runtime·call262144, 262144)
   467		DISPATCH(runtime·call524288, 524288)
   468		DISPATCH(runtime·call1048576, 1048576)
   469		DISPATCH(runtime·call2097152, 2097152)
   470		DISPATCH(runtime·call4194304, 4194304)
   471		DISPATCH(runtime·call8388608, 8388608)
   472		DISPATCH(runtime·call16777216, 16777216)
   473		DISPATCH(runtime·call33554432, 33554432)
   474		DISPATCH(runtime·call67108864, 67108864)
   475		DISPATCH(runtime·call134217728, 134217728)
   476		DISPATCH(runtime·call268435456, 268435456)
   477		DISPATCH(runtime·call536870912, 536870912)
   478		DISPATCH(runtime·call1073741824, 1073741824)
   479		MOVL	$runtime·badreflectcall(SB), AX	// argsize larger than every frame size above
   480		JMP	AX
   481	
	// CALLFN(NAME, MAXSIZE) defines a WRAPPER function NAME with a MAXSIZE-byte
	// frame and the 20-byte reflectcall argument list. The generated function:
	//   1. copies argsize bytes from argptr to the bottom of its own frame,
	//   2. calls the code pointer stored in the first word of f, with DX = f,
	//   3. calls callRet<> with DX = argtype, DI = argptr+retoffset,
	//      SI = SP+retoffset and CX = argsize-retoffset.
   482	#define CALLFN(NAME,MAXSIZE)			\
   483	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   484		NO_LOCAL_POINTERS;			\
   485		/* copy arguments to stack */		\
   486		MOVL	argptr+8(FP), SI;		\
   487		MOVL	argsize+12(FP), CX;		\
   488		MOVL	SP, DI;				\
   489		REP;MOVSB;				\
   490		/* call function */			\
   491		MOVL	f+4(FP), DX;			\
   492		MOVL	(DX), AX; 			\
   493		PCDATA  $PCDATA_StackMapIndex, $0;	\
   494		CALL	AX;				\
   495		/* copy return values back */		\
   496		MOVL	argtype+0(FP), DX;		\
   497		MOVL	argptr+8(FP), DI;		\
   498		MOVL	argsize+12(FP), CX;		\
   499		MOVL	retoffset+16(FP), BX;		\
   500		MOVL	SP, SI;				\
   501		ADDL	BX, DI;				\
   502		ADDL	BX, SI;				\
   503		SUBL	BX, CX;				\
   504		CALL	callRet<>(SB);			\
   505		RET
   506	
   507	// callRet copies return values back at the end of call*. This is a
   508	// separate function so it can allocate stack space for the arguments
   509	// to reflectcallmove. It does not follow the Go ABI; it expects its
   510	// arguments in registers.
	// Register inputs, as set up by CALLFN: DX = argtype, DI = argptr+retoffset,
	// SI = callee frame SP+retoffset, CX = argsize-retoffset. They are stored as
	// the four stack arguments of reflectcallmove in that order.
   511	TEXT callRet<>(SB), NOSPLIT, $16-0
   512		MOVL	DX, 0(SP)
   513		MOVL	DI, 4(SP)
   514		MOVL	SI, 8(SP)
   515		MOVL	CX, 12(SP)
   516		CALL	runtime·reflectcallmove(SB)
   517		RET
   518	
	// Instantiate one call* function per frame size, 16 bytes to 1 GB in powers
	// of two. The sizes match the DISPATCH list in ·reflectcall one for one.
   519	CALLFN(·call16, 16)
   520	CALLFN(·call32, 32)
   521	CALLFN(·call64, 64)
   522	CALLFN(·call128, 128)
   523	CALLFN(·call256, 256)
   524	CALLFN(·call512, 512)
   525	CALLFN(·call1024, 1024)
   526	CALLFN(·call2048, 2048)
   527	CALLFN(·call4096, 4096)
   528	CALLFN(·call8192, 8192)
   529	CALLFN(·call16384, 16384)
   530	CALLFN(·call32768, 32768)
   531	CALLFN(·call65536, 65536)
   532	CALLFN(·call131072, 131072)
   533	CALLFN(·call262144, 262144)
   534	CALLFN(·call524288, 524288)
   535	CALLFN(·call1048576, 1048576)
   536	CALLFN(·call2097152, 2097152)
   537	CALLFN(·call4194304, 4194304)
   538	CALLFN(·call8388608, 8388608)
   539	CALLFN(·call16777216, 16777216)
   540	CALLFN(·call33554432, 33554432)
   541	CALLFN(·call67108864, 67108864)
   542	CALLFN(·call134217728, 134217728)
   543	CALLFN(·call268435456, 268435456)
   544	CALLFN(·call536870912, 536870912)
   545	CALLFN(·call1073741824, 1073741824)
   546	
   547	TEXT runtime·procyield(SB),NOSPLIT,$0-0
   548		MOVL	cycles+0(FP), AX
   549	again:
   550		PAUSE
   551		SUBL	$1, AX
   552		JNZ	again
   553		RET
   554	
	// publicationBarrier executes no instructions besides RET on this architecture.
   555	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   556		// Stores are already ordered on x86, so this is just a
   557		// compile barrier.
   558		RET
   559	
   560	// void jmpdefer(fn, sp);
   561	// called from deferreturn.
   562	// 1. pop the caller
   563	// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   564	//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   565	//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   566	//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   567	// 3. jmp to the argument
	// On the jump DX holds fv and BX holds the code pointer loaded from its first
	// word. Does not return to its own caller.
   568	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   569		MOVL	fv+0(FP), DX	// fn
   570		MOVL	argp+4(FP), BX	// caller sp
   571		LEAL	-4(BX), SP	// caller sp after CALL
   572	#ifdef GOBUILDMODE_shared
   573		SUBL	$16, (SP)	// return to CALL again
   574	#else
   575		SUBL	$5, (SP)	// return to CALL again
   576	#endif
   577		MOVL	0(DX), BX	// BX = code pointer stored in the first word of fn
   578		JMP	BX	// but first run the deferred function
   579	
   580	// Save state of caller into g->sched.
	// Stores the caller's SP and return PC in g->sched, zeroes g->sched.ret, and
	// calls badctxt if g->sched.ctxt is non-zero. AX and BX are saved on entry
	// and restored before returning.
   581	TEXT gosave<>(SB),NOSPLIT,$0
   582		PUSHL	AX
   583		PUSHL	BX
   584		get_tls(BX)
   585		MOVL	g(BX), BX	// BX = current g
   586		LEAL	arg+0(FP), AX	// AX = caller's SP (address just above our return PC)
   587		MOVL	AX, (g_sched+gobuf_sp)(BX)
   588		MOVL	-4(AX), AX	// AX = our return PC, i.e. the caller's resume address
   589		MOVL	AX, (g_sched+gobuf_pc)(BX)
   590		MOVL	$0, (g_sched+gobuf_ret)(BX)
   591		// Assert ctxt is zero. See func save.
   592		MOVL	(g_sched+gobuf_ctxt)(BX), AX
   593		TESTL	AX, AX
   594		JZ	2(PC)	// ctxt == 0: skip the badctxt call
   595		CALL	runtime·badctxt(SB)
   596		POPL	BX
   597		POPL	AX
   598		RET
   599	
   600	// func asmcgocall(fn, arg unsafe.Pointer) int32
   601	// Call fn(arg) on the scheduler stack,
   602	// aligned appropriately for the gcc ABI.
   603	// See cgocall.go for more details.
	// The value left in AX by fn is stored as the result.
   604	TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   605		MOVL	fn+0(FP), AX
   606		MOVL	arg+4(FP), BX
   607	
   608		MOVL	SP, DX	// DX = SP on entry, used below to compute the stack depth
   609	
   610		// Figure out if we need to switch to m->g0 stack.
   611		// We get called to create new OS threads too, and those
   612		// come in on the m->g0 stack already.
   613		get_tls(CX)
   614		MOVL	g(CX), BP
   615		MOVL	g_m(BP), BP
   616		MOVL	m_g0(BP), SI	// SI = m->g0
   617		MOVL	g(CX), DI	// DI = current g (kept in DI on both paths)
   618		CMPL	SI, DI
   619		JEQ	noswitch
   620		CALL	gosave<>(SB)	// record caller state in g->sched before leaving this stack
   621		get_tls(CX)
   622		MOVL	SI, g(CX)	// g = m->g0
   623		MOVL	(g_sched+gobuf_sp)(SI), SP
   624	
   625	noswitch:
   626		// Now on a scheduling stack (a pthread-created stack).
   627		SUBL	$32, SP
   628		ANDL	$~15, SP	// alignment, perhaps unnecessary
   629		MOVL	DI, 8(SP)	// save g
   630		MOVL	(g_stack+stack_hi)(DI), DI
   631		SUBL	DX, DI	// DI = g->stack.hi - entry SP
   632		MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   633		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   634		CALL	AX
   635	
   636		// Restore registers, g, stack pointer.
   637		get_tls(CX)
   638		MOVL	8(SP), DI	// DI = saved g
   639		MOVL	(g_stack+stack_hi)(DI), SI
   640		SUBL	4(SP), SI	// SI = g->stack.hi - saved depth = original SP
   641		MOVL	DI, g(CX)
   642		MOVL	SI, SP
   643	
   644		MOVL	AX, ret+8(FP)
   645		RET
   646	
   647	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   648	// Turn the fn into a Go func (by taking its address) and call
   649	// cgocallback_gofunc.
	// The other three arguments are copied through unchanged. The callee is
	// reached with an indirect call through AX.
   650	TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
   651		LEAL	fn+0(FP), AX	// address of the fn argument slot, not its value
   652		MOVL	AX, 0(SP)
   653		MOVL	frame+4(FP), AX
   654		MOVL	AX, 4(SP)
   655		MOVL	framesize+8(FP), AX
   656		MOVL	AX, 8(SP)
   657		MOVL	ctxt+12(FP), AX
   658		MOVL	AX, 12(SP)
   659		MOVL	$runtime·cgocallback_gofunc(SB), AX
   660		CALL	AX
   661		RET
   662	
   663	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   664	// See cgocall.go for more details.
	// Outline: obtain an m (calling needm when the thread has no g), save
	// m->g0->sched.sp at 0(SP), switch to m->curg's stack, call cgocallbackg,
	// restore m->curg->sched and m->g0->sched.sp, and call dropm if DX is zero
	// (the case where needm was used).
   665	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   666		NO_LOCAL_POINTERS
   667	
   668		// If g is nil, Go did not create the current thread.
   669		// Call needm to obtain one for temporary use.
   670		// In this case, we're running on the thread stack, so there's
   671		// lots of space, but the linker doesn't know. Hide the call from
   672		// the linker analysis by using an indirect call through AX.
   673		get_tls(CX)
   674	#ifdef GOOS_windows
   675		MOVL	$0, BP
   676		CMPL	CX, $0
   677		JEQ	2(PC) // TODO
   678	#endif
   679		MOVL	g(CX), BP
   680		CMPL	BP, $0
   681		JEQ	needm
   682		MOVL	g_m(BP), BP
   683		MOVL	BP, DX // saved copy of oldm
   684		JMP	havem
   685	needm:
   686		MOVL	$0, 0(SP)
   687		MOVL	$runtime·needm(SB), AX
   688		CALL	AX
   689		MOVL	0(SP), DX	// NOTE(review): presumably still 0 here, marking "no old m"; confirm against needm
   690		get_tls(CX)
   691		MOVL	g(CX), BP
   692		MOVL	g_m(BP), BP
   693	
   694		// Set m->g0->sched.sp = SP, so that if a panic happens
   695		// during the function we are about to execute, it will
   696		// have a valid SP to run on the g0 stack.
   697		// The next few lines (after the havem label)
   698		// will save this SP onto the stack and then write
   699		// the same SP back to m->g0->sched.sp. That seems redundant,
   700		// but if an unrecovered panic happens, unwindm will
   701		// restore the g->sched.sp from the stack location
   702		// and then systemstack will try to use it. If we don't set it here,
   703		// that restored SP will be uninitialized (typically 0) and
   704		// will not be usable.
   705		MOVL	m_g0(BP), SI
   706		MOVL	SP, (g_sched+gobuf_sp)(SI)
   707	
   708	havem:
   709		// Now there's a valid m, and we're running on its m->g0.
   710		// Save current m->g0->sched.sp on stack and then set it to SP.
   711		// Save current sp in m->g0->sched.sp in preparation for
   712		// switch back to m->curg stack.
   713		// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   714		MOVL	m_g0(BP), SI
   715		MOVL	(g_sched+gobuf_sp)(SI), AX
   716		MOVL	AX, 0(SP)
   717		MOVL	SP, (g_sched+gobuf_sp)(SI)
   718	
   719		// Switch to m->curg stack and call runtime.cgocallbackg.
   720		// Because we are taking over the execution of m->curg
   721		// but *not* resuming what had been running, we need to
   722		// save that information (m->curg->sched) so we can restore it.
   723		// We can restore m->curg->sched.sp easily, because calling
   724		// runtime.cgocallbackg leaves SP unchanged upon return.
   725		// To save m->curg->sched.pc, we push it onto the stack.
   726		// This has the added benefit that it looks to the traceback
   727		// routine like cgocallbackg is going to return to that
   728		// PC (because the frame we allocate below has the same
   729		// size as cgocallback_gofunc's frame declared above)
   730		// so that the traceback will seamlessly trace back into
   731		// the earlier calls.
   732		//
   733		// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
   734		// 8(SP) is unused.
   735		MOVL	m_curg(BP), SI
   736		MOVL	SI, g(CX)	// g = m->curg
   737		MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   738		MOVL	(g_sched+gobuf_pc)(SI), BP
   739		MOVL	BP, -4(DI)	// saved curg pc sits where a return address would be
   740		MOVL	ctxt+12(FP), CX	// read ctxt before SP moves to the other stack
   741		LEAL	-(4+12)(DI), SP	// 4 bytes for the saved pc plus a 12-byte frame
   742		MOVL	DX, 4(SP)
   743		MOVL	CX, 0(SP)	// ctxt is the argument passed to cgocallbackg
   744		CALL	runtime·cgocallbackg(SB)
   745		MOVL	4(SP), DX	// reload the saved oldm
   746	
   747		// Restore g->sched (== m->curg->sched) from saved values.
   748		get_tls(CX)
   749		MOVL	g(CX), SI
   750		MOVL	12(SP), BP	// the pc stored at -4(DI) above
   751		MOVL	BP, (g_sched+gobuf_pc)(SI)
   752		LEAL	(12+4)(SP), DI	// the original curg sched.sp
   753		MOVL	DI, (g_sched+gobuf_sp)(SI)
   754	
   755		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   756		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   757		// so we do not have to restore it.)
   758		MOVL	g(CX), BP
   759		MOVL	g_m(BP), BP
   760		MOVL	m_g0(BP), SI
   761		MOVL	SI, g(CX)
   762		MOVL	(g_sched+gobuf_sp)(SI), SP
   763		MOVL	0(SP), AX	// the previous m->g0->sched.sp saved at havem
   764		MOVL	AX, (g_sched+gobuf_sp)(SI)
   765		
   766		// If the m on entry was nil, we called needm above to borrow an m
   767		// for the duration of the call. Since the call is over, return it with dropm.
   768		CMPL	DX, $0
   769		JNE 3(PC)	// had an m on entry: skip the dropm call
   770		MOVL	$runtime·dropm(SB), AX
   771		CALL	AX
   772	
   773		// Done!
   774		RET
   775	
   776	// void setg(G*); set g. for use by needm.
	// On Windows the word at 0x14(FS) is also updated: it is set to 0 when gg is
	// nil (and the function returns without touching g), otherwise to the
	// address of gg->m->tls.
   777	TEXT runtime·setg(SB), NOSPLIT, $0-4
   778		MOVL	gg+0(FP), BX
   779	#ifdef GOOS_windows
   780		CMPL	BX, $0
   781		JNE	settls
   782		MOVL	$0, 0x14(FS)
   783		RET
   784	settls:
   785		MOVL	g_m(BX), AX
   786		LEAL	m_tls(AX), AX	// AX = &gg->m->tls
   787		MOVL	AX, 0x14(FS)
   788	#endif
   789		get_tls(CX)
   790		MOVL	BX, g(CX)	// g = gg
   791		RET
   792	
   793	// void setg_gcc(G*); set g. for use by gcc
	// Stores its single stack argument into the TLS g slot. Uses AX and DX only.
   794	TEXT setg_gcc<>(SB), NOSPLIT, $0
   795		get_tls(AX)
   796		MOVL	gg+0(FP), DX
   797		MOVL	DX, g(AX)
   798		RET
   799	
   800	// check that SP is in range (g->stack.lo, g->stack.hi); both comparisons are strict and unsigned.
	// Executes INT 3 when either bound is violated.
   801	TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   802		get_tls(CX)
   803		MOVL	g(CX), AX
   804		CMPL	(g_stack+stack_hi)(AX), SP
   805		JHI	2(PC)	// stack.hi > SP: upper bound ok
   806		INT	$3
   807		CMPL	SP, (g_stack+stack_lo)(AX)
   808		JHI	2(PC)	// SP > stack.lo: lower bound ok
   809		INT	$3
   810		RET
   811	
	// getcallerpc returns the word stored 4 bytes below the address passed in
	// argp. If that word equals runtime·stackBarrierPC, the value produced by
	// nextBarrierPC is returned instead.
   812	TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8
   813		MOVL	argp+0(FP),AX		// addr of first arg
   814		MOVL	-4(AX),AX		// get calling pc
   815		CMPL	AX, runtime·stackBarrierPC(SB)
   816		JNE	nobar
   817		// Get original return PC.
   818		CALL	runtime·nextBarrierPC(SB)
   819		MOVL	0(SP), AX		// nextBarrierPC's result slot in our 4-byte frame
   820	nobar:
   821		MOVL	AX, ret+4(FP)
   822		RET
   823	
	// setcallerpc overwrites the word stored 4 bytes below the address passed in
	// argp with pc. If that word currently equals runtime·stackBarrierPC it is
	// left alone and pc is passed to setNextBarrierPC instead.
   824	TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8
   825		MOVL	argp+0(FP),AX		// addr of first arg
   826		MOVL	pc+4(FP), BX
   827		MOVL	-4(AX), DX		// current calling pc
   828		CMPL	DX, runtime·stackBarrierPC(SB)
   829		JEQ	setbar
   830		MOVL	BX, -4(AX)		// set calling pc
   831		RET
   832	setbar:
   833		// Set the stack barrier return PC.
   834		MOVL	BX, 0(SP)		// argument for setNextBarrierPC
   835		CALL	runtime·setNextBarrierPC(SB)
   836		RET
   837	
   838	// func cputicks() int64
	// Returns the RDTSC counter (AX = low word, DX = high word). When bit 26 of
	// cpuid_edx is set, a fence is issued first: LFENCE if lfenceBeforeRdtsc is
	// 1, otherwise MFENCE.
   839	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   840		TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
   841		JEQ	done
   842		CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   843		JNE	mfence
   844		BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
   845		JMP	done
   846	mfence:
   847		BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
   848	done:
   849		RDTSC
   850		MOVL	AX, ret_lo+0(FP)
   851		MOVL	DX, ret_hi+4(FP)
   852		RET
   853	
	// ldt0setup calls setldt(7, &m0.tls, 32).
   854	TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
   855		// set up ldt 7 to point at m0.tls
   856		// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   857		// the entry number is just a hint.  setldt will set up GS with what it used.
   858		MOVL	$7, 0(SP)
   859		LEAL	runtime·m0+m_tls(SB), AX
   860		MOVL	AX, 4(SP)
   861		MOVL	$32, 8(SP)	// sizeof(tls array)
   862		CALL	runtime·setldt(SB)
   863		RET
   864	
	// emptyfunc does nothing. It is declared with flags 0 rather than NOSPLIT;
	// rt0_go calls it with the note "fault if stack check is wrong".
   865	TEXT runtime·emptyfunc(SB),0,$0-0
   866		RET
   867	
   868	// memhash_varlen(p unsafe.Pointer, h seed) uintptr
   869	// Forwards to memhash(p, h, size). The size is not an argument:
   870	// it is read from the second word of the closure that DX points at.
   871	TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
   872		GO_ARGS
   873		NO_LOCAL_POINTERS
   874		MOVL	p+0(FP), AX
   875		MOVL	AX, 0(SP)	// memhash argument 0: p
   876		MOVL	h+4(FP), AX
   877		MOVL	AX, 4(SP)	// memhash argument 1: h
   878		MOVL	4(DX), AX
   879		MOVL	AX, 8(SP)	// memhash argument 2: size taken from the closure
   880		CALL	runtime·memhash(SB)
   881		MOVL	12(SP), AX	// memhash result slot
   882		MOVL	AX, ret+8(FP)
   883		RET
   884	
   885	// hash function using AES hardware instructions
	// Loads the data pointer, the size and the address of the result slot, then
	// jumps to aeshashbody, which reads the seed h from this same frame.
   886	TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   887		MOVL	p+0(FP), AX	// ptr to data
   888		MOVL	s+8(FP), BX	// size
   889		LEAL	ret+12(FP), DX	// where aeshashbody stores the result
   890		JMP	runtime·aeshashbody(SB)
   891	
	// aeshashstr hashes a string: p points at a two-word string header (data
	// pointer, then length). It unpacks the header and jumps to aeshashbody.
   892	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
   893		MOVL	p+0(FP), AX	// ptr to string object
   894		MOVL	4(AX), BX	// length of string
   895		MOVL	(AX), AX	// string data
   896		LEAL	ret+8(FP), DX	// where aeshashbody stores the result
   897		JMP	runtime·aeshashbody(SB)
   898	
   899	// AX: data
   900	// BX: length
   901	// DX: address to put return value
	// Reached by JMP from aeshash and aeshashstr, so h+4(FP) is the seed argument
	// in the jumping function's frame. The low 32 bits of the final XMM state are
	// stored at (DX). Separate paths handle lengths 0, 1-15, 16, 17-32, 33-64
	// and 65 or more bytes.
   902	TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
   903		MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   904		PINSRW	$4, BX, X0	            // 16 bits of length
   905		PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   906		MOVO	X0, X1                      // save unscrambled seed
   907		PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   908		AESENC	X0, X0                      // scramble seed
   909	
   910		CMPL	BX, $16
   911		JB	aes0to15
   912		JE	aes16
   913		CMPL	BX, $32
   914		JBE	aes17to32
   915		CMPL	BX, $64
   916		JBE	aes33to64
   917		JMP	aes65plus
   918		
   919	aes0to15:
   920		TESTL	BX, BX
   921		JE	aes0
   922	
   923		ADDL	$16, AX
   924		TESTW	$0xff0, AX	// bits 4-11 of AX+16 are all zero only if the data starts in the last 16 bytes of a 4 KB block
   925		JE	endofpage
   926	
   927		// 16 bytes loaded at this address won't cross
   928		// a page boundary, so we can load it directly.
   929		MOVOU	-16(AX), X1
   930		ADDL	BX, BX	// BX = 2*length, so (BX*8) below steps through 16-byte table entries
   931		PAND	masks<>(SB)(BX*8), X1
   932	
   933	final1:	
   934		AESENC	X0, X1  // scramble input, xor in seed
   935		AESENC	X1, X1  // scramble combo 2 times
   936		AESENC	X1, X1
   937		MOVL	X1, (DX)
   938		RET
   939	
   940	endofpage:
   941		// address ends in 1111xxxx. Might be up against
   942		// a page boundary, so load ending at last byte.
   943		// Then shift bytes down using pshufb.
   944		MOVOU	-32(AX)(BX*1), X1	// AX was advanced by 16 above, so this is the 16 bytes ending at data+length
   945		ADDL	BX, BX
   946		PSHUFB	shifts<>(SB)(BX*8), X1
   947		JMP	final1
   948	
   949	aes0:
   950		// Return scrambled input seed
   951		AESENC	X0, X0
   952		MOVL	X0, (DX)
   953		RET
   954	
   955	aes16:
   956		MOVOU	(AX), X1
   957		JMP	final1
   958	
   959	aes17to32:
   960		// make second starting seed
   961		PXOR	runtime·aeskeysched+16(SB), X1
   962		AESENC	X1, X1
   963		
   964		// load data to be hashed
   965		MOVOU	(AX), X2
   966		MOVOU	-16(AX)(BX*1), X3	// last 16 bytes; overlaps X2 when length < 32
   967	
   968		// scramble 3 times
   969		AESENC	X0, X2
   970		AESENC	X1, X3
   971		AESENC	X2, X2
   972		AESENC	X3, X3
   973		AESENC	X2, X2
   974		AESENC	X3, X3
   975	
   976		// combine results
   977		PXOR	X3, X2
   978		MOVL	X2, (DX)
   979		RET
   980	
   981	aes33to64:
   982		// make 3 more starting seeds
   983		MOVO	X1, X2
   984		MOVO	X1, X3
   985		PXOR	runtime·aeskeysched+16(SB), X1
   986		PXOR	runtime·aeskeysched+32(SB), X2
   987		PXOR	runtime·aeskeysched+48(SB), X3
   988		AESENC	X1, X1
   989		AESENC	X2, X2
   990		AESENC	X3, X3
   991		
   992		MOVOU	(AX), X4	// first 32 bytes
   993		MOVOU	16(AX), X5
   994		MOVOU	-32(AX)(BX*1), X6	// last 32 bytes; overlaps the first 32 when length < 64
   995		MOVOU	-16(AX)(BX*1), X7
   996		
   997		AESENC	X0, X4
   998		AESENC	X1, X5
   999		AESENC	X2, X6
  1000		AESENC	X3, X7
  1001		
  1002		AESENC	X4, X4
  1003		AESENC	X5, X5
  1004		AESENC	X6, X6
  1005		AESENC	X7, X7
  1006		
  1007		AESENC	X4, X4
  1008		AESENC	X5, X5
  1009		AESENC	X6, X6
  1010		AESENC	X7, X7
  1011	
  1012		PXOR	X6, X4	// fold the four lanes into X4
  1013		PXOR	X7, X5
  1014		PXOR	X5, X4
  1015		MOVL	X4, (DX)
  1016		RET
  1017	
  1018	aes65plus:
  1019		// make 3 more starting seeds
  1020		MOVO	X1, X2
  1021		MOVO	X1, X3
  1022		PXOR	runtime·aeskeysched+16(SB), X1
  1023		PXOR	runtime·aeskeysched+32(SB), X2
  1024		PXOR	runtime·aeskeysched+48(SB), X3
  1025		AESENC	X1, X1
  1026		AESENC	X2, X2
  1027		AESENC	X3, X3
  1028		
  1029		// start with last (possibly overlapping) block
  1030		MOVOU	-64(AX)(BX*1), X4
  1031		MOVOU	-48(AX)(BX*1), X5
  1032		MOVOU	-32(AX)(BX*1), X6
  1033		MOVOU	-16(AX)(BX*1), X7
  1034	
  1035		// scramble state once
  1036		AESENC	X0, X4
  1037		AESENC	X1, X5
  1038		AESENC	X2, X6
  1039		AESENC	X3, X7
  1040	
  1041		// compute number of remaining 64-byte blocks
  1042		DECL	BX
  1043		SHRL	$6, BX	// BX = (length-1)/64, at least 1 on this path
  1044		
  1045	aesloop:
  1046		// scramble state, xor in a block
  1047		MOVOU	(AX), X0	// the seed registers X0-X3 are reused for data from here on
  1048		MOVOU	16(AX), X1
  1049		MOVOU	32(AX), X2
  1050		MOVOU	48(AX), X3
  1051		AESENC	X0, X4
  1052		AESENC	X1, X5
  1053		AESENC	X2, X6
  1054		AESENC	X3, X7
  1055	
  1056		// scramble state
  1057		AESENC	X4, X4
  1058		AESENC	X5, X5
  1059		AESENC	X6, X6
  1060		AESENC	X7, X7
  1061	
  1062		ADDL	$64, AX
  1063		DECL	BX
  1064		JNE	aesloop
  1065	
  1066		// 2 more scrambles to finish
  1067		AESENC	X4, X4
  1068		AESENC	X5, X5
  1069		AESENC	X6, X6
  1070		AESENC	X7, X7
  1071		
  1072		AESENC	X4, X4
  1073		AESENC	X5, X5
  1074		AESENC	X6, X6
  1075		AESENC	X7, X7
  1076	
  1077		PXOR	X6, X4	// fold the four lanes into X4
  1078		PXOR	X7, X5
  1079		PXOR	X5, X4
  1080		MOVL	X4, (DX)
  1081		RET
  1082	
  1083	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
      		// Hash the 4 bytes at p together with seed h. Seed and data are packed
      		// into X0, mixed by three AESENC rounds whose round keys come from
      		// runtime·aeskeysched, and the low 32 bits of X0 are stored to ret.
  1084		MOVL	p+0(FP), AX	// ptr to data
  1085		MOVL	h+4(FP), X0	// seed goes into dword lane 0
  1086		PINSRD	$1, (AX), X0	// data goes into dword lane 1
  1087		AESENC	runtime·aeskeysched+0(SB), X0
  1088		AESENC	runtime·aeskeysched+16(SB), X0
  1089		AESENC	runtime·aeskeysched+32(SB), X0
  1090		MOVL	X0, ret+8(FP)	// only the low 32 bits are returned
  1091		RET
  1092	
  1093	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
      		// Hash the 8 bytes at p together with seed h. Data fills the low 64 bits
      		// of X0, the seed is inserted above it, and the same three AESENC rounds
      		// as aeshash32 are applied before the low 32 bits are stored to ret.
  1094		MOVL	p+0(FP), AX	// ptr to data
  1095		MOVQ	(AX), X0	// data occupies dword lanes 0 and 1
  1096		PINSRD	$2, h+4(FP), X0	// seed goes into dword lane 2
  1097		AESENC	runtime·aeskeysched+0(SB), X0
  1098		AESENC	runtime·aeskeysched+16(SB), X0
  1099		AESENC	runtime·aeskeysched+32(SB), X0
  1100		MOVL	X0, ret+8(FP)	// only the low 32 bits are returned
  1101		RET
  1102	
  1103	// simple mask to get rid of data in the high part of the register.
      	// The table holds 16 entries of 16 bytes each. Entry n (at byte offset
      	// n*16) has its low n bytes set to 0xff and the remaining bytes zero, so
      	// a PAND with entry n keeps only the first n bytes of a 16-byte load.
  1104	DATA masks<>+0x00(SB)/4, $0x00000000	// entry 0: keep nothing
  1105	DATA masks<>+0x04(SB)/4, $0x00000000
  1106	DATA masks<>+0x08(SB)/4, $0x00000000
  1107	DATA masks<>+0x0c(SB)/4, $0x00000000
  1108		
  1109	DATA masks<>+0x10(SB)/4, $0x000000ff	// entry 1
  1110	DATA masks<>+0x14(SB)/4, $0x00000000
  1111	DATA masks<>+0x18(SB)/4, $0x00000000
  1112	DATA masks<>+0x1c(SB)/4, $0x00000000
  1113		
  1114	DATA masks<>+0x20(SB)/4, $0x0000ffff	// entry 2
  1115	DATA masks<>+0x24(SB)/4, $0x00000000
  1116	DATA masks<>+0x28(SB)/4, $0x00000000
  1117	DATA masks<>+0x2c(SB)/4, $0x00000000
  1118		
  1119	DATA masks<>+0x30(SB)/4, $0x00ffffff	// entry 3
  1120	DATA masks<>+0x34(SB)/4, $0x00000000
  1121	DATA masks<>+0x38(SB)/4, $0x00000000
  1122	DATA masks<>+0x3c(SB)/4, $0x00000000
  1123		
  1124	DATA masks<>+0x40(SB)/4, $0xffffffff	// entry 4
  1125	DATA masks<>+0x44(SB)/4, $0x00000000
  1126	DATA masks<>+0x48(SB)/4, $0x00000000
  1127	DATA masks<>+0x4c(SB)/4, $0x00000000
  1128		
  1129	DATA masks<>+0x50(SB)/4, $0xffffffff	// entry 5
  1130	DATA masks<>+0x54(SB)/4, $0x000000ff
  1131	DATA masks<>+0x58(SB)/4, $0x00000000
  1132	DATA masks<>+0x5c(SB)/4, $0x00000000
  1133		
  1134	DATA masks<>+0x60(SB)/4, $0xffffffff	// entry 6
  1135	DATA masks<>+0x64(SB)/4, $0x0000ffff
  1136	DATA masks<>+0x68(SB)/4, $0x00000000
  1137	DATA masks<>+0x6c(SB)/4, $0x00000000
  1138		
  1139	DATA masks<>+0x70(SB)/4, $0xffffffff	// entry 7
  1140	DATA masks<>+0x74(SB)/4, $0x00ffffff
  1141	DATA masks<>+0x78(SB)/4, $0x00000000
  1142	DATA masks<>+0x7c(SB)/4, $0x00000000
  1143		
  1144	DATA masks<>+0x80(SB)/4, $0xffffffff	// entry 8
  1145	DATA masks<>+0x84(SB)/4, $0xffffffff
  1146	DATA masks<>+0x88(SB)/4, $0x00000000
  1147	DATA masks<>+0x8c(SB)/4, $0x00000000
  1148		
  1149	DATA masks<>+0x90(SB)/4, $0xffffffff	// entry 9
  1150	DATA masks<>+0x94(SB)/4, $0xffffffff
  1151	DATA masks<>+0x98(SB)/4, $0x000000ff
  1152	DATA masks<>+0x9c(SB)/4, $0x00000000
  1153		
  1154	DATA masks<>+0xa0(SB)/4, $0xffffffff	// entry 10
  1155	DATA masks<>+0xa4(SB)/4, $0xffffffff
  1156	DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1157	DATA masks<>+0xac(SB)/4, $0x00000000
  1158		
  1159	DATA masks<>+0xb0(SB)/4, $0xffffffff	// entry 11
  1160	DATA masks<>+0xb4(SB)/4, $0xffffffff
  1161	DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1162	DATA masks<>+0xbc(SB)/4, $0x00000000
  1163		
  1164	DATA masks<>+0xc0(SB)/4, $0xffffffff	// entry 12
  1165	DATA masks<>+0xc4(SB)/4, $0xffffffff
  1166	DATA masks<>+0xc8(SB)/4, $0xffffffff
  1167	DATA masks<>+0xcc(SB)/4, $0x00000000
  1168		
  1169	DATA masks<>+0xd0(SB)/4, $0xffffffff	// entry 13
  1170	DATA masks<>+0xd4(SB)/4, $0xffffffff
  1171	DATA masks<>+0xd8(SB)/4, $0xffffffff
  1172	DATA masks<>+0xdc(SB)/4, $0x000000ff
  1173		
  1174	DATA masks<>+0xe0(SB)/4, $0xffffffff	// entry 14
  1175	DATA masks<>+0xe4(SB)/4, $0xffffffff
  1176	DATA masks<>+0xe8(SB)/4, $0xffffffff
  1177	DATA masks<>+0xec(SB)/4, $0x0000ffff
  1178		
  1179	DATA masks<>+0xf0(SB)/4, $0xffffffff	// entry 15
  1180	DATA masks<>+0xf4(SB)/4, $0xffffffff
  1181	DATA masks<>+0xf8(SB)/4, $0xffffffff
  1182	DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1183	
  1184	GLOBL masks<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only
  1185	
  1186	// these are arguments to pshufb. They move data down from
  1187	// the high bytes of the register to the low bytes of the register.
  1188	// index is how many bytes to move.
      	// Each entry is a 16-byte PSHUFB control: for entries 1 through 15 the
      	// low n control bytes select source bytes 16-n..15, and every remaining
      	// control byte is 0xff, which makes PSHUFB write zero to that byte.
  1189	DATA shifts<>+0x00(SB)/4, $0x00000000	// entry 0
  1190	DATA shifts<>+0x04(SB)/4, $0x00000000
  1191	DATA shifts<>+0x08(SB)/4, $0x00000000
  1192	DATA shifts<>+0x0c(SB)/4, $0x00000000
  1193		
  1194	DATA shifts<>+0x10(SB)/4, $0xffffff0f	// entry 1: byte 15 -> byte 0
  1195	DATA shifts<>+0x14(SB)/4, $0xffffffff
  1196	DATA shifts<>+0x18(SB)/4, $0xffffffff
  1197	DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1198		
  1199	DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// entry 2
  1200	DATA shifts<>+0x24(SB)/4, $0xffffffff
  1201	DATA shifts<>+0x28(SB)/4, $0xffffffff
  1202	DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1203		
  1204	DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// entry 3
  1205	DATA shifts<>+0x34(SB)/4, $0xffffffff
  1206	DATA shifts<>+0x38(SB)/4, $0xffffffff
  1207	DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1208		
  1209	DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// entry 4
  1210	DATA shifts<>+0x44(SB)/4, $0xffffffff
  1211	DATA shifts<>+0x48(SB)/4, $0xffffffff
  1212	DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1213		
  1214	DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// entry 5
  1215	DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1216	DATA shifts<>+0x58(SB)/4, $0xffffffff
  1217	DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1218		
  1219	DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// entry 6
  1220	DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1221	DATA shifts<>+0x68(SB)/4, $0xffffffff
  1222	DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1223		
  1224	DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// entry 7
  1225	DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1226	DATA shifts<>+0x78(SB)/4, $0xffffffff
  1227	DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1228		
  1229	DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// entry 8
  1230	DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1231	DATA shifts<>+0x88(SB)/4, $0xffffffff
  1232	DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1233		
  1234	DATA shifts<>+0x90(SB)/4, $0x0a090807	// entry 9
  1235	DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1236	DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1237	DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1238		
  1239	DATA shifts<>+0xa0(SB)/4, $0x09080706	// entry 10
  1240	DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1241	DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1242	DATA shifts<>+0xac(SB)/4, $0xffffffff
  1243		
  1244	DATA shifts<>+0xb0(SB)/4, $0x08070605	// entry 11
  1245	DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1246	DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1247	DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1248		
  1249	DATA shifts<>+0xc0(SB)/4, $0x07060504	// entry 12
  1250	DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1251	DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1252	DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1253		
  1254	DATA shifts<>+0xd0(SB)/4, $0x06050403	// entry 13
  1255	DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1256	DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1257	DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1258		
  1259	DATA shifts<>+0xe0(SB)/4, $0x05040302	// entry 14
  1260	DATA shifts<>+0xe4(SB)/4, $0x09080706
  1261	DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1262	DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1263		
  1264	DATA shifts<>+0xf0(SB)/4, $0x04030201	// entry 15
  1265	DATA shifts<>+0xf4(SB)/4, $0x08070605
  1266	DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1267	DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1268	
  1269	GLOBL shifts<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only
  1270	
  1271	TEXT ·checkASM(SB),NOSPLIT,$0-1
  1272		// ret = true iff both tables are 16-byte aligned: OR the two addresses, then test the low 4 bits
  1273		MOVL	$shifts<>(SB), BX
  1274		MOVL	$masks<>(SB), AX
  1275		ORL	BX, AX	// a misaligned bit in either address survives the OR
  1276		TESTL	$15, AX
  1277		SETEQ	ret+0(FP)
  1278		RET
  1279	
  1280	// memequal(p, q unsafe.Pointer, size uintptr) bool
  1281	TEXT runtime·memequal(SB),NOSPLIT,$0-13
  1282		MOVL	b+4(FP), DI
  1283		MOVL	a+0(FP), SI
  1284		CMPL	SI, DI
  1285		JNE	compare	// distinct addresses: the bytes must be compared
  1286		MOVB	$1, ret+12(FP)	// same address, so trivially equal
  1287		RET
  1288	compare:
  1289		LEAL	ret+12(FP), AX	// memeqbody stores its answer through AX
  1290		MOVL	size+8(FP), BX
  1291		JMP	runtime·memeqbody(SB)
  1292	
  1293	// memequal_varlen(a, b unsafe.Pointer) bool
  1294	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
  1295		MOVL	b+4(FP), DI
  1296		MOVL	a+0(FP), SI
  1297		CMPL	SI, DI
  1298		JNE	compare	// distinct addresses: the bytes must be compared
  1299		MOVB	$1, ret+8(FP)	// same address, so trivially equal
  1300		RET
  1301	compare:
  1302		LEAL	ret+8(FP), AX	// memeqbody stores its answer through AX
  1303		MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
  1304		JMP	runtime·memeqbody(SB)
  1305	
  1306	// eqstring reports whether two strings hold the same bytes.
  1307	// The compiler only passes strings whose lengths already
  1308	// match, so only s1's length is read here.
  1309	// runtime_test.go:eqstring_generic is the equivalent
  1310	// Go implementation.
  1311	TEXT runtime·eqstring(SB),NOSPLIT,$0-17
  1312		MOVL	s2_base+8(FP), DI
  1313		MOVL	s1_base+0(FP), SI
  1314		CMPL	SI, DI
  1315		JNE	compare	// different backing arrays: compare contents
  1316		MOVB	$1, ret+16(FP)	// same base pointer, same length: equal
  1317		RET
  1318	compare:
  1319		LEAL	ret+16(FP), AX	// memeqbody stores its answer through AX
  1320		MOVL	s1_len+4(FP), BX
  1321		JMP	runtime·memeqbody(SB)
  1322	
  1323	TEXT bytes·Equal(SB),NOSPLIT,$0-25
  1324		MOVL	b_len+16(FP), CX
  1325		MOVL	a_len+4(FP), BX
  1326		CMPL	BX, CX
  1327		JEQ	same_len	// only equal-length slices need a byte compare
  1328		MOVB	$0, ret+24(FP)	// lengths differ, so the slices cannot be equal
  1329		RET
  1330	same_len:
  1331		MOVL	b+12(FP), DI
  1332		MOVL	a+0(FP), SI
  1333		LEAL	ret+24(FP), AX	// memeqbody stores its answer through AX
  1334		JMP	runtime·memeqbody(SB)
  1335	
  1336	// a in SI
  1337	// b in DI
  1338	// count in BX
  1339	// address of result byte in AX
      	// Stores 1 to the result byte if the count bytes at a and b are all equal
      	// and 0 otherwise. The wrappers in this file reach it with JMP rather than
      	// CALL, so its RET returns directly to the wrapper's caller.
  1340	TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1341		CMPL	BX, $4
  1342		JB	small	// fewer than 4 bytes needs the page-safe path
  1343	
  1344		// 64 bytes at a time using xmm registers
  1345	hugeloop:
  1346		CMPL	BX, $64
  1347		JB	bigloop
  1348		TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1349		JE	bigloop
  1350		MOVOU	(SI), X0
  1351		MOVOU	(DI), X1
  1352		MOVOU	16(SI), X2
  1353		MOVOU	16(DI), X3
  1354		MOVOU	32(SI), X4
  1355		MOVOU	32(DI), X5
  1356		MOVOU	48(SI), X6
  1357		MOVOU	48(DI), X7
  1358		PCMPEQB	X1, X0	// each byte becomes 0xff where a and b match
  1359		PCMPEQB	X3, X2
  1360		PCMPEQB	X5, X4
  1361		PCMPEQB	X7, X6
  1362		PAND	X2, X0	// fold the four compare results into X0
  1363		PAND	X6, X4
  1364		PAND	X4, X0
  1365		PMOVMSKB X0, DX	// one bit per byte; 0xffff means all 64 matched
  1366		ADDL	$64, SI
  1367		ADDL	$64, DI
  1368		SUBL	$64, BX
  1369		CMPL	DX, $0xffff
  1370		JEQ	hugeloop
  1371		MOVB	$0, (AX)	// mismatch somewhere in this 64-byte chunk
  1372		RET
  1373	
  1374		// 4 bytes at a time using 32-bit register
  1375	bigloop:
  1376		CMPL	BX, $4
  1377		JBE	leftover
  1378		MOVL	(SI), CX
  1379		MOVL	(DI), DX
  1380		ADDL	$4, SI
  1381		ADDL	$4, DI
  1382		SUBL	$4, BX
  1383		CMPL	CX, DX
  1384		JEQ	bigloop
  1385		MOVB	$0, (AX)
  1386		RET
  1387	
  1388		// remaining 0-4 bytes
      		// Compare the 4 bytes that end at the last byte of each buffer. This may
      		// re-read bytes already compared, which is safe because this path is only
      		// reached when the original count was at least 4.
  1389	leftover:
  1390		MOVL	-4(SI)(BX*1), CX
  1391		MOVL	-4(DI)(BX*1), DX
  1392		CMPL	CX, DX
  1393		SETEQ	(AX)
  1394		RET
  1395	
  1396	small:
  1397		CMPL	BX, $0
  1398		JEQ	equal	// ZF is set here, so the SETEQ at equal stores 1
  1399	
  1400		LEAL	0(BX*8), CX
  1401		NEGL	CX	// CX = -8*count; as a shift count this acts as 32-8*count
  1402	
  1403		MOVL	SI, DX
  1404		CMPB	DX, $0xfc	// low address byte above 0xfc: a 4-byte load could cross a page
  1405		JA	si_high
  1406	
  1407		// load at SI won't cross a page boundary.
  1408		MOVL	(SI), SI
  1409		JMP	si_finish
  1410	si_high:
  1411		// address ends in 111111xx. Load up to bytes we want, move to correct position.
  1412		MOVL	-4(SI)(BX*1), SI
  1413		SHRL	CX, SI
  1414	si_finish:
  1415	
  1416		// same for DI.
  1417		MOVL	DI, DX
  1418		CMPB	DX, $0xfc
  1419		JA	di_high
  1420		MOVL	(DI), DI
  1421		JMP	di_finish
  1422	di_high:
  1423		MOVL	-4(DI)(BX*1), DI
  1424		SHRL	CX, DI
  1425	di_finish:
  1426	
  1427		SUBL	SI, DI
  1428		SHLL	CX, DI	// shift out the bytes beyond count; ZF set iff the wanted bytes match
  1429	equal:
  1430		SETEQ	(AX)
  1431		RET
  1432	
  1433	TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
  1434		LEAL	ret+16(FP), AX	// cmpbody writes the result word through AX
  1435		MOVL	s2_len+12(FP), DX
  1436		MOVL	s2_base+8(FP), DI
  1437		MOVL	s1_len+4(FP), BX
  1438		MOVL	s1_base+0(FP), SI
  1439		JMP	runtime·cmpbody(SB)	// tail-jump: cmpbody returns to our caller
  1440	
  1441	TEXT bytes·Compare(SB),NOSPLIT,$0-28
  1442		LEAL	ret+24(FP), AX	// cmpbody writes the result word through AX
  1443		MOVL	s2+16(FP), DX	// length of second slice
  1444		MOVL	s2+12(FP), DI	// data pointer of second slice
  1445		MOVL	s1+4(FP), BX	// length of first slice
  1446		MOVL	s1+0(FP), SI	// data pointer of first slice
  1447		JMP	runtime·cmpbody(SB)	// tail-jump: cmpbody returns to our caller
  1448	
  1449	TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
      		// Stores to ret the index of the first byte equal to c in the s_len bytes
      		// at s, or -1 if there is none.
  1450		MOVL	s+0(FP), SI
  1451		MOVL	s_len+4(FP), CX
  1452		MOVB	c+12(FP), AL
  1453		MOVL	SI, DI
  1454		CLD; REPN; SCASB	// scan forward from DI for AL, at most CX bytes
  1455		JZ 3(PC)	// found: skip the two "not found" instructions below
  1456		MOVL	$-1, ret+16(FP)
  1457		RET
      		// DI is one past the matching byte, so index = DI - SI - 1.
      		// With s_len == 0 SCASB never runs and ZF is stale, but then DI == SI
      		// and this path also yields -1.
  1458		SUBL	SI, DI
  1459		SUBL	$1, DI
  1460		MOVL	DI, ret+16(FP)
  1461		RET
  1462	
  1463	TEXT strings·IndexByte(SB),NOSPLIT,$0-16
      		// Stores to ret the index of the first byte equal to c in the s_len bytes
      		// at s, or -1 if there is none.
  1464		MOVL	s+0(FP), SI
  1465		MOVL	s_len+4(FP), CX
  1466		MOVB	c+8(FP), AL
  1467		MOVL	SI, DI
  1468		CLD; REPN; SCASB	// scan forward from DI for AL, at most CX bytes
  1469		JZ 3(PC)	// found: skip the two "not found" instructions below
  1470		MOVL	$-1, ret+12(FP)
  1471		RET
      		// DI is one past the matching byte, so index = DI - SI - 1.
      		// With s_len == 0 SCASB never runs and ZF is stale, but then DI == SI
      		// and this path also yields -1.
  1472		SUBL	SI, DI
  1473		SUBL	$1, DI
  1474		MOVL	DI, ret+12(FP)
  1475		RET
  1476	
  1477	// input:
  1478	//   SI = a
  1479	//   DI = b
  1480	//   BX = alen
  1481	//   DX = blen
  1482	//   AX = address of return word (set to 1/0/-1)
      	// Compares the first min(alen, blen) bytes as unsigned values; if they are
      	// all equal the lengths decide the result. Stores +1 when a sorts after b,
      	// -1 when it sorts before, 0 when equal. The wrappers in this file reach it
      	// with JMP, so its RET returns directly to the wrapper's caller.
  1483	TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1484		MOVL	DX, BP
  1485		SUBL	BX, DX // DX = blen-alen
  1486		JLE	2(PC)	// blen <= alen: BP already holds the minimum, skip the MOVL
  1487		MOVL	BX, BP // BP = min(alen, blen)
  1488		CMPL	SI, DI
  1489		JEQ	allsame	// same start address: only the lengths can differ
  1490		CMPL	BP, $4
  1491		JB	small
  1492		TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1493		JE	mediumloop
      		// 16 bytes at a time using xmm registers
  1494	largeloop:
  1495		CMPL	BP, $16
  1496		JB	mediumloop
  1497		MOVOU	(SI), X0
  1498		MOVOU	(DI), X1
  1499		PCMPEQB X0, X1
  1500		PMOVMSKB X1, BX
  1501		XORL	$0xffff, BX	// convert EQ to NE
  1502		JNE	diff16	// branch if at least one byte is not equal
  1503		ADDL	$16, SI
  1504		ADDL	$16, DI
  1505		SUBL	$16, BP
  1506		JMP	largeloop
  1507	
  1508	diff16:
  1509		BSFL	BX, BX	// index of first byte that differs
  1510		XORL	DX, DX
  1511		MOVB	(SI)(BX*1), CX
  1512		CMPB	CX, (DI)(BX*1)
  1513		SETHI	DX	// 1 if a's byte is above b's byte (unsigned)
  1514		LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
  1515		MOVL	DX, (AX)
  1516		RET
  1517	
      		// 4 bytes at a time using 32-bit registers
  1518	mediumloop:
  1519		CMPL	BP, $4
  1520		JBE	_0through4
  1521		MOVL	(SI), BX
  1522		MOVL	(DI), CX
  1523		CMPL	BX, CX
  1524		JNE	diff4
  1525		ADDL	$4, SI
  1526		ADDL	$4, DI
  1527		SUBL	$4, BP
  1528		JMP	mediumloop
  1529	
      		// Compare the 4 bytes ending at the last common byte; this may re-read
      		// bytes already found equal, which does not change the outcome.
  1530	_0through4:
  1531		MOVL	-4(SI)(BP*1), BX
  1532		MOVL	-4(DI)(BP*1), CX
  1533		CMPL	BX, CX
  1534		JEQ	allsame
  1535	
      		// BX and CX hold differing 4-byte words from a and b. After the byte
      		// swap the earliest byte is the most significant, so the highest
      		// differing bit belongs to the first differing byte.
  1536	diff4:
  1537		BSWAPL	BX	// reverse order of bytes
  1538		BSWAPL	CX
  1539		XORL	BX, CX	// find bit differences
  1540		BSRL	CX, CX	// index of highest bit difference
  1541		SHRL	CX, BX	// move a's bit to bottom
  1542		ANDL	$1, BX	// mask bit
  1543		LEAL	-1(BX*2), BX // 1/0 => +1/-1
  1544		MOVL	BX, (AX)
  1545		RET
  1546	
  1547		// 0-3 bytes in common
  1548	small:
  1549		LEAL	(BP*8), CX
  1550		NEGL	CX	// CX = -8*BP; as a shift count this acts as 32-8*BP
  1551		JEQ	allsame	// NEGL set ZF, so this is taken when BP == 0
  1552	
  1553		// load si
  1554		CMPB	SI, $0xfc	// low address byte above 0xfc: a 4-byte load could cross a page
  1555		JA	si_high
  1556		MOVL	(SI), SI
  1557		JMP	si_finish
  1558	si_high:
  1559		MOVL	-4(SI)(BP*1), SI	// load ending at the last wanted byte instead
  1560		SHRL	CX, SI	// bring the wanted bytes down to the low end
  1561	si_finish:
  1562		SHLL	CX, SI	// move the wanted bytes to the top, discarding the rest
  1563	
  1564		// same for di
  1565		CMPB	DI, $0xfc
  1566		JA	di_high
  1567		MOVL	(DI), DI
  1568		JMP	di_finish
  1569	di_high:
  1570		MOVL	-4(DI)(BP*1), DI
  1571		SHRL	CX, DI
  1572	di_finish:
  1573		SHLL	CX, DI
  1574	
  1575		BSWAPL	SI	// reverse order of bytes
  1576		BSWAPL	DI
  1577		XORL	SI, DI	// find bit differences
  1578		JEQ	allsame
  1579		BSRL	DI, CX	// index of highest bit difference
  1580		SHRL	CX, SI	// move a's bit to bottom
  1581		ANDL	$1, SI	// mask bit
  1582		LEAL	-1(SI*2), BX // 1/0 => +1/-1
  1583		MOVL	BX, (AX)
  1584		RET
  1585	
  1586		// all the bytes in common are the same, so we just need
  1587		// to compare the lengths.
      		// DX still holds blen-alen from the top of the function on every path
      		// that reaches this label.
  1588	allsame:
  1589		XORL	BX, BX
  1590		XORL	CX, CX
  1591		TESTL	DX, DX
  1592		SETLT	BX	// 1 if alen > blen
  1593		SETEQ	CX	// 1 if alen == blen
  1594		LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
  1595		MOVL	BX, (AX)
  1596		RET
  1597	
  1598	TEXT runtime·fastrand(SB), NOSPLIT, $0-4
      		// Advances the per-m value stored in m_fastrand and returns the new
      		// value: the old value is doubled, and the doubled value XORed with
      		// 0x88888eef is used instead whenever that XOR result has a clear sign bit.
  1599		get_tls(CX)
  1600		MOVL	g(CX), AX
  1601		MOVL	g_m(AX), AX	// AX = m of the current g
  1602		MOVL	m_fastrand(AX), DX
  1603		ADDL	DX, DX	// DX = 2 * previous state
  1604		MOVL	DX, BX	// keep the plain doubled value
  1605		XORL	$0x88888eef, DX
  1606		JPL	2(PC)	// sign bit clear: keep the XORed value, skip the MOVL
  1607		MOVL	BX, DX	// otherwise fall back to the plain doubled value
  1608		MOVL	DX, m_fastrand(AX)
  1609		MOVL	DX, ret+0(FP)
  1610		RET
  1611	
  1612	TEXT runtime·return0(SB), NOSPLIT, $0
  1613		MOVL	$0, AX	// leave 0 in AX for the caller; no stack result is written
  1614		RET
  1615	
  1616	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1617	// Must obey the gcc calling convention.
      	// The result is returned in AX; only AX and CX are written.
  1618	TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1619		get_tls(CX)
  1620		MOVL	g(CX), AX	// AX = g
  1621		MOVL	g_m(AX), AX	// AX = g->m
  1622		MOVL	m_curg(AX), AX	// AX = g->m->curg
  1623		MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
  1624		RET
  1625	
  1626	// The top-most function running on a goroutine
  1627	// returns to goexit+PCQuantum.
  1628	TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1629		BYTE	$0x90	// NOP; presumably the padding that goexit+PCQuantum skips over (confirm PCQuantum for 386)
  1630		CALL	runtime·goexit1(SB)	// does not return
  1631		// traceback from goexit1 must hit code range of goexit
      		// so the CALL's return address must not be the first byte past this function.
  1632		BYTE	$0x90	// NOP
  1633	
  1634	// Prefetching doesn't seem to help, so the four prefetch entry points below are no-ops.
  1635	TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
  1636		RET	// argument is ignored
  1637	
  1638	TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
  1639		RET	// argument is ignored
  1640	
  1641	TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
  1642		RET	// argument is ignored
  1643	
  1644	TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
  1645		RET	// argument is ignored
  1646	
  1647	// Add a module's moduledata to the linked list of moduledata objects. This
  1648	// is called from .init_array by a function generated in the linker and so
  1649	// follows the platform ABI wrt register preservation -- it only touches AX,
  1650	// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1651	// instead the pointer to the moduledata is passed in AX.
  1652	TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1653	       MOVL    runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1654	       MOVL    AX, moduledata_next(DX)	// tail.next = new moduledata
  1655	       MOVL    AX, runtime·lastmoduledatap(SB)	// new moduledata becomes the tail
  1656	       RET
  1657	
  1658	TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
      		// Converts the uint32 argument a to a float64 result. The value is
      		// widened to 64 bits with a zero high word on the stack and loaded
      		// through the x87 unit, so values with the top bit set are not treated
      		// as negative.
  1659		MOVL	a+0(FP), AX
  1660		MOVL	AX, 0(SP)	// low word = a
  1661		MOVL	$0, 4(SP)	// high word = 0
  1662		FMOVV	0(SP), F0	// load the 8 bytes at 0(SP) as a 64-bit integer
  1663		FMOVDP	F0, ret+4(FP)	// store as float64 and pop the x87 stack
  1664		RET
  1665	
  1666	TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
      		// Converts the float64 argument a to a uint32 result by storing it as a
      		// 64-bit integer and returning the low 32 bits. The x87 control word is
      		// swapped for runtime·controlWord64trunc during the store and restored
      		// afterwards (by its name, that word selects truncation; confirm at its definition).
  1667		FMOVD	a+0(FP), F0
  1668		FSTCW	0(SP)	// save the current control word
  1669		FLDCW	runtime·controlWord64trunc(SB)
  1670		FMOVVP	F0, 4(SP)	// store as 64-bit integer at 4(SP) and pop the x87 stack
  1671		FLDCW	0(SP)	// restore the saved control word
  1672		MOVL	4(SP), AX	// low 32 bits of the integer result
  1673		MOVL	AX, ret+8(FP)
  1674		RET

View as plain text