...
Run Format

Text file src/runtime/asm_386.s

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	TEXT runtime·rt0_go(SB),NOSPLIT,$0	// process entry: set up g0/m0, probe the CPU, set up TLS, then start the scheduler
    11		// copy arguments forward on an even stack
    12		MOVL	argc+0(FP), AX
    13		MOVL	argv+4(FP), BX
    14		SUBL	$128, SP		// plenty of scratch
    15		ANDL	$~15, SP		// round SP down to a multiple of 16
    16		MOVL	AX, 120(SP)		// save argc, argv away
    17		MOVL	BX, 124(SP)
    18	
    19		// set default stack bounds.
    20		// _cgo_init may update stackguard.
    21		MOVL	$runtime·g0(SB), BP
    22		LEAL	(-64*1024+104)(SP), BX	// provisional low bound: just under 64 KB below SP
    23		MOVL	BX, g_stackguard0(BP)
    24		MOVL	BX, g_stackguard1(BP)
    25		MOVL	BX, (g_stack+stack_lo)(BP)
    26		MOVL	SP, (g_stack+stack_hi)(BP)
    27		
    28		// find out information about the processor we're on
    29	#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
    30		JMP 	has_cpuid
    31	#else
    32		// first see if CPUID instruction is supported.
    33		PUSHFL	// saved copy of EFLAGS, restored by the last POPFL below
    34		PUSHFL
    35		XORL	$(1<<21), 0(SP) // flip ID bit
    36		POPFL
    37		PUSHFL
    38		POPL	AX
    39		XORL	0(SP), AX	// AX = bits that differ from the saved EFLAGS
    40		POPFL	// restore EFLAGS
    41		TESTL	$(1<<21), AX	// did the ID bit actually change?
    42		JNE 	has_cpuid
    43	#endif
    44	
    45	bad_proc: // show that the program requires MMX.
    46		MOVL	$2, 0(SP)	// first argument to write (2; presumably the stderr fd)
    47		MOVL	$bad_proc_msg<>(SB), 4(SP)
    48		MOVL	$0x3d, 8(SP)	// byte count; equals the GLOBL size of bad_proc_msg
    49		CALL	runtime·write(SB)
    50		MOVL	$1, 0(SP)	// argument to exit
    51		CALL	runtime·exit(SB)
    52		INT	$3	// trap if exit comes back
    53	
    54	has_cpuid:
    55		MOVL	$0, AX
    56		CPUID
    57		MOVL	AX, SI	// keep leaf 0's EAX; compared against 7 further down
    58		CMPL	AX, $0
    59		JE	nocpuinfo
    60	
    61		// Figure out how to serialize RDTSC.
    62		// On Intel processors LFENCE is enough. AMD requires MFENCE.
    63		// Don't know about the rest, so let's do MFENCE.
    64		CMPL	BX, $0x756E6547  // "Genu"
    65		JNE	notintel
    66		CMPL	DX, $0x49656E69  // "ineI"
    67		JNE	notintel
    68		CMPL	CX, $0x6C65746E  // "ntel"
    69		JNE	notintel
    70		MOVB	$1, runtime·lfenceBeforeRdtsc(SB)	// read back by cputicks
    71	notintel:
    72	
    73		// Load EAX=1 cpuid flags
    74		MOVL	$1, AX
    75		CPUID
    76		MOVL	CX, AX // Move to global variable clobbers CX when generating PIC
    77		MOVL	AX, runtime·cpuid_ecx(SB)
    78		MOVL	DX, runtime·cpuid_edx(SB)
    79	
    80		// Check for MMX support
    81		TESTL	$(1<<23), DX	// MMX
    82		JZ 	bad_proc
    83	
    84		// Load EAX=7/ECX=0 cpuid flags
    85		CMPL	SI, $7	// SI still holds leaf 0's EAX
    86		JLT	nocpuinfo
    87		MOVL	$7, AX
    88		MOVL	$0, CX
    89		CPUID
    90		MOVL	BX, runtime·cpuid_ebx7(SB)
    91	
    92	nocpuinfo:	
    93	
    94		// if there is an _cgo_init, call it to let it
    95		// initialize and to set up GS.  if not,
    96		// we set up GS ourselves.
    97		MOVL	_cgo_init(SB), AX
    98		TESTL	AX, AX
    99		JZ	needtls
   100		MOVL	$setg_gcc<>(SB), BX
   101		MOVL	BX, 4(SP)	// second argument: address of setg_gcc
   102		MOVL	BP, 0(SP)	// first argument: BP, which still holds &runtime·g0
   103		CALL	AX
   104	
   105		// update stackguard after _cgo_init
   106		MOVL	$runtime·g0(SB), CX
   107		MOVL	(g_stack+stack_lo)(CX), AX
   108		ADDL	$const__StackGuard, AX
   109		MOVL	AX, g_stackguard0(CX)
   110		MOVL	AX, g_stackguard1(CX)
   111	
   112	#ifndef GOOS_windows
   113		// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   114		JMP ok
   115	#endif
   116	needtls:
   117	#ifdef GOOS_plan9
   118		// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   119		JMP	ok
   120	#endif
   121	
   122		// set up %gs
   123		CALL	runtime·ldt0setup(SB)
   124	
   125		// store through it, to make sure it works
   126		get_tls(BX)
   127		MOVL	$0x123, g(BX)	// write a marker through the TLS g slot
   128		MOVL	runtime·m0+m_tls(SB), AX	// read it back directly from m0.tls
   129		CMPL	AX, $0x123
   130		JEQ	ok
   131		MOVL	AX, 0	// abort
   132	ok:
   133		// set up m and g "registers"
   134		get_tls(BX)
   135		LEAL	runtime·g0(SB), DX
   136		MOVL	DX, g(BX)
   137		LEAL	runtime·m0(SB), AX
   138	
   139		// save m->g0 = g0
   140		MOVL	DX, m_g0(AX)
   141		// save g0->m = m0
   142		MOVL	AX, g_m(DX)
   143	
   144		CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   145	
   146		// convention is D is always cleared
   147		CLD
   148	
   149		CALL	runtime·check(SB)
   150	
   151		// saved argc, argv
   152		MOVL	120(SP), AX
   153		MOVL	AX, 0(SP)
   154		MOVL	124(SP), AX
   155		MOVL	AX, 4(SP)
   156		CALL	runtime·args(SB)
   157		CALL	runtime·osinit(SB)
   158		CALL	runtime·schedinit(SB)
   159	
   160		// create a new goroutine to start program
   161		PUSHL	$runtime·mainPC(SB)	// entry
   162		PUSHL	$0	// arg size
   163		CALL	runtime·newproc(SB)
   164		POPL	AX	// discard the two words pushed above
   165		POPL	AX
   166	
   167		// start this M
   168		CALL	runtime·mstart(SB)
   169	
   170		INT $3	// trap if mstart comes back (presumably it never should)
   171		RET
   172	
   173	DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"	// message written by rt0_go's bad_proc path, laid out in 8-byte pieces
   174	DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
   175	DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
   176	DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
   177	DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
   178	DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
   179	DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
   180	DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
   181	DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa	// trailing newline
   182	GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d	// 0x3d = 61 bytes: 7*8 + 4 + 1
   183	
   184	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte slot holding the address of runtime·main
   185	GLOBL	runtime·mainPC(SB),RODATA,$4	// rt0_go pushes this slot's address as the entry argument to newproc
   186	
   187	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// raise a breakpoint trap (INT 3) and return if execution is resumed
   188		INT $3
   189		RET
   190	
   191	TEXT runtime·asminit(SB),NOSPLIT,$0-0	// load a fixed x87 control word; no arguments, no results
   192		// Linux and MinGW start the FPU in extended double precision.
   193		// Other operating systems use double precision.
   194		// Change to double precision to match them,
   195		// and to match other hardware that only has double.
   196		PUSHL $0x27F	// control word value, placed on the stack so FLDCW can load it from memory
   197		FLDCW	0(SP)
   198		POPL AX	// drop the temporary; AX is only a scratch destination
   199		RET
   200	
   201	/*
   202	 *  go-routine
   203	 */
   204	
   205	// void gosave(Gobuf*)
   206	// save state in Gobuf; setjmp
   207	TEXT runtime·gosave(SB), NOSPLIT, $0-4	// records the caller's SP, PC and g; clears ret and ctxt
   208		MOVL	buf+0(FP), AX		// gobuf
   209		LEAL	buf+0(FP), BX		// caller's SP
   210		MOVL	BX, gobuf_sp(AX)
   211		MOVL	0(SP), BX		// caller's PC
   212		MOVL	BX, gobuf_pc(AX)
   213		MOVL	$0, gobuf_ret(AX)
   214		MOVL	$0, gobuf_ctxt(AX)
   215		get_tls(CX)
   216		MOVL	g(CX), BX		// current g
   217		MOVL	BX, gobuf_g(AX)
   218		RET
   219	
   220	// void gogo(Gobuf*)
   221	// restore state from Gobuf; longjmp
   222	TEXT runtime·gogo(SB), NOSPLIT, $0-4	// does not return to its caller: control continues at buf's saved PC
   223		MOVL	buf+0(FP), BX		// gobuf
   224		MOVL	gobuf_g(BX), DX
   225		MOVL	0(DX), CX		// make sure g != nil
   226		get_tls(CX)
   227		MOVL	DX, g(CX)		// install the saved g as the current g
   228		MOVL	gobuf_sp(BX), SP	// restore SP
   229		MOVL	gobuf_ret(BX), AX	// AX = saved ret
   230		MOVL	gobuf_ctxt(BX), DX	// DX = saved ctxt
   231		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   232		MOVL	$0, gobuf_ret(BX)
   233		MOVL	$0, gobuf_ctxt(BX)
   234		MOVL	gobuf_pc(BX), BX	// BX = saved PC (the gobuf pointer is no longer needed)
   235		JMP	BX
   236	
   237	// func mcall(fn func(*g))
   238	// Switch to m->g0's stack, call fn(g).
   239	// Fn must never return. It should gogo(&g->sched)
   240	// to keep running g.
   241	TEXT runtime·mcall(SB), NOSPLIT, $0-4
   242		MOVL	fn+0(FP), DI
   243	
   244		get_tls(DX)
   245		MOVL	g(DX), AX	// save state in g->sched
   246		MOVL	0(SP), BX	// caller's PC
   247		MOVL	BX, (g_sched+gobuf_pc)(AX)
   248		LEAL	fn+0(FP), BX	// caller's SP
   249		MOVL	BX, (g_sched+gobuf_sp)(AX)
   250		MOVL	AX, (g_sched+gobuf_g)(AX)
   251	
   252		// switch to m->g0 & its stack, call fn
   253		MOVL	g(DX), BX
   254		MOVL	g_m(BX), BX
   255		MOVL	m_g0(BX), SI
   256		CMPL	SI, AX	// if g == m->g0 call badmcall
   257		JNE	3(PC)	// g != m->g0: skip the two-instruction badmcall jump
   258		MOVL	$runtime·badmcall(SB), AX
   259		JMP	AX
   260		MOVL	SI, g(DX)	// g = m->g0
   261		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   262		PUSHL	AX	// argument to fn: the g we just switched away from
   263		MOVL	DI, DX	// DX = fn value
   264		MOVL	0(DI), DI	// DI = code address stored in fn's first word
   265		CALL	DI
   266		POPL	AX	// only reached if fn returned, which it must not
   267		MOVL	$runtime·badmcall2(SB), AX
   268		JMP	AX
   269		RET
   270	
   271	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   272	// of the G stack. We need to distinguish the routine that
   273	// lives at the bottom of the G stack from the one that lives
   274	// at the top of the system stack because the one at the top of
   275	// the system stack terminates the stack walk (see topofstack()).
   276	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// systemstack stores this routine's address as g->sched.pc
   277		RET
   278	
   279	// func systemstack(fn func())
   280	TEXT runtime·systemstack(SB), NOSPLIT, $0-4	// run fn on the g0 stack, switching stacks only when called on m->curg
   281		MOVL	fn+0(FP), DI	// DI = fn
   282		get_tls(CX)
   283		MOVL	g(CX), AX	// AX = g
   284		MOVL	g_m(AX), BX	// BX = m
   285	
   286		MOVL	m_gsignal(BX), DX	// DX = gsignal
   287		CMPL	AX, DX
   288		JEQ	noswitch
   289	
   290		MOVL	m_g0(BX), DX	// DX = g0
   291		CMPL	AX, DX
   292		JEQ	noswitch
   293	
   294		MOVL	m_curg(BX), BP
   295		CMPL	AX, BP
   296		JEQ	switch
   297		
   298		// Bad: g is not gsignal, not g0, not curg. What is it?
   299		// Hide call from linker nosplit analysis.
   300		MOVL	$runtime·badsystemstack(SB), AX
   301		CALL	AX	// NOTE(review): if this call ever returned, execution would fall into switch below - confirm badsystemstack never returns
   302	
   303	switch:
   304		// save our state in g->sched. Pretend to
   305		// be systemstack_switch if the G stack is scanned.
   306		MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   307		MOVL	SP, (g_sched+gobuf_sp)(AX)
   308		MOVL	AX, (g_sched+gobuf_g)(AX)
   309	
   310		// switch to g0
   311		get_tls(CX)
   312		MOVL	DX, g(CX)	// DX still holds g0 from the check above
   313		MOVL	(g_sched+gobuf_sp)(DX), BX
   314		// make it look like mstart called systemstack on g0, to stop traceback
   315		SUBL	$4, BX	// room for one fake return-PC word
   316		MOVL	$runtime·mstart(SB), DX
   317		MOVL	DX, 0(BX)
   318		MOVL	BX, SP
   319	
   320		// call target function
   321		MOVL	DI, DX	// DX = fn value
   322		MOVL	0(DI), DI	// DI = code address stored in fn's first word
   323		CALL	DI
   324	
   325		// switch back to g
   326		get_tls(CX)
   327		MOVL	g(CX), AX
   328		MOVL	g_m(AX), BX
   329		MOVL	m_curg(BX), AX
   330		MOVL	AX, g(CX)
   331		MOVL	(g_sched+gobuf_sp)(AX), SP	// back onto the SP saved at switch
   332		MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP once it has been restored
   333		RET
   334	
   335	noswitch:
   336		// already on system stack, just call directly
   337		MOVL	DI, DX	// DX = fn value
   338		MOVL	0(DI), DI	// DI = code address stored in fn's first word
   339		CALL	DI
   340		RET
   341	
   342	/*
   343	 * support for morestack
   344	 */
   345	
   346	// Called during function prolog when more stack is needed.
   347	//
   348	// The traceback routines see morestack on a g0 as being
   349	// the top of a stack (for example, morestack calling newstack
   350	// calling the scheduler calling newm calling gc), so we must
   351	// record an argument size. For that purpose, it has no arguments.
   352	TEXT runtime·morestack(SB),NOSPLIT,$0-0
   353		// Cannot grow scheduler stack (m->g0).
   354		get_tls(CX)
   355		MOVL	g(CX), BX
   356		MOVL	g_m(BX), BX	// BX = m for the rest of the routine
   357		MOVL	m_g0(BX), SI
   358		CMPL	g(CX), SI
   359		JNE	2(PC)	// not g0: skip the trap
   360		INT	$3
   361	
   362		// Cannot grow signal stack.
   363		MOVL	m_gsignal(BX), SI
   364		CMPL	g(CX), SI
   365		JNE	2(PC)	// not gsignal: skip the trap
   366		INT	$3
   367	
   368		// Called from f.
   369		// Set m->morebuf to f's caller.
   370		MOVL	4(SP), DI	// f's caller's PC
   371		MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   372		LEAL	8(SP), CX	// f's caller's SP
   373		MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   374		get_tls(CX)	// CX was reused as scratch above; reload the TLS base
   375		MOVL	g(CX), SI
   376		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   377	
   378		// Set g->sched to context in f.
   379		MOVL	0(SP), AX	// f's PC
   380		MOVL	AX, (g_sched+gobuf_pc)(SI)
   381		MOVL	SI, (g_sched+gobuf_g)(SI)
   382		LEAL	4(SP), AX	// f's SP
   383		MOVL	AX, (g_sched+gobuf_sp)(SI)
   384		MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as received on entry (morestack_noctxt zeroes it first)
   385	
   386		// Call newstack on m->g0's stack.
   387		MOVL	m_g0(BX), BP
   388		MOVL	BP, g(CX)
   389		MOVL	(g_sched+gobuf_sp)(BP), AX
   390		MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   391		MOVL	AX, SP
   392		CALL	runtime·newstack(SB)
   393		MOVL	$0, 0x1003	// crash if newstack returns
   394		RET
   395	
   396	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0	// morestack entry that first zeroes DX
   397		MOVL	$0, DX	// morestack stores DX into g->sched.ctxt
   398		JMP runtime·morestack(SB)
   399	
   400	TEXT runtime·stackBarrier(SB),NOSPLIT,$0	// return-PC trampoline: look up the original return PC, advance stkbarPos, jump there
   401		// We came here via a RET to an overwritten return PC.
   402		// AX may be live. Other registers are available.
   403	
   404		// Get the original return PC, g.stkbar[g.stkbarPos].savedLRVal.
   405		get_tls(CX)
   406		MOVL	g(CX), CX	// CX = g
   407		MOVL	(g_stkbar+slice_array)(CX), DX	// DX = base of the g.stkbar array
   408		MOVL	g_stkbarPos(CX), BX
   409		IMULL	$stkbar__size, BX	// Too big for SIB.
   410		MOVL	stkbar_savedLRVal(DX)(BX*1), BX	// BX = g.stkbar[g.stkbarPos].savedLRVal
   411		// Record that this stack barrier was hit.
   412		ADDL	$1, g_stkbarPos(CX)
   413		// Jump to the original return PC.
   414		JMP	BX
   415	
   416	// reflectcall: call a function with the given argument list
   417	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   418	// we don't have variable-sized frames, so we use a small number
   419	// of constant-sized-frame functions to encode a few bits of size in the pc.
   420	// Caution: ugly multiline assembly macros in your future!
   421	
   422	#define DISPATCH(NAME,MAXSIZE)		\
   423		CMPL	CX, $MAXSIZE;		\
   424		JA	3(PC);		/* CX > MAXSIZE: skip the jump below and fall through */	\
   425		MOVL	$NAME(SB), AX;		\
   426		JMP	AX
   427	// Note: can't just "JMP NAME(SB)" - bad inlining results.
   428	
   429	TEXT reflect·call(SB), NOSPLIT, $0-0	// entry under the reflect· symbol name; forwards straight to ·reflectcall
   430		JMP	·reflectcall(SB)
   431	
   432	TEXT ·reflectcall(SB), NOSPLIT, $0-20	// jump to the first callN routine whose N is >= argsize
   433		MOVL	argsize+12(FP), CX	// CX is what each DISPATCH compares against its size
   434		DISPATCH(runtime·call16, 16)
   435		DISPATCH(runtime·call32, 32)
   436		DISPATCH(runtime·call64, 64)
   437		DISPATCH(runtime·call128, 128)
   438		DISPATCH(runtime·call256, 256)
   439		DISPATCH(runtime·call512, 512)
   440		DISPATCH(runtime·call1024, 1024)
   441		DISPATCH(runtime·call2048, 2048)
   442		DISPATCH(runtime·call4096, 4096)
   443		DISPATCH(runtime·call8192, 8192)
   444		DISPATCH(runtime·call16384, 16384)
   445		DISPATCH(runtime·call32768, 32768)
   446		DISPATCH(runtime·call65536, 65536)
   447		DISPATCH(runtime·call131072, 131072)
   448		DISPATCH(runtime·call262144, 262144)
   449		DISPATCH(runtime·call524288, 524288)
   450		DISPATCH(runtime·call1048576, 1048576)
   451		DISPATCH(runtime·call2097152, 2097152)
   452		DISPATCH(runtime·call4194304, 4194304)
   453		DISPATCH(runtime·call8388608, 8388608)
   454		DISPATCH(runtime·call16777216, 16777216)
   455		DISPATCH(runtime·call33554432, 33554432)
   456		DISPATCH(runtime·call67108864, 67108864)
   457		DISPATCH(runtime·call134217728, 134217728)
   458		DISPATCH(runtime·call268435456, 268435456)
   459		DISPATCH(runtime·call536870912, 536870912)
   460		DISPATCH(runtime·call1073741824, 1073741824)
   461		MOVL	$runtime·badreflectcall(SB), AX	// argsize is larger than every size listed above
   462		JMP	AX
   463	
   464	#define CALLFN(NAME,MAXSIZE)			\
   465	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   466		NO_LOCAL_POINTERS;			\
   467		/* copy arguments to stack */		\
   468		MOVL	argptr+8(FP), SI;		\
   469		MOVL	argsize+12(FP), CX;		\
   470		MOVL	SP, DI;		/* destination: bottom of this frame */	\
   471		REP;MOVSB;				\
   472		/* call function */			\
   473		MOVL	f+4(FP), DX;			\
   474		MOVL	(DX), AX; 			\
   475		PCDATA  $PCDATA_StackMapIndex, $0;	\
   476		CALL	AX;				\
   477		/* copy return values back */		\
   478		MOVL	argptr+8(FP), DI;		\
   479		MOVL	argsize+12(FP), CX;		\
   480		MOVL	retoffset+16(FP), BX;		\
   481		MOVL	SP, SI;				\
   482		ADDL	BX, DI;		/* skip the first retoffset bytes on both sides */	\
   483		ADDL	BX, SI;				\
   484		SUBL	BX, CX;		/* CX = argsize - retoffset bytes to copy */	\
   485		REP;MOVSB;				\
   486		/* execute write barrier updates */	\
   487		MOVL	argtype+0(FP), DX;		\
   488		MOVL	argptr+8(FP), DI;		\
   489		MOVL	argsize+12(FP), CX;		\
   490		MOVL	retoffset+16(FP), BX;		\
   491		MOVL	DX, 0(SP);			\
   492		MOVL	DI, 4(SP);			\
   493		MOVL	CX, 8(SP);			\
   494		MOVL	BX, 12(SP);			\
   495		CALL	runtime·callwritebarrier(SB);	\
   496		RET
   497	
   498	CALLFN(·call16, 16)	// one routine per size named in reflectcall's DISPATCH list
   499	CALLFN(·call32, 32)
   500	CALLFN(·call64, 64)
   501	CALLFN(·call128, 128)
   502	CALLFN(·call256, 256)
   503	CALLFN(·call512, 512)
   504	CALLFN(·call1024, 1024)
   505	CALLFN(·call2048, 2048)
   506	CALLFN(·call4096, 4096)
   507	CALLFN(·call8192, 8192)
   508	CALLFN(·call16384, 16384)
   509	CALLFN(·call32768, 32768)
   510	CALLFN(·call65536, 65536)
   511	CALLFN(·call131072, 131072)
   512	CALLFN(·call262144, 262144)
   513	CALLFN(·call524288, 524288)
   514	CALLFN(·call1048576, 1048576)
   515	CALLFN(·call2097152, 2097152)
   516	CALLFN(·call4194304, 4194304)
   517	CALLFN(·call8388608, 8388608)
   518	CALLFN(·call16777216, 16777216)
   519	CALLFN(·call33554432, 33554432)
   520	CALLFN(·call67108864, 67108864)
   521	CALLFN(·call134217728, 134217728)
   522	CALLFN(·call268435456, 268435456)
   523	CALLFN(·call536870912, 536870912)
   524	CALLFN(·call1073741824, 1073741824)
   525	
   526	TEXT runtime·procyield(SB),NOSPLIT,$0-4	// execute PAUSE `cycles` times; takes one 4-byte argument, returns nothing
   527		MOVL	cycles+0(FP), AX	// AX = iterations left
   528	again:
   529		PAUSE
   530		SUBL	$1, AX
   531		JNZ	again	// NOTE: cycles == 0 wraps around and spins 2^32 times before returning
   532		RET
   533	
   534	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0	// emits no instructions besides RET
   535		// Stores are already ordered on x86, so this is just a
   536		// compile barrier.
   537		RET
   538	
   539	// void jmpdefer(fn, sp);
   540	// called from deferreturn.
   541	// 1. pop the caller
   542	// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   543	//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   544	//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   545	//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   546	// 3. jmp to the argument
   547	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   548		MOVL	fv+0(FP), DX	// fn
   549		MOVL	argp+4(FP), BX	// caller sp
   550		LEAL	-4(BX), SP	// caller sp after CALL
   551	#ifdef GOBUILDMODE_shared
   552		SUBL	$16, (SP)	// return to CALL again
   553	#else
   554		SUBL	$5, (SP)	// return to CALL again
   555	#endif
   556		MOVL	0(DX), BX	// BX = code address stored in fn's first word
   557		JMP	BX	// but first run the deferred function
   558	
   559	// Save state of caller into g->sched.
   560	TEXT gosave<>(SB),NOSPLIT,$0	// file-local helper (called from asmcgocall); AX and BX are preserved
   561		PUSHL	AX
   562		PUSHL	BX
   563		get_tls(BX)
   564		MOVL	g(BX), BX	// BX = current g
   565		LEAL	arg+0(FP), AX	// caller's SP
   566		MOVL	AX, (g_sched+gobuf_sp)(BX)
   567		MOVL	-4(AX), AX	// caller's PC: the word just below the caller's SP
   568		MOVL	AX, (g_sched+gobuf_pc)(BX)
   569		MOVL	$0, (g_sched+gobuf_ret)(BX)
   570		MOVL	$0, (g_sched+gobuf_ctxt)(BX)
   571		POPL	BX
   572		POPL	AX
   573		RET
   574	
   575	// func asmcgocall(fn, arg unsafe.Pointer) int32
   576	// Call fn(arg) on the scheduler stack,
   577	// aligned appropriately for the gcc ABI.
   578	// See cgocall.go for more details.
   579	TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   580		MOVL	fn+0(FP), AX
   581		MOVL	arg+4(FP), BX
   582	
   583		MOVL	SP, DX	// DX = SP on entry; used below to compute the saved stack depth
   584	
   585		// Figure out if we need to switch to m->g0 stack.
   586		// We get called to create new OS threads too, and those
   587		// come in on the m->g0 stack already.
   588		get_tls(CX)
   589		MOVL	g(CX), BP
   590		MOVL	g_m(BP), BP
   591		MOVL	m_g0(BP), SI	// SI = m->g0
   592		MOVL	g(CX), DI	// DI = current g
   593		CMPL	SI, DI
   594		JEQ	noswitch
   595		CALL	gosave<>(SB)
   596		get_tls(CX)
   597		MOVL	SI, g(CX)
   598		MOVL	(g_sched+gobuf_sp)(SI), SP
   599	
   600	noswitch:
   601		// Now on a scheduling stack (a pthread-created stack).
   602		SUBL	$32, SP	// scratch for the three slots stored below
   603		ANDL	$~15, SP	// alignment, perhaps unnecessary
   604		MOVL	DI, 8(SP)	// save g
   605		MOVL	(g_stack+stack_hi)(DI), DI
   606		SUBL	DX, DI	// DI = stack.hi - entry SP
   607		MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   608		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   609		CALL	AX
   610	
   611		// Restore registers, g, stack pointer.
   612		get_tls(CX)
   613		MOVL	8(SP), DI	// DI = saved g
   614		MOVL	(g_stack+stack_hi)(DI), SI
   615		SUBL	4(SP), SI	// SI = stack.hi - saved depth, i.e. the entry SP relative to the current stack.hi
   616		MOVL	DI, g(CX)
   617		MOVL	SI, SP
   618	
   619		MOVL	AX, ret+8(FP)	// result: the value fn left in AX
   620		RET
   621	
   622	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   623	// Turn the fn into a Go func (by taking its address) and call
   624	// cgocallback_gofunc.
   625	TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
   626		LEAL	fn+0(FP), AX	// address of the fn argument slot, not the fn value itself
   627		MOVL	AX, 0(SP)
   628		MOVL	frame+4(FP), AX
   629		MOVL	AX, 4(SP)
   630		MOVL	framesize+8(FP), AX
   631		MOVL	AX, 8(SP)
   632		MOVL	ctxt+12(FP), AX
   633		MOVL	AX, 12(SP)
   634		MOVL	$runtime·cgocallback_gofunc(SB), AX	// called indirectly through AX
   635		CALL	AX
   636		RET
   637	
   638	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   639	// See cgocall.go for more details.
   640	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   641		NO_LOCAL_POINTERS
   642	
   643		// If g is nil, Go did not create the current thread.
   644		// Call needm to obtain one for temporary use.
   645		// In this case, we're running on the thread stack, so there's
   646		// lots of space, but the linker doesn't know. Hide the call from
   647		// the linker analysis by using an indirect call through AX.
   648		get_tls(CX)
   649	#ifdef GOOS_windows
   650		MOVL	$0, BP
   651		CMPL	CX, $0
   652		JEQ	2(PC) // TODO
   653	#endif
   654		MOVL	g(CX), BP
   655		CMPL	BP, $0
   656		JEQ	needm
   657		MOVL	g_m(BP), BP
   658		MOVL	BP, DX // saved copy of oldm
   659		JMP	havem
   660	needm:
   661		MOVL	$0, 0(SP)
   662		MOVL	$runtime·needm(SB), AX
   663		CALL	AX
   664		MOVL	0(SP), DX	// DX = oldm for this path: the word at 0(SP) after needm (zeroed before the call)
   665		get_tls(CX)
   666		MOVL	g(CX), BP
   667		MOVL	g_m(BP), BP
   668	
   669		// Set m->g0->sched.sp = SP, so that if a panic happens
   670		// during the function we are about to execute, it will
   671		// have a valid SP to run on the g0 stack.
   672		// The next few lines (after the havem label)
   673		// will save this SP onto the stack and then write
   674		// the same SP back to m->sched.sp. That seems redundant,
   675		// but if an unrecovered panic happens, unwindm will
   676		// restore the g->sched.sp from the stack location
   677		// and then systemstack will try to use it. If we don't set it here,
   678		// that restored SP will be uninitialized (typically 0) and
   679		// will not be usable.
   680		MOVL	m_g0(BP), SI
   681		MOVL	SP, (g_sched+gobuf_sp)(SI)
   682	
   683	havem:
   684		// Now there's a valid m, and we're running on its m->g0.
   685		// Save current m->g0->sched.sp on stack and then set it to SP.
   686		// Save current sp in m->g0->sched.sp in preparation for
   687		// switch back to m->curg stack.
   688		// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   689		MOVL	m_g0(BP), SI
   690		MOVL	(g_sched+gobuf_sp)(SI), AX
   691		MOVL	AX, 0(SP)
   692		MOVL	SP, (g_sched+gobuf_sp)(SI)
   693	
   694		// Switch to m->curg stack and call runtime.cgocallbackg.
   695		// Because we are taking over the execution of m->curg
   696		// but *not* resuming what had been running, we need to
   697		// save that information (m->curg->sched) so we can restore it.
   698		// We can restore m->curg->sched.sp easily, because calling
   699		// runtime.cgocallbackg leaves SP unchanged upon return.
   700		// To save m->curg->sched.pc, we push it onto the stack.
   701		// This has the added benefit that it looks to the traceback
   702		// routine like cgocallbackg is going to return to that
   703		// PC (because the frame we allocate below has the same
   704		// size as cgocallback_gofunc's frame declared above)
   705		// so that the traceback will seamlessly trace back into
   706		// the earlier calls.
   707		//
   708		// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
   709		// 8(SP) is unused.
   710		MOVL	m_curg(BP), SI
   711		MOVL	SI, g(CX)
   712		MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   713		MOVL	(g_sched+gobuf_pc)(SI), BP
   714		MOVL	BP, -4(DI)	// saved curg PC goes in the word just below curg's saved SP
   715		MOVL	ctxt+12(FP), CX
   716		LEAL	-(4+12)(DI), SP	// 4 bytes for the saved PC plus a 12-byte frame
   717		MOVL	DX, 4(SP)
   718		MOVL	CX, 0(SP)	// ctxt is the argument passed to cgocallbackg
   719		CALL	runtime·cgocallbackg(SB)
   720		MOVL	4(SP), DX	// reload the saved oldm
   721	
   722		// Restore g->sched (== m->curg->sched) from saved values.
   723		get_tls(CX)
   724		MOVL	g(CX), SI
   725		MOVL	12(SP), BP	// the PC stored at -4(DI) above
   726		MOVL	BP, (g_sched+gobuf_pc)(SI)
   727		LEAL	(12+4)(SP), DI	// undo the LEAL -(4+12)(DI), SP above
   728		MOVL	DI, (g_sched+gobuf_sp)(SI)
   729	
   730		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   731		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   732		// so we do not have to restore it.)
   733		MOVL	g(CX), BP
   734		MOVL	g_m(BP), BP
   735		MOVL	m_g0(BP), SI
   736		MOVL	SI, g(CX)
   737		MOVL	(g_sched+gobuf_sp)(SI), SP
   738		MOVL	0(SP), AX	// the previous m->g0->sched.sp stored at 0(SP) after havem
   739		MOVL	AX, (g_sched+gobuf_sp)(SI)
   740		
   741		// If the m on entry was nil, we called needm above to borrow an m
   742		// for the duration of the call. Since the call is over, return it with dropm.
   743		CMPL	DX, $0
   744		JNE 3(PC)	// oldm != nil: skip the two-instruction dropm call
   745		MOVL	$runtime·dropm(SB), AX
   746		CALL	AX
   747	
   748		// Done!
   749		RET
   750	
   751	// void setg(G*); set g. for use by needm.
   752	TEXT runtime·setg(SB), NOSPLIT, $0-4
   753		MOVL	gg+0(FP), BX	// BX = new g (may be nil)
   754	#ifdef GOOS_windows
   755		CMPL	BX, $0
   756		JNE	settls
   757		MOVL	$0, 0x14(FS)	// nil g: clear the word at 0x14(FS) and return without touching g(CX)
   758		RET
   759	settls:
   760		MOVL	g_m(BX), AX
   761		LEAL	m_tls(AX), AX	// AX = &gg->m->tls
   762		MOVL	AX, 0x14(FS)
   763	#endif
   764		get_tls(CX)
   765		MOVL	BX, g(CX)
   766		RET
   767	
   768	// void setg_gcc(G*); set g. for use by gcc
   769	TEXT setg_gcc<>(SB), NOSPLIT, $0	// rt0_go passes this routine's address to _cgo_init
   770		get_tls(AX)
   771		MOVL	gg+0(FP), DX
   772		MOVL	DX, g(AX)
   773		RET
   774	
   775	// check that g->stack.lo < SP < g->stack.hi (both comparisons are strict); trap with INT 3 otherwise
   776	TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   777		get_tls(CX)
   778		MOVL	g(CX), AX
   779		CMPL	(g_stack+stack_hi)(AX), SP
   780		JHI	2(PC)	// stack.hi > SP (unsigned): skip the trap
   781		INT	$3
   782		CMPL	SP, (g_stack+stack_lo)(AX)
   783		JHI	2(PC)	// SP > stack.lo (unsigned): skip the trap
   784		INT	$3
   785		RET
   786	
   787	TEXT runtime·getcallerpc(SB),NOSPLIT,$4-8	// return the PC stored just below argp, looking through a stack barrier if one is installed there
   788		MOVL	argp+0(FP),AX		// addr of first arg
   789		MOVL	-4(AX),AX		// get calling pc
   790		CMPL	AX, runtime·stackBarrierPC(SB)
   791		JNE	nobar
   792		// Get original return PC.
   793		CALL	runtime·nextBarrierPC(SB)
   794		MOVL	0(SP), AX	// presumably nextBarrierPC's result, left in this frame's single 4-byte slot
   795	nobar:
   796		MOVL	AX, ret+4(FP)
   797		RET
   798	
   799	TEXT runtime·setcallerpc(SB),NOSPLIT,$4-8	// overwrite the PC stored just below argp, or hand it to setNextBarrierPC if a stack barrier sits there
   800		MOVL	argp+0(FP),AX		// addr of first arg
   801		MOVL	pc+4(FP), BX
   802		MOVL	-4(AX), DX	// DX = PC currently stored there
   803		CMPL	DX, runtime·stackBarrierPC(SB)
   804		JEQ	setbar
   805		MOVL	BX, -4(AX)		// set calling pc
   806		RET
   807	setbar:
   808		// Set the stack barrier return PC.
   809		MOVL	BX, 0(SP)	// argument to setNextBarrierPC
   810		CALL	runtime·setNextBarrierPC(SB)
   811		RET
   812	
   813	TEXT runtime·getcallersp(SB), NOSPLIT, $0-8	// returns its argp argument unchanged
   814		MOVL	argp+0(FP), AX
   815		MOVL	AX, ret+4(FP)
   816		RET
   817	
   818	// func cputicks() int64
   819	TEXT runtime·cputicks(SB),NOSPLIT,$0-8	// RDTSC result, preceded by LFENCE or MFENCE when the SSE2 bit is set
   820		TESTL	$0x4000000, runtime·cpuid_edx(SB) // no sse2, no mfence
   821		JEQ	done
   822		CMPB	runtime·lfenceBeforeRdtsc(SB), $1	// set to 1 by rt0_go on "GenuineIntel" CPUs
   823		JNE	mfence
   824		BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
   825		JMP	done
   826	mfence:
   827		BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
   828	done:
   829		RDTSC
   830		MOVL	AX, ret_lo+0(FP)	// low 32 bits of the counter
   831		MOVL	DX, ret_hi+4(FP)	// high 32 bits of the counter
   832		RET
   833	
   834	TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0	// call setldt(7, &m0.tls, 32)
   835		// set up ldt 7 to point at m0.tls
   836		// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   837		// the entry number is just a hint.  setldt will set up GS with what it used.
   838		MOVL	$7, 0(SP)
   839		LEAL	runtime·m0+m_tls(SB), AX
   840		MOVL	AX, 4(SP)
   841		MOVL	$32, 8(SP)	// sizeof(tls array)
   842		CALL	runtime·setldt(SB)
   843		RET
   844	
   845	TEXT runtime·emptyfunc(SB),0,$0-0	// body is a bare RET; flags are 0 rather than NOSPLIT, presumably so rt0_go's call exercises the stack check
   846		RET
   847	
   848	TEXT runtime·abort(SB),NOSPLIT,$0-0	// crash with a breakpoint trap; never returns and never falls through
   849		INT $0x3
   850	loop:	JMP	loop	// if execution resumes after the trap, spin here rather than run into the next function
   851	// memhash_varlen(p unsafe.Pointer, h seed) uintptr
   852	// redirects to memhash(p, h, size) using the size
   853	// stored in the closure.
   854	TEXT runtime·memhash_varlen(SB),NOSPLIT,$16-12
   855		GO_ARGS
   856		NO_LOCAL_POINTERS
   857		MOVL	p+0(FP), AX
   858		MOVL	h+4(FP), BX
   859		MOVL	4(DX), CX	// size: second word of the closure that DX points at
   860		MOVL	AX, 0(SP)
   861		MOVL	BX, 4(SP)
   862		MOVL	CX, 8(SP)
   863		CALL	runtime·memhash(SB)
   864		MOVL	12(SP), AX	// memhash's result sits after its three argument words
   865		MOVL	AX, ret+8(FP)
   866		RET
   867	
   868	// hash function using AES hardware instructions
   869	TEXT runtime·aeshash(SB),NOSPLIT,$0-16	// loads AX/BX/DX as aeshashbody expects, then jumps to it
   870		MOVL	p+0(FP), AX	// ptr to data
   871		MOVL	s+8(FP), BX	// size
   872		LEAL	ret+12(FP), DX	// where aeshashbody stores the result
   873		JMP	runtime·aeshashbody(SB)
   874	
   875	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12	// like aeshash, but p points at a (data, length) string header
   876		MOVL	p+0(FP), AX	// ptr to string object
   877		MOVL	4(AX), BX	// length of string
   878		MOVL	(AX), AX	// string data
   879		LEAL	ret+8(FP), DX	// where aeshashbody stores the result
   880		JMP	runtime·aeshashbody(SB)
   881	
   882	// AX: data
   883	// BX: length
   884	// DX: address to put return value
   885	TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0	// reached by JMP from aeshash/aeshashstr, so h+4(FP) below is that caller's seed argument
   886		MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   887		PINSRW	$4, BX, X0	            // 16 bits of length
   888		PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   889		MOVO	X0, X1                      // save unscrambled seed
   890		PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   891		AESENC	X0, X0                      // scramble seed
   892	
   893		CMPL	BX, $16	// pick a code path by length
   894		JB	aes0to15
   895		JE	aes16
   896		CMPL	BX, $32
   897		JBE	aes17to32
   898		CMPL	BX, $64
   899		JBE	aes33to64
   900		JMP	aes65plus
   901		
   902	aes0to15:
   903		TESTL	BX, BX
   904		JE	aes0
   905	
   906		ADDL	$16, AX
   907		TESTW	$0xff0, AX	// bits 4-11 of data+16 all zero?
   908		JE	endofpage
   909	
   910		// 16 bytes loaded at this address won't cross
   911		// a page boundary, so we can load it directly.
   912		MOVOU	-16(AX), X1	// AX was advanced by 16 above, so this loads from the original data pointer
   913		ADDL	BX, BX	// BX = 2*length, so BX*8 below steps in 16-byte table entries
   914		PAND	masks<>(SB)(BX*8), X1
   915	
   916	final1:	
   917		AESENC	X0, X1  // scramble input, xor in seed
   918		AESENC	X1, X1  // scramble combo 2 times
   919		AESENC	X1, X1
   920		MOVL	X1, (DX)	// store the low 32 bits as the result
   921		RET
   922	
   923	endofpage:
   924		// address ends in 1111xxxx. Might be up against
   925		// a page boundary, so load ending at last byte.
   926		// Then shift bytes down using pshufb.
   927		MOVOU	-32(AX)(BX*1), X1	// 16 bytes ending at data+length (AX is data+16 here)
   928		ADDL	BX, BX	// BX = 2*length, so BX*8 below steps in 16-byte table entries
   929		PSHUFB	shifts<>(SB)(BX*8), X1
   930		JMP	final1
   931	
   932	aes0:
   933		// Return scrambled input seed
   934		AESENC	X0, X0
   935		MOVL	X0, (DX)
   936		RET
   937	
   938	aes16:
   939		MOVOU	(AX), X1	// exactly 16 bytes: load them all, no masking
   940		JMP	final1
   941	
   942	aes17to32:
   943		// make second starting seed
   944		PXOR	runtime·aeskeysched+16(SB), X1
   945		AESENC	X1, X1
   946		
   947		// load data to be hashed
   948		MOVOU	(AX), X2	// first 16 bytes
   949		MOVOU	-16(AX)(BX*1), X3	// last 16 bytes (overlaps the first load when length < 32)
   950	
   951		// scramble 3 times
   952		AESENC	X0, X2
   953		AESENC	X1, X3
   954		AESENC	X2, X2
   955		AESENC	X3, X3
   956		AESENC	X2, X2
   957		AESENC	X3, X3
   958	
   959		// combine results
   960		PXOR	X3, X2
   961		MOVL	X2, (DX)
   962		RET
   963	
   964	aes33to64:
   965		// make 3 more starting seeds
   966		MOVO	X1, X2
   967		MOVO	X1, X3
   968		PXOR	runtime·aeskeysched+16(SB), X1
   969		PXOR	runtime·aeskeysched+32(SB), X2
   970		PXOR	runtime·aeskeysched+48(SB), X3
   971		AESENC	X1, X1
   972		AESENC	X2, X2
   973		AESENC	X3, X3
   974		
   975		MOVOU	(AX), X4	// first 32 bytes ...
   976		MOVOU	16(AX), X5
   977		MOVOU	-32(AX)(BX*1), X6	// ... and last 32 bytes (overlapping when length < 64)
   978		MOVOU	-16(AX)(BX*1), X7
   979		
   980		AESENC	X0, X4
   981		AESENC	X1, X5
   982		AESENC	X2, X6
   983		AESENC	X3, X7
   984		
   985		AESENC	X4, X4
   986		AESENC	X5, X5
   987		AESENC	X6, X6
   988		AESENC	X7, X7
   989		
   990		AESENC	X4, X4
   991		AESENC	X5, X5
   992		AESENC	X6, X6
   993		AESENC	X7, X7
   994	
   995		PXOR	X6, X4	// fold the four lanes into X4
   996		PXOR	X7, X5
   997		PXOR	X5, X4
   998		MOVL	X4, (DX)
   999		RET
  1000	
  1001	aes65plus:
  1002		// make 3 more starting seeds
  1003		MOVO	X1, X2
  1004		MOVO	X1, X3
  1005		PXOR	runtime·aeskeysched+16(SB), X1
  1006		PXOR	runtime·aeskeysched+32(SB), X2
  1007		PXOR	runtime·aeskeysched+48(SB), X3
  1008		AESENC	X1, X1
  1009		AESENC	X2, X2
  1010		AESENC	X3, X3
  1011		
  1012		// start with last (possibly overlapping) block
  1013		MOVOU	-64(AX)(BX*1), X4
  1014		MOVOU	-48(AX)(BX*1), X5
  1015		MOVOU	-32(AX)(BX*1), X6
  1016		MOVOU	-16(AX)(BX*1), X7
  1017	
  1018		// scramble state once
  1019		AESENC	X0, X4
  1020		AESENC	X1, X5
  1021		AESENC	X2, X6
  1022		AESENC	X3, X7
  1023	
  1024		// compute number of remaining 64-byte blocks
  1025		DECL	BX
  1026		SHRL	$6, BX	// BX = (length-1)/64, at least 1 because length >= 65 on this path
  1027		
  1028	aesloop:
  1029		// scramble state, xor in a block
  1030		MOVOU	(AX), X0	// X0-X3 are reused for data; the seeds were consumed above
  1031		MOVOU	16(AX), X1
  1032		MOVOU	32(AX), X2
  1033		MOVOU	48(AX), X3
  1034		AESENC	X0, X4
  1035		AESENC	X1, X5
  1036		AESENC	X2, X6
  1037		AESENC	X3, X7
  1038	
  1039		// scramble state
  1040		AESENC	X4, X4
  1041		AESENC	X5, X5
  1042		AESENC	X6, X6
  1043		AESENC	X7, X7
  1044	
  1045		ADDL	$64, AX	// advance to the next 64-byte block
  1046		DECL	BX
  1047		JNE	aesloop
  1048	
  1049		// 2 more scrambles to finish
  1050		AESENC	X4, X4
  1051		AESENC	X5, X5
  1052		AESENC	X6, X6
  1053		AESENC	X7, X7
  1054		
  1055		AESENC	X4, X4
  1056		AESENC	X5, X5
  1057		AESENC	X6, X6
  1058		AESENC	X7, X7
  1059	
  1060		PXOR	X6, X4	// fold the four lanes into X4
  1061		PXOR	X7, X5
  1062		PXOR	X5, X4
  1063		MOVL	X4, (DX)
  1064		RET
  1065	
  1066	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
      		// Hashes the 4 bytes at p together with the seed h: both are packed
      		// into X0, scrambled with three AESENC rounds keyed from
      		// runtime·aeskeysched, and the low 32 bits are stored in ret.
  1067		MOVL	p+0(FP), AX	// ptr to data
  1068		MOVL	h+4(FP), X0	// seed goes into dword 0 of X0
  1069		PINSRD	$1, (AX), X0	// the 4 data bytes go into dword 1 of X0
  1070		AESENC	runtime·aeskeysched+0(SB), X0	// round 1, key bytes 0-15
  1071		AESENC	runtime·aeskeysched+16(SB), X0	// round 2, key bytes 16-31
  1072		AESENC	runtime·aeskeysched+32(SB), X0	// round 3, key bytes 32-47
  1073		MOVL	X0, ret+8(FP)	// result = low 32 bits of the scrambled state
  1074		RET
  1075	
  1076	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
      		// Hashes the 8 bytes at p together with the seed h: both are packed
      		// into X0, scrambled with three AESENC rounds keyed from
      		// runtime·aeskeysched, and the low 32 bits are stored in ret.
  1077		MOVL	p+0(FP), AX	// ptr to data
  1078		MOVQ	(AX), X0	// the 8 data bytes go into dwords 0-1 of X0
  1079		PINSRD	$2, h+4(FP), X0	// seed goes into dword 2 of X0
  1080		AESENC	runtime·aeskeysched+0(SB), X0	// round 1, key bytes 0-15
  1081		AESENC	runtime·aeskeysched+16(SB), X0	// round 2, key bytes 16-31
  1082		AESENC	runtime·aeskeysched+32(SB), X0	// round 3, key bytes 32-47
  1083		MOVL	X0, ret+8(FP)	// result = low 32 bits of the scrambled state
  1084		RET
  1085	
  1086	// simple mask to get rid of data in the high part of the register.
      	// The table holds 16 entries of 16 bytes each. Entry i starts at byte
      	// offset i*16 and has its low i bytes set to 0xff and all higher bytes
      	// zero, so ANDing a 16-byte value with entry i keeps only its low i bytes.
      	// Entry 0 is therefore all zero and entry 15 keeps everything but the
      	// top byte.
  1087	DATA masks<>+0x00(SB)/4, $0x00000000
  1088	DATA masks<>+0x04(SB)/4, $0x00000000
  1089	DATA masks<>+0x08(SB)/4, $0x00000000
  1090	DATA masks<>+0x0c(SB)/4, $0x00000000
  1091		
  1092	DATA masks<>+0x10(SB)/4, $0x000000ff
  1093	DATA masks<>+0x14(SB)/4, $0x00000000
  1094	DATA masks<>+0x18(SB)/4, $0x00000000
  1095	DATA masks<>+0x1c(SB)/4, $0x00000000
  1096		
  1097	DATA masks<>+0x20(SB)/4, $0x0000ffff
  1098	DATA masks<>+0x24(SB)/4, $0x00000000
  1099	DATA masks<>+0x28(SB)/4, $0x00000000
  1100	DATA masks<>+0x2c(SB)/4, $0x00000000
  1101		
  1102	DATA masks<>+0x30(SB)/4, $0x00ffffff
  1103	DATA masks<>+0x34(SB)/4, $0x00000000
  1104	DATA masks<>+0x38(SB)/4, $0x00000000
  1105	DATA masks<>+0x3c(SB)/4, $0x00000000
  1106		
  1107	DATA masks<>+0x40(SB)/4, $0xffffffff
  1108	DATA masks<>+0x44(SB)/4, $0x00000000
  1109	DATA masks<>+0x48(SB)/4, $0x00000000
  1110	DATA masks<>+0x4c(SB)/4, $0x00000000
  1111		
  1112	DATA masks<>+0x50(SB)/4, $0xffffffff
  1113	DATA masks<>+0x54(SB)/4, $0x000000ff
  1114	DATA masks<>+0x58(SB)/4, $0x00000000
  1115	DATA masks<>+0x5c(SB)/4, $0x00000000
  1116		
  1117	DATA masks<>+0x60(SB)/4, $0xffffffff
  1118	DATA masks<>+0x64(SB)/4, $0x0000ffff
  1119	DATA masks<>+0x68(SB)/4, $0x00000000
  1120	DATA masks<>+0x6c(SB)/4, $0x00000000
  1121		
  1122	DATA masks<>+0x70(SB)/4, $0xffffffff
  1123	DATA masks<>+0x74(SB)/4, $0x00ffffff
  1124	DATA masks<>+0x78(SB)/4, $0x00000000
  1125	DATA masks<>+0x7c(SB)/4, $0x00000000
  1126		
  1127	DATA masks<>+0x80(SB)/4, $0xffffffff
  1128	DATA masks<>+0x84(SB)/4, $0xffffffff
  1129	DATA masks<>+0x88(SB)/4, $0x00000000
  1130	DATA masks<>+0x8c(SB)/4, $0x00000000
  1131		
  1132	DATA masks<>+0x90(SB)/4, $0xffffffff
  1133	DATA masks<>+0x94(SB)/4, $0xffffffff
  1134	DATA masks<>+0x98(SB)/4, $0x000000ff
  1135	DATA masks<>+0x9c(SB)/4, $0x00000000
  1136		
  1137	DATA masks<>+0xa0(SB)/4, $0xffffffff
  1138	DATA masks<>+0xa4(SB)/4, $0xffffffff
  1139	DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1140	DATA masks<>+0xac(SB)/4, $0x00000000
  1141		
  1142	DATA masks<>+0xb0(SB)/4, $0xffffffff
  1143	DATA masks<>+0xb4(SB)/4, $0xffffffff
  1144	DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1145	DATA masks<>+0xbc(SB)/4, $0x00000000
  1146		
  1147	DATA masks<>+0xc0(SB)/4, $0xffffffff
  1148	DATA masks<>+0xc4(SB)/4, $0xffffffff
  1149	DATA masks<>+0xc8(SB)/4, $0xffffffff
  1150	DATA masks<>+0xcc(SB)/4, $0x00000000
  1151		
  1152	DATA masks<>+0xd0(SB)/4, $0xffffffff
  1153	DATA masks<>+0xd4(SB)/4, $0xffffffff
  1154	DATA masks<>+0xd8(SB)/4, $0xffffffff
  1155	DATA masks<>+0xdc(SB)/4, $0x000000ff
  1156		
  1157	DATA masks<>+0xe0(SB)/4, $0xffffffff
  1158	DATA masks<>+0xe4(SB)/4, $0xffffffff
  1159	DATA masks<>+0xe8(SB)/4, $0xffffffff
  1160	DATA masks<>+0xec(SB)/4, $0x0000ffff
  1161		
  1162	DATA masks<>+0xf0(SB)/4, $0xffffffff
  1163	DATA masks<>+0xf4(SB)/4, $0xffffffff
  1164	DATA masks<>+0xf8(SB)/4, $0xffffffff
  1165	DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1166	
  1167	GLOBL masks<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only; ·checkASM verifies 16-byte alignment
  1168	
  1169	// these are arguments to pshufb. They move data down from
  1170	// the high bytes of the register to the low bytes of the register.
  1171	// index is how many bytes to move.
      	// The table holds 16 entries of 16 bytes each; entry i starts at byte
      	// offset i*16. Its first i control bytes are 16-i .. 15, selecting the
      	// top i source bytes, and the remaining control bytes are 0xff, which
      	// PSHUFB turns into zero result bytes. Entry 0 is the exception: its
      	// control bytes are all 0x00, which select source byte 0 rather than
      	// producing zeros (presumably it is never used for a move of 0 bytes --
      	// confirm against the callers).
  1172	DATA shifts<>+0x00(SB)/4, $0x00000000
  1173	DATA shifts<>+0x04(SB)/4, $0x00000000
  1174	DATA shifts<>+0x08(SB)/4, $0x00000000
  1175	DATA shifts<>+0x0c(SB)/4, $0x00000000
  1176		
  1177	DATA shifts<>+0x10(SB)/4, $0xffffff0f
  1178	DATA shifts<>+0x14(SB)/4, $0xffffffff
  1179	DATA shifts<>+0x18(SB)/4, $0xffffffff
  1180	DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1181		
  1182	DATA shifts<>+0x20(SB)/4, $0xffff0f0e
  1183	DATA shifts<>+0x24(SB)/4, $0xffffffff
  1184	DATA shifts<>+0x28(SB)/4, $0xffffffff
  1185	DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1186		
  1187	DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
  1188	DATA shifts<>+0x34(SB)/4, $0xffffffff
  1189	DATA shifts<>+0x38(SB)/4, $0xffffffff
  1190	DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1191		
  1192	DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
  1193	DATA shifts<>+0x44(SB)/4, $0xffffffff
  1194	DATA shifts<>+0x48(SB)/4, $0xffffffff
  1195	DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1196		
  1197	DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
  1198	DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1199	DATA shifts<>+0x58(SB)/4, $0xffffffff
  1200	DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1201		
  1202	DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
  1203	DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1204	DATA shifts<>+0x68(SB)/4, $0xffffffff
  1205	DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1206		
  1207	DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
  1208	DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1209	DATA shifts<>+0x78(SB)/4, $0xffffffff
  1210	DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1211		
  1212	DATA shifts<>+0x80(SB)/4, $0x0b0a0908
  1213	DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1214	DATA shifts<>+0x88(SB)/4, $0xffffffff
  1215	DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1216		
  1217	DATA shifts<>+0x90(SB)/4, $0x0a090807
  1218	DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1219	DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1220	DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1221		
  1222	DATA shifts<>+0xa0(SB)/4, $0x09080706
  1223	DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1224	DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1225	DATA shifts<>+0xac(SB)/4, $0xffffffff
  1226		
  1227	DATA shifts<>+0xb0(SB)/4, $0x08070605
  1228	DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1229	DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1230	DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1231		
  1232	DATA shifts<>+0xc0(SB)/4, $0x07060504
  1233	DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1234	DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1235	DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1236		
  1237	DATA shifts<>+0xd0(SB)/4, $0x06050403
  1238	DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1239	DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1240	DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1241		
  1242	DATA shifts<>+0xe0(SB)/4, $0x05040302
  1243	DATA shifts<>+0xe4(SB)/4, $0x09080706
  1244	DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1245	DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1246		
  1247	DATA shifts<>+0xf0(SB)/4, $0x04030201
  1248	DATA shifts<>+0xf4(SB)/4, $0x08070605
  1249	DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1250	DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1251	
  1252	GLOBL shifts<>(SB),RODATA,$256	// 16 entries * 16 bytes, read-only; ·checkASM verifies 16-byte alignment
  1253	
  1254	TEXT ·checkASM(SB),NOSPLIT,$0-1
  1255		// Report whether masks<> and shifts<> both start on a 16-byte boundary.
  1256		MOVL	$shifts<>(SB), BX	// BX = address of shifts<>
  1257		MOVL	$masks<>(SB), AX	// AX = address of masks<>
  1258		ORL	BX, AX	// a low bit set in either address survives in AX
  1259		TESTL	$15, AX	// ZF = 1 only if the low 4 bits of both are clear
  1260		SETEQ	ret+0(FP)	// ret = true when both tables are aligned
  1261		RET
  1262	
  1263	// memequal(p, q unsafe.Pointer, size uintptr) bool -- reports whether the size bytes at p and q match.
  1264	TEXT runtime·memequal(SB),NOSPLIT,$0-13
  1265		MOVL	b+4(FP), DI	// second pointer
  1266		MOVL	a+0(FP), SI	// first pointer
  1267		CMPL	SI, DI
  1268		JEQ	identical	// same address: equal without reading memory
  1269		LEAL	ret+12(FP), AX	// memeqbody stores its answer through AX
  1270		MOVL	size+8(FP), BX	// number of bytes to compare
  1271		JMP	runtime·memeqbody(SB)	// tail jump; memeqbody returns to our caller
  1272	identical:
  1273		MOVB	$1, ret+12(FP)
  1274		RET
  1275	
  1276	// memequal_varlen(a, b unsafe.Pointer) bool -- like a memory compare whose byte count comes from the closure in DX.
  1277	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
  1278		MOVL	b+4(FP), DI	// second pointer
  1279		MOVL	a+0(FP), SI	// first pointer
  1280		CMPL	SI, DI
  1281		JEQ	identical	// same address: equal without reading memory
  1282		LEAL	ret+8(FP), AX	// memeqbody stores its answer through AX
  1283		MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
  1284		JMP	runtime·memeqbody(SB)	// tail jump; memeqbody returns to our caller
  1285	identical:
  1286		MOVB	$1, ret+8(FP)
  1287		RET
  1288	
  1289	// eqstring reports whether two strings hold the same bytes.
  1290	// Only s1's length is read: the compiler guarantees that
  1291	// strings passed to eqstring have equal length.
  1292	// Equivalent Go code lives in
  1293	// runtime_test.go:eqstring_generic.
  1294	TEXT runtime·eqstring(SB),NOSPLIT,$0-17
  1295		MOVL	s2str+8(FP), DI	// data pointer of the second string
  1296		MOVL	s1str+0(FP), SI	// data pointer of the first string
  1297		CMPL	SI, DI
  1298		JEQ	identical	// same backing bytes: equal without reading memory
  1299		LEAL	v+16(FP), AX	// memeqbody stores its answer through AX
  1300		MOVL	s1len+4(FP), BX	// shared length in bytes
  1301		JMP	runtime·memeqbody(SB)	// tail jump; memeqbody returns to our caller
  1302	identical:
  1303		MOVB	$1, v+16(FP)
  1304		RET
  1305	
  1306	TEXT bytes·Equal(SB),NOSPLIT,$0-25
  1307		MOVL	b_len+16(FP), CX	// length of b
  1308		MOVL	a_len+4(FP), BX	// length of a; doubles as the byte count for memeqbody
  1309		CMPL	BX, CX
  1310		JNE	lendiffer	// slices of different length are never equal
  1311		LEAL	ret+24(FP), AX	// memeqbody stores its answer through AX
  1312		MOVL	b+12(FP), DI	// data pointer of b
  1313		MOVL	a+0(FP), SI	// data pointer of a
  1314		JMP	runtime·memeqbody(SB)	// tail jump; memeqbody returns to our caller
  1315	lendiffer:
  1316		MOVB	$0, ret+24(FP)
  1317		RET
  1318	
  1319	// a in SI
  1320	// b in DI
  1321	// count in BX
  1322	// address of result byte in AX
      	// memeqbody stores 1 through AX when the count bytes at a and b are
      	// equal and 0 otherwise. The functions in this file reach it with JMP,
      	// so its RET returns to their caller. SI, DI, BX, CX and DX are
      	// overwritten, as are X0-X7 on the SSE2 path.
  1323	TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1324		CMPL	BX, $4
  1325		JB	small	// 0-3 bytes get special treatment below
  1326	
  1327		// 64 bytes at a time using xmm registers
  1328	hugeloop:
  1329		CMPL	BX, $64
  1330		JB	bigloop
  1331		TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1332		JE	bigloop
  1333		MOVOU	(SI), X0
  1334		MOVOU	(DI), X1
  1335		MOVOU	16(SI), X2
  1336		MOVOU	16(DI), X3
  1337		MOVOU	32(SI), X4
  1338		MOVOU	32(DI), X5
  1339		MOVOU	48(SI), X6
  1340		MOVOU	48(DI), X7
  1341		PCMPEQB	X1, X0	// each byte becomes 0xff where a and b match, 0x00 otherwise
  1342		PCMPEQB	X3, X2
  1343		PCMPEQB	X5, X4
  1344		PCMPEQB	X7, X6
  1345		PAND	X2, X0	// fold the four 16-byte results into X0
  1346		PAND	X6, X4
  1347		PAND	X4, X0
  1348		PMOVMSKB X0, DX	// one bit per byte; 0xffff means all 64 bytes matched
  1349		ADDL	$64, SI	// advance before testing; the CMPL below sets the flags used by JEQ
  1350		ADDL	$64, DI
  1351		SUBL	$64, BX
  1352		CMPL	DX, $0xffff
  1353		JEQ	hugeloop
  1354		MOVB	$0, (AX)	// mismatch somewhere in this 64-byte chunk
  1355		RET
  1356	
  1357		// 4 bytes at a time using 32-bit register
  1358	bigloop:
  1359		CMPL	BX, $4
  1360		JBE	leftover	// 4 or fewer bytes left: finish with one final load
  1361		MOVL	(SI), CX
  1362		MOVL	(DI), DX
  1363		ADDL	$4, SI
  1364		ADDL	$4, DI
  1365		SUBL	$4, BX
  1366		CMPL	CX, DX
  1367		JEQ	bigloop
  1368		MOVB	$0, (AX)
  1369		RET
  1370	
  1371		// remaining 0-4 bytes
      		// The 4-byte loads end exactly at the end of a and b, so they may
      		// overlap bytes that were already compared. This is only reached
      		// when the original count was at least 4, so the loads stay inside
      		// the data.
  1372	leftover:
  1373		MOVL	-4(SI)(BX*1), CX
  1374		MOVL	-4(DI)(BX*1), DX
  1375		CMPL	CX, DX
  1376		SETEQ	(AX)
  1377		RET
  1378	
      		// count is 0-3 here.
  1379	small:
  1380		CMPL	BX, $0
  1381		JEQ	equal	// empty input: ZF is set, so SETEQ at equal stores 1
  1382	
  1383		LEAL	0(BX*8), CX
  1384		NEGL	CX	// CX = -8*count; shifts use only the low 5 bits of CL, i.e. 32-8*count
  1385	
  1386		MOVL	SI, DX
  1387		CMPB	DX, $0xfc	// look at the low byte of the address only
  1388		JA	si_high
  1389	
  1390		// load at SI won't cross a page boundary.
  1391		MOVL	(SI), SI	// may pick up bytes past count; SHLL below discards them
  1392		JMP	si_finish
  1393	si_high:
  1394		// address ends in 111111xx. Load up to bytes we want, move to correct position.
  1395		MOVL	-4(SI)(BX*1), SI	// load ends at the last wanted byte
  1396		SHRL	CX, SI	// wanted bytes now sit in the low end of SI
  1397	si_finish:
  1398	
  1399		// same for DI.
  1400		MOVL	DI, DX
  1401		CMPB	DX, $0xfc
  1402		JA	di_high
  1403		MOVL	(DI), DI
  1404		JMP	di_finish
  1405	di_high:
  1406		MOVL	-4(DI)(BX*1), DI
  1407		SHRL	CX, DI
  1408	di_finish:
  1409	
  1410		SUBL	SI, DI	// low 8*count bits are zero iff the wanted bytes match
  1411		SHLL	CX, DI	// shift out the unwanted high bytes; ZF = 1 iff equal
  1412	equal:
  1413		SETEQ	(AX)
  1414		RET
  1415	
  1416	TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
  1417		LEAL	ret+16(FP), AX	// cmpbody writes the 1/0/-1 result through AX
  1418		MOVL	s2_len+12(FP), DX	// blen
  1419		MOVL	s1_len+4(FP), BX	// alen
  1420		MOVL	s2_base+8(FP), DI	// b
  1421		MOVL	s1_base+0(FP), SI	// a
  1422		JMP	runtime·cmpbody(SB)	// tail jump; cmpbody returns to our caller
  1423	
  1424	TEXT bytes·Compare(SB),NOSPLIT,$0-28
      		// Three-way comparison of two byte slices. The arguments are loaded
      		// into the registers cmpbody expects and control is handed over with
      		// a tail jump; cmpbody writes 1/0/-1 to ret and returns to our caller.
      		// Each argument name now says whether the slot is the data pointer
      		// (_base) or the length (_len), matching runtime·cmpstring; the
      		// offsets, and therefore the generated code, are unchanged.
      		// NOTE(review): go vet compares these names with the Go declaration,
      		// which is not visible here -- confirm the parameter names.
      		// Offsets 8 and 20 (presumably the slice capacities) are not read.
  1425		MOVL	s1_base+0(FP), SI	// a
  1426		MOVL	s1_len+4(FP), BX	// alen
  1427		MOVL	s2_base+12(FP), DI	// b
  1428		MOVL	s2_len+16(FP), DX	// blen
  1429		LEAL	ret+24(FP), AX	// address of the result word
  1430		JMP	runtime·cmpbody(SB)
  1431	
  1432	TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
      		// Stores in ret the index of the first byte equal to c within the
      		// s_len bytes at s, or -1 if there is none.
  1433		MOVL	s+0(FP), SI	// SI keeps the start of the data for the index computation
  1434		MOVL	s_len+4(FP), CX	// CX = maximum number of bytes to scan
  1435		MOVB	c+12(FP), AL	// AL = byte to look for
  1436		MOVL	SI, DI	// DI = scan cursor
  1437		CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
  1438		JZ 3(PC)	// found: skip the next two instructions
  1439		MOVL	$-1, ret+16(FP)
  1440		RET
      		// DI points one past the matching byte, so index = DI - SI - 1.
      		// With a zero length the scan does not run and ZF is whatever it was
      		// on entry, but DI == SI, so this path yields -1 as well.
  1441		SUBL	SI, DI
  1442		SUBL	$1, DI
  1443		MOVL	DI, ret+16(FP)
  1444		RET
  1445	
  1446	TEXT strings·IndexByte(SB),NOSPLIT,$0-16
      		// Stores in ret the index of the first byte equal to c within the
      		// s_len bytes at s, or -1 if there is none.
  1447		MOVL	s+0(FP), SI	// SI keeps the start of the data for the index computation
  1448		MOVL	s_len+4(FP), CX	// CX = maximum number of bytes to scan
  1449		MOVB	c+8(FP), AL	// AL = byte to look for
  1450		MOVL	SI, DI	// DI = scan cursor
  1451		CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
  1452		JZ 3(PC)	// found: skip the next two instructions
  1453		MOVL	$-1, ret+12(FP)
  1454		RET
      		// DI points one past the matching byte, so index = DI - SI - 1.
      		// With a zero length the scan does not run and ZF is whatever it was
      		// on entry, but DI == SI, so this path yields -1 as well.
  1455		SUBL	SI, DI
  1456		SUBL	$1, DI
  1457		MOVL	DI, ret+12(FP)
  1458		RET
  1459	
  1460	// input:
  1461	//   SI = a
  1462	//   DI = b
  1463	//   BX = alen
  1464	//   DX = blen
  1465	//   AX = address of return word (set to 1/0/-1)
      	// cmpbody compares the first min(alen, blen) bytes of a and b as
      	// unsigned bytes. At the first difference it stores +1 if a's byte is
      	// larger and -1 otherwise; if there is none, the lengths decide. The
      	// functions in this file reach it with JMP, so its RET returns to their
      	// caller. BX, CX, DX, BP, SI and DI are overwritten, as are X0 and X1
      	// on the SSE2 path.
  1466	TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1467		MOVL	DX, BP
  1468		SUBL	BX, DX // DX = blen-alen
  1469		JLE	2(PC)	// blen <= alen: BP already holds the minimum, skip the next MOVL
  1470		MOVL	BX, BP // BP = min(alen, blen)
  1471		CMPL	SI, DI
  1472		JEQ	allsame	// same address: only the lengths can differ
  1473		CMPL	BP, $4
  1474		JB	small
  1475		TESTL	$0x4000000, runtime·cpuid_edx(SB) // check for sse2
  1476		JE	mediumloop
      		// 16 bytes at a time using xmm registers
  1477	largeloop:
  1478		CMPL	BP, $16
  1479		JB	mediumloop
  1480		MOVOU	(SI), X0
  1481		MOVOU	(DI), X1
  1482		PCMPEQB X0, X1	// each byte of X1 becomes 0xff where a and b match
  1483		PMOVMSKB X1, BX	// one bit per byte, set where equal (alen in BX is no longer needed)
  1484		XORL	$0xffff, BX	// convert EQ to NE
  1485		JNE	diff16	// branch if at least one byte is not equal
  1486		ADDL	$16, SI
  1487		ADDL	$16, DI
  1488		SUBL	$16, BP
  1489		JMP	largeloop
  1490	
  1491	diff16:
  1492		BSFL	BX, BX	// index of first byte that differs
  1493		XORL	DX, DX
  1494		MOVB	(SI)(BX*1), CX
  1495		CMPB	CX, (DI)(BX*1)
  1496		SETHI	DX	// DX = 1 if a's byte is larger (unsigned), else 0
  1497		LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
  1498		MOVL	DX, (AX)
  1499		RET
  1500	
      		// 4 bytes at a time using 32-bit registers
  1501	mediumloop:
  1502		CMPL	BP, $4
  1503		JBE	_0through4
  1504		MOVL	(SI), BX
  1505		MOVL	(DI), CX
  1506		CMPL	BX, CX
  1507		JNE	diff4
  1508		ADDL	$4, SI
  1509		ADDL	$4, DI
  1510		SUBL	$4, BP
  1511		JMP	mediumloop
  1512	
      		// 0-4 bytes left. The loads end exactly at the end of the common
      		// prefix, so they may overlap bytes that were already compared; this
      		// is only reached when the common length was at least 4.
  1513	_0through4:
  1514		MOVL	-4(SI)(BP*1), BX
  1515		MOVL	-4(DI)(BP*1), CX
  1516		CMPL	BX, CX
  1517		JEQ	allsame
  1518	
      		// BX and CX hold 4 bytes of a and b that are known to differ.
  1519	diff4:
  1520		BSWAPL	BX	// reverse order of bytes
  1521		BSWAPL	CX
  1522		XORL	BX, CX	// find bit differences
  1523		BSRL	CX, CX	// index of highest bit difference
  1524		SHRL	CX, BX	// move a's bit to bottom
  1525		ANDL	$1, BX	// mask bit
  1526		LEAL	-1(BX*2), BX // 1/0 => +1/-1
  1527		MOVL	BX, (AX)
  1528		RET
  1529	
  1530		// 0-3 bytes in common
  1531	small:
  1532		LEAL	(BP*8), CX
  1533		NEGL	CX	// CX = -8*BP; shifts use only the low 5 bits of CL, i.e. 32-8*BP
  1534		JEQ	allsame	// NEGL left ZF set: nothing in common to compare
  1535	
  1536		// load si
  1537		CMPB	SI, $0xfc	// byte compare on the address; presumably guards a 4-byte load against crossing a page boundary -- confirm
  1538		JA	si_high
  1539		MOVL	(SI), SI	// may pick up bytes past BP; SHLL below discards them
  1540		JMP	si_finish
  1541	si_high:
  1542		MOVL	-4(SI)(BP*1), SI	// load ends at the last common byte
  1543		SHRL	CX, SI	// wanted bytes now sit in the low end of SI
  1544	si_finish:
  1545		SHLL	CX, SI	// wanted bytes move to the high end, zeros below
  1546	
  1547		// same for di
  1548		CMPB	DI, $0xfc
  1549		JA	di_high
  1550		MOVL	(DI), DI
  1551		JMP	di_finish
  1552	di_high:
  1553		MOVL	-4(DI)(BP*1), DI
  1554		SHRL	CX, DI
  1555	di_finish:
  1556		SHLL	CX, DI
  1557	
  1558		BSWAPL	SI	// reverse order of bytes
  1559		BSWAPL	DI
  1560		XORL	SI, DI	// find bit differences
  1561		JEQ	allsame
  1562		BSRL	DI, CX	// index of highest bit difference
  1563		SHRL	CX, SI	// move a's bit to bottom
  1564		ANDL	$1, SI	// mask bit
  1565		LEAL	-1(SI*2), BX // 1/0 => +1/-1
  1566		MOVL	BX, (AX)
  1567		RET
  1568	
  1569		// all the bytes in common are the same, so we just need
  1570		// to compare the lengths.
  1571	allsame:
  1572		XORL	BX, BX
  1573		XORL	CX, CX
  1574		TESTL	DX, DX	// DX still holds blen-alen
  1575		SETLT	BX	// 1 if alen > blen
  1576		SETEQ	CX	// 1 if alen == blen
  1577		LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
  1578		MOVL	BX, (AX)
  1579		RET
  1580	
  1581	TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
      		// Advances the per-m generator state m.fastrand by one step and
      		// returns the new state in ret: the state is doubled and, when the
      		// doubled value has its top bit set, XORed with 0x88888eef.
  1582		get_tls(CX)
  1583		MOVL	g(CX), AX	// AX = current g
  1584		MOVL	g_m(AX), AX	// AX = g.m
  1585		MOVL	m_fastrand(AX), DX	// DX = current state
  1586		ADDL	DX, DX	// DX = 2*state
  1587		MOVL	DX, BX	// keep the doubled value in case the XOR is not wanted
  1588		XORL	$0x88888eef, DX
  1589		JPL	2(PC)	// XOR result non-negative, i.e. doubled value had its top bit set: keep it
  1590		MOVL	BX, DX	// otherwise fall back to the plain doubled value
  1591		MOVL	DX, m_fastrand(AX)	// store the new state
  1592		MOVL	DX, ret+0(FP)
  1593		RET
  1594	
  1595	TEXT runtime·return0(SB), NOSPLIT, $0
  1596		MOVL	$0, AX	// the zero result is left in AX, not in a stack slot
  1597		RET
  1598	
  1599	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1600	// Must obey the gcc calling convention.
      	// The result is returned in AX; only AX and CX are written.
  1601	TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1602		get_tls(CX)
  1603		MOVL	g(CX), AX	// AX = current g
  1604		MOVL	g_m(AX), AX	// AX = g.m
  1605		MOVL	m_curg(AX), AX	// AX = g.m.curg
  1606		MOVL	(g_stack+stack_hi)(AX), AX	// AX = g.m.curg.stack.hi
  1607		RET
  1608	
  1609	// The top-most function running on a goroutine
  1610	// returns to goexit+PCQuantum.
      	// The leading NOP occupies goexit+0; presumably PCQuantum is one byte
      	// here, so that such a return lands on the CALL below -- confirm.
  1611	TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1612		BYTE	$0x90	// NOP
  1613		CALL	runtime·goexit1(SB)	// does not return
  1614		// traceback from goexit1 must hit code range of goexit
      		// (the return address pushed by the CALL is the byte below, so it
      		// has to belong to this function)
  1615		BYTE	$0x90	// NOP
  1616	
  1617	// Prefetching doesn't seem to help.
  1618	TEXT runtime·prefetcht0(SB),NOSPLIT,$0-4
  1619		RET	// no-op stub: returns without reading its 4-byte argument
  1620	
  1621	TEXT runtime·prefetcht1(SB),NOSPLIT,$0-4
  1622		RET	// no-op stub: returns without reading its 4-byte argument
  1623	
  1624	TEXT runtime·prefetcht2(SB),NOSPLIT,$0-4
  1625		RET	// no-op stub: returns without reading its 4-byte argument
  1626	
  1627	TEXT runtime·prefetchnta(SB),NOSPLIT,$0-4
  1628		RET	// no-op stub: returns without reading its 4-byte argument
  1629	
  1630	// Add a module's moduledata to the linked list of moduledata objects. This
  1631	// is called from .init_array by a function generated in the linker and so
  1632	// follows the platform ABI wrt register preservation -- it only touches AX,
  1633	// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1634	// instead the pointer to the moduledata is passed in AX.
      	// NOTE(review): the instructions below write only DX and read AX; the
      	// implicit use of CX is not visible in this listing -- confirm.
  1635	TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1636	       MOVL    runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1637	       MOVL    AX, moduledata_next(DX)	// tail.next = new moduledata
  1638	       MOVL    AX, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
  1639	       RET

View as plain text