...
Run Format

Text file src/runtime/asm_386.s

Documentation: runtime

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	// _rt0_386 is common startup code for most 386 systems when using
    11	// internal linking. This is the entry point for the program from the
    12	// kernel for an ordinary -buildmode=exe program. The stack holds the
    13	// number of arguments and the C-style argv.
    14	TEXT _rt0_386(SB),NOSPLIT,$8
    15		MOVL	8(SP), AX	// argc
    16		LEAL	12(SP), BX	// argv
    17		MOVL	AX, 0(SP)	// argc into 0(SP), where runtime·rt0_go loads it from
    18		MOVL	BX, 4(SP)	// argv into 4(SP), likewise
    19		JMP	runtime·rt0_go(SB)	// jump, not call: rt0_go is written to be jumped to
    20	
    21	// _rt0_386_lib is common startup code for most 386 systems when
    22	// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    23	// arrange to invoke this function as a global constructor (for
    24	// c-archive) or when the shared library is loaded (for c-shared).
    25	// We expect argc and argv to be passed on the stack following the
    26	// usual C ABI.
    27	TEXT _rt0_386_lib(SB),NOSPLIT,$0
    28		PUSHL	BP	// save BP, BX, SI, DI; they are popped again at the restore label
    29		MOVL	SP, BP	// BP = frame base: 8(BP) and 12(BP) are the first two stack arguments
    30		PUSHL	BX
    31		PUSHL	SI
    32		PUSHL	DI
    33	
    34		MOVL	8(BP), AX	// argc
    35		MOVL	AX, _rt0_386_lib_argc<>(SB)	// stash for _rt0_386_lib_go, which runs on another thread
    36		MOVL	12(BP), AX	// argv
    37		MOVL	AX, _rt0_386_lib_argv<>(SB)
    38	
    39		// Synchronous initialization.
    40		CALL	runtime·libpreinit(SB)
    41	
    42		SUBL	$8, SP	// two 4-byte argument slots, used by whichever call is made below
    43	
    44		// Create a new thread to do the runtime initialization.
    45		MOVL	_cgo_sys_thread_create(SB), AX
    46		TESTL	AX, AX
    47		JZ	nocgo	// no _cgo_sys_thread_create: fall back to runtime·newosproc0
    48	
    49		// Align stack to call C function.
    50		// We moved SP to BP above, but BP was clobbered by the libpreinit call.
    51		MOVL	SP, BP	// remember the unaligned SP so it can be restored after the call
    52		ANDL	$~15, SP
    53	
    54		MOVL	$_rt0_386_lib_go(SB), BX
    55		MOVL	BX, 0(SP)	// first argument: function to run on the new thread
    56		MOVL	$0, 4(SP)	// second argument: 0
    57	
    58		CALL	AX	// _cgo_sys_thread_create(_rt0_386_lib_go, 0)
    59	
    60		MOVL	BP, SP	// undo the alignment
    61	
    62		JMP	restore
    63	
    64	nocgo:
    65		MOVL	$0x800000, 0(SP)                    // stacksize = 8192KB
    66		MOVL	$_rt0_386_lib_go(SB), AX
    67		MOVL	AX, 4(SP)                           // fn
    68		CALL	runtime·newosproc0(SB)
    69	
    70	restore:
    71		ADDL	$8, SP	// drop the two argument slots reserved above
    72		POPL	DI
    73		POPL	SI
    74		POPL	BX
    75		POPL	BP
    76		RET
    77	
    78	// _rt0_386_lib_go initializes the Go runtime.
    79	// This is started in a separate thread by _rt0_386_lib.
    80	TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
    81		MOVL	_rt0_386_lib_argc<>(SB), AX	// argc saved by _rt0_386_lib
    82		MOVL	AX, 0(SP)	// runtime·rt0_go loads argc from 0(SP)
    83		MOVL	_rt0_386_lib_argv<>(SB), AX	// argv saved by _rt0_386_lib
    84		MOVL	AX, 4(SP)	// runtime·rt0_go loads argv from 4(SP)
    85		JMP	runtime·rt0_go(SB)
    86	
    87	DATA _rt0_386_lib_argc<>(SB)/4, $0	// file-local copy of argc, written by _rt0_386_lib
    88	GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
    89	DATA _rt0_386_lib_argv<>(SB)/4, $0	// file-local copy of argv, written by _rt0_386_lib
    90	GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
    91	
    92	TEXT runtime·rt0_go(SB),NOSPLIT,$0
    93		// Copy arguments forward on an even stack.
    94		// Users of this function jump to it, they don't call it.
    95		MOVL	0(SP), AX	// argc, placed at 0(SP) by the _rt0_386* entry stubs
    96		MOVL	4(SP), BX	// argv, placed at 4(SP) by the _rt0_386* entry stubs
    97		SUBL	$128, SP		// plenty of scratch
    98		ANDL	$~15, SP	// round SP down to a 16-byte boundary
    99		MOVL	AX, 120(SP)		// save argc, argv away
   100		MOVL	BX, 124(SP)
   101	
   102		// set default stack bounds.
   103		// _cgo_init may update stackguard.
   104		MOVL	$runtime·g0(SB), BP
   105		LEAL	(-64*1024+104)(SP), BX	// provisional low bound: 64KB below SP, plus 104 bytes
   106		MOVL	BX, g_stackguard0(BP)
   107		MOVL	BX, g_stackguard1(BP)
   108		MOVL	BX, (g_stack+stack_lo)(BP)
   109		MOVL	SP, (g_stack+stack_hi)(BP)
   110		
   111		// find out information about the processor we're on
   112	#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
   113		JMP 	has_cpuid
   114	#else
   115		// first see if CPUID instruction is supported.
   116		PUSHFL	// copy of EFLAGS kept on the stack for the restore below
   117		PUSHFL	// second copy, modified in place
   118		XORL	$(1<<21), 0(SP) // flip ID bit
   119		POPFL	// try to load EFLAGS with the ID bit flipped
   120		PUSHFL
   121		POPL	AX	// AX = EFLAGS as actually accepted
   122		XORL	0(SP), AX	// AX = bits that differ from the original EFLAGS
   123		POPFL	// restore EFLAGS
   124		TESTL	$(1<<21), AX
   125		JNE 	has_cpuid	// the ID bit could be toggled
   126	#endif
   127	
   128	bad_proc: // show that the program requires MMX.
   129		MOVL	$2, 0(SP)	// first argument to runtime·write: 2
   130		MOVL	$bad_proc_msg<>(SB), 4(SP)
   131		MOVL	$0x3d, 8(SP)	// length, equal to the declared size of bad_proc_msg
   132		CALL	runtime·write(SB)
   133		MOVL	$1, 0(SP)	// argument to runtime·exit: 1
   134		CALL	runtime·exit(SB)
   135		CALL	runtime·abort(SB)
   136	
   137	has_cpuid:
   138		MOVL	$0, AX	// CPUID with EAX=0
   139		CPUID
   140		MOVL	AX, SI	// keep a copy of the returned AX in SI
   141		CMPL	AX, $0
   142		JE	nocpuinfo	// AX == 0: skip the vendor and feature checks
   143	
   144		// Figure out how to serialize RDTSC.
   145		// On Intel processors LFENCE is enough. AMD requires MFENCE.
   146		// Don't know about the rest, so let's do MFENCE.
   147		CMPL	BX, $0x756E6547  // "Genu"
   148		JNE	notintel
   149		CMPL	DX, $0x49656E69  // "ineI"
   150		JNE	notintel
   151		CMPL	CX, $0x6C65746E  // "ntel"
   152		JNE	notintel
   153		MOVB	$1, runtime·isIntel(SB)
   154		MOVB	$1, runtime·lfenceBeforeRdtsc(SB)	// read by runtime·cputicks to choose LFENCE over MFENCE
   155	notintel:
   156	
   157		// Load EAX=1 cpuid flags
   158		MOVL	$1, AX
   159		CPUID
   160		MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
   161		MOVL	AX, runtime·processorVersionInfo(SB)
   162	
   163		// Check for MMX support
   164		TESTL	$(1<<23), DX // MMX
   165		JZ	bad_proc	// no MMX: print the message and exit
   166	
   167	nocpuinfo:
   168		// if there is an _cgo_init, call it to let it
   169		// initialize and to set up GS.  if not,
   170		// we set up GS ourselves.
   171		MOVL	_cgo_init(SB), AX
   172		TESTL	AX, AX
   173		JZ	needtls
   174		MOVL	$setg_gcc<>(SB), BX
   175		MOVL	BX, 4(SP)	// second argument: setg_gcc
   176		MOVL	BP, 0(SP)	// first argument: BP, loaded with &runtime·g0 above
   177		CALL	AX
   178	
   179		// update stackguard after _cgo_init
   180		MOVL	$runtime·g0(SB), CX
   181		MOVL	(g_stack+stack_lo)(CX), AX
   182		ADDL	$const__StackGuard, AX	// stackguard = stack.lo + _StackGuard
   183		MOVL	AX, g_stackguard0(CX)
   184		MOVL	AX, g_stackguard1(CX)
   185	
   186	#ifndef GOOS_windows
   187		// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   188		JMP ok
   189	#endif
   190	needtls:
   191	#ifdef GOOS_plan9
   192		// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   193		JMP	ok
   194	#endif
   195	#ifdef GOOS_darwin
   196		// skip runtime·ldt0setup(SB) on Darwin
   197		JMP	ok
   198	#endif
   199	
   200		// set up %gs
   201		CALL	runtime·ldt0setup(SB)
   202	
   203		// store through it, to make sure it works
   204		get_tls(BX)
   205		MOVL	$0x123, g(BX)	// write a marker value through the TLS slot
   206		MOVL	runtime·m0+m_tls(SB), AX	// read it back directly from m0.tls
   207		CMPL	AX, $0x123
   208		JEQ	ok
   209		MOVL	AX, 0	// abort
   210	ok:
   211		// set up m and g "registers"
   212		get_tls(BX)
   213		LEAL	runtime·g0(SB), DX
   214		MOVL	DX, g(BX)	// g = &g0
   215		LEAL	runtime·m0(SB), AX
   216	
   217		// save m->g0 = g0
   218		MOVL	DX, m_g0(AX)
   219		// save g0->m = m0
   220		MOVL	AX, g_m(DX)
   221	
   222		CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   223	
   224		// convention is D is always cleared
   225		CLD
   226	
   227		CALL	runtime·check(SB)
   228	
   229		// saved argc, argv
   230		MOVL	120(SP), AX
   231		MOVL	AX, 0(SP)	// argc, first argument to runtime·args
   232		MOVL	124(SP), AX
   233		MOVL	AX, 4(SP)	// argv, second argument to runtime·args
   234		CALL	runtime·args(SB)
   235		CALL	runtime·osinit(SB)
   236		CALL	runtime·schedinit(SB)
   237	
   238		// create a new goroutine to start program
   239		PUSHL	$runtime·mainPC(SB)	// entry
   240		PUSHL	$0	// arg size
   241		CALL	runtime·newproc(SB)
   242		POPL	AX	// discard the two pushed arguments
   243		POPL	AX
   244	
   245		// start this M
   246		CALL	runtime·mstart(SB)
   247	
   248		CALL	runtime·abort(SB)	// reached only if mstart returns
   249		RET
   250	
   251	DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"	// message written by the bad_proc path of runtime·rt0_go
   252	DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
   253	DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
   254	DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
   255	DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
   256	DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
   257	DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
   258	DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
   259	DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa	// trailing newline
   260	GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d	// 0x3d = 60 text bytes + newline; rt0_go passes the same length to runtime·write
   261	
   262	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// one word holding the address of runtime·main
   263	GLOBL	runtime·mainPC(SB),RODATA,$4	// rt0_go pushes &mainPC as the entry argument to runtime·newproc
   264	
   265	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// executes a breakpoint trap and returns
   266		INT $3	// software breakpoint interrupt
   267		RET
   268	
   269	TEXT runtime·asminit(SB),NOSPLIT,$0-0
   270		// Linux and MinGW start the FPU in extended double precision.
   271		// Other operating systems use double precision.
   272		// Change to double precision to match them,
   273		// and to match other hardware that only has double.
   274		FLDCW	runtime·controlWord64(SB)	// load the x87 control word from runtime·controlWord64
   275		RET
   276	
   277	/*
   278	 *  go-routine
   279	 */
   280	
   281	// void gosave(Gobuf*)
   282	// save state in Gobuf; setjmp
   283	TEXT runtime·gosave(SB), NOSPLIT, $0-4
   284		MOVL	buf+0(FP), AX		// gobuf
   285		LEAL	buf+0(FP), BX		// caller's SP
   286		MOVL	BX, gobuf_sp(AX)
   287		MOVL	0(SP), BX		// caller's PC
   288		MOVL	BX, gobuf_pc(AX)
   289		MOVL	$0, gobuf_ret(AX)	// saved ret is always zero
   290		// Assert ctxt is zero. See func save.
   291		MOVL	gobuf_ctxt(AX), BX
   292		TESTL	BX, BX
   293		JZ	2(PC)	// ctxt == 0: skip the badctxt call
   294		CALL	runtime·badctxt(SB)
   295		get_tls(CX)
   296		MOVL	g(CX), BX	// BX = current g
   297		MOVL	BX, gobuf_g(AX)	// buf.g = current g
   298		RET
   299	
   300	// void gogo(Gobuf*)
   301	// restore state from Gobuf; longjmp
   302	TEXT runtime·gogo(SB), NOSPLIT, $8-4
   303		MOVL	buf+0(FP), BX		// gobuf
   304		MOVL	gobuf_g(BX), DX	// DX = buf.g
   305		MOVL	0(DX), CX		// make sure g != nil
   306		get_tls(CX)
   307		MOVL	DX, g(CX)	// current g = buf.g
   308		MOVL	gobuf_sp(BX), SP	// restore SP
   309		MOVL	gobuf_ret(BX), AX	// AX = buf.ret
   310		MOVL	gobuf_ctxt(BX), DX	// DX = buf.ctxt
   311		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   312		MOVL	$0, gobuf_ret(BX)
   313		MOVL	$0, gobuf_ctxt(BX)
   314		MOVL	gobuf_pc(BX), BX	// BX = buf.pc (the gobuf pointer is no longer needed)
   315		JMP	BX	// resume execution at the saved PC
   316	
   317	// func mcall(fn func(*g))
   318	// Switch to m->g0's stack, call fn(g).
   319	// Fn must never return. It should gogo(&g->sched)
   320	// to keep running g.
   321	TEXT runtime·mcall(SB), NOSPLIT, $0-4
   322		MOVL	fn+0(FP), DI	// DI = fn
   323	
   324		get_tls(DX)
   325		MOVL	g(DX), AX	// save state in g->sched
   326		MOVL	0(SP), BX	// caller's PC
   327		MOVL	BX, (g_sched+gobuf_pc)(AX)
   328		LEAL	fn+0(FP), BX	// caller's SP
   329		MOVL	BX, (g_sched+gobuf_sp)(AX)
   330		MOVL	AX, (g_sched+gobuf_g)(AX)	// g->sched.g = g
   331	
   332		// switch to m->g0 & its stack, call fn
   333		MOVL	g(DX), BX
   334		MOVL	g_m(BX), BX	// BX = g->m
   335		MOVL	m_g0(BX), SI	// SI = m->g0
   336		CMPL	SI, AX	// if g == m->g0 call badmcall
   337		JNE	3(PC)	// g != m->g0: skip the jump to badmcall
   338		MOVL	$runtime·badmcall(SB), AX
   339		JMP	AX
   340		MOVL	SI, g(DX)	// g = m->g0
   341		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   342		PUSHL	AX	// argument to fn: the g that called mcall
   343		MOVL	DI, DX	// DX = fn, the func value being called
   344		MOVL	0(DI), DI	// DI = code pointer stored in the func value
   345		CALL	DI
   346		POPL	AX	// only reached if fn returned, which it must not
   347		MOVL	$runtime·badmcall2(SB), AX
   348		JMP	AX
   349		RET
   350	
   351	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   352	// of the G stack. We need to distinguish the routine that
   353	// lives at the bottom of the G stack from the one that lives
   354	// at the top of the system stack because the one at the top of
   355	// the system stack terminates the stack walk (see topofstack()).
   356	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   357		RET	// no work: systemstack only stores this function's address as the saved g->sched.pc
   358	
   359	// func systemstack(fn func())
   360	TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   361		MOVL	fn+0(FP), DI	// DI = fn
   362		get_tls(CX)
   363		MOVL	g(CX), AX	// AX = g
   364		MOVL	g_m(AX), BX	// BX = m
   365	
   366		CMPL	AX, m_gsignal(BX)
   367		JEQ	noswitch	// running on gsignal: call fn without switching
   368	
   369		MOVL	m_g0(BX), DX	// DX = g0
   370		CMPL	AX, DX
   371		JEQ	noswitch	// running on g0: call fn without switching
   372	
   373		CMPL	AX, m_curg(BX)
   374		JNE	bad	// g is none of gsignal, g0, curg
   375	
   376		// switch stacks
   377		// save our state in g->sched. Pretend to
   378		// be systemstack_switch if the G stack is scanned.
   379		MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   380		MOVL	SP, (g_sched+gobuf_sp)(AX)
   381		MOVL	AX, (g_sched+gobuf_g)(AX)
   382	
   383		// switch to g0
   384		get_tls(CX)
   385		MOVL	DX, g(CX)	// g = g0
   386		MOVL	(g_sched+gobuf_sp)(DX), BX	// BX = g0->sched.sp
   387		// make it look like mstart called systemstack on g0, to stop traceback
   388		SUBL	$4, BX	// room for one fake return address
   389		MOVL	$runtime·mstart(SB), DX
   390		MOVL	DX, 0(BX)	// fake return address: runtime·mstart
   391		MOVL	BX, SP
   392	
   393		// call target function
   394		MOVL	DI, DX	// DX = fn, the func value being called
   395		MOVL	0(DI), DI	// DI = code pointer stored in the func value
   396		CALL	DI
   397	
   398		// switch back to g
   399		get_tls(CX)
   400		MOVL	g(CX), AX
   401		MOVL	g_m(AX), BX
   402		MOVL	m_curg(BX), AX	// AX = m->curg, the g we switched away from
   403		MOVL	AX, g(CX)
   404		MOVL	(g_sched+gobuf_sp)(AX), SP	// back on the g stack at the SP saved above
   405		MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
   406		RET
   407	
   408	noswitch:
   409		// already on system stack; tail call the function
   410		// Using a tail call here cleans up tracebacks since we won't stop
   411		// at an intermediate systemstack.
   412		MOVL	DI, DX	// DX = fn, the func value being called
   413		MOVL	0(DI), DI	// DI = code pointer stored in the func value
   414		JMP	DI
   415	
   416	bad:
   417		// Bad: g is not gsignal, not g0, not curg. What is it?
   418		// Hide call from linker nosplit analysis.
   419		MOVL	$runtime·badsystemstack(SB), AX
   420		CALL	AX
   421		INT	$3	// trap if badsystemstack returns
   422	
   423	/*
   424	 * support for morestack
   425	 */
   426	
   427	// Called during function prolog when more stack is needed.
   428	//
   429	// The traceback routines see morestack on a g0 as being
   430	// the top of a stack (for example, morestack calling newstack
   431	// calling the scheduler calling newm calling gc), so we must
   432	// record an argument size. For that purpose, it has no arguments.
   433	TEXT runtime·morestack(SB),NOSPLIT,$0-0
   434		// Cannot grow scheduler stack (m->g0).
   435		get_tls(CX)
   436		MOVL	g(CX), BX
   437		MOVL	g_m(BX), BX	// BX = g->m
   438		MOVL	m_g0(BX), SI
   439		CMPL	g(CX), SI
   440		JNE	3(PC)	// g != m->g0: skip the two calls below
   441		CALL	runtime·badmorestackg0(SB)
   442		CALL	runtime·abort(SB)
   443	
   444		// Cannot grow signal stack.
   445		MOVL	m_gsignal(BX), SI
   446		CMPL	g(CX), SI
   447		JNE	3(PC)	// g != m->gsignal: skip the two calls below
   448		CALL	runtime·badmorestackgsignal(SB)
   449		CALL	runtime·abort(SB)
   450	
   451		// Called from f.
   452		// Set m->morebuf to f's caller.
   453		MOVL	4(SP), DI	// f's caller's PC
   454		MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   455		LEAL	8(SP), CX	// f's caller's SP
   456		MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   457		get_tls(CX)
   458		MOVL	g(CX), SI	// SI = g
   459		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   460	
   461		// Set g->sched to context in f.
   462		MOVL	0(SP), AX	// f's PC
   463		MOVL	AX, (g_sched+gobuf_pc)(SI)
   464		MOVL	SI, (g_sched+gobuf_g)(SI)
   465		LEAL	4(SP), AX	// f's SP
   466		MOVL	AX, (g_sched+gobuf_sp)(SI)
   467		MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// ctxt = DX as received (runtime·morestack_noctxt zeroes DX first)
   468	
   469		// Call newstack on m->g0's stack.
   470		MOVL	m_g0(BX), BP
   471		MOVL	BP, g(CX)	// g = m->g0
   472		MOVL	(g_sched+gobuf_sp)(BP), AX
   473		MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   474		MOVL	AX, SP
   475		CALL	runtime·newstack(SB)
   476		CALL	runtime·abort(SB)	// crash if newstack returns
   477		RET
   478	
   479	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0	// morestack entry that first clears DX
   480		MOVL	$0, DX	// runtime·morestack stores DX into g->sched.ctxt
   481		JMP runtime·morestack(SB)
   482	
   483	// reflectcall: call a function with the given argument list
   484	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   485	// We don't have variable-sized frames, so we use a small number
   486	// of constant-sized-frame functions to encode a few bits of size in the pc.
   487	// Caution: ugly multiline assembly macros in your future!
   488	
   489	#define DISPATCH(NAME,MAXSIZE)		\
   490		CMPL	CX, $MAXSIZE;		\
   491		JA	3(PC);			\
   492		MOVL	$NAME(SB), AX;		\
   493		JMP	AX
   494	// DISPATCH jumps to NAME when CX <= MAXSIZE (unsigned compare) and otherwise falls through. Note: can't just "JMP NAME(SB)" - bad inlining results.
   495	
   496	TEXT reflect·call(SB), NOSPLIT, $0-0	// reflect·call is a second entry point for ·reflectcall
   497		JMP	·reflectcall(SB)	// tail jump: the arguments stay in place for ·reflectcall
   498	
   499	TEXT ·reflectcall(SB), NOSPLIT, $0-20
   500		MOVL	argsize+12(FP), CX	// CX = argsize; each DISPATCH below compares it against one size class
   501		DISPATCH(runtime·call16, 16)
   502		DISPATCH(runtime·call32, 32)
   503		DISPATCH(runtime·call64, 64)
   504		DISPATCH(runtime·call128, 128)
   505		DISPATCH(runtime·call256, 256)
   506		DISPATCH(runtime·call512, 512)
   507		DISPATCH(runtime·call1024, 1024)
   508		DISPATCH(runtime·call2048, 2048)
   509		DISPATCH(runtime·call4096, 4096)
   510		DISPATCH(runtime·call8192, 8192)
   511		DISPATCH(runtime·call16384, 16384)
   512		DISPATCH(runtime·call32768, 32768)
   513		DISPATCH(runtime·call65536, 65536)
   514		DISPATCH(runtime·call131072, 131072)
   515		DISPATCH(runtime·call262144, 262144)
   516		DISPATCH(runtime·call524288, 524288)
   517		DISPATCH(runtime·call1048576, 1048576)
   518		DISPATCH(runtime·call2097152, 2097152)
   519		DISPATCH(runtime·call4194304, 4194304)
   520		DISPATCH(runtime·call8388608, 8388608)
   521		DISPATCH(runtime·call16777216, 16777216)
   522		DISPATCH(runtime·call33554432, 33554432)
   523		DISPATCH(runtime·call67108864, 67108864)
   524		DISPATCH(runtime·call134217728, 134217728)
   525		DISPATCH(runtime·call268435456, 268435456)
   526		DISPATCH(runtime·call536870912, 536870912)
   527		DISPATCH(runtime·call1073741824, 1073741824)
   528		MOVL	$runtime·badreflectcall(SB), AX	// argsize is larger than the biggest size class
   529		JMP	AX
   530	
   531	#define CALLFN(NAME,MAXSIZE)			\
   532	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   533		NO_LOCAL_POINTERS;			\
   534		/* copy argsize bytes from argptr to the frame at SP */		\
   535		MOVL	argptr+8(FP), SI;		\
   536		MOVL	argsize+12(FP), CX;		\
   537		MOVL	SP, DI;				\
   538		REP;MOVSB;				\
   539		/* call through the code pointer stored at f; DX keeps f */			\
   540		MOVL	f+4(FP), DX;			\
   541		MOVL	(DX), AX; 			\
   542		PCDATA  $PCDATA_StackMapIndex, $0;	\
   543		CALL	AX;				\
   544		/* copy return values back: pass argtype, argptr+retoffset, SP+retoffset, argsize-retoffset to callRet in DX, DI, SI, CX */		\
   545		MOVL	argtype+0(FP), DX;		\
   546		MOVL	argptr+8(FP), DI;		\
   547		MOVL	argsize+12(FP), CX;		\
   548		MOVL	retoffset+16(FP), BX;		\
   549		MOVL	SP, SI;				\
   550		ADDL	BX, DI;				\
   551		ADDL	BX, SI;				\
   552		SUBL	BX, CX;				\
   553		CALL	callRet<>(SB);			\
   554		RET
   555	
   556	// callRet copies return values back at the end of call*. This is a
   557	// separate function so it can allocate stack space for the arguments
   558	// to reflectcallmove. It does not follow the Go ABI; it expects its
   559	// arguments in registers.
   560	TEXT callRet<>(SB), NOSPLIT, $16-0
   561		MOVL	DX, 0(SP)	// argtype, as loaded by CALLFN
   562		MOVL	DI, 4(SP)	// argptr + retoffset
   563		MOVL	SI, 8(SP)	// SP of the call* frame + retoffset
   564		MOVL	CX, 12(SP)	// argsize - retoffset
   565		CALL	runtime·reflectcallmove(SB)
   566		RET
   567	
   568	CALLFN(·call16, 16)	// one fixed-frame stub per size class tested by ·reflectcall
   569	CALLFN(·call32, 32)
   570	CALLFN(·call64, 64)
   571	CALLFN(·call128, 128)
   572	CALLFN(·call256, 256)
   573	CALLFN(·call512, 512)
   574	CALLFN(·call1024, 1024)
   575	CALLFN(·call2048, 2048)
   576	CALLFN(·call4096, 4096)
   577	CALLFN(·call8192, 8192)
   578	CALLFN(·call16384, 16384)
   579	CALLFN(·call32768, 32768)
   580	CALLFN(·call65536, 65536)
   581	CALLFN(·call131072, 131072)
   582	CALLFN(·call262144, 262144)
   583	CALLFN(·call524288, 524288)
   584	CALLFN(·call1048576, 1048576)
   585	CALLFN(·call2097152, 2097152)
   586	CALLFN(·call4194304, 4194304)
   587	CALLFN(·call8388608, 8388608)
   588	CALLFN(·call16777216, 16777216)
   589	CALLFN(·call33554432, 33554432)
   590	CALLFN(·call67108864, 67108864)
   591	CALLFN(·call134217728, 134217728)
   592	CALLFN(·call268435456, 268435456)
   593	CALLFN(·call536870912, 536870912)
   594	CALLFN(·call1073741824, 1073741824)
   595	
   596	TEXT runtime·procyield(SB),NOSPLIT,$0-0	// executes PAUSE `cycles` times
   597		MOVL	cycles+0(FP), AX	// AX = remaining iterations
   598	again:
   599		PAUSE
   600		SUBL	$1, AX
   601		JNZ	again	// loop until the counter reaches zero
   602		RET
   603	
   604	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
   605		// Stores are already ordered on x86, so this is just a
   606		// compile barrier: the body executes no instructions besides RET.
   607		RET
   608	
   609	// void jmpdefer(fn, sp);
   610	// called from deferreturn.
   611	// 1. pop the caller
   612	// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   613	//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   614	//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   615	//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   616	// 3. jmp to the argument
   617	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   618		MOVL	fv+0(FP), DX	// fn
   619		MOVL	argp+4(FP), BX	// caller sp
   620		LEAL	-4(BX), SP	// caller sp after CALL
   621	#ifdef GOBUILDMODE_shared
   622		SUBL	$16, (SP)	// return to CALL again
   623	#else
   624		SUBL	$5, (SP)	// return to CALL again
   625	#endif
   626		MOVL	0(DX), BX	// BX = code pointer stored in the func value
   627		JMP	BX	// but first run the deferred function
   628	
   629	// Save state of caller into g->sched. AX and BX are preserved.
   630	TEXT gosave<>(SB),NOSPLIT,$0
   631		PUSHL	AX	// AX and BX are used as scratch and restored before returning
   632		PUSHL	BX
   633		get_tls(BX)
   634		MOVL	g(BX), BX	// BX = current g
   635		LEAL	arg+0(FP), AX	// stored as the caller's SP
   636		MOVL	AX, (g_sched+gobuf_sp)(BX)
   637		MOVL	-4(AX), AX	// word just below that address, stored as the caller's PC
   638		MOVL	AX, (g_sched+gobuf_pc)(BX)
   639		MOVL	$0, (g_sched+gobuf_ret)(BX)
   640		// Assert ctxt is zero. See func save.
   641		MOVL	(g_sched+gobuf_ctxt)(BX), AX
   642		TESTL	AX, AX
   643		JZ	2(PC)	// ctxt == 0: skip the badctxt call
   644		CALL	runtime·badctxt(SB)
   645		POPL	BX
   646		POPL	AX
   647		RET
   648	
   649	// func asmcgocall(fn, arg unsafe.Pointer) int32
   650	// Call fn(arg) on the scheduler stack,
   651	// aligned appropriately for the gcc ABI.
   652	// See cgocall.go for more details.
   653	TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   654		MOVL	fn+0(FP), AX
   655		MOVL	arg+4(FP), BX
   656	
   657		MOVL	SP, DX	// DX = SP on entry
   658	
   659		// Figure out if we need to switch to m->g0 stack.
   660		// We get called to create new OS threads too, and those
   661		// come in on the m->g0 stack already.
   662		get_tls(CX)
   663		MOVL	g(CX), BP
   664		CMPL	BP, $0
   665		JEQ	nosave	// Don't even have a G yet.
   666		MOVL	g_m(BP), BP	// BP = g->m
   667		MOVL	m_g0(BP), SI	// SI = m->g0
   668		MOVL	g(CX), DI	// DI = g
   669		CMPL	SI, DI
   670		JEQ	noswitch	// already on g0
   671		CMPL	DI, m_gsignal(BP)
   672		JEQ	noswitch	// on gsignal: stay on this stack
   673		CALL	gosave<>(SB)	// record the caller's SP and PC in g->sched
   674		get_tls(CX)
   675		MOVL	SI, g(CX)	// g = m->g0
   676		MOVL	(g_sched+gobuf_sp)(SI), SP	// switch to g0's saved SP
   677	
   678	noswitch:
   679		// Now on a scheduling stack (a pthread-created stack).
   680		SUBL	$32, SP
   681		ANDL	$~15, SP	// alignment, perhaps unnecessary
   682		MOVL	DI, 8(SP)	// save g
   683		MOVL	(g_stack+stack_hi)(DI), DI
   684		SUBL	DX, DI	// DI = g->stack.hi - entry SP
   685		MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   686		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   687		CALL	AX
   688	
   689		// Restore registers, g, stack pointer.
   690		get_tls(CX)
   691		MOVL	8(SP), DI	// DI = saved g
   692		MOVL	(g_stack+stack_hi)(DI), SI
   693		SUBL	4(SP), SI	// SI = g->stack.hi - saved depth = SP to resume on
   694		MOVL	DI, g(CX)
   695		MOVL	SI, SP
   696	
   697		MOVL	AX, ret+8(FP)	// return value of fn
   698		RET
   699	nosave:
   700		// Now on a scheduling stack (a pthread-created stack).
   701		SUBL	$32, SP
   702		ANDL	$~15, SP	// alignment, perhaps unnecessary
   703		MOVL	DX, 4(SP)	// save original stack pointer
   704		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   705		CALL	AX
   706	
   707		MOVL	4(SP), CX	// restore original stack pointer
   708		MOVL	CX, SP
   709		MOVL	AX, ret+8(FP)	// return value of fn
   710		RET
   711	
   712	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   713	// Turn the fn into a Go func (by taking its address) and call
   714	// cgocallback_gofunc.
   715	TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
   716		LEAL	fn+0(FP), AX	// address of the fn argument slot, passed as the FuncVal*
   717		MOVL	AX, 0(SP)
   718		MOVL	frame+4(FP), AX
   719		MOVL	AX, 4(SP)
   720		MOVL	framesize+8(FP), AX
   721		MOVL	AX, 8(SP)
   722		MOVL	ctxt+12(FP), AX
   723		MOVL	AX, 12(SP)
   724		MOVL	$runtime·cgocallback_gofunc(SB), AX
   725		CALL	AX	// indirect call through AX
   726		RET
   727	
   728	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   729	// See cgocall.go for more details.
   730	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   731		NO_LOCAL_POINTERS
   732	
   733		// If g is nil, Go did not create the current thread.
   734		// Call needm to obtain one for temporary use.
   735		// In this case, we're running on the thread stack, so there's
   736		// lots of space, but the linker doesn't know. Hide the call from
   737		// the linker analysis by using an indirect call through AX.
   738		get_tls(CX)
   739	#ifdef GOOS_windows
   740		MOVL	$0, BP
   741		CMPL	CX, $0
   742		JEQ	2(PC) // TODO
   743	#endif
   744		MOVL	g(CX), BP
   745		CMPL	BP, $0
   746		JEQ	needm	// no g on this thread
   747		MOVL	g_m(BP), BP	// BP = g->m
   748		MOVL	BP, DX // saved copy of oldm
   749		JMP	havem
   750	needm:
   751		MOVL	$0, 0(SP)
   752		MOVL	$runtime·needm(SB), AX
   753		CALL	AX
   754		MOVL	0(SP), DX	// DX (oldm) = 0(SP) after needm; tested against 0 before dropm below
   755		get_tls(CX)
   756		MOVL	g(CX), BP
   757		MOVL	g_m(BP), BP	// BP = m obtained by needm
   758	
   759		// Set m->sched.sp = SP, so that if a panic happens
   760		// during the function we are about to execute, it will
   761		// have a valid SP to run on the g0 stack.
   762		// The next few lines (after the havem label)
   763		// will save this SP onto the stack and then write
   764		// the same SP back to m->sched.sp. That seems redundant,
   765		// but if an unrecovered panic happens, unwindm will
   766		// restore the g->sched.sp from the stack location
   767		// and then systemstack will try to use it. If we don't set it here,
   768		// that restored SP will be uninitialized (typically 0) and
   769		// will not be usable.
   770		MOVL	m_g0(BP), SI
   771		MOVL	SP, (g_sched+gobuf_sp)(SI)
   772	
   773	havem:
   774		// Now there's a valid m, and we're running on its m->g0.
   775		// Save current m->g0->sched.sp on stack and then set it to SP.
   776		// Save current sp in m->g0->sched.sp in preparation for
   777		// switch back to m->curg stack.
   778		// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   779		MOVL	m_g0(BP), SI
   780		MOVL	(g_sched+gobuf_sp)(SI), AX
   781		MOVL	AX, 0(SP)	// old m->g0->sched.sp, reloaded after the switch back to g0 below
   782		MOVL	SP, (g_sched+gobuf_sp)(SI)
   783	
   784		// Switch to m->curg stack and call runtime.cgocallbackg.
   785		// Because we are taking over the execution of m->curg
   786		// but *not* resuming what had been running, we need to
   787		// save that information (m->curg->sched) so we can restore it.
   788		// We can restore m->curg->sched.sp easily, because calling
   789		// runtime.cgocallbackg leaves SP unchanged upon return.
   790		// To save m->curg->sched.pc, we push it onto the stack.
   791		// This has the added benefit that it looks to the traceback
   792		// routine like cgocallbackg is going to return to that
   793		// PC (because the frame we allocate below has the same
   794		// size as cgocallback_gofunc's frame declared above)
   795		// so that the traceback will seamlessly trace back into
   796		// the earlier calls.
   797		//
   798		// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
   799		// 8(SP) is unused.
   800		MOVL	m_curg(BP), SI
   801		MOVL	SI, g(CX)	// g = m->curg
   802		MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   803		MOVL	(g_sched+gobuf_pc)(SI), BP
   804		MOVL	BP, -4(DI)	// saved curg->sched.pc sits where a return address would
   805		MOVL	ctxt+12(FP), CX
   806		LEAL	-(4+12)(DI), SP	// 4 bytes for the saved pc plus a 12-byte frame
   807		MOVL	DX, 4(SP)	// oldm, reloaded after the call
   808		MOVL	CX, 0(SP)	// ctxt, the argument to cgocallbackg
   809		CALL	runtime·cgocallbackg(SB)
   810		MOVL	4(SP), DX	// DX = oldm again
   811	
   812		// Restore g->sched (== m->curg->sched) from saved values.
   813		get_tls(CX)
   814		MOVL	g(CX), SI
   815		MOVL	12(SP), BP	// the pc stored at -4(DI) above
   816		MOVL	BP, (g_sched+gobuf_pc)(SI)
   817		LEAL	(12+4)(SP), DI	// the original curg->sched.sp
   818		MOVL	DI, (g_sched+gobuf_sp)(SI)
   819	
   820		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   821		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   822		// so we do not have to restore it.)
   823		MOVL	g(CX), BP
   824		MOVL	g_m(BP), BP
   825		MOVL	m_g0(BP), SI
   826		MOVL	SI, g(CX)	// g = m->g0
   827		MOVL	(g_sched+gobuf_sp)(SI), SP
   828		MOVL	0(SP), AX	// the old sched.sp saved at 0(SP) after havem
   829		MOVL	AX, (g_sched+gobuf_sp)(SI)
   830		
   831		// If the m on entry was nil, we called needm above to borrow an m
   832		// for the duration of the call. Since the call is over, return it with dropm.
   833		CMPL	DX, $0
   834		JNE 3(PC)	// oldm != 0: skip the dropm call
   835		MOVL	$runtime·dropm(SB), AX
   836		CALL	AX
   837	
   838		// Done!
   839		RET
   840	
   841	// void setg(G*); set g. for use by needm.
   842	TEXT runtime·setg(SB), NOSPLIT, $0-4
   843		MOVL	gg+0(FP), BX	// BX = new g
   844	#ifdef GOOS_windows
   845		CMPL	BX, $0
   846		JNE	settls
   847		MOVL	$0, 0x14(FS)	// g is nil: clear the word at 0x14(FS) and return without touching g(CX)
   848		RET
   849	settls:
   850		MOVL	g_m(BX), AX
   851		LEAL	m_tls(AX), AX	// AX = &g->m->tls
   852		MOVL	AX, 0x14(FS)	// store &g->m->tls at 0x14(FS)
   853	#endif
   854		get_tls(CX)
   855		MOVL	BX, g(CX)	// g = gg
   856		RET
   857	
   858	// void setg_gcc(G*); set g. for use by gcc
   859	TEXT setg_gcc<>(SB), NOSPLIT, $0
   860		get_tls(AX)
   861		MOVL	gg+0(FP), DX	// DX = new g
   862		MOVL	DX, g(AX)	// g = gg
   863		RET
   864	
   865	TEXT runtime·abort(SB),NOSPLIT,$0-0	// trap, then spin forever; there is no RET
   866		INT	$3
   867	loop:
   868		JMP	loop	// if execution continues past the trap, loop here
   869	
   870	// check that SP is in range [g->stack.lo, g->stack.hi)
   871	TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   872		get_tls(CX)
   873		MOVL	g(CX), AX	// AX = g
   874		CMPL	(g_stack+stack_hi)(AX), SP
   875		JHI	2(PC)	// stack.hi > SP (unsigned): skip the abort
   876		CALL	runtime·abort(SB)
   877		CMPL	SP, (g_stack+stack_lo)(AX)
   878		JHI	2(PC)	// SP > stack.lo (unsigned, strict): skip the abort
   879		CALL	runtime·abort(SB)
   880		RET
   881	
   882	// func cputicks() int64
   883	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   884		CMPB	runtime·support_sse2(SB), $1
   885		JNE	done	// support_sse2 != 1: issue RDTSC without any fence
   886		CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   887		JNE	mfence	// lfenceBeforeRdtsc != 1: use MFENCE instead
   888		LFENCE
   889		JMP	done
   890	mfence:
   891		MFENCE
   892	done:
   893		RDTSC
   894		MOVL	AX, ret_lo+0(FP)	// low 32 bits of the result
   895		MOVL	DX, ret_hi+4(FP)	// high 32 bits of the result
   896		RET
   897	
   898	TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0
   899		// set up ldt 7 to point at m0.tls
   900		// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   901		// the entry number is just a hint.  setldt will set up GS with what it used.
   902		MOVL	$7, 0(SP)	// first argument: requested entry number
   903		LEAL	runtime·m0+m_tls(SB), AX
   904		MOVL	AX, 4(SP)	// second argument: &m0.tls
   905		MOVL	$32, 8(SP)	// sizeof(tls array)
   906		CALL	runtime·setldt(SB)
   907		RET
   908	
   909	TEXT runtime·emptyfunc(SB),0,$0-0	// flags are 0 (no NOSPLIT); rt0_go calls this to "fault if stack check is wrong"
   910		RET
   911	
   912	// hash function using AES hardware instructions
   913	TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   914		MOVL	p+0(FP), AX	// ptr to data
   915		MOVL	s+8(FP), BX	// size
   916		LEAL	ret+12(FP), DX	// address of the result slot, where aeshashbody stores the hash
   917		JMP	runtime·aeshashbody(SB)	// tail jump with AX = data, BX = length, DX = result address
   918	
   919	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12	// string variant of aeshash: unpacks the string, then shares aeshashbody
   920		MOVL	p+0(FP), AX	// ptr to string object
   921		MOVL	4(AX), BX	// length of string
   922		MOVL	(AX), AX	// string data
   923		LEAL	ret+8(FP), DX	// address of the result slot, where aeshashbody stores the hash
   924		JMP	runtime·aeshashbody(SB)	// tail jump with AX = data, BX = length, DX = result address
   925	
   926	// AX: data
   927	// BX: length
   928	// DX: address to put return value
   929	TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0	// also reads the seed at h+4(FP); stores the low 32 bits of the hash to (DX)
   930		MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   931		PINSRW	$4, BX, X0	            // 16 bits of length
   932		PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   933		MOVO	X0, X1                      // save unscrambled seed
   934		PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   935		AESENC	X0, X0                      // scramble seed
   936		// Dispatch on the length in BX (unsigned compares).
   937		CMPL	BX, $16
   938		JB	aes0to15	// length < 16
   939		JE	aes16	// length == 16
   940		CMPL	BX, $32
   941		JBE	aes17to32
   942		CMPL	BX, $64
   943		JBE	aes33to64
   944		JMP	aes65plus
   945		
   946	aes0to15:
   947		TESTL	BX, BX
   948		JE	aes0	// empty input
   949	
   950		ADDL	$16, AX	// AX = data + 16
   951		TESTW	$0xff0, AX	// are address bits 4-11 of data+16 all zero?
   952		JE	endofpage	// yes: a 16-byte load starting at data might cross a page boundary
   953	
   954		// 16 bytes loaded at this address won't cross
   955		// a page boundary, so we can load it directly.
   956		MOVOU	-16(AX), X1	// load 16 bytes starting at the original data pointer
   957		ADDL	BX, BX	// BX = 2*length, so (BX*8) below is length*16, the byte offset of a 16-byte table entry
   958		PAND	masks<>(SB)(BX*8), X1	// zero the bytes past the end of the input
   959	
   960	final1:	
   961		AESENC	X0, X1  // scramble input, xor in seed
   962		AESENC	X1, X1  // scramble combo 2 times
   963		AESENC	X1, X1
   964		MOVL	X1, (DX)	// store the low 32 bits as the hash
   965		RET
   966	
   967	endofpage:
   968		// data address ends in 1111 1111 xxxx (bits 4-11 all set). Might be up against
   969		// a page boundary, so load ending at last byte.
   970		// Then shift bytes down using pshufb.
   971		MOVOU	-32(AX)(BX*1), X1	// AX is data+16 here, so this loads the 16 bytes ending at data+length
   972		ADDL	BX, BX	// BX = 2*length: scaled by 8 it indexes the 16-byte shifts entries
   973		PSHUFB	shifts<>(SB)(BX*8), X1	// move the input bytes to the bottom of X1 (0xff selectors zero the other bytes)
   974		JMP	final1
   975	
   976	aes0:
   977		// Return scrambled input seed
   978		AESENC	X0, X0
   979		MOVL	X0, (DX)
   980		RET
   981	
   982	aes16:
   983		MOVOU	(AX), X1	// exactly 16 bytes: load them all, no masking needed
   984		JMP	final1
   985	
   986	aes17to32:
   987		// make second starting seed
   988		PXOR	runtime·aeskeysched+16(SB), X1
   989		AESENC	X1, X1
   990		
   991		// load data to be hashed
   992		MOVOU	(AX), X2	// first 16 bytes
   993		MOVOU	-16(AX)(BX*1), X3	// last 16 bytes (overlaps the first load when length < 32)
   994	
   995		// scramble 3 times
   996		AESENC	X0, X2
   997		AESENC	X1, X3
   998		AESENC	X2, X2
   999		AESENC	X3, X3
  1000		AESENC	X2, X2
  1001		AESENC	X3, X3
  1002	
  1003		// combine results
  1004		PXOR	X3, X2
  1005		MOVL	X2, (DX)
  1006		RET
  1007	
  1008	aes33to64:
  1009		// make 3 more starting seeds
  1010		MOVO	X1, X2
  1011		MOVO	X1, X3
  1012		PXOR	runtime·aeskeysched+16(SB), X1
  1013		PXOR	runtime·aeskeysched+32(SB), X2
  1014		PXOR	runtime·aeskeysched+48(SB), X3
  1015		AESENC	X1, X1
  1016		AESENC	X2, X2
  1017		AESENC	X3, X3
  1018		
  1019		MOVOU	(AX), X4	// first 32 bytes of input go in X4, X5
  1020		MOVOU	16(AX), X5
  1021		MOVOU	-32(AX)(BX*1), X6	// last 32 bytes of input go in X6, X7 (may overlap the first 32)
  1022		MOVOU	-16(AX)(BX*1), X7
  1023		// scramble each lane 3 times; the first round xors in that lane's seed
  1024		AESENC	X0, X4
  1025		AESENC	X1, X5
  1026		AESENC	X2, X6
  1027		AESENC	X3, X7
  1028		
  1029		AESENC	X4, X4
  1030		AESENC	X5, X5
  1031		AESENC	X6, X6
  1032		AESENC	X7, X7
  1033		
  1034		AESENC	X4, X4
  1035		AESENC	X5, X5
  1036		AESENC	X6, X6
  1037		AESENC	X7, X7
  1038		// combine the four lanes into X4
  1039		PXOR	X6, X4
  1040		PXOR	X7, X5
  1041		PXOR	X5, X4
  1042		MOVL	X4, (DX)
  1043		RET
  1044	
  1045	aes65plus:
  1046		// make 3 more starting seeds
  1047		MOVO	X1, X2
  1048		MOVO	X1, X3
  1049		PXOR	runtime·aeskeysched+16(SB), X1
  1050		PXOR	runtime·aeskeysched+32(SB), X2
  1051		PXOR	runtime·aeskeysched+48(SB), X3
  1052		AESENC	X1, X1
  1053		AESENC	X2, X2
  1054		AESENC	X3, X3
  1055		
  1056		// start with last (possibly overlapping) block
  1057		MOVOU	-64(AX)(BX*1), X4
  1058		MOVOU	-48(AX)(BX*1), X5
  1059		MOVOU	-32(AX)(BX*1), X6
  1060		MOVOU	-16(AX)(BX*1), X7
  1061	
  1062		// scramble state once
  1063		AESENC	X0, X4
  1064		AESENC	X1, X5
  1065		AESENC	X2, X6
  1066		AESENC	X3, X7
  1067	
  1068		// compute number of remaining 64-byte blocks
  1069		DECL	BX	// with the shift below: BX = (length-1)/64, at least 1 because length >= 65 here
  1070		SHRL	$6, BX
  1071		
  1072	aesloop:
  1073		// scramble state, xor in a block
  1074		MOVOU	(AX), X0	// X0-X3 are reused for input data; the seeds were consumed above
  1075		MOVOU	16(AX), X1
  1076		MOVOU	32(AX), X2
  1077		MOVOU	48(AX), X3
  1078		AESENC	X0, X4
  1079		AESENC	X1, X5
  1080		AESENC	X2, X6
  1081		AESENC	X3, X7
  1082	
  1083		// scramble state
  1084		AESENC	X4, X4
  1085		AESENC	X5, X5
  1086		AESENC	X6, X6
  1087		AESENC	X7, X7
  1088	
  1089		ADDL	$64, AX	// advance to the next 64-byte block
  1090		DECL	BX
  1091		JNE	aesloop	// loop until the block count reaches zero
  1092	
  1093		// 2 more scrambles to finish
  1094		AESENC	X4, X4
  1095		AESENC	X5, X5
  1096		AESENC	X6, X6
  1097		AESENC	X7, X7
  1098		
  1099		AESENC	X4, X4
  1100		AESENC	X5, X5
  1101		AESENC	X6, X6
  1102		AESENC	X7, X7
  1103		// combine the four lanes into X4
  1104		PXOR	X6, X4
  1105		PXOR	X7, X5
  1106		PXOR	X5, X4
  1107		MOVL	X4, (DX)
  1108		RET
  1109	
  1110	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12	// hash the 4 bytes at p with seed h; result in ret+8(FP)
  1111		MOVL	p+0(FP), AX	// ptr to data
  1112		MOVL	h+4(FP), X0	// seed
  1113		PINSRD	$1, (AX), X0	// data
  1114		AESENC	runtime·aeskeysched+0(SB), X0	// three AES rounds keyed by the first 48 bytes of aeskeysched
  1115		AESENC	runtime·aeskeysched+16(SB), X0
  1116		AESENC	runtime·aeskeysched+32(SB), X0
  1117		MOVL	X0, ret+8(FP)	// low 32 bits of X0 are the hash
  1118		RET
  1119	
  1120	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12	// hash the 8 bytes at p with seed h; result in ret+8(FP)
  1121		MOVL	p+0(FP), AX	// ptr to data
  1122		MOVQ	(AX), X0	// data
  1123		PINSRD	$2, h+4(FP), X0	// seed
  1124		AESENC	runtime·aeskeysched+0(SB), X0	// three AES rounds keyed by the first 48 bytes of aeskeysched
  1125		AESENC	runtime·aeskeysched+16(SB), X0
  1126		AESENC	runtime·aeskeysched+32(SB), X0
  1127		MOVL	X0, ret+8(FP)	// low 32 bits of X0 are the hash
  1128		RET
  1129	
  1130	// simple mask to get rid of data in the high part of the register: the 16-byte entry at offset n*16 (n = 0..15) keeps the low n bytes and zeroes the rest.
  1131	DATA masks<>+0x00(SB)/4, $0x00000000	// n = 0
  1132	DATA masks<>+0x04(SB)/4, $0x00000000
  1133	DATA masks<>+0x08(SB)/4, $0x00000000
  1134	DATA masks<>+0x0c(SB)/4, $0x00000000
  1135		
  1136	DATA masks<>+0x10(SB)/4, $0x000000ff	// n = 1
  1137	DATA masks<>+0x14(SB)/4, $0x00000000
  1138	DATA masks<>+0x18(SB)/4, $0x00000000
  1139	DATA masks<>+0x1c(SB)/4, $0x00000000
  1140		
  1141	DATA masks<>+0x20(SB)/4, $0x0000ffff	// n = 2
  1142	DATA masks<>+0x24(SB)/4, $0x00000000
  1143	DATA masks<>+0x28(SB)/4, $0x00000000
  1144	DATA masks<>+0x2c(SB)/4, $0x00000000
  1145		
  1146	DATA masks<>+0x30(SB)/4, $0x00ffffff	// n = 3
  1147	DATA masks<>+0x34(SB)/4, $0x00000000
  1148	DATA masks<>+0x38(SB)/4, $0x00000000
  1149	DATA masks<>+0x3c(SB)/4, $0x00000000
  1150		
  1151	DATA masks<>+0x40(SB)/4, $0xffffffff	// n = 4
  1152	DATA masks<>+0x44(SB)/4, $0x00000000
  1153	DATA masks<>+0x48(SB)/4, $0x00000000
  1154	DATA masks<>+0x4c(SB)/4, $0x00000000
  1155		
  1156	DATA masks<>+0x50(SB)/4, $0xffffffff	// n = 5
  1157	DATA masks<>+0x54(SB)/4, $0x000000ff
  1158	DATA masks<>+0x58(SB)/4, $0x00000000
  1159	DATA masks<>+0x5c(SB)/4, $0x00000000
  1160		
  1161	DATA masks<>+0x60(SB)/4, $0xffffffff	// n = 6
  1162	DATA masks<>+0x64(SB)/4, $0x0000ffff
  1163	DATA masks<>+0x68(SB)/4, $0x00000000
  1164	DATA masks<>+0x6c(SB)/4, $0x00000000
  1165		
  1166	DATA masks<>+0x70(SB)/4, $0xffffffff	// n = 7
  1167	DATA masks<>+0x74(SB)/4, $0x00ffffff
  1168	DATA masks<>+0x78(SB)/4, $0x00000000
  1169	DATA masks<>+0x7c(SB)/4, $0x00000000
  1170		
  1171	DATA masks<>+0x80(SB)/4, $0xffffffff	// n = 8
  1172	DATA masks<>+0x84(SB)/4, $0xffffffff
  1173	DATA masks<>+0x88(SB)/4, $0x00000000
  1174	DATA masks<>+0x8c(SB)/4, $0x00000000
  1175		
  1176	DATA masks<>+0x90(SB)/4, $0xffffffff	// n = 9
  1177	DATA masks<>+0x94(SB)/4, $0xffffffff
  1178	DATA masks<>+0x98(SB)/4, $0x000000ff
  1179	DATA masks<>+0x9c(SB)/4, $0x00000000
  1180		
  1181	DATA masks<>+0xa0(SB)/4, $0xffffffff	// n = 10
  1182	DATA masks<>+0xa4(SB)/4, $0xffffffff
  1183	DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1184	DATA masks<>+0xac(SB)/4, $0x00000000
  1185		
  1186	DATA masks<>+0xb0(SB)/4, $0xffffffff	// n = 11
  1187	DATA masks<>+0xb4(SB)/4, $0xffffffff
  1188	DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1189	DATA masks<>+0xbc(SB)/4, $0x00000000
  1190		
  1191	DATA masks<>+0xc0(SB)/4, $0xffffffff	// n = 12
  1192	DATA masks<>+0xc4(SB)/4, $0xffffffff
  1193	DATA masks<>+0xc8(SB)/4, $0xffffffff
  1194	DATA masks<>+0xcc(SB)/4, $0x00000000
  1195		
  1196	DATA masks<>+0xd0(SB)/4, $0xffffffff	// n = 13
  1197	DATA masks<>+0xd4(SB)/4, $0xffffffff
  1198	DATA masks<>+0xd8(SB)/4, $0xffffffff
  1199	DATA masks<>+0xdc(SB)/4, $0x000000ff
  1200		
  1201	DATA masks<>+0xe0(SB)/4, $0xffffffff	// n = 14
  1202	DATA masks<>+0xe4(SB)/4, $0xffffffff
  1203	DATA masks<>+0xe8(SB)/4, $0xffffffff
  1204	DATA masks<>+0xec(SB)/4, $0x0000ffff
  1205		
  1206	DATA masks<>+0xf0(SB)/4, $0xffffffff	// n = 15
  1207	DATA masks<>+0xf4(SB)/4, $0xffffffff
  1208	DATA masks<>+0xf8(SB)/4, $0xffffffff
  1209	DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1210	
  1211	GLOBL masks<>(SB),RODATA,$256	// 16 entries of 16 bytes each, read-only
  1212	
  1213	// these are arguments to pshufb. They move data down from
  1214	// the high bytes of the register to the low bytes of the register.
  1215	// index is how many bytes to move; a 0xff selector byte makes pshufb write zero to that position.
  1216	DATA shifts<>+0x00(SB)/4, $0x00000000	// n = 0 (aeshashbody never indexes this entry: zero-length input branches to aes0 first)
  1217	DATA shifts<>+0x04(SB)/4, $0x00000000
  1218	DATA shifts<>+0x08(SB)/4, $0x00000000
  1219	DATA shifts<>+0x0c(SB)/4, $0x00000000
  1220		
  1221	DATA shifts<>+0x10(SB)/4, $0xffffff0f	// n = 1
  1222	DATA shifts<>+0x14(SB)/4, $0xffffffff
  1223	DATA shifts<>+0x18(SB)/4, $0xffffffff
  1224	DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1225		
  1226	DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// n = 2
  1227	DATA shifts<>+0x24(SB)/4, $0xffffffff
  1228	DATA shifts<>+0x28(SB)/4, $0xffffffff
  1229	DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1230		
  1231	DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// n = 3
  1232	DATA shifts<>+0x34(SB)/4, $0xffffffff
  1233	DATA shifts<>+0x38(SB)/4, $0xffffffff
  1234	DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1235		
  1236	DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// n = 4
  1237	DATA shifts<>+0x44(SB)/4, $0xffffffff
  1238	DATA shifts<>+0x48(SB)/4, $0xffffffff
  1239	DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1240		
  1241	DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// n = 5
  1242	DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1243	DATA shifts<>+0x58(SB)/4, $0xffffffff
  1244	DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1245		
  1246	DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// n = 6
  1247	DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1248	DATA shifts<>+0x68(SB)/4, $0xffffffff
  1249	DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1250		
  1251	DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// n = 7
  1252	DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1253	DATA shifts<>+0x78(SB)/4, $0xffffffff
  1254	DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1255		
  1256	DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// n = 8
  1257	DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1258	DATA shifts<>+0x88(SB)/4, $0xffffffff
  1259	DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1260		
  1261	DATA shifts<>+0x90(SB)/4, $0x0a090807	// n = 9
  1262	DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1263	DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1264	DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1265		
  1266	DATA shifts<>+0xa0(SB)/4, $0x09080706	// n = 10
  1267	DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1268	DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1269	DATA shifts<>+0xac(SB)/4, $0xffffffff
  1270		
  1271	DATA shifts<>+0xb0(SB)/4, $0x08070605	// n = 11
  1272	DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1273	DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1274	DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1275		
  1276	DATA shifts<>+0xc0(SB)/4, $0x07060504	// n = 12
  1277	DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1278	DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1279	DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1280		
  1281	DATA shifts<>+0xd0(SB)/4, $0x06050403	// n = 13
  1282	DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1283	DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1284	DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1285		
  1286	DATA shifts<>+0xe0(SB)/4, $0x05040302	// n = 14
  1287	DATA shifts<>+0xe4(SB)/4, $0x09080706
  1288	DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1289	DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1290		
  1291	DATA shifts<>+0xf0(SB)/4, $0x04030201	// n = 15
  1292	DATA shifts<>+0xf4(SB)/4, $0x08070605
  1293	DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1294	DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1295	
  1296	GLOBL shifts<>(SB),RODATA,$256	// 16 entries of 16 bytes each, read-only
  1297	
  1298	TEXT ·checkASM(SB),NOSPLIT,$0-1	// stores a 1-byte result in ret+0(FP): set when both tables are 16-byte aligned
  1299		// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
  1300		MOVL	$masks<>(SB), AX	// AX = address of masks (aeshashbody uses it as a PAND memory operand)
  1301		MOVL	$shifts<>(SB), BX	// BX = address of shifts (aeshashbody uses it as a PSHUFB memory operand)
  1302		ORL	BX, AX	// OR the two addresses so a single test covers both
  1303		TESTL	$15, AX	// is any of the low 4 bits set in either address?
  1304		SETEQ	ret+0(FP)	// ret = 1 iff neither address has a low bit set
  1305		RET
  1306	
  1307	TEXT runtime·return0(SB), NOSPLIT, $0	// sets AX to 0 and returns; touches nothing else
  1308		MOVL	$0, AX
  1309		RET
  1310	
  1311	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1312	// Must obey the gcc calling convention.
  1313	TEXT _cgo_topofstack(SB),NOSPLIT,$0	// result is returned in AX; CX is used as scratch
  1314		get_tls(CX)	// CX = base used by the g(CX) access below
  1315		MOVL	g(CX), AX	// AX = g
  1316		MOVL	g_m(AX), AX	// AX = g->m
  1317		MOVL	m_curg(AX), AX	// AX = g->m->curg
  1318		MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi, the return value
  1319		RET
  1320	
  1321	// The top-most function running on a goroutine
  1322	// returns to goexit+PCQuantum.
  1323	TEXT runtime·goexit(SB),NOSPLIT,$0-0	// there is deliberately no RET: goexit1 does not return
  1324		BYTE	$0x90	// NOP; presumably the padding that goexit+PCQuantum skips over
  1325		CALL	runtime·goexit1(SB)	// does not return
  1326		// traceback from goexit1 must hit code range of goexit
  1327		BYTE	$0x90	// NOP; keeps the CALL's return address inside goexit's code range
  1328	
  1329	// Add a module's moduledata to the linked list of moduledata objects. This
  1330	// is called from .init_array by a function generated in the linker and so
  1331	// follows the platform ABI wrt register preservation -- it only touches AX,
  1332	// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1333	// instead the pointer to the moduledata is passed in AX.
  1334	TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1335	       MOVL    runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1336	       MOVL    AX, moduledata_next(DX)	// tail.next = the new moduledata
  1337	       MOVL    AX, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
  1338	       RET
  1339	
  1340	TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12	// converts the uint32 at a+0(FP) to a float64 at ret+4(FP) via the x87 unit
  1341		MOVL	a+0(FP), AX
  1342		MOVL	AX, 0(SP)	// low word of a 64-bit integer built in the frame
  1343		MOVL	$0, 4(SP)	// high word = 0, so the 64-bit value equals the unsigned input
  1344		FMOVV	0(SP), F0	// load the 8 bytes at 0(SP) onto the x87 stack as a 64-bit integer
  1345		FMOVDP	F0, ret+4(FP)	// store it as a float64 and pop the x87 stack
  1346		RET
  1347	
  1348	TEXT runtime·float64touint32(SB),NOSPLIT,$12-12	// frame: 0(SP) saved control word, 4(SP)..11(SP) 64-bit integer result
  1349		FMOVD	a+0(FP), F0	// push the float64 argument onto the x87 stack
  1350		FSTCW	0(SP)	// save the current x87 control word
  1351		FLDCW	runtime·controlWord64trunc(SB)	// switch control word; presumably selects truncation, per the symbol name
  1352		FMOVVP	F0, 4(SP)	// store as a 64-bit integer at 4(SP) and pop the x87 stack
  1353		FLDCW	0(SP)	// restore the saved control word
  1354		MOVL	4(SP), AX	// the low 32 bits of that integer are the uint32 result
  1355		MOVL	AX, ret+8(FP)
  1356		RET
  1357	
  1358	// gcWriteBarrier performs a heap pointer write and informs the GC.
  1359	//
  1360	// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
  1361	// - DI is the destination of the write
  1362	// - AX is the value being written at DI
  1363	// It clobbers FLAGS. It does not clobber any general-purpose registers,
  1364	// but may clobber others (e.g., SSE registers).
  1365	TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$28	// frame slots: 0 DI, 4 AX, 8 DX, 12 BP, 16 SI, 20 CX, 24 BX
  1366		// Save the registers clobbered by the fast path. This is slightly
  1367		// faster than having the caller spill these.
  1368		MOVL	CX, 20(SP)
  1369		MOVL	BX, 24(SP)
  1370		// TODO: Consider passing g.m.p in as an argument so they can be shared
  1371		// across a sequence of write barriers.
  1372		get_tls(BX)
  1373		MOVL	g(BX), BX	// BX = g
  1374		MOVL	g_m(BX), BX	// BX = g.m
  1375		MOVL	m_p(BX), BX	// BX = g.m.p
  1376		MOVL	(p_wbBuf+wbBuf_next)(BX), CX	// CX = p.wbBuf.next
  1377		// Increment wbBuf.next position.
  1378		LEAL	8(CX), CX	// reserve 8 bytes: a 4-byte value plus a 4-byte old *slot
  1379		MOVL	CX, (p_wbBuf+wbBuf_next)(BX)
  1380		CMPL	CX, (p_wbBuf+wbBuf_end)(BX)	// sets the flags consumed by JEQ below; the MOVLs in between leave them alone
  1381		// Record the write.
  1382		MOVL	AX, -8(CX)	// Record value
  1383		MOVL	(DI), BX	// TODO: This turns bad writes into bad reads.
  1384		MOVL	BX, -4(CX)	// Record *slot
  1385		// Is the buffer full? (flags set in CMPL above)
  1386		JEQ	flush
  1387	ret:
  1388		MOVL	20(SP), CX	// restore the two registers the fast path used
  1389		MOVL	24(SP), BX
  1390		// Do the write.
  1391		MOVL	AX, (DI)
  1392		RET
  1393	
  1394	flush:
  1395		// Save all general purpose registers since these could be
  1396		// clobbered by wbBufFlush and were not saved by the caller.
  1397		MOVL	DI, 0(SP)	// Also first argument to wbBufFlush
  1398		MOVL	AX, 4(SP)	// Also second argument to wbBufFlush
  1399		// BX already saved
  1400		// CX already saved
  1401		MOVL	DX, 8(SP)
  1402		MOVL	BP, 12(SP)
  1403		MOVL	SI, 16(SP)
  1404		// DI already saved
  1405	
  1406		// This takes arguments DI and AX
  1407		CALL	runtime·wbBufFlush(SB)
  1408	
  1409		MOVL	0(SP), DI	// reload everything saved above; CX and BX are reloaded at ret
  1410		MOVL	4(SP), AX
  1411		MOVL	8(SP), DX
  1412		MOVL	12(SP), BP
  1413		MOVL	16(SP), SI
  1414		JMP	ret

View as plain text