...
Run Format

Text file src/runtime/asm_386.s

Documentation: runtime

     1	// Copyright 2009 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	#include "go_asm.h"
     6	#include "go_tls.h"
     7	#include "funcdata.h"
     8	#include "textflag.h"
     9	
    10	// _rt0_386 is common startup code for most 386 systems when using
    11	// internal linking. This is the entry point for the program from the
    12	// kernel for an ordinary -buildmode=exe program. The stack holds the
    13	// number of arguments and the C-style argv.
    14	TEXT _rt0_386(SB),NOSPLIT,$8
    15		MOVL	8(SP), AX	// argc
    16		LEAL	12(SP), BX	// argv
    17		MOVL	AX, 0(SP)	// argc goes where rt0_go reads it: 0(SP)
    18		MOVL	BX, 4(SP)	// argv goes where rt0_go reads it: 4(SP)
    19		JMP	runtime·rt0_go(SB)	// jump, not call: rt0_go expects to be jumped to
    20	
    21	// _rt0_386_lib is common startup code for most 386 systems when
    22	// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    23	// arrange to invoke this function as a global constructor (for
    24	// c-archive) or when the shared library is loaded (for c-shared).
    25	// We expect argc and argv to be passed on the stack following the
    26	// usual C ABI.
    27	TEXT _rt0_386_lib(SB),NOSPLIT,$0
    28		PUSHL	BP
    29		MOVL	SP, BP	// BP = frame base used to read the incoming C arguments
    30		PUSHL	BX	// BX, SI and DI are saved here and popped again at restore
    31		PUSHL	SI
    32		PUSHL	DI
    33	
    34		MOVL	8(BP), AX	// first incoming argument, stashed as argc
    35		MOVL	AX, _rt0_386_lib_argc<>(SB)
    36		MOVL	12(BP), AX	// second incoming argument, stashed as argv
    37		MOVL	AX, _rt0_386_lib_argv<>(SB)
    38	
    39		// Synchronous initialization.
    40		CALL	runtime·libpreinit(SB)
    41	
    42		SUBL	$8, SP	// reserve two 4-byte outgoing argument slots
    43	
    44		// Create a new thread to do the runtime initialization.
    45		MOVL	_cgo_sys_thread_create(SB), AX
    46		TESTL	AX, AX
    47		JZ	nocgo	// pointer is zero: fall back to runtime·newosproc0
    48	
    49		// Align stack to call C function.
    50		// We moved SP to BP above, but BP was clobbered by the libpreinit call.
    51		MOVL	SP, BP	// remember the unaligned SP so it can be put back after the call
    52		ANDL	$~15, SP	// round SP down to a multiple of 16
    53	
    54		MOVL	$_rt0_386_lib_go(SB), BX
    55		MOVL	BX, 0(SP)	// first argument: function the new thread runs
    56		MOVL	$0, 4(SP)	// second argument: 0
    57	
    58		CALL	AX	// call through the _cgo_sys_thread_create pointer
    59	
    60		MOVL	BP, SP	// undo the alignment
    61	
    62		JMP	restore
    63	
    64	nocgo:
    65		MOVL	$0x800000, 0(SP)                    // stacksize = 8192KB
    66		MOVL	$_rt0_386_lib_go(SB), AX
    67		MOVL	AX, 4(SP)                           // fn
    68		CALL	runtime·newosproc0(SB)
    69	
    70	restore:
    71		ADDL	$8, SP	// release the argument slots reserved by SUBL $8 above
    72		POPL	DI	// pops mirror the pushes at entry, in reverse order
    73		POPL	SI
    74		POPL	BX
    75		POPL	BP
    76		RET
    77	
    78	// _rt0_386_lib_go initializes the Go runtime.
    79	// This is started in a separate thread by _rt0_386_lib.
    80	TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
    81		MOVL	_rt0_386_lib_argc<>(SB), AX	// argc saved earlier by _rt0_386_lib
    82		MOVL	AX, 0(SP)	// rt0_go reads argc from 0(SP)
    83		MOVL	_rt0_386_lib_argv<>(SB), AX	// argv saved earlier by _rt0_386_lib
    84		MOVL	AX, 4(SP)	// rt0_go reads argv from 4(SP)
    85		JMP	runtime·rt0_go(SB)	// jump, not call: rt0_go expects to be jumped to
    86	
    87	DATA _rt0_386_lib_argc<>(SB)/4, $0	// file-local 4-byte slot for argc, initially 0
    88	GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
    89	DATA _rt0_386_lib_argv<>(SB)/4, $0	// file-local 4-byte slot for argv, initially 0
    90	GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
    91	
    92	TEXT runtime·rt0_go(SB),NOSPLIT,$0	// runtime bootstrap: probe the CPU, set up TLS and g0/m0, then start the main goroutine
    93		// Copy arguments forward on an even stack.
    94		// Users of this function jump to it, they don't call it.
    95		MOVL	0(SP), AX	// argc, placed here by the _rt0_386* entry code
    96		MOVL	4(SP), BX	// argv, placed here by the _rt0_386* entry code
    97		SUBL	$128, SP		// plenty of scratch
    98		ANDL	$~15, SP	// round SP down to a multiple of 16
    99		MOVL	AX, 120(SP)		// save argc, argv away
   100		MOVL	BX, 124(SP)
   101	
   102		// set default stack bounds.
   103		// _cgo_init may update stackguard.
   104		MOVL	$runtime·g0(SB), BP	// BP = &g0
   105		LEAL	(-64*1024+104)(SP), BX	// BX = SP - 64KB + 104, used as the default low bound
   106		MOVL	BX, g_stackguard0(BP)
   107		MOVL	BX, g_stackguard1(BP)
   108		MOVL	BX, (g_stack+stack_lo)(BP)
   109		MOVL	SP, (g_stack+stack_hi)(BP)	// stack.hi = current SP
   110		
   111		// find out information about the processor we're on
   112	#ifdef GOOS_nacl // NaCl doesn't like PUSHFL/POPFL
   113		JMP 	has_cpuid
   114	#else
   115		// first see if CPUID instruction is supported.
   116		PUSHFL	// saved copy of EFLAGS, restored by the final POPFL below
   117		PUSHFL	// working copy, modified in place
   118		XORL	$(1<<21), 0(SP) // flip ID bit
   119		POPFL	// try to load EFLAGS with the ID bit flipped
   120		PUSHFL	// read EFLAGS back...
   121		POPL	AX	// ...into AX
   122		XORL	0(SP), AX	// AX = bits that differ from the saved copy
   123		POPFL	// restore EFLAGS
   124		TESTL	$(1<<21), AX	// did the ID bit actually change?
   125		JNE 	has_cpuid
   126	#endif
   127	
   128	bad_proc: // show that the program requires MMX.
   129		MOVL	$2, 0(SP)
   130		MOVL	$bad_proc_msg<>(SB), 4(SP)
   131		MOVL	$0x3d, 8(SP)	// 0x3d is the declared size of bad_proc_msg
   132		CALL	runtime·write(SB)	// write(2, bad_proc_msg, 0x3d)
   133		MOVL	$1, 0(SP)
   134		CALL	runtime·exit(SB)	// exit(1)
   135		INT	$3	// trap if exit returns
   136	
   137	has_cpuid:
   138		MOVL	$0, AX	// CPUID leaf 0
   139		CPUID
   140		MOVL	AX, SI	// SI = AX returned by leaf 0; compared against 7 at eax7
   141		CMPL	AX, $0
   142		JE	nocpuinfo	// leaf 0 returned 0 in AX: skip all feature detection
   143	
   144		// Figure out how to serialize RDTSC.
   145		// On Intel processors LFENCE is enough. AMD requires MFENCE.
   146		// Don't know about the rest, so let's do MFENCE.
   147		CMPL	BX, $0x756E6547  // "Genu"
   148		JNE	notintel
   149		CMPL	DX, $0x49656E69  // "ineI"
   150		JNE	notintel
   151		CMPL	CX, $0x6C65746E  // "ntel"
   152		JNE	notintel
   153		MOVB	$1, runtime·isIntel(SB)
   154		MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
   155	notintel:
   156	
   157		// Load EAX=1 cpuid flags
   158		MOVL	$1, AX
   159		CPUID
   160		MOVL	CX, DI // Move to global variable clobbers CX when generating PIC
   161		MOVL	AX, runtime·processorVersionInfo(SB)
   162	
   163		// Check for MMX support
   164		TESTL	$(1<<23), DX // MMX
   165		JZ	bad_proc	// no MMX: print the message above and exit
   166	
   167		TESTL	$(1<<26), DX // SSE2
   168		SETNE	runtime·support_sse2(SB)
   169	
   170		TESTL	$(1<<9), DI // SSSE3
   171		SETNE	runtime·support_ssse3(SB)
   172	
   173		TESTL	$(1<<19), DI // SSE4.1
   174		SETNE	runtime·support_sse41(SB)
   175	
   176		TESTL	$(1<<20), DI // SSE4.2
   177		SETNE	runtime·support_sse42(SB)
   178	
   179		TESTL	$(1<<23), DI // POPCNT
   180		SETNE	runtime·support_popcnt(SB)
   181	
   182		TESTL	$(1<<25), DI // AES
   183		SETNE	runtime·support_aes(SB)
   184	
   185		TESTL	$(1<<27), DI // OSXSAVE
   186		SETNE	runtime·support_osxsave(SB)
   187	
   188		// If OS support for XMM and YMM is not present
   189		// support_avx will be set back to false later.
   190		TESTL	$(1<<28), DI // AVX
   191		SETNE	runtime·support_avx(SB)
   192	
   193	eax7:
   194		// Load EAX=7/ECX=0 cpuid flags
   195		CMPL	SI, $7	// SI holds the AX value returned by CPUID leaf 0
   196		JLT	osavx	// below 7: skip the leaf 7 query
   197		MOVL	$7, AX
   198		MOVL	$0, CX
   199		CPUID
   200	
   201		TESTL	$(1<<3), BX // BMI1
   202		SETNE	runtime·support_bmi1(SB)
   203	
   204		// If OS support for XMM and YMM is not present
   205		// support_avx2 will be set back to false later.
   206		TESTL	$(1<<5), BX	// AVX2
   207		SETNE	runtime·support_avx2(SB)
   208	
   209		TESTL	$(1<<8), BX // BMI2
   210		SETNE	runtime·support_bmi2(SB)
   211	
   212		TESTL	$(1<<9), BX // ERMS
   213		SETNE	runtime·support_erms(SB)
   214	
   215	osavx:
   216		// nacl does not support XGETBV to test
   217		// for XMM and YMM OS support.
   218	#ifndef GOOS_nacl
   219		CMPB	runtime·support_osxsave(SB), $1
   220		JNE	noavx	// no OSXSAVE: XGETBV is not usable, so clear the AVX flags
   221		MOVL	$0, CX
   222		// For XGETBV, OSXSAVE bit is required and sufficient
   223		XGETBV
   224		ANDL	$6, AX	// keep only bits 1 and 2
   225		CMPL	AX, $6 // Check for OS support of XMM and YMM registers.
   226		JE nocpuinfo	// both bits set: leave the AVX flags as detected
   227	#endif
   228	noavx:
   229		MOVB $0, runtime·support_avx(SB)
   230		MOVB $0, runtime·support_avx2(SB)
   231	
   232	nocpuinfo:
   233		// if there is an _cgo_init, call it to let it
   234		// initialize and to set up GS.  if not,
   235		// we set up GS ourselves.
   236		MOVL	_cgo_init(SB), AX
   237		TESTL	AX, AX
   238		JZ	needtls	// _cgo_init pointer is zero
   239		MOVL	$setg_gcc<>(SB), BX
   240		MOVL	BX, 4(SP)	// second argument: address of setg_gcc
   241		MOVL	BP, 0(SP)	// first argument: &g0 (BP was loaded with it above)
   242		CALL	AX
   243	
   244		// update stackguard after _cgo_init
   245		MOVL	$runtime·g0(SB), CX
   246		MOVL	(g_stack+stack_lo)(CX), AX
   247		ADDL	$const__StackGuard, AX	// stackguard = stack.lo + _StackGuard
   248		MOVL	AX, g_stackguard0(CX)
   249		MOVL	AX, g_stackguard1(CX)
   250	
   251	#ifndef GOOS_windows
   252		// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
   253		JMP ok
   254	#endif
   255	needtls:
   256	#ifdef GOOS_plan9
   257		// skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
   258		JMP	ok
   259	#endif
   260	
   261		// set up %gs
   262		CALL	runtime·ldt0setup(SB)
   263	
   264		// store through it, to make sure it works
   265		get_tls(BX)
   266		MOVL	$0x123, g(BX)	// write a marker through the TLS slot...
   267		MOVL	runtime·m0+m_tls(SB), AX	// ...and read it back directly from m0.tls
   268		CMPL	AX, $0x123
   269		JEQ	ok
   270		MOVL	AX, 0	// abort
   271	ok:
   272		// set up m and g "registers"
   273		get_tls(BX)
   274		LEAL	runtime·g0(SB), DX
   275		MOVL	DX, g(BX)	// current g = g0
   276		LEAL	runtime·m0(SB), AX
   277	
   278		// save m->g0 = g0
   279		MOVL	DX, m_g0(AX)
   280		// save g0->m = m0
   281		MOVL	AX, g_m(DX)
   282	
   283		CALL	runtime·emptyfunc(SB)	// fault if stack check is wrong
   284	
   285		// convention is D is always cleared
   286		CLD
   287	
   288		CALL	runtime·check(SB)
   289	
   290		// saved argc, argv
   291		MOVL	120(SP), AX	// argc saved at the top of this function
   292		MOVL	AX, 0(SP)
   293		MOVL	124(SP), AX	// argv saved at the top of this function
   294		MOVL	AX, 4(SP)
   295		CALL	runtime·args(SB)
   296		CALL	runtime·osinit(SB)
   297		CALL	runtime·schedinit(SB)
   298	
   299		// create a new goroutine to start program
   300		PUSHL	$runtime·mainPC(SB)	// entry
   301		PUSHL	$0	// arg size
   302		CALL	runtime·newproc(SB)
   303		POPL	AX	// discard the two words pushed for newproc
   304		POPL	AX
   305	
   306		// start this M
   307		CALL	runtime·mstart(SB)
   308	
   309		INT $3	// trap if mstart returns
   310		RET
   311	
   312	DATA	bad_proc_msg<>+0x00(SB)/8, $"This pro"	// message written by rt0_go at bad_proc, stored in 8-byte pieces
   313	DATA	bad_proc_msg<>+0x08(SB)/8, $"gram can"
   314	DATA	bad_proc_msg<>+0x10(SB)/8, $" only be"
   315	DATA	bad_proc_msg<>+0x18(SB)/8, $" run on "
   316	DATA	bad_proc_msg<>+0x20(SB)/8, $"processo"
   317	DATA	bad_proc_msg<>+0x28(SB)/8, $"rs with "
   318	DATA	bad_proc_msg<>+0x30(SB)/8, $"MMX supp"
   319	DATA	bad_proc_msg<>+0x38(SB)/4, $"ort."
   320	DATA	bad_proc_msg<>+0x3c(SB)/1, $0xa	// trailing newline
   321	GLOBL	bad_proc_msg<>(SB), RODATA, $0x3d	// 0x3d bytes total; rt0_go passes the same length to runtime·write
   322	
   323	DATA	runtime·mainPC+0(SB)/4,$runtime·main(SB)	// 4-byte word holding the address of runtime·main
   324	GLOBL	runtime·mainPC(SB),RODATA,$4	// rt0_go pushes &mainPC as the entry argument to newproc
   325	
   326	TEXT runtime·breakpoint(SB),NOSPLIT,$0-0	// executes a single INT $3 and returns; no arguments, no results
   327		INT $3	// x86 breakpoint trap
   328		RET
   329	
   330	TEXT runtime·asminit(SB),NOSPLIT,$0-0	// loads the x87 FPU control word from runtime·controlWord64
   331		// Linux and MinGW start the FPU in extended double precision.
   332		// Other operating systems use double precision.
   333		// Change to double precision to match them,
   334		// and to match other hardware that only has double.
   335		FLDCW	runtime·controlWord64(SB)	// FLDCW = load x87 control word from memory
   336		RET
   337	
   338	/*
   339	 *  go-routine
   340	 */
   341	
   342	// void gosave(Gobuf*)
   343	// save state in Gobuf; setjmp. Records the caller's SP and PC and the current g, and zeroes gobuf.ret.
   344	TEXT runtime·gosave(SB), NOSPLIT, $0-4
   345		MOVL	buf+0(FP), AX		// gobuf
   346		LEAL	buf+0(FP), BX		// caller's SP
   347		MOVL	BX, gobuf_sp(AX)
   348		MOVL	0(SP), BX		// caller's PC
   349		MOVL	BX, gobuf_pc(AX)
   350		MOVL	$0, gobuf_ret(AX)
   351		// Assert ctxt is zero. See func save.
   352		MOVL	gobuf_ctxt(AX), BX
   353		TESTL	BX, BX
   354		JZ	2(PC)	// ctxt is zero: skip the badctxt call
   355		CALL	runtime·badctxt(SB)
   356		get_tls(CX)
   357		MOVL	g(CX), BX	// BX = current g
   358		MOVL	BX, gobuf_g(AX)
   359		RET
   360	
   361	// void gogo(Gobuf*)
   362	// restore state from Gobuf; longjmp. Does not return: it jumps to gobuf.pc on the stack at gobuf.sp.
   363	TEXT runtime·gogo(SB), NOSPLIT, $8-4
   364		MOVL	buf+0(FP), BX		// gobuf
   365		MOVL	gobuf_g(BX), DX
   366		MOVL	0(DX), CX		// make sure g != nil
   367		get_tls(CX)
   368		MOVL	DX, g(CX)	// make gobuf.g the current g
   369		MOVL	gobuf_sp(BX), SP	// restore SP
   370		MOVL	gobuf_ret(BX), AX	// AX = saved gobuf.ret
   371		MOVL	gobuf_ctxt(BX), DX	// DX = saved gobuf.ctxt
   372		MOVL	$0, gobuf_sp(BX)	// clear to help garbage collector
   373		MOVL	$0, gobuf_ret(BX)
   374		MOVL	$0, gobuf_ctxt(BX)
   375		MOVL	gobuf_pc(BX), BX	// BX = saved PC (the gobuf pointer is no longer needed)
   376		JMP	BX	// resume execution at the saved PC
   377	
   378	// func mcall(fn func(*g))
   379	// Switch to m->g0's stack, call fn(g).
   380	// Fn must never return. It should gogo(&g->sched)
   381	// to keep running g.
   382	TEXT runtime·mcall(SB), NOSPLIT, $0-4
   383		MOVL	fn+0(FP), DI	// DI = fn (func value)
   384	
   385		get_tls(DX)
   386		MOVL	g(DX), AX	// save state in g->sched
   387		MOVL	0(SP), BX	// caller's PC
   388		MOVL	BX, (g_sched+gobuf_pc)(AX)
   389		LEAL	fn+0(FP), BX	// caller's SP
   390		MOVL	BX, (g_sched+gobuf_sp)(AX)
   391		MOVL	AX, (g_sched+gobuf_g)(AX)
   392	
   393		// switch to m->g0 & its stack, call fn
   394		MOVL	g(DX), BX
   395		MOVL	g_m(BX), BX	// BX = g.m
   396		MOVL	m_g0(BX), SI	// SI = m.g0
   397		CMPL	SI, AX	// if g == m->g0 call badmcall
   398		JNE	3(PC)	// g != g0: skip the two badmcall instructions
   399		MOVL	$runtime·badmcall(SB), AX
   400		JMP	AX
   401		MOVL	SI, g(DX)	// g = m->g0
   402		MOVL	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
   403		PUSHL	AX	// argument for fn: the g that called mcall
   404		MOVL	DI, DX	// DX = fn, the func value being called
   405		MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   406		CALL	DI
   407		POPL	AX	// only reached if fn returned, which it must not
   408		MOVL	$runtime·badmcall2(SB), AX
   409		JMP	AX
   410		RET
   411	
   412	// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   413	// of the G stack. We need to distinguish the routine that
   414	// lives at the bottom of the G stack from the one that lives
   415	// at the top of the system stack because the one at the top of
   416	// the system stack terminates the stack walk (see topofstack()).
   417	TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0	// systemstack stores this symbol's address as g->sched.pc
   418		RET
   419	
   420	// func systemstack(fn func()): run fn on the g0 stack; when already on g0 or gsignal, tail-call fn on the current stack.
   421	TEXT runtime·systemstack(SB), NOSPLIT, $0-4
   422		MOVL	fn+0(FP), DI	// DI = fn
   423		get_tls(CX)
   424		MOVL	g(CX), AX	// AX = g
   425		MOVL	g_m(AX), BX	// BX = m
   426	
   427		MOVL	m_gsignal(BX), DX	// DX = gsignal
   428		CMPL	AX, DX
   429		JEQ	noswitch	// running on gsignal: no stack switch
   430	
   431		MOVL	m_g0(BX), DX	// DX = g0
   432		CMPL	AX, DX
   433		JEQ	noswitch	// running on g0: no stack switch
   434	
   435		MOVL	m_curg(BX), BP
   436		CMPL	AX, BP
   437		JEQ	switch	// running on m.curg: switch to g0
   438		
   439		// Bad: g is not gsignal, not g0, not curg. What is it?
   440		// Hide call from linker nosplit analysis.
   441		MOVL	$runtime·badsystemstack(SB), AX
   442		CALL	AX
   443	
   444	switch:
   445		// save our state in g->sched. Pretend to
   446		// be systemstack_switch if the G stack is scanned.
   447		MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
   448		MOVL	SP, (g_sched+gobuf_sp)(AX)
   449		MOVL	AX, (g_sched+gobuf_g)(AX)
   450	
   451		// switch to g0
   452		get_tls(CX)
   453		MOVL	DX, g(CX)	// DX still holds g0 from the check above
   454		MOVL	(g_sched+gobuf_sp)(DX), BX	// BX = g0's saved SP
   455		// make it look like mstart called systemstack on g0, to stop traceback
   456		SUBL	$4, BX	// make room for one word...
   457		MOVL	$runtime·mstart(SB), DX
   458		MOVL	DX, 0(BX)	// ...holding the address of mstart as a fake return PC
   459		MOVL	BX, SP
   460	
   461		// call target function
   462		MOVL	DI, DX	// DX = fn, the func value being called
   463		MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   464		CALL	DI
   465	
   466		// switch back to g
   467		get_tls(CX)
   468		MOVL	g(CX), AX
   469		MOVL	g_m(AX), BX
   470		MOVL	m_curg(BX), AX	// AX = m.curg, the g we switched away from
   471		MOVL	AX, g(CX)
   472		MOVL	(g_sched+gobuf_sp)(AX), SP	// back onto the SP saved at switch
   473		MOVL	$0, (g_sched+gobuf_sp)(AX)	// clear the saved SP now that it has been consumed
   474		RET
   475	
   476	noswitch:
   477		// already on system stack; tail call the function
   478		// Using a tail call here cleans up tracebacks since we won't stop
   479		// at an intermediate systemstack.
   480		MOVL	DI, DX	// DX = fn, the func value being called
   481		MOVL	0(DI), DI	// DI = code pointer stored in the first word of fn
   482		JMP	DI
   483	
   484	/*
   485	 * support for morestack
   486	 */
   487	
   488	// Called during function prolog when more stack is needed.
   489	//
   490	// The traceback routines see morestack on a g0 as being
   491	// the top of a stack (for example, morestack calling newstack
   492	// calling the scheduler calling newm calling gc), so we must
   493	// record an argument size. For that purpose, it has no arguments.
   494	TEXT runtime·morestack(SB),NOSPLIT,$0-0
   495		// Cannot grow scheduler stack (m->g0).
   496		get_tls(CX)
   497		MOVL	g(CX), BX
   498		MOVL	g_m(BX), BX	// BX = m, kept live until the switch to g0 below
   499		MOVL	m_g0(BX), SI
   500		CMPL	g(CX), SI
   501		JNE	3(PC)	// not on g0: skip the badmorestackg0 call and trap
   502		CALL	runtime·badmorestackg0(SB)
   503		INT	$3
   504	
   505		// Cannot grow signal stack.
   506		MOVL	m_gsignal(BX), SI
   507		CMPL	g(CX), SI
   508		JNE	3(PC)	// not on gsignal: skip the badmorestackgsignal call and trap
   509		CALL	runtime·badmorestackgsignal(SB)
   510		INT	$3
   511	
   512		// Called from f.
   513		// Set m->morebuf to f's caller.
   514		MOVL	4(SP), DI	// f's caller's PC
   515		MOVL	DI, (m_morebuf+gobuf_pc)(BX)
   516		LEAL	8(SP), CX	// f's caller's SP
   517		MOVL	CX, (m_morebuf+gobuf_sp)(BX)
   518		get_tls(CX)	// CX was just used as scratch, so reload the TLS base
   519		MOVL	g(CX), SI	// SI = current g
   520		MOVL	SI, (m_morebuf+gobuf_g)(BX)
   521	
   522		// Set g->sched to context in f.
   523		MOVL	0(SP), AX	// f's PC
   524		MOVL	AX, (g_sched+gobuf_pc)(SI)
   525		MOVL	SI, (g_sched+gobuf_g)(SI)
   526		LEAL	4(SP), AX	// f's SP
   527		MOVL	AX, (g_sched+gobuf_sp)(SI)
   528		MOVL	DX, (g_sched+gobuf_ctxt)(SI)	// DX as received on entry; morestack_noctxt zeroes it first
   529	
   530		// Call newstack on m->g0's stack.
   531		MOVL	m_g0(BX), BP
   532		MOVL	BP, g(CX)	// current g = g0
   533		MOVL	(g_sched+gobuf_sp)(BP), AX
   534		MOVL	-4(AX), BX	// fault if CALL would, before smashing SP
   535		MOVL	AX, SP
   536		CALL	runtime·newstack(SB)
   537		MOVL	$0, 0x1003	// crash if newstack returns
   538		RET
   539	
   540	TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0	// morestack entry that zeroes DX first
   541		MOVL	$0, DX	// morestack stores DX into g->sched.ctxt
   542		JMP runtime·morestack(SB)
   543	
   544	// reflectcall: call a function with the given argument list
   545	// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
   546	// we don't have variable-sized frames, so we use a small number
   547	// of constant-sized-frame functions to encode a few bits of size in the pc.
   548	// Caution: ugly multiline assembly macros in your future!
   549	// DISPATCH(NAME, MAXSIZE): if CX <= MAXSIZE (unsigned compare), jump to NAME; otherwise fall through to the next statement.
   550	#define DISPATCH(NAME,MAXSIZE)		\
   551		CMPL	CX, $MAXSIZE;		\
   552		JA	3(PC);			\
   553		MOVL	$NAME(SB), AX;		\
   554		JMP	AX
   555	// Note: can't just "JMP NAME(SB)" - bad inlining results.
   556	
   557	TEXT reflect·call(SB), NOSPLIT, $0-0	// package reflect's entry point; forwards straight to ·reflectcall
   558		JMP	·reflectcall(SB)	// tail jump: no frame of its own
   559	
   560	TEXT ·reflectcall(SB), NOSPLIT, $0-20	// picks the smallest callN stub whose frame can hold argsize bytes and jumps to it
   561		MOVL	argsize+12(FP), CX	// CX = argsize, the value each DISPATCH compares against
   562		DISPATCH(runtime·call16, 16)
   563		DISPATCH(runtime·call32, 32)
   564		DISPATCH(runtime·call64, 64)
   565		DISPATCH(runtime·call128, 128)
   566		DISPATCH(runtime·call256, 256)
   567		DISPATCH(runtime·call512, 512)
   568		DISPATCH(runtime·call1024, 1024)
   569		DISPATCH(runtime·call2048, 2048)
   570		DISPATCH(runtime·call4096, 4096)
   571		DISPATCH(runtime·call8192, 8192)
   572		DISPATCH(runtime·call16384, 16384)
   573		DISPATCH(runtime·call32768, 32768)
   574		DISPATCH(runtime·call65536, 65536)
   575		DISPATCH(runtime·call131072, 131072)
   576		DISPATCH(runtime·call262144, 262144)
   577		DISPATCH(runtime·call524288, 524288)
   578		DISPATCH(runtime·call1048576, 1048576)
   579		DISPATCH(runtime·call2097152, 2097152)
   580		DISPATCH(runtime·call4194304, 4194304)
   581		DISPATCH(runtime·call8388608, 8388608)
   582		DISPATCH(runtime·call16777216, 16777216)
   583		DISPATCH(runtime·call33554432, 33554432)
   584		DISPATCH(runtime·call67108864, 67108864)
   585		DISPATCH(runtime·call134217728, 134217728)
   586		DISPATCH(runtime·call268435456, 268435456)
   587		DISPATCH(runtime·call536870912, 536870912)
   588		DISPATCH(runtime·call1073741824, 1073741824)
   589		MOVL	$runtime·badreflectcall(SB), AX	// argsize is larger than every stub's frame
   590		JMP	AX
   591	
   592	#define CALLFN(NAME,MAXSIZE)			\
   593	TEXT NAME(SB), WRAPPER, $MAXSIZE-20;		\
   594		NO_LOCAL_POINTERS;			\
   595		/* copy argsize bytes of arguments from argptr to the bottom of this frame */		\
   596		MOVL	argptr+8(FP), SI;		\
   597		MOVL	argsize+12(FP), CX;		\
   598		MOVL	SP, DI;				\
   599		REP;MOVSB;				\
   600		/* call the code pointer stored in the first word of f, with DX = f */			\
   601		MOVL	f+4(FP), DX;			\
   602		MOVL	(DX), AX; 			\
   603		PCDATA  $PCDATA_StackMapIndex, $0;	\
   604		CALL	AX;				\
   605		/* copy return values back: hand callRet the bytes from retoffset onward (DX=argtype, DI=dst, SI=src, CX=count) */		\
   606		MOVL	argtype+0(FP), DX;		\
   607		MOVL	argptr+8(FP), DI;		\
   608		MOVL	argsize+12(FP), CX;		\
   609		MOVL	retoffset+16(FP), BX;		\
   610		MOVL	SP, SI;				\
   611		ADDL	BX, DI;				\
   612		ADDL	BX, SI;				\
   613		SUBL	BX, CX;				\
   614		CALL	callRet<>(SB);			\
   615		RET
   616	
   617	// callRet copies return values back at the end of call*. This is a
   618	// separate function so it can allocate stack space for the arguments
   619	// to reflectcallmove. It does not follow the Go ABI; it expects its
   620	// arguments in registers: DX, DI, SI and CX, which become reflectcallmove's four stack arguments in that order.
   621	TEXT callRet<>(SB), NOSPLIT, $16-0
   622		MOVL	DX, 0(SP)	// CALLFN loads DX with argtype
   623		MOVL	DI, 4(SP)	// CALLFN loads DI with argptr + retoffset
   624		MOVL	SI, 8(SP)	// CALLFN loads SI with its frame SP + retoffset
   625		MOVL	CX, 12(SP)	// CALLFN loads CX with argsize - retoffset
   626		CALL	runtime·reflectcallmove(SB)
   627		RET
   628	
   629	CALLFN(·call16, 16)	// one fixed-frame stub per size; the sizes mirror the DISPATCH list in ·reflectcall
   630	CALLFN(·call32, 32)
   631	CALLFN(·call64, 64)
   632	CALLFN(·call128, 128)
   633	CALLFN(·call256, 256)
   634	CALLFN(·call512, 512)
   635	CALLFN(·call1024, 1024)
   636	CALLFN(·call2048, 2048)
   637	CALLFN(·call4096, 4096)
   638	CALLFN(·call8192, 8192)
   639	CALLFN(·call16384, 16384)
   640	CALLFN(·call32768, 32768)
   641	CALLFN(·call65536, 65536)
   642	CALLFN(·call131072, 131072)
   643	CALLFN(·call262144, 262144)
   644	CALLFN(·call524288, 524288)
   645	CALLFN(·call1048576, 1048576)
   646	CALLFN(·call2097152, 2097152)
   647	CALLFN(·call4194304, 4194304)
   648	CALLFN(·call8388608, 8388608)
   649	CALLFN(·call16777216, 16777216)
   650	CALLFN(·call33554432, 33554432)
   651	CALLFN(·call67108864, 67108864)
   652	CALLFN(·call134217728, 134217728)
   653	CALLFN(·call268435456, 268435456)
   654	CALLFN(·call536870912, 536870912)
   655	CALLFN(·call1073741824, 1073741824)
   656	
   657	TEXT runtime·procyield(SB),NOSPLIT,$0-4	// executes PAUSE `cycles` times; $0-4 declares the one 4-byte argument read below
   658		MOVL	cycles+0(FP), AX	// AX = remaining iterations
   659	again:
   660		PAUSE
   661		SUBL	$1, AX
   662		JNZ	again	// NOTE: cycles must be nonzero; 0 would wrap around and spin 2^32 times
   663		RET
   664	
   665	TEXT ·publicationBarrier(SB),NOSPLIT,$0-0	// deliberately empty: the body is a bare RET
   666		// Stores are already ordered on x86, so this is just a
   667		// compile barrier.
   668		RET
   669	
   670	// void jmpdefer(fn, sp);
   671	// called from deferreturn.
   672	// 1. pop the caller
   673	// 2. sub 5 bytes (the length of CALL & a 32 bit displacement) from the callers
   674	//    return (when building for shared libraries, subtract 16 bytes -- 5 bytes
   675	//    for CALL & displacement to call __x86.get_pc_thunk.cx, 6 bytes for the
   676	//    LEAL to load the offset into BX, and finally 5 for the call & displacement)
   677	// 3. jmp to the argument
   678	TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
   679		MOVL	fv+0(FP), DX	// fn
   680		MOVL	argp+4(FP), BX	// caller sp
   681		LEAL	-4(BX), SP	// caller sp after CALL
   682	#ifdef GOBUILDMODE_shared
   683		SUBL	$16, (SP)	// return to CALL again
   684	#else
   685		SUBL	$5, (SP)	// return to CALL again
   686	#endif
   687		MOVL	0(DX), BX	// BX = code pointer stored in the first word of fn; DX keeps fn itself
   688		JMP	BX	// but first run the deferred function
   689	
   690	// Save state of caller into g->sched. AX and BX are pushed on entry and popped before returning.
   691	TEXT gosave<>(SB),NOSPLIT,$0
   692		PUSHL	AX
   693		PUSHL	BX
   694		get_tls(BX)
   695		MOVL	g(BX), BX	// BX = current g
   696		LEAL	arg+0(FP), AX	// AX = address just above our return PC, saved as the caller's SP
   697		MOVL	AX, (g_sched+gobuf_sp)(BX)
   698		MOVL	-4(AX), AX	// AX = word below that address: our return PC, saved as the caller's PC
   699		MOVL	AX, (g_sched+gobuf_pc)(BX)
   700		MOVL	$0, (g_sched+gobuf_ret)(BX)
   701		// Assert ctxt is zero. See func save.
   702		MOVL	(g_sched+gobuf_ctxt)(BX), AX
   703		TESTL	AX, AX
   704		JZ	2(PC)	// ctxt is zero: skip the badctxt call
   705		CALL	runtime·badctxt(SB)
   706		POPL	BX
   707		POPL	AX
   708		RET
   709	
   710	// func asmcgocall(fn, arg unsafe.Pointer) int32
   711	// Call fn(arg) on the scheduler stack,
   712	// aligned appropriately for the gcc ABI.
   713	// See cgocall.go for more details.
   714	TEXT ·asmcgocall(SB),NOSPLIT,$0-12
   715		MOVL	fn+0(FP), AX
   716		MOVL	arg+4(FP), BX
   717	
   718		MOVL	SP, DX	// DX = SP on entry, used below to compute the saved stack depth
   719	
   720		// Figure out if we need to switch to m->g0 stack.
   721		// We get called to create new OS threads too, and those
   722		// come in on the m->g0 stack already.
   723		get_tls(CX)
   724		MOVL	g(CX), BP
   725		MOVL	g_m(BP), BP
   726		MOVL	m_g0(BP), SI	// SI = m.g0
   727		MOVL	g(CX), DI	// DI = current g
   728		CMPL	SI, DI
   729		JEQ	noswitch	// already on g0
   730		CALL	gosave<>(SB)	// record the caller's SP/PC in g->sched before leaving this stack
   731		get_tls(CX)
   732		MOVL	SI, g(CX)	// current g = g0
   733		MOVL	(g_sched+gobuf_sp)(SI), SP	// continue on g0's saved SP
   734	
   735	noswitch:
   736		// Now on a scheduling stack (a pthread-created stack).
   737		SUBL	$32, SP
   738		ANDL	$~15, SP	// alignment, perhaps unnecessary
   739		MOVL	DI, 8(SP)	// save g
   740		MOVL	(g_stack+stack_hi)(DI), DI
   741		SUBL	DX, DI	// DI = g.stack.hi - SP at entry
   742		MOVL	DI, 4(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   743		MOVL	BX, 0(SP)	// first argument in x86-32 ABI
   744		CALL	AX
   745	
   746		// Restore registers, g, stack pointer.
   747		get_tls(CX)
   748		MOVL	8(SP), DI	// DI = the g saved before the call
   749		MOVL	(g_stack+stack_hi)(DI), SI
   750		SUBL	4(SP), SI	// SI = g.stack.hi - saved depth, i.e. the original SP
   751		MOVL	DI, g(CX)
   752		MOVL	SI, SP
   753	
   754		MOVL	AX, ret+8(FP)	// AX as left by the called function becomes the int32 result
   755		RET
   756	
   757	// cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
   758	// Turn the fn into a Go func (by taking its address) and call
   759	// cgocallback_gofunc.
   760	TEXT runtime·cgocallback(SB),NOSPLIT,$16-16
   761		LEAL	fn+0(FP), AX	// address of the fn argument slot, passed on as the FuncVal*
   762		MOVL	AX, 0(SP)
   763		MOVL	frame+4(FP), AX	// frame, framesize and ctxt are forwarded unchanged
   764		MOVL	AX, 4(SP)
   765		MOVL	framesize+8(FP), AX
   766		MOVL	AX, 8(SP)
   767		MOVL	ctxt+12(FP), AX
   768		MOVL	AX, 12(SP)
   769		MOVL	$runtime·cgocallback_gofunc(SB), AX
   770		CALL	AX	// indirect call through AX
   771		RET
   772	
   773	// cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
   774	// See cgocall.go for more details.
   775	TEXT ·cgocallback_gofunc(SB),NOSPLIT,$12-16
   776		NO_LOCAL_POINTERS
   777	
   778		// If g is nil, Go did not create the current thread.
   779		// Call needm to obtain one for temporary use.
   780		// In this case, we're running on the thread stack, so there's
   781		// lots of space, but the linker doesn't know. Hide the call from
   782		// the linker analysis by using an indirect call through AX.
   783		get_tls(CX)
   784	#ifdef GOOS_windows
   785		MOVL	$0, BP
   786		CMPL	CX, $0
   787		JEQ	2(PC) // TODO
   788	#endif
   789		MOVL	g(CX), BP	// BP = current g, or nil
   790		CMPL	BP, $0
   791		JEQ	needm
   792		MOVL	g_m(BP), BP
   793		MOVL	BP, DX // saved copy of oldm
   794		JMP	havem
   795	needm:
   796		MOVL	$0, 0(SP)
   797		MOVL	$runtime·needm(SB), AX
   798		CALL	AX
   799		MOVL	0(SP), DX	// DX = word at 0(SP) after needm; zero selects the dropm call at the end
   800		get_tls(CX)
   801		MOVL	g(CX), BP
   802		MOVL	g_m(BP), BP	// BP = m of the g now installed
   803	
   804		// Set m->sched.sp = SP, so that if a panic happens
   805		// during the function we are about to execute, it will
   806		// have a valid SP to run on the g0 stack.
   807		// The next few lines (after the havem label)
   808		// will save this SP onto the stack and then write
   809		// the same SP back to m->sched.sp. That seems redundant,
   810		// but if an unrecovered panic happens, unwindm will
   811		// restore the g->sched.sp from the stack location
   812		// and then systemstack will try to use it. If we don't set it here,
   813		// that restored SP will be uninitialized (typically 0) and
   814		// will not be usable.
   815		MOVL	m_g0(BP), SI
   816		MOVL	SP, (g_sched+gobuf_sp)(SI)
   817	
   818	havem:
   819		// Now there's a valid m, and we're running on its m->g0.
   820		// Save current m->g0->sched.sp on stack and then set it to SP.
   821		// Save current sp in m->g0->sched.sp in preparation for
   822		// switch back to m->curg stack.
   823		// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
   824		MOVL	m_g0(BP), SI
   825		MOVL	(g_sched+gobuf_sp)(SI), AX
   826		MOVL	AX, 0(SP)	// old g0 sched.sp, restored from here at the end
   827		MOVL	SP, (g_sched+gobuf_sp)(SI)
   828	
   829		// Switch to m->curg stack and call runtime.cgocallbackg.
   830		// Because we are taking over the execution of m->curg
   831		// but *not* resuming what had been running, we need to
   832		// save that information (m->curg->sched) so we can restore it.
   833		// We can restore m->curg->sched.sp easily, because calling
   834		// runtime.cgocallbackg leaves SP unchanged upon return.
   835		// To save m->curg->sched.pc, we push it onto the stack.
   836		// This has the added benefit that it looks to the traceback
   837		// routine like cgocallbackg is going to return to that
   838		// PC (because the frame we allocate below has the same
   839		// size as cgocallback_gofunc's frame declared above)
   840		// so that the traceback will seamlessly trace back into
   841		// the earlier calls.
   842		//
   843		// In the new goroutine, 4(SP) holds the saved oldm (DX) register.
   844		// 8(SP) is unused.
   845		MOVL	m_curg(BP), SI
   846		MOVL	SI, g(CX)	// current g = m.curg
   847		MOVL	(g_sched+gobuf_sp)(SI), DI // prepare stack as DI
   848		MOVL	(g_sched+gobuf_pc)(SI), BP
   849		MOVL	BP, -4(DI)	// saved curg sched.pc, placed where a return PC would sit
   850		MOVL	ctxt+12(FP), CX
   851		LEAL	-(4+12)(DI), SP	// 4 bytes for the saved PC plus a 12-byte frame
   852		MOVL	DX, 4(SP)
   853		MOVL	CX, 0(SP)	// ctxt is the argument to cgocallbackg
   854		CALL	runtime·cgocallbackg(SB)
   855		MOVL	4(SP), DX	// reload the saved oldm
   856	
   857		// Restore g->sched (== m->curg->sched) from saved values.
   858		get_tls(CX)
   859		MOVL	g(CX), SI
   860		MOVL	12(SP), BP	// the sched.pc stored at -4(DI) above
   861		MOVL	BP, (g_sched+gobuf_pc)(SI)
   862		LEAL	(12+4)(SP), DI	// the sched.sp value read into DI above
   863		MOVL	DI, (g_sched+gobuf_sp)(SI)
   864	
   865		// Switch back to m->g0's stack and restore m->g0->sched.sp.
   866		// (Unlike m->curg, the g0 goroutine never uses sched.pc,
   867		// so we do not have to restore it.)
   868		MOVL	g(CX), BP
   869		MOVL	g_m(BP), BP
   870		MOVL	m_g0(BP), SI
   871		MOVL	SI, g(CX)
   872		MOVL	(g_sched+gobuf_sp)(SI), SP
   873		MOVL	0(SP), AX	// old g0 sched.sp saved at havem
   874		MOVL	AX, (g_sched+gobuf_sp)(SI)
   875		
   876		// If the m on entry was nil, we called needm above to borrow an m
   877		// for the duration of the call. Since the call is over, return it with dropm.
   878		CMPL	DX, $0
   879		JNE 3(PC)	// oldm was non-nil: skip the dropm call
   880		MOVL	$runtime·dropm(SB), AX
   881		CALL	AX
   882	
   883		// Done!
   884		RET
   885	
   886	// void setg(G*); set g. for use by needm.
   887	TEXT runtime·setg(SB), NOSPLIT, $0-4
   888		MOVL	gg+0(FP), BX	// BX = new g (may be nil)
   889	#ifdef GOOS_windows
   890		CMPL	BX, $0
   891		JNE	settls
   892		MOVL	$0, 0x14(FS)	// nil g: store 0 in the 0x14(FS) slot and return
   893		RET
   894	settls:
   895		MOVL	g_m(BX), AX
   896		LEAL	m_tls(AX), AX	// AX = &g.m.tls
   897		MOVL	AX, 0x14(FS)	// store &g.m.tls in the 0x14(FS) slot
   898	#endif
   899		get_tls(CX)
   900		MOVL	BX, g(CX)	// store the new g in the TLS g slot
   901		RET
   902	
   903	// void setg_gcc(G*); set g. for use by gcc. rt0_go passes this function's address to _cgo_init.
   904	TEXT setg_gcc<>(SB), NOSPLIT, $0
   905		get_tls(AX)
   906		MOVL	gg+0(FP), DX	// DX = the G* argument from the stack
   907		MOVL	DX, g(AX)	// store it in the TLS g slot
   908		RET
   909	
   910	// check that g->stack.lo < SP < g->stack.hi (both compares are strict and unsigned); executes INT $3 otherwise
   911	TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
   912		get_tls(CX)
   913		MOVL	g(CX), AX	// AX = current g
   914		CMPL	(g_stack+stack_hi)(AX), SP
   915		JHI	2(PC)	// stack.hi > SP: skip the trap
   916		INT	$3
   917		CMPL	SP, (g_stack+stack_lo)(AX)
   918		JHI	2(PC)	// SP > stack.lo: skip the trap
   919		INT	$3
   920		RET
   921	
   922	// func cputicks() int64: returns the RDTSC counter, preceded by LFENCE or MFENCE when SSE2 is available.
   923	TEXT runtime·cputicks(SB),NOSPLIT,$0-8
   924		CMPB	runtime·support_sse2(SB), $1
   925		JNE	done	// no SSE2: issue RDTSC without a fence
   926		CMPB	runtime·lfenceBeforeRdtsc(SB), $1
   927		JNE	mfence	// flag is set by rt0_go only when the CPU vendor is Intel
   928		BYTE	$0x0f; BYTE $0xae; BYTE $0xe8 // LFENCE
   929		JMP	done
   930	mfence:
   931		BYTE	$0x0f; BYTE $0xae; BYTE $0xf0 // MFENCE
   932	done:
   933		RDTSC
   934		MOVL	AX, ret_lo+0(FP)	// low 32 bits of the counter
   935		MOVL	DX, ret_hi+4(FP)	// high 32 bits of the counter
   936		RET
   937	
   938	TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0	// calls setldt(7, &m0.tls, 32); rt0_go uses it to "set up %gs"
   939		// set up ldt 7 to point at m0.tls
   940		// ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
   941		// the entry number is just a hint.  setldt will set up GS with what it used.
   942		MOVL	$7, 0(SP)	// first argument: entry number hint
   943		LEAL	runtime·m0+m_tls(SB), AX
   944		MOVL	AX, 4(SP)	// second argument: address of m0.tls
   945		MOVL	$32, 8(SP)	// sizeof(tls array)
   946		CALL	runtime·setldt(SB)
   947		RET
   948	
   949	TEXT runtime·emptyfunc(SB),0,$0-0	// flags are 0 (not NOSPLIT); rt0_go calls this to "fault if stack check is wrong"
   950		RET
   951	
   952	// hash function using AES hardware instructions; loads the register arguments and tail-jumps to aeshashbody
   953	TEXT runtime·aeshash(SB),NOSPLIT,$0-16
   954		MOVL	p+0(FP), AX	// ptr to data
   955		MOVL	s+8(FP), BX	// size
   956		LEAL	ret+12(FP), DX	// address of the result slot; aeshashbody stores the hash through DX
   957		JMP	runtime·aeshashbody(SB)	// aeshashbody reads the seed itself from h+4(FP) of this frame
   958	
   959	TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12	// string variant of aeshash: unpacks the string header, then tail-jumps to aeshashbody
   960		MOVL	p+0(FP), AX	// ptr to string object
   961		MOVL	4(AX), BX	// length of string
   962		MOVL	(AX), AX	// string data
   963		LEAL	ret+8(FP), DX	// address of the result slot; aeshashbody stores the hash through DX
   964		JMP	runtime·aeshashbody(SB)	// aeshashbody reads the seed itself from h+4(FP) of this frame
   965	
   966	// AX: data
   967	// BX: length
   968	// DX: address to put return value
	// Reached by JMP from aeshash and aeshashstr, so h+4(FP) below still names
	// the seed argument of the entry point. Every exit path stores the low
	// 32 bits of the final XMM state through DX.
   969	TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
   970		MOVL	h+4(FP), X0	            // 32 bits of per-table hash seed
   971		PINSRW	$4, BX, X0	            // 16 bits of length
   972		PSHUFHW	$0, X0, X0	            // replace size with its low 2 bytes repeated 4 times
   973		MOVO	X0, X1                      // save unscrambled seed
   974		PXOR	runtime·aeskeysched(SB), X0 // xor in per-process seed
   975		AESENC	X0, X0                      // scramble seed
   976	
		// pick a code path by length
   977		CMPL	BX, $16
   978		JB	aes0to15	// length < 16
   979		JE	aes16	// length == 16 (flags are still those of the compare above)
   980		CMPL	BX, $32
   981		JBE	aes17to32
   982		CMPL	BX, $64
   983		JBE	aes33to64
   984		JMP	aes65plus
   985		
   986	aes0to15:
   987		TESTL	BX, BX
   988		JE	aes0	// empty input
   989	
   990		ADDL	$16, AX	// AX = one past a 16-byte window that starts at the data
   991		TESTW	$0xff0, AX
   992		JE	endofpage	// bits 4-11 of AX are zero: the data starts in the last 16 bytes of a 4096-byte-aligned region
   993	
   994		// 16 bytes loaded at this address won't cross
   995		// a page boundary, so we can load it directly.
   996		MOVOU	-16(AX), X1
   997		ADDL	BX, BX
   998		PAND	masks<>(SB)(BX*8), X1	// offset 2*length*8 = 16*length: the mask entry that keeps only the low `length` bytes
   999	
  1000	final1:	
  1001		AESENC	X0, X1  // scramble input, xor in seed
  1002		AESENC	X1, X1  // scramble combo 2 times
  1003		AESENC	X1, X1
  1004		MOVL	X1, (DX)
  1005		RET
  1006	
  1007	endofpage:
  1008		// address ends in 1111xxxx. Might be up against
  1009		// a page boundary, so load ending at last byte.
  1010		// Then shift bytes down using pshufb.
  1011		MOVOU	-32(AX)(BX*1), X1	// AX was advanced by 16 above, so this is the 16 bytes ending at the last data byte
  1012		ADDL	BX, BX
  1013		PSHUFB	shifts<>(SB)(BX*8), X1	// offset 16*length: moves the top `length` bytes to the bottom, zeroes the rest
  1014		JMP	final1
  1015	
  1016	aes0:
  1017		// Return scrambled input seed
  1018		AESENC	X0, X0
  1019		MOVL	X0, (DX)
  1020		RET
  1021	
  1022	aes16:
  1023		MOVOU	(AX), X1	// exactly one full block; no masking needed
  1024		JMP	final1
  1025	
  1026	aes17to32:
  1027		// make second starting seed
  1028		PXOR	runtime·aeskeysched+16(SB), X1
  1029		AESENC	X1, X1
  1030		
  1031		// load data to be hashed
  1032		MOVOU	(AX), X2
  1033		MOVOU	-16(AX)(BX*1), X3	// last 16 bytes; overlaps the first block when length < 32
  1034	
  1035		// scramble 3 times
  1036		AESENC	X0, X2
  1037		AESENC	X1, X3
  1038		AESENC	X2, X2
  1039		AESENC	X3, X3
  1040		AESENC	X2, X2
  1041		AESENC	X3, X3
  1042	
  1043		// combine results
  1044		PXOR	X3, X2
  1045		MOVL	X2, (DX)
  1046		RET
  1047	
  1048	aes33to64:
  1049		// make 3 more starting seeds
  1050		MOVO	X1, X2
  1051		MOVO	X1, X3
  1052		PXOR	runtime·aeskeysched+16(SB), X1
  1053		PXOR	runtime·aeskeysched+32(SB), X2
  1054		PXOR	runtime·aeskeysched+48(SB), X3
  1055		AESENC	X1, X1
  1056		AESENC	X2, X2
  1057		AESENC	X3, X3
  1058		
		// first 32 and last 32 bytes; the two halves overlap when length < 64
  1059		MOVOU	(AX), X4
  1060		MOVOU	16(AX), X5
  1061		MOVOU	-32(AX)(BX*1), X6
  1062		MOVOU	-16(AX)(BX*1), X7
  1063		
  1064		AESENC	X0, X4
  1065		AESENC	X1, X5
  1066		AESENC	X2, X6
  1067		AESENC	X3, X7
  1068		
  1069		AESENC	X4, X4
  1070		AESENC	X5, X5
  1071		AESENC	X6, X6
  1072		AESENC	X7, X7
  1073		
  1074		AESENC	X4, X4
  1075		AESENC	X5, X5
  1076		AESENC	X6, X6
  1077		AESENC	X7, X7
  1078	
		// fold the four lanes into X4
  1079		PXOR	X6, X4
  1080		PXOR	X7, X5
  1081		PXOR	X5, X4
  1082		MOVL	X4, (DX)
  1083		RET
  1084	
  1085	aes65plus:
  1086		// make 3 more starting seeds
  1087		MOVO	X1, X2
  1088		MOVO	X1, X3
  1089		PXOR	runtime·aeskeysched+16(SB), X1
  1090		PXOR	runtime·aeskeysched+32(SB), X2
  1091		PXOR	runtime·aeskeysched+48(SB), X3
  1092		AESENC	X1, X1
  1093		AESENC	X2, X2
  1094		AESENC	X3, X3
  1095		
  1096		// start with last (possibly overlapping) block
  1097		MOVOU	-64(AX)(BX*1), X4
  1098		MOVOU	-48(AX)(BX*1), X5
  1099		MOVOU	-32(AX)(BX*1), X6
  1100		MOVOU	-16(AX)(BX*1), X7
  1101	
  1102		// scramble state once
  1103		AESENC	X0, X4
  1104		AESENC	X1, X5
  1105		AESENC	X2, X6
  1106		AESENC	X3, X7
  1107	
  1108		// compute number of remaining 64-byte blocks
  1109		DECL	BX
  1110		SHRL	$6, BX	// BX = (length-1)/64, at least 1 because length >= 65 here
  1111		
  1112	aesloop:
  1113		// scramble state, xor in a block
		// (X0-X3 are free for data now: the seeds were consumed above)
  1114		MOVOU	(AX), X0
  1115		MOVOU	16(AX), X1
  1116		MOVOU	32(AX), X2
  1117		MOVOU	48(AX), X3
  1118		AESENC	X0, X4
  1119		AESENC	X1, X5
  1120		AESENC	X2, X6
  1121		AESENC	X3, X7
  1122	
  1123		// scramble state
  1124		AESENC	X4, X4
  1125		AESENC	X5, X5
  1126		AESENC	X6, X6
  1127		AESENC	X7, X7
  1128	
  1129		ADDL	$64, AX
  1130		DECL	BX
  1131		JNE	aesloop	// loop until the block counter reaches 0
  1132	
  1133		// 2 more scrambles to finish
  1134		AESENC	X4, X4
  1135		AESENC	X5, X5
  1136		AESENC	X6, X6
  1137		AESENC	X7, X7
  1138		
  1139		AESENC	X4, X4
  1140		AESENC	X5, X5
  1141		AESENC	X6, X6
  1142		AESENC	X7, X7
  1143	
		// fold the four lanes into X4
  1144		PXOR	X6, X4
  1145		PXOR	X7, X5
  1146		PXOR	X5, X4
  1147		MOVL	X4, (DX)
  1148		RET
  1149	
  1150	TEXT runtime·aeshash32(SB),NOSPLIT,$0-12	// hashes the 4 bytes at p with seed h; 32-bit result
  1151		MOVL	p+0(FP), AX	// ptr to data
  1152		MOVL	h+4(FP), X0	// seed
  1153		PINSRD	$1, (AX), X0	// data
		// three AES rounds, keyed by consecutive 16-byte slices of aeskeysched
  1154		AESENC	runtime·aeskeysched+0(SB), X0
  1155		AESENC	runtime·aeskeysched+16(SB), X0
  1156		AESENC	runtime·aeskeysched+32(SB), X0
  1157		MOVL	X0, ret+8(FP)	// result = low 32 bits of X0
  1158		RET
  1159	
  1160	TEXT runtime·aeshash64(SB),NOSPLIT,$0-12	// hashes the 8 bytes at p with seed h; 32-bit result
  1161		MOVL	p+0(FP), AX	// ptr to data
  1162		MOVQ	(AX), X0	// data
  1163		PINSRD	$2, h+4(FP), X0	// seed
		// three AES rounds, keyed by consecutive 16-byte slices of aeskeysched
  1164		AESENC	runtime·aeskeysched+0(SB), X0
  1165		AESENC	runtime·aeskeysched+16(SB), X0
  1166		AESENC	runtime·aeskeysched+32(SB), X0
  1167		MOVL	X0, ret+8(FP)	// result = low 32 bits of X0
  1168		RET
  1169	
  1170	// simple mask to get rid of data in the high part of the register.
	// Sixteen 16-byte entries: entry n (at offset n*16) has its low n bytes
	// set to 0xff and the remaining bytes zero. aeshashbody indexes the table
	// by length and ANDs the entry with a 16-byte load.
  1171	DATA masks<>+0x00(SB)/4, $0x00000000	// n=0
  1172	DATA masks<>+0x04(SB)/4, $0x00000000
  1173	DATA masks<>+0x08(SB)/4, $0x00000000
  1174	DATA masks<>+0x0c(SB)/4, $0x00000000
  1175		
  1176	DATA masks<>+0x10(SB)/4, $0x000000ff	// n=1
  1177	DATA masks<>+0x14(SB)/4, $0x00000000
  1178	DATA masks<>+0x18(SB)/4, $0x00000000
  1179	DATA masks<>+0x1c(SB)/4, $0x00000000
  1180		
  1181	DATA masks<>+0x20(SB)/4, $0x0000ffff	// n=2
  1182	DATA masks<>+0x24(SB)/4, $0x00000000
  1183	DATA masks<>+0x28(SB)/4, $0x00000000
  1184	DATA masks<>+0x2c(SB)/4, $0x00000000
  1185		
  1186	DATA masks<>+0x30(SB)/4, $0x00ffffff	// n=3
  1187	DATA masks<>+0x34(SB)/4, $0x00000000
  1188	DATA masks<>+0x38(SB)/4, $0x00000000
  1189	DATA masks<>+0x3c(SB)/4, $0x00000000
  1190		
  1191	DATA masks<>+0x40(SB)/4, $0xffffffff	// n=4
  1192	DATA masks<>+0x44(SB)/4, $0x00000000
  1193	DATA masks<>+0x48(SB)/4, $0x00000000
  1194	DATA masks<>+0x4c(SB)/4, $0x00000000
  1195		
  1196	DATA masks<>+0x50(SB)/4, $0xffffffff	// n=5
  1197	DATA masks<>+0x54(SB)/4, $0x000000ff
  1198	DATA masks<>+0x58(SB)/4, $0x00000000
  1199	DATA masks<>+0x5c(SB)/4, $0x00000000
  1200		
  1201	DATA masks<>+0x60(SB)/4, $0xffffffff	// n=6
  1202	DATA masks<>+0x64(SB)/4, $0x0000ffff
  1203	DATA masks<>+0x68(SB)/4, $0x00000000
  1204	DATA masks<>+0x6c(SB)/4, $0x00000000
  1205		
  1206	DATA masks<>+0x70(SB)/4, $0xffffffff	// n=7
  1207	DATA masks<>+0x74(SB)/4, $0x00ffffff
  1208	DATA masks<>+0x78(SB)/4, $0x00000000
  1209	DATA masks<>+0x7c(SB)/4, $0x00000000
  1210		
  1211	DATA masks<>+0x80(SB)/4, $0xffffffff	// n=8
  1212	DATA masks<>+0x84(SB)/4, $0xffffffff
  1213	DATA masks<>+0x88(SB)/4, $0x00000000
  1214	DATA masks<>+0x8c(SB)/4, $0x00000000
  1215		
  1216	DATA masks<>+0x90(SB)/4, $0xffffffff	// n=9
  1217	DATA masks<>+0x94(SB)/4, $0xffffffff
  1218	DATA masks<>+0x98(SB)/4, $0x000000ff
  1219	DATA masks<>+0x9c(SB)/4, $0x00000000
  1220		
  1221	DATA masks<>+0xa0(SB)/4, $0xffffffff	// n=10
  1222	DATA masks<>+0xa4(SB)/4, $0xffffffff
  1223	DATA masks<>+0xa8(SB)/4, $0x0000ffff
  1224	DATA masks<>+0xac(SB)/4, $0x00000000
  1225		
  1226	DATA masks<>+0xb0(SB)/4, $0xffffffff	// n=11
  1227	DATA masks<>+0xb4(SB)/4, $0xffffffff
  1228	DATA masks<>+0xb8(SB)/4, $0x00ffffff
  1229	DATA masks<>+0xbc(SB)/4, $0x00000000
  1230		
  1231	DATA masks<>+0xc0(SB)/4, $0xffffffff	// n=12
  1232	DATA masks<>+0xc4(SB)/4, $0xffffffff
  1233	DATA masks<>+0xc8(SB)/4, $0xffffffff
  1234	DATA masks<>+0xcc(SB)/4, $0x00000000
  1235		
  1236	DATA masks<>+0xd0(SB)/4, $0xffffffff	// n=13
  1237	DATA masks<>+0xd4(SB)/4, $0xffffffff
  1238	DATA masks<>+0xd8(SB)/4, $0xffffffff
  1239	DATA masks<>+0xdc(SB)/4, $0x000000ff
  1240		
  1241	DATA masks<>+0xe0(SB)/4, $0xffffffff	// n=14
  1242	DATA masks<>+0xe4(SB)/4, $0xffffffff
  1243	DATA masks<>+0xe8(SB)/4, $0xffffffff
  1244	DATA masks<>+0xec(SB)/4, $0x0000ffff
  1245		
  1246	DATA masks<>+0xf0(SB)/4, $0xffffffff	// n=15
  1247	DATA masks<>+0xf4(SB)/4, $0xffffffff
  1248	DATA masks<>+0xf8(SB)/4, $0xffffffff
  1249	DATA masks<>+0xfc(SB)/4, $0x00ffffff
  1250	
  1251	GLOBL masks<>(SB),RODATA,$256	// read-only; 256 bytes = 16 entries of 16 bytes
  1252	
  1253	// these are arguments to pshufb. They move data down from
  1254	// the high bytes of the register to the low bytes of the register.
  1255	// index is how many bytes to move.
	// Entry n sits at offset n*16. A control byte 0x00-0x0f selects that source
	// byte; 0xff makes PSHUFB write a zero byte. Entry 0 is all zero bytes; in
	// the code shown here aeshashbody never selects it, because length 0
	// branches to aes0 before the table is indexed.
  1256	DATA shifts<>+0x00(SB)/4, $0x00000000	// n=0
  1257	DATA shifts<>+0x04(SB)/4, $0x00000000
  1258	DATA shifts<>+0x08(SB)/4, $0x00000000
  1259	DATA shifts<>+0x0c(SB)/4, $0x00000000
  1260		
  1261	DATA shifts<>+0x10(SB)/4, $0xffffff0f	// n=1: byte 15 -> byte 0
  1262	DATA shifts<>+0x14(SB)/4, $0xffffffff
  1263	DATA shifts<>+0x18(SB)/4, $0xffffffff
  1264	DATA shifts<>+0x1c(SB)/4, $0xffffffff
  1265		
  1266	DATA shifts<>+0x20(SB)/4, $0xffff0f0e	// n=2
  1267	DATA shifts<>+0x24(SB)/4, $0xffffffff
  1268	DATA shifts<>+0x28(SB)/4, $0xffffffff
  1269	DATA shifts<>+0x2c(SB)/4, $0xffffffff
  1270		
  1271	DATA shifts<>+0x30(SB)/4, $0xff0f0e0d	// n=3
  1272	DATA shifts<>+0x34(SB)/4, $0xffffffff
  1273	DATA shifts<>+0x38(SB)/4, $0xffffffff
  1274	DATA shifts<>+0x3c(SB)/4, $0xffffffff
  1275		
  1276	DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c	// n=4
  1277	DATA shifts<>+0x44(SB)/4, $0xffffffff
  1278	DATA shifts<>+0x48(SB)/4, $0xffffffff
  1279	DATA shifts<>+0x4c(SB)/4, $0xffffffff
  1280		
  1281	DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b	// n=5
  1282	DATA shifts<>+0x54(SB)/4, $0xffffff0f
  1283	DATA shifts<>+0x58(SB)/4, $0xffffffff
  1284	DATA shifts<>+0x5c(SB)/4, $0xffffffff
  1285		
  1286	DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a	// n=6
  1287	DATA shifts<>+0x64(SB)/4, $0xffff0f0e
  1288	DATA shifts<>+0x68(SB)/4, $0xffffffff
  1289	DATA shifts<>+0x6c(SB)/4, $0xffffffff
  1290		
  1291	DATA shifts<>+0x70(SB)/4, $0x0c0b0a09	// n=7
  1292	DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
  1293	DATA shifts<>+0x78(SB)/4, $0xffffffff
  1294	DATA shifts<>+0x7c(SB)/4, $0xffffffff
  1295		
  1296	DATA shifts<>+0x80(SB)/4, $0x0b0a0908	// n=8
  1297	DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
  1298	DATA shifts<>+0x88(SB)/4, $0xffffffff
  1299	DATA shifts<>+0x8c(SB)/4, $0xffffffff
  1300		
  1301	DATA shifts<>+0x90(SB)/4, $0x0a090807	// n=9
  1302	DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
  1303	DATA shifts<>+0x98(SB)/4, $0xffffff0f
  1304	DATA shifts<>+0x9c(SB)/4, $0xffffffff
  1305		
  1306	DATA shifts<>+0xa0(SB)/4, $0x09080706	// n=10
  1307	DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
  1308	DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
  1309	DATA shifts<>+0xac(SB)/4, $0xffffffff
  1310		
  1311	DATA shifts<>+0xb0(SB)/4, $0x08070605	// n=11
  1312	DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
  1313	DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
  1314	DATA shifts<>+0xbc(SB)/4, $0xffffffff
  1315		
  1316	DATA shifts<>+0xc0(SB)/4, $0x07060504	// n=12
  1317	DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
  1318	DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
  1319	DATA shifts<>+0xcc(SB)/4, $0xffffffff
  1320		
  1321	DATA shifts<>+0xd0(SB)/4, $0x06050403	// n=13
  1322	DATA shifts<>+0xd4(SB)/4, $0x0a090807
  1323	DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
  1324	DATA shifts<>+0xdc(SB)/4, $0xffffff0f
  1325		
  1326	DATA shifts<>+0xe0(SB)/4, $0x05040302	// n=14
  1327	DATA shifts<>+0xe4(SB)/4, $0x09080706
  1328	DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
  1329	DATA shifts<>+0xec(SB)/4, $0xffff0f0e
  1330		
  1331	DATA shifts<>+0xf0(SB)/4, $0x04030201	// n=15
  1332	DATA shifts<>+0xf4(SB)/4, $0x08070605
  1333	DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
  1334	DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
  1335	
  1336	GLOBL shifts<>(SB),RODATA,$256	// read-only; 256 bytes = 16 entries of 16 bytes
  1337	
  1338	TEXT ·checkASM(SB),NOSPLIT,$0-1
  1339		// ret = masks<>(SB) and shifts<>(SB) both start on a 16-byte boundary
  1340		MOVL	$shifts<>(SB), BX
  1341		MOVL	$masks<>(SB), AX
  1342		ORL	BX, AX	// a low bit set in either address stays set in AX
  1343		TESTL	$15, AX
  1344		SETEQ	ret+0(FP)	// 1 when the low four bits of both addresses are clear
  1345		RET
  1346	
  1347	// memequal(p, q unsafe.Pointer, size uintptr) bool: do the size bytes at p and q match?
  1348	TEXT runtime·memequal(SB),NOSPLIT,$0-13
  1349		MOVL	b+4(FP), DI
  1350		MOVL	a+0(FP), SI
  1351		CMPL	SI, DI
  1352		JEQ	samepointer	// a region always equals itself; size is not consulted
  1353		LEAL	ret+12(FP), AX	// memeqbody stores the answer through AX
  1354		MOVL	size+8(FP), BX
  1355		JMP	runtime·memeqbody(SB)
  1356	samepointer:
  1357		MOVB	$1, ret+12(FP)
  1358		RET
  1359	
  1360	// memequal_varlen(a, b unsafe.Pointer) bool, with the size read from the closure in DX
  1361	TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
  1362		MOVL	b+4(FP), DI
  1363		MOVL	a+0(FP), SI
  1364		CMPL	SI, DI
  1365		JEQ	samepointer	// identical pointers: equal whatever the size
  1366		LEAL	ret+8(FP), AX	// memeqbody stores the answer through AX
  1367		MOVL	4(DX), BX	// compiler stores size at offset 4 in the closure
  1368		JMP	runtime·memeqbody(SB)
  1369	samepointer:
  1370		MOVB	$1, ret+8(FP)
  1371		RET
  1372	
  1373	TEXT bytes·Equal(SB),NOSPLIT,$0-25	// do byte slices a and b hold the same bytes?
  1374		MOVL	b_len+16(FP), CX
  1375		MOVL	a_len+4(FP), BX
  1376		CMPL	BX, CX
  1377		JNE	lendiffer	// different lengths can never be equal
  1378		MOVL	b+12(FP), DI
  1379		MOVL	a+0(FP), SI
  1380		LEAL	ret+24(FP), AX	// memeqbody stores the answer through AX
  1381		JMP	runtime·memeqbody(SB)	// BX already holds the shared length
  1382	lendiffer:
  1383		MOVB	$0, ret+24(FP)
  1384		RET
  1385	
  1386	// a in SI
  1387	// b in DI
  1388	// count in BX
  1389	// address of result byte in AX
	// Entered by JMP from memequal, memequal_varlen and bytes·Equal.
	// Stores 1 through AX when the count bytes match and 0 otherwise.
  1390	TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
  1391		CMPL	BX, $4
  1392		JB	small	// fewer than 4 bytes: one masked 32-bit compare
  1393	
  1394		// 64 bytes at a time using xmm registers
  1395	hugeloop:
  1396		CMPL	BX, $64
  1397		JB	bigloop
  1398		CMPB	runtime·support_sse2(SB), $1
  1399		JNE	bigloop	// no SSE2: use the 32-bit loop instead
  1400		MOVOU	(SI), X0
  1401		MOVOU	(DI), X1
  1402		MOVOU	16(SI), X2
  1403		MOVOU	16(DI), X3
  1404		MOVOU	32(SI), X4
  1405		MOVOU	32(DI), X5
  1406		MOVOU	48(SI), X6
  1407		MOVOU	48(DI), X7
  1408		PCMPEQB	X1, X0
  1409		PCMPEQB	X3, X2
  1410		PCMPEQB	X5, X4
  1411		PCMPEQB	X7, X6
  1412		PAND	X2, X0
  1413		PAND	X6, X4
  1414		PAND	X4, X0	// a byte lane is 0xff only where all four 16-byte compares matched
  1415		PMOVMSKB X0, DX	// DX = one bit per byte lane
  1416		ADDL	$64, SI
  1417		ADDL	$64, DI
  1418		SUBL	$64, BX
  1419		CMPL	DX, $0xffff
  1420		JEQ	hugeloop	// all 16 bits set: these 64 bytes are equal, keep going
  1421		MOVB	$0, (AX)
  1422		RET
  1423	
  1424		// 4 bytes at a time using 32-bit register
  1425	bigloop:
  1426		CMPL	BX, $4
  1427		JBE	leftover	// at most 4 bytes left
  1428		MOVL	(SI), CX
  1429		MOVL	(DI), DX
  1430		ADDL	$4, SI
  1431		ADDL	$4, DI
  1432		SUBL	$4, BX
  1433		CMPL	CX, DX
  1434		JEQ	bigloop
  1435		MOVB	$0, (AX)
  1436		RET
  1437	
  1438		// remaining 0-4 bytes
  1439	leftover:
		// Compare the 4 bytes that end at the end of each buffer. This may
		// re-read bytes already compared; it stays inside the buffers because
		// this path is only reached when the original count was at least 4.
  1440		MOVL	-4(SI)(BX*1), CX
  1441		MOVL	-4(DI)(BX*1), DX
  1442		CMPL	CX, DX
  1443		SETEQ	(AX)
  1444		RET
  1445	
  1446	small:
  1447		CMPL	BX, $0
  1448		JEQ	equal	// count 0: ZF is set, so SETEQ at equal stores 1
  1449	
  1450		LEAL	0(BX*8), CX
  1451		NEGL	CX	// CX = -8*count; as a 32-bit shift count that acts as 32-8*count
  1452	
  1453		MOVL	SI, DX
  1454		CMPB	DX, $0xfc
  1455		JA	si_high	// low address byte is above 0xfc
  1456	
  1457		// load at SI won't cross a page boundary.
  1458		MOVL	(SI), SI
  1459		JMP	si_finish
  1460	si_high:
  1461		// address ends in 111111xx. Load up to bytes we want, move to correct position.
  1462		MOVL	-4(SI)(BX*1), SI
  1463		SHRL	CX, SI	// bring the count valid bytes down to the low end
  1464	si_finish:
  1465	
  1466		// same for DI.
  1467		MOVL	DI, DX
  1468		CMPB	DX, $0xfc
  1469		JA	di_high
  1470		MOVL	(DI), DI
  1471		JMP	di_finish
  1472	di_high:
  1473		MOVL	-4(DI)(BX*1), DI
  1474		SHRL	CX, DI
  1475	di_finish:
  1476	
  1477		SUBL	SI, DI
  1478		SHLL	CX, DI	// shift out everything above the count valid bytes; ZF set iff those bytes are equal
  1479	equal:
  1480		SETEQ	(AX)
  1481		RET
  1482	
  1483	TEXT runtime·cmpstring(SB),NOSPLIT,$0-20	// three-way comparison of strings s1 and s2
  1484		LEAL	ret+16(FP), AX	// cmpbody stores 1/0/-1 through AX
  1485		MOVL	s2_len+12(FP), DX
  1486		MOVL	s2_base+8(FP), DI
  1487		MOVL	s1_len+4(FP), BX
  1488		MOVL	s1_base+0(FP), SI
  1489		JMP	runtime·cmpbody(SB)
  1490	
  1491	TEXT bytes·Compare(SB),NOSPLIT,$0-28	// three-way comparison of two byte slices
  1492		LEAL	ret+24(FP), AX	// cmpbody stores 1/0/-1 through AX
  1493		MOVL	s2+16(FP), DX	// cmpbody's blen
  1494		MOVL	s2+12(FP), DI	// cmpbody's b
  1495		MOVL	s1+4(FP), BX	// cmpbody's alen
  1496		MOVL	s1+0(FP), SI	// cmpbody's a
  1497		JMP	runtime·cmpbody(SB)
  1498	
  1499	TEXT bytes·IndexByte(SB),NOSPLIT,$0-20	// index of the first byte equal to c in s, or -1
  1500		MOVL	s+0(FP), SI	// SI = start of the data, kept to compute the index
  1501		MOVL	s_len+4(FP), CX	// CX = bytes left to scan (the REPN count)
  1502		MOVB	c+12(FP), AL	// AL = byte that SCASB compares against
  1503		MOVL	SI, DI	// DI = scan pointer, advanced by SCASB
  1504		CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
		// NOTE(review): when CX is 0, SCASB never runs and ZF keeps its entry
		// value; both exits still produce -1 because DI equals SI.
  1505		JZ 3(PC)	// last compare matched: skip the two "not found" instructions
  1506		MOVL	$-1, ret+16(FP)
  1507		RET
  1508		SUBL	SI, DI	// DI stopped one past the match ...
  1509		SUBL	$1, DI	// ... so the index is DI - SI - 1
  1510		MOVL	DI, ret+16(FP)
  1511		RET
  1512	
  1513	TEXT strings·IndexByte(SB),NOSPLIT,$0-16	// index of the first byte equal to c in s, or -1
  1514		MOVL	s+0(FP), SI	// SI = start of the data, kept to compute the index
  1515		MOVL	s_len+4(FP), CX	// CX = bytes left to scan (the REPN count)
  1516		MOVB	c+8(FP), AL	// AL = byte that SCASB compares against
  1517		MOVL	SI, DI	// DI = scan pointer, advanced by SCASB
  1518		CLD; REPN; SCASB	// scan forward until a byte equals AL or CX reaches 0
		// NOTE(review): when CX is 0, SCASB never runs and ZF keeps its entry
		// value; both exits still produce -1 because DI equals SI.
  1519		JZ 3(PC)	// last compare matched: skip the two "not found" instructions
  1520		MOVL	$-1, ret+12(FP)
  1521		RET
  1522		SUBL	SI, DI	// DI stopped one past the match ...
  1523		SUBL	$1, DI	// ... so the index is DI - SI - 1
  1524		MOVL	DI, ret+12(FP)
  1525		RET
  1526	
  1527	// input:
  1528	//   SI = a
  1529	//   DI = b
  1530	//   BX = alen
  1531	//   DX = blen
  1532	//   AX = address of return word (set to 1/0/-1)
	// Entered by JMP from cmpstring and bytes·Compare. The first min(alen, blen)
	// bytes are compared; if they all match, the lengths decide the result.
  1533	TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
  1534		MOVL	DX, BP
  1535		SUBL	BX, DX // DX = blen-alen
  1536		JLE	2(PC)	// blen <= alen: BP already holds the minimum, skip the next instruction
  1537		MOVL	BX, BP // BP = min(alen, blen)
  1538		CMPL	SI, DI
  1539		JEQ	allsame	// same pointer: the common bytes are trivially equal
  1540		CMPL	BP, $4
  1541		JB	small
  1542		CMPB	runtime·support_sse2(SB), $1
  1543		JNE	mediumloop	// no SSE2: compare 4 bytes at a time
  1544	largeloop:
  1545		CMPL	BP, $16
  1546		JB	mediumloop
  1547		MOVOU	(SI), X0
  1548		MOVOU	(DI), X1
  1549		PCMPEQB X0, X1
  1550		PMOVMSKB X1, BX	// BX = one bit per byte, set where the bytes are equal
  1551		XORL	$0xffff, BX	// convert EQ to NE
  1552		JNE	diff16	// branch if at least one byte is not equal
  1553		ADDL	$16, SI
  1554		ADDL	$16, DI
  1555		SUBL	$16, BP
  1556		JMP	largeloop
  1557	
  1558	diff16:
  1559		BSFL	BX, BX	// index of first byte that differs
  1560		XORL	DX, DX
  1561		MOVB	(SI)(BX*1), CX
  1562		CMPB	CX, (DI)(BX*1)
  1563		SETHI	DX	// DX = 1 if a's byte is above b's (unsigned), else 0
  1564		LEAL	-1(DX*2), DX	// convert 1/0 to +1/-1
  1565		MOVL	DX, (AX)
  1566		RET
  1567	
  1568	mediumloop:
  1569		CMPL	BP, $4
  1570		JBE	_0through4
  1571		MOVL	(SI), BX
  1572		MOVL	(DI), CX
  1573		CMPL	BX, CX
  1574		JNE	diff4
  1575		ADDL	$4, SI
  1576		ADDL	$4, DI
  1577		SUBL	$4, BP
  1578		JMP	mediumloop
  1579	
  1580	_0through4:
		// compare the 4 bytes that end at the end of the common prefix;
		// this may re-read bytes that already compared equal
  1581		MOVL	-4(SI)(BP*1), BX
  1582		MOVL	-4(DI)(BP*1), CX
  1583		CMPL	BX, CX
  1584		JEQ	allsame
  1585	
  1586	diff4:
  1587		BSWAPL	BX	// reverse order of bytes
  1588		BSWAPL	CX
  1589		XORL	BX, CX	// find bit differences
  1590		BSRL	CX, CX	// index of highest bit difference
  1591		SHRL	CX, BX	// move a's bit to bottom
  1592		ANDL	$1, BX	// mask bit
  1593		LEAL	-1(BX*2), BX // 1/0 => +1/-1
  1594		MOVL	BX, (AX)
  1595		RET
  1596	
  1597		// 0-3 bytes in common
  1598	small:
  1599		LEAL	(BP*8), CX
  1600		NEGL	CX	// CX = -8*BP; as a 32-bit shift count that acts as 32-8*BP
  1601		JEQ	allsame	// NEGL left ZF set, so BP was 0: no common bytes to compare
  1602	
  1603		// load si
  1604		CMPB	SI, $0xfc
  1605		JA	si_high	// low address byte is above 0xfc: load the 4 bytes ending at the data's end instead
  1606		MOVL	(SI), SI
  1607		JMP	si_finish
  1608	si_high:
  1609		MOVL	-4(SI)(BP*1), SI
  1610		SHRL	CX, SI	// bring the BP valid bytes down to the low end
  1611	si_finish:
  1612		SHLL	CX, SI	// push the valid bytes to the top, dropping everything else
  1613	
  1614		// same for di
  1615		CMPB	DI, $0xfc
  1616		JA	di_high
  1617		MOVL	(DI), DI
  1618		JMP	di_finish
  1619	di_high:
  1620		MOVL	-4(DI)(BP*1), DI
  1621		SHRL	CX, DI
  1622	di_finish:
  1623		SHLL	CX, DI
  1624	
  1625		BSWAPL	SI	// reverse order of bytes
  1626		BSWAPL	DI
  1627		XORL	SI, DI	// find bit differences
  1628		JEQ	allsame
  1629		BSRL	DI, CX	// index of highest bit difference
  1630		SHRL	CX, SI	// move a's bit to bottom
  1631		ANDL	$1, SI	// mask bit
  1632		LEAL	-1(SI*2), BX // 1/0 => +1/-1
  1633		MOVL	BX, (AX)
  1634		RET
  1635	
  1636		// all the bytes in common are the same, so we just need
  1637		// to compare the lengths.
  1638	allsame:
  1639		XORL	BX, BX
  1640		XORL	CX, CX
  1641		TESTL	DX, DX	// DX is still blen-alen from the top of the function
  1642		SETLT	BX	// 1 if alen > blen
  1643		SETEQ	CX	// 1 if alen == blen
  1644		LEAL	-1(CX)(BX*2), BX	// 1,0,-1 result
  1645		MOVL	BX, (AX)
  1646		RET
  1647	
  1648	TEXT runtime·return0(SB), NOSPLIT, $0	// leaves 0 in AX and returns
  1649		MOVL	$0, AX
  1650		RET
  1651	
  1652	// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1653	// Must obey the gcc calling convention.
	// Only AX and CX are written; the result is left in AX.
  1654	TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1655		get_tls(CX)
  1656		MOVL	g(CX), AX	// AX = g
  1657		MOVL	g_m(AX), AX	// AX = g->m
  1658		MOVL	m_curg(AX), AX	// AX = g->m->curg
  1659		MOVL	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
  1660		RET
  1661	
  1662	// The top-most function running on a goroutine
  1663	// returns to goexit+PCQuantum.
	// There is no RET: the CALL below does not return.
  1664	TEXT runtime·goexit(SB),NOSPLIT,$0-0
  1665		BYTE	$0x90	// NOP; presumably the byte that goexit+PCQuantum skips over — confirm
  1666		CALL	runtime·goexit1(SB)	// does not return
  1667		// traceback from goexit1 must hit code range of goexit
  1668		BYTE	$0x90	// NOP; keeps the CALL's return address inside goexit
  1669	
  1670	// Add a module's moduledata to the linked list of moduledata objects. This
  1671	// is called from .init_array by a function generated in the linker and so
  1672	// follows the platform ABI wrt register preservation -- it only touches AX,
  1673	// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
  1674	// instead the pointer to the moduledata is passed in AX.
  1675	TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1676	       MOVL    runtime·lastmoduledatap(SB), DX	// DX = current tail of the list
  1677	       MOVL    AX, moduledata_next(DX)	// tail.next = the new moduledata
  1678	       MOVL    AX, runtime·lastmoduledatap(SB)	// the new moduledata becomes the tail
  1679	       RET
  1680	
  1681	TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12	// converts the uint32 a to a float64 result via the x87 unit
  1682		MOVL	a+0(FP), AX
  1683		MOVL	AX, 0(SP)	// low 32 bits of an 8-byte stack temporary = a
  1684		MOVL	$0, 4(SP)	// high 32 bits = 0, so the 64-bit integer is non-negative
  1685		FMOVV	0(SP), F0	// load the temporary as a 64-bit integer
  1686		FMOVDP	F0, ret+4(FP)	// store it as a float64 and pop the x87 stack
  1687		RET
  1688	
  1689	TEXT runtime·float64touint32(SB),NOSPLIT,$12-12	// converts the float64 a to a uint32 result via the x87 unit
  1690		FMOVD	a+0(FP), F0	// load a onto the x87 stack
  1691		FSTCW	0(SP)	// save the current x87 control word
  1692		FLDCW	runtime·controlWord64trunc(SB)	// presumably selects truncation for the store below — confirm
  1693		FMOVVP	F0, 4(SP)	// store as a 64-bit integer at 4(SP) and pop
  1694		FLDCW	0(SP)	// restore the saved control word
  1695		MOVL	4(SP), AX	// keep only the low 32 bits of that integer
  1696		MOVL	AX, ret+8(FP)
  1697		RET

View as plain text