// Listing of src/runtime/asm_amd64.s (Go package runtime).
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "go_tls.h"
7 #include "funcdata.h"
8 #include "textflag.h"
9
10 // _rt0_amd64 is common startup code for most amd64 systems when using
11 // internal linking. This is the entry point for the program from the
12 // kernel for an ordinary -buildmode=exe program. The stack holds the
13 // number of arguments and the C-style argv.
14 TEXT _rt0_amd64(SB),NOSPLIT,$-8
15 MOVQ 0(SP), DI // argc
16 LEAQ 8(SP), SI // argv
17 JMP runtime·rt0_go(SB) // tail-call the common entry point; it expects argc/argv in DI/SI
18
19 // main is common startup code for most amd64 systems when using
20 // external linking. The C startup code will call the symbol "main"
21 // passing argc and argv in the usual C ABI registers DI and SI.
22 TEXT main(SB),NOSPLIT,$-8
// DI = argc, SI = argv, already placed there by the C startup code
// (see the comment above); rt0_go takes them in the same registers.
23 JMP runtime·rt0_go(SB)
24
25 // _rt0_amd64_lib is common startup code for most amd64 systems when
26 // using -buildmode=c-archive or -buildmode=c-shared. The linker will
27 // arrange to invoke this function as a global constructor (for
28 // c-archive) or when the shared library is loaded (for c-shared).
29 // We expect argc and argv to be passed in the usual C ABI registers
30 // DI and SI.
31 TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
32 // Align stack per ELF ABI requirements.
33 MOVQ SP, AX
34 ANDQ $~15, SP
35 // Save C ABI callee-saved registers, as caller may need them.
36 MOVQ BX, 0x10(SP)
37 MOVQ BP, 0x18(SP)
38 MOVQ R12, 0x20(SP)
39 MOVQ R13, 0x28(SP)
40 MOVQ R14, 0x30(SP)
41 MOVQ R15, 0x38(SP)
42 MOVQ AX, 0x40(SP) // remember the original (possibly unaligned) SP for restore below
43
// Stash argc/argv for _rt0_amd64_lib_go, which may run on another thread.
44 MOVQ DI, _rt0_amd64_lib_argc<>(SB)
45 MOVQ SI, _rt0_amd64_lib_argv<>(SB)
46
47 // Synchronous initialization.
48 CALL runtime·libpreinit(SB)
49
50 // Create a new thread to finish Go runtime initialization.
// If cgo is available, let the C thread library create it; otherwise
// fall back to the runtime's own newosproc0.
51 MOVQ _cgo_sys_thread_create(SB), AX
52 TESTQ AX, AX
53 JZ nocgo
54 MOVQ $_rt0_amd64_lib_go(SB), DI
55 MOVQ $0, SI
56 CALL AX
57 JMP restore
58
59 nocgo:
60 MOVQ $0x800000, 0(SP) // stacksize (8 MB)
61 MOVQ $_rt0_amd64_lib_go(SB), AX
62 MOVQ AX, 8(SP) // fn
63 CALL runtime·newosproc0(SB)
64
65 restore:
66 MOVQ 0x10(SP), BX
67 MOVQ 0x18(SP), BP
68 MOVQ 0x20(SP), R12
69 MOVQ 0x28(SP), R13
70 MOVQ 0x30(SP), R14
71 MOVQ 0x38(SP), R15
72 MOVQ 0x40(SP), SP // restore the original SP saved at entry
73 RET
74
75 // _rt0_amd64_lib_go initializes the Go runtime.
76 // This is started in a separate thread by _rt0_amd64_lib.
77 TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
78 MOVQ _rt0_amd64_lib_argc<>(SB), DI
79 MOVQ _rt0_amd64_lib_argv<>(SB), SI
80 JMP runtime·rt0_go(SB)
81
// argc/argv saved by _rt0_amd64_lib for _rt0_amd64_lib_go, which may
// run on a different thread than the one that received them.
82 DATA _rt0_amd64_lib_argc<>(SB)/8, $0
83 GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
84 DATA _rt0_amd64_lib_argv<>(SB)/8, $0
85 GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
86
87 TEXT runtime·rt0_go(SB),NOSPLIT,$0
88 // copy arguments forward on an even stack
89 MOVQ DI, AX // argc
90 MOVQ SI, BX // argv
// Reserve 4 words (2 args + 2 autos) and round SP down to 16 bytes.
91 SUBQ $(4*8+7), SP // 2args 2auto
92 ANDQ $~15, SP
93 MOVQ AX, 16(SP)
94 MOVQ BX, 24(SP)
95
96 // create istack out of the given (operating system) stack.
97 // _cgo_init may update stackguard.
98 MOVQ $runtime·g0(SB), DI
// Give g0 64 KB of the OS stack; +104 keeps a small gap above stack.lo.
99 LEAQ (-64*1024+104)(SP), BX
100 MOVQ BX, g_stackguard0(DI)
101 MOVQ BX, g_stackguard1(DI)
102 MOVQ BX, (g_stack+stack_lo)(DI)
103 MOVQ SP, (g_stack+stack_hi)(DI)
104
105 // find out information about the processor we're on
106 MOVL $0, AX
107 CPUID
108 MOVL AX, SI // save CPUID(0) result (max supported leaf)
109 CMPL AX, $0
110 JE nocpuinfo
111
112 // Figure out how to serialize RDTSC.
113 // On Intel processors LFENCE is enough. AMD requires MFENCE.
114 // Don't know about the rest, so let's do MFENCE.
// CPUID(0) leaves the vendor string "GenuineIntel" in BX,DX,CX.
115 CMPL BX, $0x756E6547 // "Genu"
116 JNE notintel
117 CMPL DX, $0x49656E69 // "ineI"
118 JNE notintel
119 CMPL CX, $0x6C65746E // "ntel"
120 JNE notintel
121 MOVB $1, runtime·isIntel(SB)
122 MOVB $1, runtime·lfenceBeforeRdtsc(SB)
123 notintel:
124
125 // Load EAX=1 cpuid flags
126 MOVL $1, AX
127 CPUID
128 MOVL AX, runtime·processorVersionInfo(SB)
129
130 nocpuinfo:
131 // if there is an _cgo_init, call it.
132 MOVQ _cgo_init(SB), AX
133 TESTQ AX, AX
134 JZ needtls
135 // g0 already in DI
136 MOVQ DI, CX // Win64 uses CX for first parameter
137 MOVQ $setg_gcc<>(SB), SI
138 CALL AX
139
140 // update stackguard after _cgo_init
141 MOVQ $runtime·g0(SB), CX
142 MOVQ (g_stack+stack_lo)(CX), AX
143 ADDQ $const__StackGuard, AX
144 MOVQ AX, g_stackguard0(CX)
145 MOVQ AX, g_stackguard1(CX)
146
// On Windows, _cgo_init does not set up TLS, so fall through to needtls.
147 #ifndef GOOS_windows
148 JMP ok
149 #endif
150 needtls:
151 #ifdef GOOS_plan9
152 // skip TLS setup on Plan 9
153 JMP ok
154 #endif
155 #ifdef GOOS_solaris
156 // skip TLS setup on Solaris
157 JMP ok
158 #endif
159 #ifdef GOOS_darwin
160 // skip TLS setup on Darwin
161 JMP ok
162 #endif
163
164 LEAQ runtime·m0+m_tls(SB), DI
165 CALL runtime·settls(SB)
166
167 // store through it, to make sure it works
168 get_tls(BX)
169 MOVQ $0x123, g(BX)
170 MOVQ runtime·m0+m_tls(SB), AX
171 CMPQ AX, $0x123
172 JEQ 2(PC) // sentinel round-tripped through TLS; skip the abort
173 CALL runtime·abort(SB)
174 ok:
175 // set the per-goroutine and per-mach "registers"
176 get_tls(BX)
177 LEAQ runtime·g0(SB), CX
178 MOVQ CX, g(BX)
179 LEAQ runtime·m0(SB), AX
180
181 // save m->g0 = g0
182 MOVQ CX, m_g0(AX)
183 // save m0 to g0->m
184 MOVQ AX, g_m(CX)
185
186 CLD // convention is D is always left cleared
187 CALL runtime·check(SB)
188
189 MOVL 16(SP), AX // copy argc
190 MOVL AX, 0(SP)
191 MOVQ 24(SP), AX // copy argv
192 MOVQ AX, 8(SP)
193 CALL runtime·args(SB)
194 CALL runtime·osinit(SB)
195 CALL runtime·schedinit(SB)
196
197 // create a new goroutine to start program
198 MOVQ $runtime·mainPC(SB), AX // entry
199 PUSHQ AX
200 PUSHQ $0 // arg size
201 CALL runtime·newproc(SB)
202 POPQ AX
203 POPQ AX
204
205 // start this M
206 CALL runtime·mstart(SB)
207
208 CALL runtime·abort(SB) // mstart should never return
209 RET
210
211 // Prevent dead-code elimination of debugCallV1, which is
212 // intended to be called by debuggers.
213 MOVQ $runtime·debugCallV1(SB), AX
214 RET
215
// mainPC is a function value pointing at runtime.main, passed to
// newproc above as the entry of the first goroutine.
216 DATA runtime·mainPC+0(SB)/8,$runtime·main(SB)
217 GLOBL runtime·mainPC(SB),RODATA,$8
218
// breakpoint traps into an attached debugger.
219 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
220 BYTE $0xcc // INT 3
221 RET
222
// asminit is the hook for architecture-specific per-thread setup;
// amd64 needs none.
223 TEXT runtime·asminit(SB),NOSPLIT,$0-0
224 // No per-thread init.
225 RET
226
227 /*
228 * go-routine
229 */
230
231 // void gosave(Gobuf*)
232 // save state in Gobuf; setjmp
233 TEXT runtime·gosave(SB), NOSPLIT, $0-8
234 MOVQ buf+0(FP), AX // gobuf
235 LEAQ buf+0(FP), BX // caller's SP
236 MOVQ BX, gobuf_sp(AX)
237 MOVQ 0(SP), BX // caller's PC
238 MOVQ BX, gobuf_pc(AX)
239 MOVQ $0, gobuf_ret(AX)
240 MOVQ BP, gobuf_bp(AX)
241 // Assert ctxt is zero. See func save.
242 MOVQ gobuf_ctxt(AX), BX
243 TESTQ BX, BX
244 JZ 2(PC) // ctxt == 0: skip the badctxt crash
245 CALL runtime·badctxt(SB)
// Record the current g last, completing the buf.
246 get_tls(CX)
247 MOVQ g(CX), BX
248 MOVQ BX, gobuf_g(AX)
249 RET
250
251 // void gogo(Gobuf*)
252 // restore state from Gobuf; longjmp
253 TEXT runtime·gogo(SB), NOSPLIT, $16-8
254 MOVQ buf+0(FP), BX // gobuf
255 MOVQ gobuf_g(BX), DX
256 MOVQ 0(DX), CX // make sure g != nil (faults here if nil)
257 get_tls(CX)
258 MOVQ DX, g(CX)
259 MOVQ gobuf_sp(BX), SP // restore SP
260 MOVQ gobuf_ret(BX), AX
261 MOVQ gobuf_ctxt(BX), DX
262 MOVQ gobuf_bp(BX), BP
263 MOVQ $0, gobuf_sp(BX) // clear to help garbage collector
264 MOVQ $0, gobuf_ret(BX)
265 MOVQ $0, gobuf_ctxt(BX)
266 MOVQ $0, gobuf_bp(BX)
267 MOVQ gobuf_pc(BX), BX
268 JMP BX // resume execution at the saved PC
269
270 // func mcall(fn func(*g))
271 // Switch to m->g0's stack, call fn(g).
272 // Fn must never return. It should gogo(&g->sched)
273 // to keep running g.
274 TEXT runtime·mcall(SB), NOSPLIT, $0-8
275 MOVQ fn+0(FP), DI
276
277 get_tls(CX)
278 MOVQ g(CX), AX // save state in g->sched
279 MOVQ 0(SP), BX // caller's PC
280 MOVQ BX, (g_sched+gobuf_pc)(AX)
281 LEAQ fn+0(FP), BX // caller's SP
282 MOVQ BX, (g_sched+gobuf_sp)(AX)
283 MOVQ AX, (g_sched+gobuf_g)(AX)
284 MOVQ BP, (g_sched+gobuf_bp)(AX)
285
286 // switch to m->g0 & its stack, call fn
287 MOVQ g(CX), BX
288 MOVQ g_m(BX), BX
289 MOVQ m_g0(BX), SI
290 CMPQ SI, AX // if g == m->g0 call badmcall
291 JNE 3(PC)
292 MOVQ $runtime·badmcall(SB), AX
293 JMP AX
294 MOVQ SI, g(CX) // g = m->g0
295 MOVQ (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
296 PUSHQ AX // push g: it is fn's *g argument at 0(SP)
297 MOVQ DI, DX // DX = fn (closure context register)
298 MOVQ 0(DI), DI // code pointer from the funcval
299 CALL DI // fn(g); must not return
300 POPQ AX
301 MOVQ $runtime·badmcall2(SB), AX
302 JMP AX
303 RET
304
305 // systemstack_switch is a dummy routine that systemstack leaves at the bottom
306 // of the G stack. We need to distinguish the routine that
307 // lives at the bottom of the G stack from the one that lives
308 // at the top of the system stack because the one at the top of
309 // the system stack terminates the stack walk (see topofstack()).
310 TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
311 RET // deliberately empty; only this function's PC is used (see comment above)
312
313 // func systemstack(fn func())
314 TEXT runtime·systemstack(SB), NOSPLIT, $0-8
315 MOVQ fn+0(FP), DI // DI = fn
316 get_tls(CX)
317 MOVQ g(CX), AX // AX = g
318 MOVQ g_m(AX), BX // BX = m
319
// Already on the signal stack? No switch needed.
320 CMPQ AX, m_gsignal(BX)
321 JEQ noswitch
322
323 MOVQ m_g0(BX), DX // DX = g0
324 CMPQ AX, DX
325 JEQ noswitch
326
327 CMPQ AX, m_curg(BX)
328 JNE bad
329
330 // switch stacks
331 // save our state in g->sched. Pretend to
332 // be systemstack_switch if the G stack is scanned.
333 MOVQ $runtime·systemstack_switch(SB), SI
334 MOVQ SI, (g_sched+gobuf_pc)(AX)
335 MOVQ SP, (g_sched+gobuf_sp)(AX)
336 MOVQ AX, (g_sched+gobuf_g)(AX)
337 MOVQ BP, (g_sched+gobuf_bp)(AX)
338
339 // switch to g0
340 MOVQ DX, g(CX)
341 MOVQ (g_sched+gobuf_sp)(DX), BX
342 // make it look like mstart called systemstack on g0, to stop traceback
343 SUBQ $8, BX
344 MOVQ $runtime·mstart(SB), DX
345 MOVQ DX, 0(BX)
346 MOVQ BX, SP
347
348 // call target function
349 MOVQ DI, DX // DX = fn (closure context register)
350 MOVQ 0(DI), DI // code pointer from the funcval
351 CALL DI
352
353 // switch back to g
354 get_tls(CX)
355 MOVQ g(CX), AX
356 MOVQ g_m(AX), BX
357 MOVQ m_curg(BX), AX
358 MOVQ AX, g(CX)
359 MOVQ (g_sched+gobuf_sp)(AX), SP
360 MOVQ $0, (g_sched+gobuf_sp)(AX)
361 RET
362
363 noswitch:
364 // already on m stack; tail call the function
365 // Using a tail call here cleans up tracebacks since we won't stop
366 // at an intermediate systemstack.
367 MOVQ DI, DX
368 MOVQ 0(DI), DI
369 JMP DI
370
371 bad:
372 // Bad: g is not gsignal, not g0, not curg. What is it?
373 MOVQ $runtime·badsystemstack(SB), AX
374 CALL AX
375 INT $3
376
377
378 /*
379 * support for morestack
380 */
381
382 // Called during function prolog when more stack is needed.
383 //
384 // The traceback routines see morestack on a g0 as being
385 // the top of a stack (for example, morestack calling newstack
386 // calling the scheduler calling newm calling gc), so we must
387 // record an argument size. For that purpose, it has no arguments.
388 TEXT runtime·morestack(SB),NOSPLIT,$0-0
389 // Cannot grow scheduler stack (m->g0).
390 get_tls(CX)
391 MOVQ g(CX), BX
392 MOVQ g_m(BX), BX
393 MOVQ m_g0(BX), SI
394 CMPQ g(CX), SI
395 JNE 3(PC)
396 CALL runtime·badmorestackg0(SB)
397 CALL runtime·abort(SB)
398
399 // Cannot grow signal stack (m->gsignal).
400 MOVQ m_gsignal(BX), SI
401 CMPQ g(CX), SI
402 JNE 3(PC)
403 CALL runtime·badmorestackgsignal(SB)
404 CALL runtime·abort(SB)
405
406 // Called from f.
407 // Set m->morebuf to f's caller.
// NOTE: morestack is entered via CALL from f's prologue, so 0(SP) is
// f's resume PC and 8(SP) is f's caller's return PC.
408 MOVQ 8(SP), AX // f's caller's PC
409 MOVQ AX, (m_morebuf+gobuf_pc)(BX)
410 LEAQ 16(SP), AX // f's caller's SP
411 MOVQ AX, (m_morebuf+gobuf_sp)(BX)
412 get_tls(CX)
413 MOVQ g(CX), SI
414 MOVQ SI, (m_morebuf+gobuf_g)(BX)
415
416 // Set g->sched to context in f.
417 MOVQ 0(SP), AX // f's PC
418 MOVQ AX, (g_sched+gobuf_pc)(SI)
419 MOVQ SI, (g_sched+gobuf_g)(SI)
420 LEAQ 8(SP), AX // f's SP
421 MOVQ AX, (g_sched+gobuf_sp)(SI)
422 MOVQ BP, (g_sched+gobuf_bp)(SI)
423 MOVQ DX, (g_sched+gobuf_ctxt)(SI)
424
425 // Call newstack on m->g0's stack.
426 MOVQ m_g0(BX), BX
427 MOVQ BX, g(CX)
428 MOVQ (g_sched+gobuf_sp)(BX), SP
429 CALL runtime·newstack(SB)
430 CALL runtime·abort(SB) // crash if newstack returns
431 RET
432
433 // morestack but not preserving ctxt.
434 TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
435 MOVL $0, DX // clear the ctxt register before tail-calling morestack
436 JMP runtime·morestack(SB)
437
438 // reflectcall: call a function with the given argument list
439 // func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
440 // we don't have variable-sized frames, so we use a small number
441 // of constant-sized-frame functions to encode a few bits of size in the pc.
442 // Caution: ugly multiline assembly macros in your future!
443
// DISPATCH tail-calls NAME when the argument size in CX is <= MAXSIZE;
// otherwise it falls through to the next DISPATCH line.
444 #define DISPATCH(NAME,MAXSIZE) \
445 CMPQ CX, $MAXSIZE; \
446 JA 3(PC); \
447 MOVQ $NAME(SB), AX; \
448 JMP AX
449 // Note: can't just "JMP NAME(SB)" - bad inlining results.
450
// reflect.call forwards to runtime.reflectcall.
451 TEXT reflect·call(SB), NOSPLIT, $0-0
452 JMP ·reflectcall(SB)
453
454 TEXT ·reflectcall(SB), NOSPLIT, $0-32
455 MOVLQZX argsize+24(FP), CX // zero-extend 32-bit argument size into CX
// Dispatch to the smallest fixed-frame call* that fits the arguments.
456 DISPATCH(runtime·call32, 32)
457 DISPATCH(runtime·call64, 64)
458 DISPATCH(runtime·call128, 128)
459 DISPATCH(runtime·call256, 256)
460 DISPATCH(runtime·call512, 512)
461 DISPATCH(runtime·call1024, 1024)
462 DISPATCH(runtime·call2048, 2048)
463 DISPATCH(runtime·call4096, 4096)
464 DISPATCH(runtime·call8192, 8192)
465 DISPATCH(runtime·call16384, 16384)
466 DISPATCH(runtime·call32768, 32768)
467 DISPATCH(runtime·call65536, 65536)
468 DISPATCH(runtime·call131072, 131072)
469 DISPATCH(runtime·call262144, 262144)
470 DISPATCH(runtime·call524288, 524288)
471 DISPATCH(runtime·call1048576, 1048576)
472 DISPATCH(runtime·call2097152, 2097152)
473 DISPATCH(runtime·call4194304, 4194304)
474 DISPATCH(runtime·call8388608, 8388608)
475 DISPATCH(runtime·call16777216, 16777216)
476 DISPATCH(runtime·call33554432, 33554432)
477 DISPATCH(runtime·call67108864, 67108864)
478 DISPATCH(runtime·call134217728, 134217728)
479 DISPATCH(runtime·call268435456, 268435456)
480 DISPATCH(runtime·call536870912, 536870912)
481 DISPATCH(runtime·call1073741824, 1073741824)
482 MOVQ $runtime·badreflectcall(SB), AX
483 JMP AX
484
// CALLFN defines one fixed-frame-size reflectcall trampoline: it copies
// argsize bytes of arguments onto its frame (REP;MOVSB from SI to DI,
// count CX), calls the function value f with ctxt in DX, then hands the
// result region to callRet, which copies the return values back with
// write barriers via reflectcallmove.
485 #define CALLFN(NAME,MAXSIZE) \
486 TEXT NAME(SB), WRAPPER, $MAXSIZE-32; \
487 NO_LOCAL_POINTERS; \
488 /* copy arguments to stack */ \
489 MOVQ argptr+16(FP), SI; \
490 MOVLQZX argsize+24(FP), CX; \
491 MOVQ SP, DI; \
492 REP;MOVSB; \
493 /* call function */ \
494 MOVQ f+8(FP), DX; \
495 PCDATA $PCDATA_StackMapIndex, $0; \
496 CALL (DX); \
497 /* copy return values back */ \
498 MOVQ argtype+0(FP), DX; \
499 MOVQ argptr+16(FP), DI; \
500 MOVLQZX argsize+24(FP), CX; \
501 MOVLQZX retoffset+28(FP), BX; \
502 MOVQ SP, SI; \
503 ADDQ BX, DI; \
504 ADDQ BX, SI; \
505 SUBQ BX, CX; \
506 CALL callRet<>(SB); \
507 RET
508
509 // callRet copies return values back at the end of call*. This is a
510 // separate function so it can allocate stack space for the arguments
511 // to reflectcallmove. It does not follow the Go ABI; it expects its
512 // arguments in registers.
513 TEXT callRet<>(SB), NOSPLIT, $32-0
514 NO_LOCAL_POINTERS
// Register args from CALLFN: DX=argtype, DI=dest, SI=src, CX=byte count.
515 MOVQ DX, 0(SP)
516 MOVQ DI, 8(SP)
517 MOVQ SI, 16(SP)
518 MOVQ CX, 24(SP)
519 CALL runtime·reflectcallmove(SB)
520 RET
521
// Instantiate one call* trampoline per power-of-two frame size,
// matching the DISPATCH table in reflectcall.
522 CALLFN(·call32, 32)
523 CALLFN(·call64, 64)
524 CALLFN(·call128, 128)
525 CALLFN(·call256, 256)
526 CALLFN(·call512, 512)
527 CALLFN(·call1024, 1024)
528 CALLFN(·call2048, 2048)
529 CALLFN(·call4096, 4096)
530 CALLFN(·call8192, 8192)
531 CALLFN(·call16384, 16384)
532 CALLFN(·call32768, 32768)
533 CALLFN(·call65536, 65536)
534 CALLFN(·call131072, 131072)
535 CALLFN(·call262144, 262144)
536 CALLFN(·call524288, 524288)
537 CALLFN(·call1048576, 1048576)
538 CALLFN(·call2097152, 2097152)
539 CALLFN(·call4194304, 4194304)
540 CALLFN(·call8388608, 8388608)
541 CALLFN(·call16777216, 16777216)
542 CALLFN(·call33554432, 33554432)
543 CALLFN(·call67108864, 67108864)
544 CALLFN(·call134217728, 134217728)
545 CALLFN(·call268435456, 268435456)
546 CALLFN(·call536870912, 536870912)
547 CALLFN(·call1073741824, 1073741824)
548
// procyield spins for the given number of cycles, hinting to the CPU
// that this is a busy-wait loop.
549 TEXT runtime·procyield(SB),NOSPLIT,$0-0
550 MOVL cycles+0(FP), AX
551 again:
552 PAUSE // spin-wait hint
553 SUBL $1, AX
554 JNZ again
555 RET
556
557
558 TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
559 // Stores are already ordered on x86, so this is just a
560 // compile barrier.
561 RET
562
563 // void jmpdefer(fn, sp);
564 // called from deferreturn.
565 // 1. pop the caller
566 // 2. sub 5 bytes from the callers return
567 // 3. jmp to the argument
568 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
569 MOVQ fv+0(FP), DX // fn
570 MOVQ argp+8(FP), BX // caller sp
571 LEAQ -8(BX), SP // caller sp after CALL
572 MOVQ -8(SP), BP // restore BP as if deferreturn returned (harmless if framepointers not in use)
573 SUBQ $5, (SP) // return to CALL again (5 = length of the CALL deferreturn instruction)
574 MOVQ 0(DX), BX // code pointer from the funcval; DX stays as ctxt
575 JMP BX // but first run the deferred function
576
577 // Save state of caller into g->sched. Smashes R8, R9.
578 TEXT gosave<>(SB),NOSPLIT,$0
579 get_tls(R8)
580 MOVQ g(R8), R8
581 MOVQ 0(SP), R9 // caller's PC
582 MOVQ R9, (g_sched+gobuf_pc)(R8)
583 LEAQ 8(SP), R9 // caller's SP
584 MOVQ R9, (g_sched+gobuf_sp)(R8)
585 MOVQ $0, (g_sched+gobuf_ret)(R8)
586 MOVQ BP, (g_sched+gobuf_bp)(R8)
587 // Assert ctxt is zero. See func save.
588 MOVQ (g_sched+gobuf_ctxt)(R8), R9
589 TESTQ R9, R9
590 JZ 2(PC) // ctxt == 0: skip the badctxt crash
591 CALL runtime·badctxt(SB)
592 RET
593
594 // func asmcgocall(fn, arg unsafe.Pointer) int32
595 // Call fn(arg) on the scheduler stack,
596 // aligned appropriately for the gcc ABI.
597 // See cgocall.go for more details.
598 TEXT ·asmcgocall(SB),NOSPLIT,$0-20
599 MOVQ fn+0(FP), AX
600 MOVQ arg+8(FP), BX
601
602 MOVQ SP, DX // DX = entry SP, used below to compute stack depth
603
604 // Figure out if we need to switch to m->g0 stack.
605 // We get called to create new OS threads too, and those
606 // come in on the m->g0 stack already.
607 get_tls(CX)
608 MOVQ g(CX), R8
609 CMPQ R8, $0
610 JEQ nosave
611 MOVQ g_m(R8), R8
612 MOVQ m_g0(R8), SI
613 MOVQ g(CX), DI
614 CMPQ SI, DI
615 JEQ nosave
616 MOVQ m_gsignal(R8), SI
617 CMPQ SI, DI
618 JEQ nosave
619
620 // Switch to system stack.
621 MOVQ m_g0(R8), SI
622 CALL gosave<>(SB)
623 MOVQ SI, g(CX)
624 MOVQ (g_sched+gobuf_sp)(SI), SP
625
626 // Now on a scheduling stack (a pthread-created stack).
627 // Make sure we have enough room for 4 stack-backed fast-call
628 // registers as per windows amd64 calling convention.
629 SUBQ $64, SP
630 ANDQ $~15, SP // alignment for gcc ABI
631 MOVQ DI, 48(SP) // save g
632 MOVQ (g_stack+stack_hi)(DI), DI
633 SUBQ DX, DI
634 MOVQ DI, 40(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
635 MOVQ BX, DI // DI = first argument in AMD64 ABI
636 MOVQ BX, CX // CX = first argument in Win64
637 CALL AX
638
639 // Restore registers, g, stack pointer.
// Recompute SP from stack.hi minus the saved depth, since the
// goroutine stack may have moved during a callback.
640 get_tls(CX)
641 MOVQ 48(SP), DI
642 MOVQ (g_stack+stack_hi)(DI), SI
643 SUBQ 40(SP), SI
644 MOVQ DI, g(CX)
645 MOVQ SI, SP
646
647 MOVL AX, ret+16(FP)
648 RET
649
650 nosave:
651 // Running on a system stack, perhaps even without a g.
652 // Having no g can happen during thread creation or thread teardown
653 // (see needm/dropm on Solaris, for example).
654 // This code is like the above sequence but without saving/restoring g
655 // and without worrying about the stack moving out from under us
656 // (because we're on a system stack, not a goroutine stack).
657 // The above code could be used directly if already on a system stack,
658 // but then the only path through this code would be a rare case on Solaris.
659 // Using this code for all "already on system stack" calls exercises it more,
660 // which should help keep it correct.
661 SUBQ $64, SP
662 ANDQ $~15, SP
663 MOVQ $0, 48(SP) // where above code stores g, in case someone looks during debugging
664 MOVQ DX, 40(SP) // save original stack pointer
665 MOVQ BX, DI // DI = first argument in AMD64 ABI
666 MOVQ BX, CX // CX = first argument in Win64
667 CALL AX
668 MOVQ 40(SP), SI // restore original stack pointer
669 MOVQ SI, SP
670 MOVL AX, ret+16(FP)
671 RET
672
673 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize, uintptr ctxt)
674 // Turn the fn into a Go func (by taking its address) and call
675 // cgocallback_gofunc.
676 TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
// Forward all four arguments, passing &fn so the callee sees a FuncVal*.
677 LEAQ fn+0(FP), AX
678 MOVQ AX, 0(SP)
679 MOVQ frame+8(FP), AX
680 MOVQ AX, 8(SP)
681 MOVQ framesize+16(FP), AX
682 MOVQ AX, 16(SP)
683 MOVQ ctxt+24(FP), AX
684 MOVQ AX, 24(SP)
// Indirect call keeps this frame visible to the linker's analysis.
685 MOVQ $runtime·cgocallback_gofunc(SB), AX
686 CALL AX
687 RET
688
689 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize, uintptr ctxt)
690 // See cgocall.go for more details.
691 TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
692 NO_LOCAL_POINTERS
693
694 // If g is nil, Go did not create the current thread.
695 // Call needm to obtain one m for temporary use.
696 // In this case, we're running on the thread stack, so there's
697 // lots of space, but the linker doesn't know. Hide the call from
698 // the linker analysis by using an indirect call through AX.
699 get_tls(CX)
700 #ifdef GOOS_windows
701 MOVL $0, BX
702 CMPQ CX, $0
703 JEQ 2(PC) // TLS not set up yet on this thread: treat g as nil
704 #endif
705 MOVQ g(CX), BX
706 CMPQ BX, $0
707 JEQ needm
708 MOVQ g_m(BX), BX
709 MOVQ BX, R8 // holds oldm until end of function
710 JMP havem
711 needm:
712 MOVQ $0, 0(SP) // R8 = nil marker: remember we borrowed the m
713 MOVQ $runtime·needm(SB), AX
714 CALL AX
715 MOVQ 0(SP), R8
716 get_tls(CX)
717 MOVQ g(CX), BX
718 MOVQ g_m(BX), BX
719
720 // Set m->sched.sp = SP, so that if a panic happens
721 // during the function we are about to execute, it will
722 // have a valid SP to run on the g0 stack.
723 // The next few lines (after the havem label)
724 // will save this SP onto the stack and then write
725 // the same SP back to m->sched.sp. That seems redundant,
726 // but if an unrecovered panic happens, unwindm will
727 // restore the g->sched.sp from the stack location
728 // and then systemstack will try to use it. If we don't set it here,
729 // that restored SP will be uninitialized (typically 0) and
730 // will not be usable.
731 MOVQ m_g0(BX), SI
732 MOVQ SP, (g_sched+gobuf_sp)(SI)
733
734 havem:
735 // Now there's a valid m, and we're running on its m->g0.
736 // Save current m->g0->sched.sp on stack and then set it to SP.
737 // Save current sp in m->g0->sched.sp in preparation for
738 // switch back to m->curg stack.
739 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
740 MOVQ m_g0(BX), SI
741 MOVQ (g_sched+gobuf_sp)(SI), AX
742 MOVQ AX, 0(SP)
743 MOVQ SP, (g_sched+gobuf_sp)(SI)
744
745 // Switch to m->curg stack and call runtime.cgocallbackg.
746 // Because we are taking over the execution of m->curg
747 // but *not* resuming what had been running, we need to
748 // save that information (m->curg->sched) so we can restore it.
749 // We can restore m->curg->sched.sp easily, because calling
750 // runtime.cgocallbackg leaves SP unchanged upon return.
751 // To save m->curg->sched.pc, we push it onto the stack.
752 // This has the added benefit that it looks to the traceback
753 // routine like cgocallbackg is going to return to that
754 // PC (because the frame we allocate below has the same
755 // size as cgocallback_gofunc's frame declared above)
756 // so that the traceback will seamlessly trace back into
757 // the earlier calls.
758 //
759 // In the new goroutine, 8(SP) holds the saved R8.
760 MOVQ m_curg(BX), SI
761 MOVQ SI, g(CX)
762 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
763 MOVQ (g_sched+gobuf_pc)(SI), BX
764 MOVQ BX, -8(DI) // push curg's saved PC onto its stack
765 // Compute the size of the frame, including return PC and, if
766 // GOEXPERIMENT=framepointer, the saved base pointer
767 MOVQ ctxt+24(FP), BX
768 LEAQ fv+0(FP), AX
769 SUBQ SP, AX // AX = frame size (FP - SP)
770 SUBQ AX, DI
771 MOVQ DI, SP
772
773 MOVQ R8, 8(SP)
774 MOVQ BX, 0(SP) // ctxt is cgocallbackg's argument
775 CALL runtime·cgocallbackg(SB)
776 MOVQ 8(SP), R8
777
778 // Compute the size of the frame again. FP and SP have
779 // completely different values here than they did above,
780 // but only their difference matters.
781 LEAQ fv+0(FP), AX
782 SUBQ SP, AX
783
784 // Restore g->sched (== m->curg->sched) from saved values.
785 get_tls(CX)
786 MOVQ g(CX), SI
787 MOVQ SP, DI
788 ADDQ AX, DI
789 MOVQ -8(DI), BX // saved PC pushed above
790 MOVQ BX, (g_sched+gobuf_pc)(SI)
791 MOVQ DI, (g_sched+gobuf_sp)(SI)
792
793 // Switch back to m->g0's stack and restore m->g0->sched.sp.
794 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
795 // so we do not have to restore it.)
796 MOVQ g(CX), BX
797 MOVQ g_m(BX), BX
798 MOVQ m_g0(BX), SI
799 MOVQ SI, g(CX)
800 MOVQ (g_sched+gobuf_sp)(SI), SP
801 MOVQ 0(SP), AX
802 MOVQ AX, (g_sched+gobuf_sp)(SI)
803
804 // If the m on entry was nil, we called needm above to borrow an m
805 // for the duration of the call. Since the call is over, return it with dropm.
806 CMPQ R8, $0
807 JNE 3(PC)
808 MOVQ $runtime·dropm(SB), AX
809 CALL AX
810
811 // Done!
812 RET
813
814 // void setg(G*); set g. for use by needm.
815 TEXT runtime·setg(SB), NOSPLIT, $0-8
816 MOVQ gg+0(FP), BX
817 #ifdef GOOS_windows
// On Windows, also maintain the 0x28(GS) TLS slot, clearing it for a
// nil g and pointing it at the m's TLS array otherwise.
818 CMPQ BX, $0
819 JNE settls
820 MOVQ $0, 0x28(GS)
821 RET
822 settls:
823 MOVQ g_m(BX), AX
824 LEAQ m_tls(AX), AX
825 MOVQ AX, 0x28(GS)
826 #endif
827 get_tls(CX)
828 MOVQ BX, g(CX)
829 RET
830
831 // void setg_gcc(G*); set g called from gcc.
// setg_gcc is called from gcc-compiled code with the new g in DI
// (C ABI first argument register).
832 TEXT setg_gcc<>(SB),NOSPLIT,$0
833 get_tls(AX)
834 MOVQ DI, g(AX)
835 RET
836
837 TEXT runtime·abort(SB),NOSPLIT,$0-0
838 INT $3 // breakpoint trap; expected to kill the process
839 loop:
840 JMP loop // if the trap is somehow ignored, spin forever rather than continue
841
842 // check that SP is in range [g->stack.lo, g->stack.hi)
843 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
844 get_tls(CX)
845 MOVQ g(CX), AX
846 CMPQ (g_stack+stack_hi)(AX), SP
847 JHI 2(PC) // stack.hi > SP: upper bound ok, skip abort
848 CALL runtime·abort(SB)
849 CMPQ SP, (g_stack+stack_lo)(AX)
850 JHI 2(PC) // SP > stack.lo: lower bound ok, skip abort
851 CALL runtime·abort(SB)
852 RET
853
854 // func cputicks() int64
855 TEXT runtime·cputicks(SB),NOSPLIT,$0-0
// Serialize with the fence appropriate for this CPU (chosen in rt0_go),
// then read the time-stamp counter.
856 CMPB runtime·lfenceBeforeRdtsc(SB), $1
857 JNE mfence
858 LFENCE
859 JMP done
860 mfence:
861 MFENCE
862 done:
863 RDTSC
864 SHLQ $32, DX // RDTSC splits the counter across DX:AX; reassemble into AX
865 ADDQ DX, AX
866 MOVQ AX, ret+0(FP)
867 RET
868
869 // hash function using AES hardware instructions
870 TEXT runtime·aeshash(SB),NOSPLIT,$0-32
871 MOVQ p+0(FP), AX // ptr to data
872 MOVQ s+16(FP), CX // size
873 LEAQ ret+24(FP), DX // result slot; aeshashbody writes through DX
874 JMP runtime·aeshashbody(SB)
875
876 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
877 MOVQ p+0(FP), AX // ptr to string struct
878 MOVQ 8(AX), CX // length of string
879 MOVQ (AX), AX // string data
880 LEAQ ret+16(FP), DX // result slot; aeshashbody writes through DX
881 JMP runtime·aeshashbody(SB)
882
883 // AX: data
884 // CX: length
885 // DX: address to put return value
886 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
887 // Fill an SSE register with our seeds.
888 MOVQ h+8(FP), X0 // 64 bits of per-table hash seed
889 PINSRW $4, CX, X0 // 16 bits of length
890 PSHUFHW $0, X0, X0 // repeat length 4 times total
891 MOVO X0, X1 // save unscrambled seed
892 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
893 AESENC X0, X0 // scramble seed
894
// Dispatch on the input length in CX.
895 CMPQ CX, $16
896 JB aes0to15
897 JE aes16
898 CMPQ CX, $32
899 JBE aes17to32
900 CMPQ CX, $64
901 JBE aes33to64
902 CMPQ CX, $128
903 JBE aes65to128
904 JMP aes129plus
905
906 aes0to15:
907 TESTQ CX, CX
908 JE aes0
909
910 ADDQ $16, AX
911 TESTW $0xff0, AX
912 JE endofpage
913
914 // 16 bytes loaded at this address won't cross
915 // a page boundary, so we can load it directly.
916 MOVOU -16(AX), X1
917 ADDQ CX, CX // CX*8 below thus indexes 16-byte mask entries
918 MOVQ $masks<>(SB), AX
919 PAND (AX)(CX*8), X1
920 final1:
921 PXOR X0, X1 // xor data with seed
922 AESENC X1, X1 // scramble combo 3 times
923 AESENC X1, X1
924 AESENC X1, X1
925 MOVQ X1, (DX)
926 RET
927
928 endofpage:
929 // address ends in 1111xxxx. Might be up against
930 // a page boundary, so load ending at last byte.
931 // Then shift bytes down using pshufb.
932 MOVOU -32(AX)(CX*1), X1
933 ADDQ CX, CX // CX*8 below thus indexes 16-byte shift entries
934 MOVQ $shifts<>(SB), AX
935 PSHUFB (AX)(CX*8), X1
936 JMP final1
937
938 aes0:
939 // Return scrambled input seed
940 AESENC X0, X0
941 MOVQ X0, (DX)
942 RET
943
944 aes16:
945 MOVOU (AX), X1
946 JMP final1
947
948 aes17to32:
949 // make second starting seed
950 PXOR runtime·aeskeysched+16(SB), X1
951 AESENC X1, X1
952
// Two possibly-overlapping 16-byte loads cover 17..32 bytes.
953 // load data to be hashed
954 MOVOU (AX), X2
955 MOVOU -16(AX)(CX*1), X3
956
957 // xor with seed
958 PXOR X0, X2
959 PXOR X1, X3
960
961 // scramble 3 times
962 AESENC X2, X2
963 AESENC X3, X3
964 AESENC X2, X2
965 AESENC X3, X3
966 AESENC X2, X2
967 AESENC X3, X3
968
969 // combine results
970 PXOR X3, X2
971 MOVQ X2, (DX)
972 RET
973
974 aes33to64:
975 // make 3 more starting seeds
976 MOVO X1, X2
977 MOVO X1, X3
978 PXOR runtime·aeskeysched+16(SB), X1
979 PXOR runtime·aeskeysched+32(SB), X2
980 PXOR runtime·aeskeysched+48(SB), X3
981 AESENC X1, X1
982 AESENC X2, X2
983 AESENC X3, X3
984
// Four possibly-overlapping 16-byte loads cover 33..64 bytes.
985 MOVOU (AX), X4
986 MOVOU 16(AX), X5
987 MOVOU -32(AX)(CX*1), X6
988 MOVOU -16(AX)(CX*1), X7
989
990 PXOR X0, X4
991 PXOR X1, X5
992 PXOR X2, X6
993 PXOR X3, X7
994
995 AESENC X4, X4
996 AESENC X5, X5
997 AESENC X6, X6
998 AESENC X7, X7
999
1000 AESENC X4, X4
1001 AESENC X5, X5
1002 AESENC X6, X6
1003 AESENC X7, X7
1004
1005 AESENC X4, X4
1006 AESENC X5, X5
1007 AESENC X6, X6
1008 AESENC X7, X7
1009
1010 PXOR X6, X4
1011 PXOR X7, X5
1012 PXOR X5, X4
1013 MOVQ X4, (DX)
1014 RET
1015
1016 aes65to128:
1017 // make 7 more starting seeds
1018 MOVO X1, X2
1019 MOVO X1, X3
1020 MOVO X1, X4
1021 MOVO X1, X5
1022 MOVO X1, X6
1023 MOVO X1, X7
1024 PXOR runtime·aeskeysched+16(SB), X1
1025 PXOR runtime·aeskeysched+32(SB), X2
1026 PXOR runtime·aeskeysched+48(SB), X3
1027 PXOR runtime·aeskeysched+64(SB), X4
1028 PXOR runtime·aeskeysched+80(SB), X5
1029 PXOR runtime·aeskeysched+96(SB), X6
1030 PXOR runtime·aeskeysched+112(SB), X7
1031 AESENC X1, X1
1032 AESENC X2, X2
1033 AESENC X3, X3
1034 AESENC X4, X4
1035 AESENC X5, X5
1036 AESENC X6, X6
1037 AESENC X7, X7
1038
// Eight possibly-overlapping 16-byte loads cover 65..128 bytes.
1039 // load data
1040 MOVOU (AX), X8
1041 MOVOU 16(AX), X9
1042 MOVOU 32(AX), X10
1043 MOVOU 48(AX), X11
1044 MOVOU -64(AX)(CX*1), X12
1045 MOVOU -48(AX)(CX*1), X13
1046 MOVOU -32(AX)(CX*1), X14
1047 MOVOU -16(AX)(CX*1), X15
1048
1049 // xor with seed
1050 PXOR X0, X8
1051 PXOR X1, X9
1052 PXOR X2, X10
1053 PXOR X3, X11
1054 PXOR X4, X12
1055 PXOR X5, X13
1056 PXOR X6, X14
1057 PXOR X7, X15
1058
1059 // scramble 3 times
1060 AESENC X8, X8
1061 AESENC X9, X9
1062 AESENC X10, X10
1063 AESENC X11, X11
1064 AESENC X12, X12
1065 AESENC X13, X13
1066 AESENC X14, X14
1067 AESENC X15, X15
1068
1069 AESENC X8, X8
1070 AESENC X9, X9
1071 AESENC X10, X10
1072 AESENC X11, X11
1073 AESENC X12, X12
1074 AESENC X13, X13
1075 AESENC X14, X14
1076 AESENC X15, X15
1077
1078 AESENC X8, X8
1079 AESENC X9, X9
1080 AESENC X10, X10
1081 AESENC X11, X11
1082 AESENC X12, X12
1083 AESENC X13, X13
1084 AESENC X14, X14
1085 AESENC X15, X15
1086
1087 // combine results
1088 PXOR X12, X8
1089 PXOR X13, X9
1090 PXOR X14, X10
1091 PXOR X15, X11
1092 PXOR X10, X8
1093 PXOR X11, X9
1094 PXOR X9, X8
1095 MOVQ X8, (DX)
1096 RET
1097
1098 aes129plus:
1099 // make 7 more starting seeds
1100 MOVO X1, X2
1101 MOVO X1, X3
1102 MOVO X1, X4
1103 MOVO X1, X5
1104 MOVO X1, X6
1105 MOVO X1, X7
1106 PXOR runtime·aeskeysched+16(SB), X1
1107 PXOR runtime·aeskeysched+32(SB), X2
1108 PXOR runtime·aeskeysched+48(SB), X3
1109 PXOR runtime·aeskeysched+64(SB), X4
1110 PXOR runtime·aeskeysched+80(SB), X5
1111 PXOR runtime·aeskeysched+96(SB), X6
1112 PXOR runtime·aeskeysched+112(SB), X7
1113 AESENC X1, X1
1114 AESENC X2, X2
1115 AESENC X3, X3
1116 AESENC X4, X4
1117 AESENC X5, X5
1118 AESENC X6, X6
1119 AESENC X7, X7
1120
1121 // start with last (possibly overlapping) block
1122 MOVOU -128(AX)(CX*1), X8
1123 MOVOU -112(AX)(CX*1), X9
1124 MOVOU -96(AX)(CX*1), X10
1125 MOVOU -80(AX)(CX*1), X11
1126 MOVOU -64(AX)(CX*1), X12
1127 MOVOU -48(AX)(CX*1), X13
1128 MOVOU -32(AX)(CX*1), X14
1129 MOVOU -16(AX)(CX*1), X15
1130
1131 // xor in seed
1132 PXOR X0, X8
1133 PXOR X1, X9
1134 PXOR X2, X10
1135 PXOR X3, X11
1136 PXOR X4, X12
1137 PXOR X5, X13
1138 PXOR X6, X14
1139 PXOR X7, X15
1140
1141 // compute number of remaining 128-byte blocks
1142 DECQ CX
1143 SHRQ $7, CX
1144
1145 aesloop:
1146 // scramble state
1147 AESENC X8, X8
1148 AESENC X9, X9
1149 AESENC X10, X10
1150 AESENC X11, X11
1151 AESENC X12, X12
1152 AESENC X13, X13
1153 AESENC X14, X14
1154 AESENC X15, X15
1155
1156 // scramble state, xor in a block
1157 MOVOU (AX), X0
1158 MOVOU 16(AX), X1
1159 MOVOU 32(AX), X2
1160 MOVOU 48(AX), X3
1161 AESENC X0, X8
1162 AESENC X1, X9
1163 AESENC X2, X10
1164 AESENC X3, X11
1165 MOVOU 64(AX), X4
1166 MOVOU 80(AX), X5
1167 MOVOU 96(AX), X6
1168 MOVOU 112(AX), X7
1169 AESENC X4, X12
1170 AESENC X5, X13
1171 AESENC X6, X14
1172 AESENC X7, X15
1173
1174 ADDQ $128, AX
1175 DECQ CX
1176 JNE aesloop
1177
1178 // 3 more scrambles to finish
1179 AESENC X8, X8
1180 AESENC X9, X9
1181 AESENC X10, X10
1182 AESENC X11, X11
1183 AESENC X12, X12
1184 AESENC X13, X13
1185 AESENC X14, X14
1186 AESENC X15, X15
1187 AESENC X8, X8
1188 AESENC X9, X9
1189 AESENC X10, X10
1190 AESENC X11, X11
1191 AESENC X12, X12
1192 AESENC X13, X13
1193 AESENC X14, X14
1194 AESENC X15, X15
1195 AESENC X8, X8
1196 AESENC X9, X9
1197 AESENC X10, X10
1198 AESENC X11, X11
1199 AESENC X12, X12
1200 AESENC X13, X13
1201 AESENC X14, X14
1202 AESENC X15, X15
1203
// combine the 8 lanes into a single 64-bit result
1204 PXOR X12, X8
1205 PXOR X13, X9
1206 PXOR X14, X10
1207 PXOR X15, X11
1208 PXOR X10, X8
1209 PXOR X11, X9
1210 PXOR X9, X8
1211 MOVQ X8, (DX)
1212 RET
1213
// aeshash32 computes an AES-based hash of the 4 bytes at p, mixed with
// seed h. Frame layout ($0-24): p at +0(FP), h at +8(FP), result at
// +16(FP). Requires AES-NI and the precomputed runtime·aeskeysched.
1214 TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
1215 MOVQ p+0(FP), AX // ptr to data
1216 MOVQ h+8(FP), X0 // seed
1217 PINSRD $2, (AX), X0 // data
// Three AES encryption rounds against the key schedule scramble
// seed and data together; one round alone is not a good mixer.
1218 AESENC runtime·aeskeysched+0(SB), X0
1219 AESENC runtime·aeskeysched+16(SB), X0
1220 AESENC runtime·aeskeysched+32(SB), X0
// Return the low 64 bits of the scrambled state.
1221 MOVQ X0, ret+16(FP)
1222 RET
1223
// aeshash64 computes an AES-based hash of the 8 bytes at p, mixed with
// seed h. Frame layout ($0-24): p at +0(FP), h at +8(FP), result at
// +16(FP). Mirrors aeshash32 but inserts a full quadword of data.
1224 TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
1225 MOVQ p+0(FP), AX // ptr to data
1226 MOVQ h+8(FP), X0 // seed
1227 PINSRQ $1, (AX), X0 // data
// Three AES rounds mix seed (low qword) and data (high qword).
1228 AESENC runtime·aeskeysched+0(SB), X0
1229 AESENC runtime·aeskeysched+16(SB), X0
1230 AESENC runtime·aeskeysched+32(SB), X0
// Return the low 64 bits of the scrambled state.
1231 MOVQ X0, ret+16(FP)
1232 RET
1233
1234 // simple mask to get rid of data in the high part of the register.
// masks<> holds 16 entries of 16 bytes each. Entry i (0 <= i <= 15) has
// its low i bytes set to 0xff and the remaining bytes zero, so ANDing an
// XMM register with entry i keeps only its low i bytes. Values below are
// written as little-endian 8-byte quadwords (low quadword first).
1235 DATA masks<>+0x00(SB)/8, $0x0000000000000000
1236 DATA masks<>+0x08(SB)/8, $0x0000000000000000
1237 DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1238 DATA masks<>+0x18(SB)/8, $0x0000000000000000
1239 DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1240 DATA masks<>+0x28(SB)/8, $0x0000000000000000
1241 DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1242 DATA masks<>+0x38(SB)/8, $0x0000000000000000
1243 DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1244 DATA masks<>+0x48(SB)/8, $0x0000000000000000
1245 DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1246 DATA masks<>+0x58(SB)/8, $0x0000000000000000
1247 DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1248 DATA masks<>+0x68(SB)/8, $0x0000000000000000
1249 DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1250 DATA masks<>+0x78(SB)/8, $0x0000000000000000
1251 DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1252 DATA masks<>+0x88(SB)/8, $0x0000000000000000
1253 DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1254 DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1255 DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1256 DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1257 DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1258 DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1259 DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1260 DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1261 DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1262 DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1263 DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1264 DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1265 DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1266 DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
// 16 entries * 16 bytes = 256 bytes; checkASM verifies 16-byte alignment.
1267 GLOBL masks<>(SB),RODATA,$256
1268
// checkASM verifies assembly-level invariants that cannot be checked at
// build time. It returns true (1 byte at ret+0(FP)) iff the masks<> and
// shifts<> tables are both 16-byte aligned, as required for their use as
// XMM operands.
1269 TEXT ·checkASM(SB),NOSPLIT,$0-1
1270 // check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
1271 MOVQ $masks<>(SB), AX
1272 MOVQ $shifts<>(SB), BX
// OR the two addresses together: a set low bit in either address
// survives into AX, so one TEST covers both.
1273 ORQ BX, AX
1274 TESTQ $15, AX
// ret = 1 iff the low four bits of both addresses are zero.
1275 SETEQ ret+0(FP)
1276 RET
1277
1278 // these are arguments to pshufb. They move data down from
1279 // the high bytes of the register to the low bytes of the register.
1280 // index is how many bytes to move.
// shifts<> holds 16 entries of 16 bytes each. In entry i, byte lane j
// selects source byte 16-i+j for j < i, and is 0xff for j >= i; PSHUFB
// turns 0xff selectors into zero bytes, so entry i moves the top i bytes
// of the register to the bottom and zeroes the rest. Quadwords below are
// little-endian (low quadword first).
1281 DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1282 DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1283 DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1284 DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1285 DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1286 DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1287 DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1288 DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1289 DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1290 DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1291 DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1292 DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1293 DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1294 DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1295 DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1296 DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1297 DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1298 DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1299 DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1300 DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1301 DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1302 DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1303 DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1304 DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1305 DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1306 DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1307 DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1308 DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1309 DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1310 DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1311 DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1312 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
// 16 entries * 16 bytes = 256 bytes; checkASM verifies 16-byte alignment.
1313 GLOBL shifts<>(SB),RODATA,$256
1314
// return0 sets AX (the integer return register) to 0 and returns.
// NOTE(review): presumably used where the runtime must force a zero
// machine-level return value without going through Go code — confirm
// against the callers of return0.
1315 TEXT runtime·return0(SB), NOSPLIT, $0
1316 MOVL $0, AX
1317 RET
1318
1319
1320 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1321 // Must obey the gcc calling convention.
1322 TEXT _cgo_topofstack(SB),NOSPLIT,$0
// Chase the pointer chain g -> m -> curg -> stack.hi, leaving each
// intermediate value in AX; the final value is the C-ABI return in AX.
1323 get_tls(CX) // CX = TLS base
1324 MOVQ g(CX), AX // AX = g
1325 MOVQ g_m(AX), AX // AX = g.m
1326 MOVQ m_curg(AX), AX // AX = m.curg
1327 MOVQ (g_stack+stack_hi)(AX), AX // AX = curg.stack.hi (result)
1328 RET
1329
1330 // The top-most function running on a goroutine
1331 // returns to goexit+PCQuantum.
// The leading NOP means the return address pushed for goexit1 is
// goexit+1 (inside goexit's code range), so stack tracebacks that look
// up the return PC attribute the frame to goexit rather than to the
// preceding function.
1332 TEXT runtime·goexit(SB),NOSPLIT,$0-0
1333 BYTE $0x90 // NOP
1334 CALL runtime·goexit1(SB) // does not return
1335 // traceback from goexit1 must hit code range of goexit
1336 BYTE $0x90 // NOP
1337
1338 // This is called from .init_array and follows the platform, not Go, ABI.
// DI holds the new moduledata (first C-ABI argument). The function
// appends it to the runtime's module list:
//   lastmoduledatap.next = DI; lastmoduledatap = DI
1339 TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1340 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1341 MOVQ runtime·lastmoduledatap(SB), AX
1342 MOVQ DI, moduledata_next(AX)
1343 MOVQ DI, runtime·lastmoduledatap(SB)
1344 POPQ R15
1345 RET
1346
1347 // gcWriteBarrier performs a heap pointer write and informs the GC.
1348 //
1349 // gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
1350 // - DI is the destination of the write
1351 // - AX is the value being written at DI
1352 // It clobbers FLAGS. It does not clobber any general-purpose registers,
1353 // but may clobber others (e.g., SSE registers).
// Frame ($120): bytes 0-96 spill GP registers on the slow (flush) path;
// 104(SP)/112(SP) hold R14/R13 for the fast path.
1354 TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
1355 // Save the registers clobbered by the fast path. This is slightly
1356 // faster than having the caller spill these.
1357 MOVQ R14, 104(SP)
1358 MOVQ R13, 112(SP)
1359 // TODO: Consider passing g.m.p in as an argument so they can be shared
1360 // across a sequence of write barriers.
// R13 = p (the current P), reached via TLS g -> g.m -> m.p.
1361 get_tls(R13)
1362 MOVQ g(R13), R13
1363 MOVQ g_m(R13), R13
1364 MOVQ m_p(R13), R13
1365 MOVQ (p_wbBuf+wbBuf_next)(R13), R14
1366 // Increment wbBuf.next position.
1367 LEAQ 16(R14), R14
1368 MOVQ R14, (p_wbBuf+wbBuf_next)(R13)
// Sets the flags consumed by JEQ flush below; the intervening MOVQ
// instructions do not modify flags, so the comparison stays live.
1369 CMPQ R14, (p_wbBuf+wbBuf_end)(R13)
1370 // Record the write.
1371 MOVQ AX, -16(R14) // Record value
1372 // Note: This turns bad pointer writes into bad
1373 // pointer reads, which could be confusing. We could avoid
1374 // reading from obviously bad pointers, which would
1375 // take care of the vast majority of these. We could
1376 // patch this up in the signal handler, or use XCHG to
1377 // combine the read and the write.
1378 MOVQ (DI), R13
1379 MOVQ R13, -8(R14) // Record *slot
1380 // Is the buffer full? (flags set in CMPQ above)
1381 JEQ flush
1382 ret:
// Fast-path exit: restore the two clobbered registers, then perform
// the actual pointer write.
1383 MOVQ 104(SP), R14
1384 MOVQ 112(SP), R13
1385 // Do the write.
1386 MOVQ AX, (DI)
1387 RET
1388
1389 flush:
1390 // Save all general purpose registers since these could be
1391 // clobbered by wbBufFlush and were not saved by the caller.
1392 // It is possible for wbBufFlush to clobber other registers
1393 // (e.g., SSE registers), but the compiler takes care of saving
1394 // those in the caller if necessary. This strikes a balance
1395 // with registers that are likely to be used.
1396 //
1397 // We don't have type information for these, but all code under
1398 // here is NOSPLIT, so nothing will observe these.
1399 //
1400 // TODO: We could strike a different balance; e.g., saving X0
1401 // and not saving GP registers that are less likely to be used.
1402 MOVQ DI, 0(SP) // Also first argument to wbBufFlush
1403 MOVQ AX, 8(SP) // Also second argument to wbBufFlush
1404 MOVQ BX, 16(SP)
1405 MOVQ CX, 24(SP)
1406 MOVQ DX, 32(SP)
1407 // DI already saved
1408 MOVQ SI, 40(SP)
1409 MOVQ BP, 48(SP)
1410 MOVQ R8, 56(SP)
1411 MOVQ R9, 64(SP)
1412 MOVQ R10, 72(SP)
1413 MOVQ R11, 80(SP)
1414 MOVQ R12, 88(SP)
1415 // R13 already saved
1416 // R14 already saved
1417 MOVQ R15, 96(SP)
1418
1419 // This takes arguments DI and AX
1420 CALL runtime·wbBufFlush(SB)
1421
// Restore everything spilled above, then rejoin the fast-path exit,
// which performs the deferred write AX -> (DI).
1422 MOVQ 0(SP), DI
1423 MOVQ 8(SP), AX
1424 MOVQ 16(SP), BX
1425 MOVQ 24(SP), CX
1426 MOVQ 32(SP), DX
1427 MOVQ 40(SP), SI
1428 MOVQ 48(SP), BP
1429 MOVQ 56(SP), R8
1430 MOVQ 64(SP), R9
1431 MOVQ 72(SP), R10
1432 MOVQ 80(SP), R11
1433 MOVQ 88(SP), R12
1434 MOVQ 96(SP), R15
1435 JMP ret
1436
// debugCallFrameTooLarge<> is the 20-byte (0x14) error string
// "call frame too large", reported via the AX=8 protocol when an
// injected call's frame exceeds the largest debugCall stub (65536).
1437 DATA debugCallFrameTooLarge<>+0x00(SB)/8, $"call fra"
1438 DATA debugCallFrameTooLarge<>+0x08(SB)/8, $"me too l"
1439 DATA debugCallFrameTooLarge<>+0x10(SB)/4, $"arge"
1440 GLOBL debugCallFrameTooLarge<>(SB), RODATA, $0x14 // Size duplicated below
1441
1442 // debugCallV1 is the entry point for debugger-injected function
1443 // calls on running goroutines. It informs the runtime that a
1444 // debug call has been injected and creates a call frame for the
1445 // debugger to fill in.
1446 //
1447 // To inject a function call, a debugger should:
1448 // 1. Check that the goroutine is in state _Grunning and that
1449 // there are at least 256 bytes free on the stack.
1450 // 2. Push the current PC on the stack (updating SP).
1451 // 3. Write the desired argument frame size at SP-16 (using the SP
1452 // after step 2).
1453 // 4. Save all machine registers (including flags and XMM registers)
1454 // so they can be restored later by the debugger.
1455 // 5. Set the PC to debugCallV1 and resume execution.
1456 //
1457 // If the goroutine is in state _Grunnable, then it's not generally
1458 // safe to inject a call because it may return out via other runtime
1459 // operations. Instead, the debugger should unwind the stack to find
1460 // the return to non-runtime code, add a temporary breakpoint there,
1461 // and inject the call once that breakpoint is hit.
1462 //
1463 // If the goroutine is in any other state, it's not safe to inject a call.
1464 //
1465 // This function communicates back to the debugger by setting RAX and
1466 // invoking INT3 to raise a breakpoint signal. See the comments in the
1467 // implementation for the protocol the debugger is expected to
1468 // follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1469 //
1470 // The debugger must ensure that any pointers passed to the function
1471 // obey escape analysis requirements. Specifically, it must not pass
1472 // a stack pointer to an escaping argument. debugCallV1 cannot check
1473 // this invariant.
1474 TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
1475 // Save all registers that may contain pointers in GC register
1476 // map order (see ssa.registersAMD64). This makes it possible
1477 // to copy the stack while updating pointers currently held in
1478 // registers, and for the GC to find roots in registers.
1479 //
1480 // We can't do anything that might clobber any of these
1481 // registers before this.
1482 MOVQ R15, r15-(14*8+8)(SP)
1483 MOVQ R14, r14-(13*8+8)(SP)
1484 MOVQ R13, r13-(12*8+8)(SP)
1485 MOVQ R12, r12-(11*8+8)(SP)
1486 MOVQ R11, r11-(10*8+8)(SP)
1487 MOVQ R10, r10-(9*8+8)(SP)
1488 MOVQ R9, r9-(8*8+8)(SP)
1489 MOVQ R8, r8-(7*8+8)(SP)
1490 MOVQ DI, di-(6*8+8)(SP)
1491 MOVQ SI, si-(5*8+8)(SP)
1492 MOVQ BP, bp-(4*8+8)(SP)
1493 MOVQ BX, bx-(3*8+8)(SP)
1494 MOVQ DX, dx-(2*8+8)(SP)
1495 // Save the frame size before we clobber it. Either of the last
1496 // saves could clobber this depending on whether there's a saved BP.
1497 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1498 MOVQ CX, cx-(1*8+8)(SP)
1499 MOVQ AX, ax-(0*8+8)(SP)
1500
1501 // Save the argument frame size.
1502 MOVQ DX, frameSize-128(SP)
1503
1504 // Perform a safe-point check.
1505 MOVQ retpc-8(FP), AX // Caller's PC
1506 MOVQ AX, 0(SP)
1507 CALL runtime·debugCallCheck(SB)
1508 MOVQ 8(SP), AX // reason string data pointer, or 0 if the call is safe
1509 TESTQ AX, AX
1510 JZ good
1511 // The safety check failed. Put the reason string at the top
1512 // of the stack.
1513 MOVQ AX, 0(SP)
1514 MOVQ 16(SP), AX // reason string length
1515 MOVQ AX, 8(SP)
1516 // Set AX to 8 and invoke INT3. The debugger should get the
1517 // reason a call can't be injected from the top of the stack
1518 // and resume execution.
1519 MOVQ $8, AX
1520 BYTE $0xcc
1521 JMP restore
1522
1523 good:
1524 // Registers are saved and it's safe to make a call.
1525 // Open up a call frame, moving the stack if necessary.
1526 //
1527 // Once the frame is allocated, this will set AX to 0 and
1528 // invoke INT3. The debugger should write the argument
1529 // frame for the call at SP, push the trapping PC on the
1530 // stack, set the PC to the function to call, set RCX to point
1531 // to the closure (if a closure call), and resume execution.
1532 //
1533 // If the function returns, this will set AX to 1 and invoke
1534 // INT3. The debugger can then inspect any return value saved
1535 // on the stack at SP and resume execution again.
1536 //
1537 // If the function panics, this will set AX to 2 and invoke INT3.
1538 // The interface{} value of the panic will be at SP. The debugger
1539 // can inspect the panic value and resume execution again.
1540 #define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1541 CMPQ AX, $MAXSIZE; \
1542 JA 5(PC); \
1543 MOVQ $NAME(SB), AX; \
1544 MOVQ AX, 0(SP); \
1545 CALL runtime·debugCallWrap(SB); \
1546 JMP restore
1547
// Dispatch to the smallest debugCall stub whose frame fits the
// requested argument frame size (in AX).
1548 MOVQ frameSize-128(SP), AX
1549 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1550 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1551 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1552 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1553 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1554 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1555 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1556 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1557 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1558 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1559 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1560 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1561 // The frame size is too large. Report the error.
1562 MOVQ $debugCallFrameTooLarge<>(SB), AX
1563 MOVQ AX, 0(SP)
1564 MOVQ $0x14, 8(SP) // length of debugCallFrameTooLarge<>
1565 MOVQ $8, AX
1566 BYTE $0xcc
1567 JMP restore
1568
1569 restore:
1570 // Calls and failures resume here.
1571 //
1572 // Set AX to 16 and invoke INT3. The debugger should restore
1573 // all registers except RIP and RSP and resume execution.
1574 MOVQ $16, AX
1575 BYTE $0xcc
1576 // We must not modify flags after this point.
1577
1578 // Restore pointer-containing registers, which may have been
1579 // modified from the debugger's copy by stack copying.
1580 MOVQ ax-(0*8+8)(SP), AX
1581 MOVQ cx-(1*8+8)(SP), CX
1582 MOVQ dx-(2*8+8)(SP), DX
1583 MOVQ bx-(3*8+8)(SP), BX
1584 MOVQ bp-(4*8+8)(SP), BP
1585 MOVQ si-(5*8+8)(SP), SI
1586 MOVQ di-(6*8+8)(SP), DI
1587 MOVQ r8-(7*8+8)(SP), R8
1588 MOVQ r9-(8*8+8)(SP), R9
1589 MOVQ r10-(9*8+8)(SP), R10
1590 MOVQ r11-(10*8+8)(SP), R11
1591 MOVQ r12-(11*8+8)(SP), R12
1592 MOVQ r13-(12*8+8)(SP), R13
1593 MOVQ r14-(13*8+8)(SP), R14
1594 MOVQ r15-(14*8+8)(SP), R15
1595
1596 RET
1597
// DEBUG_CALL_FN defines a debugCall stub with a MAXSIZE-byte frame for
// the debugger to fill in. AX=0 + INT3 asks the debugger to write the
// argument frame at SP and start the injected call; AX=1 + INT3 reports
// that the call returned, with results at SP. (No comment lines may be
// inserted inside the macro body: each line must end with a
// backslash continuation.)
1598 #define DEBUG_CALL_FN(NAME,MAXSIZE) \
1599 TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1600 NO_LOCAL_POINTERS; \
1601 MOVQ $0, AX; \
1602 BYTE $0xcc; \
1603 MOVQ $1, AX; \
1604 BYTE $0xcc; \
1605 RET
// One stub per power-of-two frame size, matching the
// DEBUG_CALL_DISPATCH table in debugCallV1.
1606 DEBUG_CALL_FN(debugCall32<>, 32)
1607 DEBUG_CALL_FN(debugCall64<>, 64)
1608 DEBUG_CALL_FN(debugCall128<>, 128)
1609 DEBUG_CALL_FN(debugCall256<>, 256)
1610 DEBUG_CALL_FN(debugCall512<>, 512)
1611 DEBUG_CALL_FN(debugCall1024<>, 1024)
1612 DEBUG_CALL_FN(debugCall2048<>, 2048)
1613 DEBUG_CALL_FN(debugCall4096<>, 4096)
1614 DEBUG_CALL_FN(debugCall8192<>, 8192)
1615 DEBUG_CALL_FN(debugCall16384<>, 16384)
1616 DEBUG_CALL_FN(debugCall32768<>, 32768)
1617 DEBUG_CALL_FN(debugCall65536<>, 65536)
1618
// debugCallPanicked reports a panic from a debugger-injected call.
// It copies the panic's interface value (type word, data word) to the
// top of the stack, then sets AX=2 and raises INT3 so the debugger can
// inspect the panic value (per the debugCallV1 protocol) and resume.
1619 TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1620 // Copy the panic value to the top of stack.
1621 MOVQ val_type+0(FP), AX
1622 MOVQ AX, 0(SP)
1623 MOVQ val_data+8(FP), AX
1624 MOVQ AX, 8(SP)
1625 MOVQ $2, AX
1626 BYTE $0xcc
1627 RET
View as plain text