// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// _rt0_amd64 is common startup code for most amd64 systems when using
// internal linking. This is the entry point for the program from the
// kernel for an ordinary -buildmode=exe program. The stack holds the
// number of arguments and the C-style argv.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	MOVQ	0(SP), DI	// argc
	LEAQ	8(SP), SI	// argv
	JMP	runtime·rt0_go(SB)

// main is common startup code for most amd64 systems when using
// external linking. The C startup code will call the symbol "main"
// passing argc and argv in the usual C ABI registers DI and SI.
TEXT main(SB),NOSPLIT,$-8
	JMP	runtime·rt0_go(SB)

// _rt0_amd64_lib is common startup code for most amd64 systems when
// using -buildmode=c-archive or -buildmode=c-shared. The linker will
// arrange to invoke this function as a global constructor (for
// c-archive) or when the shared library is loaded (for c-shared).
// We expect argc and argv to be passed in the usual C ABI registers
// DI and SI.
TEXT _rt0_amd64_lib(SB),NOSPLIT,$0x50
	// Align stack per ELF ABI requirements.
	MOVQ	SP, AX
	ANDQ	$~15, SP
	// Save C ABI callee-saved registers, as caller may need them.
	MOVQ	BX, 0x10(SP)
	MOVQ	BP, 0x18(SP)
	MOVQ	R12, 0x20(SP)
	MOVQ	R13, 0x28(SP)
	MOVQ	R14, 0x30(SP)
	MOVQ	R15, 0x38(SP)
	MOVQ	AX, 0x40(SP)	// original (pre-alignment) SP, restored on exit

	// Stash argc/argv for _rt0_amd64_lib_go, which runs on another thread.
	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// Create a new thread to finish Go runtime initialization.
	// If cgo provided a thread-creation hook, use it; otherwise
	// fall back to the runtime's own newosproc0.
	MOVQ	_cgo_sys_thread_create(SB), AX
	TESTQ	AX, AX
	JZ	nocgo
	MOVQ	$_rt0_amd64_lib_go(SB), DI
	MOVQ	$0, SI
	CALL	AX
	JMP	restore

nocgo:
	MOVQ	$0x800000, 0(SP)	// stacksize
	MOVQ	$_rt0_amd64_lib_go(SB), AX
	MOVQ	AX, 8(SP)	// fn
	CALL	runtime·newosproc0(SB)

restore:
	// Restore C ABI callee-saved registers and the original SP.
	MOVQ	0x10(SP), BX
	MOVQ	0x18(SP), BP
	MOVQ	0x20(SP), R12
	MOVQ	0x28(SP), R13
	MOVQ	0x30(SP), R14
	MOVQ	0x38(SP), R15
	MOVQ	0x40(SP), SP
	RET

// _rt0_amd64_lib_go initializes the Go runtime.
// This is started in a separate thread by _rt0_amd64_lib.
TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	JMP	runtime·rt0_go(SB)

DATA _rt0_amd64_lib_argc<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8

// rt0_go is the common runtime bootstrap: it builds g0's stack bounds
// from the OS stack, probes CPUID, sets up TLS, wires m0<->g0, runs
// args/osinit/schedinit, queues runtime.main as the first goroutine,
// and starts this M. On entry DI = argc, SI = argv.
TEXT runtime·rt0_go(SB),NOSPLIT,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(4*8+7), SP	// 2args 2auto
	ANDQ	$~15, SP	// 16-byte align for the C code called below
	MOVQ	AX, 16(SP)
	MOVQ	BX, 24(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024+104)(SP), BX	// assume 64 KB of OS stack is available
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	MOVL	AX, SI
	CMPL	AX, $0
	JE	nocpuinfo

	// Figure out how to serialize RDTSC.
	// On Intel processors LFENCE is enough. AMD requires MFENCE.
	// Don't know about the rest, so let's do MFENCE.
	// CPUID leaf 0 returns the vendor string in BX:DX:CX.
	CMPL	BX, $0x756E6547	// "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69	// "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E	// "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)
	MOVB	$1, runtime·lfenceBeforeRdtsc(SB)
notintel:

	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI	// arg 2: setg_gcc
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#else
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#endif
#ifdef GOOS_windows
	// Adjust for the Win64 calling convention.
	MOVQ	CX, R9	// arg 4
	MOVQ	DX, R8	// arg 3
	MOVQ	SI, DX	// arg 2
	MOVQ	DI, CX	// arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const__StackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared
	CALL	runtime·check(SB)

	MOVL	16(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	24(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	PUSHQ	$0			// arg size
	CALL	runtime·newproc(SB)
	POPQ	AX
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

	// Prevent dead-code elimination of debugCallV1, which is
	// intended to be called by debuggers.
	MOVQ	$runtime·debugCallV1(SB), AX
	RET

// mainPC is a function value for runtime.main, handed to newproc above.
DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8

TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc	// INT3: trap to an attached debugger
	RET

TEXT runtime·asminit(SB),NOSPLIT,$0-0
	// No per-thread init.
	RET

/*
 *  go-routine
 */

// func gosave(buf *gobuf)
// save state in Gobuf; setjmp
TEXT runtime·gosave(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), AX		// gobuf
	LEAQ	buf+0(FP), BX		// caller's SP
	MOVQ	BX, gobuf_sp(AX)
	MOVQ	0(SP), BX		// caller's PC
	MOVQ	BX, gobuf_pc(AX)
	MOVQ	$0, gobuf_ret(AX)
	MOVQ	BP, gobuf_bp(AX)
	// Assert ctxt is zero. See func save.
	MOVQ	gobuf_ctxt(AX), BX
	TESTQ	BX, BX
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	BX, gobuf_g(AX)
	RET

// func gogo(buf *gobuf)
// restore state from Gobuf; longjmp
TEXT runtime·gogo(SB), NOSPLIT, $16-8
	MOVQ	buf+0(FP), BX		// gobuf
	MOVQ	gobuf_g(BX), DX
	MOVQ	0(DX), CX		// make sure g != nil
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	MOVQ	gobuf_pc(BX), BX
	JMP	BX

// func mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI

	get_tls(CX)
	MOVQ	g(CX), AX	// save state in g->sched
	MOVQ	0(SP), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(AX)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to m->g0 & its stack, call fn
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	SI, AX	// if g == m->g0 call badmcall
	JNE	3(PC)
	MOVQ	$runtime·badmcall(SB), AX
	JMP	AX
	MOVQ	SI, g(CX)	// g = m->g0
	MOVQ	(g_sched+gobuf_sp)(SI), SP	// sp = m->g0->sched.sp
	PUSHQ	AX	// fn's argument: the old g
	MOVQ	DI, DX	// DX = fn (closure context register)
	MOVQ	0(DI), DI	// code pointer, first word of the funcval
	CALL	DI
	POPQ	AX
	// fn must not return; if it does, crash via badmcall2.
	MOVQ	$runtime·badmcall2(SB), AX
	JMP	AX
	RET

// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	RET

// func systemstack(fn func())
// Run fn on the system (g0) stack, switching there if we are
// currently on the user goroutine's stack, then switch back.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	// Already on the signal stack? No switch needed.
	CMPQ	AX, m_gsignal(BX)
	JEQ	noswitch

	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	noswitch	// already on g0

	CMPQ	AX, m_curg(BX)
	JNE	bad

	// switch stacks
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	MOVQ	$runtime·systemstack_switch(SB), SI
	MOVQ	SI, (g_sched+gobuf_pc)(AX)
	MOVQ	SP, (g_sched+gobuf_sp)(AX)
	MOVQ	AX, (g_sched+gobuf_g)(AX)
	MOVQ	BP, (g_sched+gobuf_bp)(AX)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(DX), BX
	// make it look like mstart called systemstack on g0, to stop traceback
	SUBQ	$8, BX
	MOVQ	$runtime·mstart(SB), DX
	MOVQ	DX, 0(BX)
	MOVQ	BX, SP

	// call target function
	MOVQ	DI, DX	// DX = fn (closure context register)
	MOVQ	0(DI), DI	// code pointer, first word of the funcval
	CALL	DI

	// switch back to g
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)	// clear saved SP; no longer needed
	RET

noswitch:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	JMP	DI

bad:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3


/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
	// Cannot grow scheduler stack (m->g0).
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	g(CX), SI
	JNE	3(PC)
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SI, (m_morebuf+gobuf_g)(BX)

	// Set g->sched to context in f.
	MOVQ	0(SP), AX	// f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(SI)
	MOVQ	SI, (g_sched+gobuf_g)(SI)
	LEAQ	8(SP), AX	// f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(SI)
	MOVQ	BP, (g_sched+gobuf_bp)(SI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(SI)	// DX carries f's closure context

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET

// morestack but not preserving ctxt.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX	// clear the context register saved into gobuf_ctxt above
	JMP	runtime·morestack(SB)

// reflectcall: call a function with the given argument list
// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME if the argument size in CX is <= MAXSIZE.
#define DISPATCH(NAME,MAXSIZE)		\
	CMPQ	CX, $MAXSIZE;		\
	JA	3(PC);			\
	MOVQ	$NAME(SB), AX;		\
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
// reflectcall dispatches to the call* routine whose fixed frame size
// is the smallest power of two that fits argsize.
TEXT ·reflectcall(SB), NOSPLIT, $0-32
	MOVLQZX argsize+24(FP), CX
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX

// CALLFN defines one fixed-frame-size call routine: copy the packed
// argument block onto the local frame, call f, then copy the results
// (the bytes at and beyond retoffset) back out via callRet.
#define CALLFN(NAME,MAXSIZE)			\
TEXT NAME(SB), WRAPPER, $MAXSIZE-32;		\
	NO_LOCAL_POINTERS;			\
	/* copy arguments to stack */		\
	MOVQ	argptr+16(FP), SI;		\
	MOVLQZX argsize+24(FP), CX;		\
	MOVQ	SP, DI;				\
	REP;MOVSB;				\
	/* call function */			\
	MOVQ	f+8(FP), DX;			\
	PCDATA  $PCDATA_StackMapIndex, $0;	\
	CALL	(DX);				\
	/* copy return values back */		\
	MOVQ	argtype+0(FP), DX;		\
	MOVQ	argptr+16(FP), DI;		\
	MOVLQZX	argsize+24(FP), CX;		\
	MOVLQZX	retoffset+28(FP), BX;		\
	MOVQ	SP, SI;				\
	ADDQ	BX, DI;				\
	ADDQ	BX, SI;				\
	SUBQ	BX, CX;				\
	CALL	callRet<>(SB);			\
	RET

// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers (set up by CALLFN above).
TEXT callRet<>(SB), NOSPLIT, $32-0
	NO_LOCAL_POINTERS
	MOVQ	DX, 0(SP)	// argtype
	MOVQ	DI, 8(SP)	// dst = argptr+retoffset
	MOVQ	SI, 16(SP)	// src = SP+retoffset (callee frame)
	MOVQ	CX, 24(SP)	// size = argsize-retoffset
	CALL	runtime·reflectcallmove(SB)
	RET

CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)

// func procyield(cycles uint32)
// Busy-spin for the given number of PAUSE iterations.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX
again:
	PAUSE
	SUBL	$1, AX
	JNZ	again
	RET


TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
	// Stores are already ordered on x86, so this is just a
	// compile barrier.
	RET

// func jmpdefer(fv *funcval, argp uintptr)
// argp is a caller SP.
// called from deferreturn.
// 1. pop the caller
// 2. sub 5 bytes from the callers return
// 3. jmp to the argument
TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
	MOVQ	fv+0(FP), DX	// fn
	MOVQ	argp+8(FP), BX	// caller sp
	LEAQ	-8(BX), SP	// caller sp after CALL
	MOVQ	-8(SP), BP	// restore BP as if deferreturn returned (harmless if framepointers not in use)
	SUBQ	$5, (SP)	// return to CALL again (5 = size of the CALL instruction)
	MOVQ	0(DX), BX
	JMP	BX	// but first run the deferred function

// Save state of caller into g->sched. Smashes R8, R9.
TEXT gosave<>(SB),NOSPLIT,$0
	get_tls(R8)
	MOVQ	g(R8), R8
	MOVQ	0(SP), R9	// caller's PC
	MOVQ	R9, (g_sched+gobuf_pc)(R8)
	LEAQ	8(SP), R9	// caller's SP
	MOVQ	R9, (g_sched+gobuf_sp)(R8)
	MOVQ	$0, (g_sched+gobuf_ret)(R8)
	MOVQ	BP, (g_sched+gobuf_bp)(R8)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R8), R9
	TESTQ	R9, R9
	JZ	2(PC)
	CALL	runtime·badctxt(SB)
	RET

// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX

	MOVQ	SP, DX	// remember the entry SP

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already.
	get_tls(CX)
	MOVQ	g(CX), R8
	CMPQ	R8, $0
	JEQ	nosave
	MOVQ	g_m(R8), R8
	MOVQ	m_g0(R8), SI
	MOVQ	g(CX), DI
	CMPQ	SI, DI
	JEQ	nosave
	MOVQ	m_gsignal(R8), SI
	CMPQ	SI, DI
	JEQ	nosave

	// Switch to system stack.
	MOVQ	m_g0(R8), SI
	CALL	gosave<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	// Make sure we have enough room for 4 stack-backed fast-call
	// registers as per windows amd64 calling convention.
	SUBQ	$64, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 48(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	MOVQ	DI, 40(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX

	// Restore registers, g, stack pointer.
	get_tls(CX)
	MOVQ	48(SP), DI	// reload g (TLS may have been clobbered by C)
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	40(SP), SI	// recompute SP from saved depth
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)	// C return value
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	SUBQ	$64, SP
	ANDQ	$~15, SP
	MOVQ	$0, 48(SP)	// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 40(SP)	// save original stack pointer
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	MOVQ	40(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)
	RET

// func cgocallback(fn, frame unsafe.Pointer, framesize, ctxt uintptr)
// Turn the fn into a Go func (by taking its address) and call
// cgocallback_gofunc.
TEXT runtime·cgocallback(SB),NOSPLIT,$32-32
	LEAQ	fn+0(FP), AX	// &fn acts as a *funcval for the Go call below
	MOVQ	AX, 0(SP)
	MOVQ	frame+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	framesize+16(FP), AX
	MOVQ	AX, 16(SP)
	MOVQ	ctxt+24(FP), AX
	MOVQ	AX, 24(SP)
	MOVQ	$runtime·cgocallback_gofunc(SB), AX
	CALL	AX
	RET

// func cgocallback_gofunc(fn, frame, framesize, ctxt uintptr)
// See cgocall.go for more details.
TEXT ·cgocallback_gofunc(SB),NOSPLIT,$16-32
	NO_LOCAL_POINTERS

	// If g is nil, Go did not create the current thread.
	// Call needm to obtain one m for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call through AX.
	get_tls(CX)
#ifdef GOOS_windows
	MOVL	$0, BX
	CMPQ	CX, $0
	JEQ	2(PC)
#endif
	MOVQ	g(CX), BX
	CMPQ	BX, $0
	JEQ	needm
	MOVQ	g_m(BX), BX
	MOVQ	BX, R8	// holds oldm until end of function
	JMP	havem
needm:
	MOVQ	$0, 0(SP)	// R8 = nil m signals "borrowed" for the dropm check below
	MOVQ	$runtime·needm(SB), AX
	CALL	AX
	MOVQ	0(SP), R8
	get_tls(CX)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX

	// Set m->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVQ	m_g0(BX), SI
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
	MOVQ	m_g0(BX), SI
	MOVQ	(g_sched+gobuf_sp)(SI), AX
	MOVQ	AX, 0(SP)
	MOVQ	SP, (g_sched+gobuf_sp)(SI)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the stack.
	// This has the added benefit that it looks to the traceback
	// routine like cgocallbackg is going to return to that
	// PC (because the frame we allocate below has the same
	// size as cgocallback_gofunc's frame declared above)
	// so that the traceback will seamlessly trace back into
	// the earlier calls.
	//
	// In the new goroutine, 8(SP) holds the saved R8.
	MOVQ	m_curg(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), DI	// prepare stack as DI
	MOVQ	(g_sched+gobuf_pc)(SI), BX
	MOVQ	BX, -8(DI)
	// Compute the size of the frame, including return PC and, if
	// GOEXPERIMENT=framepointer, the saved base pointer
	MOVQ	ctxt+24(FP), BX
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX
	SUBQ	AX, DI
	MOVQ	DI, SP

	MOVQ	R8, 8(SP)
	MOVQ	BX, 0(SP)	// ctxt argument for cgocallbackg
	CALL	runtime·cgocallbackg(SB)
	MOVQ	8(SP), R8

	// Compute the size of the frame again. FP and SP have
	// completely different values here than they did above,
	// but only their difference matters.
	LEAQ	fv+0(FP), AX
	SUBQ	SP, AX

	// Restore g->sched (== m->curg->sched) from saved values.
	get_tls(CX)
	MOVQ	g(CX), SI
	MOVQ	SP, DI
	ADDQ	AX, DI
	MOVQ	-8(DI), BX
	MOVQ	BX, (g_sched+gobuf_pc)(SI)
	MOVQ	DI, (g_sched+gobuf_sp)(SI)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVQ	g(CX), BX
	MOVQ	g_m(BX), BX
	MOVQ	m_g0(BX), SI
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP
	MOVQ	0(SP), AX
	MOVQ	AX, (g_sched+gobuf_sp)(SI)

	// If the m on entry was nil, we called needm above to borrow an m
	// for the duration of the call. Since the call is over, return it with dropm.
	CMPQ	R8, $0
	JNE 3(PC)
	MOVQ	$runtime·dropm(SB), AX
	CALL	AX

	// Done!
	RET

// func setg(gg *g)
// set g. for use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVQ	gg+0(FP), BX
#ifdef GOOS_windows
	CMPQ	BX, $0
	JNE	settls
	MOVQ	$0, 0x28(GS)	// clear the TLS g slot
	RET
settls:
	MOVQ	g_m(BX), AX
	LEAQ	m_tls(AX), AX
	MOVQ	AX, 0x28(GS)
#endif
	get_tls(CX)
	MOVQ	BX, g(CX)
	RET

// void setg_gcc(G*); set g called from gcc.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	get_tls(AX)
	MOVQ	DI, g(AX)	// DI = first C ABI argument
	RET

TEXT runtime·abort(SB),NOSPLIT,$0-0
	INT	$3	// trap; spin forever if the trap somehow returns
loop:
	JMP	loop

// check that SP is in range [g->stack.lo, g->stack.hi)
TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	2(PC)
	CALL	runtime·abort(SB)
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	2(PC)
	CALL	runtime·abort(SB)
	RET

// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	// Serialize RDTSC: LFENCE suffices on Intel, MFENCE elsewhere
	// (decided at startup in rt0_go).
	CMPB	runtime·lfenceBeforeRdtsc(SB), $1
	JNE	mfence
	LFENCE
	JMP	done
mfence:
	MFENCE
done:
	RDTSC
	SHLQ	$32, DX	// RDTSC result is in DX:AX; combine into AX
	ADDQ	DX, AX
	MOVQ	AX, ret+0(FP)
	RET

// func aeshash(p unsafe.Pointer, h, s uintptr) uintptr
// hash function using AES hardware instructions
TEXT runtime·aeshash(SB),NOSPLIT,$0-32
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	s+16(FP), CX	// size
	LEAQ	ret+24(FP), DX
	JMP	aeshashbody<>(SB)

// func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to string struct
	MOVQ	8(AX), CX	// length of string
	MOVQ	(AX), AX	// string data
	LEAQ	ret+16(FP), DX
	JMP	aeshashbody<>(SB)

// AX: data
// CX: length
// DX: address to put return value
TEXT aeshashbody<>(SB),NOSPLIT,$0-0
	// Fill an SSE register with our seeds.
	MOVQ	h+8(FP), X0			// 64 bits of per-table hash seed
	PINSRW	$4, CX, X0			// 16 bits of length
	PSHUFHW $0, X0, X0			// repeat length 4 times total
	MOVO	X0, X1				// save unscrambled seed
	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
	AESENC	X0, X0				// scramble seed

	// Dispatch on input length; each bucket uses a wider set of
	// AES state registers.
	CMPQ	CX, $16
	JB	aes0to15
	JE	aes16
	CMPQ	CX, $32
	JBE	aes17to32
	CMPQ	CX, $64
	JBE	aes33to64
	CMPQ	CX, $128
	JBE	aes65to128
	JMP	aes129plus

aes0to15:
	TESTQ	CX, CX
	JE	aes0

	ADDQ	$16, AX
	TESTW	$0xff0, AX	// within 16 bytes of a page end?
	JE	endofpage

	// 16 bytes loaded at this address won't cross
	// a page boundary, so we can load it directly.
	MOVOU	-16(AX), X1
	ADDQ	CX, CX
	MOVQ	$masks<>(SB), AX
	PAND	(AX)(CX*8), X1	// mask off bytes beyond the data length
final1:
	PXOR	X0, X1	// xor data with seed
	AESENC	X1, X1	// scramble combo 3 times
	AESENC	X1, X1
	AESENC	X1, X1
	MOVQ	X1, (DX)
	RET

endofpage:
	// address ends in 1111xxxx. Might be up against
	// a page boundary, so load ending at last byte.
	// Then shift bytes down using pshufb.
	MOVOU	-32(AX)(CX*1), X1
	ADDQ	CX, CX
	MOVQ	$shifts<>(SB), AX	// shifts<> table is defined later in this file
	PSHUFB	(AX)(CX*8), X1
	JMP	final1

aes0:
	// Return scrambled input seed
	AESENC	X0, X0
	MOVQ	X0, (DX)
	RET

aes16:
	MOVOU	(AX), X1
	JMP	final1

aes17to32:
	// make second starting seed
	PXOR	runtime·aeskeysched+16(SB), X1
	AESENC	X1, X1

	// load data to be hashed (the two 16-byte loads may overlap)
	MOVOU	(AX), X2
	MOVOU	-16(AX)(CX*1), X3

	// xor with seed
	PXOR	X0, X2
	PXOR	X1, X3

	// scramble 3 times
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X2, X2
	AESENC	X3, X3

	// combine results
	PXOR	X3, X2
	MOVQ	X2, (DX)
	RET

aes33to64:
	// make 3 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3

	// load first 32 and last 32 bytes (may overlap)
	MOVOU	(AX), X4
	MOVOU	16(AX), X5
	MOVOU	-32(AX)(CX*1), X6
	MOVOU	-16(AX)(CX*1), X7

	PXOR	X0, X4
	PXOR	X1, X5
	PXOR	X2, X6
	PXOR	X3, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	PXOR	X6, X4
	PXOR	X7, X5
	PXOR	X5, X4
	MOVQ	X4, (DX)
	RET

aes65to128:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// load data: first 64 and last 64 bytes (may overlap)
	MOVOU	(AX), X8
	MOVOU	16(AX), X9
	MOVOU	32(AX), X10
	MOVOU	48(AX), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor with seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// scramble 3 times
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

aes129plus:
	// make 7 more starting seeds
	MOVO	X1, X2
	MOVO	X1, X3
	MOVO	X1, X4
	MOVO	X1, X5
	MOVO	X1, X6
	MOVO	X1, X7
	PXOR	runtime·aeskeysched+16(SB), X1
	PXOR	runtime·aeskeysched+32(SB), X2
	PXOR	runtime·aeskeysched+48(SB), X3
	PXOR	runtime·aeskeysched+64(SB), X4
	PXOR	runtime·aeskeysched+80(SB), X5
	PXOR	runtime·aeskeysched+96(SB), X6
	PXOR	runtime·aeskeysched+112(SB), X7
	AESENC	X1, X1
	AESENC	X2, X2
	AESENC	X3, X3
	AESENC	X4, X4
	AESENC	X5, X5
	AESENC	X6, X6
	AESENC	X7, X7

	// start with last (possibly overlapping) block
	MOVOU	-128(AX)(CX*1), X8
	MOVOU	-112(AX)(CX*1), X9
	MOVOU	-96(AX)(CX*1), X10
	MOVOU	-80(AX)(CX*1), X11
	MOVOU	-64(AX)(CX*1), X12
	MOVOU	-48(AX)(CX*1), X13
	MOVOU	-32(AX)(CX*1), X14
	MOVOU	-16(AX)(CX*1), X15

	// xor in seed
	PXOR	X0, X8
	PXOR	X1, X9
	PXOR	X2, X10
	PXOR	X3, X11
	PXOR	X4, X12
	PXOR	X5, X13
	PXOR	X6, X14
	PXOR	X7, X15

	// compute number of remaining 128-byte blocks
	DECQ	CX
	SHRQ	$7, CX

aesloop:
	// scramble state
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// scramble state, xor in a block
	MOVOU	(AX), X0
	MOVOU	16(AX), X1
	MOVOU	32(AX), X2
	MOVOU	48(AX), X3
	AESENC	X0, X8
	AESENC	X1, X9
	AESENC	X2, X10
	AESENC	X3, X11
	MOVOU	64(AX), X4
	MOVOU	80(AX), X5
	MOVOU	96(AX), X6
	MOVOU	112(AX), X7
	AESENC	X4, X12
	AESENC	X5, X13
	AESENC	X6, X14
	AESENC	X7, X15

	ADDQ	$128, AX
	DECQ	CX
	JNE	aesloop

	// 3 more scrambles to finish
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15
	AESENC	X8, X8
	AESENC	X9, X9
	AESENC	X10, X10
	AESENC	X11, X11
	AESENC	X12, X12
	AESENC	X13, X13
	AESENC	X14, X14
	AESENC	X15, X15

	// combine results
	PXOR	X12, X8
	PXOR	X13, X9
	PXOR	X14, X10
	PXOR	X15, X11
	PXOR	X10, X8
	PXOR	X11, X9
	PXOR	X9, X8
	MOVQ	X8, (DX)
	RET

// func aeshash32(p unsafe.Pointer, h uintptr) uintptr
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRD	$2, (AX), X0	// data
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
// end of runtime·aeshash32 (declared above)
	RET

// func aeshash64(p unsafe.Pointer, h uintptr) uintptr
// Hash of an 8-byte value: insert the data above the seed in X0,
// then run three AES rounds keyed from the key schedule.
TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
	MOVQ	p+0(FP), AX	// ptr to data
	MOVQ	h+8(FP), X0	// seed
	PINSRQ	$1, (AX), X0	// data (64 bits into qword lane 1, above the seed)
	AESENC	runtime·aeskeysched+0(SB), X0
	AESENC	runtime·aeskeysched+16(SB), X0
	AESENC	runtime·aeskeysched+32(SB), X0
	MOVQ	X0, ret+16(FP)
	RET

// simple mask to get rid of data in the high part of the register.
// The 16-byte mask at masks<>+16*i keeps the low i bytes and zeroes
// the rest (i = 0..15).
DATA masks<>+0x00(SB)/8, $0x0000000000000000
DATA masks<>+0x08(SB)/8, $0x0000000000000000
DATA masks<>+0x10(SB)/8, $0x00000000000000ff
DATA masks<>+0x18(SB)/8, $0x0000000000000000
DATA masks<>+0x20(SB)/8, $0x000000000000ffff
DATA masks<>+0x28(SB)/8, $0x0000000000000000
DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
DATA masks<>+0x38(SB)/8, $0x0000000000000000
DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
DATA masks<>+0x48(SB)/8, $0x0000000000000000
DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
DATA masks<>+0x58(SB)/8, $0x0000000000000000
DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
DATA masks<>+0x68(SB)/8, $0x0000000000000000
DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
DATA masks<>+0x78(SB)/8, $0x0000000000000000
DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
DATA masks<>+0x88(SB)/8, $0x0000000000000000
DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
DATA masks<>+0x98(SB)/8, $0x00000000000000ff
DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
GLOBL masks<>(SB),RODATA,$256

// func checkASM() bool
// Reports whether the assembly-side invariants hold; here, that the
// masks<> and shifts<> tables are 16-byte aligned (they are loaded
// with 16-byte SSE operations elsewhere).
TEXT ·checkASM(SB),NOSPLIT,$0-1
	// check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
	MOVQ	$masks<>(SB), AX
	MOVQ	$shifts<>(SB), BX
	ORQ	BX, AX	// any misaligned bit of either address shows in AX
	TESTQ	$15, AX
	SETEQ	ret+0(FP)	// true iff both low-4-bit fields are zero
	RET

// these are arguments to pshufb. They move data down from
// the high bytes of the register to the low bytes of the register.
// index is how many bytes to move.
// (Selector bytes of 0xff have the high bit set, which makes PSHUFB
// write zero into the corresponding destination byte.)
DATA shifts<>+0x00(SB)/8, $0x0000000000000000
DATA shifts<>+0x08(SB)/8, $0x0000000000000000
DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256

// return0 clears AX (the integer return register) and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVL	$0, AX
	RET


// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
// Must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$0
	get_tls(CX)
	MOVQ	g(CX), AX		// AX = g
	MOVQ	g_m(AX), AX		// AX = g.m
	MOVQ	m_curg(AX), AX		// AX = m.curg
	MOVQ	(g_stack+stack_hi)(AX), AX	// AX = curg.stack.hi
	RET

// The top-most function running on a goroutine
// returns to goexit+PCQuantum.
TEXT runtime·goexit(SB),NOSPLIT,$0-0
	BYTE	$0x90	// NOP
	CALL	runtime·goexit1(SB)	// does not return
	// traceback from goexit1 must hit code range of goexit
	BYTE	$0x90	// NOP

// This is called from .init_array and follows the platform, not Go, ABI.
// DI (the first C-ABI argument register) presumably carries the
// moduledata pointer to append — TODO confirm against the caller.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
	// Append to the module list: lastmoduledatap.next = DI; lastmoduledatap = DI.
	MOVQ	runtime·lastmoduledatap(SB), AX
	MOVQ	DI, moduledata_next(AX)
	MOVQ	DI, runtime·lastmoduledatap(SB)
	POPQ	R15
	RET

// gcWriteBarrier performs a heap pointer write and informs the GC.
//
// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
// - DI is the destination of the write
// - AX is the value being written at DI
// It clobbers FLAGS. It does not clobber any general-purpose registers,
// but may clobber others (e.g., SSE registers).
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$120
	// Save the registers clobbered by the fast path. This is slightly
	// faster than having the caller spill these.
	MOVQ	R14, 104(SP)
	MOVQ	R13, 112(SP)
	// TODO: Consider passing g.m.p in as an argument so they can be shared
	// across a sequence of write barriers.
	get_tls(R13)
	MOVQ	g(R13), R13		// R13 = g
	MOVQ	g_m(R13), R13		// R13 = g.m
	MOVQ	m_p(R13), R13		// R13 = m.p
	MOVQ	(p_wbBuf+wbBuf_next)(R13), R14
	// Increment wbBuf.next position.
	LEAQ	16(R14), R14	// each buffer entry is 16 bytes: (value, *slot)
	MOVQ	R14, (p_wbBuf+wbBuf_next)(R13)
	CMPQ	R14, (p_wbBuf+wbBuf_end)(R13)	// flags consumed by JEQ below
	// Record the write.
	MOVQ	AX, -16(R14)	// Record value
	// Note: This turns bad pointer writes into bad
	// pointer reads, which could be confusing. We could avoid
	// reading from obviously bad pointers, which would
	// take care of the vast majority of these. We could
	// patch this up in the signal handler, or use XCHG to
	// combine the read and the write.
	MOVQ	(DI), R13
	MOVQ	R13, -8(R14)	// Record *slot
	// Is the buffer full? (flags set in CMPQ above)
	JEQ	flush
ret:
	MOVQ	104(SP), R14
	MOVQ	112(SP), R13
	// Do the write.
	MOVQ	AX, (DI)
	RET

flush:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// It is possible for wbBufFlush to clobber other registers
	// (e.g., SSE registers), but the compiler takes care of saving
	// those in the caller if necessary. This strikes a balance
	// with registers that are likely to be used.
	//
	// We don't have type information for these, but all code under
	// here is NOSPLIT, so nothing will observe these.
	//
	// TODO: We could strike a different balance; e.g., saving X0
	// and not saving GP registers that are less likely to be used.
	MOVQ	DI, 0(SP)	// Also first argument to wbBufFlush
	MOVQ	AX, 8(SP)	// Also second argument to wbBufFlush
	MOVQ	BX, 16(SP)
	MOVQ	CX, 24(SP)
	MOVQ	DX, 32(SP)
	// DI already saved
	MOVQ	SI, 40(SP)
	MOVQ	BP, 48(SP)
	MOVQ	R8, 56(SP)
	MOVQ	R9, 64(SP)
	MOVQ	R10, 72(SP)
	MOVQ	R11, 80(SP)
	MOVQ	R12, 88(SP)
	// R13 already saved
	// R14 already saved
	MOVQ	R15, 96(SP)

	// This takes arguments DI and AX
	CALL	runtime·wbBufFlush(SB)

	MOVQ	0(SP), DI
	MOVQ	8(SP), AX
	MOVQ	16(SP), BX
	MOVQ	24(SP), CX
	MOVQ	32(SP), DX
	MOVQ	40(SP), SI
	MOVQ	48(SP), BP
	MOVQ	56(SP), R8
	MOVQ	64(SP), R9
	MOVQ	72(SP), R10
	MOVQ	80(SP), R11
	MOVQ	88(SP), R12
	MOVQ	96(SP), R15
	JMP	ret

DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below

// debugCallV1 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 256 bytes free on the stack.
// 2. Push the current PC on the stack (updating SP).
// 3. Write the desired argument frame size at SP-16 (using the SP
//    after step 2).
// 4. Save all machine registers (including flags and XMM registers)
//    so they can be restored later by the debugger.
// 5. Set the PC to debugCallV1 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations.
// Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting RAX and
// invoking INT3 to raise a breakpoint signal. See the comments in the
// implementation for the protocol the debugger is expected to
// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV1 cannot check
// this invariant.
TEXT runtime·debugCallV1(SB),NOSPLIT,$152-0
	// Save all registers that may contain pointers in GC register
	// map order (see ssa.registersAMD64). This makes it possible
	// to copy the stack while updating pointers currently held in
	// registers, and for the GC to find roots in registers.
	//
	// We can't do anything that might clobber any of these
	// registers before this.
	MOVQ	R15, r15-(14*8+8)(SP)
	MOVQ	R14, r14-(13*8+8)(SP)
	MOVQ	R13, r13-(12*8+8)(SP)
	MOVQ	R12, r12-(11*8+8)(SP)
	MOVQ	R11, r11-(10*8+8)(SP)
	MOVQ	R10, r10-(9*8+8)(SP)
	MOVQ	R9, r9-(8*8+8)(SP)
	MOVQ	R8, r8-(7*8+8)(SP)
	MOVQ	DI, di-(6*8+8)(SP)
	MOVQ	SI, si-(5*8+8)(SP)
	MOVQ	BP, bp-(4*8+8)(SP)
	MOVQ	BX, bx-(3*8+8)(SP)
	MOVQ	DX, dx-(2*8+8)(SP)
	// Save the frame size before we clobber it. Either of the last
	// saves could clobber this depending on whether there's a saved BP.
	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
	MOVQ	CX, cx-(1*8+8)(SP)
	MOVQ	AX, ax-(0*8+8)(SP)

	// Save the argument frame size.
	MOVQ	DX, frameSize-128(SP)

	// Perform a safe-point check.
	MOVQ	retpc-8(FP), AX	// Caller's PC
	MOVQ	AX, 0(SP)
	CALL	runtime·debugCallCheck(SB)
	MOVQ	8(SP), AX
	TESTQ	AX, AX
	JZ	good
	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVQ	AX, 0(SP)
	MOVQ	16(SP), AX
	MOVQ	AX, 8(SP)
	// Set AX to 8 and invoke INT3. The debugger should get the
	// reason a call can't be injected from the top of the stack
	// and resume execution.
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

good:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set AX to 0 and
	// invoke INT3. The debugger should write the argument
	// frame for the call at SP, push the trapping PC on the
	// stack, set the PC to the function to call, set RCX to point
	// to the closure (if a closure call), and resume execution.
	//
	// If the function returns, this will set AX to 1 and invoke
	// INT3. The debugger can then inspect any return value saved
	// on the stack at SP and resume execution again.
	//
	// If the function panics, this will set AX to 2 and invoke INT3.
	// The interface{} value of the panic will be at SP. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMPQ	AX, $MAXSIZE;			\
	JA	5(PC);				\
	MOVQ	$NAME(SB), AX;			\
	MOVQ	AX, 0(SP);			\
	CALL	runtime·debugCallWrap(SB);	\
	JMP	restore

	// Dispatch to the smallest debugCall* frame that fits the
	// requested argument frame size. In each macro expansion,
	// JA 5(PC) skips the remaining four instructions and falls
	// through to the next size.
	MOVQ	frameSize-128(SP), AX
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVQ	$debugCallFrameTooLarge<>(SB), AX
	MOVQ	AX, 0(SP)
	MOVQ	$20, 8(SP)	// length of debugCallFrameTooLarge string
	MOVQ	$8, AX
	BYTE	$0xcc	// INT3
	JMP	restore

restore:
	// Calls and failures resume here.
	//
	// Set AX to 16 and invoke INT3. The debugger should restore
	// all registers except RIP and RSP and resume execution.
	MOVQ	$16, AX
	BYTE	$0xcc	// INT3
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	MOVQ	ax-(0*8+8)(SP), AX
	MOVQ	cx-(1*8+8)(SP), CX
	MOVQ	dx-(2*8+8)(SP), DX
	MOVQ	bx-(3*8+8)(SP), BX
	MOVQ	bp-(4*8+8)(SP), BP
	MOVQ	si-(5*8+8)(SP), SI
	MOVQ	di-(6*8+8)(SP), DI
	MOVQ	r8-(7*8+8)(SP), R8
	MOVQ	r9-(8*8+8)(SP), R9
	MOVQ	r10-(9*8+8)(SP), R10
	MOVQ	r11-(10*8+8)(SP), R11
	MOVQ	r12-(11*8+8)(SP), R12
	MOVQ	r13-(12*8+8)(SP), R13
	MOVQ	r14-(13*8+8)(SP), R14
	MOVQ	r15-(14*8+8)(SP), R15

	RET

// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
// Each such function traps once (AX=0: frame ready) before the call
// and once (AX=1: function returned) after it — see the protocol
// described at debugCallV1.
#define DEBUG_CALL_FN(NAME,MAXSIZE)	\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;	\
	NO_LOCAL_POINTERS;		\
	MOVQ	$0, AX;			\
	BYTE	$0xcc;			\
	MOVQ	$1, AX;			\
	BYTE	$0xcc;			\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)

// func debugCallPanicked(val interface{})
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack.
	MOVQ	val_type+0(FP), AX
	MOVQ	AX, 0(SP)
	MOVQ	val_data+8(FP), AX
	MOVQ	AX, 8(SP)
	MOVQ	$2, AX	// AX=2: "function panicked" (protocol at debugCallV1)
	BYTE	$0xcc	// INT3
	RET

// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
// The panic* stubs below follow the special calling convention noted
// above: the two operands arrive in registers (which registers varies
// per stub) and are spilled to the caller-allocated x/y argument slots
// before tail-calling the corresponding Go handler.
TEXT runtime·panicIndex(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndex(SB)
TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicIndexU(SB)
TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlen(SB)
TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAlenU(SB)
TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcap(SB)
TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSliceAcapU(SB)
TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceB(SB)
TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSliceBU(SB)
TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Alen(SB)
TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AlenU(SB)
TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3Acap(SB)
TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16
	MOVQ	DX, x+0(FP)
	MOVQ	BX, y+8(FP)
	JMP	runtime·goPanicSlice3AcapU(SB)
TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3B(SB)
TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16
	MOVQ	CX, x+0(FP)
	MOVQ	DX, y+8(FP)
	JMP	runtime·goPanicSlice3BU(SB)
TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3C(SB)
TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16
	MOVQ	AX, x+0(FP)
	MOVQ	CX, y+8(FP)
	JMP	runtime·goPanicSlice3CU(SB)

#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif