Source file src/runtime/preempt.go
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
// 1. A blocked safe-point occurs for the duration that a goroutine is
//    descheduled, blocked on synchronization, or in a system call.
//
// 2. Synchronous safe-points occur when a running goroutine checks
//    for a preemption request.
//
// 3. Asynchronous safe-points occur at any instruction in user code
//    where the goroutine can be safely paused and a conservative
//    stack and register scan can find stack roots. The runtime can
//    stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)
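
// To make the synchronous mechanism above concrete: poisoning the
// stack bound turns the ordinary prologue stack check into a
// preemption check. The following is only an illustrative sketch; the
// real check is emitted by the compiler in assembly, and newstack is
// what recognizes the stackPreempt sentinel:
//
//	if SP <= g.stackguard0 { // always taken once stackguard0 == stackPreempt,
//		morestack()      // since stackPreempt is a very large value;
//	}                        // newstack then parks the G instead of growing the stack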

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Keep in sync with cmd/compile/internal/gc/plive.go:go115ReduceLiveness.
const go115ReduceLiveness = true

const go115RestartSeq = go115ReduceLiveness && true // enable restartable sequences

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + _StackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && atomic.Load(&asyncM.preemptGen) == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := atomic.Load(&asyncM2.preemptGen)
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}
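
// A minimal sketch of how suspendG and resumeG are paired by a
// caller. The setup is elided: per the contract above, this must run
// on the system stack, and the current M's user goroutine (if any)
// must already be in a preemptible state. The GC's stack scan in
// markroot is the canonical real caller; inspect here is just a
// hypothetical placeholder.
//
//	state := suspendG(gp) // drive gp to a safe-point and claim it
//	if state.dead {
//		return // gp has exited; it may be reused at any time
//	}
//	inspect(gp)    // read-only access to gp's stack and status
//	resumeG(state) // let gp continue from its safe-point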

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
}
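
// canPreemptM is the reason holding an M pins a goroutine against
// asynchronous preemption. For example (an illustrative sketch;
// acquirem and releasem are the existing helpers that adjust
// m.locks):
//
//	mp := acquirem() // mp.locks++, so canPreemptM(mp) is now false
//	// ... code here will not be stopped at an async safe-point ...
//	releasem(mp)     // mp.locks--, async preemption is allowed again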

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly.
func asyncPreempt()

//go:nosplit
func asyncPreempt2() {
	gp := getg()
	gp.asyncSafePoint = true
	if gp.preemptStop {
		mcall(preemptPark)
	} else {
		mcall(gopreempt_m)
	}
	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(funcPC(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(funcPC(asyncPreempt2))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*sys.PtrSize
	if asyncPreemptStack > _StackLimit {
		// We need more than the nosplit limit. This isn't
		// unsafe, but it may limit asynchronous preemption.
		//
		// This may be a problem if we start using more
		// registers. In that case, we should store registers
		// in a context object. If we pre-allocate one per P,
		// asyncPreempt can spill just a few registers to the
		// stack, then grab its context object and spill into
		// it. When it enters the runtime, it would allocate a
		// new context for the P.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}
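
// wantAsyncPreempt checks the flags that the scheduler sets when it
// requests a preemption. For context, preemptone in proc.go requests
// both forms of preemption roughly as follows (a simplified sketch,
// not the exact code; pp and mp are the target's P and M):
//
//	gp.preempt = true
//	gp.stackguard0 = stackPreempt // synchronous: trip the next prologue check
//	if preemptMSupported && debug.asyncpreemptoff == 0 {
//		pp.preempt = true
//		preemptM(mp) // asynchronous: signal the thread
//	}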

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
// 1. It's safe to suspend gp and conservatively scan its stack and
// registers. There are no potentially hidden pointer values and it's
// not in the middle of an atomic sequence like a write barrier.
//
// 2. gp has enough stack space to inject the asyncPreempt call.
//
// 3. It's generally safe to interact with the runtime, even if we're
// in a signal handler stopped here. For example, there are no runtime
// locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc, nil) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	var up int32
	var startpc uintptr
	if !go115ReduceLiveness {
		smi := pcdatavalue(f, _PCDATA_RegMapIndex, pc, nil)
		if smi == -2 {
			// Unsafe-point marked by compiler. This includes
			// atomic sequences (e.g., write barrier) and nosplit
			// functions (except at calls).
			return false, 0
		}
	} else {
		up, startpc = pcdatavalue2(f, _PCDATA_UnsafePoint, pc)
		if up != _PCDATA_UnsafePointSafe {
			// Unsafe-point marked by compiler. This includes
			// atomic sequences (e.g., write barrier) and nosplit
			// functions (except at calls).
			return false, 0
		}
	}
	if fd := funcdata(f, _FUNCDATA_LocalsPointerMaps); fd == nil || fd == unsafe.Pointer(&no_pointers_stackmap) {
		// This is assembly code. Don't assume it's
		// well-formed. We identify assembly code by
		// checking that it has either no stack map, or
		// no_pointers_stackmap, which is the stack map
		// for functions marked as NO_LOCAL_POINTERS.
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		return false, 0
	}
	name := funcname(f)
	if inldata := funcdata(f, _FUNCDATA_InlTree); inldata != nil {
		inltree := (*[1 << 20]inlinedCall)(inldata)
		ix := pcdatavalue(f, _PCDATA_InlTreeIndex, pc, nil)
		if ix >= 0 {
			name = funcnameFromNameoff(f, inltree[ix].func_)
		}
	}
	if hasPrefix(name, "runtime.") ||
		hasPrefix(name, "runtime/internal/") ||
		hasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check),
		// reflect.{makeFuncStub,methodValueCall}.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	if go115RestartSeq {
		switch up {
		case _PCDATA_Restart1, _PCDATA_Restart2:
			// Restartable instruction sequence. Back off PC to
			// the start PC.
			if startpc == 0 || startpc > pc || pc-startpc > 20 {
				throw("bad restart PC")
			}
			return true, startpc
		case _PCDATA_RestartAtEntry:
			// Restart from the function entry at resumption.
			return true, f.entry
		}
	} else {
		switch up {
		case _PCDATA_Restart1, _PCDATA_Restart2, _PCDATA_RestartAtEntry:
			// go115RestartSeq is not enabled. Treat it as an unsafe point.
			return false, 0
		}
	}
	return true, pc
}
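
// Putting the pieces together: the signal handler side uses
// wantAsyncPreempt and isAsyncSafePoint roughly as follows. This is a
// simplified sketch of doSigPreempt in signal_unix.go; the sigctxt
// methods shown are platform-specific.
//
//	if wantAsyncPreempt(gp) {
//		if ok, newpc := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()); ok {
//			// Rewrite the signal context so that, on return from
//			// the signal, the thread looks like it just called
//			// asyncPreempt at newpc.
//			ctxt.pushCall(funcPC(asyncPreempt), newpc)
//		}
//	}
//	// Acknowledge the request; suspendG watches m.preemptGen to
//	// decide whether it needs to send another signal.
//	atomic.Xadd(&gp.m.preemptGen, 1)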

var no_pointers_stackmap uint64 // defined in assembly, for NO_LOCAL_POINTERS macro