// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Goroutine preemption
//
// A goroutine can be preempted at any safe-point. Currently, there
// are a few categories of safe-points:
//
//  1. A blocked safe-point occurs for the duration that a goroutine is
//     descheduled, blocked on synchronization, or in a system call.
//
//  2. Synchronous safe-points occur when a running goroutine checks
//     for a preemption request.
//
//  3. Asynchronous safe-points occur at any instruction in user code
//     where the goroutine can be safely paused and a conservative
//     stack and register scan can find stack roots. The runtime can
//     stop a goroutine at an async safe-point using a signal.
//
// At both blocked and synchronous safe-points, a goroutine's CPU
// state is minimal and the garbage collector has complete information
// about its entire stack. This makes it possible to deschedule a
// goroutine with minimal space, and to precisely scan a goroutine's
// stack.
//
// Synchronous safe-points are implemented by overloading the stack
// bound check in function prologues. To preempt a goroutine at the
// next synchronous safe-point, the runtime poisons the goroutine's
// stack bound to a value that will cause the next stack bound check
// to fail and enter the stack growth implementation, which will
// detect that it was actually a preemption and redirect to preemption
// handling.
//
// Preemption at asynchronous safe-points is implemented by suspending
// the thread using an OS mechanism (e.g., signals) and inspecting its
// state to determine if the goroutine was at an asynchronous
// safe-point. Since the thread suspension itself is generally
// asynchronous, it also checks if the running goroutine wants to be
// preempted, since this could have changed. If all conditions are
// satisfied, it adjusts the signal context to make it look like the
// signaled thread just called asyncPreempt and resumes the thread.
// asyncPreempt spills all registers and enters the scheduler.
//
// (An alternative would be to preempt in the signal handler itself.
// This would let the OS save and restore the register state and the
// runtime would only need to know how to extract potentially
// pointer-containing registers from the signal context. However, this
// would consume an M for every preempted G, and the scheduler itself
// is not designed to run from a signal handler, as it tends to
// allocate memory and start threads in the preemption path.)
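//
// As an informal illustration of the synchronous case (a sketch, not
// the code the compiler actually emits), the stack bound check in a
// function prologue behaves roughly like:
//
//	if SP < getg().stackguard0 {
//		morestack() // the stack-growth path (morestack/newstack)
//		            // notices stackguard0 == stackPreempt and
//		            // diverts to preemption handling
//	}
//
// so writing stackPreempt, a value larger than any real stack bound,
// into gp.stackguard0 guarantees that the next prologue check fails
// and control enters the runtime.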

package runtime

import (
	"internal/abi"
	"internal/goarch"
)

type suspendGState struct {
	g *g

	// dead indicates the goroutine was not suspended because it
	// is dead. This goroutine could be reused after the dead
	// state was observed, so the caller must not assume that it
	// remains dead.
	dead bool

	// stopped indicates that this suspendG transitioned the G to
	// _Gwaiting via g.preemptStop and thus is responsible for
	// readying it when done.
	stopped bool
}

// suspendG suspends goroutine gp at a safe-point and returns the
// state of the suspended goroutine. The caller gets read access to
// the goroutine until it calls resumeG.
//
// It is safe for multiple callers to attempt to suspend the same
// goroutine at the same time. The goroutine may execute between
// subsequent successful suspend operations. The current
// implementation grants exclusive access to the goroutine, and hence
// multiple callers will serialize. However, the intent is to grant
// shared read access, so please don't depend on exclusive access.
//
// This must be called from the system stack and the user goroutine on
// the current M (if any) must be in a preemptible state. This
// prevents deadlocks where two goroutines attempt to suspend each
// other and both are in non-preemptible states. There are other ways
// to resolve this deadlock, but this seems simplest.
//
// TODO(austin): What if we instead required this to be called from a
// user goroutine? Then we could deschedule the goroutine while
// waiting instead of blocking the thread. If two goroutines tried to
// suspend each other, one of them would win and the other wouldn't
// complete the suspend until it was resumed. We would have to be
// careful that they couldn't actually queue up suspend for each other
// and then both be suspended. This would also avoid the need for a
// kernel context switch in the synchronous case because we could just
// directly schedule the waiter. The context switch is unavoidable in
// the signal case.
//
//go:systemstack
func suspendG(gp *g) suspendGState {
	if mp := getg().m; mp.curg != nil && readgstatus(mp.curg) == _Grunning {
		// Since we're on the system stack of this M, the user
		// G is stuck at an unsafe point. If another goroutine
		// were to try to preempt m.curg, it could deadlock.
		throw("suspendG from non-preemptible goroutine")
	}

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 10 * 1000
	var nextYield int64

	// Drive the goroutine to a preemption point.
	stopped := false
	var asyncM *m
	var asyncGen uint32
	var nextPreemptM int64
	for i := 0; ; i++ {
		switch s := readgstatus(gp); s {
		default:
			if s&_Gscan != 0 {
				// Someone else is suspending it. Wait
				// for them to finish.
				//
				// TODO: It would be nicer if we could
				// coalesce suspends.
				break
			}

			dumpgstatus(gp)
			throw("invalid g status")

		case _Gdead:
			// Nothing to suspend.
			//
			// preemptStop may need to be cleared, but
			// doing that here could race with goroutine
			// reuse. Instead, goexit0 clears it.
			return suspendGState{dead: true}

		case _Gcopystack:
			// The stack is being copied. We need to wait
			// until this is done.

		case _Gpreempted:
			// We (or someone else) suspended the G. Claim
			// ownership of it by transitioning it to
			// _Gwaiting.
			if !casGFromPreempted(gp, _Gpreempted, _Gwaiting) {
				break
			}

			// We stopped the G, so we have to ready it later.
			stopped = true

			s = _Gwaiting
			fallthrough

		case _Grunnable, _Gsyscall, _Gwaiting:
			// Claim goroutine by setting scan bit.
			// This may race with execution or readying of gp.
			// The scan bit keeps it from transitioning state.
			if !castogscanstatus(gp, s, s|_Gscan) {
				break
			}

			// Clear the preemption request. It's safe to
			// reset the stack guard because we hold the
			// _Gscan bit and thus own the stack.
			gp.preemptStop = false
			gp.preempt = false
			gp.stackguard0 = gp.stack.lo + stackGuard

			// The goroutine was already at a safe-point
			// and we've now locked that in.
			//
			// TODO: It would be much better if we didn't
			// leave it in _Gscan, but instead gently
			// prevented its scheduling until resumption.
			// Maybe we only use this to bump a suspended
			// count and the scheduler skips suspended
			// goroutines? That wouldn't be enough for
			// {_Gsyscall,_Gwaiting} -> _Grunning. Maybe
			// for all those transitions we need to check
			// suspended and deschedule?
			return suspendGState{g: gp, stopped: stopped}

		case _Grunning:
			// Optimization: if there is already a pending preemption request
			// (from the previous loop iteration), don't bother with the atomics.
			if gp.preemptStop && gp.preempt && gp.stackguard0 == stackPreempt && asyncM == gp.m && asyncM.preemptGen.Load() == asyncGen {
				break
			}

			// Temporarily block state transitions.
			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
				break
			}

			// Request synchronous preemption.
			gp.preemptStop = true
			gp.preempt = true
			gp.stackguard0 = stackPreempt

			// Prepare for asynchronous preemption.
			asyncM2 := gp.m
			asyncGen2 := asyncM2.preemptGen.Load()
			needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
			asyncM = asyncM2
			asyncGen = asyncGen2

			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

			// Send asynchronous preemption. We do this
			// after CASing the G back to _Grunning
			// because preemptM may be synchronous and we
			// don't want to catch the G just spinning on
			// its status.
			if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
				// Rate limit preemptM calls. This is
				// particularly important on Windows
				// where preemptM is actually
				// synchronous and the spin loop here
				// can lead to live-lock.
				now := nanotime()
				if now >= nextPreemptM {
					nextPreemptM = now + yieldDelay/2
					preemptM(asyncM)
				}
			}
		}

		// TODO: Don't busy wait. This loop should really only
		// be a simple read/decide/CAS loop that only fails if
		// there's an active race. Once the CAS succeeds, we
		// should queue up the preemption (which will require
		// it to be reliable in the _Grunning case, not
		// best-effort) and then sleep until we're notified
		// that the goroutine is suspended.
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		if nanotime() < nextYield {
			procyield(10)
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}
}

// resumeG undoes the effects of suspendG, allowing the suspended
// goroutine to continue from its current safe-point.
func resumeG(state suspendGState) {
	if state.dead {
		// We didn't actually stop anything.
		return
	}

	gp := state.g
	switch s := readgstatus(gp); s {
	default:
		dumpgstatus(gp)
		throw("unexpected g status")

	case _Grunnable | _Gscan,
		_Gwaiting | _Gscan,
		_Gsyscall | _Gscan:
		casfrom_Gscanstatus(gp, s, s&^_Gscan)
	}

	if state.stopped {
		// We stopped it, so we need to re-schedule it.
		ready(gp, 0, true)
	}
}
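
// A typical suspend/resume pairing (an informal sketch; real callers
// such as the GC's stack scan add more bookkeeping around it) runs on
// the system stack and looks like:
//
//	s := suspendG(gp)
//	if !s.dead {
//		// gp is parked at a safe-point and cannot run or change
//		// state here, so its stack and scheduler state can be
//		// read consistently.
//	}
//	resumeG(s)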

// canPreemptM reports whether mp is in a state that is safe to preempt.
//
// It is nosplit because it has nosplit callers.
//
//go:nosplit
func canPreemptM(mp *m) bool {
	return mp.locks == 0 && mp.mallocing == 0 && mp.preemptoff == "" && mp.p.ptr().status == _Prunning
}

//go:generate go run mkpreempt.go

// asyncPreempt saves all user registers and calls asyncPreempt2.
//
// When stack scanning encounters an asyncPreempt frame, it scans that
// frame and its parent frame conservatively.
//
// asyncPreempt is implemented in assembly.
func asyncPreempt()

//go:nosplit
func asyncPreempt2() {
	gp := getg()
	gp.asyncSafePoint = true
	if gp.preemptStop {
		mcall(preemptPark)
	} else {
		mcall(gopreempt_m)
	}
	gp.asyncSafePoint = false
}

// asyncPreemptStack is the bytes of stack space required to inject an
// asyncPreempt call.
var asyncPreemptStack = ^uintptr(0)

func init() {
	f := findfunc(abi.FuncPCABI0(asyncPreempt))
	total := funcMaxSPDelta(f)
	f = findfunc(abi.FuncPCABIInternal(asyncPreempt2))
	total += funcMaxSPDelta(f)
	// Add some overhead for return PCs, etc.
	asyncPreemptStack = uintptr(total) + 8*goarch.PtrSize
	if asyncPreemptStack > stackNosplit {
		// We need more than the nosplit limit. This isn't
		// unsafe, but it may limit asynchronous preemption.
		//
		// This may be a problem if we start using more
		// registers. In that case, we should store registers
		// in a context object. If we pre-allocate one per P,
		// asyncPreempt can spill just a few registers to the
		// stack, then grab its context object and spill into
		// it. When it enters the runtime, it would allocate a
		// new context for the P.
		print("runtime: asyncPreemptStack=", asyncPreemptStack, "\n")
		throw("async stack too large")
	}
}

// wantAsyncPreempt returns whether an asynchronous preemption is
// queued for gp.
func wantAsyncPreempt(gp *g) bool {
	// Check both the G and the P.
	return (gp.preempt || gp.m.p != 0 && gp.m.p.ptr().preempt) && readgstatus(gp)&^_Gscan == _Grunning
}
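
// Together, wantAsyncPreempt and isAsyncSafePoint are what the signal
// handler consults before injecting a call. As an informal sketch of
// the Unix flow (a simplification of doSigPreempt, not the exact
// code), the handler does roughly:
//
//	if wantAsyncPreempt(gp) {
//		if ok, newpc := isAsyncSafePoint(gp, ctxt.sigpc(), ctxt.sigsp(), ctxt.siglr()); ok {
//			// Rewrite the signal context so that, on return,
//			// the thread appears to have called asyncPreempt
//			// from newpc.
//			ctxt.pushCall(abi.FuncPCABI0(asyncPreempt), newpc)
//		}
//	}
//	// Acknowledge the preemption.
//	gp.m.preemptGen.Add(1)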

// isAsyncSafePoint reports whether gp at instruction PC is an
// asynchronous safe point. This indicates that:
//
// 1. It's safe to suspend gp and conservatively scan its stack and
// registers. There are no potentially hidden pointer values and it's
// not in the middle of an atomic sequence like a write barrier.
//
// 2. gp has enough stack space to inject the asyncPreempt call.
//
// 3. It's generally safe to interact with the runtime, even if we're
// in a signal handler stopped here. For example, there are no runtime
// locks held, so acquiring a runtime lock won't self-deadlock.
//
// In some cases the PC is safe for asynchronous preemption but it
// also needs to adjust the resumption PC. The new PC is returned in
// the second result.
func isAsyncSafePoint(gp *g, pc, sp, lr uintptr) (bool, uintptr) {
	mp := gp.m

	// Only user Gs can have safe-points. We check this first
	// because it's extremely common that we'll catch mp in the
	// scheduler processing this G preemption.
	if mp.curg != gp {
		return false, 0
	}

	// Check M state.
	if mp.p == 0 || !canPreemptM(mp) {
		return false, 0
	}

	// Check stack space.
	if sp < gp.stack.lo || sp-gp.stack.lo < asyncPreemptStack {
		return false, 0
	}

	// Check if PC is an unsafe-point.
	f := findfunc(pc)
	if !f.valid() {
		// Not Go code.
		return false, 0
	}
	if (GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "mips64" || GOARCH == "mips64le") && lr == pc+8 && funcspdelta(f, pc) == 0 {
		// We probably stopped at a half-executed CALL instruction,
		// where the LR is updated but the PC has not. If we preempt
		// here we'll see a seemingly self-recursive call, which is in
		// fact not.
		// This is normally ok, as we use the return address saved on
		// stack for unwinding, not the LR value. But if this is a
		// call to morestack, we haven't created the frame, and we'll
		// use the LR for unwinding, which will be bad.
		return false, 0
	}
	up, startpc := pcdatavalue2(f, abi.PCDATA_UnsafePoint, pc)
	if up == abi.UnsafePointUnsafe {
		// Unsafe-point marked by compiler. This includes
		// atomic sequences (e.g., write barrier) and nosplit
		// functions (except at calls).
		return false, 0
	}
	if fd := funcdata(f, abi.FUNCDATA_LocalsPointerMaps); fd == nil || f.flag&abi.FuncFlagAsm != 0 {
		// This is assembly code. Don't assume it's well-formed.
		// TODO: Empirically we still need the fd == nil check. Why?
		//
		// TODO: Are there cases that are safe but don't have a
		// locals pointer map, like empty frame functions?
		// It might be possible to preempt any assembly functions
		// except the ones that have funcFlag_SPWRITE set in f.flag.
		return false, 0
	}
	// Check the inner-most name.
	u, uf := newInlineUnwinder(f, pc)
	name := u.srcFunc(uf).name()
	if hasPrefix(name, "runtime.") ||
		hasPrefix(name, "runtime/internal/") ||
		hasPrefix(name, "reflect.") {
		// For now we never async preempt the runtime or
		// anything closely tied to the runtime. Known issues
		// include: various points in the scheduler ("don't
		// preempt between here and here"), much of the defer
		// implementation (untyped info on stack), bulk write
		// barriers (write barrier check),
		// reflect.{makeFuncStub,methodValueCall}.
		//
		// TODO(austin): We should improve this, or opt things
		// in incrementally.
		return false, 0
	}
	switch up {
	case abi.UnsafePointRestart1, abi.UnsafePointRestart2:
		// Restartable instruction sequence. Back off PC to
		// the start PC.
		if startpc == 0 || startpc > pc || pc-startpc > 20 {
			throw("bad restart PC")
		}
		return true, startpc
	case abi.UnsafePointRestartAtEntry:
		// Restart from the function entry at resumption.
		return true, f.entry()
	}
	return true, pc
}