// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.exectracer2

// CPU profile -> trace

package runtime

// traceInitReadCPU initializes CPU profile -> tracer state for tracing.
//
// It creates the profBufs that CPU samples will be read from.
func traceInitReadCPU() {
	if traceEnabled() {
		throw("traceInitReadCPU called with trace enabled")
	}
	// Create new profBufs for CPU samples that will be emitted as events.
	// Format: after the timestamp, header is [pp.id, gp.goid, mp.procid].
	trace.cpuLogRead[0] = newProfBuf(3, profBufWordCount, profBufTagCount)
	trace.cpuLogRead[1] = newProfBuf(3, profBufWordCount, profBufTagCount)
	// We must not acquire trace.signalLock outside of a signal handler: a
	// profiling signal may arrive at any time and try to acquire it, leading to
	// deadlock. Because we can't use that lock to protect updates to
	// trace.cpuLogWrite (only the use of the structure it references), reads and
	// writes of the pointer must be atomic. (And although this field is never
	// the sole pointer to the profBuf value, it's best to allow a write barrier
	// here.)
	trace.cpuLogWrite[0].Store(trace.cpuLogRead[0])
	trace.cpuLogWrite[1].Store(trace.cpuLogRead[1])
}

// traceStartReadCPU creates a goroutine to start reading CPU profile
// data into an active trace.
//
// traceAdvanceSema must be held.
func traceStartReadCPU() {
	if !traceEnabled() {
		throw("traceStartReadCPU called with trace disabled")
	}
	// Spin up the logger goroutine.
	trace.cpuSleep = newWakeableSleep()
	done := make(chan struct{}, 1)
	go func() {
		for traceEnabled() {
			// Sleep here because traceReadCPU is non-blocking. This mirrors
			// how the runtime/pprof package obtains CPU profile data.
			//
			// We can't do a blocking read here because Darwin can't do a
			// wakeup from a signal handler, so all CPU profiling is just
			// non-blocking. See #61768 for more details.
			//
			// Like the runtime/pprof package, even if that bug didn't exist
			// we would still want to do a goroutine-level sleep in between
			// reads to avoid frequent wakeups.
			trace.cpuSleep.sleep(100_000_000)

			tl := traceAcquire()
			if !tl.ok() {
				// Tracing disabled.
				break
			}
			keepGoing := traceReadCPU(tl.gen)
			traceRelease(tl)
			if !keepGoing {
				break
			}
		}
		done <- struct{}{}
	}()
	trace.cpuLogDone = done
}

// traceStopReadCPU blocks until the trace CPU reading goroutine exits.
//
// traceAdvanceSema must be held, and tracing must be disabled.
func traceStopReadCPU() {
	if traceEnabled() {
		throw("traceStopReadCPU called with trace enabled")
	}

	// Once we close the profbuf, we'll be in one of two situations:
	// - The logger goroutine has already exited because it observed
	//   that the trace is disabled.
	// - The logger goroutine is asleep.
	//
	// Wake the goroutine so it can observe that the buffer is closed
	// and exit.
	trace.cpuLogWrite[0].Store(nil)
	trace.cpuLogWrite[1].Store(nil)
	trace.cpuLogRead[0].close()
	trace.cpuLogRead[1].close()
	trace.cpuSleep.wake()

	// Wait until the logger goroutine exits.
	<-trace.cpuLogDone

	// Clear state for the next trace.
	trace.cpuLogDone = nil
	trace.cpuLogRead[0] = nil
	trace.cpuLogRead[1] = nil
	trace.cpuSleep.close()
}
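
// Each record that traceReadCPU (below) pulls out of the profBuf has the
// layout written by traceCPUSample: data[0] is the record length in words,
// data[1] is the trace clock timestamp, data[2] is pp.id<<1|1 (or 0b10 when
// there was no P), data[3] is the goroutine ID, data[4] is the M's procid,
// and data[5:data[0]] holds the stack PCs. Overflow records synthesized by
// the profBuf itself carry an all-zero header and a single-word stack.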

// traceReadCPU attempts to read from trace.cpuLogRead[gen%2] and write the
// samples into the trace. Returns true if there might be more to read or
// false if the profBuf is closed or the caller should otherwise stop reading.
//
// The caller is responsible for ensuring that gen does not change. Either
// the caller must be in a traceAcquire/traceRelease block, or must be calling
// with traceAdvanceSema held.
//
// No more than one goroutine may be in traceReadCPU for the same
// profBuf at a time.
//
// Must not run on the system stack because profBuf.read performs race
// operations.
func traceReadCPU(gen uintptr) bool {
	var pcBuf [traceStackSize]uintptr

	data, tags, eof := trace.cpuLogRead[gen%2].read(profBufNonBlocking)
	for len(data) > 0 {
		if len(data) < 4 || data[0] > uint64(len(data)) {
			break // truncated profile
		}
		if data[0] < 4 || tags != nil && len(tags) < 1 {
			break // malformed profile
		}
		if len(tags) < 1 {
			break // mismatched profile records and tags
		}

		// Deserialize the data in the profile buffer.
		recordLen := data[0]
		timestamp := data[1]
		ppid := data[2] >> 1
		if hasP := (data[2] & 0b1) != 0; !hasP {
			ppid = ^uint64(0)
		}
		goid := data[3]
		mpid := data[4]
		stk := data[5:recordLen]

		// Overflow records always have an all-zero header.
		isOverflowRecord := len(stk) == 1 && data[2] == 0 && data[3] == 0 && data[4] == 0

		// Move the data iterator forward.
		data = data[recordLen:]
		// No support here for reporting goroutine tags at the moment; if
		// that information is to be part of the execution trace, we'd
		// probably want to see when the tags are applied and when they
		// change, instead of only seeing them when we get a CPU sample.
		tags = tags[1:]

		if isOverflowRecord {
			// Looks like an overflow record from the profBuf. Not much to
			// do here, we only want to report full records.
			continue
		}

		// Construct the stack for insertion to the stack table.
		nstk := 1
		pcBuf[0] = logicalStackSentinel
		for ; nstk < len(pcBuf) && nstk-1 < len(stk); nstk++ {
			pcBuf[nstk] = uintptr(stk[nstk-1])
		}

		// Write out a trace event.
		w := unsafeTraceWriter(gen, trace.cpuBuf[gen%2])

		// Ensure we have a place to write to.
		var flushed bool
		w, flushed = w.ensure(2 + 5*traceBytesPerNumber /* traceEvCPUSamples + traceEvCPUSample + timestamp + g + m + p + stack ID */)
		if flushed {
			// Annotate the batch as containing CPU samples.
			w.byte(byte(traceEvCPUSamples))
		}

		// Add the stack to the table.
		stackID := trace.stackTab[gen%2].put(pcBuf[:nstk])

		// Write out the CPU sample.
		w.byte(byte(traceEvCPUSample))
		w.varint(timestamp)
		w.varint(mpid)
		w.varint(ppid)
		w.varint(goid)
		w.varint(stackID)

		trace.cpuBuf[gen%2] = w.traceBuf
	}
	return !eof
}
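
// All of the CPU-sample state (trace.cpuLogRead, trace.cpuLogWrite, and
// trace.cpuBuf) comes in pairs indexed by gen%2, so samples belonging to a
// generation that is being finalized can be read and flushed without
// interfering with samples still being logged for the next generation.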

// traceCPUFlush flushes trace.cpuBuf[gen%2]. The caller must be certain that gen
// has completed and that there are no more writers to it.
//
// Must run on the system stack because it flushes buffers and acquires
// trace.lock to do so.
//
//go:systemstack
func traceCPUFlush(gen uintptr) {
	// Flush any remaining trace buffers containing CPU samples.
	if buf := trace.cpuBuf[gen%2]; buf != nil {
		lock(&trace.lock)
		traceBufFlush(buf, gen)
		unlock(&trace.lock)
		trace.cpuBuf[gen%2] = nil
	}
}

// traceCPUSample writes a CPU profile sample stack to the execution tracer's
// profiling buffer. It is called from a signal handler, so is limited in what
// it can do. mp must be the thread that is currently stopped in a signal.
func traceCPUSample(gp *g, mp *m, pp *p, stk []uintptr) {
	if !traceEnabled() {
		// Tracing is usually turned off; don't spend time acquiring the signal
		// lock unless it's active.
		return
	}
	if mp == nil {
		// Drop samples that don't have an identifiable thread. We can't render
		// this in any useful way anyway.
		return
	}

	// We're going to conditionally write to one of two buffers based on the
	// generation. To make sure we write to the correct one, we need to make
	// sure this thread's trace seqlock is held. If it already is, then we're
	// in the tracer and we can just take advantage of that. If it isn't, then
	// we need to acquire it and read the generation.
	locked := false
	if mp.trace.seqlock.Load()%2 == 0 {
		mp.trace.seqlock.Add(1)
		locked = true
	}
	gen := trace.gen.Load()
	if gen == 0 {
		// Tracing is disabled, as it turns out. Release the seqlock if necessary
		// and exit.
		if locked {
			mp.trace.seqlock.Add(1)
		}
		return
	}

	now := traceClockNow()
	// The "header" here is the ID of the P that was running the profiled code,
	// followed by the goroutine ID and the ID of the M. (For normal CPU
	// profiling, the header is usually just the count of samples with the
	// given stack.) Near syscalls, pp may be nil. Reporting goid of 0 is fine
	// for either g0 or a nil gp.
	var hdr [3]uint64
	if pp != nil {
		// Overflow records in profBuf have all header values set to zero. Make
		// sure that real headers have at least one bit set.
		hdr[0] = uint64(pp.id)<<1 | 0b1
	} else {
		hdr[0] = 0b10
	}
	if gp != nil {
		hdr[1] = gp.goid
	}
	if mp != nil {
		hdr[2] = uint64(mp.procid)
	}

	// Allow only one writer at a time.
	for !trace.signalLock.CompareAndSwap(0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	if log := trace.cpuLogWrite[gen%2].Load(); log != nil {
		// Note: we don't pass a tag pointer here (how should profiling tags
		// interact with the execution tracer?), but if we did we'd need to be
		// careful about write barriers. See the long comment in profBuf.write.
		log.write(nil, int64(now), hdr[:], stk)
	}

	trace.signalLock.Store(0)

	// Release the seqlock if we acquired it earlier.
	if locked {
		mp.trace.seqlock.Add(1)
	}
}