// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a log of recent traces. The log is read by a user goroutine that
// turns it into formatted profile data. If the reader does not keep up
// with the log, those writes will be recorded as a count of lost records.
// The actual profile buffer is in profbuf.go.

package runtime

import (
	"internal/abi"
	"runtime/internal/sys"
	"unsafe"
)

const (
	// maxCPUProfStack is the profiler's maximum stack length, in frames
	// (the 64-frame figure used in the sizing discussion below).
	maxCPUProfStack = 64

	// profBufWordCount is the size of the CPU profile buffer's storage for the
	// header and stack of each sample, measured in 64-bit words. Every sample
	// has a required header of two words. With a small additional header (a
	// word or two) and stacks at the profiler's maximum length of 64 frames,
	// that capacity can support 1900 samples or 19 thread-seconds at a 100 Hz
	// sample rate, at a cost of 1 MiB.
	profBufWordCount = 1 << 17
	// profBufTagCount is the size of the CPU profile buffer's storage for the
	// goroutine tags associated with each sample. A capacity of 1<<14 means
	// room for 16k samples, or 160 thread-seconds at a 100 Hz sample rate.
	profBufTagCount = 1 << 14
)

// cpuProfile holds the state of the CPU profiler: whether it is on, the log
// that samples are written to, and a staging area for samples that arrive
// on non-Go threads.
type cpuProfile struct {
	// lock is held by SetCPUProfileRate and runtime_pprof_readProfile
	// while they read or update on and log.
	lock mutex
	on   bool     // profiling is on
	log  *profBuf // profile events written here

	// extra holds extra stacks accumulated in addNonGo
	// corresponding to profiling signals arriving on
	// non-Go-created threads. Those stacks are written
	// to log the next time a normal Go thread gets the
	// signal handler.
	// Assuming the stacks are 2 words each (we don't get
	// a full traceback from those threads), plus one word
	// size for framing, 100 Hz profiling would generate
	// 300 words per second.
	// Hopefully a normal Go thread will get the profiling
	// signal at least once every few seconds.
	//
	// Each queued stack is stored as one length word holding
	// 1 + the number of PCs, followed by the PCs themselves.
	extra      [1000]uintptr
	numExtra   int    // number of words of extra currently in use
	lostExtra  uint64 // count of frames lost because extra is full
	lostAtomic uint64 // count of frames lost because of being in atomic64 on mips/arm; updated racily
}

// cpuprof is the single, package-level CPU profiler state.
var cpuprof cpuProfile

// SetCPUProfileRate sets the CPU profiling rate to hz samples per second.
// If hz <= 0, SetCPUProfileRate turns off profiling.
// If the profiler is on, the rate cannot be changed without first turning it off.
//
// Most clients should use the [runtime/pprof] package or
// the [testing] package's -test.cpuprofile flag instead of calling
// SetCPUProfileRate directly.
func SetCPUProfileRate(hz int) {
	// Clamp hz to something reasonable.
	if hz < 0 {
		hz = 0
	}
	if hz > 1000000 {
		hz = 1000000
	}

	lock(&cpuprof.lock)
	if hz > 0 {
		// Refuse to start while profiling is on, or while the log of a
		// previous profile still exists: runtime_pprof_readProfile only
		// resets cpuprof.log to nil once it has read that log to eof.
		if cpuprof.on || cpuprof.log != nil {
			print("runtime: cannot set cpu profile rate until previous profile has finished.\n")
			unlock(&cpuprof.lock)
			return
		}

		cpuprof.on = true
		// NOTE(review): the first argument is presumably the per-record
		// header size in words (every write in this file passes a
		// one-word hdr) — confirm against newProfBuf in profbuf.go.
		cpuprof.log = newProfBuf(1, profBufWordCount, profBufTagCount)
		// The first record in a fresh log has no stack and carries the
		// requested rate in its header word.
		hdr := [1]uint64{uint64(hz)}
		cpuprof.log.write(nil, nanotime(), hdr[:], nil)
		// Create the log before asking for profiling signals.
		setcpuprofilerate(int32(hz))
	} else if cpuprof.on {
		// Turn the signal off first, then write out whatever addNonGo
		// queued (and any lost-sample counts) before closing the log.
		setcpuprofilerate(0)
		cpuprof.on = false
		cpuprof.addExtra()
		cpuprof.log.close()
	}
	unlock(&cpuprof.lock)
}

// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack.
//
// tagPtr is passed through unchanged to the log's write method and stk is
// the stack to record. If prof.hz is zero, nothing is written.
//
//go:nowritebarrierrec
func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
	// Simple cas-lock to coordinate with setcpuprofilerate.
	for !prof.signalLock.CompareAndSwap(0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	if prof.hz.Load() != 0 { // implies cpuprof.log != nil
		// Write out anything addNonGo queued, and any pending lost-sample
		// counts, before this sample.
		if p.numExtra > 0 || p.lostExtra > 0 || p.lostAtomic > 0 {
			p.addExtra()
		}
		// An ordinary sample has a header word of 1.
		hdr := [1]uint64{1}
		// Note: write "knows" that the argument is &gp.labels,
		// because otherwise its write barrier behavior may not
		// be correct. See the long comment there before
		// changing the argument here.
		cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
	}

	// Release the cas-lock.
	prof.signalLock.Store(0)
}

// addNonGo adds the non-Go stack trace to the profile.
// It is called from a non-Go thread, so we cannot use much stack at all,
// nor do anything that needs a g or an m.
// In particular, we can't call cpuprof.log.write.
// Instead, we copy the stack into cpuprof.extra,
// which will be drained the next time a Go thread
// gets the signal handling event.
//
//go:nosplit
//go:nowritebarrierrec
func (p *cpuProfile) addNonGo(stk []uintptr) {
	// Simple cas-lock to coordinate with SetCPUProfileRate.
	// (Other calls to add or addNonGo should be blocked out
	// by the fact that only one SIGPROF can be handled by the
	// process at a time. If not, this lock will serialize those too.
	// The use of timer_create(2) on Linux to request process-targeted
	// signals may have changed this.)
	for !prof.signalLock.CompareAndSwap(0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	// A queued record takes 1+len(stk) words: a length word followed by
	// the PCs. If it does not fit in what is left of extra, count the
	// sample as lost instead.
	if cpuprof.numExtra+1+len(stk) < len(cpuprof.extra) {
		i := cpuprof.numExtra
		cpuprof.extra[i] = uintptr(1 + len(stk))
		copy(cpuprof.extra[i+1:], stk)
		cpuprof.numExtra += 1 + len(stk)
	} else {
		cpuprof.lostExtra++
	}

	// Release the cas-lock.
	prof.signalLock.Store(0)
}

// addExtra adds the "extra" profiling events,
// queued by addNonGo, to the profile log.
// addExtra is called either from a signal handler on a Go thread
// or from an ordinary goroutine; either way it can use stack
// and has a g. The world may be stopped, though.
//
// After draining the queued stacks it also writes one record for each
// non-zero lost-sample counter (lostExtra, lostAtomic) and resets it.
func (p *cpuProfile) addExtra() {
	// Copy accumulated non-Go profile events.
	// Each record in extra starts with a length word equal to 1 + the
	// number of PCs, so the PCs are extra[i+1 : i+extra[i]] and the next
	// record starts at i+extra[i]. These records are written with 0 as
	// the second argument, where add passes nanotime().
	hdr := [1]uint64{1}
	for i := 0; i < p.numExtra; {
		p.log.write(nil, 0, hdr[:], p.extra[i+1:i+int(p.extra[i])])
		i += int(p.extra[i])
	}
	p.numExtra = 0

	// Report any lost events.
	// The header word carries the number of lost samples (instead of the
	// usual 1) and the stack is a fixed two-frame placeholder.
	if p.lostExtra > 0 {
		hdr := [1]uint64{p.lostExtra}
		lostStk := [2]uintptr{
			abi.FuncPCABIInternal(_LostExternalCode) + sys.PCQuantum,
			abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum,
		}
		p.log.write(nil, 0, hdr[:], lostStk[:])
		p.lostExtra = 0
	}

	// Same reporting scheme for samples counted in lostAtomic.
	if p.lostAtomic > 0 {
		hdr := [1]uint64{p.lostAtomic}
		lostStk := [2]uintptr{
			abi.FuncPCABIInternal(_LostSIGPROFDuringAtomic64) + sys.PCQuantum,
			abi.FuncPCABIInternal(_System) + sys.PCQuantum,
		}
		p.log.write(nil, 0, hdr[:], lostStk[:])
		p.lostAtomic = 0
	}

}

// CPUProfile panics.
// It formerly provided raw access to chunks of
// a pprof-format profile generated by the runtime.
// The details of generating that format have changed,
// so this functionality has been removed.
//
// Deprecated: Use the [runtime/pprof] package,
// or the handlers in the [net/http/pprof] package,
// or the [testing] package's -test.cpuprofile flag instead.
func CPUProfile() []byte {
	panic("CPUProfile no longer available")
}

// runtime_pprof_runtime_cyclesPerSecond is linknamed into runtime/pprof as
// runtime_cyclesPerSecond. It returns the value of ticksPerSecond.
//
//go:linkname runtime_pprof_runtime_cyclesPerSecond runtime/pprof.runtime_cyclesPerSecond
func runtime_pprof_runtime_cyclesPerSecond() int64 {
	return ticksPerSecond()
}

// readProfile, provided to runtime/pprof, returns the next chunk of
// binary CPU profiling stack trace data, blocking until data is available.
219 // If profiling is turned off and all the profile data accumulated while it was 220 // on has been returned, readProfile returns eof=true. 221 // The caller must save the returned data and tags before calling readProfile again. 222 // The returned data contains a whole number of records, and tags contains 223 // exactly one entry per record. 224 // 225 //go:linkname runtime_pprof_readProfile runtime/pprof.readProfile 226 func runtime_pprof_readProfile() ([]uint64, []unsafe.Pointer, bool) { 227 lock(&cpuprof.lock) 228 log := cpuprof.log 229 unlock(&cpuprof.lock) 230 readMode := profBufBlocking 231 if GOOS == "darwin" || GOOS == "ios" { 232 readMode = profBufNonBlocking // For #61768; on Darwin notes are not async-signal-safe. See sigNoteSetup in os_darwin.go. 233 } 234 data, tags, eof := log.read(readMode) 235 if len(data) == 0 && eof { 236 lock(&cpuprof.lock) 237 cpuprof.log = nil 238 unlock(&cpuprof.lock) 239 } 240 return data, tags, eof 241 } 242