src/runtime/metrics_test.go

package runtime_test

import (
	"bytes"
	"fmt"
	"internal/goexperiment"
	"internal/profile"
	"internal/testenv"
	"os"
	"reflect"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"runtime/pprof"
	"runtime/trace"
	"slices"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
	"unsafe"
)

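// prepareAllMetricsSamples returns the full set of supported metrics as a
// name-to-description map, alongside a sample slice with one entry per metric.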
func prepareAllMetricsSamples() (map[string]metrics.Description, []metrics.Sample) {
	all := metrics.All()
	samples := make([]metrics.Sample, len(all))
	descs := make(map[string]metrics.Description)
	for i := range all {
		samples[i].Name = all[i].Name
		descs[all[i].Name] = all[i]
	}
	return descs, samples
}

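// TestReadMetrics cross-checks the values reported by runtime/metrics against
// the corresponding runtime.MemStats fields, using the runtime.ReadMetricsSlow
// test hook so both views are captured consistently.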
func TestReadMetrics(t *testing.T) {
	// Run a GC cycle to get some of the stats to be non-zero.
	runtime.GC()

	// Set an arbitrary memory limit to check the metric for it.
	limit := int64(512 * 1024 * 1024)
	oldLimit := debug.SetMemoryLimit(limit)
	defer debug.SetMemoryLimit(oldLimit)

	// Set a GC percent to check the metric for it.
	gcPercent := 99
	oldGCPercent := debug.SetGCPercent(gcPercent)
	defer debug.SetGCPercent(oldGCPercent)

	// Read the metrics and MemStats together so the two views are consistent
	// with one another.
	var mstats runtime.MemStats
	_, samples := prepareAllMetricsSamples()
	runtime.ReadMetricsSlow(&mstats, unsafe.Pointer(&samples[0]), len(samples), cap(samples))

	checkUint64 := func(t *testing.T, m string, got, want uint64) {
		t.Helper()
		if got != want {
			t.Errorf("metric %q: got %d, want %d", m, got, want)
		}
	}

	// Check to make sure the values we read line up with other values we read.
	var allocsBySize, gcPauses, schedPausesTotalGC *metrics.Float64Histogram
	var tinyAllocs uint64
	var mallocs, frees uint64
	for i := range samples {
		switch name := samples[i].Name; name {
		case "/cgo/go-to-c-calls:calls":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(runtime.NumCgoCall()))
		case "/memory/classes/heap/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapIdle-mstats.HeapReleased)
		case "/memory/classes/heap/released:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapReleased)
		case "/memory/classes/heap/objects:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapAlloc)
		case "/memory/classes/heap/unused:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapInuse-mstats.HeapAlloc)
		case "/memory/classes/heap/stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackInuse)
		case "/memory/classes/metadata/mcache/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheSys-mstats.MCacheInuse)
		case "/memory/classes/metadata/mcache/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MCacheInuse)
		case "/memory/classes/metadata/mspan/free:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanSys-mstats.MSpanInuse)
		case "/memory/classes/metadata/mspan/inuse:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.MSpanInuse)
		case "/memory/classes/metadata/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.GCSys)
		case "/memory/classes/os-stacks:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.StackSys-mstats.StackInuse)
		case "/memory/classes/other:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.OtherSys)
		case "/memory/classes/profiling/buckets:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.BuckHashSys)
		case "/memory/classes/total:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.Sys)
		case "/gc/heap/allocs-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize; it isn't represented in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// If the buckets don't line up, the count check below is meaningless.
					continue
				}
				if c, m := hist.Counts[i], sc.Mallocs; c != m {
					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, m)
				}
			}
			allocsBySize = hist
		case "/gc/heap/allocs:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc)
		case "/gc/heap/frees-by-size:bytes":
			hist := samples[i].Value.Float64Histogram()
			// Skip size class 0 in BySize; it isn't represented in the histogram.
			for i, sc := range mstats.BySize[1:] {
				if b, s := hist.Buckets[i+1], float64(sc.Size+1); b != s {
					t.Errorf("bucket does not match size class: got %f, want %f", b, s)
					// If the buckets don't line up, the count check below is meaningless.
					continue
				}
				if c, f := hist.Counts[i], sc.Frees; c != f {
					t.Errorf("histogram counts do not match BySize for class %d: got %d, want %d", i, c, f)
				}
			}
		case "/gc/heap/frees:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.TotalAlloc-mstats.HeapAlloc)
		case "/gc/heap/tiny/allocs:objects":
			// Tiny allocations are reported separately from the other objects
			// metrics, so just remember the value here and check it against
			// Mallocs and the size-class histogram after the loop.
			tinyAllocs = samples[i].Value.Uint64()
		case "/gc/heap/allocs:objects":
			mallocs = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			frees = samples[i].Value.Uint64()
		case "/gc/heap/live:bytes":
			// There's no exact MemStats equivalent for live bytes, so only
			// sanity-check the value: it must be non-zero and cannot exceed
			// the mapped heap.
			if live := samples[i].Value.Uint64(); live > mstats.HeapSys {
				t.Errorf("live bytes: %d > heap sys: %d", live, mstats.HeapSys)
			} else if live == 0 {
				// Unexpected, since the test forces a GC cycle above.
				t.Error("live bytes is 0")
			}
		case "/gc/gomemlimit:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(limit))
		case "/gc/heap/objects:objects":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.HeapObjects)
		case "/gc/heap/goal:bytes":
			checkUint64(t, name, samples[i].Value.Uint64(), mstats.NextGC)
		case "/gc/gogc:percent":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(gcPercent))
		case "/gc/cycles/automatic:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC-mstats.NumForcedGC))
		case "/gc/cycles/forced:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumForcedGC))
		case "/gc/cycles/total:gc-cycles":
			checkUint64(t, name, samples[i].Value.Uint64(), uint64(mstats.NumGC))
		case "/gc/pauses:seconds":
			gcPauses = samples[i].Value.Float64Histogram()
		case "/sched/pauses/total/gc:seconds":
			schedPausesTotalGC = samples[i].Value.Float64Histogram()
		}
	}

	// Check tinyAllocs: it should account for the difference between MemStats'
	// Mallocs and the total of the size-class histogram.
	nonTinyAllocs := uint64(0)
	for _, c := range allocsBySize.Counts {
		nonTinyAllocs += c
	}
	checkUint64(t, "/gc/heap/tiny/allocs:objects", tinyAllocs, mstats.Mallocs-nonTinyAllocs)

	// The allocs and frees object metrics exclude tiny allocations; MemStats includes them.
	checkUint64(t, "/gc/heap/allocs:objects", mallocs, mstats.Mallocs-tinyAllocs)
	checkUint64(t, "/gc/heap/frees:objects", frees, mstats.Frees-tinyAllocs)

	// /gc/pauses:seconds is expected to be an exact copy of /sched/pauses/total/gc:seconds.
	if !reflect.DeepEqual(gcPauses.Buckets, schedPausesTotalGC.Buckets) {
		t.Errorf("/gc/pauses:seconds buckets %v do not match /sched/pauses/total/gc:seconds buckets %v", gcPauses.Buckets, schedPausesTotalGC.Buckets)
	}
	if !reflect.DeepEqual(gcPauses.Counts, schedPausesTotalGC.Counts) {
		t.Errorf("/gc/pauses:seconds counts %v do not match /sched/pauses/total/gc:seconds counts %v", gcPauses.Counts, schedPausesTotalGC.Counts)
	}
}

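// TestReadMetricsConsistency checks that the metrics produced by a single
// metrics.Read call are internally consistent: totals match the sums of their
// components, alloc/free histograms line up, and the CPU classes add up.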
func TestReadMetricsConsistency(t *testing.T) {
	// Run a few GC cycles to get some of the stats to be non-zero.
	runtime.GC()
	runtime.GC()
	runtime.GC()

	// Raise GOMAXPROCS and sleep briefly so that some idle time accumulates.
	oldmaxprocs := runtime.GOMAXPROCS(10)
	time.Sleep(time.Millisecond)
	runtime.GOMAXPROCS(oldmaxprocs)

	// Read all the supported metrics.
	descs, samples := prepareAllMetricsSamples()
	metrics.Read(samples)

	// Check to make sure the values we read make sense.
	var totalVirtual struct {
		got, want uint64
	}
	var objects struct {
		alloc, free             *metrics.Float64Histogram
		allocs, frees           uint64
		allocdBytes, freedBytes uint64
		total, totalBytes       uint64
	}
	var gc struct {
		numGC  uint64
		pauses uint64
	}
	var totalScan struct {
		got, want uint64
	}
	var cpu struct {
		gcAssist    float64
		gcDedicated float64
		gcIdle      float64
		gcPause     float64
		gcTotal     float64

		idle float64
		user float64

		scavengeAssist float64
		scavengeBg     float64
		scavengeTotal  float64

		total float64
	}
	for i := range samples {
		kind := samples[i].Value.Kind()
		if want := descs[samples[i].Name].Kind; kind != want {
			t.Errorf("supported metric %q has unexpected kind: got %d, want %d", samples[i].Name, kind, want)
			continue
		}
		if samples[i].Name != "/memory/classes/total:bytes" && strings.HasPrefix(samples[i].Name, "/memory/classes") {
			v := samples[i].Value.Uint64()
			totalVirtual.want += v

			// None of these stats should ever get this big. If one does, it is
			// almost certainly an underflow or overflow from bad accounting.
			if int64(v) < 0 {
				t.Errorf("%q has high/negative value: %d", samples[i].Name, v)
			}
		}
		switch samples[i].Name {
		case "/cpu/classes/gc/mark/assist:cpu-seconds":
			cpu.gcAssist = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/dedicated:cpu-seconds":
			cpu.gcDedicated = samples[i].Value.Float64()
		case "/cpu/classes/gc/mark/idle:cpu-seconds":
			cpu.gcIdle = samples[i].Value.Float64()
		case "/cpu/classes/gc/pause:cpu-seconds":
			cpu.gcPause = samples[i].Value.Float64()
		case "/cpu/classes/gc/total:cpu-seconds":
			cpu.gcTotal = samples[i].Value.Float64()
		case "/cpu/classes/idle:cpu-seconds":
			cpu.idle = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/assist:cpu-seconds":
			cpu.scavengeAssist = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/background:cpu-seconds":
			cpu.scavengeBg = samples[i].Value.Float64()
		case "/cpu/classes/scavenge/total:cpu-seconds":
			cpu.scavengeTotal = samples[i].Value.Float64()
		case "/cpu/classes/total:cpu-seconds":
			cpu.total = samples[i].Value.Float64()
		case "/cpu/classes/user:cpu-seconds":
			cpu.user = samples[i].Value.Float64()
		case "/memory/classes/total:bytes":
			totalVirtual.got = samples[i].Value.Uint64()
		case "/memory/classes/heap/objects:bytes":
			objects.totalBytes = samples[i].Value.Uint64()
		case "/gc/heap/objects:objects":
			objects.total = samples[i].Value.Uint64()
		case "/gc/heap/allocs:bytes":
			objects.allocdBytes = samples[i].Value.Uint64()
		case "/gc/heap/allocs:objects":
			objects.allocs = samples[i].Value.Uint64()
		case "/gc/heap/allocs-by-size:bytes":
			objects.alloc = samples[i].Value.Float64Histogram()
		case "/gc/heap/frees:bytes":
			objects.freedBytes = samples[i].Value.Uint64()
		case "/gc/heap/frees:objects":
			objects.frees = samples[i].Value.Uint64()
		case "/gc/heap/frees-by-size:bytes":
			objects.free = samples[i].Value.Float64Histogram()
		case "/gc/cycles:gc-cycles":
			gc.numGC = samples[i].Value.Uint64()
		case "/gc/pauses:seconds":
			h := samples[i].Value.Float64Histogram()
			gc.pauses = 0
			for i := range h.Counts {
				gc.pauses += h.Counts[i]
			}
		case "/gc/scan/heap:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/globals:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/stack:bytes":
			totalScan.want += samples[i].Value.Uint64()
		case "/gc/scan/total:bytes":
			totalScan.got = samples[i].Value.Uint64()
		case "/sched/gomaxprocs:threads":
			if got, want := samples[i].Value.Uint64(), uint64(runtime.GOMAXPROCS(-1)); got != want {
				t.Errorf("gomaxprocs doesn't match runtime.GOMAXPROCS: got %d, want %d", got, want)
			}
		case "/sched/goroutines:goroutines":
			if samples[i].Value.Uint64() < 1 {
				t.Error("number of goroutines is less than one")
			}
		}
	}
	// Only check the CPU metrics on Linux, where we can be reasonably sure we
	// have a high-resolution timer.
	if runtime.GOOS == "linux" {
		if cpu.gcDedicated <= 0 && cpu.gcAssist <= 0 && cpu.gcIdle <= 0 {
			t.Errorf("found no time spent on GC work: %#v", cpu)
		}
		if cpu.gcPause <= 0 {
			t.Errorf("found no GC pauses: %f", cpu.gcPause)
		}
		if cpu.idle <= 0 {
			t.Errorf("found no idle time: %f", cpu.idle)
		}
		if total := cpu.gcDedicated + cpu.gcAssist + cpu.gcIdle + cpu.gcPause; !withinEpsilon(cpu.gcTotal, total, 0.01) {
			t.Errorf("calculated total GC CPU not within 1%% of sampled total: %f vs. %f", total, cpu.gcTotal)
		}
		if total := cpu.scavengeAssist + cpu.scavengeBg; !withinEpsilon(cpu.scavengeTotal, total, 0.01) {
			t.Errorf("calculated total scavenge CPU not within 1%% of sampled total: %f vs. %f", total, cpu.scavengeTotal)
		}
		if cpu.total <= 0 {
			t.Errorf("found no total CPU time passed")
		}
		if cpu.user <= 0 {
			t.Errorf("found no user time passed")
		}
		if total := cpu.gcTotal + cpu.scavengeTotal + cpu.user + cpu.idle; !withinEpsilon(cpu.total, total, 0.02) {
			t.Errorf("calculated total CPU not within 2%% of sampled total: %f vs. %f", total, cpu.total)
		}
	}
	if totalVirtual.got != totalVirtual.want {
		t.Errorf(`"/memory/classes/total:bytes" does not match sum of /memory/classes/**: got %d, want %d`, totalVirtual.got, totalVirtual.want)
	}
	if got, want := objects.allocs-objects.frees, objects.total; got != want {
		t.Errorf("mismatch between object alloc/free tallies and total: got %d, want %d", got, want)
	}
	if got, want := objects.allocdBytes-objects.freedBytes, objects.totalBytes; got != want {
		t.Errorf("mismatch between byte alloc/free tallies and total: got %d, want %d", got, want)
	}
	if b, c := len(objects.alloc.Buckets), len(objects.alloc.Counts); b != c+1 {
		t.Errorf("allocs-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
	if b, c := len(objects.free.Buckets), len(objects.free.Counts); b != c+1 {
		t.Errorf("frees-by-size has wrong bucket or counts length: %d buckets, %d counts", b, c)
	}
	if len(objects.alloc.Buckets) != len(objects.free.Buckets) {
		t.Error("allocs-by-size and frees-by-size buckets don't match in length")
	} else if len(objects.alloc.Counts) != len(objects.free.Counts) {
		t.Error("allocs-by-size and frees-by-size counts don't match in length")
	} else {
		for i := range objects.alloc.Buckets {
			ba := objects.alloc.Buckets[i]
			bf := objects.free.Buckets[i]
			if ba != bf {
				t.Errorf("bucket %d is different for alloc and free hists: %f != %f", i, ba, bf)
			}
		}
		if !t.Failed() {
			var gotAlloc, gotFree uint64
			want := objects.total
			for i := range objects.alloc.Counts {
				if objects.alloc.Counts[i] < objects.free.Counts[i] {
					t.Errorf("found more frees than allocs in object dist bucket %d", i)
					continue
				}
				gotAlloc += objects.alloc.Counts[i]
				gotFree += objects.free.Counts[i]
			}
			if got := gotAlloc - gotFree; got != want {
				t.Errorf("object distribution counts don't match count of live objects: got %d, want %d", got, want)
			}
			if gotAlloc != objects.allocs {
				t.Errorf("object distribution counts don't match total allocs: got %d, want %d", gotAlloc, objects.allocs)
			}
			if gotFree != objects.frees {
				t.Errorf("object distribution counts don't match total frees: got %d, want %d", gotFree, objects.frees)
			}
		}
	}
	// Each GC cycle stops the world at least twice, so the pause histogram
	// must have at least two entries per cycle.
	if gc.pauses < gc.numGC*2 {
		t.Errorf("fewer pauses than expected: got %d, want at least %d", gc.pauses, gc.numGC*2)
	}
	if totalScan.got <= 0 {
		t.Errorf("scannable GC space is empty: %d", totalScan.got)
	}
	if totalScan.got != totalScan.want {
		t.Errorf("/gc/scan/total:bytes doesn't line up with sum of /gc/scan*: total %d vs. sum %d", totalScan.got, totalScan.want)
	}
}

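// BenchmarkReadMetricsLatency measures how long individual metrics.Read calls
// take while the runtime is under a GC-heavy load, reporting p50/p90/p99
// latencies instead of the usual per-op metrics.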
func BenchmarkReadMetricsLatency(b *testing.B) {
	stop := applyGCLoad(b)

	// Collect the latency of each metrics.Read call.
	latencies := make([]time.Duration, 0, 1024)
	_, samples := prepareAllMetricsSamples()

	// Hit metrics.Read continuously and measure.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		start := time.Now()
		metrics.Read(samples)
		latencies = append(latencies, time.Since(start))
	}
	// Stop the timer before tearing down the load; winding down the GC load
	// can take a while and shouldn't count against the measurement.
	b.StopTimer()
	stop()

	// Disable the default */op metrics; they'd only reflect the loop overhead.
	// The latency percentiles reported below are what this benchmark is about.
	b.ReportMetric(0, "ns/op")
	b.ReportMetric(0, "B/op")
	b.ReportMetric(0, "allocs/op")

	// Sort the latencies and report the 50th, 90th, and 99th percentiles.
	sort.Slice(latencies, func(i, j int) bool {
		return latencies[i] < latencies[j]
	})
	b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
	b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
	b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
}

// readMetricsSink keeps the allocations made by the background loop in
// TestReadMetricsCumulative reachable so they aren't optimized away.
var readMetricsSink [1024]interface{}

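// TestReadMetricsCumulative checks that metrics marked Cumulative never
// decrease between successive reads, even while the program is actively
// allocating and running GCs.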
func TestReadMetricsCumulative(t *testing.T) {
	// Set up the set of metrics marked cumulative.
	descs := metrics.All()
	var samples [2][]metrics.Sample
	samples[0] = make([]metrics.Sample, len(descs))
	samples[1] = make([]metrics.Sample, len(descs))
	total := 0
	for i := range samples[0] {
		if !descs[i].Cumulative {
			continue
		}
		samples[0][total].Name = descs[i].Name
		total++
	}
	samples[0] = samples[0][:total]
	samples[1] = samples[1][:total]
	copy(samples[1], samples[0])

	// Start some noise in the background.
	var wg sync.WaitGroup
	wg.Add(1)
	done := make(chan struct{})
	go func() {
		defer wg.Done()
		for {
			// Allocate a bunch to churn the heap, checking for cancellation as we go.
			for i := 0; i < len(readMetricsSink); i++ {
				readMetricsSink[i] = make([]byte, 1024)
				select {
				case <-done:
					return
				default:
				}
			}
			runtime.GC()
		}
	}()

	sum := func(us []uint64) uint64 {
		total := uint64(0)
		for _, u := range us {
			total += u
		}
		return total
	}

	// Populate the first generation.
	metrics.Read(samples[0])

	// Check that no cumulative metric decreases across several generations of reads.
	for gen := 1; gen < 10; gen++ {
		metrics.Read(samples[gen%2])
		for i := range samples[gen%2] {
			name := samples[gen%2][i].Name
			vNew, vOld := samples[gen%2][i].Value, samples[1-(gen%2)][i].Value

			switch vNew.Kind() {
			case metrics.KindUint64:
				new := vNew.Uint64()
				old := vOld.Uint64()
				if new < old {
					t.Errorf("%s decreased: %d < %d", name, new, old)
				}
			case metrics.KindFloat64:
				new := vNew.Float64()
				old := vOld.Float64()
				if new < old {
					t.Errorf("%s decreased: %f < %f", name, new, old)
				}
			case metrics.KindFloat64Histogram:
				new := sum(vNew.Float64Histogram().Counts)
				old := sum(vOld.Float64Histogram().Counts)
				if new < old {
					t.Errorf("%s counts decreased: %d < %d", name, new, old)
				}
			}
		}
	}
	close(done)

	wg.Wait()
}

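// withinEpsilon reports whether v1 is within relative tolerance e of v2.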
func withinEpsilon(v1, v2, e float64) bool {
	return v2-v2*e <= v1 && v1 <= v2+v2*e
}

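// TestMutexWaitTimeMetric checks that blocking on sync.Mutex and sync.RWMutex
// shows up in the /sync/mutex/wait/total:seconds metric.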
func TestMutexWaitTimeMetric(t *testing.T) {
	var sample [1]metrics.Sample
	sample[0].Name = "/sync/mutex/wait/total:seconds"

	locks := []locker2{
		new(mutex),
		new(rwmutexWrite),
		new(rwmutexReadWrite),
		new(rwmutexWriteRead),
	}
	for _, lock := range locks {
		t.Run(reflect.TypeOf(lock).Elem().Name(), func(t *testing.T) {
			metrics.Read(sample[:])
			before := time.Duration(sample[0].Value.Float64() * 1e9)

			minMutexWaitTime := generateMutexWaitTime(lock)

			metrics.Read(sample[:])
			after := time.Duration(sample[0].Value.Float64() * 1e9)

			if wt := after - before; wt < minMutexWaitTime {
				t.Errorf("too little mutex wait time: got %s, want %s", wt, minMutexWaitTime)
			}
		})
	}
}

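// locker2 abstracts over the two lock/unlock call sites used by
// generateMutexWaitTime, so the same test can exercise sync.Mutex and the
// various read/write orderings of sync.RWMutex.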
type locker2 interface {
	Lock1()
	Unlock1()
	Lock2()
	Unlock2()
}

type mutex struct {
	mu sync.Mutex
}

func (m *mutex) Lock1()   { m.mu.Lock() }
func (m *mutex) Unlock1() { m.mu.Unlock() }
func (m *mutex) Lock2()   { m.mu.Lock() }
func (m *mutex) Unlock2() { m.mu.Unlock() }

type rwmutexWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexWrite) Lock1()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexWrite) Unlock2() { m.mu.Unlock() }

type rwmutexReadWrite struct {
	mu sync.RWMutex
}

func (m *rwmutexReadWrite) Lock1()   { m.mu.RLock() }
func (m *rwmutexReadWrite) Unlock1() { m.mu.RUnlock() }
func (m *rwmutexReadWrite) Lock2()   { m.mu.Lock() }
func (m *rwmutexReadWrite) Unlock2() { m.mu.Unlock() }

type rwmutexWriteRead struct {
	mu sync.RWMutex
}

func (m *rwmutexWriteRead) Lock1()   { m.mu.Lock() }
func (m *rwmutexWriteRead) Unlock1() { m.mu.Unlock() }
func (m *rwmutexWriteRead) Lock2()   { m.mu.RLock() }
func (m *rwmutexWriteRead) Unlock2() { m.mu.RUnlock() }

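// generateMutexWaitTime blocks a background goroutine on mu for roughly
// blockTime and returns the minimum wait duration that should subsequently be
// visible in the /sync/mutex/wait/total:seconds metric.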
func generateMutexWaitTime(mu locker2) time.Duration {
	// Set up the runtime to always track casgstatus transitions for metrics.
	*runtime.CasGStatusAlwaysTrack = true

	mu.Lock1()

	// Start a goroutine that will block on the lock.
	gc := make(chan *runtime.G)
	done := make(chan bool)
	go func() {
		gc <- runtime.Getg()

		for {
			mu.Lock2()
			mu.Unlock2()
			if <-done {
				return
			}
		}
	}()
	gp := <-gc

	// Set the block time high enough that it will always show up, even on
	// systems with coarse timer granularity.
	const blockTime = 100 * time.Millisecond

	// Make sure the goroutine spawned above actually blocks on the lock.
	for {
		if runtime.GIsWaitingOnMutex(gp) {
			break
		}
		runtime.Gosched()
	}

	// Let some amount of time pass.
	time.Sleep(blockTime)

	// Let the other goroutine acquire the lock.
	mu.Unlock1()
	done <- true

	// Reset the tracking flag.
	*runtime.CasGStatusAlwaysTrack = false
	return blockTime
}

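// TestCPUMetricsSleep checks that time spent in time.Sleep is accounted for in
// the /cpu/classes/idle:cpu-seconds metric.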
func TestCPUMetricsSleep(t *testing.T) {
	if runtime.GOOS == "wasip1" {
		// wasip1 busy-waits instead of going idle, so there is no meaningful
		// idle time to observe on that platform.
		t.Skip("wasip1 currently busy-waits in idle time; test not applicable")
	}

	names := []string{
		"/cpu/classes/idle:cpu-seconds",

		// The remaining metrics aren't checked directly; they're read so the
		// failure path below can log a per-class breakdown.
		"/cpu/classes/gc/mark/assist:cpu-seconds",
		"/cpu/classes/gc/mark/dedicated:cpu-seconds",
		"/cpu/classes/gc/mark/idle:cpu-seconds",
		"/cpu/classes/gc/pause:cpu-seconds",
		"/cpu/classes/gc/total:cpu-seconds",
		"/cpu/classes/scavenge/assist:cpu-seconds",
		"/cpu/classes/scavenge/background:cpu-seconds",
		"/cpu/classes/scavenge/total:cpu-seconds",
		"/cpu/classes/total:cpu-seconds",
		"/cpu/classes/user:cpu-seconds",
	}
	prep := func() []metrics.Sample {
		mm := make([]metrics.Sample, len(names))
		for i := range names {
			mm[i].Name = names[i]
		}
		return mm
	}
	m1, m2 := prep(), prep()

	const (
		// Expected time spent idle.
		dur = 100 * time.Millisecond

		// Maximum number of times this test will retry if the
		// observed idle time is too small.
		maxFailures = 10
	)

	failureIdleTimes := make([]float64, 0, maxFailures)

	// While this process is sleeping, the vast majority of GOMAXPROCS' worth
	// of CPU time should be accounted as idle. Allow half a CPU of slack to
	// absorb scheduling, GC, and timer-granularity noise.
	minIdleCPUSeconds := dur.Seconds() * (float64(runtime.GOMAXPROCS(-1)) - 0.5)

	// Make sure there's no lingering background scavenging work to do, since
	// the whole point of this test is to observe idle time.
	debug.FreeOSMemory()

	for retries := 0; retries < maxFailures; retries++ {
		// Take a baseline reading.
		runtime.GC()
		metrics.Read(m1)

		// Sleep.
		time.Sleep(dur)

		// Read again and compute how much idle time accumulated.
		runtime.GC()
		metrics.Read(m2)

		dt := m2[0].Value.Float64() - m1[0].Value.Float64()
		if dt >= minIdleCPUSeconds {
			// All is well. Test passed.
			return
		}
		failureIdleTimes = append(failureIdleTimes, dt)
		// Try again.
	}

	// We never saw enough idle time. Log details and fail.
	for i, dt := range failureIdleTimes {
		t.Logf("try %2d: idle time = %.5fs\n", i+1, dt)
	}
	t.Logf("try %d breakdown:\n", len(failureIdleTimes))
	for i := range names {
		if m1[i].Value.Kind() == metrics.KindBad {
			continue
		}
		t.Logf("\t%s %0.3f\n", names[i], m2[i].Value.Float64()-m1[i].Value.Float64())
	}
	t.Errorf(`time.Sleep did not contribute enough to "idle" class: minimum idle time = %.5fs`, minIdleCPUSeconds)
}

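// testSchedPauseMetrics runs fn and checks that the appropriate /sched/pauses
// histograms grew: the gc variants if isGC is true, the other variants if it
// is false, while the opposite set stays unchanged.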
func testSchedPauseMetrics(t *testing.T, fn func(t *testing.T), isGC bool) {
	m := []metrics.Sample{
		{Name: "/sched/pauses/stopping/gc:seconds"},
		{Name: "/sched/pauses/stopping/other:seconds"},
		{Name: "/sched/pauses/total/gc:seconds"},
		{Name: "/sched/pauses/total/other:seconds"},
	}

	stoppingGC := &m[0]
	stoppingOther := &m[1]
	totalGC := &m[2]
	totalOther := &m[3]

	sampleCount := func(s *metrics.Sample) uint64 {
		h := s.Value.Float64Histogram()

		var n uint64
		for _, c := range h.Counts {
			n += c
		}
		return n
	}

	// Read the baseline counts before running fn.
	metrics.Read(m)

	baselineStartGC := sampleCount(stoppingGC)
	baselineStartOther := sampleCount(stoppingOther)
	baselineTotalGC := sampleCount(totalGC)
	baselineTotalOther := sampleCount(totalOther)

	fn(t)

	metrics.Read(m)

	if isGC {
		if got := sampleCount(stoppingGC); got <= baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d did not increase from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got <= baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d did not increase from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got != baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d changed from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got != baselineTotalOther {
			t.Errorf("/sched/pauses/total/other:seconds sample count %d changed from baseline of %d", got, baselineTotalOther)
		}
	} else {
		if got := sampleCount(stoppingGC); got != baselineStartGC {
			t.Errorf("/sched/pauses/stopping/gc:seconds sample count %d changed from baseline of %d", got, baselineStartGC)
		}
		if got := sampleCount(totalGC); got != baselineTotalGC {
			t.Errorf("/sched/pauses/total/gc:seconds sample count %d changed from baseline of %d", got, baselineTotalGC)
		}

		if got := sampleCount(stoppingOther); got <= baselineStartOther {
			t.Errorf("/sched/pauses/stopping/other:seconds sample count %d did not increase from baseline of %d", got, baselineStartOther)
		}
		if got := sampleCount(totalOther); got <= baselineTotalOther {
			t.Errorf("/sched/pauses/total/other:seconds sample count %d did not increase from baseline of %d", got, baselineTotalOther)
		}
	}
}

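// TestSchedPauseMetrics exercises a set of known stop-the-world causes and
// checks the pause histograms via testSchedPauseMetrics.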
func TestSchedPauseMetrics(t *testing.T) {
	tests := []struct {
		name string
		isGC bool
		fn   func(t *testing.T)
	}{
		{
			name: "runtime.GC",
			isGC: true,
			fn: func(t *testing.T) {
				runtime.GC()
			},
		},
		{
			name: "runtime.GOMAXPROCS",
			fn: func(t *testing.T) {
				if runtime.GOARCH == "wasm" {
					t.Skip("GOMAXPROCS >1 not supported on wasm")
				}

				n := runtime.GOMAXPROCS(0)
				defer runtime.GOMAXPROCS(n)

				runtime.GOMAXPROCS(n + 1)
			},
		},
		{
			name: "runtime.GoroutineProfile",
			fn: func(t *testing.T) {
				var s [1]runtime.StackRecord
				runtime.GoroutineProfile(s[:])
			},
		},
		{
			name: "runtime.ReadMemStats",
			fn: func(t *testing.T) {
				var mstats runtime.MemStats
				runtime.ReadMemStats(&mstats)
			},
		},
		{
			name: "runtime.Stack",
			fn: func(t *testing.T) {
				var b [64]byte
				runtime.Stack(b[:], true)
			},
		},
		{
			name: "runtime/debug.WriteHeapDump",
			fn: func(t *testing.T) {
				if runtime.GOOS == "js" {
					t.Skip("WriteHeapDump not supported on js")
				}

				f, err := os.CreateTemp(t.TempDir(), "heapdumptest")
				if err != nil {
					t.Fatalf("os.CreateTemp failed: %v", err)
				}
				defer os.Remove(f.Name())
				defer f.Close()
				debug.WriteHeapDump(f.Fd())
			},
		},
		{
			name: "runtime/trace.Start",
			fn: func(t *testing.T) {
				if trace.IsEnabled() {
					t.Skip("tracing already enabled")
				}

				var buf bytes.Buffer
				if err := trace.Start(&buf); err != nil {
					t.Errorf("trace.Start err got %v want nil", err)
				}
				trace.Stop()
			},
		},
	}

	// These subtests count stop-the-world pauses, so disable automatic GC
	// (after running one final cycle now) to keep an unexpected GC from
	// adding pauses of its own while a subtest is measuring.
	defer debug.SetGCPercent(debug.SetGCPercent(-1))
	runtime.GC()

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			testSchedPauseMetrics(t, tc.fn, tc.isGC)
		})
	}
}

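// TestRuntimeLockMetricsAndProfile induces contention on runtime-internal
// locks and semaphores, then cross-checks the mutex profile against the
// /sync/mutex/wait/total:seconds metric.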
func TestRuntimeLockMetricsAndProfile(t *testing.T) {
	testenv.SkipFlaky(t, 64253)

	old := runtime.SetMutexProfileFraction(0)
	defer runtime.SetMutexProfileFraction(old)
	if old != 0 {
		t.Fatalf("need MutexProfileRate 0, got %d", old)
	}

	{
		before := os.Getenv("GODEBUG")
		for _, s := range strings.Split(before, ",") {
			if strings.HasPrefix(s, "runtimecontentionstacks=") {
				t.Logf("GODEBUG includes explicit setting %q", s)
			}
		}
		defer func() { os.Setenv("GODEBUG", before) }()
		os.Setenv("GODEBUG", fmt.Sprintf("%s,runtimecontentionstacks=1", before))
	}

	t.Logf("NumCPU %d", runtime.NumCPU())
	t.Logf("GOMAXPROCS %d", runtime.GOMAXPROCS(0))
	if minCPU := 2; runtime.NumCPU() < minCPU {
		t.Skipf("creating and observing contention on runtime-internal locks requires NumCPU >= %d", minCPU)
	}

	loadProfile := func(t *testing.T) *profile.Profile {
		var w bytes.Buffer
		pprof.Lookup("mutex").WriteTo(&w, 0)
		p, err := profile.Parse(&w)
		if err != nil {
			t.Fatalf("failed to parse profile: %v", err)
		}
		if err := p.CheckValid(); err != nil {
			t.Fatalf("invalid profile: %v", err)
		}
		return p
	}

	measureDelta := func(t *testing.T, fn func()) (metricGrowth, profileGrowth float64, p *profile.Profile) {
		beforeProfile := loadProfile(t)
		beforeMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(beforeMetrics)

		fn()

		afterProfile := loadProfile(t)
		afterMetrics := []metrics.Sample{{Name: "/sync/mutex/wait/total:seconds"}}
		metrics.Read(afterMetrics)

		sumSamples := func(p *profile.Profile, i int) int64 {
			var sum int64
			for _, s := range p.Sample {
				sum += s.Value[i]
			}
			return sum
		}

		metricGrowth = afterMetrics[0].Value.Float64() - beforeMetrics[0].Value.Float64()
		profileGrowth = float64(sumSamples(afterProfile, 1)-sumSamples(beforeProfile, 1)) * time.Nanosecond.Seconds()

		// Merge the before-profile into the after-profile with weight -1 so
		// that p contains only the growth observed while fn ran.
		p = afterProfile.Copy()
		if len(beforeProfile.Sample) > 0 {
			err := p.Merge(beforeProfile, -1)
			if err != nil {
				t.Fatalf("Merge profiles: %v", err)
			}
		}

		return metricGrowth, profileGrowth, p
	}

	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64) {
			metricGrowth, profileGrowth, p := measureDelta(t, func() {
				var started, stopped sync.WaitGroup
				started.Add(workers)
				stopped.Add(workers)
				for i := 0; i < workers; i++ {
					w := &contentionWorker{
						before: func() {
							started.Done()
							started.Wait()
						},
						after: func() {
							stopped.Done()
						},
						fn: fn,
					}
					go w.run()
				}
				stopped.Wait()
			})

			if profileGrowth == 0 {
				t.Errorf("no increase in mutex profile")
			}
			if metricGrowth == 0 && strictTiming {
				// If the critical section is very short, systems with low
				// timer resolution may be unable to measure it.
				t.Errorf("no increase in /sync/mutex/wait/total:seconds metric")
			}
			// Log both views of the contention growth; the checks below compare
			// them against each other and against the expected minimum, since
			// the two measurements for runtime-internal locks are taken very
			// close together.
			t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)

			acceptStacks = append([][]string(nil), acceptStacks...)
			for i, stk := range acceptStacks {
				if goexperiment.StaticLockRanking {
					if !slices.ContainsFunc(stk, func(s string) bool {
						return s == "runtime.systemstack" || s == "runtime.mcall" || s == "runtime.mstart"
					}) {
						// stk is a call stack that is still on the user stack
						// when it calls runtime.unlock. Under static lock
						// ranking, runtime.unlockWithRank shows up as an extra
						// frame, so add it to the expected stack.
						stk = append([]string{"runtime.unlockWithRank"}, stk...)
					}
				}
				acceptStacks[i] = stk
			}

			var stks [][]string
			values := make([][2]int64, len(acceptStacks))
			for _, s := range p.Sample {
				var have []string
				for _, loc := range s.Location {
					for _, line := range loc.Line {
						have = append(have, line.Function.Name)
					}
				}
				stks = append(stks, have)
				for i, stk := range acceptStacks {
					if slices.Equal(have, stk) {
						values[i][0] += s.Value[0]
						values[i][1] += s.Value[1]
					}
				}
			}
			for i, stk := range acceptStacks {
				n += values[i][0]
				value += values[i][1]
				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
			}
			if n == 0 && value == 0 {
				t.Logf("profile:\n%s", p)
				for _, have := range stks {
					t.Logf("have stack %v", have)
				}
				for _, stk := range acceptStacks {
					t.Errorf("want stack %v", stk)
				}
			}

			return metricGrowth, profileGrowth, n, value
		}
	}

	name := t.Name()

	t.Run("runtime.lock", func(t *testing.T) {
		mus := make([]runtime.Mutex, 100)
		var needContention atomic.Int64
		delay := 100 * time.Microsecond
		delayMicros := delay.Microseconds()

		// Disable automatic GC for the duration of this subtest. The workers
		// below spin while holding runtime-internal locks, and an unplanned
		// GC cycle in the middle could distort the contention measurement.
		defer debug.SetGCPercent(debug.SetGCPercent(-1))

		const workers = 2
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
		}

		fn := func() bool {
			n := int(needContention.Load())
			if n < 0 {
				return false
			}
			mu := &mus[n]

			runtime.Lock(mu)
			for int(needContention.Load()) == n {
				if runtime.MutexContended(mu) {
					// Hold the lock for about delay so the waiter's delay is measurable.
					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
						runtime.Usleep(uint32(delayMicros))
					}
					break
				}
			}
			runtime.Unlock(mu)
			needContention.Store(int64(n - 1))

			return true
		}

		stks := [][]string{{
			"runtime.unlock",
			"runtime_test." + name + ".func5.1",
			"runtime_test.(*contentionWorker).run",
		}}

		t.Run("sample-1", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(1)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)

			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
				// The workers hold each of the len(mus) locks for at least
				// delay (enforced with nanotime above), so the metric must
				// report at least that much total contention.
				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%fs < %fs)", have, want)
			}
			if have, want := n, int64(len(mus)); have != want {
				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
			}

			const slop = 1.5 // The two views use different clocks, so allow some skew.
			if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
				t.Errorf("views differ by more than %fx", slop)
			}
		})

		t.Run("sample-2", func(t *testing.T) {
			old := runtime.SetMutexProfileFraction(2)
			defer runtime.SetMutexProfileFraction(old)

			needContention.Store(int64(len(mus) - 1))
			metricGrowth, profileGrowth, n, _ := testcase(true, stks, workers, fn)(t)

			// With a profile fraction of 2 only about half of the contention
			// events are sampled, so allow a generous slop factor when
			// comparing counts and durations.
			const samplingSlop = 2.5

			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
				// As in sample-1, the metric is compared against the known
				// minimum contention, scaled by the sampling slop.
				t.Errorf("runtime/metrics reported less than the known minimum contention duration (%f * %fs < %fs)", samplingSlop, have, want)
			}
			if have, want := n, int64(len(mus)); float64(have) > float64(want)*samplingSlop || float64(want) > float64(have)*samplingSlop {
				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
			}

			const timerSlop = 1.5 * samplingSlop
			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
				t.Errorf("views differ by more than %fx", timerSlop)
			}
		})
	})

	t.Run("runtime.semrelease", func(t *testing.T) {
		old := runtime.SetMutexProfileFraction(1)
		defer runtime.SetMutexProfileFraction(old)

		const workers = 3
		if runtime.GOMAXPROCS(0) < workers {
			t.Skipf("creating and observing contention on runtime-internal semaphores requires GOMAXPROCS >= %d", workers)
		}

		var sem uint32 = 1
		var tries atomic.Int32
		tries.Store(10_000_000) // Bound the work so the workers exit rather than time out.
		var sawContention atomic.Int32
		var need int32 = 1
		fn := func() bool {
			if sawContention.Load() >= need {
				return false
			}
			if tries.Add(-1) < 0 {
				return false
			}

			runtime.Semacquire(&sem)
			runtime.Semrelease1(&sem, false, 0)
			if runtime.MutexContended(runtime.SemRootLock(&sem)) {
				sawContention.Add(1)
			}
			return true
		}

		stks := [][]string{
			{
				"runtime.unlock",
				"runtime.semrelease1",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
				"runtime_test.(*contentionWorker).run",
			},
			{
				"runtime.unlock",
				"runtime.semacquire1",
				"runtime.semacquire",
				"runtime_test.TestRuntimeLockMetricsAndProfile.func6.1",
				"runtime_test.(*contentionWorker).run",
			},
		}

		// Verify that we get the call stacks we expect, with anything more
		// than zero cycles / zero samples. Each contention event here is too
		// short to verify its duration directly; that's left to the explicit
		// runtime.lock test above.
		testcase(false, stks, workers, fn)(t)

		if remaining := tries.Load(); remaining >= 0 {
			t.Logf("finished test early (%d tries remaining)", remaining)
		}
	})
}

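// contentionWorker provides stable, recognizable call stacks for the lock
// contention profile checks above.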
type contentionWorker struct {
	before func()
	fn     func() bool
	after  func()
}

func (w *contentionWorker) run() {
	defer w.after()
	w.before()

	for w.fn() {
	}
}

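// TestMetricHeapUnusedLargeObjectOverflow checks that
// /memory/classes/heap/unused:bytes never wraps around to an absurdly large
// value while large objects are allocated and freed concurrently with
// metrics.Read.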
func TestMetricHeapUnusedLargeObjectOverflow(t *testing.T) {
	// Allocate and free large objects in a loop to rapidly grow and shrink
	// the in-use heap while the metric is read concurrently below.
	done := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for {
			for i := 0; i < 10; i++ {
				runtime.Escape(make([]byte, 1<<20))
			}
			runtime.GC()
			select {
			case <-done:
				return
			default:
			}
		}
	}()
	s := []metrics.Sample{
		{Name: "/memory/classes/heap/unused:bytes"},
	}
	for i := 0; i < 1000; i++ {
		metrics.Read(s)
		if s[0].Value.Uint64() > 1<<40 {
			t.Errorf("overflow")
			break
		}
	}
	done <- struct{}{}
	wg.Wait()
}