src/runtime/race_arm64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.
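
// To illustrate (a hedged Go sketch, not the compiler's literal output):
// under -race, the compiler instruments a plain memory read so that
//
//	x := *p
//
// becomes, roughly,
//
//	runtime.raceread(uintptr(unsafe.Pointer(p)))
//	x := *p
//
// and those runtime calls land on the thunks in this file.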

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are R19...R28.
// Temporary registers are R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer; align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s.)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g
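
// What load_g does, in Go-ish pseudocode (a sketch; tls_g holds the
// TLS offset of g):
//
//	tp := tpidr_el0()       // thread pointer, aligned on Darwin
//	g = *(tp + tls_g)       // fetch g out of thread-local storage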

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants the return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants the return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// BGE keeps the range half-open: racedataend itself is excluded
call:
	JMP	racecall<>(SB)
ret:
	RET
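
// The check above, in Go-ish pseudocode (a sketch of the same logic):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(fn, racectx, ...)
//	}
//	// otherwise: not a shadowed address, do nothing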

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
// R3 = addr of arguments passed to this function; it can be fetched
// at 40(RSP) in racecallatomic after two BLs.
// R0, R1, R2 are set in racecallatomic.
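//
// For reference, a hedged sketch of the tsan Go-build entry points used
// below (the exact prototype is an assumption based on tsan's Go build):
//
//	// void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
//
// where a points at the Go argument block; tsan reads the operands from it
// and writes results back through it.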

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
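//
// tsan's fetch_add returns the old value, while sync/atomic's Add must
// return the new value, so after the call we fold the delta back in.
// In Go-ish pseudocode (a sketch):
//
//	old := tsanFetchAdd(addr, delta) // result tsan stored at ret+16(FP)
//	ret = old + delta                // what AddInt32/AddInt64 return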
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after two BLs it is at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET
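
// The overall flow of racecallatomic, in Go-ish pseudocode (a sketch):
//
//	if inShadowRange(addr) {
//		tsanAtomicFn(racectx, callerPC, fnPC, argBlock) // tail call
//	} else {
//		__tsan_go_ignore_sync_begin(racectx) // suppress synchronization
//		tsanAtomicFn(racectx, callerPC, fnPC, argBlock)
//		__tsan_go_ignore_sync_end(racectx)
//	}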

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (R9). Arguments are already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)
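
// In Go-ish pseudocode (a sketch, analogous to systemstack):
//
//	sp, lr := RSP, LR         // stash in callee-saved regs; C preserves them
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp // borrow the g0 stack for the C call
//	}
//	fn()                      // BL (R9), the tsan function
//	RSP = sp                  // restore and return via the saved LR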

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0, so save R0 first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// Already on g0.
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret
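
// The thunk's flow, in Go-ish pseudocode (a simplified sketch; the
// no-switch path stays on g0 rather than resetting g):
//
//	func racecallbackthunk(cmd, ctx uintptr) {
//		if cmd == raceGetProcCmd { // hot path, no stack switch
//			*(*uintptr)(ctx) = getg().m.p.raceprocctx
//			return
//		}
//		saveCalleeSavedRegs()      // C expects them preserved
//		g = g.m.g0                 // finish the g->g0 switch
//		racecallback(cmd, ctx)     // Go code, may smash registers
//		g = g.m.curg
//		restoreCalleeSavedRegs()
//	}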

#ifndef TLSG_IS_VARIABLE
// tls_g, the g value for each thread in TLS.
GLOBL	runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif