src/runtime/race_arm64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.
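
// To illustrate (a hedged Go sketch, not the compiler's literal output):
// under -race, the compiler instruments a plain memory read so that
//
//	x := *p
//
// becomes, roughly,
//
//	runtime.raceread(uintptr(unsafe.Pointer(p)))
//	x := *p
//
// and those runtime calls land on the thunks in this file.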

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are R19...R28.
// Temporary registers are R9...R15.
// SP must be 16-byte aligned.

// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer; align it. (See tls_arm64.s.)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s.)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g
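
// What load_g does, in Go-ish pseudocode (a sketch; tls_g holds the
// TLS offset of g):
//
//	tp := tpidr_el0()       // thread pointer, aligned on Darwin
//	g = *(tp + tls_g)       // fetch g out of thread-local storage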

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants the return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start; tsan wants the return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// BGE keeps the range half-open: racedataend itself is excluded
call:
	JMP	racecall<>(SB)
ret:
	RET
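
// The check above, in Go-ish pseudocode (a sketch of the same logic):
//
//	if (addr >= racearenastart && addr < racearenaend) ||
//		(addr >= racedatastart && addr < racedataend) {
//		racecall(fn, racectx, ...)
//	}
//	// otherwise: not a shadowed address, do nothing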

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
// R3 = addr of arguments passed to this function; it can be fetched
// at 40(RSP) in racecallatomic after two BLs.
// R0, R1, R2 are set in racecallatomic.
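//
// For reference, a hedged sketch of the tsan Go-build entry points used
// below (the exact prototype is an assumption based on tsan's Go build):
//
//	// void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
//
// where a points at the Go argument block; tsan reads the operands from it
// and writes results back through it.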

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
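//
// tsan's fetch_add returns the old value, while sync/atomic's Add must
// return the new value, so after the call we fold the delta back in.
// In Go-ish pseudocode (a sketch):
//
//	old := tsanFetchAdd(addr, delta) // result tsan stored at ret+16(FP)
//	ret = old + delta                // what AddInt32/AddInt64 return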
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after two BLs it is at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET
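
// The overall flow of racecallatomic, in Go-ish pseudocode (a sketch):
//
//	if inShadowRange(addr) {
//		tsanAtomicFn(racectx, callerPC, fnPC, argBlock) // tail call
//	} else {
//		__tsan_go_ignore_sync_begin(racectx) // suppress synchronization
//		tsanAtomicFn(racectx, callerPC, fnPC, argBlock)
//		__tsan_go_ignore_sync_end(racectx)
//	}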

// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (R9). Arguments are already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)
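
// In Go-ish pseudocode (a sketch, analogous to systemstack):
//
//	sp, lr := RSP, LR         // stash in callee-saved regs; C preserves them
//	if g != g.m.g0 {
//		RSP = g.m.g0.sched.sp // borrow the g0 stack for the C call
//	}
//	fn()                      // BL (R9), the tsan function
//	RSP = sp                  // restore and return via the saved LR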

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-saved in C); load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
rest:
	// Save callee-saved registers (Go code won't preserve them).
	// 8(RSP) and 16(RSP) are for args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
	// Set g = g0.
	// load_g will clobber R0, so save R0 first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// Already on g0.
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret
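
// The thunk's flow, in Go-ish pseudocode (a simplified sketch; the
// no-switch path stays on g0 rather than resetting g):
//
//	func racecallbackthunk(cmd, ctx uintptr) {
//		if cmd == raceGetProcCmd { // hot path, no stack switch
//			*(*uintptr)(ctx) = getg().m.p.raceprocctx
//			return
//		}
//		saveCalleeSavedRegs()      // C expects them preserved
//		g = g.m.g0                 // finish the g->g0 switch
//		racecallback(cmd, ctx)     // Go code, may smash registers
//		g = g.m.curg
//		restoreCalleeSavedRegs()
//	}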

#ifndef TLSG_IS_VARIABLE
// tls_g, the g value for each thread in TLS.
GLOBL	runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif