// Text file: src/runtime/asm_arm64.s (documentation: package runtime)
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "go_tls.h"
7#include "tls_arm64.h"
8#include "funcdata.h"
9#include "textflag.h"
10
// rt0_go bootstraps the Go runtime on arm64.
//
// On entry: RSP = initial stack, R0 = argc, R1 = argv.
// Steps, in order:
//   1. stash argc/argv in a 32-byte scratch area carved below the entry SP;
//   2. (TLS_darwin only) call tlsinit with &tls_g and the 8-byte-aligned
//      TLS base;
//   3. set g = &g0 and give it a provisional 64 KiB stack window ending at
//      the current SP (stack.lo and both stack guards = SP - 64 KiB);
//   4. if _cgo_init is non-nil, call it with R0 = g0, R1 = setg_gcc,
//      R2/R3 = TLS arguments (android) or R2 = 0;
//   5. save g, recompute both stack guards as stack.lo + stackGuard, and
//      link m0 and g0 to each other;
//   6. call check, (GOOS_windows only) wintls, args, osinit, schedinit;
//   7. queue runtime·main (via mainPC) with newproc and call mstart.
// The instructions after the mstart call deliberately fault, so control
// never runs off the end of this function.
TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
	// SP = stack; R0 = argc; R1 = argv

	SUB	$32, RSP
	MOVW	R0, 8(RSP)	// argc
	MOVD	R1, 16(RSP)	// argv

#ifdef TLS_darwin
	// Initialize TLS.
	MOVD	ZR, g	// clear g, make sure it's not junk.
	SUB	$32, RSP
	MRS_TPIDR_R0
	AND	$~7, R0
	MOVD	R0, 16(RSP)	// arg2: TLS base
	MOVD	$runtime·tls_g(SB), R2
	MOVD	R2, 8(RSP)	// arg1: &tlsg
	BL	·tlsinit(SB)
	ADD	$32, RSP
#endif

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVD	$runtime·g0(SB), g
	MOVD	RSP, R7
	MOVD	$(-64*1024)(R7), R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)
	MOVD	R0, (g_stack+stack_lo)(g)
	MOVD	R7, (g_stack+stack_hi)(g)

	// if there is a _cgo_init, call it using the gcc ABI.
	MOVD	_cgo_init(SB), R12
	CBZ	R12, nocgo

#ifdef GOOS_android
	MRS_TPIDR_R0	// load TLS base pointer
	MOVD	R0, R3	// arg 3: TLS base pointer
	MOVD	$runtime·tls_g(SB), R2	// arg 2: &tls_g
#else
	MOVD	$0, R2	// arg 2: not used when using platform's TLS
#endif
	MOVD	$setg_gcc<>(SB), R1	// arg 1: setg
	MOVD	g, R0	// arg 0: G
	SUB	$16, RSP	// reserve 16 bytes for sp-8 where fp may be saved.
	BL	(R12)
	ADD	$16, RSP

nocgo:
	BL	runtime·save_g(SB)
	// update stackguard after _cgo_init
	MOVD	(g_stack+stack_lo)(g), R0
	ADD	$const_stackGuard, R0
	MOVD	R0, g_stackguard0(g)
	MOVD	R0, g_stackguard1(g)

	// set the per-goroutine and per-mach "registers"
	MOVD	$runtime·m0(SB), R0

	// save m->g0 = g0
	MOVD	g, m_g0(R0)
	// save m0 to g0->m
	MOVD	R0, g_m(g)

	BL	runtime·check(SB)

#ifdef GOOS_windows
	BL	runtime·wintls(SB)
#endif

	MOVW	8(RSP), R0	// copy argc
	MOVW	R0, -8(RSP)
	MOVD	16(RSP), R0	// copy argv
	MOVD	R0, 0(RSP)
	BL	runtime·args(SB)
	BL	runtime·osinit(SB)
	BL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVD	$runtime·mainPC(SB), R0	// entry
	SUB	$16, RSP
	MOVD	R0, 8(RSP)	// arg
	MOVD	$0, 0(RSP)	// dummy LR
	BL	runtime·newproc(SB)
	ADD	$16, RSP

	// start this M
	BL	runtime·mstart(SB)

	// Prevent dead-code elimination of debugCallV2, which is
	// intended to be called by debuggers.
	MOVD	$runtime·debugCallV2<ABIInternal>(SB), R0

	// mstart should not come back: store through a nil pointer, then UNDEF.
	MOVD	$0, R0
	MOVD	R0, (R0)	// boom
	UNDEF
106
// mainPC is an 8-byte read-only word holding the address of
// runtime·main (ABIInternal). rt0_go hands &mainPC to newproc as the
// entry of the first goroutine.
DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
GLOBL	runtime·mainPC(SB),RODATA,$8
109
// Windows ARM64 needs an immediate 0xf000 argument.
// See go.dev/issues/53837.
//
// BREAK expands to a single BRK instruction; the $0xf000 immediate is used
// only when GOOS_windows is defined. The conditional lives inside the macro
// body, so every line (including #endif) carries a continuation backslash
// and the empty line that follows terminates the definition.
#define BREAK	\
#ifdef GOOS_windows	\
	BRK	$0xf000	\
#else	\
	BRK	\
#endif	\

// (end of BREAK; the empty line above is part of the macro definition)
// func breakpoint()
// Executes the BREAK macro (one BRK instruction) and returns.
// Frameless and argument-less (NOFRAME, $0-0).
TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
	BREAK
	RET
123
// func asminit()
// No work is done here on arm64: the function returns immediately.
TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
	RET
126
// func mstart()
// Thin TOPFRAME wrapper that calls runtime·mstart0.
// The RET is only there so the call is not the last instruction.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
	BL	runtime·mstart0(SB)
	RET	// not reached
130
/*
 *  go-routine
 */
134
// void gogo(Gobuf*)
// restore state from Gobuf; longjmp
//
// Loads the Gobuf pointer (stack argument) into R5 and its g field into R6,
// reads through R6 so that a nil g faults here, and then tail-branches to
// gogo<> with R5 = Gobuf and R6 = g.
TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	buf+0(FP), R5
	MOVD	gobuf_g(R5), R6
	MOVD	0(R6), R4	// make sure g != nil
	B	gogo<>(SB)
142
// gogo<> completes the switch started by runtime·gogo.
// In: R5 = *Gobuf, R6 = g to resume.
// Installs g (and publishes it with save_g), reloads SP, frame pointer (R29),
// LR, return value (R0) and closure context (R26) from the Gobuf, zeroes the
// sp/bp/ret/lr/ctxt fields of the Gobuf, and branches to gobuf.pc.
// Control does not come back to the caller.
TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
	MOVD	R6, g
	BL	runtime·save_g(SB)

	MOVD	gobuf_sp(R5), R0
	MOVD	R0, RSP
	MOVD	gobuf_bp(R5), R29
	MOVD	gobuf_lr(R5), LR
	MOVD	gobuf_ret(R5), R0
	MOVD	gobuf_ctxt(R5), R26
	MOVD	$0, gobuf_sp(R5)
	MOVD	$0, gobuf_bp(R5)
	MOVD	$0, gobuf_ret(R5)
	MOVD	$0, gobuf_lr(R5)
	MOVD	$0, gobuf_ctxt(R5)
	CMP	ZR, ZR	// set condition codes for == test, needed by stack split
	MOVD	gobuf_pc(R5), R6
	B	(R6)
161
// void mcall(fn func(*g))
// Switch to m->g0's stack, call fn(g).
// Fn must never return. It should gogo(&g->sched)
// to keep running g.
//
// ABIInternal: fn (closure pointer) arrives in R0 and is moved to R26.
// The caller's SP, frame pointer and return address are saved in g->sched
// (sched.lr is cleared). If the current g already is m->g0 the function
// branches to badmcall; if fn returns it branches to badmcall2.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
	MOVD	R0, R26	// context

	// Save caller state in g->sched
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)

	// Switch to m->g0 & its stack, call fn.
	MOVD	g, R3	// R3 = the g we are leaving
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	CMP	g, R3
	BNE	2(PC)	// different g: skip the badmcall branch
	B	runtime·badmcall(SB)

	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP	// sp = m->g0->sched.sp
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R3, R0	// arg = g
	MOVD	$0, -16(RSP)	// dummy LR
	SUB	$16, RSP
	MOVD	0(R26), R4	// code pointer
	BL	(R4)
	B	runtime·badmcall2(SB)
194
// systemstack_switch is a dummy routine that systemstack leaves at the bottom
// of the G stack. We need to distinguish the routine that
// lives at the bottom of the G stack from the one that lives
// at the top of the system stack because the one at the top of
// the system stack terminates the stack walk (see topofstack()).
//
// It is not meant to be executed: the first instruction is UNDEF.
// gosave_systemstack_switch<> records this function's address + 8 as a
// fake saved PC.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	BL	(LR)	// make sure this function is not leaf
	RET
204
// func systemstack(fn func())
//
// Runs fn on the m's g0 stack.
//   - g is gsignal or g0: fn is tail-called on the current stack (noswitch).
//   - g is m->curg: state is saved with gosave_systemstack_switch<>, g and SP
//     are switched to g0, fn is called, then g is switched back to m->curg and
//     its saved SP/frame pointer are reloaded and cleared in g->sched (switch).
//   - anything else: badsystemstack is called, followed by abort.
// R26 carries fn as the closure context; the code pointer is read from 0(R26).
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVD	fn+0(FP), R3	// R3 = fn
	MOVD	R3, R26	// context
	MOVD	g_m(g), R4	// R4 = m

	MOVD	m_gsignal(R4), R5	// R5 = gsignal
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_g0(R4), R5	// R5 = g0
	CMP	g, R5
	BEQ	noswitch

	MOVD	m_curg(R4), R6
	CMP	g, R6
	BEQ	switch

	// Bad: g is not gsignal, not g0, not curg. What is it?
	// Hide call from linker nosplit analysis.
	MOVD	$runtime·badsystemstack(SB), R3
	BL	(R3)
	B	runtime·abort(SB)

switch:
	// save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	BL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVD	R5, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R3
	MOVD	R3, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29

	// call target function
	MOVD	0(R26), R3	// code pointer
	BL	(R3)

	// switch back to g
	MOVD	g_m(g), R3
	MOVD	m_curg(R3), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	$0, (g_sched+gobuf_sp)(g)
	MOVD	$0, (g_sched+gobuf_bp)(g)
	RET

noswitch:
	// already on m stack, just call directly
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVD	0(R26), R3	// code pointer
	MOVD.P	16(RSP), R30	// restore LR
	SUB	$8, RSP, R29	// restore FP
	B	(R3)
264
// func switchToCrashStack0(fn func())
//
// ABIInternal: fn (closure pointer) arrives in R0.
// Makes runtime·gcrash the current g, ties it to the current m (g.m = m and
// m.g0 = g), moves SP to 32 bytes below gcrash's stack.hi and calls fn.
// fn is not expected to return; if it does, abort is called.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R26	// context register
	MOVD	g_m(g), R1	// curm

	// set g to gcrash
	MOVD	$runtime·gcrash(SB), g	// g = &gcrash
	BL	runtime·save_g(SB)	// clobbers R0
	MOVD	R1, g_m(g)	// g.m = curm
	MOVD	g, m_g0(R1)	// curm.g0 = g

	// switch to crashstack
	MOVD	(g_stack+stack_hi)(g), R1
	SUB	$(4*8), R1
	MOVD	R1, RSP

	// call target function
	MOVD	0(R26), R0
	CALL	(R0)

	// should never return
	CALL	runtime·abort(SB)
	UNDEF
288
/*
 * support for morestack
 */
292
// Called during function prolog when more stack is needed.
// Caller has already loaded:
// R3 prolog's LR (R30)
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// Register use below: R8 = m, R4 = g0 and later gsignal (for the identity
// checks), R3 = LR supplied by the caller, R26 = closure context, R0 = scratch.
// g->sched is filled in before the g0/gsignal checks, so the bad* handlers
// run with the faulting context already recorded.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	// Cannot grow scheduler stack (m->g0).
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R4

	// Called from f.
	// Set g->sched to context in f
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	LR, (g_sched+gobuf_pc)(g)
	MOVD	R3, (g_sched+gobuf_lr)(g)
	MOVD	R26, (g_sched+gobuf_ctxt)(g)

	CMP	g, R4
	BNE	3(PC)	// not g0: skip the two-instruction failure path
	BL	runtime·badmorestackg0(SB)
	B	runtime·abort(SB)

	// Cannot grow signal stack (m->gsignal).
	MOVD	m_gsignal(R8), R4
	CMP	g, R4
	BNE	3(PC)	// not gsignal: skip the two-instruction failure path
	BL	runtime·badmorestackgsignal(SB)
	B	runtime·abort(SB)

	// Called from f.
	// Set m->morebuf to f's callers.
	MOVD	R3, (m_morebuf+gobuf_pc)(R8)	// f's caller's PC
	MOVD	RSP, R0
	MOVD	R0, (m_morebuf+gobuf_sp)(R8)	// f's caller's RSP
	MOVD	g, (m_morebuf+gobuf_g)(R8)

	// Call newstack on m->g0's stack.
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD.W	$0, -16(RSP)	// create a call frame on g0 (saved LR; keep 16-aligned)
	BL	runtime·newstack(SB)

	// Not reached, but make sure the return PC from the call to newstack
	// is still in this function, and not the beginning of the next.
	UNDEF
346
// morestack_noctxt is the morestack entry for functions without a closure
// context: it clears R26 and tail-branches to runtime·morestack.
TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
	// Force SPWRITE. This function doesn't actually write SP,
	// but it is called with a special calling convention where
	// the caller doesn't save LR on stack but passes it as a
	// register (R3), and the unwinder currently doesn't understand.
	// Make it SPWRITE to stop unwinding. (See issue 54332)
	MOVD	RSP, RSP

	MOVW	$0, R26
	B	runtime·morestack(SB)
357
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
//
// Layout written: R0-R15 at byte offsets 0*8 .. 15*8, then F0-F15 at
// 16*8 .. 31*8, all relative to R20. No registers are modified.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	STP	(R0, R1), (0*8)(R20)
	STP	(R2, R3), (2*8)(R20)
	STP	(R4, R5), (4*8)(R20)
	STP	(R6, R7), (6*8)(R20)
	STP	(R8, R9), (8*8)(R20)
	STP	(R10, R11), (10*8)(R20)
	STP	(R12, R13), (12*8)(R20)
	STP	(R14, R15), (14*8)(R20)
	FSTPD	(F0, F1), (16*8)(R20)
	FSTPD	(F2, F3), (18*8)(R20)
	FSTPD	(F4, F5), (20*8)(R20)
	FSTPD	(F6, F7), (22*8)(R20)
	FSTPD	(F8, F9), (24*8)(R20)
	FSTPD	(F10, F11), (26*8)(R20)
	FSTPD	(F12, F13), (28*8)(R20)
	FSTPD	(F14, F15), (30*8)(R20)
	RET
377
// unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
//
// Mirror image of spillArgs: R0-R15 are read from byte offsets 0*8 .. 15*8
// and F0-F15 from 16*8 .. 31*8, relative to R20. R20 itself is preserved.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	LDP	(0*8)(R20), (R0, R1)
	LDP	(2*8)(R20), (R2, R3)
	LDP	(4*8)(R20), (R4, R5)
	LDP	(6*8)(R20), (R6, R7)
	LDP	(8*8)(R20), (R8, R9)
	LDP	(10*8)(R20), (R10, R11)
	LDP	(12*8)(R20), (R12, R13)
	LDP	(14*8)(R20), (R14, R15)
	FLDPD	(16*8)(R20), (F0, F1)
	FLDPD	(18*8)(R20), (F2, F3)
	FLDPD	(20*8)(R20), (F4, F5)
	FLDPD	(22*8)(R20), (F6, F7)
	FLDPD	(24*8)(R20), (F8, F9)
	FLDPD	(26*8)(R20), (F10, F11)
	FLDPD	(28*8)(R20), (F12, F13)
	FLDPD	(30*8)(R20), (F14, F15)
	RET
397
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// we don't have variable-sized frames, so we use a small number
// of constant-sized-frame functions to encode a few bits of size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH(NAME, MAXSIZE): if R16 <= MAXSIZE, branch to NAME; otherwise skip
// the two-instruction jump and fall through to whatever follows the macro.
// Expects the requested frame size in R16. Clobbers R27.
#define DISPATCH(NAME,MAXSIZE)	\
	MOVD	$MAXSIZE, R27;	\
	CMP	R27, R16;	\
	BGT	3(PC);	\
	MOVD	$NAME(SB), R27;	\
	B	(R27)
// Note: can't just "B NAME(SB)" - bad inlining results.
411
// reflectcall loads frameSize (the uint32 at offset 32 of the arguments) into
// R16 and branches to the first runtime·callN whose N is >= frameSize, trying
// N = 16, 32, ... 1073741824 in order. If frameSize exceeds the largest
// class it branches to runtime·badreflectcall. The function is frameless, so
// the selected callN sees reflectcall's own arguments.
TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
	MOVWU	frameSize+32(FP), R16
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	MOVD	$runtime·badreflectcall(SB), R0
	B	(R0)
443
// CALLFN(NAME, MAXSIZE) defines a WRAPPER function NAME with a MAXSIZE-byte
// frame and reflectcall's 48-byte argument layout. The generated body:
//   1. copies stackArgsSize bytes from stackArgs to 8(RSP): 16 bytes at a
//      time for the (size &^ 15) prefix, then one byte at a time for the
//      remaining (size & 15) bytes;
//   2. loads the register arguments from regArgs via unspillArgs;
//   3. calls the code pointer stored at 0(f), with f in R26;
//   4. saves the result registers back into regArgs via spillArgs;
//   5. passes the result region (offset stackRetOffset up to stackArgsSize,
//      in both the frame copy and the caller's stackArgs) to callRet<>.
// Comments inside the macro must use /* */ so the line continuations survive.
#define CALLFN(NAME,MAXSIZE)	\
TEXT NAME(SB), WRAPPER, $MAXSIZE-48;	\
	NO_LOCAL_POINTERS;	\
	/* copy arguments to stack */	\
	MOVD	stackArgs+16(FP), R3;	\
	MOVWU	stackArgsSize+24(FP), R4;	\
	ADD	$8, RSP, R5;	\
	BIC	$0xf, R4, R6;	\
	CBZ	R6, 6(PC);	\
	/* if R6=(argsize&~15) != 0 */	\
	ADD	R6, R5, R6;	\
	/* copy 16 bytes a time */	\
	LDP.P	16(R3), (R7, R8);	\
	STP.P	(R7, R8), 16(R5);	\
	CMP	R5, R6;	\
	BNE	-3(PC);	\
	AND	$0xf, R4, R6;	\
	CBZ	R6, 6(PC);	\
	/* if R6=(argsize&15) != 0 */	\
	ADD	R6, R5, R6;	\
	/* copy 1 byte a time for the rest */	\
	MOVBU.P	1(R3), R7;	\
	MOVBU.P	R7, 1(R5);	\
	CMP	R5, R6;	\
	BNE	-3(PC);	\
	/* set up argument registers */	\
	MOVD	regArgs+40(FP), R20;	\
	CALL	·unspillArgs(SB);	\
	/* call function */	\
	MOVD	f+8(FP), R26;	\
	MOVD	(R26), R20;	\
	PCDATA	$PCDATA_StackMapIndex, $0;	\
	BL	(R20);	\
	/* copy return values back */	\
	MOVD	regArgs+40(FP), R20;	\
	CALL	·spillArgs(SB);	\
	MOVD	stackArgsType+0(FP), R7;	\
	MOVD	stackArgs+16(FP), R3;	\
	MOVWU	stackArgsSize+24(FP), R4;	\
	MOVWU	stackRetOffset+28(FP), R6;	\
	ADD	$8, RSP, R5;	\
	ADD	R6, R5;	\
	ADD	R6, R3;	\
	SUB	R6, R4;	\
	BL	callRet<>(SB);	\
	RET
490
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
//
// In (as set up by CALLFN): R7 = stackArgsType, R3 = stackArgs + retOffset,
// R5 = frame copy + retOffset, R4 = stackArgsSize - retOffset, R20 = regArgs.
// They are stored at 8, 16, 24, 32 and 40(RSP) respectively before the call
// to runtime·reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $48-0
	NO_LOCAL_POINTERS
	STP	(R7, R3), 8(RSP)
	STP	(R5, R4), 24(RSP)
	MOVD	R20, 40(RSP)
	BL	runtime·reflectcallmove(SB)
	RET
502
// One fixed-frame trampoline per power-of-two size class, 16 bytes up to
// 1 GiB. These are the targets selected by the DISPATCH chain in reflectcall;
// the two lists must stay in step.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
530
// func memhash32(p unsafe.Pointer, h uintptr) uintptr
//
// ABIInternal: R0 = p, R1 = h; result in R0.
// If runtime·useAeshash is zero, tail-branches to memhash32Fallback.
// Otherwise V0 is built with the low 32 bits of h in lane S[0] and the
// 4 bytes at p in lane S[1] (remaining lanes zero), then mixed with three
// AESE steps keyed by the first 16 bytes of runtime·aeskeysched (V2), with
// AESMC after the first two. The low 64 bits of V0 are returned.
// Clobbers R3, R10, V0, V2.
TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	MOVD	$runtime·aeskeysched+0(SB), R3

	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.S[1]
	VMOV	R1, V0.S[0]

	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
noaes:
	B	runtime·memhash32Fallback<ABIInternal>(SB)
552
// func memhash64(p unsafe.Pointer, h uintptr) uintptr
//
// ABIInternal: R0 = p, R1 = h; result in R0.
// If runtime·useAeshash is zero, tail-branches to memhash64Fallback.
// Otherwise V0 is built with h in lane D[0] and the 8 bytes at p in lane
// D[1], then mixed with three AESE steps keyed by the first 16 bytes of
// runtime·aeskeysched (V2), with AESMC after the first two. The low 64 bits
// of V0 are returned. Clobbers R3, R10, V0, V2.
TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	MOVD	$runtime·aeskeysched+0(SB), R3

	VEOR	V0.B16, V0.B16, V0.B16
	VLD1	(R3), [V2.B16]
	VLD1	(R0), V0.D[1]
	VMOV	R1, V0.D[0]

	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
noaes:
	B	runtime·memhash64Fallback<ABIInternal>(SB)
574
// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
//
// ABIInternal: R0 = p, R1 = h, R2 = size - exactly the register contract of
// aeshashbody<>, so the AES path is a plain tail-branch. When
// runtime·useAeshash is zero it tail-branches to memhashFallback instead.
// Clobbers R10.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	B	aeshashbody<>(SB)
noaes:
	B	runtime·memhashFallback<ABIInternal>(SB)
582
// func strhash(p unsafe.Pointer, h uintptr) uintptr
//
// ABIInternal: R0 = p (address of a string header), R1 = h.
// On the AES path the two words at p are loaded into R0 (data pointer) and
// R2 (length) and control tail-branches to aeshashbody<>. When
// runtime·useAeshash is zero it tail-branches to strhashFallback with the
// original arguments untouched. Clobbers R10.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
	MOVB	runtime·useAeshash(SB), R10
	CBZ	R10, noaes
	LDP	(R0), (R0, R2)	// string data / length
	B	aeshashbody<>(SB)
noaes:
	B	runtime·strhashFallback<ABIInternal>(SB)
591
// R0: data
// R1: seed data
// R2: length
// At return, R0 = return value
//
// aeshashbody is the AES-instruction hash shared by memhash and strhash.
// V30 holds {seed, length}; it is combined with the first 16 bytes of
// runtime·aeskeysched to form the first key block in V0 (R4 is left pointing
// at the next 16 bytes of the schedule). The code then dispatches on length:
//   0        -> aes0        (returns the low half of V0)
//   1..15    -> aes0to15    (bytes gathered piecewise into V2)
//   16       -> aes16
//   17..32   -> aes17to32   } data is read as two halves: one from the start
//   33..64   -> aes33to64   } and one ending at the last byte, so the halves
//   65..128  -> aes65to128  } may overlap
//   129+     -> aes129plus  (last 128 bytes first, then a 128-byte loop)
// Scratch: R4, R10, V0-V15, V30. R0 and R2 are modified.
TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
	VEOR	V30.B16, V30.B16, V30.B16
	VMOV	R1, V30.D[0]
	VMOV	R2, V30.D[1]	// load length into seed

	MOVD	$runtime·aeskeysched+0(SB), R4
	VLD1.P	16(R4), [V0.B16]
	AESE	V30.B16, V0.B16
	AESMC	V0.B16, V0.B16
	CMP	$16, R2
	BLO	aes0to15
	BEQ	aes16
	CMP	$32, R2
	BLS	aes17to32
	CMP	$64, R2
	BLS	aes33to64
	CMP	$128, R2
	BLS	aes65to128
	B	aes129plus

aes0to15:
	CBZ	R2, aes0
	VEOR	V2.B16, V2.B16, V2.B16
	// Each set bit of the length (8, 4, 2, 1) pulls in that many bytes.
	TBZ	$3, R2, less_than_8
	VLD1.P	8(R0), V2.D[0]

less_than_8:
	TBZ	$2, R2, less_than_4
	VLD1.P	4(R0), V2.S[2]

less_than_4:
	TBZ	$1, R2, less_than_2
	VLD1.P	2(R0), V2.H[6]

less_than_2:
	TBZ	$0, R2, done
	VLD1	(R0), V2.B[14]
done:
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16

	VMOV	V2.D[0], R0
	RET

aes0:
	VMOV	V0.D[0], R0
	RET

aes16:
	VLD1	(R0), [V2.B16]
	B	done

aes17to32:
	// make second seed
	VLD1	(R4), [V1.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	SUB	$16, R2, R10
	VLD1.P	(R0)(R10), [V2.B16]	// first 16 bytes; R0 += len-16
	VLD1	(R0), [V3.B16]	// last 16 bytes

	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V1.B16, V3.B16
	AESMC	V3.B16, V3.B16

	AESE	V0.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V1.B16, V3.B16
	AESMC	V3.B16, V3.B16

	AESE	V0.B16, V2.B16
	AESE	V1.B16, V3.B16

	VEOR	V3.B16, V2.B16, V2.B16

	VMOV	V2.D[0], R0
	RET

aes33to64:
	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	SUB	$32, R2, R10

	VLD1.P	(R0)(R10), [V4.B16, V5.B16]	// first 32 bytes; R0 += len-32
	VLD1	(R0), [V6.B16, V7.B16]	// last 32 bytes

	AESE	V0.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V3.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V0.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V3.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V0.B16, V4.B16
	AESE	V1.B16, V5.B16
	AESE	V2.B16, V6.B16
	AESE	V3.B16, V7.B16

	VEOR	V6.B16, V4.B16, V4.B16
	VEOR	V7.B16, V5.B16, V5.B16
	VEOR	V5.B16, V4.B16, V4.B16

	VMOV	V4.D[0], R0
	RET

aes65to128:
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V30.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V30.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V30.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V30.B16, V7.B16
	AESMC	V7.B16, V7.B16

	SUB	$64, R2, R10
	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]	// first 64 bytes; R0 += len-64
	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]	// last 64 bytes
	AESE	V0.B16, V8.B16
	AESMC	V8.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESMC	V9.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESMC	V10.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESMC	V11.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESMC	V12.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESMC	V13.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESMC	V14.B16, V14.B16
	AESE	V7.B16, V15.B16
	AESMC	V15.B16, V15.B16

	AESE	V0.B16, V8.B16
	AESMC	V8.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESMC	V9.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESMC	V10.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESMC	V11.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESMC	V12.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESMC	V13.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESMC	V14.B16, V14.B16
	AESE	V7.B16, V15.B16
	AESMC	V15.B16, V15.B16

	AESE	V0.B16, V8.B16
	AESE	V1.B16, V9.B16
	AESE	V2.B16, V10.B16
	AESE	V3.B16, V11.B16
	AESE	V4.B16, V12.B16
	AESE	V5.B16, V13.B16
	AESE	V6.B16, V14.B16
	AESE	V7.B16, V15.B16

	VEOR	V12.B16, V8.B16, V8.B16
	VEOR	V13.B16, V9.B16, V9.B16
	VEOR	V14.B16, V10.B16, V10.B16
	VEOR	V15.B16, V11.B16, V11.B16
	VEOR	V10.B16, V8.B16, V8.B16
	VEOR	V11.B16, V9.B16, V9.B16
	VEOR	V9.B16, V8.B16, V8.B16

	VMOV	V8.D[0], R0
	RET

aes129plus:
	PRFM	(R0), PLDL1KEEP
	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
	AESE	V30.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V30.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V30.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V30.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V30.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V30.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V30.B16, V7.B16
	AESMC	V7.B16, V7.B16
	// Start with the final 128 bytes of the input (R10 = R0 + len - 128).
	ADD	R0, R2, R10
	SUB	$128, R10, R10
	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
	// Loop count = (len - 1) / 128 blocks, read from the front of the input.
	SUB	$1, R2, R2
	LSR	$7, R2, R2

aesloop:
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16

	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16
	SUB	$1, R2, R2
	CBNZ	R2, aesloop

	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESMC	V1.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESMC	V2.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESMC	V3.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESMC	V4.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESMC	V5.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESMC	V6.B16, V6.B16
	AESE	V15.B16, V7.B16
	AESMC	V7.B16, V7.B16

	AESE	V8.B16, V0.B16
	AESE	V9.B16, V1.B16
	AESE	V10.B16, V2.B16
	AESE	V11.B16, V3.B16
	AESE	V12.B16, V4.B16
	AESE	V13.B16, V5.B16
	AESE	V14.B16, V6.B16
	AESE	V15.B16, V7.B16

	VEOR	V0.B16, V1.B16, V0.B16
	VEOR	V2.B16, V3.B16, V2.B16
	VEOR	V4.B16, V5.B16, V4.B16
	VEOR	V6.B16, V7.B16, V6.B16
	VEOR	V0.B16, V2.B16, V0.B16
	VEOR	V4.B16, V6.B16, V4.B16
	VEOR	V4.B16, V0.B16, V0.B16

	VMOV	V0.D[0], R0
	RET
915
// func procyield(cycles uint32)
//
// Executes YIELD `cycles` times. The counter is decremented after each YIELD
// and tested for zero, so the body always runs at least once; a cycles value
// of 0 wraps to 0xffffffff on the first SUBW and spins for 2^32 iterations.
// Clobbers R0.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVWU	cycles+0(FP), R0
again:
	YIELD
	SUBW	$1, R0
	CBNZ	R0, again
	RET
923
// Save state of caller into g->sched,
// but using fake PC from systemstack_switch.
// Must only be called from functions with no locals ($0)
// or else unwinding from systemstack_switch is incorrect.
// Smashes R0.
//
// Fields written: sched.pc = &systemstack_switch + 8, sched.sp = RSP,
// sched.bp = R29, sched.lr = 0, sched.ret = 0. sched.ctxt is only checked:
// a non-zero value leads to runtime·abort.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	MOVD	$runtime·systemstack_switch(SB), R0
	ADD	$8, R0	// get past prologue
	MOVD	R0, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(g)
	MOVD	R29, (g_sched+gobuf_bp)(g)
	MOVD	$0, (g_sched+gobuf_lr)(g)
	MOVD	$0, (g_sched+gobuf_ret)(g)
	// Assert ctxt is zero. See func save.
	MOVD	(g_sched+gobuf_ctxt)(g), R0
	CBZ	R0, 2(PC)	// ctxt == 0: skip the abort call
	CALL	runtime·abort(SB)
	RET
943
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Call fn(arg) aligned appropriately for the gcc ABI.
// Called on a system stack, and there may be no g yet (during needm).
//
// arg is passed in R0 and fn is called through R1. SP is lowered by 16 bytes
// around the call and restored afterwards; nothing is returned to Go.
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0
	SUB	$16, RSP	// skip over saved frame pointer below RSP
	BL	(R1)
	ADD	$16, RSP	// skip over saved frame pointer below RSP
	RET
954
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Call fn(arg) on the scheduler stack,
// aligned appropriately for the gcc ABI.
// See cgocall.go for more details.
//
// Two paths:
//   - g is nil, gsignal or g0 (nosave): call fn on the current stack, keeping
//     only the original SP in a 16-byte scratch area.
//   - otherwise: save state with gosave_systemstack_switch<>, switch g and SP
//     to g0, and remember the old g plus the *depth* of the old SP below
//     stack.hi, so SP can be rebuilt even if the goroutine stack moved.
// In both paths the low 32 bits of fn's R0 are stored as the int32 result.
// Register use: R1 = fn, R0 = arg/result, R2 = original SP, R4 = old g,
// R8 = m, R3 = gsignal then g0, R9 = temporary home for R0, R13 = new SP.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVD	fn+0(FP), R1
	MOVD	arg+8(FP), R0

	MOVD	RSP, R2	// save original stack pointer
	CBZ	g, nosave
	MOVD	g, R4

	// Figure out if we need to switch to m->g0 stack.
	// We get called to create new OS threads too, and those
	// come in on the m->g0 stack already. Or we might already
	// be on the m->gsignal stack.
	MOVD	g_m(g), R8
	MOVD	m_gsignal(R8), R3
	CMP	R3, g
	BEQ	nosave
	MOVD	m_g0(R8), R3
	CMP	R3, g
	BEQ	nosave

	// Switch to system stack.
	MOVD	R0, R9	// gosave_systemstack_switch<> and save_g might clobber R0
	BL	gosave_systemstack_switch<>(SB)
	MOVD	R3, g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	(g_sched+gobuf_bp)(g), R29
	MOVD	R9, R0

	// Now on a scheduling stack (a pthread-created stack).
	// Save room for two of our pointers /*, plus 32 bytes of callee
	// save area that lives on the caller stack. */
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	R4, 0(RSP)	// save old g on stack
	MOVD	(g_stack+stack_hi)(R4), R4
	SUB	R2, R4
	MOVD	R4, 8(RSP)	// save depth in old g stack (can't just save SP, as stack might be copied during a callback)
	BL	(R1)
	MOVD	R0, R9

	// Restore g, stack pointer. R0 is errno, so don't touch it
	MOVD	0(RSP), g
	BL	runtime·save_g(SB)
	MOVD	(g_stack+stack_hi)(g), R5
	MOVD	8(RSP), R6
	SUB	R6, R5
	MOVD	R9, R0
	MOVD	R5, RSP

	MOVW	R0, ret+16(FP)
	RET

nosave:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	MOVD	RSP, R13
	SUB	$16, R13
	MOVD	R13, RSP
	MOVD	$0, R4
	MOVD	R4, 0(RSP)	// Where above code stores g, in case someone looks during debugging.
	MOVD	R2, 8(RSP)	// Save original stack pointer.
	BL	(R1)
	// Restore stack pointer.
	MOVD	8(RSP), R2
	MOVD	R2, RSP
	// ret is an int32 inside a 20-byte argument frame: store 4 bytes, exactly
	// like the switching path above, rather than an 8-byte MOVD.
	MOVW	R0, ret+16(FP)
	RET
1037
// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
// See cgocall.go for more details.
//
// Outline of the code below:
//   - fn == nil: frame holds a saved g; install it and go straight to dropm.
//   - otherwise load g from TLS. If there is none, borrow an m via
//     needAndBindM and seed m->g0->sched.sp/bp with the current SP/FP.
//   - havem: save the previous g0 sched.sp in this frame (savedsp) and
//     replace it with the current SP; switch to m->curg, push its saved pc/bp
//     below its saved sp, call cgocallbackg(fn, frame, ctxt) there, restore
//     curg's sched.pc/sp, switch back to g0 and restore its sched.sp.
//   - finally call dropm unless the m existed on entry (savedm != 0) or
//     *_cgo_pthread_key_created is non-zero.
// Frame slots: savedm-8(SP) = m on entry (0 if borrowed), savedsp-16(SP) =
// previous m->g0->sched.sp.
TEXT ·cgocallback(SB),NOSPLIT,$24-24
	NO_LOCAL_POINTERS

	// Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
	// It is used to dropm while thread is exiting.
	MOVD	fn+0(FP), R1
	CBNZ	R1, loadg
	// Restore the g from frame.
	MOVD	frame+8(FP), g
	B	dropm

loadg:
	// Load g from thread-local storage.
	BL	runtime·load_g(SB)

	// If g is nil, Go did not create the current thread,
	// or if this thread never called into Go on pthread platforms.
	// Call needm to obtain one for temporary use.
	// In this case, we're running on the thread stack, so there's
	// lots of space, but the linker doesn't know. Hide the call from
	// the linker analysis by using an indirect call.
	CBZ	g, needm

	MOVD	g_m(g), R8
	MOVD	R8, savedm-8(SP)
	B	havem

needm:
	MOVD	g, savedm-8(SP)	// g is zero, so is m.
	MOVD	$runtime·needAndBindM(SB), R0
	BL	(R0)

	// Set m->g0->sched.sp = SP, so that if a panic happens
	// during the function we are about to execute, it will
	// have a valid SP to run on the g0 stack.
	// The next few lines (after the havem label)
	// will save this SP onto the stack and then write
	// the same SP back to m->sched.sp. That seems redundant,
	// but if an unrecovered panic happens, unwindm will
	// restore the g->sched.sp from the stack location
	// and then systemstack will try to use it. If we don't set it here,
	// that restored SP will be uninitialized (typically 0) and
	// will not be usable.
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), R3
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)
	MOVD	R29, (g_sched+gobuf_bp)(R3)

havem:
	// Now there's a valid m, and we're running on its m->g0.
	// Save current m->g0->sched.sp on stack and then set it to SP.
	// Save current sp in m->g0->sched.sp in preparation for
	// switch back to m->curg stack.
	// NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
	// Beware that the frame size is actually 32+16.
	MOVD	m_g0(R8), R3
	MOVD	(g_sched+gobuf_sp)(R3), R4
	MOVD	R4, savedsp-16(SP)
	MOVD	RSP, R0
	MOVD	R0, (g_sched+gobuf_sp)(R3)

	// Switch to m->curg stack and call runtime.cgocallbackg.
	// Because we are taking over the execution of m->curg
	// but *not* resuming what had been running, we need to
	// save that information (m->curg->sched) so we can restore it.
	// We can restore m->curg->sched.sp easily, because calling
	// runtime.cgocallbackg leaves SP unchanged upon return.
	// To save m->curg->sched.pc, we push it onto the curg stack and
	// open a frame the same size as cgocallback's g0 frame.
	// Once we switch to the curg stack, the pushed PC will appear
	// to be the return PC of cgocallback, so that the traceback
	// will seamlessly trace back into the earlier calls.
	MOVD	m_curg(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R4	// prepare stack as R4
	MOVD	(g_sched+gobuf_pc)(g), R5
	MOVD	R5, -48(R4)
	MOVD	(g_sched+gobuf_bp)(g), R5
	MOVD	R5, -56(R4)
	// Gather our arguments into registers.
	MOVD	fn+0(FP), R1
	MOVD	frame+8(FP), R2
	MOVD	ctxt+16(FP), R3
	MOVD	$-48(R4), R0	// maintain 16-byte SP alignment
	MOVD	R0, RSP	// switch stack
	MOVD	R1, 8(RSP)
	MOVD	R2, 16(RSP)
	MOVD	R3, 24(RSP)
	MOVD	$runtime·cgocallbackg(SB), R0
	CALL	(R0)	// indirect call to bypass nosplit check. We're on a different stack now.

	// Restore g->sched (== m->curg->sched) from saved values.
	MOVD	0(RSP), R5
	MOVD	R5, (g_sched+gobuf_pc)(g)
	MOVD	RSP, R4
	ADD	$48, R4, R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// Switch back to m->g0's stack and restore m->g0->sched.sp.
	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
	// so we do not have to restore it.)
	MOVD	g_m(g), R8
	MOVD	m_g0(R8), g
	BL	runtime·save_g(SB)
	MOVD	(g_sched+gobuf_sp)(g), R0
	MOVD	R0, RSP
	MOVD	savedsp-16(SP), R4
	MOVD	R4, (g_sched+gobuf_sp)(g)

	// If the m on entry was nil, we called needm above to borrow an m,
	// 1. for the duration of the call on non-pthread platforms,
	// 2. or the duration of the C thread alive on pthread platforms.
	// If the m on entry wasn't nil,
	// 1. the thread might be a Go thread,
	// 2. or it wasn't the first call from a C thread on pthread platforms,
	// since then we skip dropm to reuse the m in the first call.
	MOVD	savedm-8(SP), R6
	CBNZ	R6, droppedm

	// Skip dropm to reuse it in the next call, when a pthread key has been created.
	MOVD	_cgo_pthread_key_created(SB), R6
	// It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
	CBZ	R6, dropm
	MOVD	(R6), R6
	CBNZ	R6, droppedm

dropm:
	MOVD	$runtime·dropm(SB), R0
	BL	(R0)
droppedm:

	// Done!
	RET
1174
// _cgo_topofstack returns g->m->curg.stack.hi in R0.
// It is called from cgo wrappers and must obey the gcc calling convention.
TEXT _cgo_topofstack(SB),NOSPLIT,$24
	// g (R28) and REGTMP (R27) might be clobbered by load_g. They
	// are callee-save in the gcc calling convention, so spill both
	// to the frame before the call.
	MOVD	R27, savedR27-8(SP)
	MOVD	g, saveG-16(SP)

	BL	runtime·load_g(SB)
	MOVD	g_m(g), R0
	MOVD	m_curg(R0), R0
	MOVD	(g_stack+stack_hi)(R0), R0	// R0 = g.m.curg.stack.hi

	// Reload g and R27 from the slots written above. The R27 slot is
	// referenced by the same name (savedR27) on both the store and the load.
	MOVD	saveG-16(SP), g
	MOVD	savedR27-8(SP), R27
	RET
1191
// void setg(G*)
// Loads the argument gg into the g register and then calls save_g.
// For use by needm.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	MOVD	gg+0(FP), g
	// This only happens if iscgo, so go straight to save_g.
	CALL	runtime·save_g(SB)
	RET
1198
// void setg_gcc(G*)
// Sets g from R0; called from gcc.
// R27 is spilled to the frame before the save_g call and reloaded
// from the same slot afterwards.
TEXT setg_gcc<>(SB),NOSPLIT,$8
	MOVD	R27, savedR27-8(SP)
	MOVD	R0, g
	CALL	runtime·save_g(SB)
	MOVD	savedR27-8(SP), R27
	RET
1206
// emptyfunc has no body other than the return.
TEXT runtime·emptyfunc(SB),0,$0-0
	RET
1209
// abort zeroes R0, loads through it (a load from address 0), and
// is followed by UNDEF.
TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	ZR, R0		// R0 = 0
	MOVD	(R0), R0	// load from address 0
	UNDEF
1214
// return0 sets R0 to zero and returns.
TEXT runtime·return0(SB), NOSPLIT, $0
	MOVW	$0, R0
	RET
1218
// goexit: the top-most function running on a goroutine
// returns to goexit+PCQuantum, i.e. just past the leading NOP.
TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
	MOVD	R0, R0	// NOP
	CALL	runtime·goexit1(SB)	// does not return
1224
// addmoduledata is called from .init_array and follows the platform
// ABI, not the Go ABI.
//
// The value in R0 is stored into the moduledata_next field of the
// current runtime·lastmoduledatap and then becomes the new
// runtime·lastmoduledatap.
TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
	SUB	$16, RSP
	// The access to global variables below implicitly uses R27,
	// which is callee-save, so keep a copy on the stack.
	MOVD	R27, 8(RSP)
	MOVD	runtime·lastmoduledatap(SB), R1
	MOVD	R0, moduledata_next(R1)
	MOVD	R0, runtime·lastmoduledatap(SB)
	MOVD	8(RSP), R27
	ADD	$16, RSP
	RET
1235
// checkASM stores the byte value 1 into its one-byte result slot.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVW	$1, R3
	MOVB	R3, ret+0(FP)
	RET
1240
// gcWriteBarrier informs the GC about heap pointer writes.
//
// gcWriteBarrier does NOT follow the Go ABI. On entry R25 holds the
// number of bytes of buffer needed; on return R25 holds a pointer
// to the buffer space.
// It clobbers condition codes.
// It does not clobber any general-purpose registers except R27,
// but may clobber others (e.g., floating point registers).
// The act of CALLing gcWriteBarrier will clobber R30 (LR).
//
// Frame layout: slots 1..22 (8 bytes each) hold R2-R15 and R19-R26
// on the slow path; slots 23 and 24 hold R0 and R1.
TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
	// R0 and R1 are the scratch registers of the fast path; save them first.
	STP	(R0, R1), 23*8(RSP)
again:
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	(p_wbBuf+wbBuf_next)(R0), R1
	MOVD	(p_wbBuf+wbBuf_end)(R0), R27
	// Advance the wbBuf.next position by the requested size.
	ADD	R25, R1
	// Does the new position run past the end of the buffer?
	CMP	R27, R1
	BHI	full
	// It fits: commit the new next position.
	MOVD	R1, (p_wbBuf+wbBuf_next)(R0)
	// Return value is the original next position.
	SUB	R25, R1, R25
	// Restore the scratch registers.
	LDP	23*8(RSP), (R0, R1)
	RET

full:
	// Save all general purpose registers since these could be
	// clobbered by wbBufFlush and were not saved by the caller.
	// R0 and R1 were already saved on entry.
	STP	(R2, R3), 1*8(RSP)
	STP	(R4, R5), 3*8(RSP)
	STP	(R6, R7), 5*8(RSP)
	STP	(R8, R9), 7*8(RSP)
	STP	(R10, R11), 9*8(RSP)
	STP	(R12, R13), 11*8(RSP)
	STP	(R14, R15), 13*8(RSP)
	// R16, R17 may be clobbered by linker trampoline
	// R18 is unused.
	STP	(R19, R20), 15*8(RSP)
	STP	(R21, R22), 17*8(RSP)
	STP	(R23, R24), 19*8(RSP)
	STP	(R25, R26), 21*8(RSP)
	// R27 is temp register.
	// R28 is g.
	// R29 is frame pointer (unused).
	// R30 is LR, which was saved by the prologue.
	// R31 is SP.

	BL	runtime·wbBufFlush(SB)

	// Reload everything saved above (including the requested size
	// in R25) and redo the fast path.
	LDP	1*8(RSP), (R2, R3)
	LDP	3*8(RSP), (R4, R5)
	LDP	5*8(RSP), (R6, R7)
	LDP	7*8(RSP), (R8, R9)
	LDP	9*8(RSP), (R10, R11)
	LDP	11*8(RSP), (R12, R13)
	LDP	13*8(RSP), (R14, R15)
	LDP	15*8(RSP), (R19, R20)
	LDP	17*8(RSP), (R21, R22)
	LDP	19*8(RSP), (R23, R24)
	LDP	21*8(RSP), (R25, R26)
	B	again
1307
// gcWriteBarrierN (N = 1..8): each stub loads N*8, the number of
// buffer bytes to request, into R25 and tail-jumps to gcWriteBarrier<>.
TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$8, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$16, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$24, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$32, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$40, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$48, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$56, R25
	B	gcWriteBarrier<>(SB)
TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
	MOVD	$64, R25
	B	gcWriteBarrier<>(SB)
1332
// debugCallFrameTooLarge is the 20-byte, read-only error text that
// debugCallV2 reports when the requested argument frame is too big.
DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
1335
// debugCallV2 is the entry point for debugger-injected function
// calls on running goroutines. It informs the runtime that a
// debug call has been injected and creates a call frame for the
// debugger to fill in.
//
// To inject a function call, a debugger should:
// 1. Check that the goroutine is in state _Grunning and that
//    there are at least 288 bytes free on the stack.
// 2. Set SP as SP-16.
// 3. Store the current LR in (SP) (using the SP after step 2).
// 4. Store the current PC in the LR register.
// 5. Write the desired argument frame size at SP-16.
// 6. Save all machine registers (including flags and fpsimd registers)
//    so they can be restored later by the debugger.
// 7. Set the PC to debugCallV2 and resume execution.
//
// If the goroutine is in state _Grunnable, then it's not generally
// safe to inject a call because it may return out via other runtime
// operations. Instead, the debugger should unwind the stack to find
// the return to non-runtime code, add a temporary breakpoint there,
// and inject the call once that breakpoint is hit.
//
// If the goroutine is in any other state, it's not safe to inject a call.
//
// This function communicates back to the debugger by setting R20 and
// invoking BRK to raise a breakpoint signal. Note that the signal PC of
// the signal triggered by the BRK instruction is the PC where the signal
// is trapped, not the next PC, so to resume execution, the debugger needs
// to set the signal PC to PC+4. See the comments in the implementation for
// the protocol the debugger is expected to follow. InjectDebugCall in the
// runtime tests demonstrates this protocol.
//
// R20 values used below:
//    0  call frame is ready for the debugger to fill in
//    1  the injected function returned
//    2  the injected function panicked
//    8  the call cannot be injected; the reason string is at SP+8
//   16  finished; the debugger should restore registers
//
// The debugger must ensure that any pointers passed to the function
// obey escape analysis requirements. Specifically, it must not pass
// a stack pointer to an escaping argument. debugCallV2 cannot check
// this invariant.
//
// This is ABIInternal because Go code injects its PC directly into new
// goroutine stacks.
TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
	STP	(R29, R30), -280(RSP)
	SUB	$272, RSP, RSP
	SUB	$8, RSP, R29
	// Save all registers that may contain pointers so they can be
	// conservatively scanned.
	//
	// Nothing that might clobber any of these registers may run
	// before this point.
	STP	(R27, g), (30*8)(RSP)
	STP	(R25, R26), (28*8)(RSP)
	STP	(R23, R24), (26*8)(RSP)
	STP	(R21, R22), (24*8)(RSP)
	STP	(R19, R20), (22*8)(RSP)
	STP	(R16, R17), (20*8)(RSP)
	STP	(R14, R15), (18*8)(RSP)
	STP	(R12, R13), (16*8)(RSP)
	STP	(R10, R11), (14*8)(RSP)
	STP	(R8, R9), (12*8)(RSP)
	STP	(R6, R7), (10*8)(RSP)
	STP	(R4, R5), (8*8)(RSP)
	STP	(R2, R3), (6*8)(RSP)
	STP	(R0, R1), (4*8)(RSP)

	// Perform a safe-point check.
	MOVD	R30, 8(RSP)	// Caller's PC
	BL	runtime·debugCallCheck(SB)
	MOVD	16(RSP), R0
	CBZ	R0, safe

	// The safety check failed. Put the reason string at the top
	// of the stack.
	MOVD	R0, 8(RSP)
	MOVD	24(RSP), R0
	MOVD	R0, 16(RSP)

	// Set R20 to 8 and invoke BRK. The debugger should get the
	// reason a call can't be injected from SP+8 and resume execution.
	MOVD	$8, R20
	BREAK
	B	restore

safe:
	// Registers are saved and it's safe to make a call.
	// Open up a call frame, moving the stack if necessary.
	//
	// Once the frame is allocated, this will set R20 to 0 and
	// invoke BRK. The debugger should write the argument
	// frame for the call at SP+8, set up argument registers,
	// set the LR as the signal PC + 4, set the PC to the function
	// to call, set R26 to point to the closure (if a closure call),
	// and resume execution.
	//
	// If the function returns, this will set R20 to 1 and invoke
	// BRK. The debugger can then inspect any return value saved
	// on the stack at SP+8 and in registers. To resume execution,
	// the debugger should restore the LR from (SP).
	//
	// If the function panics, this will set R20 to 2 and invoke BRK.
	// The interface{} value of the panic will be at SP+8. The debugger
	// can inspect the panic value and resume execution again.
#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
	CMP	$MAXSIZE, R0;			\
	BGT	5(PC);				\
	MOVD	$NAME(SB), R0;			\
	MOVD	R0, 8(RSP);			\
	BL	runtime·debugCallWrap(SB);	\
	B	restore

	MOVD	256(RSP), R0	// the argument frame size
	// Try each frame size in increasing order; the first one that is
	// large enough handles the call and then jumps to restore.
	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
	// The frame size is too large. Report the error.
	MOVD	$debugCallFrameTooLarge<>(SB), R0
	MOVD	R0, 8(RSP)
	MOVD	$20, R0
	MOVD	R0, 16(RSP)	// length of debugCallFrameTooLarge string
	MOVD	$8, R20
	BREAK
	B	restore

restore:
	// Calls and failures resume here.
	//
	// Set R20 to 16 and invoke BRK. The debugger should restore
	// all registers except for PC and RSP and resume execution.
	MOVD	$16, R20
	BREAK
	// We must not modify flags after this point.

	// Restore pointer-containing registers, which may have been
	// modified from the debugger's copy by stack copying.
	LDP	(30*8)(RSP), (R27, g)
	LDP	(28*8)(RSP), (R25, R26)
	LDP	(26*8)(RSP), (R23, R24)
	LDP	(24*8)(RSP), (R21, R22)
	LDP	(22*8)(RSP), (R19, R20)
	LDP	(20*8)(RSP), (R16, R17)
	LDP	(18*8)(RSP), (R14, R15)
	LDP	(16*8)(RSP), (R12, R13)
	LDP	(14*8)(RSP), (R10, R11)
	LDP	(12*8)(RSP), (R8, R9)
	LDP	(10*8)(RSP), (R6, R7)
	LDP	(8*8)(RSP), (R4, R5)
	LDP	(6*8)(RSP), (R2, R3)
	LDP	(4*8)(RSP), (R0, R1)

	// Reload R29 and, into R27, the value stored from R30 on entry
	// (the address to jump back to).
	LDP	-8(RSP), (R29, R27)
	ADD	$288, RSP, RSP	// Add 16 more bytes, see saveSigContext
	MOVD	-16(RSP), R30	// restore old lr
	B	(R27)
1496
// DEBUG_CALL_FN defines one fixed-size call-frame function: NAME gets
// a frame of MAXSIZE bytes, raises BRK with R20 = 0, then raises BRK
// again with R20 = 1, and returns.
//
// runtime.debugCallCheck assumes that functions defined with the
// DEBUG_CALL_FN macro are safe points to inject calls.
#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
	NO_LOCAL_POINTERS;			\
	MOVD	$0, R20;			\
	BREAK;					\
	MOVD	$1, R20;			\
	BREAK;					\
	RET
DEBUG_CALL_FN(debugCall32<>, 32)
DEBUG_CALL_FN(debugCall64<>, 64)
DEBUG_CALL_FN(debugCall128<>, 128)
DEBUG_CALL_FN(debugCall256<>, 256)
DEBUG_CALL_FN(debugCall512<>, 512)
DEBUG_CALL_FN(debugCall1024<>, 1024)
DEBUG_CALL_FN(debugCall2048<>, 2048)
DEBUG_CALL_FN(debugCall4096<>, 4096)
DEBUG_CALL_FN(debugCall8192<>, 8192)
DEBUG_CALL_FN(debugCall16384<>, 16384)
DEBUG_CALL_FN(debugCall32768<>, 32768)
DEBUG_CALL_FN(debugCall65536<>, 65536)
1519
// func debugCallPanicked(val interface{})
//
// Copies the two words of val (type word, then data word) to SP+8 and
// SP+16, sets R20 to 2, and raises BRK.
TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
	// Copy the panic value to the top of stack at SP+8.
	MOVD	val_type+0(FP), R0
	MOVD	R0, 8(RSP)
	MOVD	val_data+8(FP), R0
	MOVD	R0, 16(RSP)
	MOVD	$2, R20
	BREAK
	RET
1530
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments is allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// Defined as ABIInternal since the compiler generates ABIInternal
// calls to them directly and they do not use the stack-based Go ABI.
//
// Each stub either jumps straight to its goPanic* handler, or first
// moves its two operands down into R0 and R1 (from R1,R2 or from R2,R3)
// and then jumps.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
	B	runtime·goPanicIndex<ABIInternal>(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
	B	runtime·goPanicIndexU<ABIInternal>(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	B	runtime·goPanicSliceAlen<ABIInternal>(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	B	runtime·goPanicSliceAlenU<ABIInternal>(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	B	runtime·goPanicSliceAcap<ABIInternal>(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	B	runtime·goPanicSliceAcapU<ABIInternal>(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	B	runtime·goPanicSliceB<ABIInternal>(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	B	runtime·goPanicSliceBU<ABIInternal>(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	B	runtime·goPanicSlice3Alen<ABIInternal>(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	B	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	B	runtime·goPanicSlice3Acap<ABIInternal>(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	B	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	B	runtime·goPanicSlice3B<ABIInternal>(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R1, R0
	MOVD	R2, R1
	B	runtime·goPanicSlice3BU<ABIInternal>(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	B	runtime·goPanicSlice3C<ABIInternal>(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	B	runtime·goPanicSlice3CU<ABIInternal>(SB)
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
	MOVD	R2, R0
	MOVD	R3, R1
	B	runtime·goPanicSliceConvert<ABIInternal>(SB)
1595
// getfp returns the current value of R29 in R0.
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVD	R29, R0
	RET
View as plain text