Text file
src/runtime/asm_amd64.s
Documentation: runtime
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "go_tls.h"
7#include "funcdata.h"
8#include "textflag.h"
9#include "cgo/abi_amd64.h"
10
// _rt0_amd64 is the kernel entry point of an ordinary -buildmode=exe
// program on most amd64 systems when internal linking is used.
// On entry the stack holds the argument count followed by the
// C-style argv array.
TEXT _rt0_amd64(SB),NOSPLIT,$-8
	LEAQ	8(SP), SI	// SI = argv
	MOVQ	0(SP), DI	// DI = argc
	JMP	runtime·rt0_go(SB)
19
// main is the entry point on most amd64 systems when external linking
// is used. The C startup code calls the symbol "main" with argc and
// argv already in DI and SI (the usual C ABI registers), so control
// is handed straight to rt0_go.
TEXT main(SB),NOSPLIT,$-8
	JMP	runtime·rt0_go(SB)
25
// _rt0_amd64_lib is the startup path shared by -buildmode=c-archive and
// -buildmode=c-shared on most amd64 systems. The linker arranges for it
// to be invoked as a global constructor (c-archive) or when the shared
// library is loaded (c-shared). argc and argv are expected in DI and SI,
// the usual C ABI registers.
TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()

	// Stash the arguments; _rt0_amd64_lib_go reloads them on the new thread.
	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)

	// Synchronous initialization.
	CALL	runtime·libpreinit(SB)

	// The rest of Go runtime initialization runs on a new thread.
	// Use _cgo_sys_thread_create when it is set, newosproc0 otherwise.
	MOVQ	_cgo_sys_thread_create(SB), AX
	TESTQ	AX, AX
	JNZ	viacgo

	// No cgo thread creator: pass stacksize and fn on the stack.
	ADJSP	$16
	MOVQ	$_rt0_amd64_lib_go(SB), AX
	MOVQ	AX, 8(SP)			// fn
	MOVQ	$0x800000, 0(SP)		// stacksize
	CALL	runtime·newosproc0(SB)
	ADJSP	$-16
	JMP	done

viacgo:
	// We're calling back to C.
	// Align stack per ELF ABI requirements.
	MOVQ	SP, BX	// BX is callee-save in the C ABI, so it survives the call
	ANDQ	$~15, SP
	MOVQ	$0, SI
	MOVQ	$_rt0_amd64_lib_go(SB), DI
	CALL	AX
	MOVQ	BX, SP

done:
	POP_REGS_HOST_TO_ABI0()
	RET
68
// _rt0_amd64_lib_go initializes the Go runtime.
// _rt0_amd64_lib starts it on a separate thread; it reloads the argc
// and argv values saved there and continues in rt0_go.
TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
	JMP	runtime·rt0_go(SB)
75
// Storage for the argc/argv values handed from _rt0_amd64_lib to
// _rt0_amd64_lib_go. Each is one 8-byte word, initialized to zero.
DATA _rt0_amd64_lib_argc<>(SB)/8, $0
DATA _rt0_amd64_lib_argv<>(SB)/8, $0
GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
80
// bad_cpu_msg is the 84-byte message written by runtime·rt0_go when the
// CPU lacks the features required by the selected GOAMD64 level.
// One definition is chosen by the GOAMD64_v2/v3/v4 macro.
#ifdef GOAMD64_v2
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v3
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
#endif

#ifdef GOAMD64_v4
DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
#endif

GLOBL bad_cpu_msg<>(SB), RODATA, $84
94
// AMD64 microarchitecture level feature masks.
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
//
// The _CX/_BX/_AX suffix of each mask names the register it is tested
// against in runtime·rt0_go.

	// bit 0 SSE3, 9 SSSE3, 13 CMPXCHG16B, 19 SSE4.1, 20 SSE4.2, 23 POPCNT
#define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13 | 1 << 19 | 1 << 20 | 1 << 23)
	// bit 0 LAHF/SAHF
#define V2_EXT_FEATURES_CX (1 << 0)
	// bit 12 FMA, 22 MOVBE, 27 OSXSAVE, 28 AVX, 29 F16C
#define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
	// bit 5 ABM (for LZCNT)
#define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
	// bit 3 BMI1, 5 AVX2, 8 BMI2
#define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
	// bit 1 XMM state, 2 YMM state
#define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)

	// v4 adds nothing to the two CX masks.
#define V4_FEATURES_CX V3_FEATURES_CX

#define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
	// bit 16 AVX512F, 17 AVX512DQ, 28 AVX512CD, 30 AVX512BW, 31 AVX512VL
#define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
	// bit 5 OPMASK state, 6 and 7 ZMM state
#define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))

// Select the NEED_* masks for the configured GOAMD64 level. A mask that
// is left undefined disables the corresponding check in runtime·rt0_go.

#ifdef GOAMD64_v2
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V2_FEATURES_CX
#define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
#endif

#ifdef GOAMD64_v3
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V3_FEATURES_CX
#define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
#endif

#ifdef GOAMD64_v4
#define NEED_MAX_CPUID 0x80000001
#define NEED_FEATURES_CX V4_FEATURES_CX
#define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
#define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX

// Darwin requires a different approach to check AVX512 support, see CL 285572.
// There the XGETBV test only demands the v3 state bits, and the AVX512
// capabilities are read from the commpage instead.
#ifdef GOOS_darwin
#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
// These values are from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define AVX512F 0x0000004000000000
#define AVX512CD 0x0000008000000000
#define AVX512DQ 0x0000010000000000
#define AVX512BW 0x0000020000000000
#define AVX512VL 0x0000100000000000
#define NEED_DARWIN_SUPPORT (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
#else
#define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
#endif

#endif
158
// rt0_go is the common bootstrap reached from the _rt0_* entry points.
// On entry DI holds argc and SI holds argv. It sets up g0 on the OS
// stack, records CPU identification, calls _cgo_init if present, sets
// up TLS, links g0 and m0, verifies the GOAMD64 CPU requirements and
// then calls check, args, osinit and schedinit, queues runtime.main via
// newproc, and starts this M with mstart. It does not return.
TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
	// copy arguments forward on an even stack
	MOVQ	DI, AX		// argc
	MOVQ	SI, BX		// argv
	SUBQ	$(5*8), SP		// 3args 2auto
	ANDQ	$~15, SP
	MOVQ	AX, 24(SP)
	MOVQ	BX, 32(SP)

	// create istack out of the given (operating system) stack.
	// _cgo_init may update stackguard.
	MOVQ	$runtime·g0(SB), DI
	LEAQ	(-64*1024)(SP), BX
	MOVQ	BX, g_stackguard0(DI)
	MOVQ	BX, g_stackguard1(DI)
	MOVQ	BX, (g_stack+stack_lo)(DI)
	MOVQ	SP, (g_stack+stack_hi)(DI)

	// find out information about the processor we're on
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	nocpuinfo

	// The vendor string comes back in BX, DX, CX (in that order).
	CMPL	BX, $0x756E6547  // "Genu"
	JNE	notintel
	CMPL	DX, $0x49656E69  // "ineI"
	JNE	notintel
	CMPL	CX, $0x6C65746E  // "ntel"
	JNE	notintel
	MOVB	$1, runtime·isIntel(SB)

notintel:
	// Load EAX=1 cpuid flags
	MOVL	$1, AX
	CPUID
	MOVL	AX, runtime·processorVersionInfo(SB)

nocpuinfo:
	// if there is an _cgo_init, call it.
	MOVQ	_cgo_init(SB), AX
	TESTQ	AX, AX
	JZ	needtls
	// arg 1: g0, already in DI
	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
	MOVQ	$0, CX
#ifdef GOOS_android
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
	// Compensate for tls_g (+16).
	MOVQ	-16(TLS), CX
#endif
#ifdef GOOS_windows
	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
	// Adjust for the Win64 calling convention.
	// Move the highest argument first so no source is overwritten
	// before it has been copied.
	MOVQ	CX, R9 // arg 4
	MOVQ	DX, R8 // arg 3
	MOVQ	SI, DX // arg 2
	MOVQ	DI, CX // arg 1
#endif
	CALL	AX

	// update stackguard after _cgo_init
	MOVQ	$runtime·g0(SB), CX
	MOVQ	(g_stack+stack_lo)(CX), AX
	ADDQ	$const_stackGuard, AX
	MOVQ	AX, g_stackguard0(CX)
	MOVQ	AX, g_stackguard1(CX)

#ifndef GOOS_windows
	JMP ok
#endif
needtls:
#ifdef GOOS_plan9
	// skip TLS setup on Plan 9
	JMP ok
#endif
#ifdef GOOS_solaris
	// skip TLS setup on Solaris
	JMP ok
#endif
#ifdef GOOS_illumos
	// skip TLS setup on illumos
	JMP ok
#endif
#ifdef GOOS_darwin
	// skip TLS setup on Darwin
	JMP ok
#endif
#ifdef GOOS_openbsd
	// skip TLS setup on OpenBSD
	JMP ok
#endif

#ifdef GOOS_windows
	CALL	runtime·wintls(SB)
#endif

	LEAQ	runtime·m0+m_tls(SB), DI
	CALL	runtime·settls(SB)

	// store through it, to make sure it works:
	// a value written via the TLS g slot must show up in m0.tls.
	get_tls(BX)
	MOVQ	$0x123, g(BX)
	MOVQ	runtime·m0+m_tls(SB), AX
	CMPQ	AX, $0x123
	JEQ 2(PC)
	CALL	runtime·abort(SB)
ok:
	// set the per-goroutine and per-mach "registers"
	get_tls(BX)
	LEAQ	runtime·g0(SB), CX
	MOVQ	CX, g(BX)
	LEAQ	runtime·m0(SB), AX

	// save m->g0 = g0
	MOVQ	CX, m_g0(AX)
	// save m0 to g0->m
	MOVQ	AX, g_m(CX)

	CLD				// convention is D is always left cleared

	// Check GOAMD64 requirements
	// We need to do this after setting up TLS, so that
	// we can report an error if there is a failure. See issue 49586.
#ifdef NEED_FEATURES_CX
	MOVL	$0, AX
	CPUID
	CMPL	AX, $0
	JE	bad_cpu
	MOVL	$1, AX
	CPUID
	ANDL	$NEED_FEATURES_CX, CX
	CMPL	CX, $NEED_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_MAX_CPUID
	MOVL	$0x80000000, AX
	CPUID
	CMPL	AX, $NEED_MAX_CPUID
	// Leaf numbers are unsigned. NEED_MAX_CPUID (0x80000001) is negative
	// as a signed 32-bit value, so a signed JL would accept any small
	// positive AX reported by a CPU without extended leaves.
	JB	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_BX
	MOVL	$7, AX
	MOVL	$0, CX
	CPUID
	ANDL	$NEED_EXT_FEATURES_BX, BX
	CMPL	BX, $NEED_EXT_FEATURES_BX
	JNE	bad_cpu
#endif

#ifdef NEED_EXT_FEATURES_CX
	MOVL	$0x80000001, AX
	CPUID
	ANDL	$NEED_EXT_FEATURES_CX, CX
	CMPL	CX, $NEED_EXT_FEATURES_CX
	JNE	bad_cpu
#endif

#ifdef NEED_OS_SUPPORT_AX
	XORL	CX, CX
	XGETBV
	ANDL	$NEED_OS_SUPPORT_AX, AX
	CMPL	AX, $NEED_OS_SUPPORT_AX
	JNE	bad_cpu
#endif

#ifdef NEED_DARWIN_SUPPORT
	MOVQ	$commpage64_version, BX
	CMPW	(BX), $13  // cpu_capabilities64 undefined in versions < 13
	JL	bad_cpu
	MOVQ	$commpage64_cpu_capabilities64, BX
	MOVQ	(BX), BX
	MOVQ	$NEED_DARWIN_SUPPORT, CX
	ANDQ	CX, BX
	CMPQ	BX, CX
	JNE	bad_cpu
#endif

	CALL	runtime·check(SB)

	MOVL	24(SP), AX		// copy argc
	MOVL	AX, 0(SP)
	MOVQ	32(SP), AX		// copy argv
	MOVQ	AX, 8(SP)
	CALL	runtime·args(SB)
	CALL	runtime·osinit(SB)
	CALL	runtime·schedinit(SB)

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry
	PUSHQ	AX
	CALL	runtime·newproc(SB)
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

bad_cpu: // show that the program requires a certain microarchitecture level.
	// write(2, bad_cpu_msg, 84); exit(1)
	MOVQ	$2, 0(SP)
	MOVQ	$bad_cpu_msg<>(SB), AX
	MOVQ	AX, 8(SP)
	MOVQ	$84, 16(SP)
	CALL	runtime·write(SB)
	MOVQ	$1, 0(SP)
	CALL	runtime·exit(SB)
	CALL	runtime·abort(SB)
	RET

	// Prevent dead-code elimination of debugCallV2, which is
	// intended to be called by debuggers.
	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
	RET
378
// mainPC is a function value for runtime.main, handed to newproc by
// rt0_go. It refers to runtime.main through ABIInternal because newproc
// needs the actual function, not the ABI0 wrapper.
GLOBL runtime·mainPC(SB),RODATA,$8
DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
384
// breakpoint executes a single 0xcc byte (the one-byte x86 breakpoint
// opcode) and returns.
TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
	BYTE	$0xcc
	RET
388
// asminit is a no-op on amd64: there is no per-thread initialization.
TEXT runtime·asminit(SB),NOSPLIT,$0-0
	RET
392
// mstart is the assembly entry for starting an M; it is marked TOPFRAME
// and simply calls mstart0.
TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
	CALL	runtime·mstart0(SB)
	RET	// not reached
396
397/*
398 * go-routine
399 */
400
// func gogo(buf *gobuf)
// Restores execution state from a Gobuf (a longjmp).
// Loads BX = buf and DX = buf.g for gogo<>, which does the switch.
TEXT runtime·gogo(SB), NOSPLIT, $0-8
	MOVQ	buf+0(FP), BX		// BX = gobuf
	MOVQ	gobuf_g(BX), DX		// DX = gobuf.g
	MOVQ	0(DX), CX		// load through g so a nil g faults here
	JMP	gogo<>(SB)
408
// gogo<> completes the switch started by runtime·gogo.
// It expects BX = gobuf and DX = the g being resumed.
TEXT gogo<>(SB), NOSPLIT, $0
	// Make DX the current g, both in TLS and in the g register.
	get_tls(CX)
	MOVQ	DX, g(CX)
	MOVQ	DX, R14		// set the g register
	// Reload the saved context.
	MOVQ	gobuf_sp(BX), SP	// restore SP
	MOVQ	gobuf_ret(BX), AX
	MOVQ	gobuf_ctxt(BX), DX
	MOVQ	gobuf_bp(BX), BP
	// Zero the consumed fields to help the garbage collector.
	MOVQ	$0, gobuf_sp(BX)
	MOVQ	$0, gobuf_ret(BX)
	MOVQ	$0, gobuf_ctxt(BX)
	MOVQ	$0, gobuf_bp(BX)
	// Resume at the saved PC.
	MOVQ	gobuf_pc(BX), BX
	JMP	BX
423
// func mcall(fn func(*g))
// Switches to m->g0's stack and calls fn(g), where g is the goroutine
// that made the call. fn must never return; it should gogo(&g->sched)
// to keep running g.
TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, DX	// DX = fn

	// Record the caller's context in g->sched. gogo later restores the
	// caller's SP and PC to resume in the caller's frame (an implicit
	// return), and the caller's BP so frame pointer unwinding works.
	MOVQ	SP, BX	// hide (SP) reads from vet
	MOVQ	8(BX), BX	// caller's PC
	MOVQ	BX, (g_sched+gobuf_pc)(R14)
	LEAQ	fn+0(FP), BX	// caller's SP
	MOVQ	BX, (g_sched+gobuf_sp)(R14)
	// The caller's frame pointer is obtained by dereferencing BP.
	// Storing BP itself can create a frame pointer cycle, see CL 476235.
	MOVQ	(BP), BX // caller's BP
	MOVQ	BX, (g_sched+gobuf_bp)(R14)

	// Find m->g0; calling mcall while already on g0 is an error.
	MOVQ	g_m(R14), BX
	MOVQ	m_g0(BX), SI	// SI = g.m.g0
	CMPQ	SI, R14
	JNE	switchtog0
	JMP	runtime·badmcall(SB)
switchtog0:
	MOVQ	R14, AX		// AX (and arg 0) = g
	MOVQ	SI, R14		// g = g.m.g0
	get_tls(CX)		// Set G in TLS
	MOVQ	R14, g(CX)
	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
	PUSHQ	AX	// open up space for fn's arg spill slot
	MOVQ	0(DX), R12
	CALL	R12		// fn(g)
	// fn returned, which it must not do.
	POPQ	AX
	JMP	runtime·badmcall2(SB)
	RET
462
// systemstack_switch is a dummy routine that systemstack leaves at the
// bottom of the G stack. It has to be distinguishable from the routine
// at the top of the system stack, because that one terminates the stack
// walk (see topofstack()).
// The frame layout needs to match systemstack
// so that it can pretend to be systemstack_switch.
TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
	UNDEF
	// The call keeps this function from being a leaf,
	// so the frame is saved.
	CALL	runtime·abort(SB)
	RET
476
// func systemstack(fn func())
// Runs fn on the system (g0) stack. When the current g is already
// gsignal or g0, fn is tail-called in place. When it is m->curg, the
// stack is switched to g0 around the call. Any other g is reported via
// badsystemstack.
TEXT runtime·systemstack(SB), NOSPLIT, $0-8
	MOVQ	fn+0(FP), DI	// DI = fn
	get_tls(CX)
	MOVQ	g(CX), AX	// AX = g
	MOVQ	g_m(AX), BX	// BX = m

	// Already on the signal stack?
	CMPQ	AX, m_gsignal(BX)
	JEQ	onsystemstack

	// Already on g0?
	MOVQ	m_g0(BX), DX	// DX = g0
	CMPQ	AX, DX
	JEQ	onsystemstack

	// Otherwise g has to be the m's current user goroutine.
	CMPQ	AX, m_curg(BX)
	JNE	unknowng

	// Switch stacks.
	// BP still holds the original frame pointer,
	// which is useful for stack unwinding.
	// Save our state in g->sched. Pretend to
	// be systemstack_switch if the G stack is scanned.
	CALL	gosave_systemstack_switch<>(SB)

	// switch to g0
	MOVQ	DX, g(CX)
	MOVQ	DX, R14 // set the g register
	MOVQ	(g_sched+gobuf_sp)(DX), SP

	// call target function; DX carries the closure, DI its code pointer
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	CALL	DI

	// switch back to g (m->curg), then clear the saved sp/bp
	get_tls(CX)
	MOVQ	g(CX), AX
	MOVQ	g_m(AX), BX
	MOVQ	m_curg(BX), AX
	MOVQ	AX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(AX), SP
	MOVQ	(g_sched+gobuf_bp)(AX), BP
	MOVQ	$0, (g_sched+gobuf_sp)(AX)
	MOVQ	$0, (g_sched+gobuf_bp)(AX)
	RET

onsystemstack:
	// already on m stack; tail call the function
	// Using a tail call here cleans up tracebacks since we won't stop
	// at an intermediate systemstack.
	MOVQ	DI, DX
	MOVQ	0(DI), DI
	// The function epilogue is not called on a tail call.
	// Pop BP from the stack to simulate it.
	POPQ	BP
	JMP	DI

unknowng:
	// Bad: g is not gsignal, not g0, not curg. What is it?
	MOVQ	$runtime·badsystemstack(SB), AX
	CALL	AX
	INT	$3
539
// func switchToCrashStack0(fn func())
// Makes gcrash the current g (and the current m's g0), moves SP onto
// gcrash's stack and calls fn. fn is expected never to return.
TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	g_m(R14), BX // BX = curm

	// Install gcrash as g, in the g register and in TLS.
	LEAQ	runtime·gcrash(SB), R14 // g = &gcrash
	MOVQ	BX, g_m(R14)            // g.m = curm
	MOVQ	R14, m_g0(BX)           // curm.g0 = g
	get_tls(CX)
	MOVQ	R14, g(CX)

	// Move SP to 4 words below the top of the crash stack.
	MOVQ	(g_stack+stack_hi)(R14), BX
	SUBQ	$(4*8), BX
	MOVQ	BX, SP

	// Call fn: DX carries the closure, AX its code pointer.
	MOVQ	AX, DX
	MOVQ	0(AX), AX
	CALL	AX

	// should never return
	CALL	runtime·abort(SB)
	UNDEF
564
565/*
566 * support for morestack
567 */
568
// morestack is called during a function prologue when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// In the comments below, f is the function whose prologue called morestack.
TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
	get_tls(CX)
	MOVQ	g(CX), DI     // DI = g
	MOVQ	g_m(DI), BX   // BX = m

	// Set g->sched to context in f.
	MOVQ	0(SP), AX // f's PC
	MOVQ	AX, (g_sched+gobuf_pc)(DI)
	LEAQ	8(SP), AX // f's SP
	MOVQ	AX, (g_sched+gobuf_sp)(DI)
	MOVQ	BP, (g_sched+gobuf_bp)(DI)
	MOVQ	DX, (g_sched+gobuf_ctxt)(DI)

	// Cannot grow scheduler stack (m->g0).
	MOVQ	m_g0(BX), SI  // SI = m.g0
	CMPQ	DI, SI
	JNE	notg0
	CALL	runtime·badmorestackg0(SB)
	CALL	runtime·abort(SB)
notg0:

	// Cannot grow signal stack (m->gsignal).
	MOVQ	m_gsignal(BX), SI
	CMPQ	DI, SI
	JNE	notgsignal
	CALL	runtime·badmorestackgsignal(SB)
	CALL	runtime·abort(SB)
notgsignal:

	// Called from f.
	// Set m->morebuf to f's caller.
	NOP	SP	// tell vet SP changed - stop checking offsets
	MOVQ	8(SP), AX	// f's caller's PC
	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
	LEAQ	16(SP), AX	// f's caller's SP
	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
	MOVQ	DI, (m_morebuf+gobuf_g)(BX)

	// Call newstack on m->g0's stack.
	MOVQ	m_g0(BX), BX
	MOVQ	BX, g(CX)
	MOVQ	(g_sched+gobuf_sp)(BX), SP
	MOVQ	(g_sched+gobuf_bp)(BX), BP
	CALL	runtime·newstack(SB)
	CALL	runtime·abort(SB)	// crash if newstack returns
	RET
619
// morestack_noctxt is morestack without preserving ctxt:
// it zeroes DX (which morestack stores as the context) before jumping there.
TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
	MOVL	$0, DX
	JMP	runtime·morestack(SB)
624
// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
// The layout is nine 8-byte integer slots followed by fifteen 8-byte
// slots for X0 through X14.
TEXT ·spillArgs(SB),NOSPLIT,$0-0
	// Integer registers: offsets 0 through 64.
	MOVQ	AX, 0(R12)
	MOVQ	BX, 8(R12)
	MOVQ	CX, 16(R12)
	MOVQ	DI, 24(R12)
	MOVQ	SI, 32(R12)
	MOVQ	R8, 40(R12)
	MOVQ	R9, 48(R12)
	MOVQ	R10, 56(R12)
	MOVQ	R11, 64(R12)
	// Low 64 bits of X0-X14: offsets 72 through 184.
	MOVQ	X0, 72(R12)
	MOVQ	X1, 80(R12)
	MOVQ	X2, 88(R12)
	MOVQ	X3, 96(R12)
	MOVQ	X4, 104(R12)
	MOVQ	X5, 112(R12)
	MOVQ	X6, 120(R12)
	MOVQ	X7, 128(R12)
	MOVQ	X8, 136(R12)
	MOVQ	X9, 144(R12)
	MOVQ	X10, 152(R12)
	MOVQ	X11, 160(R12)
	MOVQ	X12, 168(R12)
	MOVQ	X13, 176(R12)
	MOVQ	X14, 184(R12)
	RET
652
// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
// It is the inverse of spillArgs and reads the same slot layout.
TEXT ·unspillArgs(SB),NOSPLIT,$0-0
	// Integer registers: offsets 0 through 64.
	MOVQ	0(R12), AX
	MOVQ	8(R12), BX
	MOVQ	16(R12), CX
	MOVQ	24(R12), DI
	MOVQ	32(R12), SI
	MOVQ	40(R12), R8
	MOVQ	48(R12), R9
	MOVQ	56(R12), R10
	MOVQ	64(R12), R11
	// X0-X14: offsets 72 through 184.
	MOVQ	72(R12), X0
	MOVQ	80(R12), X1
	MOVQ	88(R12), X2
	MOVQ	96(R12), X3
	MOVQ	104(R12), X4
	MOVQ	112(R12), X5
	MOVQ	120(R12), X6
	MOVQ	128(R12), X7
	MOVQ	136(R12), X8
	MOVQ	144(R12), X9
	MOVQ	152(R12), X10
	MOVQ	160(R12), X11
	MOVQ	168(R12), X12
	MOVQ	176(R12), X13
	MOVQ	184(R12), X14
	RET
680
// reflectcall: call a function with the given argument list
// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
// Frames are not variable-sized, so a small set of functions with
// constant-sized frames is used, which encodes a few bits of the size in the pc.
// Caution: ugly multiline assembly macros in your future!

// DISPATCH jumps to NAME when the frame size in CX is at most MAXSIZE
// (unsigned compare); otherwise it falls through to the next statement.
#define DISPATCH(NAME,MAXSIZE) \
	CMPQ	CX, $MAXSIZE; \
	JA	3(PC); \
	MOVQ	$NAME(SB), AX; \
	JMP	AX
// Note: can't just "JMP NAME(SB)" - bad inlining results.
693
// reflectcall loads frameSize into CX and jumps to the first callN
// routine whose frame is large enough for it. Frame sizes above
// 1073741824 end up in badreflectcall.
TEXT ·reflectcall(SB), NOSPLIT, $0-48
	MOVLQZX	frameSize+32(FP), CX	// CX = frameSize, zero-extended
	DISPATCH(runtime·call16, 16)
	DISPATCH(runtime·call32, 32)
	DISPATCH(runtime·call64, 64)
	DISPATCH(runtime·call128, 128)
	DISPATCH(runtime·call256, 256)
	DISPATCH(runtime·call512, 512)
	DISPATCH(runtime·call1024, 1024)
	DISPATCH(runtime·call2048, 2048)
	DISPATCH(runtime·call4096, 4096)
	DISPATCH(runtime·call8192, 8192)
	DISPATCH(runtime·call16384, 16384)
	DISPATCH(runtime·call32768, 32768)
	DISPATCH(runtime·call65536, 65536)
	DISPATCH(runtime·call131072, 131072)
	DISPATCH(runtime·call262144, 262144)
	DISPATCH(runtime·call524288, 524288)
	DISPATCH(runtime·call1048576, 1048576)
	DISPATCH(runtime·call2097152, 2097152)
	DISPATCH(runtime·call4194304, 4194304)
	DISPATCH(runtime·call8388608, 8388608)
	DISPATCH(runtime·call16777216, 16777216)
	DISPATCH(runtime·call33554432, 33554432)
	DISPATCH(runtime·call67108864, 67108864)
	DISPATCH(runtime·call134217728, 134217728)
	DISPATCH(runtime·call268435456, 268435456)
	DISPATCH(runtime·call536870912, 536870912)
	DISPATCH(runtime·call1073741824, 1073741824)
	// No callN is large enough.
	MOVQ	$runtime·badreflectcall(SB), AX
	JMP	AX
725
// CALLFN defines one reflectcall target, NAME, with a MAXSIZE-byte frame.
// It copies the stack arguments into its frame, loads the register
// arguments, calls f, spills the register results back into regArgs and
// hands the stack results to callRet<>.
#define CALLFN(NAME,MAXSIZE) \
TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
	NO_LOCAL_POINTERS; \
	/* copy stackArgsSize bytes from stackArgs to the frame at SP */ \
	MOVQ	SP, DI; \
	MOVQ	stackArgs+16(FP), SI; \
	MOVLQZX	stackArgsSize+24(FP), CX; \
	REP;MOVSB; \
	/* load the argument registers from regArgs */ \
	MOVQ	regArgs+40(FP), R12; \
	CALL	·unspillArgs(SB); \
	/* call f: DX is the closure, R12 its code pointer */ \
	MOVQ	f+8(FP), DX; \
	PCDATA	$PCDATA_StackMapIndex, $0; \
	MOVQ	(DX), R12; \
	CALL	R12; \
	/* store register return values back into regArgs */ \
	MOVQ	regArgs+40(FP), R12; \
	CALL	·spillArgs(SB); \
	/* set up callRet: DX=type, DI=dst, SI=src, CX=size, past stackRetOffset */ \
	MOVQ	stackArgsType+0(FP), DX; \
	MOVQ	stackArgs+16(FP), DI; \
	MOVQ	SP, SI; \
	MOVLQZX	stackArgsSize+24(FP), CX; \
	MOVLQZX	stackRetOffset+28(FP), BX; \
	ADDQ	BX, DI; \
	ADDQ	BX, SI; \
	SUBQ	BX, CX; \
	CALL	callRet<>(SB); \
	RET
755
// callRet copies return values back at the end of call*. It is a
// separate function so that it can allocate stack space for the
// arguments to reflectcallmove. It does not follow the Go ABI; its
// inputs arrive in DX, DI, SI, CX and R12 and are stored, in that
// order, as the five stack arguments of reflectcallmove.
TEXT callRet<>(SB), NOSPLIT, $40-0
	NO_LOCAL_POINTERS
	MOVQ	R12, 32(SP)
	MOVQ	CX, 24(SP)
	MOVQ	SI, 16(SP)
	MOVQ	DI, 8(SP)
	MOVQ	DX, 0(SP)
	CALL	runtime·reflectcallmove(SB)
	RET
769
// One call routine per power-of-two frame size from 16 bytes to 1 GiB,
// matching the DISPATCH table in reflectcall.
CALLFN(·call16, 16)
CALLFN(·call32, 32)
CALLFN(·call64, 64)
CALLFN(·call128, 128)
CALLFN(·call256, 256)
CALLFN(·call512, 512)
CALLFN(·call1024, 1024)
CALLFN(·call2048, 2048)
CALLFN(·call4096, 4096)
CALLFN(·call8192, 8192)
CALLFN(·call16384, 16384)
CALLFN(·call32768, 32768)
CALLFN(·call65536, 65536)
CALLFN(·call131072, 131072)
CALLFN(·call262144, 262144)
CALLFN(·call524288, 524288)
CALLFN(·call1048576, 1048576)
CALLFN(·call2097152, 2097152)
CALLFN(·call4194304, 4194304)
CALLFN(·call8388608, 8388608)
CALLFN(·call16777216, 16777216)
CALLFN(·call33554432, 33554432)
CALLFN(·call67108864, 67108864)
CALLFN(·call134217728, 134217728)
CALLFN(·call268435456, 268435456)
CALLFN(·call536870912, 536870912)
CALLFN(·call1073741824, 1073741824)
797
// procyield spins, executing one PAUSE per iteration while the 32-bit
// counter loaded from cycles is decremented until it reaches zero.
TEXT runtime·procyield(SB),NOSPLIT,$0-0
	MOVL	cycles+0(FP), AX	// AX = remaining iterations
spin:
	PAUSE
	SUBL	$1, AX
	JNZ	spin
	RET
805
806
// publicationBarrier needs no instructions on x86, where stores are
// already ordered; it only acts as a compile barrier.
TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
	RET
811
// gosave_systemstack_switch saves the caller's state into g->sched,
// but with a fake PC taken from systemstack_switch.
// It must only be called from functions that have a frame pointer
// and no locals ($0), or else unwinding from
// systemstack_switch is incorrect.
// Smashes R9.
TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
	// The saved PC is systemstack_switch+8, which skips its prologue.
	// The exact location does not matter as long as it lies
	// between the prologue and the epilogue.
	MOVQ	$runtime·systemstack_switch+8(SB), R9
	MOVQ	R9, (g_sched+gobuf_pc)(R14)
	LEAQ	8(SP), R9	// the caller's SP, above our return PC
	MOVQ	R9, (g_sched+gobuf_sp)(R14)
	MOVQ	BP, (g_sched+gobuf_bp)(R14)
	MOVQ	$0, (g_sched+gobuf_ret)(R14)
	// Assert ctxt is zero. See func save.
	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
	TESTQ	R9, R9
	JZ	ctxtzero
	CALL	runtime·abort(SB)
ctxtzero:
	RET
834
// func asmcgocall_no_g(fn, arg unsafe.Pointer)
// Calls fn(arg) with the stack aligned for the gcc ABI.
// It is called on a system stack, possibly before any g exists
// (during needm).
TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
	MOVQ	arg+8(FP), BX
	MOVQ	fn+0(FP), AX
	MOVQ	SP, DX		// remember the unaligned SP
	ANDQ	$~15, SP	// alignment
	MOVQ	DX, 8(SP)	// keep the old SP in the frame across the call
	// The argument goes in both first-argument registers.
	MOVQ	BX, CX		// CX = first argument in Win64
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	CALL	AX
	MOVQ	8(SP), DX
	MOVQ	DX, SP
	RET
850
// asmcgocall_landingpad calls AX with BX as argument.
// Must be called on the system stack.
TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
#ifdef GOOS_windows
	// Reserve room for the 4 stack-backed fast-call registers
	// required by the Windows amd64 calling convention.
	ADJSP	$32
	// On Windows this function is the landing pad for exceptions thrown
	// in the cgo call. Exceptions that reach it are handled by
	// runtime.sehtramp thanks to the SEH metadata added by the compiler.
	// runtime.sehtramp can't be attached directly to asmcgocall because
	// asmcgocall's initial stack pointer can be outside the system stack
	// bounds, and Windows stops the stack unwinding without calling the
	// exception handler when it reaches that point.
	MOVQ	BX, CX		// CX = first argument in Win64
	CALL	AX
	// The exception handler is not called if the next instruction is part
	// of the epilogue, which includes the RET instruction, so a NOP
	// (0x90) is placed here.
	BYTE	$0x90
	ADJSP	$-32
	RET
#endif
	// Everywhere else the extra stack frame is not needed: tail call AX.
	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
	JMP	AX
877
// func asmcgocall(fn, arg unsafe.Pointer) int32
// Calls fn(arg) on the scheduler stack, aligned appropriately for the
// gcc ABI, and returns the 32-bit result left in AX.
// See cgocall.go for more details.
TEXT ·asmcgocall(SB),NOSPLIT,$0-20
	MOVQ	fn+0(FP), AX
	MOVQ	arg+8(FP), BX

	MOVQ	SP, DX		// DX = SP on entry

	// Decide whether a switch to the m->g0 stack is needed.
	// This is also called to create new OS threads, and those
	// arrive on the m->g0 stack already. We might also already
	// be on the m->gsignal stack, or have no g at all.
	get_tls(CX)
	MOVQ	g(CX), DI
	CMPQ	DI, $0
	JEQ	onsystemstack
	MOVQ	g_m(DI), R8
	MOVQ	m_gsignal(R8), SI
	CMPQ	DI, SI
	JEQ	onsystemstack
	MOVQ	m_g0(R8), SI
	CMPQ	DI, SI
	JEQ	onsystemstack

	// Switch to system stack.
	// BP still holds the original frame pointer,
	// which is useful for stack unwinding.
	CALL	gosave_systemstack_switch<>(SB)
	MOVQ	SI, g(CX)
	MOVQ	(g_sched+gobuf_sp)(SI), SP

	// Now on a scheduling stack (a pthread-created stack).
	SUBQ	$16, SP
	ANDQ	$~15, SP	// alignment for gcc ABI
	MOVQ	DI, 8(SP)	// save g
	MOVQ	(g_stack+stack_hi)(DI), DI
	SUBQ	DX, DI
	// Save the depth in the stack rather than SP itself,
	// as the stack might be copied during a callback.
	MOVQ	DI, 0(SP)
	CALL	runtime·asmcgocall_landingpad(SB)

	// Restore g and recompute the stack pointer from the saved depth.
	get_tls(CX)
	MOVQ	8(SP), DI
	MOVQ	(g_stack+stack_hi)(DI), SI
	SUBQ	0(SP), SI
	MOVQ	DI, g(CX)
	MOVQ	SI, SP

	MOVL	AX, ret+16(FP)
	RET

onsystemstack:
	// Running on a system stack, perhaps even without a g.
	// Having no g can happen during thread creation or thread teardown
	// (see needm/dropm on Solaris, for example).
	// This code is like the above sequence but without saving/restoring g
	// and without worrying about the stack moving out from under us
	// (because we're on a system stack, not a goroutine stack).
	// The above code could be used directly if already on a system stack,
	// but then the only path through this code would be a rare case on Solaris.
	// Using this code for all "already on system stack" calls exercises it more,
	// which should help keep it correct.
	SUBQ	$16, SP
	ANDQ	$~15, SP
	MOVQ	$0, 8(SP)		// where above code stores g, in case someone looks during debugging
	MOVQ	DX, 0(SP)	// save original stack pointer
	CALL	runtime·asmcgocall_landingpad(SB)
	MOVQ	0(SP), SI	// restore original stack pointer
	MOVQ	SI, SP
	MOVL	AX, ret+16(FP)
	RET
951
#ifdef GOOS_windows
// zeroTLS is a dummy TLS block used on Windows so that restoring the
// G register in needm does not crash. needm and its callees are very
// careful never to actually use the G; the TLS just can't be unset
// since we're in Go code.
GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
#endif
959
960// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
961// See cgocall.go for more details.
962TEXT ·cgocallback(SB),NOSPLIT,$24-24
963 NO_LOCAL_POINTERS
964
965 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
966 // It is used to dropm while thread is exiting.
967 MOVQ fn+0(FP), AX
968 CMPQ AX, $0
969 JNE loadg
970 // Restore the g from frame.
971 get_tls(CX)
972 MOVQ frame+8(FP), BX
973 MOVQ BX, g(CX)
974 JMP dropm
975
976loadg:
977 // If g is nil, Go did not create the current thread,
978 // or if this thread never called into Go on pthread platforms.
979 // Call needm to obtain one m for temporary use.
980 // In this case, we're running on the thread stack, so there's
981 // lots of space, but the linker doesn't know. Hide the call from
982 // the linker analysis by using an indirect call through AX.
983 get_tls(CX)
984#ifdef GOOS_windows
985 MOVL $0, BX
986 CMPQ CX, $0
987 JEQ 2(PC)
988#endif
989 MOVQ g(CX), BX
990 CMPQ BX, $0
991 JEQ needm
992 MOVQ g_m(BX), BX
993 MOVQ BX, savedm-8(SP) // saved copy of oldm
994 JMP havem
995needm:
996#ifdef GOOS_windows
997 // Set up a dummy TLS value. needm is careful not to use it,
998 // but it needs to be there to prevent autogenerated code from
999 // crashing when it loads from it.
1000 // We don't need to clear it or anything later because needm
1001 // will set up TLS properly.
1002 MOVQ $zeroTLS<>(SB), DI
1003 CALL runtime·settls(SB)
1004#endif
1005 // On some platforms (Windows) we cannot call needm through
1006 // an ABI wrapper because there's no TLS set up, and the ABI
1007 // wrapper will try to restore the G register (R14) from TLS.
1008 // Clear X15 because Go expects it and we're not calling
1009 // through a wrapper, but otherwise avoid setting the G
1010 // register in the wrapper and call needm directly. It
1011 // takes no arguments and doesn't return any values so
1012 // there's no need to handle that. Clear R14 so that there's
1013 // a bad value in there, in case needm tries to use it.
1014 XORPS X15, X15
1015 XORQ R14, R14
1016 MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
1017 CALL AX
1018 MOVQ $0, savedm-8(SP)
1019 get_tls(CX)
1020 MOVQ g(CX), BX
1021 MOVQ g_m(BX), BX
1022
1023 // Set m->sched.sp = SP, so that if a panic happens
1024 // during the function we are about to execute, it will
1025 // have a valid SP to run on the g0 stack.
1026 // The next few lines (after the havem label)
1027 // will save this SP onto the stack and then write
1028 // the same SP back to m->sched.sp. That seems redundant,
1029 // but if an unrecovered panic happens, unwindm will
1030 // restore the g->sched.sp from the stack location
1031 // and then systemstack will try to use it. If we don't set it here,
1032 // that restored SP will be uninitialized (typically 0) and
1033 // will not be usable.
1034 MOVQ m_g0(BX), SI
1035 MOVQ SP, (g_sched+gobuf_sp)(SI)
1036
1037havem:
1038 // Now there's a valid m, and we're running on its m->g0.
1039 // Save current m->g0->sched.sp on stack and then set it to SP.
1040 // Save current sp in m->g0->sched.sp in preparation for
1041 // switch back to m->curg stack.
1042 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
1043 MOVQ m_g0(BX), SI
1044 MOVQ (g_sched+gobuf_sp)(SI), AX
1045 MOVQ AX, 0(SP)
1046 MOVQ SP, (g_sched+gobuf_sp)(SI)
1047
1048 // Switch to m->curg stack and call runtime.cgocallbackg.
1049 // Because we are taking over the execution of m->curg
1050 // but *not* resuming what had been running, we need to
1051 // save that information (m->curg->sched) so we can restore it.
1052 // We can restore m->curg->sched.sp easily, because calling
1053 // runtime.cgocallbackg leaves SP unchanged upon return.
1054 // To save m->curg->sched.pc, we push it onto the curg stack and
1055 // open a frame the same size as cgocallback's g0 frame.
1056 // Once we switch to the curg stack, the pushed PC will appear
1057 // to be the return PC of cgocallback, so that the traceback
1058 // will seamlessly trace back into the earlier calls.
1059 MOVQ m_curg(BX), SI
1060 MOVQ SI, g(CX)
1061 MOVQ (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
1062 MOVQ (g_sched+gobuf_pc)(SI), BX
1063 MOVQ BX, -8(DI) // "push" return PC on the g stack
1064 // Gather our arguments into registers.
1065 MOVQ fn+0(FP), BX
1066 MOVQ frame+8(FP), CX
1067 MOVQ ctxt+16(FP), DX
1068 // Compute the size of the frame, including return PC and, if
1069 // GOEXPERIMENT=framepointer, the saved base pointer
1070 LEAQ fn+0(FP), AX
1071 SUBQ SP, AX // AX is our actual frame size
1072 SUBQ AX, DI // Allocate the same frame size on the g stack
1073 MOVQ DI, SP
1074
1075 MOVQ BX, 0(SP)
1076 MOVQ CX, 8(SP)
1077 MOVQ DX, 16(SP)
1078 MOVQ $runtime·cgocallbackg(SB), AX
1079 CALL AX // indirect call to bypass nosplit check. We're on a different stack now.
1080
1081 // Compute the size of the frame again. FP and SP have
1082 // completely different values here than they did above,
1083 // but only their difference matters.
1084 LEAQ fn+0(FP), AX
1085 SUBQ SP, AX
1086
1087 // Restore g->sched (== m->curg->sched) from saved values.
1088 get_tls(CX)
1089 MOVQ g(CX), SI
1090 MOVQ SP, DI
1091 ADDQ AX, DI
1092 MOVQ -8(DI), BX
1093 MOVQ BX, (g_sched+gobuf_pc)(SI)
1094 MOVQ DI, (g_sched+gobuf_sp)(SI)
1095
1096 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1097 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1098 // so we do not have to restore it.)
1099 MOVQ g(CX), BX
1100 MOVQ g_m(BX), BX
1101 MOVQ m_g0(BX), SI
1102 MOVQ SI, g(CX)
1103 MOVQ (g_sched+gobuf_sp)(SI), SP
1104 MOVQ 0(SP), AX
1105 MOVQ AX, (g_sched+gobuf_sp)(SI)
1106
1107 // If the m on entry was nil, we called needm above to borrow an m,
1108 // 1. for the duration of the call on non-pthread platforms,
1109 // 2. or the duration of the C thread alive on pthread platforms.
1110 // If the m on entry wasn't nil,
1111 // 1. the thread might be a Go thread,
1112 // 2. or it wasn't the first call from a C thread on pthread platforms,
1113 // since then we skip dropm to reuse the m in the first call.
1114 MOVQ savedm-8(SP), BX
1115 CMPQ BX, $0
1116 JNE done
1117
1118 // Skip dropm to reuse it in the next call, when a pthread key has been created.
1119 MOVQ _cgo_pthread_key_created(SB), AX
1120 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
1121 CMPQ AX, $0
1122 JEQ dropm
1123 CMPQ (AX), $0
1124 JNE done
1125
1126dropm:
1127 MOVQ $runtime·dropm(SB), AX
1128 CALL AX
1129#ifdef GOOS_windows
1130 // We need to clear the TLS pointer in case the next
1131 // thread that comes into Go tries to reuse that space
1132 // but uses the same M.
1133 XORQ DI, DI
1134 CALL runtime·settls(SB)
1135#endif
1136done:
1137
1138 // Done!
1139 RET
1140
// func setg(gg *g)
//
// setg stores gg in the thread-local g slot. It is used by needm.
// BX and CX are used as scratch.
TEXT runtime·setg(SB), NOSPLIT, $0-8
	get_tls(CX)			// CX = TLS pointer for the g(CX) accessor
	MOVQ	gg+0(FP), BX		// BX = gg
	MOVQ	BX, g(CX)		// thread-local g = gg
	RET
1148
// void setg_gcc(G*)
//
// Entered from gcc-compiled code with the new g in DI. The value is
// published in R14 (the g register) and in the thread-local g slot.
// AX is used as scratch.
TEXT setg_gcc<>(SB),NOSPLIT,$0
	MOVQ	DI, R14			// g register = new g
	get_tls(AX)
	MOVQ	DI, g(AX)		// thread-local g = new g
	RET
1155
// abort raises a breakpoint trap with INT $3. If execution continues past
// the trap, it spins in place so that the function never returns.
1156TEXT runtime·abort(SB),NOSPLIT,$0-0
1157 INT $3
1158loop:
1159 JMP loop
1160
// stackcheck verifies SP against the current g's stack bounds and calls
// runtime·abort unless g->stack.lo < SP < g->stack.hi.
// AX and CX are used as scratch.
TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
	get_tls(CX)
	MOVQ	g(CX), AX			// AX = current g
	CMPQ	(g_stack+stack_hi)(AX), SP
	JHI	below_hi			// stack.hi > SP: upper bound holds
	CALL	runtime·abort(SB)
below_hi:
	CMPQ	SP, (g_stack+stack_lo)(AX)
	JHI	above_lo			// SP > stack.lo: lower bound holds
	CALL	runtime·abort(SB)
above_lo:
	RET
1172
// func cputicks() int64
//
// cputicks returns the CPU timestamp counter as a single 64-bit value,
// assembled from the DX (high) and AX (low) halves left by RDTSCP or RDTSC.
TEXT runtime·cputicks(SB),NOSPLIT,$0-0
	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
	JNE	serialize
	// RDTSCP serializes the instruction stream by itself. It is
	// available on Intel Nehalem (2008), AMD K8 Rev. F (2006),
	// and everything newer.
	RDTSCP
combine:
	SHLQ	$32, DX			// move the high half into bits 32-63
	ADDQ	DX, AX			// AX = (high << 32) + low
	MOVQ	AX, ret+0(FP)
	RET
serialize:
	// Without RDTSCP, fence explicitly before reading the counter.
	// On AMD, MFENCE serializes the instruction stream and flushes the
	// store buffers, whereas what LFENCE guarantees depends on
	// MSR C001_1029 and the CPU generation. On Intel, LFENCE waits for
	// all earlier instructions to execute, and the manuals recommend
	// MFENCE;LFENCE ahead of RDTSC so that earlier instructions have
	// executed and earlier loads and stores are globally visible.
	// Issuing both fences gives the same serializing behavior on either
	// vendor without detecting the manufacturer at run time.
	MFENCE
	LFENCE
	RDTSC
	JMP	combine
1199
// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
//
// memhash hashes s bytes at p with seed h. When runtime·useAeshash is
// nonzero it tail-jumps to the AES-instruction implementation; otherwise
// it tail-jumps to the Go fallback.
TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
	// Register arguments on entry:
	//	AX = ptr to data
	//	BX = seed
	//	CX = size
	CMPB	runtime·useAeshash(SB), $0
	JNE	useaes
	JMP	runtime·memhashFallback<ABIInternal>(SB)
useaes:
	JMP	aeshashbody<>(SB)
1211
// func strhash(p unsafe.Pointer, h uintptr) uintptr
//
// strhash hashes the string whose header p points to, using seed h. When
// runtime·useAeshash is nonzero it unpacks the header into the
// (data, seed, length) registers that aeshashbody expects and tail-jumps
// there; otherwise it tail-jumps to the Go fallback with its arguments
// untouched.
TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
	// Register arguments on entry:
	//	AX = ptr to string struct
	//	BX = seed
	CMPB	runtime·useAeshash(SB), $0
	JNE	useaes
	JMP	runtime·strhashFallback<ABIInternal>(SB)
useaes:
	MOVQ	8(AX), CX		// CX = string length
	MOVQ	(AX), AX		// AX = string data (header pointer is dead after this)
	JMP	aeshashbody<>(SB)
1223
// aeshashbody is the shared AES-instruction hash core reached by tail-jump
// from memhash and strhash. It builds a seed vector from the hash seed and
// the length, then dispatches on the length to one of several straight-line
// paths (0, 1-15, 16, 17-32, 33-64, 65-128 bytes) or to a loop over
// 128-byte blocks (129+ bytes). The low 64 bits of the final vector are
// returned in AX.
//
// The 65-128 and 129+ paths use X15 as a working register and zero it again
// before returning.
1224// AX: data
1225// BX: hash seed
1226// CX: length
1227// At return: AX = return value
1228TEXT aeshashbody<>(SB),NOSPLIT,$0-0
1229 // Fill an SSE register with our seeds.
1230 MOVQ BX, X0 // 64 bits of per-table hash seed
1231 PINSRW $4, CX, X0 // 16 bits of length
1232 PSHUFHW $0, X0, X0 // repeat length 4 times total
1233 MOVO X0, X1 // save unscrambled seed
1234 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
1235 AESENC X0, X0 // scramble seed
1236
 // Dispatch on the length in CX (unsigned comparisons).
1237 CMPQ CX, $16
1238 JB aes0to15
1239 JE aes16
1240 CMPQ CX, $32
1241 JBE aes17to32
1242 CMPQ CX, $64
1243 JBE aes33to64
1244 CMPQ CX, $128
1245 JBE aes65to128
1246 JMP aes129plus
1247
1248aes0to15:
1249 TESTQ CX, CX
1250 JE aes0
1251
 // AX = data+16. If bits 4-11 of that address are all zero, a 16-byte
 // load starting at data could run into the next page, so take the
 // endofpage path instead.
1252 ADDQ $16, AX
1253 TESTW $0xff0, AX
1254 JE endofpage
1255
1256 // 16 bytes loaded at this address won't cross
1257 // a page boundary, so we can load it directly.
1258 MOVOU -16(AX), X1
 // Doubling CX and scaling by 8 gives 16*length, the byte offset of
 // the masks<> entry for this length.
1259 ADDQ CX, CX
1260 MOVQ $masks<>(SB), AX
1261 PAND (AX)(CX*8), X1
1262final1:
1263 PXOR X0, X1 // xor data with seed
1264 AESENC X1, X1 // scramble combo 3 times
1265 AESENC X1, X1
1266 AESENC X1, X1
1267 MOVQ X1, AX // return X1
1268 RET
1269
1270endofpage:
1271 // address ends in 1111xxxx. Might be up against
1272 // a page boundary, so load ending at last byte.
1273 // Then shift bytes down using pshufb.
1274 MOVOU -32(AX)(CX*1), X1
 // As above, 16*length indexes the shifts<> entry for this length.
1275 ADDQ CX, CX
1276 MOVQ $shifts<>(SB), AX
1277 PSHUFB (AX)(CX*8), X1
1278 JMP final1
1279
1280aes0:
1281 // Return scrambled input seed
1282 AESENC X0, X0
1283 MOVQ X0, AX // return X0
1284 RET
1285
1286aes16:
1287 MOVOU (AX), X1
1288 JMP final1
1289
1290aes17to32:
1291 // make second starting seed
1292 PXOR runtime·aeskeysched+16(SB), X1
1293 AESENC X1, X1
1294
1295 // load data to be hashed
 // (first 16 bytes and last 16 bytes; the two loads may overlap)
1296 MOVOU (AX), X2
1297 MOVOU -16(AX)(CX*1), X3
1298
1299 // xor with seed
1300 PXOR X0, X2
1301 PXOR X1, X3
1302
1303 // scramble 3 times
1304 AESENC X2, X2
1305 AESENC X3, X3
1306 AESENC X2, X2
1307 AESENC X3, X3
1308 AESENC X2, X2
1309 AESENC X3, X3
1310
1311 // combine results
1312 PXOR X3, X2
1313 MOVQ X2, AX // return X2
1314 RET
1315
1316aes33to64:
1317 // make 3 more starting seeds
1318 MOVO X1, X2
1319 MOVO X1, X3
1320 PXOR runtime·aeskeysched+16(SB), X1
1321 PXOR runtime·aeskeysched+32(SB), X2
1322 PXOR runtime·aeskeysched+48(SB), X3
1323 AESENC X1, X1
1324 AESENC X2, X2
1325 AESENC X3, X3
1326
 // Load the first 32 and the last 32 bytes (the halves may overlap).
1327 MOVOU (AX), X4
1328 MOVOU 16(AX), X5
1329 MOVOU -32(AX)(CX*1), X6
1330 MOVOU -16(AX)(CX*1), X7
1331
1332 PXOR X0, X4
1333 PXOR X1, X5
1334 PXOR X2, X6
1335 PXOR X3, X7
1336
1337 AESENC X4, X4
1338 AESENC X5, X5
1339 AESENC X6, X6
1340 AESENC X7, X7
1341
1342 AESENC X4, X4
1343 AESENC X5, X5
1344 AESENC X6, X6
1345 AESENC X7, X7
1346
1347 AESENC X4, X4
1348 AESENC X5, X5
1349 AESENC X6, X6
1350 AESENC X7, X7
1351
1352 PXOR X6, X4
1353 PXOR X7, X5
1354 PXOR X5, X4
1355 MOVQ X4, AX // return X4
1356 RET
1357
1358aes65to128:
1359 // make 7 more starting seeds
1360 MOVO X1, X2
1361 MOVO X1, X3
1362 MOVO X1, X4
1363 MOVO X1, X5
1364 MOVO X1, X6
1365 MOVO X1, X7
1366 PXOR runtime·aeskeysched+16(SB), X1
1367 PXOR runtime·aeskeysched+32(SB), X2
1368 PXOR runtime·aeskeysched+48(SB), X3
1369 PXOR runtime·aeskeysched+64(SB), X4
1370 PXOR runtime·aeskeysched+80(SB), X5
1371 PXOR runtime·aeskeysched+96(SB), X6
1372 PXOR runtime·aeskeysched+112(SB), X7
1373 AESENC X1, X1
1374 AESENC X2, X2
1375 AESENC X3, X3
1376 AESENC X4, X4
1377 AESENC X5, X5
1378 AESENC X6, X6
1379 AESENC X7, X7
1380
1381 // load data
 // (first 64 bytes and last 64 bytes; the halves may overlap)
1382 MOVOU (AX), X8
1383 MOVOU 16(AX), X9
1384 MOVOU 32(AX), X10
1385 MOVOU 48(AX), X11
1386 MOVOU -64(AX)(CX*1), X12
1387 MOVOU -48(AX)(CX*1), X13
1388 MOVOU -32(AX)(CX*1), X14
1389 MOVOU -16(AX)(CX*1), X15
1390
1391 // xor with seed
1392 PXOR X0, X8
1393 PXOR X1, X9
1394 PXOR X2, X10
1395 PXOR X3, X11
1396 PXOR X4, X12
1397 PXOR X5, X13
1398 PXOR X6, X14
1399 PXOR X7, X15
1400
1401 // scramble 3 times
1402 AESENC X8, X8
1403 AESENC X9, X9
1404 AESENC X10, X10
1405 AESENC X11, X11
1406 AESENC X12, X12
1407 AESENC X13, X13
1408 AESENC X14, X14
1409 AESENC X15, X15
1410
1411 AESENC X8, X8
1412 AESENC X9, X9
1413 AESENC X10, X10
1414 AESENC X11, X11
1415 AESENC X12, X12
1416 AESENC X13, X13
1417 AESENC X14, X14
1418 AESENC X15, X15
1419
1420 AESENC X8, X8
1421 AESENC X9, X9
1422 AESENC X10, X10
1423 AESENC X11, X11
1424 AESENC X12, X12
1425 AESENC X13, X13
1426 AESENC X14, X14
1427 AESENC X15, X15
1428
1429 // combine results
1430 PXOR X12, X8
1431 PXOR X13, X9
1432 PXOR X14, X10
1433 PXOR X15, X11
1434 PXOR X10, X8
1435 PXOR X11, X9
1436 PXOR X9, X8
1437 // X15 must be zero on return
1438 PXOR X15, X15
1439 MOVQ X8, AX // return X8
1440 RET
1441
1442aes129plus:
1443 // make 7 more starting seeds
1444 MOVO X1, X2
1445 MOVO X1, X3
1446 MOVO X1, X4
1447 MOVO X1, X5
1448 MOVO X1, X6
1449 MOVO X1, X7
1450 PXOR runtime·aeskeysched+16(SB), X1
1451 PXOR runtime·aeskeysched+32(SB), X2
1452 PXOR runtime·aeskeysched+48(SB), X3
1453 PXOR runtime·aeskeysched+64(SB), X4
1454 PXOR runtime·aeskeysched+80(SB), X5
1455 PXOR runtime·aeskeysched+96(SB), X6
1456 PXOR runtime·aeskeysched+112(SB), X7
1457 AESENC X1, X1
1458 AESENC X2, X2
1459 AESENC X3, X3
1460 AESENC X4, X4
1461 AESENC X5, X5
1462 AESENC X6, X6
1463 AESENC X7, X7
1464
1465 // start with last (possibly overlapping) block
1466 MOVOU -128(AX)(CX*1), X8
1467 MOVOU -112(AX)(CX*1), X9
1468 MOVOU -96(AX)(CX*1), X10
1469 MOVOU -80(AX)(CX*1), X11
1470 MOVOU -64(AX)(CX*1), X12
1471 MOVOU -48(AX)(CX*1), X13
1472 MOVOU -32(AX)(CX*1), X14
1473 MOVOU -16(AX)(CX*1), X15
1474
1475 // xor in seed
1476 PXOR X0, X8
1477 PXOR X1, X9
1478 PXOR X2, X10
1479 PXOR X3, X11
1480 PXOR X4, X12
1481 PXOR X5, X13
1482 PXOR X6, X14
1483 PXOR X7, X15
1484
1485 // compute number of remaining 128-byte blocks
 // CX = (length-1) >> 7, the loop trip count (at least 1 on this path).
1486 DECQ CX
1487 SHRQ $7, CX
1488
1489 PCALIGN $16
1490aesloop:
1491 // scramble state
1492 AESENC X8, X8
1493 AESENC X9, X9
1494 AESENC X10, X10
1495 AESENC X11, X11
1496 AESENC X12, X12
1497 AESENC X13, X13
1498 AESENC X14, X14
1499 AESENC X15, X15
1500
1501 // scramble state, xor in a block
 // X0-X7 are reused as block buffers here; the seeds they held were
 // already folded into X8-X15 above.
1502 MOVOU (AX), X0
1503 MOVOU 16(AX), X1
1504 MOVOU 32(AX), X2
1505 MOVOU 48(AX), X3
1506 AESENC X0, X8
1507 AESENC X1, X9
1508 AESENC X2, X10
1509 AESENC X3, X11
1510 MOVOU 64(AX), X4
1511 MOVOU 80(AX), X5
1512 MOVOU 96(AX), X6
1513 MOVOU 112(AX), X7
1514 AESENC X4, X12
1515 AESENC X5, X13
1516 AESENC X6, X14
1517 AESENC X7, X15
1518
1519 ADDQ $128, AX
1520 DECQ CX
1521 JNE aesloop
1522
1523 // 3 more scrambles to finish
1524 AESENC X8, X8
1525 AESENC X9, X9
1526 AESENC X10, X10
1527 AESENC X11, X11
1528 AESENC X12, X12
1529 AESENC X13, X13
1530 AESENC X14, X14
1531 AESENC X15, X15
1532 AESENC X8, X8
1533 AESENC X9, X9
1534 AESENC X10, X10
1535 AESENC X11, X11
1536 AESENC X12, X12
1537 AESENC X13, X13
1538 AESENC X14, X14
1539 AESENC X15, X15
1540 AESENC X8, X8
1541 AESENC X9, X9
1542 AESENC X10, X10
1543 AESENC X11, X11
1544 AESENC X12, X12
1545 AESENC X13, X13
1546 AESENC X14, X14
1547 AESENC X15, X15
1548
 // combine the eight lanes into X8
1549 PXOR X12, X8
1550 PXOR X13, X9
1551 PXOR X14, X10
1552 PXOR X15, X11
1553 PXOR X10, X8
1554 PXOR X11, X9
1555 PXOR X9, X8
1556 // X15 must be zero on return
1557 PXOR X15, X15
1558 MOVQ X8, AX // return X8
1559 RET
1560
1561// func memhash32(p unsafe.Pointer, h uintptr) uintptr
1562// ABIInternal for performance.
//
// memhash32 hashes the 4 bytes at p with seed h. When runtime·useAeshash is
// zero it tail-jumps to the Go fallback; otherwise it packs the seed and
// the data into X0, runs three AESENC rounds keyed from runtime·aeskeysched,
// and returns the low 64 bits of the result in AX.
1563TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
1564 // AX = ptr to data
1565 // BX = seed
1566 CMPB runtime·useAeshash(SB), $0
1567 JEQ noaes
1568 MOVQ BX, X0 // X0 = seed
 // Insert the 32-bit value as dword 2 of X0 (bits 64-95), above the seed.
1569 PINSRD $2, (AX), X0 // data
1570 AESENC runtime·aeskeysched+0(SB), X0
1571 AESENC runtime·aeskeysched+16(SB), X0
1572 AESENC runtime·aeskeysched+32(SB), X0
1573 MOVQ X0, AX // return X0
1574 RET
1575noaes:
1576 JMP runtime·memhash32Fallback<ABIInternal>(SB)
1577
1578// func memhash64(p unsafe.Pointer, h uintptr) uintptr
1579// ABIInternal for performance.
//
// memhash64 hashes the 8 bytes at p with seed h. When runtime·useAeshash is
// zero it tail-jumps to the Go fallback; otherwise it packs the seed and
// the data into X0, runs three AESENC rounds keyed from runtime·aeskeysched,
// and returns the low 64 bits of the result in AX.
1580TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
1581 // AX = ptr to data
1582 // BX = seed
1583 CMPB runtime·useAeshash(SB), $0
1584 JEQ noaes
1585 MOVQ BX, X0 // X0 = seed
 // Insert the 64-bit value as qword 1 of X0 (bits 64-127), above the seed.
1586 PINSRQ $1, (AX), X0 // data
1587 AESENC runtime·aeskeysched+0(SB), X0
1588 AESENC runtime·aeskeysched+16(SB), X0
1589 AESENC runtime·aeskeysched+32(SB), X0
1590 MOVQ X0, AX // return X0
1591 RET
1592noaes:
1593 JMP runtime·memhash64Fallback<ABIInternal>(SB)
1594
1595// simple mask to get rid of data in the high part of the register.
// The table holds sixteen 16-byte entries. Entry i (at byte offset 16*i)
// has its low i bytes set to 0xff and its remaining bytes set to zero, so
// ANDing a 16-byte load with entry i keeps only the first i bytes.
// aeshashbody indexes the table by length, and checkASM verifies that the
// table is 16-byte aligned.
1596DATA masks<>+0x00(SB)/8, $0x0000000000000000
1597DATA masks<>+0x08(SB)/8, $0x0000000000000000
1598DATA masks<>+0x10(SB)/8, $0x00000000000000ff
1599DATA masks<>+0x18(SB)/8, $0x0000000000000000
1600DATA masks<>+0x20(SB)/8, $0x000000000000ffff
1601DATA masks<>+0x28(SB)/8, $0x0000000000000000
1602DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
1603DATA masks<>+0x38(SB)/8, $0x0000000000000000
1604DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
1605DATA masks<>+0x48(SB)/8, $0x0000000000000000
1606DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
1607DATA masks<>+0x58(SB)/8, $0x0000000000000000
1608DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
1609DATA masks<>+0x68(SB)/8, $0x0000000000000000
1610DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
1611DATA masks<>+0x78(SB)/8, $0x0000000000000000
1612DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
1613DATA masks<>+0x88(SB)/8, $0x0000000000000000
1614DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
1615DATA masks<>+0x98(SB)/8, $0x00000000000000ff
1616DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
1617DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
1618DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
1619DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
1620DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
1621DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
1622DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
1623DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
1624DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
1625DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
1626DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
1627DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
1628GLOBL masks<>(SB),RODATA,$256
1629
// func checkASM() bool
//
// checkASM reports whether both the masks<> and shifts<> tables start on a
// 16-byte boundary. The two addresses are ORed together so that a single
// test of the low four bits covers both.
TEXT ·checkASM(SB),NOSPLIT,$0-1
	MOVQ	$shifts<>(SB), BX	// BX = &shifts
	MOVQ	$masks<>(SB), AX	// AX = &masks
	ORQ	BX, AX			// low bits set if either address is misaligned
	TESTQ	$15, AX
	SETEQ	ret+0(FP)		// true when the low four bits are all zero
	RET
1639
1640// these are arguments to pshufb. They move data down from
1641// the high bytes of the register to the low bytes of the register.
1642// index is how many bytes to move.
// The table holds sixteen 16-byte entries. Entry i (at byte offset 16*i)
// selects source bytes 16-i..15 into destination bytes 0..i-1; every other
// control byte is 0xff, which makes PSHUFB write zero to that destination
// byte. checkASM verifies that the table is 16-byte aligned.
1643DATA shifts<>+0x00(SB)/8, $0x0000000000000000
1644DATA shifts<>+0x08(SB)/8, $0x0000000000000000
1645DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
1646DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
1647DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
1648DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
1649DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
1650DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
1651DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
1652DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
1653DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
1654DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
1655DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
1656DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
1657DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
1658DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
1659DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
1660DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
1661DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
1662DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
1663DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
1664DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
1665DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
1666DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
1667DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
1668DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
1669DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
1670DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
1671DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
1672DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
1673DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
1674DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
1675GLOBL shifts<>(SB),RODATA,$256
1676
// return0 sets AX to zero and returns. MOVL is used, so flags are left
// untouched.
1677TEXT runtime·return0(SB), NOSPLIT, $0
1678 MOVL $0, AX
1679 RET
1680
1681
1682// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1683// Must obey the gcc calling convention.
// The result is returned in AX; AX and CX are the only registers written.
1684TEXT _cgo_topofstack(SB),NOSPLIT,$0
1685 get_tls(CX)
1686 MOVQ g(CX), AX // AX = g
1687 MOVQ g_m(AX), AX // AX = g->m
1688 MOVQ m_curg(AX), AX // AX = g->m->curg
1689 MOVQ (g_stack+stack_hi)(AX), AX // AX = g->m->curg.stack.hi
1690 RET
1691
1692// The top-most function running on a goroutine
1693// returns to goexit+PCQuantum.
// NOTE(review): the leading one-byte NOP presumably exists so that
// goexit+PCQuantum lands on the CALL below — confirm PCQuantum for amd64.
// The trailing NOP keeps the return address of the CALL inside goexit's
// code range.
1694TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
1695 BYTE $0x90 // NOP
1696 CALL runtime·goexit1(SB) // does not return
1697 // traceback from goexit1 must hit code range of goexit
1698 BYTE $0x90 // NOP
1699
1700// This is called from .init_array and follows the platform, not Go, ABI.
// The new moduledata pointer arrives in DI. It is linked after the current
// runtime·lastmoduledatap entry and then becomes the new
// runtime·lastmoduledatap. AX is used as scratch; R15 is saved and restored
// around the global accesses.
1701TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1702 PUSHQ R15 // The access to global variables below implicitly uses R15, which is callee-save
1703 MOVQ runtime·lastmoduledatap(SB), AX
1704 MOVQ DI, moduledata_next(AX) // lastmoduledatap.next = DI
1705 MOVQ DI, runtime·lastmoduledatap(SB) // lastmoduledatap = DI
1706 POPQ R15
1707 RET
1708
1709// Initialize special registers then jump to sigpanic.
1710// This function is injected from the signal handler for panicking
1711// signals. It is quite painful to set X15 in the signal context,
1712// so we do it here.
// R14 is loaded with the current g from TLS, and X15 is zeroed (except on
// plan9), before tail-jumping to the ABIInternal sigpanic.
1713TEXT ·sigpanic0(SB),NOSPLIT,$0-0
1714 get_tls(R14)
1715 MOVQ g(R14), R14 // R14 = g
1716#ifndef GOOS_plan9
1717 XORPS X15, X15 // X15 = 0
1718#endif
1719 JMP ·sigpanic<ABIInternal>(SB)
1720
1721// gcWriteBarrier informs the GC about heap pointer writes.
1722//
1723// gcWriteBarrier returns space in a write barrier buffer which
1724// should be filled in by the caller.
1725// gcWriteBarrier does NOT follow the Go ABI. It accepts the
1726// number of bytes of buffer needed in R11, and returns a pointer
1727// to the buffer space in R11.
1728// It clobbers FLAGS. It does not clobber any general-purpose registers,
1729// but may clobber others (e.g., SSE registers).
1730// Typical use would be, when doing *(CX+88) = AX
1731// CMPL $0, runtime.writeBarrier(SB)
1732// JEQ dowrite
1733// CALL runtime.gcWriteBarrier2(SB)
1734// MOVQ AX, (R11)
1735// MOVQ 88(CX), DX
1736// MOVQ DX, 8(R11)
1737// dowrite:
1738// MOVQ AX, 88(CX)
//
// Frame layout (112 bytes): 0(SP)..88(SP) hold the twelve registers spilled
// around the wbBufFlush call on the slow path; 96(SP) and 104(SP) hold the
// caller's R12 and R13, which the fast path uses as scratch.
1739TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
1740 // Save the registers clobbered by the fast path. This is slightly
1741 // faster than having the caller spill these.
1742 MOVQ R12, 96(SP)
1743 MOVQ R13, 104(SP)
1744retry:
1745 // TODO: Consider passing g.m.p in as an argument so they can be shared
1746 // across a sequence of write barriers.
1747 MOVQ g_m(R14), R13
1748 MOVQ m_p(R13), R13
1749 // Get current buffer write position.
1750 MOVQ (p_wbBuf+wbBuf_next)(R13), R12 // original next position
1751 ADDQ R11, R12 // new next position
1752 // Is the buffer full?
 // (unsigned: flush when the new next position would pass wbBuf.end)
1753 CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
1754 JA flush
1755 // Commit to the larger buffer.
1756 MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
1757 // Make return value (the original next position)
1758 SUBQ R11, R12
1759 MOVQ R12, R11
1760 // Restore registers.
1761 MOVQ 96(SP), R12
1762 MOVQ 104(SP), R13
1763 RET
1764
1765flush:
1766 // Save all general purpose registers since these could be
1767 // clobbered by wbBufFlush and were not saved by the caller.
1768 // It is possible for wbBufFlush to clobber other registers
1769 // (e.g., SSE registers), but the compiler takes care of saving
1770 // those in the caller if necessary. This strikes a balance
1771 // with registers that are likely to be used.
1772 //
1773 // We don't have type information for these, but all code under
1774 // here is NOSPLIT, so nothing will observe these.
1775 //
1776 // TODO: We could strike a different balance; e.g., saving X0
1777 // and not saving GP registers that are less likely to be used.
1778 MOVQ DI, 0(SP)
1779 MOVQ AX, 8(SP)
1780 MOVQ BX, 16(SP)
1781 MOVQ CX, 24(SP)
1782 MOVQ DX, 32(SP)
1783 // DI already saved
1784 MOVQ SI, 40(SP)
1785 MOVQ BP, 48(SP)
1786 MOVQ R8, 56(SP)
1787 MOVQ R9, 64(SP)
1788 MOVQ R10, 72(SP)
1789 MOVQ R11, 80(SP)
1790 // R12 already saved
1791 // R13 already saved
1792 // R14 is g
1793 MOVQ R15, 88(SP)
1794
1795 CALL runtime·wbBufFlush(SB)
1796
 // Reload everything spilled above, including the requested byte count
 // in R11, then retry the fast path.
1797 MOVQ 0(SP), DI
1798 MOVQ 8(SP), AX
1799 MOVQ 16(SP), BX
1800 MOVQ 24(SP), CX
1801 MOVQ 32(SP), DX
1802 MOVQ 40(SP), SI
1803 MOVQ 48(SP), BP
1804 MOVQ 56(SP), R8
1805 MOVQ 64(SP), R9
1806 MOVQ 72(SP), R10
1807 MOVQ 80(SP), R11
1808 MOVQ 88(SP), R15
1809 JMP retry
1810
// gcWriteBarrier1 through gcWriteBarrier8 are the entry points to
// gcWriteBarrier<>. gcWriteBarrierN loads the byte count 8*N into R11
// and tail-jumps to gcWriteBarrier<>, which hands back that much buffer
// space through R11.
1811TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1812 MOVL $8, R11
1813 JMP gcWriteBarrier<>(SB)
1814TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1815 MOVL $16, R11
1816 JMP gcWriteBarrier<>(SB)
1817TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1818 MOVL $24, R11
1819 JMP gcWriteBarrier<>(SB)
1820TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1821 MOVL $32, R11
1822 JMP gcWriteBarrier<>(SB)
1823TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1824 MOVL $40, R11
1825 JMP gcWriteBarrier<>(SB)
1826TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1827 MOVL $48, R11
1828 JMP gcWriteBarrier<>(SB)
1829TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1830 MOVL $56, R11
1831 JMP gcWriteBarrier<>(SB)
1832TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1833 MOVL $64, R11
1834 JMP gcWriteBarrier<>(SB)
1835
// debugCallFrameTooLarge is the 20-byte, read-only reason string that
// debugCallV2 reports when the requested argument frame is larger than
// every debugCall size class.
1836DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1837GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1838
1839// debugCallV2 is the entry point for debugger-injected function
1840// calls on running goroutines. It informs the runtime that a
1841// debug call has been injected and creates a call frame for the
1842// debugger to fill in.
1843//
1844// To inject a function call, a debugger should:
1845// 1. Check that the goroutine is in state _Grunning and that
1846// there are at least 256 bytes free on the stack.
1847// 2. Push the current PC on the stack (updating SP).
1848// 3. Write the desired argument frame size at SP-16 (using the SP
1849// after step 2).
1850// 4. Save all machine registers (including flags and XMM registers)
1851// so they can be restored later by the debugger.
1852// 5. Set the PC to debugCallV2 and resume execution.
1853//
1854// If the goroutine is in state _Grunnable, then it's not generally
1855// safe to inject a call because it may return out via other runtime
1856// operations. Instead, the debugger should unwind the stack to find
1857// the return to non-runtime code, add a temporary breakpoint there,
1858// and inject the call once that breakpoint is hit.
1859//
1860// If the goroutine is in any other state, it's not safe to inject a call.
1861//
1862// This function communicates back to the debugger by setting R12 and
1863// invoking INT3 to raise a breakpoint signal. See the comments in the
1864// implementation for the protocol the debugger is expected to
1865// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
1866//
1867// The debugger must ensure that any pointers passed to the function
1868// obey escape analysis requirements. Specifically, it must not pass
1869// a stack pointer to an escaping argument. debugCallV2 cannot check
1870// this invariant.
1871//
1872// This is ABIInternal because Go code injects its PC directly into new
1873// goroutine stacks.
//
// Summary of the R12 status codes used in this file: 0 = frame ready for
// the debugger to fill in, 1 = call returned, 2 = call panicked,
// 8 = call cannot be injected (reason string at the top of the stack),
// 16 = done, restore registers.
1874TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
1875 // Save all registers that may contain pointers so they can be
1876 // conservatively scanned.
1877 //
1878 // We can't do anything that might clobber any of these
1879 // registers before this.
1880 MOVQ R15, r15-(14*8+8)(SP)
1881 MOVQ R14, r14-(13*8+8)(SP)
1882 MOVQ R13, r13-(12*8+8)(SP)
1883 MOVQ R12, r12-(11*8+8)(SP)
1884 MOVQ R11, r11-(10*8+8)(SP)
1885 MOVQ R10, r10-(9*8+8)(SP)
1886 MOVQ R9, r9-(8*8+8)(SP)
1887 MOVQ R8, r8-(7*8+8)(SP)
1888 MOVQ DI, di-(6*8+8)(SP)
1889 MOVQ SI, si-(5*8+8)(SP)
1890 MOVQ BP, bp-(4*8+8)(SP)
1891 MOVQ BX, bx-(3*8+8)(SP)
1892 MOVQ DX, dx-(2*8+8)(SP)
1893 // Save the frame size before we clobber it. Either of the last
1894 // saves could clobber this depending on whether there's a saved BP.
1895 MOVQ frameSize-24(FP), DX // aka -16(RSP) before prologue
1896 MOVQ CX, cx-(1*8+8)(SP)
1897 MOVQ AX, ax-(0*8+8)(SP)
1898
1899 // Save the argument frame size.
1900 MOVQ DX, frameSize-128(SP)
1901
1902 // Perform a safe-point check.
1903 MOVQ retpc-8(FP), AX // Caller's PC
1904 MOVQ AX, 0(SP)
1905 CALL runtime·debugCallCheck(SB)
 // The two result words at 8(SP) and 16(SP) are treated as the reason
 // string; a zero first word means the check passed.
1906 MOVQ 8(SP), AX
1907 TESTQ AX, AX
1908 JZ good
1909 // The safety check failed. Put the reason string at the top
1910 // of the stack.
1911 MOVQ AX, 0(SP)
1912 MOVQ 16(SP), AX
1913 MOVQ AX, 8(SP)
1914 // Set R12 to 8 and invoke INT3. The debugger should get the
1915 // reason a call can't be injected from the top of the stack
1916 // and resume execution.
1917 MOVQ $8, R12
1918 BYTE $0xcc
1919 JMP restore
1920
1921good:
1922 // Registers are saved and it's safe to make a call.
1923 // Open up a call frame, moving the stack if necessary.
1924 //
1925 // Once the frame is allocated, this will set R12 to 0 and
1926 // invoke INT3. The debugger should write the argument
1927 // frame for the call at SP, set up argument registers, push
1928 // the trapping PC on the stack, set the PC to the function to
1929 // call, set RDX to point to the closure (if a closure call),
1930 // and resume execution.
1931 //
1932 // If the function returns, this will set R12 to 1 and invoke
1933 // INT3. The debugger can then inspect any return value saved
1934 // on the stack at SP and in registers and resume execution again.
1935 //
1936 // If the function panics, this will set R12 to 2 and invoke INT3.
1937 // The interface{} value of the panic will be at SP. The debugger
1938 // can inspect the panic value and resume execution again.
 //
 // DEBUG_CALL_DISPATCH tests one size class. If the requested frame size
 // in AX exceeds MAXSIZE, the JA 5(PC) skips the remaining four
 // instructions of the expansion and falls through to the next, larger
 // class. Otherwise it passes the address of NAME to debugCallWrap at
 // 0(SP) and jumps to restore once that call comes back.
1939#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1940 CMPQ AX, $MAXSIZE; \
1941 JA 5(PC); \
1942 MOVQ $NAME(SB), AX; \
1943 MOVQ AX, 0(SP); \
1944 CALL runtime·debugCallWrap(SB); \
1945 JMP restore
1946
1947 MOVQ frameSize-128(SP), AX
1948 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1949 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1950 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1951 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1952 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1953 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1954 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1955 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1956 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1957 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1958 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1959 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1960 // The frame size is too large. Report the error.
1961 MOVQ $debugCallFrameTooLarge<>(SB), AX
1962 MOVQ AX, 0(SP)
1963 MOVQ $20, 8(SP) // length of debugCallFrameTooLarge string
1964 MOVQ $8, R12
1965 BYTE $0xcc
1966 JMP restore
1967
1968restore:
1969 // Calls and failures resume here.
1970 //
1971 // Set R12 to 16 and invoke INT3. The debugger should restore
1972 // all registers except RIP and RSP and resume execution.
1973 MOVQ $16, R12
1974 BYTE $0xcc
1975 // We must not modify flags after this point.
1976
1977 // Restore pointer-containing registers, which may have been
1978 // modified from the debugger's copy by stack copying.
1979 MOVQ ax-(0*8+8)(SP), AX
1980 MOVQ cx-(1*8+8)(SP), CX
1981 MOVQ dx-(2*8+8)(SP), DX
1982 MOVQ bx-(3*8+8)(SP), BX
1983 MOVQ bp-(4*8+8)(SP), BP
1984 MOVQ si-(5*8+8)(SP), SI
1985 MOVQ di-(6*8+8)(SP), DI
1986 MOVQ r8-(7*8+8)(SP), R8
1987 MOVQ r9-(8*8+8)(SP), R9
1988 MOVQ r10-(9*8+8)(SP), R10
1989 MOVQ r11-(10*8+8)(SP), R11
1990 MOVQ r12-(11*8+8)(SP), R12
1991 MOVQ r13-(12*8+8)(SP), R13
1992 MOVQ r14-(13*8+8)(SP), R14
1993 MOVQ r15-(14*8+8)(SP), R15
1994
1995 RET
1996
1997// runtime.debugCallCheck assumes that functions defined with the
1998// DEBUG_CALL_FN macro are safe points to inject calls.
//
// DEBUG_CALL_FN(NAME, MAXSIZE) defines a WRAPPER function NAME with a
// MAXSIZE-byte frame. The function sets R12 to 0 and raises a breakpoint
// (BYTE $0xcc is INT3), then sets R12 to 1 and raises a second breakpoint,
// then returns. One function is defined per size class, from 32 bytes up
// to 65536 bytes.
1999#define DEBUG_CALL_FN(NAME,MAXSIZE) \
2000TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
2001 NO_LOCAL_POINTERS; \
2002 MOVQ $0, R12; \
2003 BYTE $0xcc; \
2004 MOVQ $1, R12; \
2005 BYTE $0xcc; \
2006 RET
2007DEBUG_CALL_FN(debugCall32<>, 32)
2008DEBUG_CALL_FN(debugCall64<>, 64)
2009DEBUG_CALL_FN(debugCall128<>, 128)
2010DEBUG_CALL_FN(debugCall256<>, 256)
2011DEBUG_CALL_FN(debugCall512<>, 512)
2012DEBUG_CALL_FN(debugCall1024<>, 1024)
2013DEBUG_CALL_FN(debugCall2048<>, 2048)
2014DEBUG_CALL_FN(debugCall4096<>, 4096)
2015DEBUG_CALL_FN(debugCall8192<>, 8192)
2016DEBUG_CALL_FN(debugCall16384<>, 16384)
2017DEBUG_CALL_FN(debugCall32768<>, 32768)
2018DEBUG_CALL_FN(debugCall65536<>, 65536)
2019
2020// func debugCallPanicked(val interface{})
//
// debugCallPanicked copies the two words of val to 0(SP) and 8(SP), sets
// R12 to 2, and raises a breakpoint (BYTE $0xcc is INT3). It returns once
// execution is resumed after the trap.
2021TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
2022 // Copy the panic value to the top of stack.
2023 MOVQ val_type+0(FP), AX
2024 MOVQ AX, 0(SP)
2025 MOVQ val_data+8(FP), AX
2026 MOVQ AX, 8(SP)
2027 MOVQ $2, R12
2028 BYTE $0xcc
2029 RET
2030
2031// Note: these functions use a special calling convention to save generated code space.
2032// Arguments are passed in registers, but the space for those arguments are allocated
2033// in the caller's stack frame. These stubs write the args into that stack space and
2034// then tail call to the corresponding runtime handler.
2035// The tail call makes these stubs disappear in backtraces.
2036// Defined as ABIInternal since they do not use the stack-based Go ABI.
2037TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
2038 MOVQ CX, BX
2039 JMP runtime·goPanicIndex<ABIInternal>(SB)
2040TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
2041 MOVQ CX, BX
2042 JMP runtime·goPanicIndexU<ABIInternal>(SB)
2043TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
2044 MOVQ CX, AX
2045 MOVQ DX, BX
2046 JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
// panicSliceAlenU forwards (CX, DX) to goPanicSliceAlenU as (AX, BX) and
// tail-jumps. Same shuffle as panicSliceAlen; presumably the unsigned variant.
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
// panicSliceAcap forwards (CX, DX) to goPanicSliceAcap as (AX, BX).
// CX is copied to AX and DX to BX before the tail jump.
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
// panicSliceAcapU forwards (CX, DX) to goPanicSliceAcapU as (AX, BX) and
// tail-jumps. Same shuffle as panicSliceAcap; presumably the unsigned variant.
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
// panicSliceB forwards (AX, CX) to goPanicSliceB as (AX, BX): AX is left
// unchanged, CX is copied into BX, and control tail-jumps to the handler.
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
// panicSliceBU forwards (AX, CX) to goPanicSliceBU as (AX, BX) and
// tail-jumps. Same shuffle as panicSliceB; presumably the unsigned variant.
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
// panicSlice3Alen forwards (DX, BX) to goPanicSlice3Alen as (AX, BX):
// DX is copied into AX while BX is left as it arrived, then the stub
// tail-jumps to the handler.
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
// panicSlice3AlenU forwards (DX, BX) to goPanicSlice3AlenU as (AX, BX) and
// tail-jumps. Same shuffle as panicSlice3Alen; presumably the unsigned variant.
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
// panicSlice3Acap forwards (DX, BX) to goPanicSlice3Acap as (AX, BX):
// only DX needs moving (into AX); BX is already in place. Tail-jumps.
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
// panicSlice3AcapU forwards (DX, BX) to goPanicSlice3AcapU as (AX, BX) and
// tail-jumps. Same shuffle as panicSlice3Acap; presumably the unsigned variant.
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
// panicSlice3B forwards (CX, DX) to goPanicSlice3B as (AX, BX).
// CX is copied to AX and DX to BX, followed by a tail jump to the handler.
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
// panicSlice3BU forwards (CX, DX) to goPanicSlice3BU as (AX, BX) and
// tail-jumps. Same shuffle as panicSlice3B; presumably the unsigned variant.
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, AX
	MOVQ	DX, BX
	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
// panicSlice3C forwards (AX, CX) to goPanicSlice3C as (AX, BX): AX stays
// put, CX is copied into BX, and the stub tail-jumps to the handler.
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
// panicSlice3CU forwards (AX, CX) to goPanicSlice3CU as (AX, BX) and
// tail-jumps. Same shuffle as panicSlice3C; presumably the unsigned variant.
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	CX, BX
	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
// panicSliceConvert forwards (DX, BX) to goPanicSliceConvert as (AX, BX):
// DX is copied into AX, BX is left as it arrived, and the stub tail-jumps
// to the handler.
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
	MOVQ	DX, AX
	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)
2094
#ifdef GOOS_android
// Use the free TLS_SLOT_APP slot #2 on Android Q.
// Earlier androids are set up in gcc_android.c.
// runtime·tls_g is an 8-byte, pointer-free (NOPTR) variable initialized to
// 16; presumably the byte offset of slot #2 with 8-byte TLS slots.
DATA runtime·tls_g+0(SB)/8, $16
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
#ifdef GOOS_windows
// On Windows runtime·tls_g is declared (8 bytes, NOPTR) without a DATA
// initializer, so it starts out zero; whatever assigns its real value is
// not in this part of the file.
GLOBL runtime·tls_g+0(SB), NOPTR, $8
#endif
2104
// The compiler and assembler's -spectre=ret mode rewrites
// all indirect CALL AX / JMP AX instructions to be
// CALL retpolineAX / JMP retpolineAX.
// See https://support.google.com/faqs/answer/7625886.
//
// RETPOLINE(reg) emits the thunk as raw bytes; reg is the hardware register
// number (0-15) of the register holding the branch target.
//   CALL setup   E8 with rel32 = 2+2: skips the two 2-byte instructions of
//                the nospec loop and pushes the address of nospec as the
//                return address.
//   nospec       PAUSE; JMP nospec (rel8 = -(2+2)): a self-loop that only a
//                mispredicted (speculative) return can land in.
//   setup        MOVQ reg, 0(SP) overwrites the pushed return address with
//                the real target; RET then transfers control to it.
// In the MOVQ encoding, (reg&8)>>1 sets REX.R for R8-R15 and (reg&7)<<3 fills
// the ModRM reg field; ModRM r/m = 100 with SIB byte 0x24 addresses 0(SP).
#define RETPOLINE(reg) \
	/*   CALL setup */      BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0; \
	/* nospec: */ \
	/*   PAUSE */           BYTE $0xF3; BYTE $0x90; \
	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2); \
	/* setup: */ \
	/*   MOVQ reg, 0(SP) */ BYTE $0x48|((reg&8)>>1); BYTE $0x89; \
	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24; \
	/*   RET */             BYTE $0xC3
2118
// One frameless, NOSPLIT thunk per general-purpose register. The argument to
// RETPOLINE is the register's hardware encoding number, so the symbol name
// and the number must stay in step (AX=0, CX=1, DX=2, BX=3, BP=5, SI=6,
// DI=7, R8-R15=8-15).
TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
/* SP is 4, can't happen / magic encodings */
TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
2135
// getfp returns the value BP holds on entry, i.e. the caller's frame
// pointer register. The function is NOFRAME, so no prologue has touched BP
// before it is copied into AX, the first ABIInternal integer result register.
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
	MOVQ	BP, AX
	RET