Text file
src/runtime/asm_386.s
Documentation: runtime
1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "go_tls.h"
7#include "funcdata.h"
8#include "textflag.h"
9
10// _rt0_386 is common startup code for most 386 systems when using
11// internal linking. This is the entry point for the program from the
12// kernel for an ordinary -buildmode=exe program. The stack holds the
13// number of arguments and the C-style argv.
14TEXT _rt0_386(SB),NOSPLIT,$8
 // The $8 frame supplies the two 4-byte slots at 0(SP) and 4(SP) filled
 // below; runtime·rt0_go begins by loading argc and argv from those slots.
15 MOVL 8(SP), AX // argc
16 LEAL 12(SP), BX // argv
17 MOVL AX, 0(SP) // slot 0: argc
18 MOVL BX, 4(SP) // slot 1: address of the argv array
19 JMP runtime·rt0_go(SB)
20
21// _rt0_386_lib is common startup code for most 386 systems when
22// using -buildmode=c-archive or -buildmode=c-shared. The linker will
23// arrange to invoke this function as a global constructor (for
24// c-archive) or when the shared library is loaded (for c-shared).
25// We expect argc and argv to be passed on the stack following the
26// usual C ABI.
27TEXT _rt0_386_lib(SB),NOSPLIT,$0
 // The frame is managed by hand ($0 is declared): BP, BX, SI and DI are
 // pushed here and popped in reverse order at the restore label.
28 PUSHL BP
29 MOVL SP, BP
30 PUSHL BX
31 PUSHL SI
32 PUSHL DI
33
34 MOVL 8(BP), AX // first stack argument, saved as argc
35 MOVL AX, _rt0_386_lib_argc<>(SB)
36 MOVL 12(BP), AX // second stack argument, saved as argv
37 MOVL AX, _rt0_386_lib_argv<>(SB)
38
39 // Synchronous initialization.
40 CALL runtime·libpreinit(SB)
41
42 SUBL $8, SP // two 4-byte argument slots for the call below; undone at restore
43
44 // Create a new thread to do the runtime initialization.
45 MOVL _cgo_sys_thread_create(SB), AX
46 TESTL AX, AX
47 JZ nocgo // hook is nil: start the thread with newosproc0 instead
48
49 // Align stack to call C function.
50 // We moved SP to BP above, but BP was clobbered by the libpreinit call.
51 MOVL SP, BP
52 ANDL $~15, SP
53
54 MOVL $_rt0_386_lib_go(SB), BX
55 MOVL BX, 0(SP) // arg 1: function the new thread runs
56 MOVL $0, 4(SP) // arg 2: zero
57
58 CALL AX
59
60 MOVL BP, SP // undo the alignment adjustment
61
62 JMP restore
63
64nocgo:
65 MOVL $0x800000, 0(SP) // stacksize = 8192KB
66 MOVL $_rt0_386_lib_go(SB), AX
67 MOVL AX, 4(SP) // fn
68 CALL runtime·newosproc0(SB)
69
70restore:
71 ADDL $8, SP // release the two argument slots reserved above
72 POPL DI
73 POPL SI
74 POPL BX
75 POPL BP
76 RET
77
78// _rt0_386_lib_go initializes the Go runtime.
79// This is started in a separate thread by _rt0_386_lib.
80TEXT _rt0_386_lib_go(SB),NOSPLIT,$8
 // Reload the argc/argv values that _rt0_386_lib stored in the file-local
 // globals and place them at 0(SP)/4(SP), where runtime·rt0_go loads them.
81 MOVL _rt0_386_lib_argc<>(SB), AX
82 MOVL AX, 0(SP) // slot 0: argc
83 MOVL _rt0_386_lib_argv<>(SB), AX
84 MOVL AX, 4(SP) // slot 1: argv
85 JMP runtime·rt0_go(SB)
86
87DATA _rt0_386_lib_argc<>(SB)/4, $0
 // File-local 4-byte globals, zero-initialised. _rt0_386_lib writes them and
 // _rt0_386_lib_go reads them back on the thread that initialises the runtime.
88GLOBL _rt0_386_lib_argc<>(SB),NOPTR, $4
89DATA _rt0_386_lib_argv<>(SB)/4, $0
90GLOBL _rt0_386_lib_argv<>(SB),NOPTR, $4
91
92TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
93 // Copy arguments forward on an even stack.
94 // Users of this function jump to it, they don't call it.
95 MOVL 0(SP), AX // argc, left at 0(SP) by the stub that jumped here
96 MOVL 4(SP), BX // argv, left at 4(SP) by the stub that jumped here
97 SUBL $128, SP // plenty of scratch
98 ANDL $~15, SP // round SP down to a 16-byte boundary
99 MOVL AX, 120(SP) // save argc, argv away
100 MOVL BX, 124(SP)
101
102 // set default stack bounds.
103 // _cgo_init may update stackguard.
104 MOVL $runtime·g0(SB), BP
105 LEAL (-64*1024+104)(SP), BX // BX = SP - 64KB + 104, used as the provisional low bound
106 MOVL BX, g_stackguard0(BP)
107 MOVL BX, g_stackguard1(BP)
108 MOVL BX, (g_stack+stack_lo)(BP)
109 MOVL SP, (g_stack+stack_hi)(BP)
110
111 // find out information about the processor we're on
112 // first see if CPUID instruction is supported.
113 PUSHFL // copy 1: kept on the stack as the original EFLAGS
114 PUSHFL // copy 2: modified and loaded back
115 XORL $(1<<21), 0(SP) // flip ID bit
116 POPFL
117 PUSHFL
118 POPL AX // AX = EFLAGS after the attempted flip
119 XORL 0(SP), AX // AX = bits that differ from the original EFLAGS
120 POPFL // restore EFLAGS
121 TESTL $(1<<21), AX
122 JNE has_cpuid // the ID bit changed
123
124bad_proc: // show that the program requires MMX.
125 MOVL $2, 0(SP) // first argument to write: 2 (presumably the stderr descriptor)
126 MOVL $bad_proc_msg<>(SB), 4(SP)
127 MOVL $0x3d, 8(SP) // 0x3d = 61, the declared size of bad_proc_msg
128 CALL runtime·write(SB)
129 MOVL $1, 0(SP) // argument to exit: 1
130 CALL runtime·exit(SB)
131 CALL runtime·abort(SB) // only reached if exit returns
132
133has_cpuid:
134 MOVL $0, AX
135 CPUID
136 MOVL AX, SI // keep the EAX result of leaf 0 in SI
137 CMPL AX, $0
138 JE nocpuinfo // leaf 0 returned EAX = 0: skip the vendor and feature checks
139
140 CMPL BX, $0x756E6547 // "Genu"
141 JNE notintel
142 CMPL DX, $0x49656E69 // "ineI"
143 JNE notintel
144 CMPL CX, $0x6C65746E // "ntel"
145 JNE notintel
146 MOVB $1, runtime·isIntel(SB)
147notintel:
148
149 // Load EAX=1 cpuid flags
150 MOVL $1, AX
151 CPUID
152 MOVL CX, DI // Move to global variable clobbers CX when generating PIC
153 MOVL AX, runtime·processorVersionInfo(SB)
154
155 // Check for MMX support
156 TESTL $(1<<23), DX // MMX
157 JZ bad_proc
158
159nocpuinfo:
160 // if there is an _cgo_init, call it to let it
161 // initialize and to set up GS. if not,
162 // we set up GS ourselves.
163 MOVL _cgo_init(SB), AX
164 TESTL AX, AX
165 JZ needtls
 // The four arguments for the _cgo_init call are stored at 0(SP)..12(SP);
 // BP still holds &runtime·g0 from the stack-bounds setup above.
166#ifdef GOOS_android
167 // arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
168 // Compensate for tls_g (+8).
169 MOVL -8(TLS), BX
170 MOVL BX, 12(SP)
171 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
172#else
173 MOVL $0, BX
174 MOVL BX, 12(SP) // arg 4: not used when using platform's TLS
175#ifdef GOOS_windows
176 MOVL $runtime·tls_g(SB), 8(SP) // arg 3: &tls_g
177#else
178 MOVL BX, 8(SP) // arg 3: not used when using platform's TLS
179#endif
180#endif
181 MOVL $setg_gcc<>(SB), BX
182 MOVL BX, 4(SP) // arg 2: setg_gcc
183 MOVL BP, 0(SP) // arg 1: g0
184 CALL AX
185
186 // update stackguard after _cgo_init
187 MOVL $runtime·g0(SB), CX
188 MOVL (g_stack+stack_lo)(CX), AX
189 ADDL $const_stackGuard, AX // guard = stack.lo + stackGuard
190 MOVL AX, g_stackguard0(CX)
191 MOVL AX, g_stackguard1(CX)
192
193#ifndef GOOS_windows
194 // skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
195 JMP ok
196#endif
197needtls:
198#ifdef GOOS_openbsd
199 // skip runtime·ldt0setup(SB) and tls test on OpenBSD in all cases
200 JMP ok
201#endif
202#ifdef GOOS_plan9
203 // skip runtime·ldt0setup(SB) and tls test on Plan 9 in all cases
204 JMP ok
205#endif
206
207 // set up %gs
208 CALL ldt0setup<>(SB)
209
210 // store through it, to make sure it works
211 get_tls(BX)
212 MOVL $0x123, g(BX) // write a marker through the TLS g slot
213 MOVL runtime·m0+m_tls(SB), AX // read it back directly from m0.tls
214 CMPL AX, $0x123
215 JEQ ok
216 MOVL AX, 0 // abort
217ok:
218 // set up m and g "registers"
219 get_tls(BX)
220 LEAL runtime·g0(SB), DX
221 MOVL DX, g(BX)
222 LEAL runtime·m0(SB), AX
223
224 // save m->g0 = g0
225 MOVL DX, m_g0(AX)
226 // save g0->m = m0
227 MOVL AX, g_m(DX)
228
229 CALL runtime·emptyfunc(SB) // fault if stack check is wrong
230
231 // convention is D is always cleared
232 CLD
233
234 CALL runtime·check(SB)
235
236 // saved argc, argv
237 MOVL 120(SP), AX
238 MOVL AX, 0(SP)
239 MOVL 124(SP), AX
240 MOVL AX, 4(SP)
241 CALL runtime·args(SB)
242 CALL runtime·osinit(SB)
243 CALL runtime·schedinit(SB)
244
245 // create a new goroutine to start program
246 PUSHL $runtime·mainPC(SB) // entry
247 CALL runtime·newproc(SB)
248 POPL AX // discard the pushed argument
249
250 // start this M
251 CALL runtime·mstart(SB)
252
253 CALL runtime·abort(SB) // only reached if mstart returns
254 RET
255
256DATA bad_proc_msg<>+0x00(SB)/61, $"This program can only be run on processors with MMX support.\n"
 // 61 bytes: the same length that rt0_go passes to runtime·write as 0x3d.
257GLOBL bad_proc_msg<>(SB), RODATA, $61
258
 // runtime·mainPC is a 4-byte read-only word holding the address of
 // runtime·main; rt0_go pushes its address as the argument to newproc.
259DATA runtime·mainPC+0(SB)/4,$runtime·main(SB)
260GLOBL runtime·mainPC(SB),RODATA,$4
261
262TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
 // Raises interrupt 3 and then returns to the caller.
263 INT $3
264 RET
265
266TEXT runtime·asminit(SB),NOSPLIT,$0-0
267 // Linux and MinGW start the FPU in extended double precision.
268 // Other operating systems use double precision.
269 // Change to double precision to match them,
270 // and to match other hardware that only has double.
271 FLDCW runtime·controlWord64(SB) // load the x87 control word from the runtime global
272 RET
273
274TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
 // Thin assembly entry point, marked TOPFRAME, that calls runtime·mstart0.
275 CALL runtime·mstart0(SB)
276 RET // not reached
277
278/*
279 * go-routine
280 */
281
282// void gogo(Gobuf*)
283// restore state from Gobuf; longjmp
284TEXT runtime·gogo(SB), NOSPLIT, $0-4
 // Loads BX = gobuf and DX = gobuf.g, then tail-jumps to gogo<>, which
 // expects exactly those two registers.
285 MOVL buf+0(FP), BX // gobuf
286 MOVL gobuf_g(BX), DX
287 MOVL 0(DX), CX // make sure g != nil
288 JMP gogo<>(SB)
289
290TEXT gogo<>(SB), NOSPLIT, $0
 // Register inputs (set by runtime·gogo): BX = gobuf, DX = the g to resume.
291 get_tls(CX)
292 MOVL DX, g(CX) // make that g the current g
293 MOVL gobuf_sp(BX), SP // restore SP
294 MOVL gobuf_ret(BX), AX
295 MOVL gobuf_ctxt(BX), DX
296 MOVL $0, gobuf_sp(BX) // clear to help garbage collector
297 MOVL $0, gobuf_ret(BX)
298 MOVL $0, gobuf_ctxt(BX)
299 MOVL gobuf_pc(BX), BX // load the saved PC last; BX stops being the gobuf pointer here
300 JMP BX
301
302// func mcall(fn func(*g))
303// Switch to m->g0's stack, call fn(g).
304// Fn must never return. It should gogo(&g->sched)
305// to keep running g.
306TEXT runtime·mcall(SB), NOSPLIT, $0-4
 // Register roles below: DI = fn, DX = TLS base, AX = calling g, SI = m->g0.
307 MOVL fn+0(FP), DI
308
309 get_tls(DX)
310 MOVL g(DX), AX // save state in g->sched
311 MOVL 0(SP), BX // caller's PC
312 MOVL BX, (g_sched+gobuf_pc)(AX)
313 LEAL fn+0(FP), BX // caller's SP
314 MOVL BX, (g_sched+gobuf_sp)(AX)
315
316 // switch to m->g0 & its stack, call fn
317 MOVL g(DX), BX
318 MOVL g_m(BX), BX
319 MOVL m_g0(BX), SI
320 CMPL SI, AX // if g == m->g0 call badmcall
321 JNE 3(PC) // skip the two-instruction badmcall jump
322 MOVL $runtime·badmcall(SB), AX
323 JMP AX
324 MOVL SI, g(DX) // g = m->g0
325 MOVL (g_sched+gobuf_sp)(SI), SP // sp = m->g0->sched.sp
326 PUSHL AX // argument to fn: the g that called mcall
327 MOVL DI, DX // DX = fn, the closure value, for the callee
328 MOVL 0(DI), DI // DI = code pointer stored in the first word of fn
329 CALL DI
330 POPL AX
 // fn returned, which it must never do: jump to badmcall2.
331 MOVL $runtime·badmcall2(SB), AX
332 JMP AX
333 RET
334
335// systemstack_switch is a dummy routine that systemstack leaves at the bottom
336// of the G stack. We need to distinguish the routine that
337// lives at the bottom of the G stack from the one that lives
338// at the top of the system stack because the one at the top of
339// the system stack terminates the stack walk (see topofstack()).
340TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
 // Body is a single RET; gosave_systemstack_switch<> records this symbol's
 // address as the saved PC in g->sched.
341 RET
342
343// func systemstack(fn func())
344TEXT runtime·systemstack(SB), NOSPLIT, $0-4
 // Runs fn on the g0 stack. If the caller is already on gsignal or g0, fn is
 // tail-called in place; if the caller is m->curg, the stack is switched for
 // the duration of the call; any other g is reported via badsystemstack.
345 MOVL fn+0(FP), DI // DI = fn
346 get_tls(CX)
347 MOVL g(CX), AX // AX = g
348 MOVL g_m(AX), BX // BX = m
349
350 CMPL AX, m_gsignal(BX)
351 JEQ noswitch
352
353 MOVL m_g0(BX), DX // DX = g0
354 CMPL AX, DX
355 JEQ noswitch
356
357 CMPL AX, m_curg(BX)
358 JNE bad
359
360 // switch stacks
361 // save our state in g->sched. Pretend to
362 // be systemstack_switch if the G stack is scanned.
363 CALL gosave_systemstack_switch<>(SB)
364
365 // switch to g0
366 get_tls(CX)
367 MOVL DX, g(CX)
368 MOVL (g_sched+gobuf_sp)(DX), BX
369 MOVL BX, SP
370
371 // call target function
372 MOVL DI, DX // DX = fn, the closure value, for the callee
373 MOVL 0(DI), DI // DI = code pointer stored in the first word of fn
374 CALL DI
375
376 // switch back to g
377 get_tls(CX)
378 MOVL g(CX), AX
379 MOVL g_m(AX), BX
380 MOVL m_curg(BX), AX
381 MOVL AX, g(CX)
382 MOVL (g_sched+gobuf_sp)(AX), SP
383 MOVL $0, (g_sched+gobuf_sp)(AX) // clear the saved SP now that it has been consumed
384 RET
385
386noswitch:
387 // already on system stack; tail call the function
388 // Using a tail call here cleans up tracebacks since we won't stop
389 // at an intermediate systemstack.
390 MOVL DI, DX
391 MOVL 0(DI), DI
392 JMP DI
393
394bad:
395 // Bad: g is not gsignal, not g0, not curg. What is it?
396 // Hide call from linker nosplit analysis.
397 MOVL $runtime·badsystemstack(SB), AX
398 CALL AX
399 INT $3 // trap if badsystemstack returns
400
401/*
402 * support for morestack
403 */
404
405// Called during function prolog when more stack is needed.
406//
407// The traceback routines see morestack on a g0 as being
408// the top of a stack (for example, morestack calling newstack
409// calling the scheduler calling newm calling gc), so we must
410// record an argument size. For that purpose, it has no arguments.
411TEXT runtime·morestack(SB),NOSPLIT,$0-0
 // On entry DX holds the context value that is stored into g->sched.ctxt
 // below (morestack_noctxt zeroes it first). BX holds m from the first check
 // until the final sanity load.
412 // Cannot grow scheduler stack (m->g0).
413 get_tls(CX)
414 MOVL g(CX), BX
415 MOVL g_m(BX), BX
416 MOVL m_g0(BX), SI
417 CMPL g(CX), SI
418 JNE 3(PC) // not g0: skip the two calls below
419 CALL runtime·badmorestackg0(SB)
420 CALL runtime·abort(SB)
421
422 // Cannot grow signal stack.
423 MOVL m_gsignal(BX), SI
424 CMPL g(CX), SI
425 JNE 3(PC) // not gsignal: skip the two calls below
426 CALL runtime·badmorestackgsignal(SB)
427 CALL runtime·abort(SB)
428
429 // Called from f.
430 // Set m->morebuf to f's caller.
431 NOP SP // tell vet SP changed - stop checking offsets
432 MOVL 4(SP), DI // f's caller's PC
433 MOVL DI, (m_morebuf+gobuf_pc)(BX)
434 LEAL 8(SP), CX // f's caller's SP
435 MOVL CX, (m_morebuf+gobuf_sp)(BX)
436 get_tls(CX) // CX was reused as scratch above; reload the TLS base
437 MOVL g(CX), SI
438 MOVL SI, (m_morebuf+gobuf_g)(BX)
439
440 // Set g->sched to context in f.
441 MOVL 0(SP), AX // f's PC
442 MOVL AX, (g_sched+gobuf_pc)(SI)
443 LEAL 4(SP), AX // f's SP
444 MOVL AX, (g_sched+gobuf_sp)(SI)
445 MOVL DX, (g_sched+gobuf_ctxt)(SI)
446
447 // Call newstack on m->g0's stack.
448 MOVL m_g0(BX), BP
449 MOVL BP, g(CX)
450 MOVL (g_sched+gobuf_sp)(BP), AX
451 MOVL -4(AX), BX // fault if CALL would, before smashing SP
452 MOVL AX, SP
453 CALL runtime·newstack(SB)
454 CALL runtime·abort(SB) // crash if newstack returns
455 RET
456
457TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0-0
 // Variant of morestack for callers with no context value: zero DX, which
 // morestack stores into g->sched.ctxt, then continue in morestack.
458 MOVL $0, DX
459 JMP runtime·morestack(SB)
460
461// reflectcall: call a function with the given argument list
462// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
463// we don't have variable-sized frames, so we use a small number
464// of constant-sized-frame functions to encode a few bits of size in the pc.
465// Caution: ugly multiline assembly macros in your future!
466
467#define DISPATCH(NAME,MAXSIZE) \
 /* If CX <= MAXSIZE (unsigned), jump to NAME; otherwise JA skips the MOVL/JMP pair and falls through. */ \
468 CMPL CX, $MAXSIZE; \
469 JA 3(PC); \
470 MOVL $NAME(SB), AX; \
471 JMP AX
472// Note: can't just "JMP NAME(SB)" - bad inlining results.
473
474TEXT ·reflectcall(SB), NOSPLIT, $0-28
 // Loads frameSize into CX and walks the DISPATCH ladder in increasing
 // size order, jumping to the first call* function whose fixed frame is
 // large enough. Sizes above 1073741824 end up in badreflectcall.
475 MOVL frameSize+20(FP), CX
476 DISPATCH(runtime·call16, 16)
477 DISPATCH(runtime·call32, 32)
478 DISPATCH(runtime·call64, 64)
479 DISPATCH(runtime·call128, 128)
480 DISPATCH(runtime·call256, 256)
481 DISPATCH(runtime·call512, 512)
482 DISPATCH(runtime·call1024, 1024)
483 DISPATCH(runtime·call2048, 2048)
484 DISPATCH(runtime·call4096, 4096)
485 DISPATCH(runtime·call8192, 8192)
486 DISPATCH(runtime·call16384, 16384)
487 DISPATCH(runtime·call32768, 32768)
488 DISPATCH(runtime·call65536, 65536)
489 DISPATCH(runtime·call131072, 131072)
490 DISPATCH(runtime·call262144, 262144)
491 DISPATCH(runtime·call524288, 524288)
492 DISPATCH(runtime·call1048576, 1048576)
493 DISPATCH(runtime·call2097152, 2097152)
494 DISPATCH(runtime·call4194304, 4194304)
495 DISPATCH(runtime·call8388608, 8388608)
496 DISPATCH(runtime·call16777216, 16777216)
497 DISPATCH(runtime·call33554432, 33554432)
498 DISPATCH(runtime·call67108864, 67108864)
499 DISPATCH(runtime·call134217728, 134217728)
500 DISPATCH(runtime·call268435456, 268435456)
501 DISPATCH(runtime·call536870912, 536870912)
502 DISPATCH(runtime·call1073741824, 1073741824)
 // No frame size matched.
503 MOVL $runtime·badreflectcall(SB), AX
504 JMP AX
505
506#define CALLFN(NAME,MAXSIZE) \
507TEXT NAME(SB), WRAPPER, $MAXSIZE-28; \
508 NO_LOCAL_POINTERS; \
509 /* copy arguments to stack */ \
510 MOVL stackArgs+8(FP), SI; \
511 MOVL stackArgsSize+12(FP), CX; \
512 MOVL SP, DI; \
513 REP;MOVSB; \
514 /* call function */ \
515 MOVL f+4(FP), DX; \
516 MOVL (DX), AX; \
517 PCDATA $PCDATA_StackMapIndex, $0; \
518 CALL AX; \
519 /* copy return values back */ \
520 MOVL stackArgsType+0(FP), DX; \
521 MOVL stackArgs+8(FP), DI; \
522 MOVL stackArgsSize+12(FP), CX; \
523 MOVL stackRetOffset+16(FP), BX; \
524 MOVL SP, SI; \
 /* skip the first stackRetOffset bytes on both sides and shrink the count to match */ \
525 ADDL BX, DI; \
526 ADDL BX, SI; \
527 SUBL BX, CX; \
 /* callRet takes its inputs in registers: DX = type, DI = destination, SI = source, CX = size */ \
528 CALL callRet<>(SB); \
529 RET
530
531// callRet copies return values back at the end of call*. This is a
532// separate function so it can allocate stack space for the arguments
533// to reflectcallmove. It does not follow the Go ABI; it expects its
534// arguments in registers.
535TEXT callRet<>(SB), NOSPLIT, $20-0
 // Register inputs (set up by the CALLFN bodies): DX = stackArgsType,
 // DI = destination, SI = source, CX = byte count. They are spilled into the
 // five outgoing argument slots for reflectcallmove.
536 MOVL DX, 0(SP)
537 MOVL DI, 4(SP)
538 MOVL SI, 8(SP)
539 MOVL CX, 12(SP)
540 MOVL $0, 16(SP) // fifth argument is always passed as zero here
541 CALL runtime·reflectcallmove(SB)
542 RET
543
544CALLFN(·call16, 16)
 // One fixed-frame function per power of two from 16 to 1073741824 bytes;
 // the list mirrors the DISPATCH ladder in reflectcall entry for entry.
545CALLFN(·call32, 32)
546CALLFN(·call64, 64)
547CALLFN(·call128, 128)
548CALLFN(·call256, 256)
549CALLFN(·call512, 512)
550CALLFN(·call1024, 1024)
551CALLFN(·call2048, 2048)
552CALLFN(·call4096, 4096)
553CALLFN(·call8192, 8192)
554CALLFN(·call16384, 16384)
555CALLFN(·call32768, 32768)
556CALLFN(·call65536, 65536)
557CALLFN(·call131072, 131072)
558CALLFN(·call262144, 262144)
559CALLFN(·call524288, 524288)
560CALLFN(·call1048576, 1048576)
561CALLFN(·call2097152, 2097152)
562CALLFN(·call4194304, 4194304)
563CALLFN(·call8388608, 8388608)
564CALLFN(·call16777216, 16777216)
565CALLFN(·call33554432, 33554432)
566CALLFN(·call67108864, 67108864)
567CALLFN(·call134217728, 134217728)
568CALLFN(·call268435456, 268435456)
569CALLFN(·call536870912, 536870912)
570CALLFN(·call1073741824, 1073741824)
571
572TEXT runtime·procyield(SB),NOSPLIT,$0-0
 // Executes PAUSE `cycles` times.
 // NOTE(review): the counter is decremented before it is tested, so a
 // cycles value of 0 would wrap and spin 2^32 times; callers presumably
 // always pass a non-zero count - confirm.
573 MOVL cycles+0(FP), AX
574again:
575 PAUSE
576 SUBL $1, AX
577 JNZ again
578 RET
579
580TEXT ·publicationBarrier(SB),NOSPLIT,$0-0
581 // Stores are already ordered on x86, so this is just a
582 // compile barrier.
 // The body is therefore a bare RET with no fence instruction.
583 RET
584
585// Save state of caller into g->sched,
586// but using fake PC from systemstack_switch.
587// Must only be called from functions with no locals ($0)
588// or else unwinding from systemstack_switch is incorrect.
589TEXT gosave_systemstack_switch<>(SB),NOSPLIT,$0
 // AX and BX are used as scratch and are saved and restored around the
 // body, so the caller's values in them are unchanged on return.
590 PUSHL AX
591 PUSHL BX
592 get_tls(BX)
593 MOVL g(BX), BX // BX = current g
594 LEAL arg+0(FP), AX // address just above this function's return PC
595 MOVL AX, (g_sched+gobuf_sp)(BX)
596 MOVL $runtime·systemstack_switch(SB), AX // the fake PC
597 MOVL AX, (g_sched+gobuf_pc)(BX)
598 MOVL $0, (g_sched+gobuf_ret)(BX)
599 // Assert ctxt is zero. See func save.
600 MOVL (g_sched+gobuf_ctxt)(BX), AX
601 TESTL AX, AX
602 JZ 2(PC) // ctxt is zero: skip the abort call
603 CALL runtime·abort(SB)
604 POPL BX
605 POPL AX
606 RET
607
608// func asmcgocall_no_g(fn, arg unsafe.Pointer)
609// Call fn(arg) aligned appropriately for the gcc ABI.
610// Called on a system stack, and there may be no g yet (during needm).
611TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-8
 // No g is consulted and no stack switch is made; the entry SP is stashed
 // in the freshly reserved area and reloaded after the call.
612 MOVL fn+0(FP), AX
613 MOVL arg+4(FP), BX
614 MOVL SP, DX // DX = SP on entry
615 SUBL $32, SP
616 ANDL $~15, SP // alignment, perhaps unnecessary
617 MOVL DX, 8(SP) // save old SP
618 MOVL BX, 0(SP) // first argument in x86-32 ABI
619 CALL AX
620 MOVL 8(SP), DX // reload the saved entry SP
621 MOVL DX, SP
622 RET
623
624// func asmcgocall(fn, arg unsafe.Pointer) int32
625// Call fn(arg) on the scheduler stack,
626// aligned appropriately for the gcc ABI.
627// See cgocall.go for more details.
628TEXT ·asmcgocall(SB),NOSPLIT,$0-12
 // Register roles: AX = fn, BX = arg, DX = SP on entry, DI = current g,
 // BP = m, SI = m->g0. The value fn leaves in AX is stored as the result.
629 MOVL fn+0(FP), AX
630 MOVL arg+4(FP), BX
631
632 MOVL SP, DX
633
634 // Figure out if we need to switch to m->g0 stack.
635 // We get called to create new OS threads too, and those
636 // come in on the m->g0 stack already. Or we might already
637 // be on the m->gsignal stack.
638 get_tls(CX)
639 MOVL g(CX), DI
640 CMPL DI, $0
641 JEQ nosave // Don't even have a G yet.
642 MOVL g_m(DI), BP
643 CMPL DI, m_gsignal(BP)
644 JEQ noswitch
645 MOVL m_g0(BP), SI
646 CMPL DI, SI
647 JEQ noswitch
648 CALL gosave_systemstack_switch<>(SB)
649 get_tls(CX)
650 MOVL SI, g(CX) // g = m->g0
651 MOVL (g_sched+gobuf_sp)(SI), SP // SP = m->g0->sched.sp
652
653noswitch:
654 // Now on a scheduling stack (a pthread-created stack).
655 SUBL $32, SP
656 ANDL $~15, SP // alignment, perhaps unnecessary
657 MOVL DI, 8(SP) // save g
658 MOVL (g_stack+stack_hi)(DI), DI
659 SUBL DX, DI // DI = g->stack.hi - entry SP
660 MOVL DI, 4(SP) // save depth in stack (can't just save SP, as stack might be copied during a callback)
661 MOVL BX, 0(SP) // first argument in x86-32 ABI
662 CALL AX
663
664 // Restore registers, g, stack pointer.
665 get_tls(CX)
666 MOVL 8(SP), DI // DI = saved g
667 MOVL (g_stack+stack_hi)(DI), SI
668 SUBL 4(SP), SI // SI = g->stack.hi - saved depth = the original SP
669 MOVL DI, g(CX)
670 MOVL SI, SP
671
672 MOVL AX, ret+8(FP)
673 RET
674nosave:
675 // Now on a scheduling stack (a pthread-created stack).
676 SUBL $32, SP
677 ANDL $~15, SP // alignment, perhaps unnecessary
678 MOVL DX, 4(SP) // save original stack pointer
679 MOVL BX, 0(SP) // first argument in x86-32 ABI
680 CALL AX
681
682 MOVL 4(SP), CX // restore original stack pointer
683 MOVL CX, SP
684 MOVL AX, ret+8(FP)
685 RET
686
687// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
688// See cgocall.go for more details.
689TEXT ·cgocallback(SB),NOSPLIT,$12-12 // Frame size must match commented places below
690 NO_LOCAL_POINTERS
691
692 // Skip cgocallbackg, just dropm when fn is nil, and frame is the saved g.
693 // It is used to dropm while thread is exiting.
694 MOVL fn+0(FP), AX
695 CMPL AX, $0
696 JNE loadg
697 // Restore the g from frame.
698 get_tls(CX)
699 MOVL frame+4(FP), BX
700 MOVL BX, g(CX)
701 JMP dropm
702
703loadg:
704 // If g is nil, either Go did not create the current thread,
705 // or (on pthread platforms) this thread has never called into Go.
706 // Call needm to obtain one for temporary use.
707 // In this case, we're running on the thread stack, so there's
708 // lots of space, but the linker doesn't know. Hide the call from
709 // the linker analysis by using an indirect call through AX.
710 get_tls(CX)
711#ifdef GOOS_windows
 // On Windows, preset BP to 0 and skip the g load below when the TLS base
 // in CX is zero.
712 MOVL $0, BP
713 CMPL CX, $0
714 JEQ 2(PC) // TODO
715#endif
716 MOVL g(CX), BP
717 CMPL BP, $0
718 JEQ needm
719 MOVL g_m(BP), BP
720 MOVL BP, savedm-4(SP) // saved copy of oldm
721 JMP havem
722needm:
723 MOVL $runtime·needAndBindM(SB), AX
724 CALL AX
725 MOVL $0, savedm-4(SP) // savedm = 0 records that the m was obtained here
726 get_tls(CX)
727 MOVL g(CX), BP
728 MOVL g_m(BP), BP
729
730 // Set m->sched.sp = SP, so that if a panic happens
731 // during the function we are about to execute, it will
732 // have a valid SP to run on the g0 stack.
733 // The next few lines (after the havem label)
734 // will save this SP onto the stack and then write
735 // the same SP back to m->sched.sp. That seems redundant,
736 // but if an unrecovered panic happens, unwindm will
737 // restore the g->sched.sp from the stack location
738 // and then systemstack will try to use it. If we don't set it here,
739 // that restored SP will be uninitialized (typically 0) and
740 // will not be usable.
741 MOVL m_g0(BP), SI
742 MOVL SP, (g_sched+gobuf_sp)(SI)
743
744havem:
745 // Now there's a valid m, and we're running on its m->g0.
746 // Save current m->g0->sched.sp on stack and then set it to SP.
747 // Save current sp in m->g0->sched.sp in preparation for
748 // switch back to m->curg stack.
749 // NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
750 MOVL m_g0(BP), SI
751 MOVL (g_sched+gobuf_sp)(SI), AX
752 MOVL AX, 0(SP)
753 MOVL SP, (g_sched+gobuf_sp)(SI)
754
755 // Switch to m->curg stack and call runtime.cgocallbackg.
756 // Because we are taking over the execution of m->curg
757 // but *not* resuming what had been running, we need to
758 // save that information (m->curg->sched) so we can restore it.
759 // We can restore m->curg->sched.sp easily, because calling
760 // runtime.cgocallbackg leaves SP unchanged upon return.
761 // To save m->curg->sched.pc, we push it onto the curg stack and
762 // open a frame the same size as cgocallback's g0 frame.
763 // Once we switch to the curg stack, the pushed PC will appear
764 // to be the return PC of cgocallback, so that the traceback
765 // will seamlessly trace back into the earlier calls.
766 MOVL m_curg(BP), SI
767 MOVL SI, g(CX)
768 MOVL (g_sched+gobuf_sp)(SI), DI // prepare stack as DI
769 MOVL (g_sched+gobuf_pc)(SI), BP
770 MOVL BP, -4(DI) // "push" return PC on the g stack
771 // Gather our arguments into registers.
772 MOVL fn+0(FP), AX
773 MOVL frame+4(FP), BX
774 MOVL ctxt+8(FP), CX
775 LEAL -(4+12)(DI), SP // Must match declared frame size
776 MOVL AX, 0(SP)
777 MOVL BX, 4(SP)
778 MOVL CX, 8(SP)
779 CALL runtime·cgocallbackg(SB)
780
781 // Restore g->sched (== m->curg->sched) from saved values.
782 get_tls(CX)
783 MOVL g(CX), SI
784 MOVL 12(SP), BP // Must match declared frame size
785 MOVL BP, (g_sched+gobuf_pc)(SI)
786 LEAL (12+4)(SP), DI // Must match declared frame size
787 MOVL DI, (g_sched+gobuf_sp)(SI)
788
789 // Switch back to m->g0's stack and restore m->g0->sched.sp.
790 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
791 // so we do not have to restore it.)
792 MOVL g(CX), BP
793 MOVL g_m(BP), BP
794 MOVL m_g0(BP), SI
795 MOVL SI, g(CX)
796 MOVL (g_sched+gobuf_sp)(SI), SP
797 MOVL 0(SP), AX // the g0 sched.sp value saved at havem
798 MOVL AX, (g_sched+gobuf_sp)(SI)
799
800 // If the m on entry was nil, we called needm above to borrow an m,
801 // 1. for the duration of the call on non-pthread platforms,
802 // 2. or the duration of the C thread alive on pthread platforms.
803 // If the m on entry wasn't nil,
804 // 1. the thread might be a Go thread,
805 // 2. or it wasn't the first call from a C thread on pthread platforms,
806 // since then we skip dropm to reuse the m in the first call.
807 MOVL savedm-4(SP), DX
808 CMPL DX, $0
809 JNE droppedm // there was an m on entry: nothing to drop
810
811 // Skip dropm to reuse it in the next call, when a pthread key has been created.
812 MOVL _cgo_pthread_key_created(SB), DX
813 // It means cgo is disabled when _cgo_pthread_key_created is a nil pointer, need dropm.
814 CMPL DX, $0
815 JEQ dropm
816 CMPL (DX), $0 // non-zero value behind the pointer: keep the m
817 JNE droppedm
818
819dropm:
 // Indirect call through AX, as for needAndBindM above.
820 MOVL $runtime·dropm(SB), AX
821 CALL AX
822droppedm:
823
824 // Done!
825 RET
826
827// void setg(G*); set g. for use by needm.
828TEXT runtime·setg(SB), NOSPLIT, $0-4
 // BX = the g to install. On Windows the FS-relative slot at offset tls_g is
 // updated first: cleared (and the function returns) when g is nil, otherwise
 // pointed at g->m->tls. The g is then stored through the TLS base.
829 MOVL gg+0(FP), BX
830#ifdef GOOS_windows
831 MOVL runtime·tls_g(SB), CX
832 CMPL BX, $0
833 JNE settls
834 MOVL $0, 0(CX)(FS)
835 RET
836settls:
837 MOVL g_m(BX), AX
838 LEAL m_tls(AX), AX // AX = &g->m->tls
839 MOVL AX, 0(CX)(FS)
840#endif
841 get_tls(CX)
842 MOVL BX, g(CX)
843 RET
844
845// void setg_gcc(G*); set g. for use by gcc
846TEXT setg_gcc<>(SB), NOSPLIT, $0
 // Stores its single stack argument into the TLS g slot, using only AX and
 // DX as scratch. rt0_go passes this function's address to _cgo_init.
847 get_tls(AX)
848 MOVL gg+0(FP), DX
849 MOVL DX, g(AX)
850 RET
851
852TEXT runtime·abort(SB),NOSPLIT,$0-0
 // Raises interrupt 3; if execution continues past it, spin forever so the
 // function never returns to its caller.
853 INT $3
854loop:
855 JMP loop
856
857// check that SP is in range [g->stack.lo, g->stack.hi)
858TEXT runtime·stackcheck(SB), NOSPLIT, $0-0
 // Both comparisons are unsigned and strict; each JHI skips the abort call
 // that follows it when the bound holds.
859 get_tls(CX)
860 MOVL g(CX), AX
861 CMPL (g_stack+stack_hi)(AX), SP
862 JHI 2(PC) // continue if g->stack.hi > SP
863 CALL runtime·abort(SB)
864 CMPL SP, (g_stack+stack_lo)(AX)
865 JHI 2(PC) // continue if SP > g->stack.lo
866 CALL runtime·abort(SB)
867 RET
868
869// func cputicks() int64
870TEXT runtime·cputicks(SB),NOSPLIT,$0-8
 // Returns the 64-bit counter read by RDTSCP or RDTSC: AX is stored as the
 // low half of the result and DX as the high half. All three paths below
 // converge on the done label.
871 // LFENCE/MFENCE instruction support is dependent on SSE2.
872 // When no SSE2 support is present do not enforce any serialization
873 // since using CPUID to serialize the instruction stream is
874 // very costly.
875#ifdef GO386_softfloat
876 JMP rdtsc // no fence instructions available
877#endif
878 CMPB internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
879 JNE fences
880 // Instruction stream serializing RDTSCP is supported.
881 // RDTSCP is supported by Intel Nehalem (2008) and
882 // AMD K8 Rev. F (2006) and newer.
883 RDTSCP
884done:
885 MOVL AX, ret_lo+0(FP)
886 MOVL DX, ret_hi+4(FP)
887 RET
888fences:
889 // MFENCE is instruction stream serializing and flushes the
890 // store buffers on AMD. The serialization semantics of LFENCE on AMD
891 // are dependent on MSR C001_1029 and CPU generation.
892 // LFENCE on Intel does wait for all previous instructions to have executed.
893 // Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
894 // previous instructions executed and all previous loads and stores to be globally visible.
895 // Using MFENCE;LFENCE here aligns the serializing properties without
896 // runtime detection of CPU manufacturer.
897 MFENCE
898 LFENCE
899rdtsc:
900 RDTSC
901 JMP done
902
903TEXT ldt0setup<>(SB),NOSPLIT,$16-0
 // Calls runtime·setldt with three stack arguments: entry number 7, the
 // address of m0.tls, and the size 32.
904#ifdef GOOS_windows
905 CALL runtime·wintls(SB)
906#endif
907 // set up ldt 7 to point at m0.tls
908 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go.
909 // the entry number is just a hint. setldt will set up GS with what it used.
910 MOVL $7, 0(SP)
911 LEAL runtime·m0+m_tls(SB), AX
912 MOVL AX, 4(SP)
913 MOVL $32, 8(SP) // sizeof(tls array)
914 CALL runtime·setldt(SB)
915 RET
916
917TEXT runtime·emptyfunc(SB),0,$0-0
 // Declared with flags 0 (not NOSPLIT) and an empty body. rt0_go calls it
 // with the note "fault if stack check is wrong".
918 RET
919
920// hash function using AES hardware instructions
921TEXT runtime·memhash(SB),NOSPLIT,$0-16
 // Uses the AES path when runtime·useAeshash is non-zero, otherwise
 // tail-jumps to memhashFallback. The AES path loads the register
 // arguments aeshashbody<> expects: AX = data, BX = length, DX = &ret.
922 CMPB runtime·useAeshash(SB), $0
923 JEQ noaes
924 MOVL p+0(FP), AX // ptr to data
925 MOVL s+8(FP), BX // size
926 LEAL ret+12(FP), DX
927 JMP aeshashbody<>(SB)
928noaes:
929 JMP runtime·memhashFallback(SB)
930
931TEXT runtime·strhash(SB),NOSPLIT,$0-12
 // Same structure as memhash, except that p points at a string header: the
 // data pointer is read from offset 0 and the length from offset 4.
932 CMPB runtime·useAeshash(SB), $0
933 JEQ noaes
934 MOVL p+0(FP), AX // ptr to string object
935 MOVL 4(AX), BX // length of string
936 MOVL (AX), AX // string data
937 LEAL ret+8(FP), DX
938 JMP aeshashbody<>(SB)
939noaes:
940 JMP runtime·strhashFallback(SB)
941
942// AX: data
943// BX: length
944// DX: address to put return value
945TEXT aeshashbody<>(SB),NOSPLIT,$0-0
 // Reached by JMP from memhash/strhash, so h+4(FP) below still refers to
 // the seed argument of that caller. The result is the low 32 bits of the
 // final XMM state, stored through DX. Inputs are dispatched by length:
 // 0, 1-15, 16, 17-32, 33-64 and 65+ bytes.
946 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
947 PINSRW $4, BX, X0 // 16 bits of length
948 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
949 MOVO X0, X1 // save unscrambled seed
950 PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
951 AESENC X0, X0 // scramble seed
952
953 CMPL BX, $16
954 JB aes0to15
955 JE aes16
956 CMPL BX, $32
957 JBE aes17to32
958 CMPL BX, $64
959 JBE aes33to64
960 JMP aes65plus
961
962aes0to15:
963 TESTL BX, BX
964 JE aes0
965
966 ADDL $16, AX
967 TESTW $0xff0, AX // are bits 4-11 of (data+16) all zero?
968 JE endofpage
969
970 // 16 bytes loaded at this address won't cross
971 // a page boundary, so we can load it directly.
972 MOVOU -16(AX), X1
973 ADDL BX, BX // BX = 2*len, so (BX*8) below indexes 16-byte table entries
974 PAND masks<>(SB)(BX*8), X1
975
976final1:
977 PXOR X0, X1 // xor data with seed
978 AESENC X1, X1 // scramble combo 3 times
979 AESENC X1, X1
980 AESENC X1, X1
981 MOVL X1, (DX)
982 RET
983
984endofpage:
985 // address ends in 1111xxxx. Might be up against
986 // a page boundary, so load ending at last byte.
987 // Then shift bytes down using pshufb.
988 MOVOU -32(AX)(BX*1), X1
989 ADDL BX, BX // BX = 2*len, so (BX*8) below indexes 16-byte table entries
990 PSHUFB shifts<>(SB)(BX*8), X1
991 JMP final1
992
993aes0:
994 // Return scrambled input seed
995 AESENC X0, X0
996 MOVL X0, (DX)
997 RET
998
999aes16:
1000 MOVOU (AX), X1
1001 JMP final1
1002
1003aes17to32:
1004 // make second starting seed
1005 PXOR runtime·aeskeysched+16(SB), X1
1006 AESENC X1, X1
1007
1008 // load data to be hashed
1009 MOVOU (AX), X2 // first 16 bytes
1010 MOVOU -16(AX)(BX*1), X3 // last 16 bytes (overlaps the first block when len < 32)
1011
1012 // xor with seed
1013 PXOR X0, X2
1014 PXOR X1, X3
1015
1016 // scramble 3 times
1017 AESENC X2, X2
1018 AESENC X3, X3
1019 AESENC X2, X2
1020 AESENC X3, X3
1021 AESENC X2, X2
1022 AESENC X3, X3
1023
1024 // combine results
1025 PXOR X3, X2
1026 MOVL X2, (DX)
1027 RET
1028
1029aes33to64:
1030 // make 3 more starting seeds
1031 MOVO X1, X2
1032 MOVO X1, X3
1033 PXOR runtime·aeskeysched+16(SB), X1
1034 PXOR runtime·aeskeysched+32(SB), X2
1035 PXOR runtime·aeskeysched+48(SB), X3
1036 AESENC X1, X1
1037 AESENC X2, X2
1038 AESENC X3, X3
1039
 // load the first 32 and the last 32 bytes (they overlap when len < 64)
1040 MOVOU (AX), X4
1041 MOVOU 16(AX), X5
1042 MOVOU -32(AX)(BX*1), X6
1043 MOVOU -16(AX)(BX*1), X7
1044
 // xor each block with its own seed
1045 PXOR X0, X4
1046 PXOR X1, X5
1047 PXOR X2, X6
1048 PXOR X3, X7
1049
 // scramble each lane 3 times
1050 AESENC X4, X4
1051 AESENC X5, X5
1052 AESENC X6, X6
1053 AESENC X7, X7
1054
1055 AESENC X4, X4
1056 AESENC X5, X5
1057 AESENC X6, X6
1058 AESENC X7, X7
1059
1060 AESENC X4, X4
1061 AESENC X5, X5
1062 AESENC X6, X6
1063 AESENC X7, X7
1064
 // fold the four lanes into X4
1065 PXOR X6, X4
1066 PXOR X7, X5
1067 PXOR X5, X4
1068 MOVL X4, (DX)
1069 RET
1070
1071aes65plus:
1072 // make 3 more starting seeds
1073 MOVO X1, X2
1074 MOVO X1, X3
1075 PXOR runtime·aeskeysched+16(SB), X1
1076 PXOR runtime·aeskeysched+32(SB), X2
1077 PXOR runtime·aeskeysched+48(SB), X3
1078 AESENC X1, X1
1079 AESENC X2, X2
1080 AESENC X3, X3
1081
1082 // start with last (possibly overlapping) block
1083 MOVOU -64(AX)(BX*1), X4
1084 MOVOU -48(AX)(BX*1), X5
1085 MOVOU -32(AX)(BX*1), X6
1086 MOVOU -16(AX)(BX*1), X7
1087
1088 // scramble state once
1089 AESENC X0, X4
1090 AESENC X1, X5
1091 AESENC X2, X6
1092 AESENC X3, X7
1093
1094 // compute number of remaining 64-byte blocks
1095 DECL BX
1096 SHRL $6, BX // BX = (len-1)/64, at least 1 on this path since len >= 65
1097
1098aesloop:
1099 // scramble state, xor in a block
1100 MOVOU (AX), X0
1101 MOVOU 16(AX), X1
1102 MOVOU 32(AX), X2
1103 MOVOU 48(AX), X3
1104 AESENC X0, X4
1105 AESENC X1, X5
1106 AESENC X2, X6
1107 AESENC X3, X7
1108
1109 // scramble state
1110 AESENC X4, X4
1111 AESENC X5, X5
1112 AESENC X6, X6
1113 AESENC X7, X7
1114
1115 ADDL $64, AX // advance to the next 64-byte block
1116 DECL BX
1117 JNE aesloop
1118
1119 // 3 more scrambles to finish
1120 AESENC X4, X4
1121 AESENC X5, X5
1122 AESENC X6, X6
1123 AESENC X7, X7
1124
1125 AESENC X4, X4
1126 AESENC X5, X5
1127 AESENC X6, X6
1128 AESENC X7, X7
1129
1130 AESENC X4, X4
1131 AESENC X5, X5
1132 AESENC X6, X6
1133 AESENC X7, X7
1134
 // fold the four lanes into X4
1135 PXOR X6, X4
1136 PXOR X7, X5
1137 PXOR X5, X4
1138 MOVL X4, (DX)
1139 RET
1140
1141TEXT runtime·memhash32(SB),NOSPLIT,$0-12
 // AES path: the seed goes in the low dword of X0 and the 4 data bytes in
 // dword 1; three AESENC rounds are then applied with the first three
 // 16-byte entries of aeskeysched, and the low 32 bits are returned.
 // Without AES hashing this tail-jumps to memhash32Fallback.
1142 CMPB runtime·useAeshash(SB), $0
1143 JEQ noaes
1144 MOVL p+0(FP), AX // ptr to data
1145 MOVL h+4(FP), X0 // seed
1146 PINSRD $1, (AX), X0 // data
1147 AESENC runtime·aeskeysched+0(SB), X0
1148 AESENC runtime·aeskeysched+16(SB), X0
1149 AESENC runtime·aeskeysched+32(SB), X0
1150 MOVL X0, ret+8(FP)
1151 RET
1152noaes:
1153 JMP runtime·memhash32Fallback(SB)
1154
1155TEXT runtime·memhash64(SB),NOSPLIT,$0-12
 // AES path: the 8 data bytes go in the low half of X0 and the seed in
 // dword 2; three AESENC rounds are then applied with the first three
 // 16-byte entries of aeskeysched, and the low 32 bits are returned.
 // Without AES hashing this tail-jumps to memhash64Fallback.
1156 CMPB runtime·useAeshash(SB), $0
1157 JEQ noaes
1158 MOVL p+0(FP), AX // ptr to data
1159 MOVQ (AX), X0 // data
1160 PINSRD $2, h+4(FP), X0 // seed
1161 AESENC runtime·aeskeysched+0(SB), X0
1162 AESENC runtime·aeskeysched+16(SB), X0
1163 AESENC runtime·aeskeysched+32(SB), X0
1164 MOVL X0, ret+8(FP)
1165 RET
1166noaes:
1167 JMP runtime·memhash64Fallback(SB)
1168
1169// simple mask to get rid of data in the high part of the register.
// The table holds sixteen 16-byte entries (256 bytes, read-only). Entry i
// lives at byte offset i*16 and has its low i bytes (little-endian) set to
// 0xff with all remaining bytes zero; entry 0 is all zero.
1170DATA masks<>+0x00(SB)/4, $0x00000000 // entry 0: no bytes kept
1171DATA masks<>+0x04(SB)/4, $0x00000000
1172DATA masks<>+0x08(SB)/4, $0x00000000
1173DATA masks<>+0x0c(SB)/4, $0x00000000
1174
1175DATA masks<>+0x10(SB)/4, $0x000000ff // entry 1: low 1 byte kept
1176DATA masks<>+0x14(SB)/4, $0x00000000
1177DATA masks<>+0x18(SB)/4, $0x00000000
1178DATA masks<>+0x1c(SB)/4, $0x00000000
1179
1180DATA masks<>+0x20(SB)/4, $0x0000ffff // entry 2: low 2 bytes kept
1181DATA masks<>+0x24(SB)/4, $0x00000000
1182DATA masks<>+0x28(SB)/4, $0x00000000
1183DATA masks<>+0x2c(SB)/4, $0x00000000
1184
1185DATA masks<>+0x30(SB)/4, $0x00ffffff // entry 3: low 3 bytes kept
1186DATA masks<>+0x34(SB)/4, $0x00000000
1187DATA masks<>+0x38(SB)/4, $0x00000000
1188DATA masks<>+0x3c(SB)/4, $0x00000000
1189
1190DATA masks<>+0x40(SB)/4, $0xffffffff // entry 4: low 4 bytes kept
1191DATA masks<>+0x44(SB)/4, $0x00000000
1192DATA masks<>+0x48(SB)/4, $0x00000000
1193DATA masks<>+0x4c(SB)/4, $0x00000000
1194
1195DATA masks<>+0x50(SB)/4, $0xffffffff // entry 5: low 5 bytes kept
1196DATA masks<>+0x54(SB)/4, $0x000000ff
1197DATA masks<>+0x58(SB)/4, $0x00000000
1198DATA masks<>+0x5c(SB)/4, $0x00000000
1199
1200DATA masks<>+0x60(SB)/4, $0xffffffff // entry 6: low 6 bytes kept
1201DATA masks<>+0x64(SB)/4, $0x0000ffff
1202DATA masks<>+0x68(SB)/4, $0x00000000
1203DATA masks<>+0x6c(SB)/4, $0x00000000
1204
1205DATA masks<>+0x70(SB)/4, $0xffffffff // entry 7: low 7 bytes kept
1206DATA masks<>+0x74(SB)/4, $0x00ffffff
1207DATA masks<>+0x78(SB)/4, $0x00000000
1208DATA masks<>+0x7c(SB)/4, $0x00000000
1209
1210DATA masks<>+0x80(SB)/4, $0xffffffff // entry 8: low 8 bytes kept
1211DATA masks<>+0x84(SB)/4, $0xffffffff
1212DATA masks<>+0x88(SB)/4, $0x00000000
1213DATA masks<>+0x8c(SB)/4, $0x00000000
1214
1215DATA masks<>+0x90(SB)/4, $0xffffffff // entry 9: low 9 bytes kept
1216DATA masks<>+0x94(SB)/4, $0xffffffff
1217DATA masks<>+0x98(SB)/4, $0x000000ff
1218DATA masks<>+0x9c(SB)/4, $0x00000000
1219
1220DATA masks<>+0xa0(SB)/4, $0xffffffff // entry 10: low 10 bytes kept
1221DATA masks<>+0xa4(SB)/4, $0xffffffff
1222DATA masks<>+0xa8(SB)/4, $0x0000ffff
1223DATA masks<>+0xac(SB)/4, $0x00000000
1224
1225DATA masks<>+0xb0(SB)/4, $0xffffffff // entry 11: low 11 bytes kept
1226DATA masks<>+0xb4(SB)/4, $0xffffffff
1227DATA masks<>+0xb8(SB)/4, $0x00ffffff
1228DATA masks<>+0xbc(SB)/4, $0x00000000
1229
1230DATA masks<>+0xc0(SB)/4, $0xffffffff // entry 12: low 12 bytes kept
1231DATA masks<>+0xc4(SB)/4, $0xffffffff
1232DATA masks<>+0xc8(SB)/4, $0xffffffff
1233DATA masks<>+0xcc(SB)/4, $0x00000000
1234
1235DATA masks<>+0xd0(SB)/4, $0xffffffff // entry 13: low 13 bytes kept
1236DATA masks<>+0xd4(SB)/4, $0xffffffff
1237DATA masks<>+0xd8(SB)/4, $0xffffffff
1238DATA masks<>+0xdc(SB)/4, $0x000000ff
1239
1240DATA masks<>+0xe0(SB)/4, $0xffffffff // entry 14: low 14 bytes kept
1241DATA masks<>+0xe4(SB)/4, $0xffffffff
1242DATA masks<>+0xe8(SB)/4, $0xffffffff
1243DATA masks<>+0xec(SB)/4, $0x0000ffff
1244
1245DATA masks<>+0xf0(SB)/4, $0xffffffff // entry 15: low 15 bytes kept
1246DATA masks<>+0xf4(SB)/4, $0xffffffff
1247DATA masks<>+0xf8(SB)/4, $0xffffffff
1248DATA masks<>+0xfc(SB)/4, $0x00ffffff
1249
1250GLOBL masks<>(SB),RODATA,$256
1251
1252// these are arguments to pshufb. They move data down from
1253// the high bytes of the register to the low bytes of the register.
1254// index is how many bytes to move.
// The table holds sixteen 16-byte entries (256 bytes, read-only); entry i
// lives at byte offset i*16. For i >= 1 the low i control bytes select source
// bytes 16-i..15 and every other control byte is 0xff, which pshufb turns
// into a zero result byte. Entry 0 is all zero control bytes
// (NOTE(review): presumably never used as a shuffle control -- confirm).
1255DATA shifts<>+0x00(SB)/4, $0x00000000 // entry 0: all control bytes zero
1256DATA shifts<>+0x04(SB)/4, $0x00000000
1257DATA shifts<>+0x08(SB)/4, $0x00000000
1258DATA shifts<>+0x0c(SB)/4, $0x00000000
1259
1260DATA shifts<>+0x10(SB)/4, $0xffffff0f // entry 1: dst 0 <- src 15
1261DATA shifts<>+0x14(SB)/4, $0xffffffff
1262DATA shifts<>+0x18(SB)/4, $0xffffffff
1263DATA shifts<>+0x1c(SB)/4, $0xffffffff
1264
1265DATA shifts<>+0x20(SB)/4, $0xffff0f0e // entry 2: dst 0-1 <- src 14-15
1266DATA shifts<>+0x24(SB)/4, $0xffffffff
1267DATA shifts<>+0x28(SB)/4, $0xffffffff
1268DATA shifts<>+0x2c(SB)/4, $0xffffffff
1269
1270DATA shifts<>+0x30(SB)/4, $0xff0f0e0d // entry 3: dst 0-2 <- src 13-15
1271DATA shifts<>+0x34(SB)/4, $0xffffffff
1272DATA shifts<>+0x38(SB)/4, $0xffffffff
1273DATA shifts<>+0x3c(SB)/4, $0xffffffff
1274
1275DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c // entry 4: dst 0-3 <- src 12-15
1276DATA shifts<>+0x44(SB)/4, $0xffffffff
1277DATA shifts<>+0x48(SB)/4, $0xffffffff
1278DATA shifts<>+0x4c(SB)/4, $0xffffffff
1279
1280DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b // entry 5: dst 0-4 <- src 11-15
1281DATA shifts<>+0x54(SB)/4, $0xffffff0f
1282DATA shifts<>+0x58(SB)/4, $0xffffffff
1283DATA shifts<>+0x5c(SB)/4, $0xffffffff
1284
1285DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a // entry 6: dst 0-5 <- src 10-15
1286DATA shifts<>+0x64(SB)/4, $0xffff0f0e
1287DATA shifts<>+0x68(SB)/4, $0xffffffff
1288DATA shifts<>+0x6c(SB)/4, $0xffffffff
1289
1290DATA shifts<>+0x70(SB)/4, $0x0c0b0a09 // entry 7: dst 0-6 <- src 9-15
1291DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
1292DATA shifts<>+0x78(SB)/4, $0xffffffff
1293DATA shifts<>+0x7c(SB)/4, $0xffffffff
1294
1295DATA shifts<>+0x80(SB)/4, $0x0b0a0908 // entry 8: dst 0-7 <- src 8-15
1296DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
1297DATA shifts<>+0x88(SB)/4, $0xffffffff
1298DATA shifts<>+0x8c(SB)/4, $0xffffffff
1299
1300DATA shifts<>+0x90(SB)/4, $0x0a090807 // entry 9: dst 0-8 <- src 7-15
1301DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
1302DATA shifts<>+0x98(SB)/4, $0xffffff0f
1303DATA shifts<>+0x9c(SB)/4, $0xffffffff
1304
1305DATA shifts<>+0xa0(SB)/4, $0x09080706 // entry 10: dst 0-9 <- src 6-15
1306DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
1307DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
1308DATA shifts<>+0xac(SB)/4, $0xffffffff
1309
1310DATA shifts<>+0xb0(SB)/4, $0x08070605 // entry 11: dst 0-10 <- src 5-15
1311DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
1312DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
1313DATA shifts<>+0xbc(SB)/4, $0xffffffff
1314
1315DATA shifts<>+0xc0(SB)/4, $0x07060504 // entry 12: dst 0-11 <- src 4-15
1316DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
1317DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
1318DATA shifts<>+0xcc(SB)/4, $0xffffffff
1319
1320DATA shifts<>+0xd0(SB)/4, $0x06050403 // entry 13: dst 0-12 <- src 3-15
1321DATA shifts<>+0xd4(SB)/4, $0x0a090807
1322DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
1323DATA shifts<>+0xdc(SB)/4, $0xffffff0f
1324
1325DATA shifts<>+0xe0(SB)/4, $0x05040302 // entry 14: dst 0-13 <- src 2-15
1326DATA shifts<>+0xe4(SB)/4, $0x09080706
1327DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
1328DATA shifts<>+0xec(SB)/4, $0xffff0f0e
1329
1330DATA shifts<>+0xf0(SB)/4, $0x04030201 // entry 15: dst 0-14 <- src 1-15
1331DATA shifts<>+0xf4(SB)/4, $0x08070605
1332DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
1333DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
1334
1335GLOBL shifts<>(SB),RODATA,$256
1336
// checkASM returns a one-byte result at ret+0(FP): 1 when the addresses of
// both masks<> and shifts<> have their low four bits clear, 0 otherwise.
// It uses AX and BX as scratch and has no stack frame.
1337TEXT ·checkASM(SB),NOSPLIT,$0-1
1338 // check that masks<>(SB) and shifts<>(SB) are both 16-byte aligned
1339 MOVL $masks<>(SB), AX
1340 MOVL $shifts<>(SB), BX
1341 ORL BX, AX // any low bit set in either address survives the OR
1342 TESTL $15, AX // examine the low four bits of the combined address
1343 SETEQ ret+0(FP) // ret = 1 if those bits are all zero
1344 RET
1345
// return0 sets AX to 0 and returns. It has no stack frame and touches no
// other register; MOVL (unlike XORL) leaves the flags unchanged.
1346TEXT runtime·return0(SB), NOSPLIT, $0
1347 MOVL $0, AX
1348 RET
1349
1350// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1351// Must obey the gcc calling convention.
// The result is returned in AX. CX is used as scratch for the TLS lookup;
// no other registers are written and no stack frame is used.
1352TEXT _cgo_topofstack(SB),NOSPLIT,$0
1353 get_tls(CX)
1354 MOVL g(CX), AX // AX = g
1355 MOVL g_m(AX), AX // AX = g.m
1356 MOVL m_curg(AX), AX // AX = g.m.curg
1357 MOVL (g_stack+stack_hi)(AX), AX // AX = g.m.curg.stack.hi
1358 RET
1359
1360// The top-most function running on a goroutine
1361// returns to goexit+PCQuantum.
// The body is a one-byte NOP, a CALL to runtime·goexit1, and a trailing
// one-byte NOP. There is no RET: goexit1 does not return, and the trailing
// NOP keeps the CALL's return address inside goexit's code range.
1362TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME,$0-0
1363 BYTE $0x90 // NOP
1364 CALL runtime·goexit1(SB) // does not return
1365 // traceback from goexit1 must hit code range of goexit
1366 BYTE $0x90 // NOP
1367
1368// Add a module's moduledata to the linked list of moduledata objects. This
1369// is called from .init_array by a function generated in the linker and so
1370// follows the platform ABI wrt register preservation -- it only touches AX,
1371// CX (implicitly) and DX, but it does not follow the ABI wrt arguments:
1372// instead the pointer to the moduledata is passed in AX.
// The new entry is appended at the tail: the current last entry's next field
// is pointed at it, and runtime·lastmoduledatap is then updated to it.
1373TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1374 MOVL runtime·lastmoduledatap(SB), DX // DX = current tail of the list
1375 MOVL AX, moduledata_next(DX) // tail.next = new moduledata
1376 MOVL AX, runtime·lastmoduledatap(SB) // new moduledata becomes the tail
1377 RET
1378
// uint32tofloat64 converts the 32-bit value a+0(FP) to a float64 stored at
// ret+4(FP). It uses an 8-byte stack temporary: a is written to the low
// dword and 0 to the high dword, so the slot holds a zero-extended 64-bit
// integer, which is then loaded onto the x87 stack and stored as a float64.
1379TEXT runtime·uint32tofloat64(SB),NOSPLIT,$8-12
1380 MOVL a+0(FP), AX
1381 MOVL AX, 0(SP) // low 32 bits of the temporary = a
1382 MOVL $0, 4(SP) // high 32 bits = 0 (zero extension)
1383 FMOVV 0(SP), F0 // load the 8-byte temporary as a 64-bit integer
1384 FMOVDP F0, ret+4(FP) // store as float64 and pop the x87 stack
1385 RET
1386
// float64touint32 converts the float64 at a+0(FP) to a 32-bit value stored at
// ret+8(FP). Frame layout ($12): 0(SP) holds the saved x87 control word and
// 4(SP)..11(SP) holds the 64-bit integer result, of which only the low 32
// bits are returned. The control word is swapped for
// runtime·controlWord64trunc around the conversion and restored afterwards
// (presumably that word selects truncating rounding -- confirm at its
// definition).
1387TEXT runtime·float64touint32(SB),NOSPLIT,$12-12
1388 FMOVD a+0(FP), F0 // push the float64 argument on the x87 stack
1389 FSTCW 0(SP) // save the current control word
1390 FLDCW runtime·controlWord64trunc(SB) // install the conversion control word
1391 FMOVVP F0, 4(SP) // store as a 64-bit integer and pop
1392 FLDCW 0(SP) // restore the saved control word
1393 MOVL 4(SP), AX // low 32 bits of the integer result
1394 MOVL AX, ret+8(FP)
1395 RET
1396
1397// gcWriteBarrier informs the GC about heap pointer writes.
1398//
1399// gcWriteBarrier returns space in a write barrier buffer which
1400// should be filled in by the caller.
1401// gcWriteBarrier does NOT follow the Go ABI. It accepts the
1402// number of bytes of buffer needed in DI, and returns a pointer
1403// to the buffer space in DI.
1404// It clobbers FLAGS. It does not clobber any general-purpose registers,
1405// but may clobber others (e.g., SSE registers).
1406// Typical use would be, when doing *(CX+88) = AX
1407// CMPL $0, runtime.writeBarrier(SB)
1408// JEQ dowrite
1409// CALL runtime.gcWriteBarrier2(SB)
1410// MOVL AX, (DI)
1411// MOVL 88(CX), DX
1412// MOVL DX, 4(DI)
1413// dowrite:
1414// MOVL AX, 88(CX)
// Frame layout ($28 bytes), as used by the code below:
//   0(SP)  DI  (saved only on the flush path)
//   4(SP)  AX  (flush path)
//   8(SP)  DX  (flush path)
//   12(SP) BP  (flush path)
//   16(SP) SI  (flush path)
//   20(SP) CX  (saved on entry)
//   24(SP) BX  (saved on entry)
// Fast path: reserve DI bytes in the current P's write barrier buffer and
// return the start of the reserved space in DI. If the reservation would run
// past the buffer end, call runtime·wbBufFlush and retry from the top.
1415TEXT gcWriteBarrier<>(SB),NOSPLIT,$28
1416 // Save the registers clobbered by the fast path. This is slightly
1417 // faster than having the caller spill these.
1418 MOVL CX, 20(SP)
1419 MOVL BX, 24(SP)
1420retry:
1421 // TODO: Consider passing g.m.p in as an argument so they can be shared
1422 // across a sequence of write barriers.
1423 get_tls(BX)
1424 MOVL g(BX), BX // BX = g
1425 MOVL g_m(BX), BX // BX = g.m
1426 MOVL m_p(BX), BX // BX = g.m.p
1427 // Get current buffer write position.
1428 MOVL (p_wbBuf+wbBuf_next)(BX), CX // original next position
1429 ADDL DI, CX // new next position
1430 // Is the buffer full?
1431 CMPL CX, (p_wbBuf+wbBuf_end)(BX)
1432 JA flush // new next is past end (unsigned compare)
1433 // Commit to the larger buffer.
1434 MOVL CX, (p_wbBuf+wbBuf_next)(BX)
1435 // Make return value (the original next position)
1436 SUBL DI, CX
1437 MOVL CX, DI
1438 // Restore registers.
1439 MOVL 20(SP), CX
1440 MOVL 24(SP), BX
1441 RET
1442
1443flush:
1444 // Save all general purpose registers since these could be
1445 // clobbered by wbBufFlush and were not saved by the caller.
1446 MOVL DI, 0(SP)
1447 MOVL AX, 4(SP)
1448 // BX already saved
1449 // CX already saved
1450 MOVL DX, 8(SP)
1451 MOVL BP, 12(SP)
1452 MOVL SI, 16(SP)
1453 // DI already saved
1454
1455 CALL runtime·wbBufFlush(SB)
1456
1457 MOVL 0(SP), DI
1458 MOVL 4(SP), AX
1459 MOVL 8(SP), DX
1460 MOVL 12(SP), BP
1461 MOVL 16(SP), SI
1462 JMP retry // BX and CX are recomputed at retry; their saved copies stay in 20(SP)/24(SP)
1463
// gcWriteBarrier1 requests 4 bytes of write barrier buffer: it sets DI = 4
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1464TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
1465 MOVL $4, DI
1466 JMP gcWriteBarrier<>(SB)
// gcWriteBarrier2 requests 8 bytes of write barrier buffer: it sets DI = 8
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1467TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
1468 MOVL $8, DI
1469 JMP gcWriteBarrier<>(SB)
// gcWriteBarrier3 requests 12 bytes of write barrier buffer: it sets DI = 12
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1470TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
1471 MOVL $12, DI
1472 JMP gcWriteBarrier<>(SB)
// gcWriteBarrier4 requests 16 bytes of write barrier buffer: it sets DI = 16
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1473TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
1474 MOVL $16, DI
1475 JMP gcWriteBarrier<>(SB)
// gcWriteBarrier5 requests 20 bytes of write barrier buffer: it sets DI = 20
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1476TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
1477 MOVL $20, DI
1478 JMP gcWriteBarrier<>(SB)
// gcWriteBarrier6 requests 24 bytes of write barrier buffer: it sets DI = 24
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1479TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
1480 MOVL $24, DI
1481 JMP gcWriteBarrier<>(SB)
// gcWriteBarrier7 requests 28 bytes of write barrier buffer: it sets DI = 28
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1482TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
1483 MOVL $28, DI
1484 JMP gcWriteBarrier<>(SB)
// gcWriteBarrier8 requests 32 bytes of write barrier buffer: it sets DI = 32
// and tail-jumps to gcWriteBarrier<>, which returns the buffer pointer in DI.
1485TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
1486 MOVL $32, DI
1487 JMP gcWriteBarrier<>(SB)
1488
1489// Note: these functions use a special calling convention to save generated code space.
1490// Arguments are passed in registers, but the space for those arguments is allocated
1491// in the caller's stack frame. These stubs write the args into that stack space and
1492// then tail call to the corresponding runtime handler.
1493// The tail call makes these stubs disappear in backtraces.
// panicIndex stub: spills AX to x+0(FP) and CX to y+4(FP), then tail-jumps
// to runtime·goPanicIndex.
1494TEXT runtime·panicIndex(SB),NOSPLIT,$0-8
1495 MOVL AX, x+0(FP)
1496 MOVL CX, y+4(FP)
1497 JMP runtime·goPanicIndex(SB)
// panicIndexU stub: spills AX to x+0(FP) and CX to y+4(FP), then tail-jumps
// to runtime·goPanicIndexU.
1498TEXT runtime·panicIndexU(SB),NOSPLIT,$0-8
1499 MOVL AX, x+0(FP)
1500 MOVL CX, y+4(FP)
1501 JMP runtime·goPanicIndexU(SB)
// panicSliceAlen stub: spills CX to x+0(FP) and DX to y+4(FP), then
// tail-jumps to runtime·goPanicSliceAlen.
1502TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-8
1503 MOVL CX, x+0(FP)
1504 MOVL DX, y+4(FP)
1505 JMP runtime·goPanicSliceAlen(SB)
// panicSliceAlenU stub: spills CX to x+0(FP) and DX to y+4(FP), then
// tail-jumps to runtime·goPanicSliceAlenU.
1506TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-8
1507 MOVL CX, x+0(FP)
1508 MOVL DX, y+4(FP)
1509 JMP runtime·goPanicSliceAlenU(SB)
// panicSliceAcap stub: spills CX to x+0(FP) and DX to y+4(FP), then
// tail-jumps to runtime·goPanicSliceAcap.
1510TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-8
1511 MOVL CX, x+0(FP)
1512 MOVL DX, y+4(FP)
1513 JMP runtime·goPanicSliceAcap(SB)
// panicSliceAcapU stub: spills CX to x+0(FP) and DX to y+4(FP), then
// tail-jumps to runtime·goPanicSliceAcapU.
1514TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-8
1515 MOVL CX, x+0(FP)
1516 MOVL DX, y+4(FP)
1517 JMP runtime·goPanicSliceAcapU(SB)
// panicSliceB stub: spills AX to x+0(FP) and CX to y+4(FP), then tail-jumps
// to runtime·goPanicSliceB.
1518TEXT runtime·panicSliceB(SB),NOSPLIT,$0-8
1519 MOVL AX, x+0(FP)
1520 MOVL CX, y+4(FP)
1521 JMP runtime·goPanicSliceB(SB)
// panicSliceBU stub: spills AX to x+0(FP) and CX to y+4(FP), then tail-jumps
// to runtime·goPanicSliceBU.
1522TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-8
1523 MOVL AX, x+0(FP)
1524 MOVL CX, y+4(FP)
1525 JMP runtime·goPanicSliceBU(SB)
// panicSlice3Alen stub: spills DX to x+0(FP) and BX to y+4(FP), then
// tail-jumps to runtime·goPanicSlice3Alen.
1526TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-8
1527 MOVL DX, x+0(FP)
1528 MOVL BX, y+4(FP)
1529 JMP runtime·goPanicSlice3Alen(SB)
// panicSlice3AlenU stub: spills DX to x+0(FP) and BX to y+4(FP), then
// tail-jumps to runtime·goPanicSlice3AlenU.
1530TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-8
1531 MOVL DX, x+0(FP)
1532 MOVL BX, y+4(FP)
1533 JMP runtime·goPanicSlice3AlenU(SB)
// panicSlice3Acap stub: spills DX to x+0(FP) and BX to y+4(FP), then
// tail-jumps to runtime·goPanicSlice3Acap.
1534TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-8
1535 MOVL DX, x+0(FP)
1536 MOVL BX, y+4(FP)
1537 JMP runtime·goPanicSlice3Acap(SB)
// panicSlice3AcapU stub: spills DX to x+0(FP) and BX to y+4(FP), then
// tail-jumps to runtime·goPanicSlice3AcapU.
1538TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-8
1539 MOVL DX, x+0(FP)
1540 MOVL BX, y+4(FP)
1541 JMP runtime·goPanicSlice3AcapU(SB)
// panicSlice3B stub: spills CX to x+0(FP) and DX to y+4(FP), then tail-jumps
// to runtime·goPanicSlice3B.
1542TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-8
1543 MOVL CX, x+0(FP)
1544 MOVL DX, y+4(FP)
1545 JMP runtime·goPanicSlice3B(SB)
// panicSlice3BU stub: spills CX to x+0(FP) and DX to y+4(FP), then
// tail-jumps to runtime·goPanicSlice3BU.
1546TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-8
1547 MOVL CX, x+0(FP)
1548 MOVL DX, y+4(FP)
1549 JMP runtime·goPanicSlice3BU(SB)
// panicSlice3C stub: spills AX to x+0(FP) and CX to y+4(FP), then tail-jumps
// to runtime·goPanicSlice3C.
1550TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-8
1551 MOVL AX, x+0(FP)
1552 MOVL CX, y+4(FP)
1553 JMP runtime·goPanicSlice3C(SB)
// panicSlice3CU stub: spills AX to x+0(FP) and CX to y+4(FP), then
// tail-jumps to runtime·goPanicSlice3CU.
1554TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-8
1555 MOVL AX, x+0(FP)
1556 MOVL CX, y+4(FP)
1557 JMP runtime·goPanicSlice3CU(SB)
// panicSliceConvert stub: spills DX to x+0(FP) and BX to y+4(FP), then
// tail-jumps to runtime·goPanicSliceConvert.
1558TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-8
1559 MOVL DX, x+0(FP)
1560 MOVL BX, y+4(FP)
1561 JMP runtime·goPanicSliceConvert(SB)
1562
1563// Extended versions for 64-bit indexes.
// panicExtendIndex stub: spills SI to hi+0(FP), AX to lo+4(FP) and CX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendIndex.
1564TEXT runtime·panicExtendIndex(SB),NOSPLIT,$0-12
1565 MOVL SI, hi+0(FP)
1566 MOVL AX, lo+4(FP)
1567 MOVL CX, y+8(FP)
1568 JMP runtime·goPanicExtendIndex(SB)
// panicExtendIndexU stub: spills SI to hi+0(FP), AX to lo+4(FP) and CX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendIndexU.
1569TEXT runtime·panicExtendIndexU(SB),NOSPLIT,$0-12
1570 MOVL SI, hi+0(FP)
1571 MOVL AX, lo+4(FP)
1572 MOVL CX, y+8(FP)
1573 JMP runtime·goPanicExtendIndexU(SB)
// panicExtendSliceAlen stub: spills SI to hi+0(FP), CX to lo+4(FP) and DX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSliceAlen.
1574TEXT runtime·panicExtendSliceAlen(SB),NOSPLIT,$0-12
1575 MOVL SI, hi+0(FP)
1576 MOVL CX, lo+4(FP)
1577 MOVL DX, y+8(FP)
1578 JMP runtime·goPanicExtendSliceAlen(SB)
// panicExtendSliceAlenU stub: spills SI to hi+0(FP), CX to lo+4(FP) and DX
// to y+8(FP), then tail-jumps to runtime·goPanicExtendSliceAlenU.
1579TEXT runtime·panicExtendSliceAlenU(SB),NOSPLIT,$0-12
1580 MOVL SI, hi+0(FP)
1581 MOVL CX, lo+4(FP)
1582 MOVL DX, y+8(FP)
1583 JMP runtime·goPanicExtendSliceAlenU(SB)
// panicExtendSliceAcap stub: spills SI to hi+0(FP), CX to lo+4(FP) and DX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSliceAcap.
1584TEXT runtime·panicExtendSliceAcap(SB),NOSPLIT,$0-12
1585 MOVL SI, hi+0(FP)
1586 MOVL CX, lo+4(FP)
1587 MOVL DX, y+8(FP)
1588 JMP runtime·goPanicExtendSliceAcap(SB)
// panicExtendSliceAcapU stub: spills SI to hi+0(FP), CX to lo+4(FP) and DX
// to y+8(FP), then tail-jumps to runtime·goPanicExtendSliceAcapU.
1589TEXT runtime·panicExtendSliceAcapU(SB),NOSPLIT,$0-12
1590 MOVL SI, hi+0(FP)
1591 MOVL CX, lo+4(FP)
1592 MOVL DX, y+8(FP)
1593 JMP runtime·goPanicExtendSliceAcapU(SB)
// panicExtendSliceB stub: spills SI to hi+0(FP), AX to lo+4(FP) and CX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSliceB.
1594TEXT runtime·panicExtendSliceB(SB),NOSPLIT,$0-12
1595 MOVL SI, hi+0(FP)
1596 MOVL AX, lo+4(FP)
1597 MOVL CX, y+8(FP)
1598 JMP runtime·goPanicExtendSliceB(SB)
// panicExtendSliceBU stub: spills SI to hi+0(FP), AX to lo+4(FP) and CX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSliceBU.
1599TEXT runtime·panicExtendSliceBU(SB),NOSPLIT,$0-12
1600 MOVL SI, hi+0(FP)
1601 MOVL AX, lo+4(FP)
1602 MOVL CX, y+8(FP)
1603 JMP runtime·goPanicExtendSliceBU(SB)
// panicExtendSlice3Alen stub: spills SI to hi+0(FP), DX to lo+4(FP) and BX
// to y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3Alen.
1604TEXT runtime·panicExtendSlice3Alen(SB),NOSPLIT,$0-12
1605 MOVL SI, hi+0(FP)
1606 MOVL DX, lo+4(FP)
1607 MOVL BX, y+8(FP)
1608 JMP runtime·goPanicExtendSlice3Alen(SB)
// panicExtendSlice3AlenU stub: spills SI to hi+0(FP), DX to lo+4(FP) and BX
// to y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3AlenU.
1609TEXT runtime·panicExtendSlice3AlenU(SB),NOSPLIT,$0-12
1610 MOVL SI, hi+0(FP)
1611 MOVL DX, lo+4(FP)
1612 MOVL BX, y+8(FP)
1613 JMP runtime·goPanicExtendSlice3AlenU(SB)
// panicExtendSlice3Acap stub: spills SI to hi+0(FP), DX to lo+4(FP) and BX
// to y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3Acap.
1614TEXT runtime·panicExtendSlice3Acap(SB),NOSPLIT,$0-12
1615 MOVL SI, hi+0(FP)
1616 MOVL DX, lo+4(FP)
1617 MOVL BX, y+8(FP)
1618 JMP runtime·goPanicExtendSlice3Acap(SB)
// panicExtendSlice3AcapU stub: spills SI to hi+0(FP), DX to lo+4(FP) and BX
// to y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3AcapU.
1619TEXT runtime·panicExtendSlice3AcapU(SB),NOSPLIT,$0-12
1620 MOVL SI, hi+0(FP)
1621 MOVL DX, lo+4(FP)
1622 MOVL BX, y+8(FP)
1623 JMP runtime·goPanicExtendSlice3AcapU(SB)
// panicExtendSlice3B stub: spills SI to hi+0(FP), CX to lo+4(FP) and DX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3B.
1624TEXT runtime·panicExtendSlice3B(SB),NOSPLIT,$0-12
1625 MOVL SI, hi+0(FP)
1626 MOVL CX, lo+4(FP)
1627 MOVL DX, y+8(FP)
1628 JMP runtime·goPanicExtendSlice3B(SB)
// panicExtendSlice3BU stub: spills SI to hi+0(FP), CX to lo+4(FP) and DX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3BU.
1629TEXT runtime·panicExtendSlice3BU(SB),NOSPLIT,$0-12
1630 MOVL SI, hi+0(FP)
1631 MOVL CX, lo+4(FP)
1632 MOVL DX, y+8(FP)
1633 JMP runtime·goPanicExtendSlice3BU(SB)
// panicExtendSlice3C stub: spills SI to hi+0(FP), AX to lo+4(FP) and CX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3C.
1634TEXT runtime·panicExtendSlice3C(SB),NOSPLIT,$0-12
1635 MOVL SI, hi+0(FP)
1636 MOVL AX, lo+4(FP)
1637 MOVL CX, y+8(FP)
1638 JMP runtime·goPanicExtendSlice3C(SB)
// panicExtendSlice3CU stub: spills SI to hi+0(FP), AX to lo+4(FP) and CX to
// y+8(FP), then tail-jumps to runtime·goPanicExtendSlice3CU.
1639TEXT runtime·panicExtendSlice3CU(SB),NOSPLIT,$0-12
1640 MOVL SI, hi+0(FP)
1641 MOVL AX, lo+4(FP)
1642 MOVL CX, y+8(FP)
1643 JMP runtime·goPanicExtendSlice3CU(SB)
1644
1645#ifdef GOOS_android
1646// Use the free TLS_SLOT_APP slot #2 on Android Q.
1647// Earlier androids are set up in gcc_android.c.
// runtime·tls_g is a 4-byte, pointer-free (NOPTR) global initialized to 8
// (presumably the byte offset of slot #2 with 4-byte slots -- confirm).
1648DATA runtime·tls_g+0(SB)/4, $8
1649GLOBL runtime·tls_g+0(SB), NOPTR, $4
1650#endif
1651#ifdef GOOS_windows
// On Windows runtime·tls_g is only declared here (4 bytes, NOPTR); this file
// gives it no DATA initializer.
1652GLOBL runtime·tls_g+0(SB), NOPTR, $4
1653#endif
View as plain text