...

Text file src/internal/bytealg/equal_arm64.s

Documentation: internal/bytealg

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8// memequal(a, b unsafe.Pointer, size uintptr) bool
      //
      // Reports whether the size bytes at a and the size bytes at b are identical.
      // Register arguments, passed unchanged to memeqbody:
      //   R0: pointer a
      //   R1: pointer b
      //   R2: size in bytes
      // Result in R0: 1 when the regions match, 0 when they differ.
     9TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    10	// short path to handle 0-byte case
    11	CBZ	R2, equal
      	// short path to handle equal pointers: a region always equals itself,
      	// so no memory needs to be read (same shortcut as memequal_varlen)
      	CMP	R0, R1
      	BEQ	equal
      	// tail call: memeqbody sets R0 and returns directly to our caller
    12	B	memeqbody<>(SB)
    13equal:
    14	MOVD	$1, R0
    15	RET
    16
    17// memequal_varlen(a, b unsafe.Pointer) bool
      //
      // Variant of memequal whose length is not an argument: it is loaded from
      // offset 8 of the closure addressed by R26.
      //   R0: pointer a
      //   R1: pointer b
      // Result in R0: 1 when the pointers are identical or the length is zero;
      // otherwise the result produced by memeqbody.
    18TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    19	CMP	R0, R1
      	// identical pointers: equal without reading any memory
    20	BEQ	eq
    21	MOVD	8(R26), R2    // compiler stores size at offset 8 in the closure
      	// zero length: trivially equal
    22	CBZ	R2, eq
      	// tail call: R0, R1 and R2 now hold exactly the inputs memeqbody expects
    23	B	memeqbody<>(SB)
    24eq:
    25	MOVD	$1, R0
    26	RET
    27
    28// input:
    29// R0: pointer a
    30// R1: pointer b
    31// R2: data len
    32// at return: result in R0
      //
      // The result is 1 when the two regions hold the same bytes and 0 otherwise.
      // Strategy, driven by the remaining length in R2:
      //   exactly 1 byte : single byte compare
      //   >= 64 bytes    : 64-byte chunks compared with vector instructions
      //   >= 16 bytes    : 16-byte chunks compared with register-pair loads
      //   <  16 bytes    : bits 3..0 of the length select 8/4/2/1-byte compares;
      //                    the 8- and 4-byte cases compare the first and the last
      //                    N bytes, which may overlap
      // Registers written: R0-R7, R9, V0-V11 and the condition flags.
    33TEXT memeqbody<>(SB),NOSPLIT,$0
    34	CMP	$1, R2
    35	// handle 1-byte special case for better performance
    36	BEQ	one
    37	CMP	$16, R2
    38	// handle specially if length < 16
    39	BLO	tail
      	// R3 = length rounded down to a multiple of 64; zero means no full 64-byte chunk
    40	BIC	$0x3f, R2, R3
    41	CBZ	R3, chunk16
    42	// work with 64-byte chunks
    43	ADD	R3, R0, R6	// end of chunks
    44chunk64_loop:
      	// load 64 bytes from each side; .P post-increments R0 and R1
    45	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
    46	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
      	// each 64-bit lane of V8..V11 becomes all ones if the lanes match, else zero
    47	VCMEQ	V0.D2, V4.D2, V8.D2
    48	VCMEQ	V1.D2, V5.D2, V9.D2
    49	VCMEQ	V2.D2, V6.D2, V10.D2
    50	VCMEQ	V3.D2, V7.D2, V11.D2
      	// fold the four masks into V8: a lane is zero if any compare in that lane failed
    51	VAND	V8.B16, V9.B16, V8.B16
    52	VAND	V8.B16, V10.B16, V8.B16
    53	VAND	V8.B16, V11.B16, V8.B16
      	// sets the flags used by the BNE below; the VMOV and CBZ instructions
      	// in between leave the flags untouched
    54	CMP	R0, R6
    55	VMOV	V8.D[0], R4
    56	VMOV	V8.D[1], R5
      	// a zero lane means at least one 8-byte group in this chunk differed
    57	CBZ	R4, not_equal
    58	CBZ	R5, not_equal
      	// loop until R0 reaches the end of the 64-byte chunks
    59	BNE	chunk64_loop
      	// R2 = bytes left after the 64-byte chunks (0..63)
    60	AND	$0x3f, R2, R2
    61	CBZ	R2, equal
    62chunk16:
    63	// work with 16-byte chunks
      	// R3 = remaining length rounded down to a multiple of 16
    64	BIC	$0xf, R2, R3
    65	CBZ	R3, tail
    66	ADD	R3, R0, R6	// end of chunks
    67chunk16_loop:
      	// load 16 bytes from each side as two 64-bit registers, post-incrementing the pointers
    68	LDP.P	16(R0), (R4, R5)
    69	LDP.P	16(R1), (R7, R9)
      	// XOR is non-zero exactly when the two 8-byte words differ
    70	EOR	R4, R7
    71	CBNZ	R7, not_equal
    72	EOR	R5, R9
    73	CBNZ	R9, not_equal
    74	CMP	R0, R6
    75	BNE	chunk16_loop
      	// R2 = bytes left after the 16-byte chunks (0..15)
    76	AND	$0xf, R2, R2
    77	CBZ	R2, equal
    78tail:
    79	// special compare of tail with length < 16
      	// bit 3 clear: fewer than 8 bytes remain
    80	TBZ	$3, R2, lt_8
      	// 8..15 bytes: compare the first 8 bytes, then the last 8 bytes (may overlap)
    81	MOVD	(R0), R4
    82	MOVD	(R1), R5
    83	EOR	R4, R5
    84	CBNZ	R5, not_equal
    85	SUB	$8, R2, R6	// offset of the last 8 bytes
    86	MOVD	(R0)(R6), R4
    87	MOVD	(R1)(R6), R5
    88	EOR	R4, R5
    89	CBNZ	R5, not_equal
    90	B	equal
    91lt_8:
      	// bit 2 clear: fewer than 4 bytes remain
    92	TBZ	$2, R2, lt_4
      	// 4..7 bytes: compare the first 4 bytes, then the last 4 bytes (may overlap)
    93	MOVWU	(R0), R4
    94	MOVWU	(R1), R5
    95	EOR	R4, R5
    96	CBNZ	R5, not_equal
    97	SUB	$4, R2, R6	// offset of the last 4 bytes
    98	MOVWU	(R0)(R6), R4
    99	MOVWU	(R1)(R6), R5
   100	EOR	R4, R5
   101	CBNZ	R5, not_equal
   102	B	equal
   103lt_4:
      	// bit 1 clear: at most 1 byte remains
   104	TBZ	$1, R2, lt_2
      	// 2..3 bytes: compare one halfword and advance both pointers past it
   105	MOVHU.P	2(R0), R4
   106	MOVHU.P	2(R1), R5
   107	CMP	R4, R5
   108	BNE	not_equal
   109lt_2:
      	// bit 0 clear: nothing left to compare
   110	TBZ	$0, R2, equal
   111one:
      	// compare the single remaining byte
   112	MOVBU	(R0), R4
   113	MOVBU	(R1), R5
   114	CMP	R4, R5
   115	BNE	not_equal
   116equal:
      	// result = 1 (regions match)
   117	MOVD	$1, R0
   118	RET
   119not_equal:
      	// result = 0 (regions differ)
   120	MOVB	ZR, R0
   121	RET

View as plain text