...

Text file src/crypto/subtle/xor_ppc64x.s

Documentation: crypto/subtle

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build (ppc64 || ppc64le) && !purego
     6
     7#include "textflag.h"
     8
     9// func xorBytes(dst, a, b *byte, n int)
      //
      // xorBytes stores a[i] ^ b[i] into dst[i] for every i in [0, n).
      //
      // Register use:
      //   R3, R4, R5  dst, a, b base pointers (never modified after loading)
      //   R6          number of bytes still to process
      //   R8          byte offset of the next unprocessed element
      //   R0          used as the constant zero
      //
      // Strategy: process 64 bytes per iteration with VSX loads/stores while
      // at least 64 bytes remain, then one optional 32-byte step, one optional
      // 16-byte step, and finally a tail of at most 15 bytes. The tail is a
      // single length-controlled vector load/store when GOPPC64_power10 is
      // defined, and an 8/4/2/1-byte scalar cascade otherwise.
      //
      // The compares after the entry sequence are signed (CMP) while the entry
      // compares are unsigned (CMPU); the two agree as long as n ≥ 0.
      // NOTE(review): callers are assumed never to pass a negative n — confirm.
    10TEXT ·xorBytes(SB), NOSPLIT, $0
    11	MOVD	dst+0(FP), R3	// R3 = dst
    12	MOVD	a+8(FP), R4	// R4 = a
    13	MOVD	b+16(FP), R5	// R5 = b
    14	MOVD	n+24(FP), R6	// R6 = n
    15
    16	CMPU	R6, $64, CR7	// CR7 = unsigned compare of n with 64
    17	MOVD	R0, R8		// R8 = byte index, starts at 0 (copied from R0)
    18	CMPU	R6, $8, CR6	// CR6 = unsigned compare of n with 8
    19	BLE	CR6, small	// n ≤ 8: only the tail code is needed
    20	BLT	CR7, xor32	// 8 < n < 64: skip the 64-byte loop
    21
    22	// Case for n ≥ 64 bytes (reached by fallthrough only; nothing branches
      	// to preloop64).
    23preloop64:
    24	SRD	$6, R6, R7	// R7 = n / 64 = number of loop iterations
    25	MOVD	R7, CTR	// CTR is decremented by the BDNZ that closes the loop
    26	MOVD	$16, R10	// R10, R14, R15 = offsets of the 2nd, 3rd and 4th
    27	MOVD	$32, R14	// 16-byte vector of the current 64-byte block
    28	MOVD	$48, R15	// (R8 is the offset of the 1st)
    29	ANDCC	$63, R6, R9	// R9 = n % 64 trailing bytes; CC form records the result in CR0
    30	PCALIGN $16		// align the loop entry to a 16-byte boundary
    31	// Main loop, n ≥ 64 bytes: each iteration handles 64 bytes.
    32	// Load 4 vectors (16 bytes each) of a into VS32/34/36/38 and the
    33	// matching 4 vectors of b into VS33/35/37/39, XOR each pair, and
    34	// store the 4 results to dst at the same offsets.
    35	// Inputs and output use the same doubleword load/store form and XOR
      	// acts on every bit independently, so the result does not depend on
      	// how LXVD2X orders the lanes inside a register.
      	// Nothing in the loop writes CR0, so the ANDCC result above is still
      	// valid for the BEQLR that follows the loop.
    36loop64:
    37	LXVD2X	(R4)(R8), VS32	// a[i+0  .. i+15]
    38	LXVD2X	(R4)(R10), VS34	// a[i+16 .. i+31]
    39	LXVD2X	(R4)(R14), VS36	// a[i+32 .. i+47]
    40	LXVD2X	(R4)(R15), VS38	// a[i+48 .. i+63]
    41	LXVD2X	(R5)(R8), VS33	// b[i+0  .. i+15]
    42	LXVD2X	(R5)(R10), VS35	// b[i+16 .. i+31]
    43	LXVD2X	(R5)(R14), VS37	// b[i+32 .. i+47]
    44	LXVD2X	(R5)(R15), VS39	// b[i+48 .. i+63]
    45	XXLXOR	VS32, VS33, VS32	// VS32 = a ^ b, bytes 0..15
    46	XXLXOR	VS34, VS35, VS34	// VS34 = a ^ b, bytes 16..31
    47	XXLXOR	VS36, VS37, VS36	// VS36 = a ^ b, bytes 32..47
    48	XXLXOR	VS38, VS39, VS38	// VS38 = a ^ b, bytes 48..63
    49	STXVD2X	VS32, (R3)(R8)	// dst[i+0  .. i+15]
    50	STXVD2X	VS34, (R3)(R10)	// dst[i+16 .. i+31]
    51	STXVD2X	VS36, (R3)(R14)	// dst[i+32 .. i+47]
    52	STXVD2X	VS38, (R3)(R15)	// dst[i+48 .. i+63]
    53	ADD	$64, R8		// advance all four offsets to the next block
    54	ADD	$64, R10
    55	ADD	$64, R14
    56	ADD	$64, R15
    57	BDNZ	loop64		// decrement CTR, loop while it is non-zero
    58	BC	12,2,LR		// BEQLR: return if n % 64 == 0 (CR0 from ANDCC)
    59	MOVD	R9, R6		// R6 = trailing byte count, 1..63
    60	CMP	R6, $8
    61	BLE	small		// ≤ 8 bytes left: go to the tail code
    62	// Here 8 < remaining < 64 bytes.
    63	// Process 32 bytes if available.
    64xor32:
    65	CMP	R6, $32
    66	BLT	xor16		// fewer than 32 left: try 16
    67	ADD	$16, R8, R9	// R9 = offset of the second vector (R9 reused as scratch)
    68	LXVD2X	(R4)(R8), VS32	// a[i+0  .. i+15]
    69	LXVD2X	(R4)(R9), VS33	// a[i+16 .. i+31]
    70	LXVD2X	(R5)(R8), VS34	// b[i+0  .. i+15]
    71	LXVD2X	(R5)(R9), VS35	// b[i+16 .. i+31]
    72	XXLXOR	VS32, VS34, VS32
    73	XXLXOR	VS33, VS35, VS33
    74	STXVD2X	VS32, (R3)(R8)
    75	STXVD2X	VS33, (R3)(R9)
    76	ADD	$32, R8		// index += 32
    77	ADD	$-32, R6	// remaining -= 32
    78	CMP	R6, $8
    79	BLE	small		// ≤ 8 bytes left (possibly 0): go to the tail code
    80	// Here 8 < remaining < 32 bytes.
    81	// Process 16 bytes if available.
    82xor16:
    83	CMP	R6, $16
    84	BLT	xor8		// 9..15 bytes left: straight to the tail
    85	LXVD2X	(R4)(R8), VS32	// a[i .. i+15]
    86	LXVD2X	(R5)(R8), VS33	// b[i .. i+15]
    87	XXLXOR	VS32, VS33, VS32
    88	STXVD2X	VS32, (R3)(R8)
    89	ADD	$16, R8		// index += 16
    90	ADD	$-16, R6	// remaining -= 16, now 0..15
    91small:
    92	CMP	R6, R0		// anything left?
    93	BC	12,2,LR		// BEQLR: return if remaining == 0
      	// Tail: 1 ≤ remaining ≤ 15 bytes at this point (given n ≥ 0).
    94xor8:
    95#ifdef GOPPC64_power10
      	// One length-controlled vector load per input and one store handle
      	// the whole tail. LXVL/STXVL take the byte count from the most
      	// significant byte of the length register, hence the shift by 56.
    96	SLD	$56,R6,R17	// R17 = remaining << 56
    97	ADD	R4,R8,R18	// R18 = &a[i]
    98	ADD	R5,R8,R19	// R19 = &b[i]
    99	ADD	R3,R8,R20	// R20 = &dst[i]
   100	LXVL	R18,R17,V0	// V0 = remaining bytes of a
   101	LXVL	R19,R17,V1	// V1 = remaining bytes of b
   102	VXOR	V0,V1,V1	// V1 = V0 ^ V1
   103	STXVL	V1,R20,R17	// store only the remaining bytes to dst
   104	RET
   105#else
      	// Scalar cascade: peel off 8, then 4, then 2, then 1 byte as needed.
   106	CMP	R6, $8
   107	BLT	xor4
   108	// Case for 8 ≤ remaining < 16 bytes
   109	MOVD	(R4)(R8), R14   // R14 = a[i,...,i+7]
   110	MOVD	(R5)(R8), R15   // R15 = b[i,...,i+7]
   111	XOR	R14, R15, R16   // R16 = a[] ^ b[]
   112	SUB	$8, R6          // remaining -= 8
   113	MOVD	R16, (R3)(R8)   // Store to dst
   114	ADD	$8, R8		// index += 8
   115xor4:
   116	CMP	R6, $4
   117	BLT	xor2
   118	MOVWZ	(R4)(R8), R14	// R14 = a[i,...,i+3], zero-extended
   119	MOVWZ	(R5)(R8), R15	// R15 = b[i,...,i+3], zero-extended
   120	XOR	R14, R15, R16
   121	MOVW	R16, (R3)(R8)	// store the low 4 bytes to dst
   122	ADD	$4,R8		// index += 4
   123	ADD	$-4,R6		// remaining -= 4
   124xor2:
   125	CMP	R6, $2
   126	BLT	xor1
   127	MOVHZ	(R4)(R8), R14	// R14 = a[i,i+1], zero-extended
   128	MOVHZ	(R5)(R8), R15	// R15 = b[i,i+1], zero-extended
   129	XOR	R14, R15, R16
   130	MOVH	R16, (R3)(R8)	// store the low 2 bytes to dst
   131	ADD	$2,R8		// index += 2
   132	ADD	$-2,R6		// remaining -= 2
   133xor1:
   134	CMP	R6, R0		// a final odd byte left?
   135	BC	12,2,LR		// BEQLR: return if remaining == 0
   136	MOVBZ	(R4)(R8), R14	// R14 = a[i]
   137	MOVBZ	(R5)(R8), R15	// R15 = b[i]
   138	XOR	R14, R15, R16	// R16 = a[i] ^ b[i]
   139	MOVB	R16, (R3)(R8)	// Store to dst
   140#endif
      	// The single-byte path falls through to the RET below; no branch in
      	// this function targets the done label.
   141done:
   142	RET

View as plain text