...

Text file src/golang.org/x/crypto/chacha20/chacha_s390x.s

Documentation: golang.org/x/crypto/chacha20

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build gc && !purego
     6
     7#include "go_asm.h"
     8#include "textflag.h"
     9
    10// This is an implementation of the ChaCha20 encryption algorithm as
    11// specified in RFC 7539. It uses vector instructions to compute
    12// 4 keystream blocks in parallel (256 bytes) which are then XORed
    13// with the bytes in the input slice.
    14
    15GLOBL ·constants<>(SB), RODATA|NOPTR, $32
      // 32-byte read-only table. xorKeyStreamVX loads it with one VLM:
      // bytes 0x00-0x0f land in BSWAP and bytes 0x10-0x1f land in J0.
    16// BSWAP: swap bytes in each 4-byte element
      // (used as the VPERM selector by PERMUTE, which XORV invokes).
    17DATA ·constants<>+0x00(SB)/4, $0x03020100
    18DATA ·constants<>+0x04(SB)/4, $0x07060504
    19DATA ·constants<>+0x08(SB)/4, $0x0b0a0908
    20DATA ·constants<>+0x0c(SB)/4, $0x0f0e0d0c
    21// J0: [j0, j1, j2, j3]
      // The four words spell the ASCII string "expand 32-byte k" when each is
      // read as a little-endian 32-bit integer; they fill state words 0..3.
    22DATA ·constants<>+0x10(SB)/4, $0x61707865
    23DATA ·constants<>+0x14(SB)/4, $0x3320646e
    24DATA ·constants<>+0x18(SB)/4, $0x79622d32
    25DATA ·constants<>+0x1c(SB)/4, $0x6b206574
    26
    // Vector register roles for xorKeyStreamVX and its helper macros.
    // BSWAP (VPERM byte-swap selector) and J0 (constant words) are loaded
    // from ·constants<>; KEY0/KEY1 are loaded from the key pointer.
    27#define BSWAP V5
    28#define J0    V6
    29#define KEY0  V7
    30#define KEY1  V8
    // NONCE holds the nonce words (read from elements 1..3);
    // CTR holds one block counter per 32-bit element.
    31#define NONCE V9
    32#define CTR   V10
    // M0-M3 buffer 64 bytes of input inside XORV and are also passed as the
    // temporaries to SHUFFLE; M0 additionally carries the shift amount in setup.
    33#define M0    V11
    34#define M1    V12
    35#define M2    V13
    36#define M3    V14
    // INC is the per-element increment applied to CTR.
    37#define INC   V15
    // X0-X15 are the sixteen working state words. During the rounds each
    // vector carries the same state word for four blocks (one per element).
    38#define X0    V16
    39#define X1    V17
    40#define X2    V18
    41#define X3    V19
    42#define X4    V20
    43#define X5    V21
    44#define X6    V22
    45#define X7    V23
    46#define X8    V24
    47#define X9    V25
    48#define X10   V26
    49#define X11   V27
    50#define X12   V28
    51#define X13   V29
    52#define X14   V30
    53#define X15   V31
    54
      // Total number of ChaCha rounds. The round loop in xorKeyStreamVX runs
      // NUM_ROUNDS/2 iterations, each issuing two ROUND4 invocations.
    55#define NUM_ROUNDS 20
    56
    // ROUND4 applies one ChaCha quarter-round to each of four independent
    // register groups (a0..a3, b0..b3, c0..c3, d0..d3). Every step is issued
    // for all four groups before the next step starts (presumably so that
    // neighbouring instructions do not depend on each other).
    //
    // For each group x, on 32-bit elements:
    //   x0 += x1; x2 ^= x0; x2 <<<= 16
    //   x3 += x2; x1 ^= x3; x1 <<<= 12
    //   x0 += x1; x2 ^= x0; x2 <<<= 8
    //   x3 += x2; x1 ^= x3; x1 <<<= 7
    //
    // In the RFC's QUARTERROUND(a, b, c, d) notation the macro arguments
    // (x0, x1, x2, x3) correspond to (a, b, d, c). All sixteen argument
    // registers are updated in place; no other registers are touched.
    57#define ROUND4(a0, a1, a2, a3, b0, b1, b2, b3, c0, c1, c2, c3, d0, d1, d2, d3) \
    58	VAF    a1, a0, a0  \
    59	VAF    b1, b0, b0  \
    60	VAF    c1, c0, c0  \
    61	VAF    d1, d0, d0  \
    62	VX     a0, a2, a2  \
    63	VX     b0, b2, b2  \
    64	VX     c0, c2, c2  \
    65	VX     d0, d2, d2  \
    66	VERLLF $16, a2, a2 \
    67	VERLLF $16, b2, b2 \
    68	VERLLF $16, c2, c2 \
    69	VERLLF $16, d2, d2 \
    70	VAF    a2, a3, a3  \
    71	VAF    b2, b3, b3  \
    72	VAF    c2, c3, c3  \
    73	VAF    d2, d3, d3  \
    74	VX     a3, a1, a1  \
    75	VX     b3, b1, b1  \
    76	VX     c3, c1, c1  \
    77	VX     d3, d1, d1  \
    78	VERLLF $12, a1, a1 \
    79	VERLLF $12, b1, b1 \
    80	VERLLF $12, c1, c1 \
    81	VERLLF $12, d1, d1 \
    82	VAF    a1, a0, a0  \
    83	VAF    b1, b0, b0  \
    84	VAF    c1, c0, c0  \
    85	VAF    d1, d0, d0  \
    86	VX     a0, a2, a2  \
    87	VX     b0, b2, b2  \
    88	VX     c0, c2, c2  \
    89	VX     d0, d2, d2  \
    90	VERLLF $8, a2, a2  \
    91	VERLLF $8, b2, b2  \
    92	VERLLF $8, c2, c2  \
    93	VERLLF $8, d2, d2  \
    94	VAF    a2, a3, a3  \
    95	VAF    b2, b3, b3  \
    96	VAF    c2, c3, c3  \
    97	VAF    d2, d3, d3  \
    98	VX     a3, a1, a1  \
    99	VX     b3, b1, b1  \
   100	VX     c3, c1, c1  \
   101	VX     d3, d1, d1  \
   102	VERLLF $7, a1, a1  \
   103	VERLLF $7, b1, b1  \
   104	VERLLF $7, c1, c1  \
   105	VERLLF $7, d1, d1
   106
   // PERMUTE rearranges the bytes of v0..v3 in place with VPERM, using mask
   // as the byte selector. Both VPERM source operands are the vector itself,
   // so the result depends only on that vector and mask.
   107#define PERMUTE(mask, v0, v1, v2, v3) \
   108	VPERM v0, v0, mask, v0 \
   109	VPERM v1, v1, mask, v1 \
   110	VPERM v2, v2, mask, v2 \
   111	VPERM v3, v3, mask, v3
   112
   // ADDV adds vector x to each of v0..v3 in place (VAF: element-wise
   // 32-bit addition). x itself is not modified.
   113#define ADDV(x, v0, v1, v2, v3) \
   114	VAF x, v0, v0 \
   115	VAF x, v1, v1 \
   116	VAF x, v2, v2 \
   117	VAF x, v3, v3
   118
   // XORV processes one 64-byte block: it loads four vectors from off(src)
   // into M0..M3, byte-permutes the keystream vectors v0..v3 with BSWAP
   // (presumably so each 32-bit keystream word is emitted little-endian),
   // XORs them into M0..M3 and stores the result to off(dst).
   // Clobbers M0..M3 and leaves v0..v3 byte-permuted.
   119#define XORV(off, dst, src, v0, v1, v2, v3) \
   120	VLM  off(src), M0, M3          \
   121	PERMUTE(BSWAP, v0, v1, v2, v3) \
   122	VX   v0, M0, M0                \
   123	VX   v1, M1, M1                \
   124	VX   v2, M2, M2                \
   125	VX   v3, M3, M3                \
   126	VSTM M0, M3, off(dst)
   127
   // SHUFFLE transposes the 4x4 matrix of 32-bit elements held in a, b, c, d
   // (one row per vector) in place, using t, u, v, w as temporaries that are
   // overwritten. The per-line comments track the element layout after each
   // merge-high (VMRHF) / merge-low (VMRLF) step.
   128#define SHUFFLE(a, b, c, d, t, u, v, w) \
   129	VMRHF a, c, t \ // t = {a[0], c[0], a[1], c[1]}
   130	VMRHF b, d, u \ // u = {b[0], d[0], b[1], d[1]}
   131	VMRLF a, c, v \ // v = {a[2], c[2], a[3], c[3]}
   132	VMRLF b, d, w \ // w = {b[2], d[2], b[3], d[3]}
   133	VMRHF t, u, a \ // a = {a[0], b[0], c[0], d[0]}
   134	VMRLF t, u, b \ // b = {a[1], b[1], c[1], d[1]}
   135	VMRHF v, w, c \ // c = {a[2], b[2], c[2], d[2]}
   136	VMRLF v, w, d // d = {a[3], b[3], c[3], d[3]}
   137
   138// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
      //
      // xorKeyStreamVX XORs src with ChaCha keystream and writes the result to
      // dst, 256 bytes (four 64-byte blocks) per pass of the outer loop. The
      // block counter is read from *counter on entry and the updated value is
      // written back before returning.
      //
      // NOTE(review): the outer loop subtracts 256 from len(src) per pass and
      // exits only when the remainder is exactly zero, so this appears to
      // require len(src) to be a non-zero multiple of 256 - confirm at the
      // Go call site. len(dst) is never read here.
      //
      // Register use: R0-R7 and V5-V31 (via the aliases above); no stack frame.
   139TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
   140	MOVD $·constants<>(SB), R1 // R1=&constants (reused below as the round counter)
   141	MOVD dst+0(FP), R2         // R2=&dst[0]
   142	LMG  src+24(FP), R3, R4    // R3=&src[0] R4=len(src)
   143	MOVD key+48(FP), R5        // R5=key
   144	MOVD nonce+56(FP), R6      // R6=nonce
   145	MOVD counter+64(FP), R7    // R7=counter
   146
   147	// load BSWAP and J0 (32 bytes: two consecutive vectors)
   148	VLM (R1), BSWAP, J0
   149
   150	// setup: load the key and nonce
      	// NOTE(review): R0 ($95) is the length operand of VLL; confirm against
      	// the VLL definition how many bytes this reads from the 12-byte nonce.
   151	MOVD  $95, R0
   152	VLM   (R5), KEY0, KEY1
   153	VLL   R0, (R6), NONCE
      	// Shift NONCE right by the amount encoded in M0 (byte 7 = 32) so the
      	// nonce words sit in elements 1..3, where the VREPF $1..$3 below read
      	// them; element 0 is presumably left zero (it is added to the counter
      	// lane by ADDV(NONCE, ...) later).
   154	VZERO M0
   155	VLEIB $7, $32, M0
   156	VSRLB M0, NONCE, NONCE
   157
   158	// initialize counter values: CTR = {c, c+1, c+2, c+3}, then INC = {4, 4, 4, 4}
   159	VLREPF (R7), CTR     // replicate *counter into every element
   160	VZERO  INC
   161	VLEIF  $1, $1, INC
   162	VLEIF  $2, $2, INC
   163	VLEIF  $3, $3, INC   // INC = {0, 1, 2, 3}
   164	VAF    INC, CTR, CTR
   165	VREPIF $4, INC       // each pass consumes four counters
   166
   167chacha:
      	// Build the initial state for four blocks: every X register holds one
      	// state word replicated across the elements, except X12 which holds the
      	// four distinct block counters.
   168	VREPF $0, J0, X0
   169	VREPF $1, J0, X1
   170	VREPF $2, J0, X2
   171	VREPF $3, J0, X3
   172	VREPF $0, KEY0, X4
   173	VREPF $1, KEY0, X5
   174	VREPF $2, KEY0, X6
   175	VREPF $3, KEY0, X7
   176	VREPF $0, KEY1, X8
   177	VREPF $1, KEY1, X9
   178	VREPF $2, KEY1, X10
   179	VREPF $3, KEY1, X11
   180	VLR   CTR, X12
   181	VREPF $1, NONCE, X13
   182	VREPF $2, NONCE, X14
   183	VREPF $3, NONCE, X15
   184
   185	MOVD $(NUM_ROUNDS/2), R1 // R1 = number of double rounds remaining
   186
   187loop:
      	// One double round: first the four column groups, then the four
      	// diagonal groups of the 4x4 state.
   188	ROUND4(X0, X4, X12,  X8, X1, X5, X13,  X9, X2, X6, X14, X10, X3, X7, X15, X11)
   189	ROUND4(X0, X5, X15, X10, X1, X6, X12, X11, X2, X7, X13, X8,  X3, X4, X14, X9)
   190
   191	ADD $-1, R1
   192	BNE loop // no compare in between: branches on the result of the ADD
   193
   194	// decrement length
   195	ADD $-256, R4
   196
   197	// rearrange vectors: transpose each group of four so that every vector
      	// holds four consecutive state words of a single block, then add the
      	// initial state (J0, KEY0, KEY1, counter+nonce) back in.
   198	SHUFFLE(X0, X1, X2, X3, M0, M1, M2, M3)
   199	ADDV(J0, X0, X1, X2, X3)
   200	SHUFFLE(X4, X5, X6, X7, M0, M1, M2, M3)
   201	ADDV(KEY0, X4, X5, X6, X7)
   202	SHUFFLE(X8, X9, X10, X11, M0, M1, M2, M3)
   203	ADDV(KEY1, X8, X9, X10, X11)
   204	VAF CTR, X12, X12 // add per-block counters while X12 is still one element per block
   205	SHUFFLE(X12, X13, X14, X15, M0, M1, M2, M3)
   206	ADDV(NONCE, X12, X13, X14, X15)
   207
   208	// increment counters (by INC = 4 per element) for the next pass
   209	VAF INC, CTR, CTR
   210
   211	// xor keystream with plaintext, one 64-byte block per XORV
   212	XORV(0*64, R2, R3, X0, X4,  X8, X12)
   213	XORV(1*64, R2, R3, X1, X5,  X9, X13)
   214	XORV(2*64, R2, R3, X2, X6, X10, X14)
   215	XORV(3*64, R2, R3, X3, X7, X11, X15)
   216
   217	// increment pointers
   218	MOVD $256(R2), R2
   219	MOVD $256(R3), R3
   220
   221	CMPBNE  R4, $0, chacha // repeat until the remaining length is exactly zero
   222
   223	VSTEF $0, CTR, (R7) // write element 0 of CTR (next unused counter) to *counter
   224	RET

View as plain text