...

Text file src/golang.org/x/crypto/blake2b/blake2b_amd64.s

Documentation: golang.org/x/crypto/blake2b

     1// Copyright 2016 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build amd64 && gc && !purego
     6
     7#include "textflag.h"
     8
     9DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
    10DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
    11GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
    12
    13DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
    14DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
    15GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
    16
    17DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
    18DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
    19GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
    20
    21DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
    22DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
    23GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
    24
    25DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
    26DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
    27GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
    28
    29DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
    30DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
    31GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
    32
    33#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
    34	MOVO       v4, t1; \
    35	MOVO       v5, v4; \
    36	MOVO       t1, v5; \
    37	MOVO       v6, t1; \
    38	PUNPCKLQDQ v6, t2; \
    39	PUNPCKHQDQ v7, v6; \
    40	PUNPCKHQDQ t2, v6; \
    41	PUNPCKLQDQ v7, t2; \
    42	MOVO       t1, v7; \
    43	MOVO       v2, t1; \
    44	PUNPCKHQDQ t2, v7; \
    45	PUNPCKLQDQ v3, t2; \
    46	PUNPCKHQDQ t2, v2; \
    47	PUNPCKLQDQ t1, t2; \
    48	PUNPCKHQDQ t2, v3
    49
    50#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
    51	MOVO       v4, t1; \
    52	MOVO       v5, v4; \
    53	MOVO       t1, v5; \
    54	MOVO       v2, t1; \
    55	PUNPCKLQDQ v2, t2; \
    56	PUNPCKHQDQ v3, v2; \
    57	PUNPCKHQDQ t2, v2; \
    58	PUNPCKLQDQ v3, t2; \
    59	MOVO       t1, v3; \
    60	MOVO       v6, t1; \
    61	PUNPCKHQDQ t2, v3; \
    62	PUNPCKLQDQ v7, t2; \
    63	PUNPCKHQDQ t2, v6; \
    64	PUNPCKLQDQ t1, t2; \
    65	PUNPCKHQDQ t2, v7
    66
    67#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
    68	PADDQ  m0, v0;        \
    69	PADDQ  m1, v1;        \
    70	PADDQ  v2, v0;        \
    71	PADDQ  v3, v1;        \
    72	PXOR   v0, v6;        \
    73	PXOR   v1, v7;        \
    74	PSHUFD $0xB1, v6, v6; \
    75	PSHUFD $0xB1, v7, v7; \
    76	PADDQ  v6, v4;        \
    77	PADDQ  v7, v5;        \
    78	PXOR   v4, v2;        \
    79	PXOR   v5, v3;        \
    80	PSHUFB c40, v2;       \
    81	PSHUFB c40, v3;       \
    82	PADDQ  m2, v0;        \
    83	PADDQ  m3, v1;        \
    84	PADDQ  v2, v0;        \
    85	PADDQ  v3, v1;        \
    86	PXOR   v0, v6;        \
    87	PXOR   v1, v7;        \
    88	PSHUFB c48, v6;       \
    89	PSHUFB c48, v7;       \
    90	PADDQ  v6, v4;        \
    91	PADDQ  v7, v5;        \
    92	PXOR   v4, v2;        \
    93	PXOR   v5, v3;        \
    94	MOVOU  v2, t0;        \
    95	PADDQ  v2, t0;        \
    96	PSRLQ  $63, v2;       \
    97	PXOR   t0, v2;        \
    98	MOVOU  v3, t0;        \
    99	PADDQ  v3, t0;        \
   100	PSRLQ  $63, v3;       \
   101	PXOR   t0, v3
   102
   103#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
   104	MOVQ   i0*8(src), m0;     \
   105	PINSRQ $1, i1*8(src), m0; \
   106	MOVQ   i2*8(src), m1;     \
   107	PINSRQ $1, i3*8(src), m1; \
   108	MOVQ   i4*8(src), m2;     \
   109	PINSRQ $1, i5*8(src), m2; \
   110	MOVQ   i6*8(src), m3;     \
   111	PINSRQ $1, i7*8(src), m3
   112
   113// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
   114TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
   115	MOVQ h+0(FP), AX
   116	MOVQ c+8(FP), BX
   117	MOVQ flag+16(FP), CX
   118	MOVQ blocks_base+24(FP), SI
   119	MOVQ blocks_len+32(FP), DI
   120
   121	MOVQ SP, R10
   122	ADDQ $15, R10
   123	ANDQ $~15, R10
   124
   125	MOVOU ·iv3<>(SB), X0
   126	MOVO  X0, 0(R10)
   127	XORQ  CX, 0(R10)     // 0(R10) = ·iv3 ^ (CX || 0)
   128
   129	MOVOU ·c40<>(SB), X13
   130	MOVOU ·c48<>(SB), X14
   131
   132	MOVOU 0(AX), X12
   133	MOVOU 16(AX), X15
   134
   135	MOVQ 0(BX), R8
   136	MOVQ 8(BX), R9
   137
   138loop:
   139	ADDQ $128, R8
   140	CMPQ R8, $128
   141	JGE  noinc
   142	INCQ R9
   143
   144noinc:
   145	MOVQ R8, X8
   146	PINSRQ $1, R9, X8
   147
   148	MOVO X12, X0
   149	MOVO X15, X1
   150	MOVOU 32(AX), X2
   151	MOVOU 48(AX), X3
   152	MOVOU ·iv0<>(SB), X4
   153	MOVOU ·iv1<>(SB), X5
   154	MOVOU ·iv2<>(SB), X6
   155
   156	PXOR X8, X6
   157	MOVO 0(R10), X7
   158
   159	LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
   160	MOVO X8, 16(R10)
   161	MOVO X9, 32(R10)
   162	MOVO X10, 48(R10)
   163	MOVO X11, 64(R10)
   164	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   165	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   166	LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
   167	MOVO X8, 80(R10)
   168	MOVO X9, 96(R10)
   169	MOVO X10, 112(R10)
   170	MOVO X11, 128(R10)
   171	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   172	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   173
   174	LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
   175	MOVO X8, 144(R10)
   176	MOVO X9, 160(R10)
   177	MOVO X10, 176(R10)
   178	MOVO X11, 192(R10)
   179	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   180	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   181	LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
   182	MOVO X8, 208(R10)
   183	MOVO X9, 224(R10)
   184	MOVO X10, 240(R10)
   185	MOVO X11, 256(R10)
   186	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   187	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   188
   189	LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
   190	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   191	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   192	LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
   193	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   194	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   195
   196	LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
   197	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   198	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   199	LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
   200	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   201	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   202
   203	LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
   204	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   205	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   206	LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
   207	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   208	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   209
   210	LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
   211	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   212	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   213	LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
   214	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   215	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   216
   217	LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
   218	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   219	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   220	LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
   221	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   222	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   223
   224	LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
   225	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   226	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   227	LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
   228	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   229	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   230
   231	LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
   232	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   233	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   234	LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
   235	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   236	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   237
   238	LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
   239	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   240	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   241	LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
   242	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
   243	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   244
   245	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
   246	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   247	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
   248	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   249
   250	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
   251	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
   252	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
   253	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
   254
   255	MOVOU 32(AX), X10
   256	MOVOU 48(AX), X11
   257	PXOR  X0, X12
   258	PXOR  X1, X15
   259	PXOR  X2, X10
   260	PXOR  X3, X11
   261	PXOR  X4, X12
   262	PXOR  X5, X15
   263	PXOR  X6, X10
   264	PXOR  X7, X11
   265	MOVOU X10, 32(AX)
   266	MOVOU X11, 48(AX)
   267
   268	LEAQ 128(SI), SI
   269	SUBQ $128, DI
   270	JNE  loop
   271
   272	MOVOU X12, 0(AX)
   273	MOVOU X15, 16(AX)
   274
   275	MOVQ R8, 0(BX)
   276	MOVQ R9, 8(BX)
   277
   278	RET

View as plain text