Text file src/crypto/aes/gcm_ppc64x.s

Documentation: crypto/aes

     1// Copyright 2019 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build ppc64 || ppc64le
     6
     7// Portions based on CRYPTOGAMS code with the following comment:
     8// # ====================================================================
     9// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
    10// # project. The module is, however, dual licensed under OpenSSL and
    11// # CRYPTOGAMS licenses depending on where you obtain it. For further
    12// # details see http://www.openssl.org/~appro/cryptogams/.
    13// # ====================================================================
    14
    15// The implementations for gcmHash, gcmInit and gcmMul are based on the generated asm
    16// from the script https://github.com/dot-asm/cryptogams/blob/master/ppc/ghashp8-ppc.pl
    17// from commit d47afb3c.
    18
    19// Changes were made due to differences in the ABI and some register usage.
    20// Some arguments were changed due to the way the Go code passes them.
    21
    22// Portions that use the stitched AES-GCM approach in counterCryptASM
    23// are based on code found in
    24// https://github.com/IBM/ipcri/blob/main/aes/p10_aes_gcm.s
    25
    26#include "textflag.h"
    27
    28#define XIP    R3
    29#define HTBL   R4
    30#define INP    R5
    31#define LEN    R6
    32
    33#define XL     V0
    34#define XM     V1
    35#define XH     V2
    36#define IN     V3
    37#define ZERO   V4
    38#define T0     V5
    39#define T1     V6
    40#define T2     V7
    41#define XC2    V8
    42#define H      V9
    43#define HH     V10
    44#define HL     V11
    45#define LEMASK V12
    46#define XL1    V13
    47#define XM1    V14
    48#define XH1    V15
    49#define IN1    V16
    50#define H2     V17
    51#define H2H    V18
    52#define H2L    V19
    53#define XL3    V20
    54#define XM2    V21
    55#define IN2    V22
    56#define H3L    V23
    57#define H3     V24
    58#define H3H    V25
    59#define XH3    V26
    60#define XM3    V27
    61#define IN3    V28
    62#define H4L    V29
    63#define H4     V30
    64#define H4H    V31
    65
    66#define IN0    IN
    67#define H21L   HL
    68#define H21H   HH
    69#define LOPERM H2L
    70#define HIPERM H2H
    71
    72#define VXL    VS32
    73#define VIN    VS35
    74#define VXC2   VS40
    75#define VH     VS41
    76#define VHH    VS42
    77#define VHL    VS43
    78#define VIN1   VS48
    79#define VH2    VS49
    80#define VH2H   VS50
    81#define VH2L   VS51
    82
    83#define VIN2   VS54
    84#define VH3L   VS55
    85#define VH3    VS56
    86#define VH3H   VS57
    87#define VIN3   VS60
    88#define VH4L   VS61
    89#define VH4    VS62
    90#define VH4H   VS63
    91
    92#define VIN0   VIN
    93
    94#define ESPERM V10
    95#define TMP2 V11
    96
    97// The following macros provide load and store
    98// implementations appropriate for the target endianness
    99// as well as for the power8 and power9 ISAs.
   100#ifdef GOARCH_ppc64le
   101#  ifdef GOPPC64_power9
   102#define P8_LXVB16X(RA,RB,VT)   LXVB16X (RA)(RB), VT
   103#define P8_STXVB16X(VS,RA,RB)  STXVB16X VS, (RA)(RB)
   104#  else
   105#define NEEDS_ESPERM
   106#define P8_LXVB16X(RA,RB,VT) \
   107	LXVD2X  (RA+RB), VT; \
   108	VPERM	VT, VT, ESPERM, VT
   109
   110#define P8_STXVB16X(VS,RA,RB) \
   111	VPERM	VS, VS, ESPERM, TMP2; \
   112	STXVD2X TMP2, (RA+RB)
   113
   114#  endif
   115#else
   116#define P8_LXVB16X(RA,RB,VT) \
   117	LXVD2X  (RA+RB), VT
   118
   119#define P8_STXVB16X(VS,RA,RB) \
   120	STXVD2X VS, (RA+RB)
   121
   122#endif
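// In short: P8_LXVB16X and P8_STXVB16X load and store a 16-byte vector
// in big endian byte order regardless of target. On power9 little
// endian, LXVB16X/STXVB16X do this directly; on power8 little endian
// the same effect needs LXVD2X/STXVD2X plus a VPERM through the
// permute mask in ESPERM (loaded from ·rcon in counterCryptASM below);
// on big endian, LXVD2X/STXVD2X already give the desired layout.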
   123
   124#define MASK_PTR   R8
   125
   126#define MASKV   V0
   127#define INV     V1
   128
   129// The following macros are used for
   130// the stitched implementation within
   131// counterCryptASM.
   132
   133// Load the initial GCM counter value
   134// in V30 and set up the counter increment
   135// in V31
   136#define SETUP_COUNTER \
   137	P8_LXVB16X(COUNTER, R0, V30); \
   138	VSPLTISB $1, V28; \
   139	VXOR V31, V31, V31; \
   140	VSLDOI $1, V31, V28, V31
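// VSPLTISB fills V28 with 0x01 bytes, and the VSLDOI shifts a single
// 0x01 byte into the low end of the zeroed V31, leaving V31 as
// {0, 0, 0, 1} when viewed as 32-bit words. Adding it with VADDUWM
// increments the rightmost 32-bit word of the counter block, matching
// GCM's 32-bit counter increment.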
   141
   142// These macros set up the initial value
   143// for a single encryption, or 4 or 8
   144// stitched encryptions implemented
   145// with interleaving vciphers.
   146//
   147// The input value for each encryption
   148// is generated by XORing the counter
   149// from V30 with the first key in VS0
   150// and incrementing the counter.
   151//
   152// Single encryption in V15
   153#define GEN_VCIPHER_INPUT \
   154	XXLOR VS0, VS0, V29; \
   155	VXOR V30, V29, V15; \
   156	VADDUWM V30, V31, V30
   157
   158// 4 encryptions in V15 - V18
   159#define GEN_VCIPHER_4_INPUTS \
   160	XXLOR VS0, VS0, V29; \
   161	VXOR V30, V29, V15; \
   162	VADDUWM V30, V31, V30; \
   163	VXOR V30, V29, V16; \
   164	VADDUWM V30, V31, V30; \
   165	VXOR V30, V29, V17; \
   166	VADDUWM V30, V31, V30; \
   167	VXOR V30, V29, V18; \
   168	VADDUWM V30, V31, V30
   169
   170// 8 encryptions in V15 - V22
   171#define GEN_VCIPHER_8_INPUTS \
   172	XXLOR VS0, VS0, V29; \
   173	VXOR V30, V29, V15; \
   174	VADDUWM V30, V31, V30; \
   175	VXOR V30, V29, V16; \
   176	VADDUWM V30, V31, V30; \
   177	VXOR V30, V29, V17; \
   178	VADDUWM V30, V31, V30; \
   179	VXOR V30, V29, V18; \
   180	VADDUWM V30, V31, V30; \
   181	VXOR V30, V29, V19; \
   182	VADDUWM V30, V31, V30; \
   183	VXOR V30, V29, V20; \
   184	VADDUWM V30, V31, V30; \
   185	VXOR V30, V29, V21; \
   186	VADDUWM V30, V31, V30; \
   187	VXOR V30, V29, V22; \
   188	VADDUWM V30, V31, V30
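// XORing the counter block with the first round key in VS0 performs
// the initial AddRoundKey step of AES; the VCIPHER calls that follow
// apply the middle rounds and VCIPHERLAST the final one. Since each
// CTR block uses an independent counter value, the 4- and 8-wide
// variants can interleave rounds from separate blocks to hide vcipher
// latency.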
   189
   190// Load the keys to be used for
   191// encryption based on key_len.
   192// Keys are in VS0 - VS14
   193// depending on key_len.
   194// Valid key sizes are verified
   195// here. CR2 is set and used
   196// throughout to check key_len.
   197#define LOAD_KEYS(blk_key, key_len) \
   198	MOVD	$16, R16; \
   199	MOVD	$32, R17; \
   200	MOVD	$48, R18; \
   201	MOVD	$64, R19; \
   202	LXVD2X (blk_key)(R0), VS0; \
   203	LXVD2X (blk_key)(R16), VS1; \
   204	LXVD2X (blk_key)(R17), VS2; \
   205	LXVD2X (blk_key)(R18), VS3; \
   206	LXVD2X (blk_key)(R19), VS4; \
   207	ADD $64, R16; \
   208	ADD $64, R17; \
   209	ADD $64, R18; \
   210	ADD $64, R19; \
   211	LXVD2X (blk_key)(R16), VS5; \
   212	LXVD2X (blk_key)(R17), VS6; \
   213	LXVD2X (blk_key)(R18), VS7; \
   214	LXVD2X (blk_key)(R19), VS8; \
   215	ADD $64, R16; \
   216	ADD $64, R17; \
   217	ADD $64, R18; \
   218	ADD $64, R19; \
   219	LXVD2X (blk_key)(R16), VS9; \
   220	LXVD2X (blk_key)(R17), VS10; \
   221	CMP key_len, $12, CR2; \
   222	CMP key_len, $10; \
   223	BEQ keysLoaded; \
   224	LXVD2X (blk_key)(R18), VS11; \
   225	LXVD2X (blk_key)(R19), VS12; \
   226	BEQ CR2, keysLoaded; \
   227	ADD $64, R16; \
   228	ADD $64, R17; \
   229	LXVD2X (blk_key)(R16), VS13; \
   230	LXVD2X (blk_key)(R17), VS14; \
   231	CMP key_len, $14; \
   232	BEQ keysLoaded; \
   233	MOVD R0,0(R0); \
   234keysLoaded:
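// key_len of 10, 12 or 14 is the AES round count for 128-, 192- and
// 256-bit keys, so 11, 13 or 15 round keys are loaded into VS0-VS10,
// VS0-VS12 or VS0-VS14 respectively. Any other value reaches
// MOVD R0, 0(R0), a store to address zero that faults immediately
// instead of continuing with an unsupported key size.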
   235
   236// Encrypt 1 (vin) with first 9
   237// keys from VS1 - VS9.
   238#define VCIPHER_1X9_KEYS(vin) \
   239	XXLOR VS1, VS1, V23; \
   240	XXLOR VS2, VS2, V24; \
   241	XXLOR VS3, VS3, V25; \
   242	XXLOR VS4, VS4, V26; \
   243	XXLOR VS5, VS5, V27; \
   244	VCIPHER vin, V23, vin; \
   245	VCIPHER vin, V24, vin; \
   246	VCIPHER vin, V25, vin; \
   247	VCIPHER vin, V26, vin; \
   248	VCIPHER vin, V27, vin; \
   249	XXLOR VS6, VS6, V23; \
   250	XXLOR VS7, VS7, V24; \
   251	XXLOR VS8, VS8, V25; \
   252	XXLOR VS9, VS9, V26; \
   253	VCIPHER vin, V23, vin; \
   254	VCIPHER vin, V24, vin; \
   255	VCIPHER vin, V25, vin; \
   256	VCIPHER	vin, V26, vin
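// The expanded key stays in VSX registers VS0-VS14, but VCIPHER only
// operates on vector registers, so keys are copied a few at a time
// into V23-V27 with XXLOR (VS32-VS63 overlap V0-V31) before use.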
   257
   258// Encrypt 1 value (vin) with
   259// 2 specified keys
   260#define VCIPHER_1X2_KEYS(vin, key1, key2) \
   261	XXLOR key1, key1, V25; \
   262	XXLOR key2, key2, V26; \
   263	VCIPHER vin, V25, vin; \
   264	VCIPHER vin, V26, vin
   265
   266// Encrypt 4 values in V15 - V18
   267// with the specified key from
   268// VS1 - VS9.
   269#define VCIPHER_4X1_KEY(key) \
   270	XXLOR key, key, V23; \
   271	VCIPHER V15, V23, V15; \
   272	VCIPHER V16, V23, V16; \
   273	VCIPHER V17, V23, V17; \
   274	VCIPHER V18, V23, V18
   275
   276// Encrypt 8 values in V15 - V22
   277// with the specified key,
   278// assuming it is a VSreg
   279#define VCIPHER_8X1_KEY(key) \
   280	XXLOR key, key, V23; \
   281	VCIPHER V15, V23, V15; \
   282	VCIPHER V16, V23, V16; \
   283	VCIPHER V17, V23, V17; \
   284	VCIPHER V18, V23, V18; \
   285	VCIPHER V19, V23, V19; \
   286	VCIPHER V20, V23, V20; \
   287	VCIPHER V21, V23, V21; \
   288	VCIPHER V22, V23, V22
   289
   290// Load input block into V1-V4
   291// in big endian order and
   292// update blk_inp by 64.
   293#define LOAD_INPUT_BLOCK64(blk_inp) \
   294	MOVD $16, R16; \
   295	MOVD $32, R17; \
   296	MOVD $48, R18; \
   297	P8_LXVB16X(blk_inp,R0,V1); \
   298	P8_LXVB16X(blk_inp,R16,V2); \
   299	P8_LXVB16X(blk_inp,R17,V3); \
   300	P8_LXVB16X(blk_inp,R18,V4); \
   301	ADD $64, blk_inp
   302
   303// Load input block into V1-V8
   304// in big endian order and
   305// Update blk_inp by 128
   306#define LOAD_INPUT_BLOCK128(blk_inp) \
   307	MOVD $16, R16; \
   308	MOVD $32, R17; \
   309	MOVD $48, R18; \
   310	MOVD $64, R19; \
   311	MOVD $80, R20; \
   312	MOVD $96, R21; \
   313	MOVD $112, R22; \
   314	P8_LXVB16X(blk_inp,R0,V1); \
   315	P8_LXVB16X(blk_inp,R16,V2); \
   316	P8_LXVB16X(blk_inp,R17,V3); \
   317	P8_LXVB16X(blk_inp,R18,V4); \
   318	P8_LXVB16X(blk_inp,R19,V5); \
   319	P8_LXVB16X(blk_inp,R20,V6); \
   320	P8_LXVB16X(blk_inp,R21,V7); \
   321	P8_LXVB16X(blk_inp,R22,V8); \
   322	ADD $128, blk_inp
   323
   324// Finish encryption on 8 streams and
   325// XOR with input block
   326#define VCIPHERLAST8_XOR_INPUT \
   327	VCIPHERLAST     V15, V23, V15; \
   328	VCIPHERLAST     V16, V23, V16; \
   329	VCIPHERLAST     V17, V23, V17; \
   330	VCIPHERLAST     V18, V23, V18; \
   331	VCIPHERLAST     V19, V23, V19; \
   332	VCIPHERLAST     V20, V23, V20; \
   333	VCIPHERLAST     V21, V23, V21; \
   334	VCIPHERLAST     V22, V23, V22; \
   335	XXLXOR          V1, V15, V1; \
   336	XXLXOR          V2, V16, V2; \
   337	XXLXOR          V3, V17, V3; \
   338	XXLXOR          V4, V18, V4; \
   339	XXLXOR          V5, V19, V5; \
   340	XXLXOR          V6, V20, V6; \
   341	XXLXOR          V7, V21, V7; \
   342	XXLXOR          V8, V22, V8
   343
   344// Finish encryption on 4 streams and
   345// XOR with input block
   346#define VCIPHERLAST4_XOR_INPUT \
   347	VCIPHERLAST     V15, V23, V15; \
   348	VCIPHERLAST     V16, V23, V16; \
   349	VCIPHERLAST     V17, V23, V17; \
   350	VCIPHERLAST     V18, V23, V18; \
   351	XXLXOR          V1, V15, V1; \
   352	XXLXOR          V2, V16, V2; \
   353	XXLXOR          V3, V17, V3; \
   354	XXLXOR          V4, V18, V4
   355
   356// Store output block from V1-V8
   357// in big endian order and
   358// Update blk_out by 128
   359#define STORE_OUTPUT_BLOCK128(blk_out) \
   360	P8_STXVB16X(V1,blk_out,R0); \
   361	P8_STXVB16X(V2,blk_out,R16); \
   362	P8_STXVB16X(V3,blk_out,R17); \
   363	P8_STXVB16X(V4,blk_out,R18); \
   364	P8_STXVB16X(V5,blk_out,R19); \
   365	P8_STXVB16X(V6,blk_out,R20); \
   366	P8_STXVB16X(V7,blk_out,R21); \
   367	P8_STXVB16X(V8,blk_out,R22); \
   368	ADD $128, blk_out
   369
   370// Store output block from V1-V4
   371// in big endian order and
   372// Update blk_out by 64
   373#define STORE_OUTPUT_BLOCK64(blk_out) \
   374	P8_STXVB16X(V1,blk_out,R0); \
   375	P8_STXVB16X(V2,blk_out,R16); \
   376	P8_STXVB16X(V3,blk_out,R17); \
   377	P8_STXVB16X(V4,blk_out,R18); \
   378	ADD $64, blk_out
   379
   380// func gcmInit(productTable *[256]byte, h []byte)
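// Per the stores below, the productTable is laid out as:
//	0x00:              reduction constant (0xc2...01)
//	0x10, 0x20, 0x30:  H.lo,   H,   H.hi   (twisted and split H)
//	0x40, 0x50, 0x60:  H^2.lo, H^2, H^2.hi
//	0x70, 0x80, 0x90:  H^3.lo, H^3, H^3.hi
//	0xa0, 0xb0, 0xc0:  H^4.lo, H^4, H^4.hi
// gcmHash and gcmMul reload these values rather than recomputing them.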
   381TEXT ·gcmInit(SB), NOSPLIT, $0-32
   382	MOVD productTable+0(FP), XIP
   383	MOVD h+8(FP), HTBL
   384
   385	MOVD   $0x10, R8
   386	MOVD   $0x20, R9
   387	MOVD   $0x30, R10
   388	LXVD2X (HTBL)(R0), VH // Load H
   389
   390	VSPLTISB $-16, XC2           // 0xf0
   391	VSPLTISB $1, T0              // one
   392	VADDUBM  XC2, XC2, XC2       // 0xe0
   393	VXOR     ZERO, ZERO, ZERO
   394	VOR      XC2, T0, XC2        // 0xe1
   395	VSLDOI   $15, XC2, ZERO, XC2 // 0xe1...
   396	VSLDOI   $1, ZERO, T0, T1    // ...1
   397	VADDUBM  XC2, XC2, XC2       // 0xc2...
   398	VSPLTISB $7, T2
   399	VOR      XC2, T1, XC2        // 0xc2....01
   400	VSPLTB   $0, H, T1           // most significant byte
   401	VSL      H, T0, H            // H<<=1
   402	VSRAB    T1, T2, T1          // broadcast carry bit
   403	VAND     T1, XC2, T1
   404	VXOR     H, T1, IN           // twisted H
   405
   406	VSLDOI $8, IN, IN, H      // twist even more ...
   407	VSLDOI $8, ZERO, XC2, XC2 // 0xc2.0
   408	VSLDOI $8, ZERO, H, HL    // ... and split
   409	VSLDOI $8, H, ZERO, HH
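// At this point XC2 holds the reduction constant 0xc2...01 derived
// from the GHASH field polynomial x^128 + x^7 + x^2 + x + 1, and H has
// been "twisted": shifted left one bit with the carry folded back in
// via that constant, in effect multiplying H by x in GF(2^128).
// Storing H in this form, split into low and high halves, lets each
// GHASH multiplication below use just three VPMSUMD carry-less
// products (lo·lo, cross terms, hi·hi) plus a two-phase reduction.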
   410
   411	STXVD2X VXC2, (XIP+R0) // save pre-computed table
   412	STXVD2X VHL, (XIP+R8)
   413	MOVD    $0x40, R8
   414	STXVD2X VH, (XIP+R9)
   415	MOVD    $0x50, R9
   416	STXVD2X VHH, (XIP+R10)
   417	MOVD    $0x60, R10
   418
   419	VPMSUMD IN, HL, XL // H.lo·H.lo
   420	VPMSUMD IN, H, XM  // H.hi·H.lo+H.lo·H.hi
   421	VPMSUMD IN, HH, XH // H.hi·H.hi
   422
   423	VPMSUMD XL, XC2, T2 // 1st reduction phase
   424
   425	VSLDOI $8, XM, ZERO, T0
   426	VSLDOI $8, ZERO, XM, T1
   427	VXOR   XL, T0, XL
   428	VXOR   XH, T1, XH
   429
   430	VSLDOI $8, XL, XL, XL
   431	VXOR   XL, T2, XL
   432
   433	VSLDOI  $8, XL, XL, T1 // 2nd reduction phase
   434	VPMSUMD XL, XC2, XL
   435	VXOR    T1, XH, T1
   436	VXOR    XL, T1, IN1
   437
   438	VSLDOI $8, IN1, IN1, H2
   439	VSLDOI $8, ZERO, H2, H2L
   440	VSLDOI $8, H2, ZERO, H2H
   441
   442	STXVD2X VH2L, (XIP+R8)  // save H^2
   443	MOVD    $0x70, R8
   444	STXVD2X VH2, (XIP+R9)
   445	MOVD    $0x80, R9
   446	STXVD2X VH2H, (XIP+R10)
   447	MOVD    $0x90, R10
   448
   449	VPMSUMD IN, H2L, XL   // H.lo·H^2.lo
   450	VPMSUMD IN1, H2L, XL1 // H^2.lo·H^2.lo
   451	VPMSUMD IN, H2, XM    // H.hi·H^2.lo+H.lo·H^2.hi
   452	VPMSUMD IN1, H2, XM1  // H^2.hi·H^2.lo+H^2.lo·H^2.hi
   453	VPMSUMD IN, H2H, XH   // H.hi·H^2.hi
   454	VPMSUMD IN1, H2H, XH1 // H^2.hi·H^2.hi
   455
   456	VPMSUMD XL, XC2, T2  // 1st reduction phase
   457	VPMSUMD XL1, XC2, HH // 1st reduction phase
   458
   459	VSLDOI $8, XM, ZERO, T0
   460	VSLDOI $8, ZERO, XM, T1
   461	VSLDOI $8, XM1, ZERO, HL
   462	VSLDOI $8, ZERO, XM1, H
   463	VXOR   XL, T0, XL
   464	VXOR   XH, T1, XH
   465	VXOR   XL1, HL, XL1
   466	VXOR   XH1, H, XH1
   467
   468	VSLDOI $8, XL, XL, XL
   469	VSLDOI $8, XL1, XL1, XL1
   470	VXOR   XL, T2, XL
   471	VXOR   XL1, HH, XL1
   472
   473	VSLDOI  $8, XL, XL, T1  // 2nd reduction phase
   474	VSLDOI  $8, XL1, XL1, H // 2nd reduction phase
   475	VPMSUMD XL, XC2, XL
   476	VPMSUMD XL1, XC2, XL1
   477	VXOR    T1, XH, T1
   478	VXOR    H, XH1, H
   479	VXOR    XL, T1, XL
   480	VXOR    XL1, H, XL1
   481
   482	VSLDOI $8, XL, XL, H
   483	VSLDOI $8, XL1, XL1, H2
   484	VSLDOI $8, ZERO, H, HL
   485	VSLDOI $8, H, ZERO, HH
   486	VSLDOI $8, ZERO, H2, H2L
   487	VSLDOI $8, H2, ZERO, H2H
   488
   489	STXVD2X VHL, (XIP+R8)   // save H^3
   490	MOVD    $0xa0, R8
   491	STXVD2X VH, (XIP+R9)
   492	MOVD    $0xb0, R9
   493	STXVD2X VHH, (XIP+R10)
   494	MOVD    $0xc0, R10
   495	STXVD2X VH2L, (XIP+R8)  // save H^4
   496	STXVD2X VH2, (XIP+R9)
   497	STXVD2X VH2H, (XIP+R10)
   498
   499	RET
   500
   501// func gcmHash(output []byte, productTable *[256]byte, inp []byte, len int)
   502TEXT ·gcmHash(SB), NOSPLIT, $0-64
   503	MOVD output+0(FP), XIP
   504	MOVD productTable+24(FP), HTBL
   505	MOVD inp+32(FP), INP
   506	MOVD len+56(FP), LEN
   507
   508	MOVD   $0x10, R8
   509	MOVD   $0x20, R9
   510	MOVD   $0x30, R10
   511	LXVD2X (XIP)(R0), VXL // load Xi
   512
   513	LXVD2X   (HTBL)(R8), VHL    // load pre-computed table
   514	MOVD     $0x40, R8
   515	LXVD2X   (HTBL)(R9), VH
   516	MOVD     $0x50, R9
   517	LXVD2X   (HTBL)(R10), VHH
   518	MOVD     $0x60, R10
   519	LXVD2X   (HTBL)(R0), VXC2
   520#ifdef GOARCH_ppc64le
   521	LVSL     (R0)(R0), LEMASK
   522	VSPLTISB $0x07, T0
   523	VXOR     LEMASK, T0, LEMASK
   524	VPERM    XL, XL, LEMASK, XL
   525#endif
   526	VXOR     ZERO, ZERO, ZERO
   527
   528	CMPU LEN, $64
   529	BGE  gcm_ghash_p8_4x
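// Inputs of 64 bytes or more take the 4-blocks-per-iteration path,
// which uses the precomputed H, H^2, H^3 and H^4 so only one reduction
// is needed per four blocks. Shorter inputs fall through to the
// 2-block loop and the single-block tail below.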
   530
   531	LXVD2X (INP)(R0), VIN
   532	ADD    $16, INP, INP
   533	SUBCCC $16, LEN, LEN
   534#ifdef GOARCH_ppc64le
   535	VPERM  IN, IN, LEMASK, IN
   536#endif
   537	VXOR   IN, XL, IN
   538	BEQ    short
   539
   540	LXVD2X (HTBL)(R8), VH2L  // load H^2
   541	MOVD   $16, R8
   542	LXVD2X (HTBL)(R9), VH2
   543	ADD    LEN, INP, R9      // end of input
   544	LXVD2X (HTBL)(R10), VH2H
   545
   546loop_2x:
   547	LXVD2X (INP)(R0), VIN1
   548#ifdef GOARCH_ppc64le
   549	VPERM  IN1, IN1, LEMASK, IN1
   550#endif
   551
   552	SUBC    $32, LEN, LEN
   553	VPMSUMD IN, H2L, XL   // H^2.lo·Xi.lo
   554	VPMSUMD IN1, HL, XL1  // H.lo·Xi+1.lo
   555	SUBE    R11, R11, R11 // borrow?-1:0
   556	VPMSUMD IN, H2, XM    // H^2.hi·Xi.lo+H^2.lo·Xi.hi
   557	VPMSUMD IN1, H, XM1   // H.hi·Xi+1.lo+H.lo·Xi+1.hi
   558	AND     LEN, R11, R11
   559	VPMSUMD IN, H2H, XH   // H^2.hi·Xi.hi
   560	VPMSUMD IN1, HH, XH1  // H.hi·Xi+1.hi
   561	ADD     R11, INP, INP
   562
   563	VXOR XL, XL1, XL
   564	VXOR XM, XM1, XM
   565
   566	VPMSUMD XL, XC2, T2 // 1st reduction phase
   567
   568	VSLDOI $8, XM, ZERO, T0
   569	VSLDOI $8, ZERO, XM, T1
   570	VXOR   XH, XH1, XH
   571	VXOR   XL, T0, XL
   572	VXOR   XH, T1, XH
   573
   574	VSLDOI $8, XL, XL, XL
   575	VXOR   XL, T2, XL
   576	LXVD2X (INP)(R8), VIN
   577	ADD    $32, INP, INP
   578
   579	VSLDOI  $8, XL, XL, T1     // 2nd reduction phase
   580	VPMSUMD XL, XC2, XL
   581#ifdef GOARCH_ppc64le
   582	VPERM   IN, IN, LEMASK, IN
   583#endif
   584	VXOR    T1, XH, T1
   585	VXOR    IN, T1, IN
   586	VXOR    IN, XL, IN
   587	CMP     R9, INP
   588	BGT     loop_2x            // done yet?
   589
   590	CMPWU LEN, $0
   591	BNE   even
   592
   593short:
   594	VPMSUMD IN, HL, XL // H.lo·Xi.lo
   595	VPMSUMD IN, H, XM  // H.hi·Xi.lo+H.lo·Xi.hi
   596	VPMSUMD IN, HH, XH // H.hi·Xi.hi
   597
   598	VPMSUMD XL, XC2, T2 // 1st reduction phase
   599
   600	VSLDOI $8, XM, ZERO, T0
   601	VSLDOI $8, ZERO, XM, T1
   602	VXOR   XL, T0, XL
   603	VXOR   XH, T1, XH
   604
   605	VSLDOI $8, XL, XL, XL
   606	VXOR   XL, T2, XL
   607
   608	VSLDOI  $8, XL, XL, T1 // 2nd reduction phase
   609	VPMSUMD XL, XC2, XL
   610	VXOR    T1, XH, T1
   611
   612even:
   613	VXOR    XL, T1, XL
   614#ifdef GOARCH_ppc64le
   615	VPERM   XL, XL, LEMASK, XL
   616#endif
   617	STXVD2X VXL, (XIP+R0)
   618
   619	OR R12, R12, R12 // write out Xi
   620	RET
   621
   622gcm_ghash_p8_4x:
   623	LVSL     (R8)(R0), T0      // 0x0001..0e0f
   624	MOVD     $0x70, R8
   625	LXVD2X   (HTBL)(R9), VH2
   626	MOVD     $0x80, R9
   627	VSPLTISB $8, T1            // 0x0808..0808
   628	MOVD     $0x90, R10
   629	LXVD2X   (HTBL)(R8), VH3L  // load H^3
   630	MOVD     $0xa0, R8
   631	LXVD2X   (HTBL)(R9), VH3
   632	MOVD     $0xb0, R9
   633	LXVD2X   (HTBL)(R10), VH3H
   634	MOVD     $0xc0, R10
   635	LXVD2X   (HTBL)(R8), VH4L  // load H^4
   636	MOVD     $0x10, R8
   637	LXVD2X   (HTBL)(R9), VH4
   638	MOVD     $0x20, R9
   639	LXVD2X   (HTBL)(R10), VH4H
   640	MOVD     $0x30, R10
   641
   642	VSLDOI  $8, ZERO, T1, T2   // 0x0000..0808
   643	VADDUBM T0, T2, HIPERM     // 0x0001..1617
   644	VADDUBM T1, HIPERM, LOPERM // 0x0809..1e1f
   645
   646	SRD $4, LEN, LEN // this allows the sign bit to be used as carry
   647
   648	LXVD2X (INP)(R0), VIN0       // load input
   649	LXVD2X (INP)(R8), VIN1
   650	SUBCCC $8, LEN, LEN
   651	LXVD2X (INP)(R9), VIN2
   652	LXVD2X (INP)(R10), VIN3
   653	ADD    $0x40, INP, INP
   654#ifdef GOARCH_ppc64le
   655	VPERM  IN0, IN0, LEMASK, IN0
   656	VPERM  IN1, IN1, LEMASK, IN1
   657	VPERM  IN2, IN2, LEMASK, IN2
   658	VPERM  IN3, IN3, LEMASK, IN3
   659#endif
   660
   661	VXOR IN0, XL, XH
   662
   663	VPMSUMD IN1, H3L, XL1
   664	VPMSUMD IN1, H3, XM1
   665	VPMSUMD IN1, H3H, XH1
   666
   667	VPERM   H2, H, HIPERM, H21L
   668	VPERM   IN2, IN3, LOPERM, T0
   669	VPERM   H2, H, LOPERM, H21H
   670	VPERM   IN2, IN3, HIPERM, T1
   671	VPMSUMD IN2, H2, XM2         // H^2.lo·Xi+2.hi+H^2.hi·Xi+2.lo
   672	VPMSUMD T0, H21L, XL3        // H^2.lo·Xi+2.lo+H.lo·Xi+3.lo
   673	VPMSUMD IN3, H, XM3          // H.hi·Xi+3.lo  +H.lo·Xi+3.hi
   674	VPMSUMD T1, H21H, XH3        // H^2.hi·Xi+2.hi+H.hi·Xi+3.hi
   675
   676	VXOR XM2, XM1, XM2
   677	VXOR XL3, XL1, XL3
   678	VXOR XM3, XM2, XM3
   679	VXOR XH3, XH1, XH3
   680
   681	BLT tail_4x
   682
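// Each pass through loop_4x multiplies the running digest (already
// XORed with the group's first block) by H^4 and adds in the remaining
// three blocks multiplied by H^3, H^2 and H (accumulated in XL3, XM3
// and XH3), so a single two-phase reduction covers four input blocks.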
   683loop_4x:
   684	LXVD2X (INP)(R0), VIN0
   685	LXVD2X (INP)(R8), VIN1
   686	SUBCCC $4, LEN, LEN
   687	LXVD2X (INP)(R9), VIN2
   688	LXVD2X (INP)(R10), VIN3
   689	ADD    $0x40, INP, INP
   690#ifdef GOARCH_ppc64le
   691	VPERM  IN1, IN1, LEMASK, IN1
   692	VPERM  IN2, IN2, LEMASK, IN2
   693	VPERM  IN3, IN3, LEMASK, IN3
   694	VPERM  IN0, IN0, LEMASK, IN0
   695#endif
   696
   697	VPMSUMD XH, H4L, XL   // H^4.lo·Xi.lo
   698	VPMSUMD XH, H4, XM    // H^4.hi·Xi.lo+H^4.lo·Xi.hi
   699	VPMSUMD XH, H4H, XH   // H^4.hi·Xi.hi
   700	VPMSUMD IN1, H3L, XL1
   701	VPMSUMD IN1, H3, XM1
   702	VPMSUMD IN1, H3H, XH1
   703
   704	VXOR  XL, XL3, XL
   705	VXOR  XM, XM3, XM
   706	VXOR  XH, XH3, XH
   707	VPERM IN2, IN3, LOPERM, T0
   708	VPERM IN2, IN3, HIPERM, T1
   709
   710	VPMSUMD XL, XC2, T2   // 1st reduction phase
   711	VPMSUMD T0, H21L, XL3 // H.lo·Xi+3.lo  +H^2.lo·Xi+2.lo
   712	VPMSUMD T1, H21H, XH3 // H.hi·Xi+3.hi  +H^2.hi·Xi+2.hi
   713
   714	VSLDOI $8, XM, ZERO, T0
   715	VSLDOI $8, ZERO, XM, T1
   716	VXOR   XL, T0, XL
   717	VXOR   XH, T1, XH
   718
   719	VSLDOI $8, XL, XL, XL
   720	VXOR   XL, T2, XL
   721
   722	VSLDOI  $8, XL, XL, T1 // 2nd reduction phase
   723	VPMSUMD IN2, H2, XM2   // H^2.hi·Xi+2.lo+H^2.lo·Xi+2.hi
   724	VPMSUMD IN3, H, XM3    // H.hi·Xi+3.lo  +H.lo·Xi+3.hi
   725	VPMSUMD XL, XC2, XL
   726
   727	VXOR XL3, XL1, XL3
   728	VXOR XH3, XH1, XH3
   729	VXOR XH, IN0, XH
   730	VXOR XM2, XM1, XM2
   731	VXOR XH, T1, XH
   732	VXOR XM3, XM2, XM3
   733	VXOR XH, XL, XH
   734	BGE  loop_4x
   735
   736tail_4x:
   737	VPMSUMD XH, H4L, XL // H^4.lo·Xi.lo
   738	VPMSUMD XH, H4, XM  // H^4.hi·Xi.lo+H^4.lo·Xi.hi
   739	VPMSUMD XH, H4H, XH // H^4.hi·Xi.hi
   740
   741	VXOR XL, XL3, XL
   742	VXOR XM, XM3, XM
   743
   744	VPMSUMD XL, XC2, T2 // 1st reduction phase
   745
   746	VSLDOI $8, XM, ZERO, T0
   747	VSLDOI $8, ZERO, XM, T1
   748	VXOR   XH, XH3, XH
   749	VXOR   XL, T0, XL
   750	VXOR   XH, T1, XH
   751
   752	VSLDOI $8, XL, XL, XL
   753	VXOR   XL, T2, XL
   754
   755	VSLDOI  $8, XL, XL, T1 // 2nd reduction phase
   756	VPMSUMD XL, XC2, XL
   757	VXOR    T1, XH, T1
   758	VXOR    XL, T1, XL
   759
   760	ADDCCC $4, LEN, LEN
   761	BEQ    done_4x
   762
   763	LXVD2X (INP)(R0), VIN0
   764	CMPU   LEN, $2
   765	MOVD   $-4, LEN
   766	BLT    one
   767	LXVD2X (INP)(R8), VIN1
   768	BEQ    two
   769
   770three:
   771	LXVD2X (INP)(R9), VIN2
   772#ifdef GOARCH_ppc64le
   773	VPERM  IN0, IN0, LEMASK, IN0
   774	VPERM  IN1, IN1, LEMASK, IN1
   775	VPERM  IN2, IN2, LEMASK, IN2
   776#endif
   777
   778	VXOR IN0, XL, XH
   779	VOR  H3L, H3L, H4L
   780	VOR  H3, H3, H4
   781	VOR  H3H, H3H, H4H
   782
   783	VPERM   IN1, IN2, LOPERM, T0
   784	VPERM   IN1, IN2, HIPERM, T1
   785	VPMSUMD IN1, H2, XM2         // H^2.lo·Xi+1.hi+H^2.hi·Xi+1.lo
   786	VPMSUMD IN2, H, XM3          // H.hi·Xi+2.lo  +H.lo·Xi+2.hi
   787	VPMSUMD T0, H21L, XL3        // H^2.lo·Xi+1.lo+H.lo·Xi+2.lo
   788	VPMSUMD T1, H21H, XH3        // H^2.hi·Xi+1.hi+H.hi·Xi+2.hi
   789
   790	VXOR XM3, XM2, XM3
   791	JMP  tail_4x
   792
   793two:
   794#ifdef GOARCH_ppc64le
   795	VPERM IN0, IN0, LEMASK, IN0
   796	VPERM IN1, IN1, LEMASK, IN1
   797#endif
   798
   799	VXOR  IN, XL, XH
   800	VPERM ZERO, IN1, LOPERM, T0
   801	VPERM ZERO, IN1, HIPERM, T1
   802
   803	VSLDOI $8, ZERO, H2, H4L
   804	VOR    H2, H2, H4
   805	VSLDOI $8, H2, ZERO, H4H
   806
   807	VPMSUMD T0, H21L, XL3 // H.lo·Xi+1.lo
   808	VPMSUMD IN1, H, XM3   // H.hi·Xi+1.lo+H.lo·Xi+1.hi
   809	VPMSUMD T1, H21H, XH3 // H.hi·Xi+1.hi
   810
   811	JMP tail_4x
   812
   813one:
   814#ifdef GOARCH_ppc64le
   815	VPERM IN0, IN0, LEMASK, IN0
   816#endif
   817
   818	VSLDOI $8, ZERO, H, H4L
   819	VOR    H, H, H4
   820	VSLDOI $8, H, ZERO, H4H
   821
   822	VXOR IN0, XL, XH
   823	VXOR XL3, XL3, XL3
   824	VXOR XM3, XM3, XM3
   825	VXOR XH3, XH3, XH3
   826
   827	JMP tail_4x
   828
   829done_4x:
   830#ifdef GOARCH_ppc64le
   831	VPERM   XL, XL, LEMASK, XL
   832#endif
   833	STXVD2X VXL, (XIP+R0)      // write out Xi
   834	RET
   835
   836// func gcmMul(output []byte, productTable *[256]byte)
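// Performs one GHASH multiplication, Xi = Xi·H in GF(2^128), using the
// precomputed table written by gcmInit.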
   837TEXT ·gcmMul(SB), NOSPLIT, $0-32
   838	MOVD output+0(FP), XIP
   839	MOVD productTable+24(FP), HTBL
   840
   841	MOVD   $0x10, R8
   842	MOVD   $0x20, R9
   843	MOVD   $0x30, R10
   844	LXVD2X (XIP)(R0), VIN // load Xi
   845
   846	LXVD2X   (HTBL)(R8), VHL    // Load pre-computed table
   847	LXVD2X   (HTBL)(R9), VH
   848	LXVD2X   (HTBL)(R10), VHH
   849	LXVD2X   (HTBL)(R0), VXC2
   850#ifdef GOARCH_ppc64le
   851	VSPLTISB $0x07, T0
   852	VXOR     LEMASK, T0, LEMASK
   853	VPERM    IN, IN, LEMASK, IN
   854#endif
   855	VXOR     ZERO, ZERO, ZERO
   856
   857	VPMSUMD IN, HL, XL // H.lo·Xi.lo
   858	VPMSUMD IN, H, XM  // H.hi·Xi.lo+H.lo·Xi.hi
   859	VPMSUMD IN, HH, XH // H.hi·Xi.hi
   860
   861	VPMSUMD XL, XC2, T2 // 1st reduction phase
   862
   863	VSLDOI $8, XM, ZERO, T0
   864	VSLDOI $8, ZERO, XM, T1
   865	VXOR   XL, T0, XL
   866	VXOR   XH, T1, XH
   867
   868	VSLDOI $8, XL, XL, XL
   869	VXOR   XL, T2, XL
   870
   871	VSLDOI  $8, XL, XL, T1 // 2nd reduction phase
   872	VPMSUMD XL, XC2, XL
   873	VXOR    T1, XH, T1
   874	VXOR    XL, T1, XL
   875
   876#ifdef GOARCH_ppc64le
   877	VPERM   XL, XL, LEMASK, XL
   878#endif
   879	STXVD2X VXL, (XIP+R0)      // write out Xi
   880	RET
   881
   882#define BLK_INP    R3
   883#define BLK_OUT    R4
   884#define BLK_KEY    R5
   885#define KEY_LEN    R6
   886#define BLK_IDX    R7
   887#define IDX        R8
   888#define IN_LEN     R9
   889#define COUNTER    R10
   890#define CONPTR     R14
   891#define MASK       V5
   892
   893// Implementation of the counterCrypt function in assembler.
   894// The original loop is unrolled to allow multiple encryption
   895// streams to be processed in parallel, which is achieved by interleaving
   896// vcipher instructions from each stream. This is also referred to as
   897// stitching, and provides significant performance improvements.
   898// Some macros are defined which enable execution for big or little
   899// endian as well as different ISA targets.
   900//func (g *gcmAsm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte, key[gcmBlockSize]uint32)
   901//func counterCryptASM(xr, out, in, counter, key)
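// The flow below is roughly:
//
//	while >= 128 bytes remain: encrypt 8 counter blocks (stitched),
//	                           XOR with input, store 128 bytes
//	if    >=  64 bytes remain: the same with 4 counter blocks
//	while >=  16 bytes remain: one block at a time
//	if    any bytes remain:    encrypt one block, XOR the partial tail
//
// CR2 keeps the result of comparing key_len with 12 for the whole
// function, so each stage can branch on the key size (10, 12 or 14
// rounds) without comparing again.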
   902TEXT ·counterCryptASM(SB), NOSPLIT, $16-72
   903	MOVD	xr(FP), KEY_LEN
   904	MOVD    out+8(FP), BLK_OUT
   905	MOVD    out_len+16(FP), R8
   906	MOVD    in+32(FP), BLK_INP
   907	MOVD    in_len+40(FP), IN_LEN
   908	MOVD    counter+56(FP), COUNTER
   909	MOVD    key+64(FP), BLK_KEY
   910
   911// Set up permute string when needed.
   912#ifdef NEEDS_ESPERM
   913	MOVD    $·rcon(SB), R14
   914	LVX     (R14), ESPERM   // Permute value for P8_ macros.
   915#endif
   916	SETUP_COUNTER		// V30 Counter V31 BE {0, 0, 0, 1}
   917	LOAD_KEYS(BLK_KEY, KEY_LEN)	// VS0 - VS10/12/14 based on key size
   918	CMP     IN_LEN, $128
   919	BLT	block64
   920block128_loop:
   921	// Do 8 encryptions in parallel by setting
   922	// input values in V15-V22 and executing
   923	// vcipher on the updated value and the keys.
   924	GEN_VCIPHER_8_INPUTS
   925	VCIPHER_8X1_KEY(VS1)
   926	VCIPHER_8X1_KEY(VS2)
   927	VCIPHER_8X1_KEY(VS3)
   928	VCIPHER_8X1_KEY(VS4)
   929	VCIPHER_8X1_KEY(VS5)
   930	VCIPHER_8X1_KEY(VS6)
   931	VCIPHER_8X1_KEY(VS7)
   932	VCIPHER_8X1_KEY(VS8)
   933	VCIPHER_8X1_KEY(VS9)
   934	// Additional encryptions are done based on
   935	// the key length, with the last key moved
   936	// to V23 for use with VCIPHERLAST.
   937	// CR2 = CMP key_len, $12
   938	XXLOR VS10, VS10, V23
   939	BLT	CR2, block128_last // key_len = 10
   940	VCIPHER_8X1_KEY(VS10)
   941	VCIPHER_8X1_KEY(VS11)
   942	XXLOR VS12,VS12,V23
   943	BEQ	CR2, block128_last // key_len = 12
   944	VCIPHER_8X1_KEY(VS12)
   945	VCIPHER_8X1_KEY(VS13)
   946	XXLOR VS14,VS14,V23	// key_len = 14
   947block128_last:
   948	// vcipher encryptions are in V15-V22 at this
   949	// point with vcipherlast remaining to be done.
   950	// Load input block into V1-V8, setting index offsets
   951	// in R16-R22 to use with the STORE.
   952	LOAD_INPUT_BLOCK128(BLK_INP)
   953	// Do VCIPHERLAST on the last key for each encryption
   954	// stream and XOR the result with the corresponding
   955	// value from the input block.
   956	VCIPHERLAST8_XOR_INPUT
   957	// Store the results (8*16) and update BLK_OUT by 128.
   958	STORE_OUTPUT_BLOCK128(BLK_OUT)
   959	ADD	$-128, IN_LEN	// input size
   960	CMP     IN_LEN, $128	// check if >= 128 bytes (8 blocks)
   961	BGE	block128_loop	// next input block
   962	CMP	IN_LEN, $0
   963	BEQ	done
   964block64:
   965	CMP	IN_LEN, $64	// Check if >= 64
   966	BLT	block16_loop
   967	// Do 4 encryptions in parallel by setting
   968	// input values in V15-V18 and executing
   969	// vcipher on the updated value and the keys.
   970	GEN_VCIPHER_4_INPUTS
   971	VCIPHER_4X1_KEY(VS1)
   972	VCIPHER_4X1_KEY(VS2)
   973	VCIPHER_4X1_KEY(VS3)
   974	VCIPHER_4X1_KEY(VS4)
   975	VCIPHER_4X1_KEY(VS5)
   976	VCIPHER_4X1_KEY(VS6)
   977	VCIPHER_4X1_KEY(VS7)
   978	VCIPHER_4X1_KEY(VS8)
   979	VCIPHER_4X1_KEY(VS9)
   980	// Check key length based on CR2
   981	// Move last key to V23 for use with later vcipherlast
   982	XXLOR	VS10, VS10, V23
   983	BLT	CR2, block64_last	// size = 10
   984	VCIPHER_4X1_KEY(VS10)		// Encrypt next 2 keys
   985	VCIPHER_4X1_KEY(VS11)
   986	XXLOR	VS12, VS12, V23
   987	BEQ	CR2, block64_last	// size = 12
   988	VCIPHER_4X1_KEY(VS12)		// Encrypt last 2 keys
   989	VCIPHER_4X1_KEY(VS13)
   990	XXLOR	VS14, VS14, V23		// size = 14
   991block64_last:
   992	LOAD_INPUT_BLOCK64(BLK_INP)	// Load 64 bytes of input
   993	// Do VCIPHERLAST on the last key for each encryption
   994	// stream and XOR the result with the corresponding
   995	// value from the input block.
   996	VCIPHERLAST4_XOR_INPUT
   997	// Store the results (4*16) and update BLK_OUT by 64.
   998	STORE_OUTPUT_BLOCK64(BLK_OUT)
   999	ADD	$-64, IN_LEN		// decrement input block length
  1000	CMP	IN_LEN, $0		// check for remaining length
  1001	BEQ	done
  1002block16_loop:
  1003	CMP	IN_LEN, $16		// More input
  1004	BLT	final_block		// If not, then handle partial block
  1005	// Single encryption, no stitching
  1006	GEN_VCIPHER_INPUT		// Generate input value for single encryption
  1007	VCIPHER_1X9_KEYS(V15)		// Encrypt V15 value with 9 keys
  1008	XXLOR	VS10, VS10, V23		// Last key -> V23 for later vcipherlast
  1009	// Key length based on CR2. (LT=10, EQ=12, GT=14)
  1010	BLT	CR2, block16_last	// Finish for key size 10
  1011	VCIPHER_1X2_KEYS(V15, VS10, VS11) // Encrypt V15 with 2 more keys
  1012	XXLOR	VS12, VS12, V23		// Last key -> V23 for later vcipherlast
  1013	BEQ	CR2, block16_last	// Finish for key size 12
  1014	VCIPHER_1X2_KEYS(V15, VS12, VS13) // Encrypt V15 with last 2 keys
  1015	XXLOR	VS14, VS14, V23		// Last key -> V23 for vcipherlast with key size 14
  1016block16_last:
  1017	P8_LXVB16X(BLK_INP, R0, V1)	// Load input
  1018	VCIPHERLAST V15, V23, V15	// Final round using the last key in V23
  1019	XXLXOR	V15, V1, V1		// XOR with input
  1020	P8_STXVB16X(V1,R0,BLK_OUT)	// Store final encryption value to output
  1021	ADD	$16, BLK_INP		// Increment input pointer
  1022	ADD	$16, BLK_OUT		// Increment output pointer
  1023	ADD	$-16, IN_LEN		// Decrement input length
  1024	BR	block16_loop		// Check for next
  1025final_block:
  1026	CMP	IN_LEN, $0
  1027	BEQ	done
  1028	GEN_VCIPHER_INPUT		// Generate input value for partial encryption
  1029	VCIPHER_1X9_KEYS(V15)		// Encrypt V15 with 9 keys
  1030	XXLOR	VS10, VS10, V23		// Save possible last key
  1031	BLT	CR2, final_block_last
  1032	VCIPHER_1X2_KEYS(V15, VS10, VS11)	// Encrypt V15 with next 2 keys
  1033	XXLOR	VS12, VS12, V23		// Save possible last key
  1034	BEQ	CR2, final_block_last
  1035	VCIPHER_1X2_KEYS(V15, VS12, VS13) // Encrypt V15 with last 2 keys
  1036	XXLOR	VS14, VS14, V23		// Save last key
  1037final_block_last:
  1038	VCIPHERLAST V15, V23, V15	// Finish encryption
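// The final partial block is handled two ways. With GOPPC64_power10,
// LXVLL/STXVLL load and store exactly IN_LEN bytes (the length goes in
// the top byte of R17, hence the SLD $56). Otherwise the keystream
// block is spilled to a 16-byte temporary at 32(R1) and XORed with the
// input in 8-, 4-, 2- and 1-byte pieces.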
  1039#ifdef GOPPC64_power10
  1040	// set up length
  1041	SLD	$56, IN_LEN, R17
  1042	LXVLL	BLK_INP, R17, V25
  1043	VXOR	V25, V15, V25
  1044	STXVLL	V25, BLK_OUT, R17
  1045#else
  1046	ADD	$32, R1, MASK_PTR
  1047	MOVD	$0, R16
  1048	P8_STXVB16X(V15, MASK_PTR, R0)
  1049	CMP	IN_LEN, $8
  1050	BLT	next4
  1051	MOVD	0(MASK_PTR), R14
  1052	MOVD	0(BLK_INP), R15
  1053	XOR	R14, R15, R14
  1054	MOVD	R14, 0(BLK_OUT)
  1055	ADD	$8, R16
  1056	ADD	$-8, IN_LEN
  1057next4:
  1058	CMP	IN_LEN, $4
  1059	BLT	next2
  1060	MOVWZ	(BLK_INP)(R16), R15
  1061	MOVWZ	(MASK_PTR)(R16), R14
  1062	XOR	R14, R15, R14
  1063	MOVW	R14, (R16)(BLK_OUT)
  1064	ADD	$4, R16
  1065	ADD	$-4, IN_LEN
  1066next2:
  1067	CMP	IN_LEN, $2
  1068	BLT	next1
  1069	MOVHZ	(BLK_INP)(R16), R15
  1070	MOVHZ	(MASK_PTR)(R16), R14
  1071	XOR	R14, R15, R14
  1072	MOVH	R14, (R16)(BLK_OUT)
  1073	ADD	$2, R16
  1074	ADD	$-2, IN_LEN
  1075next1:
  1076	CMP	IN_LEN, $1
  1077	BLT	done
  1078	MOVBZ	(MASK_PTR)(R16), R14
  1079	MOVBZ	(BLK_INP)(R16), R15
  1080	XOR	R14, R15, R14
  1081	MOVB	R14, (R16)(BLK_OUT)
  1082#endif
  1083done:
  1084	// Save the updated counter value
  1085	P8_STXVB16X(V30, COUNTER, R0)
  1086	// Clear the keys
  1087	XXLXOR	VS0, VS0, VS0
  1088	XXLXOR	VS1, VS1, VS1
  1089	XXLXOR	VS2, VS2, VS2
  1090	XXLXOR	VS3, VS3, VS3
  1091	XXLXOR	VS4, VS4, VS4
  1092	XXLXOR	VS5, VS5, VS5
  1093	XXLXOR	VS6, VS6, VS6
  1094	XXLXOR	VS7, VS7, VS7
  1095	XXLXOR	VS8, VS8, VS8
  1096	XXLXOR	VS9, VS9, VS9
  1097	XXLXOR	VS10, VS10, VS10
  1098	XXLXOR	VS11, VS11, VS11
  1099	XXLXOR	VS12, VS12, VS12
  1100	XXLXOR	VS13, VS13, VS13
  1101	XXLXOR	VS14, VS14, VS14
  1102	RET
  1103
