
Text file src/crypto/md5/md5block_ppc64x.s

// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// MD5 optimized for ppc64le using Go's assembler for
// ppc64le, based on the md5block_amd64.s implementation by
// the Go authors.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

//go:build ppc64 || ppc64le

#include "textflag.h"

// ENDIAN_MOVE generates the appropriate
// 4-byte load for big or little endian.
// The 4 bytes at ptr+off are loaded into dst.
// The idx reg is only needed for big endian
// and is clobbered when used.
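// MD5 interprets the message as little-endian 32-bit
// words, so the big endian build uses a byte-reversed
// load (MOVWBR) to read each word in the right order.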
#ifdef GOARCH_ppc64le
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVWZ	off(ptr),dst
#else
#define ENDIAN_MOVE(off, ptr, dst, idx) \
	MOVD	$off,idx; \
	MOVWBR	(idx)(ptr), dst
#endif

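// M00..M15 hold the sixteen 32-bit message words of the
// 64-byte block currently being processed.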
#define M00 R18
#define M01 R19
#define M02 R20
#define M03 R24
#define M04 R25
#define M05 R26
#define M06 R27
#define M07 R28
#define M08 R29
#define M09 R21
#define M10 R11
#define M11 R8
#define M12 R7
#define M13 R12
#define M14 R23
#define M15 R10

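// ROUND1 performs one step of round 1 (RFC 1321):
//	a = b + ((a + F(b,c,d) + index + const) <<< shift)
// where F(x,y,z) = (x & y) | (^x & z), built from AND,
// ANDN and OR; index holds a message word, and R9 and
// R31 are scratch registers.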
#define ROUND1(a, b, c, d, index, const, shift) \
	ADD	$const, index, R9; \
	ADD	R9, a; \
	AND	b, c, R9; \
	ANDN	b, d, R31; \
	OR	R9, R31, R9; \
	ADD	R9, a; \
	ROTLW	$shift, a; \
	ADD	b, a;

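// ROUND2 performs one step of round 2, replacing F with
// G(x,y,z) = (x & z) | (y & ^z).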
#define ROUND2(a, b, c, d, index, const, shift) \
	ADD	$const, index, R9; \
	ADD	R9, a; \
	AND	b, d, R31; \
	ANDN	d, c, R9; \
	OR	R9, R31; \
	ADD	R31, a; \
	ROTLW	$shift, a; \
	ADD	b, a;

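// ROUND3 performs one step of round 3, replacing F with
// H(x,y,z) = x ^ y ^ z.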
#define ROUND3(a, b, c, d, index, const, shift) \
	ADD	$const, index, R9; \
	ADD	R9, a; \
	XOR	d, c, R31; \
	XOR	b, R31; \
	ADD	R31, a; \
	ROTLW	$shift, a; \
	ADD	b, a;

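// ROUND4 performs one step of round 4, replacing F with
// I(x,y,z) = y ^ (x | ^z).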
#define ROUND4(a, b, c, d, index, const, shift) \
	ADD	$const, index, R9; \
	ADD	R9, a; \
	ORN	d, b, R31; \
	XOR	c, R31; \
	ADD	R31, a; \
	ROTLW	$shift, a; \
	ADD	b, a;

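// func block(dig *digest, p []byte)
// block hashes the 64-byte blocks of p into the digest
// state pointed to by dig.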
TEXT ·block(SB),NOSPLIT,$0-32
	MOVD	dig+0(FP), R10
	MOVD	p+8(FP), R6
	MOVD	p_len+16(FP), R5

	// We assume p_len >= 64 and a multiple of 64, so the
	// block count p_len/64 becomes the loop counter.
	SRD	$6, R5
	MOVD	R5, CTR

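	// Load the four 32-bit state words a, b, c, d from
	// the digest.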
	MOVWZ	0(R10), R22
	MOVWZ	4(R10), R3
	MOVWZ	8(R10), R4
	MOVWZ	12(R10), R5

loop:
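	// Save the incoming state; it is added back in after
	// the four rounds.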
	MOVD	R22, R14
	MOVD	R3, R15
	MOVD	R4, R16
	MOVD	R5, R17

	ENDIAN_MOVE( 0,R6,M00,M15)
	ENDIAN_MOVE( 4,R6,M01,M15)
	ENDIAN_MOVE( 8,R6,M02,M15)
	ENDIAN_MOVE(12,R6,M03,M15)

	ROUND1(R22,R3,R4,R5,M00,0xd76aa478, 7);
	ROUND1(R5,R22,R3,R4,M01,0xe8c7b756,12);
	ROUND1(R4,R5,R22,R3,M02,0x242070db,17);
	ROUND1(R3,R4,R5,R22,M03,0xc1bdceee,22);

	ENDIAN_MOVE(16,R6,M04,M15)
	ENDIAN_MOVE(20,R6,M05,M15)
	ENDIAN_MOVE(24,R6,M06,M15)
	ENDIAN_MOVE(28,R6,M07,M15)

	ROUND1(R22,R3,R4,R5,M04,0xf57c0faf, 7);
	ROUND1(R5,R22,R3,R4,M05,0x4787c62a,12);
	ROUND1(R4,R5,R22,R3,M06,0xa8304613,17);
	ROUND1(R3,R4,R5,R22,M07,0xfd469501,22);

	ENDIAN_MOVE(32,R6,M08,M15)
	ENDIAN_MOVE(36,R6,M09,M15)
	ENDIAN_MOVE(40,R6,M10,M15)
	ENDIAN_MOVE(44,R6,M11,M15)

	ROUND1(R22,R3,R4,R5,M08,0x698098d8, 7);
	ROUND1(R5,R22,R3,R4,M09,0x8b44f7af,12);
	ROUND1(R4,R5,R22,R3,M10,0xffff5bb1,17);
	ROUND1(R3,R4,R5,R22,M11,0x895cd7be,22);

	ENDIAN_MOVE(48,R6,M12,M15)
	ENDIAN_MOVE(52,R6,M13,M15)
	ENDIAN_MOVE(56,R6,M14,M15)
	ENDIAN_MOVE(60,R6,M15,M15)

	ROUND1(R22,R3,R4,R5,M12,0x6b901122, 7);
	ROUND1(R5,R22,R3,R4,M13,0xfd987193,12);
	ROUND1(R4,R5,R22,R3,M14,0xa679438e,17);
	ROUND1(R3,R4,R5,R22,M15,0x49b40821,22);

	ROUND2(R22,R3,R4,R5,M01,0xf61e2562, 5);
	ROUND2(R5,R22,R3,R4,M06,0xc040b340, 9);
	ROUND2(R4,R5,R22,R3,M11,0x265e5a51,14);
	ROUND2(R3,R4,R5,R22,M00,0xe9b6c7aa,20);
	ROUND2(R22,R3,R4,R5,M05,0xd62f105d, 5);
	ROUND2(R5,R22,R3,R4,M10, 0x2441453, 9);
	ROUND2(R4,R5,R22,R3,M15,0xd8a1e681,14);
	ROUND2(R3,R4,R5,R22,M04,0xe7d3fbc8,20);
	ROUND2(R22,R3,R4,R5,M09,0x21e1cde6, 5);
	ROUND2(R5,R22,R3,R4,M14,0xc33707d6, 9);
	ROUND2(R4,R5,R22,R3,M03,0xf4d50d87,14);
	ROUND2(R3,R4,R5,R22,M08,0x455a14ed,20);
	ROUND2(R22,R3,R4,R5,M13,0xa9e3e905, 5);
	ROUND2(R5,R22,R3,R4,M02,0xfcefa3f8, 9);
	ROUND2(R4,R5,R22,R3,M07,0x676f02d9,14);
	ROUND2(R3,R4,R5,R22,M12,0x8d2a4c8a,20);

	ROUND3(R22,R3,R4,R5,M05,0xfffa3942, 4);
	ROUND3(R5,R22,R3,R4,M08,0x8771f681,11);
	ROUND3(R4,R5,R22,R3,M11,0x6d9d6122,16);
	ROUND3(R3,R4,R5,R22,M14,0xfde5380c,23);
	ROUND3(R22,R3,R4,R5,M01,0xa4beea44, 4);
	ROUND3(R5,R22,R3,R4,M04,0x4bdecfa9,11);
	ROUND3(R4,R5,R22,R3,M07,0xf6bb4b60,16);
	ROUND3(R3,R4,R5,R22,M10,0xbebfbc70,23);
	ROUND3(R22,R3,R4,R5,M13,0x289b7ec6, 4);
	ROUND3(R5,R22,R3,R4,M00,0xeaa127fa,11);
	ROUND3(R4,R5,R22,R3,M03,0xd4ef3085,16);
	ROUND3(R3,R4,R5,R22,M06, 0x4881d05,23);
	ROUND3(R22,R3,R4,R5,M09,0xd9d4d039, 4);
	ROUND3(R5,R22,R3,R4,M12,0xe6db99e5,11);
	ROUND3(R4,R5,R22,R3,M15,0x1fa27cf8,16);
	ROUND3(R3,R4,R5,R22,M02,0xc4ac5665,23);

	ROUND4(R22,R3,R4,R5,M00,0xf4292244, 6);
	ROUND4(R5,R22,R3,R4,M07,0x432aff97,10);
	ROUND4(R4,R5,R22,R3,M14,0xab9423a7,15);
	ROUND4(R3,R4,R5,R22,M05,0xfc93a039,21);
	ROUND4(R22,R3,R4,R5,M12,0x655b59c3, 6);
	ROUND4(R5,R22,R3,R4,M03,0x8f0ccc92,10);
	ROUND4(R4,R5,R22,R3,M10,0xffeff47d,15);
	ROUND4(R3,R4,R5,R22,M01,0x85845dd1,21);
	ROUND4(R22,R3,R4,R5,M08,0x6fa87e4f, 6);
	ROUND4(R5,R22,R3,R4,M15,0xfe2ce6e0,10);
	ROUND4(R4,R5,R22,R3,M06,0xa3014314,15);
	ROUND4(R3,R4,R5,R22,M13,0x4e0811a1,21);
	ROUND4(R22,R3,R4,R5,M04,0xf7537e82, 6);
	ROUND4(R5,R22,R3,R4,M11,0xbd3af235,10);
	ROUND4(R4,R5,R22,R3,M02,0x2ad7d2bb,15);
	ROUND4(R3,R4,R5,R22,M09,0xeb86d391,21);

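	// Add the saved state back into a, b, c, d (MD5 block
	// chaining) and advance p to the next 64-byte block.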
	ADD	R14, R22
	ADD	R15, R3
	ADD	R16, R4
	ADD	R17, R5
	ADD	$64, R6
	BC	16, 0, loop // bdnz: decrement CTR, branch if CTR != 0

end:
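	// Write the final a, b, c, d back to the digest.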
	MOVD	dig+0(FP), R10
	MOVWZ	R22, 0(R10)
	MOVWZ	R3, 4(R10)
	MOVWZ	R4, 8(R10)
	MOVWZ	R5, 12(R10)

	RET
