1
2
3
4
5 package argon2
6
// useSSE4 is a package-level flag that is declared here but is neither read
// nor written by the generic routines in this part of the file.
// NOTE(review): presumably it is set by a platform-specific implementation to
// select an SSE4-accelerated code path — confirm against the rest of the package.
var useSSE4 bool
8
9 func processBlockGeneric(out, in1, in2 *block, xor bool) {
10 var t block
11 for i := range t {
12 t[i] = in1[i] ^ in2[i]
13 }
14 for i := 0; i < blockLength; i += 16 {
15 blamkaGeneric(
16 &t[i+0], &t[i+1], &t[i+2], &t[i+3],
17 &t[i+4], &t[i+5], &t[i+6], &t[i+7],
18 &t[i+8], &t[i+9], &t[i+10], &t[i+11],
19 &t[i+12], &t[i+13], &t[i+14], &t[i+15],
20 )
21 }
22 for i := 0; i < blockLength/8; i += 2 {
23 blamkaGeneric(
24 &t[i], &t[i+1], &t[16+i], &t[16+i+1],
25 &t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1],
26 &t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1],
27 &t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1],
28 )
29 }
30 if xor {
31 for i := range t {
32 out[i] ^= in1[i] ^ in2[i] ^ t[i]
33 }
34 } else {
35 for i := range t {
36 out[i] = in1[i] ^ in2[i] ^ t[i]
37 }
38 }
39 }
40
// blamkaGeneric permutes sixteen 64-bit words in place.
//
// All sixteen words are read into a local array before any mixing happens and
// are written back through the pointers only after the last step, in the order
// t00 through t15. Viewing the words as a 4x4 matrix in row-major order, the
// same four-word mixing step is applied first to the columns
// (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15) and then to the diagonals
// (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
func blamkaGeneric(t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15 *uint64) {
	// mulAdd returns x + y + 2*lo32(x)*lo32(y); all arithmetic wraps modulo 2^64.
	mulAdd := func(x, y uint64) uint64 {
		return x + (y + 2*uint64(uint32(x))*uint64(uint32(y)))
	}
	// rotr rotates x right by k bits; k is always one of 32, 24, 16 or 63 here.
	rotr := func(x uint64, k uint) uint64 {
		return x>>k | x<<(64-k)
	}
	// mix runs the two-half mixing step on four words and returns the
	// updated (a, b, c, d).
	mix := func(a, b, c, d uint64) (uint64, uint64, uint64, uint64) {
		a = mulAdd(a, b)
		d = rotr(d^a, 32)
		c = mulAdd(c, d)
		b = rotr(b^c, 24)

		a = mulAdd(a, b)
		d = rotr(d^a, 16)
		c = mulAdd(c, d)
		b = rotr(b^c, 63)
		return a, b, c, d
	}

	// Load every word up front so the computation works on private copies.
	w := [16]uint64{
		*t00, *t01, *t02, *t03,
		*t04, *t05, *t06, *t07,
		*t08, *t09, *t10, *t11,
		*t12, *t13, *t14, *t15,
	}

	// Column step.
	w[0], w[4], w[8], w[12] = mix(w[0], w[4], w[8], w[12])
	w[1], w[5], w[9], w[13] = mix(w[1], w[5], w[9], w[13])
	w[2], w[6], w[10], w[14] = mix(w[2], w[6], w[10], w[14])
	w[3], w[7], w[11], w[15] = mix(w[3], w[7], w[11], w[15])

	// Diagonal step.
	w[0], w[5], w[10], w[15] = mix(w[0], w[5], w[10], w[15])
	w[1], w[6], w[11], w[12] = mix(w[1], w[6], w[11], w[12])
	w[2], w[7], w[8], w[13] = mix(w[2], w[7], w[8], w[13])
	w[3], w[4], w[9], w[14] = mix(w[3], w[4], w[9], w[14])

	// Store the results back through the caller's pointers.
	*t00, *t01, *t02, *t03 = w[0], w[1], w[2], w[3]
	*t04, *t05, *t06, *t07 = w[4], w[5], w[6], w[7]
	*t08, *t09, *t10, *t11 = w[8], w[9], w[10], w[11]
	*t12, *t13, *t14, *t15 = w[12], w[13], w[14], w[15]
}
164
View as plain text