@@ -114,57 +114,47 @@ static __inline__ void blake2s_increment_counter(const uint32_t inc) {
114
114
b = rotr32(b ^ c, 7); \
115
115
} while (0)
116
116
117
/*
 * Apply one round of mixing, selected by round index r, to the 16-word
 * working vector v (modified in place).
 *
 * The eight G invocations come in two groups of four:
 *   - slots 0..3 mix v[i], v[i+4], v[i+8], v[i+12];
 *   - slots 4..7 mix v[i] with the remaining three indices rotated by one,
 *     two and three positions inside their groups of four.
 * RFC 7693 calls these the column step and the diagonal step.
 *
 * m is not referenced by name in this body.
 * NOTE(review): presumably the G macro (defined earlier in the file, outside
 * this view) reads the message words through the identifier `m` - confirm.
 * NOTE(review): the name `round` is also a C library function declared in
 * <math.h>; compilers may warn about the mismatched redeclaration, and the
 * file will not compile if <math.h> is ever included. Consider renaming
 * together with the call site.
 */
static void round(uint32_t r, uint32_t m[16], uint32_t v[16]) {
  /* First group: indices i, i+4, i+8, i+12. */
  G(r, 0, v[0], v[4], v[8],  v[12]);
  G(r, 1, v[1], v[5], v[9],  v[13]);
  G(r, 2, v[2], v[6], v[10], v[14]);
  G(r, 3, v[3], v[7], v[11], v[15]);

  /* Second group: same first index, the other three rotated. */
  G(r, 4, v[0], v[5], v[10], v[15]);
  G(r, 5, v[1], v[6], v[11], v[12]);
  G(r, 6, v[2], v[7], v[8],  v[13]);
  G(r, 7, v[3], v[4], v[9],  v[14]);
}
128
127
129
128
static void blake2s_compress (const uint8_t block [BLAKE2S_BLOCKBYTES ]) {
130
129
uint32_t m [16 ];
131
130
uint32_t v [16 ];
132
131
133
- for (int i = 0 ; i < 16 ; ++ i ) {
134
- m [i ] = load32 (block + i * sizeof (m [i ]));
135
- }
132
+ memcpy64 (m , block );
136
133
137
- for (int i = 0 ; i < 8 ; ++ i ) {
138
- v [i ] = S -> h [i ];
139
- }
134
+ memcpy32 (v , S -> h );
140
135
141
- v [8 ] = blake2s_IV [0 ];
142
- v [9 ] = blake2s_IV [1 ];
143
- v [10 ] = blake2s_IV [2 ];
144
- v [11 ] = blake2s_IV [3 ];
145
- v [12 ] = blake2s_IV [4 ] ^ S -> t [0 ];
146
- v [13 ] = blake2s_IV [5 ] ^ S -> t [1 ];
147
- v [14 ] = blake2s_IV [6 ] ^ S -> f [0 ];
148
- v [15 ] = blake2s_IV [7 ] ^ S -> f [1 ];
149
-
150
- ROUND (0 );
151
- ROUND (1 );
152
- ROUND (2 );
153
- ROUND (3 );
154
- ROUND (4 );
155
- ROUND (5 );
156
- ROUND (6 );
157
- ROUND (7 );
158
- ROUND (8 );
159
- ROUND (9 );
136
+ uint64_t * v64 = (uint64_t * )v ;
137
+ uint64_t * blake2s_IV64 = (uint64_t * )blake2s_IV ;
138
+ uint64_t * st64 = (uint64_t * )S -> t ;
139
+ uint64_t * sf64 = (uint64_t * )S -> f ;
140
+ v64 [4 ] = blake2s_IV64 [0 ];
141
+ v64 [5 ] = blake2s_IV64 [1 ];
142
+ v64 [6 ] = blake2s_IV64 [2 ] ^ st64 [0 ];
143
+ v64 [7 ] = blake2s_IV64 [3 ] ^ sf64 [0 ];
160
144
161
- for (int i = 0 ; i < 8 ; ++ i ) {
162
- S -> h [i ] = S -> h [i ] ^ v [i ] ^ v [i + 8 ];
145
+ #pragma clang loop unroll(full)
146
+ for (int i = 0 ; i < 10 ; ++ i ) {
147
+ round (i , m , v );
148
+ }
149
+
150
+ uint64_t * sh64 = (uint64_t * )S -> h ;
151
+ #pragma clang loop unroll(full)
152
+ for (int i = 0 ; i < 4 ; ++ i ) {
153
+ sh64 [i ] = sh64 [i ] ^ v64 [i ] ^ v64 [i + 4 ];
163
154
}
164
155
}
165
156
166
157
#undef G
167
- #undef ROUND
168
158
169
159
void blake2s_update (const void * pin , int inlen ) {
170
160
const unsigned char * in = (const unsigned char * )pin ;
@@ -174,9 +164,7 @@ void blake2s_update(const void *pin, int inlen) {
174
164
if (inlen > fill ) {
175
165
S -> buflen = 0 ;
176
166
/* Fill buffer */
177
- for (uint8_t i = 0 ; i < fill ; i ++ ) {
178
- S -> buf [left + i ] = in [i ];
179
- }
167
+ memcpy (& S -> buf [left ], in , fill );
180
168
blake2s_increment_counter (BLAKE2S_BLOCKBYTES );
181
169
blake2s_compress (S -> buf ); /* Compress */
182
170
in += fill ;
@@ -188,9 +176,7 @@ void blake2s_update(const void *pin, int inlen) {
188
176
inlen -= BLAKE2S_BLOCKBYTES ;
189
177
}
190
178
}
191
- for (uint8_t i = 0 ; i < inlen ; i ++ ) {
192
- S -> buf [S -> buflen + i ] = in [i ];
193
- }
179
+ memcpy (& S -> buf [S -> buflen ], in , inlen );
194
180
S -> buflen += inlen ;
195
181
}
196
182
}
0 commit comments