80d7176dc707d91548ee24fb9f5fcc7449acc838
2 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/*
 * Round constants. process_block() XORs RC[i] into state word A[0] at
 * the end of round i, so the table holds one entry per round (24).
 */
static const uint64_t RC[] = {
	0x0000000000000001, 0x0000000000008082,
	0x800000000000808A, 0x8000000080008000,
	0x000000000000808B, 0x0000000080000001,
	0x8000000080008081, 0x8000000000008009,
	0x000000000000008A, 0x0000000000000088,
	0x0000000080008009, 0x000000008000000A,
	0x000000008000808B, 0x800000000000008B,
	0x8000000000008089, 0x8000000000008003,
	0x8000000000008002, 0x8000000000000080,
	0x000000000000800A, 0x800000008000000A,
	0x8000000080008081, 0x8000000000008080,
	0x0000000080000001, 0x8000000080008008
};
/*
 * XOR a block of data into the provided state. This supports only
 * blocks whose length is a multiple of 64 bits.
 *
 * A      state words; word number (u / 8) receives bytes data[u..u+7]
 * data   input bytes; each group of 8 bytes is decoded with br_dec64le()
 * rate   number of bytes to XOR in; must be a multiple of 8
 */
static void
xor_block(uint64_t *A, const void *data, size_t rate)
{
	const unsigned char *src;
	size_t u;

	src = data;
	for (u = 0; u < rate; u += 8) {
		A[u >> 3] ^= br_dec64le(src + u);
	}
}
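/*
 * Process the state: apply the 24 rounds of the permutation to A, in
 * place. No data is consumed here; callers XOR their input into the
 * state with xor_block() first, and that input's length must be a
 * multiple of 8 (in bytes); normally, this is the "rate".
 */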
/*
 * NOTE(review): this listing of process_block() is incomplete. The return
 * type, the braces, the declaration of j and most of the statements of
 * each round do not appear here (tt0 is rotated without any visible
 * assignment, j is used without a visible declaration). The comments
 * added below describe only the statements that are visible.
 *
 * A points to the state; the visible statements access A[0] to A[24].
 */
64 process_block(uint64_t *A
)
/*
 * Temporaries. Only tt0 is used by the statements visible in this
 * listing; the others are presumably used by the missing statements.
 */
66 uint64_t t0
, t1
, t2
, t3
, t4
;
67 uint64_t tt0
, tt1
, tt2
, tt3
;
69 uint64_t c0
, c1
, c2
, c3
, c4
, bnn
;
73 * Compute the 24 rounds. This loop is partially unrolled (each
74 * iteration computes two rounds).
/* j is the index of the first round of the pair: 0, 2, ..., 22. */
76 for (j
= 0; j
< 24; j
+= 2) {
/*
 * First round of the pair. tt0 is rotated left by one bit (64-bit
 * rotation) five times; the statements that compute tt0 before each
 * rotation, and consume it afterwards, are not visible here.
 */
81 tt0
= (tt0
<< 1) | (tt0
>> 63);
91 tt0
= (tt0
<< 1) | (tt0
>> 63);
101 tt0
= (tt0
<< 1) | (tt0
>> 63);
111 tt0
= (tt0
<< 1) | (tt0
>> 63);
121 tt0
= (tt0
<< 1) | (tt0
>> 63);
/*
 * Rotate each of A[1] to A[24] left, in place, by a fixed amount that
 * is specific to the word. No rotation of A[0] appears in this listing.
 */
153 A
[ 5] = (A
[ 5] << 36) | (A
[ 5] >> (64 - 36));
154 A
[10] = (A
[10] << 3) | (A
[10] >> (64 - 3));
155 A
[15] = (A
[15] << 41) | (A
[15] >> (64 - 41));
156 A
[20] = (A
[20] << 18) | (A
[20] >> (64 - 18));
157 A
[ 1] = (A
[ 1] << 1) | (A
[ 1] >> (64 - 1));
158 A
[ 6] = (A
[ 6] << 44) | (A
[ 6] >> (64 - 44));
159 A
[11] = (A
[11] << 10) | (A
[11] >> (64 - 10));
160 A
[16] = (A
[16] << 45) | (A
[16] >> (64 - 45));
161 A
[21] = (A
[21] << 2) | (A
[21] >> (64 - 2));
162 A
[ 2] = (A
[ 2] << 62) | (A
[ 2] >> (64 - 62));
163 A
[ 7] = (A
[ 7] << 6) | (A
[ 7] >> (64 - 6));
164 A
[12] = (A
[12] << 43) | (A
[12] >> (64 - 43));
165 A
[17] = (A
[17] << 15) | (A
[17] >> (64 - 15));
166 A
[22] = (A
[22] << 61) | (A
[22] >> (64 - 61));
167 A
[ 3] = (A
[ 3] << 28) | (A
[ 3] >> (64 - 28));
168 A
[ 8] = (A
[ 8] << 55) | (A
[ 8] >> (64 - 55));
169 A
[13] = (A
[13] << 25) | (A
[13] >> (64 - 25));
170 A
[18] = (A
[18] << 21) | (A
[18] >> (64 - 21));
171 A
[23] = (A
[23] << 56) | (A
[23] >> (64 - 56));
172 A
[ 4] = (A
[ 4] << 27) | (A
[ 4] >> (64 - 27));
173 A
[ 9] = (A
[ 9] << 20) | (A
[ 9] >> (64 - 20));
174 A
[14] = (A
[14] << 39) | (A
[14] >> (64 - 39));
175 A
[19] = (A
[19] << 8) | (A
[19] >> (64 - 8));
176 A
[24] = (A
[24] << 14) | (A
[24] >> (64 - 14));
/* XOR the round constant for round j into A[0]. */
257 A
[ 0] = A
[ 0] ^ RC
[j
+ 0];
/*
 * Second round of the pair: again five one-bit left rotations of tt0,
 * with the surrounding statements not visible here.
 */
262 tt0
= (tt0
<< 1) | (tt0
>> 63);
272 tt0
= (tt0
<< 1) | (tt0
>> 63);
282 tt0
= (tt0
<< 1) | (tt0
>> 63);
292 tt0
= (tt0
<< 1) | (tt0
>> 63);
302 tt0
= (tt0
<< 1) | (tt0
>> 63);
/*
 * Same sequence of rotation amounts as in the first round (36, 3, 41,
 * 18, 1, ...), but applied to the words in a different order.
 * NOTE(review): presumably the first round leaves the words in permuted
 * positions and this ordering accounts for it; the code that would do
 * so is not visible in this listing -- confirm against the full file.
 */
334 A
[ 3] = (A
[ 3] << 36) | (A
[ 3] >> (64 - 36));
335 A
[ 1] = (A
[ 1] << 3) | (A
[ 1] >> (64 - 3));
336 A
[ 4] = (A
[ 4] << 41) | (A
[ 4] >> (64 - 41));
337 A
[ 2] = (A
[ 2] << 18) | (A
[ 2] >> (64 - 18));
338 A
[ 6] = (A
[ 6] << 1) | (A
[ 6] >> (64 - 1));
339 A
[ 9] = (A
[ 9] << 44) | (A
[ 9] >> (64 - 44));
340 A
[ 7] = (A
[ 7] << 10) | (A
[ 7] >> (64 - 10));
341 A
[ 5] = (A
[ 5] << 45) | (A
[ 5] >> (64 - 45));
342 A
[ 8] = (A
[ 8] << 2) | (A
[ 8] >> (64 - 2));
343 A
[12] = (A
[12] << 62) | (A
[12] >> (64 - 62));
344 A
[10] = (A
[10] << 6) | (A
[10] >> (64 - 6));
345 A
[13] = (A
[13] << 43) | (A
[13] >> (64 - 43));
346 A
[11] = (A
[11] << 15) | (A
[11] >> (64 - 15));
347 A
[14] = (A
[14] << 61) | (A
[14] >> (64 - 61));
348 A
[18] = (A
[18] << 28) | (A
[18] >> (64 - 28));
349 A
[16] = (A
[16] << 55) | (A
[16] >> (64 - 55));
350 A
[19] = (A
[19] << 25) | (A
[19] >> (64 - 25));
351 A
[17] = (A
[17] << 21) | (A
[17] >> (64 - 21));
352 A
[15] = (A
[15] << 56) | (A
[15] >> (64 - 56));
353 A
[24] = (A
[24] << 27) | (A
[24] >> (64 - 27));
354 A
[22] = (A
[22] << 20) | (A
[22] >> (64 - 20));
355 A
[20] = (A
[20] << 39) | (A
[20] >> (64 - 39));
356 A
[23] = (A
[23] << 8) | (A
[23] >> (64 - 8));
357 A
[21] = (A
[21] << 14) | (A
[21] >> (64 - 14));
/* XOR the round constant for round j + 1 into A[0]. */
438 A
[ 0] = A
[ 0] ^ RC
[j
+ 1];
468 /* see bearssl_kdf.h */
470 br_shake_init(br_shake_context
*sc
, int security_level
)
472 sc
->rate
= 200 - (size_t)(security_level
>> 2);
474 memset(sc
->A
, 0, sizeof sc
->A
);
475 sc
->A
[ 1] = ~(uint64_t)0;
476 sc
->A
[ 2] = ~(uint64_t)0;
477 sc
->A
[ 8] = ~(uint64_t)0;
478 sc
->A
[12] = ~(uint64_t)0;
479 sc
->A
[17] = ~(uint64_t)0;
480 sc
->A
[20] = ~(uint64_t)0;
483 /* see bearssl_kdf.h */
485 br_shake_inject(br_shake_context
*sc
, const void *data
, size_t len
)
487 const unsigned char *buf
;
500 memcpy(sc
->dbuf
+ dptr
, buf
, clen
);
505 xor_block(sc
->A
, sc
->dbuf
, rate
);
506 process_block(sc
->A
);
513 /* see bearssl_kdf.h */
515 br_shake_flip(br_shake_context
*sc
)
518 * We apply padding and pre-XOR the value into the state. We
519 * set dptr to the end of the buffer, so that first call to
520 * shake_extract() will process the block.
522 if ((sc
->dptr
+ 1) == sc
->rate
) {
523 sc
->dbuf
[sc
->dptr
++] = 0x9F;
525 sc
->dbuf
[sc
->dptr
++] = 0x1F;
526 memset(sc
->dbuf
+ sc
->dptr
, 0x00, sc
->rate
- sc
->dptr
- 1);
527 sc
->dbuf
[sc
->rate
- 1] = 0x80;
530 xor_block(sc
->A
, sc
->dbuf
, sc
->rate
);
533 /* see bearssl_kdf.h */
535 br_shake_produce(br_shake_context
*sc
, void *out
, size_t len
)
553 br_enc64le(dbuf
+ 0, A
[ 0]);
554 br_enc64le(dbuf
+ 8, ~A
[ 1]);
555 br_enc64le(dbuf
+ 16, ~A
[ 2]);
556 br_enc64le(dbuf
+ 24, A
[ 3]);
557 br_enc64le(dbuf
+ 32, A
[ 4]);
558 br_enc64le(dbuf
+ 40, A
[ 5]);
559 br_enc64le(dbuf
+ 48, A
[ 6]);
560 br_enc64le(dbuf
+ 56, A
[ 7]);
561 br_enc64le(dbuf
+ 64, ~A
[ 8]);
562 br_enc64le(dbuf
+ 72, A
[ 9]);
563 br_enc64le(dbuf
+ 80, A
[10]);
564 br_enc64le(dbuf
+ 88, A
[11]);
565 br_enc64le(dbuf
+ 96, ~A
[12]);
566 br_enc64le(dbuf
+ 104, A
[13]);
567 br_enc64le(dbuf
+ 112, A
[14]);
568 br_enc64le(dbuf
+ 120, A
[15]);
569 br_enc64le(dbuf
+ 128, A
[16]);
570 br_enc64le(dbuf
+ 136, ~A
[17]);
571 br_enc64le(dbuf
+ 144, A
[18]);
572 br_enc64le(dbuf
+ 152, A
[19]);
573 br_enc64le(dbuf
+ 160, ~A
[20]);
574 br_enc64le(dbuf
+ 168, A
[21]);
575 br_enc64le(dbuf
+ 176, A
[22]);
576 br_enc64le(dbuf
+ 184, A
[23]);
577 br_enc64le(dbuf
+ 192, A
[24]);
584 memcpy(buf
, sc
->dbuf
+ dptr
, clen
);