2 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * Make a random integer of the provided size. The size is encoded.
29 * The header word is untouched.
32 mkrand(const br_prng_class
**rng
, uint32_t *x
, uint32_t esize
)
37 len
= (esize
+ 31) >> 5;
38 (*rng
)->generate(rng
, x
+ 1, len
* sizeof(uint32_t));
39 for (u
= 1; u
< len
; u
++) {
46 x
[len
] &= 0x7FFFFFFF >> (31 - m
);
51 * This is the big-endian unsigned representation of the product of
52 * all small primes from 13 to 1481.
54 static const unsigned char SMALL_PRIMES
[] = {
55 0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,
56 0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,
57 0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,
58 0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,
59 0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,
60 0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,
61 0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,
62 0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,
63 0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,
64 0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,
65 0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,
66 0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,
67 0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,
68 0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,
69 0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,
70 0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,
71 0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,
72 0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,
73 0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,
74 0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,
75 0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,
76 0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,
77 0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,
78 0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,
79 0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,
80 0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD
84 * We need temporary values for at least 7 integers of the same size
85 * as a factor (including header word); more space helps with performance
86 * (in modular exponentiations), but we much prefer to remain under
87 * 2 kilobytes in total, to save stack space. The macro TEMPS below
88 * exceeds 512 (which is a count in 32-bit words) when BR_MAX_RSA_SIZE
89 * is greater than 4464 (default value is 4096, so the 2-kB limit is
90 * maintained unless BR_MAX_RSA_SIZE was modified).
92 #define MAX(x, y) ((x) > (y) ? (x) : (y))
93 #define ROUND2(x) ((((x) + 1) >> 1) << 1)
95 #define TEMPS MAX(512, ROUND2(7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 61) / 31)))
98 * Perform trial division on a candidate prime. This computes
99 * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The
100 * br_i31_moddiv() function will report an error if y is not invertible
101 * modulo x. Returned value is 1 on success (none of the small primes
102 * divides x), 0 on error (a non-trivial GCD is obtained).
104 * This function assumes that x is odd.
107 trial_divisions(const uint32_t *x
, uint32_t *t
)
113 t
+= 1 + ((x
[0] + 31) >> 5);
114 x0i
= br_i31_ninv31(x
[1]);
115 br_i31_decode_reduce(y
, SMALL_PRIMES
, sizeof SMALL_PRIMES
, x
);
116 return br_i31_moddiv(y
, y
, x
, x0i
, t
);
120 * Perform n rounds of Miller-Rabin on the candidate prime x. This
121 * function assumes that x = 3 mod 4.
123 * Returned value is 1 on success (all rounds completed successfully),
127 miller_rabin(const br_prng_class
**rng
, const uint32_t *x
, int n
,
128 uint32_t *t
, size_t tlen
, br_i31_modpow_opt_type mp31
)
131 * Since x = 3 mod 4, the Miller-Rabin test is simple:
132 * - get a random base a (such that 1 < a < x-1)
133 * - compute z = a^((x-1)/2) mod x
134 * - if z != 1 and z != x-1, the number x is composite
136 * We generate bases 'a' randomly with a size which is
137 * one bit less than x, which ensures that a < x-1. It
138 * is not useful to verify that a > 1 because the probability
139 * that we get a value a equal to 0 or 1 is much smaller
140 * than the probability of our Miller-Rabin tests not to
141 * detect a composite, which is already quite smaller than the
142 * probability of the hardware misbehaving and returning a
143 * composite integer because of some glitch (e.g. bad RAM
144 * or ill-timed cosmic ray).
146 unsigned char *xm1d2
;
147 size_t xlen
, xm1d2_len
, xm1d2_len_u32
, u
;
153 * Compute (x-1)/2 (encoded).
155 xm1d2
= (unsigned char *)t
;
156 xm1d2_len
= ((x
[0] - (x
[0] >> 5)) + 7) >> 3;
157 br_i31_encode(xm1d2
, xm1d2_len
, x
);
159 for (u
= 0; u
< xm1d2_len
; u
++) {
163 xm1d2
[u
] = (unsigned char)((w
>> 1) | cc
);
168 * We used some words of the provided buffer for (x-1)/2.
170 xm1d2_len_u32
= (xm1d2_len
+ 3) >> 2;
172 tlen
-= xm1d2_len_u32
;
174 xlen
= (x
[0] + 31) >> 5;
175 asize
= x
[0] - 1 - EQ0(x
[0] & 31);
176 x0i
= br_i31_ninv31(x
[1]);
183 * Generate a random base. We don't need the base to be
184 * really uniform modulo x, so we just get a random
185 * number which is one bit shorter than x.
190 mkrand(rng
, a
, asize
);
193 * Compute a^((x-1)/2) mod x. We assume here that the
194 * function will not fail (the temporary array is large
198 t2len
= tlen
- 1 - xlen
;
199 if ((t2len
& 1) != 0) {
201 * Since the source array is 64-bit aligned and
202 * has an even number of elements (TEMPS), we
203 * can use the parity of the remaining length to
204 * detect and adjust alignment.
209 mp31(a
, xm1d2
, xm1d2_len
, x
, x0i
, t2
, t2len
);
212 * We must obtain either 1 or x-1. Note that x is odd,
213 * hence x-1 differs from x only in its low word (no
217 eqm1
= a
[1] ^ (x
[1] - 1);
218 for (u
= 2; u
<= xlen
; u
++) {
223 if ((EQ0(eq1
) | EQ0(eqm1
)) == 0) {
231 * Create a random prime of the provided size. 'size' is the _encoded_
232 * bit length. The two top bits and the two bottom bits are set to 1.
235 mkprime(const br_prng_class
**rng
, uint32_t *x
, uint32_t esize
,
236 uint32_t pubexp
, uint32_t *t
, size_t tlen
, br_i31_modpow_opt_type mp31
)
241 len
= (esize
+ 31) >> 5;
244 uint32_t m3
, m5
, m7
, m11
;
248 * Generate random bits. We force the two top bits and the
249 * two bottom bits to 1.
251 mkrand(rng
, x
, esize
);
252 if ((esize
& 31) == 0) {
253 x
[len
] |= 0x60000000;
254 } else if ((esize
& 31) == 1) {
255 x
[len
] |= 0x00000001;
256 x
[len
- 1] |= 0x40000000;
258 x
[len
] |= 0x00000003 << ((esize
& 31) - 2);
263 * Trial division with low primes (3, 5, 7 and 11). We
264 * use the following properties:
277 for (u
= 0; u
< len
; u
++) {
278 uint32_t w
, w3
, w5
, w7
, w11
;
281 w3
= (w
& 0xFFFF) + (w
>> 16); /* max: 98302 */
282 w5
= (w
& 0xFFFF) + (w
>> 16); /* max: 98302 */
283 w7
= (w
& 0x7FFF) + (w
>> 15); /* max: 98302 */
284 w11
= (w
& 0xFFFFF) + (w
>> 20); /* max: 1050622 */
287 m3
= (m3
& 0xFF) + (m3
>> 8); /* max: 1025 */
289 m5
+= w5
<< ((4 - u
) & 3);
290 m5
= (m5
& 0xFFF) + (m5
>> 12); /* max: 4479 */
293 m7
= (m7
& 0x1FF) + (m7
>> 9); /* max: 1280 */
302 m11
= (m11
& 0x3FF) + (m11
>> 10); /* max: 526847 */
305 m3
= (m3
& 0x3F) + (m3
>> 6); /* max: 78 */
306 m3
= (m3
& 0x0F) + (m3
>> 4); /* max: 18 */
307 m3
= ((m3
* 43) >> 5) & 3;
309 m5
= (m5
& 0xFF) + (m5
>> 8); /* max: 271 */
310 m5
= (m5
& 0x0F) + (m5
>> 4); /* max: 31 */
311 m5
-= 20 & -GT(m5
, 19);
312 m5
-= 10 & -GT(m5
, 9);
313 m5
-= 5 & -GT(m5
, 4);
315 m7
= (m7
& 0x3F) + (m7
>> 6); /* max: 82 */
316 m7
= (m7
& 0x07) + (m7
>> 3); /* max: 16 */
317 m7
= ((m7
* 147) >> 7) & 7;
320 * 2^5 = 32 = -1 mod 11.
322 m11
= (m11
& 0x3FF) + (m11
>> 10); /* max: 1536 */
323 m11
= (m11
& 0x3FF) + (m11
>> 10); /* max: 1023 */
324 m11
= (m11
& 0x1F) + 33 - (m11
>> 5); /* max: 64 */
325 m11
-= 44 & -GT(m11
, 43);
326 m11
-= 22 & -GT(m11
, 21);
327 m11
-= 11 & -GT(m11
, 10);
330 * If any of these residues is 0, then the candidate is
331 * not prime. Also, if pubexp is 3, 5, 7 or 11, and the
332 * corresponding residue is 1, then the candidate must
333 * be rejected, because we need e to be invertible
334 * modulo p-1. We can use simple comparisons here
335 * because they won't leak information on a candidate
336 * that we keep, only on one that we reject (and is thus
339 if (m3
== 0 || m5
== 0 || m7
== 0 || m11
== 0) {
342 if ((pubexp
== 3 && m3
== 1)
343 || (pubexp
== 5 && m5
== 1)
344 || (pubexp
== 7 && m7
== 1)
345 || (pubexp
== 11 && m11
== 1))
351 * More trial divisions.
353 if (!trial_divisions(x
, t
)) {
358 * Miller-Rabin algorithm. Since we selected a random
359 * integer, not a maliciously crafted integer, we can use
360 * relatively few rounds to lower the risk of a false
361 * positive (i.e. declaring prime a non-prime) under
362 * 2^(-80). It is not useful to lower the probability much
363 * below that, since that would be substantially below
364 * the probability of the hardware misbehaving. Sufficient
365 * numbers of rounds are extracted from the Handbook of
366 * Applied Cryptography, note 4.49 (page 149).
368 * Since we work on the encoded size (esize), we need to
369 * compare with encoded thresholds.
373 } else if (esize
< 464) {
375 } else if (esize
< 670) {
377 } else if (esize
< 877) {
379 } else if (esize
< 1341) {
385 if (miller_rabin(rng
, x
, rounds
, t
, tlen
, mp31
)) {
392 * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided
393 * as parameter (with announced bit length equal to that of p). This
394 * function computes d = 1/e mod p-1 (for an odd integer e). Returned
395 * value is 1 on success, 0 on error (an error is reported if e is not
396 * invertible modulo p-1).
398 * The temporary buffer (t) must have room for at least 4 integers of
402 invert_pubexp(uint32_t *d
, const uint32_t *m
, uint32_t e
, uint32_t *t
)
408 t
+= 1 + ((m
[0] + 31) >> 5);
411 * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.
413 br_i31_zero(d
, m
[0]);
415 br_i31_zero(f
, m
[0]);
416 f
[1] = e
& 0x7FFFFFFF;
418 r
= br_i31_moddiv(d
, f
, m
, br_i31_ninv31(m
[1]), t
);
421 * We really want d = 1/e mod p-1, with p-1 = 2m. By the CRT,
422 * the result is either the d we got, or d + m.
424 * Let's write e*d = 1 + k*m, for some integer k. Integers e
425 * and m are odd. If d is odd, then e*d is odd, which implies
426 * that k must be even; in that case, e*d = 1 + (k/2)*2m, and
427 * thus d is already fine. Conversely, if d is even, then k
428 * is odd, and we must add m to d in order to get the correct
431 br_i31_add(d
, m
, (uint32_t)(1 - (d
[1] & 1)));
437 * Swap two buffers in RAM. They must be disjoint.
440 bufswap(void *b1
, void *b2
, size_t len
)
443 unsigned char *buf1
, *buf2
;
447 for (u
= 0; u
< len
; u
++) {
458 br_rsa_i31_keygen_inner(const br_prng_class
**rng
,
459 br_rsa_private_key
*sk
, void *kbuf_priv
,
460 br_rsa_public_key
*pk
, void *kbuf_pub
,
461 unsigned size
, uint32_t pubexp
, br_i31_modpow_opt_type mp31
)
463 uint32_t esize_p
, esize_q
;
464 size_t plen
, qlen
, tlen
;
468 uint64_t t64
[TEMPS
>> 1]; /* for 64-bit alignment */
472 if (size
< BR_MIN_RSA_SIZE
|| size
> BR_MAX_RSA_SIZE
) {
477 } else if (pubexp
== 1 || (pubexp
& 1) == 0) {
481 esize_p
= (size
+ 1) >> 1;
482 esize_q
= size
- esize_p
;
485 sk
->plen
= (esize_p
+ 7) >> 3;
486 sk
->q
= sk
->p
+ sk
->plen
;
487 sk
->qlen
= (esize_q
+ 7) >> 3;
488 sk
->dp
= sk
->q
+ sk
->qlen
;
489 sk
->dplen
= sk
->plen
;
490 sk
->dq
= sk
->dp
+ sk
->dplen
;
491 sk
->dqlen
= sk
->qlen
;
492 sk
->iq
= sk
->dq
+ sk
->dqlen
;
493 sk
->iqlen
= sk
->plen
;
497 pk
->nlen
= (size
+ 7) >> 3;
498 pk
->e
= pk
->n
+ pk
->nlen
;
500 br_enc32be(pk
->e
, pubexp
);
501 while (*pk
->e
== 0) {
508 * We now switch to encoded sizes.
510 * floor((x * 16913) / (2^19)) is equal to floor(x/31) for all
511 * integers x from 0 to 34966; the intermediate product fits on
512 * 30 bits, thus we can use MUL31().
514 esize_p
+= MUL31(esize_p
, 16913) >> 19;
515 esize_q
+= MUL31(esize_q
, 16913) >> 19;
516 plen
= (esize_p
+ 31) >> 5;
517 qlen
= (esize_q
+ 31) >> 5;
521 tlen
= ((sizeof tmp
.t32
) / sizeof(uint32_t)) - (2 + plen
+ qlen
);
524 * When looking for primes p and q, we temporarily divide
525 * candidates by 2, in order to compute the inverse of the
530 mkprime(rng
, p
, esize_p
, pubexp
, t
, tlen
, mp31
);
532 if (invert_pubexp(t
, p
, pubexp
, t
+ 1 + plen
)) {
535 br_i31_encode(sk
->p
, sk
->plen
, p
);
536 br_i31_encode(sk
->dp
, sk
->dplen
, t
);
542 mkprime(rng
, q
, esize_q
, pubexp
, t
, tlen
, mp31
);
544 if (invert_pubexp(t
, q
, pubexp
, t
+ 1 + qlen
)) {
547 br_i31_encode(sk
->q
, sk
->qlen
, q
);
548 br_i31_encode(sk
->dq
, sk
->dqlen
, t
);
554 * If p and q have the same size, then it is possible that q > p
555 * (when the target modulus size is odd, we generate p with a
556 * greater bit length than q). If q > p, we want to swap p and q
557 * (and also dp and dq) for two reasons:
558 * - The final step below (inversion of q modulo p) is easier if
560 * - While BearSSL's RSA code is perfectly happy with RSA keys such
561 * that p < q, some other implementations have restrictions and
564 * Note that we can do a simple non-constant-time swap here,
565 * because the only information we leak here is that we insist on
566 * returning p and q such that p > q, which is not a secret.
568 if (esize_p
== esize_q
&& br_i31_sub(p
, q
, 0) == 1) {
569 bufswap(p
, q
, (1 + plen
) * sizeof *p
);
570 bufswap(sk
->p
, sk
->q
, sk
->plen
);
571 bufswap(sk
->dp
, sk
->dq
, sk
->dplen
);
575 * We have produced p, q, dp and dq. We can now compute iq = 1/q mod p.
577 * We ensured that p >= q, so this is just a matter of updating the
578 * header word for q (and possibly adding an extra word).
580 * Theoretically, the call below may fail, in case we were
581 * extraordinarily unlucky, and p = q. Another failure case is if
582 * Miller-Rabin failed us _twice_, and p and q are non-prime and
583 * have a factor in common. We report the error mostly because it
584 * is cheap and we can, but in practice this never happens (or, at
585 * least, it happens way less often than hardware glitches).
593 br_i31_zero(t
, p
[0]);
595 r
= br_i31_moddiv(t
, q
, p
, br_i31_ninv31(p
[1]), t
+ 1 + plen
);
596 br_i31_encode(sk
->iq
, sk
->iqlen
, t
);
599 * Compute the public modulus too, if required.
602 br_i31_zero(t
, p
[0]);
603 br_i31_mulacc(t
, p
, q
);
604 br_i31_encode(pk
->n
, pk
->nlen
, t
);