#include "inner.h"
-#define U (1 + ((BR_MAX_RSA_FACTOR + 30) / 31))
+#define U (2 + ((BR_MAX_RSA_FACTOR + 30) / 31))
+#define TLEN (8 * U)
/* see bearssl_rsa.h */
uint32_t
{
const unsigned char *p, *q;
size_t plen, qlen;
- uint32_t tmp[6 * U];
- uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+ size_t fwlen;
uint32_t p0i, q0i;
size_t xlen;
+ uint32_t tmp[1 + TLEN];
+ long z;
+ uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+ uint32_t r;
/*
- * All our temporary buffers are from the tmp[] array.
- *
- * The mp, mq, s1, s2, t1 and t2 buffers are large enough to
- * contain a RSA factor. The t3 buffer can contain a complete
- * RSA modulus. t3 shares its storage space with s2, s1 and t1,
- * in that order (this is important, see below).
- */
- mq = tmp;
- mp = tmp + U;
- t2 = tmp + 2 * U;
- s2 = tmp + 3 * U;
- s1 = tmp + 4 * U;
- t1 = tmp + 5 * U;
- t3 = s2;
-
- /*
- * Compute the actual lengths (in bytes) of p and q, and check
- * that they fit within our stack buffers.
+ * Compute the actual lengths of p and q, in bytes.
+ * These lengths are not considered secret (we cannot really hide
+ * them anyway in constant-time code).
*/
p = sk->p;
plen = sk->plen;
q ++;
qlen --;
}
- if (plen > (BR_MAX_RSA_FACTOR >> 3)
- || qlen > (BR_MAX_RSA_FACTOR >> 3))
- {
- return 0;
+
+ /*
+ * Compute the maximum factor length, in words.
+ */
+ z = (long)(plen > qlen ? plen : qlen) << 3;
+ fwlen = 1;
+ while (z > 0) {
+ z -= 31;
+ fwlen ++;
}
/*
- * Decode p and q.
+ * Round up the word length to an even number.
*/
- br_i31_decode(mp, p, plen);
- br_i31_decode(mq, q, qlen);
+ fwlen += (fwlen & 1);
+
+ /*
+ * We need to fit at least 6 values in the stack buffer.
+ */
+ if (6 * fwlen > TLEN) {
+ return 0;
+ }
/*
* Compute signature length (in bytes).
xlen = (sk->n_bitlen + 7) >> 3;
/*
- * Compute s1 = x^dp mod p.
+ * Decode q.
*/
- p0i = br_i31_ninv31(mp[1]);
- br_i31_decode_reduce(s1, x, xlen, mp);
- br_i31_modpow(s1, sk->dp, sk->dplen, mp, p0i, t1, t2);
+ mq = tmp;
+ br_i31_decode(mq, q, qlen);
/*
* Compute s2 = x^dq mod q.
*/
q0i = br_i31_ninv31(mq[1]);
+ s2 = mq + fwlen;
br_i31_decode_reduce(s2, x, xlen, mq);
- br_i31_modpow(s2, sk->dq, sk->dqlen, mq, q0i, t1, t2);
+ r = br_i31_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
+ mq + 2 * fwlen, TLEN - 2 * fwlen);
+
+ /*
+ * Decode p.
+ */
+ mp = mq + 2 * fwlen;
+ br_i31_decode(mp, p, plen);
+
+ /*
+ * Compute s1 = x^dp mod p.
+ */
+ p0i = br_i31_ninv31(mp[1]);
+ s1 = mq + 3 * fwlen;
+ br_i31_decode_reduce(s1, x, xlen, mp);
+ r &= br_i31_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
+ mq + 4 * fwlen, TLEN - 4 * fwlen);
/*
* Compute:
* inverse of q modulo p), we also tolerate improperly large
* values for this parameter.
*/
+ t1 = mq + 4 * fwlen;
+ t2 = mq + 5 * fwlen;
br_i31_reduce(t2, s2, mp);
br_i31_add(s1, mp, br_i31_sub(s1, t2, 1));
br_i31_to_monty(s1, mp);
* All these operations are non-modular.
*
* We need mq, s2 and t2. We use the t3 buffer as destination.
- * The buffers mp, s1 and t1 are no longer needed. Moreover,
- * the first step is to copy s2 into the destination buffer t3.
- * We thus arranged for t3 to actually share space with s2, and
- * to be followed by the space formerly used by s1 and t1.
+ * The buffers mp, s1 and t1 are no longer needed, so we can
+ * reuse them for t3. Moreover, the first step of the computation
+ * is to copy s2 into t3, after which s2 is not needed. Right
+ * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5.
+ * Therefore, we have ample room for t3 by simply using s2.
*/
+ t3 = s2;
br_i31_mulacc(t3, mq, t2);
/*
* The only error conditions remaining at that point are invalid
* values for p and q (even integers).
*/
- return p0i & q0i & 1;
+ return p0i & q0i & r;
}