From: Thomas Pornin Date: Fri, 23 Jun 2017 22:31:09 +0000 (+0200) Subject: Fixed modular reduction bug in the special field for P-256 (in some rare cases, value... X-Git-Tag: v0.5~10 X-Git-Url: https://bearssl.org/gitweb//home/git/?p=BearSSL;a=commitdiff_plain;h=2b738493bd16d57fdb12d38d03631981370259be;ds=sidebyside Fixed modular reduction bug in the special field for P-256 (in some rare cases, value would end up being negative, which would corrupt subsequent operations). --- diff --git a/src/ec/ec_p256_m15.c b/src/ec/ec_p256_m15.c index 06eee86..6ce57e0 100644 --- a/src/ec/ec_p256_m15.c +++ b/src/ec/ec_p256_m15.c @@ -1122,6 +1122,22 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) t[14] -= cc << 10; t[7] -= cc << 5; t[0] += cc; + + /* + * If the carry is negative, then after carry propagation, we may + * end up with a value which is negative, and we don't want that. + * Thus, in that case, we add the modulus. Note that the subtraction + * result, when the carry is negative, is always smaller than the + * modulus, so the extra addition will not make the value exceed + * twice the modulus. + */ + cc >>= 31; + t[0] -= cc; + t[7] += cc << 5; + t[14] += cc << 10; + t[17] -= cc << 3; + t[19] += cc << 9; + norm13(d, t, 20); } @@ -1195,6 +1211,22 @@ square_f256(uint32_t *d, const uint32_t *a) t[14] -= cc << 10; t[7] -= cc << 5; t[0] += cc; + + /* + * If the carry is negative, then after carry propagation, we may + * end up with a value which is negative, and we don't want that. + * Thus, in that case, we add the modulus. Note that the subtraction + * result, when the carry is negative, is always smaller than the + * modulus, so the extra addition will not make the value exceed + * twice the modulus. + */ + cc >>= 31; + t[0] -= cc; + t[7] += cc << 5; + t[14] += cc << 10; + t[17] -= cc << 3; + t[19] += cc << 9; + norm13(d, t, 20); } diff --git a/src/ec/ec_p256_m31.c b/src/ec/ec_p256_m31.c index 0631a13..0462c15 100644 --- a/src/ec/ec_p256_m31.c +++ b/src/ec/ec_p256_m31.c @@ -394,7 +394,7 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) uint32_t t[18]; uint64_t s[18]; uint64_t cc, x; - uint32_t z; + uint32_t z, c; int i; mul9(t, a, b); @@ -465,7 +465,15 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) d[8] &= 0xFFFF; /* - * Subtract cc*p. + * One extra round of reduction, for cc*2^256, which means + * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) + * value. If cc is negative, then it may happen (rarely, but + * not neglectibly so) that the result would be negative. In + * order to avoid that, if cc is negative, then we add the + * modulus once. Note that if cc is negative, then propagating + * that carry must yield a value lower than the modulus, so + * adding the modulus once will keep the final result under + * twice the modulus. */ z = (uint32_t)cc; d[3] -= z << 6; @@ -473,6 +481,12 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b) d[7] -= ARSH(z, 18); d[7] += (z << 14) & 0x3FFFFFFF; d[8] += ARSH(z, 16); + c = z >> 31; + d[0] -= c; + d[3] += c << 6; + d[6] += c << 12; + d[7] -= c << 14; + d[8] += c << 16; for (i = 0; i < 9; i ++) { uint32_t w; @@ -492,7 +506,7 @@ square_f256(uint32_t *d, const uint32_t *a) uint32_t t[18]; uint64_t s[18]; uint64_t cc, x; - uint32_t z; + uint32_t z, c; int i; square9(t, a); @@ -563,7 +577,15 @@ square_f256(uint32_t *d, const uint32_t *a) d[8] &= 0xFFFF; /* - * Subtract cc*p. + * One extra round of reduction, for cc*2^256, which means + * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) + * value. If cc is negative, then it may happen (rarely, but + * not neglectibly so) that the result would be negative. In + * order to avoid that, if cc is negative, then we add the + * modulus once. Note that if cc is negative, then propagating + * that carry must yield a value lower than the modulus, so + * adding the modulus once will keep the final result under + * twice the modulus. */ z = (uint32_t)cc; d[3] -= z << 6; @@ -571,6 +593,12 @@ square_f256(uint32_t *d, const uint32_t *a) d[7] -= ARSH(z, 18); d[7] += (z << 14) & 0x3FFFFFFF; d[8] += ARSH(z, 16); + c = z >> 31; + d[0] -= c; + d[3] += c << 6; + d[6] += c << 12; + d[7] -= c << 14; + d[8] += c << 16; for (i = 0; i < 9; i ++) { uint32_t w; diff --git a/test/test_crypto.c b/test/test_crypto.c index fd0b396..ae0c712 100644 --- a/test/test_crypto.c +++ b/test/test_crypto.c @@ -5062,6 +5062,39 @@ test_EC_inner(const char *sk, const char *sU, fflush(stdout); } +static void +test_EC_P256_carry_inner(const br_ec_impl *impl, const char *sP, const char *sQ) +{ + unsigned char P[65], Q[sizeof P], k[1]; + size_t plen, qlen; + + plen = hextobin(P, sP); + qlen = hextobin(Q, sQ); + if (plen != sizeof P || qlen != sizeof P) { + fprintf(stderr, "KAT is incorrect\n"); + exit(EXIT_FAILURE); + } + k[0] = 0x10; + if (impl->mul(P, plen, k, 1, BR_EC_secp256r1) != 1) { + fprintf(stderr, "P-256 multiplication failed\n"); + exit(EXIT_FAILURE); + } + check_equals("P256_carry", P, Q, plen); + printf("."); + fflush(stdout); +} + +static void +test_EC_P256_carry(const br_ec_impl *impl) +{ + test_EC_P256_carry_inner(impl, + "0435BAA24B2B6E1B3C88E22A383BD88CC4B9A3166E7BCF94FF6591663AE066B33B821EBA1B4FC8EA609A87EB9A9C9A1CCD5C9F42FA1365306F64D7CAA718B8C978", + "0447752A76CA890328D34E675C4971EC629132D1FC4863EDB61219B72C4E58DC5E9D51E7B293488CFD913C3CF20E438BB65C2BA66A7D09EABB45B55E804260C5EB"); + test_EC_P256_carry_inner(impl, + "04DCAE9D9CE211223602024A6933BD42F77B6BF4EAB9C8915F058C149419FADD2CC9FC0707B270A1B5362BA4D249AFC8AC3DA1EFCA8270176EEACA525B49EE19E6", + "048DAC7B0BE9B3206FCE8B24B6B4AEB122F2A67D13E536B390B6585CA193427E63F222388B5F51D744D6F5D47536D89EEEC89552BCB269E7828019C4410DFE980A"); +} + static void test_EC_KAT(const char *name, const br_ec_impl *impl, uint32_t curve_mask) { @@ -5074,6 +5107,7 @@ test_EC_KAT(const char *name, const br_ec_impl *impl, uint32_t curve_mask) "C9AFA9D845BA75166B5C215767B1D6934E50C3DB36E89B127B8A622B120F6721", "0460FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB67903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299", impl, BR_EC_secp256r1); + test_EC_P256_carry(impl); } if (curve_mask & ((uint32_t)1 << BR_EC_secp384r1)) { test_EC_inner(