_ Git - BearSSL/blob - src/ec/ec_prime_i31.c

   1 /*
   2  * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining
   5  * a copy of this software and associated documentation files (the
   6  * "Software"), to deal in the Software without restriction, including
   7  * without limitation the rights to use, copy, modify, merge, publish,
   8  * distribute, sublicense, and/or sell copies of the Software, and to
   9  * permit persons to whom the Software is furnished to do so, subject to
  10  * the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24
  25 #include "inner.h"
  26
  27 /*
  28  * Parameters for supported curves (field modulus, and 'b' equation
  29  * parameter; both values use the 'i31' format, and 'b' is in Montgomery
  30  * representation).
  31  */
  32
  33 static const uint32_t P256_P[] = {
  34         0x00000108,
  35         0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
  36         0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
  37         0x000000FF
  38 };
  39
  40 static const uint32_t P256_R2[] = {
  41         0x00000108,
  42         0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
  43         0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
  44         0x00000000
  45 };
  46
  47 static const uint32_t P256_B[] = {
  48         0x00000108,
  49         0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
  50         0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
  51         0x0000000E
  52 };
  53
  54 static const uint32_t P384_P[] = {
  55         0x0000018C,
  56         0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
  57         0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
  58         0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
  59         0x00000FFF
  60 };
  61
  62 static const uint32_t P384_R2[] = {
  63         0x0000018C,
  64         0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
  65         0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
  66         0x00008000, 0x00008000, 0x00000000, 0x00000000,
  67         0x00000000
  68 };
  69
  70 static const uint32_t P384_B[] = {
  71         0x0000018C,
  72         0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
  73         0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
  74         0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
  75         0x000008A5
  76 };
  77
  78 static const uint32_t P521_P[] = {
  79         0x00000219,
  80         0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
  81         0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
  82         0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
  83         0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
  84         0x01FFFFFF
  85 };
  86
  87 static const uint32_t P521_R2[] = {
  88         0x00000219,
  89         0x00001000, 0x00000000, 0x00000000, 0x00000000,
  90         0x00000000, 0x00000000, 0x00000000, 0x00000000,
  91         0x00000000, 0x00000000, 0x00000000, 0x00000000,
  92         0x00000000, 0x00000000, 0x00000000, 0x00000000,
  93         0x00000000
  94 };
  95
  96 static const uint32_t P521_B[] = {
  97         0x00000219,
  98         0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
  99         0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
 100         0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
 101         0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
 102         0x00654FAE
 103 };
 104
 105 typedef struct {
 106         const uint32_t *p;
 107         const uint32_t *b;
 108         const uint32_t *R2;
 109         uint32_t p0i;
 110         size_t point_len;
 111 } curve_params;
 112
 113 static inline const curve_params *
 114 id_to_curve(int curve)
 115 {
 116         static const curve_params pp[] = {
 117                 { P256_P, P256_B, P256_R2, 0x00000001,  65 },
 118                 { P384_P, P384_B, P384_R2, 0x00000001,  97 },
 119                 { P521_P, P521_B, P521_R2, 0x00000001, 133 }
 120         };
 121
 122         return &pp[curve - BR_EC_secp256r1];
 123 }
 124
 125 #define I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)
 126
 127 /*
 128  * Type for a point in Jacobian coordinates:
 129  * -- three values, x, y and z, in Montgomery representation
 130  * -- affine coordinates are X = x / z^2 and Y = y / z^3
 131  * -- for the point at infinity, z = 0
 132  */
 133 typedef struct {
 134         uint32_t c[3][I31_LEN];
 135 } jacobian;
 136
 137 /*
 138  * We use a custom interpreter that uses a dozen registers, and
 139  * only six operations:
 140  *    MSET(d, a)       copy a into d
 141  *    MADD(d, a)       d = d+a (modular)
 142  *    MSUB(d, a)       d = d-a (modular)
 143  *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
 144  *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
 145  *    MTZ(d)           clear return value if d = 0
 146  * Destination of MMUL (d) must be distinct from operands (a and b).
 147  * There is no such constraint for MSUB and MADD.
 148  *
 149  * Registers include the operand coordinates, and temporaries.
 150  */
 151 #define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
 152 #define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
 153 #define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
 154 #define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
 155 #define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
 156 #define MTZ(d)          (0x5000 + ((d) << 8))
 157 #define ENDCODE         0
 158
 159 /*
 160  * Registers for the input operands.
 161  */
 162 #define P1x    0
 163 #define P1y    1
 164 #define P1z    2
 165 #define P2x    3
 166 #define P2y    4
 167 #define P2z    5
 168
 169 /*
 170  * Alternate names for the first input operand.
 171  */
 172 #define Px     0
 173 #define Py     1
 174 #define Pz     2
 175
 176 /*
 177  * Temporaries.
 178  */
 179 #define t1     6
 180 #define t2     7
 181 #define t3     8
 182 #define t4     9
 183 #define t5    10
 184 #define t6    11
 185 #define t7    12
 186
 187 /*
 188  * Extra scratch registers available when there is no second operand (e.g.
 189  * for "double" and "affine").
 190  */
 191 #define t8     3
 192 #define t9     4
 193 #define t10    5
 194
 195 /*
 196  * Doubling formulas are:
 197  *
 198  *   s = 4*x*y^2
 199  *   m = 3*(x + z^2)*(x - z^2)
 200  *   x' = m^2 - 2*s
 201  *   y' = m*(s - x') - 8*y^4
 202  *   z' = 2*y*z
 203  *
 204  * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
 205  * should. This case should not happen anyway, because our curves have
 206  * prime order, and thus do not contain any point of order 2.
 207  *
 208  * If P is infinity (z = 0), then again the formulas yield infinity,
 209  * which is correct. Thus, this code works for all points.
 210  *
 211  * Cost: 8 multiplications
 212  */
 213 static const uint16_t code_double[] = {
 214         /*
 215          * Compute z^2 (in t1).
 216          */
 217         MMUL(t1, Pz, Pz),
 218
 219         /*
 220          * Compute x-z^2 (in t2) and then x+z^2 (in t1).
 221          */
 222         MSET(t2, Px),
 223         MSUB(t2, t1),
 224         MADD(t1, Px),
 225
 226         /*
 227          * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
 228          */
 229         MMUL(t3, t1, t2),
 230         MSET(t1, t3),
 231         MADD(t1, t3),
 232         MADD(t1, t3),
 233
 234         /*
 235          * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
 236          */
 237         MMUL(t3, Py, Py),
 238         MADD(t3, t3),
 239         MMUL(t2, Px, t3),
 240         MADD(t2, t2),
 241
 242         /*
 243          * Compute x' = m^2 - 2*s.
 244          */
 245         MMUL(Px, t1, t1),
 246         MSUB(Px, t2),
 247         MSUB(Px, t2),
 248
 249         /*
 250          * Compute z' = 2*y*z.
 251          */
 252         MMUL(t4, Py, Pz),
 253         MSET(Pz, t4),
 254         MADD(Pz, t4),
 255
 256         /*
 257          * Compute y' = m*(s - x') - 8*y^4. Note that we already have
 258          * 2*y^2 in t3.
 259          */
 260         MSUB(t2, Px),
 261         MMUL(Py, t1, t2),
 262         MMUL(t4, t3, t3),
 263         MSUB(Py, t4),
 264         MSUB(Py, t4),
 265
 266         ENDCODE
 267 };
 268
 269 /*
 270  * Addtions formulas are:
 271  *
 272  *   u1 = x1 * z2^2
 273  *   u2 = x2 * z1^2
 274  *   s1 = y1 * z2^3
 275  *   s2 = y2 * z1^3
 276  *   h = u2 - u1
 277  *   r = s2 - s1
 278  *   x3 = r^2 - h^3 - 2 * u1 * h^2
 279  *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
 280  *   z3 = h * z1 * z2
 281  *
 282  * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
 283  * z3 == 0, so the result is correct.
 284  * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
 285  * not correct.
 286  * h == 0 only if u1 == u2; this happens in two cases:
 287  * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
 288  * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
 289  *
 290  * Thus, the following situations are not handled correctly:
 291  * -- P1 = 0 and P2 != 0
 292  * -- P1 != 0 and P2 = 0
 293  * -- P1 = P2
 294  * All other cases are properly computed. However, even in "incorrect"
 295  * situations, the three coordinates still are properly formed field
 296  * elements.
 297  *
 298  * The returned flag is cleared if r == 0. This happens in the following
 299  * cases:
 300  * -- Both points are on the same horizontal line (same Y coordinate).
 301  * -- Both points are infinity.
 302  * -- One point is infinity and the other is on line Y = 0.
 303  * The third case cannot happen with our curves (there is no valid point
 304  * on line Y = 0 since that would be a point of order 2). If the two
 305  * source points are non-infinity, then remains only the case where the
 306  * two points are on the same horizontal line.
 307  *
 308  * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
 309  * P2 != 0:
 310  * -- If the returned value is not the point at infinity, then it was properly
 311  * computed.
 312  * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
 313  * is indeed the point at infinity.
 314  * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
 315  * use the 'double' code.
 316  *
 317  * Cost: 16 multiplications
 318  */
 319 static const uint16_t code_add[] = {
 320         /*
 321          * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
 322          */
 323         MMUL(t3, P2z, P2z),
 324         MMUL(t1, P1x, t3),
 325         MMUL(t4, P2z, t3),
 326         MMUL(t3, P1y, t4),
 327
 328         /*
 329          * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
 330          */
 331         MMUL(t4, P1z, P1z),
 332         MMUL(t2, P2x, t4),
 333         MMUL(t5, P1z, t4),
 334         MMUL(t4, P2y, t5),
 335
 336         /*
 337          * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
 338          */
 339         MSUB(t2, t1),
 340         MSUB(t4, t3),
 341
 342         /*
 343          * Report cases where r = 0 through the returned flag.
 344          */
 345         MTZ(t4),
 346
 347         /*
 348          * Compute u1*h^2 (in t6) and h^3 (in t5).
 349          */
 350         MMUL(t7, t2, t2),
 351         MMUL(t6, t1, t7),
 352         MMUL(t5, t7, t2),
 353
 354         /*
 355          * Compute x3 = r^2 - h^3 - 2*u1*h^2.
 356          * t1 and t7 can be used as scratch registers.
 357          */
 358         MMUL(P1x, t4, t4),
 359         MSUB(P1x, t5),
 360         MSUB(P1x, t6),
 361         MSUB(P1x, t6),
 362
 363         /*
 364          * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
 365          */
 366         MSUB(t6, P1x),
 367         MMUL(P1y, t4, t6),
 368         MMUL(t1, t5, t3),
 369         MSUB(P1y, t1),
 370
 371         /*
 372          * Compute z3 = h*z1*z2.
 373          */
 374         MMUL(t1, P1z, P2z),
 375         MMUL(P1z, t1, t2),
 376
 377         ENDCODE
 378 };
 379
 380 /*
 381  * Check that the point is on the curve. This code snippet assumes the
 382  * following conventions:
 383  * -- Coordinates x and y have been freshly decoded in P1 (but not
 384  * converted to Montgomery coordinates yet).
 385  * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
 386  */
 387 static const uint16_t code_check[] = {
 388
 389         /* Convert x and y to Montgomery representation. */
 390         MMUL(t1, P1x, P2x),
 391         MMUL(t2, P1y, P2x),
 392         MSET(P1x, t1),
 393         MSET(P1y, t2),
 394
 395         /* Compute x^3 in t1. */
 396         MMUL(t2, P1x, P1x),
 397         MMUL(t1, P1x, t2),
 398
 399         /* Subtract 3*x from t1. */
 400         MSUB(t1, P1x),
 401         MSUB(t1, P1x),
 402         MSUB(t1, P1x),
 403
 404         /* Add b. */
 405         MADD(t1, P2y),
 406
 407         /* Compute y^2 in t2. */
 408         MMUL(t2, P1y, P1y),
 409
 410         /* Compare y^2 with x^3 - 3*x + b; they must match. */
 411         MSUB(t1, t2),
 412         MTZ(t1),
 413
 414         /* Set z to 1 (in Montgomery representation). */
 415         MMUL(P1z, P2x, P2z),
 416
 417         ENDCODE
 418 };
 419
 420 /*
 421  * Conversion back to affine coordinates. This code snippet assumes that
 422  * the z coordinate of P2 is set to 1 (not in Montgomery representation).
 423  */
 424 static const uint16_t code_affine[] = {
 425
 426         /* Save z*R in t1. */
 427         MSET(t1, P1z),
 428
 429         /* Compute z^3 in t2. */
 430         MMUL(t2, P1z, P1z),
 431         MMUL(t3, P1z, t2),
 432         MMUL(t2, t3, P2z),
 433
 434         /* Invert to (1/z^3) in t2. */
 435         MINV(t2, t3, t4),
 436
 437         /* Compute y. */
 438         MSET(t3, P1y),
 439         MMUL(P1y, t2, t3),
 440
 441         /* Compute (1/z^2) in t3. */
 442         MMUL(t3, t2, t1),
 443
 444         /* Compute x. */
 445         MSET(t2, P1x),
 446         MMUL(P1x, t2, t3),
 447
 448         ENDCODE
 449 };
 450
 451 static uint32_t
 452 run_code(jacobian *P1, const jacobian *P2,
 453         const curve_params *cc, const uint16_t *code)
 454 {
 455         uint32_t r;
 456         uint32_t t[13][I31_LEN];
 457         size_t u;
 458
 459         r = 1;
 460
 461         /*
 462          * Copy the two operands in the dedicated registers.
 463          */
 464         memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
 465         memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
 466
 467         /*
 468          * Run formulas.
 469          */
 470         for (u = 0;; u ++) {
 471                 unsigned op, d, a, b;
 472
 473                 op = code[u];
 474                 if (op == 0) {
 475                         break;
 476                 }
 477                 d = (op >> 8) & 0x0F;
 478                 a = (op >> 4) & 0x0F;
 479                 b = op & 0x0F;
 480                 op >>= 12;
 481                 switch (op) {
 482                         uint32_t ctl;
 483                         size_t plen;
 484                         unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
 485
 486                 case 0:
 487                         memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
 488                         break;
 489                 case 1:
 490                         ctl = br_i31_add(t[d], t[a], 1);
 491                         ctl |= NOT(br_i31_sub(t[d], cc->p, 0));
 492                         br_i31_sub(t[d], cc->p, ctl);
 493                         break;
 494                 case 2:
 495                         br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
 496                         break;
 497                 case 3:
 498                         br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
 499                         break;
 500                 case 4:
 501                         plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
 502                         br_i31_encode(tp, plen, cc->p);
 503                         tp[plen - 1] -= 2;
 504                         br_i31_modpow(t[d], tp, plen,
 505                                 cc->p, cc->p0i, t[a], t[b]);
 506                         break;
 507                 default:
 508                         r &= ~br_i31_iszero(t[d]);
 509                         break;
 510                 }
 511         }
 512
 513         /*
 514          * Copy back result.
 515          */
 516         memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
 517         return r;
 518 }
 519
 520 static void
 521 set_one(uint32_t *x, const uint32_t *p)
 522 {
 523         size_t plen;
 524
 525         plen = (p[0] + 63) >> 5;
 526         memset(x, 0, plen * sizeof *x);
 527         x[0] = p[0];
 528         x[1] = 0x00000001;
 529 }
 530
 531 static void
 532 point_zero(jacobian *P, const curve_params *cc)
 533 {
 534         memset(P, 0, sizeof *P);
 535         P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
 536 }
 537
 538 static inline void
 539 point_double(jacobian *P, const curve_params *cc)
 540 {
 541         run_code(P, P, cc, code_double);
 542 }
 543
 544 static inline uint32_t
 545 point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
 546 {
 547         return run_code(P1, P2, cc, code_add);
 548 }
 549
 550 static void
 551 point_mul(jacobian *P, const unsigned char *x, size_t xlen,
 552         const curve_params *cc)
 553 {
 554         /*
 555          * We do a simple double-and-add ladder with a 2-bit window
 556          * to make only one add every two doublings. We thus first
 557          * precompute 2P and 3P in some local buffers.
 558          *
 559          * We always perform two doublings and one addition; the
 560          * addition is with P, 2P and 3P and is done in a temporary
 561          * array.
 562          *
 563          * The addition code cannot handle cases where one of the
 564          * operands is infinity, which is the case at the start of the
 565          * ladder. We therefore need to maintain a flag that controls
 566          * this situation.
 567          */
 568         uint32_t qz;
 569         jacobian P2, P3, Q, T, U;
 570
 571         memcpy(&P2, P, sizeof P2);
 572         point_double(&P2, cc);
 573         memcpy(&P3, P, sizeof P3);
 574         point_add(&P3, &P2, cc);
 575
 576         point_zero(&Q, cc);
 577         qz = 1;
 578         while (xlen -- > 0) {
 579                 int k;
 580
 581                 for (k = 6; k >= 0; k -= 2) {
 582                         uint32_t bits;
 583                         uint32_t bnz;
 584
 585                         point_double(&Q, cc);
 586                         point_double(&Q, cc);
 587                         memcpy(&T, P, sizeof T);
 588                         memcpy(&U, &Q, sizeof U);
 589                         bits = (*x >> k) & (uint32_t)3;
 590                         bnz = NEQ(bits, 0);
 591                         CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
 592                         CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
 593                         point_add(&U, &T, cc);
 594                         CCOPY(bnz & qz, &Q, &T, sizeof Q);
 595                         CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
 596                         qz &= ~bnz;
 597                 }
 598                 x ++;
 599         }
 600         memcpy(P, &Q, sizeof Q);
 601 }
 602
 603 /*
 604  * Decode point into Jacobian coordinates. This function does not support
 605  * the point at infinity. If the point is invalid then this returns 0, but
 606  * the coordinates are still set to properly formed field elements.
 607  */
 608 static uint32_t
 609 point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
 610 {
 611         /*
 612          * Points must use uncompressed format:
 613          * -- first byte is 0x04;
 614          * -- coordinates X and Y use unsigned big-endian, with the same
 615          *    length as the field modulus.
 616          *
 617          * We don't support hybrid format (uncompressed, but first byte
 618          * has value 0x06 or 0x07, depending on the least significant bit
 619          * of Y) because it is rather useless, and explicitly forbidden
 620          * by PKIX (RFC 5480, section 2.2).
 621          *
 622          * We don't support compressed format either, because it is not
 623          * much used in practice (there are or were patent-related
 624          * concerns about point compression, which explains the lack of
 625          * generalised support). Also, point compression support would
 626          * need a bit more code.
 627          */
 628         const unsigned char *buf;
 629         size_t plen, zlen;
 630         uint32_t r;
 631         jacobian Q;
 632
 633         buf = src;
 634         point_zero(P, cc);
 635         plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
 636         if (len != 1 + (plen << 1)) {
 637                 return 0;
 638         }
 639         r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);
 640         r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
 641
 642         /*
 643          * Check first byte.
 644          */
 645         r &= EQ(buf[0], 0x04);
 646         /* obsolete
 647         r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
 648                 & ~(uint32_t)(buf[0] ^ buf[plen << 1]));
 649         */
 650
 651         /*
 652          * Convert coordinates and check that the point is valid.
 653          */
 654         zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);
 655         memcpy(Q.c[0], cc->R2, zlen);
 656         memcpy(Q.c[1], cc->b, zlen);
 657         set_one(Q.c[2], cc->p);
 658         r &= ~run_code(P, &Q, cc, code_check);
 659         return r;
 660 }
 661
 662 /*
 663  * Encode a point. This method assumes that the point is correct and is
 664  * not the point at infinity. Encoded size is always 1+2*plen, where
 665  * plen is the field modulus length, in bytes.
 666  */
 667 static void
 668 point_encode(void *dst, const jacobian *P, const curve_params *cc)
 669 {
 670         unsigned char *buf;
 671         uint32_t xbl;
 672         size_t plen;
 673         jacobian Q, T;
 674
 675         buf = dst;
 676         xbl = cc->p[0];
 677         xbl -= (xbl >> 5);
 678         plen = (xbl + 7) >> 3;
 679         buf[0] = 0x04;
 680         memcpy(&Q, P, sizeof *P);
 681         set_one(T.c[2], cc->p);
 682         run_code(&Q, &T, cc, code_affine);
 683         br_i31_encode(buf + 1, plen, Q.c[0]);
 684         br_i31_encode(buf + 1 + plen, plen, Q.c[1]);
 685 }
 686
 687 static const br_ec_curve_def *
 688 id_to_curve_def(int curve)
 689 {
 690         switch (curve) {
 691         case BR_EC_secp256r1:
 692                 return &br_secp256r1;
 693         case BR_EC_secp384r1:
 694                 return &br_secp384r1;
 695         case BR_EC_secp521r1:
 696                 return &br_secp521r1;
 697         }
 698         return NULL;
 699 }
 700
 701 static const unsigned char *
 702 api_generator(int curve, size_t *len)
 703 {
 704         const br_ec_curve_def *cd;
 705
 706         cd = id_to_curve_def(curve);
 707         *len = cd->generator_len;
 708         return cd->generator;
 709 }
 710
 711 static const unsigned char *
 712 api_order(int curve, size_t *len)
 713 {
 714         const br_ec_curve_def *cd;
 715
 716         cd = id_to_curve_def(curve);
 717         *len = cd->order_len;
 718         return cd->order;
 719 }
 720
 721 static size_t
 722 api_xoff(int curve, size_t *len)
 723 {
 724         api_generator(curve, len);
 725         *len >>= 1;
 726         return 1;
 727 }
 728
 729 static uint32_t
 730 api_mul(unsigned char *G, size_t Glen,
 731         const unsigned char *x, size_t xlen, int curve)
 732 {
 733         uint32_t r;
 734         const curve_params *cc;
 735         jacobian P;
 736
 737         cc = id_to_curve(curve);
 738         if (Glen != cc->point_len) {
 739                 return 0;
 740         }
 741         r = point_decode(&P, G, Glen, cc);
 742         point_mul(&P, x, xlen, cc);
 743         point_encode(G, &P, cc);
 744         return r;
 745 }
 746
 747 static size_t
 748 api_mulgen(unsigned char *R,
 749         const unsigned char *x, size_t xlen, int curve)
 750 {
 751         const unsigned char *G;
 752         size_t Glen;
 753
 754         G = api_generator(curve, &Glen);
 755         memcpy(R, G, Glen);
 756         api_mul(R, Glen, x, xlen, curve);
 757         return Glen;
 758 }
 759
 760 static uint32_t
 761 api_muladd(unsigned char *A, const unsigned char *B, size_t len,
 762         const unsigned char *x, size_t xlen,
 763         const unsigned char *y, size_t ylen, int curve)
 764 {
 765         uint32_t r, t, z;
 766         const curve_params *cc;
 767         jacobian P, Q;
 768
 769         /*
 770          * TODO: see about merging the two ladders. Right now, we do
 771          * two independent point multiplications, which is a bit
 772          * wasteful of CPU resources (but yields short code).
 773          */
 774
 775         cc = id_to_curve(curve);
 776         if (len != cc->point_len) {
 777                 return 0;
 778         }
 779         r = point_decode(&P, A, len, cc);
 780         if (B == NULL) {
 781                 size_t Glen;
 782
 783                 B = api_generator(curve, &Glen);
 784         }
 785         r &= point_decode(&Q, B, len, cc);
 786         point_mul(&P, x, xlen, cc);
 787         point_mul(&Q, y, ylen, cc);
 788
 789         /*
 790          * We want to compute P+Q. Since the base points A and B are distinct
 791          * from infinity, and the multipliers are non-zero and lower than the
 792          * curve order, then we know that P and Q are non-infinity. This
 793          * leaves two special situations to test for:
 794          * -- If P = Q then we must use point_double().
 795          * -- If P+Q = 0 then we must report an error.
 796          */
 797         t = point_add(&P, &Q, cc);
 798         point_double(&Q, cc);
 799         z = br_i31_iszero(P.c[2]);
 800
 801         /*
 802          * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
 803          * have the following:
 804          *
 805          *   z = 0, t = 0   return P (normal addition)
 806          *   z = 0, t = 1   return P (normal addition)
 807          *   z = 1, t = 0   return Q (a 'double' case)
 808          *   z = 1, t = 1   report an error (P+Q = 0)
 809          */
 810         CCOPY(z & ~t, &P, &Q, sizeof Q);
 811         point_encode(A, &P, cc);
 812         r &= ~(z & t);
 813
 814         return r;
 815 }
 816
 817 /* see bearssl_ec.h */
 818 const br_ec_impl br_ec_prime_i31 = {
 819         (uint32_t)0x03800000,
 820         &api_generator,
 821         &api_order,
 822         &api_xoff,
 823         &api_mul,
 824         &api_mulgen,
 825         &api_muladd
 826 };