2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * Parameters for supported curves (field modulus, and 'b' equation
29 * parameter; both values use the 'i31' format, and 'b' is in Montgomery
33 static const uint32_t P256_P
[] = {
35 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
36 0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
40 static const uint32_t P256_R2
[] = {
42 0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
43 0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
47 static const uint32_t P256_B
[] = {
49 0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
50 0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
54 static const uint32_t P384_P
[] = {
56 0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
57 0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
58 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
62 static const uint32_t P384_R2
[] = {
64 0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
65 0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
66 0x00008000, 0x00008000, 0x00000000, 0x00000000,
70 static const uint32_t P384_B
[] = {
72 0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
73 0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
74 0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
78 static const uint32_t P521_P
[] = {
80 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
81 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
82 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
83 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
87 static const uint32_t P521_R2
[] = {
89 0x00001000, 0x00000000, 0x00000000, 0x00000000,
90 0x00000000, 0x00000000, 0x00000000, 0x00000000,
91 0x00000000, 0x00000000, 0x00000000, 0x00000000,
92 0x00000000, 0x00000000, 0x00000000, 0x00000000,
96 static const uint32_t P521_B
[] = {
98 0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
99 0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
100 0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
101 0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
113 static inline const curve_params
*
114 id_to_curve(int curve
)
116 static const curve_params pp
[] = {
117 { P256_P
, P256_B
, P256_R2
, 0x00000001, 65 },
118 { P384_P
, P384_B
, P384_R2
, 0x00000001, 97 },
119 { P521_P
, P521_B
, P521_R2
, 0x00000001, 133 }
122 return &pp
[curve
- BR_EC_secp256r1
];
125 #define I31_LEN ((BR_MAX_EC_SIZE + 61) / 31)
128 * Type for a point in Jacobian coordinates:
129 * -- three values, x, y and z, in Montgomery representation
130 * -- affine coordinates are X = x / z^2 and Y = y / z^3
131 * -- for the point at infinity, z = 0
134 uint32_t c
[3][I31_LEN
];
138 * We use a custom interpreter that uses a dozen registers, and
139 * only six operations:
140 * MSET(d, a) copy a into d
141 * MADD(d, a) d = d+a (modular)
142 * MSUB(d, a) d = d-a (modular)
143 * MMUL(d, a, b) d = a*b (Montgomery multiplication)
144 * MINV(d, a, b) invert d modulo p; a and b are used as scratch registers
145 * MTZ(d) clear return value if d = 0
146 * Destination of MMUL (d) must be distinct from operands (a and b).
147 * There is no such constraint for MSUB and MADD.
149 * Registers include the operand coordinates, and temporaries.
151 #define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4))
152 #define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4))
153 #define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4))
154 #define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b))
155 #define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b))
156 #define MTZ(d) (0x5000 + ((d) << 8))
160 * Registers for the input operands.
170 * Alternate names for the first input operand.
188 * Extra scratch registers available when there is no second operand (e.g.
189 * for "double" and "affine").
196 * Doubling formulas are:
199 * m = 3*(x + z^2)*(x - z^2)
201 * y' = m*(s - x') - 8*y^4
204 * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
205 * should. This case should not happen anyway, because our curves have
206 * prime order, and thus do not contain any point of order 2.
208 * If P is infinity (z = 0), then again the formulas yield infinity,
209 * which is correct. Thus, this code works for all points.
211 * Cost: 8 multiplications
213 static const uint16_t code_double
[] = {
215 * Compute z^2 (in t1).
220 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
227 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
235 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
243 * Compute x' = m^2 - 2*s.
250 * Compute z' = 2*y*z.
257 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
270 * Addtions formulas are:
278 * x3 = r^2 - h^3 - 2 * u1 * h^2
279 * y3 = r * (u1 * h^2 - x3) - s1 * h^3
282 * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
283 * z3 == 0, so the result is correct.
284 * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
286 * h == 0 only if u1 == u2; this happens in two cases:
287 * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
288 * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
290 * Thus, the following situations are not handled correctly:
291 * -- P1 = 0 and P2 != 0
292 * -- P1 != 0 and P2 = 0
294 * All other cases are properly computed. However, even in "incorrect"
295 * situations, the three coordinates still are properly formed field
298 * The returned flag is cleared if r == 0. This happens in the following
300 * -- Both points are on the same horizontal line (same Y coordinate).
301 * -- Both points are infinity.
302 * -- One point is infinity and the other is on line Y = 0.
303 * The third case cannot happen with our curves (there is no valid point
304 * on line Y = 0 since that would be a point of order 2). If the two
305 * source points are non-infinity, then remains only the case where the
306 * two points are on the same horizontal line.
308 * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
310 * -- If the returned value is not the point at infinity, then it was properly
312 * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
313 * is indeed the point at infinity.
314 * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
315 * use the 'double' code.
317 * Cost: 16 multiplications
319 static const uint16_t code_add
[] = {
321 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
329 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
337 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
343 * Report cases where r = 0 through the returned flag.
348 * Compute u1*h^2 (in t6) and h^3 (in t5).
355 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
356 * t1 and t7 can be used as scratch registers.
364 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
372 * Compute z3 = h*z1*z2.
381 * Check that the point is on the curve. This code snippet assumes the
382 * following conventions:
383 * -- Coordinates x and y have been freshly decoded in P1 (but not
384 * converted to Montgomery coordinates yet).
385 * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
387 static const uint16_t code_check
[] = {
389 /* Convert x and y to Montgomery representation. */
395 /* Compute x^3 in t1. */
399 /* Subtract 3*x from t1. */
407 /* Compute y^2 in t2. */
410 /* Compare y^2 with x^3 - 3*x + b; they must match. */
414 /* Set z to 1 (in Montgomery representation). */
421 * Conversion back to affine coordinates. This code snippet assumes that
422 * the z coordinate of P2 is set to 1 (not in Montgomery representation).
424 static const uint16_t code_affine
[] = {
426 /* Save z*R in t1. */
429 /* Compute z^3 in t2. */
434 /* Invert to (1/z^3) in t2. */
441 /* Compute (1/z^2) in t3. */
452 run_code(jacobian
*P1
, const jacobian
*P2
,
453 const curve_params
*cc
, const uint16_t *code
)
456 uint32_t t
[13][I31_LEN
];
462 * Copy the two operands in the dedicated registers.
464 memcpy(t
[P1x
], P1
->c
, 3 * I31_LEN
* sizeof(uint32_t));
465 memcpy(t
[P2x
], P2
->c
, 3 * I31_LEN
* sizeof(uint32_t));
471 unsigned op
, d
, a
, b
;
477 d
= (op
>> 8) & 0x0F;
478 a
= (op
>> 4) & 0x0F;
484 unsigned char tp
[(BR_MAX_EC_SIZE
+ 7) >> 3];
487 memcpy(t
[d
], t
[a
], I31_LEN
* sizeof(uint32_t));
490 ctl
= br_i31_add(t
[d
], t
[a
], 1);
491 ctl
|= NOT(br_i31_sub(t
[d
], cc
->p
, 0));
492 br_i31_sub(t
[d
], cc
->p
, ctl
);
495 br_i31_add(t
[d
], cc
->p
, br_i31_sub(t
[d
], t
[a
], 1));
498 br_i31_montymul(t
[d
], t
[a
], t
[b
], cc
->p
, cc
->p0i
);
501 plen
= (cc
->p
[0] - (cc
->p
[0] >> 5) + 7) >> 3;
502 br_i31_encode(tp
, plen
, cc
->p
);
504 br_i31_modpow(t
[d
], tp
, plen
,
505 cc
->p
, cc
->p0i
, t
[a
], t
[b
]);
508 r
&= ~br_i31_iszero(t
[d
]);
516 memcpy(P1
->c
, t
[P1x
], 3 * I31_LEN
* sizeof(uint32_t));
521 set_one(uint32_t *x
, const uint32_t *p
)
525 plen
= (p
[0] + 63) >> 5;
526 memset(x
, 0, plen
* sizeof *x
);
532 point_zero(jacobian
*P
, const curve_params
*cc
)
534 memset(P
, 0, sizeof *P
);
535 P
->c
[0][0] = P
->c
[1][0] = P
->c
[2][0] = cc
->p
[0];
539 point_double(jacobian
*P
, const curve_params
*cc
)
541 run_code(P
, P
, cc
, code_double
);
544 static inline uint32_t
545 point_add(jacobian
*P1
, const jacobian
*P2
, const curve_params
*cc
)
547 return run_code(P1
, P2
, cc
, code_add
);
551 point_mul(jacobian
*P
, const unsigned char *x
, size_t xlen
,
552 const curve_params
*cc
)
555 * We do a simple double-and-add ladder with a 2-bit window
556 * to make only one add every two doublings. We thus first
557 * precompute 2P and 3P in some local buffers.
559 * We always perform two doublings and one addition; the
560 * addition is with P, 2P and 3P and is done in a temporary
563 * The addition code cannot handle cases where one of the
564 * operands is infinity, which is the case at the start of the
565 * ladder. We therefore need to maintain a flag that controls
569 jacobian P2
, P3
, Q
, T
, U
;
571 memcpy(&P2
, P
, sizeof P2
);
572 point_double(&P2
, cc
);
573 memcpy(&P3
, P
, sizeof P3
);
574 point_add(&P3
, &P2
, cc
);
578 while (xlen
-- > 0) {
581 for (k
= 6; k
>= 0; k
-= 2) {
585 point_double(&Q
, cc
);
586 point_double(&Q
, cc
);
587 memcpy(&T
, P
, sizeof T
);
588 memcpy(&U
, &Q
, sizeof U
);
589 bits
= (*x
>> k
) & (uint32_t)3;
591 CCOPY(EQ(bits
, 2), &T
, &P2
, sizeof T
);
592 CCOPY(EQ(bits
, 3), &T
, &P3
, sizeof T
);
593 point_add(&U
, &T
, cc
);
594 CCOPY(bnz
& qz
, &Q
, &T
, sizeof Q
);
595 CCOPY(bnz
& ~qz
, &Q
, &U
, sizeof Q
);
600 memcpy(P
, &Q
, sizeof Q
);
604 * Decode point into Jacobian coordinates. This function does not support
605 * the point at infinity. If the point is invalid then this returns 0, but
606 * the coordinates are still set to properly formed field elements.
609 point_decode(jacobian
*P
, const void *src
, size_t len
, const curve_params
*cc
)
612 * Points must use uncompressed format:
613 * -- first byte is 0x04;
614 * -- coordinates X and Y use unsigned big-endian, with the same
615 * length as the field modulus.
617 * We don't support hybrid format (uncompressed, but first byte
618 * has value 0x06 or 0x07, depending on the least significant bit
619 * of Y) because it is rather useless, and explicitly forbidden
620 * by PKIX (RFC 5480, section 2.2).
622 * We don't support compressed format either, because it is not
623 * much used in practice (there are or were patent-related
624 * concerns about point compression, which explains the lack of
625 * generalised support). Also, point compression support would
626 * need a bit more code.
628 const unsigned char *buf
;
635 plen
= (cc
->p
[0] - (cc
->p
[0] >> 5) + 7) >> 3;
636 if (len
!= 1 + (plen
<< 1)) {
639 r
= br_i31_decode_mod(P
->c
[0], buf
+ 1, plen
, cc
->p
);
640 r
&= br_i31_decode_mod(P
->c
[1], buf
+ 1 + plen
, plen
, cc
->p
);
645 r
&= EQ(buf
[0], 0x04);
647 r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
648 & ~(uint32_t)(buf[0] ^ buf[plen << 1]));
652 * Convert coordinates and check that the point is valid.
654 zlen
= ((cc
->p
[0] + 63) >> 5) * sizeof(uint32_t);
655 memcpy(Q
.c
[0], cc
->R2
, zlen
);
656 memcpy(Q
.c
[1], cc
->b
, zlen
);
657 set_one(Q
.c
[2], cc
->p
);
658 r
&= ~run_code(P
, &Q
, cc
, code_check
);
663 * Encode a point. This method assumes that the point is correct and is
664 * not the point at infinity. Encoded size is always 1+2*plen, where
665 * plen is the field modulus length, in bytes.
668 point_encode(void *dst
, const jacobian
*P
, const curve_params
*cc
)
678 plen
= (xbl
+ 7) >> 3;
680 memcpy(&Q
, P
, sizeof *P
);
681 set_one(T
.c
[2], cc
->p
);
682 run_code(&Q
, &T
, cc
, code_affine
);
683 br_i31_encode(buf
+ 1, plen
, Q
.c
[0]);
684 br_i31_encode(buf
+ 1 + plen
, plen
, Q
.c
[1]);
687 static const br_ec_curve_def
*
688 id_to_curve_def(int curve
)
691 case BR_EC_secp256r1
:
692 return &br_secp256r1
;
693 case BR_EC_secp384r1
:
694 return &br_secp384r1
;
695 case BR_EC_secp521r1
:
696 return &br_secp521r1
;
701 static const unsigned char *
702 api_generator(int curve
, size_t *len
)
704 const br_ec_curve_def
*cd
;
706 cd
= id_to_curve_def(curve
);
707 *len
= cd
->generator_len
;
708 return cd
->generator
;
711 static const unsigned char *
712 api_order(int curve
, size_t *len
)
714 const br_ec_curve_def
*cd
;
716 cd
= id_to_curve_def(curve
);
717 *len
= cd
->order_len
;
722 api_xoff(int curve
, size_t *len
)
724 api_generator(curve
, len
);
730 api_mul(unsigned char *G
, size_t Glen
,
731 const unsigned char *x
, size_t xlen
, int curve
)
734 const curve_params
*cc
;
737 cc
= id_to_curve(curve
);
738 if (Glen
!= cc
->point_len
) {
741 r
= point_decode(&P
, G
, Glen
, cc
);
742 point_mul(&P
, x
, xlen
, cc
);
743 point_encode(G
, &P
, cc
);
748 api_mulgen(unsigned char *R
,
749 const unsigned char *x
, size_t xlen
, int curve
)
751 const unsigned char *G
;
754 G
= api_generator(curve
, &Glen
);
756 api_mul(R
, Glen
, x
, xlen
, curve
);
761 api_muladd(unsigned char *A
, const unsigned char *B
, size_t len
,
762 const unsigned char *x
, size_t xlen
,
763 const unsigned char *y
, size_t ylen
, int curve
)
766 const curve_params
*cc
;
770 * TODO: see about merging the two ladders. Right now, we do
771 * two independent point multiplications, which is a bit
772 * wasteful of CPU resources (but yields short code).
775 cc
= id_to_curve(curve
);
776 if (len
!= cc
->point_len
) {
779 r
= point_decode(&P
, A
, len
, cc
);
783 B
= api_generator(curve
, &Glen
);
785 r
&= point_decode(&Q
, B
, len
, cc
);
786 point_mul(&P
, x
, xlen
, cc
);
787 point_mul(&Q
, y
, ylen
, cc
);
790 * We want to compute P+Q. Since the base points A and B are distinct
791 * from infinity, and the multipliers are non-zero and lower than the
792 * curve order, then we know that P and Q are non-infinity. This
793 * leaves two special situations to test for:
794 * -- If P = Q then we must use point_double().
795 * -- If P+Q = 0 then we must report an error.
797 t
= point_add(&P
, &Q
, cc
);
798 point_double(&Q
, cc
);
799 z
= br_i31_iszero(P
.c
[2]);
802 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
803 * have the following:
805 * z = 0, t = 0 return P (normal addition)
806 * z = 0, t = 1 return P (normal addition)
807 * z = 1, t = 0 return Q (a 'double' case)
808 * z = 1, t = 1 report an error (P+Q = 0)
810 CCOPY(z
& ~t
, &P
, &Q
, sizeof Q
);
811 point_encode(A
, &P
, cc
);
817 /* see bearssl_ec.h */
818 const br_ec_impl br_ec_prime_i31
= {
819 (uint32_t)0x03800000,